PyPI - describealign - Versions diffs - 2.0.1__tar.gz → 2.0.3__tar.gz - Mend

describealign 2.0.1.tar.gz → 2.0.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

{describealign-2.0.1 → describealign-2.0.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: describealign
-Version: 2.0.1
+Version: 2.0.3
 Summary: Combines videos with matching audio files (e.g. audio descriptions)
 Author-email: Julian Brown <julbean@proton.me>
 License-Expression: GPL-3.0-only
@@ -13,7 +13,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ffmpeg_python~=0.2.0
-Requires-Dist: static-ffmpeg~=2.5
+Requires-Dist: static-ffmpeg~=3.0
 Requires-Dist: matplotlib~=3.9
 Requires-Dist: numpy<3.0,>=1.21
 Requires-Dist: scipy~=1.10

{describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: describealign
-Version: 2.0.1
+Version: 2.0.3
 Summary: Combines videos with matching audio files (e.g. audio descriptions)
 Author-email: Julian Brown <julbean@proton.me>
 License-Expression: GPL-3.0-only
@@ -13,7 +13,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: ffmpeg_python~=0.2.0
-Requires-Dist: static-ffmpeg~=2.5
+Requires-Dist: static-ffmpeg~=3.0
 Requires-Dist: matplotlib~=3.9
 Requires-Dist: numpy<3.0,>=1.21
 Requires-Dist: scipy~=1.10

{describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/requires.txt RENAMED Viewed

@@ -1,5 +1,5 @@
 ffmpeg_python~=0.2.0
-static-ffmpeg~=2.5
+static-ffmpeg~=3.0
 matplotlib~=3.9
 numpy<3.0,>=1.21
 scipy~=1.10

{describealign-2.0.1 → describealign-2.0.3}/describealign.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = '2.0.1'
+__version__ = '2.0.3'
 # combines videos with matching audio files (e.g. audio descriptions)
 # input: video or folder of videos and an audio file or folder of audio files
@@ -141,6 +141,11 @@ def run_async_ffmpeg_command(command, media_arr, err_msg):
     print(e.stderr.decode('utf-8'))
     raise
+def get_ffmpeg_version():
+  ffmpeg_command = ffmpeg.input('').output('', version='')
+  stdout, _ = run_ffmpeg_command(ffmpeg_command, "get version information")
+  return float(str(stdout).split('version ')[1][:2])
 # read audio from file with ffmpeg and convert to numpy array
 def parse_audio_from_file(media_file, num_channels=2):
   # retrieve only the first audio track, injecting silence/trimming to force timestamps to match up
@@ -149,7 +154,6 @@ def parse_audio_from_file(media_file, num_channels=2):
                                                    af='aresample=async=1:first_pts=0', map='0:a:0',
                                                    ac=num_channels, ar=AUDIO_SAMPLE_RATE, loglevel='error')
   media_stream, _ = run_ffmpeg_command(ffmpeg_command, f"parse audio from input file: {media_file}")
-  # media_arr = np.frombuffer(media_stream, np.int16).astype(np.float32).reshape((-1, num_channels)).T
   media_arr = np.frombuffer(media_stream, np.int16).astype(np.float16).reshape((-1, num_channels)).T
   return media_arr
@@ -452,7 +456,8 @@ def get_closest_key_frame_time(video_file, time):
 # outputs a new media file with the replaced audio (which includes audio descriptions)
 def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, audio_desc_file=None,
-                                 setts_cmd=None, video_offset=None, after_start_key_frame=None):
+                                 setts_cmd=None, video_offset=None, after_start_key_frame=None,
+                                 median_slope=1.):
   # if a media array is given, stretch_audio is enabled and media_arr should be added to the video
   if media_arr is not None:
     media_input = ffmpeg.input('pipe:', format='s16le', acodec='pcm_s16le', ac=2, ar=AUDIO_SAMPLE_RATE)
@@ -481,6 +486,8 @@ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, au
     audio_codec = 'copy' if os.path.splitext(audio_desc_file)[1] != '.wav' else 'aac'
     # flac audio may only have experimental support in some video containers (e.g. mp4)
     standards = 'normal' if os.path.splitext(audio_desc_file)[1] != '.flac' else 'experimental'
+    # stretch subtitle durations along with video so they don't overlap or have gaps
+    sub_stretch = f':duration=\'DURATION*{1./median_slope:.6f}\''
     # add frag_keyframe flag to prevent some players from ignoring audio/video start offsets
     # set both pts and dts simultaneously in video manually, as ts= does not do the same thing
     write_command = ffmpeg.output(media_input, original_video, output_filename,
@@ -488,16 +495,30 @@ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, au
                                   max_interleave_delta='0', loglevel='error',
                                   strict=standards, movflags='frag_keyframe',
                                   **{'bsf:v': f'setts=pts=\'{setts_cmd}\':dts=\'{setts_cmd}\'',
-                                     'bsf:s': f'setts=ts=\'{setts_cmd}\'',
+                                     'bsf:s': f'setts=ts=\'{setts_cmd}\'' + sub_stretch,
                                      "disposition:a:0": "default+visual_impaired+descriptions",
                                      "metadata:s:a:0": "title=AD"}).overwrite_output()
     run_ffmpeg_command(write_command, f"write output file: {output_filename}")
+def get_static_ffmpeg_version():
+  # if running from compiled binary, assume correct version of static_ffmpeg
+  if "__compiled__" in globals() or getattr(sys, 'frozen', False):
+    return 3
+  import importlib
+  static_ffmpeg_version = importlib.metadata.version('static_ffmpeg')
+  return float(static_ffmpeg_version[:2])
 # check whether static_ffmpeg has already installed ffmpeg and ffprobe
 def is_ffmpeg_installed():
   ffmpeg_dir = static_ffmpeg.run.get_platform_dir()
   indicator_file = os.path.join(ffmpeg_dir, "installed.crumb")
-  return os.path.exists(indicator_file)
+  if not os.path.exists(indicator_file):
+    return False
+  if get_ffmpeg_version() < 6:
+    print("Old ffmpeg version detected, updating to newer version...")
+    os.remove(indicator_file)
+    return False
+  return True
 def get_energy(arr):
   # downsample of 105, hann size 15, downsample by 2 gives 210 samples per second, ~65 halfwindows/second
@@ -599,6 +620,9 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
     bins = np.floor(bins).astype(int)
     np.clip(bins, 0, 6, out=bins)
     audio_desc_bins.append(np.dot(bins, 7**np.arange(num_bins)).tolist())
+  del feature
+  del norm
+  del bins
   def pairwise_intersection(set1, set2, set3):
     return (set1 & set2).union((set1 & set3), (set2 & set3))
@@ -635,6 +659,12 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
       best_so_far.add((video_index, audio_desc_index, cum_qual))
       backpointers[(video_index, audio_desc_index)] = (prev_video_index, prev_audio_desc_index)
   del video_dicts
+  del video_dict
+  del audio_desc_bins
+  del video_features_mean_sub
+  del audio_desc_features_mean_sub
+  del video_uniform_norms
+  del audio_desc_uniform_norms
   path = [best_so_far[-1][:2]]
   while path[-1][:2] in backpointers:
     # failsafe to prevent an infinite loop that should never happen anyways
@@ -887,6 +917,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
         seen_points.add(point)
         points[i].append((j, cluster_index, qual))
   del seen_points
+  del video_interp
   points = [sorted(point) for point in points]
   best_so_far = SortedList(key=lambda x:x[0])
@@ -897,6 +928,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
   prev_cache[0] = (0, 0, -1, 0, 0)  # video_index, audio_desc_index, cluster_index, qual, cum_qual
   reversed_min_points = [min(x)[0] if len(x) > 0 else np.inf for x in points[::-1]]
   forward_min = list(itertools.accumulate(reversed_min_points, min))[::-1]
+  del reversed_min_points
   for i in range(len(audio_desc_features_scaled)):
     for j, cluster_index, qual in points[i]:
       cur_index = best_so_far.bisect_right((j,))
@@ -926,6 +958,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
       if cluster_last[3] < cum_qual_cluster_jump:
         clusters_best_so_far[cluster_index] = (j, i, qual, cum_qual_cluster_jump)
       backpointers[(j, i)] = (prev_j, prev_i, prev_cluster_index, prev_qual, best_prev_cum_qual)
   path = [best_so_far[-1]]
   while path[-1][:2] in backpointers:
     path.append(backpointers[path[-1][:2]])
@@ -1005,6 +1038,9 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
   # if ffmpeg isn't installed, install it
   if not is_ffmpeg_installed():
+    if get_static_ffmpeg_version() < 3:
+      print(f"  ERROR: outdated static_ffmpeg version")
+      raise ImportError("static_ffmpeg must be at least version 3.0")
     print("Downloading and installing ffmpeg (media editor, 50 MB download)...")
     get_ffmpeg()
     if not is_ffmpeg_installed():
@@ -1084,7 +1120,8 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
       video_arr *= (2**15 - 2.) / np.max(np.abs(video_arr))
       print("  processing output file...                   \r", end='')
-      write_replaced_media_to_disk(output_filename, video_arr, None if has_audio_extension else video_file)
+      write_replaced_media_to_disk(output_filename, video_arr, None if has_audio_extension else video_file,
+                                   median_slope=median_slope)
       del video_arr
     else:
       video_offset = video_times[0] - audio_desc_times[0]
@@ -1093,7 +1130,8 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
       print("  processing output file...                   \r", end='')
       setts_cmd = encode_fit_as_ffmpeg_expr(audio_desc_times, video_times, video_offset)
       write_replaced_media_to_disk(output_filename, None, video_file, audio_desc_file,
-                                   setts_cmd, video_offset, after_start_key_frame)
+                                   setts_cmd, video_offset, after_start_key_frame,
+                                   median_slope=median_slope)
     if PLOT_ALIGNMENT_TO_FILE:
       plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
@@ -1132,7 +1170,7 @@ if wx is not None:
   class DialogSettings(wx.Dialog):
     def __init__(self, parent, config_path, is_dark):
-      wx.Dialog.__init__(self, parent, title="Settings - describealign", size=wx.Size(450,330),
+      wx.Dialog.__init__(self, parent, title="Settings - describealign", size=wx.Size(450,370),
                          style=wx.DEFAULT_DIALOG_STYLE|wx.TAB_TRAVERSAL)
       # setting the GUI dialog's font causes all contained elements to inherit that font by default
       self.SetFont(wx.Font(*gui_font))
@@ -1209,14 +1247,14 @@ if wx is not None:
       #
       self.SetSizer(sizer_dialog)
       sizer_dialog.Add(self.text_header, 0, wx.ALL, 5)
-      sizer_dialog.Add(sizer_output_dir, 1, wx.LEFT|wx.RIGHT|wx.EXPAND, 2)
-      sizer_dialog.Add(sizer_alignment_dir, 1, wx.LEFT|wx.RIGHT|wx.EXPAND, 2)
-      sizer_dialog.Add(sizer_prepend, 1, wx.LEFT|wx.EXPAND, 5)
-      sizer_dialog.Add(sizer_stretch_audio_no_pitch_correction_outer, 1, wx.LEFT|wx.EXPAND, 5)
+      sizer_dialog.Add(sizer_output_dir, 3, wx.LEFT|wx.RIGHT|wx.EXPAND, 2)
+      sizer_dialog.Add(sizer_alignment_dir, 3, wx.LEFT|wx.RIGHT|wx.EXPAND, 2)
+      sizer_dialog.Add(sizer_prepend, 3, wx.LEFT|wx.EXPAND, 5)
+      sizer_dialog.Add(sizer_stretch_audio_no_pitch_correction_outer, 3, wx.LEFT|wx.EXPAND, 5)
       sizer_stretch_audio_no_pitch_correction_outer.Add(panel_stretch_audio_no_pitch_correction,
                                                         1, wx.LEFT|wx.EXPAND, 5)
       sizer_stretch_audio_no_pitch_correction_outer.Add((0, 0), 2, wx.EXPAND, 5)  # spacer
-      sizer_dialog.Add(sizer_save_cancel, 2, wx.BOTTOM|wx.EXPAND, 5)
+      sizer_dialog.Add(sizer_save_cancel, 5, wx.BOTTOM|wx.EXPAND, 5)
       sizer_prepend.Add(self.text_prepend, 0, wx.ALL|wx.ALIGN_CENTER_VERTICAL, 5)
       sizer_prepend.Add(self.text_ctrl_prepend, 0, wx.ALIGN_CENTER_VERTICAL, 5)
       sizer_output_dir.Add(self.static_box_sizer_output, 1, wx.LEFT|wx.RIGHT|wx.ALIGN_CENTER_VERTICAL, 5)