PyPI - describealign - Versions diffs - 1.1.2__tar.gz → 1.2.0__tar.gz - Mend

describealign-1.1.2.tar.gz → describealign-1.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{describealign-1.1.2 → describealign-1.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: describealign
-Version: 1.1.2
+Version: 1.2.0
 Summary: Combines videos with matching audio files (e.g. audio descriptions)
 Author-email: Julian Brown <julbean@proton.me>
 Project-URL: Homepage, https://github.com/julbean/describealign

{describealign-1.1.2 → describealign-1.2.0}/describealign.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: describealign
-Version: 1.1.2
+Version: 1.2.0
 Summary: Combines videos with matching audio files (e.g. audio descriptions)
 Author-email: Julian Brown <julbean@proton.me>
 Project-URL: Homepage, https://github.com/julbean/describealign

{describealign-1.1.2 → describealign-1.2.0}/describealign.py RENAMED Viewed

@@ -461,7 +461,8 @@ def cap_synced_end_points(smooth_path, video_arr, audio_desc_arr):
     smooth_path[-1] = new_end_point
 # visualize both the rough and smooth alignments
-def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings):
+def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs,
+                   bad_clips, ad_timings, similarity_percent):
   scatter_color = [.2,.4,.8]
   lcs_rgba = np.zeros((len(quals),4))
   lcs_rgba[:,:3] = np.array(scatter_color)[None,:]
@@ -500,7 +501,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
       plt.plot(video_times / 60., audio_offsets, 'r-', lw=1, label='Replaced Audio')
   plt.xlabel('Video Time (minutes)')
   plt.ylabel('Audio Description Offset (seconds)')
-  plt.title('Alignment')
+  plt.title(f"Alignment - Media Similarity {similarity_percent:.2f}%")
   plt.legend().legend_handles[0].set_color(scatter_color)
   plt.tight_layout()
   plt.savefig(plot_filename_no_ext + '.png', dpi=400)
@@ -509,6 +510,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
   with open(plot_filename_no_ext + '.txt', 'w') as file:
     rough_clips, median_slope, _ = chunk_path(smooth_path, tol=2e-2)
     video_offset = np.diff(smooth_path[rough_clips[0][0]])[0]
+    print(f"Input file similarity: {similarity_percent:.2f}%", file=file)
     print("Main changes needed to video to align it to audio input:", file=file)
     print(f"Start Offset: {-video_offset:.2f} seconds", file=file)
     print(f"Median Rate Change: {(median_slope-1.)*100:.2f}%", file=file)
@@ -553,6 +555,11 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
     segment = np.hstack(segment)
     return segment
+  # compress dual channel audio to mono for use in pitch corrected stretching
+  # pytsmod's wsola treats channels separately, so without this it sounds weird
+  if not no_pitch_correction:
+    audio_desc_arr_mono = np.mean(audio_desc_arr, axis=0)
   x,y = zip(*smooth_path)
   for run in runs:
     run_length_seconds = y[run[-1][1]] - y[run[0][0]]
@@ -573,7 +580,7 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
         anchor_point_pair[1][-1] -= 1
         anchor_y_offset = anchor_point_pair[1][0]
         anchor_point_pair[1,:] -= anchor_y_offset
-        stretched_audio = pytsmod.wsola(audio_desc_arr, anchor_point_pair)
+        stretched_audio = pytsmod.wsola(audio_desc_arr_mono, anchor_point_pair)
       video_arr[:,slice(*anchor_points[1,clip_index:clip_index+2])] = stretched_audio
 # identify which segments of the replaced audio actually have the describer speaking
@@ -755,7 +762,7 @@ def get_ffprobe():
 def get_closest_key_frame_time(video_file, time):
   if time <= 0:
     return 0
-  key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='v',
+  key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='V',
                             show_frames=None, skip_frame='nokey')['frames']
   key_frame_times = np.array([float(frame['pts_time']) for frame in key_frames] + [0])
   return np.max(key_frame_times[key_frame_times <= time])
@@ -891,6 +898,11 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
       display("   output file already exists, skipping...", display_func)
       continue
+    # print warning if output file's full path is longer than Windows MAX_PATH (260)
+    full_output_filename = os.path.abspath(output_filename)
+    if IS_RUNNING_WINDOWS and len(full_output_filename) >= 260:
+      display("   WARNING: very long output path, ffmpeg may fail...", display_func)
     video_arr = parse_audio_from_file(video_file)
     audio_desc_arr = parse_audio_from_file(audio_desc_file)
     video_spec_raw, video_timings = tokenize_audio(video_arr)
@@ -903,6 +915,13 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
     path, quals = rough_align(video_spec, audio_desc_spec, video_timings, audio_desc_timings)
+    similarity_ratio = float(len(quals)) / max(video_spec.shape[0], audio_desc_spec.shape[0])
+    similarity_percent = min(100, 100 * similarity_ratio)
+    if similarity_percent < 10:
+      display(f"   WARNING: similarity {similarity_percent:.1f}%, likely mismatched files", display_func)
+    if similarity_percent > 90:
+      display(f"   WARNING: similarity {similarity_percent:.1f}%, likely undescribed media", display_func)
     smooth_path, runs, bad_clips, clips = smooth_align(path, quals, smoothness)
     cap_synced_end_points(smooth_path, video_arr, audio_desc_arr)
@@ -951,7 +970,8 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
     del video_arr
     if PLOT_ALIGNMENT_TO_FILE:
       plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
-      plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings)
+      plot_alignment(plot_filename_no_ext, path, smooth_path, quals,
+                     runs, bad_clips, ad_timings, similarity_percent)
   display("All files processed.", display_func)
 def write_config_file(config_path, settings):
@@ -1206,6 +1226,7 @@ def main_gui():
         window.disable()
         sg.Popup('Error: empty input field.', font=('Arial', 20))
         window.enable()
+        window['-VIDEO_FILES-'].set_focus()
         continue
       video_files = values['-VIDEO_FILES-'].split(';')
       if len(video_files) == 1:
@@ -1213,11 +1234,15 @@ def main_gui():
       audio_files = values['-AUDIO_FILES-'].split(';')
       if len(audio_files) == 1:
         audio_files = audio_files[0]
+      window.disable()
       combine_gui(video_files, audio_files, config_path)
+      window.enable()
+      window['-VIDEO_FILES-'].set_focus()
     if event == 'Settings':
       window.disable()
       settings_gui(config_path)
       window.enable()
+      window['-VIDEO_FILES-'].set_focus()
     if event == sg.WIN_CLOSED:
       break
   window.close()