describealign 1.1.2__tar.gz → 1.2.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: describealign
3
- Version: 1.1.2
3
+ Version: 1.2.0
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  Project-URL: Homepage, https://github.com/julbean/describealign
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: describealign
3
- Version: 1.1.2
3
+ Version: 1.2.0
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  Project-URL: Homepage, https://github.com/julbean/describealign
@@ -461,7 +461,8 @@ def cap_synced_end_points(smooth_path, video_arr, audio_desc_arr):
461
461
  smooth_path[-1] = new_end_point
462
462
 
463
463
  # visualize both the rough and smooth alignments
464
- def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings):
464
+ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs,
465
+ bad_clips, ad_timings, similarity_percent):
465
466
  scatter_color = [.2,.4,.8]
466
467
  lcs_rgba = np.zeros((len(quals),4))
467
468
  lcs_rgba[:,:3] = np.array(scatter_color)[None,:]
@@ -500,7 +501,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
500
501
  plt.plot(video_times / 60., audio_offsets, 'r-', lw=1, label='Replaced Audio')
501
502
  plt.xlabel('Video Time (minutes)')
502
503
  plt.ylabel('Audio Description Offset (seconds)')
503
- plt.title('Alignment')
504
+ plt.title(f"Alignment - Media Similarity {similarity_percent:.2f}%")
504
505
  plt.legend().legend_handles[0].set_color(scatter_color)
505
506
  plt.tight_layout()
506
507
  plt.savefig(plot_filename_no_ext + '.png', dpi=400)
@@ -509,6 +510,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
509
510
  with open(plot_filename_no_ext + '.txt', 'w') as file:
510
511
  rough_clips, median_slope, _ = chunk_path(smooth_path, tol=2e-2)
511
512
  video_offset = np.diff(smooth_path[rough_clips[0][0]])[0]
513
+ print(f"Input file similarity: {similarity_percent:.2f}%", file=file)
512
514
  print("Main changes needed to video to align it to audio input:", file=file)
513
515
  print(f"Start Offset: {-video_offset:.2f} seconds", file=file)
514
516
  print(f"Median Rate Change: {(median_slope-1.)*100:.2f}%", file=file)
@@ -553,6 +555,11 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
553
555
  segment = np.hstack(segment)
554
556
  return segment
555
557
 
558
+ # compress dual channel audio to mono for use in pitch corrected stretching
559
+ # pytsmod's wsola treats channels separately, so without this it sounds weird
560
+ if not no_pitch_correction:
561
+ audio_desc_arr_mono = np.mean(audio_desc_arr, axis=0)
562
+
556
563
  x,y = zip(*smooth_path)
557
564
  for run in runs:
558
565
  run_length_seconds = y[run[-1][1]] - y[run[0][0]]
@@ -573,7 +580,7 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
573
580
  anchor_point_pair[1][-1] -= 1
574
581
  anchor_y_offset = anchor_point_pair[1][0]
575
582
  anchor_point_pair[1,:] -= anchor_y_offset
576
- stretched_audio = pytsmod.wsola(audio_desc_arr, anchor_point_pair)
583
+ stretched_audio = pytsmod.wsola(audio_desc_arr_mono, anchor_point_pair)
577
584
  video_arr[:,slice(*anchor_points[1,clip_index:clip_index+2])] = stretched_audio
578
585
 
579
586
  # identify which segments of the replaced audio actually have the describer speaking
@@ -755,7 +762,7 @@ def get_ffprobe():
755
762
  def get_closest_key_frame_time(video_file, time):
756
763
  if time <= 0:
757
764
  return 0
758
- key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='v',
765
+ key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='V',
759
766
  show_frames=None, skip_frame='nokey')['frames']
760
767
  key_frame_times = np.array([float(frame['pts_time']) for frame in key_frames] + [0])
761
768
  return np.max(key_frame_times[key_frame_times <= time])
@@ -891,6 +898,11 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
891
898
  display(" output file already exists, skipping...", display_func)
892
899
  continue
893
900
 
901
+ # print warning if output file's full path is longer than Windows MAX_PATH (260)
902
+ full_output_filename = os.path.abspath(output_filename)
903
+ if IS_RUNNING_WINDOWS and len(full_output_filename) >= 260:
904
+ display(" WARNING: very long output path, ffmpeg may fail...", display_func)
905
+
894
906
  video_arr = parse_audio_from_file(video_file)
895
907
  audio_desc_arr = parse_audio_from_file(audio_desc_file)
896
908
  video_spec_raw, video_timings = tokenize_audio(video_arr)
@@ -903,6 +915,13 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
903
915
 
904
916
  path, quals = rough_align(video_spec, audio_desc_spec, video_timings, audio_desc_timings)
905
917
 
918
+ similarity_ratio = float(len(quals)) / max(video_spec.shape[0], audio_desc_spec.shape[0])
919
+ similarity_percent = min(100, 100 * similarity_ratio)
920
+ if similarity_percent < 10:
921
+ display(f" WARNING: similarity {similarity_percent:.1f}%, likely mismatched files", display_func)
922
+ if similarity_percent > 90:
923
+ display(f" WARNING: similarity {similarity_percent:.1f}%, likely undescribed media", display_func)
924
+
906
925
  smooth_path, runs, bad_clips, clips = smooth_align(path, quals, smoothness)
907
926
 
908
927
  cap_synced_end_points(smooth_path, video_arr, audio_desc_arr)
@@ -951,7 +970,8 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
951
970
  del video_arr
952
971
  if PLOT_ALIGNMENT_TO_FILE:
953
972
  plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
954
- plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings)
973
+ plot_alignment(plot_filename_no_ext, path, smooth_path, quals,
974
+ runs, bad_clips, ad_timings, similarity_percent)
955
975
  display("All files processed.", display_func)
956
976
 
957
977
  def write_config_file(config_path, settings):
@@ -1206,6 +1226,7 @@ def main_gui():
1206
1226
  window.disable()
1207
1227
  sg.Popup('Error: empty input field.', font=('Arial', 20))
1208
1228
  window.enable()
1229
+ window['-VIDEO_FILES-'].set_focus()
1209
1230
  continue
1210
1231
  video_files = values['-VIDEO_FILES-'].split(';')
1211
1232
  if len(video_files) == 1:
@@ -1213,11 +1234,15 @@ def main_gui():
1213
1234
  audio_files = values['-AUDIO_FILES-'].split(';')
1214
1235
  if len(audio_files) == 1:
1215
1236
  audio_files = audio_files[0]
1237
+ window.disable()
1216
1238
  combine_gui(video_files, audio_files, config_path)
1239
+ window.enable()
1240
+ window['-VIDEO_FILES-'].set_focus()
1217
1241
  if event == 'Settings':
1218
1242
  window.disable()
1219
1243
  settings_gui(config_path)
1220
1244
  window.enable()
1245
+ window['-VIDEO_FILES-'].set_focus()
1221
1246
  if event == sg.WIN_CLOSED:
1222
1247
  break
1223
1248
  window.close()
@@ -0,0 +1 @@
1
+ 1.2.0
@@ -1 +0,0 @@
1
- 1.1.2
File without changes
File without changes
File without changes