describealign 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: describealign
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  Project-URL: Homepage, https://github.com/julbean/describealign
@@ -22,6 +22,7 @@ Requires-Dist: pytsmod~=0.3.7
22
22
  Requires-Dist: PySimpleGUIWx~=0.17.2; platform_system == "Windows"
23
23
  Requires-Dist: PySimpleGUIQt~=0.35.0; platform_system != "Windows"
24
24
  Requires-Dist: PySide2~=5.15; platform_system != "Windows"
25
+ Dynamic: license-file
25
26
 
26
27
  For usage help, simply run the script directly.
27
28
  If the Scripts folder has been added to PATH, can be run
@@ -0,0 +1,7 @@
1
+ describealign.py,sha256=Pdt6OgYCSCHtpM22CwHsw77wKUIBBYbVxRc9TdG494w,71259
2
+ describealign-1.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
+ describealign-1.2.0.dist-info/METADATA,sha256=vh8ecBA6BIivlyI-B92HN2wKiFo_o254sr5yzvx4Dd4,1295
4
+ describealign-1.2.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
5
+ describealign-1.2.0.dist-info/entry_points.txt,sha256=7o7N6v3r4vFIH_XBdgk7WWhr-vZ_YitY8JWMdzN5xU0,71
6
+ describealign-1.2.0.dist-info/top_level.txt,sha256=VYHWy4TeimBAF5BQAuDj4adGdLaWs2AoYx6qQjGPJ4M,14
7
+ describealign-1.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
describealign.py CHANGED
@@ -380,7 +380,10 @@ def smooth_align(path, quals, smoothness):
380
380
  -scipy.sparse.eye(num_fit_points)])
381
381
  b_eq = y_diffs[1: ] / x_diffs[1: ] - \
382
382
  y_diffs[ :-1] / x_diffs[ :-1]
383
- fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq)
383
+ fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ds')
384
+ # if dual simplex solver encounters numerical problems, retry with interior point solver
385
+ if not fit.success and fit.status == 4:
386
+ fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ipm')
384
387
  if not fit.success:
385
388
  print(fit)
386
389
  raise RuntimeError("Smooth Alignment L1-Min Optimization Failed!")
@@ -458,7 +461,8 @@ def cap_synced_end_points(smooth_path, video_arr, audio_desc_arr):
458
461
  smooth_path[-1] = new_end_point
459
462
 
460
463
  # visualize both the rough and smooth alignments
461
- def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings):
464
+ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs,
465
+ bad_clips, ad_timings, similarity_percent):
462
466
  scatter_color = [.2,.4,.8]
463
467
  lcs_rgba = np.zeros((len(quals),4))
464
468
  lcs_rgba[:,:3] = np.array(scatter_color)[None,:]
@@ -497,7 +501,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
497
501
  plt.plot(video_times / 60., audio_offsets, 'r-', lw=1, label='Replaced Audio')
498
502
  plt.xlabel('Video Time (minutes)')
499
503
  plt.ylabel('Audio Description Offset (seconds)')
500
- plt.title('Alignment')
504
+ plt.title(f"Alignment - Media Similarity {similarity_percent:.2f}%")
501
505
  plt.legend().legend_handles[0].set_color(scatter_color)
502
506
  plt.tight_layout()
503
507
  plt.savefig(plot_filename_no_ext + '.png', dpi=400)
@@ -506,6 +510,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
506
510
  with open(plot_filename_no_ext + '.txt', 'w') as file:
507
511
  rough_clips, median_slope, _ = chunk_path(smooth_path, tol=2e-2)
508
512
  video_offset = np.diff(smooth_path[rough_clips[0][0]])[0]
513
+ print(f"Input file similarity: {similarity_percent:.2f}%", file=file)
509
514
  print("Main changes needed to video to align it to audio input:", file=file)
510
515
  print(f"Start Offset: {-video_offset:.2f} seconds", file=file)
511
516
  print(f"Median Rate Change: {(median_slope-1.)*100:.2f}%", file=file)
@@ -550,6 +555,11 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
550
555
  segment = np.hstack(segment)
551
556
  return segment
552
557
 
558
+ # compress dual channel audio to mono for use in pitch corrected stretching
559
+ # pytsmod's wsola treats channels separately, so without this it sounds weird
560
+ if not no_pitch_correction:
561
+ audio_desc_arr_mono = np.mean(audio_desc_arr, axis=0)
562
+
553
563
  x,y = zip(*smooth_path)
554
564
  for run in runs:
555
565
  run_length_seconds = y[run[-1][1]] - y[run[0][0]]
@@ -570,7 +580,7 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
570
580
  anchor_point_pair[1][-1] -= 1
571
581
  anchor_y_offset = anchor_point_pair[1][0]
572
582
  anchor_point_pair[1,:] -= anchor_y_offset
573
- stretched_audio = pytsmod.wsola(audio_desc_arr, anchor_point_pair)
583
+ stretched_audio = pytsmod.wsola(audio_desc_arr_mono, anchor_point_pair)
574
584
  video_arr[:,slice(*anchor_points[1,clip_index:clip_index+2])] = stretched_audio
575
585
 
576
586
  # identify which segments of the replaced audio actually have the describer speaking
@@ -668,7 +678,10 @@ def detect_describer(video_arr, video_spec, video_spec_raw, video_timings,
668
678
  scipy.sparse.eye(num_fit_points-1),
669
679
  -scipy.sparse.eye(num_fit_points-1)])
670
680
  b_eq = y_diffs
671
- fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq)
681
+ fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ds')
682
+ # if dual simplex solver encounters numerical problems, retry with interior point solver
683
+ if not fit.success and fit.status == 4:
684
+ fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ipm')
672
685
  if not fit.success:
673
686
  print(fit)
674
687
  raise RuntimeError("Describer Voice Detection L1-Min Optimization Failed!")
@@ -749,7 +762,7 @@ def get_ffprobe():
749
762
  def get_closest_key_frame_time(video_file, time):
750
763
  if time <= 0:
751
764
  return 0
752
- key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='v',
765
+ key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='V',
753
766
  show_frames=None, skip_frame='nokey')['frames']
754
767
  key_frame_times = np.array([float(frame['pts_time']) for frame in key_frames] + [0])
755
768
  return np.max(key_frame_times[key_frame_times <= time])
@@ -885,6 +898,11 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
885
898
  display(" output file already exists, skipping...", display_func)
886
899
  continue
887
900
 
901
+ # print warning if output file's full path is longer than Windows MAX_PATH (260)
902
+ full_output_filename = os.path.abspath(output_filename)
903
+ if IS_RUNNING_WINDOWS and len(full_output_filename) >= 260:
904
+ display(" WARNING: very long output path, ffmpeg may fail...", display_func)
905
+
888
906
  video_arr = parse_audio_from_file(video_file)
889
907
  audio_desc_arr = parse_audio_from_file(audio_desc_file)
890
908
  video_spec_raw, video_timings = tokenize_audio(video_arr)
@@ -897,6 +915,13 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
897
915
 
898
916
  path, quals = rough_align(video_spec, audio_desc_spec, video_timings, audio_desc_timings)
899
917
 
918
+ similarity_ratio = float(len(quals)) / max(video_spec.shape[0], audio_desc_spec.shape[0])
919
+ similarity_percent = min(100, 100 * similarity_ratio)
920
+ if similarity_percent < 10:
921
+ display(f" WARNING: similarity {similarity_percent:.1f}%, likely mismatched files", display_func)
922
+ if similarity_percent > 90:
923
+ display(f" WARNING: similarity {similarity_percent:.1f}%, likely undescribed media", display_func)
924
+
900
925
  smooth_path, runs, bad_clips, clips = smooth_align(path, quals, smoothness)
901
926
 
902
927
  cap_synced_end_points(smooth_path, video_arr, audio_desc_arr)
@@ -945,7 +970,8 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
945
970
  del video_arr
946
971
  if PLOT_ALIGNMENT_TO_FILE:
947
972
  plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
948
- plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings)
973
+ plot_alignment(plot_filename_no_ext, path, smooth_path, quals,
974
+ runs, bad_clips, ad_timings, similarity_percent)
949
975
  display("All files processed.", display_func)
950
976
 
951
977
  def write_config_file(config_path, settings):
@@ -1200,6 +1226,7 @@ def main_gui():
1200
1226
  window.disable()
1201
1227
  sg.Popup('Error: empty input field.', font=('Arial', 20))
1202
1228
  window.enable()
1229
+ window['-VIDEO_FILES-'].set_focus()
1203
1230
  continue
1204
1231
  video_files = values['-VIDEO_FILES-'].split(';')
1205
1232
  if len(video_files) == 1:
@@ -1207,11 +1234,15 @@ def main_gui():
1207
1234
  audio_files = values['-AUDIO_FILES-'].split(';')
1208
1235
  if len(audio_files) == 1:
1209
1236
  audio_files = audio_files[0]
1237
+ window.disable()
1210
1238
  combine_gui(video_files, audio_files, config_path)
1239
+ window.enable()
1240
+ window['-VIDEO_FILES-'].set_focus()
1211
1241
  if event == 'Settings':
1212
1242
  window.disable()
1213
1243
  settings_gui(config_path)
1214
1244
  window.enable()
1245
+ window['-VIDEO_FILES-'].set_focus()
1215
1246
  if event == sg.WIN_CLOSED:
1216
1247
  break
1217
1248
  window.close()
@@ -1,7 +0,0 @@
1
- describealign.py,sha256=pn7vyyQaq6D-xanF9YL3-phYaeGHQMHHH9wiNWC8M6A,69439
2
- describealign-1.1.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- describealign-1.1.1.dist-info/METADATA,sha256=0p6pNV7U1JmkNqTypD4UIXl-pYljMoVzK-2efKA5AqI,1272
4
- describealign-1.1.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
5
- describealign-1.1.1.dist-info/entry_points.txt,sha256=7o7N6v3r4vFIH_XBdgk7WWhr-vZ_YitY8JWMdzN5xU0,71
6
- describealign-1.1.1.dist-info/top_level.txt,sha256=VYHWy4TeimBAF5BQAuDj4adGdLaWs2AoYx6qQjGPJ4M,14
7
- describealign-1.1.1.dist-info/RECORD,,