describealign 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {describealign-1.1.1.dist-info → describealign-1.2.0.dist-info}/METADATA +3 -2
- describealign-1.2.0.dist-info/RECORD +7 -0
- {describealign-1.1.1.dist-info → describealign-1.2.0.dist-info}/WHEEL +1 -1
- describealign.py +38 -7
- describealign-1.1.1.dist-info/RECORD +0 -7
- {describealign-1.1.1.dist-info → describealign-1.2.0.dist-info}/entry_points.txt +0 -0
- {describealign-1.1.1.dist-info → describealign-1.2.0.dist-info/licenses}/LICENSE +0 -0
- {describealign-1.1.1.dist-info → describealign-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: describealign
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Combines videos with matching audio files (e.g. audio descriptions)
|
|
5
5
|
Author-email: Julian Brown <julbean@proton.me>
|
|
6
6
|
Project-URL: Homepage, https://github.com/julbean/describealign
|
|
@@ -22,6 +22,7 @@ Requires-Dist: pytsmod~=0.3.7
|
|
|
22
22
|
Requires-Dist: PySimpleGUIWx~=0.17.2; platform_system == "Windows"
|
|
23
23
|
Requires-Dist: PySimpleGUIQt~=0.35.0; platform_system != "Windows"
|
|
24
24
|
Requires-Dist: PySide2~=5.15; platform_system != "Windows"
|
|
25
|
+
Dynamic: license-file
|
|
25
26
|
|
|
26
27
|
For usage help, simply run the script directly.
|
|
27
28
|
If the Scripts folder has been added to PATH, can be run
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
describealign.py,sha256=Pdt6OgYCSCHtpM22CwHsw77wKUIBBYbVxRc9TdG494w,71259
|
|
2
|
+
describealign-1.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
+
describealign-1.2.0.dist-info/METADATA,sha256=vh8ecBA6BIivlyI-B92HN2wKiFo_o254sr5yzvx4Dd4,1295
|
|
4
|
+
describealign-1.2.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
5
|
+
describealign-1.2.0.dist-info/entry_points.txt,sha256=7o7N6v3r4vFIH_XBdgk7WWhr-vZ_YitY8JWMdzN5xU0,71
|
|
6
|
+
describealign-1.2.0.dist-info/top_level.txt,sha256=VYHWy4TeimBAF5BQAuDj4adGdLaWs2AoYx6qQjGPJ4M,14
|
|
7
|
+
describealign-1.2.0.dist-info/RECORD,,
|
describealign.py
CHANGED
|
@@ -380,7 +380,10 @@ def smooth_align(path, quals, smoothness):
|
|
|
380
380
|
-scipy.sparse.eye(num_fit_points)])
|
|
381
381
|
b_eq = y_diffs[1: ] / x_diffs[1: ] - \
|
|
382
382
|
y_diffs[ :-1] / x_diffs[ :-1]
|
|
383
|
-
fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq)
|
|
383
|
+
fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ds')
|
|
384
|
+
# if dual simplex solver encounters numerical problems, retry with interior point solver
|
|
385
|
+
if not fit.success and fit.status == 4:
|
|
386
|
+
fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ipm')
|
|
384
387
|
if not fit.success:
|
|
385
388
|
print(fit)
|
|
386
389
|
raise RuntimeError("Smooth Alignment L1-Min Optimization Failed!")
|
|
@@ -458,7 +461,8 @@ def cap_synced_end_points(smooth_path, video_arr, audio_desc_arr):
|
|
|
458
461
|
smooth_path[-1] = new_end_point
|
|
459
462
|
|
|
460
463
|
# visualize both the rough and smooth alignments
|
|
461
|
-
def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs,
|
|
464
|
+
def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs,
|
|
465
|
+
bad_clips, ad_timings, similarity_percent):
|
|
462
466
|
scatter_color = [.2,.4,.8]
|
|
463
467
|
lcs_rgba = np.zeros((len(quals),4))
|
|
464
468
|
lcs_rgba[:,:3] = np.array(scatter_color)[None,:]
|
|
@@ -497,7 +501,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
|
|
|
497
501
|
plt.plot(video_times / 60., audio_offsets, 'r-', lw=1, label='Replaced Audio')
|
|
498
502
|
plt.xlabel('Video Time (minutes)')
|
|
499
503
|
plt.ylabel('Audio Description Offset (seconds)')
|
|
500
|
-
plt.title(
|
|
504
|
+
plt.title(f"Alignment - Media Similarity {similarity_percent:.2f}%")
|
|
501
505
|
plt.legend().legend_handles[0].set_color(scatter_color)
|
|
502
506
|
plt.tight_layout()
|
|
503
507
|
plt.savefig(plot_filename_no_ext + '.png', dpi=400)
|
|
@@ -506,6 +510,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
|
|
|
506
510
|
with open(plot_filename_no_ext + '.txt', 'w') as file:
|
|
507
511
|
rough_clips, median_slope, _ = chunk_path(smooth_path, tol=2e-2)
|
|
508
512
|
video_offset = np.diff(smooth_path[rough_clips[0][0]])[0]
|
|
513
|
+
print(f"Input file similarity: {similarity_percent:.2f}%", file=file)
|
|
509
514
|
print("Main changes needed to video to align it to audio input:", file=file)
|
|
510
515
|
print(f"Start Offset: {-video_offset:.2f} seconds", file=file)
|
|
511
516
|
print(f"Median Rate Change: {(median_slope-1.)*100:.2f}%", file=file)
|
|
@@ -550,6 +555,11 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
|
|
|
550
555
|
segment = np.hstack(segment)
|
|
551
556
|
return segment
|
|
552
557
|
|
|
558
|
+
# compress dual channel audio to mono for use in pitch corrected stretching
|
|
559
|
+
# pytsmod's wsola treats channels separately, so without this it sounds weird
|
|
560
|
+
if not no_pitch_correction:
|
|
561
|
+
audio_desc_arr_mono = np.mean(audio_desc_arr, axis=0)
|
|
562
|
+
|
|
553
563
|
x,y = zip(*smooth_path)
|
|
554
564
|
for run in runs:
|
|
555
565
|
run_length_seconds = y[run[-1][1]] - y[run[0][0]]
|
|
@@ -570,7 +580,7 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
|
|
|
570
580
|
anchor_point_pair[1][-1] -= 1
|
|
571
581
|
anchor_y_offset = anchor_point_pair[1][0]
|
|
572
582
|
anchor_point_pair[1,:] -= anchor_y_offset
|
|
573
|
-
stretched_audio = pytsmod.wsola(
|
|
583
|
+
stretched_audio = pytsmod.wsola(audio_desc_arr_mono, anchor_point_pair)
|
|
574
584
|
video_arr[:,slice(*anchor_points[1,clip_index:clip_index+2])] = stretched_audio
|
|
575
585
|
|
|
576
586
|
# identify which segments of the replaced audio actually have the describer speaking
|
|
@@ -668,7 +678,10 @@ def detect_describer(video_arr, video_spec, video_spec_raw, video_timings,
|
|
|
668
678
|
scipy.sparse.eye(num_fit_points-1),
|
|
669
679
|
-scipy.sparse.eye(num_fit_points-1)])
|
|
670
680
|
b_eq = y_diffs
|
|
671
|
-
fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq)
|
|
681
|
+
fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ds')
|
|
682
|
+
# if dual simplex solver encounters numerical problems, retry with interior point solver
|
|
683
|
+
if not fit.success and fit.status == 4:
|
|
684
|
+
fit = scipy.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method='highs-ipm')
|
|
672
685
|
if not fit.success:
|
|
673
686
|
print(fit)
|
|
674
687
|
raise RuntimeError("Describer Voice Detection L1-Min Optimization Failed!")
|
|
@@ -749,7 +762,7 @@ def get_ffprobe():
|
|
|
749
762
|
def get_closest_key_frame_time(video_file, time):
|
|
750
763
|
if time <= 0:
|
|
751
764
|
return 0
|
|
752
|
-
key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='
|
|
765
|
+
key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='V',
|
|
753
766
|
show_frames=None, skip_frame='nokey')['frames']
|
|
754
767
|
key_frame_times = np.array([float(frame['pts_time']) for frame in key_frames] + [0])
|
|
755
768
|
return np.max(key_frame_times[key_frame_times <= time])
|
|
@@ -885,6 +898,11 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
|
|
|
885
898
|
display(" output file already exists, skipping...", display_func)
|
|
886
899
|
continue
|
|
887
900
|
|
|
901
|
+
# print warning if output file's full path is longer than Windows MAX_PATH (260)
|
|
902
|
+
full_output_filename = os.path.abspath(output_filename)
|
|
903
|
+
if IS_RUNNING_WINDOWS and len(full_output_filename) >= 260:
|
|
904
|
+
display(" WARNING: very long output path, ffmpeg may fail...", display_func)
|
|
905
|
+
|
|
888
906
|
video_arr = parse_audio_from_file(video_file)
|
|
889
907
|
audio_desc_arr = parse_audio_from_file(audio_desc_file)
|
|
890
908
|
video_spec_raw, video_timings = tokenize_audio(video_arr)
|
|
@@ -897,6 +915,13 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
|
|
|
897
915
|
|
|
898
916
|
path, quals = rough_align(video_spec, audio_desc_spec, video_timings, audio_desc_timings)
|
|
899
917
|
|
|
918
|
+
similarity_ratio = float(len(quals)) / max(video_spec.shape[0], audio_desc_spec.shape[0])
|
|
919
|
+
similarity_percent = min(100, 100 * similarity_ratio)
|
|
920
|
+
if similarity_percent < 10:
|
|
921
|
+
display(f" WARNING: similarity {similarity_percent:.1f}%, likely mismatched files", display_func)
|
|
922
|
+
if similarity_percent > 90:
|
|
923
|
+
display(f" WARNING: similarity {similarity_percent:.1f}%, likely undescribed media", display_func)
|
|
924
|
+
|
|
900
925
|
smooth_path, runs, bad_clips, clips = smooth_align(path, quals, smoothness)
|
|
901
926
|
|
|
902
927
|
cap_synced_end_points(smooth_path, video_arr, audio_desc_arr)
|
|
@@ -945,7 +970,8 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
|
|
|
945
970
|
del video_arr
|
|
946
971
|
if PLOT_ALIGNMENT_TO_FILE:
|
|
947
972
|
plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
|
|
948
|
-
plot_alignment(plot_filename_no_ext, path, smooth_path, quals,
|
|
973
|
+
plot_alignment(plot_filename_no_ext, path, smooth_path, quals,
|
|
974
|
+
runs, bad_clips, ad_timings, similarity_percent)
|
|
949
975
|
display("All files processed.", display_func)
|
|
950
976
|
|
|
951
977
|
def write_config_file(config_path, settings):
|
|
@@ -1200,6 +1226,7 @@ def main_gui():
|
|
|
1200
1226
|
window.disable()
|
|
1201
1227
|
sg.Popup('Error: empty input field.', font=('Arial', 20))
|
|
1202
1228
|
window.enable()
|
|
1229
|
+
window['-VIDEO_FILES-'].set_focus()
|
|
1203
1230
|
continue
|
|
1204
1231
|
video_files = values['-VIDEO_FILES-'].split(';')
|
|
1205
1232
|
if len(video_files) == 1:
|
|
@@ -1207,11 +1234,15 @@ def main_gui():
|
|
|
1207
1234
|
audio_files = values['-AUDIO_FILES-'].split(';')
|
|
1208
1235
|
if len(audio_files) == 1:
|
|
1209
1236
|
audio_files = audio_files[0]
|
|
1237
|
+
window.disable()
|
|
1210
1238
|
combine_gui(video_files, audio_files, config_path)
|
|
1239
|
+
window.enable()
|
|
1240
|
+
window['-VIDEO_FILES-'].set_focus()
|
|
1211
1241
|
if event == 'Settings':
|
|
1212
1242
|
window.disable()
|
|
1213
1243
|
settings_gui(config_path)
|
|
1214
1244
|
window.enable()
|
|
1245
|
+
window['-VIDEO_FILES-'].set_focus()
|
|
1215
1246
|
if event == sg.WIN_CLOSED:
|
|
1216
1247
|
break
|
|
1217
1248
|
window.close()
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
describealign.py,sha256=pn7vyyQaq6D-xanF9YL3-phYaeGHQMHHH9wiNWC8M6A,69439
|
|
2
|
-
describealign-1.1.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
describealign-1.1.1.dist-info/METADATA,sha256=0p6pNV7U1JmkNqTypD4UIXl-pYljMoVzK-2efKA5AqI,1272
|
|
4
|
-
describealign-1.1.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
5
|
-
describealign-1.1.1.dist-info/entry_points.txt,sha256=7o7N6v3r4vFIH_XBdgk7WWhr-vZ_YitY8JWMdzN5xU0,71
|
|
6
|
-
describealign-1.1.1.dist-info/top_level.txt,sha256=VYHWy4TeimBAF5BQAuDj4adGdLaWs2AoYx6qQjGPJ4M,14
|
|
7
|
-
describealign-1.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|