describealign 1.1.2__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {describealign-1.1.2.dist-info → describealign-1.2.0.dist-info}/METADATA +1 -1
- describealign-1.2.0.dist-info/RECORD +7 -0
- {describealign-1.1.2.dist-info → describealign-1.2.0.dist-info}/WHEEL +1 -1
- describealign.py +30 -5
- describealign-1.1.2.dist-info/RECORD +0 -7
- {describealign-1.1.2.dist-info → describealign-1.2.0.dist-info}/entry_points.txt +0 -0
- {describealign-1.1.2.dist-info → describealign-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {describealign-1.1.2.dist-info → describealign-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: describealign
|
|
3
|
-
Version: 1.1.2
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Combines videos with matching audio files (e.g. audio descriptions)
|
|
5
5
|
Author-email: Julian Brown <julbean@proton.me>
|
|
6
6
|
Project-URL: Homepage, https://github.com/julbean/describealign
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
describealign.py,sha256=Pdt6OgYCSCHtpM22CwHsw77wKUIBBYbVxRc9TdG494w,71259
|
|
2
|
+
describealign-1.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
+
describealign-1.2.0.dist-info/METADATA,sha256=vh8ecBA6BIivlyI-B92HN2wKiFo_o254sr5yzvx4Dd4,1295
|
|
4
|
+
describealign-1.2.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
5
|
+
describealign-1.2.0.dist-info/entry_points.txt,sha256=7o7N6v3r4vFIH_XBdgk7WWhr-vZ_YitY8JWMdzN5xU0,71
|
|
6
|
+
describealign-1.2.0.dist-info/top_level.txt,sha256=VYHWy4TeimBAF5BQAuDj4adGdLaWs2AoYx6qQjGPJ4M,14
|
|
7
|
+
describealign-1.2.0.dist-info/RECORD,,
|
describealign.py
CHANGED
|
@@ -461,7 +461,8 @@ def cap_synced_end_points(smooth_path, video_arr, audio_desc_arr):
|
|
|
461
461
|
smooth_path[-1] = new_end_point
|
|
462
462
|
|
|
463
463
|
# visualize both the rough and smooth alignments
|
|
464
|
-
def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings):
|
|
464
|
+
def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs,
|
|
465
|
+
bad_clips, ad_timings, similarity_percent):
|
|
465
466
|
scatter_color = [.2,.4,.8]
|
|
466
467
|
lcs_rgba = np.zeros((len(quals),4))
|
|
467
468
|
lcs_rgba[:,:3] = np.array(scatter_color)[None,:]
|
|
@@ -500,7 +501,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
|
|
|
500
501
|
plt.plot(video_times / 60., audio_offsets, 'r-', lw=1, label='Replaced Audio')
|
|
501
502
|
plt.xlabel('Video Time (minutes)')
|
|
502
503
|
plt.ylabel('Audio Description Offset (seconds)')
|
|
503
|
-
plt.title(
|
|
504
|
+
plt.title(f"Alignment - Media Similarity {similarity_percent:.2f}%")
|
|
504
505
|
plt.legend().legend_handles[0].set_color(scatter_color)
|
|
505
506
|
plt.tight_layout()
|
|
506
507
|
plt.savefig(plot_filename_no_ext + '.png', dpi=400)
|
|
@@ -509,6 +510,7 @@ def plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_cli
|
|
|
509
510
|
with open(plot_filename_no_ext + '.txt', 'w') as file:
|
|
510
511
|
rough_clips, median_slope, _ = chunk_path(smooth_path, tol=2e-2)
|
|
511
512
|
video_offset = np.diff(smooth_path[rough_clips[0][0]])[0]
|
|
513
|
+
print(f"Input file similarity: {similarity_percent:.2f}%", file=file)
|
|
512
514
|
print("Main changes needed to video to align it to audio input:", file=file)
|
|
513
515
|
print(f"Start Offset: {-video_offset:.2f} seconds", file=file)
|
|
514
516
|
print(f"Median Rate Change: {(median_slope-1.)*100:.2f}%", file=file)
|
|
@@ -553,6 +555,11 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
|
|
|
553
555
|
segment = np.hstack(segment)
|
|
554
556
|
return segment
|
|
555
557
|
|
|
558
|
+
# compress dual channel audio to mono for use in pitch corrected stretching
|
|
559
|
+
# pytsmod's wsola treats channels separately, so without this it sounds weird
|
|
560
|
+
if not no_pitch_correction:
|
|
561
|
+
audio_desc_arr_mono = np.mean(audio_desc_arr, axis=0)
|
|
562
|
+
|
|
556
563
|
x,y = zip(*smooth_path)
|
|
557
564
|
for run in runs:
|
|
558
565
|
run_length_seconds = y[run[-1][1]] - y[run[0][0]]
|
|
@@ -573,7 +580,7 @@ def replace_aligned_segments(video_arr, audio_desc_arr, smooth_path, runs, no_pi
|
|
|
573
580
|
anchor_point_pair[1][-1] -= 1
|
|
574
581
|
anchor_y_offset = anchor_point_pair[1][0]
|
|
575
582
|
anchor_point_pair[1,:] -= anchor_y_offset
|
|
576
|
-
stretched_audio = pytsmod.wsola(
|
|
583
|
+
stretched_audio = pytsmod.wsola(audio_desc_arr_mono, anchor_point_pair)
|
|
577
584
|
video_arr[:,slice(*anchor_points[1,clip_index:clip_index+2])] = stretched_audio
|
|
578
585
|
|
|
579
586
|
# identify which segments of the replaced audio actually have the describer speaking
|
|
@@ -755,7 +762,7 @@ def get_ffprobe():
|
|
|
755
762
|
def get_closest_key_frame_time(video_file, time):
|
|
756
763
|
if time <= 0:
|
|
757
764
|
return 0
|
|
758
|
-
key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='
|
|
765
|
+
key_frames = ffmpeg.probe(video_file, cmd=get_ffprobe(), select_streams='V',
|
|
759
766
|
show_frames=None, skip_frame='nokey')['frames']
|
|
760
767
|
key_frame_times = np.array([float(frame['pts_time']) for frame in key_frames] + [0])
|
|
761
768
|
return np.max(key_frame_times[key_frame_times <= time])
|
|
@@ -891,6 +898,11 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
|
|
|
891
898
|
display(" output file already exists, skipping...", display_func)
|
|
892
899
|
continue
|
|
893
900
|
|
|
901
|
+
# print warning if output file's full path is longer than Windows MAX_PATH (260)
|
|
902
|
+
full_output_filename = os.path.abspath(output_filename)
|
|
903
|
+
if IS_RUNNING_WINDOWS and len(full_output_filename) >= 260:
|
|
904
|
+
display(" WARNING: very long output path, ffmpeg may fail...", display_func)
|
|
905
|
+
|
|
894
906
|
video_arr = parse_audio_from_file(video_file)
|
|
895
907
|
audio_desc_arr = parse_audio_from_file(audio_desc_file)
|
|
896
908
|
video_spec_raw, video_timings = tokenize_audio(video_arr)
|
|
@@ -903,6 +915,13 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
|
|
|
903
915
|
|
|
904
916
|
path, quals = rough_align(video_spec, audio_desc_spec, video_timings, audio_desc_timings)
|
|
905
917
|
|
|
918
|
+
similarity_ratio = float(len(quals)) / max(video_spec.shape[0], audio_desc_spec.shape[0])
|
|
919
|
+
similarity_percent = min(100, 100 * similarity_ratio)
|
|
920
|
+
if similarity_percent < 10:
|
|
921
|
+
display(f" WARNING: similarity {similarity_percent:.1f}%, likely mismatched files", display_func)
|
|
922
|
+
if similarity_percent > 90:
|
|
923
|
+
display(f" WARNING: similarity {similarity_percent:.1f}%, likely undescribed media", display_func)
|
|
924
|
+
|
|
906
925
|
smooth_path, runs, bad_clips, clips = smooth_align(path, quals, smoothness)
|
|
907
926
|
|
|
908
927
|
cap_synced_end_points(smooth_path, video_arr, audio_desc_arr)
|
|
@@ -951,7 +970,8 @@ def combine(video, audio, smoothness=50, stretch_audio=False, keep_non_ad=False,
|
|
|
951
970
|
del video_arr
|
|
952
971
|
if PLOT_ALIGNMENT_TO_FILE:
|
|
953
972
|
plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
|
|
954
|
-
plot_alignment(plot_filename_no_ext, path, smooth_path, quals, runs, bad_clips, ad_timings)
|
|
973
|
+
plot_alignment(plot_filename_no_ext, path, smooth_path, quals,
|
|
974
|
+
runs, bad_clips, ad_timings, similarity_percent)
|
|
955
975
|
display("All files processed.", display_func)
|
|
956
976
|
|
|
957
977
|
def write_config_file(config_path, settings):
|
|
@@ -1206,6 +1226,7 @@ def main_gui():
|
|
|
1206
1226
|
window.disable()
|
|
1207
1227
|
sg.Popup('Error: empty input field.', font=('Arial', 20))
|
|
1208
1228
|
window.enable()
|
|
1229
|
+
window['-VIDEO_FILES-'].set_focus()
|
|
1209
1230
|
continue
|
|
1210
1231
|
video_files = values['-VIDEO_FILES-'].split(';')
|
|
1211
1232
|
if len(video_files) == 1:
|
|
@@ -1213,11 +1234,15 @@ def main_gui():
|
|
|
1213
1234
|
audio_files = values['-AUDIO_FILES-'].split(';')
|
|
1214
1235
|
if len(audio_files) == 1:
|
|
1215
1236
|
audio_files = audio_files[0]
|
|
1237
|
+
window.disable()
|
|
1216
1238
|
combine_gui(video_files, audio_files, config_path)
|
|
1239
|
+
window.enable()
|
|
1240
|
+
window['-VIDEO_FILES-'].set_focus()
|
|
1217
1241
|
if event == 'Settings':
|
|
1218
1242
|
window.disable()
|
|
1219
1243
|
settings_gui(config_path)
|
|
1220
1244
|
window.enable()
|
|
1245
|
+
window['-VIDEO_FILES-'].set_focus()
|
|
1221
1246
|
if event == sg.WIN_CLOSED:
|
|
1222
1247
|
break
|
|
1223
1248
|
window.close()
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
describealign.py,sha256=d_FI6sjNtUheWmo5GcO28TkqboBAyBau_00162my3VY,69899
|
|
2
|
-
describealign-1.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
describealign-1.1.2.dist-info/METADATA,sha256=L_BeZa79bhZNEUbMjkuE35IbRzgIu6R7LEKjl2Jqc7I,1295
|
|
4
|
-
describealign-1.1.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
5
|
-
describealign-1.1.2.dist-info/entry_points.txt,sha256=7o7N6v3r4vFIH_XBdgk7WWhr-vZ_YitY8JWMdzN5xU0,71
|
|
6
|
-
describealign-1.1.2.dist-info/top_level.txt,sha256=VYHWy4TeimBAF5BQAuDj4adGdLaWs2AoYx6qQjGPJ4M,14
|
|
7
|
-
describealign-1.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|