describealign 2.0.1__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {describealign-2.0.1 → describealign-2.0.3}/PKG-INFO +2 -2
- {describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/PKG-INFO +2 -2
- {describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/requires.txt +1 -1
- {describealign-2.0.1 → describealign-2.0.3}/describealign.py +51 -13
- {describealign-2.0.1 → describealign-2.0.3}/requirements.txt +1 -1
- {describealign-2.0.1 → describealign-2.0.3}/LICENSE +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/README.md +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/SOURCES.txt +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/dependency_links.txt +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/entry_points.txt +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/describealign.egg-info/top_level.txt +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/pyproject.toml +0 -0
- {describealign-2.0.1 → describealign-2.0.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: describealign
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: Combines videos with matching audio files (e.g. audio descriptions)
|
|
5
5
|
Author-email: Julian Brown <julbean@proton.me>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -13,7 +13,7 @@ Requires-Python: >=3.8
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: ffmpeg_python~=0.2.0
|
|
16
|
-
Requires-Dist: static-ffmpeg~=
|
|
16
|
+
Requires-Dist: static-ffmpeg~=3.0
|
|
17
17
|
Requires-Dist: matplotlib~=3.9
|
|
18
18
|
Requires-Dist: numpy<3.0,>=1.21
|
|
19
19
|
Requires-Dist: scipy~=1.10
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: describealign
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: Combines videos with matching audio files (e.g. audio descriptions)
|
|
5
5
|
Author-email: Julian Brown <julbean@proton.me>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -13,7 +13,7 @@ Requires-Python: >=3.8
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: ffmpeg_python~=0.2.0
|
|
16
|
-
Requires-Dist: static-ffmpeg~=
|
|
16
|
+
Requires-Dist: static-ffmpeg~=3.0
|
|
17
17
|
Requires-Dist: matplotlib~=3.9
|
|
18
18
|
Requires-Dist: numpy<3.0,>=1.21
|
|
19
19
|
Requires-Dist: scipy~=1.10
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '2.0.1'
|
|
1
|
+
__version__ = '2.0.3'
|
|
2
2
|
|
|
3
3
|
# combines videos with matching audio files (e.g. audio descriptions)
|
|
4
4
|
# input: video or folder of videos and an audio file or folder of audio files
|
|
@@ -141,6 +141,11 @@ def run_async_ffmpeg_command(command, media_arr, err_msg):
|
|
|
141
141
|
print(e.stderr.decode('utf-8'))
|
|
142
142
|
raise
|
|
143
143
|
|
|
144
|
+
def get_ffmpeg_version():
|
|
145
|
+
ffmpeg_command = ffmpeg.input('').output('', version='')
|
|
146
|
+
stdout, _ = run_ffmpeg_command(ffmpeg_command, "get version information")
|
|
147
|
+
return float(str(stdout).split('version ')[1][:2])
|
|
148
|
+
|
|
144
149
|
# read audio from file with ffmpeg and convert to numpy array
|
|
145
150
|
def parse_audio_from_file(media_file, num_channels=2):
|
|
146
151
|
# retrieve only the first audio track, injecting silence/trimming to force timestamps to match up
|
|
@@ -149,7 +154,6 @@ def parse_audio_from_file(media_file, num_channels=2):
|
|
|
149
154
|
af='aresample=async=1:first_pts=0', map='0:a:0',
|
|
150
155
|
ac=num_channels, ar=AUDIO_SAMPLE_RATE, loglevel='error')
|
|
151
156
|
media_stream, _ = run_ffmpeg_command(ffmpeg_command, f"parse audio from input file: {media_file}")
|
|
152
|
-
# media_arr = np.frombuffer(media_stream, np.int16).astype(np.float32).reshape((-1, num_channels)).T
|
|
153
157
|
media_arr = np.frombuffer(media_stream, np.int16).astype(np.float16).reshape((-1, num_channels)).T
|
|
154
158
|
return media_arr
|
|
155
159
|
|
|
@@ -452,7 +456,8 @@ def get_closest_key_frame_time(video_file, time):
|
|
|
452
456
|
|
|
453
457
|
# outputs a new media file with the replaced audio (which includes audio descriptions)
|
|
454
458
|
def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, audio_desc_file=None,
|
|
455
|
-
setts_cmd=None, video_offset=None, after_start_key_frame=None):
|
|
459
|
+
setts_cmd=None, video_offset=None, after_start_key_frame=None,
|
|
460
|
+
median_slope=1.):
|
|
456
461
|
# if a media array is given, stretch_audio is enabled and media_arr should be added to the video
|
|
457
462
|
if media_arr is not None:
|
|
458
463
|
media_input = ffmpeg.input('pipe:', format='s16le', acodec='pcm_s16le', ac=2, ar=AUDIO_SAMPLE_RATE)
|
|
@@ -481,6 +486,8 @@ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, au
|
|
|
481
486
|
audio_codec = 'copy' if os.path.splitext(audio_desc_file)[1] != '.wav' else 'aac'
|
|
482
487
|
# flac audio may only have experimental support in some video containers (e.g. mp4)
|
|
483
488
|
standards = 'normal' if os.path.splitext(audio_desc_file)[1] != '.flac' else 'experimental'
|
|
489
|
+
# stretch subtitle durations along with video so they don't overlap or have gaps
|
|
490
|
+
sub_stretch = f':duration=\'DURATION*{1./median_slope:.6f}\''
|
|
484
491
|
# add frag_keyframe flag to prevent some players from ignoring audio/video start offsets
|
|
485
492
|
# set both pts and dts simultaneously in video manually, as ts= does not do the same thing
|
|
486
493
|
write_command = ffmpeg.output(media_input, original_video, output_filename,
|
|
@@ -488,16 +495,30 @@ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, au
|
|
|
488
495
|
max_interleave_delta='0', loglevel='error',
|
|
489
496
|
strict=standards, movflags='frag_keyframe',
|
|
490
497
|
**{'bsf:v': f'setts=pts=\'{setts_cmd}\':dts=\'{setts_cmd}\'',
|
|
491
|
-
'bsf:s': f'setts=ts=\'{setts_cmd}\'',
|
|
498
|
+
'bsf:s': f'setts=ts=\'{setts_cmd}\'' + sub_stretch,
|
|
492
499
|
"disposition:a:0": "default+visual_impaired+descriptions",
|
|
493
500
|
"metadata:s:a:0": "title=AD"}).overwrite_output()
|
|
494
501
|
run_ffmpeg_command(write_command, f"write output file: {output_filename}")
|
|
495
502
|
|
|
503
|
+
def get_static_ffmpeg_version():
|
|
504
|
+
# if running from compiled binary, assume correct version of static_ffmpeg
|
|
505
|
+
if "__compiled__" in globals() or getattr(sys, 'frozen', False):
|
|
506
|
+
return 3
|
|
507
|
+
import importlib
|
|
508
|
+
static_ffmpeg_version = importlib.metadata.version('static_ffmpeg')
|
|
509
|
+
return float(static_ffmpeg_version[:2])
|
|
510
|
+
|
|
496
511
|
# check whether static_ffmpeg has already installed ffmpeg and ffprobe
|
|
497
512
|
def is_ffmpeg_installed():
|
|
498
513
|
ffmpeg_dir = static_ffmpeg.run.get_platform_dir()
|
|
499
514
|
indicator_file = os.path.join(ffmpeg_dir, "installed.crumb")
|
|
500
|
-
|
|
515
|
+
if not os.path.exists(indicator_file):
|
|
516
|
+
return False
|
|
517
|
+
if get_ffmpeg_version() < 6:
|
|
518
|
+
print("Old ffmpeg version detected, updating to newer version...")
|
|
519
|
+
os.remove(indicator_file)
|
|
520
|
+
return False
|
|
521
|
+
return True
|
|
501
522
|
|
|
502
523
|
def get_energy(arr):
|
|
503
524
|
# downsample of 105, hann size 15, downsample by 2 gives 210 samples per second, ~65 halfwindows/second
|
|
@@ -599,6 +620,9 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
|
|
|
599
620
|
bins = np.floor(bins).astype(int)
|
|
600
621
|
np.clip(bins, 0, 6, out=bins)
|
|
601
622
|
audio_desc_bins.append(np.dot(bins, 7**np.arange(num_bins)).tolist())
|
|
623
|
+
del feature
|
|
624
|
+
del norm
|
|
625
|
+
del bins
|
|
602
626
|
|
|
603
627
|
def pairwise_intersection(set1, set2, set3):
|
|
604
628
|
return (set1 & set2).union((set1 & set3), (set2 & set3))
|
|
@@ -635,6 +659,12 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
|
|
|
635
659
|
best_so_far.add((video_index, audio_desc_index, cum_qual))
|
|
636
660
|
backpointers[(video_index, audio_desc_index)] = (prev_video_index, prev_audio_desc_index)
|
|
637
661
|
del video_dicts
|
|
662
|
+
del video_dict
|
|
663
|
+
del audio_desc_bins
|
|
664
|
+
del video_features_mean_sub
|
|
665
|
+
del audio_desc_features_mean_sub
|
|
666
|
+
del video_uniform_norms
|
|
667
|
+
del audio_desc_uniform_norms
|
|
638
668
|
path = [best_so_far[-1][:2]]
|
|
639
669
|
while path[-1][:2] in backpointers:
|
|
640
670
|
# failsafe to prevent an infinite loop that should never happen anyways
|
|
@@ -887,6 +917,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
|
|
|
887
917
|
seen_points.add(point)
|
|
888
918
|
points[i].append((j, cluster_index, qual))
|
|
889
919
|
del seen_points
|
|
920
|
+
del video_interp
|
|
890
921
|
points = [sorted(point) for point in points]
|
|
891
922
|
|
|
892
923
|
best_so_far = SortedList(key=lambda x:x[0])
|
|
@@ -897,6 +928,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
|
|
|
897
928
|
prev_cache[0] = (0, 0, -1, 0, 0) # video_index, audio_desc_index, cluster_index, qual, cum_qual
|
|
898
929
|
reversed_min_points = [min(x)[0] if len(x) > 0 else np.inf for x in points[::-1]]
|
|
899
930
|
forward_min = list(itertools.accumulate(reversed_min_points, min))[::-1]
|
|
931
|
+
del reversed_min_points
|
|
900
932
|
for i in range(len(audio_desc_features_scaled)):
|
|
901
933
|
for j, cluster_index, qual in points[i]:
|
|
902
934
|
cur_index = best_so_far.bisect_right((j,))
|
|
@@ -926,6 +958,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
|
|
|
926
958
|
if cluster_last[3] < cum_qual_cluster_jump:
|
|
927
959
|
clusters_best_so_far[cluster_index] = (j, i, qual, cum_qual_cluster_jump)
|
|
928
960
|
backpointers[(j, i)] = (prev_j, prev_i, prev_cluster_index, prev_qual, best_prev_cum_qual)
|
|
961
|
+
|
|
929
962
|
path = [best_so_far[-1]]
|
|
930
963
|
while path[-1][:2] in backpointers:
|
|
931
964
|
path.append(backpointers[path[-1][:2]])
|
|
@@ -1005,6 +1038,9 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
|
|
|
1005
1038
|
|
|
1006
1039
|
# if ffmpeg isn't installed, install it
|
|
1007
1040
|
if not is_ffmpeg_installed():
|
|
1041
|
+
if get_static_ffmpeg_version() < 3:
|
|
1042
|
+
print(f" ERROR: outdated static_ffmpeg version")
|
|
1043
|
+
raise ImportError("static_ffmpeg must be at least version 3.0")
|
|
1008
1044
|
print("Downloading and installing ffmpeg (media editor, 50 MB download)...")
|
|
1009
1045
|
get_ffmpeg()
|
|
1010
1046
|
if not is_ffmpeg_installed():
|
|
@@ -1084,7 +1120,8 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
|
|
|
1084
1120
|
video_arr *= (2**15 - 2.) / np.max(np.abs(video_arr))
|
|
1085
1121
|
|
|
1086
1122
|
print(" processing output file... \r", end='')
|
|
1087
|
-
write_replaced_media_to_disk(output_filename, video_arr, None if has_audio_extension else video_file)
|
|
1123
|
+
write_replaced_media_to_disk(output_filename, video_arr, None if has_audio_extension else video_file,
|
|
1124
|
+
median_slope=median_slope)
|
|
1088
1125
|
del video_arr
|
|
1089
1126
|
else:
|
|
1090
1127
|
video_offset = video_times[0] - audio_desc_times[0]
|
|
@@ -1093,7 +1130,8 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
|
|
|
1093
1130
|
print(" processing output file... \r", end='')
|
|
1094
1131
|
setts_cmd = encode_fit_as_ffmpeg_expr(audio_desc_times, video_times, video_offset)
|
|
1095
1132
|
write_replaced_media_to_disk(output_filename, None, video_file, audio_desc_file,
|
|
1096
|
-
setts_cmd, video_offset, after_start_key_frame)
|
|
1133
|
+
setts_cmd, video_offset, after_start_key_frame,
|
|
1134
|
+
median_slope=median_slope)
|
|
1097
1135
|
|
|
1098
1136
|
if PLOT_ALIGNMENT_TO_FILE:
|
|
1099
1137
|
plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
|
|
@@ -1132,7 +1170,7 @@ if wx is not None:
|
|
|
1132
1170
|
|
|
1133
1171
|
class DialogSettings(wx.Dialog):
|
|
1134
1172
|
def __init__(self, parent, config_path, is_dark):
|
|
1135
|
-
wx.Dialog.__init__(self, parent, title="Settings - describealign", size=wx.Size(450,
|
|
1173
|
+
wx.Dialog.__init__(self, parent, title="Settings - describealign", size=wx.Size(450,370),
|
|
1136
1174
|
style=wx.DEFAULT_DIALOG_STYLE|wx.TAB_TRAVERSAL)
|
|
1137
1175
|
# setting the GUI dialog's font causes all contained elements to inherit that font by default
|
|
1138
1176
|
self.SetFont(wx.Font(*gui_font))
|
|
@@ -1209,14 +1247,14 @@ if wx is not None:
|
|
|
1209
1247
|
#
|
|
1210
1248
|
self.SetSizer(sizer_dialog)
|
|
1211
1249
|
sizer_dialog.Add(self.text_header, 0, wx.ALL, 5)
|
|
1212
|
-
sizer_dialog.Add(sizer_output_dir,
|
|
1213
|
-
sizer_dialog.Add(sizer_alignment_dir,
|
|
1214
|
-
sizer_dialog.Add(sizer_prepend,
|
|
1215
|
-
sizer_dialog.Add(sizer_stretch_audio_no_pitch_correction_outer,
|
|
1250
|
+
sizer_dialog.Add(sizer_output_dir, 3, wx.LEFT|wx.RIGHT|wx.EXPAND, 2)
|
|
1251
|
+
sizer_dialog.Add(sizer_alignment_dir, 3, wx.LEFT|wx.RIGHT|wx.EXPAND, 2)
|
|
1252
|
+
sizer_dialog.Add(sizer_prepend, 3, wx.LEFT|wx.EXPAND, 5)
|
|
1253
|
+
sizer_dialog.Add(sizer_stretch_audio_no_pitch_correction_outer, 3, wx.LEFT|wx.EXPAND, 5)
|
|
1216
1254
|
sizer_stretch_audio_no_pitch_correction_outer.Add(panel_stretch_audio_no_pitch_correction,
|
|
1217
1255
|
1, wx.LEFT|wx.EXPAND, 5)
|
|
1218
1256
|
sizer_stretch_audio_no_pitch_correction_outer.Add((0, 0), 2, wx.EXPAND, 5) # spacer
|
|
1219
|
-
sizer_dialog.Add(sizer_save_cancel,
|
|
1257
|
+
sizer_dialog.Add(sizer_save_cancel, 5, wx.BOTTOM|wx.EXPAND, 5)
|
|
1220
1258
|
sizer_prepend.Add(self.text_prepend, 0, wx.ALL|wx.ALIGN_CENTER_VERTICAL, 5)
|
|
1221
1259
|
sizer_prepend.Add(self.text_ctrl_prepend, 0, wx.ALIGN_CENTER_VERTICAL, 5)
|
|
1222
1260
|
sizer_output_dir.Add(self.static_box_sizer_output, 1, wx.LEFT|wx.RIGHT|wx.ALIGN_CENTER_VERTICAL, 5)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|