describealign 2.0.2__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: describealign
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  License-Expression: GPL-3.0-only
@@ -13,7 +13,7 @@ Requires-Python: >=3.8
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: ffmpeg_python~=0.2.0
16
- Requires-Dist: static-ffmpeg~=2.5
16
+ Requires-Dist: static-ffmpeg~=3.0
17
17
  Requires-Dist: matplotlib~=3.9
18
18
  Requires-Dist: numpy<3.0,>=1.21
19
19
  Requires-Dist: scipy~=1.10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: describealign
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  License-Expression: GPL-3.0-only
@@ -13,7 +13,7 @@ Requires-Python: >=3.8
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: ffmpeg_python~=0.2.0
16
- Requires-Dist: static-ffmpeg~=2.5
16
+ Requires-Dist: static-ffmpeg~=3.0
17
17
  Requires-Dist: matplotlib~=3.9
18
18
  Requires-Dist: numpy<3.0,>=1.21
19
19
  Requires-Dist: scipy~=1.10
@@ -1,5 +1,5 @@
1
1
  ffmpeg_python~=0.2.0
2
- static-ffmpeg~=2.5
2
+ static-ffmpeg~=3.0
3
3
  matplotlib~=3.9
4
4
  numpy<3.0,>=1.21
5
5
  scipy~=1.10
@@ -1,4 +1,4 @@
1
- __version__ = '2.0.2'
1
+ __version__ = '2.0.3'
2
2
 
3
3
  # combines videos with matching audio files (e.g. audio descriptions)
4
4
  # input: video or folder of videos and an audio file or folder of audio files
@@ -141,6 +141,11 @@ def run_async_ffmpeg_command(command, media_arr, err_msg):
141
141
  print(e.stderr.decode('utf-8'))
142
142
  raise
143
143
 
144
def get_ffmpeg_version():
    """Return the major version of the ffmpeg binary as a float (e.g. 7.0).

    Runs ffmpeg with the ``version`` option through run_ffmpeg_command and
    parses the number that follows the first "version " marker in its stdout.

    Raises:
        ValueError: if no version number follows the "version " marker.
    """
    import re  # local import: the file's import block is not visible from here

    ffmpeg_command = ffmpeg.input('').output('', version='')
    stdout, _ = run_ffmpeg_command(ffmpeg_command, "get version information")
    # decode the captured output instead of parsing the "b'...'" repr of a buffer
    if isinstance(stdout, (bytes, bytearray)):
        version_output = bytes(stdout).decode('utf-8', errors='replace')
    else:
        version_output = str(stdout)
    # only look right after the first "version " marker so that later mentions
    # (e.g. a compiler version in the build banner) can never be picked up
    _, _, after_marker = version_output.partition('version ')
    # tolerate release-tag style versions such as "n7.1" and multi-digit majors
    major = re.match(r'[nN]?(\d+)', after_marker)
    if major is None:
        raise ValueError(
            f"could not parse ffmpeg version from output: {version_output[:80]!r}")
    return float(major.group(1))
148
+
144
149
  # read audio from file with ffmpeg and convert to numpy array
145
150
  def parse_audio_from_file(media_file, num_channels=2):
146
151
  # retrieve only the first audio track, injecting silence/trimming to force timestamps to match up
@@ -149,7 +154,6 @@ def parse_audio_from_file(media_file, num_channels=2):
149
154
  af='aresample=async=1:first_pts=0', map='0:a:0',
150
155
  ac=num_channels, ar=AUDIO_SAMPLE_RATE, loglevel='error')
151
156
  media_stream, _ = run_ffmpeg_command(ffmpeg_command, f"parse audio from input file: {media_file}")
152
- # media_arr = np.frombuffer(media_stream, np.int16).astype(np.float32).reshape((-1, num_channels)).T
153
157
  media_arr = np.frombuffer(media_stream, np.int16).astype(np.float16).reshape((-1, num_channels)).T
154
158
  return media_arr
155
159
 
@@ -452,7 +456,8 @@ def get_closest_key_frame_time(video_file, time):
452
456
 
453
457
  # outputs a new media file with the replaced audio (which includes audio descriptions)
454
458
  def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, audio_desc_file=None,
455
- setts_cmd=None, video_offset=None, after_start_key_frame=None):
459
+ setts_cmd=None, video_offset=None, after_start_key_frame=None,
460
+ median_slope=1.):
456
461
  # if a media array is given, stretch_audio is enabled and media_arr should be added to the video
457
462
  if media_arr is not None:
458
463
  media_input = ffmpeg.input('pipe:', format='s16le', acodec='pcm_s16le', ac=2, ar=AUDIO_SAMPLE_RATE)
@@ -481,6 +486,8 @@ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, au
481
486
  audio_codec = 'copy' if os.path.splitext(audio_desc_file)[1] != '.wav' else 'aac'
482
487
  # flac audio may only have experimental support in some video containers (e.g. mp4)
483
488
  standards = 'normal' if os.path.splitext(audio_desc_file)[1] != '.flac' else 'experimental'
489
+ # stretch subtitle durations along with video so they don't overlap or have gaps
490
+ sub_stretch = f':duration=\'DURATION*{1./median_slope:.6f}\''
484
491
  # add frag_keyframe flag to prevent some players from ignoring audio/video start offsets
485
492
  # set both pts and dts simultaneously in video manually, as ts= does not do the same thing
486
493
  write_command = ffmpeg.output(media_input, original_video, output_filename,
@@ -488,16 +495,30 @@ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None, au
488
495
  max_interleave_delta='0', loglevel='error',
489
496
  strict=standards, movflags='frag_keyframe',
490
497
  **{'bsf:v': f'setts=pts=\'{setts_cmd}\':dts=\'{setts_cmd}\'',
491
- 'bsf:s': f'setts=ts=\'{setts_cmd}\'',
498
+ 'bsf:s': f'setts=ts=\'{setts_cmd}\'' + sub_stretch,
492
499
  "disposition:a:0": "default+visual_impaired+descriptions",
493
500
  "metadata:s:a:0": "title=AD"}).overwrite_output()
494
501
  run_ffmpeg_command(write_command, f"write output file: {output_filename}")
495
502
 
503
def get_static_ffmpeg_version():
    """Return the major version of the installed static_ffmpeg package.

    When running from a compiled/frozen binary the package metadata is not
    consulted and 3 is returned. Otherwise the distribution version string
    reported by importlib.metadata is reduced to its major component and
    returned as a float (e.g. "3.0" -> 3.0).
    """
    # if running from compiled binary, assume correct version of static_ffmpeg
    if "__compiled__" in globals() or getattr(sys, 'frozen', False):
        return 3
    # import the submodule explicitly: a bare "import importlib" does not
    # guarantee that the importlib.metadata attribute exists
    import importlib.metadata
    static_ffmpeg_version = importlib.metadata.version('static_ffmpeg')
    # keep everything before the first dot so multi-digit majors parse correctly
    return float(static_ffmpeg_version.split('.', 1)[0])
510
+
496
511
  # check whether static_ffmpeg has already installed ffmpeg and ffprobe
497
512
def is_ffmpeg_installed():
    """Report whether static_ffmpeg's ffmpeg is installed and recent enough.

    Returns False when the "installed.crumb" indicator file is missing from
    static_ffmpeg's platform directory. When the file exists but
    get_ffmpeg_version() reports a version below 6, a notice is printed, the
    indicator file is deleted and False is returned (presumably so the caller
    reinstalls a newer ffmpeg -- confirm against the caller). Returns True
    otherwise.
    """
    crumb_path = os.path.join(static_ffmpeg.run.get_platform_dir(), "installed.crumb")
    if os.path.exists(crumb_path):
        if get_ffmpeg_version() < 6:
            print("Old ffmpeg version detected, updating to newer version...")
            # drop the indicator file so this install no longer counts as present
            os.remove(crumb_path)
        else:
            return True
    return False
501
522
 
502
523
  def get_energy(arr):
503
524
  # downsample of 105, hann size 15, downsample by 2 gives 210 samples per second, ~65 halfwindows/second
@@ -599,6 +620,9 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
599
620
  bins = np.floor(bins).astype(int)
600
621
  np.clip(bins, 0, 6, out=bins)
601
622
  audio_desc_bins.append(np.dot(bins, 7**np.arange(num_bins)).tolist())
623
+ del feature
624
+ del norm
625
+ del bins
602
626
 
603
627
  def pairwise_intersection(set1, set2, set3):
604
628
  return (set1 & set2).union((set1 & set3), (set2 & set3))
@@ -635,6 +659,12 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
635
659
  best_so_far.add((video_index, audio_desc_index, cum_qual))
636
660
  backpointers[(video_index, audio_desc_index)] = (prev_video_index, prev_audio_desc_index)
637
661
  del video_dicts
662
+ del video_dict
663
+ del audio_desc_bins
664
+ del video_features_mean_sub
665
+ del audio_desc_features_mean_sub
666
+ del video_uniform_norms
667
+ del audio_desc_uniform_norms
638
668
  path = [best_so_far[-1][:2]]
639
669
  while path[-1][:2] in backpointers:
640
670
  # failsafe to prevent an infinite loop that should never happen anyways
@@ -887,6 +917,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
887
917
  seen_points.add(point)
888
918
  points[i].append((j, cluster_index, qual))
889
919
  del seen_points
920
+ del video_interp
890
921
  points = [sorted(point) for point in points]
891
922
 
892
923
  best_so_far = SortedList(key=lambda x:x[0])
@@ -897,6 +928,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
897
928
  prev_cache[0] = (0, 0, -1, 0, 0) # video_index, audio_desc_index, cluster_index, qual, cum_qual
898
929
  reversed_min_points = [min(x)[0] if len(x) > 0 else np.inf for x in points[::-1]]
899
930
  forward_min = list(itertools.accumulate(reversed_min_points, min))[::-1]
931
+ del reversed_min_points
900
932
  for i in range(len(audio_desc_features_scaled)):
901
933
  for j, cluster_index, qual in points[i]:
902
934
  cur_index = best_so_far.bisect_right((j,))
@@ -926,6 +958,7 @@ def align(video_features, audio_desc_features, video_energy, audio_desc_energy):
926
958
  if cluster_last[3] < cum_qual_cluster_jump:
927
959
  clusters_best_so_far[cluster_index] = (j, i, qual, cum_qual_cluster_jump)
928
960
  backpointers[(j, i)] = (prev_j, prev_i, prev_cluster_index, prev_qual, best_prev_cum_qual)
961
+
929
962
  path = [best_so_far[-1]]
930
963
  while path[-1][:2] in backpointers:
931
964
  path.append(backpointers[path[-1][:2]])
@@ -1005,6 +1038,9 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
1005
1038
 
1006
1039
  # if ffmpeg isn't installed, install it
1007
1040
  if not is_ffmpeg_installed():
1041
+ if get_static_ffmpeg_version() < 3:
1042
+ print(f" ERROR: outdated static_ffmpeg version")
1043
+ raise ImportError("static_ffmpeg must be at least version 3.0")
1008
1044
  print("Downloading and installing ffmpeg (media editor, 50 MB download)...")
1009
1045
  get_ffmpeg()
1010
1046
  if not is_ffmpeg_installed():
@@ -1084,7 +1120,8 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
1084
1120
  video_arr *= (2**15 - 2.) / np.max(np.abs(video_arr))
1085
1121
 
1086
1122
  print(" processing output file... \r", end='')
1087
- write_replaced_media_to_disk(output_filename, video_arr, None if has_audio_extension else video_file)
1123
+ write_replaced_media_to_disk(output_filename, video_arr, None if has_audio_extension else video_file,
1124
+ median_slope=median_slope)
1088
1125
  del video_arr
1089
1126
  else:
1090
1127
  video_offset = video_times[0] - audio_desc_times[0]
@@ -1093,7 +1130,8 @@ def combine(video, audio, stretch_audio=False, yes=False, prepend="ad_", no_pitc
1093
1130
  print(" processing output file... \r", end='')
1094
1131
  setts_cmd = encode_fit_as_ffmpeg_expr(audio_desc_times, video_times, video_offset)
1095
1132
  write_replaced_media_to_disk(output_filename, None, video_file, audio_desc_file,
1096
- setts_cmd, video_offset, after_start_key_frame)
1133
+ setts_cmd, video_offset, after_start_key_frame,
1134
+ median_slope=median_slope)
1097
1135
 
1098
1136
  if PLOT_ALIGNMENT_TO_FILE:
1099
1137
  plot_filename_no_ext = os.path.join(alignment_dir, os.path.splitext(os.path.split(video_file)[1])[0])
@@ -1,5 +1,5 @@
1
1
  ffmpeg_python~=0.2.0
2
- static-ffmpeg~=2.5
2
+ static-ffmpeg~=3.0
3
3
  matplotlib~=3.9
4
4
  numpy>=1.21,<3.0
5
5
  scipy~=1.10
File without changes
File without changes
File without changes