describealign 0.1.2__tar.gz → 0.1.3__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: describealign
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  Project-URL: Homepage, https://github.com/julbean/describealign
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: describealign
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Combines videos with matching audio files (e.g. audio descriptions)
5
5
  Author-email: Julian Brown <julbean@proton.me>
6
6
  Project-URL: Homepage, https://github.com/julbean/describealign
@@ -29,6 +29,7 @@ AUDIO_EXTENSIONS = set(['mp3', 'm4a', 'opus', 'wav', 'aac', 'flac', 'ac3', 'mka'
29
29
  OUTPUT_FILE_PREPEND_TEXT = "ad_"
30
30
  OUTPUT_DIR = "videos_with_ad"
31
31
  PLOT_DIR = "alignment_plots"
32
+ EXTERNAL_FILES_FOLDER = "resources"
32
33
  PLOT_ALIGNMENT_TO_FILE = True
33
34
 
34
35
  TIMESTEP_SIZE_SECONDS = .16
@@ -72,7 +73,7 @@ def ensure_folders_exist(dirs):
72
73
  print("Directory not found, creating it:", dir)
73
74
  os.makedirs(dir)
74
75
 
75
- def get_sorted_filenames(path, extensions):
76
+ def get_sorted_filenames(path, extensions, alt_extensions=set([])):
76
77
  path = os.path.abspath(path)
77
78
  if os.path.isdir(path):
78
79
  files = glob.glob(path + "/*")
@@ -81,21 +82,23 @@ def get_sorted_filenames(path, extensions):
81
82
  print("No file found at:", path)
82
83
  raise RuntimeError("No valid file found at input path.")
83
84
  files = [path]
84
- files = [file for file in files if os.path.splitext(file)[1][1:] in extensions]
85
+ files = [file for file in files if os.path.splitext(file)[1][1:] in extensions | alt_extensions]
85
86
  if len(files) == 0:
86
87
  print("Not enough files with valid extensions present at:", path)
87
88
  print("Did you accidentally put the audio filepath before the video filepath?")
88
89
  print("The video path should be the first positional input, audio second.")
89
90
  print("Or maybe you need to add a new extension to this script's regex?")
90
91
  raise RuntimeError("No valid files found at input path.")
91
- return sorted(files)
92
+ files = sorted(files)
93
+ file_types = [0 if os.path.splitext(file)[1][1:] in extensions else 1 for file in files]
94
+ return files, file_types
92
95
 
93
96
  # read audio from file with ffmpeg and convert to numpy array
94
97
  def parse_audio_from_file(media_file):
95
98
  media_stream, _ = (ffmpeg
96
99
  .input(media_file)
97
100
  .output('-', format='s16le', acodec='pcm_s16le', ac=2, ar=AUDIO_SAMPLE_RATE, loglevel='fatal')
98
- .run(capture_stdout=True, cmd=imageio_ffmpeg.get_ffmpeg_exe())
101
+ .run(capture_stdout=True, cmd=get_ffmpeg())
99
102
  )
100
103
  media_arr = np.frombuffer(media_stream, np.int16).astype(np.float32).reshape((-1,2)).T
101
104
  return media_arr
@@ -257,7 +260,7 @@ def rough_align(video_spec, audio_desc_spec, video_timings, audio_desc_timings):
257
260
 
258
261
  # filter out low match quality nodes from LCS path
259
262
  quals = [get_match_quality(node) for node in path]
260
- if max(quals) <= 0:
263
+ if len(quals) == 0 or max(quals) <= 0:
261
264
  raise RuntimeError("Rough alignment failed, are the input files mismatched?")
262
265
  path, quals = zip(*[(path, qual) for (path, qual) in zip(path, quals) if qual > 0])
263
266
 
@@ -402,9 +405,14 @@ def plot_alignment(plot_filename, path, smooth_path, quals, runs, bad_clips, ad_
402
405
  lcs_rgba[:,3] = np.minimum(1, np.array(quals) * 500. / len(quals))
403
406
  audio_times, video_times = np.array(path).T.reshape((2,-1))
404
407
  audio_offsets = audio_times - video_times
405
- plt.xlim((0, np.max(video_times) / 60.))
406
- plt.ylim((np.min(audio_offsets) - TIMESTEP_SIZE_SECONDS / 2.,
407
- np.max(audio_offsets) + TIMESTEP_SIZE_SECONDS / 2.))
408
+ def expand_limits(start, end, ratio=.01):
409
+ average = (end + start) / 2.
410
+ half_diff = (end - start) / 2.
411
+ half_diff *= (1 + ratio)
412
+ return (average - half_diff, average + half_diff)
413
+ plt.xlim(expand_limits(*(0, np.max(video_times) / 60.)))
414
+ plt.ylim(expand_limits(*(np.min(audio_offsets) - TIMESTEP_SIZE_SECONDS / 2.,
415
+ np.max(audio_offsets) + TIMESTEP_SIZE_SECONDS / 2.)))
408
416
  plt.scatter(video_times / 60., audio_offsets, s=3, c=lcs_rgba, label='LCS Matches')
409
417
  audio_times, video_times = np.array(smooth_path).T.reshape((2,-1))
410
418
  audio_offsets = audio_times - video_times
@@ -625,28 +633,45 @@ def detect_describer(video_arr, video_spec, video_spec_raw, video_timings,
625
633
 
626
634
  return speech_sample_mask, boost_sample_mask, ad_timings
627
635
 
628
- # outputs a new video file with the replaced audio (which includes audio descriptions)
629
- def write_replaced_media_to_disk(output_filename, video_file, video_arr):
630
- video_arr_pipe = ffmpeg.input('pipe:', format='s16le', acodec='pcm_s16le',
636
+ # check whether ffmpeg is available locally before checking for an installed version
637
+ def get_ffmpeg():
638
+ if os.path.isdir(EXTERNAL_FILES_FOLDER):
639
+ files = glob.glob(EXTERNAL_FILES_FOLDER + "/ffmpeg*")
640
+ if len(files) > 0:
641
+ return files[0]
642
+ return imageio_ffmpeg.get_ffmpeg_exe()
643
+
644
+ # outputs a new media file with the replaced audio (which includes audio descriptions)
645
+ def write_replaced_media_to_disk(output_filename, media_arr, video_file=None):
646
+ media_arr_pipe = ffmpeg.input('pipe:', format='s16le', acodec='pcm_s16le',
631
647
  ac=2, ar=AUDIO_SAMPLE_RATE)
632
648
  original_video = ffmpeg.input(video_file, an=None)
633
- # "-max_interleave_delta 0" is sometimes necessary to fix an .mkv bug that freezes audio/video:
634
- # ffmpeg bug warning: [matroska @ 0000000002c814c0] Starting new cluster due to timestamp
635
- # more info about the bug and fix: https://reddit.com/r/ffmpeg/comments/efddfs/
636
- write_command = ffmpeg.output(video_arr_pipe, original_video, output_filename,
637
- acodec='aac', vcodec='copy', scodec='copy',
638
- max_interleave_delta='0', loglevel='fatal')
639
- ffmpeg_caller = write_command.run_async(pipe_stdin=True, cmd=imageio_ffmpeg.get_ffmpeg_exe())
640
- ffmpeg_caller.stdin.write(video_arr.astype(np.int16).T.tobytes())
649
+ if video_file is None:
650
+ write_command = ffmpeg.output(media_arr_pipe, output_filename, loglevel='fatal')
651
+ else:
652
+ # "-max_interleave_delta 0" is sometimes necessary to fix an .mkv bug that freezes audio/video:
653
+ # ffmpeg bug warning: [matroska @ 0000000002c814c0] Starting new cluster due to timestamp
654
+ # more info about the bug and fix: https://reddit.com/r/ffmpeg/comments/efddfs/
655
+ write_command = ffmpeg.output(media_arr_pipe, original_video, output_filename,
656
+ acodec='aac', vcodec='copy', scodec='copy',
657
+ max_interleave_delta='0', loglevel='fatal')
658
+ ffmpeg_caller = write_command.run_async(pipe_stdin=True, cmd=get_ffmpeg())
659
+ ffmpeg_caller.stdin.write(media_arr.astype(np.int16).T.tobytes())
641
660
  ffmpeg_caller.stdin.close()
642
661
  ffmpeg_caller.wait()
643
662
 
644
663
  # combines videos with matching audio files (e.g. audio descriptions)
645
664
  # this is the main function of this script, it calls the other functions in order
646
665
  def combine(video, audio, smoothness=50, keep_non_ad=False, boost=0,
647
- ad_detect_sensitivity=.6, boost_sensitivity=.4):
648
- video_files = get_sorted_filenames(video, VIDEO_EXTENSIONS)
649
- audio_desc_files = get_sorted_filenames(audio, AUDIO_EXTENSIONS)
666
+ ad_detect_sensitivity=.6, boost_sensitivity=.4, yes=False):
667
+ video_files, video_file_types = get_sorted_filenames(video, VIDEO_EXTENSIONS, AUDIO_EXTENSIONS)
668
+ if yes == False and sum(video_file_types) > 0:
669
+ print("")
670
+ print("One or more audio files found in video input. Was this intentional?")
671
+ print("If not, press ctrl+c to kill this script.")
672
+ input("If this was intended, press Enter to continue...")
673
+ print("")
674
+ audio_desc_files, _ = get_sorted_filenames(audio, AUDIO_EXTENSIONS)
650
675
  if len(video_files) != len(audio_desc_files):
651
676
  raise RuntimeError("Number of valid files in input directories are not the same.")
652
677
 
@@ -659,13 +684,15 @@ def combine(video, audio, smoothness=50, keep_non_ad=False, boost=0,
659
684
  print(os.path.split(video_file)[1])
660
685
  print(os.path.split(audio_desc_file)[1])
661
686
  print("")
662
- print("Are the above input file pairings correct?")
663
- print("If not, press ctrl+c to kill this script.")
664
- input("If they are correct, press Enter to continue...")
665
- print("")
687
+ if yes == False:
688
+ print("Are the above input file pairings correct?")
689
+ print("If not, press ctrl+c to kill this script.")
690
+ input("If they are correct, press Enter to continue...")
691
+ print("")
666
692
  print("Processing files:")
667
693
 
668
- for (video_file, audio_desc_file) in zip(video_files, audio_desc_files):
694
+ for (video_file, audio_desc_file, video_filetype) in zip(video_files, audio_desc_files,
695
+ video_file_types):
669
696
  output_filename = os.path.join(OUTPUT_DIR, OUTPUT_FILE_PREPEND_TEXT + \
670
697
  os.path.split(video_file)[1])
671
698
  print(" ", output_filename)
@@ -716,8 +743,10 @@ def combine(video, audio, smoothness=50, keep_non_ad=False, boost=0,
716
743
  if PLOT_ALIGNMENT_TO_FILE:
717
744
  plot_filename = os.path.join(PLOT_DIR, os.path.splitext(os.path.split(video_file)[1])[0] + '.png')
718
745
  plot_alignment(plot_filename, path, smooth_path, quals, runs, bad_clips, ad_timings)
719
-
720
- write_replaced_media_to_disk(output_filename, video_file, video_arr)
746
+ if video_filetype == 0:
747
+ write_replaced_media_to_disk(output_filename, video_arr, video_file)
748
+ else:
749
+ write_replaced_media_to_disk(output_filename, video_arr)
721
750
  del video_arr
722
751
 
723
752
  # Entry point for command line interaction, for example:
@@ -743,10 +772,12 @@ def command_line_interface():
743
772
  parser.add_argument('--boost_sensitivity', type=float, default=.4,
744
773
  help='Higher values make --boost less likely to miss a description, but ' + \
745
774
  'also make it more likely to boost non-description audio. Default is 0.4')
775
+ parser.add_argument('--yes', action='store_true',
776
+ help='Auto-skips user prompts asking to verify information.')
746
777
  args = parser.parse_args()
747
778
 
748
779
  combine(args.video, args.audio, args.smoothness, args.keep_non_ad, args.boost,
749
- args.ad_detect_sensitivity, args.boost_sensitivity)
780
+ args.ad_detect_sensitivity, args.boost_sensitivity, args.yes)
750
781
 
751
782
  # allows the script to be run on its own, rather than through the package, for example:
752
783
  # python3 describealign.py video.mp4 audio_desc.mp3
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "describealign"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  authors = [
9
9
  { name="Julian Brown", email="julbean@proton.me" },
10
10
  ]
File without changes
File without changes
File without changes