ffmpeg-normalize 1.32.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,594 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ from typing import TYPE_CHECKING, Iterator, Literal, TypedDict, cast
8
+
9
+ from ._cmd_utils import CommandRunner, dict_to_filter_opts
10
+ from ._errors import FFmpegNormalizeError
11
+
12
+ if TYPE_CHECKING:
13
+ from ._ffmpeg_normalize import FFmpegNormalize
14
+ from ._media_file import MediaFile
15
+
16
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Matches the "[Parsed_loudnorm_<N>" prefix at the start of a line and
# captures <N>, which is used as the key of the parsed statistics.
_loudnorm_pattern = re.compile(r"\[Parsed_loudnorm_(\d+)")
19
+
20
+
21
class EbuLoudnessStatistics(TypedDict):
    """
    Statistics reported by one run of the loudnorm filter for one stream.

    Instances are produced by ``AudioStream._parse_loudnorm_output`` from the
    JSON object that the filter prints: every numeric field present is
    converted to ``float`` (negative infinity is replaced by -99, positive
    infinity by 0) and ``normalization_type`` is lower-cased.
    """

    # Measured input loudness; passed back to loudnorm as "measured_i".
    input_i: float
    # Passed back to loudnorm as "measured_tp" (presumably the true peak of the input).
    input_tp: float
    # Measured loudness range of the input; passed back as "measured_lra".
    input_lra: float
    # Passed back to loudnorm as "measured_thresh".
    input_thresh: float
    # The "output_*" fields are only parsed and stored in this module.
    output_i: float
    output_tp: float
    output_lra: float
    output_thresh: float
    # Passed back to loudnorm as "offset" in the second pass.
    target_offset: float
    # Lower-cased value of the report's "normalization_type" entry.
    normalization_type: str
32
+
33
+
34
class LoudnessStatistics(TypedDict):
    """
    All loudness measurements collected for one audio stream.

    ``AudioStream.__init__`` initialises every entry to ``None``; the entries
    are filled in once the corresponding analysis has run.
    """

    # loudnorm report of the first pass, set by AudioStream.parse_loudnorm_stats.
    ebu_pass1: EbuLoudnessStatistics | None
    # loudnorm report of the second pass, set by AudioStream.set_second_pass_stats.
    ebu_pass2: EbuLoudnessStatistics | None
    # First "RMS level dB" value found by AudioStream.parse_astats.
    mean: float | None
    # First "Peak level dB" value found by AudioStream.parse_astats.
    max: float | None
39
+
40
+
41
class LoudnessStatisticsWithMetadata(LoudnessStatistics):
    """
    Loudness statistics of a stream together with the data identifying it.

    This is the shape returned by ``AudioStream.get_stats``.
    """

    # Taken from the owning media file's ``input_file``.
    input_file: str
    # Taken from the owning media file's ``output_file``.
    output_file: str
    # The stream's ``stream_id``.
    stream_id: int
45
+
46
+
47
class MediaStream:
    """
    Base class describing a single stream (audio, video or subtitle) of a media file.
    """

    def __init__(
        self,
        ffmpeg_normalize: FFmpegNormalize,
        media_file: MediaFile,
        stream_type: Literal["audio", "video", "subtitle"],
        stream_id: int,
    ):
        """
        Remember which file and normalizer the stream belongs to and log its creation.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object the stream is part of.
            stream_type (Literal["audio", "video", "subtitle"]): The kind of stream.
            stream_id (int): The stream ID.
        """
        # Identity of the stream ...
        self.stream_id = stream_id
        self.stream_type = stream_type
        # ... and the objects it belongs to.
        self.media_file = media_file
        self.ffmpeg_normalize = ffmpeg_normalize

        _logger.debug(
            f"Created MediaStream for {self.media_file.input_file}, {self.stream_type} stream {self.stream_id}"
        )

    def __repr__(self) -> str:
        """Return ``<file name, stream type stream id>`` using the input file's base name."""
        file_name = os.path.basename(self.media_file.input_file)
        return f"<{file_name}, {self.stream_type} stream {self.stream_id}>"
77
+
78
+
79
class VideoStream(MediaStream):
    """A MediaStream whose stream type is fixed to "video"."""

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
    ):
        """
        Create a video stream.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="video",
            stream_id=stream_id,
        )
84
+
85
+
86
class SubtitleStream(MediaStream):
    """A MediaStream whose stream type is fixed to "subtitle"."""

    def __init__(
        self, ffmpeg_normalize: FFmpegNormalize, media_file: MediaFile, stream_id: int
    ):
        """
        Create a subtitle stream.

        Args:
            ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
            media_file (MediaFile): The MediaFile object.
            stream_id (int): The stream ID.
        """
        super().__init__(
            ffmpeg_normalize=ffmpeg_normalize,
            media_file=media_file,
            stream_type="subtitle",
            stream_id=stream_id,
        )
91
+
92
+
93
+ class AudioStream(MediaStream):
94
def __init__(
    self,
    ffmpeg_normalize: FFmpegNormalize,
    media_file: MediaFile,
    stream_id: int,
    sample_rate: int | None,
    bit_depth: int | None,
    duration: float | None,
):
    """
    Set up an audio stream with empty loudness statistics.

    Args:
        ffmpeg_normalize (FFmpegNormalize): The FFmpegNormalize object.
        media_file (MediaFile): The MediaFile object.
        stream_id (int): The stream ID.
        sample_rate (int): sample rate in Hz
        bit_depth (int): bit depth in bits
        duration (float): duration in seconds
    """
    super().__init__(
        ffmpeg_normalize=ffmpeg_normalize,
        media_file=media_file,
        stream_type="audio",
        stream_id=stream_id,
    )

    # Properties of the stream as passed in by the caller (any may be None).
    self.sample_rate = sample_rate
    self.bit_depth = bit_depth
    self.duration = duration

    # Nothing has been measured yet; the analysis methods fill these in.
    self.loudness_statistics: LoudnessStatistics = {
        "ebu_pass1": None,
        "ebu_pass2": None,
        "mean": None,
        "max": None,
    }
127
+
128
+ @staticmethod
129
+ def _constrain(
130
+ number: float, min_range: float, max_range: float, name: str | None = None
131
+ ) -> float:
132
+ """
133
+ Constrain a number between two values.
134
+
135
+ Args:
136
+ number (float): The number to constrain.
137
+ min_range (float): The minimum value.
138
+ max_range (float): The maximum value.
139
+ name (str): The name of the number (for logging).
140
+
141
+ Returns:
142
+ float: The constrained number.
143
+
144
+ Raises:
145
+ ValueError: If min_range is greater than max_range.
146
+ """
147
+ if min_range > max_range:
148
+ raise ValueError("min must be smaller than max")
149
+ result = max(min(number, max_range), min_range)
150
+ if result != number and name is not None:
151
+ _logger.warning(
152
+ f"Constraining {name} to range of [{min_range}, {max_range}]: {number} -> {result}"
153
+ )
154
+ return result
155
+
156
+ def get_stats(self) -> LoudnessStatisticsWithMetadata:
157
+ """
158
+ Return loudness statistics for the stream.
159
+
160
+ Returns:
161
+ dict: A dictionary containing the loudness statistics.
162
+ """
163
+ stats: LoudnessStatisticsWithMetadata = {
164
+ "input_file": self.media_file.input_file,
165
+ "output_file": self.media_file.output_file,
166
+ "stream_id": self.stream_id,
167
+ "ebu_pass1": self.loudness_statistics["ebu_pass1"],
168
+ "ebu_pass2": self.loudness_statistics["ebu_pass2"],
169
+ "mean": self.loudness_statistics["mean"],
170
+ "max": self.loudness_statistics["max"],
171
+ }
172
+ return stats
173
+
174
def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
    """
    Store the loudnorm statistics of the second pass on this stream.

    Args:
        stats (dict): The EBU loudness statistics to store under "ebu_pass2".
    """
    message = f"Setting second pass stats for stream {self.stream_id} from {stats}"
    _logger.debug(message)
    self.loudness_statistics["ebu_pass2"] = stats
185
+
186
+ def get_pcm_codec(self) -> str:
187
+ """
188
+ Get the PCM codec string for the stream.
189
+
190
+ Returns:
191
+ str: The PCM codec string.
192
+ """
193
+ if not self.bit_depth:
194
+ return "pcm_s16le"
195
+ elif self.bit_depth <= 8:
196
+ return "pcm_s8"
197
+ elif self.bit_depth in [16, 24, 32, 64]:
198
+ return f"pcm_s{self.bit_depth}le"
199
+ else:
200
+ _logger.warning(
201
+ f"Unsupported bit depth {self.bit_depth}, falling back to pcm_s16le"
202
+ )
203
+ return "pcm_s16le"
204
+
205
+ def _get_filter_str_with_pre_filter(self, current_filter: str) -> str:
206
+ """
207
+ Get a filter string for current_filter, with the pre-filter
208
+ added before. Applies the input label before.
209
+
210
+ Args:
211
+ current_filter (str): The current filter.
212
+
213
+ Returns:
214
+ str: The filter string.
215
+ """
216
+ input_label = f"[0:{self.stream_id}]"
217
+ filter_chain = []
218
+ if self.media_file.ffmpeg_normalize.pre_filter:
219
+ filter_chain.append(self.media_file.ffmpeg_normalize.pre_filter)
220
+ filter_chain.append(current_filter)
221
+ filter_str = input_label + ",".join(filter_chain)
222
+ return filter_str
223
+
224
def parse_astats(self) -> Iterator[float]:
    """
    Run ffmpeg with the astats filter on this stream and record its levels.

    The first "RMS level dB" value in ffmpeg's output is stored in
    ``loudness_statistics["mean"]`` and the first "Peak level dB" value in
    ``loudness_statistics["max"]``. A value captured as a lone "-" is
    stored as negative infinity.

    Yields:
        float: The progress of the command.

    Raises:
        FFmpegNormalizeError: If the RMS or the peak level is missing from
            ffmpeg's output.
    """
    _logger.info(f"Running first pass astats filter for stream {self.stream_id}")

    astats_filter = self._get_filter_str_with_pre_filter(
        "astats=measure_overall=Peak_level+RMS_level:measure_perchannel=0"
    )

    # Analysis only: the result is sent to the null muxer.
    cmd = [
        self.media_file.ffmpeg_normalize.ffmpeg_exe,
        "-hide_banner",
        "-y",
        "-i",
        self.media_file.input_file,
        "-filter_complex",
        astats_filter,
        "-vn",
        "-sn",
        "-f",
        "null",
        os.devnull,
    ]

    runner = CommandRunner()
    yield from runner.run_ffmpeg_command(cmd)
    output = runner.get_output()

    _logger.debug(
        f"astats command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
    )

    def first_level(pattern: str) -> float | None:
        """Return the first value captured by pattern in the output, or None."""
        found = re.findall(pattern, output)
        if not found:
            return None
        # The capture group cannot match "inf", so "-inf" is captured as "-".
        return float("-inf") if found[0] == "-" else float(found[0])

    mean_volume = first_level(r"RMS level dB: ([\-\d\.]+)")
    if mean_volume is None:
        raise FFmpegNormalizeError(
            f"Could not get mean volume for {self.media_file.input_file}"
        )
    self.loudness_statistics["mean"] = mean_volume

    max_volume = first_level(r"Peak level dB: ([\-\d\.]+)")
    if max_volume is None:
        raise FFmpegNormalizeError(
            f"Could not get max volume for {self.media_file.input_file}"
        )
    self.loudness_statistics["max"] = max_volume
281
+
282
def parse_loudnorm_stats(self) -> Iterator[float]:
    """
    Run a first pass loudnorm filter to get measured data.

    The statistics of the first loudnorm report found in ffmpeg's output
    are stored in ``loudness_statistics["ebu_pass1"]``.

    Yields:
        float: The progress of the command.

    Raises:
        FFmpegNormalizeError: If ffmpeg's output contains no loudnorm
            report, or the report cannot be parsed.
    """
    _logger.info(f"Running first pass loudnorm filter for stream {self.stream_id}")

    normalizer = self.media_file.ffmpeg_normalize

    opts = {
        "i": normalizer.target_level,
        "lra": normalizer.loudness_range_target,
        "tp": normalizer.true_peak,
        "offset": normalizer.offset,
        "print_format": "json",
    }

    if normalizer.dual_mono:
        opts["dual_mono"] = "true"

    filter_str = self._get_filter_str_with_pre_filter(
        "loudnorm=" + dict_to_filter_opts(opts)
    )

    # Analysis only: the result is sent to the null muxer.
    cmd = [
        normalizer.ffmpeg_exe,
        "-hide_banner",
        "-y",
        "-i",
        self.media_file.input_file,
        "-map",
        f"0:{self.stream_id}",
        "-filter_complex",
        filter_str,
        "-vn",
        "-sn",
        "-f",
        "null",
        os.devnull,
    ]

    cmd_runner = CommandRunner()
    yield from cmd_runner.run_ffmpeg_command(cmd)
    output = cmd_runner.get_output()

    _logger.debug(
        f"Loudnorm first pass command output: {CommandRunner.prune_ffmpeg_progress_from_output(output)}"
    )

    parsed_stats = AudioStream.prune_and_parse_loudnorm_output(output)
    if not parsed_stats:
        # Calling next() on an empty iterator here would raise StopIteration
        # inside this generator, which Python turns into an opaque
        # RuntimeError; report the actual problem instead.
        raise FFmpegNormalizeError(
            f"Could not get loudnorm stats for {self.media_file.input_file}"
        )

    # only one stream
    self.loudness_statistics["ebu_pass1"] = next(iter(parsed_stats.values()))
335
+
336
@staticmethod
def prune_and_parse_loudnorm_output(
    output: str,
) -> dict[int, EbuLoudnessStatistics]:
    """
    Remove ffmpeg's progress lines from output, then parse every loudnorm
    report it contains. There may be multiple reports if multiple streams
    were processed.

    Args:
        output (str): The output from ffmpeg.

    Returns:
        dict[int, EbuLoudnessStatistics]: The EBU loudness statistics.
    """
    _logger.debug("Parsing loudnorm stats from output")
    without_progress = CommandRunner.prune_ffmpeg_progress_from_output(output)
    # The parser expects lines without surrounding whitespace.
    stripped_lines = list(map(str.strip, without_progress.split("\n")))
    return AudioStream._parse_loudnorm_output(stripped_lines)
354
+
355
+ @staticmethod
356
+ def _parse_loudnorm_output(
357
+ output_lines: list[str],
358
+ ) -> dict[int, EbuLoudnessStatistics]:
359
+ """
360
+ Parse the output of a loudnorm filter to get the EBU loudness statistics.
361
+
362
+ Args:
363
+ output_lines (list[str]): The output lines of the loudnorm filter.
364
+
365
+ Raises:
366
+ FFmpegNormalizeError: When the output could not be parsed.
367
+
368
+ Returns:
369
+ dict[int, EbuLoudnessStatistics]: stream index and the EBU loudness statistics, if found.
370
+ """
371
+ result = dict[int, EbuLoudnessStatistics]()
372
+ stream_index = -1
373
+ loudnorm_start = 0
374
+ for index, line in enumerate(output_lines):
375
+ if stream_index < 0:
376
+ if m := _loudnorm_pattern.match(line):
377
+ loudnorm_start = index + 1
378
+ stream_index = int(m.group(1))
379
+ else:
380
+ if line.startswith("}"):
381
+ loudnorm_end = index + 1
382
+ loudnorm_data = "\n".join(output_lines[loudnorm_start:loudnorm_end])
383
+
384
+ try:
385
+ loudnorm_stats = json.loads(loudnorm_data)
386
+
387
+ _logger.debug(
388
+ f"Loudnorm stats for stream {stream_index} parsed: {loudnorm_data}"
389
+ )
390
+
391
+ for key in [
392
+ "input_i",
393
+ "input_tp",
394
+ "input_lra",
395
+ "input_thresh",
396
+ "output_i",
397
+ "output_tp",
398
+ "output_lra",
399
+ "output_thresh",
400
+ "target_offset",
401
+ "normalization_type",
402
+ ]:
403
+ if key not in loudnorm_stats:
404
+ continue
405
+ if key == "normalization_type":
406
+ loudnorm_stats[key] = loudnorm_stats[key].lower()
407
+ # handle infinite values
408
+ elif float(loudnorm_stats[key]) == -float("inf"):
409
+ loudnorm_stats[key] = -99
410
+ elif float(loudnorm_stats[key]) == float("inf"):
411
+ loudnorm_stats[key] = 0
412
+ else:
413
+ # convert to floats
414
+ loudnorm_stats[key] = float(loudnorm_stats[key])
415
+
416
+ result[stream_index] = cast(
417
+ EbuLoudnessStatistics, loudnorm_stats
418
+ )
419
+ stream_index = -1
420
+ except Exception as e:
421
+ raise FFmpegNormalizeError(
422
+ f"Could not parse loudnorm stats; wrong JSON format in string: {e}"
423
+ )
424
+ return result
425
+
426
def get_second_pass_opts_ebu(self) -> str:
    """
    Return second pass loudnorm filter options string for ffmpeg.

    In dynamic mode the options are built from the configured targets only.
    Otherwise the first pass measurements are fed back to the filter, and
    the loudness range target may be replaced by the measured input
    loudness range when one of the "keep" options is set. That adjusted
    target applies to this stream only; the shared configuration object is
    not modified, so other streams and files still see the configured value.

    Returns:
        str: The "loudnorm=..." filter string.

    Raises:
        FFmpegNormalizeError: If not in dynamic mode and the first pass
            statistics are missing.
    """
    # Both references are kept exactly as the individual settings were read
    # before; they presumably point to the same object.
    normalizer = self.ffmpeg_normalize
    file_normalizer = self.media_file.ffmpeg_normalize

    dynamic_sample_rate_warning = (
        "In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. "
        "Specify -ar/--sample-rate to override it."
    )

    # In dynamic mode, we can do everything in one pass, and we do not have first pass stats
    if file_normalizer.dynamic:
        if not normalizer.sample_rate:
            _logger.warning(dynamic_sample_rate_warning)

        opts = {
            "i": file_normalizer.target_level,
            "lra": file_normalizer.loudness_range_target,
            "tp": file_normalizer.true_peak,
            "offset": file_normalizer.offset,
            "linear": "false",
            "print_format": "json",
        }

        if file_normalizer.dual_mono:
            opts["dual_mono"] = "true"

        return "loudnorm=" + dict_to_filter_opts(opts)

    stats = self.loudness_statistics["ebu_pass1"]
    if not stats:
        raise FFmpegNormalizeError(
            "First pass not run, you must call parse_loudnorm_stats first"
        )

    if float(stats["input_i"]) > 0:
        _logger.warning(
            "Input file had measured input loudness greater than zero "
            f"({stats['input_i']}), capping at 0"
        )
        stats["input_i"] = 0

    input_lra = stats["input_lra"]
    # Per-stream copy of the target: writing the adjusted value back to the
    # shared configuration would leak it into every later stream and file.
    loudness_range_target = file_normalizer.loudness_range_target

    if file_normalizer.keep_loudness_range_target:
        _logger.debug(
            "Keeping target loudness range in second pass loudnorm filter"
        )
        if input_lra < 1 or input_lra > 50:
            _logger.warning(
                "Input file had measured loudness range outside of [1,50] "
                f"({input_lra}), capping to allowed range"
            )

        loudness_range_target = self._constrain(input_lra, 1, 50)

    if file_normalizer.keep_lra_above_loudness_range_target:
        if input_lra <= loudness_range_target:
            _logger.debug(
                "Setting loudness range target in second pass loudnorm filter"
            )
        else:
            loudness_range_target = input_lra
            _logger.debug(
                "Keeping target loudness range in second pass loudnorm filter"
            )

    # The explicit dynamic option was handled by the early return above, so
    # dynamic mode can only result from the loudness range check here.
    will_use_dynamic_mode = loudness_range_target < input_lra
    if will_use_dynamic_mode:
        _logger.warning(
            f"Input file had loudness range of {input_lra}. "
            f"This is larger than the loudness range target ({loudness_range_target}). "
            "Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization. "
            "Alternatively, use the --keep-loudness-range-target or --keep-lra-above-loudness-range-target option to keep the target loudness range from "
            "the input."
        )

    if will_use_dynamic_mode and not normalizer.sample_rate:
        _logger.warning(dynamic_sample_rate_warning)

    target_level = normalizer.target_level
    if normalizer.auto_lower_loudness_target:
        safe_target = (
            stats["input_i"]
            - stats["input_tp"]
            + normalizer.true_peak
            - 0.1
        )
        if safe_target < normalizer.target_level:
            target_level = safe_target
            _logger.warning(
                f"Using loudness target {target_level} because --auto-lower-loudness-target given.",
            )

    opts = {
        "i": target_level,
        "lra": loudness_range_target,
        "tp": file_normalizer.true_peak,
        "offset": self._constrain(
            stats["target_offset"], -99, 99, name="target_offset"
        ),
        "measured_i": self._constrain(stats["input_i"], -99, 0, name="input_i"),
        "measured_lra": self._constrain(
            stats["input_lra"], 0, 99, name="input_lra"
        ),
        "measured_tp": self._constrain(stats["input_tp"], -99, 99, name="input_tp"),
        "measured_thresh": self._constrain(
            stats["input_thresh"], -99, 0, name="input_thresh"
        ),
        # Linear mode is always requested on this path (the dynamic option
        # returned early above).
        "linear": "true",
        "print_format": "json",
    }

    if file_normalizer.dual_mono:
        opts["dual_mono"] = "true"

    return "loudnorm=" + dict_to_filter_opts(opts)
557
+
558
def get_second_pass_opts_peakrms(self) -> str:
    """
    Set the adjustment gain based on chosen option and mean/max volume,
    return the matching ffmpeg volume filter.

    For "peak" normalization the gain is the target level minus the measured
    max volume, for "rms" the target level minus the measured mean volume.
    When the measured level is infinite (``parse_astats`` stores negative
    infinity for a value reported as "-inf"), no finite gain exists; the
    stream is then left unchanged (0 dB) and a warning is logged.

    Returns:
        str: ffmpeg volume filter string

    Raises:
        FFmpegNormalizeError: If the mean/max volume has not been measured,
            or the normalization type is neither "peak" nor "rms".
    """
    import math

    max_volume = self.loudness_statistics["max"]
    mean_volume = self.loudness_statistics["mean"]
    if max_volume is None or mean_volume is None:
        raise FFmpegNormalizeError(
            "First pass not run, no mean/max volume to normalize to"
        )

    normalization_type = self.media_file.ffmpeg_normalize.normalization_type
    target_level = self.media_file.ffmpeg_normalize.target_level

    if normalization_type == "peak":
        adjustment = 0 + target_level - max_volume
    elif normalization_type == "rms":
        adjustment = target_level - mean_volume
    else:
        raise FFmpegNormalizeError(
            "Can only set adjustment for peak and RMS normalization"
        )

    if not math.isfinite(adjustment):
        # An infinite or NaN gain would be emitted as e.g. "volume=infdB".
        _logger.warning(
            f"Stream {self.stream_id} has no finite {normalization_type} level, "
            "leaving its volume unchanged"
        )
        adjustment = 0.0

    _logger.info(
        f"Adjusting stream {self.stream_id} by {adjustment} dB to reach {target_level}"
    )

    # Peak level after applying the gain; above 0 dB the signal clips.
    clip_amount = max_volume + adjustment
    if clip_amount > 0:
        _logger.warning(f"Adjusting will lead to clipping of {clip_amount} dB")

    return f"volume={adjustment}dB"
@@ -0,0 +1 @@
1
# Version string of this package release.
__version__ = "1.32.5"
File without changes