clipwright-render 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1595 @@
1
+ """plan.py — pure logic layer for clipwright-render.
2
+
3
+ Does not execute ffmpeg/ffprobe. Probe results are received as ProbeInfo arguments
4
+ (DC-AM-007). Responsible for three concerns: timeline analysis, filter_complex
5
+ construction, and dry-run size estimation.
6
+
7
+ Design decisions:
8
+ - Single re-encode (ADR-1): filter_complex uses trim+concat for frame-accurate
9
+ time control with less degradation than repeated re-encodes.
10
+ - concat=n=1 unconditionally (DC-AS-005): simplifies implementation; no branch for
11
+ a single segment. ffmpeg handles n=1 correctly.
12
+ - First audio stream only (ADR-7): mapping multiple audio streams adds significant
13
+ complexity; only the first stream is handled in this iteration.
14
+ - afftdn denoise injection (§B-2):
15
+ filter_parts order is fixed as trim/atrim → concat → afftdn → scale.
16
+ afftdn (audio chain) and scale (video chain) use independent labels without
17
+ conflict. When has_audio=False, afftdn is not inserted and a warning is
18
+ appended.
19
+ - loudness injection (ADR-L5/L5b/L6):
20
+ loudness filter is chained after denoise (acoustically correct order).
21
+ The audio map terminal label is resolved via a cumulative-pipe helper
22
+ (DC-AM-001): [outa] → (denoise present → [outa_dn]) → (track loudness
23
+ present → [outa_ln]). When no loudness directive is present, behaviour is fully
24
+ backward compatible (ADR-L6).
25
+ - Multi-source support (ADR-C1–C12, §7 v2):
26
+ Routing branches on unique source count; single-source backward compatibility
27
+ is strictly preserved (ADR-C3). unique_sources_in_order is the single source
28
+ of truth for input index assignment (ADR-C9-r2).
29
+ - Resolution pair constraint (DC-AM-004): width/height with only one specified is
30
+ rejected by RenderOptions model_validator (schemas.py) as ValidationError.
31
+ _build_multi_source_filter_complex assumes either both specified or both None.
32
+ - BGM mixing (ADR-B4-r2/B5-r2/B5-r3/B6-r2/B9-r3):
33
+ resolve_bgm detects kind=="bgm" clips from all Audio tracks (ADR-B4-r2).
34
+ When build_plan receives a non-None bgm argument, _append_bgm_pipe appends
35
+ the BGM stage. has_main_audio (presence of main audio) and has_audio_output
36
+ (final output audio presence) are separated (ADR-B5-r2).
37
+ -stream_loop -1 is added by render.py; plan uses atrim=0:{main_dur} for
38
+ duration (ADR-B6-r2). BGM index = len(input_sources) (bgm_source is not
39
+ included in input_sources; DC-AS-005).
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ import os
45
+ from collections.abc import Mapping
46
+ from dataclasses import dataclass, field
47
+ from typing import Annotated, Any, Literal
48
+
49
+ import opentimelineio as otio
50
+ from clipwright.errors import ClipwrightError, ErrorCode
51
+ from pydantic import BaseModel, Field, ValidationError, model_validator
52
+
53
+ from clipwright_render.schemas import RenderOptions, SubtitleOptions
54
+
55
+ # ===========================================================================
56
+ # Denoise schema (no dependency on clipwright-noise; defined inline for render)
57
+ # ===========================================================================
58
+
59
+
60
class AfftdnParams(BaseModel):
    """Validated parameter set for ffmpeg's afftdn denoiser (DC-AS-006).

    Out-of-range or mistyped values are rejected by Pydantic when the model is
    constructed.
    """

    # Noise reduction amount in dB; accepted range 0.01–97.
    nr: Annotated[float, Field(ge=0.01, le=97)]
    # Noise floor in dB; accepted range -80 to -20.
    nf: Annotated[float, Field(ge=-80, le=-20)]
    # Noise type: "w" = white noise (default), "v" = vinyl noise.
    nt: Literal["w", "v"] = "w"
71
+
72
+
73
+ # SR M-1: allowed value set for afftdn nt (module-level constant).
74
+ # Defence-in-depth alongside the Literal["w","v"] type constraint,
75
+ # referenced from _append_audio_pipe.
76
+ _VALID_NT_VALUES: frozenset[str] = frozenset({"w", "v"})
77
+
78
+
79
class DenoiseDirective(BaseModel):
    """Schema for timeline metadata["clipwright"]["denoise"] (DC-AS-006/ADR-N9).

    This model checks only the envelope of the directive. ``params`` stays an
    untyped dict here; for backend == "afftdn" it is re-validated against
    AfftdnParams by _validate_denoise_directive. The original design notes say
    that backend == "deepfilternet" expects params == {}; that rule is not
    enforced by this model.

    SR L-1: tool/version are length-limited to guard against oversized strings.
    SR L-3: measured_noise_floor_db must be finite (no inf/nan) and lie within
    -200 to 0 dB; it may also be omitted.
    """

    # NR-M-1: 64 is the limit recorded for the noise-side writer schema. The
    # reader must not be stricter than the writer, otherwise it would reject
    # values the writer considers valid.
    tool: Annotated[str, Field(max_length=64)]
    version: Annotated[str, Field(max_length=64)]
    kind: Literal["denoise"]
    backend: Literal["afftdn", "deepfilternet"]
    params: dict[str, Any]
    measured_noise_floor_db: (
        Annotated[float, Field(ge=-200.0, le=0.0, allow_inf_nan=False)] | None
    ) = None
103
+
104
+
105
+ # ===========================================================================
106
+ # Loudness schema (no dependency on clipwright-loudness; defined inline for render)
107
+ # NR-M-1: align max_length with loudness-side schemas.py (writer); unified at 64.
108
+ # ===========================================================================
109
+
110
+
111
class LoudnormTarget(BaseModel):
    """Target values accepted for loudnorm mode (ADR-L1)."""

    # Integrated loudness target in LUFS; accepted range -70 to -5.
    i: Annotated[float, Field(ge=-70.0, le=-5.0)]
    # True peak target in dBTP; accepted range -9 to 0.
    tp: Annotated[float, Field(ge=-9.0, le=0.0)]
    # Loudness range target in LU; accepted range 1 to 50.
    lra: Annotated[float, Field(ge=1.0, le=50.0)]
122
+
123
+
124
class PeakTarget(BaseModel):
    """Target value accepted for peak mode (ADR-L2)."""

    # Peak target in dB; accepted range -60 to 0.
    peak_db: Annotated[float, Field(ge=-60.0, le=0.0)]
131
+
132
+
133
class LoudnormMeasured(BaseModel):
    """Measured values for loudnorm mode (ADR-L1, linear two-pass).

    Every field is required and must be a finite float; inf and nan are rejected
    (CWE-20). No numeric range is imposed beyond finiteness.
    """

    input_i: Annotated[float, Field(allow_inf_nan=False)]
    input_tp: Annotated[float, Field(allow_inf_nan=False)]
    input_lra: Annotated[float, Field(allow_inf_nan=False)]
    input_thresh: Annotated[float, Field(allow_inf_nan=False)]
    target_offset: Annotated[float, Field(allow_inf_nan=False)]
144
+
145
+
146
class PeakMeasured(BaseModel):
    """Measured value for peak mode (ADR-L2)."""

    # Measured peak in dB; must be finite and within -200 to 0.
    max_volume_db: Annotated[float, Field(ge=-200.0, le=0.0, allow_inf_nan=False)]
153
+
154
+
155
class LoudnessDirective(BaseModel):
    """Schema for timeline metadata["clipwright"]["loudness"] (ADR-L4/ADR-L6).

    Only scope == "track" is accepted (per-clip scope is deferred; DC-AS-003).

    Reader-strict difference from the writer schema (CR-M-001), as recorded in
    the original design notes:
    - The writer side tolerates measured=None.
    - This reader rejects mode == "loudnorm" combined with measured=None,
      because the measured_* values are needed to apply loudnorm linearly.

    Two after-validators enforce the cross-field rules: measured must be present
    for loudnorm, and the target model must match the selected mode.

    NR-M-1: tool/version use max_length=64 to stay compatible with the writer.
    """

    tool: Annotated[str, Field(max_length=64)]
    version: Annotated[str, Field(max_length=64)]
    kind: Literal["loudness"]
    mode: Literal["loudnorm", "peak"]
    scope: Literal["track"]
    target: LoudnormTarget | PeakTarget
    # None stays in the type so that a directive written without measured
    # values can still be parsed. The loudnorm + measured=None combination is
    # rejected at runtime by the first validator below.
    measured: LoudnormMeasured | PeakMeasured | None = None

    @model_validator(mode="after")
    def _validate_measured_required_for_loudnorm(self) -> LoudnessDirective:
        """Reject loudnorm directives that carry no measured values."""
        lacks_measurement = self.measured is None
        if lacks_measurement and self.mode == "loudnorm":
            message = (
                "measured is required for loudnorm mode"
                " (needed for linear application)."
            )
            raise ValueError(message)
        return self

    @model_validator(mode="after")
    def _validate_target_matches_mode(self) -> LoudnessDirective:
        """Reject a target model that does not belong to the selected mode."""
        requirements = {
            "loudnorm": (
                LoudnormTarget,
                "loudnorm mode requires a LoudnormTarget (i/tp/lra) for target.",
            ),
            "peak": (
                PeakTarget,
                "peak mode requires a PeakTarget (peak_db) for target.",
            ),
        }
        requirement = requirements.get(self.mode)
        if requirement is not None:
            expected_type, message = requirement
            if not isinstance(self.target, expected_type):
                raise ValueError(message)
        return self
209
+
210
+
211
+ # ===========================================================================
212
+ # BGM schema (no dependency on clipwright-bgm; defined inline for render)
213
+ # ADR-B9-r2: reader-strict, unknown keys forbidden, allow_inf_nan=False
214
+ # NR-M-1: tool/version max_length=64 (consistent with clipwright-bgm writer)
215
+ # ===========================================================================
216
+
217
+
218
class DuckingDirective(BaseModel):
    """Schema for the BGM ducking settings (ADR-B5-r3/DC-AS-006).

    Unknown keys are rejected and inf/nan values are refused (SR M-1), both via
    model_config.
    """

    model_config = {"extra": "forbid", "allow_inf_nan": False}

    # Per the design notes: when True, sidechaincompress is injected so the BGM
    # ducks under the main audio.
    enabled: bool = False
    # sidechaincompress threshold.
    # NOTE(review): the design notes quote ffmpeg's accepted range as
    # 0.000976563–1.0, while this constraint admits any value in (0, 1] —
    # confirm whether the lower bound should be tightened.
    threshold: Annotated[float, Field(gt=0.0, le=1.0)]
    # sidechaincompress ratio; accepted range 1.0–20.0.
    ratio: Annotated[float, Field(ge=1.0, le=20.0)]
233
+
234
+
235
class BgmDirective(BaseModel):
    """Schema for a BGM clip's metadata["clipwright"] (ADR-B9-r2/B9-r3).

    Reader-strict: unknown keys are forbidden and inf/nan values are refused,
    both via model_config.
    """

    model_config = {"extra": "forbid", "allow_inf_nan": False}

    # NR-M-1: 64 keeps tool/version consistent with the BGM writer.
    tool: Annotated[str, Field(max_length=64)]
    version: Annotated[str, Field(max_length=64)]
    kind: Literal["bgm"]
    # SR I-1: -60.0 to 20.0 dB, matching the writer-side BgmOptions per the
    # design notes.
    volume_db: Annotated[float, Field(ge=-60.0, le=20.0, allow_inf_nan=False)]
    # Fade lengths in seconds; 0.0 (the default) means no fade (ADR-B9-r3).
    fade_in_sec: Annotated[float, Field(ge=0)] = 0.0
    fade_out_sec: Annotated[float, Field(ge=0)] = 0.0
    # Required nested ducking settings.
    ducking: DuckingDirective
255
+
256
+
257
+ # ===========================================================================
258
+ # Data types
259
+ # ===========================================================================
260
+
261
+
262
@dataclass
class KeptRange:
    """One kept segment of the timeline."""

    # target_url of the media file the segment is cut from.
    source: str
    # OTIO TimeRange of the segment, kept in opentime form; conversion to
    # seconds is left to the consumer.
    source_range: otio.opentime.TimeRange
272
+
273
+
274
@dataclass(frozen=True)
class BgmClip:
    """Immutable description of the timeline's BGM clip (ADR-B4-r2)."""

    # target_url of the BGM media file.
    source: str
    # OTIO TimeRange taken from the BGM clip's source_range.
    source_range: otio.opentime.TimeRange
    # Clip metadata after validation by BgmDirective.
    directive: BgmDirective
286
+
287
+
288
@dataclass
class ProbeInfo:
    """Probe results handed to the planning layer (DC-AM-007).

    The planner receives this object as an argument instead of running ffprobe
    itself.
    """

    # Whether the probed media has a video stream.
    has_video: bool
    # Number of audio streams found.
    audio_count: int
    # Overall bit rate; when None the output size cannot be estimated (ADR-3).
    bit_rate: int | None = None
    # Picture geometry and frame rate. Optional for backward compatibility; the
    # design notes assign them to multi-source output normalisation (ADR-C2).
    width: int | None = None
    height: int | None = None
    fps: float | None = None
304
+
305
+
306
@dataclass
class RenderPlan:
    """Execution plan produced by build_plan."""

    # Complete value for ffmpeg's -filter_complex argument, carried as a single
    # string (prevents injection).
    filter_complex: str
    # Remaining ffmpeg arguments, excluding -filter_complex; every element is a
    # str (M-1).
    ffmpeg_args: list[str]
    # Number of kept segments.
    segment_count: int
    # Total output duration in seconds.
    total_duration_seconds: float
    # Estimated output size in bytes; None when no bit rate is available.
    estimated_size_bytes: float | None = None
    # Notes about the dry-run estimate.
    warnings: list[str] = field(default_factory=list)
    # Ordered, deduplicated input sources (single source of truth; ADR-C9-r2).
    input_sources: list[str] = field(default_factory=list)
    # Path of the BGM source; None when there is no BGM (ADR-B5/B7).
    bgm_source: str | None = None
331
+
332
+
333
+ # ===========================================================================
334
+ # Utility functions
335
+ # ===========================================================================
336
+
337
+
338
def unique_sources_in_order(ranges: list[KeptRange]) -> list[str]:
    """List each distinct source once, ordered by first appearance (ADR-C9-r2).

    The result is the single source of truth for assigning ffmpeg input indices
    and for input_sources: a source used by several clips keeps the position of
    its earliest clip.
    """
    # dict keys are unique and keep insertion order, which gives exactly the
    # "first occurrence wins" ordering required here.
    first_seen = dict.fromkeys(kept.source for kept in ranges)
    return list(first_seen)
353
+
354
+
355
+ # ===========================================================================
356
+ # resolve_kept_ranges
357
+ # ===========================================================================
358
+
359
+
360
def resolve_kept_ranges(timeline: otio.schema.Timeline) -> list[KeptRange]:
    """Collect the kept segments from the timeline's only video track.

    Walks the children of the video track in order and turns every Clip into a
    KeptRange (ADR-5/DC-AS-006):

    - Gaps mark removed regions and contribute nothing.
    - Children that are neither Clip nor Transition are ignored.
    - Clips may point at different sources (ADR-C3); each KeptRange keeps the
      target_url of its own clip.
    - source_range is passed through exactly as stored on the clip, without
      conversion to seconds.

    Returns:
        KeptRange objects in track order.

    Raises:
        ClipwrightError(UNSUPPORTED_OPERATION): the timeline has no video track
            or more than one, contains a Transition, or a clip uses a media
            reference that is neither missing nor an ExternalReference.
        ClipwrightError(INVALID_INPUT): a clip has a MissingReference, or the
            track yields no Clip at all.
    """
    video_tracks = [
        track
        for track in timeline.tracks
        if track.kind == otio.schema.TrackKind.Video
    ]
    if not video_tracks:
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message="No video track found.",
            hint="Use an OTIO timeline that contains a video track.",
        )
    if len(video_tracks) > 1:
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message="The timeline contains two or more video tracks.",
            hint="Use an OTIO timeline with only a single video track.",
        )

    kept: list[KeptRange] = []
    for child in video_tracks[0]:
        if isinstance(child, otio.schema.Transition):
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message="The timeline contains a Transition.",
                hint="Use an OTIO timeline that does not contain Transitions.",
            )
        if not isinstance(child, otio.schema.Clip):
            # Gaps (removed regions) and any other non-clip children are skipped.
            continue

        reference = child.media_reference
        if isinstance(reference, otio.schema.MissingReference):
            # A missing reference is broken timeline data, so it is reported as
            # INVALID_INPUT rather than as an unsupported configuration.
            raise ClipwrightError(
                code=ErrorCode.INVALID_INPUT,
                message="Media reference is missing (MissingReference).",
                hint="Use an ExternalReference with a target_url.",
            )
        if not isinstance(reference, otio.schema.ExternalReference):
            # Any other reference type (e.g. GeneratorReference) is a
            # configuration this renderer does not support.
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message=(
                    "Media references other than ExternalReference are not supported."
                ),
                hint="Use an ExternalReference with a target_url.",
            )

        # NOTE(review): presumably every clip carries an explicit source_range;
        # if a clip has none, the stored value is forwarded unchanged — confirm
        # that downstream code copes with that.
        kept.append(
            KeptRange(source=reference.target_url, source_range=child.source_range)
        )

    if not kept:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="No kept segments found (no Clips).",
            hint="Use an OTIO timeline that contains at least one Clip.",
        )
    return kept
438
+
439
+
440
+ # ===========================================================================
441
+ # resolve_bgm
442
+ # ===========================================================================
443
+
444
+
445
def resolve_bgm(timeline: otio.schema.Timeline) -> BgmClip | None:
    """Find the timeline's BGM clip, if any (ADR-B4-r2).

    Every Audio track is scanned for Clips whose metadata["clipwright"] is a
    mapping with kind == "bgm" and whose media reference is an
    ExternalReference. Detection counts such clips, not Audio tracks
    (DC-AS-002), so a main audio track without the bgm marker does not
    interfere. Marked clips with any other media reference type are skipped.

    Returns:
        A BgmClip when exactly one BGM clip exists; None when there is none
        (backward compatible).

    Raises:
        ClipwrightError(UNSUPPORTED_OPERATION): two or more BGM clips were found
            (only a single BGM is supported).
        ClipwrightError(INVALID_INPUT): the BGM clip's metadata does not
            validate against BgmDirective.
    """
    candidates: list[tuple[str, otio.opentime.TimeRange, Mapping[str, Any]]] = []

    audio_tracks = (
        track
        for track in timeline.tracks
        if track.kind == otio.schema.TrackKind.Audio
    )
    for track in audio_tracks:
        for child in track:
            if not isinstance(child, otio.schema.Clip):
                continue
            meta = child.metadata.get("clipwright")
            # OTIO metadata values are AnyDictionary, which is not a dict
            # subclass, so the check goes through the Mapping protocol
            # (DC-AS-002).
            if not (isinstance(meta, Mapping) and meta.get("kind") == "bgm"):
                continue
            reference = child.media_reference
            if isinstance(reference, otio.schema.ExternalReference):
                candidates.append((reference.target_url, child.source_range, meta))

    if not candidates:
        return None

    if len(candidates) > 1:
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message=(
                "The timeline contains two or more BGM clips"
                " (only a single BGM is supported)."
            ),
            hint=(
                "Reduce the number of BGM clips in the timeline to one."
                " Mixing multiple BGM tracks is not currently supported."
            ),
        )

    # Exactly one candidate remains: validate its metadata and wrap it.
    source, source_range, raw_meta = candidates[0]
    try:
        directive = BgmDirective(**raw_meta)
    except (ValidationError, TypeError, ValueError):
        # ValueError is part of the catch list so that ValueErrors raised by
        # model validators are covered too (same list as
        # _validate_loudness_directive). "from None" keeps the validation
        # details out of the raised error.
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "BGM clip metadata validation failed."
                " Check field names, types, and values."
            ),
            hint=(
                "Verify that metadata['clipwright'] of the BGM clip has kind='bgm',"
                " volume_db, fade_in_sec, fade_out_sec, and ducking set correctly."
            ),
        ) from None

    return BgmClip(source=source, source_range=source_range, directive=directive)
524
+
525
+
526
+ # ===========================================================================
527
+ # build_plan
528
+ # ===========================================================================
529
+
530
+
531
+ def _escape_filtergraph(path: str) -> str:
532
+ """Escape a path for use in filtergraph filename= / fontsdir= options.
533
+
534
+ Verified escape rules (M2 2026-06-11 / DC-AS-005):
535
+ 1. Backslash (\\) → \\\\
536
+ 2. Colon (:) → \\:
537
+ Applying in this order ensures Windows absolute paths (C:\\...) reach ffmpeg
538
+ without depending on the current working directory.
539
+
540
+ Example: C:\\Users\\sub.srt → C\\:\\\\Users\\\\sub.srt
541
+ """ # noqa: E501
542
+ return path.replace("\\", "\\\\").replace(":", "\\:")
543
+
544
+
545
+ def _rgb_to_ass_colour(hex_color: str) -> str:
546
+ """Convert a #RRGGBB colour string to ASS PrimaryColour (&H00BBGGRR).
547
+
548
+ Verified in practice (M2 2026-06-11 / DC-AM-002):
549
+ - 8-digit &H00BBGGRR (AA=00 = fully opaque) ensures opaque rendering.
550
+ - Example: #FF0000 (red: R=FF, G=00, B=00) → &H000000FF (BGR order).
551
+
552
+ Args:
553
+ hex_color: colour string in '#RRGGBB' format.
554
+
555
+ Returns:
556
+ ASS PrimaryColour string in '&H00BBGGRR' format (uppercase).
557
+ """
558
+ # Strip leading # and extract R/G/B
559
+ hex_str = hex_color.lstrip("#")
560
+ r = int(hex_str[0:2], 16)
561
+ g = int(hex_str[2:4], 16)
562
+ b = int(hex_str[4:6], 16)
563
+ # ASS uses BGR order; AA=00 (fully opaque), 8 digits
564
+ return f"&H00{b:02X}{g:02X}{r:02X}"
565
+
566
+
567
+ def _build_force_style(subtitle: SubtitleOptions, is_ass: bool) -> str | None:
568
+ """Build the force_style string for the filtergraph from SubtitleOptions.
569
+
570
+ Returns None for ASS input (force_style not applied; ADR-S6-r2 / DC-AS-002).
571
+ Returns None when all style fields are None (omit force_style= entirely).
572
+
573
+ Returns:
574
+ String in 'FontName=...,FontSize=...' format, or None when not needed.
575
+ """
576
+ if is_ass:
577
+ # ASS has embedded styles; do not apply force_style (DC-AS-002)
578
+ return None
579
+
580
+ parts: list[str] = []
581
+ if subtitle.font_name is not None:
582
+ parts.append(f"FontName={subtitle.font_name}")
583
+ if subtitle.font_size is not None:
584
+ parts.append(f"FontSize={subtitle.font_size}")
585
+ if subtitle.font_color is not None:
586
+ ass_colour = _rgb_to_ass_colour(subtitle.font_color)
587
+ parts.append(f"PrimaryColour={ass_colour}")
588
+ if subtitle.outline is not None:
589
+ # :g format removes trailing decimal zeros
590
+ parts.append(f"Outline={subtitle.outline:g}")
591
+ if subtitle.alignment is not None:
592
+ parts.append(f"Alignment={subtitle.alignment}")
593
+ if subtitle.margin_v is not None:
594
+ parts.append(f"MarginV={subtitle.margin_v}")
595
+
596
+ if not parts:
597
+ return None
598
+ return ",".join(parts)
599
+
600
+
601
def _append_subtitle_filter(
    filter_parts: list[str],
    video_map_label: str,
    subtitle: SubtitleOptions,
) -> str:
    """Add the subtitles filter stage to filter_parts and return its label.

    Follows the verified syntax (M2 2026-06-11) per ADR-S4-r2 / ADR-S5-r2 /
    ADR-S6-r2. There is no timeline_dir argument; per DC-AS-001 boundary
    validation of the paths is centralised in render.py.

    Generated filter:
        {L_v}subtitles=filename='{esc(path)}'[:fontsdir='{esc(dir)}']
            [:force_style='{style}'][:charenc=UTF-8][outvsub]

    A path ending in .ass (case-insensitive) is treated as ASS: no force_style
    and no charenc are added, though fontsdir may still be (DC-AS-002). Any
    other extension gets charenc=UTF-8 and, when style fields are set,
    force_style.

    Args:
        filter_parts: list of filter_complex segments (mutated in place).
        video_map_label: terminal label of the video chain (e.g. '[outv]').
        subtitle: SubtitleOptions whose path is already absolute (ADR-S5-r2).

    Returns:
        The new video label, '[outvsub]'.
    """
    subtitle_path = subtitle.path
    is_ass = os.path.splitext(subtitle_path)[1].lower() == ".ass"

    # Options are collected in order and joined with ':' at the end. Paths are
    # escaped and wrapped in single quotes (ADR-S5-r2).
    options = [f"filename='{_escape_filtergraph(subtitle_path)}'"]

    fonts_dir = subtitle.fonts_dir
    if fonts_dir is not None:
        # fontsdir applies to ASS, SRT and VTT alike.
        options.append(f"fontsdir='{_escape_filtergraph(fonts_dir)}'")

    style = _build_force_style(subtitle, is_ass)
    if style is not None:
        options.append(f"force_style='{style}'")

    if not is_ass:
        # charenc is only added for non-ASS input.
        options.append("charenc=UTF-8")

    out_label = "[outvsub]"
    joined = ":".join(options)
    filter_parts.append(f"{video_map_label}subtitles={joined}{out_label}")
    return out_label
659
+
660
+
661
+ def _to_seconds(rt: otio.opentime.RationalTime) -> float:
662
+ """Convert RationalTime to seconds (6 decimal places).
663
+
664
+ OTIO's type stubs define to_seconds() as Any, so an explicit float
665
+ cast is used to satisfy mypy strict mode.
666
+ """
667
+ return round(float(rt.to_seconds()), 6)
668
+
669
+
670
def _validate_denoise_directive(denoise: dict[str, Any]) -> DenoiseDirective:
    """Turn a raw denoise directive dict into a validated DenoiseDirective.

    Validation happens in two steps: the envelope is checked with
    DenoiseDirective, and for backend == "afftdn" the params are additionally
    checked with AfftdnParams. Other backends skip the second step.

    Raises:
        ClipwrightError(INVALID_INPUT): either step fails. The underlying
            exception is suppressed with "from None".
    """
    try:
        directive = DenoiseDirective(**denoise)
    except (ValidationError, TypeError):
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "Denoise directive validation failed."
                " Check field names, types, and values."
            ),
            hint=(
                "Verify that the denoise field in the timeline metadata is in"
                " the correct format. backend must be 'afftdn' or 'deepfilternet'."
            ),
        ) from None

    if directive.backend != "afftdn":
        return directive

    # Only the validation outcome matters here; the parsed params are discarded.
    try:
        AfftdnParams(**directive.params)
    except (ValidationError, TypeError):
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "afftdn params validation failed."
                " Check field names, types, and values."
            ),
            hint=(
                "params.nr must be a float in 0.01–97, params.nf in -80 to -20,"
                " and params.nt must be 'w' or 'v'."
            ),
        ) from None
    return directive
708
+
709
+
710
def _validate_loudness_directive(loudness: dict[str, Any]) -> LoudnessDirective:
    """Validate the loudness directive dict; raises INVALID_INPUT on failure.

    Before LoudnessDirective is constructed, a mapping-valued target / measured
    entry is converted to the model that belongs to the declared mode
    (LoudnormTarget / LoudnormMeasured for "loudnorm", PeakTarget / PeakMeasured
    for "peak"). Choosing the model from the mode avoids relying on Pydantic's
    union resolution and ties a failure to the mode-specific model (L-3).
    Consistency between mode and target type is still checked by the model
    validators of LoudnessDirective.

    Nested values are recognised through the Mapping protocol rather than dict,
    so OTIO metadata containers (which are not dict subclasses) get the same
    mode-directed conversion as plain dicts.

    Security: input values are not included in error messages (SR M-1).

    Args:
        loudness: raw loudness directive as read from the timeline metadata.

    Returns:
        The validated LoudnessDirective.

    Raises:
        ClipwrightError(INVALID_INPUT): the directive does not validate.
    """
    models_by_field = {
        "target": {"loudnorm": LoudnormTarget, "peak": PeakTarget},
        "measured": {"loudnorm": LoudnormMeasured, "peak": PeakMeasured},
    }
    try:
        raw = dict(loudness)
        mode = raw.get("mode")
        for field_name, models in models_by_field.items():
            value = raw.get(field_name)
            # An unknown or non-string mode selects no model; the value is then
            # left untouched and LoudnessDirective reports the problem.
            model = models.get(mode) if isinstance(mode, str) else None
            if model is not None and isinstance(value, Mapping):
                raw[field_name] = model(**value)
        directive = LoudnessDirective(**raw)
    except (ValidationError, TypeError, ValueError):
        # ValueError stays in the catch list alongside ValidationError so that
        # any ValueError escaping validation is reported the same way.
        # from None: CWE-209 information leakage prevention. ValidationError
        # details may contain paths, so they are not exposed externally.
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "Loudness directive validation failed."
                " Check field names, types, and values."
            ),
            hint=(
                "Check the format of the loudness field in the timeline metadata."
                " mode must be 'loudnorm' or 'peak'; scope must be 'track'."
                " loudnorm mode requires measured."
            ),
        ) from None
    return directive
757
+
758
+
759
def _append_audio_pipe(
    filter_parts: list[str],
    has_audio: bool,
    denoise_directive: DenoiseDirective | None,
    loudness_directive: LoudnessDirective | None,
) -> tuple[bool, bool]:
    """Chain the optional denoise and loudness stages onto the ``[outa]`` label.

    Called by both the single-source and the multi-source builders so that the
    label scheme is the same on either path (ADR-C11-r2):
    ``[outa]`` -> ``[outa_dn]`` (afftdn) -> ``[outa_ln]`` (loudnorm or volume).
    ``filter_parts`` is extended in place. When ``has_audio`` is false nothing
    is appended; the matching warnings are emitted by ``build_plan``.

    Returns:
        ``(use_afftdn, use_loudness)``: whether each stage was appended.

    Raises:
        ClipwrightError: INTERNAL when ``nt`` is not in ``_VALID_NT_VALUES`` or
            when the target/measured objects do not match the directive mode.
    """
    if not has_audio:
        return False, False

    denoised = False
    if denoise_directive is not None and denoise_directive.backend == "afftdn":
        afftdn = AfftdnParams(**denoise_directive.params)
        nr_text = f"{afftdn.nr:g}"
        nf_text = f"{afftdn.nf:g}"
        nt_text = afftdn.nt
        # SR M-1: allow-list check kept in addition to the model's own
        # constraint, because nt is interpolated into the filter string.
        if nt_text not in _VALID_NT_VALUES:
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message="afftdn nt parameter is invalid (internal error).",
                hint="params.nt must be 'w' or 'v'.",
            )
        filter_parts.append(
            f"[outa]afftdn=nr={nr_text}:nf={nf_text}:nt={nt_text}[outa_dn]"
        )
        denoised = True

    if loudness_directive is None:
        return denoised, False

    # Loudness reads from the denoised label when afftdn was inserted.
    upstream = "[outa_dn]" if denoised else "[outa]"
    mode = loudness_directive.mode

    if mode == "loudnorm":
        target = loudness_directive.target
        measured = loudness_directive.measured
        if not (
            isinstance(target, LoudnormTarget)
            and isinstance(measured, LoudnormMeasured)
        ):
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message=(
                    "loudnorm directive type consistency is invalid"
                    " (internal error)."
                ),
                hint="LoudnessDirective model_validator is not functioning.",
            )
        filter_parts.append(
            f"{upstream}loudnorm="
            f"I={target.i:g}:TP={target.tp:g}:LRA={target.lra:g}"
            f":measured_I={measured.input_i:g}"
            f":measured_TP={measured.input_tp:g}"
            f":measured_LRA={measured.input_lra:g}"
            f":measured_thresh={measured.input_thresh:g}"
            f":offset={measured.target_offset:g}:linear=true[outa_ln]"
        )
        return denoised, True

    if mode == "peak":
        target = loudness_directive.target
        measured = loudness_directive.measured
        if not (
            isinstance(target, PeakTarget) and isinstance(measured, PeakMeasured)
        ):
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message=(
                    "peak directive type consistency is invalid (internal error)."
                ),
                hint="LoudnessDirective model_validator is not functioning.",
            )
        # Gain is the distance from the measured peak to the requested peak.
        gain_text = f"{target.peak_db - measured.max_volume_db:g}"
        filter_parts.append(f"{upstream}volume={gain_text}dB[outa_ln]")
        return denoised, True

    # Any other mode adds no loudness stage.
    return denoised, False
856
+
857
+
858
def _build_filter_complex(
    ranges: list[KeptRange],
    has_audio: bool,
    denoise_directive: DenoiseDirective | None,
    loudness_directive: LoudnessDirective | None,
    options: RenderOptions,
) -> tuple[str, str, str, bool, bool]:
    """Assemble the single-source filter graph and its terminal labels (M-2).

    Stage order: per-range trim/atrim -> concat -> afftdn -> loudness -> scale
    -> subtitle. Every range reads from input 0, so this builder is only used
    on the single-source path (ADR-C3).

    Returns:
        ``(filter_complex, video_map_label, audio_map_label, use_afftdn,
        use_loudness)``.
    """
    stages: list[str] = []
    concat_inputs: list[str] = []

    # One trim (and, with audio, one atrim) per kept range. The concat filter
    # expects its inputs interleaved as v0 a0 v1 a1 ..., so the labels are
    # collected in that order while the stages are emitted.
    for idx, kept in enumerate(ranges):
        t_start = _to_seconds(kept.source_range.start_time)
        t_end = round(t_start + _to_seconds(kept.source_range.duration), 6)
        stages.append(
            f"[0:v]trim=start={t_start}:end={t_end},setpts=PTS-STARTPTS[v{idx}]"
        )
        concat_inputs.append(f"[v{idx}]")
        if has_audio:
            stages.append(
                f"[0:a]atrim=start={t_start}:end={t_end},asetpts=PTS-STARTPTS[a{idx}]"
            )
            concat_inputs.append(f"[a{idx}]")

    audio_streams = 1 if has_audio else 0
    concat_outputs = "[outv][outa]" if has_audio else "[outv]"
    stages.append(
        f"{''.join(concat_inputs)}"
        f"concat=n={len(ranges)}:v=1:a={audio_streams}{concat_outputs}"
    )

    # Denoise / loudness stages share one helper with the multi-source path.
    use_afftdn, use_loudness = _append_audio_pipe(
        stages, has_audio, denoise_directive, loudness_directive
    )

    # Scaling lives inside filter_complex because -vf cannot be combined with
    # -filter_complex; the scaled stream gets its own label for -map (ADR-1).
    video_map_label = "[outv]"
    if options.width is not None and options.height is not None:
        stages.append(f"[outv]scale={options.width}:{options.height}[outvscaled]")
        video_map_label = "[outvscaled]"

    # Subtitles are attached to whatever the final video label is (ADR-S4-r3);
    # with subtitle=None the graph is left untouched (ADR-S8).
    if options.subtitle is not None:
        video_map_label = _append_subtitle_filter(
            stages, video_map_label, options.subtitle
        )

    # The audio terminal label is that of the last stage appended (ADR-L5b).
    if use_loudness:
        audio_map_label = "[outa_ln]"
    elif use_afftdn:
        audio_map_label = "[outa_dn]"
    else:
        audio_map_label = "[outa]"

    return (
        ";".join(stages),
        video_map_label,
        audio_map_label,
        use_afftdn,
        use_loudness,
    )
951
+
952
+
953
def _floor_to_even_dimension(value: int) -> int:
    """Round a pixel dimension down to an even number, never below 2.

    Plain even rounding maps 1 to 0. ffmpeg's scale and pad filters interpret
    a size of 0 as "use the input size", which would silently disable the
    normalisation, so a positive dimension is clamped to 2. Non-positive
    values are passed through the plain rounding unchanged.
    """
    even = (value // 2) * 2
    if value > 0 and even < 2:
        return 2
    return even


def _resolve_target_spec(
    source_probes: dict[str, ProbeInfo],
    first_source: str,
    options: RenderOptions,
) -> tuple[int, int, float]:
    """Determine the output spec ``(target_w, target_h, target_fps)`` (ADR-C4-r2).

    Helper extracted from ``_build_multi_source_filter_complex``.

    Resolution: ``options.width``/``options.height`` are used when both are
    set; otherwise the first source's probed size is used. A lone width or
    height is rejected by ``RenderOptions._validate_resolution_pair``
    (DC-AM-004), so only "both set" or "both None" reach this function.
    Both dimensions are rounded down to an even value (yuv420p constraint),
    with a floor of 2 for positive inputs.

    Frame rate: ``options.fps`` when set, otherwise the first source's fps.

    Returns:
        Tuple of ``(target_w, target_h, target_fps)``.

    Raises:
        ClipwrightError: INVALID_INPUT when the first source's resolution or
            fps is needed but not available.
    """
    first_probe = source_probes[first_source]

    if options.width is not None and options.height is not None:
        raw_w = options.width
        raw_h = options.height
    else:
        if first_probe.width is None or first_probe.height is None:
            raise ClipwrightError(
                code=ErrorCode.INVALID_INPUT,
                message="Cannot obtain resolution from the first source clip.",
                hint=(
                    "Set width/height on the first source in source_probes, or"
                    " specify both width and height in RenderOptions."
                ),
            )
        raw_w = first_probe.width
        raw_h = first_probe.height

    # Even-number rounding (ADR-C4-r2); 1 is lifted to 2 rather than dropped
    # to 0 so the scale/pad filters always receive an explicit size.
    target_w = _floor_to_even_dimension(raw_w)
    target_h = _floor_to_even_dimension(raw_h)

    # fps: an explicit option wins; otherwise fall back to the first source.
    if options.fps is not None:
        target_fps: float = options.fps
    else:
        if first_probe.fps is None:
            raise ClipwrightError(
                code=ErrorCode.INVALID_INPUT,
                message="Cannot obtain fps from the first source clip.",
                hint=(
                    "Set fps on the first source in source_probes, or specify"
                    " fps in RenderOptions."
                ),
            )
        target_fps = first_probe.fps

    return target_w, target_h, target_fps
1014
+
1015
+
1016
def _build_clip_filters(
    ranges: list[KeptRange],
    source_index: dict[str, int],
    source_probes: dict[str, ProbeInfo],
    has_audio_overall: bool,
    target_w: int,
    target_h: int,
    target_fps: float,
) -> tuple[list[str], list[str], list[str]]:
    """Emit the per-clip video/audio chains for the multi-source path.

    Helper extracted from ``_build_multi_source_filter_complex``
    (ADR-C5-r2/C7-r2). Each clip's video is trimmed and normalised to the
    target spec (fps, scale with preserved aspect ratio, pad, setsar). When
    the output carries audio, each clip contributes either its own trimmed
    audio reformatted to 48 kHz stereo, or an ``anullsrc`` segment of the same
    duration if its source has no audio stream.

    Returns:
        Tuple of ``(filter_parts, video_labels, audio_labels)``.
    """
    stages: list[str] = []
    v_labels: list[str] = []
    a_labels: list[str] = []

    for idx, kept in enumerate(ranges):
        input_no = source_index[kept.source]
        t_start = _to_seconds(kept.source_range.start_time)
        length = _to_seconds(kept.source_range.duration)
        t_end = round(t_start + length, 6)

        # fps is written with 5 decimals so NTSC rates keep their precision
        # (ADR-C2-r2).
        video_chain = ",".join(
            (
                f"trim=start={t_start}:end={t_end}",
                "setpts=PTS-STARTPTS",
                f"fps={target_fps:.5f}",
                f"scale={target_w}:{target_h}:force_original_aspect_ratio=decrease",
                f"pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2",
                "setsar=1",
            )
        )
        stages.append(f"[{input_no}:v]{video_chain}[v{idx}]")
        v_labels.append(f"[v{idx}]")

        if not has_audio_overall:
            continue

        if source_probes[kept.source].audio_count >= 1:
            # Real audio: trim, reset timestamps, normalise the sample format.
            audio_chain = (
                f"[{input_no}:a]atrim=start={t_start}:end={t_end},"
                f"asetpts=PTS-STARTPTS,"
                f"aformat=sample_rates=48000:channel_layouts=stereo"
            )
        else:
            # Audio-less source: generated silence matching the clip duration.
            audio_chain = (
                f"anullsrc=channel_layout=stereo:sample_rate=48000,"
                f"atrim=0:{length},asetpts=PTS-STARTPTS"
            )
        stages.append(f"{audio_chain}[a{idx}]")
        a_labels.append(f"[a{idx}]")

    return stages, v_labels, a_labels
1073
+
1074
+
1075
def _build_multi_source_filter_complex(
    ranges: list[KeptRange],
    source_index: dict[str, int],
    source_probes: dict[str, ProbeInfo],
    has_audio_overall: bool,
    denoise_directive: DenoiseDirective | None,
    loudness_directive: LoudnessDirective | None,
    options: RenderOptions,
    first_source: str,
) -> tuple[str, str, str, bool, bool]:
    """Assemble the filter graph for two or more distinct sources.

    (ADR-C1/C5-r2/C7-r2/C11-r2.) The work is split three ways:

    - ``_resolve_target_spec`` picks the output width/height/fps.
    - ``_build_clip_filters`` emits one normalised video chain (and, when the
      output has audio, one audio chain) per clip.
    - this function adds the concat stage, the shared denoise/loudness pipe
      and the optional subtitle stage, then resolves the terminal labels.

    Output labels match the single-source builder (``[outv]``/``[outa]``).

    Returns:
        ``(filter_complex, video_map_label, audio_map_label, use_afftdn,
        use_loudness)``.
    """
    clip_total = len(ranges)

    out_w, out_h, out_fps = _resolve_target_spec(source_probes, first_source, options)
    stages, v_labels, a_labels = _build_clip_filters(
        ranges,
        source_index,
        source_probes,
        has_audio_overall,
        out_w,
        out_h,
        out_fps,
    )

    # concat takes its inputs interleaved (v0 a0 v1 a1 ...) when audio exists.
    if has_audio_overall:
        concat_inputs = "".join(
            v + a for v, a in zip(v_labels, a_labels, strict=True)
        )
        concat_spec = f"concat=n={clip_total}:v=1:a=1[outv][outa]"
    else:
        concat_inputs = "".join(v_labels)
        concat_spec = f"concat=n={clip_total}:v=1:a=0[outv]"
    stages.append(f"{concat_inputs}{concat_spec}")

    # Denoise / loudness stages share one helper with the single-source path.
    use_afftdn, use_loudness = _append_audio_pipe(
        stages, has_audio_overall, denoise_directive, loudness_directive
    )

    # Clips were already normalised individually, so there is no post-concat
    # scale stage on this path (ADR-C5-r2).
    video_map_label = "[outv]"

    # Subtitles attach to the final video label (ADR-S4-r3); subtitle=None
    # leaves the graph untouched (ADR-S8).
    if options.subtitle is not None:
        video_map_label = _append_subtitle_filter(
            stages, video_map_label, options.subtitle
        )

    # The audio terminal label is that of the last stage appended.
    if use_loudness:
        audio_map_label = "[outa_ln]"
    elif use_afftdn:
        audio_map_label = "[outa_dn]"
    else:
        audio_map_label = "[outa]"

    return (
        ";".join(stages),
        video_map_label,
        audio_map_label,
        use_afftdn,
        use_loudness,
    )
1168
+
1169
+
1170
def _bgm_seconds_str(value: float) -> str:
    """Format a timeline position in seconds with up to 6 decimal places.

    The ``g`` presentation type keeps only 6 significant digits (12345.678
    becomes "12345.7") and switches to exponent notation from 1e6 upward.
    Fixed-point formatting with trailing zeros removed keeps microsecond
    precision while producing the same text as ``g`` for short values
    ("10", "2.5").
    """
    return f"{value:.6f}".rstrip("0").rstrip(".")


def _append_bgm_pipe(
    filter_parts: list[str],
    bgm: BgmClip,
    audio_map_label: str,
    has_main_audio: bool,
    main_dur: float,
    bgm_index: int,
) -> str:
    """Append the BGM audio chain to ``filter_parts`` and return the new label.

    Conforms to ADR-B5-r2/B5-r3 and follows the verified syntax (DC-AS-004).

    When ``has_main_audio`` is true, the current main label is reformatted to
    ``[main_fmt]`` and mixed with the BGM:
        ducking OFF:
            [main_fmt][bgm]amix=inputs=2:normalize=0,alimiter=limit=1.0[outa_bgm]
        ducking ON:
            [main_fmt]asplit -> [bgm][main_sc]sidechaincompress -> amix ->
            alimiter -> [outa_bgm]
    When ``has_main_audio`` is false, the BGM chain is routed straight to
    ``[outa_bgm]``.

    The chain trims the BGM to ``main_dur`` with ``atrim`` (ADR-B6-r2 states
    that looping is handled by render.py). ``afade`` stages are added only for
    fade durations greater than zero (ADR-B9-r3). ``main_dur`` and the
    fade-out start are written with microsecond precision so they stay aligned
    with the main track on long timelines.

    Args:
        filter_parts: Filter stage list, extended in place.
        bgm: BGM clip; only its ``directive`` is read here.
        audio_map_label: Terminal label of the main audio chain so far.
        has_main_audio: Whether the main chain produced an audio stream.
        main_dur: Duration of the main content in seconds.
        bgm_index: ffmpeg input index of the BGM file.

    Returns:
        The new audio terminal label, always ``"[outa_bgm]"``.

    Raises:
        ClipwrightError: INVALID_INPUT when a fade is longer than ``main_dur``.
    """
    d = bgm.directive
    vol_str = f"{d.volume_db:g}dB"
    dur_str = _bgm_seconds_str(main_dur)

    # SR M-3: a fade longer than the main content would produce unintended
    # audio. BgmOptions cannot enforce this bound without knowing main_dur,
    # so it is checked here.
    if d.fade_in_sec > main_dur:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="fade_in_sec exceeds the main content duration.",
            hint=f"Keep fade within the main duration of {main_dur:.2f} seconds.",
        )
    if d.fade_out_sec > main_dur:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="fade_out_sec exceeds the main content duration.",
            hint=f"Keep fade within the main duration of {main_dur:.2f} seconds.",
        )

    # Common BGM chain: aformat -> atrim -> asetpts -> volume -> (afade).
    bgm_chain = (
        f"[{bgm_index}:a]aformat=sample_rates=48000:channel_layouts=stereo,"
        f"atrim=0:{dur_str},asetpts=PTS-STARTPTS,volume={vol_str}"
    )
    if d.fade_in_sec > 0:
        bgm_chain += f",afade=t=in:st=0:d={d.fade_in_sec:g}"
    if d.fade_out_sec > 0:
        # The fade-out ends exactly at the end of the main content.
        st_out = max(0.0, main_dur - d.fade_out_sec)
        bgm_chain += (
            f",afade=t=out:st={_bgm_seconds_str(st_out)}:d={d.fade_out_sec:g}"
        )

    if not has_main_audio:
        # BGM-only output (ADR-B5-r2/DC-AS-004): route directly to [outa_bgm].
        filter_parts.append(f"{bgm_chain}[outa_bgm]")
        return "[outa_bgm]"

    # Main audio present: keep the BGM on an intermediate label for mixing.
    filter_parts.append(f"{bgm_chain}[bgm]")

    # Bring the main chain to the same sample format before mixing (DC-AS-007).
    filter_parts.append(
        f"{audio_map_label}aformat=sample_rates=48000:channel_layouts=stereo[main_fmt]"
    )

    if d.ducking.enabled:
        # Ducking ON: the main signal is split so one copy can drive the
        # sidechain; input order is [bgm][main_sc] (DC-AS-006).
        filter_parts.append("[main_fmt]asplit[main_mix][main_sc]")
        filter_parts.append(
            f"[bgm][main_sc]sidechaincompress="
            f"threshold={d.ducking.threshold:g}:ratio={d.ducking.ratio:g}[bgm_duck]"
        )
        filter_parts.append(
            "[main_mix][bgm_duck]amix=inputs=2:normalize=0,alimiter=limit=1.0[outa_bgm]"
        )
    else:
        # Ducking OFF: plain mix followed by a limiter (DC-AM-001).
        filter_parts.append(
            "[main_fmt][bgm]amix=inputs=2:normalize=0,alimiter=limit=1.0[outa_bgm]"
        )

    return "[outa_bgm]"
1261
+
1262
+
1263
def _build_ffmpeg_args(
    filter_complex: str,
    video_map_label: str,
    audio_map_label: str,
    has_audio: bool,
    options: RenderOptions,
    use_multi_source: bool = False,
) -> list[str]:
    """Turn the filter graph, map labels and options into ffmpeg arguments (M-2).

    Every element of the returned list is a string; numeric option values are
    converted with ``str()`` (M-1). Width and height never appear here because
    scaling is part of ``filter_complex`` (L-4).

    ``-r`` is emitted only on the single-source path. On the multi-source path
    each clip already passed through an ``fps`` filter, and adding ``-r`` would
    resample a second time (CR M-2).
    """
    args: list[str] = ["-filter_complex", filter_complex, "-map", video_map_label]
    if has_audio:
        args.extend(("-map", audio_map_label))

    # Codec selections map one-to-one onto ffmpeg flags.
    for flag, codec in (
        ("-c:v", options.video_codec),
        ("-c:a", options.audio_codec),
    ):
        if codec is not None:
            args.extend((flag, codec))

    # Single-source only: see the docstring for why multi-source skips -r.
    if options.fps is not None and not use_multi_source:
        args.extend(("-r", str(options.fps)))

    if options.crf is not None:
        args.extend(("-crf", str(options.crf)))

    return args
1310
+
1311
+
1312
def build_plan(
    ranges: list[KeptRange],
    probe_info: ProbeInfo,
    options: RenderOptions,
    denoise: dict[str, Any] | None = None,
    loudness: dict[str, Any] | None = None,
    source_probes: dict[str, ProbeInfo] | None = None,
    bgm: BgmClip | None = None,
) -> RenderPlan:
    """Build the ``RenderPlan`` (filter graph, ffmpeg arguments, estimate).

    Thin orchestrator (ADR-1/ADR-7) that runs these steps in order:

    1. Validate the denoise and loudness directives. ``deepfilternet`` is
       rejected with UNSUPPORTED_OPERATION.
    2. Pick the path (ADR-C3): the multi-source builder is used when
       ``source_probes`` is given and ``ranges`` reference two or more unique
       sources; otherwise the single-source builder is used.
    3. Reject input without video: any video-less source on the multi-source
       path (ADR-C12), or a video-less ``probe_info`` on the single-source
       path (DC-AS-002).
    4. Build ``filter_complex`` with the chosen builder.
    5. When ``bgm`` is given, append the BGM stage last (ADR-B5-r2/B5-r3).
       The BGM input index is ``len(input_sources)`` because the BGM file is
       not part of ``input_sources`` (DC-AS-005). The output then always has
       audio, even if the main content has none.
    6. Build the ffmpeg argument list.
    7. Collect warnings and the dry-run size estimate. The estimate uses
       ``probe_info.bit_rate`` (ADR-C10) and is ``None`` with a warning when
       the bit rate is unknown (ADR-3).

    Warnings are added when: denoise or loudness was requested but the main
    content has no audio (DC-AM-004); peak loudness is combined with denoise
    (DC-AM-002); loudness is applied to a multi-source timeline (ADR-C11-r2);
    the estimate is based on the first of several sources; any
    codec/resolution/fps/crf option is set (DC-AM-005).

    ``RenderPlan.input_sources`` is ``unique_sources_in_order(ranges)``
    (ADR-C9-r2). With ``bgm=None`` no BGM stage is added (ADR-B7).

    Raises:
        ClipwrightError: UNSUPPORTED_OPERATION or INTERNAL as described above;
            the validators and builders called here may raise further errors.
    """
    # ---------- Directive validation ----------
    denoise_directive: DenoiseDirective | None = (
        None if denoise is None else _validate_denoise_directive(denoise)
    )
    if denoise_directive is not None and denoise_directive.backend == "deepfilternet":
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message="backend=deepfilternet is not supported for render application.",
            hint=(
                "Re-detect with backend=afftdn, or wait for a future version"
                " with deepfilternet render support."
            ),
        )

    loudness_directive: LoudnessDirective | None = (
        None if loudness is None else _validate_loudness_directive(loudness)
    )

    # ---------- Path selection (ADR-C3) ----------
    # input_sources is the single source of truth for input indices (ADR-C9-r2).
    input_sources = unique_sources_in_order(ranges)
    several_sources = len(input_sources) >= 2
    use_multi_source = source_probes is not None and several_sources

    if use_multi_source:
        # Structurally unreachable, but kept as an if-raise (not an assert,
        # which -O strips) so type checkers narrow source_probes (CR-CT-002).
        if source_probes is None:
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message="source_probes is None (internal error).",
                hint="Check the caller of build_plan.",
            )

        # SR Info-1: per the original note, render.py builds source_probes
        # from the same unique source list, so every lookup below is expected
        # to succeed.
        videoless = next(
            (src for src in input_sources if not source_probes[src].has_video),
            None,
        )
        if videoless is not None:
            # Only the basename is reported, not the full path.
            basename = os.path.basename(videoless)
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message=f"A source without a video stream is included: {basename}",
                hint=(
                    f"'{basename}' has no video stream."
                    " Use only media files that contain a video stream."
                ),
            )

        # The output has audio as soon as any source does (ADR-C7-r2).
        has_main_audio = any(
            source_probes[src].audio_count >= 1 for src in input_sources
        )
        filter_complex, video_map_label, audio_map_label, _, _ = (
            _build_multi_source_filter_complex(
                ranges,
                {src: position for position, src in enumerate(input_sources)},
                source_probes,
                has_main_audio,
                denoise_directive,
                loudness_directive,
                options,
                ranges[0].source,
            )
        )
    else:
        if not probe_info.has_video:
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message="No video stream found.",
                hint="Use a media file that contains a video stream.",
            )

        # Several audio streams still count as one: only the first is used.
        has_main_audio = probe_info.audio_count >= 1
        filter_complex, video_map_label, audio_map_label, _, _ = _build_filter_complex(
            ranges, has_main_audio, denoise_directive, loudness_directive, options
        )

    # Sum of the kept durations; used for BGM trimming and the size estimate.
    total_duration = sum(_to_seconds(r.source_range.duration) for r in ranges)

    # ---------- BGM stage (ADR-B5-r2/B5-r3) ----------
    # has_main_audio describes the concatenated main content; has_audio_output
    # describes the final file, which also has audio whenever BGM is present.
    has_audio_output = has_main_audio
    bgm_source_out: str | None = None
    if bgm is not None:
        stages = filter_complex.split(";")
        audio_map_label = _append_bgm_pipe(
            stages,
            bgm,
            audio_map_label,
            has_main_audio,
            total_duration,
            len(input_sources),
        )
        filter_complex = ";".join(stages)
        has_audio_output = True
        bgm_source_out = bgm.source

    # ---------- ffmpeg arguments ----------
    ffmpeg_args = _build_ffmpeg_args(
        filter_complex,
        video_map_label,
        audio_map_label,
        has_audio_output,
        options,
        use_multi_source=use_multi_source,
    )

    # ---------- Warnings and dry-run estimate ----------
    warnings: list[str] = []

    # Audio directives cannot apply without main audio, BGM or not (DC-AM-004).
    if not has_main_audio:
        if denoise_directive is not None:
            warnings.append(
                "No audio: denoise skipped — afftdn filter was not applied."
            )
        if loudness_directive is not None:
            warnings.append(
                "No audio: loudness skipped — loudnorm/volume filter was not applied."
            )

    loudness_applied = loudness_directive is not None and has_main_audio

    # Peak gain was measured before denoising (DC-AM-002).
    if (
        loudness_applied
        and loudness_directive.mode == "peak"
        and denoise_directive is not None
    ):
        warnings.append(
            "peak mode combined with denoise: peak max_volume was measured"
            " before denoise was applied; applying it to denoised audio may"
            " deviate from the target peak (DC-AM-002)."
        )

    # Track-level measurements come from one source only (ADR-C11-r2).
    if loudness_applied and several_sources:
        warnings.append(
            "track loudness applied to multi-source concatenation."
            " The measured values are from a single source; applying them to the"
            " entire concatenated track may not be strictly accurate"
            " (per_clip loudness is not supported)."
        )

    # Size estimate from probe_info's bit rate; on the multi-source path it
    # stands in as the representative value (ADR-C10).
    estimated_size: float | None = None
    if probe_info.bit_rate is None:
        warnings.append("Cannot estimate file size: bit_rate is not available.")
    else:
        estimated_size = probe_info.bit_rate * total_duration / 8.0
        if several_sources:
            warnings.append(
                "Estimated file size is approximate for multi-source input. The"
                " bit_rate of the first source is used as the representative"
                " value."
            )

    # Any conversion option changes the output bit rate, audio codec included
    # (DC-AM-005).
    conversion_options = (
        options.video_codec,
        options.audio_codec,
        options.width,
        options.height,
        options.fps,
        options.crf,
    )
    if any(value is not None for value in conversion_options):
        warnings.append(
            "Conversion options (codec/resolution/fps/crf) are specified; the"
            " estimated file size is approximate and the actual size may differ."
        )

    return RenderPlan(
        filter_complex=filter_complex,
        ffmpeg_args=ffmpeg_args,
        segment_count=len(ranges),
        total_duration_seconds=total_duration,
        estimated_size_bytes=estimated_size,
        warnings=warnings,
        input_sources=input_sources,
        bgm_source=bgm_source_out,
    )