clipwright-render 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clipwright_render/__init__.py +3 -0
- clipwright_render/plan.py +1595 -0
- clipwright_render/py.typed +0 -0
- clipwright_render/render.py +621 -0
- clipwright_render/schemas.py +344 -0
- clipwright_render/server.py +175 -0
- clipwright_render-0.1.1.dist-info/METADATA +258 -0
- clipwright_render-0.1.1.dist-info/RECORD +10 -0
- clipwright_render-0.1.1.dist-info/WHEEL +4 -0
- clipwright_render-0.1.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,1595 @@
|
|
|
1
|
+
"""plan.py — pure logic layer for clipwright-render.
|
|
2
|
+
|
|
3
|
+
Does not execute ffmpeg/ffprobe. Probe results are received as ProbeInfo arguments
|
|
4
|
+
(DC-AM-007). Responsible for three concerns: timeline analysis, filter_complex
|
|
5
|
+
construction, and dry-run size estimation.
|
|
6
|
+
|
|
7
|
+
Design decisions:
|
|
8
|
+
- Single re-encode (ADR-1): filter_complex uses trim+concat for frame-accurate
|
|
9
|
+
time control with less degradation than repeated re-encodes.
|
|
10
|
+
- concat=n=1 unconditionally (DC-AS-005): simplifies implementation; no branch for
|
|
11
|
+
a single segment. ffmpeg handles n=1 correctly.
|
|
12
|
+
- First audio stream only (ADR-7): mapping multiple audio streams adds significant
|
|
13
|
+
complexity; only the first stream is handled in this iteration.
|
|
14
|
+
- afftdn denoise injection (§B-2):
|
|
15
|
+
filter_parts order is fixed as trim/atrim → concat → afftdn → scale.
|
|
16
|
+
afftdn (audio chain) and scale (video chain) use independent labels without
|
|
17
|
+
conflict. When has_audio=False, afftdn is not inserted and a warning is
|
|
18
|
+
appended.
|
|
19
|
+
- loudness injection (ADR-L5/L5b/L6):
|
|
20
|
+
loudness filter is chained after denoise (acoustically correct order).
|
|
21
|
+
The audio map terminal label is resolved via a cumulative-pipe helper
|
|
22
|
+
(DC-AM-001): [outa] → (denoise present → [outa_dn]) → (track loudness
|
|
23
|
+
present → [outa_ln]). No loudness directive is fully backward compatible
|
|
24
|
+
(ADR-L6).
|
|
25
|
+
- Multi-source support (ADR-C1–C12, §7 v2):
|
|
26
|
+
Routing branches on unique source count; single-source backward compatibility
|
|
27
|
+
is strictly preserved (ADR-C3). unique_sources_in_order is the single source
|
|
28
|
+
of truth for input index assignment (ADR-C9-r2).
|
|
29
|
+
- Resolution pair constraint (DC-AM-004): width/height with only one specified is
|
|
30
|
+
rejected by RenderOptions model_validator (schemas.py) as ValidationError.
|
|
31
|
+
_build_multi_source_filter_complex assumes either both specified or both None.
|
|
32
|
+
- BGM mixing (ADR-B4-r2/B5-r2/B5-r3/B6-r2/B9-r3):
|
|
33
|
+
resolve_bgm detects kind=="bgm" clips from all Audio tracks (ADR-B4-r2).
|
|
34
|
+
When build_plan receives a non-None bgm argument, _append_bgm_pipe appends
|
|
35
|
+
the BGM stage. has_main_audio (presence of main audio) and has_audio_output
|
|
36
|
+
(final output audio presence) are separated (ADR-B5-r2).
|
|
37
|
+
-stream_loop -1 is added by render.py; plan uses atrim=0:{main_dur} for
|
|
38
|
+
duration (ADR-B6-r2). BGM index = len(input_sources) (bgm_source is not
|
|
39
|
+
included in input_sources; DC-AS-005).
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
from __future__ import annotations
|
|
43
|
+
|
|
44
|
+
import os
|
|
45
|
+
from collections.abc import Mapping
|
|
46
|
+
from dataclasses import dataclass, field
|
|
47
|
+
from typing import Annotated, Any, Literal
|
|
48
|
+
|
|
49
|
+
import opentimelineio as otio
|
|
50
|
+
from clipwright.errors import ClipwrightError, ErrorCode
|
|
51
|
+
from pydantic import BaseModel, Field, ValidationError, model_validator
|
|
52
|
+
|
|
53
|
+
from clipwright_render.schemas import RenderOptions, SubtitleOptions
|
|
54
|
+
|
|
55
|
+
# ===========================================================================
|
|
56
|
+
# Denoise schema (no dependency on clipwright-noise; defined inline for render)
|
|
57
|
+
# ===========================================================================
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class AfftdnParams(BaseModel):
    """Range-checked parameters for the ffmpeg ``afftdn`` filter (DC-AS-006).

    Attributes:
        nr: Noise reduction amount in dB; must lie in 0.01 to 97.
        nf: Noise floor in dB; must lie in -80 to -20.
        nt: Noise type; ``"w"`` (white noise, the default) or ``"v"``
            (vinyl noise).
    """

    nr: float = Field(ge=0.01, le=97)
    nf: float = Field(ge=-80, le=-20)
    nt: Literal["w", "v"] = "w"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# SR M-1: allowed value set for afftdn nt (module-level constant).
|
|
74
|
+
# Defence-in-depth alongside the Literal["w","v"] type constraint,
|
|
75
|
+
# referenced from _append_audio_pipe.
|
|
76
|
+
_VALID_NT_VALUES: frozenset[str] = frozenset({"w", "v"})
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class DenoiseDirective(BaseModel):
    """Schema for the timeline ``metadata["clipwright"]["denoise"]`` entry
    (DC-AS-006/ADR-N9).

    The model checks the envelope only: tool/version length, the ``kind`` tag,
    the backend name and the optional measured noise floor. ``params`` is
    accepted as an arbitrary dict at this level; in this module
    ``_validate_denoise_directive`` re-validates it with ``AfftdnParams`` when
    ``backend == "afftdn"`` and reports failures as INVALID_INPUT.

    NOTE(review): the design states that ``params`` must be ``{}`` when
    ``backend == "deepfilternet"``; this model does not enforce that — confirm
    where it is checked.

    SR L-1: ``max_length`` on tool/version guards against oversized string
    injection. SR L-3: ``measured_noise_floor_db`` accepts only finite values
    in -200 to 0 dB (no inf/nan).
    """

    # NR-M-1: max_length matches the noise-side schemas.py (the writer). A
    # reader stricter than the writer would reject valid values, so both
    # sides are unified at 64 for tool/version.
    tool: str = Field(max_length=64)
    version: str = Field(max_length=64)
    kind: Literal["denoise"]
    backend: Literal["afftdn", "deepfilternet"]
    params: dict[str, Any]
    measured_noise_floor_db: (
        Annotated[float, Field(ge=-200.0, le=0.0, allow_inf_nan=False)] | None
    ) = None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ===========================================================================
|
|
106
|
+
# Loudness schema (no dependency on clipwright-loudness; defined inline for render)
|
|
107
|
+
# NR-M-1: align max_length with loudness-side schemas.py (writer); unified at 64.
|
|
108
|
+
# ===========================================================================
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class LoudnormTarget(BaseModel):
    """Target values for loudnorm mode (ADR-L1).

    Attributes:
        i: Integrated loudness target in LUFS; -70 to -5.
        tp: True peak target in dBTP; -9 to 0.
        lra: Loudness range target in LU; 1 to 50.
    """

    i: float = Field(ge=-70.0, le=-5.0)
    tp: float = Field(ge=-9.0, le=0.0)
    lra: float = Field(ge=1.0, le=50.0)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class PeakTarget(BaseModel):
    """Target value for peak mode (ADR-L2).

    Attributes:
        peak_db: Peak target in dB; -60 to 0.
    """

    peak_db: float = Field(ge=-60.0, le=0.0)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class LoudnormMeasured(BaseModel):
    """Measured values for loudnorm mode (ADR-L1 linear two-pass).

    Every field is a required float and must be finite: inf and nan are
    rejected (CWE-20). No numeric range is imposed beyond finiteness.
    """

    input_i: float = Field(allow_inf_nan=False)
    input_tp: float = Field(allow_inf_nan=False)
    input_lra: float = Field(allow_inf_nan=False)
    input_thresh: float = Field(allow_inf_nan=False)
    target_offset: float = Field(allow_inf_nan=False)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class PeakMeasured(BaseModel):
    """Measured value for peak mode (ADR-L2).

    Attributes:
        max_volume_db: Measured peak in dB; finite and within -200 to 0.
    """

    max_volume_db: float = Field(ge=-200.0, le=0.0, allow_inf_nan=False)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class LoudnessDirective(BaseModel):
    """Validation model for timeline metadata["clipwright"]["loudness"]
    (ADR-L4/ADR-L6).

    Validated with Pydantic when render reads the timeline; raises INVALID_INPUT
    on failure. Only scope="track" is supported (per_clip deferred until after
    concatenation; DC-AS-003). When mode="loudnorm", measured is required (needed
    for linear application). measured=None is INVALID_INPUT.

    Cross-field rules enforced by the validators below:
      - mode="loudnorm" requires a non-None ``measured``.
      - ``target`` must be the model matching ``mode`` (LoudnormTarget for
        loudnorm, PeakTarget for peak).
      - when present, ``measured`` must be the model matching ``mode``
        (LoudnormMeasured for loudnorm, PeakMeasured for peak).

    NR-M-1: tool/version max_length=64 (maintains reader/writer compatibility).

    Difference from writer side (clipwright-loudness/schemas.py) — CR-M-001
    reader-strict:
      - schemas.py LoudnessDirective allows measured=None (U-1: design does not
        write loudness directive to OTIO when measurement fails).
      - This reader side treats loudnorm+measured=None as INVALID_INPUT
        (measured_* values are required for linear two-pass; a directive written
        to OTIO with measured=None is itself an invalid state; reader-strict).

    Security: validator messages never echo input values (SR M-1).
    """

    tool: Annotated[str, Field(max_length=64)]
    version: Annotated[str, Field(max_length=64)]
    kind: Literal["loudness"]
    mode: Literal["loudnorm", "peak"]
    scope: Literal["track"]
    target: LoudnormTarget | PeakTarget
    # None is kept in the type for compatibility with the writer side
    # (schemas.py). The writer allows measured=None for peak, so the reader
    # must be able to receive it. The invalid loudnorm + measured=None case is
    # rejected reader-strict by the model_validator below (runtime enforcement;
    # see docstring CR-M-001).
    measured: LoudnormMeasured | PeakMeasured | None = None

    @model_validator(mode="after")
    def _validate_measured_required_for_loudnorm(self) -> LoudnessDirective:
        """measured is required for loudnorm mode (needed for linear
        application)."""
        if self.mode == "loudnorm" and self.measured is None:
            raise ValueError(
                "measured is required for loudnorm mode (needed for linear"
                " application)."
            )
        return self

    @model_validator(mode="after")
    def _validate_target_matches_mode(self) -> LoudnessDirective:
        """Validate that mode and target type are consistent."""
        if self.mode == "loudnorm" and not isinstance(self.target, LoudnormTarget):
            raise ValueError(
                "loudnorm mode requires a LoudnormTarget (i/tp/lra) for target."
            )
        if self.mode == "peak" and not isinstance(self.target, PeakTarget):
            raise ValueError("peak mode requires a PeakTarget (peak_db) for target.")
        return self

    @model_validator(mode="after")
    def _validate_measured_matches_mode(self) -> LoudnessDirective:
        """Validate that mode and measured type are consistent.

        The union on ``measured`` lets either measured model parse regardless
        of ``mode``. Without this check a peak directive carrying loudnorm
        measurements (or the reverse) would pass validation even though the
        measured fields do not belong to the selected mode.
        """
        if self.measured is None:
            # Absence is handled by _validate_measured_required_for_loudnorm.
            return self
        if self.mode == "loudnorm" and not isinstance(
            self.measured, LoudnormMeasured
        ):
            raise ValueError(
                "loudnorm mode requires a LoudnormMeasured (input_i/input_tp/"
                "input_lra/input_thresh/target_offset) for measured."
            )
        if self.mode == "peak" and not isinstance(self.measured, PeakMeasured):
            raise ValueError(
                "peak mode requires a PeakMeasured (max_volume_db) for measured."
            )
        return self
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ===========================================================================
|
|
212
|
+
# BGM schema (no dependency on clipwright-bgm; defined inline for render)
|
|
213
|
+
# ADR-B9-r2: reader-strict, unknown keys forbidden, allow_inf_nan=False
|
|
214
|
+
# NR-M-1: tool/version max_length=64 (consistent with clipwright-bgm writer)
|
|
215
|
+
# ===========================================================================
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class DuckingDirective(BaseModel):
    """Schema for the BGM ducking settings (ADR-B5-r3/DC-AS-006).

    Unknown keys are rejected and inf/nan are refused for every float field
    (SR M-1: guards against non-finite values originating from OTIO).

    Attributes:
        enabled: When True, sidechaincompress is injected so the BGM ducks
            under the main audio. Defaults to False.
        threshold: sidechaincompress ``threshold``; validated as
            0 < threshold <= 1.0.
        ratio: sidechaincompress ``ratio``; validated as 1.0 to 20.0.

    NOTE(review): ffmpeg's documented lower bound for ``threshold`` is
    0.000976563, but the constraint here is only ``gt=0.0``. Values between
    the two pass this model — confirm against the writer-side schema before
    tightening.
    """

    model_config = {"extra": "forbid", "allow_inf_nan": False}

    enabled: bool = False
    threshold: float = Field(gt=0.0, le=1.0)
    ratio: float = Field(ge=1.0, le=20.0)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class BgmDirective(BaseModel):
    """Schema for a BGM clip's ``metadata["clipwright"]`` (ADR-B9-r2/B9-r3).

    Validated with Pydantic when render reads the timeline; a failure is
    reported as INVALID_INPUT. The model is reader-strict: unknown keys are
    forbidden and inf/nan are refused.

    Attributes:
        tool: Writer tool name; at most 64 characters (NR-M-1).
        version: Writer tool version; at most 64 characters (NR-M-1).
        kind: Always ``"bgm"``.
        volume_db: BGM gain in dB; -60 to 20 (SR I-1, consistent with the
            writer's BgmOptions).
        fade_in_sec: Fade-in length in seconds; >= 0, default 0.0 (no fade;
            ADR-B9-r3). afade is only injected when the value is > 0.
        fade_out_sec: Fade-out length in seconds; >= 0, default 0.0.
        ducking: Required ducking settings.
    """

    model_config = {"extra": "forbid", "allow_inf_nan": False}

    tool: str = Field(max_length=64)
    version: str = Field(max_length=64)
    kind: Literal["bgm"]
    volume_db: float = Field(ge=-60.0, le=20.0, allow_inf_nan=False)
    fade_in_sec: float = Field(default=0.0, ge=0)
    fade_out_sec: float = Field(default=0.0, ge=0)
    ducking: DuckingDirective
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# ===========================================================================
|
|
258
|
+
# Data types
|
|
259
|
+
# ===========================================================================
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
@dataclass
class KeptRange:
    """One kept segment of the timeline.

    Times stay in OTIO's opentime representation; conversion to seconds is
    left to the consumer.
    """

    # target_url of the media file this segment is cut from.
    source: str
    # Range within the source media, as an OTIO TimeRange.
    source_range: otio.opentime.TimeRange
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
@dataclass(frozen=True)
class BgmClip:
    """Immutable description of the timeline's BGM clip (ADR-B4-r2)."""

    # target_url of the BGM media file.
    source: str
    # Full duration of the BGM media, as an OTIO TimeRange.
    source_range: otio.opentime.TimeRange
    # Clip metadata after validation by BgmDirective.
    directive: BgmDirective
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
@dataclass
class ProbeInfo:
    """ffprobe results handed to the planning layer (DC-AM-007).

    plan.py only receives this object as an argument; it never spawns a
    subprocess itself.
    """

    # Whether a video stream is present.
    has_video: bool
    # Number of audio streams.
    audio_count: int
    # Bit rate; when None, estimated_size_bytes cannot be computed (ADR-3).
    bit_rate: int | None = None
    # width/height/fps feed output-spec normalisation on multi-source paths
    # (ADR-C2); they are optional for backward compatibility.
    width: int | None = None
    height: int | None = None
    fps: float | None = None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@dataclass
class RenderPlan:
    """Execution plan produced by build_plan."""

    # Single string for ffmpeg's -filter_complex argument (one argument, which
    # prevents injection).
    filter_complex: str
    # Arguments passed to ffmpeg, excluding -filter_complex. All elements are
    # str (M-1).
    ffmpeg_args: list[str]
    # Number of kept segments.
    segment_count: int
    # Total output duration in seconds.
    total_duration_seconds: float
    # Estimated output size in bytes; None when bit_rate is None.
    estimated_size_bytes: float | None = None
    # Notes about the dry-run estimate.
    warnings: list[str] = field(default_factory=list)
    # Ordered, deduplicated input sources; single source of truth for
    # ADR-C9-r2.
    input_sources: list[str] = field(default_factory=list)
    # BGM source path; None when there is no BGM (ADR-B5/B7).
    bgm_source: str | None = None
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# ===========================================================================
|
|
334
|
+
# Utility functions
|
|
335
|
+
# ===========================================================================
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def unique_sources_in_order(ranges: list[KeptRange]) -> list[str]:
    """Return each distinct ``source`` of *ranges*, ordered by first appearance
    (ADR-C9-r2).

    The result is the single source of truth for input index assignment and
    for input_sources: a source used by several clips keeps the position of
    its first occurrence.

    Args:
        ranges: Kept segments in timeline order.

    Returns:
        Deduplicated source URLs; empty when *ranges* is empty.
    """
    # dict keys preserve insertion order, so fromkeys drops repeats while
    # keeping the first occurrence of every source.
    return list(dict.fromkeys(kept.source for kept in ranges))
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# ===========================================================================
|
|
356
|
+
# resolve_kept_ranges
|
|
357
|
+
# ===========================================================================
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def resolve_kept_ranges(timeline: otio.schema.Timeline) -> list[KeptRange]:
    """Collect the kept segments from the timeline's single video track
    (ADR-5/DC-AS-006).

    Every Clip on the video track becomes one KeptRange carrying its own
    source, so multiple sources are allowed (ADR-C3; the old
    single-source-only behaviour was removed per DC-AS-005). Gaps stand for
    removed regions and are skipped; items that are neither Clip nor
    Transition are ignored as well.

    Args:
        timeline: OTIO timeline to scan.

    Returns:
        List of KeptRange in track order (source and source_range held as
        opentime).

    Raises:
        ClipwrightError(UNSUPPORTED_OPERATION): the timeline has no video
            track, has two or more video tracks, contains a Transition, or a
            Clip uses a media reference other than ExternalReference.
        ClipwrightError(INVALID_INPUT): a Clip has a MissingReference, or the
            track contains zero Clips.
    """
    video_tracks = [
        track
        for track in timeline.tracks
        if track.kind == otio.schema.TrackKind.Video
    ]

    if not video_tracks:
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message="No video track found.",
            hint="Use an OTIO timeline that contains a video track.",
        )
    if len(video_tracks) > 1:
        # Multiple video tracks are not supported.
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message="The timeline contains two or more video tracks.",
            hint="Use an OTIO timeline with only a single video track.",
        )
    (video_track,) = video_tracks

    kept: list[KeptRange] = []
    for item in video_track:
        if isinstance(item, otio.schema.Transition):
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message="The timeline contains a Transition.",
                hint="Use an OTIO timeline that does not contain Transitions.",
            )
        if not isinstance(item, otio.schema.Clip):
            # Gaps (removed regions) and any other non-Clip items are not kept.
            continue

        reference = item.media_reference
        if isinstance(reference, otio.schema.MissingReference):
            # A missing reference is invalid timeline data, hence
            # INVALID_INPUT rather than UNSUPPORTED_OPERATION.
            raise ClipwrightError(
                code=ErrorCode.INVALID_INPUT,
                message="Media reference is missing (MissingReference).",
                hint="Use an ExternalReference with a target_url.",
            )
        if not isinstance(reference, otio.schema.ExternalReference):
            # Unsupported configuration (e.g. GeneratorReference).
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message=(
                    "Media references other than ExternalReference are not"
                    " supported."
                ),
                hint="Use an ExternalReference with a target_url.",
            )

        # NOTE(review): item.source_range is stored as-is; OTIO permits a Clip
        # without a source_range — confirm consumers cope with None here.
        kept.append(
            KeptRange(source=reference.target_url, source_range=item.source_range)
        )

    if not kept:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="No kept segments found (no Clips).",
            hint="Use an OTIO timeline that contains at least one Clip.",
        )
    return kept
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# ===========================================================================
|
|
441
|
+
# resolve_bgm
|
|
442
|
+
# ===========================================================================
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def resolve_bgm(timeline: otio.schema.Timeline) -> BgmClip | None:
    """Find the timeline's BGM clip, if any (ADR-B4-r2).

    All Audio tracks are scanned for Clips whose ``metadata["clipwright"]`` is
    a mapping with ``kind == "bgm"``. Detection counts such clips, not Audio
    tracks (DC-AS-002), so one BGM clip is found correctly even when a main
    audio track (kind != "bgm") is also present.

    NOTE(review): a kind=="bgm" clip whose media reference is not an
    ExternalReference is skipped without an error — confirm this is intended.

    Args:
        timeline: OTIO timeline to scan.

    Returns:
        A BgmClip when exactly one BGM clip exists; None when there is none
        (backward compatible).

    Raises:
        ClipwrightError(UNSUPPORTED_OPERATION): two or more BGM clips were
            found (only a single BGM is supported).
        ClipwrightError(INVALID_INPUT): the BGM clip metadata fails
            BgmDirective validation.
    """
    found: list[tuple[str, otio.opentime.TimeRange, Mapping[str, Any]]] = []

    audio_tracks = (
        track
        for track in timeline.tracks
        if track.kind == otio.schema.TrackKind.Audio
    )
    for track in audio_tracks:
        for item in track:
            if not isinstance(item, otio.schema.Clip):
                continue
            meta = item.metadata.get("clipwright")
            # OTIO metadata values are AnyDictionary (not a dict subclass), so
            # the Mapping protocol is used for the type check (DC-AS-002).
            tagged_as_bgm = isinstance(meta, Mapping) and meta.get("kind") == "bgm"
            if not tagged_as_bgm:
                continue
            reference = item.media_reference
            if isinstance(reference, otio.schema.ExternalReference):
                found.append((reference.target_url, item.source_range, meta))

    if not found:
        return None

    if len(found) > 1:
        raise ClipwrightError(
            code=ErrorCode.UNSUPPORTED_OPERATION,
            message=(
                "The timeline contains two or more BGM clips (only a single BGM is"
                " supported)."
            ),
            hint=(
                "Reduce the number of BGM clips in the timeline to one."
                " Mixing multiple BGM tracks is not currently supported."
            ),
        )

    # Exactly one candidate: validate its metadata and wrap it.
    ((source, source_range, raw_meta),) = found
    try:
        directive = BgmDirective(**raw_meta)
    except (ValidationError, TypeError, ValueError):
        # ValueError stays in the list so that ValueErrors raised by future
        # model validators are also mapped (same catch list as
        # _validate_loudness_directive).
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "BGM clip metadata validation failed. Check field names, types,"
                " and values."
            ),
            hint=(
                "Verify that metadata['clipwright'] of the BGM clip has"
                " kind='bgm', volume_db, fade_in_sec, fade_out_sec, and ducking"
                " set correctly."
            ),
        ) from None

    return BgmClip(source=source, source_range=source_range, directive=directive)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
# ===========================================================================
|
|
527
|
+
# build_plan
|
|
528
|
+
# ===========================================================================
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _escape_filtergraph(path: str) -> str:
|
|
532
|
+
"""Escape a path for use in filtergraph filename= / fontsdir= options.
|
|
533
|
+
|
|
534
|
+
Verified escape rules (M2 2026-06-11 / DC-AS-005):
|
|
535
|
+
1. Backslash (\\) → \\\\
|
|
536
|
+
2. Colon (:) → \\:
|
|
537
|
+
Applying in this order ensures Windows absolute paths (C:\\...) reach ffmpeg
|
|
538
|
+
without depending on the current working directory.
|
|
539
|
+
|
|
540
|
+
Example: C:\\Users\\sub.srt → C\\:\\\\Users\\\\sub.srt
|
|
541
|
+
""" # noqa: E501
|
|
542
|
+
return path.replace("\\", "\\\\").replace(":", "\\:")
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _rgb_to_ass_colour(hex_color: str) -> str:
|
|
546
|
+
"""Convert a #RRGGBB colour string to ASS PrimaryColour (&H00BBGGRR).
|
|
547
|
+
|
|
548
|
+
Verified in practice (M2 2026-06-11 / DC-AM-002):
|
|
549
|
+
- 8-digit &H00BBGGRR (AA=00 = fully opaque) ensures opaque rendering.
|
|
550
|
+
- Example: #FF0000 (red: R=FF, G=00, B=00) → &H000000FF (BGR order).
|
|
551
|
+
|
|
552
|
+
Args:
|
|
553
|
+
hex_color: colour string in '#RRGGBB' format.
|
|
554
|
+
|
|
555
|
+
Returns:
|
|
556
|
+
ASS PrimaryColour string in '&H00BBGGRR' format (uppercase).
|
|
557
|
+
"""
|
|
558
|
+
# Strip leading # and extract R/G/B
|
|
559
|
+
hex_str = hex_color.lstrip("#")
|
|
560
|
+
r = int(hex_str[0:2], 16)
|
|
561
|
+
g = int(hex_str[2:4], 16)
|
|
562
|
+
b = int(hex_str[4:6], 16)
|
|
563
|
+
# ASS uses BGR order; AA=00 (fully opaque), 8 digits
|
|
564
|
+
return f"&H00{b:02X}{g:02X}{r:02X}"
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def _build_force_style(subtitle: SubtitleOptions, is_ass: bool) -> str | None:
|
|
568
|
+
"""Build the force_style string for the filtergraph from SubtitleOptions.
|
|
569
|
+
|
|
570
|
+
Returns None for ASS input (force_style not applied; ADR-S6-r2 / DC-AS-002).
|
|
571
|
+
Returns None when all style fields are None (omit force_style= entirely).
|
|
572
|
+
|
|
573
|
+
Returns:
|
|
574
|
+
String in 'FontName=...,FontSize=...' format, or None when not needed.
|
|
575
|
+
"""
|
|
576
|
+
if is_ass:
|
|
577
|
+
# ASS has embedded styles; do not apply force_style (DC-AS-002)
|
|
578
|
+
return None
|
|
579
|
+
|
|
580
|
+
parts: list[str] = []
|
|
581
|
+
if subtitle.font_name is not None:
|
|
582
|
+
parts.append(f"FontName={subtitle.font_name}")
|
|
583
|
+
if subtitle.font_size is not None:
|
|
584
|
+
parts.append(f"FontSize={subtitle.font_size}")
|
|
585
|
+
if subtitle.font_color is not None:
|
|
586
|
+
ass_colour = _rgb_to_ass_colour(subtitle.font_color)
|
|
587
|
+
parts.append(f"PrimaryColour={ass_colour}")
|
|
588
|
+
if subtitle.outline is not None:
|
|
589
|
+
# :g format removes trailing decimal zeros
|
|
590
|
+
parts.append(f"Outline={subtitle.outline:g}")
|
|
591
|
+
if subtitle.alignment is not None:
|
|
592
|
+
parts.append(f"Alignment={subtitle.alignment}")
|
|
593
|
+
if subtitle.margin_v is not None:
|
|
594
|
+
parts.append(f"MarginV={subtitle.margin_v}")
|
|
595
|
+
|
|
596
|
+
if not parts:
|
|
597
|
+
return None
|
|
598
|
+
return ",".join(parts)
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def _append_subtitle_filter(
    filter_parts: list[str],
    video_map_label: str,
    subtitle: SubtitleOptions,
) -> str:
    """Append the subtitles filter stage to *filter_parts* and return the new
    video label.

    Follows the verified syntax (M2 2026-06-11) per ADR-S4-r2 / ADR-S5-r2 /
    ADR-S6-r2. There is no timeline_dir argument: boundary validation is
    centralised in render.py (DC-AS-001).

    Emitted filter:
        {L_v}subtitles=filename='{esc(path)}'[:fontsdir='{esc(dir)}']
            [:force_style='{style}'][:charenc=UTF-8][outvsub]

    A path ending in ".ass" (case-insensitive) is treated as ASS: force_style
    and charenc are left out, fontsdir may still be added (DC-AS-002). Any
    other extension gets charenc=UTF-8 and, when style fields are set,
    force_style (M2 truth table).

    Args:
        filter_parts: List of filter_complex segments (mutated in place).
        video_map_label: Terminal label of the video chain (e.g. '[outv]').
        subtitle: SubtitleOptions with path already resolved to absolute
            (ADR-S5-r2).

    Returns:
        New video_map_label '[outvsub]'.
    """
    subtitle_path = subtitle.path
    is_ass = os.path.splitext(subtitle_path)[1].lower() == ".ass"

    # filename= carries the escaped absolute path in single quotes
    # (ADR-S5-r2).
    options = [f"filename='{_escape_filtergraph(subtitle_path)}'"]

    # fontsdir applies to ASS, SRT and VTT alike.
    if subtitle.fonts_dir is not None:
        options.append(f"fontsdir='{_escape_filtergraph(subtitle.fonts_dir)}'")

    # force_style is None for ASS input or when no style field is set.
    style = _build_force_style(subtitle, is_ass)
    if style is not None:
        options.append(f"force_style='{style}'")

    # charenc is only added for non-ASS input.
    if not is_ass:
        options.append("charenc=UTF-8")

    out_label = "[outvsub]"
    joined_options = ":".join(options)
    filter_parts.append(f"{video_map_label}subtitles={joined_options}{out_label}")
    return out_label
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _to_seconds(rt: otio.opentime.RationalTime) -> float:
|
|
662
|
+
"""Convert RationalTime to seconds (6 decimal places).
|
|
663
|
+
|
|
664
|
+
OTIO's type stubs define to_seconds() as Any, so an explicit float
|
|
665
|
+
cast is used to satisfy mypy strict mode.
|
|
666
|
+
"""
|
|
667
|
+
return round(float(rt.to_seconds()), 6)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
def _validate_denoise_directive(denoise: dict[str, Any]) -> DenoiseDirective:
    """Parse the raw denoise dict into a DenoiseDirective.

    The dict is first validated by constructing ``DenoiseDirective``. When the
    resulting backend is ``"afftdn"``, ``params`` is additionally validated by
    constructing ``AfftdnParams`` (the instance itself is discarded).

    Raises:
        ClipwrightError: with ``ErrorCode.INVALID_INPUT`` when either
            construction raises ``ValidationError`` or ``TypeError``. The
            original exception is suppressed with ``from None``.
    """
    try:
        parsed = DenoiseDirective(**denoise)
    except (ValidationError, TypeError):
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "Denoise directive validation failed. Check field names, types,"
                " and values."
            ),
            hint=(
                "Verify that the denoise field in the timeline metadata is in the"
                " correct format. backend must be 'afftdn' or 'deepfilternet'."
            ),
        ) from None

    # Only the afftdn backend has a params schema that is checked here.
    if parsed.backend != "afftdn":
        return parsed

    try:
        AfftdnParams(**parsed.params)
    except (ValidationError, TypeError):
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "afftdn params validation failed. Check field names, types,"
                " and values."
            ),
            hint=(
                "params.nr must be a float in 0.01–97, params.nf in -80 to"
                " -20, and params.nt must be 'w' or 'v'."
            ),
        ) from None

    return parsed
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def _validate_loudness_directive(loudness: dict[str, Any]) -> LoudnessDirective:
    """Parse the raw loudness dict into a LoudnessDirective.

    ``target`` and ``measured`` are converted to their mode-specific model
    instances before ``LoudnessDirective`` is constructed, so a failure in
    either sub-dict surfaces at the conversion step. Mode/target consistency
    is left to the LoudnessDirective model_validator (L-3).

    Security: input values are never echoed in the error (SR M-1).

    Raises:
        ClipwrightError: with ``ErrorCode.INVALID_INPUT`` when any step raises
            ``ValidationError``, ``TypeError`` or ``ValueError``.
    """
    try:
        fields = dict(loudness)
        mode = fields.get("mode")
        # (key, model used for mode == "loudnorm", model used for mode == "peak")
        conversions = (
            ("target", LoudnormTarget, PeakTarget),
            ("measured", LoudnormMeasured, PeakMeasured),
        )
        for key, loudnorm_model, peak_model in conversions:
            value = fields.get(key)
            if not isinstance(value, dict):
                continue
            if mode == "loudnorm":
                fields[key] = loudnorm_model(**value)
            elif mode == "peak":
                fields[key] = peak_model(**value)
        return LoudnessDirective(**fields)
    except (ValidationError, TypeError, ValueError):
        # ValueError is listed explicitly because the model_validator raises
        # ValueError. ``from None`` drops the original exception so its
        # details (which may contain paths) are not exposed (CWE-209).
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message=(
                "Loudness directive validation failed."
                " Check field names, types, and values."
            ),
            hint=(
                "Check the format of the loudness field in the timeline metadata."
                " mode must be 'loudnorm' or 'peak'; scope must be 'track'."
                " loudnorm mode requires measured."
            ),
        ) from None
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def _append_audio_pipe(
    filter_parts: list[str],
    has_audio: bool,
    denoise_directive: DenoiseDirective | None,
    loudness_directive: LoudnessDirective | None,
) -> tuple[bool, bool]:
    """Chain the optional afftdn and loudness stages after ``[outa]``.

    Shared by the single-source and multi-source builders (ADR-C11-r2).
    ``filter_parts`` is mutated in place. The afftdn stage reads ``[outa]``
    and writes ``[outa_dn]``; the loudness stage reads whichever of those is
    the current end of the chain and writes ``[outa_ln]``.

    When ``has_audio`` is False nothing is appended; emitting a warning for
    that case is left to build_plan.

    Returns:
        (use_afftdn, use_loudness)

    Raises:
        ClipwrightError: with ``ErrorCode.INTERNAL`` when ``nt`` is outside
            ``_VALID_NT_VALUES`` or when target/measured do not have the model
            types that match the loudness mode.
    """
    if not has_audio:
        return False, False

    denoised = False
    if denoise_directive is not None and denoise_directive.backend == "afftdn":
        afftdn = AfftdnParams(**denoise_directive.params)
        nr_text = f"{afftdn.nr:g}"
        nf_text = f"{afftdn.nf:g}"
        noise_type = afftdn.nt
        # SR M-1: defence-in-depth on top of the Literal["w", "v"] constraint,
        # in case that constraint is ever removed.
        if noise_type not in _VALID_NT_VALUES:
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message="afftdn nt parameter is invalid (internal error).",
                hint="params.nt must be 'w' or 'v'.",
            )
        filter_parts.append(
            f"[outa]afftdn=nr={nr_text}:nf={nf_text}:nt={noise_type}[outa_dn]"
        )
        denoised = True

    if loudness_directive is None:
        return denoised, False

    # The loudness stage consumes the denoised label when afftdn was injected.
    upstream = "[outa_dn]" if denoised else "[outa]"
    normalised = False

    if loudness_directive.mode == "loudnorm":
        target = loudness_directive.target
        measured = loudness_directive.measured
        if not (
            isinstance(target, LoudnormTarget)
            and isinstance(measured, LoudnormMeasured)
        ):
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message=(
                    "loudnorm directive type consistency is invalid (internal"
                    " error)."
                ),
                hint="LoudnessDirective model_validator is not functioning.",
            )
        target_opts = f"I={target.i:g}:TP={target.tp:g}:LRA={target.lra:g}"
        measured_opts = (
            f"measured_I={measured.input_i:g}"
            f":measured_TP={measured.input_tp:g}"
            f":measured_LRA={measured.input_lra:g}"
            f":measured_thresh={measured.input_thresh:g}"
            f":offset={measured.target_offset:g}"
        )
        filter_parts.append(
            f"{upstream}loudnorm={target_opts}:{measured_opts}:linear=true[outa_ln]"
        )
        normalised = True
    elif loudness_directive.mode == "peak":
        target = loudness_directive.target
        measured = loudness_directive.measured
        if not (
            isinstance(target, PeakTarget) and isinstance(measured, PeakMeasured)
        ):
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message=(
                    "peak directive type consistency is invalid (internal error)."
                ),
                hint="LoudnessDirective model_validator is not functioning.",
            )
        # Gain is the difference between the requested and the measured peak.
        gain_db = target.peak_db - measured.max_volume_db
        filter_parts.append(f"{upstream}volume={gain_db:g}dB[outa_ln]")
        normalised = True

    return denoised, normalised
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def _build_filter_complex(
    ranges: list[KeptRange],
    has_audio: bool,
    denoise_directive: DenoiseDirective | None,
    loudness_directive: LoudnessDirective | None,
    options: RenderOptions,
) -> tuple[str, str, str, bool, bool]:
    """Build the single-source filter_complex and its terminal labels (M-2).

    Stage order: trim/atrim per segment → concat → denoise afftdn → loudness
    → scale → subtitles. All segments read from input 0 (``[0:v]`` / ``[0:a]``).
    This is the single-source path only (ADR-C3).

    Returns:
        (filter_complex, video_map_label, audio_map_label, use_afftdn,
        use_loudness)
    """
    segment_count = len(ranges)
    stages: list[str] = []
    # concat expects its inputs interleaved per segment: [v0][a0][v1][a1]...
    concat_inputs: list[str] = []

    for idx, kept in enumerate(ranges):
        seg_start = _to_seconds(kept.source_range.start_time)
        seg_end = round(seg_start + _to_seconds(kept.source_range.duration), 6)

        stages.append(
            f"[0:v]trim=start={seg_start}:end={seg_end},setpts=PTS-STARTPTS[v{idx}]"
        )
        concat_inputs.append(f"[v{idx}]")

        if has_audio:
            stages.append(
                f"[0:a]atrim=start={seg_start}:end={seg_end},"
                f"asetpts=PTS-STARTPTS[a{idx}]"
            )
            concat_inputs.append(f"[a{idx}]")

    if has_audio:
        audio_streams = 1
        concat_outputs = "[outv][outa]"
    else:
        audio_streams = 0
        concat_outputs = "[outv]"
    stages.append(
        f"{''.join(concat_inputs)}concat=n={segment_count}:v=1:a={audio_streams}"
        f"{concat_outputs}"
    )

    # Denoise/loudness stages are appended by the helper shared with the
    # multi-source path.
    use_afftdn, use_loudness = _append_audio_pipe(
        stages, has_audio, denoise_directive, loudness_directive
    )

    # -vf and -filter_complex cannot be combined (ffmpeg error), so scaling is
    # chained after [outv] inside filter_complex and [outvscaled] is mapped
    # instead (ADR-1).
    video_map_label = "[outv]"
    if options.width is not None and options.height is not None:
        stages.append(f"[outv]scale={options.width}:{options.height}[outvscaled]")
        video_map_label = "[outvscaled]"

    # Subtitles go last on the video chain (ADR-S4-r3); nothing is added when
    # no subtitle is configured (ADR-S8).
    if options.subtitle is not None:
        video_map_label = _append_subtitle_filter(
            stages, video_map_label, options.subtitle
        )

    # Terminal audio label (ADR-L5b; DC-AM-001):
    # loudness present → [outa_ln], denoise only → [outa_dn], neither → [outa]
    audio_map_label = (
        "[outa_ln]" if use_loudness else "[outa_dn]" if use_afftdn else "[outa]"
    )

    return (
        ";".join(stages),
        video_map_label,
        audio_map_label,
        use_afftdn,
        use_loudness,
    )
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _resolve_target_spec(
    source_probes: dict[str, ProbeInfo],
    first_source: str,
    options: RenderOptions,
) -> tuple[int, int, float]:
    """Pick the output width, height and fps for the multi-source path (ADR-C4-r2).

    Width/height come from ``options`` when both are set, otherwise from the
    probe of ``first_source``. Per the original notes, a lone width or height
    is rejected earlier by RenderOptions._validate_resolution_pair (DC-AM-004).
    Both dimensions are rounded down to an even number (yuv420p even
    constraint). fps comes from ``options.fps`` when set, otherwise from the
    first source's probe.

    Returns:
        Tuple of (target_w, target_h, target_fps).

    Raises:
        ClipwrightError: with ``ErrorCode.INVALID_INPUT`` when a value has to
            come from the first source's probe and that probe lacks it.
    """
    probe = source_probes[first_source]

    if options.width is None or options.height is None:
        if probe.width is None or probe.height is None:
            raise ClipwrightError(
                code=ErrorCode.INVALID_INPUT,
                message="Cannot obtain resolution from the first source clip.",
                hint=(
                    "Set width/height on the first source in source_probes, or"
                    " specify both width and height in RenderOptions."
                ),
            )
        width, height = probe.width, probe.height
    else:
        width, height = options.width, options.height

    # Drop the odd remainder so both dimensions are even (ADR-C4-r2).
    target_w = width - width % 2
    target_h = height - height % 2

    if options.fps is not None:
        return target_w, target_h, options.fps

    if probe.fps is None:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="Cannot obtain fps from the first source clip.",
            hint=(
                "Set fps on the first source in source_probes, or specify"
                " fps in RenderOptions."
            ),
        )
    return target_w, target_h, probe.fps
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
def _build_clip_filters(
    ranges: list[KeptRange],
    source_index: dict[str, int],
    source_probes: dict[str, ProbeInfo],
    has_audio_overall: bool,
    target_w: int,
    target_h: int,
    target_fps: float,
) -> tuple[list[str], list[str], list[str]]:
    """Produce the per-clip filter chains for the multi-source path (ADR-C5-r2/C7-r2).

    Each clip's video is trimmed and normalised to the target spec
    (fps → scale with aspect ratio preserved → pad → setsar). When
    ``has_audio_overall`` is True every clip also gets an audio chain: the
    source's audio when its probe reports ``audio_count >= 1``, otherwise
    ``anullsrc`` silence trimmed to the clip duration.

    Returns:
        Tuple of (filter_parts, video_labels, audio_labels).
    """
    parts: list[str] = []
    v_labels: list[str] = []
    a_labels: list[str] = []

    for idx, kept in enumerate(ranges):
        input_no = source_index[kept.source]
        clip_start = _to_seconds(kept.source_range.start_time)
        clip_dur = _to_seconds(kept.source_range.duration)
        clip_end = round(clip_start + clip_dur, 6)

        # fps is written with 5 decimal places (ADR-C2-r2; NTSC fps precision).
        video_chain = ",".join(
            [
                f"trim=start={clip_start}:end={clip_end}",
                "setpts=PTS-STARTPTS",
                f"fps={target_fps:.5f}",
                f"scale={target_w}:{target_h}:force_original_aspect_ratio=decrease",
                f"pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2",
                "setsar=1",
            ]
        )
        parts.append(f"[{input_no}:v]{video_chain}[v{idx}]")
        v_labels.append(f"[v{idx}]")

        if not has_audio_overall:
            continue

        if source_probes[kept.source].audio_count >= 1:
            # Source has audio: trim it and normalise sample rate / layout.
            audio_chain = (
                f"[{input_no}:a]atrim=start={clip_start}:end={clip_end},"
                "asetpts=PTS-STARTPTS,"
                "aformat=sample_rates=48000:channel_layouts=stereo"
            )
        else:
            # Source has no audio: generate silence of the clip's duration.
            audio_chain = (
                "anullsrc=channel_layout=stereo:sample_rate=48000,"
                f"atrim=0:{clip_dur},asetpts=PTS-STARTPTS"
            )
        parts.append(f"{audio_chain}[a{idx}]")
        a_labels.append(f"[a{idx}]")

    return parts, v_labels, a_labels
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
def _build_multi_source_filter_complex(
    ranges: list[KeptRange],
    source_index: dict[str, int],
    source_probes: dict[str, ProbeInfo],
    has_audio_overall: bool,
    denoise_directive: DenoiseDirective | None,
    loudness_directive: LoudnessDirective | None,
    options: RenderOptions,
    first_source: str,
) -> tuple[str, str, str, bool, bool]:
    """Build the filter_complex for the multi-source path (ADR-C1/C5-r2/C7-r2/C11-r2).

    Delegates to:
    - _resolve_target_spec for the output width/height/fps;
    - _build_clip_filters for the per-clip normalised video/audio chains;
    - _append_audio_pipe for the post-concat denoise/loudness stages.

    This function itself assembles the concat stage, optionally appends the
    subtitle stage, and selects the terminal labels. The concat outputs use
    the same ``[outv]``/``[outa]`` labels as the single-source builder
    (ADR-C11-r2).

    Returns:
        (filter_complex, video_map_label, audio_map_label, use_afftdn,
        use_loudness)
    """
    target_w, target_h, target_fps = _resolve_target_spec(
        source_probes, first_source, options
    )

    stages, video_labels, audio_labels = _build_clip_filters(
        ranges,
        source_index,
        source_probes,
        has_audio_overall,
        target_w,
        target_h,
        target_fps,
    )

    # concat expects its inputs interleaved per clip: [v0][a0][v1][a1]...
    if has_audio_overall:
        concat_inputs = "".join(
            label
            for pair in zip(video_labels, audio_labels, strict=True)
            for label in pair
        )
        concat_tail = "a=1[outv][outa]"
    else:
        concat_inputs = "".join(video_labels)
        concat_tail = "a=0[outv]"
    stages.append(f"{concat_inputs}concat=n={len(ranges)}:v=1:{concat_tail}")

    # Denoise/loudness stages are appended by the helper shared with the
    # single-source path (ADR-C11-r2).
    use_afftdn, use_loudness = _append_audio_pipe(
        stages, has_audio_overall, denoise_directive, loudness_directive
    )

    # Clips were already normalised individually, so no post-concat scale is
    # applied here (ADR-C5-r2).
    video_map_label = "[outv]"

    # Subtitles go last on the video chain (ADR-S4-r3); nothing is added when
    # no subtitle is configured (ADR-S8).
    if options.subtitle is not None:
        video_map_label = _append_subtitle_filter(
            stages, video_map_label, options.subtitle
        )

    # Terminal audio label follows the cumulative pipe:
    # loudness present → [outa_ln], denoise only → [outa_dn], neither → [outa]
    audio_map_label = (
        "[outa_ln]" if use_loudness else "[outa_dn]" if use_afftdn else "[outa]"
    )

    return (
        ";".join(stages),
        video_map_label,
        audio_map_label,
        use_afftdn,
        use_loudness,
    )
|
|
1168
|
+
|
|
1169
|
+
|
|
1170
|
+
def _append_bgm_pipe(
    filter_parts: list[str],
    bgm: BgmClip,
    audio_map_label: str,
    has_main_audio: bool,
    main_dur: float,
    bgm_index: int,
) -> str:
    """Append the BGM audio chain to filter_parts and return the new
    audio_map_label.

    Conforms to ADR-B5-r2/B5-r3 (DC-AS-004).

    When has_main_audio=True:
        The main terminal label is aformat-ed to [main_fmt], then mixed with
        the BGM.
        ducking OFF:
            [main_fmt][bgm]amix=inputs=2:normalize=0,alimiter=limit=1.0[outa_bgm]
        ducking ON:
            [main_fmt]asplit → [bgm][main_sc]sidechaincompress → amix → alimiter
            [outa_bgm]
    When has_main_audio=False:
        BGM-only path:
            [{bgm_index}:a]aformat...atrim,asetpts,volume,(afade)[outa_bgm]

    Per the original notes, -stream_loop -1 is added by render.py, so only
    atrim=0:{main_dur} is used here to bound the BGM (ADR-B6-r2). afade is
    injected only when fade_in_sec > 0 / fade_out_sec > 0 (ADR-B9-r3).

    Time values (main duration, fade start, fade lengths) are written in
    fixed-point notation with up to 6 decimal places. The previous ``:g``
    formatting kept only 6 significant digits (1234.567891 → "1234.57"), so
    the BGM trim could differ from the main duration, and it could emit
    exponent notation, which ffmpeg's duration syntax does not accept.

    Args:
        filter_parts: list of filter_complex segments (mutated in place).
        bgm: BGM clip; only ``bgm.directive`` is read here.
        audio_map_label: current terminal label of the main audio chain.
        has_main_audio: whether a main audio chain exists to mix with.
        main_dur: duration of the main content in seconds.
        bgm_index: ffmpeg input index of the BGM file.

    Returns:
        New audio_map_label '[outa_bgm]'.

    Raises:
        ClipwrightError: with ``ErrorCode.INVALID_INPUT`` when fade_in_sec or
            fade_out_sec exceeds main_dur.
    """

    def fmt_seconds(value: float) -> str:
        # Fixed-point, microsecond precision, trailing zeros removed so that
        # round values still render as before ("10", "2.5").
        return f"{value:.6f}".rstrip("0").rstrip(".")

    d = bgm.directive

    # SR M-3: a fade longer than the main content would produce unintended
    # audio. BgmOptions cannot enforce this bound without knowing main_dur, so
    # it is checked here.
    if d.fade_in_sec > main_dur:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="fade_in_sec exceeds the main content duration.",
            hint=f"Keep fade within the main duration of {main_dur:.2f} seconds.",
        )
    if d.fade_out_sec > main_dur:
        raise ClipwrightError(
            code=ErrorCode.INVALID_INPUT,
            message="fade_out_sec exceeds the main content duration.",
            hint=f"Keep fade within the main duration of {main_dur:.2f} seconds.",
        )

    # BGM chain common part: aformat → atrim → asetpts → volume → (afade).
    bgm_chain = (
        f"[{bgm_index}:a]aformat=sample_rates=48000:channel_layouts=stereo,"
        f"atrim=0:{fmt_seconds(main_dur)},asetpts=PTS-STARTPTS,"
        f"volume={d.volume_db:g}dB"
    )
    # afade is injected only when the fade length is > 0 (ADR-B9-r3; DC-AM-003).
    if d.fade_in_sec > 0:
        bgm_chain += f",afade=t=in:st=0:d={fmt_seconds(d.fade_in_sec)}"
    if d.fade_out_sec > 0:
        # Fade-out is anchored so that it ends at the end of the main content.
        st_out = max(0.0, main_dur - d.fade_out_sec)
        bgm_chain += (
            f",afade=t=out:st={fmt_seconds(st_out)}:d={fmt_seconds(d.fade_out_sec)}"
        )

    if not has_main_audio:
        # No main audio: route the BGM directly to [outa_bgm]
        # (ADR-B5-r2/DC-AS-004).
        filter_parts.append(f"{bgm_chain}[outa_bgm]")
        return "[outa_bgm]"

    # Main audio present: BGM goes to the intermediate label [bgm].
    filter_parts.append(f"{bgm_chain}[bgm]")

    # Bring the main chain to the same sample rate / layout (DC-AS-007).
    filter_parts.append(
        f"{audio_map_label}aformat=sample_rates=48000:channel_layouts=stereo[main_fmt]"
    )

    if d.ducking.enabled:
        # ducking ON: the main audio is split so one copy is mixed and the
        # other drives the compressor; input order is [bgm][main_sc]
        # (DC-AS-006).
        filter_parts.append("[main_fmt]asplit[main_mix][main_sc]")
        filter_parts.append(
            f"[bgm][main_sc]sidechaincompress="
            f"threshold={d.ducking.threshold:g}:ratio={d.ducking.ratio:g}[bgm_duck]"
        )
        filter_parts.append(
            "[main_mix][bgm_duck]amix=inputs=2:normalize=0,alimiter=limit=1.0[outa_bgm]"
        )
    else:
        # ducking OFF: [main_fmt][bgm]amix → alimiter (DC-AM-001)
        filter_parts.append(
            "[main_fmt][bgm]amix=inputs=2:normalize=0,alimiter=limit=1.0[outa_bgm]"
        )

    return "[outa_bgm]"
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
def _build_ffmpeg_args(
    filter_complex: str,
    video_map_label: str,
    audio_map_label: str,
    has_audio: bool,
    options: RenderOptions,
    use_multi_source: bool = False,
) -> list[str]:
    """Assemble the ffmpeg argument list for a built filter graph (M-2).

    Emits, in order: -filter_complex, -map for video, -map for audio (only
    when ``has_audio``), then -c:v / -c:a / -r / -crf for each RenderOptions
    field that is set. Every element is a ``str``; numeric values go through
    ``str()`` (M-1). width/height are never emitted here because scaling is
    part of filter_complex (L-4).

    -r is omitted when ``use_multi_source`` is True: that path already applies
    a per-clip fps filter, and -r would resample a second time (CR M-2). The
    single-source path keeps -r (ADR-C3).
    """
    args: list[str] = ["-filter_complex", filter_complex, "-map", video_map_label]

    if has_audio:
        args.extend(["-map", audio_map_label])

    if options.video_codec is not None:
        args.extend(["-c:v", options.video_codec])

    if options.audio_codec is not None:
        args.extend(["-c:a", options.audio_codec])

    if options.fps is not None and not use_multi_source:
        args.extend(["-r", str(options.fps)])

    if options.crf is not None:
        args.extend(["-crf", str(options.crf)])

    return args
|
|
1310
|
+
|
|
1311
|
+
|
|
1312
|
+
def build_plan(
    ranges: list[KeptRange],
    probe_info: ProbeInfo,
    options: RenderOptions,
    denoise: dict[str, Any] | None = None,
    loudness: dict[str, Any] | None = None,
    source_probes: dict[str, ProbeInfo] | None = None,
    bgm: BgmClip | None = None,
) -> RenderPlan:
    """Orchestrate plan construction and return the resulting RenderPlan.

    Stages, in order:
      1. Validate the optional denoise / loudness directives.
      2. Choose the filter graph builder: the multi-source builder is used only
         when source_probes is given AND ranges reference two or more unique
         sources (ADR-C3); otherwise the single-source builder is used with
         probe_info (backward compatible).
      3. If bgm is given, append the BGM stage to the filter graph. The BGM
         input index is the number of unique sources, and the final output is
         then treated as having audio even if the main track has none.
      4. Build the ffmpeg argument list via _build_ffmpeg_args.
      5. Compute the dry-run size estimate (probe_info.bit_rate × total kept
         duration / 8) and collect warnings.

    Warnings are appended in a fixed order: denoise skipped (no main audio),
    loudness skipped (no main audio), peak+denoise measurement mismatch
    (DC-AM-002), loudness on multi-source input (ADR-C11-r2), size-estimate
    notes (multi-source approximation or missing bit_rate), and finally the
    "conversion options specified" approximation note (DC-AM-005).

    Raises:
        ClipwrightError: UNSUPPORTED_OPERATION when the denoise backend is
            "deepfilternet", when probe_info has no video (single-source
            path), or when any probed source has no video (multi-source path,
            ADR-C12). INTERNAL if the multi-source path is entered without
            source_probes (structurally unreachable; kept for type narrowing).
            Errors raised by the directive validators and the filter builders
            propagate unchanged.
    """

    def _kept_seconds() -> float:
        # Total kept duration in seconds, summed over every range.
        return sum(_to_seconds(r.source_range.duration) for r in ranges)

    # ---------- Directive validation ----------
    dn_spec: DenoiseDirective | None
    if denoise is None:
        dn_spec = None
    else:
        dn_spec = _validate_denoise_directive(denoise)
        if dn_spec.backend == "deepfilternet":
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message=(
                    "backend=deepfilternet is not supported for render application."
                ),
                hint=(
                    "Re-detect with backend=afftdn, or wait for a future"
                    " version with deepfilternet render support."
                ),
            )

    ln_spec: LoudnessDirective | None = (
        None if loudness is None else _validate_loudness_directive(loudness)
    )

    # ---------- Source bookkeeping (ADR-C9-r2) ----------
    sources = unique_sources_in_order(ranges)
    segment_total = len(ranges)
    source_total = len(sources)

    # Multi-source only when per-source probes exist and ≥ 2 unique sources.
    use_multi_source = source_probes is not None and source_total >= 2

    # ---------- Filter graph ----------
    if not use_multi_source:
        # Single-source path (backward compatible; ADR-C3).
        if not probe_info.has_video:
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message="No video stream found.",
                hint="Use a media file that contains a video stream.",
            )

        # Any number of audio streams ≥ 1 counts as "has audio" (first only).
        main_has_audio = probe_info.audio_count >= 1

        (
            filter_complex,
            video_label,
            audio_label,
            _afftdn_used,
            _loudness_used,
        ) = _build_filter_complex(ranges, main_has_audio, dn_spec, ln_spec, options)
    else:
        # Unreachable by construction of use_multi_source; an explicit raise
        # (rather than assert, which -O strips) narrows the type for mypy.
        if source_probes is None:
            raise ClipwrightError(
                code=ErrorCode.INTERNAL,
                message="source_probes is None (internal error).",
                hint="Check the caller of build_plan.",
            )

        # Reject the first source (in input order) lacking video (ADR-C12).
        videoless = next(
            (src for src in sources if not source_probes[src].has_video), None
        )
        if videoless is not None:
            basename = os.path.basename(videoless)
            raise ClipwrightError(
                code=ErrorCode.UNSUPPORTED_OPERATION,
                message=(
                    f"A source without a video stream is included: {basename}"
                ),
                hint=(
                    f"'{basename}' has no video stream."
                    " Use only media files that contain a video stream."
                ),
            )

        # The track has audio if at least one source does (ADR-C7-r2).
        main_has_audio = any(source_probes[src].audio_count >= 1 for src in sources)

        lead_source = ranges[0].source
        # Source → ffmpeg input index, in first-appearance order (ADR-C1).
        index_of: dict[str, int] = {src: pos for pos, src in enumerate(sources)}

        (
            filter_complex,
            video_label,
            audio_label,
            _afftdn_used,
            _loudness_used,
        ) = _build_multi_source_filter_complex(
            ranges,
            index_of,
            source_probes,
            main_has_audio,
            dn_spec,
            ln_spec,
            options,
            lead_source,
        )

    # ---------- BGM stage (ADR-B5-r2/B5-r3) ----------
    # main_has_audio: audio presence of the concatenated main track.
    # output_has_audio: audio presence of the final output (main or BGM).
    output_has_audio = main_has_audio
    bgm_source_out: str | None = None

    if bgm is not None:
        # BGM is fed as the input right after the unique sources; it is not
        # itself part of the sources list (DC-AS-005).
        bgm_input_index = source_total
        bgm_seconds = _kept_seconds()

        stages = filter_complex.split(";")
        audio_label = _append_bgm_pipe(
            stages,
            bgm,
            audio_label,
            main_has_audio,
            bgm_seconds,
            bgm_input_index,
        )
        filter_complex = ";".join(stages)
        output_has_audio = True
        bgm_source_out = bgm.source

    # ---------- ffmpeg arguments ----------
    ffmpeg_args = _build_ffmpeg_args(
        filter_complex,
        video_label,
        audio_label,
        output_has_audio,
        options,
        use_multi_source=use_multi_source,
    )

    # ---------- Dry-run estimate and warnings ----------
    total_seconds = _kept_seconds()
    notes: list[str] = []
    main_audio_missing = not main_has_audio

    # Directives that target the main track are skipped when it has no audio
    # (DC-AM-004), regardless of whether BGM supplies output audio.
    if dn_spec is not None and main_audio_missing:
        notes.append("No audio: denoise skipped — afftdn filter was not applied.")

    if ln_spec is not None and main_audio_missing:
        notes.append(
            "No audio: loudness skipped — loudnorm/volume filter was not applied."
        )

    if ln_spec is not None and main_has_audio:
        # peak gain was measured on pre-denoise audio (DC-AM-002).
        if ln_spec.mode == "peak" and dn_spec is not None:
            notes.append(
                "peak mode combined with denoise: peak max_volume was measured"
                " before denoise was applied; applying it to denoised audio may"
                " deviate from the target peak (DC-AM-002)."
            )

        # Loudness measured on one source, applied to the whole concat
        # (ADR-C11-r2).
        if source_total >= 2:
            notes.append(
                "track loudness applied to multi-source concatenation."
                " The measured values are from a single source; applying them to the"
                " entire concatenated track may not be strictly accurate"
                " (per_clip loudness is not supported)."
            )

    # Size estimate uses probe_info (first source) as representative (ADR-C10).
    estimated_size: float | None
    if probe_info.bit_rate is None:
        estimated_size = None
        notes.append("Cannot estimate file size: bit_rate is not available.")
    else:
        estimated_size = probe_info.bit_rate * total_seconds / 8.0
        if source_total >= 2:
            notes.append(
                "Estimated file size is approximate for multi-source input. The"
                " bit_rate of the first source is used as the representative"
                " value."
            )

    # Any explicit conversion option (audio_codec included) makes the
    # source-bit-rate-based estimate approximate (DC-AM-005).
    conversion_fields = (
        "video_codec",
        "audio_codec",
        "width",
        "height",
        "fps",
        "crf",
    )
    if any(getattr(options, attr) is not None for attr in conversion_fields):
        notes.append(
            "Conversion options (codec/resolution/fps/crf) are specified; the"
            " estimated file size is approximate and the actual size may differ."
        )

    return RenderPlan(
        filter_complex=filter_complex,
        ffmpeg_args=ffmpeg_args,
        segment_count=segment_total,
        total_duration_seconds=total_seconds,
        estimated_size_bytes=estimated_size,
        warnings=notes,
        input_sources=sources,
        bgm_source=bgm_source_out,
    )
|