auto-editor 28.1.0__py3-none-any.whl → 29.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auto_editor/__init__.py +3 -1
- auto_editor/__main__.py +31 -497
- auto_editor/cli.py +12 -0
- {auto_editor-28.1.0.dist-info → auto_editor-29.0.1.dist-info}/METADATA +5 -6
- auto_editor-29.0.1.dist-info/RECORD +9 -0
- auto_editor-29.0.1.dist-info/entry_points.txt +2 -0
- {auto_editor-28.1.0.dist-info → auto_editor-29.0.1.dist-info}/top_level.txt +0 -1
- auto_editor/analyze.py +0 -393
- auto_editor/cmds/__init__.py +0 -0
- auto_editor/cmds/cache.py +0 -69
- auto_editor/cmds/desc.py +0 -32
- auto_editor/cmds/info.py +0 -213
- auto_editor/cmds/levels.py +0 -199
- auto_editor/cmds/palet.py +0 -29
- auto_editor/cmds/repl.py +0 -113
- auto_editor/cmds/subdump.py +0 -72
- auto_editor/cmds/test.py +0 -816
- auto_editor/edit.py +0 -560
- auto_editor/exports/__init__.py +0 -0
- auto_editor/exports/fcp11.py +0 -195
- auto_editor/exports/fcp7.py +0 -313
- auto_editor/exports/json.py +0 -63
- auto_editor/exports/kdenlive.py +0 -322
- auto_editor/exports/shotcut.py +0 -147
- auto_editor/ffwrapper.py +0 -187
- auto_editor/help.py +0 -224
- auto_editor/imports/__init__.py +0 -0
- auto_editor/imports/fcp7.py +0 -275
- auto_editor/imports/json.py +0 -234
- auto_editor/json.py +0 -297
- auto_editor/lang/__init__.py +0 -0
- auto_editor/lang/libintrospection.py +0 -10
- auto_editor/lang/libmath.py +0 -23
- auto_editor/lang/palet.py +0 -724
- auto_editor/lang/stdenv.py +0 -1179
- auto_editor/lib/__init__.py +0 -0
- auto_editor/lib/contracts.py +0 -235
- auto_editor/lib/data_structs.py +0 -278
- auto_editor/lib/err.py +0 -2
- auto_editor/make_layers.py +0 -315
- auto_editor/preview.py +0 -93
- auto_editor/render/__init__.py +0 -0
- auto_editor/render/audio.py +0 -517
- auto_editor/render/subtitle.py +0 -205
- auto_editor/render/video.py +0 -307
- auto_editor/timeline.py +0 -331
- auto_editor/utils/__init__.py +0 -0
- auto_editor/utils/bar.py +0 -142
- auto_editor/utils/chunks.py +0 -2
- auto_editor/utils/cmdkw.py +0 -206
- auto_editor/utils/container.py +0 -101
- auto_editor/utils/func.py +0 -128
- auto_editor/utils/log.py +0 -126
- auto_editor/utils/types.py +0 -277
- auto_editor/vanparse.py +0 -313
- auto_editor-28.1.0.dist-info/RECORD +0 -57
- auto_editor-28.1.0.dist-info/entry_points.txt +0 -6
- docs/build.py +0 -70
- {auto_editor-28.1.0.dist-info → auto_editor-29.0.1.dist-info}/WHEEL +0 -0
- {auto_editor-28.1.0.dist-info → auto_editor-29.0.1.dist-info}/licenses/LICENSE +0 -0
auto_editor/render/audio.py
DELETED
@@ -1,517 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from fractions import Fraction
|
4
|
-
from io import BytesIO
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import TYPE_CHECKING, cast
|
7
|
-
|
8
|
-
import av
|
9
|
-
import numpy as np
|
10
|
-
from av import AudioFrame
|
11
|
-
from av.filter.loudnorm import stats
|
12
|
-
|
13
|
-
from auto_editor.ffwrapper import FileInfo
|
14
|
-
from auto_editor.json import load
|
15
|
-
from auto_editor.lang.palet import env
|
16
|
-
from auto_editor.lib.contracts import andc, between_c, is_int_or_float
|
17
|
-
from auto_editor.lib.err import MyError
|
18
|
-
from auto_editor.timeline import Clip, v3
|
19
|
-
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
|
20
|
-
from auto_editor.utils.func import parse_bitrate
|
21
|
-
from auto_editor.utils.log import Log
|
22
|
-
|
23
|
-
if TYPE_CHECKING:
|
24
|
-
from collections.abc import Iterator
|
25
|
-
|
26
|
-
from auto_editor.__main__ import Args
|
27
|
-
|
28
|
-
|
29
|
-
# Attribute schemas for the supported audio-normalize objects, keyed by tag.
# `parse_norm` looks the tag up here and parses the user's attributes with it.
norm_types = dict(
    # "ebu": `i`, `lra` and `tp` are forwarded to the `loudnorm` filter under
    # the same names; `gain` is passed as the first-pass `offset`.
    ebu=pAttrs(
        "ebu",
        pAttr("i", -24.0, andc(is_int_or_float, between_c(-70, 5))),
        pAttr("lra", 7.0, andc(is_int_or_float, between_c(1, 50))),
        pAttr("tp", -2.0, andc(is_int_or_float, between_c(-9, 0))),
        pAttr("gain", 0.0, andc(is_int_or_float, between_c(-99, 99))),
    ),
    # "peak": `t` is the target that the measured peak level is compared to.
    peak=pAttrs(
        "peak",
        pAttr("t", -8.0, andc(is_int_or_float, between_c(-99, 0))),
    ),
)
|
42
|
-
|
43
|
-
|
44
|
-
def parse_norm(norm: str, log: Log) -> dict | None:
    """Turn an audio-normalize spec into an attribute dictionary.

    ``norm`` is either the literal ``"#f"`` (normalization disabled) or
    ``"<type>"`` / ``"<type>:<attrs>"`` where ``<type>`` is a key of
    ``norm_types``.

    Returns ``None`` for ``"#f"``; otherwise the parsed attributes with an
    extra ``"tag"`` entry holding the normalize type.  Unknown types and
    attribute parse errors are reported through ``log.error``.
    """
    if norm == "#f":
        return None

    # Everything after the first ":" is the attribute string (may be empty).
    norm_type, _, attrs = norm.partition(":")

    obj = norm_types.get(norm_type)
    if obj is None:
        log.error(f"Unknown audio normalize object: '{norm_type}'")

    try:
        obj_dict = parse_with_palet(attrs, obj, env)
    except ParserError as e:
        log.error(e)
    else:
        obj_dict["tag"] = norm_type
        return obj_dict
|
62
|
-
|
63
|
-
|
64
|
-
def parse_ebu_bytes(norm: dict, stat: bytes, log: Log) -> tuple[str, str]:
    """Build the second-pass ``loudnorm`` filter from first-pass statistics.

    ``stat`` is the raw statistics blob from the measuring pass; it is decoded
    with ``load("loudnorm", stat)``.  ``norm`` supplies the target ``i``,
    ``lra`` and ``tp`` values.

    Returns the filter name (``"loudnorm"``) and its argument string.
    """
    try:
        parsed = load("loudnorm", stat)
    except MyError:
        log.error(f"Invalid loudnorm stats.\n{stat!r}")

    measured_keys = (
        "input_i",
        "input_tp",
        "input_lra",
        "input_thresh",
        "target_offset",
    )
    for key in measured_keys:
        raw = parsed[key]
        assert isinstance(raw, int | float | str | bytes)
        number = float(raw)
        # Infinite measurements are replaced with finite stand-ins before
        # being formatted into the filter string.
        if number == float("-inf"):
            number = -99
        elif number == float("inf"):
            number = 0
        parsed[key] = number

    log.debug(f"{parsed}")

    filter_args = (
        f"i={norm['i']}:lra={norm['lra']}:tp={norm['tp']}"
        f":offset={parsed['target_offset']}"
        f":measured_i={parsed['input_i']}:measured_lra={parsed['input_lra']}"
        f":measured_tp={parsed['input_tp']}"
        f":measured_thresh={parsed['input_thresh']}:linear=true:print_format=json"
    )
    return "loudnorm", filter_args
|
94
|
-
|
95
|
-
|
96
|
-
def apply_audio_normalization(
    norm: dict, pre_master: Path, path: Path, log: Log
) -> None:
    """Normalize the audio in ``pre_master`` and write it to ``path``.

    ``norm`` is a dict as produced by ``parse_norm``:

    * ``tag == "ebu"``: a first pass gathers loudnorm statistics, which are
      turned into the second-pass ``loudnorm`` filter by ``parse_ebu_bytes``.
    * otherwise (``"t"`` must be present): the file is scanned for its peak
      level and a ``volume`` filter is applied with ``t - peak``.

    The result is encoded as ``pcm_s16le`` at the input stream's rate.
    """
    if norm["tag"] == "ebu":
        first_pass = (
            f"i={norm['i']}:lra={norm['lra']}:tp={norm['tp']}:offset={norm['gain']}"
        )
        log.debug(f"audio norm first pass: {first_pass}")
        with av.open(f"{pre_master}") as container:
            stats_ = stats(first_pass, container.streams.audio[0])

        name, filter_args = parse_ebu_bytes(norm, stats_, log)
    else:
        assert "t" in norm

        def get_peak_level(frame: AudioFrame) -> float:
            # Calculate peak level in dB
            # Should be equivalent to: -af astats=measure_overall=Peak_level:measure_perchannel=0
            # NOTE(review): this is -20*log10 of the raw sample magnitude (no
            # full-scale normalization) -- confirm it really matches astats.
            max_amplitude = np.abs(frame.to_ndarray()).max()
            if max_amplitude > 0.0:
                return -20.0 * np.log10(max_amplitude)
            return -99.0

        with av.open(pre_master) as container:
            max_peak_level = -99.0
            assert len(container.streams.video) == 0
            for frame in container.decode(audio=0):
                max_peak_level = max(max_peak_level, get_peak_level(frame))

        adjustment = norm["t"] - max_peak_level
        log.debug(f"current peak level: {max_peak_level}")
        log.print(f"peak adjustment: {adjustment:.3f}dB")
        # NOTE(review): the value is logged as dB but handed to `volume`
        # without a "dB" suffix -- confirm how the filter interprets it.
        name, filter_args = "volume", f"{adjustment}"

    # Both containers are context-managed so the output file is closed even if
    # filtering or encoding raises part-way through.
    with av.open(pre_master) as container, av.open(path, mode="w") as output_file:
        input_stream = container.streams.audio[0]
        output_stream = output_file.add_stream("pcm_s16le", rate=input_stream.rate)

        graph = av.filter.Graph()
        graph.link_nodes(
            graph.add_abuffer(template=input_stream),
            graph.add(name, filter_args),
            graph.add("abuffersink"),
        ).configure()

        for frame in container.decode(input_stream):
            graph.push(frame)
            # Drain everything the graph has ready before pushing more.
            while True:
                try:
                    aframe = graph.pull()
                    assert isinstance(aframe, AudioFrame)
                    output_file.mux(output_stream.encode(aframe))
                except (av.BlockingIOError, av.EOFError):
                    break

        # Flush the encoder.
        output_file.mux(output_stream.encode(None))
|
155
|
-
|
156
|
-
|
157
|
-
def process_audio_clip(clip: Clip, data: np.ndarray, sr: int, log: Log) -> np.ndarray:
    """Apply ``clip.speed`` and ``clip.volume`` to ``data`` with a filter graph.

    ``data`` is loaded as a planar 16-bit stereo frame at rate ``sr``, written
    to an in-memory WAV, then decoded back through ``atempo`` / ``volume``
    filters.

    Returns the filtered samples (planar 16-bit stereo, concatenated along
    axis 1), or ``np.zeros_like(data)`` when the graph produced no frames.
    """
    to_s16 = av.AudioResampler(format="s16", layout="stereo", rate=sr)
    input_buffer = BytesIO()

    with av.open(input_buffer, "w", format="wav") as container:
        output_stream = container.add_stream(
            "pcm_s16le", sample_rate=sr, format="s16", layout="stereo"
        )

        frame = AudioFrame.from_ndarray(data, format="s16p", layout="stereo")
        frame.rate = sr

        for reframe in to_s16.resample(frame):
            container.mux(output_stream.encode(reframe))
        container.mux(output_stream.encode(None))

    input_buffer.seek(0)

    all_frames = []
    # Context-managed so the in-memory input container is always closed.
    with av.open(input_buffer, "r") as input_file:
        input_stream = input_file.streams.audio[0]

        graph = av.filter.Graph()
        args = [graph.add_abuffer(template=input_stream)]

        if clip.speed != 1:
            # Large or small speeds are split across several chained `atempo`
            # filters so that each single factor stays between 0.5 and 100.
            if clip.speed > 10_000:
                for _ in range(3):
                    args.append(graph.add("atempo", f"{clip.speed ** (1 / 3)}"))
            elif clip.speed > 100:
                for _ in range(2):
                    args.append(graph.add("atempo", f"{clip.speed**0.5}"))
            elif clip.speed >= 0.5:
                args.append(graph.add("atempo", f"{clip.speed}"))
            else:
                # Halve repeatedly, then apply the remaining factor.
                start = 0.5
                while start * 0.5 > clip.speed:
                    start *= 0.5
                    args.append(graph.add("atempo", "0.5"))
                args.append(graph.add("atempo", f"{clip.speed / start}"))

        if clip.volume != 1:
            args.append(graph.add("volume", f"{clip.volume}"))

        args.append(graph.add("abuffersink"))
        graph.link_nodes(*args).configure()

        resampler = av.AudioResampler(format="s16p", layout="stereo", rate=sr)

        for frame in input_file.decode(input_stream):
            graph.push(frame)
            while True:
                try:
                    aframe = graph.pull()
                    assert isinstance(aframe, AudioFrame)

                    for resampled_frame in resampler.resample(aframe):
                        all_frames.append(resampled_frame.to_ndarray())

                except (av.BlockingIOError, av.EOFError):
                    break

    if not all_frames:
        log.debug(f"No audio frames at {clip=}")
        return np.zeros_like(data)
    return np.concatenate(all_frames, axis=1)
|
223
|
-
|
224
|
-
|
225
|
-
def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
    """Mix several audio files into one mono 16-bit PCM file.

    Each input's first audio stream is resampled to mono ``s16`` at ``sr``,
    padded or truncated to the longest input, summed, rescaled so the peak is
    32767, and written to ``output_path`` in one-second frames.

    Raises ``ValueError`` if ``audio_paths`` is empty.
    """
    max_length = 0

    # First pass: determine the maximum length
    for path in audio_paths:
        with av.open(path) as container:
            stream = container.streams.audio[0]

            assert stream.duration is not None
            assert stream.time_base is not None
            # `duration` is expressed in `time_base` units, so use the whole
            # fraction (not only its denominator) to convert to samples.
            duration_samples = int(stream.duration * stream.time_base * sr)
            max_length = max(max_length, duration_samples)

    mixed_audio = None

    # Second pass: read and mix audio
    for path in audio_paths:
        pieces: list[np.ndarray] = []
        resampler = av.AudioResampler(format="s16", layout="mono", rate=sr)
        with av.open(path) as container:
            for frame in container.decode(audio=0):
                frame.pts = None
                # The resampler may hand back zero, one or several frames.
                for resampled in resampler.resample(frame):
                    pieces.append(resampled.to_ndarray().reshape(-1))

        # Pad or truncate to max_length
        if pieces:
            current_audio = np.concatenate(pieces)[:max_length]
        else:
            current_audio = np.zeros(0, dtype=np.int16)
        if len(current_audio) < max_length:
            current_audio = np.pad(
                current_audio, (0, max_length - len(current_audio)), "constant"
            )

        if mixed_audio is None:
            mixed_audio = current_audio.astype(np.float32)
        else:
            mixed_audio += current_audio.astype(np.float32)

    if mixed_audio is None:
        raise ValueError("mixed_audio is None")

    # Normalize the mixed audio (skipped for zero-length or silent audio).
    max_val = np.max(np.abs(mixed_audio)) if mixed_audio.size else 0
    if max_val > 0:
        mixed_audio = mixed_audio * (32767 / max_val)
    mixed_audio = mixed_audio.astype(np.int16)

    with av.open(output_path, mode="w") as output_container:
        output_stream = output_container.add_stream("pcm_s16le", rate=sr)

        chunk_size = sr  # Process 1 second at a time
        for i in range(0, len(mixed_audio), chunk_size):
            # Shape becomes (1, samples) for mono
            chunk = np.array([mixed_audio[i : i + chunk_size]])

            frame = AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
            frame.rate = sr
            frame.pts = i  # Set presentation timestamp

            output_container.mux(output_stream.encode(frame))

        # Flush the encoder.
        output_container.mux(output_stream.encode(None))
|
292
|
-
|
293
|
-
|
294
|
-
def file_to_ndarray(src: FileInfo, stream: int, sr: int) -> np.ndarray:
    """Decode one audio stream of ``src`` into a single array.

    The audio stream with index ``stream`` in ``src.path`` is resampled to
    planar 16-bit stereo at rate ``sr`` and the frames are joined along
    axis 1.  A stream that yields no frames gives an empty ``(2, 0)`` int16
    array instead of raising from ``np.concatenate``.
    """
    resampler = av.AudioResampler(format="s16p", layout="stereo", rate=sr)

    with av.open(src.path) as container:
        all_frames = [
            resampled_frame.to_ndarray()
            for frame in container.decode(audio=stream)
            for resampled_frame in resampler.resample(frame)
        ]

    if not all_frames:
        return np.zeros((2, 0), dtype=np.int16)
    return np.concatenate(all_frames, axis=1)
|
305
|
-
|
306
|
-
|
307
|
-
def ndarray_to_file(audio_data: np.ndarray, rate: int, out: str | Path) -> None:
    """Write ``audio_data`` to ``out`` as stereo ``pcm_s16le`` at ``rate``.

    ``audio_data`` is loaded as one planar 16-bit stereo frame.
    """
    with av.open(out, mode="w") as output:
        stream = output.add_stream(
            "pcm_s16le", rate=rate, format="s16", layout="stereo"
        )

        frame = AudioFrame.from_ndarray(audio_data, format="s16p", layout="stereo")
        frame.rate = rate

        # Encode the single frame, then `None` to flush the encoder.
        for item in (frame, None):
            output.mux(stream.encode(item))
|
318
|
-
|
319
|
-
|
320
|
-
def ndarray_to_iter(
    audio_data: np.ndarray, fmt: av.AudioFormat, layout: str, rate: int
) -> Iterator[AudioFrame]:
    """Lazily yield ``audio_data`` as audio frames in the requested format.

    ``audio_data`` is sliced along axis 1 into quarter-second pieces, each
    loaded as a planar 16-bit stereo frame and resampled to ``fmt`` /
    ``layout`` at ``rate``.
    """
    step = rate // 4  # Process 0.25 seconds at a time

    resampler = av.AudioResampler(rate=rate, format=fmt, layout=layout)
    for offset in range(0, audio_data.shape[1], step):
        frame = AudioFrame.from_ndarray(
            audio_data[:, offset : offset + step], format="s16p", layout="stereo"
        )
        frame.rate = rate
        frame.pts = offset  # the slice's starting sample index

        for converted in resampler.resample(frame):
            yield converted
|
334
|
-
|
335
|
-
|
336
|
-
def make_new_audio(
    output: av.container.OutputContainer,
    audio_format: av.AudioFormat,
    tl: v3,
    args: Args,
    log: Log,
) -> tuple[list[av.AudioStream], list[Iterator[AudioFrame]]]:
    """Render the timeline's audio and add matching streams to ``output``.

    ``_make_new_audio`` produces one entry per rendered track: either a file
    path or an iterator of frames.  For each entry an audio stream is added to
    ``output`` using ``args.audio_codec``, and a frame source is prepared
    (file paths are opened and decoded).

    Returns the created streams and their frame sources, in matching order.
    """
    audio_paths = _make_new_audio(tl, audio_format, args, log)

    audio_streams: list[av.AudioStream] = []
    audio_gen_frames = []
    # Containers opened for path entries are collected here (not returned).
    audio_inputs = []

    for i, audio_path in enumerate(audio_paths):
        audio_stream = output.add_stream(
            args.audio_codec,
            rate=tl.sr,
            format=audio_format,
            layout=tl.T.layout,
            time_base=Fraction(1, tl.sr),
        )
        if not isinstance(audio_stream, av.AudioStream):
            log.error(f"Not a known audio codec: {args.audio_codec}")

        if args.audio_bitrate == "auto":
            log.debug(f"[auto] audio bitrate: {audio_stream.bit_rate}")
        else:
            audio_stream.bit_rate = parse_bitrate(args.audio_bitrate, log)
            log.debug(f"audio bitrate: {audio_stream.bit_rate}")

        # Carry over the track's language tag when the timeline has one.
        if i < len(tl.T.audios):
            lang = tl.T.audios[i].lang
            if lang is not None:
                audio_stream.metadata["language"] = lang

        audio_streams.append(audio_stream)

        if not isinstance(audio_path, str):
            # Already an iterator of frames.
            audio_gen_frames.append(audio_path)
            continue

        audio_input = av.open(audio_path)
        audio_inputs.append(audio_input)
        audio_gen_frames.append(audio_input.decode(audio=0))

    return audio_streams, audio_gen_frames
|
378
|
-
|
379
|
-
|
380
|
-
class Getter:
    """Reads sample ranges from one audio stream of a media file.

    The container is opened once in ``__init__`` and reused for every
    ``get`` call.
    """

    __slots__ = ("container", "stream", "rate")

    def __init__(self, path: Path, stream: int, rate: int):
        """Open ``path`` and select its audio stream with index ``stream``.

        ``rate`` is the sample rate that ``get`` resamples to.
        """
        self.container = av.open(path)
        self.stream = self.container.streams.audio[stream]
        self.rate = rate

    def get(self, start: int, end: int) -> np.ndarray:
        """Return ``end - start`` samples beginning near sample ``start``.

        ``start`` and ``end`` are sample positions at ``self.rate``.  Audio is
        resampled to planar 16-bit stereo.  The result is trimmed to the exact
        length, or zero-padded when fewer samples could be decoded, including
        the case where nothing at all was decoded after the seek.
        """
        # start/end is in samples

        container = self.container
        stream = self.stream
        resampler = av.AudioResampler(format="s16p", layout="stereo", rate=self.rate)

        time_base = stream.time_base
        assert time_base is not None
        start_pts = int(start / self.rate / time_base)

        # Seek to the approximate position
        # NOTE(review): decoded frames are used from their first sample;
        # confirm the seek lands close enough to `start` for callers.
        container.seek(start_pts, stream=stream)

        all_frames = []
        total_samples = 0
        target_samples = end - start

        # Decode frames until we have enough samples
        for frame in container.decode(stream):
            for resampled_frame in resampler.resample(frame):
                frame_array = resampled_frame.to_ndarray()
                all_frames.append(frame_array)
                total_samples += frame_array.shape[1]

                if total_samples >= target_samples:
                    break

            if total_samples >= target_samples:
                break

        if all_frames:
            result = np.concatenate(all_frames, axis=1)
        else:
            # Nothing decoded (e.g. seek past the end): start from an empty
            # stereo buffer so the padding below fills it with silence.
            result = np.zeros((2, 0), dtype=np.int16)

        # Trim to exact size
        if result.shape[1] > target_samples:
            result = result[:, :target_samples]
        elif result.shape[1] < target_samples:
            # Pad with zeros if we don't have enough samples
            padding = np.zeros(
                (result.shape[0], target_samples - result.shape[1]), dtype=result.dtype
            )
            result = np.concatenate([result, padding], axis=1)

        assert result.shape[1] == end - start
        return result  # Return NumPy array with shape (channels, samples)
|
433
|
-
|
434
|
-
|
435
|
-
def _make_new_audio(
    tl: v3, fmt: av.AudioFormat, args: Args, log: Log
) -> list[str | Iterator[AudioFrame]]:
    """Render every audio layer of ``tl`` into a file path or frame iterator.

    Each layer's clips are read through a per-source ``Getter``, optionally
    processed for speed/volume, and summed into a memory-mapped int16 buffer
    under ``log.temp``.  A rendered layer then becomes:

    * a normalized WAV path when ``args.audio_normalize`` parses to a spec,
    * a plain WAV path when ``args.mix_audio_streams`` is set,
    * otherwise an iterator from ``ndarray_to_iter``.

    With ``args.mix_audio_streams`` and more than one entry, the files are
    mixed and a single path is returned.
    """
    sr = tl.sr
    tb = tl.tb
    output: list[str | Iterator[AudioFrame]] = []
    # One Getter per (source, stream) pair, shared across layers.
    samples: dict[tuple[FileInfo, int], Getter] = {}

    norm = parse_norm(args.audio_normalize, log)

    if not tl.a[0]:
        log.error("Trying to render empty audio timeline")

    layout = tl.T.layout
    try:
        av.AudioLayout(layout)
    except ValueError:
        log.error(f"Invalid audio layout: {layout}")

    for i, layer in enumerate(tl.a):
        arr: np.ndarray | None = None

        for clip in layer:
            key = (clip.src, clip.stream)
            if key not in samples:
                samples[key] = Getter(clip.src.path, clip.stream, sr)

            log.conwrite("Creating audio")
            if arr is None:
                # Size the buffer from the layer's last clip, created lazily
                # on the first clip.
                last = layer[-1]
                leng = max(round((last.start + last.dur) * sr / tb), sr // tb)
                arr = np.memmap(
                    Path(log.temp, f"{i}.map"),
                    mode="w+",
                    dtype=np.int16,
                    shape=(2, leng),
                )

            samp_start = round(clip.offset * clip.speed * sr / tb)
            samp_end = round((clip.offset + clip.dur) * clip.speed * sr / tb)

            clip_arr = samples[key].get(samp_start, samp_end)
            if clip.speed != 1 or clip.volume != 1:
                clip_arr = process_audio_clip(clip, clip_arr, sr, log)

            # Mix numpy arrays
            start = clip.start * sr // tb
            room = arr.shape[1] - start
            if clip_arr.shape[1] > room:
                # Shorten `clip_arr` if bigger than expected.
                arr[:, start:] += clip_arr[:, :room]
            else:
                arr[:, start : start + clip_arr.shape[1]] += clip_arr

        if arr is None:
            # Layer had no clips; nothing to emit.
            continue

        if norm is not None:
            path = Path(log.temp, f"new{i}.wav")
            pre_master = Path(log.temp, "premaster.wav")

            ndarray_to_file(arr, sr, pre_master)
            apply_audio_normalization(norm, pre_master, path, log)
            output.append(f"{path}")
        elif args.mix_audio_streams:
            path = Path(log.temp, f"new{i}.wav")
            ndarray_to_file(arr, sr, path)
            output.append(f"{path}")
        else:
            output.append(ndarray_to_iter(arr, fmt, layout, sr))

    if args.mix_audio_streams and len(output) > 1:
        new_a_file = f"{Path(log.temp, 'new_audio.wav')}"
        # When mix_audio_streams is True, output only contains strings
        audio_paths = cast(list[str], output)
        mix_audio_files(sr, audio_paths, new_a_file)
        return [new_a_file]

    return output
|