auto-editor 26.3.3__py3-none-any.whl → 27.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auto_editor/__init__.py +1 -1
- auto_editor/__main__.py +17 -5
- auto_editor/analyze.py +30 -36
- auto_editor/cmds/desc.py +2 -2
- auto_editor/cmds/info.py +3 -3
- auto_editor/cmds/levels.py +5 -5
- auto_editor/cmds/repl.py +3 -8
- auto_editor/cmds/subdump.py +62 -8
- auto_editor/cmds/test.py +92 -42
- auto_editor/edit.py +59 -111
- auto_editor/ffwrapper.py +91 -87
- auto_editor/formats/fcp11.py +10 -8
- auto_editor/formats/fcp7.py +11 -12
- auto_editor/formats/json.py +10 -11
- auto_editor/{lang/json.py → json.py} +39 -43
- auto_editor/lang/palet.py +2 -2
- auto_editor/lang/stdenv.py +13 -0
- auto_editor/make_layers.py +18 -8
- auto_editor/render/audio.py +239 -102
- auto_editor/render/subtitle.py +10 -14
- auto_editor/render/video.py +41 -46
- auto_editor/timeline.py +60 -10
- auto_editor/utils/container.py +21 -14
- auto_editor/utils/func.py +21 -0
- {auto_editor-26.3.3.dist-info → auto_editor-27.1.0.dist-info}/METADATA +8 -7
- auto_editor-27.1.0.dist-info/RECORD +54 -0
- {auto_editor-26.3.3.dist-info → auto_editor-27.1.0.dist-info}/WHEEL +1 -1
- docs/build.py +16 -7
- auto_editor/output.py +0 -86
- auto_editor/wavfile.py +0 -310
- auto_editor-26.3.3.dist-info/RECORD +0 -56
- {auto_editor-26.3.3.dist-info → auto_editor-27.1.0.dist-info}/entry_points.txt +0 -0
- {auto_editor-26.3.3.dist-info → auto_editor-27.1.0.dist-info/licenses}/LICENSE +0 -0
- {auto_editor-26.3.3.dist-info → auto_editor-27.1.0.dist-info}/top_level.txt +0 -0
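One rename in this release is API-visible: the JSON reader moved from auto_editor/lang/json.py to auto_editor/json.py. As orientation before the diffs below, this is the one-line import change it implies for in-tree callers — a sketch only, with both paths taken from the rename entry above and the audio.py diff below:

# auto-editor 26.3.3
from auto_editor.lang.json import load

# auto-editor 27.1.0
from auto_editor.json import load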
auto_editor/render/audio.py
CHANGED
@@ -1,29 +1,32 @@
 from __future__ import annotations
 
-import io
+from fractions import Fraction
+from io import BytesIO
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-import av
+import bv
 import numpy as np
-from av.filter.loudnorm import stats
+from bv import AudioFrame
+from bv.filter.loudnorm import stats
 
 from auto_editor.ffwrapper import FileInfo
-from auto_editor.lang.json import load
+from auto_editor.json import load
 from auto_editor.lang.palet import env
 from auto_editor.lib.contracts import andc, between_c, is_int_or_float
 from auto_editor.lib.err import MyError
-from auto_editor.output import Ensure
 from auto_editor.timeline import TlAudio, v3
-from auto_editor.utils.bar import Bar
 from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
-from auto_editor.utils.
+from auto_editor.utils.func import parse_bitrate
 from auto_editor.utils.log import Log
-from auto_editor.wavfile import AudioData, read, write
 
 if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from typing import Any
+
     from auto_editor.__main__ import Args
 
+
 norm_types = {
     "ebu": pAttrs(
         "ebu",
@@ -61,12 +64,14 @@ def parse_norm(norm: str, log: Log) -> dict | None:
 
 def parse_ebu_bytes(norm: dict, stat: bytes, log: Log) -> tuple[str, str]:
     try:
-        parsed =
+        parsed = load("loudnorm", stat)
     except MyError:
         log.error(f"Invalid loudnorm stats.\n{stat!r}")
 
     for key in {"input_i", "input_tp", "input_lra", "input_thresh", "target_offset"}:
-        val = float(parsed[key])
+        val_ = parsed[key]
+        assert isinstance(val_, int | float | str | bytes)
+        val = float(val_)
         if val == float("-inf"):
             parsed[key] = -99
         elif val == float("inf"):
@@ -97,14 +102,14 @@ def apply_audio_normalization(
             f"i={norm['i']}:lra={norm['lra']}:tp={norm['tp']}:offset={norm['gain']}"
         )
         log.debug(f"audio norm first pass: {first_pass}")
-        with av.open(f"{pre_master}") as container:
+        with bv.open(f"{pre_master}") as container:
             stats_ = stats(first_pass, container.streams.audio[0])
 
         name, filter_args = parse_ebu_bytes(norm, stats_, log)
     else:
         assert "t" in norm
 
-        def get_peak_level(frame: av.AudioFrame) -> float:
+        def get_peak_level(frame: AudioFrame) -> float:
             # Calculate peak level in dB
             # Should be equivalent to: -af astats=measure_overall=Peak_level:measure_perchannel=0
             max_amplitude = np.abs(frame.to_ndarray()).max()
@@ -112,7 +117,7 @@ def apply_audio_normalization(
                 return -20.0 * np.log10(max_amplitude)
             return -99.0
 
-        with av.open(pre_master) as container:
+        with bv.open(pre_master) as container:
             max_peak_level = -99.0
             assert len(container.streams.video) == 0
             for frame in container.decode(audio=0):
@@ -124,13 +129,13 @@ def apply_audio_normalization(
         log.print(f"peak adjustment: {adjustment:.3f}dB")
         name, filter_args = "volume", f"{adjustment}"
 
-    with av.open(pre_master) as container:
+    with bv.open(pre_master) as container:
         input_stream = container.streams.audio[0]
 
-        output_file = av.open(path, mode="w")
+        output_file = bv.open(path, mode="w")
         output_stream = output_file.add_stream("pcm_s16le", rate=input_stream.rate)
 
-        graph = av.filter.Graph()
+        graph = bv.filter.Graph()
         graph.link_nodes(
             graph.add_abuffer(template=input_stream),
             graph.add(name, filter_args),
@@ -141,30 +146,37 @@ def apply_audio_normalization(
         while True:
             try:
                 aframe = graph.pull()
-                assert isinstance(aframe, av.AudioFrame)
+                assert isinstance(aframe, AudioFrame)
                 output_file.mux(output_stream.encode(aframe))
-            except (av.BlockingIOError, av.EOFError):
+            except (bv.BlockingIOError, bv.EOFError):
                 break
 
     output_file.mux(output_stream.encode(None))
     output_file.close()
 
 
-def process_audio_clip(
-    clip: TlAudio, samp_list: AudioData, samp_start: int, samp_end: int, sr: int
-) -> AudioData:
-    input_buffer = io.BytesIO()
-    write(input_buffer, sr, samp_list[samp_start:samp_end])
+def process_audio_clip(clip: TlAudio, data: np.ndarray, sr: int) -> np.ndarray:
+    to_s16 = bv.AudioResampler(format="s16", layout="stereo", rate=sr)
+    input_buffer = BytesIO()
+
+    with bv.open(input_buffer, "w", format="wav") as container:
+        output_stream = container.add_stream(
+            "pcm_s16le", sample_rate=sr, format="s16", layout="stereo"
+        )
+
+        frame = AudioFrame.from_ndarray(data, format="s16p", layout="stereo")
+        frame.rate = sr
+
+        for reframe in to_s16.resample(frame):
+            container.mux(output_stream.encode(reframe))
+        container.mux(output_stream.encode(None))
+
     input_buffer.seek(0)
 
-    input_file = av.open(input_buffer, "r")
+    input_file = bv.open(input_buffer, "r")
     input_stream = input_file.streams.audio[0]
 
-    output_bytes = io.BytesIO()
-    output_file = av.open(output_bytes, mode="w", format="wav")
-    output_stream = output_file.add_stream("pcm_s16le", rate=sr)
-
-    graph = av.filter.Graph()
+    graph = bv.filter.Graph()
     args = [graph.add_abuffer(template=input_stream)]
 
     if clip.speed != 1:
@@ -189,29 +201,23 @@ def process_audio_clip(
     args.append(graph.add("abuffersink"))
     graph.link_nodes(*args).configure()
 
+    all_frames = []
+    resampler = bv.AudioResampler(format="s16p", layout="stereo", rate=sr)
+
     for frame in input_file.decode(input_stream):
         graph.push(frame)
         while True:
             try:
                 aframe = graph.pull()
-                assert isinstance(aframe, av.AudioFrame)
-                output_file.mux(output_stream.encode(aframe))
-            except (av.BlockingIOError, av.EOFError):
-                break
-
-    # Flush the stream
-    output_file.mux(output_stream.encode(None))
+                assert isinstance(aframe, AudioFrame)
 
-    input_file.close()
-    output_file.close()
+                for resampled_frame in resampler.resample(aframe):
+                    all_frames.append(resampled_frame.to_ndarray())
 
-    output_bytes.seek(0)
-    has_filesig = output_bytes.read(4) == b"RIFF"
-    output_bytes.seek(0)
-    if not has_filesig:  # Can rarely happen when clip is extremely small
-        return np.empty((0, 2), dtype=np.int16)
+            except (bv.BlockingIOError, bv.EOFError):
+                break
 
-    return read(output_bytes)[1]
+    return np.concatenate(all_frames, axis=1)
 
 
 def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
@@ -220,7 +226,7 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
 
     # First pass: determine the maximum length
     for path in audio_paths:
-        container = av.open(path)
+        container = bv.open(path)
         stream = container.streams.audio[0]
 
         # Calculate duration in samples
@@ -232,10 +238,10 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
 
     # Second pass: read and mix audio
     for path in audio_paths:
-        container = av.open(path)
+        container = bv.open(path)
         stream = container.streams.audio[0]
 
-        resampler = av.audio.resampler.AudioResampler(
+        resampler = bv.audio.resampler.AudioResampler(
             format="s16", layout="mono", rate=sr
         )
 
@@ -268,7 +274,7 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
         mixed_audio = mixed_audio * (32767 / max_val)
         mixed_audio = mixed_audio.astype(np.int16)  # type: ignore
 
-    output_container = av.open(output_path, mode="w")
+    output_container = bv.open(output_path, mode="w")
     output_stream = output_container.add_stream("pcm_s16le", rate=sr)
 
     chunk_size = sr  # Process 1 second at a time
@@ -276,7 +282,7 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
         # Shape becomes (1, samples) for mono
         chunk = np.array([mixed_audio[i : i + chunk_size]])
 
-        frame = av.AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
+        frame = AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
         frame.rate = sr
         frame.pts = i  # Set presentation timestamp
 
@@ -286,92 +292,223 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
     output_container.close()
 
 
+def file_to_ndarray(src: FileInfo, stream: int, sr: int) -> np.ndarray:
+    all_frames = []
+
+    resampler = bv.AudioResampler(format="s16p", layout="stereo", rate=sr)
+
+    with bv.open(src.path) as container:
+        for frame in container.decode(audio=stream):
+            for resampled_frame in resampler.resample(frame):
+                all_frames.append(resampled_frame.to_ndarray())
+
+    return np.concatenate(all_frames, axis=1)
+
+
+def ndarray_to_file(audio_data: np.ndarray, rate: int, out: str | Path) -> None:
+    layout = "stereo"
+
+    with bv.open(out, mode="w") as output:
+        stream = output.add_stream("pcm_s16le", rate=rate, format="s16", layout=layout)
+
+        frame = bv.AudioFrame.from_ndarray(audio_data, format="s16p", layout=layout)
+        frame.rate = rate
+
+        output.mux(stream.encode(frame))
+        output.mux(stream.encode(None))
+
+
+def ndarray_to_iter(
+    audio_data: np.ndarray, fmt: bv.AudioFormat, layout: str, rate: int
+) -> Iterator[AudioFrame]:
+    chunk_size = rate // 4  # Process 0.25 seconds at a time
+
+    resampler = bv.AudioResampler(rate=rate, format=fmt, layout=layout)
+    for i in range(0, audio_data.shape[1], chunk_size):
+        chunk = audio_data[:, i : i + chunk_size]
+
+        frame = AudioFrame.from_ndarray(chunk, format="s16p", layout="stereo")
+        frame.rate = rate
+        frame.pts = i
+
+        yield from resampler.resample(frame)
+
+
 def make_new_audio(
-    tl: v3, ensure: Ensure, args: Args, bar: Bar, temp: str, log: Log
-) -> list[str]:
+    output: bv.container.OutputContainer,
+    audio_format: bv.AudioFormat,
+    tl: v3,
+    args: Args,
+    log: Log,
+) -> tuple[list[bv.AudioStream], list[Iterator[AudioFrame]]]:
+    audio_inputs = []
+    audio_gen_frames = []
+    audio_streams: list[bv.AudioStream] = []
+    audio_paths = _make_new_audio(tl, audio_format, args, log)
+
+    for i, audio_path in enumerate(audio_paths):
+        audio_stream = output.add_stream(
+            args.audio_codec,
+            rate=tl.sr,
+            format=audio_format,
+            layout=tl.T.layout,
+            time_base=Fraction(1, tl.sr),
+        )
+        if not isinstance(audio_stream, bv.AudioStream):
+            log.error(f"Not a known audio codec: {args.audio_codec}")
+
+        if args.audio_bitrate != "auto":
+            audio_stream.bit_rate = parse_bitrate(args.audio_bitrate, log)
+            log.debug(f"audio bitrate: {audio_stream.bit_rate}")
+        else:
+            log.debug(f"[auto] audio bitrate: {audio_stream.bit_rate}")
+
+        if i < len(tl.T.audios) and (lang := tl.T.audios[i].lang) is not None:
+            audio_stream.metadata["language"] = lang
+
+        audio_streams.append(audio_stream)
+
+        if isinstance(audio_path, str):
+            audio_input = bv.open(audio_path)
+            audio_inputs.append(audio_input)
+            audio_gen_frames.append(audio_input.decode(audio=0))
+        else:
+            audio_gen_frames.append(audio_path)
+
+    return audio_streams, audio_gen_frames
+
+
+class Getter:
+    __slots__ = ("container", "stream", "rate")
+
+    def __init__(self, path: Path, stream: int, rate: int):
+        self.container = bv.open(path)
+        self.stream = self.container.streams.audio[0]
+        self.rate = rate
+
+    def get(self, start: int, end: int) -> np.ndarray:
+        # start/end is in samples
+
+        container = self.container
+        stream = self.stream
+        resampler = bv.AudioResampler(format="s16p", layout="stereo", rate=self.rate)
+
+        time_base = stream.time_base
+        assert time_base is not None
+        start_pts = int(start / self.rate / time_base)
+
+        # Seek to the approximate position
+        container.seek(start_pts, stream=stream)
+
+        all_frames = []
+        total_samples = 0
+        target_samples = end - start
+
+        # Decode frames until we have enough samples
+        for frame in container.decode(stream):
+            for resampled_frame in resampler.resample(frame):
+                frame_array = resampled_frame.to_ndarray()
+                all_frames.append(frame_array)
+                total_samples += frame_array.shape[1]
+
+                if total_samples >= target_samples:
+                    break
+
+            if total_samples >= target_samples:
+                break
+
+        result = np.concatenate(all_frames, axis=1)
+
+        # Trim to exact size
+        if result.shape[1] > target_samples:
+            result = result[:, :target_samples]
+        elif result.shape[1] < target_samples:
+            # Pad with zeros if we don't have enough samples
+            padding = np.zeros(
+                (result.shape[0], target_samples - result.shape[1]), dtype=result.dtype
+            )
+            result = np.concatenate([result, padding], axis=1)
+
+        assert result.shape[1] == end - start
+        return result  # Return NumPy array with shape (channels, samples)
+
+
+def _make_new_audio(tl: v3, fmt: bv.AudioFormat, args: Args, log: Log) -> list[Any]:
     sr = tl.sr
     tb = tl.tb
-    output: list[str] = []
-    samples: dict[tuple[FileInfo, int], AudioData] = {}
+    output: list[Any] = []
+    samples: dict[tuple[FileInfo, int], Getter] = {}
 
     norm = parse_norm(args.audio_normalize, log)
 
-
-
-    if not tl.a or not tl.a[0]:
+    if not tl.a[0]:
         log.error("Trying to render empty audio timeline")
 
-
-
+    layout = tl.T.layout
+    try:
+        bv.AudioLayout(layout)
+    except ValueError:
+        log.error(f"Invalid audio layout: {layout}")
 
-    for i, layer in enumerate(tl.a):
-        path = Path(temp, f"new{i}.wav")
-        arr: AudioData | None = None
+    for i, layer in enumerate(tl.a):
+        arr: np.ndarray | None = None
+        use_iter = False
 
         for c, clip in enumerate(layer):
             if (clip.src, clip.stream) not in samples:
-                audio_path = ensure.audio(clip.src, clip.stream)
-                with open(audio_path, "rb") as file:
-                    samples[(clip.src, clip.stream)] = read(file)[1]
+                samples[(clip.src, clip.stream)] = Getter(
+                    clip.src.path, clip.stream, sr
+                )
 
+        log.conwrite("Creating audio")
         if arr is None:
             leng = max(round((layer[-1].start + layer[-1].dur) * sr / tb), sr // tb)
-
-                _samp_arr = samples[(clip.src, clip.stream)]
-                dtype = _samp_arr.dtype
-                break
+            map_path = Path(log.temp, f"{i}.map")
+            arr = np.memmap(map_path, mode="w+", dtype=np.int16, shape=(2, leng))
 
-            arr = np.memmap(
-                Path(temp, "asdf.map"),
-                mode="w+",
-                dtype=dtype,
-                shape=(leng, 2),
-            )
-            del leng
-
-            samp_list = samples[(clip.src, clip.stream)]
             samp_start = round(clip.offset * clip.speed * sr / tb)
             samp_end = round((clip.offset + clip.dur) * clip.speed * sr / tb)
-            if samp_end > len(samp_list):
-                samp_end = len(samp_list)
+
+            getter = samples[(clip.src, clip.stream)]
 
             if clip.speed != 1 or clip.volume != 1:
-                clip_arr = process_audio_clip(clip, samp_list, samp_start, samp_end, sr)
+                clip_arr = process_audio_clip(
+                    clip, getter.get(samp_start, samp_end), sr
+                )
             else:
-                clip_arr = samp_list[samp_start:samp_end]
+                clip_arr = getter.get(samp_start, samp_end)
 
             # Mix numpy arrays
             start = clip.start * sr // tb
-            car_len = clip_arr.shape[0]
-
-            if start + car_len > len(arr):
-                # Shorten `clip_arr` if bigger than expected.
-                arr[start:] += clip_arr[: len(arr) - start]
+            clip_samples = clip_arr.shape[1]
+            if start + clip_samples > arr.shape[1]:
+                # Shorten `clip_arr` if bigger than expected.
+                arr[:, start:] += clip_arr[:, : arr.shape[1] - start]
             else:
-                arr[start : start + car_len] += clip_arr
-
-            bar.tick(c)
+                arr[:, start : start + clip_samples] += clip_arr
 
         if arr is not None:
             if norm is None:
-                with open(path, "wb") as fid:
-                    write(fid, sr, arr)
+                if args.mix_audio_streams:
+                    path = Path(log.temp, f"new{i}.wav")
+                    ndarray_to_file(arr, sr, path)
+                    output.append(f"{path}")
+                else:
+                    use_iter = True
             else:
-                pre_master = Path(temp, "premaster.wav")
-                with open(pre_master, "wb") as fid:
-                    write(fid, sr, arr)
+                path = Path(log.temp, f"new{i}.wav")
+                pre_master = Path(log.temp, "premaster.wav")
 
+                ndarray_to_file(arr, sr, pre_master)
                 apply_audio_normalization(norm, pre_master, path, log)
+                output.append(f"{path}")
 
-        output.append(f"{path}")
-
-        try:
-            Path(temp, "asdf.map").unlink(missing_ok=True)
-        except PermissionError:
-            pass
+        if use_iter and arr is not None:
+            output.append(ndarray_to_iter(arr, fmt, layout, sr))
 
-    if args.mix_audio_streams:
-        new_a_file = f"{Path(temp, 'new_audio.wav')}"
+    if args.mix_audio_streams and len(output) > 1:
+        new_a_file = f"{Path(log.temp, 'new_audio.wav')}"
         mix_audio_files(sr, output, new_a_file)
         return [new_a_file]
+
     return output
auto_editor/render/subtitle.py
CHANGED
@@ -6,7 +6,7 @@ import re
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
-import av
+import bv
 
 from auto_editor.utils.func import to_timecode
 
@@ -17,7 +17,7 @@ if TYPE_CHECKING:
     from auto_editor.utils.chunks import Chunks
     from auto_editor.utils.log import Log
 
-    Input = av.container.InputContainer
+    Input = bv.container.InputContainer
 
 
 @dataclass(slots=True)
@@ -138,18 +138,14 @@ def make_srt(input_: Input, stream: int) -> str:
         if packet.dts is None or packet.pts is None or packet.duration is None:
             continue
 
-        start = packet.pts * input_stream.time_base
-        end = start + packet.duration * input_stream.time_base
+        start_num = packet.pts * input_stream.time_base
+        start = to_timecode(start_num, "srt")
+        end = to_timecode(start_num + packet.duration * input_stream.time_base, "srt")
 
-        for subset in packet.decode():
-            start_time = to_timecode(start, "srt")
-            end_time = to_timecode(end, "srt")
+        for sub in packet.decode():
+            assert isinstance(sub, bv.subtitles.subtitle.AssSubtitle)
 
-            sub = subset[0]
-            assert len(subset) == 1
-            assert isinstance(sub, av.subtitles.subtitle.AssSubtitle)
-
-            output_bytes.write(f"{s}\n{start_time} --> {end_time}\n")
+            output_bytes.write(f"{s}\n{start} --> {end}\n")
             output_bytes.write(sub.dialogue.decode("utf-8", errors="ignore") + "\n\n")
             s += 1
 
@@ -159,7 +155,7 @@ def make_srt(input_: Input, stream: int) -> str:
 
 def _ensure(input_: Input, format: str, stream: int) -> str:
     output_bytes = io.BytesIO()
-    output = av.open(output_bytes, "w", format=format)
+    output = bv.open(output_bytes, "w", format=format)
 
     in_stream = input_.streams.subtitles[stream]
     out_stream = output.add_stream_from_template(in_stream)
@@ -179,7 +175,7 @@ def make_new_subtitles(tl: v3, log: Log) -> list[str]:
     if tl.v1 is None:
         return []
 
-    input_ = av.open(tl.v1.source.path)
+    input_ = bv.open(tl.v1.source.path)
     new_paths = []
 
     for s, sub in enumerate(tl.v1.source.subtitles):