auto-editor 25.3.1__py3-none-any.whl → 26.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auto_editor/__init__.py +1 -1
- auto_editor/__main__.py +6 -33
- auto_editor/edit.py +146 -52
- auto_editor/ffwrapper.py +19 -81
- auto_editor/formats/fcp7.py +1 -1
- auto_editor/help.py +4 -3
- auto_editor/lang/palet.py +3 -9
- auto_editor/lang/stdenv.py +0 -7
- auto_editor/output.py +25 -183
- auto_editor/render/audio.py +150 -58
- auto_editor/render/subtitle.py +71 -10
- auto_editor/render/video.py +167 -182
- auto_editor/subcommands/repl.py +12 -3
- auto_editor/subcommands/test.py +42 -38
- auto_editor/timeline.py +2 -2
- auto_editor/utils/cmdkw.py +5 -8
- auto_editor/utils/container.py +4 -5
- auto_editor/utils/func.py +2 -35
- auto_editor/utils/types.py +4 -30
- {auto_editor-25.3.1.dist-info → auto_editor-26.0.1.dist-info}/METADATA +1 -2
- {auto_editor-25.3.1.dist-info → auto_editor-26.0.1.dist-info}/RECORD +25 -26
- {auto_editor-25.3.1.dist-info → auto_editor-26.0.1.dist-info}/WHEEL +1 -1
- auto_editor/utils/encoder.py +0 -135
- {auto_editor-25.3.1.dist-info → auto_editor-26.0.1.dist-info}/LICENSE +0 -0
- {auto_editor-25.3.1.dist-info → auto_editor-26.0.1.dist-info}/entry_points.txt +0 -0
- {auto_editor-25.3.1.dist-info → auto_editor-26.0.1.dist-info}/top_level.txt +0 -0
auto_editor/output.py
CHANGED
@@ -2,26 +2,40 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import os.path
|
4
4
|
from dataclasses import dataclass, field
|
5
|
-
from fractions import Fraction
|
6
5
|
|
7
6
|
import av
|
8
7
|
from av.audio.resampler import AudioResampler
|
9
8
|
|
10
|
-
from auto_editor.ffwrapper import FFmpeg, FileInfo
|
9
|
+
from auto_editor.ffwrapper import FileInfo
|
11
10
|
from auto_editor.utils.bar import Bar
|
12
|
-
from auto_editor.utils.container import Container
|
13
11
|
from auto_editor.utils.log import Log
|
14
|
-
from auto_editor.utils.types import Args
|
12
|
+
from auto_editor.utils.types import _split_num_str
|
13
|
+
|
14
|
+
|
15
|
+
def parse_bitrate(input_: str, log: Log) -> int:
|
16
|
+
try:
|
17
|
+
val, unit = _split_num_str(input_)
|
18
|
+
except Exception as e:
|
19
|
+
log.error(e)
|
20
|
+
|
21
|
+
if unit.lower() == "k":
|
22
|
+
return int(val * 1000)
|
23
|
+
if unit == "M":
|
24
|
+
return int(val * 1_000_000)
|
25
|
+
if unit == "G":
|
26
|
+
return int(val * 1_000_000_000)
|
27
|
+
if unit == "":
|
28
|
+
return int(val)
|
29
|
+
|
30
|
+
log.error(f"Unknown bitrate: {input_}")
|
15
31
|
|
16
32
|
|
17
33
|
@dataclass(slots=True)
|
18
34
|
class Ensure:
|
19
|
-
_ffmpeg: FFmpeg
|
20
35
|
_bar: Bar
|
21
36
|
_sr: int
|
22
37
|
log: Log
|
23
38
|
_audios: list[tuple[FileInfo, int]] = field(default_factory=list)
|
24
|
-
_subtitles: list[tuple[FileInfo, int, str]] = field(default_factory=list)
|
25
39
|
|
26
40
|
def audio(self, src: FileInfo, stream: int) -> str:
|
27
41
|
try:
|
@@ -52,193 +66,21 @@ class Ensure:
|
|
52
66
|
|
53
67
|
bar.start(dur, "Extracting audio")
|
54
68
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
69
|
+
output_astream = out_container.add_stream(
|
70
|
+
"pcm_s16le", layout="stereo", rate=sample_rate
|
71
|
+
)
|
59
72
|
resampler = AudioResampler(format="s16", layout="stereo", rate=sample_rate)
|
60
73
|
for i, frame in enumerate(in_container.decode(astream)):
|
61
74
|
if i % 1500 == 0 and frame.time is not None:
|
62
75
|
bar.tick(frame.time)
|
63
76
|
|
64
77
|
for new_frame in resampler.resample(frame):
|
65
|
-
|
66
|
-
out_container.mux_one(packet)
|
78
|
+
out_container.mux(output_astream.encode(new_frame))
|
67
79
|
|
68
|
-
|
69
|
-
out_container.mux_one(packet)
|
80
|
+
out_container.mux(output_astream.encode(None))
|
70
81
|
|
71
82
|
out_container.close()
|
72
83
|
in_container.close()
|
73
84
|
bar.end()
|
74
85
|
|
75
86
|
return out_path
|
76
|
-
|
77
|
-
def subtitle(self, src: FileInfo, stream: int, ext: str) -> str:
|
78
|
-
try:
|
79
|
-
self._subtitles.index((src, stream, ext))
|
80
|
-
first_time = False
|
81
|
-
except ValueError:
|
82
|
-
self._subtitles.append((src, stream, ext))
|
83
|
-
first_time = True
|
84
|
-
|
85
|
-
out_path = os.path.join(self.log.temp, f"{stream}s.{ext}")
|
86
|
-
|
87
|
-
if first_time:
|
88
|
-
self.log.debug(f"Making external subtitle: {out_path}")
|
89
|
-
self.log.conwrite("Extracting subtitle")
|
90
|
-
self._ffmpeg.run(["-i", f"{src.path}", "-map", f"0:s:{stream}", out_path])
|
91
|
-
|
92
|
-
return out_path
|
93
|
-
|
94
|
-
|
95
|
-
def _ffset(option: str, value: str | None) -> list[str]:
|
96
|
-
if value is None or value == "unset" or value == "reserved":
|
97
|
-
return []
|
98
|
-
return [option] + [value]
|
99
|
-
|
100
|
-
|
101
|
-
def video_quality(args: Args) -> list[str]:
|
102
|
-
return (
|
103
|
-
_ffset("-b:v", args.video_bitrate)
|
104
|
-
+ ["-c:v", args.video_codec]
|
105
|
-
+ _ffset("-qscale:v", args.video_quality_scale)
|
106
|
-
+ ["-movflags", "faststart"]
|
107
|
-
)
|
108
|
-
|
109
|
-
|
110
|
-
def mux_quality_media(
|
111
|
-
ffmpeg: FFmpeg,
|
112
|
-
visual_output: list[tuple[bool, str]],
|
113
|
-
audio_output: list[str],
|
114
|
-
sub_output: list[str],
|
115
|
-
apply_v: bool,
|
116
|
-
ctr: Container,
|
117
|
-
output_path: str,
|
118
|
-
tb: Fraction,
|
119
|
-
args: Args,
|
120
|
-
src: FileInfo,
|
121
|
-
log: Log,
|
122
|
-
) -> None:
|
123
|
-
v_tracks = len(visual_output)
|
124
|
-
a_tracks = len(audio_output)
|
125
|
-
s_tracks = 0 if args.sn else len(sub_output)
|
126
|
-
|
127
|
-
cmd = ["-hide_banner", "-y", "-i", f"{src.path}"]
|
128
|
-
|
129
|
-
same_container = src.path.suffix == os.path.splitext(output_path)[1]
|
130
|
-
|
131
|
-
for is_video, path in visual_output:
|
132
|
-
if is_video or ctr.allow_image:
|
133
|
-
cmd.extend(["-i", path])
|
134
|
-
else:
|
135
|
-
v_tracks -= 1
|
136
|
-
|
137
|
-
if a_tracks > 0:
|
138
|
-
if args.keep_tracks_separate and ctr.max_audios is None:
|
139
|
-
for path in audio_output:
|
140
|
-
cmd.extend(["-i", path])
|
141
|
-
else:
|
142
|
-
# Merge all the audio a_tracks into one.
|
143
|
-
new_a_file = os.path.join(log.temp, "new_audio.wav")
|
144
|
-
if a_tracks > 1:
|
145
|
-
new_cmd = []
|
146
|
-
for path in audio_output:
|
147
|
-
new_cmd.extend(["-i", path])
|
148
|
-
new_cmd.extend(
|
149
|
-
[
|
150
|
-
"-filter_complex",
|
151
|
-
f"amix=inputs={a_tracks}:duration=longest",
|
152
|
-
"-ac",
|
153
|
-
"2",
|
154
|
-
new_a_file,
|
155
|
-
]
|
156
|
-
)
|
157
|
-
ffmpeg.run(new_cmd)
|
158
|
-
a_tracks = 1
|
159
|
-
else:
|
160
|
-
new_a_file = audio_output[0]
|
161
|
-
cmd.extend(["-i", new_a_file])
|
162
|
-
|
163
|
-
for subfile in sub_output:
|
164
|
-
cmd.extend(["-i", subfile])
|
165
|
-
|
166
|
-
for i in range(v_tracks + s_tracks + a_tracks):
|
167
|
-
cmd.extend(["-map", f"{i+1}:0"])
|
168
|
-
|
169
|
-
cmd.extend(["-map_metadata", "0"])
|
170
|
-
|
171
|
-
track = 0
|
172
|
-
for is_video, path in visual_output:
|
173
|
-
if is_video:
|
174
|
-
if apply_v:
|
175
|
-
cmd += video_quality(args)
|
176
|
-
else:
|
177
|
-
# Real video is only allowed on track 0
|
178
|
-
cmd += ["-c:v:0", "copy"]
|
179
|
-
|
180
|
-
if float(tb).is_integer():
|
181
|
-
cmd += ["-video_track_timescale", f"{tb}"]
|
182
|
-
|
183
|
-
elif ctr.allow_image:
|
184
|
-
ext = os.path.splitext(path)[1][1:]
|
185
|
-
cmd += [f"-c:v:{track}", ext, f"-disposition:v:{track}", "attached_pic"]
|
186
|
-
|
187
|
-
track += 1
|
188
|
-
del track
|
189
|
-
|
190
|
-
for i, vstream in enumerate(src.videos):
|
191
|
-
if i > v_tracks:
|
192
|
-
break
|
193
|
-
if vstream.lang is not None:
|
194
|
-
cmd.extend([f"-metadata:s:v:{i}", f"language={vstream.lang}"])
|
195
|
-
for i, astream in enumerate(src.audios):
|
196
|
-
if i > a_tracks:
|
197
|
-
break
|
198
|
-
if astream.lang is not None:
|
199
|
-
cmd.extend([f"-metadata:s:a:{i}", f"language={astream.lang}"])
|
200
|
-
for i, sstream in enumerate(src.subtitles):
|
201
|
-
if i > s_tracks:
|
202
|
-
break
|
203
|
-
if sstream.lang is not None:
|
204
|
-
cmd.extend([f"-metadata:s:s:{i}", f"language={sstream.lang}"])
|
205
|
-
|
206
|
-
if s_tracks > 0:
|
207
|
-
scodec = src.subtitles[0].codec
|
208
|
-
if same_container:
|
209
|
-
cmd.extend(["-c:s", scodec])
|
210
|
-
elif ctr.scodecs is not None:
|
211
|
-
if scodec not in ctr.scodecs:
|
212
|
-
scodec = ctr.default_sub
|
213
|
-
cmd.extend(["-c:s", scodec])
|
214
|
-
|
215
|
-
if a_tracks > 0:
|
216
|
-
cmd += _ffset("-c:a", args.audio_codec) + _ffset("-b:a", args.audio_bitrate)
|
217
|
-
|
218
|
-
if same_container and v_tracks > 0:
|
219
|
-
color_range = src.videos[0].color_range
|
220
|
-
colorspace = src.videos[0].color_space
|
221
|
-
color_prim = src.videos[0].color_primaries
|
222
|
-
color_trc = src.videos[0].color_transfer
|
223
|
-
|
224
|
-
if color_range == 1 or color_range == 2:
|
225
|
-
cmd.extend(["-color_range", f"{color_range}"])
|
226
|
-
if colorspace in (0, 1) or (colorspace >= 3 and colorspace < 16):
|
227
|
-
cmd.extend(["-colorspace", f"{colorspace}"])
|
228
|
-
if color_prim == 1 or (color_prim >= 4 and color_prim < 17):
|
229
|
-
cmd.extend(["-color_primaries", f"{color_prim}"])
|
230
|
-
if color_trc == 1 or (color_trc >= 4 and color_trc < 22):
|
231
|
-
cmd.extend(["-color_trc", f"{color_trc}"])
|
232
|
-
|
233
|
-
if args.extras is not None:
|
234
|
-
cmd.extend(args.extras.split(" "))
|
235
|
-
cmd.extend(["-strict", "-2"]) # Allow experimental codecs.
|
236
|
-
|
237
|
-
if s_tracks > 0:
|
238
|
-
cmd.extend(["-map", "0:t?"]) # Add input attachments to output.
|
239
|
-
|
240
|
-
if not args.dn:
|
241
|
-
cmd.extend(["-map", "0:d?"])
|
242
|
-
|
243
|
-
cmd.append(output_path)
|
244
|
-
ffmpeg.run_check_errors(cmd, path=output_path)
|
auto_editor/render/audio.py
CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
3
3
|
import io
|
4
4
|
from pathlib import Path
|
5
5
|
from platform import system
|
6
|
-
from subprocess import PIPE
|
7
6
|
|
8
7
|
import av
|
9
8
|
import numpy as np
|
@@ -17,6 +16,7 @@ from auto_editor.output import Ensure
|
|
17
16
|
from auto_editor.timeline import TlAudio, v3
|
18
17
|
from auto_editor.utils.bar import Bar
|
19
18
|
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
|
19
|
+
from auto_editor.utils.container import Container
|
20
20
|
from auto_editor.utils.log import Log
|
21
21
|
from auto_editor.utils.types import Args
|
22
22
|
from auto_editor.wavfile import AudioData, read, write
|
@@ -35,8 +35,6 @@ norm_types = {
|
|
35
35
|
),
|
36
36
|
}
|
37
37
|
|
38
|
-
file_null = "NUL" if system() in ("Windows", "cli") else "/dev/null"
|
39
|
-
|
40
38
|
|
41
39
|
def parse_norm(norm: str, log: Log) -> dict | None:
|
42
40
|
if norm == "#f":
|
@@ -58,7 +56,7 @@ def parse_norm(norm: str, log: Log) -> dict | None:
|
|
58
56
|
log.error(e)
|
59
57
|
|
60
58
|
|
61
|
-
def parse_ebu_bytes(norm: dict, stderr: bytes, log: Log) -> list[str]:
|
59
|
+
def parse_ebu_bytes(norm: dict, stderr: bytes, log: Log) -> tuple[str, str]:
|
62
60
|
start = end = 0
|
63
61
|
lines = stderr.splitlines()
|
64
62
|
|
@@ -78,13 +76,7 @@ def parse_ebu_bytes(norm: dict, stderr: bytes, log: Log) -> list[str]:
|
|
78
76
|
except MyError:
|
79
77
|
log.error(f"Invalid loudnorm stats.\n{start=},{end=}\n{stderr!r}")
|
80
78
|
|
81
|
-
for key in (
|
82
|
-
"input_i",
|
83
|
-
"input_tp",
|
84
|
-
"input_lra",
|
85
|
-
"input_thresh",
|
86
|
-
"target_offset",
|
87
|
-
):
|
79
|
+
for key in ("input_i", "input_tp", "input_lra", "input_thresh", "target_offset"):
|
88
80
|
val = float(parsed[key])
|
89
81
|
if val == float("-inf"):
|
90
82
|
parsed[key] = -99
|
@@ -100,31 +92,12 @@ def parse_ebu_bytes(norm: dict, stderr: bytes, log: Log) -> list[str]:
|
|
100
92
|
m_thresh = parsed["input_thresh"]
|
101
93
|
target_offset = parsed["target_offset"]
|
102
94
|
|
103
|
-
|
104
|
-
"
|
105
|
-
f"loudnorm=i={norm['i']}:lra={norm['lra']}:tp={norm['tp']}:offset={target_offset}"
|
95
|
+
filter = (
|
96
|
+
f"i={norm['i']}:lra={norm['lra']}:tp={norm['tp']}:offset={target_offset}"
|
106
97
|
f":measured_i={m_i}:measured_lra={m_lra}:measured_tp={m_tp}"
|
107
|
-
f":measured_thresh={m_thresh}:linear=true:print_format=json"
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
def parse_peak_bytes(t: float, stderr: bytes, log: Log) -> list[str]:
|
112
|
-
peak_level = None
|
113
|
-
for line in stderr.splitlines():
|
114
|
-
if line.startswith(b"[Parsed_astats_0") and b"Peak level dB:" in line:
|
115
|
-
try:
|
116
|
-
peak_level = float(line.split(b":")[1])
|
117
|
-
except Exception:
|
118
|
-
log.error(f"Invalid `astats` stats.\n{stderr!r}")
|
119
|
-
break
|
120
|
-
|
121
|
-
if peak_level is None:
|
122
|
-
log.error(f"Invalid `astats` stats.\n{stderr!r}")
|
123
|
-
|
124
|
-
adjustment = t - peak_level
|
125
|
-
log.debug(f"current peak level: {peak_level}")
|
126
|
-
log.print(f"peak adjustment: {adjustment}")
|
127
|
-
return ["-af", f"volume={adjustment}"]
|
98
|
+
f":measured_thresh={m_thresh}:linear=true:print_format=json"
|
99
|
+
)
|
100
|
+
return "loudnorm", filter
|
128
101
|
|
129
102
|
|
130
103
|
def apply_audio_normalization(
|
@@ -135,13 +108,9 @@ def apply_audio_normalization(
|
|
135
108
|
f"loudnorm=i={norm['i']}:lra={norm['lra']}:tp={norm['tp']}:"
|
136
109
|
f"offset={norm['gain']}:print_format=json"
|
137
110
|
)
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
log.debug(f"audio norm first pass: {first_pass}")
|
142
|
-
|
143
|
-
stderr = ffmpeg.Popen(
|
144
|
-
[
|
111
|
+
log.debug(f"audio norm first pass: {first_pass}")
|
112
|
+
file_null = "NUL" if system() in ("Windows", "cli") else "/dev/null"
|
113
|
+
cmd = [
|
145
114
|
"-hide_banner",
|
146
115
|
"-i",
|
147
116
|
f"{pre_master}",
|
@@ -152,19 +121,56 @@ def apply_audio_normalization(
|
|
152
121
|
"-f",
|
153
122
|
"null",
|
154
123
|
file_null,
|
155
|
-
]
|
156
|
-
|
157
|
-
|
158
|
-
stderr=PIPE,
|
159
|
-
).communicate()[1]
|
160
|
-
|
161
|
-
if norm["tag"] == "ebu":
|
162
|
-
cmd = parse_ebu_bytes(norm, stderr, log)
|
124
|
+
]
|
125
|
+
stderr = ffmpeg.Popen("EBU", cmd, log).communicate()[1]
|
126
|
+
name, filter_args = parse_ebu_bytes(norm, stderr, log)
|
163
127
|
else:
|
164
128
|
assert "t" in norm
|
165
|
-
cmd = parse_peak_bytes(norm["t"], stderr, log)
|
166
129
|
|
167
|
-
|
130
|
+
def get_peak_level(frame: av.AudioFrame) -> float:
|
131
|
+
# Calculate peak level in dB
|
132
|
+
# Should be equivalent to: -af astats=measure_overall=Peak_level:measure_perchannel=0
|
133
|
+
max_amplitude = np.abs(frame.to_ndarray()).max()
|
134
|
+
if max_amplitude > 0.0:
|
135
|
+
return -20.0 * np.log10(max_amplitude)
|
136
|
+
return -99.0
|
137
|
+
|
138
|
+
with av.open(pre_master) as container:
|
139
|
+
max_peak_level = -99.0
|
140
|
+
assert len(container.streams.video) == 0
|
141
|
+
for frame in container.decode(audio=0):
|
142
|
+
peak_level = get_peak_level(frame)
|
143
|
+
max_peak_level = max(max_peak_level, peak_level)
|
144
|
+
|
145
|
+
adjustment = norm["t"] - max_peak_level
|
146
|
+
log.debug(f"current peak level: {max_peak_level}")
|
147
|
+
log.print(f"peak adjustment: {adjustment:.3f}dB")
|
148
|
+
name, filter_args = "volume", f"{adjustment}"
|
149
|
+
|
150
|
+
with av.open(pre_master) as container:
|
151
|
+
input_stream = container.streams.audio[0]
|
152
|
+
|
153
|
+
output_file = av.open(path, mode="w")
|
154
|
+
output_stream = output_file.add_stream("pcm_s16le", rate=input_stream.rate)
|
155
|
+
|
156
|
+
graph = av.filter.Graph()
|
157
|
+
graph.link_nodes(
|
158
|
+
graph.add_abuffer(template=input_stream),
|
159
|
+
graph.add(name, filter_args),
|
160
|
+
graph.add("abuffersink"),
|
161
|
+
).configure()
|
162
|
+
for frame in container.decode(input_stream):
|
163
|
+
graph.push(frame)
|
164
|
+
while True:
|
165
|
+
try:
|
166
|
+
aframe = graph.pull()
|
167
|
+
assert isinstance(aframe, av.AudioFrame)
|
168
|
+
output_file.mux(output_stream.encode(aframe))
|
169
|
+
except (av.BlockingIOError, av.EOFError):
|
170
|
+
break
|
171
|
+
|
172
|
+
output_file.mux(output_stream.encode(None))
|
173
|
+
output_file.close()
|
168
174
|
|
169
175
|
|
170
176
|
def process_audio_clip(
|
@@ -212,28 +218,109 @@ def process_audio_clip(
|
|
212
218
|
try:
|
213
219
|
aframe = graph.pull()
|
214
220
|
assert isinstance(aframe, av.AudioFrame)
|
215
|
-
|
216
|
-
output_file.mux(packet)
|
221
|
+
output_file.mux(output_stream.encode(aframe))
|
217
222
|
except (av.BlockingIOError, av.EOFError):
|
218
223
|
break
|
219
224
|
|
220
225
|
# Flush the stream
|
221
|
-
|
222
|
-
output_file.mux(packet)
|
226
|
+
output_file.mux(output_stream.encode(None))
|
223
227
|
|
224
228
|
input_file.close()
|
225
229
|
output_file.close()
|
226
230
|
|
227
231
|
output_bytes.seek(0)
|
232
|
+
has_filesig = output_bytes.read(4)
|
233
|
+
output_bytes.seek(0)
|
234
|
+
if not has_filesig: # Can rarely happen when clip is extremely small
|
235
|
+
return np.empty((0, 2), dtype=np.int16)
|
236
|
+
|
228
237
|
return read(output_bytes)[1]
|
229
238
|
|
230
239
|
|
240
|
+
def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
|
241
|
+
mixed_audio = None
|
242
|
+
max_length = 0
|
243
|
+
|
244
|
+
# First pass: determine the maximum length
|
245
|
+
for path in audio_paths:
|
246
|
+
container = av.open(path)
|
247
|
+
stream = container.streams.audio[0]
|
248
|
+
|
249
|
+
# Calculate duration in samples
|
250
|
+
assert stream.duration is not None
|
251
|
+
assert stream.time_base is not None
|
252
|
+
duration_samples = int(stream.duration * sr / stream.time_base.denominator)
|
253
|
+
max_length = max(max_length, duration_samples)
|
254
|
+
container.close()
|
255
|
+
|
256
|
+
# Second pass: read and mix audio
|
257
|
+
for path in audio_paths:
|
258
|
+
container = av.open(path)
|
259
|
+
stream = container.streams.audio[0]
|
260
|
+
|
261
|
+
resampler = av.audio.resampler.AudioResampler(
|
262
|
+
format="s16", layout="mono", rate=sr
|
263
|
+
)
|
264
|
+
|
265
|
+
audio_array: list[np.ndarray] = []
|
266
|
+
for frame in container.decode(audio=0):
|
267
|
+
frame.pts = None
|
268
|
+
resampled = resampler.resample(frame)[0]
|
269
|
+
audio_array.extend(resampled.to_ndarray().flatten())
|
270
|
+
|
271
|
+
# Pad or truncate to max_length
|
272
|
+
current_audio = np.array(audio_array[:max_length])
|
273
|
+
if len(current_audio) < max_length:
|
274
|
+
current_audio = np.pad(
|
275
|
+
current_audio, (0, max_length - len(current_audio)), "constant"
|
276
|
+
)
|
277
|
+
|
278
|
+
if mixed_audio is None:
|
279
|
+
mixed_audio = current_audio.astype(np.float32)
|
280
|
+
else:
|
281
|
+
mixed_audio += current_audio.astype(np.float32)
|
282
|
+
|
283
|
+
container.close()
|
284
|
+
|
285
|
+
if mixed_audio is None:
|
286
|
+
raise ValueError("mixed_audio is None")
|
287
|
+
|
288
|
+
# Normalize the mixed audio
|
289
|
+
max_val = np.max(np.abs(mixed_audio))
|
290
|
+
if max_val > 0:
|
291
|
+
mixed_audio = mixed_audio * (32767 / max_val)
|
292
|
+
mixed_audio = mixed_audio.astype(np.int16) # type: ignore
|
293
|
+
|
294
|
+
output_container = av.open(output_path, mode="w")
|
295
|
+
output_stream = output_container.add_stream("pcm_s16le", rate=sr)
|
296
|
+
|
297
|
+
chunk_size = sr # Process 1 second at a time
|
298
|
+
for i in range(0, len(mixed_audio), chunk_size):
|
299
|
+
# Shape becomes (1, samples) for mono
|
300
|
+
chunk = np.array([mixed_audio[i : i + chunk_size]])
|
301
|
+
|
302
|
+
frame = av.AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
|
303
|
+
frame.rate = sr
|
304
|
+
frame.pts = i # Set presentation timestamp
|
305
|
+
|
306
|
+
output_container.mux(output_stream.encode(frame))
|
307
|
+
|
308
|
+
output_container.mux(output_stream.encode(None))
|
309
|
+
output_container.close()
|
310
|
+
|
311
|
+
|
231
312
|
def make_new_audio(
|
232
|
-
tl: v3,
|
313
|
+
tl: v3,
|
314
|
+
ctr: Container,
|
315
|
+
ensure: Ensure,
|
316
|
+
args: Args,
|
317
|
+
ffmpeg: FFmpeg,
|
318
|
+
bar: Bar,
|
319
|
+
log: Log,
|
233
320
|
) -> list[str]:
|
234
321
|
sr = tl.sr
|
235
322
|
tb = tl.tb
|
236
|
-
output = []
|
323
|
+
output: list[str] = []
|
237
324
|
samples: dict[tuple[FileInfo, int], AudioData] = {}
|
238
325
|
|
239
326
|
norm = parse_norm(args.audio_normalize, log)
|
@@ -311,4 +398,9 @@ def make_new_audio(
|
|
311
398
|
Path(temp, "asdf.map").unlink(missing_ok=True)
|
312
399
|
except PermissionError:
|
313
400
|
pass
|
401
|
+
|
402
|
+
if not (args.keep_tracks_separate and ctr.max_audios is None) and len(output) > 1:
|
403
|
+
new_a_file = f"{Path(temp, 'new_audio.wav')}"
|
404
|
+
mix_audio_files(sr, output, new_a_file)
|
405
|
+
return [new_a_file]
|
314
406
|
return output
|
auto_editor/render/subtitle.py
CHANGED
@@ -1,18 +1,23 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import io
|
3
4
|
import os
|
4
5
|
import re
|
5
6
|
from dataclasses import dataclass
|
6
7
|
from typing import TYPE_CHECKING
|
7
8
|
|
9
|
+
import av
|
10
|
+
|
8
11
|
from auto_editor.utils.func import to_timecode
|
9
12
|
|
10
13
|
if TYPE_CHECKING:
|
11
14
|
from fractions import Fraction
|
12
15
|
|
13
|
-
from auto_editor.output import Ensure
|
14
16
|
from auto_editor.timeline import v3
|
15
17
|
from auto_editor.utils.chunks import Chunks
|
18
|
+
from auto_editor.utils.log import Log
|
19
|
+
|
20
|
+
Input = av.container.InputContainer
|
16
21
|
|
17
22
|
|
18
23
|
@dataclass(slots=True)
|
@@ -26,7 +31,6 @@ class SerialSub:
|
|
26
31
|
|
27
32
|
class SubtitleParser:
|
28
33
|
def __init__(self, tb: Fraction) -> None:
|
29
|
-
self.supported_codecs = ("ass", "webvtt", "mov_text")
|
30
34
|
self.tb = tb
|
31
35
|
self.contents: list[SerialSub] = []
|
32
36
|
self.header = ""
|
@@ -125,24 +129,81 @@ class SubtitleParser:
|
|
125
129
|
file.write(self.footer)
|
126
130
|
|
127
131
|
|
128
|
-
def
|
132
|
+
def make_srt(input_: Input, stream: int) -> str:
|
133
|
+
output_bytes = io.StringIO()
|
134
|
+
input_stream = input_.streams.subtitles[stream]
|
135
|
+
assert input_stream.time_base is not None
|
136
|
+
s = 1
|
137
|
+
for packet in input_.demux(input_stream):
|
138
|
+
if packet.dts is None or packet.pts is None or packet.duration is None:
|
139
|
+
continue
|
140
|
+
|
141
|
+
start = packet.pts * input_stream.time_base
|
142
|
+
end = start + packet.duration * input_stream.time_base
|
143
|
+
|
144
|
+
for subset in packet.decode():
|
145
|
+
start_time = to_timecode(start, "srt")
|
146
|
+
end_time = to_timecode(end, "srt")
|
147
|
+
|
148
|
+
sub = subset[0]
|
149
|
+
assert len(subset) == 1
|
150
|
+
assert isinstance(sub, av.subtitles.subtitle.AssSubtitle)
|
151
|
+
|
152
|
+
output_bytes.write(f"{s}\n{start_time} --> {end_time}\n")
|
153
|
+
output_bytes.write(sub.dialogue.decode("utf-8", errors="ignore") + "\n\n")
|
154
|
+
s += 1
|
155
|
+
|
156
|
+
output_bytes.seek(0)
|
157
|
+
return output_bytes.getvalue()
|
158
|
+
|
159
|
+
|
160
|
+
def _ensure(input_: Input, format: str, stream: int) -> str:
|
161
|
+
output_bytes = io.BytesIO()
|
162
|
+
output = av.open(output_bytes, "w", format=format)
|
163
|
+
|
164
|
+
in_stream = input_.streams.subtitles[stream]
|
165
|
+
out_stream = output.add_stream(template=in_stream)
|
166
|
+
|
167
|
+
for packet in input_.demux(in_stream):
|
168
|
+
if packet.dts is None:
|
169
|
+
continue
|
170
|
+
packet.stream = out_stream
|
171
|
+
output.mux(packet)
|
172
|
+
|
173
|
+
output.close()
|
174
|
+
output_bytes.seek(0)
|
175
|
+
return output_bytes.getvalue().decode("utf-8", errors="ignore")
|
176
|
+
|
177
|
+
|
178
|
+
def make_new_subtitles(tl: v3, log: Log) -> list[str]:
|
129
179
|
if tl.v1 is None:
|
130
180
|
return []
|
131
181
|
|
182
|
+
input_ = av.open(tl.v1.source.path)
|
132
183
|
new_paths = []
|
133
184
|
|
134
185
|
for s, sub in enumerate(tl.v1.source.subtitles):
|
135
|
-
|
136
|
-
|
186
|
+
if sub.codec == "mov_text":
|
187
|
+
continue
|
137
188
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
189
|
+
parser = SubtitleParser(tl.tb)
|
190
|
+
if sub.codec == "ssa":
|
191
|
+
format = "ass"
|
192
|
+
elif sub.codec in ("webvtt", "ass"):
|
193
|
+
format = sub.codec
|
194
|
+
else:
|
195
|
+
log.error(f"Unknown subtitle codec: {sub.codec}")
|
143
196
|
|
197
|
+
if sub.codec == "mov_text":
|
198
|
+
ret = make_srt(input_, s)
|
199
|
+
else:
|
200
|
+
ret = _ensure(input_, format, s)
|
201
|
+
parser.parse(ret, format)
|
144
202
|
parser.edit(tl.v1.chunks)
|
203
|
+
|
204
|
+
new_path = os.path.join(log.temp, f"new{s}s.{sub.ext}")
|
145
205
|
parser.write(new_path)
|
146
206
|
new_paths.append(new_path)
|
147
207
|
|
208
|
+
input_.close()
|
148
209
|
return new_paths
|