lattifai 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/_init.py +20 -0
- lattifai/alignment/__init__.py +2 -3
- lattifai/alignment/lattice1_aligner.py +117 -4
- lattifai/alignment/lattice1_worker.py +47 -4
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/text_align.py +2 -1
- lattifai/alignment/tokenizer.py +56 -29
- lattifai/audio2.py +162 -183
- lattifai/cli/alignment.py +5 -0
- lattifai/cli/caption.py +6 -6
- lattifai/cli/transcribe.py +1 -5
- lattifai/cli/youtube.py +3 -0
- lattifai/client.py +41 -12
- lattifai/config/__init__.py +21 -3
- lattifai/config/alignment.py +7 -0
- lattifai/config/caption.py +13 -243
- lattifai/config/client.py +16 -0
- lattifai/config/event.py +102 -0
- lattifai/config/transcription.py +25 -1
- lattifai/data/__init__.py +8 -0
- lattifai/data/caption.py +228 -0
- lattifai/errors.py +78 -53
- lattifai/event/__init__.py +65 -0
- lattifai/event/lattifai.py +166 -0
- lattifai/mixin.py +22 -17
- lattifai/transcription/base.py +2 -1
- lattifai/transcription/gemini.py +147 -16
- lattifai/transcription/lattifai.py +8 -11
- lattifai/types.py +1 -1
- lattifai/youtube/client.py +143 -48
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/METADATA +129 -58
- lattifai-1.3.1.dist-info/RECORD +57 -0
- lattifai/__init__.py +0 -88
- lattifai/alignment/sentence_splitter.py +0 -350
- lattifai/caption/__init__.py +0 -96
- lattifai/caption/caption.py +0 -661
- lattifai/caption/formats/__init__.py +0 -199
- lattifai/caption/formats/base.py +0 -211
- lattifai/caption/formats/gemini.py +0 -722
- lattifai/caption/formats/json.py +0 -194
- lattifai/caption/formats/lrc.py +0 -309
- lattifai/caption/formats/nle/__init__.py +0 -9
- lattifai/caption/formats/nle/audition.py +0 -561
- lattifai/caption/formats/nle/avid.py +0 -423
- lattifai/caption/formats/nle/fcpxml.py +0 -549
- lattifai/caption/formats/nle/premiere.py +0 -589
- lattifai/caption/formats/pysubs2.py +0 -642
- lattifai/caption/formats/sbv.py +0 -147
- lattifai/caption/formats/tabular.py +0 -338
- lattifai/caption/formats/textgrid.py +0 -193
- lattifai/caption/formats/ttml.py +0 -652
- lattifai/caption/formats/vtt.py +0 -469
- lattifai/caption/parsers/__init__.py +0 -9
- lattifai/caption/parsers/text_parser.py +0 -147
- lattifai/caption/standardize.py +0 -636
- lattifai/caption/supervision.py +0 -34
- lattifai/caption/utils.py +0 -474
- lattifai-1.2.2.dist-info/RECORD +0 -76
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/WHEEL +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/entry_points.txt +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/top_level.txt +0 -0
|
@@ -1,642 +0,0 @@
|
|
|
1
|
-
"""Standard subtitle formats using pysubs2 library.
|
|
2
|
-
|
|
3
|
-
Handles: SRT, VTT, ASS, SSA, SUB (MicroDVD), SAMI/SMI
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Dict, List, Optional
|
|
8
|
-
|
|
9
|
-
import pysubs2
|
|
10
|
-
|
|
11
|
-
from ...config.caption import CaptionStyle, KaraokeConfig
|
|
12
|
-
from ..parsers.text_parser import normalize_text as normalize_text_fn
|
|
13
|
-
from ..parsers.text_parser import parse_speaker_text
|
|
14
|
-
from ..supervision import Supervision
|
|
15
|
-
from . import register_format
|
|
16
|
-
from .base import FormatHandler
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class Pysubs2Format(FormatHandler):
    """Base handler for subtitle formats parsed and serialized through pysubs2.

    Concrete subclasses set ``pysubs2_format`` to the pysubs2 format
    identifier they represent (for example ``"srt"`` or ``"microdvd"``).
    """

    # pysubs2 format identifier; subclasses should override this.
    pysubs2_format: str = ""

    @staticmethod
    def _seconds_to_ms(seconds: float) -> int:
        """Convert seconds to whole milliseconds.

        Rounds instead of truncating so that binary floating-point error
        (``1.001 * 1000 == 1000.9999999999999``) does not shave a millisecond
        off a timestamp.
        """
        return int(round(seconds * 1000))

    @classmethod
    def _load_subs(cls, source) -> pysubs2.SSAFile:
        """Parse ``source`` with pysubs2, preferring this handler's format.

        ``source`` is treated as in-memory subtitle text when
        ``cls.is_content(source)`` is true, otherwise as a path that is read
        as UTF-8.  If parsing with ``cls.pysubs2_format`` fails, a second
        attempt is made with pysubs2's format auto-detection; an error raised
        by that second attempt propagates to the caller.
        """
        is_text = cls.is_content(source)
        try:
            if is_text:
                return pysubs2.SSAFile.from_string(source, format_=cls.pysubs2_format)
            return pysubs2.load(str(source), encoding="utf-8", format_=cls.pysubs2_format)
        except Exception:
            # Deliberate best-effort fallback: let pysubs2 auto-detect the format.
            if is_text:
                return pysubs2.SSAFile.from_string(source)
            return pysubs2.load(str(source), encoding="utf-8")

    @classmethod
    def read(
        cls,
        source,
        normalize_text: bool = True,
        **kwargs,
    ) -> List[Supervision]:
        """Read captions using pysubs2.

        Args:
            source: Subtitle text, or a path to a subtitle file.
            normalize_text: When true, run each event's text through
                ``normalize_text_fn`` before speaker parsing.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            One ``Supervision`` per pysubs2 event, with times in seconds.
        """
        subs = cls._load_subs(source)

        supervisions = []
        for event in subs.events:
            text = normalize_text_fn(event.text) if normalize_text else event.text
            speaker, text = parse_speaker_text(text)

            # A missing start is treated as 0 for both fields, so the duration
            # computation can no longer fail with ``int - None``.
            start_ms = event.start if event.start is not None else 0
            duration = (event.end - start_ms) / 1000.0 if event.end is not None else 0

            supervisions.append(
                Supervision(
                    text=text,
                    speaker=speaker or event.name or None,
                    start=start_ms / 1000.0,
                    duration=duration,
                )
            )

        return supervisions

    @classmethod
    def extract_metadata(cls, source, **kwargs) -> Dict[str, str]:
        """Extract header metadata from VTT or SRT input.

        Only the first 4096 characters of the input are inspected.

        Args:
            source: Subtitle text, or a path to a subtitle file.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            For WebVTT: ``kind`` / ``language`` and any ``NOTE key: value``
            pairs found in the first ten lines.  For SRT: ``encoding`` set to
            ``"utf-8-sig"`` when the text starts with a BOM.  An empty dict
            when the file cannot be read.
        """
        import re

        if cls.is_content(source):
            content = source[:4096]
        else:
            try:
                with open(Path(str(source)), "r", encoding="utf-8") as f:
                    content = f.read(4096)
            except (OSError, ValueError):
                # Missing/unreadable file or undecodable bytes: no metadata.
                return {}

        metadata: Dict[str, str] = {}

        # Sniff the text that was actually read; for file input ``source`` is
        # a path and never starts with the WEBVTT signature.
        looks_like_vtt = isinstance(content, str) and content.startswith("WEBVTT")

        if cls.pysubs2_format == "vtt" or looks_like_vtt:
            for line in content.split("\n")[:10]:
                line = line.strip()
                if line.startswith("Kind:"):
                    metadata["kind"] = line.split(":", 1)[1].strip()
                elif line.startswith("Language:"):
                    metadata["language"] = line.split(":", 1)[1].strip()
                elif line.startswith("NOTE"):
                    match = re.search(r"NOTE\s+(\w+):\s*(.+)", line)
                    if match:
                        key, value = match.groups()
                        metadata[key.lower()] = value.strip()

        # SRT doesn't have standard metadata, but remember a leading BOM.
        elif cls.pysubs2_format == "srt":
            if content.startswith("\ufeff"):
                metadata["encoding"] = "utf-8-sig"

        return metadata

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path,
        include_speaker: bool = True,
        fps: float = 25.0,
        **kwargs,
    ) -> Path:
        """Serialize ``supervisions`` with ``to_bytes`` and write them to disk.

        Args:
            supervisions: List of Supervision objects
            output_path: Destination file path
            include_speaker: Whether to include speaker in output
            fps: Frames per second (for MicroDVD format)
            **kwargs: Forwarded to ``to_bytes``

        Returns:
            ``output_path`` as a ``Path``
        """
        output_path = Path(output_path)
        content = cls.to_bytes(supervisions, include_speaker=include_speaker, fps=fps, **kwargs)
        output_path.write_bytes(content)
        return output_path

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        fps: float = 25.0,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
        **kwargs,
    ) -> bytes:
        """Convert to bytes using pysubs2.

        Args:
            supervisions: List of Supervision objects
            include_speaker: Whether to include speaker in output
            fps: Frames per second (for MicroDVD format)
            word_level: If True and alignment exists, output word-per-segment
            karaoke_config: Karaoke configuration. When provided with enabled=True,
                use karaoke styling (format-specific)

        Returns:
            Subtitle content as UTF-8 encoded bytes
        """
        from .base import expand_to_word_supervisions

        karaoke_enabled = karaoke_config is not None and karaoke_config.enabled

        # Expand to word-per-segment if word_level=True and karaoke is not enabled
        if word_level and not karaoke_enabled:
            supervisions = expand_to_word_supervisions(supervisions)

        subs = pysubs2.SSAFile()
        for sup in supervisions:
            text = sup.text or ""
            if cls._should_include_speaker(sup, include_speaker):
                text = f"{sup.speaker} {text}"

            subs.append(
                pysubs2.SSAEvent(
                    start=cls._seconds_to_ms(sup.start),
                    end=cls._seconds_to_ms(sup.end),
                    text=text,
                    name=sup.speaker or "",
                )
            )

        # MicroDVD is frame-based, so it is the only format that needs fps.
        export_options = {"fps": fps} if cls.pysubs2_format == "microdvd" else {}
        return subs.to_string(format_=cls.pysubs2_format, **export_options).encode("utf-8")
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
@register_format("srt")
class SRTFormat(Pysubs2Format):
    """SubRip (.srt) handler, optionally emitting a UTF-8 byte-order mark."""

    description = "SubRip Subtitle format - universal compatibility"
    pysubs2_format = "srt"
    extensions = [".srt"]

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        use_bom: bool = False,
        metadata: Optional[Dict] = None,
        **kwargs,
    ) -> bytes:
        """Serialize to SRT bytes, prefixing a UTF-8 BOM when asked to.

        Args:
            supervisions: Segments to serialize.
            include_speaker: Whether speaker labels go into the text.
            use_bom: Force a leading BOM (helps some Windows tools).
            metadata: Optional metadata dict; an ``encoding`` value of
                ``"utf-8-sig"`` also triggers the BOM.
            **kwargs: Passed through to the parent ``to_bytes``.

        Returns:
            The encoded SRT document.
        """
        payload = super().to_bytes(supervisions, include_speaker=include_speaker, **kwargs)

        # The BOM is written either on request or because the source had one.
        source_had_bom = bool(metadata) and metadata.get("encoding") == "utf-8-sig"
        if not (use_bom or source_had_bom):
            return payload

        return b"\xef\xbb\xbf" + payload
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
@register_format("ass")
class ASSFormat(Pysubs2Format):
    """Advanced SubStation Alpha format with karaoke support.

    Besides text and timing, this handler round-trips ASS-specific data:
    per-event attributes travel in ``Supervision.custom`` (``ass_*`` keys) and
    the Script Info / style table travel in the metadata dict
    (``ass_info`` / ``ass_styles``).
    """

    extensions = [".ass"]
    pysubs2_format = "ass"
    description = "Advanced SubStation Alpha - rich styling support"

    @classmethod
    def _parse_source(cls, source, **load_options) -> pysubs2.SSAFile:
        """Parse ``source`` (subtitle text or a UTF-8 file path) with pysubs2.

        ``load_options`` are forwarded to pysubs2, e.g. ``format_="ass"``.
        """
        if cls.is_content(source):
            return pysubs2.SSAFile.from_string(source, **load_options)
        return pysubs2.load(str(source), encoding="utf-8", **load_options)

    @classmethod
    def read(
        cls,
        source,
        normalize_text: bool = True,
        **kwargs,
    ) -> List[Supervision]:
        """Read ASS format with style and event metadata preservation.

        Preserves ASS-specific event attributes in Supervision.custom:
        - ass_style: Style name reference
        - ass_layer: Layer number
        - ass_margin_l/r/v: Margin overrides
        - ass_effect: Effect string

        Args:
            source: ASS text, or a path to an ASS file.
            normalize_text: When true, run each event's text through
                ``normalize_text_fn`` before speaker parsing.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            One ``Supervision`` per event, with times in seconds.
        """
        try:
            subs = cls._parse_source(source, format_=cls.pysubs2_format)
        except Exception:
            # Deliberate best-effort fallback: let pysubs2 auto-detect the format.
            subs = cls._parse_source(source)

        supervisions = []
        for event in subs.events:
            text = normalize_text_fn(event.text) if normalize_text else event.text
            speaker, text = parse_speaker_text(text)

            # Preserve ASS-specific event attributes
            custom = {
                "ass_style": event.style,
                "ass_layer": event.layer,
                "ass_margin_l": event.marginl,
                "ass_margin_r": event.marginr,
                "ass_margin_v": event.marginv,
                "ass_effect": event.effect,
            }

            # A missing start is treated as 0 for both fields, so the duration
            # computation can no longer fail with ``int - None``.
            start_ms = event.start if event.start is not None else 0
            duration = (event.end - start_ms) / 1000.0 if event.end is not None else 0

            supervisions.append(
                Supervision(
                    text=text,
                    speaker=speaker or event.name or None,
                    start=start_ms / 1000.0,
                    duration=duration,
                    custom=custom,
                )
            )

        return supervisions

    @classmethod
    def extract_metadata(cls, source, **kwargs) -> Dict:
        """Extract ASS global metadata including Script Info and Styles.

        Returns:
            Dict containing:
            - ass_info: Script Info section as dict
            - ass_styles: Style definitions as dict of dicts

            An empty dict is returned when the source cannot be parsed.
        """
        try:
            subs = cls._parse_source(source, format_=cls.pysubs2_format)
        except Exception:
            # Metadata is optional; an unparsable source simply has none.
            return {}

        # Convert styles to a plain dict; colors become &HAABBGGRR strings.
        styles_dict = {}
        for name, style in subs.styles.items():
            styles_dict[name] = {
                "fontname": style.fontname,
                "fontsize": style.fontsize,
                "primarycolor": cls._color_to_str(style.primarycolor),
                "secondarycolor": cls._color_to_str(style.secondarycolor),
                "tertiarycolor": cls._color_to_str(style.tertiarycolor),
                "outlinecolor": cls._color_to_str(style.outlinecolor),
                "backcolor": cls._color_to_str(style.backcolor),
                "bold": style.bold,
                "italic": style.italic,
                "underline": style.underline,
                "strikeout": style.strikeout,
                "scalex": style.scalex,
                "scaley": style.scaley,
                "spacing": style.spacing,
                "angle": style.angle,
                "borderstyle": style.borderstyle,
                "outline": style.outline,
                "shadow": style.shadow,
                "alignment": style.alignment,
                "marginl": style.marginl,
                "marginr": style.marginr,
                "marginv": style.marginv,
                "alphalevel": style.alphalevel,
                "encoding": style.encoding,
            }

        return {
            "ass_info": dict(subs.info),
            "ass_styles": styles_dict,
        }

    @staticmethod
    def _color_to_str(color: pysubs2.Color) -> str:
        """Convert pysubs2.Color to ASS color string &HAABBGGRR."""
        return f"&H{color.a:02X}{color.b:02X}{color.g:02X}{color.r:02X}"

    @staticmethod
    def _str_to_color(color_str: str) -> pysubs2.Color:
        """Convert ASS color string &HAABBGGRR (or &HBBGGRR) to pysubs2.Color.

        A trailing ``&`` and surrounding whitespace are tolerated.  Anything
        that is not 6 or 8 valid hex digits yields opaque white rather than
        raising.
        """
        fallback = pysubs2.Color(r=255, g=255, b=255, a=0)

        # '&', 'H' and 'h' are not hex digits, so stripping them as a
        # character set can never eat part of the color value itself.
        digits = color_str.strip().lstrip("&Hh").rstrip("&")
        if len(digits) == 6:
            digits = "00" + digits  # no alpha byte given: fully opaque
        if len(digits) != 8:
            return fallback

        try:
            a, b, g, r = (int(digits[i : i + 2], 16) for i in range(0, 8, 2))
        except ValueError:
            return fallback
        return pysubs2.Color(r=r, g=g, b=b, a=a)

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        fps: float = 25.0,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
        metadata: Optional[Dict] = None,
        **kwargs,
    ) -> bytes:
        """Convert to ASS bytes with style preservation and optional karaoke tags.

        Args:
            supervisions: List of supervision segments
            include_speaker: Whether to include speaker in output
            fps: Frames per second (not used for ASS)
            word_level: If True and alignment exists, output word-per-segment or karaoke
            karaoke_config: Karaoke configuration. When provided with enabled=True,
                generate karaoke tags
            metadata: Optional metadata dict containing ass_info and ass_styles
                to restore original ASS formatting

        Returns:
            ASS content as UTF-8 encoded bytes
        """
        from .base import expand_to_word_supervisions

        karaoke_enabled = karaoke_config is not None and karaoke_config.enabled

        # Expand to word-per-segment if word_level=True and karaoke is not enabled
        if word_level and not karaoke_enabled:
            supervisions = expand_to_word_supervisions(supervisions)

        # Create ASS file and restore global styles from metadata
        subs = cls._create_ass_file_with_metadata(metadata)

        # Add karaoke style if needed
        if karaoke_enabled:
            subs.styles["Karaoke"] = cls._create_karaoke_style(karaoke_config.style)

        for sup in supervisions:
            alignment = getattr(sup, "alignment", None)
            word_items = alignment.get("word") if alignment else None

            if word_level and karaoke_enabled and word_items:
                # Karaoke mode: one event spanning first word start to last word end.
                # Round (not truncate) so float error cannot drop a millisecond.
                subs.append(
                    pysubs2.SSAEvent(
                        start=int(round(word_items[0].start * 1000)),
                        end=int(round(word_items[-1].end * 1000)),
                        text=cls._build_karaoke_text(word_items, karaoke_config.effect),
                        style="Karaoke",
                    )
                )
            else:
                # Standard mode: restore custom attributes from supervision
                text = sup.text or ""
                if cls._should_include_speaker(sup, include_speaker):
                    text = f"{sup.speaker} {text}"
                subs.append(cls._create_event_from_supervision(sup, text))

        return subs.to_string(format_="ass").encode("utf-8")

    @classmethod
    def _create_ass_file_with_metadata(cls, metadata: Optional[Dict]) -> pysubs2.SSAFile:
        """Create SSAFile and restore global styles from metadata.

        Args:
            metadata: Dict containing ass_info and ass_styles; missing or
                ``None`` entries are skipped

        Returns:
            pysubs2.SSAFile with restored Script Info and styles
        """
        subs = pysubs2.SSAFile()
        if not metadata:
            return subs

        # ``or {}`` tolerates keys that are present but set to None.
        subs.info.update(metadata.get("ass_info") or {})
        for name, style_dict in (metadata.get("ass_styles") or {}).items():
            subs.styles[name] = cls._dict_to_style(style_dict)

        return subs

    @classmethod
    def _dict_to_style(cls, style_dict: Dict) -> pysubs2.SSAStyle:
        """Convert a style dict (as built by ``extract_metadata``) to pysubs2.SSAStyle.

        Keys missing from ``style_dict`` fall back to the defaults given below.
        """
        return pysubs2.SSAStyle(
            fontname=style_dict.get("fontname", "Arial"),
            fontsize=style_dict.get("fontsize", 20.0),
            primarycolor=cls._str_to_color(style_dict.get("primarycolor", "&H00FFFFFF")),
            secondarycolor=cls._str_to_color(style_dict.get("secondarycolor", "&H000000FF")),
            tertiarycolor=cls._str_to_color(style_dict.get("tertiarycolor", "&H00000000")),
            outlinecolor=cls._str_to_color(style_dict.get("outlinecolor", "&H00000000")),
            backcolor=cls._str_to_color(style_dict.get("backcolor", "&H00000000")),
            bold=style_dict.get("bold", False),
            italic=style_dict.get("italic", False),
            underline=style_dict.get("underline", False),
            strikeout=style_dict.get("strikeout", False),
            scalex=style_dict.get("scalex", 100.0),
            scaley=style_dict.get("scaley", 100.0),
            spacing=style_dict.get("spacing", 0.0),
            angle=style_dict.get("angle", 0.0),
            borderstyle=style_dict.get("borderstyle", 1),
            outline=style_dict.get("outline", 2.0),
            shadow=style_dict.get("shadow", 2.0),
            alignment=pysubs2.Alignment(style_dict.get("alignment", 2)),
            marginl=style_dict.get("marginl", 10),
            marginr=style_dict.get("marginr", 10),
            marginv=style_dict.get("marginv", 10),
            alphalevel=style_dict.get("alphalevel", 0),
            encoding=style_dict.get("encoding", 1),
        )

    @classmethod
    def _create_event_from_supervision(cls, sup: Supervision, text: str) -> pysubs2.SSAEvent:
        """Create SSAEvent from Supervision, restoring custom attributes.

        Args:
            sup: Supervision with optional custom dict containing ass_* attributes
            text: Processed text content

        Returns:
            pysubs2.SSAEvent with restored attributes
        """
        custom = getattr(sup, "custom", None) or {}

        return pysubs2.SSAEvent(
            # Round (not truncate) so float error cannot drop a millisecond.
            start=int(round(sup.start * 1000)),
            end=int(round(sup.end * 1000)),
            text=text,
            name=sup.speaker or "",
            style=custom.get("ass_style", "Default"),
            layer=custom.get("ass_layer", 0),
            marginl=custom.get("ass_margin_l", 0),
            marginr=custom.get("ass_margin_r", 0),
            marginv=custom.get("ass_margin_v", 0),
            effect=custom.get("ass_effect", ""),
        )

    @classmethod
    def _create_karaoke_style(cls, style: CaptionStyle) -> pysubs2.SSAStyle:
        """Create pysubs2 SSAStyle from CaptionStyle config.

        Args:
            style: CaptionStyle configuration

        Returns:
            pysubs2.SSAStyle object
        """
        return pysubs2.SSAStyle(
            fontname=style.font_name,
            fontsize=style.font_size,
            primarycolor=cls._hex_to_ass_color(style.primary_color),
            secondarycolor=cls._hex_to_ass_color(style.secondary_color),
            outlinecolor=cls._hex_to_ass_color(style.outline_color),
            backcolor=cls._hex_to_ass_color(style.back_color),
            bold=style.bold,
            italic=style.italic,
            outline=style.outline_width,
            shadow=style.shadow_depth,
            # Convert int alignment to pysubs2.Alignment enum
            alignment=pysubs2.Alignment(style.alignment),
            marginl=style.margin_l,
            marginr=style.margin_r,
            marginv=style.margin_v,
        )

    @staticmethod
    def _hex_to_ass_color(hex_color: str) -> pysubs2.Color:
        """Convert #RRGGBB (or #RGB shorthand) to pysubs2 Color.

        ASS uses &HAABBGGRR format (reversed RGB with alpha); the returned
        color is always fully opaque (alpha 0).

        Args:
            hex_color: Color in #RRGGBB or #RGB format; the ``#`` is optional

        Returns:
            pysubs2.Color object

        Raises:
            ValueError: If the channel digits are not valid hexadecimal
        """
        digits = hex_color.strip().lstrip("#")

        # Expand CSS-style shorthand: "f80" -> "ff8800".
        if len(digits) == 3:
            digits = "".join(ch * 2 for ch in digits)

        r = int(digits[0:2], 16)
        g = int(digits[2:4], 16)
        b = int(digits[4:6], 16)

        return pysubs2.Color(r=r, g=g, b=b, a=0)

    @staticmethod
    def _build_karaoke_text(words: list, effect: str = "sweep") -> str:
        """Build karaoke tag text.

        Karaoke durations in ASS are consecutive from the start of the line,
        so a silence between two words is emitted as an empty ``{\\k<cs>}``
        pause syllable in front of the next word; otherwise every later
        highlight would start early by the length of the silence.  Durations
        are derived from rounded cumulative centisecond boundaries measured
        from the first word's start, so rounding error does not accumulate.

        Args:
            words: List of AlignmentItem objects (``start``, ``duration`` and
                ``symbol`` are read)
            effect: Karaoke effect type ("sweep", "instant", "outline");
                unknown values fall back to "sweep"

        Returns:
            Text with karaoke tags, e.g. "{\\kf45}Hello {\\kf55}world";
            an empty string when ``words`` is empty
        """
        if not words:
            return ""

        tag_map = {"sweep": "kf", "instant": "k", "outline": "ko"}
        tag = tag_map.get(effect, "kf")

        origin = words[0].start
        elapsed = 0  # centiseconds of the line already covered by tags
        parts = []
        for word in words:
            # Boundaries are clamped to be monotonic so overlapping words can
            # never produce a negative duration.
            begin = max(int(round((word.start - origin) * 100)), elapsed)
            finish = max(int(round((word.start + word.duration - origin) * 100)), begin)

            pause = f"{{\\k{begin - elapsed}}}" if begin > elapsed else ""
            parts.append(f"{pause}{{\\{tag}{finish - begin}}}{word.symbol}")
            elapsed = finish

        return " ".join(parts)
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
@register_format("ssa")
class SSAFormat(ASSFormat):
    """Legacy SubStation Alpha handler.

    Reuses the ASS handler's reading and metadata logic; only serialization
    is overridden so the output is written in the ``ssa`` dialect.
    """

    description = "SubStation Alpha - legacy format"
    pysubs2_format = "ssa"
    extensions = [".ssa"]

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        fps: float = 25.0,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
        metadata: Optional[Dict] = None,
        **kwargs,
    ) -> bytes:
        """Serialize supervisions as SSA bytes, restoring styles from metadata.

        Args:
            supervisions: Segments to serialize.
            include_speaker: Whether speaker labels are prefixed to the text.
            fps: Unused here; kept for a uniform signature.
            word_level: Expand to one event per word, unless karaoke is enabled.
            karaoke_config: Only its ``enabled`` flag is consulted, to decide
                whether the word-level expansion is skipped.
            metadata: Optional dict with ``ass_info`` / ``ass_styles``.

        Returns:
            The UTF-8 encoded SSA document.
        """
        from .base import expand_to_word_supervisions

        karaoke_on = bool(karaoke_config and karaoke_config.enabled)
        if word_level and not karaoke_on:
            supervisions = expand_to_word_supervisions(supervisions)

        ssa_file = cls._create_ass_file_with_metadata(metadata)

        for segment in supervisions:
            line = segment.text or ""
            if cls._should_include_speaker(segment, include_speaker):
                line = f"{segment.speaker} {line}"
            ssa_file.append(cls._create_event_from_supervision(segment, line))

        encoded = ssa_file.to_string(format_="ssa").encode("utf-8")
        return encoded
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
@register_format("sub")
class MicroDVDFormat(Pysubs2Format):
    """Handler for MicroDVD ``.sub`` files, whose timings are frame-based."""

    description = "MicroDVD - frame-based subtitle format"
    pysubs2_format = "microdvd"
    extensions = [".sub"]
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
@register_format("sami")
class SAMIFormat(Pysubs2Format):
    """Handler for SAMI (Synchronized Accessible Media Interchange) files."""

    description = "SAMI - Microsoft format for accessibility"
    pysubs2_format = "sami"
    extensions = [".smi", ".sami"]
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
# Register alias for SMI extension
|
|
638
|
-
@register_format("smi")
class SMIFormat(SAMIFormat):
    """Alias of the SAMI handler, registered under the ``smi`` format name."""
|