lattifai 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +2 -3
  3. lattifai/alignment/lattice1_aligner.py +117 -4
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/segmenter.py +3 -2
  6. lattifai/alignment/text_align.py +2 -1
  7. lattifai/alignment/tokenizer.py +56 -29
  8. lattifai/audio2.py +162 -183
  9. lattifai/cli/alignment.py +5 -0
  10. lattifai/cli/caption.py +6 -6
  11. lattifai/cli/transcribe.py +1 -5
  12. lattifai/cli/youtube.py +3 -0
  13. lattifai/client.py +41 -12
  14. lattifai/config/__init__.py +21 -3
  15. lattifai/config/alignment.py +7 -0
  16. lattifai/config/caption.py +13 -243
  17. lattifai/config/client.py +16 -0
  18. lattifai/config/event.py +102 -0
  19. lattifai/config/transcription.py +25 -1
  20. lattifai/data/__init__.py +8 -0
  21. lattifai/data/caption.py +228 -0
  22. lattifai/errors.py +78 -53
  23. lattifai/event/__init__.py +65 -0
  24. lattifai/event/lattifai.py +166 -0
  25. lattifai/mixin.py +22 -17
  26. lattifai/transcription/base.py +2 -1
  27. lattifai/transcription/gemini.py +147 -16
  28. lattifai/transcription/lattifai.py +8 -11
  29. lattifai/types.py +1 -1
  30. lattifai/youtube/client.py +143 -48
  31. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/METADATA +129 -58
  32. lattifai-1.3.1.dist-info/RECORD +57 -0
  33. lattifai/__init__.py +0 -88
  34. lattifai/alignment/sentence_splitter.py +0 -350
  35. lattifai/caption/__init__.py +0 -96
  36. lattifai/caption/caption.py +0 -661
  37. lattifai/caption/formats/__init__.py +0 -199
  38. lattifai/caption/formats/base.py +0 -211
  39. lattifai/caption/formats/gemini.py +0 -722
  40. lattifai/caption/formats/json.py +0 -194
  41. lattifai/caption/formats/lrc.py +0 -309
  42. lattifai/caption/formats/nle/__init__.py +0 -9
  43. lattifai/caption/formats/nle/audition.py +0 -561
  44. lattifai/caption/formats/nle/avid.py +0 -423
  45. lattifai/caption/formats/nle/fcpxml.py +0 -549
  46. lattifai/caption/formats/nle/premiere.py +0 -589
  47. lattifai/caption/formats/pysubs2.py +0 -642
  48. lattifai/caption/formats/sbv.py +0 -147
  49. lattifai/caption/formats/tabular.py +0 -338
  50. lattifai/caption/formats/textgrid.py +0 -193
  51. lattifai/caption/formats/ttml.py +0 -652
  52. lattifai/caption/formats/vtt.py +0 -469
  53. lattifai/caption/parsers/__init__.py +0 -9
  54. lattifai/caption/parsers/text_parser.py +0 -147
  55. lattifai/caption/standardize.py +0 -636
  56. lattifai/caption/supervision.py +0 -34
  57. lattifai/caption/utils.py +0 -474
  58. lattifai-1.2.2.dist-info/RECORD +0 -76
  59. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/WHEEL +0 -0
  60. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/entry_points.txt +0 -0
  61. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/licenses/LICENSE +0 -0
  62. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,642 +0,0 @@
1
- """Standard subtitle formats using pysubs2 library.
2
-
3
- Handles: SRT, VTT, ASS, SSA, SUB (MicroDVD), SAMI/SMI
4
- """
5
-
6
- from pathlib import Path
7
- from typing import Dict, List, Optional
8
-
9
- import pysubs2
10
-
11
- from ...config.caption import CaptionStyle, KaraokeConfig
12
- from ..parsers.text_parser import normalize_text as normalize_text_fn
13
- from ..parsers.text_parser import parse_speaker_text
14
- from ..supervision import Supervision
15
- from . import register_format
16
- from .base import FormatHandler
17
-
18
-
19
- class Pysubs2Format(FormatHandler):
20
- """Base class for formats handled by pysubs2."""
21
-
22
- # Subclasses should set these
23
- pysubs2_format: str = ""
24
-
25
@classmethod
def read(
    cls,
    source,
    normalize_text: bool = True,
    **kwargs,
) -> List[Supervision]:
    """Parse a subtitle source into Supervision segments via pysubs2.

    Args:
        source: Either raw subtitle text or a path to a subtitle file;
            ``cls.is_content`` decides which one it is.
        normalize_text: When True, pass each event's text through
            ``normalize_text_fn`` before the speaker prefix is split off.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        One Supervision per pysubs2 event, with millisecond timings
        converted to seconds.
    """

    def _load(**format_args):
        # Raw text is parsed in memory; anything else is opened as a UTF-8 file.
        if cls.is_content(source):
            return pysubs2.SSAFile.from_string(source, **format_args)
        return pysubs2.load(str(source), encoding="utf-8", **format_args)

    try:
        parsed = _load(format_=cls.pysubs2_format)
    except Exception:
        # The declared format did not parse; retry with pysubs2 auto-detection.
        parsed = _load()

    segments = []
    for ev in parsed.events:
        raw_text = normalize_text_fn(ev.text) if normalize_text else ev.text
        speaker, body = parse_speaker_text(raw_text)

        start_seconds = 0 if ev.start is None else ev.start / 1000.0
        length_seconds = 0 if ev.end is None else (ev.end - ev.start) / 1000.0

        segments.append(
            Supervision(
                text=body,
                # A speaker parsed from the text wins over the event's name field.
                speaker=speaker or ev.name or None,
                start=start_seconds,
                duration=length_seconds,
            )
        )

    return segments
63
-
64
@classmethod
def extract_metadata(cls, source, **kwargs) -> Dict[str, str]:
    """Extract header metadata from the first 4096 characters of a caption.

    For WebVTT input (the class format is ``vtt`` or the text itself starts
    with ``WEBVTT``), the first ten lines are scanned for ``Kind:``,
    ``Language:`` and ``NOTE key: value`` entries. For SRT, a leading BOM
    is recorded as ``encoding = "utf-8-sig"``.

    Args:
        source: Raw caption text or a path to a caption file;
            ``cls.is_content`` decides which one it is.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Dict of discovered metadata; empty when nothing is found or the
        file is missing or unreadable.
    """
    import re

    if cls.is_content(source):
        content = source[:4096]
    else:
        path = Path(str(source))
        if not path.exists():
            return {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read(4096)
        except (OSError, ValueError):
            # Unreadable or not valid UTF-8 (UnicodeDecodeError is a ValueError):
            # metadata is best-effort, so report none rather than fail.
            return {}

    metadata: Dict[str, str] = {}

    # Sniff the text that was actually read. Testing `source` would inspect the
    # path string whenever a file is given, so a WebVTT file was never detected
    # and a path that merely begins with "WEBVTT" was misdetected.
    looks_like_vtt = isinstance(content, str) and content.startswith("WEBVTT")

    if cls.pysubs2_format == "vtt" or looks_like_vtt:
        for line in content.split("\n")[:10]:
            line = line.strip()
            if line.startswith("Kind:"):
                metadata["kind"] = line.split(":", 1)[1].strip()
            elif line.startswith("Language:"):
                metadata["language"] = line.split(":", 1)[1].strip()
            elif line.startswith("NOTE"):
                match = re.search(r"NOTE\s+(\w+):\s*(.+)", line)
                if match:
                    key, value = match.groups()
                    metadata[key.lower()] = value.strip()

    # SRT has no standard metadata; only remember whether a BOM was present.
    elif cls.pysubs2_format == "srt":
        if content.startswith("\ufeff"):
            metadata["encoding"] = "utf-8-sig"

    return metadata
104
-
105
@classmethod
def write(
    cls,
    supervisions: List[Supervision],
    output_path,
    include_speaker: bool = True,
    fps: float = 25.0,
    **kwargs,
) -> Path:
    """Serialize supervisions with ``cls.to_bytes`` and store them on disk.

    Args:
        supervisions: Segments to write.
        output_path: Destination file; converted to ``Path``.
        include_speaker: Forwarded to ``to_bytes``.
        fps: Forwarded to ``to_bytes``.
        **kwargs: Forwarded to ``to_bytes`` unchanged.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(output_path)
    destination.write_bytes(
        cls.to_bytes(supervisions, include_speaker=include_speaker, fps=fps, **kwargs)
    )
    return destination
119
-
120
@classmethod
def to_bytes(
    cls,
    supervisions: List[Supervision],
    include_speaker: bool = True,
    fps: float = 25.0,
    word_level: bool = False,
    karaoke_config: Optional[KaraokeConfig] = None,
    **kwargs,
) -> bytes:
    """Convert supervisions to subtitle bytes using pysubs2.

    Args:
        supervisions: List of Supervision objects
        include_speaker: Whether to prefix the text with the speaker
        fps: Frames per second; only passed to pysubs2 for MicroDVD
        word_level: If True and karaoke is not enabled, expand every
            supervision into word-per-segment supervisions first
        karaoke_config: Karaoke configuration. Here it only suppresses the
            word-level expansion when ``enabled`` is true; karaoke styling
            itself is format-specific.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Subtitle content as UTF-8 encoded bytes
    """
    from .base import expand_to_word_supervisions

    karaoke_enabled = karaoke_config is not None and karaoke_config.enabled

    if word_level and not karaoke_enabled:
        supervisions = expand_to_word_supervisions(supervisions)

    subs = pysubs2.SSAFile()

    for sup in supervisions:
        text = sup.text or ""
        if cls._should_include_speaker(sup, include_speaker):
            text = f"{sup.speaker} {text}"

        subs.append(
            pysubs2.SSAEvent(
                # Round instead of truncating: 1.001 * 1000 evaluates to
                # 1000.9999999999999, so int() alone would lose a millisecond
                # on every read/write round trip.
                start=int(round(sup.start * 1000)),
                end=int(round(sup.end * 1000)),
                text=text,
                name=sup.speaker or "",
            )
        )

    # MicroDVD is frame-based, so pysubs2 needs the frame rate to serialize it.
    render_options = {"fps": fps} if cls.pysubs2_format == "microdvd" else {}
    return subs.to_string(format_=cls.pysubs2_format, **render_options).encode("utf-8")
173
-
174
-
175
@register_format("srt")
class SRTFormat(Pysubs2Format):
    """SubRip (``.srt``) captions, optionally prefixed with a UTF-8 BOM."""

    extensions = [".srt"]
    pysubs2_format = "srt"
    description = "SubRip Subtitle format - universal compatibility"

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        use_bom: bool = False,
        metadata: Optional[Dict] = None,
        **kwargs,
    ) -> bytes:
        """Render SRT bytes through the pysubs2 base writer.

        Args:
            supervisions: List of supervision segments
            include_speaker: Whether to include speaker in output
            use_bom: Force a UTF-8 BOM at the start of the output
            metadata: Optional metadata dict; an ``encoding`` entry equal to
                ``"utf-8-sig"`` also causes a BOM to be written
            **kwargs: Forwarded to the base ``to_bytes``

        Returns:
            SRT content as bytes, with a leading BOM when requested.
        """
        payload = super().to_bytes(supervisions, include_speaker=include_speaker, **kwargs)

        source_had_bom = bool(metadata) and metadata.get("encoding") == "utf-8-sig"
        if use_bom or source_had_bom:
            return b"\xef\xbb\xbf" + payload
        return payload
211
-
212
-
213
- @register_format("ass")
214
- class ASSFormat(Pysubs2Format):
215
- """Advanced SubStation Alpha format with karaoke support."""
216
-
217
- extensions = [".ass"]
218
- pysubs2_format = "ass"
219
- description = "Advanced SubStation Alpha - rich styling support"
220
-
221
@classmethod
def read(
    cls,
    source,
    normalize_text: bool = True,
    **kwargs,
) -> List[Supervision]:
    """Parse ASS content, keeping per-event styling in ``Supervision.custom``.

    Each supervision's ``custom`` dict carries:
    - ass_style: Style name reference
    - ass_layer: Layer number
    - ass_margin_l / ass_margin_r / ass_margin_v: Margin overrides
    - ass_effect: Effect string

    Args:
        source: Raw subtitle text or a path; ``cls.is_content`` decides.
        normalize_text: When True, pass each event's text through
            ``normalize_text_fn`` before the speaker prefix is split off.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        One Supervision per event, timings converted from ms to seconds.
    """

    def _parse(**format_args):
        if cls.is_content(source):
            return pysubs2.SSAFile.from_string(source, **format_args)
        return pysubs2.load(str(source), encoding="utf-8", **format_args)

    def _to_supervision(ev):
        raw_text = normalize_text_fn(ev.text) if normalize_text else ev.text
        speaker, body = parse_speaker_text(raw_text)

        # ASS-only event attributes travel in `custom` so a later write can restore them.
        ass_attributes = {
            "ass_style": ev.style,
            "ass_layer": ev.layer,
            "ass_margin_l": ev.marginl,
            "ass_margin_r": ev.marginr,
            "ass_margin_v": ev.marginv,
            "ass_effect": ev.effect,
        }

        return Supervision(
            text=body,
            speaker=speaker or ev.name or None,
            start=0 if ev.start is None else ev.start / 1000.0,
            duration=0 if ev.end is None else (ev.end - ev.start) / 1000.0,
            custom=ass_attributes,
        )

    try:
        parsed = _parse(format_=cls.pysubs2_format)
    except Exception:
        # Declared format failed; fall back to pysubs2 auto-detection.
        parsed = _parse()

    return [_to_supervision(ev) for ev in parsed.events]
276
-
277
@classmethod
def extract_metadata(cls, source, **kwargs) -> Dict:
    """Collect the Script Info section and style table of an ASS source.

    Args:
        source: Raw subtitle text or a path; ``cls.is_content`` decides.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Empty dict when the source cannot be parsed, otherwise a dict with:
        - ass_info: Script Info section as a plain dict
        - ass_styles: style name -> dict of style fields, with colors
          rendered as ``&HAABBGGRR`` strings
    """
    try:
        if cls.is_content(source):
            parsed = pysubs2.SSAFile.from_string(source, format_=cls.pysubs2_format)
        else:
            parsed = pysubs2.load(str(source), encoding="utf-8", format_=cls.pysubs2_format)
    except Exception:
        return {}

    # Fields in the order they are emitted; color fields need string conversion.
    color_fields = frozenset(
        ("primarycolor", "secondarycolor", "tertiarycolor", "outlinecolor", "backcolor")
    )
    field_order = (
        "fontname",
        "fontsize",
        "primarycolor",
        "secondarycolor",
        "tertiarycolor",
        "outlinecolor",
        "backcolor",
        "bold",
        "italic",
        "underline",
        "strikeout",
        "scalex",
        "scaley",
        "spacing",
        "angle",
        "borderstyle",
        "outline",
        "shadow",
        "alignment",
        "marginl",
        "marginr",
        "marginv",
        "alphalevel",
        "encoding",
    )

    def _serialize(style):
        serialized = {}
        for field in field_order:
            value = getattr(style, field)
            serialized[field] = cls._color_to_str(value) if field in color_fields else value
        return serialized

    return {
        "ass_info": dict(parsed.info),
        "ass_styles": {name: _serialize(style) for name, style in parsed.styles.items()},
    }
328
-
329
@staticmethod
def _color_to_str(color: pysubs2.Color) -> str:
    """Render a pysubs2.Color as an ASS ``&HAABBGGRR`` string.

    Channels are written as two uppercase hex digits each, in the order
    alpha, blue, green, red.
    """
    channels = (color.a, color.b, color.g, color.r)
    return "&H" + "".join(f"{channel:02X}" for channel in channels)
333
-
334
@staticmethod
def _str_to_color(color_str: str) -> pysubs2.Color:
    """Convert an ASS color string to pysubs2.Color.

    Accepts ``&HAABBGGRR`` and ``&HBBGGRR`` (alpha defaults to 0), with or
    without the ``&H`` prefix, in either case, and with the optional
    trailing ``&`` used by the override-tag spelling (``&HBBGGRR&``).

    Args:
        color_str: ASS color string.

    Returns:
        The parsed color, or opaque white (r=g=b=255, a=0) when the string
        has an unexpected length or contains non-hex digits.
    """
    # str.lstrip takes a *set* of characters, not a prefix, so peel the
    # delimiters explicitly: surrounding '&' first, then one 'H'/'h' marker.
    digits = color_str.strip().strip("&")
    if digits[:1] in ("H", "h"):
        digits = digits[1:]

    if len(digits) == 6:
        # No alpha byte present: treat the color as fully opaque.
        digits = "00" + digits

    if len(digits) != 8:
        return pysubs2.Color(r=255, g=255, b=255, a=0)

    try:
        a, b, g, r = (int(digits[i : i + 2], 16) for i in (0, 2, 4, 6))
    except ValueError:
        # Malformed digits get the same fallback as a malformed length.
        return pysubs2.Color(r=255, g=255, b=255, a=0)

    return pysubs2.Color(r=r, g=g, b=b, a=a)
351
-
352
@classmethod
def to_bytes(
    cls,
    supervisions: List[Supervision],
    include_speaker: bool = True,
    fps: float = 25.0,
    word_level: bool = False,
    karaoke_config: Optional[KaraokeConfig] = None,
    metadata: Optional[Dict] = None,
    **kwargs,
) -> bytes:
    """Convert to ASS bytes with style preservation and optional karaoke tags.

    Args:
        supervisions: List of supervision segments
        include_speaker: Whether to include speaker in output
        fps: Frames per second (not used for ASS)
        word_level: If True, output word-per-segment events, or karaoke
            events when karaoke is enabled and word alignment exists
        karaoke_config: Karaoke configuration. When provided with
            enabled=True, a "Karaoke" style is added and karaoke tags are
            generated for supervisions that have word alignment
        metadata: Optional metadata dict containing ass_info and ass_styles
            to restore original ASS formatting
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ASS content as UTF-8 encoded bytes
    """
    from .base import expand_to_word_supervisions

    karaoke_enabled = karaoke_config is not None and karaoke_config.enabled

    # Word-per-segment expansion only applies when karaoke is not rendering the words.
    if word_level and not karaoke_enabled:
        supervisions = expand_to_word_supervisions(supervisions)

    subs = cls._create_ass_file_with_metadata(metadata)

    if karaoke_enabled:
        subs.styles["Karaoke"] = cls._create_karaoke_style(karaoke_config.style)

    for sup in supervisions:
        alignment = getattr(sup, "alignment", None)
        word_items = alignment.get("word") if alignment else None

        if word_level and karaoke_enabled and word_items:
            # The karaoke event spans first word start to last word end.
            # Round rather than truncate: 1.001 * 1000 evaluates to
            # 1000.9999999999999, which int() alone would cut to 1000 ms.
            subs.append(
                pysubs2.SSAEvent(
                    start=int(round(word_items[0].start * 1000)),
                    end=int(round(word_items[-1].end * 1000)),
                    text=cls._build_karaoke_text(word_items, karaoke_config.effect),
                    style="Karaoke",
                )
            )
            continue

        # Standard mode: restore custom attributes from the supervision.
        text = sup.text or ""
        if cls._should_include_speaker(sup, include_speaker):
            text = f"{sup.speaker} {text}"
        subs.append(cls._create_event_from_supervision(sup, text))

    return subs.to_string(format_="ass").encode("utf-8")
421
-
422
@classmethod
def _create_ass_file_with_metadata(cls, metadata: Optional[Dict]) -> pysubs2.SSAFile:
    """Build a fresh SSAFile, seeded from previously extracted metadata.

    Args:
        metadata: Optional dict; its ``ass_info`` entry is merged into the
            file's Script Info and each ``ass_styles`` entry is converted
            back into a style. Missing or empty metadata leaves the file
            as pysubs2 created it.

    Returns:
        The new pysubs2.SSAFile.
    """
    ass_file = pysubs2.SSAFile()

    if metadata:
        if "ass_info" in metadata:
            ass_file.info.update(metadata["ass_info"])

        if "ass_styles" in metadata:
            saved_styles = metadata["ass_styles"]
            for style_name, fields in saved_styles.items():
                ass_file.styles[style_name] = cls._dict_to_style(fields)

    return ass_file
447
-
448
@classmethod
def _dict_to_style(cls, style_dict: Dict) -> pysubs2.SSAStyle:
    """Rebuild a pysubs2.SSAStyle from a serialized style dict.

    Args:
        style_dict: Style fields keyed by name. Colors are ASS color
            strings; any missing field takes the default listed below.

    Returns:
        The reconstructed pysubs2.SSAStyle.
    """
    color_defaults = {
        "primarycolor": "&H00FFFFFF",
        "secondarycolor": "&H000000FF",
        "tertiarycolor": "&H00000000",
        "outlinecolor": "&H00000000",
        "backcolor": "&H00000000",
    }
    plain_defaults = {
        "fontname": "Arial",
        "fontsize": 20.0,
        "bold": False,
        "italic": False,
        "underline": False,
        "strikeout": False,
        "scalex": 100.0,
        "scaley": 100.0,
        "spacing": 0.0,
        "angle": 0.0,
        "borderstyle": 1,
        "outline": 2.0,
        "shadow": 2.0,
        "marginl": 10,
        "marginr": 10,
        "marginv": 10,
        "alphalevel": 0,
        "encoding": 1,
    }

    lookup = style_dict.get
    options = {field: lookup(field, fallback) for field, fallback in plain_defaults.items()}
    for field, fallback in color_defaults.items():
        options[field] = cls._str_to_color(lookup(field, fallback))
    # Alignment is stored as a plain value and must become the pysubs2 enum again.
    options["alignment"] = pysubs2.Alignment(lookup("alignment", 2))

    return pysubs2.SSAStyle(**options)
477
-
478
@classmethod
def _create_event_from_supervision(cls, sup: Supervision, text: str) -> pysubs2.SSAEvent:
    """Create SSAEvent from Supervision, restoring custom attributes.

    Args:
        sup: Supervision with optional custom dict containing ass_* attributes
        text: Processed text content

    Returns:
        pysubs2.SSAEvent with the supervision's timing in milliseconds and
        any ass_* attributes restored (defaults apply when they are absent)
    """
    custom = getattr(sup, "custom", None) or {}

    return pysubs2.SSAEvent(
        # Round rather than truncate: 1.001 * 1000 evaluates to
        # 1000.9999999999999, so int() alone would shift an event read at
        # 1001 ms back to 1000 ms when it is written out again.
        start=int(round(sup.start * 1000)),
        end=int(round(sup.end * 1000)),
        text=text,
        name=sup.speaker or "",
        style=custom.get("ass_style", "Default"),
        layer=custom.get("ass_layer", 0),
        marginl=custom.get("ass_margin_l", 0),
        marginr=custom.get("ass_margin_r", 0),
        marginv=custom.get("ass_margin_v", 0),
        effect=custom.get("ass_effect", ""),
    )
503
-
504
@classmethod
def _create_karaoke_style(cls, style: CaptionStyle) -> pysubs2.SSAStyle:
    """Translate a CaptionStyle config into a pysubs2 SSAStyle.

    Args:
        style: CaptionStyle configuration; its colors are converted with
            ``_hex_to_ass_color`` and its integer alignment is turned into
            a ``pysubs2.Alignment``

    Returns:
        pysubs2.SSAStyle object
    """
    to_color = cls._hex_to_ass_color

    style_options = dict(
        fontname=style.font_name,
        fontsize=style.font_size,
        primarycolor=to_color(style.primary_color),
        secondarycolor=to_color(style.secondary_color),
        outlinecolor=to_color(style.outline_color),
        backcolor=to_color(style.back_color),
        bold=style.bold,
        italic=style.italic,
        outline=style.outline_width,
        shadow=style.shadow_depth,
        alignment=pysubs2.Alignment(style.alignment),
        marginl=style.margin_l,
        marginr=style.margin_r,
        marginv=style.margin_v,
    )
    return pysubs2.SSAStyle(**style_options)
533
-
534
@staticmethod
def _hex_to_ass_color(hex_color: str) -> pysubs2.Color:
    """Turn an ``#RRGGBB`` color into a pysubs2 Color with alpha 0.

    Args:
        hex_color: Color in RRGGBB form; leading ``#`` characters are ignored

    Returns:
        pysubs2.Color built from the first three hex byte pairs
    """
    digits = hex_color.lstrip("#")
    red, green, blue = (int(digits[offset : offset + 2], 16) for offset in (0, 2, 4))
    return pysubs2.Color(r=red, g=green, b=blue, a=0)
555
-
556
@staticmethod
def _build_karaoke_text(words: list, effect: str = "sweep") -> str:
    """Build karaoke tag text.

    Args:
        words: Items exposing ``duration`` (seconds) and ``symbol`` (the
            word text)
        effect: Karaoke effect type ("sweep", "instant", "outline");
            unknown values fall back to the sweep tag

    Returns:
        Text with karaoke tags, e.g. "{\\kf45}Hello {\\kf55}world"
    """
    tag_map = {"sweep": "kf", "instant": "k", "outline": "ko"}
    tag = tag_map.get(effect, "kf")

    def _centiseconds(word) -> int:
        # Karaoke tags count centiseconds. Round rather than truncate:
        # 0.29 * 100 evaluates to 28.999999999999996, and losing a
        # centisecond per word makes the highlight run ahead of the audio.
        # A negative duration is clamped to 0.
        return max(0, int(round(word.duration * 100)))

    return " ".join(f"{{\\{tag}{_centiseconds(word)}}}{word.symbol}" for word in words)
577
-
578
-
579
@register_format("ssa")
class SSAFormat(ASSFormat):
    """SubStation Alpha (``.ssa``), the predecessor of ASS.

    Reading and metadata handling are inherited from ASSFormat; only the
    serializer differs, and it emits plain events without karaoke tags.
    """

    extensions = [".ssa"]
    pysubs2_format = "ssa"
    description = "SubStation Alpha - legacy format"

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        fps: float = 25.0,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
        metadata: Optional[Dict] = None,
        **kwargs,
    ) -> bytes:
        """Serialize supervisions as SSA, restoring styles from metadata.

        Args:
            supervisions: List of supervision segments
            include_speaker: Whether to prefix the text with the speaker
            fps: Not used by this writer
            word_level: If True and karaoke is not enabled, expand to
                word-per-segment supervisions first
            karaoke_config: Only consulted to decide whether the word-level
                expansion is skipped
            metadata: Optional dict with ass_info / ass_styles to restore
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            SSA content as UTF-8 encoded bytes
        """
        from .base import expand_to_word_supervisions

        karaoke_on = bool(karaoke_config and karaoke_config.enabled)
        if word_level and not karaoke_on:
            supervisions = expand_to_word_supervisions(supervisions)

        ssa_file = cls._create_ass_file_with_metadata(metadata)

        for segment in supervisions:
            line = segment.text or ""
            if cls._should_include_speaker(segment, include_speaker):
                line = f"{segment.speaker} {line}"
            ssa_file.append(cls._create_event_from_supervision(segment, line))

        rendered = ssa_file.to_string(format_="ssa")
        return rendered.encode("utf-8")
617
-
618
-
619
@register_format("sub")
class MicroDVDFormat(Pysubs2Format):
    """MicroDVD (``.sub``) captions, a frame-based format.

    All behavior is inherited from Pysubs2Format, whose writer passes
    ``fps`` to pysubs2 when the format is ``microdvd``.
    """

    pysubs2_format = "microdvd"
    extensions = [".sub"]
    description = "MicroDVD - frame-based subtitle format"
626
-
627
-
628
@register_format("sami")
class SAMIFormat(Pysubs2Format):
    """SAMI (Synchronized Accessible Media Interchange) captions.

    Covers both the ``.smi`` and ``.sami`` extensions; reading and writing
    are inherited from Pysubs2Format unchanged.
    """

    pysubs2_format = "sami"
    extensions = [".smi", ".sami"]
    description = "SAMI - Microsoft format for accessibility"
635
-
636
-
637
- # Register alias for SMI extension
638
@register_format("smi")
class SMIFormat(SAMIFormat):
    """Alias that registers SAMIFormat under the ``smi`` format name."""