lattifai 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +2 -3
  3. lattifai/alignment/lattice1_aligner.py +117 -4
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/segmenter.py +3 -2
  6. lattifai/alignment/text_align.py +2 -1
  7. lattifai/alignment/tokenizer.py +56 -29
  8. lattifai/audio2.py +162 -183
  9. lattifai/cli/alignment.py +5 -0
  10. lattifai/cli/caption.py +6 -6
  11. lattifai/cli/transcribe.py +1 -5
  12. lattifai/cli/youtube.py +3 -0
  13. lattifai/client.py +41 -12
  14. lattifai/config/__init__.py +21 -3
  15. lattifai/config/alignment.py +7 -0
  16. lattifai/config/caption.py +13 -243
  17. lattifai/config/client.py +16 -0
  18. lattifai/config/event.py +102 -0
  19. lattifai/config/transcription.py +25 -1
  20. lattifai/data/__init__.py +8 -0
  21. lattifai/data/caption.py +228 -0
  22. lattifai/errors.py +78 -53
  23. lattifai/event/__init__.py +65 -0
  24. lattifai/event/lattifai.py +166 -0
  25. lattifai/mixin.py +22 -17
  26. lattifai/transcription/base.py +2 -1
  27. lattifai/transcription/gemini.py +147 -16
  28. lattifai/transcription/lattifai.py +8 -11
  29. lattifai/types.py +1 -1
  30. lattifai/youtube/client.py +143 -48
  31. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/METADATA +129 -58
  32. lattifai-1.3.1.dist-info/RECORD +57 -0
  33. lattifai/__init__.py +0 -88
  34. lattifai/alignment/sentence_splitter.py +0 -350
  35. lattifai/caption/__init__.py +0 -96
  36. lattifai/caption/caption.py +0 -661
  37. lattifai/caption/formats/__init__.py +0 -199
  38. lattifai/caption/formats/base.py +0 -211
  39. lattifai/caption/formats/gemini.py +0 -722
  40. lattifai/caption/formats/json.py +0 -194
  41. lattifai/caption/formats/lrc.py +0 -309
  42. lattifai/caption/formats/nle/__init__.py +0 -9
  43. lattifai/caption/formats/nle/audition.py +0 -561
  44. lattifai/caption/formats/nle/avid.py +0 -423
  45. lattifai/caption/formats/nle/fcpxml.py +0 -549
  46. lattifai/caption/formats/nle/premiere.py +0 -589
  47. lattifai/caption/formats/pysubs2.py +0 -642
  48. lattifai/caption/formats/sbv.py +0 -147
  49. lattifai/caption/formats/tabular.py +0 -338
  50. lattifai/caption/formats/textgrid.py +0 -193
  51. lattifai/caption/formats/ttml.py +0 -652
  52. lattifai/caption/formats/vtt.py +0 -469
  53. lattifai/caption/parsers/__init__.py +0 -9
  54. lattifai/caption/parsers/text_parser.py +0 -147
  55. lattifai/caption/standardize.py +0 -636
  56. lattifai/caption/supervision.py +0 -34
  57. lattifai/caption/utils.py +0 -474
  58. lattifai-1.2.2.dist-info/RECORD +0 -76
  59. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/WHEEL +0 -0
  60. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/entry_points.txt +0 -0
  61. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/licenses/LICENSE +0 -0
  62. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,652 +0,0 @@
1
- """TTML/IMSC1/EBU-TT-D format handler.
2
-
3
- TTML (Timed Text Markup Language) is a W3C standard used by:
4
- - Netflix (IMSC1 profile)
5
- - European broadcasters (EBU-TT-D profile)
6
- - IMF workflows
7
- - Apple Music (iTunes timing)
8
- """
9
-
10
- import xml.etree.ElementTree as ET
11
- from dataclasses import dataclass, field
12
- from pathlib import Path
13
- from typing import Dict, List, Optional, Union
14
- from xml.dom import minidom
15
-
16
- from lhotse.supervision import AlignmentItem
17
- from lhotse.utils import Pathlike
18
-
19
- from ...config.caption import KaraokeConfig
20
- from ..supervision import Supervision
21
- from . import register_format
22
- from .base import FormatHandler
23
-
24
# XML namespace URIs used when reading and writing TTML documents.
TTML_NS = "http://www.w3.org/ns/ttml"  # default namespace of the tt/head/body/div/p/span elements
TTML_STYLE_NS = "http://www.w3.org/ns/ttml#styling"  # serialized with the "tts" prefix
TTML_PARAM_NS = "http://www.w3.org/ns/ttml#parameter"  # serialized with the "ttp" prefix
XML_NS = "http://www.w3.org/XML/1998/namespace"  # xml:lang and xml:id attributes
ITUNES_NS = "http://music.apple.com/lyric-ttml-internal"  # itunes:timing attribute (karaoke output)
30
-
31
-
32
@dataclass
class TTMLStyle:
    """Text styling values emitted on a TTML ``<style>`` element.

    Each field is written verbatim as the value of the corresponding
    ``tts:*`` styling attribute when the document head is built.
    """

    font_family: str = "proportionalSansSerif"  # tts:fontFamily
    font_size: str = "100%"  # tts:fontSize
    color: str = "#FFFFFF"  # tts:color
    # tts:backgroundColor; the attribute is omitted when this is None or empty.
    background_color: Optional[str] = "#000000C0"
    text_align: str = "center"  # tts:textAlign
    display_align: str = "after"  # tts:displayAlign
42
-
43
-
44
@dataclass
class TTMLRegion:
    """Positioning values emitted on a TTML ``<region>`` element."""

    id: str = "bottom"  # written as xml:id and referenced from each <p region="...">
    origin: str = "10% 80%"  # tts:origin
    extent: str = "80% 15%"  # tts:extent
51
-
52
-
53
@dataclass
class TTMLConfig:
    """Options controlling how a TTML document is generated."""

    # One of "imsc1", "ebu-tt-d" or "basic"; the first two add a ttp:profile
    # attribute to the root element, any other value adds none.
    profile: str = "imsc1"
    # Style and region applied to every paragraph without a per-speaker override.
    default_style: TTMLStyle = field(default_factory=TTMLStyle)
    default_region: TTMLRegion = field(default_factory=TTMLRegion)
    # Per-speaker overrides, keyed by the supervision's speaker name.
    speaker_regions: Dict[str, TTMLRegion] = field(default_factory=dict)
    speaker_styles: Dict[str, TTMLStyle] = field(default_factory=dict)
    # Value written to xml:lang on the root element.
    language: str = "en"
63
-
64
-
65
class TTMLFormatBase(FormatHandler):
    """Base TTML format handler (reader/writer).

    Provides TTML parsing (``read``, ``extract_metadata``) and the document
    building helpers used by the concrete, registered TTML handlers.
    """

    # Frame rate assumed for frame-based time expressions; the document's own
    # frame-rate parameters are not consulted, so frame values are approximate.
    _ASSUMED_FRAME_RATE = 30.0

    # Namespace of the ``ttm:agent`` attribute that identifies a paragraph's speaker.
    _TTML_METADATA_NS = "http://www.w3.org/ns/ttml#metadata"

    @staticmethod
    def _local_name(tag: object) -> str:
        """Return an element tag without its ``{namespace}`` qualifier."""
        return tag.rsplit("}", 1)[-1] if isinstance(tag, str) else ""

    @classmethod
    def _parse_ttml_time(cls, time_str: str) -> float:
        """Parse a TTML time expression into seconds.

        Supports:
            - Clock time: HH:MM:SS, HH:MM:SS.mmm or HH:MM:SS:frames
            - Offset time: 1.5h, 2m, 10s, 10.5s, 500ms, 100f
            - A bare number, interpreted as seconds

        Frame counts are converted with an assumed rate of 30 fps.

        Args:
            time_str: The attribute value to parse; may be empty or None.

        Returns:
            The time in seconds, or 0.0 when the value is empty or malformed.
        """
        if not time_str:
            return 0.0

        time_str = time_str.strip()

        try:
            # Offset time. "ms" must be tested before the "m" and "s" suffixes.
            if time_str.endswith("ms"):
                return float(time_str[:-2]) / 1000.0
            if time_str.endswith("h"):
                return float(time_str[:-1]) * 3600.0
            if time_str.endswith("m"):
                return float(time_str[:-1]) * 60.0
            if time_str.endswith("s"):
                return float(time_str[:-1])
            if time_str.endswith("f"):
                return float(time_str[:-1]) / cls._ASSUMED_FRAME_RATE

            # Clock time: HH:MM:SS[.mmm] or HH:MM:SS:FF
            parts = time_str.split(":")
            if len(parts) >= 3:
                hours = float(parts[0])
                minutes = float(parts[1])
                seconds = float(parts[2])
                # A fourth field is a frame count; it is ignored when the
                # seconds field already carries a fraction.
                if len(parts) == 4 and "." not in parts[2]:
                    seconds += float(parts[3]) / cls._ASSUMED_FRAME_RATE
                return hours * 3600 + minutes * 60 + seconds

            # Fallback: plain float seconds
            return float(time_str)
        except ValueError:
            # Malformed values are treated uniformly as time zero instead of
            # aborting the whole import.
            return 0.0

    @classmethod
    def extract_metadata(cls, source: Union[Pathlike, str], **kwargs) -> Dict:
        """Extract TTML metadata including profile, language, and timing mode.

        Args:
            source: A file path, or the TTML content itself.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            Dict containing, when present in the document:
                - ttml_profile: "imsc1", "ebu-tt-d" or "basic"
                - ttml_language: Language code from xml:lang
                - ttml_timing: iTunes timing mode
            An empty dict is returned when the file cannot be read or the
            content is not well-formed XML.
        """
        if isinstance(source, (str, Path)) and not cls.is_content(source):
            try:
                with open(source, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, ValueError):
                # Unreadable path or undecodable bytes: best-effort, no metadata.
                return {}
        else:
            content = str(source)

        metadata: Dict[str, str] = {}

        try:
            # Namespaces are kept intact here so qualified attributes can be looked up.
            root = ET.fromstring(content)
        except ET.ParseError:
            return metadata

        lang = root.get(f"{{{XML_NS}}}lang") or root.get("lang")
        if lang:
            metadata["ttml_language"] = lang

        profile = root.get(f"{{{TTML_PARAM_NS}}}profile") or root.get("profile")
        if profile:
            lowered = profile.lower()
            if "imsc1" in lowered:
                metadata["ttml_profile"] = "imsc1"
            elif "ebu" in lowered:
                metadata["ttml_profile"] = "ebu-tt-d"
            else:
                metadata["ttml_profile"] = "basic"

        timing = root.get(f"{{{ITUNES_NS}}}timing")
        if timing:
            metadata["ttml_timing"] = timing

        return metadata

    @classmethod
    def read(
        cls,
        source: Union[Pathlike, str],
        normalize_text: bool = True,
        **kwargs,
    ) -> List[Supervision]:
        """Read TTML content and return supervisions.

        Every ``<p>`` element under ``<body>`` that has a ``begin`` attribute and
        non-empty text becomes one supervision. Timed ``<span>`` children are
        collected as word-level alignment items.

        Args:
            source: A file path, or the TTML content itself.
            normalize_text: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            Supervisions sorted by start time; an empty list when the content is
            not well-formed XML or has no ``<body>``.
        """
        import re  # function-scope import: the module does not import re at top level

        if isinstance(source, (str, Path)) and not cls.is_content(source):
            with open(source, "r", encoding="utf-8") as f:
                content = f.read()
        else:
            content = str(source)

        # Strip the default/styling/parameter namespace declarations and their
        # attribute prefixes so elements and attributes can be looked up by bare
        # name. This is a textual hack, but tolerant of differing prefixes.
        content = re.sub(r' xmlns="[^"]+"', "", content, count=1)
        content = re.sub(r' xmlns:t?ts="[^"]+"', "", content)
        content = re.sub(r' xmlns:t?tp="[^"]+"', "", content)
        content = re.sub(r" (t?ts|t?tp):", " ", content)

        try:
            root = ET.fromstring(content)
        except ET.ParseError:
            return []

        # Compare against None explicitly: an Element without children is falsy.
        body = root.find("body")
        if body is None:
            body = root.find(f"{{{TTML_NS}}}body")
        if body is None:
            return []

        supervisions = []

        # TTML structure can be nested (div -> div -> p), so walk the whole
        # subtree. Tags are matched by local name so that documents whose
        # elements remain namespace-qualified are still read.
        for p in body.iter():
            if cls._local_name(p.tag) != "p":
                continue

            begin_str = p.get("begin")
            if not begin_str:
                continue

            start = cls._parse_ttml_time(begin_str)
            end_str = p.get("end")
            dur_str = p.get("dur")

            if end_str:
                duration = cls._parse_ttml_time(end_str) - start
            elif dur_str:
                duration = cls._parse_ttml_time(dur_str)
            else:
                duration = 0.0

            text_parts = []
            word_items = []

            # Text directly in p
            if p.text and p.text.strip():
                text_parts.append(p.text.strip())

            for child in p:
                if cls._local_name(child.tag) == "span":
                    span_text = child.text.strip() if child.text else ""
                    # Empty spans contribute neither text nor an alignment item.
                    if span_text:
                        span_begin = child.get("begin")
                        span_end = child.get("end")
                        span_dur = child.get("dur")

                        if span_begin and (span_end or span_dur):
                            # Timed span (word-level or phrase-level). Span times
                            # are assumed to be absolute, not relative to the <p>.
                            s_start = cls._parse_ttml_time(span_begin)
                            if span_end:
                                s_dur = cls._parse_ttml_time(span_end) - s_start
                            else:
                                s_dur = cls._parse_ttml_time(span_dur)
                            word_items.append(AlignmentItem(symbol=span_text, start=s_start, duration=s_dur))

                        text_parts.append(span_text)

                # Text following the child element still belongs to the paragraph.
                if child.tail and child.tail.strip():
                    text_parts.append(child.tail.strip())

            full_text = " ".join(text_parts).strip()

            alignment = None
            if word_items:
                alignment = {"word": word_items}
                # Derive line timing from the words if p timing was missing/zero
                if duration <= 0:
                    start = word_items[0].start
                    end = word_items[-1].start + word_items[-1].duration
                    duration = end - start

            if full_text:
                supervisions.append(
                    Supervision(
                        # TTML identifies elements with xml:id; a plain "id" wins if present.
                        id=p.get("id") or p.get(f"{{{XML_NS}}}id", ""),
                        recording_id="ttml_import",
                        start=start,
                        duration=duration,
                        text=full_text,
                        alignment=alignment,
                        # Metadata agent: bare/ttp-qualified first, then ttm:agent.
                        speaker=(
                            p.get("agent")
                            or p.get(f"{{{TTML_PARAM_NS}}}agent")
                            or p.get(f"{{{cls._TTML_METADATA_NS}}}agent")
                        ),
                    )
                )

        return sorted(supervisions, key=lambda s: s.start)

    @classmethod
    def _seconds_to_ttml_time(cls, seconds: float) -> str:
        """Convert seconds to TTML clock time (HH:MM:SS.mmm).

        Negative inputs are clamped to zero. The value is rounded to whole
        milliseconds before being split into fields, so the rounding carry
        propagates correctly (59.9996 becomes 00:01:00.000, never 00:00:60.000).
        """
        total_ms = int(round(max(seconds, 0) * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

    @classmethod
    def _create_style_element(cls, parent: ET.Element, style_id: str, style: TTMLStyle) -> ET.Element:
        """Append a ``<style>`` element describing ``style`` to ``parent`` and return it."""
        style_elem = ET.SubElement(parent, f"{{{TTML_NS}}}style")
        style_elem.set(f"{{{XML_NS}}}id", style_id)
        for attr_name, value in (
            ("fontFamily", style.font_family),
            ("fontSize", style.font_size),
            ("color", style.color),
            ("textAlign", style.text_align),
            ("displayAlign", style.display_align),
        ):
            style_elem.set(f"{{{TTML_STYLE_NS}}}{attr_name}", value)
        # Background colour is optional and omitted when unset.
        if style.background_color:
            style_elem.set(f"{{{TTML_STYLE_NS}}}backgroundColor", style.background_color)
        return style_elem

    @classmethod
    def _create_region_element(cls, parent: ET.Element, region: TTMLRegion) -> ET.Element:
        """Append a ``<region>`` element describing ``region`` to ``parent`` and return it."""
        region_elem = ET.SubElement(parent, f"{{{TTML_NS}}}region")
        region_elem.set(f"{{{XML_NS}}}id", region.id)
        region_elem.set(f"{{{TTML_STYLE_NS}}}origin", region.origin)
        region_elem.set(f"{{{TTML_STYLE_NS}}}extent", region.extent)
        return region_elem

    @classmethod
    def _build_ttml(
        cls,
        supervisions: List[Supervision],
        config: TTMLConfig,
        include_speaker: bool = True,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
    ) -> ET.Element:
        """Build TTML document structure.

        Args:
            supervisions: List of supervisions to convert
            config: TTML configuration
            include_speaker: Whether to include speaker names
            word_level: Whether to output word-level timing
            karaoke_config: Karaoke configuration. When provided with enabled=True,
                use span-based karaoke; otherwise use p-per-word

        Returns:
            The root ``<tt>`` element of the generated document.
        """
        from .base import expand_to_word_supervisions

        karaoke_enabled = karaoke_config is not None and karaoke_config.enabled
        # Span-based karaoke output needs both word-level output and karaoke enabled.
        use_karaoke = word_level and karaoke_enabled

        # Word-level output without karaoke: one paragraph per word.
        if word_level and not karaoke_enabled:
            supervisions = expand_to_word_supervisions(supervisions)

        ET.register_namespace("", TTML_NS)
        ET.register_namespace("tts", TTML_STYLE_NS)
        ET.register_namespace("ttp", TTML_PARAM_NS)
        ET.register_namespace("xml", XML_NS)
        if use_karaoke:
            ET.register_namespace("itunes", ITUNES_NS)

        root = ET.Element(
            f"{{{TTML_NS}}}tt",
            attrib={
                f"{{{XML_NS}}}lang": config.language,
                f"{{{TTML_PARAM_NS}}}timeBase": "media",
            },
        )

        # The "basic" profile (or any other value) carries no profile attribute.
        if config.profile == "imsc1":
            root.set(f"{{{TTML_PARAM_NS}}}profile", "http://www.w3.org/ns/ttml/profile/imsc1/text")
        elif config.profile == "ebu-tt-d":
            root.set(f"{{{TTML_PARAM_NS}}}profile", "urn:ebu:tt:distribution:2014-01")

        if use_karaoke:
            root.set(f"{{{ITUNES_NS}}}timing", karaoke_config.ttml_timing_mode)

        # Head section: styles, then layout regions.
        head = ET.SubElement(root, f"{{{TTML_NS}}}head")
        styling = ET.SubElement(head, f"{{{TTML_NS}}}styling")
        cls._create_style_element(styling, "default", config.default_style)
        for speaker, style in config.speaker_styles.items():
            cls._create_style_element(styling, f"speaker_{speaker.replace(' ', '_')}", style)

        layout = ET.SubElement(head, f"{{{TTML_NS}}}layout")
        cls._create_region_element(layout, config.default_region)
        for region in config.speaker_regions.values():
            cls._create_region_element(layout, region)

        # Body section: a single div holding one paragraph per supervision.
        body = ET.SubElement(root, f"{{{TTML_NS}}}body")
        div = ET.SubElement(body, f"{{{TTML_NS}}}div")

        for sup in supervisions:
            # Word items are only used in karaoke mode and only when present.
            word_items = []
            if use_karaoke and sup.alignment and "word" in sup.alignment:
                word_items = sup.alignment["word"]
            has_word_alignment = len(word_items) > 0

            # Prefer word timestamps for the paragraph timing when available.
            if has_word_alignment:
                begin = cls._seconds_to_ttml_time(word_items[0].start)
                end = cls._seconds_to_ttml_time(word_items[-1].end)
            else:
                begin = cls._seconds_to_ttml_time(sup.start)
                end = cls._seconds_to_ttml_time(sup.end)

            p = ET.SubElement(div, f"{{{TTML_NS}}}p")
            p.set("begin", begin)
            p.set("end", end)

            if sup.speaker and sup.speaker in config.speaker_regions:
                p.set("region", config.speaker_regions[sup.speaker].id)
            else:
                p.set("region", config.default_region.id)

            if sup.speaker and sup.speaker in config.speaker_styles:
                p.set("style", f"speaker_{sup.speaker.replace(' ', '_')}")
            else:
                p.set("style", "default")

            include_this_speaker = cls._should_include_speaker(sup, include_speaker)

            if has_word_alignment:
                # Karaoke mode: one timed span per word.
                last_index = len(word_items) - 1
                for i, item in enumerate(word_items):
                    span = ET.SubElement(p, f"{{{TTML_NS}}}span")
                    span.set("begin", cls._seconds_to_ttml_time(item.start))
                    span.set("end", cls._seconds_to_ttml_time(item.start + item.duration))
                    span.text = item.symbol
                    # Separate words with a space (not after the last word).
                    if i < last_index:
                        span.tail = " "
            elif include_this_speaker and config.profile != "basic":
                # Speaker label in a bold span, followed by the caption text.
                span = ET.SubElement(p, f"{{{TTML_NS}}}span")
                span.set(f"{{{TTML_STYLE_NS}}}fontWeight", "bold")
                span.text = f"{sup.speaker} "
                span.tail = sup.text.strip() if sup.text else ""
            else:
                p.text = sup.text.strip() if sup.text else ""

        return root

    @classmethod
    def _prettify_xml(cls, element: ET.Element) -> str:
        """Serialize ``element`` as indented XML with blank lines removed."""
        rough_string = ET.tostring(element, encoding="unicode")
        pretty = minidom.parseString(rough_string).toprettyxml(indent=" ")
        # toprettyxml emits whitespace-only lines around text nodes; drop them.
        return "\n".join(line for line in pretty.split("\n") if line.strip())
468
-
469
-
470
@register_format("ttml")
class TTMLFormat(TTMLFormatBase):
    """Standard TTML format."""

    extensions = [".ttml", ".xml"]
    description = "Timed Text Markup Language - W3C standard"

    @classmethod
    def _render(
        cls,
        supervisions: List[Supervision],
        config: TTMLConfig,
        include_speaker: bool,
        word_level: bool,
        karaoke_config: Optional[KaraokeConfig],
    ) -> str:
        """Build the TTML document and return it as pretty-printed XML text."""
        root = cls._build_ttml(
            supervisions,
            config,
            include_speaker=include_speaker,
            word_level=word_level,
            karaoke_config=karaoke_config,
        )
        return cls._prettify_xml(root)

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path,
        include_speaker: bool = True,
        config: Optional[TTMLConfig] = None,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
        **kwargs,
    ) -> Path:
        """Write TTML format.

        Args:
            supervisions: List of supervisions to write
            output_path: Output file path; a suffix other than .ttml/.xml is
                replaced with .ttml
            include_speaker: Whether to include speaker names
            config: TTML configuration (defaults to ``TTMLConfig()``)
            word_level: Whether to output word-level timing
            karaoke_config: Karaoke configuration. When provided with enabled=True,
                use span-based karaoke; otherwise use p-per-word
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The path that was actually written.
        """
        if config is None:
            config = TTMLConfig()

        output_path = Path(output_path)
        if output_path.suffix.lower() not in (".ttml", ".xml"):
            output_path = output_path.with_suffix(".ttml")

        xml_content = cls._render(supervisions, config, include_speaker, word_level, karaoke_config)
        output_path.write_text(xml_content, encoding="utf-8")
        return output_path

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        config: Optional[TTMLConfig] = None,
        word_level: bool = False,
        karaoke_config: Optional[KaraokeConfig] = None,
        metadata: Optional[Dict] = None,
        **kwargs,
    ) -> bytes:
        """Convert to TTML format bytes.

        Args:
            supervisions: List of supervisions to convert
            include_speaker: Whether to include speaker names
            config: TTML configuration (defaults to ``TTMLConfig()``); never
                modified by this method
            word_level: Whether to output word-level timing
            karaoke_config: Karaoke configuration. When provided with enabled=True,
                use span-based karaoke; otherwise use p-per-word
            metadata: Optional metadata dict containing ttml_* keys to restore;
                ``ttml_language`` and ``ttml_profile`` override the config values
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The UTF-8 encoded TTML document.
        """
        import copy  # function-scope import: the module does not import copy at top level

        if config is None:
            config = TTMLConfig()

        if metadata:
            language = metadata.get("ttml_language")
            profile = metadata.get("ttml_profile")
            if language or profile:
                # Apply overrides to a copy so the caller's config object
                # (possibly shared between calls) is left untouched.
                config = copy.copy(config)
                if language:
                    config.language = language
                if profile:
                    config.profile = profile

        xml_content = cls._render(supervisions, config, include_speaker, word_level, karaoke_config)
        return xml_content.encode("utf-8")

    @classmethod
    def write_imsc1(
        cls,
        supervisions: List[Supervision],
        output_path,
        language: str = "en",
        **kwargs,
    ) -> Path:
        """Convenience method to write IMSC1 format.

        Remaining keyword arguments are forwarded to ``write``.
        """
        config = TTMLConfig(profile="imsc1", language=language)
        return cls.write(supervisions, output_path, config=config, **kwargs)

    @classmethod
    def write_ebu_tt_d(
        cls,
        supervisions: List[Supervision],
        output_path,
        language: str = "en",
        **kwargs,
    ) -> Path:
        """Convenience method to write EBU-TT-D format.

        Remaining keyword arguments are forwarded to ``write``.
        """
        config = TTMLConfig(profile="ebu-tt-d", language=language)
        return cls.write(supervisions, output_path, config=config, **kwargs)
583
-
584
-
585
@register_format("imsc1")
class IMSC1Format(TTMLFormatBase):
    """IMSC1 format - Netflix/streaming profile."""

    extensions = [".ttml"]
    description = "IMSC1 - Netflix/streaming TTML profile"

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path,
        include_speaker: bool = True,
        language: str = "en",
        **kwargs,
    ) -> Path:
        """Write IMSC1 format.

        Delegates to ``TTMLFormat.write`` with a config fixed to the "imsc1"
        profile and the given language; extra keyword arguments are forwarded.
        """
        imsc1_config = TTMLConfig(profile="imsc1", language=language)
        return TTMLFormat.write(
            supervisions,
            output_path,
            include_speaker=include_speaker,
            config=imsc1_config,
            **kwargs,
        )

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        language: str = "en",
        **kwargs,
    ) -> bytes:
        """Convert to IMSC1 format bytes.

        Delegates to ``TTMLFormat.to_bytes`` with a config fixed to the "imsc1"
        profile and the given language; extra keyword arguments are forwarded.
        """
        imsc1_config = TTMLConfig(profile="imsc1", language=language)
        return TTMLFormat.to_bytes(
            supervisions,
            include_speaker=include_speaker,
            config=imsc1_config,
            **kwargs,
        )
616
-
617
-
618
@register_format("ebu_tt_d")
class EBUTD_Format(TTMLFormatBase):
    """EBU-TT-D format - European broadcast profile."""

    extensions = [".ttml"]
    description = "EBU-TT-D - European broadcast TTML profile"

    @classmethod
    def write(
        cls,
        supervisions: List[Supervision],
        output_path,
        include_speaker: bool = True,
        language: str = "en",
        **kwargs,
    ) -> Path:
        """Write EBU-TT-D format.

        Delegates to ``TTMLFormat.write`` with a config fixed to the "ebu-tt-d"
        profile and the given language; extra keyword arguments are forwarded.
        """
        ebu_config = TTMLConfig(profile="ebu-tt-d", language=language)
        return TTMLFormat.write(
            supervisions,
            output_path,
            include_speaker=include_speaker,
            config=ebu_config,
            **kwargs,
        )

    @classmethod
    def to_bytes(
        cls,
        supervisions: List[Supervision],
        include_speaker: bool = True,
        language: str = "en",
        **kwargs,
    ) -> bytes:
        """Convert to EBU-TT-D format bytes.

        Delegates to ``TTMLFormat.to_bytes`` with a config fixed to the "ebu-tt-d"
        profile and the given language; extra keyword arguments are forwarded.
        """
        ebu_config = TTMLConfig(profile="ebu-tt-d", language=language)
        return TTMLFormat.to_bytes(
            supervisions,
            include_speaker=include_speaker,
            config=ebu_config,
            **kwargs,
        )
649
-
650
-
651
# Public API of this module: the registered format handlers, their
# configuration dataclasses, and the iTunes namespace constant.
__all__ = [
    "TTMLFormat",
    "IMSC1Format",
    "EBUTD_Format",
    "TTMLConfig",
    "TTMLStyle",
    "TTMLRegion",
    "ITUNES_NS",
]