lattifai 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. lattifai/__init__.py +0 -24
  2. lattifai/alignment/__init__.py +10 -1
  3. lattifai/alignment/lattice1_aligner.py +66 -58
  4. lattifai/alignment/lattice1_worker.py +1 -6
  5. lattifai/alignment/punctuation.py +38 -0
  6. lattifai/alignment/segmenter.py +1 -1
  7. lattifai/alignment/sentence_splitter.py +350 -0
  8. lattifai/alignment/text_align.py +440 -0
  9. lattifai/alignment/tokenizer.py +91 -220
  10. lattifai/caption/__init__.py +82 -6
  11. lattifai/caption/caption.py +335 -1143
  12. lattifai/caption/formats/__init__.py +199 -0
  13. lattifai/caption/formats/base.py +211 -0
  14. lattifai/caption/formats/gemini.py +722 -0
  15. lattifai/caption/formats/json.py +194 -0
  16. lattifai/caption/formats/lrc.py +309 -0
  17. lattifai/caption/formats/nle/__init__.py +9 -0
  18. lattifai/caption/formats/nle/audition.py +561 -0
  19. lattifai/caption/formats/nle/avid.py +423 -0
  20. lattifai/caption/formats/nle/fcpxml.py +549 -0
  21. lattifai/caption/formats/nle/premiere.py +589 -0
  22. lattifai/caption/formats/pysubs2.py +642 -0
  23. lattifai/caption/formats/sbv.py +147 -0
  24. lattifai/caption/formats/tabular.py +338 -0
  25. lattifai/caption/formats/textgrid.py +193 -0
  26. lattifai/caption/formats/ttml.py +652 -0
  27. lattifai/caption/formats/vtt.py +469 -0
  28. lattifai/caption/parsers/__init__.py +9 -0
  29. lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
  30. lattifai/caption/standardize.py +636 -0
  31. lattifai/caption/utils.py +474 -0
  32. lattifai/cli/__init__.py +2 -1
  33. lattifai/cli/caption.py +108 -1
  34. lattifai/cli/transcribe.py +4 -9
  35. lattifai/cli/youtube.py +4 -1
  36. lattifai/client.py +48 -84
  37. lattifai/config/__init__.py +11 -1
  38. lattifai/config/alignment.py +9 -2
  39. lattifai/config/caption.py +267 -23
  40. lattifai/config/media.py +20 -0
  41. lattifai/diarization/__init__.py +41 -1
  42. lattifai/mixin.py +36 -18
  43. lattifai/transcription/base.py +6 -1
  44. lattifai/transcription/lattifai.py +19 -54
  45. lattifai/utils.py +81 -13
  46. lattifai/workflow/__init__.py +28 -4
  47. lattifai/workflow/file_manager.py +2 -5
  48. lattifai/youtube/__init__.py +43 -0
  49. lattifai/youtube/client.py +1170 -0
  50. lattifai/youtube/types.py +23 -0
  51. lattifai-1.2.2.dist-info/METADATA +615 -0
  52. lattifai-1.2.2.dist-info/RECORD +76 -0
  53. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
  54. lattifai/caption/gemini_reader.py +0 -371
  55. lattifai/caption/gemini_writer.py +0 -173
  56. lattifai/cli/app_installer.py +0 -142
  57. lattifai/cli/server.py +0 -44
  58. lattifai/server/app.py +0 -427
  59. lattifai/workflow/youtube.py +0 -577
  60. lattifai-1.2.0.dist-info/METADATA +0 -1133
  61. lattifai-1.2.0.dist-info/RECORD +0 -57
  62. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
  63. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
  64. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
@@ -2,12 +2,13 @@ import gzip
2
2
  import pickle
3
3
  import re
4
4
  from collections import defaultdict
5
- from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
5
+ from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
6
6
 
7
7
  import numpy as np
8
8
 
9
- from lattifai.alignment.phonemizer import G2Phonemizer
10
- from lattifai.caption import Supervision
9
+ # from lattifai.caption import Supervision
10
+ from lhotse.supervision import SupervisionSegment as Supervision # NOTE: Transcriber SupervisionSegment
11
+
11
12
  from lattifai.caption import normalize_text as normalize_html_text
12
13
  from lattifai.errors import (
13
14
  LATTICE_DECODING_FAILURE_HELP,
@@ -16,12 +17,10 @@ from lattifai.errors import (
16
17
  QuotaExceededError,
17
18
  )
18
19
 
19
- PUNCTUATION = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'
20
- END_PUNCTUATION = '.!?"]。!?”】'
21
- PUNCTUATION_SPACE = PUNCTUATION + " "
22
- STAR_TOKEN = "※"
23
-
24
- GROUPING_SEPARATOR = "✹"
20
+ from .phonemizer import G2Phonemizer
21
+ from .punctuation import PUNCTUATION, PUNCTUATION_SPACE
22
+ from .sentence_splitter import SentenceSplitter
23
+ from .text_align import TextAlignResult
25
24
 
26
25
  MAXIMUM_WORD_LENGTH = 40
27
26
 
@@ -79,8 +78,11 @@ def tokenize_multilingual_text(text: str, keep_spaces: bool = True, attach_punct
79
78
  ['Kühlschrank']
80
79
  >>> tokenize_multilingual_text("Hello, World!", attach_punctuation=True)
81
80
  ['Hello,', ' ', 'World!']
81
+ >>> tokenize_multilingual_text("[AED], World!", keep_spaces=False, attach_punctuation=True)
82
+ ['[AED],', 'World!']
82
83
  """
83
84
  # Regex pattern:
85
+ # - \[[A-Z_]+\] matches bracketed annotations like [APPLAUSE], [MUSIC], [SPEAKER_01]
84
86
  # - [a-zA-Z0-9\u00C0-\u024F]+ matches Latin letters (including accented chars like ü, ö, ä, ß, é, etc.)
85
87
  # - (?:'[a-zA-Z]{1,2})? optionally matches contractions like 's, 't, 'm, 'll, 're, 've
86
88
  # - [\u4e00-\u9fff] matches CJK characters
@@ -89,7 +91,7 @@ def tokenize_multilingual_text(text: str, keep_spaces: bool = True, attach_punct
89
91
  # - \u00C0-\u00FF: Latin-1 Supplement (À-ÿ)
90
92
  # - \u0100-\u017F: Latin Extended-A
91
93
  # - \u0180-\u024F: Latin Extended-B
92
- pattern = re.compile(r"([a-zA-Z0-9\u00C0-\u024F]+(?:'[a-zA-Z]{1,2})?|[\u4e00-\u9fff]|.)")
94
+ pattern = re.compile(r"(\[[A-Z_]+\]|[a-zA-Z0-9\u00C0-\u024F]+(?:'[a-zA-Z]{1,2})?|[\u4e00-\u9fff]|.)")
93
95
 
94
96
  # filter(None, ...) removes any empty strings from re.findall results
95
97
  tokens = list(filter(None, pattern.findall(text)))
@@ -126,84 +128,12 @@ class LatticeTokenizer:
126
128
  self.g2p_model: Any = None # Placeholder for G2P model
127
129
  self.dictionaries = defaultdict(lambda: [])
128
130
  self.oov_word = "<unk>"
129
- self.sentence_splitter = None
131
+ self.sentence_splitter: Optional[SentenceSplitter] = None
130
132
  self.device = "cpu"
131
133
 
132
134
  def init_sentence_splitter(self):
133
- if self.sentence_splitter is not None:
134
- return
135
-
136
- import onnxruntime as ort
137
- from wtpsplit import SaT
138
-
139
- providers = []
140
- device = self.device
141
- if device.startswith("cuda") and ort.get_all_providers().count("CUDAExecutionProvider") > 0:
142
- providers.append("CUDAExecutionProvider")
143
- elif device.startswith("mps") and ort.get_all_providers().count("MPSExecutionProvider") > 0:
144
- providers.append("MPSExecutionProvider")
145
-
146
- if self.model_hub == "modelscope":
147
- from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
148
-
149
- downloaded_path = ms_snapshot("LattifAI/OmniTokenizer")
150
- sat = SaT(
151
- f"{downloaded_path}/sat-3l-sm",
152
- tokenizer_name_or_path=f"{downloaded_path}/xlm-roberta-base",
153
- ort_providers=providers + ["CPUExecutionProvider"],
154
- )
155
- else:
156
- sat = SaT(
157
- "sat-3l-sm",
158
- ort_providers=providers + ["CPUExecutionProvider"],
159
- )
160
- self.sentence_splitter = sat
161
-
162
- @staticmethod
163
- def _resplit_special_sentence_types(sentence: str) -> List[str]:
164
- """
165
- Re-split special sentence types.
166
-
167
- Examples:
168
- '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:']
169
- '[MUSIC] &gt;&gt; SPEAKER:' -> ['[MUSIC]', '&gt;&gt; SPEAKER:']
170
-
171
- Special handling patterns:
172
- 1. Separate special marks at the beginning (e.g., [APPLAUSE], [MUSIC], etc.) from subsequent speaker marks
173
- 2. Use speaker marks (&gt;&gt; or other separators) as split points
174
-
175
- Args:
176
- sentence: Input sentence string
177
-
178
- Returns:
179
- List of re-split sentences. If no special marks are found, returns the original sentence in a list
180
- """
181
- # Detect special mark patterns: [SOMETHING] &gt;&gt; SPEAKER:
182
- # or other forms like [SOMETHING] SPEAKER:
183
-
184
- # Pattern 1: [mark] HTML-encoded separator speaker:
185
- pattern1 = r"^(\[[^\]]+\])\s+(&gt;&gt;|>>)\s+(.+)$"
186
- match1 = re.match(pattern1, sentence.strip())
187
- if match1:
188
- special_mark = match1.group(1)
189
- separator = match1.group(2)
190
- speaker_part = match1.group(3)
191
- return [special_mark, f"{separator} {speaker_part}"]
192
-
193
- # Pattern 2: [mark] speaker:
194
- pattern2 = r"^(\[[^\]]+\])\s+([^:]+:)(.*)$"
195
- match2 = re.match(pattern2, sentence.strip())
196
- if match2:
197
- special_mark = match2.group(1)
198
- speaker_label = match2.group(2)
199
- remaining = match2.group(3).strip()
200
- if remaining:
201
- return [special_mark, f"{speaker_label} {remaining}"]
202
- else:
203
- return [special_mark, speaker_label]
204
-
205
- # If no special pattern matches, return the original sentence
206
- return [sentence]
135
+ if self.sentence_splitter is None:
136
+ self.sentence_splitter = SentenceSplitter(device=self.device, model_hub=self.model_hub)
207
137
 
208
138
  @classmethod
209
139
  def from_pretrained(
@@ -308,127 +238,45 @@ class LatticeTokenizer:
308
238
 
309
239
  return {}
310
240
 
311
- def split_sentences(self, supervisions: List[Supervision], strip_whitespace=True) -> List[str]:
241
+ def split_sentences(self, supervisions: List[Supervision], strip_whitespace=True) -> List[Supervision]:
312
242
  """Split supervisions into sentences using the sentence splitter.
313
243
 
314
- Carefull about speaker changes.
244
+ Careful about speaker changes.
315
245
  """
316
- texts, speakers = [], []
317
- text_len, sidx = 0, 0
318
-
319
- def flush_segment(end_idx: int, speaker: Optional[str] = None):
320
- """Flush accumulated text from sidx to end_idx with given speaker."""
321
- nonlocal text_len, sidx
322
- if sidx <= end_idx:
323
- if len(speakers) < len(texts) + 1:
324
- speakers.append(speaker)
325
- text = " ".join(sup.text for sup in supervisions[sidx : end_idx + 1])
326
- texts.append(text)
327
- sidx = end_idx + 1
328
- text_len = 0
329
-
330
- for s, supervision in enumerate(supervisions):
331
- text_len += len(supervision.text)
332
- is_last = s == len(supervisions) - 1
333
-
334
- if supervision.speaker:
335
- # Flush previous segment without speaker (if any)
336
- if sidx < s:
337
- flush_segment(s - 1, None)
338
- text_len = len(supervision.text)
339
-
340
- # Check if we should flush this speaker's segment now
341
- next_has_speaker = not is_last and supervisions[s + 1].speaker
342
- if is_last or next_has_speaker:
343
- flush_segment(s, supervision.speaker)
344
- else:
345
- speakers.append(supervision.speaker)
346
-
347
- elif text_len >= 2000 or is_last:
348
- flush_segment(s, None)
349
-
350
- assert len(speakers) == len(texts), f"len(speakers)={len(speakers)} != len(texts)={len(texts)}"
351
- sentences = self.sentence_splitter.split(texts, threshold=0.15, strip_whitespace=strip_whitespace, batch_size=8)
352
-
353
- supervisions, remainder = [], ""
354
- for k, (_speaker, _sentences) in enumerate(zip(speakers, sentences)):
355
- # Prepend remainder from previous iteration to the first sentence
356
- if _sentences and remainder:
357
- _sentences[0] = remainder + _sentences[0]
358
- remainder = ""
359
-
360
- if not _sentences:
361
- continue
362
-
363
- # Process and re-split special sentence types
364
- processed_sentences = []
365
- for s, _sentence in enumerate(_sentences):
366
- if remainder:
367
- _sentence = remainder + _sentence
368
- remainder = ""
369
- # Detect and split special sentence types: e.g., '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:'] # noqa: E501
370
- resplit_parts = self._resplit_special_sentence_types(_sentence)
371
- if any(resplit_parts[-1].endswith(sp) for sp in [":", ":"]):
372
- if s < len(_sentences) - 1:
373
- _sentences[s + 1] = resplit_parts[-1] + " " + _sentences[s + 1]
374
- else: # last part
375
- remainder = resplit_parts[-1] + " "
376
- processed_sentences.extend(resplit_parts[:-1])
377
- else:
378
- processed_sentences.extend(resplit_parts)
379
- _sentences = processed_sentences
380
-
381
- if not _sentences:
382
- if remainder:
383
- _sentences, remainder = [remainder.strip()], ""
384
- else:
385
- continue
246
+ self.init_sentence_splitter()
247
+ return self.sentence_splitter.split_sentences(supervisions, strip_whitespace=strip_whitespace)
248
+
249
+ def tokenize(
250
+ self, supervisions: Union[List[Supervision], TextAlignResult], split_sentence: bool = False, boost: float = 0.0
251
+ ) -> Tuple[str, Dict[str, Any]]:
252
+ if isinstance(supervisions[0], Supervision):
253
+ if split_sentence:
254
+ supervisions = self.split_sentences(supervisions)
255
+
256
+ pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions])
257
+ response = self.client_wrapper.post(
258
+ "tokenize",
259
+ json={
260
+ "model_name": self.model_name,
261
+ "supervisions": [s.to_dict() for s in supervisions],
262
+ "pronunciation_dictionaries": pronunciation_dictionaries,
263
+ },
264
+ )
265
+ else:
266
+ pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions[0]])
267
+ pronunciation_dictionaries.update(self.prenormalize([s.text for s in supervisions[1]]))
268
+
269
+ response = self.client_wrapper.post(
270
+ "difftokenize",
271
+ json={
272
+ "model_name": self.model_name,
273
+ "supervisions": [s.to_dict() for s in supervisions[0]],
274
+ "transcription": [s.to_dict() for s in supervisions[1]],
275
+ "pronunciation_dictionaries": pronunciation_dictionaries,
276
+ "boost": boost,
277
+ },
278
+ )
386
279
 
387
- if any(_sentences[-1].endswith(ep) for ep in END_PUNCTUATION):
388
- supervisions.extend(
389
- Supervision(text=text, speaker=(_speaker if s == 0 else None)) for s, text in enumerate(_sentences)
390
- )
391
- _speaker = None # reset speaker after use
392
- else:
393
- supervisions.extend(
394
- Supervision(text=text, speaker=(_speaker if s == 0 else None))
395
- for s, text in enumerate(_sentences[:-1])
396
- )
397
- remainder = _sentences[-1] + " " + remainder
398
- if k < len(speakers) - 1 and speakers[k + 1] is not None: # next speaker is set
399
- supervisions.append(
400
- Supervision(text=remainder.strip(), speaker=_speaker if len(_sentences) == 1 else None)
401
- )
402
- remainder = ""
403
- elif len(_sentences) == 1:
404
- if k == len(speakers) - 1:
405
- pass # keep _speaker for the last supervision
406
- else:
407
- assert speakers[k + 1] is None
408
- speakers[k + 1] = _speaker
409
- else:
410
- assert len(_sentences) > 1
411
- _speaker = None # reset speaker if sentence not ended
412
-
413
- if remainder.strip():
414
- supervisions.append(Supervision(text=remainder.strip(), speaker=_speaker))
415
-
416
- return supervisions
417
-
418
- def tokenize(self, supervisions: List[Supervision], split_sentence: bool = False) -> Tuple[str, Dict[str, Any]]:
419
- if split_sentence:
420
- self.init_sentence_splitter()
421
- supervisions = self.split_sentences(supervisions)
422
-
423
- pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions])
424
- response = self.client_wrapper.post(
425
- "tokenize",
426
- json={
427
- "model_name": self.model_name,
428
- "supervisions": [s.to_dict() for s in supervisions],
429
- "pronunciation_dictionaries": pronunciation_dictionaries,
430
- },
431
- )
432
280
  if response.status_code == 402:
433
281
  raise QuotaExceededError(response.json().get("detail", "Quota exceeded"))
434
282
  if response.status_code != 200:
@@ -445,28 +293,47 @@ class LatticeTokenizer:
445
293
  self,
446
294
  lattice_id: str,
447
295
  lattice_results: Tuple[np.ndarray, Any, Any, float, float],
448
- supervisions: List[Supervision],
296
+ supervisions: Union[List[Supervision], TextAlignResult],
449
297
  return_details: bool = False,
450
298
  start_margin: float = 0.08,
451
299
  end_margin: float = 0.20,
452
300
  ) -> List[Supervision]:
453
301
  emission, results, labels, frame_shift, offset, channel = lattice_results # noqa: F841
454
- response = self.client_wrapper.post(
455
- "detokenize",
456
- json={
457
- "model_name": self.model_name,
458
- "lattice_id": lattice_id,
459
- "frame_shift": frame_shift,
460
- "results": [t.to_dict() for t in results[0]],
461
- "labels": labels[0],
462
- "offset": offset,
463
- "channel": channel,
464
- "return_details": False if return_details is None else return_details,
465
- "destroy_lattice": True,
466
- "start_margin": start_margin,
467
- "end_margin": end_margin,
468
- },
469
- )
302
+ if isinstance(supervisions[0], Supervision):
303
+ response = self.client_wrapper.post(
304
+ "detokenize",
305
+ json={
306
+ "model_name": self.model_name,
307
+ "lattice_id": lattice_id,
308
+ "frame_shift": frame_shift,
309
+ "results": [t.to_dict() for t in results[0]],
310
+ "labels": labels[0],
311
+ "offset": offset,
312
+ "channel": channel,
313
+ "return_details": False if return_details is None else return_details,
314
+ "destroy_lattice": True,
315
+ "start_margin": start_margin,
316
+ "end_margin": end_margin,
317
+ },
318
+ )
319
+ else:
320
+ response = self.client_wrapper.post(
321
+ "diffdetokenize",
322
+ json={
323
+ "model_name": self.model_name,
324
+ "lattice_id": lattice_id,
325
+ "frame_shift": frame_shift,
326
+ "results": [t.to_dict() for t in results[0]],
327
+ "labels": labels[0],
328
+ "offset": offset,
329
+ "channel": channel,
330
+ "return_details": False if return_details is None else return_details,
331
+ "destroy_lattice": True,
332
+ "start_margin": start_margin,
333
+ "end_margin": end_margin,
334
+ },
335
+ )
336
+
470
337
  if response.status_code == 400:
471
338
  raise LatticeDecodingError(
472
339
  lattice_id,
@@ -487,7 +354,11 @@ class LatticeTokenizer:
487
354
  # Add emission confidence scores for segments and word-level alignments
488
355
  _add_confidence_scores(alignments, emission, labels[0], frame_shift, offset)
489
356
 
490
- alignments = _update_alignments_speaker(supervisions, alignments)
357
+ if isinstance(supervisions[0], Supervision):
358
+ alignments = _update_alignments_speaker(supervisions, alignments)
359
+ else:
360
+ # NOTE: Text Diff Alignment >> speaker has been handled in the backend service
361
+ pass
491
362
 
492
363
  return alignments
493
364
 
@@ -1,20 +1,96 @@
1
- from typing import List, Optional
1
+ """Caption processing module for LattifAI.
2
2
 
3
- from lhotse.utils import Pathlike
3
+ This module provides comprehensive caption/subtitle processing capabilities:
4
+ - Multi-format reading and writing (SRT, VTT, ASS, TTML, etc.)
5
+ - Professional NLE integration (Avid, Final Cut Pro, Premiere Pro, DaVinci Resolve)
6
+ - Audio workstation support (Pro Tools, Adobe Audition)
7
+ - Advanced features: timecode offset, overlap resolution, word-level timing
8
+ """
4
9
 
5
- from ..config.caption import InputCaptionFormat
10
+ from ..config.caption import InputCaptionFormat, OutputCaptionFormat
6
11
  from .caption import Caption
7
- from .gemini_reader import GeminiReader, GeminiSegment
8
- from .gemini_writer import GeminiWriter
12
+ from .formats.gemini import GeminiReader, GeminiSegment, GeminiWriter
13
+ from .formats.nle.audition import (
14
+ AuditionCSVConfig,
15
+ AuditionCSVWriter,
16
+ EdiMarkerConfig,
17
+ EdiMarkerWriter,
18
+ )
19
+
20
+ # Professional NLE format writers (re-exported from formats/)
21
+ from .formats.nle.avid import AvidDSConfig, AvidDSWriter, FrameRate
22
+ from .formats.nle.fcpxml import FCPXMLConfig, FCPXMLStyle, FCPXMLWriter
23
+ from .formats.nle.premiere import PremiereXMLConfig, PremiereXMLWriter
24
+ from .formats.ttml import TTMLConfig, TTMLFormat, TTMLRegion, TTMLStyle
25
+ from .parsers.text_parser import normalize_text
26
+ from .standardize import (
27
+ CaptionStandardizer,
28
+ CaptionValidator,
29
+ StandardizationConfig,
30
+ ValidationResult,
31
+ apply_margins_to_captions,
32
+ standardize_captions,
33
+ )
9
34
  from .supervision import Supervision
10
- from .text_parser import normalize_text
35
+
36
+ # Create TTMLWriter alias for backward compatibility
37
+ TTMLWriter = TTMLFormat
38
+
39
+ # Utility functions
40
+ from .utils import (
41
+ CollisionMode,
42
+ TimecodeOffset,
43
+ apply_timecode_offset,
44
+ detect_overlaps,
45
+ format_srt_timestamp,
46
+ generate_srt_content,
47
+ resolve_overlaps,
48
+ split_long_lines,
49
+ )
11
50
 
12
51
  __all__ = [
52
+ # Core classes
13
53
  "Caption",
14
54
  "Supervision",
55
+ # Standardization
56
+ "CaptionStandardizer",
57
+ "CaptionValidator",
58
+ "StandardizationConfig",
59
+ "ValidationResult",
60
+ "standardize_captions",
61
+ "apply_margins_to_captions",
62
+ # Gemini format support
15
63
  "GeminiReader",
16
64
  "GeminiWriter",
17
65
  "GeminiSegment",
66
+ # Text utilities
18
67
  "normalize_text",
68
+ # Format types
19
69
  "InputCaptionFormat",
70
+ "OutputCaptionFormat",
71
+ # Professional format writers
72
+ "AvidDSWriter",
73
+ "AvidDSConfig",
74
+ "FCPXMLWriter",
75
+ "FCPXMLConfig",
76
+ "FCPXMLStyle",
77
+ "PremiereXMLWriter",
78
+ "PremiereXMLConfig",
79
+ "AuditionCSVWriter",
80
+ "AuditionCSVConfig",
81
+ "EdiMarkerWriter",
82
+ "EdiMarkerConfig",
83
+ "TTMLWriter",
84
+ "TTMLConfig",
85
+ "TTMLStyle",
86
+ "TTMLRegion",
87
+ # Utilities
88
+ "CollisionMode",
89
+ "TimecodeOffset",
90
+ "apply_timecode_offset",
91
+ "resolve_overlaps",
92
+ "detect_overlaps",
93
+ "split_long_lines",
94
+ "format_srt_timestamp",
95
+ "generate_srt_content",
20
96
  ]