lattifai 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +2 -3
  3. lattifai/alignment/lattice1_aligner.py +117 -4
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/segmenter.py +3 -2
  6. lattifai/alignment/text_align.py +2 -1
  7. lattifai/alignment/tokenizer.py +56 -29
  8. lattifai/audio2.py +162 -183
  9. lattifai/cli/alignment.py +5 -0
  10. lattifai/cli/caption.py +6 -6
  11. lattifai/cli/transcribe.py +1 -5
  12. lattifai/cli/youtube.py +3 -0
  13. lattifai/client.py +41 -12
  14. lattifai/config/__init__.py +21 -3
  15. lattifai/config/alignment.py +7 -0
  16. lattifai/config/caption.py +13 -243
  17. lattifai/config/client.py +16 -0
  18. lattifai/config/event.py +102 -0
  19. lattifai/config/transcription.py +25 -1
  20. lattifai/data/__init__.py +8 -0
  21. lattifai/data/caption.py +228 -0
  22. lattifai/errors.py +78 -53
  23. lattifai/event/__init__.py +65 -0
  24. lattifai/event/lattifai.py +166 -0
  25. lattifai/mixin.py +22 -17
  26. lattifai/transcription/base.py +2 -1
  27. lattifai/transcription/gemini.py +147 -16
  28. lattifai/transcription/lattifai.py +8 -11
  29. lattifai/types.py +1 -1
  30. lattifai/youtube/client.py +143 -48
  31. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/METADATA +129 -58
  32. lattifai-1.3.1.dist-info/RECORD +57 -0
  33. lattifai/__init__.py +0 -88
  34. lattifai/alignment/sentence_splitter.py +0 -350
  35. lattifai/caption/__init__.py +0 -96
  36. lattifai/caption/caption.py +0 -661
  37. lattifai/caption/formats/__init__.py +0 -199
  38. lattifai/caption/formats/base.py +0 -211
  39. lattifai/caption/formats/gemini.py +0 -722
  40. lattifai/caption/formats/json.py +0 -194
  41. lattifai/caption/formats/lrc.py +0 -309
  42. lattifai/caption/formats/nle/__init__.py +0 -9
  43. lattifai/caption/formats/nle/audition.py +0 -561
  44. lattifai/caption/formats/nle/avid.py +0 -423
  45. lattifai/caption/formats/nle/fcpxml.py +0 -549
  46. lattifai/caption/formats/nle/premiere.py +0 -589
  47. lattifai/caption/formats/pysubs2.py +0 -642
  48. lattifai/caption/formats/sbv.py +0 -147
  49. lattifai/caption/formats/tabular.py +0 -338
  50. lattifai/caption/formats/textgrid.py +0 -193
  51. lattifai/caption/formats/ttml.py +0 -652
  52. lattifai/caption/formats/vtt.py +0 -469
  53. lattifai/caption/parsers/__init__.py +0 -9
  54. lattifai/caption/parsers/text_parser.py +0 -147
  55. lattifai/caption/standardize.py +0 -636
  56. lattifai/caption/supervision.py +0 -34
  57. lattifai/caption/utils.py +0 -474
  58. lattifai-1.2.2.dist-info/RECORD +0 -76
  59. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/WHEEL +0 -0
  60. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/entry_points.txt +0 -0
  61. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/licenses/LICENSE +0 -0
  62. {lattifai-1.2.2.dist-info → lattifai-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,31 @@
1
1
  """Configuration system for LattifAI using nemo_run."""
2
2
 
3
- from .alignment import AlignmentConfig
4
- from .caption import (
5
- CaptionConfig,
3
+ # Re-export caption config classes from lattifai-captions package
4
+ from lattifai.caption.config import (
5
+ ALL_CAPTION_FORMATS,
6
+ CAPTION_FORMATS,
7
+ INPUT_CAPTION_FORMATS,
8
+ OUTPUT_CAPTION_FORMATS,
6
9
  CaptionFonts,
7
10
  CaptionStyle,
11
+ InputCaptionFormat,
8
12
  KaraokeConfig,
13
+ OutputCaptionFormat,
9
14
  StandardizationConfig,
10
15
  )
16
+
17
+ from .alignment import AlignmentConfig
18
+
19
+ # CaptionConfig is defined in lattifai-python (workflow config)
20
+ from .caption import CaptionConfig
11
21
  from .client import ClientConfig
12
22
  from .diarization import DiarizationConfig
23
+ from .event import EventConfig
13
24
  from .media import AUDIO_FORMATS, MEDIA_FORMATS, VIDEO_FORMATS, MediaConfig
14
25
  from .transcription import TranscriptionConfig
15
26
 
16
27
  __all__ = [
28
+ "EventConfig",
17
29
  "ClientConfig",
18
30
  "AlignmentConfig",
19
31
  "CaptionConfig",
@@ -21,6 +33,12 @@ __all__ = [
21
33
  "CaptionStyle",
22
34
  "KaraokeConfig",
23
35
  "StandardizationConfig",
36
+ "InputCaptionFormat",
37
+ "OutputCaptionFormat",
38
+ "INPUT_CAPTION_FORMATS",
39
+ "OUTPUT_CAPTION_FORMATS",
40
+ "ALL_CAPTION_FORMATS",
41
+ "CAPTION_FORMATS",
24
42
  "TranscriptionConfig",
25
43
  "DiarizationConfig",
26
44
  "MediaConfig",
@@ -100,6 +100,13 @@ class AlignmentConfig:
100
100
  Default: 5.0. Typical range: 0.0-10.0.
101
101
  """
102
102
 
103
+ transition_penalty: float = 0.0
104
+ """Penalty for token transitions in the decoding graph to discourage duration=1 tokens.
105
+ A negative value penalizes transitions (moving to next token), making the model prefer
106
+ self-loops (staying on current token longer). This helps prevent spurious short-duration alignments.
107
+ Default: 0.0 (no penalty). Typical range: -1.0 to 0.0 (e.g., -0.5).
108
+ """
109
+
103
110
  client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
104
111
  """Reference to the SyncAPIClient instance. Auto-set during client initialization."""
105
112
 
@@ -1,248 +1,18 @@
1
- """Caption I/O configuration for LattifAI."""
1
+ """Caption I/O configuration for LattifAI SDK."""
2
2
 
3
- from dataclasses import dataclass, field
3
+ from dataclasses import dataclass
4
4
  from pathlib import Path
5
- from typing import TYPE_CHECKING, Dict, Literal, Optional, get_args
6
-
7
- from lhotse.utils import Pathlike
8
-
9
- # =============================================================================
10
- # Caption Style Configuration Classes
11
- # =============================================================================
12
-
13
-
14
- class CaptionFonts:
15
- """Common caption font constants.
16
-
17
- These are reference constants for popular fonts. You can use any
18
- system font name as the font_name parameter in CaptionStyle.
19
- """
20
-
21
- # Western fonts
22
- ARIAL = "Arial"
23
- IMPACT = "Impact"
24
- VERDANA = "Verdana"
25
- HELVETICA = "Helvetica"
26
-
27
- # Chinese fonts
28
- NOTO_SANS_SC = "Noto Sans SC"
29
- MICROSOFT_YAHEI = "Microsoft YaHei"
30
- PINGFANG_SC = "PingFang SC"
31
- SIMHEI = "SimHei"
32
-
33
- # Japanese fonts
34
- NOTO_SANS_JP = "Noto Sans JP"
35
- MEIRYO = "Meiryo"
36
- HIRAGINO_SANS = "Hiragino Sans"
37
-
38
- # Korean fonts
39
- NOTO_SANS_KR = "Noto Sans KR"
40
- MALGUN_GOTHIC = "Malgun Gothic"
41
-
42
-
43
- @dataclass
44
- class CaptionStyle:
45
- """Caption style configuration for ASS/TTML formats.
46
-
47
- Attributes:
48
- primary_color: Main text color (#RRGGBB)
49
- secondary_color: Secondary/highlight color (#RRGGBB)
50
- outline_color: Text outline color (#RRGGBB)
51
- back_color: Shadow color (#RRGGBB)
52
- font_name: Font family name (use CaptionFonts constants or any system font)
53
- font_size: Font size in points
54
- bold: Enable bold text
55
- italic: Enable italic text
56
- outline_width: Outline thickness
57
- shadow_depth: Shadow distance
58
- alignment: ASS alignment (1-9, numpad style), 2=bottom-center
59
- margin_l: Left margin in pixels
60
- margin_r: Right margin in pixels
61
- margin_v: Vertical margin in pixels
62
- """
63
-
64
- # Colors (#RRGGBB format)
65
- primary_color: str = "#FFFFFF"
66
- secondary_color: str = "#00FFFF"
67
- outline_color: str = "#000000"
68
- back_color: str = "#000000"
69
-
70
- # Font
71
- font_name: str = CaptionFonts.ARIAL
72
- font_size: int = 48
73
- bold: bool = False
74
- italic: bool = False
75
-
76
- # Border and shadow
77
- outline_width: float = 2.0
78
- shadow_depth: float = 1.0
79
-
80
- # Position
81
- alignment: int = 2
82
- margin_l: int = 20
83
- margin_r: int = 20
84
- margin_v: int = 20
85
-
86
-
87
- @dataclass
88
- class KaraokeConfig:
89
- """Karaoke export configuration.
90
-
91
- Attributes:
92
- enabled: Whether karaoke mode is enabled
93
- effect: Karaoke effect type
94
- - "sweep": Gradual fill from left to right (ASS \\kf tag)
95
- - "instant": Instant highlight (ASS \\k tag)
96
- - "outline": Outline then fill (ASS \\ko tag)
97
- style: Caption style configuration (font, colors, position)
98
- lrc_precision: LRC time precision ("centisecond" or "millisecond")
99
- lrc_metadata: LRC metadata dict (ar, ti, al, etc.)
100
- ttml_timing_mode: TTML timing attribute ("Word" or "Line")
101
- """
102
-
103
- enabled: bool = False
104
- effect: Literal["sweep", "instant", "outline"] = "sweep"
105
- style: CaptionStyle = field(default_factory=CaptionStyle)
106
-
107
- # LRC specific
108
- lrc_precision: Literal["centisecond", "millisecond"] = "millisecond"
109
- lrc_metadata: Dict[str, str] = field(default_factory=dict)
110
-
111
- # TTML specific
112
- ttml_timing_mode: Literal["Word", "Line"] = "Word"
113
-
114
-
115
- @dataclass
116
- class StandardizationConfig:
117
- """Caption standardization configuration following broadcast guidelines.
118
-
119
- Reference Standards:
120
- - Netflix Timed Text Style Guide
121
- - BBC Subtitle Guidelines
122
- - EBU-TT-D Standard
123
-
124
- Attributes:
125
- min_duration: Minimum segment duration (seconds). Netflix recommends 5/6s, BBC 0.3s
126
- max_duration: Maximum segment duration (seconds). Netflix/BBC recommends 7s
127
- min_gap: Minimum gap between segments (seconds). 80ms prevents subtitle flicker
128
- max_lines: Maximum lines per segment. Broadcast standard is typically 2
129
- max_chars_per_line: Maximum characters per line. CJK auto-adjusted by ÷2 (e.g., 42 → 21)
130
- optimal_cps: Optimal reading speed (chars/sec). Netflix recommends 17-20 CPS
131
- start_margin: Start margin (seconds) before first word. None = no adjustment (default)
132
- end_margin: End margin (seconds) after last word. None = no adjustment (default)
133
- margin_collision_mode: How to handle collisions: 'trim' (reduce margin) or 'gap' (maintain min_gap)
134
- """
135
-
136
- min_duration: float = 0.8
137
- max_duration: float = 7.0
138
- min_gap: float = 0.08
139
- max_lines: int = 2
140
- max_chars_per_line: int = 42
141
- optimal_cps: float = 17.0
142
- start_margin: Optional[float] = None
143
- end_margin: Optional[float] = None
144
- margin_collision_mode: Literal["trim", "gap"] = "trim"
145
-
146
- def __post_init__(self):
147
- """Validate configuration parameters."""
148
- if self.min_duration <= 0:
149
- raise ValueError("min_duration must be positive")
150
- if self.max_duration <= self.min_duration:
151
- raise ValueError("max_duration must be greater than min_duration")
152
- if self.min_gap < 0:
153
- raise ValueError("min_gap cannot be negative")
154
- if self.max_lines < 1:
155
- raise ValueError("max_lines must be at least 1")
156
- if self.max_chars_per_line < 10:
157
- raise ValueError("max_chars_per_line must be at least 10")
158
- if self.start_margin is not None and self.start_margin < 0:
159
- raise ValueError("start_margin cannot be negative")
160
- if self.end_margin is not None and self.end_margin < 0:
161
- raise ValueError("end_margin cannot be negative")
162
- if self.margin_collision_mode not in ("trim", "gap"):
163
- raise ValueError("margin_collision_mode must be 'trim' or 'gap'")
164
-
165
-
166
- # =============================================================================
167
- # Format Type Definitions (Single Source of Truth)
168
- # =============================================================================
169
-
170
- # Type alias for input caption formats (all formats with registered readers)
171
- InputCaptionFormat = Literal[
172
- # Standard subtitle formats
173
- "srt",
174
- "vtt", # WebVTT (auto-detects YouTube VTT with word-level timestamps)
175
- "ass",
176
- "ssa",
177
- "sub",
178
- "sbv",
179
- "txt",
180
- "sami",
181
- "smi",
182
- # Tabular formats
183
- "csv",
184
- "tsv",
185
- "aud",
186
- "json",
187
- # Specialized formats
188
- "textgrid", # Praat TextGrid
189
- "gemini", # Gemini/YouTube transcript format
190
- # Professional NLE formats
191
- "avid_ds",
192
- "fcpxml",
193
- "premiere_xml",
194
- "audition_csv",
195
- # Special
196
- "auto", # Auto-detect format
197
- ]
198
-
199
- # Type alias for output caption formats (all formats with registered writers)
200
- OutputCaptionFormat = Literal[
201
- # Standard subtitle formats
202
- "srt",
203
- "vtt", # WebVTT (use karaoke_config.enabled=True for YouTube VTT style output)
204
- "ass",
205
- "ssa",
206
- "sub",
207
- "sbv",
208
- "txt",
209
- "sami",
210
- "smi",
211
- # Tabular formats
212
- "csv",
213
- "tsv",
214
- "aud",
215
- "json",
216
- # Specialized formats
217
- "textgrid", # Praat TextGrid
218
- "gemini", # Gemini/YouTube transcript format
219
- # TTML profiles (write-only)
220
- "ttml", # Generic TTML
221
- "imsc1", # IMSC1 (Netflix/streaming) TTML profile
222
- "ebu_tt_d", # EBU-TT-D (European broadcast) TTML profile
223
- # Professional NLE formats
224
- "avid_ds", # Avid Media Composer SubCap format
225
- "fcpxml", # Final Cut Pro XML
226
- "premiere_xml", # Adobe Premiere Pro XML (graphic clips)
227
- "audition_csv", # Adobe Audition markers
228
- "edimarker_csv", # Pro Tools (via EdiMarker) markers
229
- ]
230
-
231
- # =============================================================================
232
- # Runtime Format Lists (Derived from Type Definitions)
233
- # =============================================================================
234
-
235
- # Input caption formats list (derived from InputCaptionFormat)
236
- INPUT_CAPTION_FORMATS: list[str] = list(get_args(InputCaptionFormat))
237
-
238
- # Output caption formats list (derived from OutputCaptionFormat)
239
- OUTPUT_CAPTION_FORMATS: list[str] = list(get_args(OutputCaptionFormat))
240
-
241
- # Standard caption formats (formats with both reader and writer)
242
- CAPTION_FORMATS: list[str] = ["srt", "vtt", "ass", "ssa", "sub", "sbv", "txt", "sami", "smi"]
243
-
244
- # All caption formats combined (for file detection, excludes "auto")
245
- ALL_CAPTION_FORMATS: list[str] = list(set(INPUT_CAPTION_FORMATS + OUTPUT_CAPTION_FORMATS) - {"auto"})
5
+ from typing import Optional
6
+
7
+ from lattifai.caption.config import (
8
+ INPUT_CAPTION_FORMATS,
9
+ OUTPUT_CAPTION_FORMATS,
10
+ InputCaptionFormat,
11
+ KaraokeConfig,
12
+ OutputCaptionFormat,
13
+ StandardizationConfig,
14
+ )
15
+ from lattifai.caption.supervision import Pathlike
246
16
 
247
17
 
248
18
  @dataclass
lattifai/config/client.py CHANGED
@@ -31,6 +31,13 @@ class ClientConfig:
31
31
  When True, prints detailed timing information for various stages of the process.
32
32
  """
33
33
 
34
+ # Client identification for usage tracking
35
+ client_name: Optional[str] = field(default="python-sdk")
36
+ """Client identifier for usage tracking (e.g., 'python-sdk', 'claude-plugin')."""
37
+
38
+ client_version: Optional[str] = field(default=None)
39
+ """Client version for usage tracking. If None, uses lattifai package version."""
40
+
34
41
  def __post_init__(self):
35
42
  """Validate and auto-populate configuration after initialization."""
36
43
 
@@ -44,6 +51,15 @@ class ClientConfig:
44
51
  if self.api_key is None:
45
52
  object.__setattr__(self, "api_key", os.environ.get("LATTIFAI_API_KEY"))
46
53
 
54
+ # Auto-load client version from package if not provided
55
+ if self.client_version is None:
56
+ try:
57
+ from importlib.metadata import version
58
+
59
+ object.__setattr__(self, "client_version", version("lattifai"))
60
+ except Exception:
61
+ object.__setattr__(self, "client_version", "unknown")
62
+
47
63
  # Validate API parameters
48
64
  if self.timeout <= 0:
49
65
  raise ValueError("timeout must be greater than 0")
@@ -0,0 +1,102 @@
1
+ """Audio Event Detection configuration for LattifAI."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import TYPE_CHECKING, Dict, List, Literal, Optional
5
+
6
+ from ..utils import _select_device
7
+
8
+ if TYPE_CHECKING:
9
+ from ..client import SyncAPIClient
10
+
11
+
12
+ @dataclass
13
+ class EventConfig:
14
+ """
15
+ Audio Event Detection configuration.
16
+
17
+ Settings for detecting audio events (Speech, Music, Male, Female...) in audio files using the AED model.
18
+
19
+ Event Matching:
20
+ When event_matching is enabled, the AED system will:
21
+ 1. Parse [Event] markers from input captions (e.g., [Music], [Applause])
22
+ 2. Match caption events to AED labels using semantic matching
23
+ 3. Force detection of matched labels even if not in top_k
24
+ 4. Update caption timestamps based on AED detection results
25
+
26
+ Event matching logic is implemented in lattifai_core.event.EventMatcher.
27
+ """
28
+
29
+ enabled: bool = False
30
+ """Enable audio event detection."""
31
+
32
+ device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
33
+ """Computation device for Event Detection models."""
34
+
35
+ vad_chunk_size: float = 30.0
36
+ """VAD chunk size in seconds for speech segmentation."""
37
+
38
+ vad_max_gap: float = 2.0
39
+ """Maximum gap in seconds between VAD segments to merge."""
40
+
41
+ fast_mode: bool = True
42
+ """Enable fast mode (only detect top_k classes, skip others)."""
43
+
44
+ model_path: str = ""
45
+ """Path to pretrained model. If empty, uses default bundled model."""
46
+
47
+ event_matching: bool = True
48
+ """Whether to update events in the alignment."""
49
+
50
+ extra_events: List[str] = field(default_factory=list)
51
+ """Additional event types to always detect, even if not in top_k.
52
+ Example: ["Applause", "Laughter", "Music"]
53
+ """
54
+
55
+ event_aliases: Dict[str, List[str]] = field(default_factory=dict)
56
+ """Custom aliases mapping [Event] markers to AED labels.
57
+
58
+ Core AED labels (13 listed here):
59
+ [Applause], [Baby cry], [Battle cry], [Bellow], [Children shouting],
60
+ [Laughter], [Music], [Shout], [Singing], [Sound effect],
61
+ [Speech], [Whoop], [Yell]
62
+
63
+ Custom aliases extend built-ins (not replace):
64
+ {"[Audience reaction]": ["[Applause]", "[Cheering]"]}
65
+ """
66
+
67
+ time_tolerance: float = 20.0
68
+ """Max time (seconds) non-Speech events can extend beyond supervision boundaries."""
69
+
70
+ update_timestamps: bool = True
71
+ """Whether to update caption event timestamps based on AED detections."""
72
+
73
+ duplicate_strategy: Literal["keep_all", "merge_first", "split"] = "merge_first"
74
+ """Strategy for handling multiple [Event] markers mapped to same AED interval.
75
+ - keep_all: Update all events to same time range (may cause overlapping)
76
+ - merge_first: Keep only first event per interval, skip duplicates
77
+ - split: Split interval at speech boundaries (not yet implemented)
78
+ """
79
+
80
+ client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
81
+ """Reference to the SyncAPIClient instance. Auto-set during client initialization."""
82
+
83
+ def __post_init__(self):
84
+ """Validate and auto-populate configuration after initialization."""
85
+ # Validate device
86
+ if self.device not in ("cpu", "cuda", "mps", "auto") and not self.device.startswith("cuda:"):
87
+ raise ValueError(f"device must be one of ('cpu', 'cuda', 'mps', 'auto'), got '{self.device}'")
88
+
89
+ if self.device == "auto":
90
+ self.device = _select_device(self.device)
91
+
92
+ # Validate vad_chunk_size
93
+ if self.vad_chunk_size < 0:
94
+ raise ValueError("vad_chunk_size must be non-negative")
95
+
96
+ # Validate vad_max_gap
97
+ if self.vad_max_gap < 0:
98
+ raise ValueError("vad_max_gap must be non-negative")
99
+
100
+ # Validate time_tolerance
101
+ if self.time_tolerance < 0:
102
+ raise ValueError("time_tolerance must be non-negative")
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Literal, Optional
7
7
  from ..utils import _select_device
8
8
 
9
9
  if TYPE_CHECKING:
10
- from ..base_client import SyncAPIClient
10
+ from ..client import SyncAPIClient
11
11
 
12
12
  SUPPORTED_TRANSCRIPTION_MODELS = Literal[
13
13
  "gemini-2.5-pro",
@@ -48,6 +48,30 @@ class TranscriptionConfig:
48
48
  language: Optional[str] = None
49
49
  """Target language code for transcription (e.g., 'en', 'zh', 'ja')."""
50
50
 
51
+ prompt: Optional[str] = None
52
+ """Custom prompt text or path to prompt file for transcription.
53
+ If the value is an existing file path, the file contents will be used.
54
+ Otherwise, the value is used directly as the prompt text."""
55
+
56
+ description: Optional[str] = None
57
+ """Media description from platforms like YouTube, Xiaoyuzhou (小宇宙), etc.
58
+ Used to provide context for transcription."""
59
+
60
+ thinking: bool = True
61
+ """Enable Gemini's thinking mode (Gemini models only). Set to False to disable thinking."""
62
+
63
+ include_thoughts: bool = False
64
+ """Include Gemini's thinking process in the output (Gemini models only). Requires thinking=True."""
65
+
66
+ temperature: Optional[float] = None
67
+ """Sampling temperature for generation. Higher values increase randomness."""
68
+
69
+ top_k: Optional[float] = None
70
+ """Top-k sampling parameter. Limits token selection to top k candidates."""
71
+
72
+ top_p: Optional[float] = None
73
+ """Nucleus sampling parameter. Limits token selection by cumulative probability."""
74
+
51
75
  lattice_model_path: Optional[str] = None
52
76
  """Path to local LattifAI model. Will be auto-set in LattifAI client."""
53
77
 
@@ -0,0 +1,8 @@
1
+ """Data types for LattifAI.
2
+
3
+ Provides extended Caption class with transcription/alignment/diarization support.
4
+ """
5
+
6
+ from .caption import Caption
7
+
8
+ __all__ = ["Caption"]