lattifai 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +2 -3
  3. lattifai/alignment/lattice1_aligner.py +117 -4
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/segmenter.py +3 -2
  6. lattifai/alignment/text_align.py +2 -1
  7. lattifai/alignment/tokenizer.py +56 -29
  8. lattifai/audio2.py +162 -183
  9. lattifai/cli/alignment.py +5 -0
  10. lattifai/cli/caption.py +6 -6
  11. lattifai/cli/transcribe.py +1 -5
  12. lattifai/cli/youtube.py +3 -0
  13. lattifai/client.py +41 -12
  14. lattifai/config/__init__.py +21 -3
  15. lattifai/config/alignment.py +7 -0
  16. lattifai/config/caption.py +13 -243
  17. lattifai/config/client.py +16 -0
  18. lattifai/config/event.py +102 -0
  19. lattifai/config/transcription.py +25 -1
  20. lattifai/data/__init__.py +8 -0
  21. lattifai/data/caption.py +228 -0
  22. lattifai/errors.py +78 -53
  23. lattifai/event/__init__.py +65 -0
  24. lattifai/event/lattifai.py +166 -0
  25. lattifai/mixin.py +22 -17
  26. lattifai/transcription/base.py +2 -1
  27. lattifai/transcription/gemini.py +147 -16
  28. lattifai/transcription/lattifai.py +8 -11
  29. lattifai/types.py +1 -1
  30. lattifai/youtube/client.py +143 -48
  31. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/METADATA +117 -54
  32. lattifai-1.3.0.dist-info/RECORD +57 -0
  33. lattifai/__init__.py +0 -88
  34. lattifai/alignment/sentence_splitter.py +0 -350
  35. lattifai/caption/__init__.py +0 -96
  36. lattifai/caption/caption.py +0 -661
  37. lattifai/caption/formats/__init__.py +0 -199
  38. lattifai/caption/formats/base.py +0 -211
  39. lattifai/caption/formats/gemini.py +0 -722
  40. lattifai/caption/formats/json.py +0 -194
  41. lattifai/caption/formats/lrc.py +0 -309
  42. lattifai/caption/formats/nle/__init__.py +0 -9
  43. lattifai/caption/formats/nle/audition.py +0 -561
  44. lattifai/caption/formats/nle/avid.py +0 -423
  45. lattifai/caption/formats/nle/fcpxml.py +0 -549
  46. lattifai/caption/formats/nle/premiere.py +0 -589
  47. lattifai/caption/formats/pysubs2.py +0 -642
  48. lattifai/caption/formats/sbv.py +0 -147
  49. lattifai/caption/formats/tabular.py +0 -338
  50. lattifai/caption/formats/textgrid.py +0 -193
  51. lattifai/caption/formats/ttml.py +0 -652
  52. lattifai/caption/formats/vtt.py +0 -469
  53. lattifai/caption/parsers/__init__.py +0 -9
  54. lattifai/caption/parsers/text_parser.py +0 -147
  55. lattifai/caption/standardize.py +0 -636
  56. lattifai/caption/supervision.py +0 -34
  57. lattifai/caption/utils.py +0 -474
  58. lattifai-1.2.2.dist-info/RECORD +0 -76
  59. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
  60. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +0 -0
  61. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
  62. {lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
lattifai/data/caption.py ADDED
@@ -0,0 +1,228 @@
+"""Extended Caption class with transcription, alignment, and diarization support."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar
+
+from lattifai.caption import Caption as BaseCaption
+from lattifai.caption import Pathlike, Supervision
+
+if TYPE_CHECKING:
+    from lattifai_core.event import LEDOutput
+
+DiarizationOutput = TypeVar("DiarizationOutput")
+
+
+@dataclass
+class Caption(BaseCaption):
+    """
+    Extended Caption with transcription, alignment, and diarization support.
+
+    Inherits from BaseCaption and adds fields for:
+    - alignments: Post-alignment results
+    - transcription: ASR results
+    - event: LattifAI Event Detection results (LEDOutput)
+    - diarization: Speaker diarization results
+
+
+    These fields are used in the LattifAI pipeline for:
+    - Forced alignment results
+    - Storing intermediate transcription results
+    - LattifAI Event Detection (music, applause, speech, etc.)
+    - Speaker identification and separation
+
+    """
+
+    # Alignment results
+    alignments: List[Supervision] = field(default_factory=list)
+
+    # Transcription results
+    transcription: List[Supervision] = field(default_factory=list)
+
+    # LattifAI Event Detection results
+    event: Optional["LEDOutput"] = None
+
+    # Speaker Diarization results
+    diarization: Optional[DiarizationOutput] = None
+
+    def __len__(self) -> int:
+        """Return the number of supervision segments."""
+        return len(self.supervisions or self.transcription)
+
+    def __repr__(self) -> str:
+        """String representation of Caption."""
+        lang = f"lang={self.language}" if self.language else "lang=unknown"
+        kind_str = f"kind={self.kind}" if self.kind else ""
+        parts = [f"Caption({len(self.supervisions or self.transcription)} segments", lang]
+        if kind_str:
+            parts.append(kind_str)
+        if self.duration:
+            parts.append(f"duration={self.duration:.2f}s")
+        return ", ".join(parts) + ")"
+
+    def with_margins(
+        self,
+        start_margin: float = 0.08,
+        end_margin: float = 0.20,
+        min_gap: float = 0.08,
+        collision_mode: str = "trim",
+    ) -> "Caption":
+        """
+        Create a new Caption with segment boundaries adjusted based on word-level alignment.
+
+        Uses supervision.alignment['word'] to recalculate segment start/end times
+        with the specified margins applied around the actual speech boundaries.
+
+        Prefers alignments > supervisions > transcription as source.
+
+        Args:
+            start_margin: Seconds to extend before the first word (default: 0.08)
+            end_margin: Seconds to extend after the last word (default: 0.20)
+            min_gap: Minimum gap between segments for collision handling (default: 0.08)
+            collision_mode: How to handle segment overlap - 'trim' or 'gap' (default: 'trim')
+
+        Returns:
+            New Caption instance with adjusted timestamps
+
+        Note:
+            Segments without alignment data will keep their original timestamps.
+        """
+        from lattifai.caption.standardize import apply_margins_to_captions
+
+        # Determine which supervisions to use (priority: alignments > supervisions > transcription)
+        if self.alignments:
+            source_sups = self.alignments
+        elif self.supervisions:
+            source_sups = self.supervisions
+        else:
+            source_sups = self.transcription
+
+        adjusted_sups = apply_margins_to_captions(
+            source_sups,
+            start_margin=start_margin,
+            end_margin=end_margin,
+            min_gap=min_gap,
+            collision_mode=collision_mode,
+        )
+
+        return Caption(
+            supervisions=adjusted_sups,
+            transcription=self.transcription,
+            event=self.event,
+            diarization=self.diarization,
+            alignments=[],  # Clear alignments since we've applied them
+            language=self.language,
+            kind=self.kind,
+            source_format=self.source_format,
+            source_path=self.source_path,
+            metadata=self.metadata.copy() if self.metadata else {},
+        )
+
+    def write(
+        self,
+        path=None,
+        output_format: Optional[str] = None,
+        include_speaker_in_text: bool = True,
+        word_level: bool = False,
+        karaoke_config=None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Write caption to file or return as bytes.
+
+        Prefers alignments > supervisions > transcription as source.
+
+        Args:
+            path: Path to output caption file, BytesIO object, or None to return bytes
+            output_format: Output format (e.g., 'srt', 'vtt', 'ass')
+            include_speaker_in_text: Whether to include speaker labels in text
+            word_level: Use word-level output format if supported
+            karaoke_config: Karaoke configuration
+            metadata: Optional metadata dict to pass to writer
+
+        Returns:
+            Path to the written file if path is a file path, or bytes if path is BytesIO/None
+        """
+        # Temporarily swap supervisions to use the priority order
+        original_supervisions = self.supervisions
+
+        if self.alignments:
+            self.supervisions = self.alignments
+        elif not self.supervisions and self.transcription:
+            self.supervisions = self.transcription
+
+        try:
+            result = super().write(
+                path=path,
+                output_format=output_format,
+                include_speaker_in_text=include_speaker_in_text,
+                word_level=word_level,
+                karaoke_config=karaoke_config,
+                metadata=metadata,
+            )
+        finally:
+            # Restore original supervisions
+            self.supervisions = original_supervisions
+
+        return result
+
+    @classmethod
+    def from_transcription_results(
+        cls,
+        transcription: List[Supervision],
+        event: Optional["LEDOutput"] = None,
+        diarization: Optional[DiarizationOutput] = None,
+        language: Optional[str] = None,
+        source_path: Optional[Pathlike] = None,
+        metadata: Optional[Dict[str, str]] = None,
+    ) -> "Caption":
+        """
+        Create Caption from transcription results including audio events and diarization.
+
+        Args:
+            transcription: List of transcription supervision segments
+            event: Optional LEDOutput with event detection results
+            diarization: Optional DiarizationOutput with speaker diarization results
+            language: Language code
+            source_path: Source file path
+            metadata: Additional metadata
+
+        Returns:
+            New Caption instance with transcription data
+        """
+        return cls(
+            transcription=transcription,
+            event=event,
+            diarization=diarization,
+            language=language,
+            kind="transcription",
+            source_format="asr",
+            source_path=source_path,
+            metadata=metadata or {},
+        )
+
+    def read_diarization(
+        self,
+        path: Pathlike,
+    ) -> "DiarizationOutput":
+        """
+        Read speaker diarization TextGrid from file.
+        """
+        from lattifai_core.diarization import DiarizationOutput
+
+        self.diarization = DiarizationOutput.read(path)
+        return self.diarization
+
+    def write_diarization(
+        self,
+        path: Pathlike,
+    ) -> Pathlike:
+        """
+        Write speaker diarization TextGrid to file.
+        """
+        if not self.diarization:
+            raise ValueError("No speaker diarization data to write.")
+
+        self.diarization.write(path)
+        return path
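The new Caption subclass applies one priority rule throughout: alignments > supervisions > transcription. Below is a minimal usage sketch with assumptions stated: the Supervision constructor arguments are illustrative only (not verified against the released API), while the Caption methods are the ones shown in the diff above.

# Sketch only: Supervision(start=..., duration=..., text=...) is an assumed
# signature; Caption.from_transcription_results/write come from the diff above.
from lattifai.caption import Supervision
from lattifai.data.caption import Caption

segs = [Supervision(start=0.0, duration=2.5, text="hello world")]  # assumed fields
caption = Caption.from_transcription_results(transcription=segs, language="en")

len(caption)   # 1: __len__ falls back to `transcription` when `supervisions` is empty
repr(caption)  # "Caption(1 segments, lang=en, kind=transcription)"

# write() temporarily promotes alignments > supervisions > transcription,
# so the ASR segments above are what get serialized here.
caption.write("out.srt", output_format="srt")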
lattifai/errors.py CHANGED
@@ -1,10 +1,42 @@
 """Error handling and exception classes for LattifAI SDK."""
 
+import functools
 import traceback
 from typing import Any, Dict, Optional
 
 import colorful
 
+
+def format_exception(e: "LattifAIError") -> str:
+    """Format LattifAIError with filtered traceback (only lattifai frames)."""
+    tb_lines = traceback.format_exception(type(e), e, e.__traceback__)
+    filtered = []
+    skip_next_code_line = False
+
+    for i, line in enumerate(tb_lines):
+        if skip_next_code_line:
+            skip_next_code_line = False
+            continue
+
+        if line.startswith("Traceback") or not line.startswith("  File"):
+            filtered.append(line)
+        elif "lattifai" in line:
+            filtered.append(line)
+            if i + 1 < len(tb_lines) and tb_lines[i + 1].startswith("    "):
+                filtered.append(tb_lines[i + 1])
+                skip_next_code_line = True
+        elif i + 1 < len(tb_lines) and tb_lines[i + 1].startswith("    "):
+            skip_next_code_line = True
+
+    return "".join(filtered)
+
+
+def _merge_context(kwargs: Dict[str, Any], updates: Dict[str, Any]) -> None:
+    """Merge updates into kwargs['context'], creating it if needed."""
+    context = kwargs.setdefault("context", {})
+    context.update(updates)
+
+
 # Error help messages
 LATTICE_DECODING_FAILURE_HELP = (
     "Failed to decode lattice alignment. Possible reasons:\n\n"
@@ -76,10 +108,8 @@ class AudioProcessingError(LattifAIError):
     """Error during audio processing operations."""
 
     def __init__(self, message: str, media_path: Optional[str] = None, **kwargs):
-        context = kwargs.get("context", {})
         if media_path:
-            context["media_path"] = media_path
-        kwargs["context"] = context
+            _merge_context(kwargs, {"media_path": media_path})
         super().__init__(message, **kwargs)
 
 
@@ -90,11 +120,9 @@ class AudioLoadError(AudioProcessingError):
         message = f"Failed to load audio file: {colorful.red(media_path)}"
         if original_error:
             message += f" - {colorful.red(str(original_error))}"
-
-        context = kwargs.get("context", {})
-        context.update({"media_path": media_path, "original_error": str(original_error) if original_error else None})
-        kwargs["context"] = context
-
+        _merge_context(
+            kwargs, {"media_path": media_path, "original_error": str(original_error) if original_error else None}
+        )
         super().__init__(message, media_path=media_path, **kwargs)
 
 
@@ -103,9 +131,7 @@ class AudioFormatError(AudioProcessingError):
 
     def __init__(self, media_path: str, format_issue: str, **kwargs):
         message = f"Audio format error for {colorful.red(media_path)}: {colorful.red(format_issue)}"
-        context = kwargs.get("context", {})
-        context.update({"media_path": media_path, "format_issue": format_issue})
-        kwargs["context"] = context
+        _merge_context(kwargs, {"media_path": media_path, "format_issue": format_issue})
         super().__init__(message, media_path=media_path, **kwargs)
 
 
@@ -113,10 +139,8 @@ class CaptionProcessingError(LattifAIError):
     """Error during caption/text processing operations."""
 
     def __init__(self, message: str, caption_path: Optional[str] = None, **kwargs):
-        context = kwargs.get("context", {})
         if caption_path:
-            context["caption_path"] = caption_path
-        kwargs["context"] = context
+            _merge_context(kwargs, {"caption_path": caption_path})
         super().__init__(message, **kwargs)
 
 
@@ -125,9 +149,7 @@ class CaptionParseError(CaptionProcessingError):
 
     def __init__(self, caption_path: str, parse_issue: str, **kwargs):
         message = f"Failed to parse caption file {caption_path}: {parse_issue}"
-        context = kwargs.get("context", {})
-        context.update({"caption_path": caption_path, "parse_issue": parse_issue})
-        kwargs["context"] = context
+        _merge_context(kwargs, {"caption_path": caption_path, "parse_issue": parse_issue})
         super().__init__(message, caption_path=caption_path, **kwargs)
 
 
@@ -135,12 +157,13 @@ class AlignmentError(LattifAIError):
     """Error during audio-text alignment process."""
 
     def __init__(self, message: str, media_path: Optional[str] = None, caption_path: Optional[str] = None, **kwargs):
-        context = kwargs.get("context", {})
+        updates = {}
         if media_path:
-            context["media_path"] = media_path
+            updates["media_path"] = media_path
         if caption_path:
-            context["caption_path"] = caption_path
-        kwargs["context"] = context
+            updates["caption_path"] = caption_path
+        if updates:
+            _merge_context(kwargs, updates)
         super().__init__(message, **kwargs)
 
 
@@ -151,36 +174,44 @@ class LatticeEncodingError(AlignmentError):
         message = "Failed to generate lattice graph from text"
         if original_error:
             message += f": {colorful.red(str(original_error))}"
-
-        context = kwargs.get("context", {})
-        context.update(
+        text_preview = text_content[:100] + "..." if len(text_content) > 100 else text_content
+        _merge_context(
+            kwargs,
             {
                 "text_content_length": len(text_content),
-                "text_preview": text_content[:100] + "..." if len(text_content) > 100 else text_content,
+                "text_preview": text_preview,
                 "original_error": str(original_error) if original_error else None,
-            }
+            },
         )
-        kwargs["context"] = context
         super().__init__(message, **kwargs)
 
 
 class LatticeDecodingError(AlignmentError):
     """Error decoding lattice alignment results."""
 
-    def __init__(self, lattice_id: str, original_error: Optional[Exception] = None, **kwargs):
-        message = f"Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}"
+    def __init__(
+        self,
+        lattice_id: str,
+        message: Optional[str] = None,
+        original_error: Optional[Exception] = None,
+        skip_help: bool = False,
+        **kwargs,
+    ):
+        message = message or f"Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}"
 
-        # Don't duplicate the help message if it's already in original_error
-        if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
-            message += f" - {colorful.red(str(original_error))}"
+        error_str = str(original_error) if original_error else None
+        is_help_message = error_str == LATTICE_DECODING_FAILURE_HELP
+
+        if original_error and not is_help_message:
+            message += f" - {colorful.red(error_str)}"
+
+        context_updates = {"lattice_id": lattice_id}
+        if original_error and not is_help_message:
+            context_updates["original_error"] = error_str
+        _merge_context(kwargs, context_updates)
 
-        context = kwargs.get("context", {})
-        # Don't store the entire help message in context to avoid duplication
-        if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
-            context["original_error"] = str(original_error)
-        context["lattice_id"] = lattice_id
-        kwargs["context"] = context
         super().__init__(message, **kwargs)
+        self.skip_help = skip_help
 
     def get_message(self) -> str:
         """Return formatted error message with help text."""
@@ -188,8 +219,9 @@ class LatticeDecodingError(AlignmentError):
         if self.context and self.context.get("lattice_id"):
             # Only show essential context (lattice_id), not the duplicated help message
             base_message += f'\n{colorful.yellow("Lattice ID:")} {self.context["lattice_id"]}'
-        # Append help message once at the end
-        base_message += f"\n\n{colorful.yellow(LATTICE_DECODING_FAILURE_HELP)}"
+        # Append help message only if not skipped (e.g., when anomaly info is provided)
+        if not self.skip_help:
+            base_message += f"\n\n{colorful.yellow(LATTICE_DECODING_FAILURE_HELP)}"
         return base_message
 
 
@@ -200,10 +232,9 @@ class ModelLoadError(LattifAIError):
         message = f"Failed to load model: {colorful.red(model_name)}"
         if original_error:
             message += f" - {colorful.red(str(original_error))}"
-
-        context = kwargs.get("context", {})
-        context.update({"model_name": model_name, "original_error": str(original_error) if original_error else None})
-        kwargs["context"] = context
+        _merge_context(
+            kwargs, {"model_name": model_name, "original_error": str(original_error) if original_error else None}
+        )
         super().__init__(message, **kwargs)
 
 
@@ -214,10 +245,7 @@ class DependencyError(LattifAIError):
         message = f"Missing required dependency: {colorful.red(dependency_name)}"
         if install_command:
             message += f"\nPlease install it using: {colorful.yellow(install_command)}"
-
-        context = kwargs.get("context", {})
-        context.update({"dependency_name": dependency_name, "install_command": install_command})
-        kwargs["context"] = context
+        _merge_context(kwargs, {"dependency_name": dependency_name, "install_command": install_command})
         super().__init__(message, **kwargs)
 
 
@@ -225,9 +253,7 @@ class APIError(LattifAIError):
     """Error communicating with LattifAI API."""
 
    def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None, **kwargs):
-        context = kwargs.get("context", {})
-        context.update({"status_code": status_code, "response_text": response_text})
-        kwargs["context"] = context
+        _merge_context(kwargs, {"status_code": status_code, "response_text": response_text})
         super().__init__(message, **kwargs)
 
 
@@ -249,14 +275,13 @@ class QuotaExceededError(APIError):
 def handle_exception(func):
     """Decorator to handle exceptions and convert them to LattifAI errors."""
 
+    @functools.wraps(func)
     def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except LattifAIError:
-            # Re-raise LattifAI errors as-is
             raise
         except Exception as e:
-            # Convert other exceptions to LattifAI errors
             error_msg = f"Unexpected error in {func.__name__}: {str(e)}"
             context = {
                 "function": func.__name__,
lattifai/event/__init__.py ADDED
@@ -0,0 +1,65 @@
+"""Audio Event Detection module for LattifAI.
+
+This module provides audio event detection capabilities. It can identify various
+audio events including speech, music, singing, and demographic characteristics
+(male, female, child voices).
+
+Key Components:
+    LattifAIEventDetector: Main class that wraps lattifai_core's
+        EventDetector for seamless integration with LattifAI workflows.
+
+Features:
+    - Multi-class audio event detection (30+ reduced classes or 400+ full classes)
+    - Voice Activity Detection (VAD) for speech segmentation
+    - Gender/age classification for speech segments
+    - Configurable detection thresholds and top-k filtering
+    - Support for both bundled and custom pretrained models
+
+Detected Event Types:
+    - Speech: General speech activity
+    - Male/Female/Child: Speaker demographic classification
+    - Music: Musical content detection
+    - Singing: Vocal music detection
+    - Synthetic: Synthetic/electronic sounds
+
+Configuration:
+    Use EventConfig to control:
+    - enabled: Whether to run audio event detection
+    - device: GPU/CPU device selection
+    - dtype: Model precision (float32, float16, bfloat16)
+    - reduced: Use reduced label set (33 vs 400+ classes)
+    - top_k: Number of top event classes to detect
+    - vad_chunk_size/vad_max_gap: VAD segmentation parameters
+
+Example:
+    >>> from lattifai.event import LattifAIEventDetector
+    >>> from lattifai.config import EventConfig
+    >>> from lattifai.audio2 import AudioLoader
+    >>>
+    >>> config = EventConfig(enabled=True, device="cuda")
+    >>> detector = LattifAIEventDetector(config)
+    >>>
+    >>> audio = AudioLoader.load("speech.wav")
+    >>> result = detector.detect(audio)
+    >>>
+    >>> # Access VAD segments directly
+    >>> for start, end in result.vad_segments:
+    ...     print(f"Speech: {start:.2f} - {end:.2f}")
+    >>>
+    >>> # Or access the full TextGrid
+    >>> print(result.audio_events)
+
+Performance Notes:
+    - GPU acceleration provides significant speedup (10x+ over CPU)
+    - Use dtype="float16" for faster inference with minimal accuracy loss
+    - fast_mode=True reduces computation by only detecting top_k classes
+    - Long audio files are automatically chunked to manage memory
+
+See Also:
+    - lattifai.config.EventConfig: Configuration options
+    - lattifai_core.event: Core event detection implementation
+"""
+
+from .lattifai import LattifAIEventDetector
+
+__all__ = ["LattifAIEventDetector"]