lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. lattifai/__init__.py +42 -27
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/{workers/lattice1_alpha.py → alignment/lattice1_worker.py} +33 -132
  5. lattifai/{tokenizer → alignment}/phonemizer.py +1 -1
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +186 -112
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/supervision.py +1 -0
  12. lattifai/{io → caption}/text_parser.py +53 -10
  13. lattifai/cli/__init__.py +17 -0
  14. lattifai/cli/alignment.py +153 -0
  15. lattifai/cli/caption.py +204 -0
  16. lattifai/cli/server.py +19 -0
  17. lattifai/cli/transcribe.py +197 -0
  18. lattifai/cli/youtube.py +128 -0
  19. lattifai/client.py +455 -246
  20. lattifai/config/__init__.py +20 -0
  21. lattifai/config/alignment.py +73 -0
  22. lattifai/config/caption.py +178 -0
  23. lattifai/config/client.py +46 -0
  24. lattifai/config/diarization.py +67 -0
  25. lattifai/config/media.py +335 -0
  26. lattifai/config/transcription.py +84 -0
  27. lattifai/diarization/__init__.py +5 -0
  28. lattifai/diarization/lattifai.py +89 -0
  29. lattifai/errors.py +41 -34
  30. lattifai/logging.py +116 -0
  31. lattifai/mixin.py +552 -0
  32. lattifai/server/app.py +420 -0
  33. lattifai/transcription/__init__.py +76 -0
  34. lattifai/transcription/base.py +108 -0
  35. lattifai/transcription/gemini.py +219 -0
  36. lattifai/transcription/lattifai.py +103 -0
  37. lattifai/types.py +30 -0
  38. lattifai/utils.py +3 -31
  39. lattifai/workflow/__init__.py +22 -0
  40. lattifai/workflow/agents.py +6 -0
  41. lattifai/{workflows → workflow}/file_manager.py +81 -57
  42. lattifai/workflow/youtube.py +564 -0
  43. lattifai-1.0.0.dist-info/METADATA +736 -0
  44. lattifai-1.0.0.dist-info/RECORD +52 -0
  45. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  46. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  47. lattifai/base_client.py +0 -126
  48. lattifai/bin/__init__.py +0 -3
  49. lattifai/bin/agent.py +0 -324
  50. lattifai/bin/align.py +0 -295
  51. lattifai/bin/cli_base.py +0 -25
  52. lattifai/bin/subtitle.py +0 -210
  53. lattifai/io/__init__.py +0 -43
  54. lattifai/io/reader.py +0 -86
  55. lattifai/io/utils.py +0 -15
  56. lattifai/io/writer.py +0 -102
  57. lattifai/tokenizer/__init__.py +0 -3
  58. lattifai/workers/__init__.py +0 -3
  59. lattifai/workflows/__init__.py +0 -34
  60. lattifai/workflows/agents.py +0 -12
  61. lattifai/workflows/gemini.py +0 -167
  62. lattifai/workflows/prompts/README.md +0 -22
  63. lattifai/workflows/prompts/gemini/README.md +0 -24
  64. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  65. lattifai/workflows/youtube.py +0 -931
  66. lattifai-0.4.6.dist-info/METADATA +0 -806
  67. lattifai-0.4.6.dist-info/RECORD +0 -39
  68. lattifai-0.4.6.dist-info/entry_points.txt +0 -3
  69. /lattifai/{io → caption}/gemini_reader.py +0 -0
  70. /lattifai/{io → caption}/gemini_writer.py +0 -0
  71. /lattifai/{workflows → transcription}/prompts/__init__.py +0 -0
  72. /lattifai/{workflows → workflow}/base.py +0 -0
  73. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +0 -0
  74. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
1
+ """Media I/O configuration for LattifAI."""
2
+
3
+ from dataclasses import dataclass, field, replace
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ from urllib.parse import urlparse
7
+
8
+ from lhotse.utils import Pathlike
9
+
10
+ # Supported media formats for both audio and video content
11
+ AUDIO_FORMATS = (
12
+ "aac",
13
+ "aiff",
14
+ "alac",
15
+ "flac",
16
+ "m4a",
17
+ "mp3",
18
+ "ogg",
19
+ "opus",
20
+ "wav",
21
+ "wma",
22
+ )
23
+
24
+ VIDEO_FORMATS = (
25
+ "3gp",
26
+ "avi",
27
+ "flv",
28
+ "m4v",
29
+ "mkv",
30
+ "mov",
31
+ "mp4",
32
+ "mpeg",
33
+ "mpg",
34
+ "webm",
35
+ "wmv",
36
+ )
37
+
38
+ MEDIA_FORMATS = tuple(sorted(set(AUDIO_FORMATS + VIDEO_FORMATS)))
39
+
40
+
41
+ @dataclass
42
+ class MediaConfig:
43
+ """Unified configuration for audio/video input and output handling."""
44
+
45
+ # Input configuration (local filesystem path or URL)
46
+ input_path: Optional[str] = None
47
+ """Local file path or URL to audio/video content."""
48
+
49
+ media_format: str = "auto"
50
+ """Media format (mp3, wav, mp4, etc.) or 'auto' for automatic detection."""
51
+
52
+ sample_rate: Optional[int] = None
53
+ """Audio sample rate in Hz (e.g., 16000, 44100)."""
54
+
55
+ channels: Optional[int] = None
56
+ """Number of audio channels (1=mono, 2=stereo)."""
57
+
58
+ channel_selector: Optional[str | int] = "average"
59
+ """Audio channel selection strategy: 'average', 'left', 'right', or channel index."""
60
+
61
+ # Output / download configuration
62
+ output_dir: Path = field(default_factory=lambda: Path.cwd())
63
+ """Directory for output files (default: current working directory)."""
64
+
65
+ output_path: Optional[str] = None
66
+ """Full path for output file (overrides output_dir + filename)."""
67
+
68
+ output_format: Optional[str] = None
69
+ """Output media format (mp3, wav, mp4, etc.)."""
70
+
71
+ prefer_audio: bool = True
72
+ """Prefer audio format when 'auto' is specified."""
73
+
74
+ default_audio_format: str = "mp3"
75
+ """Default audio format when no format is specified."""
76
+
77
+ default_video_format: str = "mp4"
78
+ """Default video format when no format is specified."""
79
+
80
+ force_overwrite: bool = False
81
+ """Overwrite existing output files without prompting."""
82
+
83
+ def __post_init__(self) -> None:
84
+ """Validate configuration and normalize paths/formats."""
85
+ self._setup_output_directory()
86
+ self._validate_default_formats()
87
+ self._normalize_media_format()
88
+ self._process_input_path()
89
+ self._process_output_path()
90
+
91
+ def _setup_output_directory(self) -> None:
92
+ """Ensure output directory exists and is valid."""
93
+ resolved_output_dir = self._ensure_dir(self.output_dir)
94
+ self.output_dir = resolved_output_dir
95
+
96
+ def _validate_default_formats(self) -> None:
97
+ """Validate default audio and video formats."""
98
+ self.default_audio_format = self._normalize_format(self.default_audio_format)
99
+ self.default_video_format = self._normalize_format(self.default_video_format)
100
+
101
+ def _normalize_media_format(self) -> None:
102
+ """Normalize media format, allowing 'auto' during initialization."""
103
+ self.media_format = self._normalize_format(self.media_format, allow_auto=True)
104
+
105
+ def _process_input_path(self) -> None:
106
+ """Process and validate input path if provided."""
107
+ if self.input_path is None:
108
+ return
109
+
110
+ if self._is_url(self.input_path):
111
+ normalized_url = self._normalize_url(self.input_path)
112
+ self.input_path = normalized_url
113
+ if self.media_format == "auto":
114
+ inferred_format = self._infer_format_from_source(normalized_url)
115
+ if inferred_format:
116
+ self.media_format = self._normalize_format(inferred_format)
117
+ else:
118
+ # For local paths, normalize to string without validation here
119
+ # Validation will be done in check_input_sanity()
120
+ self.input_path = str(Path(self.input_path).expanduser())
121
+ if self.media_format == "auto":
122
+ inferred_format = Path(self.input_path).suffix.lstrip(".").lower()
123
+ if inferred_format:
124
+ self.media_format = self._normalize_format(inferred_format)
125
+
126
+ # Validate input after setting
127
+ self.check_input_sanity()
128
+
129
+ def _process_output_path(self) -> None:
130
+ """Process output path and format."""
131
+ if self.output_path is not None:
132
+ self.set_output_path(self.output_path)
133
+ elif self.output_format is not None:
134
+ self.output_format = self._normalize_format(self.output_format)
135
+ else:
136
+ self.output_format = None
137
+
138
+ # ------------------------------------------------------------------
139
+ # Public helpers
140
+ # ------------------------------------------------------------------
141
+ def clone(self, **updates: object) -> "MediaConfig":
142
+ """Return a shallow copy of the config with optional overrides."""
143
+ return replace(self, **updates)
144
+
145
+ def normalize_format(self, media_format: Optional[str] = None, *, prefer_audio: Optional[bool] = None) -> str:
146
+ """Resolve a media format (handling the special "auto" value)."""
147
+ prefer_audio = self.prefer_audio if prefer_audio is None else prefer_audio
148
+ candidate = (media_format or self.media_format or "auto").lower()
149
+ if candidate == "auto":
150
+ candidate = self.default_audio_format if prefer_audio else self.default_video_format
151
+ return self._normalize_format(candidate)
152
+
153
+ def is_audio_format(self, media_format: Optional[str] = None) -> bool:
154
+ """Check whether the provided (or effective) format is an audio format."""
155
+ return self.normalize_format(media_format) in AUDIO_FORMATS
156
+
157
+ def is_video_format(self, media_format: Optional[str] = None) -> bool:
158
+ """Check whether the provided (or effective) format is a video format."""
159
+ return self.normalize_format(media_format) in VIDEO_FORMATS
160
+
161
+ def set_media_format(self, media_format: Optional[str], *, prefer_audio: Optional[bool] = None) -> str:
162
+ """Update media_format and return the normalized value."""
163
+ normalized = self.normalize_format(media_format, prefer_audio=prefer_audio)
164
+ self.media_format = normalized
165
+ return normalized
166
+
167
+ def set_input_path(self, path: Pathlike) -> Path | str:
168
+ """Update the input path (local path or URL) and infer format if possible."""
169
+ path = str(path)
170
+ if self._is_url(path):
171
+ normalized_url = self._normalize_url(path)
172
+ self.input_path = normalized_url
173
+ inferred_format = self._infer_format_from_source(normalized_url)
174
+ if inferred_format:
175
+ self.media_format = self._normalize_format(inferred_format)
176
+ self.check_input_sanity()
177
+ return normalized_url
178
+
179
+ resolved = self._ensure_file(path)
180
+ self.input_path = str(resolved)
181
+ inferred_format = resolved.suffix.lstrip(".").lower()
182
+ if inferred_format:
183
+ self.media_format = self._normalize_format(inferred_format)
184
+ self.check_input_sanity()
185
+ return resolved
186
+
187
+ def set_output_dir(self, output_dir: Pathlike) -> Path:
188
+ """Update the output directory (creating it if needed)."""
189
+ resolved = self._ensure_dir(output_dir)
190
+ self.output_dir = resolved
191
+ return resolved
192
+
193
+ def set_output_path(self, output_path: Pathlike) -> Path:
194
+ """Update the output path and synchronize output format and directory."""
195
+ resolved = self._ensure_file(output_path, must_exist=False, create_parent=True)
196
+ if not resolved.suffix:
197
+ raise ValueError("output_path must include a filename with an extension.")
198
+ fmt = resolved.suffix.lstrip(".").lower()
199
+ self.output_path = str(resolved)
200
+ self.output_dir = resolved.parent
201
+ self.output_format = self._normalize_format(fmt)
202
+ return resolved
203
+
204
+ def prepare_output_path(self, stem: Optional[str] = None, format: Optional[str] = None) -> Path:
205
+ """Return an output path, creating one if not set yet."""
206
+ if self.output_path:
207
+ return Path(self.output_path)
208
+
209
+ effective_format = self.normalize_format(format or self.output_format or self.media_format)
210
+ base_name = stem or (self._derive_input_stem() or "output")
211
+ candidate = self.output_dir / f"{base_name}.{effective_format}"
212
+ self.output_path = str(candidate)
213
+ self.output_format = effective_format
214
+ return candidate
215
+
216
+ def is_input_remote(self) -> bool:
217
+ """Return True if the configured input is a URL."""
218
+ return bool(self.input_path and self._is_url(self.input_path))
219
+
220
+ def check_input_sanity(self) -> None:
221
+ """
222
+ Validate that input_path is properly configured and accessible.
223
+
224
+ Raises:
225
+ ValueError: If input_path is not set or is invalid.
226
+ FileNotFoundError: If input_path is a local file that does not exist.
227
+ """
228
+ if not self.input_path:
229
+ raise ValueError("input_path is required but not set in MediaConfig")
230
+
231
+ if self._is_url(self.input_path):
232
+ # For URLs, validate that it's properly formatted
233
+ try:
234
+ parsed = urlparse(self.input_path)
235
+ if not parsed.scheme or not parsed.netloc:
236
+ raise ValueError(
237
+ f"Invalid URL format for input_path: '{self.input_path}'. "
238
+ "URL must include scheme (http/https) and domain."
239
+ )
240
+ except (ValueError, AttributeError) as e:
241
+ # ValueError: Invalid URL format
242
+ # AttributeError: urlparse issues with malformed input
243
+ raise ValueError(f"Failed to parse input_path as URL: {e}") from e
244
+ else:
245
+ # For local files, validate that the file exists and is accessible
246
+ input_file = Path(self.input_path).expanduser()
247
+ if not input_file.exists():
248
+ raise FileNotFoundError(
249
+ f"Input media file does not exist: '{input_file}'. " "Please check the path and try again."
250
+ )
251
+ if not input_file.is_file():
252
+ raise ValueError(
253
+ f"Input media path is not a file: '{input_file}'. " "Expected a valid media file path."
254
+ )
255
+
256
+ # ------------------------------------------------------------------
257
+ # Internal utilities
258
+ # ------------------------------------------------------------------
259
+ def _ensure_dir(self, directory: Pathlike) -> Path:
260
+ path = Path(directory).expanduser()
261
+ path.mkdir(parents=True, exist_ok=True)
262
+ if not path.is_dir():
263
+ raise NotADirectoryError(f"Output directory '{path}' is not a directory.")
264
+ return path
265
+
266
+ def _ensure_file(self, path: Pathlike, *, must_exist: bool = True, create_parent: bool = False) -> Path:
267
+ file_path = Path(path).expanduser()
268
+ if must_exist:
269
+ if not file_path.exists():
270
+ raise FileNotFoundError(f"Input media path '{file_path}' does not exist.")
271
+ if not file_path.is_file():
272
+ raise ValueError(f"Input media path '{file_path}' is not a file.")
273
+ else:
274
+ if create_parent:
275
+ file_path.parent.mkdir(parents=True, exist_ok=True)
276
+ return file_path
277
+
278
+ def _normalize_format(self, media_format: Optional[str], *, allow_auto: bool = False) -> str:
279
+ if media_format is None:
280
+ raise ValueError("media_format cannot be None")
281
+ normalized = media_format.strip().lower()
282
+ if not normalized:
283
+ raise ValueError("media_format cannot be empty")
284
+ if normalized == "auto":
285
+ if allow_auto:
286
+ return normalized
287
+ normalized = self.default_audio_format if self.prefer_audio else self.default_video_format
288
+ if normalized not in MEDIA_FORMATS:
289
+ raise ValueError(
290
+ "Unsupported media format '{fmt}'. Supported formats: {supported}".format(
291
+ fmt=media_format,
292
+ supported=", ".join(MEDIA_FORMATS),
293
+ )
294
+ )
295
+ return normalized
296
+
297
+ def _clean_url_escapes(self, url: str) -> str:
298
+ """Remove shell escape backslashes from URL special characters."""
299
+ return url.strip().replace(r"\?", "?").replace(r"\=", "=").replace(r"\&", "&")
300
+
301
+ def _is_url(self, value: Pathlike) -> bool:
302
+ if not isinstance(value, str):
303
+ return False
304
+ cleaned = self._clean_url_escapes(value)
305
+ parsed = urlparse(cleaned)
306
+ return bool(parsed.scheme and parsed.netloc)
307
+
308
+ def _normalize_url(self, url: str) -> str:
309
+ cleaned = self._clean_url_escapes(url)
310
+ parsed = urlparse(cleaned)
311
+ if not parsed.scheme or not parsed.netloc:
312
+ raise ValueError("input_path must be an absolute URL when provided as a remote source.")
313
+ return cleaned
314
+
315
+ def _infer_format_from_source(self, source: str) -> Optional[str]:
316
+ path_segment = Path(urlparse(source).path) if self._is_url(source) else Path(source)
317
+ suffix = path_segment.suffix.lstrip(".").lower()
318
+ return suffix or None
319
+
320
+ def _derive_input_stem(self) -> Optional[str]:
321
+ if not self.input_path:
322
+ return None
323
+ if self.is_input_remote():
324
+ path_segment = Path(urlparse(self.input_path).path)
325
+ stem = path_segment.stem
326
+ return stem or None
327
+ return Path(self.input_path).stem or None
328
+
329
+
330
+ __all__ = [
331
+ "MediaConfig",
332
+ "AUDIO_FORMATS",
333
+ "VIDEO_FORMATS",
334
+ "MEDIA_FORMATS",
335
+ ]
@@ -0,0 +1,84 @@
1
+ """Transcription service configuration for LattifAI."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Literal, Optional
6
+
7
+ from ..utils import _select_device
8
+
9
+ if TYPE_CHECKING:
10
+ from ..base_client import SyncAPIClient
11
+
12
SUPPORTED_TRANSCRIPTION_MODELS = Literal[
    "gemini-2.5-pro",
    "gemini-3-pro-preview",
    "nvidia/parakeet-tdt-0.6b-v3",
    "nvidia/canary-1b-v2",
    "iic/SenseVoiceSmall",
]


@dataclass
class TranscriptionConfig:
    """
    Transcription service configuration.

    Settings for audio/video transcription using various providers.

    Raises:
        ValueError: On construction, if ``model_name`` is not one of
            ``SUPPORTED_TRANSCRIPTION_MODELS``, ``max_retries`` is negative, or
            ``device`` is not 'cpu', 'cuda', 'mps', 'auto' or 'cuda:N'.
    """

    model_name: SUPPORTED_TRANSCRIPTION_MODELS = "nvidia/parakeet-tdt-0.6b-v3"
    """Model name for transcription."""

    gemini_api_key: Optional[str] = None
    """Gemini API key. If None, reads from GEMINI_API_KEY environment variable."""

    device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
    """Computation device for transcription models."""

    max_retries: int = 0
    """Maximum number of retry attempts for failed transcription requests."""

    force_overwrite: bool = False
    """Force overwrite existing transcription files."""

    verbose: bool = False
    """Enable debug logging for transcription operations."""

    language: Optional[str] = None
    """Target language code for transcription (e.g., 'en', 'zh', 'ja')."""

    lattice_model_path: Optional[str] = None
    """Path to local LattifAI model. Will be auto-set in LattifAI client."""

    client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
    """Reference to the SyncAPIClient instance. Auto-set during client initialization."""

    def __post_init__(self):
        """Validate and auto-populate configuration after initialization."""
        from typing import get_args

        # --- Pure validation first, so invalid arguments fail before any
        # --- side effect (loading .env into the process environment).
        supported_models = get_args(SUPPORTED_TRANSCRIPTION_MODELS)
        if self.model_name not in supported_models:
            raise ValueError(
                f"Unsupported model_name: '{self.model_name}'. " f"Supported models are: {supported_models}"
            )

        if self.max_retries < 0:
            raise ValueError("max_retries must be non-negative")

        # 'cuda:N' (explicit GPU index) is accepted in addition to the bare names.
        device_ok = isinstance(self.device, str) and (
            self.device in ("cpu", "cuda", "mps", "auto") or self.device.startswith("cuda:")
        )
        if not device_ok:
            raise ValueError(
                f"device must be one of ('cpu', 'cuda', 'mps', 'auto') or 'cuda:N', got '{self.device}'"
            )

        # --- Environment-dependent population.
        # Load environment variables from .env file
        from dotenv import find_dotenv, load_dotenv

        # Try to find and load .env file from current directory or parent directories
        load_dotenv(find_dotenv(usecwd=True))

        # Auto-load Gemini API key from environment if not provided
        if self.gemini_api_key is None:
            self.gemini_api_key = os.environ.get("GEMINI_API_KEY")

        # Resolve 'auto' to a concrete device via the package helper.
        if self.device == "auto":
            self.device = _select_device(self.device)
@@ -0,0 +1,5 @@
1
+ """Speaker diarization module for LattifAI."""
2
+
3
+ from .lattifai import LattifAIDiarizer
4
+
5
+ __all__ = ["LattifAIDiarizer"]
@@ -0,0 +1,89 @@
1
+ """LattifAI speaker diarization implementation."""
2
+
3
+ import logging
4
+ from collections import defaultdict
5
+ from typing import List, Optional, Tuple
6
+
7
+ import torch
8
+ from tgt import Interval, IntervalTier, TextGrid
9
+
10
+ from lattifai.audio2 import AudioData
11
+ from lattifai.caption import Supervision
12
+ from lattifai.config.diarization import DiarizationConfig
13
+ from lattifai.logging import get_logger
14
+
15
+ formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
16
+ logging.basicConfig(format=formatter, level=logging.INFO)
17
+
18
+
19
+ NOT_KNOWN = "NotKnown"
20
+
21
+
22
class LattifAIDiarizer:
    """
    Speaker diarizer facade for LattifAI.

    Holds a ``DiarizationConfig`` and forwards all diarization calls to the
    ``LattifAIDiarizer`` from ``lattifai_core.diarization``, which is imported
    and instantiated only on first use.
    """

    def __init__(self, config: Optional[DiarizationConfig] = None):
        """
        Set up the diarizer without loading the backing pipeline.

        Args:
            config: Diarization configuration; a default ``DiarizationConfig``
                is created when omitted.
        """
        self.config = DiarizationConfig() if config is None else config
        self.logger = get_logger("diarization")

        # Populated on first access of the ``diarizer`` property.
        self._diarizer = None

    @property
    def name(self) -> str:
        """Human-readable name of the diarizer."""
        return "LattifAI_Diarizer"

    @property
    def diarizer(self):
        """Return the core diarizer, creating it on first access."""
        if self._diarizer is not None:
            return self._diarizer

        # Deferred import: lattifai_core is only needed once diarization is used.
        from lattifai_core.diarization import LattifAIDiarizer as CoreLattifAIDiarizer

        self._diarizer = CoreLattifAIDiarizer(config=self.config)
        return self._diarizer

    def diarize(
        self,
        input_media: AudioData,
        num_speakers: Optional[int] = None,
        min_speakers: Optional[int] = None,
        max_speakers: Optional[int] = None,
    ) -> TextGrid:
        """Run speaker diarization on ``input_media`` via the core diarizer."""
        speaker_options = {
            "num_speakers": num_speakers,
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
        }
        return self.diarizer.diarize(input_media, **speaker_options)

    def diarize_with_alignments(
        self,
        input_media: AudioData,
        alignments: List[Supervision],
        diarization: Optional[TextGrid] = None,
        num_speakers: Optional[int] = None,
        min_speakers: Optional[int] = None,
        max_speakers: Optional[int] = None,
    ) -> Tuple[TextGrid, List[Supervision]]:
        """Diarize ``input_media`` and return the core diarizer's result for the given alignments."""
        forwarded = {
            "alignments": alignments,
            "diarization": diarization,
            "num_speakers": num_speakers,
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
        }
        return self.diarizer.diarize_with_alignments(input_media, **forwarded)
lattifai/errors.py CHANGED
@@ -8,15 +8,15 @@ import colorful
8
8
  # Error help messages
9
9
  LATTICE_DECODING_FAILURE_HELP = (
10
10
  "Failed to decode lattice alignment. Possible reasons:\n\n"
11
- "1) Audio and text content mismatch:\n"
12
- " - The transcript/subtitle does not accurately match the audio content\n"
13
- " - Text may be from a different version or section of the audio\n"
14
- " ⚠️ Note: Gemini transcription may occasionally skip large segments of audio, causing alignment failures.\n"
11
+ "1) Media(Audio/Video) and text content mismatch:\n"
12
+ " - The transcript/caption does not accurately match the media content\n"
13
+ " - Text may be from a different version or section of the media\n"
14
+ " ⚠️ Note: Gemini transcription may occasionally skip large segments of media, causing alignment failures.\n"
15
15
  " We will detect and fix this issue in the next version.\n\n"
16
- "2) Unsupported audio type:\n"
16
+ "2) Unsupported media type:\n"
17
17
  " - Singing is not yet supported, this will be optimized in future versions\n\n"
18
18
  "💡 Troubleshooting tips:\n"
19
- " • Verify the transcript matches the audio by listening to a few segments\n"
19
+ " • Verify the transcript matches the media by listening to a few segments\n"
20
20
  " • For YouTube videos, manually check if auto-generated transcript are accurate\n"
21
21
  " • Consider using a different transcription source if Gemini results are incomplete"
22
22
  )
@@ -45,7 +45,7 @@ class LattifAIError(Exception):
45
45
  f' 1. 📝 Create a GitHub issue: {colorful.green("https://github.com/lattifai/lattifai-python/issues")}\n'
46
46
  " Please include:\n"
47
47
  " - Your audio file format and duration\n"
48
- " - The text/subtitle content you're trying to align\n"
48
+ " - The text/caption content you're trying to align\n"
49
49
  " - This error message and stack trace\n"
50
50
  f' 2. 💬 Join our Discord community: {colorful.green("https://discord.gg/vzmTzzZgNu")}\n'
51
51
  " Our team and community can help you troubleshoot\n"
@@ -71,10 +71,10 @@ class LattifAIError(Exception):
71
71
  class AudioProcessingError(LattifAIError):
72
72
  """Error during audio processing operations."""
73
73
 
74
- def __init__(self, message: str, audio_path: Optional[str] = None, **kwargs):
74
+ def __init__(self, message: str, media_path: Optional[str] = None, **kwargs):
75
75
  context = kwargs.get("context", {})
76
- if audio_path:
77
- context["audio_path"] = audio_path
76
+ if media_path:
77
+ context["media_path"] = media_path
78
78
  kwargs["context"] = context
79
79
  super().__init__(message, **kwargs)
80
80
 
@@ -82,60 +82,60 @@ class AudioProcessingError(LattifAIError):
82
82
  class AudioLoadError(AudioProcessingError):
83
83
  """Error loading or reading audio file."""
84
84
 
85
- def __init__(self, audio_path: str, original_error: Optional[Exception] = None, **kwargs):
86
- message = f"Failed to load audio file: {colorful.red(audio_path)}"
85
+ def __init__(self, media_path: str, original_error: Optional[Exception] = None, **kwargs):
86
+ message = f"Failed to load audio file: {colorful.red(media_path)}"
87
87
  if original_error:
88
88
  message += f" - {colorful.red(str(original_error))}"
89
89
 
90
90
  context = kwargs.get("context", {})
91
- context.update({"audio_path": audio_path, "original_error": str(original_error) if original_error else None})
91
+ context.update({"media_path": media_path, "original_error": str(original_error) if original_error else None})
92
92
  kwargs["context"] = context
93
93
 
94
- super().__init__(message, audio_path=audio_path, **kwargs)
94
+ super().__init__(message, media_path=media_path, **kwargs)
95
95
 
96
96
 
97
97
  class AudioFormatError(AudioProcessingError):
98
98
  """Error with audio format or codec."""
99
99
 
100
- def __init__(self, audio_path: str, format_issue: str, **kwargs):
101
- message = f"Audio format error for {colorful.red(audio_path)}: {colorful.red(format_issue)}"
100
+ def __init__(self, media_path: str, format_issue: str, **kwargs):
101
+ message = f"Audio format error for {colorful.red(media_path)}: {colorful.red(format_issue)}"
102
102
  context = kwargs.get("context", {})
103
- context.update({"audio_path": audio_path, "format_issue": format_issue})
103
+ context.update({"media_path": media_path, "format_issue": format_issue})
104
104
  kwargs["context"] = context
105
- super().__init__(message, audio_path=audio_path, **kwargs)
105
+ super().__init__(message, media_path=media_path, **kwargs)
106
106
 
107
107
 
108
- class SubtitleProcessingError(LattifAIError):
109
- """Error during subtitle/text processing operations."""
108
+ class CaptionProcessingError(LattifAIError):
109
+ """Error during caption/text processing operations."""
110
110
 
111
- def __init__(self, message: str, subtitle_path: Optional[str] = None, **kwargs):
111
+ def __init__(self, message: str, caption_path: Optional[str] = None, **kwargs):
112
112
  context = kwargs.get("context", {})
113
- if subtitle_path:
114
- context["subtitle_path"] = subtitle_path
113
+ if caption_path:
114
+ context["caption_path"] = caption_path
115
115
  kwargs["context"] = context
116
116
  super().__init__(message, **kwargs)
117
117
 
118
118
 
119
- class SubtitleParseError(SubtitleProcessingError):
120
- """Error parsing subtitle or text file."""
119
+ class CaptionParseError(CaptionProcessingError):
120
+ """Error parsing caption or text file."""
121
121
 
122
- def __init__(self, subtitle_path: str, parse_issue: str, **kwargs):
123
- message = f"Failed to parse subtitle file {subtitle_path}: {parse_issue}"
122
+ def __init__(self, caption_path: str, parse_issue: str, **kwargs):
123
+ message = f"Failed to parse caption file {caption_path}: {parse_issue}"
124
124
  context = kwargs.get("context", {})
125
- context.update({"subtitle_path": subtitle_path, "parse_issue": parse_issue})
125
+ context.update({"caption_path": caption_path, "parse_issue": parse_issue})
126
126
  kwargs["context"] = context
127
- super().__init__(message, subtitle_path=subtitle_path, **kwargs)
127
+ super().__init__(message, caption_path=caption_path, **kwargs)
128
128
 
129
129
 
130
130
  class AlignmentError(LattifAIError):
131
131
  """Error during audio-text alignment process."""
132
132
 
133
- def __init__(self, message: str, audio_path: Optional[str] = None, subtitle_path: Optional[str] = None, **kwargs):
133
+ def __init__(self, message: str, media_path: Optional[str] = None, caption_path: Optional[str] = None, **kwargs):
134
134
  context = kwargs.get("context", {})
135
- if audio_path:
136
- context["audio_path"] = audio_path
137
- if subtitle_path:
138
- context["subtitle_path"] = subtitle_path
135
+ if media_path:
136
+ context["media_path"] = media_path
137
+ if caption_path:
138
+ context["caption_path"] = caption_path
139
139
  kwargs["context"] = context
140
140
  super().__init__(message, **kwargs)
141
141
 
@@ -235,6 +235,13 @@ class ConfigurationError(LattifAIError):
235
235
  super().__init__(message, **kwargs)
236
236
 
237
237
 
238
class QuotaExceededError(APIError):
    """Error when user quota or API key limit is exceeded.

    The status code passed to ``APIError`` defaults to 402 and may be
    overridden by supplying ``status_code`` as a keyword argument.
    """

    def __init__(self, message: str, **kwargs):
        # setdefault rather than a hard-coded keyword: a caller that forwards
        # its own status_code would otherwise hit
        # "TypeError: got multiple values for keyword argument 'status_code'".
        kwargs.setdefault("status_code", 402)
        super().__init__(message, **kwargs)
243
+
244
+
238
245
  def handle_exception(func):
239
246
  """Decorator to handle exceptions and convert them to LattifAI errors."""
240
247