lattifai 0.2.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lattifai/bin/subtitle.py CHANGED
@@ -1,12 +1,16 @@
1
+ import asyncio
2
+ from pathlib import Path
3
+
1
4
  import click
2
5
  from lhotse.utils import Pathlike
3
6
 
4
7
  from lattifai.bin.cli_base import cli
8
+ from lattifai.io import SUBTITLE_FORMATS
5
9
 
6
10
 
7
11
  @cli.group()
8
12
  def subtitle():
9
- """Group of commands used to convert subtitle format."""
13
+ """Commands for subtitle format conversion and management."""
10
14
  pass
11
15
 
12
16
 
@@ -26,7 +30,181 @@ def convert(
26
30
  """
27
31
  Convert subtitle file to another format.
28
32
  """
29
- import pysubs2
33
+ if str(output_subtitle_path).lower().endswith('.TextGrid'.lower()):
34
+ from lattifai.io import SubtitleIO
35
+
36
+ alignments = SubtitleIO.read(input_subtitle_path)
37
+ SubtitleIO.write(alignments, output_subtitle_path)
38
+ else:
39
+ import pysubs2
40
+
41
+ subtitle = pysubs2.load(input_subtitle_path)
42
+
43
+ subtitle.save(output_subtitle_path)
44
+
45
+
46
+ @subtitle.command()
47
+ @click.argument('url', type=str, required=True)
48
+ @click.option(
49
+ '--output-dir',
50
+ '--output_dir',
51
+ '-o',
52
+ type=click.Path(file_okay=False, dir_okay=True),
53
+ default='.',
54
+ help='Output directory for downloaded subtitle files (default: current directory).',
55
+ )
56
+ @click.option(
57
+ '--output-format',
58
+ '--output_format',
59
+ '-f',
60
+ type=click.Choice(SUBTITLE_FORMATS + ['best'], case_sensitive=False),
61
+ default='best',
62
+ help='Preferred subtitle format to download (default: best available).',
63
+ )
64
+ @click.option('--force-overwrite', '-F', is_flag=True, help='Overwrite existing files without prompting.')
65
+ @click.option(
66
+ '--lang',
67
+ '-l',
68
+ '-L',
69
+ '--subtitle-lang',
70
+ '--subtitle_lang',
71
+ type=str,
72
+ help='Specific subtitle language/track to download (e.g., "en").',
73
+ )
74
+ def download(
75
+ url: str,
76
+ output_dir: str,
77
+ output_format: str,
78
+ force_overwrite: bool,
79
+ lang: str,
80
+ ):
81
+ """
82
+ Download subtitles from YouTube URL using yt-dlp.
83
+
84
+ URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID).
85
+ """
86
+ # Import here to avoid circular imports and keep startup fast
87
+ from lattifai.workflows.youtube import YouTubeDownloader
88
+
89
+ # Validate URL format
90
+ if not _is_valid_youtube_url(url):
91
+ click.echo(f'Error: Invalid YouTube URL format: {url}', err=True)
92
+ click.echo('Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)', err=True)
93
+ raise click.Abort()
94
+
95
+ # Convert relative path to absolute
96
+ output_path = Path(output_dir).resolve()
97
+
98
+ # Create output directory if it doesn't exist
99
+ output_path.mkdir(parents=True, exist_ok=True)
100
+
101
+ click.echo(f'Downloading subtitles from: {url}')
102
+ click.echo(f' Output directory: {output_path}')
103
+ click.echo(f' Preferred format: {output_format}')
104
+ if lang:
105
+ click.echo(f' Subtitle language: {lang}')
106
+ else:
107
+ click.echo(' Subtitle language: All available')
108
+
109
+ # Initialize downloader and download
110
+ downloader = YouTubeDownloader()
111
+
112
+ async def download_subtitles():
113
+ try:
114
+ result = await downloader.download_subtitles(
115
+ url=url,
116
+ output_dir=str(output_path),
117
+ force_overwrite=force_overwrite,
118
+ subtitle_lang=lang,
119
+ )
120
+
121
+ if result:
122
+ click.echo('✅ Subtitles downloaded successfully!')
123
+ return result
124
+ else:
125
+ click.echo('⚠️ No subtitles available for this video')
126
+ return None
127
+
128
+ except Exception as e:
129
+ click.echo(f'❌ Error downloading subtitles: {str(e)}', err=True)
130
+ raise click.Abort()
131
+
132
+ # Run the async function
133
+ result = asyncio.run(download_subtitles())
134
+
135
+ if result:
136
+ if result == 'gemini':
137
+ click.echo('✨ Gemini transcription selected (use the agent command to transcribe)')
138
+ else:
139
+ click.echo(f'📄 Subtitle file saved to: {result}')
140
+
141
+
142
+ @subtitle.command()
143
+ @click.argument('url', type=str, required=True)
144
+ def list_subs(url: str):
145
+ """
146
+ List available subtitle tracks for a YouTube video.
147
+
148
+ URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
149
+ """
150
+ # Import here to avoid circular imports and keep startup fast
151
+ from lattifai.workflows.youtube import YouTubeDownloader
152
+
153
+ # Validate URL format
154
+ if not _is_valid_youtube_url(url):
155
+ click.echo(f'Error: Invalid YouTube URL format: {url}', err=True)
156
+ click.echo('Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)', err=True)
157
+ raise click.Abort()
158
+
159
+ click.echo(f'Listing available subtitles for: {url}')
160
+
161
+ # Initialize downloader
162
+ downloader = YouTubeDownloader()
163
+
164
+ async def list_available_subtitles():
165
+ try:
166
+ result = await downloader.list_available_subtitles(url)
167
+
168
+ if result:
169
+ click.echo('📋 Available subtitle tracks:')
170
+ for subtitle_info in result:
171
+ click.echo(f' 🎬 Language: {subtitle_info["language"]} - {subtitle_info["name"]}')
172
+ click.echo(f' 📄 Formats: {", ".join(subtitle_info["formats"])}')
173
+ click.echo()
174
+
175
+ click.echo('💡 To download a specific track, use:')
176
+ click.echo(f' lattifai subtitle download "{url}" --lang <language_code>')
177
+ click.echo(' Example: lattifai subtitle download "{}" --lang en-JkeT_87f4cc'.format(url))
178
+ else:
179
+ click.echo('⚠️ No subtitles available for this video')
180
+
181
+ except Exception as e:
182
+ click.echo(f'❌ Error listing subtitles: {str(e)}', err=True)
183
+ raise click.Abort()
184
+
185
+ # Run the async function
186
+ asyncio.run(list_available_subtitles())
187
+
188
+
189
+ def _is_valid_youtube_url(url: str) -> bool:
190
+ """
191
+ Validate if the URL is a valid YouTube URL format.
192
+
193
+ Supports various YouTube URL formats:
194
+ - https://www.youtube.com/watch?v=VIDEO_ID
195
+ - https://youtu.be/VIDEO_ID
196
+ - https://www.youtube.com/shorts/VIDEO_ID
197
+ - https://m.youtube.com/watch?v=VIDEO_ID
198
+ """
199
+ import re
200
+
201
+ patterns = [
202
+ r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})',
203
+ r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
204
+ r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
205
+ ]
30
206
 
31
- subtitle = pysubs2.load(input_subtitle_path)
32
- subtitle.save(output_subtitle_path)
207
+ for pattern in patterns:
208
+ if re.search(pattern, url):
209
+ return True
210
+ return False
lattifai/client.py CHANGED
@@ -1,9 +1,8 @@
1
1
  """LattifAI client implementation."""
2
2
 
3
- import logging
3
+ import asyncio
4
4
  import os
5
- from pathlib import Path
6
- from typing import Any, Awaitable, BinaryIO, Callable, Dict, Optional, Union
5
+ from typing import Dict, List, Optional, Tuple, Union
7
6
 
8
7
  import colorful
9
8
  from dotenv import load_dotenv
@@ -16,13 +15,12 @@ from lattifai.errors import (
16
15
  LatticeDecodingError,
17
16
  LatticeEncodingError,
18
17
  LattifAIError,
19
- ModelLoadError,
20
18
  SubtitleProcessingError,
21
19
  handle_exception,
22
20
  )
23
- from lattifai.io import SubtitleFormat, SubtitleIO
24
- from lattifai.tokenizer import LatticeTokenizer
25
- from lattifai.workers import Lattice1AlphaWorker
21
+ from lattifai.io import SubtitleFormat, SubtitleIO, Supervision
22
+ from lattifai.tokenizer import AsyncLatticeTokenizer
23
+ from lattifai.utils import _load_tokenizer, _load_worker, _resolve_model_path, _select_device
26
24
 
27
25
  load_dotenv()
28
26
 
@@ -62,47 +60,12 @@ class LattifAI(SyncAPIClient):
62
60
  default_headers=default_headers,
63
61
  )
64
62
 
65
- # Initialize components
66
- if not Path(model_name_or_path).exists():
67
- from huggingface_hub import snapshot_download
68
- from huggingface_hub.errors import LocalEntryNotFoundError
63
+ model_path = _resolve_model_path(model_name_or_path)
64
+ device = _select_device(device)
69
65
 
70
- try:
71
- model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
72
- except LocalEntryNotFoundError:
73
- try:
74
- os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
75
- model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
76
- except Exception as e:
77
- raise ModelLoadError(model_name_or_path, original_error=e)
78
- except Exception as e:
79
- raise ModelLoadError(model_name_or_path, original_error=e)
80
- else:
81
- model_path = model_name_or_path
82
-
83
- # device setup
84
- if device is None:
85
- import torch
86
-
87
- device = 'cpu'
88
- if torch.backends.mps.is_available():
89
- device = 'mps'
90
- elif torch.cuda.is_available():
91
- device = 'cuda'
92
-
93
- try:
94
- self.tokenizer = LatticeTokenizer.from_pretrained(
95
- client_wrapper=self,
96
- model_path=model_path,
97
- device=device,
98
- )
99
- except Exception as e:
100
- raise ModelLoadError(f'tokenizer from {model_path}', original_error=e)
101
-
102
- try:
103
- self.worker = Lattice1AlphaWorker(model_path, device=device, num_threads=8)
104
- except Exception as e:
105
- raise ModelLoadError(f'worker from {model_path}', original_error=e)
66
+ self.tokenizer = _load_tokenizer(self, model_path, device)
67
+ self.worker = _load_worker(model_path, device)
68
+ self.device = device
106
69
 
107
70
  def alignment(
108
71
  self,
@@ -110,19 +73,23 @@ class LattifAI(SyncAPIClient):
110
73
  subtitle: Pathlike,
111
74
  format: Optional[SubtitleFormat] = None,
112
75
  split_sentence: bool = False,
76
+ return_details: bool = False,
113
77
  output_subtitle_path: Optional[Pathlike] = None,
114
- ) -> str:
78
+ ) -> Tuple[List[Supervision], Optional[Pathlike]]:
115
79
  """Perform alignment on audio and subtitle/text.
116
80
 
117
81
  Args:
118
82
  audio: Audio file path
119
83
  subtitle: Subtitle/Text to align with audio
120
- format: Output format (srt, vtt, ass, txt)
121
- split_sentence: Whether to split sentences during processing
122
- output_subtitle_path: Path to save output file
84
+ format: Input subtitle format (srt, vtt, ass, txt). Auto-detected if None
85
+ split_sentence: Enable intelligent sentence re-splitting based on punctuation semantics
86
+ return_details: Return word-level alignment details in Supervision.alignment field
87
+ output_subtitle_path: Output path for aligned subtitle (optional)
123
88
 
124
89
  Returns:
125
- Aligned subtitles in specified format
90
+ Tuple containing:
91
+ - List of aligned Supervision objects with timing information
92
+ - Output subtitle path (if output_subtitle_path was provided)
126
93
 
127
94
  Raises:
128
95
  SubtitleProcessingError: If subtitle file cannot be parsed
@@ -144,19 +111,21 @@ class LattifAI(SyncAPIClient):
144
111
  )
145
112
 
146
113
  # step2: make lattice by call Lattifai API
147
- print(colorful.cyan('🔗 Step 2: Creating lattice graph from text'))
114
+ print(colorful.cyan('🔗 Step 2: Creating lattice graph from segments'))
148
115
  try:
149
- lattice_id, lattice_graph = self.tokenizer.tokenize(supervisions, split_sentence=split_sentence)
116
+ supervisions, lattice_id, lattice_graph = self.tokenizer.tokenize(
117
+ supervisions, split_sentence=split_sentence
118
+ )
150
119
  print(colorful.green(f' ✓ Generated lattice graph with ID: {lattice_id}'))
151
120
  except Exception as e:
152
121
  text_content = ' '.join([sup.text for sup in supervisions]) if supervisions else ''
153
122
  raise LatticeEncodingError(text_content, original_error=e)
154
123
 
155
- # step3: align audio with text
156
- print(colorful.cyan(f'🎵 Step 3: Performing alignment on audio file: {audio}'))
124
+ # step3: search lattice graph with audio
125
+ print(colorful.cyan(f'🔍 Step 3: Searching lattice graph with audio: {audio}'))
157
126
  try:
158
127
  lattice_results = self.worker.alignment(audio, lattice_graph)
159
- print(colorful.green(' ✓ Alignment completed successfully'))
128
+ print(colorful.green(' ✓ Lattice search completed'))
160
129
  except Exception as e:
161
130
  raise AlignmentError(
162
131
  f'Audio alignment failed for {audio}',
@@ -165,11 +134,16 @@ class LattifAI(SyncAPIClient):
165
134
  context={'original_error': str(e)},
166
135
  )
167
136
 
168
- # step4: decode the lattice paths
169
- print(colorful.cyan('🔍 Step 4: Decoding lattice paths to final alignments'))
137
+ # step4: decode lattice results to aligned segments
138
+ print(colorful.cyan('🎯 Step 4: Decoding lattice results to aligned segments'))
170
139
  try:
171
- alignments = self.tokenizer.detokenize(lattice_id, lattice_results)
172
- print(colorful.green(f' ✓ Decoded {len(alignments)} aligned segments'))
140
+ alignments = self.tokenizer.detokenize(
141
+ lattice_id, lattice_results, supervisions=supervisions, return_details=return_details
142
+ )
143
+ print(colorful.green(f' ✓ Successfully aligned {len(alignments)} segments'))
144
+ except LatticeDecodingError as e:
145
+ print(colorful.red(' x Failed to decode lattice alignment results'))
146
+ raise e
173
147
  except Exception as e:
174
148
  print(colorful.red(' x Failed to decode lattice alignment results'))
175
149
  raise LatticeDecodingError(lattice_id, original_error=e)
@@ -185,8 +159,7 @@ class LattifAI(SyncAPIClient):
185
159
  subtitle_path=str(output_subtitle_path),
186
160
  context={'original_error': str(e)},
187
161
  )
188
-
189
- return output_subtitle_path or alignments
162
+ return (alignments, output_subtitle_path)
190
163
 
191
164
  except (SubtitleProcessingError, LatticeEncodingError, AlignmentError, LatticeDecodingError):
192
165
  # Re-raise our specific errors as-is
@@ -201,15 +174,142 @@ class LattifAI(SyncAPIClient):
201
174
  )
202
175
 
203
176
 
177
+ class AsyncLattifAI(AsyncAPIClient):
178
+ """Asynchronous LattifAI client."""
179
+
180
+ def __init__(
181
+ self,
182
+ *,
183
+ api_key: Optional[str] = None,
184
+ model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
185
+ device: Optional[str] = None,
186
+ base_url: Optional[str] = None,
187
+ timeout: Union[float, int] = 120.0,
188
+ max_retries: int = 2,
189
+ default_headers: Optional[Dict[str, str]] = None,
190
+ ) -> None:
191
+ if api_key is None:
192
+ api_key = os.environ.get('LATTIFAI_API_KEY')
193
+ if api_key is None:
194
+ raise ConfigurationError(
195
+ 'The api_key client option must be set either by passing api_key to the client '
196
+ 'or by setting the LATTIFAI_API_KEY environment variable'
197
+ )
198
+
199
+ if base_url is None:
200
+ base_url = os.environ.get('LATTIFAI_BASE_URL')
201
+ if not base_url:
202
+ base_url = 'https://api.lattifai.com/v1'
203
+
204
+ super().__init__(
205
+ api_key=api_key,
206
+ base_url=base_url,
207
+ timeout=timeout,
208
+ max_retries=max_retries,
209
+ default_headers=default_headers,
210
+ )
211
+
212
+ model_path = _resolve_model_path(model_name_or_path)
213
+ device = _select_device(device)
214
+
215
+ self.tokenizer = _load_tokenizer(self, model_path, device, tokenizer_cls=AsyncLatticeTokenizer)
216
+ self.worker = _load_worker(model_path, device)
217
+ self.device = device
218
+
219
+ async def alignment(
220
+ self,
221
+ audio: Pathlike,
222
+ subtitle: Pathlike,
223
+ format: Optional[SubtitleFormat] = None,
224
+ split_sentence: bool = False,
225
+ return_details: bool = False,
226
+ output_subtitle_path: Optional[Pathlike] = None,
227
+ ) -> Tuple[List[Supervision], Optional[Pathlike]]:
228
+ try:
229
+ print(colorful.cyan(f'📖 Step 1: Reading subtitle file from {subtitle}'))
230
+ try:
231
+ supervisions = await asyncio.to_thread(SubtitleIO.read, subtitle, format=format)
232
+ print(colorful.green(f' ✓ Parsed {len(supervisions)} subtitle segments'))
233
+ except Exception as e:
234
+ raise SubtitleProcessingError(
235
+ f'Failed to parse subtitle file: {subtitle}',
236
+ subtitle_path=str(subtitle),
237
+ context={'original_error': str(e)},
238
+ )
239
+
240
+ print(colorful.cyan('🔗 Step 2: Creating lattice graph from segments'))
241
+ try:
242
+ supervisions, lattice_id, lattice_graph = await self.tokenizer.tokenize(
243
+ supervisions,
244
+ split_sentence=split_sentence,
245
+ )
246
+ print(colorful.green(f' ✓ Generated lattice graph with ID: {lattice_id}'))
247
+ except Exception as e:
248
+ text_content = ' '.join([sup.text for sup in supervisions]) if supervisions else ''
249
+ raise LatticeEncodingError(text_content, original_error=e)
250
+
251
+ print(colorful.cyan(f'🔍 Step 3: Searching lattice graph with audio: {audio}'))
252
+ try:
253
+ lattice_results = await asyncio.to_thread(self.worker.alignment, audio, lattice_graph)
254
+ print(colorful.green(' ✓ Lattice search completed'))
255
+ except Exception as e:
256
+ raise AlignmentError(
257
+ f'Audio alignment failed for {audio}',
258
+ audio_path=str(audio),
259
+ subtitle_path=str(subtitle),
260
+ context={'original_error': str(e)},
261
+ )
262
+
263
+ print(colorful.cyan('🎯 Step 4: Decoding lattice results to aligned segments'))
264
+ try:
265
+ alignments = await self.tokenizer.detokenize(
266
+ lattice_id, lattice_results, supervisions=supervisions, return_details=return_details
267
+ )
268
+ print(colorful.green(f' ✓ Successfully aligned {len(alignments)} segments'))
269
+ except LatticeDecodingError as e:
270
+ print(colorful.red(' x Failed to decode lattice alignment results'))
271
+ raise e
272
+ except Exception as e:
273
+ print(colorful.red(' x Failed to decode lattice alignment results'))
274
+ raise LatticeDecodingError(lattice_id, original_error=e)
275
+
276
+ if output_subtitle_path:
277
+ try:
278
+ await asyncio.to_thread(SubtitleIO.write, alignments, output_subtitle_path)
279
+ print(colorful.green(f'🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}'))
280
+ except Exception as e:
281
+ raise SubtitleProcessingError(
282
+ f'Failed to write output file: {output_subtitle_path}',
283
+ subtitle_path=str(output_subtitle_path),
284
+ context={'original_error': str(e)},
285
+ )
286
+
287
+ return (alignments, output_subtitle_path)
288
+
289
+ except (SubtitleProcessingError, LatticeEncodingError, AlignmentError, LatticeDecodingError):
290
+ raise
291
+ except Exception as e:
292
+ raise AlignmentError(
293
+ 'Unexpected error during alignment process',
294
+ audio_path=str(audio),
295
+ subtitle_path=str(subtitle),
296
+ context={'original_error': str(e), 'error_type': e.__class__.__name__},
297
+ )
298
+
299
+
204
300
  if __name__ == '__main__':
205
301
  client = LattifAI()
206
302
  import sys
207
303
 
208
- if len(sys.argv) == 4:
209
- audio, subtitle, output = sys.argv[1:]
304
+ if len(sys.argv) == 5:
305
+ audio, subtitle, output, split_sentence = sys.argv[1:]
306
+ split_sentence = split_sentence.lower() in ('true', '1', 'yes')
210
307
  else:
211
308
  audio = 'tests/data/SA1.wav'
212
309
  subtitle = 'tests/data/SA1.TXT'
213
310
  output = None
311
+ split_sentence = False
214
312
 
215
- alignments = client.alignment(audio, subtitle, output_subtitle_path=output, split_sentence=True)
313
+ (alignments, output_subtitle_path) = client.alignment(
314
+ audio, subtitle, output_subtitle_path=output, split_sentence=split_sentence, return_details=True
315
+ )
lattifai/errors.py CHANGED
@@ -1,11 +1,26 @@
1
1
  """Error handling and exception classes for LattifAI SDK."""
2
2
 
3
- import sys
4
3
  import traceback
5
4
  from typing import Any, Dict, Optional
6
5
 
7
6
  import colorful
8
7
 
8
+ # Error help messages
9
+ LATTICE_DECODING_FAILURE_HELP = (
10
+ 'Failed to decode lattice alignment. Possible reasons:\n\n'
11
+ '1) Audio and text content mismatch:\n'
12
+ ' - The transcript/subtitle does not accurately match the audio content\n'
13
+ ' - Text may be from a different version or section of the audio\n'
14
+ ' ⚠️ Note: Gemini transcription may occasionally skip large segments of audio, causing alignment failures.\n'
15
+ ' We will detect and fix this issue in the next version.\n\n'
16
+ '2) Unsupported audio type:\n'
17
+ ' - Singing is not yet supported, this will be optimized in future versions\n\n'
18
+ '💡 Troubleshooting tips:\n'
19
+ ' • Verify the transcript matches the audio by listening to a few segments\n'
20
+ ' • For YouTube videos, manually check if auto-generated transcript are accurate\n'
21
+ ' • Consider using a different transcription source if Gemini results are incomplete'
22
+ )
23
+
9
24
 
10
25
  class LattifAIError(Exception):
11
26
  """Base exception for LattifAI errors."""
@@ -26,7 +41,7 @@ class LattifAIError(Exception):
26
41
  def get_support_info(self) -> str:
27
42
  """Get support information for users."""
28
43
  return (
29
- f'\n\n{colorful.green("🔧 Need help? Here are two ways to get support:")}\n'
44
+ f'\n{colorful.green("🔧 Need help? Here are two ways to get support:")}\n'
30
45
  f' 1. 📝 Create a GitHub issue: {colorful.green("https://github.com/lattifai/lattifai-python/issues")}\n'
31
46
  ' Please include:\n'
32
47
  ' - Your audio file format and duration\n'
@@ -36,13 +51,21 @@ class LattifAIError(Exception):
36
51
  ' Our team and community can help you troubleshoot\n'
37
52
  )
38
53
 
39
- def __str__(self) -> str:
40
- """Return formatted error message with support information."""
54
+ def get_message(self) -> str:
55
+ """Return formatted error message without support information."""
41
56
  base_message = f'{colorful.red(f"[{self.error_code}] {self.message}")}'
42
57
  if self.context:
43
58
  context_str = f'\n{colorful.yellow("Context:")} ' + ', '.join(f'{k}={v}' for k, v in self.context.items())
44
59
  base_message += context_str
45
- return base_message + self.get_support_info()
60
+ return base_message
61
+
62
+ def __str__(self) -> str:
63
+ """Return formatted error message without support information.
64
+
65
+ Note: Support info should be displayed explicitly at the CLI level,
66
+ not automatically appended to avoid duplication when errors are re-raised.
67
+ """
68
+ return self.get_message()
46
69
 
47
70
 
48
71
  class AudioProcessingError(LattifAIError):
@@ -142,14 +165,29 @@ class LatticeDecodingError(AlignmentError):
142
165
 
143
166
  def __init__(self, lattice_id: str, original_error: Optional[Exception] = None, **kwargs):
144
167
  message = f'Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}'
145
- if original_error:
168
+
169
+ # Don't duplicate the help message if it's already in original_error
170
+ if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
146
171
  message += f' - {colorful.red(str(original_error))}'
147
172
 
148
173
  context = kwargs.get('context', {})
149
- context.update({'lattice_id': lattice_id, 'original_error': str(original_error) if original_error else None})
174
+ # Don't store the entire help message in context to avoid duplication
175
+ if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
176
+ context['original_error'] = str(original_error)
177
+ context['lattice_id'] = lattice_id
150
178
  kwargs['context'] = context
151
179
  super().__init__(message, **kwargs)
152
180
 
181
+ def get_message(self) -> str:
182
+ """Return formatted error message with help text."""
183
+ base_message = f'{colorful.red(f"[{self.error_code}]")} {self.message}'
184
+ if self.context and self.context.get('lattice_id'):
185
+ # Only show essential context (lattice_id), not the duplicated help message
186
+ base_message += f'\n{colorful.yellow("Lattice ID:")} {self.context["lattice_id"]}'
187
+ # Append help message once at the end
188
+ base_message += f'\n\n{colorful.yellow(LATTICE_DECODING_FAILURE_HELP)}'
189
+ return base_message
190
+
153
191
 
154
192
  class ModelLoadError(LattifAIError):
155
193
  """Error loading AI model."""
lattifai/io/__init__.py CHANGED
@@ -2,11 +2,31 @@ from typing import List, Optional
2
2
 
3
3
  from lhotse.utils import Pathlike
4
4
 
5
+ from .gemini_reader import GeminiReader, GeminiSegment
6
+ from .gemini_writer import GeminiWriter
5
7
  from .reader import SubtitleFormat, SubtitleReader
6
8
  from .supervision import Supervision
9
+ from .utils import (
10
+ ALL_SUBTITLE_FORMATS,
11
+ INPUT_SUBTITLE_FORMATS,
12
+ OUTPUT_SUBTITLE_FORMATS,
13
+ SUBTITLE_FORMATS,
14
+ )
7
15
  from .writer import SubtitleWriter
8
16
 
9
- __all__ = ['SubtitleReader', 'SubtitleWriter', 'SubtitleIO', 'Supervision']
17
+ __all__ = [
18
+ 'SubtitleReader',
19
+ 'SubtitleWriter',
20
+ 'SubtitleIO',
21
+ 'Supervision',
22
+ 'GeminiReader',
23
+ 'GeminiWriter',
24
+ 'GeminiSegment',
25
+ 'SUBTITLE_FORMATS',
26
+ 'INPUT_SUBTITLE_FORMATS',
27
+ 'OUTPUT_SUBTITLE_FORMATS',
28
+ 'ALL_SUBTITLE_FORMATS',
29
+ ]
10
30
 
11
31
 
12
32
  class SubtitleIO: