lattifai 0.2.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +5 -0
- lattifai/base_client.py +11 -0
- lattifai/bin/__init__.py +1 -0
- lattifai/bin/agent.py +326 -0
- lattifai/bin/align.py +253 -21
- lattifai/bin/cli_base.py +5 -0
- lattifai/bin/subtitle.py +182 -4
- lattifai/client.py +166 -66
- lattifai/errors.py +45 -7
- lattifai/io/__init__.py +21 -1
- lattifai/io/gemini_reader.py +371 -0
- lattifai/io/gemini_writer.py +173 -0
- lattifai/io/parser.py +75 -0
- lattifai/io/reader.py +25 -10
- lattifai/io/supervision.py +16 -0
- lattifai/io/utils.py +15 -0
- lattifai/io/writer.py +58 -17
- lattifai/tokenizer/__init__.py +2 -2
- lattifai/tokenizer/tokenizer.py +229 -41
- lattifai/utils.py +133 -0
- lattifai-0.4.1.dist-info/METADATA +810 -0
- lattifai-0.4.1.dist-info/RECORD +29 -0
- lattifai-0.4.1.dist-info/entry_points.txt +3 -0
- lattifai-0.2.5.dist-info/METADATA +0 -334
- lattifai-0.2.5.dist-info/RECORD +0 -23
- lattifai-0.2.5.dist-info/entry_points.txt +0 -4
- {lattifai-0.2.5.dist-info → lattifai-0.4.1.dist-info}/WHEEL +0 -0
- {lattifai-0.2.5.dist-info → lattifai-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.2.5.dist-info → lattifai-0.4.1.dist-info}/top_level.txt +0 -0
lattifai/bin/subtitle.py
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
1
4
|
import click
|
|
2
5
|
from lhotse.utils import Pathlike
|
|
3
6
|
|
|
4
7
|
from lattifai.bin.cli_base import cli
|
|
8
|
+
from lattifai.io import SUBTITLE_FORMATS
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
@cli.group()
|
|
8
12
|
def subtitle():
|
|
9
|
-
"""
|
|
13
|
+
"""Commands for subtitle format conversion and management."""
|
|
10
14
|
pass
|
|
11
15
|
|
|
12
16
|
|
|
@@ -26,7 +30,181 @@ def convert(
|
|
|
26
30
|
"""
|
|
27
31
|
Convert subtitle file to another format.
|
|
28
32
|
"""
|
|
29
|
-
|
|
33
|
+
if str(output_subtitle_path).lower().endswith('.TextGrid'.lower()):
|
|
34
|
+
from lattifai.io import SubtitleIO
|
|
35
|
+
|
|
36
|
+
alignments = SubtitleIO.read(input_subtitle_path)
|
|
37
|
+
SubtitleIO.write(alignments, output_subtitle_path)
|
|
38
|
+
else:
|
|
39
|
+
import pysubs2
|
|
40
|
+
|
|
41
|
+
subtitle = pysubs2.load(input_subtitle_path)
|
|
42
|
+
|
|
43
|
+
subtitle.save(output_subtitle_path)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@subtitle.command()
|
|
47
|
+
@click.argument('url', type=str, required=True)
|
|
48
|
+
@click.option(
|
|
49
|
+
'--output-dir',
|
|
50
|
+
'--output_dir',
|
|
51
|
+
'-o',
|
|
52
|
+
type=click.Path(file_okay=False, dir_okay=True),
|
|
53
|
+
default='.',
|
|
54
|
+
help='Output directory for downloaded subtitle files (default: current directory).',
|
|
55
|
+
)
|
|
56
|
+
@click.option(
|
|
57
|
+
'--output-format',
|
|
58
|
+
'--output_format',
|
|
59
|
+
'-f',
|
|
60
|
+
type=click.Choice(SUBTITLE_FORMATS + ['best'], case_sensitive=False),
|
|
61
|
+
default='best',
|
|
62
|
+
help='Preferred subtitle format to download (default: best available).',
|
|
63
|
+
)
|
|
64
|
+
@click.option('--force-overwrite', '-F', is_flag=True, help='Overwrite existing files without prompting.')
|
|
65
|
+
@click.option(
|
|
66
|
+
'--lang',
|
|
67
|
+
'-l',
|
|
68
|
+
'-L',
|
|
69
|
+
'--subtitle-lang',
|
|
70
|
+
'--subtitle_lang',
|
|
71
|
+
type=str,
|
|
72
|
+
help='Specific subtitle language/track to download (e.g., "en").',
|
|
73
|
+
)
|
|
74
|
+
def download(
|
|
75
|
+
url: str,
|
|
76
|
+
output_dir: str,
|
|
77
|
+
output_format: str,
|
|
78
|
+
force_overwrite: bool,
|
|
79
|
+
lang: str,
|
|
80
|
+
):
|
|
81
|
+
"""
|
|
82
|
+
Download subtitles from YouTube URL using yt-dlp.
|
|
83
|
+
|
|
84
|
+
URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID).
|
|
85
|
+
"""
|
|
86
|
+
# Import here to avoid circular imports and keep startup fast
|
|
87
|
+
from lattifai.workflows.youtube import YouTubeDownloader
|
|
88
|
+
|
|
89
|
+
# Validate URL format
|
|
90
|
+
if not _is_valid_youtube_url(url):
|
|
91
|
+
click.echo(f'Error: Invalid YouTube URL format: {url}', err=True)
|
|
92
|
+
click.echo('Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)', err=True)
|
|
93
|
+
raise click.Abort()
|
|
94
|
+
|
|
95
|
+
# Convert relative path to absolute
|
|
96
|
+
output_path = Path(output_dir).resolve()
|
|
97
|
+
|
|
98
|
+
# Create output directory if it doesn't exist
|
|
99
|
+
output_path.mkdir(parents=True, exist_ok=True)
|
|
100
|
+
|
|
101
|
+
click.echo(f'Downloading subtitles from: {url}')
|
|
102
|
+
click.echo(f' Output directory: {output_path}')
|
|
103
|
+
click.echo(f' Preferred format: {output_format}')
|
|
104
|
+
if lang:
|
|
105
|
+
click.echo(f' Subtitle language: {lang}')
|
|
106
|
+
else:
|
|
107
|
+
click.echo(' Subtitle language: All available')
|
|
108
|
+
|
|
109
|
+
# Initialize downloader and download
|
|
110
|
+
downloader = YouTubeDownloader()
|
|
111
|
+
|
|
112
|
+
async def download_subtitles():
|
|
113
|
+
try:
|
|
114
|
+
result = await downloader.download_subtitles(
|
|
115
|
+
url=url,
|
|
116
|
+
output_dir=str(output_path),
|
|
117
|
+
force_overwrite=force_overwrite,
|
|
118
|
+
subtitle_lang=lang,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
if result:
|
|
122
|
+
click.echo('✅ Subtitles downloaded successfully!')
|
|
123
|
+
return result
|
|
124
|
+
else:
|
|
125
|
+
click.echo('⚠️ No subtitles available for this video')
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
except Exception as e:
|
|
129
|
+
click.echo(f'❌ Error downloading subtitles: {str(e)}', err=True)
|
|
130
|
+
raise click.Abort()
|
|
131
|
+
|
|
132
|
+
# Run the async function
|
|
133
|
+
result = asyncio.run(download_subtitles())
|
|
134
|
+
|
|
135
|
+
if result:
|
|
136
|
+
if result == 'gemini':
|
|
137
|
+
click.echo('✨ Gemini transcription selected (use the agent command to transcribe)')
|
|
138
|
+
else:
|
|
139
|
+
click.echo(f'📄 Subtitle file saved to: {result}')
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@subtitle.command()
|
|
143
|
+
@click.argument('url', type=str, required=True)
|
|
144
|
+
def list_subs(url: str):
|
|
145
|
+
"""
|
|
146
|
+
List available subtitle tracks for a YouTube video.
|
|
147
|
+
|
|
148
|
+
URL should be a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
|
|
149
|
+
"""
|
|
150
|
+
# Import here to avoid circular imports and keep startup fast
|
|
151
|
+
from lattifai.workflows.youtube import YouTubeDownloader
|
|
152
|
+
|
|
153
|
+
# Validate URL format
|
|
154
|
+
if not _is_valid_youtube_url(url):
|
|
155
|
+
click.echo(f'Error: Invalid YouTube URL format: {url}', err=True)
|
|
156
|
+
click.echo('Please provide a valid YouTube URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)', err=True)
|
|
157
|
+
raise click.Abort()
|
|
158
|
+
|
|
159
|
+
click.echo(f'Listing available subtitles for: {url}')
|
|
160
|
+
|
|
161
|
+
# Initialize downloader
|
|
162
|
+
downloader = YouTubeDownloader()
|
|
163
|
+
|
|
164
|
+
async def list_available_subtitles():
|
|
165
|
+
try:
|
|
166
|
+
result = await downloader.list_available_subtitles(url)
|
|
167
|
+
|
|
168
|
+
if result:
|
|
169
|
+
click.echo('📋 Available subtitle tracks:')
|
|
170
|
+
for subtitle_info in result:
|
|
171
|
+
click.echo(f' 🎬 Language: {subtitle_info["language"]} - {subtitle_info["name"]}')
|
|
172
|
+
click.echo(f' 📄 Formats: {", ".join(subtitle_info["formats"])}')
|
|
173
|
+
click.echo()
|
|
174
|
+
|
|
175
|
+
click.echo('💡 To download a specific track, use:')
|
|
176
|
+
click.echo(f' lattifai subtitle download "{url}" --lang <language_code>')
|
|
177
|
+
click.echo(' Example: lattifai subtitle download "{}" --lang en-JkeT_87f4cc'.format(url))
|
|
178
|
+
else:
|
|
179
|
+
click.echo('⚠️ No subtitles available for this video')
|
|
180
|
+
|
|
181
|
+
except Exception as e:
|
|
182
|
+
click.echo(f'❌ Error listing subtitles: {str(e)}', err=True)
|
|
183
|
+
raise click.Abort()
|
|
184
|
+
|
|
185
|
+
# Run the async function
|
|
186
|
+
asyncio.run(list_available_subtitles())
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _is_valid_youtube_url(url: str) -> bool:
|
|
190
|
+
"""
|
|
191
|
+
Validate if the URL is a valid YouTube URL format.
|
|
192
|
+
|
|
193
|
+
Supports various YouTube URL formats:
|
|
194
|
+
- https://www.youtube.com/watch?v=VIDEO_ID
|
|
195
|
+
- https://youtu.be/VIDEO_ID
|
|
196
|
+
- https://www.youtube.com/shorts/VIDEO_ID
|
|
197
|
+
- https://m.youtube.com/watch?v=VIDEO_ID
|
|
198
|
+
"""
|
|
199
|
+
import re
|
|
200
|
+
|
|
201
|
+
patterns = [
|
|
202
|
+
r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([a-zA-Z0-9_-]{11})',
|
|
203
|
+
r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
|
|
204
|
+
r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
|
|
205
|
+
]
|
|
30
206
|
|
|
31
|
-
|
|
32
|
-
|
|
207
|
+
for pattern in patterns:
|
|
208
|
+
if re.search(pattern, url):
|
|
209
|
+
return True
|
|
210
|
+
return False
|
lattifai/client.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
"""LattifAI client implementation."""
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
import asyncio
|
|
4
4
|
import os
|
|
5
|
-
from
|
|
6
|
-
from typing import Any, Awaitable, BinaryIO, Callable, Dict, Optional, Union
|
|
5
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
7
6
|
|
|
8
7
|
import colorful
|
|
9
8
|
from dotenv import load_dotenv
|
|
@@ -16,13 +15,12 @@ from lattifai.errors import (
|
|
|
16
15
|
LatticeDecodingError,
|
|
17
16
|
LatticeEncodingError,
|
|
18
17
|
LattifAIError,
|
|
19
|
-
ModelLoadError,
|
|
20
18
|
SubtitleProcessingError,
|
|
21
19
|
handle_exception,
|
|
22
20
|
)
|
|
23
|
-
from lattifai.io import SubtitleFormat, SubtitleIO
|
|
24
|
-
from lattifai.tokenizer import
|
|
25
|
-
from lattifai.
|
|
21
|
+
from lattifai.io import SubtitleFormat, SubtitleIO, Supervision
|
|
22
|
+
from lattifai.tokenizer import AsyncLatticeTokenizer
|
|
23
|
+
from lattifai.utils import _load_tokenizer, _load_worker, _resolve_model_path, _select_device
|
|
26
24
|
|
|
27
25
|
load_dotenv()
|
|
28
26
|
|
|
@@ -62,47 +60,12 @@ class LattifAI(SyncAPIClient):
|
|
|
62
60
|
default_headers=default_headers,
|
|
63
61
|
)
|
|
64
62
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
from huggingface_hub import snapshot_download
|
|
68
|
-
from huggingface_hub.errors import LocalEntryNotFoundError
|
|
63
|
+
model_path = _resolve_model_path(model_name_or_path)
|
|
64
|
+
device = _select_device(device)
|
|
69
65
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
try:
|
|
74
|
-
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
|
75
|
-
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
76
|
-
except Exception as e:
|
|
77
|
-
raise ModelLoadError(model_name_or_path, original_error=e)
|
|
78
|
-
except Exception as e:
|
|
79
|
-
raise ModelLoadError(model_name_or_path, original_error=e)
|
|
80
|
-
else:
|
|
81
|
-
model_path = model_name_or_path
|
|
82
|
-
|
|
83
|
-
# device setup
|
|
84
|
-
if device is None:
|
|
85
|
-
import torch
|
|
86
|
-
|
|
87
|
-
device = 'cpu'
|
|
88
|
-
if torch.backends.mps.is_available():
|
|
89
|
-
device = 'mps'
|
|
90
|
-
elif torch.cuda.is_available():
|
|
91
|
-
device = 'cuda'
|
|
92
|
-
|
|
93
|
-
try:
|
|
94
|
-
self.tokenizer = LatticeTokenizer.from_pretrained(
|
|
95
|
-
client_wrapper=self,
|
|
96
|
-
model_path=model_path,
|
|
97
|
-
device=device,
|
|
98
|
-
)
|
|
99
|
-
except Exception as e:
|
|
100
|
-
raise ModelLoadError(f'tokenizer from {model_path}', original_error=e)
|
|
101
|
-
|
|
102
|
-
try:
|
|
103
|
-
self.worker = Lattice1AlphaWorker(model_path, device=device, num_threads=8)
|
|
104
|
-
except Exception as e:
|
|
105
|
-
raise ModelLoadError(f'worker from {model_path}', original_error=e)
|
|
66
|
+
self.tokenizer = _load_tokenizer(self, model_path, device)
|
|
67
|
+
self.worker = _load_worker(model_path, device)
|
|
68
|
+
self.device = device
|
|
106
69
|
|
|
107
70
|
def alignment(
|
|
108
71
|
self,
|
|
@@ -110,19 +73,23 @@ class LattifAI(SyncAPIClient):
|
|
|
110
73
|
subtitle: Pathlike,
|
|
111
74
|
format: Optional[SubtitleFormat] = None,
|
|
112
75
|
split_sentence: bool = False,
|
|
76
|
+
return_details: bool = False,
|
|
113
77
|
output_subtitle_path: Optional[Pathlike] = None,
|
|
114
|
-
) ->
|
|
78
|
+
) -> Tuple[List[Supervision], Optional[Pathlike]]:
|
|
115
79
|
"""Perform alignment on audio and subtitle/text.
|
|
116
80
|
|
|
117
81
|
Args:
|
|
118
82
|
audio: Audio file path
|
|
119
83
|
subtitle: Subtitle/Text to align with audio
|
|
120
|
-
format:
|
|
121
|
-
split_sentence:
|
|
122
|
-
|
|
84
|
+
format: Input subtitle format (srt, vtt, ass, txt). Auto-detected if None
|
|
85
|
+
split_sentence: Enable intelligent sentence re-splitting based on punctuation semantics
|
|
86
|
+
return_details: Return word-level alignment details in Supervision.alignment field
|
|
87
|
+
output_subtitle_path: Output path for aligned subtitle (optional)
|
|
123
88
|
|
|
124
89
|
Returns:
|
|
125
|
-
|
|
90
|
+
Tuple containing:
|
|
91
|
+
- List of aligned Supervision objects with timing information
|
|
92
|
+
- Output subtitle path (if output_subtitle_path was provided)
|
|
126
93
|
|
|
127
94
|
Raises:
|
|
128
95
|
SubtitleProcessingError: If subtitle file cannot be parsed
|
|
@@ -144,19 +111,21 @@ class LattifAI(SyncAPIClient):
|
|
|
144
111
|
)
|
|
145
112
|
|
|
146
113
|
# step2: make lattice by call Lattifai API
|
|
147
|
-
print(colorful.cyan('🔗 Step 2: Creating lattice graph from
|
|
114
|
+
print(colorful.cyan('🔗 Step 2: Creating lattice graph from segments'))
|
|
148
115
|
try:
|
|
149
|
-
lattice_id, lattice_graph = self.tokenizer.tokenize(
|
|
116
|
+
supervisions, lattice_id, lattice_graph = self.tokenizer.tokenize(
|
|
117
|
+
supervisions, split_sentence=split_sentence
|
|
118
|
+
)
|
|
150
119
|
print(colorful.green(f' ✓ Generated lattice graph with ID: {lattice_id}'))
|
|
151
120
|
except Exception as e:
|
|
152
121
|
text_content = ' '.join([sup.text for sup in supervisions]) if supervisions else ''
|
|
153
122
|
raise LatticeEncodingError(text_content, original_error=e)
|
|
154
123
|
|
|
155
|
-
# step3:
|
|
156
|
-
print(colorful.cyan(f'
|
|
124
|
+
# step3: search lattice graph with audio
|
|
125
|
+
print(colorful.cyan(f'🔍 Step 3: Searching lattice graph with audio: {audio}'))
|
|
157
126
|
try:
|
|
158
127
|
lattice_results = self.worker.alignment(audio, lattice_graph)
|
|
159
|
-
print(colorful.green(' ✓
|
|
128
|
+
print(colorful.green(' ✓ Lattice search completed'))
|
|
160
129
|
except Exception as e:
|
|
161
130
|
raise AlignmentError(
|
|
162
131
|
f'Audio alignment failed for {audio}',
|
|
@@ -165,11 +134,16 @@ class LattifAI(SyncAPIClient):
|
|
|
165
134
|
context={'original_error': str(e)},
|
|
166
135
|
)
|
|
167
136
|
|
|
168
|
-
# step4: decode
|
|
169
|
-
print(colorful.cyan('
|
|
137
|
+
# step4: decode lattice results to aligned segments
|
|
138
|
+
print(colorful.cyan('🎯 Step 4: Decoding lattice results to aligned segments'))
|
|
170
139
|
try:
|
|
171
|
-
alignments = self.tokenizer.detokenize(
|
|
172
|
-
|
|
140
|
+
alignments = self.tokenizer.detokenize(
|
|
141
|
+
lattice_id, lattice_results, supervisions=supervisions, return_details=return_details
|
|
142
|
+
)
|
|
143
|
+
print(colorful.green(f' ✓ Successfully aligned {len(alignments)} segments'))
|
|
144
|
+
except LatticeDecodingError as e:
|
|
145
|
+
print(colorful.red(' x Failed to decode lattice alignment results'))
|
|
146
|
+
raise e
|
|
173
147
|
except Exception as e:
|
|
174
148
|
print(colorful.red(' x Failed to decode lattice alignment results'))
|
|
175
149
|
raise LatticeDecodingError(lattice_id, original_error=e)
|
|
@@ -185,8 +159,7 @@ class LattifAI(SyncAPIClient):
|
|
|
185
159
|
subtitle_path=str(output_subtitle_path),
|
|
186
160
|
context={'original_error': str(e)},
|
|
187
161
|
)
|
|
188
|
-
|
|
189
|
-
return output_subtitle_path or alignments
|
|
162
|
+
return (alignments, output_subtitle_path)
|
|
190
163
|
|
|
191
164
|
except (SubtitleProcessingError, LatticeEncodingError, AlignmentError, LatticeDecodingError):
|
|
192
165
|
# Re-raise our specific errors as-is
|
|
@@ -201,15 +174,142 @@ class LattifAI(SyncAPIClient):
|
|
|
201
174
|
)
|
|
202
175
|
|
|
203
176
|
|
|
177
|
+
class AsyncLattifAI(AsyncAPIClient):
|
|
178
|
+
"""Asynchronous LattifAI client."""
|
|
179
|
+
|
|
180
|
+
def __init__(
|
|
181
|
+
self,
|
|
182
|
+
*,
|
|
183
|
+
api_key: Optional[str] = None,
|
|
184
|
+
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
185
|
+
device: Optional[str] = None,
|
|
186
|
+
base_url: Optional[str] = None,
|
|
187
|
+
timeout: Union[float, int] = 120.0,
|
|
188
|
+
max_retries: int = 2,
|
|
189
|
+
default_headers: Optional[Dict[str, str]] = None,
|
|
190
|
+
) -> None:
|
|
191
|
+
if api_key is None:
|
|
192
|
+
api_key = os.environ.get('LATTIFAI_API_KEY')
|
|
193
|
+
if api_key is None:
|
|
194
|
+
raise ConfigurationError(
|
|
195
|
+
'The api_key client option must be set either by passing api_key to the client '
|
|
196
|
+
'or by setting the LATTIFAI_API_KEY environment variable'
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
if base_url is None:
|
|
200
|
+
base_url = os.environ.get('LATTIFAI_BASE_URL')
|
|
201
|
+
if not base_url:
|
|
202
|
+
base_url = 'https://api.lattifai.com/v1'
|
|
203
|
+
|
|
204
|
+
super().__init__(
|
|
205
|
+
api_key=api_key,
|
|
206
|
+
base_url=base_url,
|
|
207
|
+
timeout=timeout,
|
|
208
|
+
max_retries=max_retries,
|
|
209
|
+
default_headers=default_headers,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
model_path = _resolve_model_path(model_name_or_path)
|
|
213
|
+
device = _select_device(device)
|
|
214
|
+
|
|
215
|
+
self.tokenizer = _load_tokenizer(self, model_path, device, tokenizer_cls=AsyncLatticeTokenizer)
|
|
216
|
+
self.worker = _load_worker(model_path, device)
|
|
217
|
+
self.device = device
|
|
218
|
+
|
|
219
|
+
async def alignment(
|
|
220
|
+
self,
|
|
221
|
+
audio: Pathlike,
|
|
222
|
+
subtitle: Pathlike,
|
|
223
|
+
format: Optional[SubtitleFormat] = None,
|
|
224
|
+
split_sentence: bool = False,
|
|
225
|
+
return_details: bool = False,
|
|
226
|
+
output_subtitle_path: Optional[Pathlike] = None,
|
|
227
|
+
) -> Tuple[List[Supervision], Optional[Pathlike]]:
|
|
228
|
+
try:
|
|
229
|
+
print(colorful.cyan(f'📖 Step 1: Reading subtitle file from {subtitle}'))
|
|
230
|
+
try:
|
|
231
|
+
supervisions = await asyncio.to_thread(SubtitleIO.read, subtitle, format=format)
|
|
232
|
+
print(colorful.green(f' ✓ Parsed {len(supervisions)} subtitle segments'))
|
|
233
|
+
except Exception as e:
|
|
234
|
+
raise SubtitleProcessingError(
|
|
235
|
+
f'Failed to parse subtitle file: {subtitle}',
|
|
236
|
+
subtitle_path=str(subtitle),
|
|
237
|
+
context={'original_error': str(e)},
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
print(colorful.cyan('🔗 Step 2: Creating lattice graph from segments'))
|
|
241
|
+
try:
|
|
242
|
+
supervisions, lattice_id, lattice_graph = await self.tokenizer.tokenize(
|
|
243
|
+
supervisions,
|
|
244
|
+
split_sentence=split_sentence,
|
|
245
|
+
)
|
|
246
|
+
print(colorful.green(f' ✓ Generated lattice graph with ID: {lattice_id}'))
|
|
247
|
+
except Exception as e:
|
|
248
|
+
text_content = ' '.join([sup.text for sup in supervisions]) if supervisions else ''
|
|
249
|
+
raise LatticeEncodingError(text_content, original_error=e)
|
|
250
|
+
|
|
251
|
+
print(colorful.cyan(f'🔍 Step 3: Searching lattice graph with audio: {audio}'))
|
|
252
|
+
try:
|
|
253
|
+
lattice_results = await asyncio.to_thread(self.worker.alignment, audio, lattice_graph)
|
|
254
|
+
print(colorful.green(' ✓ Lattice search completed'))
|
|
255
|
+
except Exception as e:
|
|
256
|
+
raise AlignmentError(
|
|
257
|
+
f'Audio alignment failed for {audio}',
|
|
258
|
+
audio_path=str(audio),
|
|
259
|
+
subtitle_path=str(subtitle),
|
|
260
|
+
context={'original_error': str(e)},
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
print(colorful.cyan('🎯 Step 4: Decoding lattice results to aligned segments'))
|
|
264
|
+
try:
|
|
265
|
+
alignments = await self.tokenizer.detokenize(
|
|
266
|
+
lattice_id, lattice_results, supervisions=supervisions, return_details=return_details
|
|
267
|
+
)
|
|
268
|
+
print(colorful.green(f' ✓ Successfully aligned {len(alignments)} segments'))
|
|
269
|
+
except LatticeDecodingError as e:
|
|
270
|
+
print(colorful.red(' x Failed to decode lattice alignment results'))
|
|
271
|
+
raise e
|
|
272
|
+
except Exception as e:
|
|
273
|
+
print(colorful.red(' x Failed to decode lattice alignment results'))
|
|
274
|
+
raise LatticeDecodingError(lattice_id, original_error=e)
|
|
275
|
+
|
|
276
|
+
if output_subtitle_path:
|
|
277
|
+
try:
|
|
278
|
+
await asyncio.to_thread(SubtitleIO.write, alignments, output_subtitle_path)
|
|
279
|
+
print(colorful.green(f'🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}'))
|
|
280
|
+
except Exception as e:
|
|
281
|
+
raise SubtitleProcessingError(
|
|
282
|
+
f'Failed to write output file: {output_subtitle_path}',
|
|
283
|
+
subtitle_path=str(output_subtitle_path),
|
|
284
|
+
context={'original_error': str(e)},
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
return (alignments, output_subtitle_path)
|
|
288
|
+
|
|
289
|
+
except (SubtitleProcessingError, LatticeEncodingError, AlignmentError, LatticeDecodingError):
|
|
290
|
+
raise
|
|
291
|
+
except Exception as e:
|
|
292
|
+
raise AlignmentError(
|
|
293
|
+
'Unexpected error during alignment process',
|
|
294
|
+
audio_path=str(audio),
|
|
295
|
+
subtitle_path=str(subtitle),
|
|
296
|
+
context={'original_error': str(e), 'error_type': e.__class__.__name__},
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
|
|
204
300
|
if __name__ == '__main__':
|
|
205
301
|
client = LattifAI()
|
|
206
302
|
import sys
|
|
207
303
|
|
|
208
|
-
if len(sys.argv) ==
|
|
209
|
-
audio, subtitle, output = sys.argv[1:]
|
|
304
|
+
if len(sys.argv) == 5:
|
|
305
|
+
audio, subtitle, output, split_sentence = sys.argv[1:]
|
|
306
|
+
split_sentence = split_sentence.lower() in ('true', '1', 'yes')
|
|
210
307
|
else:
|
|
211
308
|
audio = 'tests/data/SA1.wav'
|
|
212
309
|
subtitle = 'tests/data/SA1.TXT'
|
|
213
310
|
output = None
|
|
311
|
+
split_sentence = False
|
|
214
312
|
|
|
215
|
-
alignments = client.alignment(
|
|
313
|
+
(alignments, output_subtitle_path) = client.alignment(
|
|
314
|
+
audio, subtitle, output_subtitle_path=output, split_sentence=split_sentence, return_details=True
|
|
315
|
+
)
|
lattifai/errors.py
CHANGED
|
@@ -1,11 +1,26 @@
|
|
|
1
1
|
"""Error handling and exception classes for LattifAI SDK."""
|
|
2
2
|
|
|
3
|
-
import sys
|
|
4
3
|
import traceback
|
|
5
4
|
from typing import Any, Dict, Optional
|
|
6
5
|
|
|
7
6
|
import colorful
|
|
8
7
|
|
|
8
|
+
# Error help messages
|
|
9
|
+
LATTICE_DECODING_FAILURE_HELP = (
|
|
10
|
+
'Failed to decode lattice alignment. Possible reasons:\n\n'
|
|
11
|
+
'1) Audio and text content mismatch:\n'
|
|
12
|
+
' - The transcript/subtitle does not accurately match the audio content\n'
|
|
13
|
+
' - Text may be from a different version or section of the audio\n'
|
|
14
|
+
' ⚠️ Note: Gemini transcription may occasionally skip large segments of audio, causing alignment failures.\n'
|
|
15
|
+
' We will detect and fix this issue in the next version.\n\n'
|
|
16
|
+
'2) Unsupported audio type:\n'
|
|
17
|
+
' - Singing is not yet supported, this will be optimized in future versions\n\n'
|
|
18
|
+
'💡 Troubleshooting tips:\n'
|
|
19
|
+
' • Verify the transcript matches the audio by listening to a few segments\n'
|
|
20
|
+
' • For YouTube videos, manually check if auto-generated transcript are accurate\n'
|
|
21
|
+
' • Consider using a different transcription source if Gemini results are incomplete'
|
|
22
|
+
)
|
|
23
|
+
|
|
9
24
|
|
|
10
25
|
class LattifAIError(Exception):
|
|
11
26
|
"""Base exception for LattifAI errors."""
|
|
@@ -26,7 +41,7 @@ class LattifAIError(Exception):
|
|
|
26
41
|
def get_support_info(self) -> str:
|
|
27
42
|
"""Get support information for users."""
|
|
28
43
|
return (
|
|
29
|
-
f'\n
|
|
44
|
+
f'\n{colorful.green("🔧 Need help? Here are two ways to get support:")}\n'
|
|
30
45
|
f' 1. 📝 Create a GitHub issue: {colorful.green("https://github.com/lattifai/lattifai-python/issues")}\n'
|
|
31
46
|
' Please include:\n'
|
|
32
47
|
' - Your audio file format and duration\n'
|
|
@@ -36,13 +51,21 @@ class LattifAIError(Exception):
|
|
|
36
51
|
' Our team and community can help you troubleshoot\n'
|
|
37
52
|
)
|
|
38
53
|
|
|
39
|
-
def
|
|
40
|
-
"""Return formatted error message
|
|
54
|
+
def get_message(self) -> str:
|
|
55
|
+
"""Return formatted error message without support information."""
|
|
41
56
|
base_message = f'{colorful.red(f"[{self.error_code}] {self.message}")}'
|
|
42
57
|
if self.context:
|
|
43
58
|
context_str = f'\n{colorful.yellow("Context:")} ' + ', '.join(f'{k}={v}' for k, v in self.context.items())
|
|
44
59
|
base_message += context_str
|
|
45
|
-
return base_message
|
|
60
|
+
return base_message
|
|
61
|
+
|
|
62
|
+
def __str__(self) -> str:
|
|
63
|
+
"""Return formatted error message without support information.
|
|
64
|
+
|
|
65
|
+
Note: Support info should be displayed explicitly at the CLI level,
|
|
66
|
+
not automatically appended to avoid duplication when errors are re-raised.
|
|
67
|
+
"""
|
|
68
|
+
return self.get_message()
|
|
46
69
|
|
|
47
70
|
|
|
48
71
|
class AudioProcessingError(LattifAIError):
|
|
@@ -142,14 +165,29 @@ class LatticeDecodingError(AlignmentError):
|
|
|
142
165
|
|
|
143
166
|
def __init__(self, lattice_id: str, original_error: Optional[Exception] = None, **kwargs):
|
|
144
167
|
message = f'Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}'
|
|
145
|
-
|
|
168
|
+
|
|
169
|
+
# Don't duplicate the help message if it's already in original_error
|
|
170
|
+
if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
|
|
146
171
|
message += f' - {colorful.red(str(original_error))}'
|
|
147
172
|
|
|
148
173
|
context = kwargs.get('context', {})
|
|
149
|
-
|
|
174
|
+
# Don't store the entire help message in context to avoid duplication
|
|
175
|
+
if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
|
|
176
|
+
context['original_error'] = str(original_error)
|
|
177
|
+
context['lattice_id'] = lattice_id
|
|
150
178
|
kwargs['context'] = context
|
|
151
179
|
super().__init__(message, **kwargs)
|
|
152
180
|
|
|
181
|
+
def get_message(self) -> str:
|
|
182
|
+
"""Return formatted error message with help text."""
|
|
183
|
+
base_message = f'{colorful.red(f"[{self.error_code}]")} {self.message}'
|
|
184
|
+
if self.context and self.context.get('lattice_id'):
|
|
185
|
+
# Only show essential context (lattice_id), not the duplicated help message
|
|
186
|
+
base_message += f'\n{colorful.yellow("Lattice ID:")} {self.context["lattice_id"]}'
|
|
187
|
+
# Append help message once at the end
|
|
188
|
+
base_message += f'\n\n{colorful.yellow(LATTICE_DECODING_FAILURE_HELP)}'
|
|
189
|
+
return base_message
|
|
190
|
+
|
|
153
191
|
|
|
154
192
|
class ModelLoadError(LattifAIError):
|
|
155
193
|
"""Error loading AI model."""
|
lattifai/io/__init__.py
CHANGED
|
@@ -2,11 +2,31 @@ from typing import List, Optional
|
|
|
2
2
|
|
|
3
3
|
from lhotse.utils import Pathlike
|
|
4
4
|
|
|
5
|
+
from .gemini_reader import GeminiReader, GeminiSegment
|
|
6
|
+
from .gemini_writer import GeminiWriter
|
|
5
7
|
from .reader import SubtitleFormat, SubtitleReader
|
|
6
8
|
from .supervision import Supervision
|
|
9
|
+
from .utils import (
|
|
10
|
+
ALL_SUBTITLE_FORMATS,
|
|
11
|
+
INPUT_SUBTITLE_FORMATS,
|
|
12
|
+
OUTPUT_SUBTITLE_FORMATS,
|
|
13
|
+
SUBTITLE_FORMATS,
|
|
14
|
+
)
|
|
7
15
|
from .writer import SubtitleWriter
|
|
8
16
|
|
|
9
|
-
__all__ = [
|
|
17
|
+
__all__ = [
|
|
18
|
+
'SubtitleReader',
|
|
19
|
+
'SubtitleWriter',
|
|
20
|
+
'SubtitleIO',
|
|
21
|
+
'Supervision',
|
|
22
|
+
'GeminiReader',
|
|
23
|
+
'GeminiWriter',
|
|
24
|
+
'GeminiSegment',
|
|
25
|
+
'SUBTITLE_FORMATS',
|
|
26
|
+
'INPUT_SUBTITLE_FORMATS',
|
|
27
|
+
'OUTPUT_SUBTITLE_FORMATS',
|
|
28
|
+
'ALL_SUBTITLE_FORMATS',
|
|
29
|
+
]
|
|
10
30
|
|
|
11
31
|
|
|
12
32
|
class SubtitleIO:
|