lattifai 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +26 -27
- lattifai/base_client.py +7 -7
- lattifai/bin/agent.py +90 -91
- lattifai/bin/align.py +110 -111
- lattifai/bin/cli_base.py +3 -3
- lattifai/bin/subtitle.py +45 -45
- lattifai/client.py +56 -56
- lattifai/errors.py +73 -73
- lattifai/io/__init__.py +12 -11
- lattifai/io/gemini_reader.py +30 -30
- lattifai/io/gemini_writer.py +17 -17
- lattifai/io/reader.py +13 -12
- lattifai/io/supervision.py +3 -3
- lattifai/io/text_parser.py +43 -16
- lattifai/io/utils.py +4 -4
- lattifai/io/writer.py +31 -19
- lattifai/tokenizer/__init__.py +1 -1
- lattifai/tokenizer/phonemizer.py +3 -3
- lattifai/tokenizer/tokenizer.py +83 -82
- lattifai/utils.py +15 -15
- lattifai/workers/__init__.py +1 -1
- lattifai/workers/lattice1_alpha.py +46 -46
- lattifai/workflows/__init__.py +11 -11
- lattifai/workflows/agents.py +2 -0
- lattifai/workflows/base.py +22 -22
- lattifai/workflows/file_manager.py +182 -182
- lattifai/workflows/gemini.py +29 -29
- lattifai/workflows/prompts/__init__.py +4 -4
- lattifai/workflows/youtube.py +233 -233
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/METADATA +7 -9
- lattifai-0.4.6.dist-info/RECORD +39 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/licenses/LICENSE +1 -1
- lattifai-0.4.5.dist-info/RECORD +0 -39
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/WHEEL +0 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/entry_points.txt +0 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/top_level.txt +0 -0
lattifai/client.py
CHANGED
|
@@ -29,7 +29,7 @@ class LattifAI(SyncAPIClient):
|
|
|
29
29
|
self,
|
|
30
30
|
*,
|
|
31
31
|
api_key: Optional[str] = None,
|
|
32
|
-
model_name_or_path: str =
|
|
32
|
+
model_name_or_path: str = "Lattifai/Lattice-1-Alpha",
|
|
33
33
|
device: Optional[str] = None,
|
|
34
34
|
base_url: Optional[str] = None,
|
|
35
35
|
timeout: Union[float, int] = 120.0,
|
|
@@ -37,17 +37,17 @@ class LattifAI(SyncAPIClient):
|
|
|
37
37
|
default_headers: Optional[Dict[str, str]] = None,
|
|
38
38
|
) -> None:
|
|
39
39
|
if api_key is None:
|
|
40
|
-
api_key = os.environ.get(
|
|
40
|
+
api_key = os.environ.get("LATTIFAI_API_KEY")
|
|
41
41
|
if api_key is None:
|
|
42
42
|
raise ConfigurationError(
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
"The api_key client option must be set either by passing api_key to the client "
|
|
44
|
+
"or by setting the LATTIFAI_API_KEY environment variable"
|
|
45
45
|
)
|
|
46
46
|
|
|
47
47
|
if base_url is None:
|
|
48
|
-
base_url = os.environ.get(
|
|
48
|
+
base_url = os.environ.get("LATTIFAI_BASE_URL")
|
|
49
49
|
if not base_url:
|
|
50
|
-
base_url =
|
|
50
|
+
base_url = "https://api.lattifai.com/v1"
|
|
51
51
|
|
|
52
52
|
super().__init__(
|
|
53
53
|
api_key=api_key,
|
|
@@ -96,65 +96,65 @@ class LattifAI(SyncAPIClient):
|
|
|
96
96
|
"""
|
|
97
97
|
try:
|
|
98
98
|
# step1: parse text or subtitles
|
|
99
|
-
print(colorful.cyan(f
|
|
99
|
+
print(colorful.cyan(f"📖 Step 1: Reading subtitle file from {subtitle}"))
|
|
100
100
|
try:
|
|
101
101
|
supervisions = SubtitleIO.read(subtitle, format=format)
|
|
102
|
-
print(colorful.green(f
|
|
102
|
+
print(colorful.green(f" ✓ Parsed {len(supervisions)} subtitle segments"))
|
|
103
103
|
except Exception as e:
|
|
104
104
|
raise SubtitleProcessingError(
|
|
105
|
-
f
|
|
105
|
+
f"Failed to parse subtitle file: {subtitle}",
|
|
106
106
|
subtitle_path=str(subtitle),
|
|
107
|
-
context={
|
|
107
|
+
context={"original_error": str(e)},
|
|
108
108
|
)
|
|
109
109
|
|
|
110
110
|
# step2: make lattice by call Lattifai API
|
|
111
|
-
print(colorful.cyan(
|
|
111
|
+
print(colorful.cyan("🔗 Step 2: Creating lattice graph from segments"))
|
|
112
112
|
try:
|
|
113
113
|
supervisions, lattice_id, lattice_graph = self.tokenizer.tokenize(
|
|
114
114
|
supervisions, split_sentence=split_sentence
|
|
115
115
|
)
|
|
116
|
-
print(colorful.green(f
|
|
116
|
+
print(colorful.green(f" ✓ Generated lattice graph with ID: {lattice_id}"))
|
|
117
117
|
except Exception as e:
|
|
118
|
-
text_content =
|
|
118
|
+
text_content = " ".join([sup.text for sup in supervisions]) if supervisions else ""
|
|
119
119
|
raise LatticeEncodingError(text_content, original_error=e)
|
|
120
120
|
|
|
121
121
|
# step3: search lattice graph with audio
|
|
122
|
-
print(colorful.cyan(f
|
|
122
|
+
print(colorful.cyan(f"🔍 Step 3: Searching lattice graph with audio: {audio}"))
|
|
123
123
|
try:
|
|
124
124
|
lattice_results = self.worker.alignment(audio, lattice_graph)
|
|
125
|
-
print(colorful.green(
|
|
125
|
+
print(colorful.green(" ✓ Lattice search completed"))
|
|
126
126
|
except Exception as e:
|
|
127
127
|
raise AlignmentError(
|
|
128
|
-
f
|
|
128
|
+
f"Audio alignment failed for {audio}",
|
|
129
129
|
audio_path=str(audio),
|
|
130
130
|
subtitle_path=str(subtitle),
|
|
131
|
-
context={
|
|
131
|
+
context={"original_error": str(e)},
|
|
132
132
|
)
|
|
133
133
|
|
|
134
134
|
# step4: decode lattice results to aligned segments
|
|
135
|
-
print(colorful.cyan(
|
|
135
|
+
print(colorful.cyan("🎯 Step 4: Decoding lattice results to aligned segments"))
|
|
136
136
|
try:
|
|
137
137
|
alignments = self.tokenizer.detokenize(
|
|
138
138
|
lattice_id, lattice_results, supervisions=supervisions, return_details=return_details
|
|
139
139
|
)
|
|
140
|
-
print(colorful.green(f
|
|
140
|
+
print(colorful.green(f" ✓ Successfully aligned {len(alignments)} segments"))
|
|
141
141
|
except LatticeDecodingError as e:
|
|
142
|
-
print(colorful.red(
|
|
142
|
+
print(colorful.red(" x Failed to decode lattice alignment results"))
|
|
143
143
|
raise e
|
|
144
144
|
except Exception as e:
|
|
145
|
-
print(colorful.red(
|
|
145
|
+
print(colorful.red(" x Failed to decode lattice alignment results"))
|
|
146
146
|
raise LatticeDecodingError(lattice_id, original_error=e)
|
|
147
147
|
|
|
148
148
|
# step5: export alignments to target format
|
|
149
149
|
if output_subtitle_path:
|
|
150
150
|
try:
|
|
151
151
|
SubtitleIO.write(alignments, output_path=output_subtitle_path)
|
|
152
|
-
print(colorful.green(f
|
|
152
|
+
print(colorful.green(f"🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}"))
|
|
153
153
|
except Exception as e:
|
|
154
154
|
raise SubtitleProcessingError(
|
|
155
|
-
f
|
|
155
|
+
f"Failed to write output file: {output_subtitle_path}",
|
|
156
156
|
subtitle_path=str(output_subtitle_path),
|
|
157
|
-
context={
|
|
157
|
+
context={"original_error": str(e)},
|
|
158
158
|
)
|
|
159
159
|
return (alignments, output_subtitle_path)
|
|
160
160
|
|
|
@@ -164,10 +164,10 @@ class LattifAI(SyncAPIClient):
|
|
|
164
164
|
except Exception as e:
|
|
165
165
|
# Catch any unexpected errors and wrap them
|
|
166
166
|
raise AlignmentError(
|
|
167
|
-
|
|
167
|
+
"Unexpected error during alignment process",
|
|
168
168
|
audio_path=str(audio),
|
|
169
169
|
subtitle_path=str(subtitle),
|
|
170
|
-
context={
|
|
170
|
+
context={"original_error": str(e), "error_type": e.__class__.__name__},
|
|
171
171
|
)
|
|
172
172
|
|
|
173
173
|
|
|
@@ -178,7 +178,7 @@ class AsyncLattifAI(AsyncAPIClient):
|
|
|
178
178
|
self,
|
|
179
179
|
*,
|
|
180
180
|
api_key: Optional[str] = None,
|
|
181
|
-
model_name_or_path: str =
|
|
181
|
+
model_name_or_path: str = "Lattifai/Lattice-1-Alpha",
|
|
182
182
|
device: Optional[str] = None,
|
|
183
183
|
base_url: Optional[str] = None,
|
|
184
184
|
timeout: Union[float, int] = 120.0,
|
|
@@ -186,17 +186,17 @@ class AsyncLattifAI(AsyncAPIClient):
|
|
|
186
186
|
default_headers: Optional[Dict[str, str]] = None,
|
|
187
187
|
) -> None:
|
|
188
188
|
if api_key is None:
|
|
189
|
-
api_key = os.environ.get(
|
|
189
|
+
api_key = os.environ.get("LATTIFAI_API_KEY")
|
|
190
190
|
if api_key is None:
|
|
191
191
|
raise ConfigurationError(
|
|
192
|
-
|
|
193
|
-
|
|
192
|
+
"The api_key client option must be set either by passing api_key to the client "
|
|
193
|
+
"or by setting the LATTIFAI_API_KEY environment variable"
|
|
194
194
|
)
|
|
195
195
|
|
|
196
196
|
if base_url is None:
|
|
197
|
-
base_url = os.environ.get(
|
|
197
|
+
base_url = os.environ.get("LATTIFAI_BASE_URL")
|
|
198
198
|
if not base_url:
|
|
199
|
-
base_url =
|
|
199
|
+
base_url = "https://api.lattifai.com/v1"
|
|
200
200
|
|
|
201
201
|
super().__init__(
|
|
202
202
|
api_key=api_key,
|
|
@@ -223,62 +223,62 @@ class AsyncLattifAI(AsyncAPIClient):
|
|
|
223
223
|
output_subtitle_path: Optional[Pathlike] = None,
|
|
224
224
|
) -> Tuple[List[Supervision], Optional[Pathlike]]:
|
|
225
225
|
try:
|
|
226
|
-
print(colorful.cyan(f
|
|
226
|
+
print(colorful.cyan(f"📖 Step 1: Reading subtitle file from {subtitle}"))
|
|
227
227
|
try:
|
|
228
228
|
supervisions = await asyncio.to_thread(SubtitleIO.read, subtitle, format=format)
|
|
229
|
-
print(colorful.green(f
|
|
229
|
+
print(colorful.green(f" ✓ Parsed {len(supervisions)} subtitle segments"))
|
|
230
230
|
except Exception as e:
|
|
231
231
|
raise SubtitleProcessingError(
|
|
232
|
-
f
|
|
232
|
+
f"Failed to parse subtitle file: {subtitle}",
|
|
233
233
|
subtitle_path=str(subtitle),
|
|
234
|
-
context={
|
|
234
|
+
context={"original_error": str(e)},
|
|
235
235
|
)
|
|
236
236
|
|
|
237
|
-
print(colorful.cyan(
|
|
237
|
+
print(colorful.cyan("🔗 Step 2: Creating lattice graph from segments"))
|
|
238
238
|
try:
|
|
239
239
|
supervisions, lattice_id, lattice_graph = await self.tokenizer.tokenize(
|
|
240
240
|
supervisions,
|
|
241
241
|
split_sentence=split_sentence,
|
|
242
242
|
)
|
|
243
|
-
print(colorful.green(f
|
|
243
|
+
print(colorful.green(f" ✓ Generated lattice graph with ID: {lattice_id}"))
|
|
244
244
|
except Exception as e:
|
|
245
|
-
text_content =
|
|
245
|
+
text_content = " ".join([sup.text for sup in supervisions]) if supervisions else ""
|
|
246
246
|
raise LatticeEncodingError(text_content, original_error=e)
|
|
247
247
|
|
|
248
|
-
print(colorful.cyan(f
|
|
248
|
+
print(colorful.cyan(f"🔍 Step 3: Searching lattice graph with audio: {audio}"))
|
|
249
249
|
try:
|
|
250
250
|
lattice_results = await asyncio.to_thread(self.worker.alignment, audio, lattice_graph)
|
|
251
|
-
print(colorful.green(
|
|
251
|
+
print(colorful.green(" ✓ Lattice search completed"))
|
|
252
252
|
except Exception as e:
|
|
253
253
|
raise AlignmentError(
|
|
254
|
-
f
|
|
254
|
+
f"Audio alignment failed for {audio}",
|
|
255
255
|
audio_path=str(audio),
|
|
256
256
|
subtitle_path=str(subtitle),
|
|
257
|
-
context={
|
|
257
|
+
context={"original_error": str(e)},
|
|
258
258
|
)
|
|
259
259
|
|
|
260
|
-
print(colorful.cyan(
|
|
260
|
+
print(colorful.cyan("🎯 Step 4: Decoding lattice results to aligned segments"))
|
|
261
261
|
try:
|
|
262
262
|
alignments = await self.tokenizer.detokenize(
|
|
263
263
|
lattice_id, lattice_results, supervisions=supervisions, return_details=return_details
|
|
264
264
|
)
|
|
265
|
-
print(colorful.green(f
|
|
265
|
+
print(colorful.green(f" ✓ Successfully aligned {len(alignments)} segments"))
|
|
266
266
|
except LatticeDecodingError as e:
|
|
267
|
-
print(colorful.red(
|
|
267
|
+
print(colorful.red(" x Failed to decode lattice alignment results"))
|
|
268
268
|
raise e
|
|
269
269
|
except Exception as e:
|
|
270
|
-
print(colorful.red(
|
|
270
|
+
print(colorful.red(" x Failed to decode lattice alignment results"))
|
|
271
271
|
raise LatticeDecodingError(lattice_id, original_error=e)
|
|
272
272
|
|
|
273
273
|
if output_subtitle_path:
|
|
274
274
|
try:
|
|
275
275
|
await asyncio.to_thread(SubtitleIO.write, alignments, output_subtitle_path)
|
|
276
|
-
print(colorful.green(f
|
|
276
|
+
print(colorful.green(f"🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}"))
|
|
277
277
|
except Exception as e:
|
|
278
278
|
raise SubtitleProcessingError(
|
|
279
|
-
f
|
|
279
|
+
f"Failed to write output file: {output_subtitle_path}",
|
|
280
280
|
subtitle_path=str(output_subtitle_path),
|
|
281
|
-
context={
|
|
281
|
+
context={"original_error": str(e)},
|
|
282
282
|
)
|
|
283
283
|
|
|
284
284
|
return (alignments, output_subtitle_path)
|
|
@@ -287,23 +287,23 @@ class AsyncLattifAI(AsyncAPIClient):
|
|
|
287
287
|
raise
|
|
288
288
|
except Exception as e:
|
|
289
289
|
raise AlignmentError(
|
|
290
|
-
|
|
290
|
+
"Unexpected error during alignment process",
|
|
291
291
|
audio_path=str(audio),
|
|
292
292
|
subtitle_path=str(subtitle),
|
|
293
|
-
context={
|
|
293
|
+
context={"original_error": str(e), "error_type": e.__class__.__name__},
|
|
294
294
|
)
|
|
295
295
|
|
|
296
296
|
|
|
297
|
-
if __name__ ==
|
|
297
|
+
if __name__ == "__main__":
|
|
298
298
|
client = LattifAI()
|
|
299
299
|
import sys
|
|
300
300
|
|
|
301
301
|
if len(sys.argv) == 5:
|
|
302
302
|
audio, subtitle, output, split_sentence = sys.argv[1:]
|
|
303
|
-
split_sentence = split_sentence.lower() in (
|
|
303
|
+
split_sentence = split_sentence.lower() in ("true", "1", "yes")
|
|
304
304
|
else:
|
|
305
|
-
audio =
|
|
306
|
-
subtitle =
|
|
305
|
+
audio = "tests/data/SA1.wav"
|
|
306
|
+
subtitle = "tests/data/SA1.TXT"
|
|
307
307
|
output = None
|
|
308
308
|
split_sentence = False
|
|
309
309
|
|
lattifai/errors.py
CHANGED
|
@@ -7,18 +7,18 @@ import colorful
|
|
|
7
7
|
|
|
8
8
|
# Error help messages
|
|
9
9
|
LATTICE_DECODING_FAILURE_HELP = (
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
10
|
+
"Failed to decode lattice alignment. Possible reasons:\n\n"
|
|
11
|
+
"1) Audio and text content mismatch:\n"
|
|
12
|
+
" - The transcript/subtitle does not accurately match the audio content\n"
|
|
13
|
+
" - Text may be from a different version or section of the audio\n"
|
|
14
|
+
" ⚠️ Note: Gemini transcription may occasionally skip large segments of audio, causing alignment failures.\n"
|
|
15
|
+
" We will detect and fix this issue in the next version.\n\n"
|
|
16
|
+
"2) Unsupported audio type:\n"
|
|
17
|
+
" - Singing is not yet supported, this will be optimized in future versions\n\n"
|
|
18
|
+
"💡 Troubleshooting tips:\n"
|
|
19
|
+
" • Verify the transcript matches the audio by listening to a few segments\n"
|
|
20
|
+
" • For YouTube videos, manually check if auto-generated transcript are accurate\n"
|
|
21
|
+
" • Consider using a different transcription source if Gemini results are incomplete"
|
|
22
22
|
)
|
|
23
23
|
|
|
24
24
|
|
|
@@ -43,19 +43,19 @@ class LattifAIError(Exception):
|
|
|
43
43
|
return (
|
|
44
44
|
f'\n{colorful.green("🔧 Need help? Here are two ways to get support:")}\n'
|
|
45
45
|
f' 1. 📝 Create a GitHub issue: {colorful.green("https://github.com/lattifai/lattifai-python/issues")}\n'
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
" Please include:\n"
|
|
47
|
+
" - Your audio file format and duration\n"
|
|
48
48
|
" - The text/subtitle content you're trying to align\n"
|
|
49
|
-
|
|
49
|
+
" - This error message and stack trace\n"
|
|
50
50
|
f' 2. 💬 Join our Discord community: {colorful.green("https://discord.gg/vzmTzzZgNu")}\n'
|
|
51
|
-
|
|
51
|
+
" Our team and community can help you troubleshoot\n"
|
|
52
52
|
)
|
|
53
53
|
|
|
54
54
|
def get_message(self) -> str:
|
|
55
55
|
"""Return formatted error message without support information."""
|
|
56
56
|
base_message = f'{colorful.red(f"[{self.error_code}] {self.message}")}'
|
|
57
57
|
if self.context:
|
|
58
|
-
context_str = f'\n{colorful.yellow("Context:")} ' +
|
|
58
|
+
context_str = f'\n{colorful.yellow("Context:")} ' + ", ".join(f"{k}={v}" for k, v in self.context.items())
|
|
59
59
|
base_message += context_str
|
|
60
60
|
return base_message
|
|
61
61
|
|
|
@@ -72,10 +72,10 @@ class AudioProcessingError(LattifAIError):
|
|
|
72
72
|
"""Error during audio processing operations."""
|
|
73
73
|
|
|
74
74
|
def __init__(self, message: str, audio_path: Optional[str] = None, **kwargs):
|
|
75
|
-
context = kwargs.get(
|
|
75
|
+
context = kwargs.get("context", {})
|
|
76
76
|
if audio_path:
|
|
77
|
-
context[
|
|
78
|
-
kwargs[
|
|
77
|
+
context["audio_path"] = audio_path
|
|
78
|
+
kwargs["context"] = context
|
|
79
79
|
super().__init__(message, **kwargs)
|
|
80
80
|
|
|
81
81
|
|
|
@@ -83,13 +83,13 @@ class AudioLoadError(AudioProcessingError):
|
|
|
83
83
|
"""Error loading or reading audio file."""
|
|
84
84
|
|
|
85
85
|
def __init__(self, audio_path: str, original_error: Optional[Exception] = None, **kwargs):
|
|
86
|
-
message = f
|
|
86
|
+
message = f"Failed to load audio file: {colorful.red(audio_path)}"
|
|
87
87
|
if original_error:
|
|
88
|
-
message += f
|
|
88
|
+
message += f" - {colorful.red(str(original_error))}"
|
|
89
89
|
|
|
90
|
-
context = kwargs.get(
|
|
91
|
-
context.update({
|
|
92
|
-
kwargs[
|
|
90
|
+
context = kwargs.get("context", {})
|
|
91
|
+
context.update({"audio_path": audio_path, "original_error": str(original_error) if original_error else None})
|
|
92
|
+
kwargs["context"] = context
|
|
93
93
|
|
|
94
94
|
super().__init__(message, audio_path=audio_path, **kwargs)
|
|
95
95
|
|
|
@@ -98,10 +98,10 @@ class AudioFormatError(AudioProcessingError):
|
|
|
98
98
|
"""Error with audio format or codec."""
|
|
99
99
|
|
|
100
100
|
def __init__(self, audio_path: str, format_issue: str, **kwargs):
|
|
101
|
-
message = f
|
|
102
|
-
context = kwargs.get(
|
|
103
|
-
context.update({
|
|
104
|
-
kwargs[
|
|
101
|
+
message = f"Audio format error for {colorful.red(audio_path)}: {colorful.red(format_issue)}"
|
|
102
|
+
context = kwargs.get("context", {})
|
|
103
|
+
context.update({"audio_path": audio_path, "format_issue": format_issue})
|
|
104
|
+
kwargs["context"] = context
|
|
105
105
|
super().__init__(message, audio_path=audio_path, **kwargs)
|
|
106
106
|
|
|
107
107
|
|
|
@@ -109,10 +109,10 @@ class SubtitleProcessingError(LattifAIError):
|
|
|
109
109
|
"""Error during subtitle/text processing operations."""
|
|
110
110
|
|
|
111
111
|
def __init__(self, message: str, subtitle_path: Optional[str] = None, **kwargs):
|
|
112
|
-
context = kwargs.get(
|
|
112
|
+
context = kwargs.get("context", {})
|
|
113
113
|
if subtitle_path:
|
|
114
|
-
context[
|
|
115
|
-
kwargs[
|
|
114
|
+
context["subtitle_path"] = subtitle_path
|
|
115
|
+
kwargs["context"] = context
|
|
116
116
|
super().__init__(message, **kwargs)
|
|
117
117
|
|
|
118
118
|
|
|
@@ -120,10 +120,10 @@ class SubtitleParseError(SubtitleProcessingError):
|
|
|
120
120
|
"""Error parsing subtitle or text file."""
|
|
121
121
|
|
|
122
122
|
def __init__(self, subtitle_path: str, parse_issue: str, **kwargs):
|
|
123
|
-
message = f
|
|
124
|
-
context = kwargs.get(
|
|
125
|
-
context.update({
|
|
126
|
-
kwargs[
|
|
123
|
+
message = f"Failed to parse subtitle file {subtitle_path}: {parse_issue}"
|
|
124
|
+
context = kwargs.get("context", {})
|
|
125
|
+
context.update({"subtitle_path": subtitle_path, "parse_issue": parse_issue})
|
|
126
|
+
kwargs["context"] = context
|
|
127
127
|
super().__init__(message, subtitle_path=subtitle_path, **kwargs)
|
|
128
128
|
|
|
129
129
|
|
|
@@ -131,12 +131,12 @@ class AlignmentError(LattifAIError):
|
|
|
131
131
|
"""Error during audio-text alignment process."""
|
|
132
132
|
|
|
133
133
|
def __init__(self, message: str, audio_path: Optional[str] = None, subtitle_path: Optional[str] = None, **kwargs):
|
|
134
|
-
context = kwargs.get(
|
|
134
|
+
context = kwargs.get("context", {})
|
|
135
135
|
if audio_path:
|
|
136
|
-
context[
|
|
136
|
+
context["audio_path"] = audio_path
|
|
137
137
|
if subtitle_path:
|
|
138
|
-
context[
|
|
139
|
-
kwargs[
|
|
138
|
+
context["subtitle_path"] = subtitle_path
|
|
139
|
+
kwargs["context"] = context
|
|
140
140
|
super().__init__(message, **kwargs)
|
|
141
141
|
|
|
142
142
|
|
|
@@ -144,19 +144,19 @@ class LatticeEncodingError(AlignmentError):
|
|
|
144
144
|
"""Error generating lattice graph from text."""
|
|
145
145
|
|
|
146
146
|
def __init__(self, text_content: str, original_error: Optional[Exception] = None, **kwargs):
|
|
147
|
-
message =
|
|
147
|
+
message = "Failed to generate lattice graph from text"
|
|
148
148
|
if original_error:
|
|
149
|
-
message += f
|
|
149
|
+
message += f": {colorful.red(str(original_error))}"
|
|
150
150
|
|
|
151
|
-
context = kwargs.get(
|
|
151
|
+
context = kwargs.get("context", {})
|
|
152
152
|
context.update(
|
|
153
153
|
{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
154
|
+
"text_content_length": len(text_content),
|
|
155
|
+
"text_preview": text_content[:100] + "..." if len(text_content) > 100 else text_content,
|
|
156
|
+
"original_error": str(original_error) if original_error else None,
|
|
157
157
|
}
|
|
158
158
|
)
|
|
159
|
-
kwargs[
|
|
159
|
+
kwargs["context"] = context
|
|
160
160
|
super().__init__(message, **kwargs)
|
|
161
161
|
|
|
162
162
|
|
|
@@ -164,28 +164,28 @@ class LatticeDecodingError(AlignmentError):
|
|
|
164
164
|
"""Error decoding lattice alignment results."""
|
|
165
165
|
|
|
166
166
|
def __init__(self, lattice_id: str, original_error: Optional[Exception] = None, **kwargs):
|
|
167
|
-
message = f
|
|
167
|
+
message = f"Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}"
|
|
168
168
|
|
|
169
169
|
# Don't duplicate the help message if it's already in original_error
|
|
170
170
|
if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
|
|
171
|
-
message += f
|
|
171
|
+
message += f" - {colorful.red(str(original_error))}"
|
|
172
172
|
|
|
173
|
-
context = kwargs.get(
|
|
173
|
+
context = kwargs.get("context", {})
|
|
174
174
|
# Don't store the entire help message in context to avoid duplication
|
|
175
175
|
if original_error and str(original_error) != LATTICE_DECODING_FAILURE_HELP:
|
|
176
|
-
context[
|
|
177
|
-
context[
|
|
178
|
-
kwargs[
|
|
176
|
+
context["original_error"] = str(original_error)
|
|
177
|
+
context["lattice_id"] = lattice_id
|
|
178
|
+
kwargs["context"] = context
|
|
179
179
|
super().__init__(message, **kwargs)
|
|
180
180
|
|
|
181
181
|
def get_message(self) -> str:
|
|
182
182
|
"""Return formatted error message with help text."""
|
|
183
183
|
base_message = f'{colorful.red(f"[{self.error_code}]")} {self.message}'
|
|
184
|
-
if self.context and self.context.get(
|
|
184
|
+
if self.context and self.context.get("lattice_id"):
|
|
185
185
|
# Only show essential context (lattice_id), not the duplicated help message
|
|
186
186
|
base_message += f'\n{colorful.yellow("Lattice ID:")} {self.context["lattice_id"]}'
|
|
187
187
|
# Append help message once at the end
|
|
188
|
-
base_message += f
|
|
188
|
+
base_message += f"\n\n{colorful.yellow(LATTICE_DECODING_FAILURE_HELP)}"
|
|
189
189
|
return base_message
|
|
190
190
|
|
|
191
191
|
|
|
@@ -193,13 +193,13 @@ class ModelLoadError(LattifAIError):
|
|
|
193
193
|
"""Error loading AI model."""
|
|
194
194
|
|
|
195
195
|
def __init__(self, model_name: str, original_error: Optional[Exception] = None, **kwargs):
|
|
196
|
-
message = f
|
|
196
|
+
message = f"Failed to load model: {colorful.red(model_name)}"
|
|
197
197
|
if original_error:
|
|
198
|
-
message += f
|
|
198
|
+
message += f" - {colorful.red(str(original_error))}"
|
|
199
199
|
|
|
200
|
-
context = kwargs.get(
|
|
201
|
-
context.update({
|
|
202
|
-
kwargs[
|
|
200
|
+
context = kwargs.get("context", {})
|
|
201
|
+
context.update({"model_name": model_name, "original_error": str(original_error) if original_error else None})
|
|
202
|
+
kwargs["context"] = context
|
|
203
203
|
super().__init__(message, **kwargs)
|
|
204
204
|
|
|
205
205
|
|
|
@@ -207,13 +207,13 @@ class DependencyError(LattifAIError):
|
|
|
207
207
|
"""Error with required dependencies."""
|
|
208
208
|
|
|
209
209
|
def __init__(self, dependency_name: str, install_command: Optional[str] = None, **kwargs):
|
|
210
|
-
message = f
|
|
210
|
+
message = f"Missing required dependency: {colorful.red(dependency_name)}"
|
|
211
211
|
if install_command:
|
|
212
|
-
message += f
|
|
212
|
+
message += f"\nPlease install it using: {colorful.yellow(install_command)}"
|
|
213
213
|
|
|
214
|
-
context = kwargs.get(
|
|
215
|
-
context.update({
|
|
216
|
-
kwargs[
|
|
214
|
+
context = kwargs.get("context", {})
|
|
215
|
+
context.update({"dependency_name": dependency_name, "install_command": install_command})
|
|
216
|
+
kwargs["context"] = context
|
|
217
217
|
super().__init__(message, **kwargs)
|
|
218
218
|
|
|
219
219
|
|
|
@@ -221,9 +221,9 @@ class APIError(LattifAIError):
|
|
|
221
221
|
"""Error communicating with LattifAI API."""
|
|
222
222
|
|
|
223
223
|
def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None, **kwargs):
|
|
224
|
-
context = kwargs.get(
|
|
225
|
-
context.update({
|
|
226
|
-
kwargs[
|
|
224
|
+
context = kwargs.get("context", {})
|
|
225
|
+
context.update({"status_code": status_code, "response_text": response_text})
|
|
226
|
+
kwargs["context"] = context
|
|
227
227
|
super().__init__(message, **kwargs)
|
|
228
228
|
|
|
229
229
|
|
|
@@ -231,7 +231,7 @@ class ConfigurationError(LattifAIError):
|
|
|
231
231
|
"""Error with client configuration."""
|
|
232
232
|
|
|
233
233
|
def __init__(self, config_issue: str, **kwargs):
|
|
234
|
-
message = f
|
|
234
|
+
message = f"Configuration error: {config_issue}"
|
|
235
235
|
super().__init__(message, **kwargs)
|
|
236
236
|
|
|
237
237
|
|
|
@@ -246,11 +246,11 @@ def handle_exception(func):
|
|
|
246
246
|
raise
|
|
247
247
|
except Exception as e:
|
|
248
248
|
# Convert other exceptions to LattifAI errors
|
|
249
|
-
error_msg = f
|
|
249
|
+
error_msg = f"Unexpected error in {func.__name__}: {str(e)}"
|
|
250
250
|
context = {
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
251
|
+
"function": func.__name__,
|
|
252
|
+
"original_exception": e.__class__.__name__,
|
|
253
|
+
"traceback": traceback.format_exc(),
|
|
254
254
|
}
|
|
255
255
|
raise LattifAIError(error_msg, context=context) from e
|
|
256
256
|
|
lattifai/io/__init__.py
CHANGED
|
@@ -6,6 +6,7 @@ from .gemini_reader import GeminiReader, GeminiSegment
|
|
|
6
6
|
from .gemini_writer import GeminiWriter
|
|
7
7
|
from .reader import SubtitleFormat, SubtitleReader
|
|
8
8
|
from .supervision import Supervision
|
|
9
|
+
from .text_parser import normalize_html_text
|
|
9
10
|
from .utils import (
|
|
10
11
|
ALL_SUBTITLE_FORMATS,
|
|
11
12
|
INPUT_SUBTITLE_FORMATS,
|
|
@@ -15,17 +16,17 @@ from .utils import (
|
|
|
15
16
|
from .writer import SubtitleWriter
|
|
16
17
|
|
|
17
18
|
__all__ = [
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
19
|
+
"SubtitleReader",
|
|
20
|
+
"SubtitleWriter",
|
|
21
|
+
"SubtitleIO",
|
|
22
|
+
"Supervision",
|
|
23
|
+
"GeminiReader",
|
|
24
|
+
"GeminiWriter",
|
|
25
|
+
"GeminiSegment",
|
|
26
|
+
"SUBTITLE_FORMATS",
|
|
27
|
+
"INPUT_SUBTITLE_FORMATS",
|
|
28
|
+
"OUTPUT_SUBTITLE_FORMATS",
|
|
29
|
+
"ALL_SUBTITLE_FORMATS",
|
|
29
30
|
]
|
|
30
31
|
|
|
31
32
|
|