lattifai 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +10 -0
- lattifai/alignment/lattice1_aligner.py +33 -13
- lattifai/alignment/lattice1_worker.py +121 -50
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/tokenizer.py +3 -3
- lattifai/audio2.py +269 -70
- lattifai/caption/caption.py +161 -3
- lattifai/cli/alignment.py +2 -1
- lattifai/cli/app_installer.py +35 -33
- lattifai/cli/caption.py +8 -18
- lattifai/cli/server.py +3 -1
- lattifai/cli/transcribe.py +53 -38
- lattifai/cli/youtube.py +1 -0
- lattifai/client.py +16 -11
- lattifai/config/alignment.py +23 -2
- lattifai/config/caption.py +1 -1
- lattifai/config/media.py +23 -3
- lattifai/errors.py +7 -3
- lattifai/mixin.py +26 -15
- lattifai/server/app.py +2 -1
- lattifai/utils.py +37 -0
- lattifai/workflow/file_manager.py +15 -13
- lattifai/workflow/youtube.py +16 -1
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/METADATA +65 -15
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/RECORD +29 -29
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/licenses/LICENSE +1 -1
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/WHEEL +0 -0
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/entry_points.txt +0 -0
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/top_level.txt +0 -0
lattifai/errors.py
CHANGED
|
@@ -11,11 +11,15 @@ LATTICE_DECODING_FAILURE_HELP = (
|
|
|
11
11
|
"1) Media(Audio/Video) and text content mismatch:\n"
|
|
12
12
|
" - The transcript/caption does not accurately match the media content\n"
|
|
13
13
|
" - Text may be from a different version or section of the media\n"
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
"
|
|
14
|
+
"2) Text formatting issues:\n"
|
|
15
|
+
" - Special characters, HTML entities, or unusual punctuation may cause alignment failures\n"
|
|
16
|
+
" - Text normalization is enabled by default (caption.normalize_text=True)\n"
|
|
17
|
+
" If you disabled it, try re-enabling: caption.normalize_text=True\n"
|
|
18
|
+
"3) Unsupported media type:\n"
|
|
17
19
|
" - Singing is not yet supported, this will be optimized in future versions\n\n"
|
|
18
20
|
"💡 Troubleshooting tips:\n"
|
|
21
|
+
" • Text normalization is enabled by default to handle special characters\n"
|
|
22
|
+
" (no action needed unless you explicitly set caption.normalize_text=False)\n"
|
|
19
23
|
" • Verify the transcript matches the media by listening to a few segments\n"
|
|
20
24
|
" • For YouTube videos, manually check if auto-generated transcript are accurate\n"
|
|
21
25
|
" • Consider using a different transcription source if Gemini results are incomplete"
|
lattifai/mixin.py
CHANGED
|
@@ -10,6 +10,7 @@ from lhotse.utils import Pathlike
|
|
|
10
10
|
from lattifai.audio2 import AudioData
|
|
11
11
|
from lattifai.caption import Caption
|
|
12
12
|
from lattifai.errors import CaptionProcessingError
|
|
13
|
+
from lattifai.utils import safe_print
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
15
16
|
from .config import AlignmentConfig, CaptionConfig, ClientConfig, DiarizationConfig, TranscriptionConfig
|
|
@@ -278,7 +279,7 @@ class LattifAIClientMixin:
|
|
|
278
279
|
|
|
279
280
|
try:
|
|
280
281
|
if verbose:
|
|
281
|
-
|
|
282
|
+
safe_print(colorful.cyan(f"📖 Step 1: Reading caption file from {input_caption}"))
|
|
282
283
|
caption = Caption.read(
|
|
283
284
|
input_caption,
|
|
284
285
|
format=input_caption_format,
|
|
@@ -287,18 +288,18 @@ class LattifAIClientMixin:
|
|
|
287
288
|
diarization_file = Path(str(input_caption)).with_suffix(".SpkDiar")
|
|
288
289
|
if diarization_file.exists():
|
|
289
290
|
if verbose:
|
|
290
|
-
|
|
291
|
+
safe_print(colorful.cyan(f"📖 Step 1b: Reading speaker diarization from {diarization_file}"))
|
|
291
292
|
caption.read_speaker_diarization(diarization_file)
|
|
292
293
|
events_file = Path(str(input_caption)).with_suffix(".AED")
|
|
293
294
|
if events_file.exists():
|
|
294
295
|
if verbose:
|
|
295
|
-
|
|
296
|
+
safe_print(colorful.cyan(f"📖 Step 1c: Reading audio events from {events_file}"))
|
|
296
297
|
from tgt import read_textgrid
|
|
297
298
|
|
|
298
299
|
caption.audio_events = read_textgrid(events_file)
|
|
299
300
|
|
|
300
301
|
if verbose:
|
|
301
|
-
|
|
302
|
+
safe_print(colorful.green(f" ✓ Parsed {len(caption)} caption segments"))
|
|
302
303
|
return caption
|
|
303
304
|
except Exception as e:
|
|
304
305
|
raise CaptionProcessingError(
|
|
@@ -332,10 +333,10 @@ class LattifAIClientMixin:
|
|
|
332
333
|
)
|
|
333
334
|
diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
|
|
334
335
|
if not diarization_file.exists() and caption.speaker_diarization:
|
|
335
|
-
|
|
336
|
+
safe_print(colorful.green(f" Writing speaker diarization to: {diarization_file}"))
|
|
336
337
|
caption.write_speaker_diarization(diarization_file)
|
|
337
338
|
|
|
338
|
-
|
|
339
|
+
safe_print(colorful.green(f"🎉🎉🎉🎉🎉 Caption file written to: {output_caption_path}"))
|
|
339
340
|
return result
|
|
340
341
|
except Exception as e:
|
|
341
342
|
raise CaptionProcessingError(
|
|
@@ -352,14 +353,14 @@ class LattifAIClientMixin:
|
|
|
352
353
|
force_overwrite: bool,
|
|
353
354
|
) -> str:
|
|
354
355
|
"""Download media from YouTube (async implementation)."""
|
|
355
|
-
|
|
356
|
+
safe_print(colorful.cyan("📥 Downloading media from YouTube..."))
|
|
356
357
|
media_file = await self.downloader.download_media(
|
|
357
358
|
url=url,
|
|
358
359
|
output_dir=str(output_dir),
|
|
359
360
|
media_format=media_format,
|
|
360
361
|
force_overwrite=force_overwrite,
|
|
361
362
|
)
|
|
362
|
-
|
|
363
|
+
safe_print(colorful.green(f" ✓ Media downloaded: {media_file}"))
|
|
363
364
|
return media_file
|
|
364
365
|
|
|
365
366
|
def _download_media_sync(
|
|
@@ -400,14 +401,20 @@ class LattifAIClientMixin:
|
|
|
400
401
|
# Transcription mode: use Transcriber to transcribe
|
|
401
402
|
self._validate_transcription_setup()
|
|
402
403
|
|
|
403
|
-
|
|
404
|
+
safe_print(colorful.cyan(f"🎤 Transcribing({self.transcriber.name}) media: {str(media_file)} ..."))
|
|
404
405
|
transcription = await self.transcriber.transcribe_file(media_file, language=source_lang)
|
|
405
|
-
|
|
406
|
+
safe_print(colorful.green(" ✓ Transcription completed."))
|
|
406
407
|
|
|
407
408
|
if "gemini" in self.transcriber.name.lower():
|
|
408
409
|
# write to temp file and use Caption read
|
|
409
|
-
|
|
410
|
-
|
|
410
|
+
# On Windows, we need to close the file before writing to it
|
|
411
|
+
tmp_file = tempfile.NamedTemporaryFile(
|
|
412
|
+
suffix=self.transcriber.file_suffix, delete=False, mode="w", encoding="utf-8"
|
|
413
|
+
)
|
|
414
|
+
tmp_path = Path(tmp_file.name)
|
|
415
|
+
tmp_file.close() # Close file before writing
|
|
416
|
+
|
|
417
|
+
try:
|
|
411
418
|
await asyncio.to_thread(
|
|
412
419
|
self.transcriber.write,
|
|
413
420
|
transcription,
|
|
@@ -417,6 +424,10 @@ class LattifAIClientMixin:
|
|
|
417
424
|
transcription = self._read_caption(
|
|
418
425
|
tmp_path, input_caption_format="gemini", normalize_text=False, verbose=False
|
|
419
426
|
)
|
|
427
|
+
finally:
|
|
428
|
+
# Clean up temp file
|
|
429
|
+
if tmp_path.exists():
|
|
430
|
+
tmp_path.unlink()
|
|
420
431
|
|
|
421
432
|
return transcription
|
|
422
433
|
|
|
@@ -459,7 +470,7 @@ class LattifAIClientMixin:
|
|
|
459
470
|
if self.caption_config.input_path:
|
|
460
471
|
caption_path = Path(self.caption_config.input_path)
|
|
461
472
|
if caption_path.exists():
|
|
462
|
-
|
|
473
|
+
safe_print(colorful.green(f"📄 Using provided caption file: {caption_path}"))
|
|
463
474
|
return str(caption_path)
|
|
464
475
|
else:
|
|
465
476
|
raise FileNotFoundError(f"Provided caption path does not exist: {caption_path}")
|
|
@@ -496,7 +507,7 @@ class LattifAIClientMixin:
|
|
|
496
507
|
|
|
497
508
|
# elif choice == "overwrite": continue to transcribe below
|
|
498
509
|
|
|
499
|
-
|
|
510
|
+
safe_print(colorful.cyan(f"🎤 Transcribing media with {transcriber_name}..."))
|
|
500
511
|
if self.transcriber.supports_url:
|
|
501
512
|
transcription = await self.transcriber.transcribe(url, language=source_lang)
|
|
502
513
|
else:
|
|
@@ -508,7 +519,7 @@ class LattifAIClientMixin:
|
|
|
508
519
|
caption_file = transcription
|
|
509
520
|
else:
|
|
510
521
|
caption_file = str(transcript_file)
|
|
511
|
-
|
|
522
|
+
safe_print(colorful.green(f" ✓ Transcription completed: {caption_file}"))
|
|
512
523
|
else:
|
|
513
524
|
# Download YouTube captions
|
|
514
525
|
caption_file = await self.downloader.download_captions(
|
lattifai/server/app.py
CHANGED
|
@@ -232,7 +232,7 @@ async def align_files(
|
|
|
232
232
|
normalize_text: bool = Form(False),
|
|
233
233
|
output_format: str = Form("srt"),
|
|
234
234
|
transcription_model: str = Form("nvidia/parakeet-tdt-0.6b-v3"),
|
|
235
|
-
alignment_model: str = Form("
|
|
235
|
+
alignment_model: str = Form("LattifAI/Lattice-1"),
|
|
236
236
|
):
|
|
237
237
|
# Check if LATTIFAI_API_KEY is set
|
|
238
238
|
if not os.environ.get("LATTIFAI_API_KEY"):
|
|
@@ -423,4 +423,5 @@ def process_alignment(
|
|
|
423
423
|
input_caption=str(caption_path) if caption_path else None,
|
|
424
424
|
output_caption_path=str(output_caption_path) if output_caption_path else None,
|
|
425
425
|
split_sentence=split_sentence,
|
|
426
|
+
streaming_chunk_secs=None, # Server API default: no streaming
|
|
426
427
|
)
|
lattifai/utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Shared utility helpers for the LattifAI SDK."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import sys
|
|
4
5
|
from datetime import datetime, timedelta
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from typing import Any, Optional, Type
|
|
@@ -8,6 +9,42 @@ from typing import Any, Optional, Type
|
|
|
8
9
|
from lattifai.errors import ModelLoadError
|
|
9
10
|
|
|
10
11
|
|
|
12
|
+
def safe_print(text: str, **kwargs) -> None:
|
|
13
|
+
"""
|
|
14
|
+
Safely print text with Unicode characters, handling Windows encoding issues.
|
|
15
|
+
|
|
16
|
+
On Windows, the default console encoding (cp1252) can't handle many Unicode
|
|
17
|
+
characters like emojis. This function ensures text is printed correctly by
|
|
18
|
+
using UTF-8 encoding when necessary.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
text: The text to print, may contain Unicode/emoji characters
|
|
22
|
+
**kwargs: Additional arguments passed to print()
|
|
23
|
+
"""
|
|
24
|
+
try:
|
|
25
|
+
print(text, **kwargs)
|
|
26
|
+
except UnicodeEncodeError:
|
|
27
|
+
# On Windows, try to reconfigure stdout to use UTF-8
|
|
28
|
+
if sys.platform == "win32":
|
|
29
|
+
try:
|
|
30
|
+
# Try to encode with UTF-8 and print
|
|
31
|
+
if hasattr(sys.stdout, "buffer"):
|
|
32
|
+
sys.stdout.buffer.write((text + "\n").encode("utf-8"))
|
|
33
|
+
sys.stdout.flush()
|
|
34
|
+
else:
|
|
35
|
+
# Fallback: replace problematic characters
|
|
36
|
+
print(text.encode(sys.stdout.encoding, errors="replace").decode(sys.stdout.encoding), **kwargs)
|
|
37
|
+
except Exception:
|
|
38
|
+
# Last resort: remove emojis
|
|
39
|
+
import re
|
|
40
|
+
|
|
41
|
+
text_no_emoji = re.sub(r"[^\x00-\x7F\u4e00-\u9fff]+", "", text)
|
|
42
|
+
print(text_no_emoji, **kwargs)
|
|
43
|
+
else:
|
|
44
|
+
# Non-Windows: this shouldn't happen, but fallback gracefully
|
|
45
|
+
print(text.encode("utf-8", errors="replace").decode("utf-8"), **kwargs)
|
|
46
|
+
|
|
47
|
+
|
|
11
48
|
def _get_cache_marker_path(cache_dir: Path) -> Path:
|
|
12
49
|
"""Get the path for the cache marker file with current date."""
|
|
13
50
|
today = datetime.now().strftime("%Y%m%d")
|
lattifai/workflow/file_manager.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
""".
|
|
2
2
|
File existence management utilities for video processing workflows
|
|
3
3
|
"""
|
|
4
4
|
|
|
@@ -11,6 +11,8 @@ from typing import Dict, List, Optional, Sequence, Tuple
|
|
|
11
11
|
|
|
12
12
|
import colorful
|
|
13
13
|
|
|
14
|
+
from lattifai.utils import safe_print
|
|
15
|
+
|
|
14
16
|
try:
|
|
15
17
|
import questionary
|
|
16
18
|
except ImportError: # pragma: no cover - optional dependency
|
|
@@ -110,7 +112,7 @@ class FileExistenceManager:
|
|
|
110
112
|
return "proceed" # No existing files, proceed normally
|
|
111
113
|
|
|
112
114
|
# Header with warning color
|
|
113
|
-
|
|
115
|
+
safe_print(f'\n{colorful.bold_yellow("⚠️ Existing files found:")}')
|
|
114
116
|
|
|
115
117
|
# Collect file paths for options
|
|
116
118
|
file_paths = []
|
|
@@ -157,15 +159,15 @@ class FileExistenceManager:
|
|
|
157
159
|
choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default=default_value)
|
|
158
160
|
|
|
159
161
|
if choice == "overwrite":
|
|
160
|
-
|
|
162
|
+
safe_print(f'{colorful.yellow("🔄 Overwriting existing files")}')
|
|
161
163
|
elif choice == TRANSCRIBE_CHOICE:
|
|
162
164
|
print(f'{colorful.magenta(f"✨ Will transcribe with {transcriber_name}")}')
|
|
163
165
|
elif choice == "cancel":
|
|
164
|
-
|
|
166
|
+
safe_print(f'{colorful.red("❌ Operation cancelled")}')
|
|
165
167
|
elif choice in file_paths:
|
|
166
|
-
|
|
168
|
+
safe_print(f'{colorful.green(f"✅ Using selected file: {choice}")}')
|
|
167
169
|
else:
|
|
168
|
-
|
|
170
|
+
safe_print(f'{colorful.green("✅ Using existing files")}')
|
|
169
171
|
|
|
170
172
|
return choice
|
|
171
173
|
|
|
@@ -189,7 +191,7 @@ class FileExistenceManager:
|
|
|
189
191
|
del emoji # Unused variable
|
|
190
192
|
|
|
191
193
|
# Header with warning color
|
|
192
|
-
|
|
194
|
+
safe_print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
|
|
193
195
|
|
|
194
196
|
for file_path in sorted(files):
|
|
195
197
|
print(f' {colorful.green("•")} {file_path}')
|
|
@@ -203,11 +205,11 @@ class FileExistenceManager:
|
|
|
203
205
|
choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default="use")
|
|
204
206
|
|
|
205
207
|
if choice == "use":
|
|
206
|
-
|
|
208
|
+
safe_print(f'{colorful.green(f"✅ Using existing {label} files")}')
|
|
207
209
|
elif choice == "overwrite":
|
|
208
|
-
|
|
210
|
+
safe_print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
|
|
209
211
|
elif choice == "cancel":
|
|
210
|
-
|
|
212
|
+
safe_print(f'{colorful.red("❌ Operation cancelled")}')
|
|
211
213
|
|
|
212
214
|
return choice
|
|
213
215
|
|
|
@@ -245,7 +247,7 @@ class FileExistenceManager:
|
|
|
245
247
|
)
|
|
246
248
|
|
|
247
249
|
# Multiple files: let user choose which one
|
|
248
|
-
|
|
250
|
+
safe_print(f'\n{colorful.bold_yellow(f"⚠️ Multiple {file_type} files found:")}')
|
|
249
251
|
|
|
250
252
|
# Create options with full file paths
|
|
251
253
|
options = []
|
|
@@ -266,7 +268,7 @@ class FileExistenceManager:
|
|
|
266
268
|
choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default=files[0])
|
|
267
269
|
|
|
268
270
|
if choice == "cancel":
|
|
269
|
-
|
|
271
|
+
safe_print(f'{colorful.red("❌ Operation cancelled")}')
|
|
270
272
|
elif choice == "overwrite":
|
|
271
273
|
overwrite_msg = f"🔄 Overwriting all {file_type} files"
|
|
272
274
|
print(f"{colorful.yellow(overwrite_msg)}")
|
|
@@ -274,7 +276,7 @@ class FileExistenceManager:
|
|
|
274
276
|
transcribe_msg = f"✨ Will transcribe with {transcriber_name}"
|
|
275
277
|
print(f"{colorful.magenta(transcribe_msg)}")
|
|
276
278
|
else:
|
|
277
|
-
|
|
279
|
+
safe_print(f'{colorful.green(f"✅ Using: {choice}")}')
|
|
278
280
|
|
|
279
281
|
return choice
|
|
280
282
|
|
lattifai/workflow/youtube.py
CHANGED
|
@@ -407,6 +407,11 @@ class YouTubeDownloader:
|
|
|
407
407
|
output_template,
|
|
408
408
|
"--sub-format",
|
|
409
409
|
"best", # Prefer best available format
|
|
410
|
+
"--no-warnings", # Suppress warnings for cleaner output
|
|
411
|
+
"--extractor-retries",
|
|
412
|
+
"3", # Retry on errors
|
|
413
|
+
"--sleep-requests",
|
|
414
|
+
"1", # Sleep between requests to avoid rate limiting
|
|
410
415
|
]
|
|
411
416
|
|
|
412
417
|
# Add caption language selection if specified
|
|
@@ -425,7 +430,8 @@ class YouTubeDownloader:
|
|
|
425
430
|
None, lambda: subprocess.run(ytdlp_options, capture_output=True, text=True, check=True)
|
|
426
431
|
)
|
|
427
432
|
|
|
428
|
-
|
|
433
|
+
# Log the full yt-dlp output at debug level only, to keep normal output clean
|
|
434
|
+
self.logger.debug(f"yt-dlp output: {result.stdout.strip()}")
|
|
429
435
|
|
|
430
436
|
# Find the downloaded transcript file
|
|
431
437
|
caption_patterns = [
|
|
@@ -481,9 +487,18 @@ class YouTubeDownloader:
|
|
|
481
487
|
|
|
482
488
|
except subprocess.CalledProcessError as e:
|
|
483
489
|
error_msg = e.stderr.strip() if e.stderr else str(e)
|
|
490
|
+
|
|
491
|
+
# Check for specific error conditions
|
|
484
492
|
if "No automatic or manual captions found" in error_msg:
|
|
485
493
|
self.logger.warning("No captions available for this video")
|
|
486
494
|
return None
|
|
495
|
+
elif "HTTP Error 429" in error_msg or "Too Many Requests" in error_msg:
|
|
496
|
+
self.logger.error("YouTube rate limit exceeded. Please try again later or use a different method.")
|
|
497
|
+
raise RuntimeError(
|
|
498
|
+
"YouTube rate limit exceeded (HTTP 429). "
|
|
499
|
+
"Try again later or use --cookies option with authenticated cookies. "
|
|
500
|
+
"See: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp"
|
|
501
|
+
)
|
|
487
502
|
else:
|
|
488
503
|
self.logger.error(f"Failed to download transcript: {error_msg}")
|
|
489
504
|
raise RuntimeError(f"Failed to download transcript: {error_msg}")
|
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/METADATA
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lattifai
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
5
|
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
6
|
Maintainer-email: Lattice <tech@lattifai.com>
|
|
7
7
|
License: MIT License
|
|
8
8
|
|
|
9
|
-
Copyright (c) 2025
|
|
9
|
+
Copyright (c) 2025 LattifAI.
|
|
10
10
|
|
|
11
11
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
12
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -27,10 +27,10 @@ License: MIT License
|
|
|
27
27
|
SOFTWARE.
|
|
28
28
|
|
|
29
29
|
Project-URL: Homepage, https://github.com/lattifai/lattifai-python
|
|
30
|
-
Project-URL: Documentation, https://github.com/lattifai/lattifai-python/README.md
|
|
30
|
+
Project-URL: Documentation, https://github.com/lattifai/lattifai-python/blob/main/README.md
|
|
31
31
|
Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
|
|
32
32
|
Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
|
|
33
|
-
Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG.md
|
|
33
|
+
Project-URL: Changelog, https://github.com/lattifai/lattifai-python/blob/main/CHANGELOG.md
|
|
34
34
|
Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
|
|
35
35
|
Classifier: Development Status :: 5 - Production/Stable
|
|
36
36
|
Classifier: Intended Audience :: Developers
|
|
@@ -50,7 +50,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
50
50
|
Requires-Python: <3.15,>=3.10
|
|
51
51
|
Description-Content-Type: text/markdown
|
|
52
52
|
License-File: LICENSE
|
|
53
|
-
Requires-Dist: lattifai-core>=0.
|
|
53
|
+
Requires-Dist: lattifai-core>=0.5.1
|
|
54
54
|
Requires-Dist: lattifai-run>=1.0.1
|
|
55
55
|
Requires-Dist: python-dotenv
|
|
56
56
|
Requires-Dist: lhotse>=1.26.0
|
|
@@ -63,10 +63,9 @@ Requires-Dist: onnxruntime
|
|
|
63
63
|
Requires-Dist: msgpack
|
|
64
64
|
Requires-Dist: g2p-phonemizer>=0.4.0
|
|
65
65
|
Requires-Dist: av
|
|
66
|
-
Requires-Dist: wtpsplit>=2.1.
|
|
67
|
-
Requires-Dist:
|
|
68
|
-
Requires-Dist:
|
|
69
|
-
Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc1
|
|
66
|
+
Requires-Dist: wtpsplit>=2.1.7
|
|
67
|
+
Requires-Dist: OmniSenseVoice>=0.4.2
|
|
68
|
+
Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc3
|
|
70
69
|
Requires-Dist: pyannote-audio-notorchdeps>=4.0.2
|
|
71
70
|
Requires-Dist: questionary>=2.0
|
|
72
71
|
Requires-Dist: yt-dlp
|
|
@@ -83,7 +82,7 @@ Requires-Dist: torch-audiomentations==0.12.0; extra == "diarization"
|
|
|
83
82
|
Requires-Dist: pyannote.audio>=4.0.2; extra == "diarization"
|
|
84
83
|
Provides-Extra: transcription
|
|
85
84
|
Requires-Dist: OmniSenseVoice>=0.4.0; extra == "transcription"
|
|
86
|
-
Requires-Dist: nemo_toolkit_asr[asr]>=2.7.
|
|
85
|
+
Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc3; extra == "transcription"
|
|
87
86
|
Provides-Extra: test
|
|
88
87
|
Requires-Dist: pytest; extra == "test"
|
|
89
88
|
Requires-Dist: pytest-cov; extra == "test"
|
|
@@ -116,7 +115,6 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
|
|
|
116
115
|
|
|
117
116
|
> **⚠️ Note on Current Limitations**:
|
|
118
117
|
> 1. **Memory Usage**: We are aware of high memory consumption and are actively working on further optimizations.
|
|
119
|
-
> 2. **Long Audio**: Currently, long audio files might face issues. Support for **long-form audio (up to 20 hours)** will be available shortly.
|
|
120
118
|
|
|
121
119
|
## Table of Contents
|
|
122
120
|
|
|
@@ -464,6 +462,8 @@ lai transcribe align \
|
|
|
464
462
|
|
|
465
463
|
```bash
|
|
466
464
|
lai caption convert input.srt output.vtt
|
|
465
|
+
lai caption convert input.srt output.json
|
|
466
|
+
# Enable normalization to clean HTML entities and special characters:
|
|
467
467
|
lai caption convert input.srt output.json normalize_text=true
|
|
468
468
|
```
|
|
469
469
|
|
|
@@ -554,10 +554,10 @@ from lattifai import LattifAI, CaptionConfig
|
|
|
554
554
|
|
|
555
555
|
client = LattifAI(
|
|
556
556
|
caption_config=CaptionConfig(
|
|
557
|
-
split_sentence=True, # Smart sentence splitting
|
|
558
|
-
word_level=True, # Word-level timestamps
|
|
559
|
-
normalize_text=True, # Clean HTML entities
|
|
560
|
-
include_speaker_in_text=False, # Include speaker labels
|
|
557
|
+
split_sentence=True, # Smart sentence splitting (default: False)
|
|
558
|
+
word_level=True, # Word-level timestamps (default: False)
|
|
559
|
+
normalize_text=True, # Clean HTML entities (default: True)
|
|
560
|
+
include_speaker_in_text=False, # Include speaker labels (default: True)
|
|
561
561
|
)
|
|
562
562
|
)
|
|
563
563
|
```
|
|
@@ -619,6 +619,56 @@ from lattifai import (
|
|
|
619
619
|
|
|
620
620
|
## Advanced Features
|
|
621
621
|
|
|
622
|
+
### Long-Form Audio Support
|
|
623
|
+
|
|
624
|
+
LattifAI now supports processing long audio files (up to 20 hours) through streaming mode. Enable streaming by setting the `streaming_chunk_secs` parameter:
|
|
625
|
+
|
|
626
|
+
**Python SDK:**
|
|
627
|
+
```python
|
|
628
|
+
from lattifai import LattifAI
|
|
629
|
+
|
|
630
|
+
client = LattifAI()
|
|
631
|
+
|
|
632
|
+
# Enable streaming for long audio files
|
|
633
|
+
caption = client.alignment(
|
|
634
|
+
input_media="long_audio.wav",
|
|
635
|
+
input_caption="subtitle.srt",
|
|
636
|
+
output_caption_path="output.srt",
|
|
637
|
+
streaming_chunk_secs=600.0,  # Process in 10-minute (600-second) chunks
|
|
638
|
+
)
|
|
639
|
+
```
|
|
640
|
+
|
|
641
|
+
**CLI:**
|
|
642
|
+
```bash
|
|
643
|
+
# Enable streaming with chunk size
|
|
644
|
+
lai alignment align long_audio.wav subtitle.srt output.srt \
|
|
645
|
+
media.streaming_chunk_secs=300.0
|
|
646
|
+
|
|
647
|
+
# For YouTube videos
|
|
648
|
+
lai alignment youtube "https://youtube.com/watch?v=VIDEO_ID" \
|
|
649
|
+
media.streaming_chunk_secs=300.0
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
**MediaConfig:**
|
|
653
|
+
```python
|
|
654
|
+
from lattifai import LattifAI, MediaConfig
|
|
655
|
+
|
|
656
|
+
client = LattifAI(
|
|
657
|
+
media_config=MediaConfig(
|
|
658
|
+
streaming_chunk_secs=600.0, # Chunk duration in seconds (1-1800), default: 600 (10 minutes)
|
|
659
|
+
)
|
|
660
|
+
)
|
|
661
|
+
```
|
|
662
|
+
|
|
663
|
+
**Notes:**
|
|
664
|
+
- Chunk duration must be between 1 and 1800 seconds (minimum 1 second, maximum 30 minutes)
|
|
665
|
+
- Default value: 600 seconds (10 minutes)
|
|
666
|
+
- **Recommended: Use 60 seconds or larger for optimal performance**
|
|
667
|
+
- Set to `None` to disable streaming
|
|
668
|
+
- **Thanks to our precise implementation, streaming has virtually no impact on alignment accuracy**
|
|
669
|
+
- Smaller chunks reduce memory usage with minimal quality trade-off
|
|
670
|
+
- Recommended chunk size: 300-900 seconds (5-15 minutes) for optimal balance
|
|
671
|
+
|
|
622
672
|
### Word-Level Alignment
|
|
623
673
|
|
|
624
674
|
Enable `word_level=True` to get precise timestamps for each word:
|
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/RECORD
CHANGED
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
lattifai/__init__.py,sha256=
|
|
2
|
-
lattifai/audio2.py,sha256=
|
|
3
|
-
lattifai/client.py,sha256=
|
|
4
|
-
lattifai/errors.py,sha256=
|
|
1
|
+
lattifai/__init__.py,sha256=7y1R5IGw0Sgvl1tfqxEK7e-ozW0wVB-q_JZgv6YyrMQ,2751
|
|
2
|
+
lattifai/audio2.py,sha256=BKMCzkuEmBFAWOEnzgLxeK8TBPTFbjzr1esOfe3MQoo,17460
|
|
3
|
+
lattifai/client.py,sha256=OXDGsWVeOMEjmXI795pvnK3L-ZLn_sfUwG0i7uJ1JkY,22492
|
|
4
|
+
lattifai/errors.py,sha256=LyWRGVhQ6Ak2CYn9FBYAPRgQ7_VHpxzNsXI31HXD--s,11291
|
|
5
5
|
lattifai/logging.py,sha256=MbUEeOUFlF92pA9v532DiPPWKl03S7UHCJ6Z652cf0w,2860
|
|
6
|
-
lattifai/mixin.py,sha256=
|
|
6
|
+
lattifai/mixin.py,sha256=yj3H1SSQSQrhUeqKhQmRRELRr5fp2mb2ovkK9p8Vwn4,23858
|
|
7
7
|
lattifai/types.py,sha256=SjYBfwrCBOXlICvH04niFQJ7OzTx7oTaa_npfRkB67U,659
|
|
8
|
-
lattifai/utils.py,sha256=
|
|
8
|
+
lattifai/utils.py,sha256=ZYEUaoTBCwzv4PBBD-woeiDSTx8T1a1vXHIT0g1YmRI,5345
|
|
9
9
|
lattifai/alignment/__init__.py,sha256=ehpkKfjNIYUx7_M-RWD_8Efcrzd9bE-NSm0QgMMVLW0,178
|
|
10
|
-
lattifai/alignment/lattice1_aligner.py,sha256=
|
|
11
|
-
lattifai/alignment/lattice1_worker.py,sha256=
|
|
10
|
+
lattifai/alignment/lattice1_aligner.py,sha256=DpN_it7ETZgz6uH3I90Y926bvjhFRdL6dycxz5S_tkI,5142
|
|
11
|
+
lattifai/alignment/lattice1_worker.py,sha256=1yYK_xLOL_xHZTVGgNb957R7HhHnl6xwrXUcN372ZIY,12407
|
|
12
12
|
lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
|
|
13
|
-
lattifai/alignment/segmenter.py,sha256
|
|
14
|
-
lattifai/alignment/tokenizer.py,sha256=
|
|
13
|
+
lattifai/alignment/segmenter.py,sha256=mzWEQC6hWZtI2mR2WU59W7qLHa7KXy7fdU6991kyUuQ,6276
|
|
14
|
+
lattifai/alignment/tokenizer.py,sha256=oqgy5L9wU0_AMyUVNArEtPIDXm7WdvNNfJuB2ZJBpqI,22394
|
|
15
15
|
lattifai/caption/__init__.py,sha256=6MM_2j6CaqwZ81LfSy4di2EP0ykvheRjMZKAYDx2rQs,477
|
|
16
|
-
lattifai/caption/caption.py,sha256=
|
|
16
|
+
lattifai/caption/caption.py,sha256=Ljt-6K89AauIK05hdDqjV6G03mkTTJL2UE9ukt-tck0,52502
|
|
17
17
|
lattifai/caption/gemini_reader.py,sha256=GqY2w78xGYCMDP5kD5WGS8jK0gntel2SK-EPpPKTrwU,15138
|
|
18
18
|
lattifai/caption/gemini_writer.py,sha256=sYPxYEmVQcEan5WVGgSrcraxs3QJRQRh8CJkl2yUQ1s,6515
|
|
19
19
|
lattifai/caption/supervision.py,sha256=DRrM8lfKU_x9aVBcLG6xnT0xIJrnc8jzHpzcSwQOg8c,905
|
|
20
20
|
lattifai/caption/text_parser.py,sha256=XDb8KTt031uJ1hg6dpbINglGOTX-6pBcghbg3DULM1I,4633
|
|
21
21
|
lattifai/cli/__init__.py,sha256=dIUmrpN-OwR4h6BqMhXp87_5ZwgO41ShPru_iZGnpQs,463
|
|
22
|
-
lattifai/cli/alignment.py,sha256=
|
|
23
|
-
lattifai/cli/app_installer.py,sha256=
|
|
24
|
-
lattifai/cli/caption.py,sha256=
|
|
25
|
-
lattifai/cli/server.py,sha256=
|
|
26
|
-
lattifai/cli/transcribe.py,sha256=
|
|
27
|
-
lattifai/cli/youtube.py,sha256
|
|
22
|
+
lattifai/cli/alignment.py,sha256=06em-Uaf6NhSz1ce4dwT2r8n56NrtibR7ZsSkmc18Kc,5954
|
|
23
|
+
lattifai/cli/app_installer.py,sha256=gAndH3Yo97fGRDe2CQnGtOgZZ4k3_v5ftcUo5g6xbSA,5884
|
|
24
|
+
lattifai/cli/caption.py,sha256=p0VY6orf3D77tr30NQka7A84kwEmYiZrCDB6FbTgoFM,6312
|
|
25
|
+
lattifai/cli/server.py,sha256=sXMfOSse9-V79slXUU8FDLeqtI5U9zeU-5YpjTIGyVw,1186
|
|
26
|
+
lattifai/cli/transcribe.py,sha256=W42SVhnOQ0EndMk-Lu38BiG1LuMcJnzre9X83M6kBZ4,8137
|
|
27
|
+
lattifai/cli/youtube.py,sha256=-EIDSS1Iel3_6qD9M2CZZHwKOvgdkIa1cMY4rX7xwVo,5331
|
|
28
28
|
lattifai/config/__init__.py,sha256=Z8OudvS6fgfLNLu_2fvoXartQiYCECOnNfzDt-PfCN4,543
|
|
29
|
-
lattifai/config/alignment.py,sha256=
|
|
30
|
-
lattifai/config/caption.py,sha256=
|
|
29
|
+
lattifai/config/alignment.py,sha256=v6SuryAVNET9hgH_ZidYN2QhZqpEDnNhR-rogSSSfAg,4039
|
|
30
|
+
lattifai/config/caption.py,sha256=AYOyUJ1xZsX8CBZy3GpLitbcCAHcZ9LwXui_v3vtuso,6786
|
|
31
31
|
lattifai/config/client.py,sha256=I1JqLQlsQNU5ouovTumr-PP_8GWC9DI_e9B5UwsDZws,1492
|
|
32
32
|
lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
|
|
33
|
-
lattifai/config/media.py,sha256=
|
|
33
|
+
lattifai/config/media.py,sha256=cjM8eGeZ7ELhmy4cCqHAyogeHItaVqMrPzSwwIx79HY,14856
|
|
34
34
|
lattifai/config/transcription.py,sha256=bzghOGgcNWzTnDYd_cqCOB7GT8OnzHDiyam7LSixqxM,2901
|
|
35
35
|
lattifai/diarization/__init__.py,sha256=MgBDQ1ehL2qDnZprEp8KqON7CmbG-qaP37gzBsV0jzk,119
|
|
36
36
|
lattifai/diarization/lattifai.py,sha256=SE2BpIZ3_deKyhXdBqe77bsDLXIUV9AQV34gfINv7_s,2657
|
|
37
|
-
lattifai/server/app.py,sha256=
|
|
37
|
+
lattifai/server/app.py,sha256=wXYgXc_yGQACtUJdhkfhLsTOQjhhIhDQRiVRny7Ogcs,15455
|
|
38
38
|
lattifai/transcription/__init__.py,sha256=mEoMTbs5jAgtXQn1jTjlFY_GUr-S0WmPn8uZ6WZCkU0,2643
|
|
39
39
|
lattifai/transcription/base.py,sha256=59b4nQHFMyTRyyzBJTM8ZpEuUy1KjwA2o6rNfrNluKY,3911
|
|
40
40
|
lattifai/transcription/gemini.py,sha256=1VNi9gl-Kpkw3ljZcOZG5oq_OY8fMC9Xv4kOwyQpI0Q,7992
|
|
@@ -46,11 +46,11 @@ lattifai/transcription/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnK
|
|
|
46
46
|
lattifai/workflow/__init__.py,sha256=GOT9jptXwpIMiNRqJ_LToEt_5Dt0k7XXbLkFzhrl31o,548
|
|
47
47
|
lattifai/workflow/agents.py,sha256=yEOnxnhcTvr1iOhCorNvp8B76P6nQsLRXJCu_rCYFfM,38
|
|
48
48
|
lattifai/workflow/base.py,sha256=8QoVIBZwJfr5mppJbtUFafHv5QR9lL-XrULjTWD0oBg,6257
|
|
49
|
-
lattifai/workflow/file_manager.py,sha256=
|
|
50
|
-
lattifai/workflow/youtube.py,sha256=
|
|
51
|
-
lattifai-1.0.
|
|
52
|
-
lattifai-1.0.
|
|
53
|
-
lattifai-1.0.
|
|
54
|
-
lattifai-1.0.
|
|
55
|
-
lattifai-1.0.
|
|
56
|
-
lattifai-1.0.
|
|
49
|
+
lattifai/workflow/file_manager.py,sha256=IUWW838ta83kfwM4gpW83gsD_Tx-pa-L_RWKjiefQbQ,33017
|
|
50
|
+
lattifai/workflow/youtube.py,sha256=ON9z0UUk16ThQzdhdgyOiwBmewZOcxfT05dsl3aKYqw,23840
|
|
51
|
+
lattifai-1.0.5.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
|
|
52
|
+
lattifai-1.0.5.dist-info/METADATA,sha256=cTg6ivcixFAv-464qk0R2v19LdEgGkETcNvRzycFSKk,26117
|
|
53
|
+
lattifai-1.0.5.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
54
|
+
lattifai-1.0.5.dist-info/entry_points.txt,sha256=F8Akof3VtKtrbnYSav1umgoo9Xbv34rUcKn-ioRfeGQ,474
|
|
55
|
+
lattifai-1.0.5.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
|
|
56
|
+
lattifai-1.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|