lattifai 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lattifai/errors.py CHANGED
@@ -11,11 +11,15 @@ LATTICE_DECODING_FAILURE_HELP = (
11
11
  "1) Media(Audio/Video) and text content mismatch:\n"
12
12
  " - The transcript/caption does not accurately match the media content\n"
13
13
  " - Text may be from a different version or section of the media\n"
14
- " ⚠️ Note: Gemini transcription may occasionally skip large segments of media, causing alignment failures.\n"
15
- " We will detect and fix this issue in the next version.\n\n"
16
- "2) Unsupported media type:\n"
14
+ "2) Text formatting issues:\n"
15
+ " - Special characters, HTML entities, or unusual punctuation may cause alignment failures\n"
16
+ " - Text normalization is enabled by default (caption.normalize_text=True)\n"
17
+ " If you disabled it, try re-enabling: caption.normalize_text=True\n"
18
+ "3) Unsupported media type:\n"
17
19
  " - Singing is not yet supported, this will be optimized in future versions\n\n"
18
20
  "💡 Troubleshooting tips:\n"
21
+ " • Text normalization is enabled by default to handle special characters\n"
22
+ " (no action needed unless you explicitly set caption.normalize_text=False)\n"
19
23
  " • Verify the transcript matches the media by listening to a few segments\n"
20
24
  " • For YouTube videos, manually check if auto-generated transcript are accurate\n"
21
25
  " • Consider using a different transcription source if Gemini results are incomplete"
lattifai/mixin.py CHANGED
@@ -10,6 +10,7 @@ from lhotse.utils import Pathlike
10
10
  from lattifai.audio2 import AudioData
11
11
  from lattifai.caption import Caption
12
12
  from lattifai.errors import CaptionProcessingError
13
+ from lattifai.utils import safe_print
13
14
 
14
15
  if TYPE_CHECKING:
15
16
  from .config import AlignmentConfig, CaptionConfig, ClientConfig, DiarizationConfig, TranscriptionConfig
@@ -278,7 +279,7 @@ class LattifAIClientMixin:
278
279
 
279
280
  try:
280
281
  if verbose:
281
- print(colorful.cyan(f"📖 Step 1: Reading caption file from {input_caption}"))
282
+ safe_print(colorful.cyan(f"📖 Step 1: Reading caption file from {input_caption}"))
282
283
  caption = Caption.read(
283
284
  input_caption,
284
285
  format=input_caption_format,
@@ -287,18 +288,18 @@ class LattifAIClientMixin:
287
288
  diarization_file = Path(str(input_caption)).with_suffix(".SpkDiar")
288
289
  if diarization_file.exists():
289
290
  if verbose:
290
- print(colorful.cyan(f"📖 Step 1b: Reading speaker diarization from {diarization_file}"))
291
+ safe_print(colorful.cyan(f"📖 Step 1b: Reading speaker diarization from {diarization_file}"))
291
292
  caption.read_speaker_diarization(diarization_file)
292
293
  events_file = Path(str(input_caption)).with_suffix(".AED")
293
294
  if events_file.exists():
294
295
  if verbose:
295
- print(colorful.cyan(f"📖 Step 1c: Reading audio events from {events_file}"))
296
+ safe_print(colorful.cyan(f"📖 Step 1c: Reading audio events from {events_file}"))
296
297
  from tgt import read_textgrid
297
298
 
298
299
  caption.audio_events = read_textgrid(events_file)
299
300
 
300
301
  if verbose:
301
- print(colorful.green(f" ✓ Parsed {len(caption)} caption segments"))
302
+ safe_print(colorful.green(f" ✓ Parsed {len(caption)} caption segments"))
302
303
  return caption
303
304
  except Exception as e:
304
305
  raise CaptionProcessingError(
@@ -332,10 +333,10 @@ class LattifAIClientMixin:
332
333
  )
333
334
  diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
334
335
  if not diarization_file.exists() and caption.speaker_diarization:
335
- print(colorful.green(f" Writing speaker diarization to: {diarization_file}"))
336
+ safe_print(colorful.green(f" Writing speaker diarization to: {diarization_file}"))
336
337
  caption.write_speaker_diarization(diarization_file)
337
338
 
338
- print(colorful.green(f"🎉🎉🎉🎉🎉 Caption file written to: {output_caption_path}"))
339
+ safe_print(colorful.green(f"🎉🎉🎉🎉🎉 Caption file written to: {output_caption_path}"))
339
340
  return result
340
341
  except Exception as e:
341
342
  raise CaptionProcessingError(
@@ -352,14 +353,14 @@ class LattifAIClientMixin:
352
353
  force_overwrite: bool,
353
354
  ) -> str:
354
355
  """Download media from YouTube (async implementation)."""
355
- print(colorful.cyan("📥 Downloading media from YouTube..."))
356
+ safe_print(colorful.cyan("📥 Downloading media from YouTube..."))
356
357
  media_file = await self.downloader.download_media(
357
358
  url=url,
358
359
  output_dir=str(output_dir),
359
360
  media_format=media_format,
360
361
  force_overwrite=force_overwrite,
361
362
  )
362
- print(colorful.green(f" ✓ Media downloaded: {media_file}"))
363
+ safe_print(colorful.green(f" ✓ Media downloaded: {media_file}"))
363
364
  return media_file
364
365
 
365
366
  def _download_media_sync(
@@ -400,14 +401,20 @@ class LattifAIClientMixin:
400
401
  # Transcription mode: use Transcriber to transcribe
401
402
  self._validate_transcription_setup()
402
403
 
403
- print(colorful.cyan(f"🎤 Transcribing({self.transcriber.name}) media: {str(media_file)} ..."))
404
+ safe_print(colorful.cyan(f"🎤 Transcribing({self.transcriber.name}) media: {str(media_file)} ..."))
404
405
  transcription = await self.transcriber.transcribe_file(media_file, language=source_lang)
405
- print(colorful.green(" ✓ Transcription completed."))
406
+ safe_print(colorful.green(" ✓ Transcription completed."))
406
407
 
407
408
  if "gemini" in self.transcriber.name.lower():
408
409
  # write to temp file and use Caption read
409
- with tempfile.NamedTemporaryFile(suffix=self.transcriber.file_suffix, delete=True) as tmp_file:
410
- tmp_path = Path(tmp_file.name)
410
+ # On Windows, an open NamedTemporaryFile cannot be reopened by name, so close it before the transcriber writes to that path
411
+ tmp_file = tempfile.NamedTemporaryFile(
412
+ suffix=self.transcriber.file_suffix, delete=False, mode="w", encoding="utf-8"
413
+ )
414
+ tmp_path = Path(tmp_file.name)
415
+ tmp_file.close() # Close file before writing
416
+
417
+ try:
411
418
  await asyncio.to_thread(
412
419
  self.transcriber.write,
413
420
  transcription,
@@ -417,6 +424,10 @@ class LattifAIClientMixin:
417
424
  transcription = self._read_caption(
418
425
  tmp_path, input_caption_format="gemini", normalize_text=False, verbose=False
419
426
  )
427
+ finally:
428
+ # Clean up temp file
429
+ if tmp_path.exists():
430
+ tmp_path.unlink()
420
431
 
421
432
  return transcription
422
433
 
@@ -459,7 +470,7 @@ class LattifAIClientMixin:
459
470
  if self.caption_config.input_path:
460
471
  caption_path = Path(self.caption_config.input_path)
461
472
  if caption_path.exists():
462
- print(colorful.green(f"📄 Using provided caption file: {caption_path}"))
473
+ safe_print(colorful.green(f"📄 Using provided caption file: {caption_path}"))
463
474
  return str(caption_path)
464
475
  else:
465
476
  raise FileNotFoundError(f"Provided caption path does not exist: {caption_path}")
@@ -496,7 +507,7 @@ class LattifAIClientMixin:
496
507
 
497
508
  # elif choice == "overwrite": continue to transcribe below
498
509
 
499
- print(colorful.cyan(f"🎤 Transcribing media with {transcriber_name}..."))
510
+ safe_print(colorful.cyan(f"🎤 Transcribing media with {transcriber_name}..."))
500
511
  if self.transcriber.supports_url:
501
512
  transcription = await self.transcriber.transcribe(url, language=source_lang)
502
513
  else:
@@ -508,7 +519,7 @@ class LattifAIClientMixin:
508
519
  caption_file = transcription
509
520
  else:
510
521
  caption_file = str(transcript_file)
511
- print(colorful.green(f" ✓ Transcription completed: {caption_file}"))
522
+ safe_print(colorful.green(f" ✓ Transcription completed: {caption_file}"))
512
523
  else:
513
524
  # Download YouTube captions
514
525
  caption_file = await self.downloader.download_captions(
lattifai/server/app.py CHANGED
@@ -232,7 +232,7 @@ async def align_files(
232
232
  normalize_text: bool = Form(False),
233
233
  output_format: str = Form("srt"),
234
234
  transcription_model: str = Form("nvidia/parakeet-tdt-0.6b-v3"),
235
- alignment_model: str = Form("Lattifai/Lattice-1"),
235
+ alignment_model: str = Form("LattifAI/Lattice-1"),
236
236
  ):
237
237
  # Check if LATTIFAI_API_KEY is set
238
238
  if not os.environ.get("LATTIFAI_API_KEY"):
@@ -423,4 +423,5 @@ def process_alignment(
423
423
  input_caption=str(caption_path) if caption_path else None,
424
424
  output_caption_path=str(output_caption_path) if output_caption_path else None,
425
425
  split_sentence=split_sentence,
426
+ streaming_chunk_secs=None, # Server API default: no streaming
426
427
  )
lattifai/utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Shared utility helpers for the LattifAI SDK."""
2
2
 
3
3
  import os
4
+ import sys
4
5
  from datetime import datetime, timedelta
5
6
  from pathlib import Path
6
7
  from typing import Any, Optional, Type
@@ -8,6 +9,42 @@ from typing import Any, Optional, Type
8
9
  from lattifai.errors import ModelLoadError
9
10
 
10
11
 
12
+ def safe_print(text: str, **kwargs) -> None:
13
+ """
14
+ Safely print text with Unicode characters, handling Windows encoding issues.
15
+
16
+ On Windows, the default console encoding (cp1252) can't handle many Unicode
17
+ characters like emojis. This function ensures text is printed correctly by
18
+ using UTF-8 encoding when necessary.
19
+
20
+ Args:
21
+ text: The text to print, may contain Unicode/emoji characters
22
+ **kwargs: Additional arguments passed to print()
23
+ """
24
+ try:
25
+ print(text, **kwargs)
26
+ except UnicodeEncodeError:
27
+ # On Windows, bypass the console text encoding by writing UTF-8 bytes directly to stdout
28
+ if sys.platform == "win32":
29
+ try:
30
+ # Try to encode with UTF-8 and print
31
+ if hasattr(sys.stdout, "buffer"):
32
+ sys.stdout.buffer.write((text + "\n").encode("utf-8"))
33
+ sys.stdout.flush()
34
+ else:
35
+ # Fallback: replace problematic characters
36
+ print(text.encode(sys.stdout.encoding, errors="replace").decode(sys.stdout.encoding), **kwargs)
37
+ except Exception:
38
+ # Last resort: strip every character outside ASCII and CJK ideographs (U+4E00-U+9FFF), which removes emojis
39
+ import re
40
+
41
+ text_no_emoji = re.sub(r"[^\x00-\x7F\u4e00-\u9fff]+", "", text)
42
+ print(text_no_emoji, **kwargs)
43
+ else:
44
+ # Non-Windows: this shouldn't happen, but fallback gracefully
45
+ print(text.encode("utf-8", errors="replace").decode("utf-8"), **kwargs)
46
+
47
+
11
48
  def _get_cache_marker_path(cache_dir: Path) -> Path:
12
49
  """Get the path for the cache marker file with current date."""
13
50
  today = datetime.now().strftime("%Y%m%d")
@@ -1,4 +1,4 @@
1
- """
1
+ """.
2
2
  File existence management utilities for video processing workflows
3
3
  """
4
4
 
@@ -11,6 +11,8 @@ from typing import Dict, List, Optional, Sequence, Tuple
11
11
 
12
12
  import colorful
13
13
 
14
+ from lattifai.utils import safe_print
15
+
14
16
  try:
15
17
  import questionary
16
18
  except ImportError: # pragma: no cover - optional dependency
@@ -110,7 +112,7 @@ class FileExistenceManager:
110
112
  return "proceed" # No existing files, proceed normally
111
113
 
112
114
  # Header with warning color
113
- print(f'\n{colorful.bold_yellow("⚠️ Existing files found:")}')
115
+ safe_print(f'\n{colorful.bold_yellow("⚠️ Existing files found:")}')
114
116
 
115
117
  # Collect file paths for options
116
118
  file_paths = []
@@ -157,15 +159,15 @@ class FileExistenceManager:
157
159
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default=default_value)
158
160
 
159
161
  if choice == "overwrite":
160
- print(f'{colorful.yellow("🔄 Overwriting existing files")}')
162
+ safe_print(f'{colorful.yellow("🔄 Overwriting existing files")}')
161
163
  elif choice == TRANSCRIBE_CHOICE:
162
164
  print(f'{colorful.magenta(f"✨ Will transcribe with {transcriber_name}")}')
163
165
  elif choice == "cancel":
164
- print(f'{colorful.red("❌ Operation cancelled")}')
166
+ safe_print(f'{colorful.red("❌ Operation cancelled")}')
165
167
  elif choice in file_paths:
166
- print(f'{colorful.green(f"✅ Using selected file: {choice}")}')
168
+ safe_print(f'{colorful.green(f"✅ Using selected file: {choice}")}')
167
169
  else:
168
- print(f'{colorful.green("✅ Using existing files")}')
170
+ safe_print(f'{colorful.green("✅ Using existing files")}')
169
171
 
170
172
  return choice
171
173
 
@@ -189,7 +191,7 @@ class FileExistenceManager:
189
191
  del emoji # Unused variable
190
192
 
191
193
  # Header with warning color
192
- print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
194
+ safe_print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
193
195
 
194
196
  for file_path in sorted(files):
195
197
  print(f' {colorful.green("•")} {file_path}')
@@ -203,11 +205,11 @@ class FileExistenceManager:
203
205
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default="use")
204
206
 
205
207
  if choice == "use":
206
- print(f'{colorful.green(f"✅ Using existing {label} files")}')
208
+ safe_print(f'{colorful.green(f"✅ Using existing {label} files")}')
207
209
  elif choice == "overwrite":
208
- print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
210
+ safe_print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
209
211
  elif choice == "cancel":
210
- print(f'{colorful.red("❌ Operation cancelled")}')
212
+ safe_print(f'{colorful.red("❌ Operation cancelled")}')
211
213
 
212
214
  return choice
213
215
 
@@ -245,7 +247,7 @@ class FileExistenceManager:
245
247
  )
246
248
 
247
249
  # Multiple files: let user choose which one
248
- print(f'\n{colorful.bold_yellow(f"⚠️ Multiple {file_type} files found:")}')
250
+ safe_print(f'\n{colorful.bold_yellow(f"⚠️ Multiple {file_type} files found:")}')
249
251
 
250
252
  # Create options with full file paths
251
253
  options = []
@@ -266,7 +268,7 @@ class FileExistenceManager:
266
268
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default=files[0])
267
269
 
268
270
  if choice == "cancel":
269
- print(f'{colorful.red("❌ Operation cancelled")}')
271
+ safe_print(f'{colorful.red("❌ Operation cancelled")}')
270
272
  elif choice == "overwrite":
271
273
  overwrite_msg = f"🔄 Overwriting all {file_type} files"
272
274
  print(f"{colorful.yellow(overwrite_msg)}")
@@ -274,7 +276,7 @@ class FileExistenceManager:
274
276
  transcribe_msg = f"✨ Will transcribe with {transcriber_name}"
275
277
  print(f"{colorful.magenta(transcribe_msg)}")
276
278
  else:
277
- print(f'{colorful.green(f"✅ Using: {choice}")}')
279
+ safe_print(f'{colorful.green(f"✅ Using: {choice}")}')
278
280
 
279
281
  return choice
280
282
 
@@ -407,6 +407,11 @@ class YouTubeDownloader:
407
407
  output_template,
408
408
  "--sub-format",
409
409
  "best", # Prefer best available format
410
+ "--no-warnings", # Suppress warnings for cleaner output
411
+ "--extractor-retries",
412
+ "3", # Retry on errors
413
+ "--sleep-requests",
414
+ "1", # Sleep between requests to avoid rate limiting
410
415
  ]
411
416
 
412
417
  # Add caption language selection if specified
@@ -425,7 +430,8 @@ class YouTubeDownloader:
425
430
  None, lambda: subprocess.run(ytdlp_options, capture_output=True, text=True, check=True)
426
431
  )
427
432
 
428
- self.logger.info(f"yt-dlp transcript output: {result.stdout.strip()}")
433
+ # Log the full yt-dlp output at debug level only, to keep info-level logs clean
434
+ self.logger.debug(f"yt-dlp output: {result.stdout.strip()}")
429
435
 
430
436
  # Find the downloaded transcript file
431
437
  caption_patterns = [
@@ -481,9 +487,18 @@ class YouTubeDownloader:
481
487
 
482
488
  except subprocess.CalledProcessError as e:
483
489
  error_msg = e.stderr.strip() if e.stderr else str(e)
490
+
491
+ # Check for specific error conditions
484
492
  if "No automatic or manual captions found" in error_msg:
485
493
  self.logger.warning("No captions available for this video")
486
494
  return None
495
+ elif "HTTP Error 429" in error_msg or "Too Many Requests" in error_msg:
496
+ self.logger.error("YouTube rate limit exceeded. Please try again later or use a different method.")
497
+ raise RuntimeError(
498
+ "YouTube rate limit exceeded (HTTP 429). "
499
+ "Try again later or use --cookies option with authenticated cookies. "
500
+ "See: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp"
501
+ )
487
502
  else:
488
503
  self.logger.error(f"Failed to download transcript: {error_msg}")
489
504
  raise RuntimeError(f"Failed to download transcript: {error_msg}")
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai
3
- Version: 1.0.4
3
+ Version: 1.0.5
4
4
  Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
5
5
  Author-email: Lattifai Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>
7
7
  License: MIT License
8
8
 
9
- Copyright (c) 2025 Lattifai.
9
+ Copyright (c) 2025 LattifAI.
10
10
 
11
11
  Permission is hereby granted, free of charge, to any person obtaining a copy
12
12
  of this software and associated documentation files (the "Software"), to deal
@@ -27,10 +27,10 @@ License: MIT License
27
27
  SOFTWARE.
28
28
 
29
29
  Project-URL: Homepage, https://github.com/lattifai/lattifai-python
30
- Project-URL: Documentation, https://github.com/lattifai/lattifai-python/README.md
30
+ Project-URL: Documentation, https://github.com/lattifai/lattifai-python/blob/main/README.md
31
31
  Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
32
32
  Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
33
- Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG.md
33
+ Project-URL: Changelog, https://github.com/lattifai/lattifai-python/blob/main/CHANGELOG.md
34
34
  Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
35
35
  Classifier: Development Status :: 5 - Production/Stable
36
36
  Classifier: Intended Audience :: Developers
@@ -50,7 +50,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
50
50
  Requires-Python: <3.15,>=3.10
51
51
  Description-Content-Type: text/markdown
52
52
  License-File: LICENSE
53
- Requires-Dist: lattifai-core>=0.4.6
53
+ Requires-Dist: lattifai-core>=0.5.1
54
54
  Requires-Dist: lattifai-run>=1.0.1
55
55
  Requires-Dist: python-dotenv
56
56
  Requires-Dist: lhotse>=1.26.0
@@ -63,10 +63,9 @@ Requires-Dist: onnxruntime
63
63
  Requires-Dist: msgpack
64
64
  Requires-Dist: g2p-phonemizer>=0.4.0
65
65
  Requires-Dist: av
66
- Requires-Dist: wtpsplit>=2.1.6
67
- Requires-Dist: kaldi-native-fbank
68
- Requires-Dist: OmniSenseVoice>=0.4.0
69
- Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc1
66
+ Requires-Dist: wtpsplit>=2.1.7
67
+ Requires-Dist: OmniSenseVoice>=0.4.2
68
+ Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc3
70
69
  Requires-Dist: pyannote-audio-notorchdeps>=4.0.2
71
70
  Requires-Dist: questionary>=2.0
72
71
  Requires-Dist: yt-dlp
@@ -83,7 +82,7 @@ Requires-Dist: torch-audiomentations==0.12.0; extra == "diarization"
83
82
  Requires-Dist: pyannote.audio>=4.0.2; extra == "diarization"
84
83
  Provides-Extra: transcription
85
84
  Requires-Dist: OmniSenseVoice>=0.4.0; extra == "transcription"
86
- Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc1; extra == "transcription"
85
+ Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc3; extra == "transcription"
87
86
  Provides-Extra: test
88
87
  Requires-Dist: pytest; extra == "test"
89
88
  Requires-Dist: pytest-cov; extra == "test"
@@ -116,7 +115,6 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
116
115
 
117
116
  > **⚠️ Note on Current Limitations**:
118
117
  > 1. **Memory Usage**: We are aware of high memory consumption and are actively working on further optimizations.
119
- > 2. **Long Audio**: Currently, long audio files might face issues. Support for **long-form audio (up to 20 hours)** will be available shortly.
120
118
 
121
119
  ## Table of Contents
122
120
 
@@ -464,6 +462,8 @@ lai transcribe align \
464
462
 
465
463
  ```bash
466
464
  lai caption convert input.srt output.vtt
465
+ lai caption convert input.srt output.json
466
+ # Enable normalization to clean HTML entities and special characters:
467
467
  lai caption convert input.srt output.json normalize_text=true
468
468
  ```
469
469
 
@@ -554,10 +554,10 @@ from lattifai import LattifAI, CaptionConfig
554
554
 
555
555
  client = LattifAI(
556
556
  caption_config=CaptionConfig(
557
- split_sentence=True, # Smart sentence splitting
558
- word_level=True, # Word-level timestamps
559
- normalize_text=True, # Clean HTML entities
560
- include_speaker_in_text=False, # Include speaker labels
557
+ split_sentence=True, # Smart sentence splitting (default: False)
558
+ word_level=True, # Word-level timestamps (default: False)
559
+ normalize_text=True, # Clean HTML entities (default: True)
560
+ include_speaker_in_text=False, # Include speaker labels (default: True)
561
561
  )
562
562
  )
563
563
  ```
@@ -619,6 +619,56 @@ from lattifai import (
619
619
 
620
620
  ## Advanced Features
621
621
 
622
+ ### Long-Form Audio Support
623
+
624
+ LattifAI now supports processing long audio files (up to 20 hours) through streaming mode. Enable streaming by setting the `streaming_chunk_secs` parameter:
625
+
626
+ **Python SDK:**
627
+ ```python
628
+ from lattifai import LattifAI
629
+
630
+ client = LattifAI()
631
+
632
+ # Enable streaming for long audio files
633
+ caption = client.alignment(
634
+ input_media="long_audio.wav",
635
+ input_caption="subtitle.srt",
636
+ output_caption_path="output.srt",
637
+ streaming_chunk_secs=600.0, # Process in 600-second (10-minute) chunks
638
+ )
639
+ ```
640
+
641
+ **CLI:**
642
+ ```bash
643
+ # Enable streaming with chunk size
644
+ lai alignment align long_audio.wav subtitle.srt output.srt \
645
+ media.streaming_chunk_secs=300.0
646
+
647
+ # For YouTube videos
648
+ lai alignment youtube "https://youtube.com/watch?v=VIDEO_ID" \
649
+ media.streaming_chunk_secs=300.0
650
+ ```
651
+
652
+ **MediaConfig:**
653
+ ```python
654
+ from lattifai import LattifAI, MediaConfig
655
+
656
+ client = LattifAI(
657
+ media_config=MediaConfig(
658
+ streaming_chunk_secs=600.0, # Chunk duration in seconds (1-1800), default: 600 (10 minutes)
659
+ )
660
+ )
661
+ ```
662
+
663
+ **Notes:**
664
+ - Chunk duration must be between 1 and 1800 seconds (minimum 1 second, maximum 30 minutes)
665
+ - Default value: 600 seconds (10 minutes)
666
+ - **Recommended: Use 60 seconds or larger for optimal performance**
667
+ - Set to `None` to disable streaming
668
+ - **Thanks to our precise implementation, streaming has virtually no impact on alignment accuracy**
669
+ - Smaller chunks reduce memory usage with minimal quality trade-off
670
+ - Recommended chunk size: 300-900 seconds (5-15 minutes) for optimal balance
671
+
622
672
  ### Word-Level Alignment
623
673
 
624
674
  Enable `word_level=True` to get precise timestamps for each word:
@@ -1,40 +1,40 @@
1
- lattifai/__init__.py,sha256=K46XVINrXgjGehO8uXByTIbUnBCdB7QwsvVNWzKbdeU,2364
2
- lattifai/audio2.py,sha256=WPAhcaEoIMRQBf2QZe-0yyAbgyyiqUVAthJ-z54R9Wc,7761
3
- lattifai/client.py,sha256=Wkz7Q1XvCQ9KxD0uZ_M1ix457ZbgIG1gAxxt8nMBUj4,22147
4
- lattifai/errors.py,sha256=dFQ_7c8rwuHrq2pDPjpzA755tAV3t8daXXFbHmWblbs,11015
1
+ lattifai/__init__.py,sha256=7y1R5IGw0Sgvl1tfqxEK7e-ozW0wVB-q_JZgv6YyrMQ,2751
2
+ lattifai/audio2.py,sha256=BKMCzkuEmBFAWOEnzgLxeK8TBPTFbjzr1esOfe3MQoo,17460
3
+ lattifai/client.py,sha256=OXDGsWVeOMEjmXI795pvnK3L-ZLn_sfUwG0i7uJ1JkY,22492
4
+ lattifai/errors.py,sha256=LyWRGVhQ6Ak2CYn9FBYAPRgQ7_VHpxzNsXI31HXD--s,11291
5
5
  lattifai/logging.py,sha256=MbUEeOUFlF92pA9v532DiPPWKl03S7UHCJ6Z652cf0w,2860
6
- lattifai/mixin.py,sha256=2QnWMG2E_cyZaa98Wtdf6Duo8w3DTIiD-EEfrmtDhGk,23388
6
+ lattifai/mixin.py,sha256=yj3H1SSQSQrhUeqKhQmRRELRr5fp2mb2ovkK9p8Vwn4,23858
7
7
  lattifai/types.py,sha256=SjYBfwrCBOXlICvH04niFQJ7OzTx7oTaa_npfRkB67U,659
8
- lattifai/utils.py,sha256=TqOPrd_Et7KxrbfI_JbBNIGZ5-oGJY8ZUyJMPDTih1I,3848
8
+ lattifai/utils.py,sha256=ZYEUaoTBCwzv4PBBD-woeiDSTx8T1a1vXHIT0g1YmRI,5345
9
9
  lattifai/alignment/__init__.py,sha256=ehpkKfjNIYUx7_M-RWD_8Efcrzd9bE-NSm0QgMMVLW0,178
10
- lattifai/alignment/lattice1_aligner.py,sha256=soBRZ98jRIju-wN5eqYUmQfF56KiEUxVGw0UvtRcx4A,4464
11
- lattifai/alignment/lattice1_worker.py,sha256=XGICEzLygspqC7SMj5s45M7VNSj-l2zemkawdKKjssw,9233
10
+ lattifai/alignment/lattice1_aligner.py,sha256=DpN_it7ETZgz6uH3I90Y926bvjhFRdL6dycxz5S_tkI,5142
11
+ lattifai/alignment/lattice1_worker.py,sha256=1yYK_xLOL_xHZTVGgNb957R7HhHnl6xwrXUcN372ZIY,12407
12
12
  lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
13
- lattifai/alignment/segmenter.py,sha256=-FKtIwv9Z4fU9Fs08jhL9VyREVSYcfcwuTqb8jxCiuo,6228
14
- lattifai/alignment/tokenizer.py,sha256=WilqU9Ecdkl_cW86IkB1mh_PFlHN-35Jsreiyse2r-8,22355
13
+ lattifai/alignment/segmenter.py,sha256=mzWEQC6hWZtI2mR2WU59W7qLHa7KXy7fdU6991kyUuQ,6276
14
+ lattifai/alignment/tokenizer.py,sha256=oqgy5L9wU0_AMyUVNArEtPIDXm7WdvNNfJuB2ZJBpqI,22394
15
15
  lattifai/caption/__init__.py,sha256=6MM_2j6CaqwZ81LfSy4di2EP0ykvheRjMZKAYDx2rQs,477
16
- lattifai/caption/caption.py,sha256=NNkBJbSdfXe4CwlCvMplrd4UOxlZyxq5Cs5g-dReB1E,46974
16
+ lattifai/caption/caption.py,sha256=Ljt-6K89AauIK05hdDqjV6G03mkTTJL2UE9ukt-tck0,52502
17
17
  lattifai/caption/gemini_reader.py,sha256=GqY2w78xGYCMDP5kD5WGS8jK0gntel2SK-EPpPKTrwU,15138
18
18
  lattifai/caption/gemini_writer.py,sha256=sYPxYEmVQcEan5WVGgSrcraxs3QJRQRh8CJkl2yUQ1s,6515
19
19
  lattifai/caption/supervision.py,sha256=DRrM8lfKU_x9aVBcLG6xnT0xIJrnc8jzHpzcSwQOg8c,905
20
20
  lattifai/caption/text_parser.py,sha256=XDb8KTt031uJ1hg6dpbINglGOTX-6pBcghbg3DULM1I,4633
21
21
  lattifai/cli/__init__.py,sha256=dIUmrpN-OwR4h6BqMhXp87_5ZwgO41ShPru_iZGnpQs,463
22
- lattifai/cli/alignment.py,sha256=uKMTE95_JMikfbyCcwLbQxms-EQmZXEj7oYugiupk9I,5890
23
- lattifai/cli/app_installer.py,sha256=0xBQnJZKhyx4JT_PkHXkZ0XlAWxCGz9o0Jjq99poKew,5680
24
- lattifai/cli/caption.py,sha256=ucgYxJ43ab71nGpZBAiVn8QA0DAVht2QMZFE5IdgxP0,6853
25
- lattifai/cli/server.py,sha256=Vo6_ANgwu7WtC5h4BebQLmhqLNpqzPoYrPQPANpP7rw,1142
26
- lattifai/cli/transcribe.py,sha256=6uJfvtB1o_u1uQwxt4fje_koyfN93mGaFLlskmjqx2c,7406
27
- lattifai/cli/youtube.py,sha256=9_erdIkhX8pCiy7BRzNstEiO9saM-VKZ1WVqvbXbmrc,5267
22
+ lattifai/cli/alignment.py,sha256=06em-Uaf6NhSz1ce4dwT2r8n56NrtibR7ZsSkmc18Kc,5954
23
+ lattifai/cli/app_installer.py,sha256=gAndH3Yo97fGRDe2CQnGtOgZZ4k3_v5ftcUo5g6xbSA,5884
24
+ lattifai/cli/caption.py,sha256=p0VY6orf3D77tr30NQka7A84kwEmYiZrCDB6FbTgoFM,6312
25
+ lattifai/cli/server.py,sha256=sXMfOSse9-V79slXUU8FDLeqtI5U9zeU-5YpjTIGyVw,1186
26
+ lattifai/cli/transcribe.py,sha256=W42SVhnOQ0EndMk-Lu38BiG1LuMcJnzre9X83M6kBZ4,8137
27
+ lattifai/cli/youtube.py,sha256=-EIDSS1Iel3_6qD9M2CZZHwKOvgdkIa1cMY4rX7xwVo,5331
28
28
  lattifai/config/__init__.py,sha256=Z8OudvS6fgfLNLu_2fvoXartQiYCECOnNfzDt-PfCN4,543
29
- lattifai/config/alignment.py,sha256=z0b9tg67ftDI90j9Td3qmXFg3WZjSbgszjzeTdwrjZA,3291
30
- lattifai/config/caption.py,sha256=nmfdsJ-18l4UmapdVgxF1ARJbA4aOr7jek1bmse2F_E,6787
29
+ lattifai/config/alignment.py,sha256=v6SuryAVNET9hgH_ZidYN2QhZqpEDnNhR-rogSSSfAg,4039
30
+ lattifai/config/caption.py,sha256=AYOyUJ1xZsX8CBZy3GpLitbcCAHcZ9LwXui_v3vtuso,6786
31
31
  lattifai/config/client.py,sha256=I1JqLQlsQNU5ouovTumr-PP_8GWC9DI_e9B5UwsDZws,1492
32
32
  lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
33
- lattifai/config/media.py,sha256=5JOPjifXDM2WWQERySDZen4-7YfgQNcYM2NkkKp0LjQ,13610
33
+ lattifai/config/media.py,sha256=cjM8eGeZ7ELhmy4cCqHAyogeHItaVqMrPzSwwIx79HY,14856
34
34
  lattifai/config/transcription.py,sha256=bzghOGgcNWzTnDYd_cqCOB7GT8OnzHDiyam7LSixqxM,2901
35
35
  lattifai/diarization/__init__.py,sha256=MgBDQ1ehL2qDnZprEp8KqON7CmbG-qaP37gzBsV0jzk,119
36
36
  lattifai/diarization/lattifai.py,sha256=SE2BpIZ3_deKyhXdBqe77bsDLXIUV9AQV34gfINv7_s,2657
37
- lattifai/server/app.py,sha256=UpHsKJHtK1-sdp5mtDPBSxEl8xYTbe7cVO8dLp9Xiuo,15380
37
+ lattifai/server/app.py,sha256=wXYgXc_yGQACtUJdhkfhLsTOQjhhIhDQRiVRny7Ogcs,15455
38
38
  lattifai/transcription/__init__.py,sha256=mEoMTbs5jAgtXQn1jTjlFY_GUr-S0WmPn8uZ6WZCkU0,2643
39
39
  lattifai/transcription/base.py,sha256=59b4nQHFMyTRyyzBJTM8ZpEuUy1KjwA2o6rNfrNluKY,3911
40
40
  lattifai/transcription/gemini.py,sha256=1VNi9gl-Kpkw3ljZcOZG5oq_OY8fMC9Xv4kOwyQpI0Q,7992
@@ -46,11 +46,11 @@ lattifai/transcription/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnK
46
46
  lattifai/workflow/__init__.py,sha256=GOT9jptXwpIMiNRqJ_LToEt_5Dt0k7XXbLkFzhrl31o,548
47
47
  lattifai/workflow/agents.py,sha256=yEOnxnhcTvr1iOhCorNvp8B76P6nQsLRXJCu_rCYFfM,38
48
48
  lattifai/workflow/base.py,sha256=8QoVIBZwJfr5mppJbtUFafHv5QR9lL-XrULjTWD0oBg,6257
49
- lattifai/workflow/file_manager.py,sha256=d106KHLY8A9amLy5h1vR32e4od8mmJGqMD-iDyiRPLI,32917
50
- lattifai/workflow/youtube.py,sha256=n8L1c6tl8FuYzAzKZ-B76zf5yZsvVggZEJ9mPdbEWGQ,22989
51
- lattifai-1.0.4.dist-info/licenses/LICENSE,sha256=_IkHdwOWLAWcE1M_tIpDoRWdNSJwFdtIqI-XSkK3yPU,1066
52
- lattifai-1.0.4.dist-info/METADATA,sha256=uijheuHzvh-AhZwIFCGTc2_UX-x3ZXxhnlPsvcaMQ4c,24582
53
- lattifai-1.0.4.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
54
- lattifai-1.0.4.dist-info/entry_points.txt,sha256=F8Akof3VtKtrbnYSav1umgoo9Xbv34rUcKn-ioRfeGQ,474
55
- lattifai-1.0.4.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
56
- lattifai-1.0.4.dist-info/RECORD,,
49
+ lattifai/workflow/file_manager.py,sha256=IUWW838ta83kfwM4gpW83gsD_Tx-pa-L_RWKjiefQbQ,33017
50
+ lattifai/workflow/youtube.py,sha256=ON9z0UUk16ThQzdhdgyOiwBmewZOcxfT05dsl3aKYqw,23840
51
+ lattifai-1.0.5.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
52
+ lattifai-1.0.5.dist-info/METADATA,sha256=cTg6ivcixFAv-464qk0R2v19LdEgGkETcNvRzycFSKk,26117
53
+ lattifai-1.0.5.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
54
+ lattifai-1.0.5.dist-info/entry_points.txt,sha256=F8Akof3VtKtrbnYSav1umgoo9Xbv34rUcKn-ioRfeGQ,474
55
+ lattifai-1.0.5.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
56
+ lattifai-1.0.5.dist-info/RECORD,,
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 Lattifai.
3
+ Copyright (c) 2025 LattifAI.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal