lattifai 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lattifai/__init__.py +10 -0
  2. lattifai/alignment/lattice1_aligner.py +64 -15
  3. lattifai/alignment/lattice1_worker.py +135 -50
  4. lattifai/alignment/segmenter.py +3 -2
  5. lattifai/alignment/tokenizer.py +14 -13
  6. lattifai/audio2.py +269 -70
  7. lattifai/caption/caption.py +213 -19
  8. lattifai/cli/__init__.py +2 -0
  9. lattifai/cli/alignment.py +2 -1
  10. lattifai/cli/app_installer.py +35 -33
  11. lattifai/cli/caption.py +9 -19
  12. lattifai/cli/diarization.py +108 -0
  13. lattifai/cli/server.py +3 -1
  14. lattifai/cli/transcribe.py +55 -38
  15. lattifai/cli/youtube.py +1 -0
  16. lattifai/client.py +42 -121
  17. lattifai/config/alignment.py +37 -2
  18. lattifai/config/caption.py +1 -1
  19. lattifai/config/media.py +23 -3
  20. lattifai/config/transcription.py +4 -0
  21. lattifai/diarization/lattifai.py +18 -7
  22. lattifai/errors.py +7 -3
  23. lattifai/mixin.py +45 -16
  24. lattifai/server/app.py +2 -1
  25. lattifai/transcription/__init__.py +1 -1
  26. lattifai/transcription/base.py +21 -2
  27. lattifai/transcription/gemini.py +127 -1
  28. lattifai/transcription/lattifai.py +30 -2
  29. lattifai/utils.py +96 -28
  30. lattifai/workflow/file_manager.py +15 -13
  31. lattifai/workflow/youtube.py +16 -1
  32. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/METADATA +86 -22
  33. lattifai-1.1.0.dist-info/RECORD +57 -0
  34. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/entry_points.txt +2 -0
  35. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/licenses/LICENSE +1 -1
  36. lattifai-1.0.4.dist-info/RECORD +0 -56
  37. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/WHEEL +0 -0
  38. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/top_level.txt +0 -0
lattifai/utils.py CHANGED
@@ -1,13 +1,49 @@
1
1
  """Shared utility helpers for the LattifAI SDK."""
2
2
 
3
- import os
3
+ import sys
4
4
  from datetime import datetime, timedelta
5
5
  from pathlib import Path
6
- from typing import Any, Optional, Type
6
+ from typing import Optional
7
7
 
8
8
  from lattifai.errors import ModelLoadError
9
9
 
10
10
 
11
+ def safe_print(text: str, **kwargs) -> None:
12
+ """
13
+ Safely print text with Unicode characters, handling Windows encoding issues.
14
+
15
+ On Windows, the default console encoding (cp1252) can't handle many Unicode
16
+ characters like emojis. This function ensures text is printed correctly by
17
+ using UTF-8 encoding when necessary.
18
+
19
+ Args:
20
+ text: The text to print, may contain Unicode/emoji characters
21
+ **kwargs: Additional arguments passed to print()
22
+ """
23
+ try:
24
+ print(text, **kwargs)
25
+ except UnicodeEncodeError:
26
+ # On Windows, try to reconfigure stdout to use UTF-8
27
+ if sys.platform == "win32":
28
+ try:
29
+ # Try to encode with UTF-8 and print
30
+ if hasattr(sys.stdout, "buffer"):
31
+ sys.stdout.buffer.write((text + "\n").encode("utf-8"))
32
+ sys.stdout.flush()
33
+ else:
34
+ # Fallback: replace problematic characters
35
+ print(text.encode(sys.stdout.encoding, errors="replace").decode(sys.stdout.encoding), **kwargs)
36
+ except Exception:
37
+ # Last resort: remove emojis
38
+ import re
39
+
40
+ text_no_emoji = re.sub(r"[^\x00-\x7F\u4e00-\u9fff]+", "", text)
41
+ print(text_no_emoji, **kwargs)
42
+ else:
43
+ # Non-Windows: this shouldn't happen, but fallback gracefully
44
+ print(text.encode("utf-8", errors="replace").decode("utf-8"), **kwargs)
45
+
46
+
11
47
  def _get_cache_marker_path(cache_dir: Path) -> Path:
12
48
  """Get the path for the cache marker file with current date."""
13
49
  today = datetime.now().strftime("%Y%m%d")
@@ -51,42 +87,74 @@ def _create_cache_marker(cache_dir: Path) -> None:
51
87
  marker_path.touch()
52
88
 
53
89
 
54
- def _resolve_model_path(model_name_or_path: str) -> str:
55
- """Resolve model path, downloading from Hugging Face when necessary."""
90
+ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface") -> str:
91
+ """Resolve model path, downloading from the specified model hub when necessary.
92
+
93
+ Args:
94
+ model_name_or_path: Local path or remote model identifier.
95
+ model_hub: Which hub to use for downloads. Supported: "huggingface", "modelscope".
96
+ """
56
97
  if Path(model_name_or_path).expanduser().exists():
57
98
  return str(Path(model_name_or_path).expanduser())
58
99
 
59
- from huggingface_hub import snapshot_download
60
- from huggingface_hub.constants import HF_HUB_CACHE
61
- from huggingface_hub.errors import LocalEntryNotFoundError
100
+ # Normalize hub name
101
+ hub = (model_hub or "huggingface").lower()
62
102
 
63
- # Determine cache directory for this model
64
- cache_dir = Path(HF_HUB_CACHE) / f'models--{model_name_or_path.replace("/", "--")}'
103
+ if hub not in ("huggingface", "modelscope"):
104
+ raise ValueError(f"Unsupported model_hub: {model_hub}. Supported: 'huggingface', 'modelscope'.")
105
+
106
+ # If local path exists, return it regardless of hub
107
+ if Path(model_name_or_path).expanduser().exists():
108
+ return str(Path(model_name_or_path).expanduser())
65
109
 
66
- # Check if we have a valid cached version
67
- if _is_cache_valid(cache_dir):
68
- # Return the snapshot path (latest version)
69
- snapshots_dir = cache_dir / "snapshots"
70
- if snapshots_dir.exists():
71
- snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
72
- if snapshot_dirs:
73
- # Return the most recent snapshot
74
- latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
75
- return str(latest_snapshot)
110
+ if hub == "huggingface":
111
+ from huggingface_hub import snapshot_download
112
+ from huggingface_hub.constants import HF_HUB_CACHE
113
+ from huggingface_hub.errors import LocalEntryNotFoundError
114
+
115
+ # Determine cache directory for this model
116
+ cache_dir = Path(HF_HUB_CACHE) / f'models--{model_name_or_path.replace("/", "--")}'
117
+
118
+ # Check if we have a valid cached version
119
+ if _is_cache_valid(cache_dir):
120
+ # Return the snapshot path (latest version)
121
+ snapshots_dir = cache_dir / "snapshots"
122
+ if snapshots_dir.exists():
123
+ snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
124
+ if snapshot_dirs:
125
+ # Return the most recent snapshot
126
+ latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
127
+ return str(latest_snapshot)
76
128
 
77
- try:
78
- downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
79
- _create_cache_marker(cache_dir)
80
- return downloaded_path
81
- except LocalEntryNotFoundError:
82
129
  try:
83
- os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
84
130
  downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
85
131
  _create_cache_marker(cache_dir)
86
132
  return downloaded_path
87
- except Exception as e: # pragma: no cover - bubble up for caller context
88
- raise ModelLoadError(model_name_or_path, original_error=e)
89
- except Exception as e: # pragma: no cover - unexpected download issue
133
+ except LocalEntryNotFoundError:
134
+ # Fall back to modelscope if HF entry not found
135
+ try:
136
+ from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
137
+
138
+ downloaded_path = ms_snapshot(model_name_or_path)
139
+ return downloaded_path
140
+ except Exception as e: # pragma: no cover - bubble up for caller context
141
+ raise ModelLoadError(model_name_or_path, original_error=e)
142
+ except Exception as e: # pragma: no cover - unexpected download issue
143
+ import colorful
144
+
145
+ print(colorful.red | f"Error downloading from Hugging Face Hub: {e}. Trying ModelScope...")
146
+ from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
147
+
148
+ downloaded_path = ms_snapshot(model_name_or_path)
149
+ return downloaded_path
150
+
151
+ # modelscope path
152
+ from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
153
+
154
+ try:
155
+ downloaded_path = ms_snapshot(model_name_or_path)
156
+ return downloaded_path
157
+ except Exception as e: # pragma: no cover
90
158
  raise ModelLoadError(model_name_or_path, original_error=e)
91
159
 
92
160
 
@@ -1,4 +1,4 @@
1
- """
1
+ """.
2
2
  File existence management utilities for video processing workflows
3
3
  """
4
4
 
@@ -11,6 +11,8 @@ from typing import Dict, List, Optional, Sequence, Tuple
11
11
 
12
12
  import colorful
13
13
 
14
+ from lattifai.utils import safe_print
15
+
14
16
  try:
15
17
  import questionary
16
18
  except ImportError: # pragma: no cover - optional dependency
@@ -110,7 +112,7 @@ class FileExistenceManager:
110
112
  return "proceed" # No existing files, proceed normally
111
113
 
112
114
  # Header with warning color
113
- print(f'\n{colorful.bold_yellow("⚠️ Existing files found:")}')
115
+ safe_print(f'\n{colorful.bold_yellow("⚠️ Existing files found:")}')
114
116
 
115
117
  # Collect file paths for options
116
118
  file_paths = []
@@ -157,15 +159,15 @@ class FileExistenceManager:
157
159
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default=default_value)
158
160
 
159
161
  if choice == "overwrite":
160
- print(f'{colorful.yellow("🔄 Overwriting existing files")}')
162
+ safe_print(f'{colorful.yellow("🔄 Overwriting existing files")}')
161
163
  elif choice == TRANSCRIBE_CHOICE:
162
164
  print(f'{colorful.magenta(f"✨ Will transcribe with {transcriber_name}")}')
163
165
  elif choice == "cancel":
164
- print(f'{colorful.red("❌ Operation cancelled")}')
166
+ safe_print(f'{colorful.red("❌ Operation cancelled")}')
165
167
  elif choice in file_paths:
166
- print(f'{colorful.green(f"✅ Using selected file: {choice}")}')
168
+ safe_print(f'{colorful.green(f"✅ Using selected file: {choice}")}')
167
169
  else:
168
- print(f'{colorful.green("✅ Using existing files")}')
170
+ safe_print(f'{colorful.green("✅ Using existing files")}')
169
171
 
170
172
  return choice
171
173
 
@@ -189,7 +191,7 @@ class FileExistenceManager:
189
191
  del emoji # Unused variable
190
192
 
191
193
  # Header with warning color
192
- print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
194
+ safe_print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
193
195
 
194
196
  for file_path in sorted(files):
195
197
  print(f' {colorful.green("•")} {file_path}')
@@ -203,11 +205,11 @@ class FileExistenceManager:
203
205
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default="use")
204
206
 
205
207
  if choice == "use":
206
- print(f'{colorful.green(f"✅ Using existing {label} files")}')
208
+ safe_print(f'{colorful.green(f"✅ Using existing {label} files")}')
207
209
  elif choice == "overwrite":
208
- print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
210
+ safe_print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
209
211
  elif choice == "cancel":
210
- print(f'{colorful.red("❌ Operation cancelled")}')
212
+ safe_print(f'{colorful.red("❌ Operation cancelled")}')
211
213
 
212
214
  return choice
213
215
 
@@ -245,7 +247,7 @@ class FileExistenceManager:
245
247
  )
246
248
 
247
249
  # Multiple files: let user choose which one
248
- print(f'\n{colorful.bold_yellow(f"⚠️ Multiple {file_type} files found:")}')
250
+ safe_print(f'\n{colorful.bold_yellow(f"⚠️ Multiple {file_type} files found:")}')
249
251
 
250
252
  # Create options with full file paths
251
253
  options = []
@@ -266,7 +268,7 @@ class FileExistenceManager:
266
268
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default=files[0])
267
269
 
268
270
  if choice == "cancel":
269
- print(f'{colorful.red("❌ Operation cancelled")}')
271
+ safe_print(f'{colorful.red("❌ Operation cancelled")}')
270
272
  elif choice == "overwrite":
271
273
  overwrite_msg = f"🔄 Overwriting all {file_type} files"
272
274
  print(f"{colorful.yellow(overwrite_msg)}")
@@ -274,7 +276,7 @@ class FileExistenceManager:
274
276
  transcribe_msg = f"✨ Will transcribe with {transcriber_name}"
275
277
  print(f"{colorful.magenta(transcribe_msg)}")
276
278
  else:
277
- print(f'{colorful.green(f"✅ Using: {choice}")}')
279
+ safe_print(f'{colorful.green(f"✅ Using: {choice}")}')
278
280
 
279
281
  return choice
280
282
 
@@ -407,6 +407,11 @@ class YouTubeDownloader:
407
407
  output_template,
408
408
  "--sub-format",
409
409
  "best", # Prefer best available format
410
+ "--no-warnings", # Suppress warnings for cleaner output
411
+ "--extractor-retries",
412
+ "3", # Retry on errors
413
+ "--sleep-requests",
414
+ "1", # Sleep between requests to avoid rate limiting
410
415
  ]
411
416
 
412
417
  # Add caption language selection if specified
@@ -425,7 +430,8 @@ class YouTubeDownloader:
425
430
  None, lambda: subprocess.run(ytdlp_options, capture_output=True, text=True, check=True)
426
431
  )
427
432
 
428
- self.logger.info(f"yt-dlp transcript output: {result.stdout.strip()}")
433
+ # Log the full yt-dlp output at debug level only, keeping normal logs concise
434
+ self.logger.debug(f"yt-dlp output: {result.stdout.strip()}")
429
435
 
430
436
  # Find the downloaded transcript file
431
437
  caption_patterns = [
@@ -481,9 +487,18 @@ class YouTubeDownloader:
481
487
 
482
488
  except subprocess.CalledProcessError as e:
483
489
  error_msg = e.stderr.strip() if e.stderr else str(e)
490
+
491
+ # Check for specific error conditions
484
492
  if "No automatic or manual captions found" in error_msg:
485
493
  self.logger.warning("No captions available for this video")
486
494
  return None
495
+ elif "HTTP Error 429" in error_msg or "Too Many Requests" in error_msg:
496
+ self.logger.error("YouTube rate limit exceeded. Please try again later or use a different method.")
497
+ raise RuntimeError(
498
+ "YouTube rate limit exceeded (HTTP 429). "
499
+ "Try again later or use --cookies option with authenticated cookies. "
500
+ "See: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp"
501
+ )
487
502
  else:
488
503
  self.logger.error(f"Failed to download transcript: {error_msg}")
489
504
  raise RuntimeError(f"Failed to download transcript: {error_msg}")
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai
3
- Version: 1.0.4
3
+ Version: 1.1.0
4
4
  Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
5
5
  Author-email: Lattifai Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>
7
7
  License: MIT License
8
8
 
9
- Copyright (c) 2025 Lattifai.
9
+ Copyright (c) 2025 LattifAI.
10
10
 
11
11
  Permission is hereby granted, free of charge, to any person obtaining a copy
12
12
  of this software and associated documentation files (the "Software"), to deal
@@ -27,10 +27,10 @@ License: MIT License
27
27
  SOFTWARE.
28
28
 
29
29
  Project-URL: Homepage, https://github.com/lattifai/lattifai-python
30
- Project-URL: Documentation, https://github.com/lattifai/lattifai-python/README.md
30
+ Project-URL: Documentation, https://github.com/lattifai/lattifai-python/blob/main/README.md
31
31
  Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
32
32
  Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
33
- Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG.md
33
+ Project-URL: Changelog, https://github.com/lattifai/lattifai-python/blob/main/CHANGELOG.md
34
34
  Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
35
35
  Classifier: Development Status :: 5 - Production/Stable
36
36
  Classifier: Intended Audience :: Developers
@@ -50,7 +50,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
50
50
  Requires-Python: <3.15,>=3.10
51
51
  Description-Content-Type: text/markdown
52
52
  License-File: LICENSE
53
- Requires-Dist: lattifai-core>=0.4.6
53
+ Requires-Dist: lattifai-core>=0.6.0
54
54
  Requires-Dist: lattifai-run>=1.0.1
55
55
  Requires-Dist: python-dotenv
56
56
  Requires-Dist: lhotse>=1.26.0
@@ -61,12 +61,12 @@ Requires-Dist: tgt
61
61
  Requires-Dist: onnx>=1.16.0
62
62
  Requires-Dist: onnxruntime
63
63
  Requires-Dist: msgpack
64
+ Requires-Dist: scipy!=1.16.3
64
65
  Requires-Dist: g2p-phonemizer>=0.4.0
65
66
  Requires-Dist: av
66
- Requires-Dist: wtpsplit>=2.1.6
67
- Requires-Dist: kaldi-native-fbank
68
- Requires-Dist: OmniSenseVoice>=0.4.0
69
- Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc1
67
+ Requires-Dist: wtpsplit>=2.1.7
68
+ Requires-Dist: OmniSenseVoice>=0.4.2
69
+ Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc4
70
70
  Requires-Dist: pyannote-audio-notorchdeps>=4.0.2
71
71
  Requires-Dist: questionary>=2.0
72
72
  Requires-Dist: yt-dlp
@@ -83,7 +83,7 @@ Requires-Dist: torch-audiomentations==0.12.0; extra == "diarization"
83
83
  Requires-Dist: pyannote.audio>=4.0.2; extra == "diarization"
84
84
  Provides-Extra: transcription
85
85
  Requires-Dist: OmniSenseVoice>=0.4.0; extra == "transcription"
86
- Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc1; extra == "transcription"
86
+ Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc3; extra == "transcription"
87
87
  Provides-Extra: test
88
88
  Requires-Dist: pytest; extra == "test"
89
89
  Requires-Dist: pytest-cov; extra == "test"
@@ -114,10 +114,6 @@ Dynamic: license-file
114
114
 
115
115
  Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](https://huggingface.co/Lattifai/Lattice-1) model.
116
116
 
117
- > **⚠️ Note on Current Limitations**:
118
- > 1. **Memory Usage**: We are aware of high memory consumption and are actively working on further optimizations.
119
- > 2. **Long Audio**: Currently, long audio files might face issues. Support for **long-form audio (up to 20 hours)** will be available shortly.
120
-
121
117
  ## Table of Contents
122
118
 
123
119
  - [Installation](#installation)
@@ -138,7 +134,7 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
138
134
  - [Advanced Features](#advanced-features)
139
135
  - [Word-Level Alignment](#word-level-alignment)
140
136
  - [Smart Sentence Splitting](#smart-sentence-splitting)
141
- - [Speaker Diarization](#speaker-diarization-wip)
137
+ - [Speaker Diarization](#speaker-diarization)
142
138
  - [YAML Configuration Files](#yaml-configuration-files)
143
139
  - [Supported Formats](#supported-formats)
144
140
  - [Roadmap](#roadmap)
@@ -464,6 +460,8 @@ lai transcribe align \
464
460
 
465
461
  ```bash
466
462
  lai caption convert input.srt output.vtt
463
+ lai caption convert input.srt output.json
464
+ # Enable normalization to clean HTML entities and special characters:
467
465
  lai caption convert input.srt output.json normalize_text=true
468
466
  ```
469
467
 
@@ -554,10 +552,10 @@ from lattifai import LattifAI, CaptionConfig
554
552
 
555
553
  client = LattifAI(
556
554
  caption_config=CaptionConfig(
557
- split_sentence=True, # Smart sentence splitting
558
- word_level=True, # Word-level timestamps
559
- normalize_text=True, # Clean HTML entities
560
- include_speaker_in_text=False, # Include speaker labels
555
+ split_sentence=True, # Smart sentence splitting (default: False)
556
+ word_level=True, # Word-level timestamps (default: False)
557
+ normalize_text=True, # Clean HTML entities (default: True)
558
+ include_speaker_in_text=False, # Include speaker labels (default: True)
561
559
  )
562
560
  )
563
561
  ```
@@ -619,6 +617,56 @@ from lattifai import (
619
617
 
620
618
  ## Advanced Features
621
619
 
620
+ ### Long-Form Audio Support
621
+
622
+ LattifAI now supports processing long audio files (up to 20 hours) through streaming mode. Enable streaming by setting the `streaming_chunk_secs` parameter:
623
+
624
+ **Python SDK:**
625
+ ```python
626
+ from lattifai import LattifAI
627
+
628
+ client = LattifAI()
629
+
630
+ # Enable streaming for long audio files
631
+ caption = client.alignment(
632
+ input_media="long_audio.wav",
633
+ input_caption="subtitle.srt",
634
+ output_caption_path="output.srt",
635
+ streaming_chunk_secs=600.0, # Process in 600-second (10-minute) chunks
636
+ )
637
+ ```
638
+
639
+ **CLI:**
640
+ ```bash
641
+ # Enable streaming with chunk size
642
+ lai alignment align long_audio.wav subtitle.srt output.srt \
643
+ media.streaming_chunk_secs=300.0
644
+
645
+ # For YouTube videos
646
+ lai alignment youtube "https://youtube.com/watch?v=VIDEO_ID" \
647
+ media.streaming_chunk_secs=300.0
648
+ ```
649
+
650
+ **MediaConfig:**
651
+ ```python
652
+ from lattifai import LattifAI, MediaConfig
653
+
654
+ client = LattifAI(
655
+ media_config=MediaConfig(
656
+ streaming_chunk_secs=600.0, # Chunk duration in seconds (1-1800), default: 600 (10 minutes)
657
+ )
658
+ )
659
+ ```
660
+
661
+ **Notes:**
662
+ - Chunk duration must be between 1 and 1800 seconds (minimum 1 second, maximum 30 minutes)
663
+ - Default value: 600 seconds (10 minutes)
664
+ - **Recommended: Use 60 seconds or larger for optimal performance**
665
+ - Set to `None` to disable streaming
666
+ - **Thanks to our precise implementation, streaming has virtually no impact on alignment accuracy**
667
+ - Smaller chunks reduce memory usage with minimal quality trade-off
668
+ - Recommended chunk size: 300-900 seconds (5-15 minutes) for optimal balance
669
+
622
670
  ### Word-Level Alignment
623
671
 
624
672
  Enable `word_level=True` to get precise timestamps for each word:
@@ -658,9 +706,7 @@ caption = client.alignment(
658
706
  )
659
707
  ```
660
708
 
661
- ### Speaker Diarization (WIP)
662
-
663
- **Note:** This feature is currently under development and not yet fully available.
709
+ ### Speaker Diarization
664
710
 
665
711
  Speaker diarization automatically identifies and labels different speakers in audio. When enabled, the system will:
666
712
  - Detect speaker changes in the audio
@@ -671,6 +717,24 @@ Speaker diarization automatically identifies and labels different speakers in au
671
717
  - **Existing speaker labels in subtitles**: If your input captions already contain speaker names (e.g., `[Alice]`, `>> Bob:`, or `SPEAKER_01:`), the system will preserve them as much as possible during alignment
672
718
  - **Gemini Transcriber**: When using Gemini models for transcription (e.g., `gemini-2.5-pro`), the model can intelligently identify and extract speaker names from dialogue context, making it easier to generate speaker-aware transcripts
673
719
 
720
+ **CLI:**
721
+ ```bash
722
+ # Enable speaker diarization during alignment
723
+ lai alignment align audio.wav subtitle.srt output.srt \
724
+ diarization.enabled=true
725
+
726
+ # With additional diarization settings
727
+ lai alignment align audio.wav subtitle.srt output.srt \
728
+ diarization.enabled=true \
729
+ diarization.device=cuda \
730
+ diarization.min_speakers=2 \
731
+ diarization.max_speakers=4
732
+
733
+ # For YouTube videos with diarization
734
+ lai alignment youtube "https://youtube.com/watch?v=VIDEO_ID" \
735
+ diarization.enabled=true
736
+ ```
737
+
674
738
  **Python SDK:**
675
739
  ```python
676
740
  from lattifai import LattifAI, DiarizationConfig
@@ -0,0 +1,57 @@
1
+ lattifai/__init__.py,sha256=7y1R5IGw0Sgvl1tfqxEK7e-ozW0wVB-q_JZgv6YyrMQ,2751
2
+ lattifai/audio2.py,sha256=BKMCzkuEmBFAWOEnzgLxeK8TBPTFbjzr1esOfe3MQoo,17460
3
+ lattifai/client.py,sha256=yD8yIcw3xfvPqBzUQTJj5h9sFYbg4BSHBR5gwO-wGF8,18875
4
+ lattifai/errors.py,sha256=LyWRGVhQ6Ak2CYn9FBYAPRgQ7_VHpxzNsXI31HXD--s,11291
5
+ lattifai/logging.py,sha256=MbUEeOUFlF92pA9v532DiPPWKl03S7UHCJ6Z652cf0w,2860
6
+ lattifai/mixin.py,sha256=t8A3J1DVr7SBEv2FYK3JrGEFqxAV3NktfGPiOQdxTB4,24990
7
+ lattifai/types.py,sha256=SjYBfwrCBOXlICvH04niFQJ7OzTx7oTaa_npfRkB67U,659
8
+ lattifai/utils.py,sha256=N4k2cvoT2CgHsGY0sb-3l-eKUMLzGirPzVa53v8euh4,6658
9
+ lattifai/alignment/__init__.py,sha256=ehpkKfjNIYUx7_M-RWD_8Efcrzd9bE-NSm0QgMMVLW0,178
10
+ lattifai/alignment/lattice1_aligner.py,sha256=5c_bWv0v6jOX8H4gO3-mAKXn4Ux4vjHIUtAY4d8yMak,6110
11
+ lattifai/alignment/lattice1_worker.py,sha256=U_EbDsbisWWKIyfxtx9q4deAyml_hni8DpznZo5zBko,12975
12
+ lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
13
+ lattifai/alignment/segmenter.py,sha256=mzWEQC6hWZtI2mR2WU59W7qLHa7KXy7fdU6991kyUuQ,6276
14
+ lattifai/alignment/tokenizer.py,sha256=GNLZbkvZ066PJGUznJVgxZUwSzslD6mz8YsI2Cry6RI,22400
15
+ lattifai/caption/__init__.py,sha256=6MM_2j6CaqwZ81LfSy4di2EP0ykvheRjMZKAYDx2rQs,477
16
+ lattifai/caption/caption.py,sha256=mZYobxuZ8tkJUkZMVvRTrNeGTdmIZYSXTEySQdaGQd8,54595
17
+ lattifai/caption/gemini_reader.py,sha256=GqY2w78xGYCMDP5kD5WGS8jK0gntel2SK-EPpPKTrwU,15138
18
+ lattifai/caption/gemini_writer.py,sha256=sYPxYEmVQcEan5WVGgSrcraxs3QJRQRh8CJkl2yUQ1s,6515
19
+ lattifai/caption/supervision.py,sha256=DRrM8lfKU_x9aVBcLG6xnT0xIJrnc8jzHpzcSwQOg8c,905
20
+ lattifai/caption/text_parser.py,sha256=XDb8KTt031uJ1hg6dpbINglGOTX-6pBcghbg3DULM1I,4633
21
+ lattifai/cli/__init__.py,sha256=LafsAf8YfDcfTeJ1IevFcyLm-mNbxpOOnm33OFKtpDM,523
22
+ lattifai/cli/alignment.py,sha256=06em-Uaf6NhSz1ce4dwT2r8n56NrtibR7ZsSkmc18Kc,5954
23
+ lattifai/cli/app_installer.py,sha256=gAndH3Yo97fGRDe2CQnGtOgZZ4k3_v5ftcUo5g6xbSA,5884
24
+ lattifai/cli/caption.py,sha256=4qQ9DFhxcfaeFMY0TB5I42x4W_gOo2zY6kjXnHnFDms,6313
25
+ lattifai/cli/diarization.py,sha256=cDz1p6RUC-ySPzzGYHWff0L6EWHTUPrci7DaVxwZrVc,3933
26
+ lattifai/cli/server.py,sha256=sXMfOSse9-V79slXUU8FDLeqtI5U9zeU-5YpjTIGyVw,1186
27
+ lattifai/cli/transcribe.py,sha256=_vHzrdaGiPepQWATqvEDYDjwzfVLAd2i8RjOLkvdb0w,8218
28
+ lattifai/cli/youtube.py,sha256=-EIDSS1Iel3_6qD9M2CZZHwKOvgdkIa1cMY4rX7xwVo,5331
29
+ lattifai/config/__init__.py,sha256=Z8OudvS6fgfLNLu_2fvoXartQiYCECOnNfzDt-PfCN4,543
30
+ lattifai/config/alignment.py,sha256=vLiH150YWvBUiVkFOIO-nPXCB-b8fP9iSZgS79k1Qbg,4586
31
+ lattifai/config/caption.py,sha256=AYOyUJ1xZsX8CBZy3GpLitbcCAHcZ9LwXui_v3vtuso,6786
32
+ lattifai/config/client.py,sha256=I1JqLQlsQNU5ouovTumr-PP_8GWC9DI_e9B5UwsDZws,1492
33
+ lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
34
+ lattifai/config/media.py,sha256=cjM8eGeZ7ELhmy4cCqHAyogeHItaVqMrPzSwwIx79HY,14856
35
+ lattifai/config/transcription.py,sha256=_gPJD6cob_jWNdf841nBHhAqJGCxS6PfSyvx2W_vPcM,3082
36
+ lattifai/diarization/__init__.py,sha256=MgBDQ1ehL2qDnZprEp8KqON7CmbG-qaP37gzBsV0jzk,119
37
+ lattifai/diarization/lattifai.py,sha256=tCnFL6ywITqeKR8YoCsYvyJxNoIwoC6GsnI9zkXNB-Q,3128
38
+ lattifai/server/app.py,sha256=wXYgXc_yGQACtUJdhkfhLsTOQjhhIhDQRiVRny7Ogcs,15455
39
+ lattifai/transcription/__init__.py,sha256=vMHciyCEPKhhfM3KjMCeDqnyxU1oghF8g5o5SvpnT_4,2669
40
+ lattifai/transcription/base.py,sha256=v_b1_JGYiBqeMmwns0wHCJ7UOm6j9k-76Uzbr-qmzrs,4467
41
+ lattifai/transcription/gemini.py,sha256=LJSQt9nGqQdEG6ZFXoHWltumyMEM7-Ezy8ss0iPJb7k,12414
42
+ lattifai/transcription/lattifai.py,sha256=EKEdCafgdRWKw_084eD07BqGh2_D-qo3ig3H5X3XYGg,4621
43
+ lattifai/transcription/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
44
+ lattifai/transcription/prompts/__init__.py,sha256=G9b42COaCYv3sPPNkHsGDLOMBuVGKt4mXGYal_BYtYQ,1351
45
+ lattifai/transcription/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
46
+ lattifai/transcription/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnKzqVCakr-fTp2Xk38UOsUquvruNX-LU,4600
47
+ lattifai/workflow/__init__.py,sha256=GOT9jptXwpIMiNRqJ_LToEt_5Dt0k7XXbLkFzhrl31o,548
48
+ lattifai/workflow/agents.py,sha256=yEOnxnhcTvr1iOhCorNvp8B76P6nQsLRXJCu_rCYFfM,38
49
+ lattifai/workflow/base.py,sha256=8QoVIBZwJfr5mppJbtUFafHv5QR9lL-XrULjTWD0oBg,6257
50
+ lattifai/workflow/file_manager.py,sha256=IUWW838ta83kfwM4gpW83gsD_Tx-pa-L_RWKjiefQbQ,33017
51
+ lattifai/workflow/youtube.py,sha256=ON9z0UUk16ThQzdhdgyOiwBmewZOcxfT05dsl3aKYqw,23840
52
+ lattifai-1.1.0.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
53
+ lattifai-1.1.0.dist-info/METADATA,sha256=MXg4IXWA38Y9c_RT9S_6NnCZFc_4-Yic_zZDHlS1TeY,26400
54
+ lattifai-1.1.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
55
+ lattifai-1.1.0.dist-info/entry_points.txt,sha256=nHZri2VQkPYEl0tQ0dMYTpVGlCOgVWlDG_JtDR3QXF8,545
56
+ lattifai-1.1.0.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
57
+ lattifai-1.1.0.dist-info/RECORD,,
@@ -1,6 +1,7 @@
1
1
  [console_scripts]
2
2
  lai-align = lattifai.cli.alignment:main
3
3
  lai-app-install = lattifai.cli.app_installer:main
4
+ lai-diarize = lattifai.cli.diarization:main
4
5
  lai-server = lattifai.cli.server:main
5
6
  lai-transcribe = lattifai.cli.transcribe:main
6
7
  lai-youtube = lattifai.cli.youtube:main
@@ -11,4 +12,5 @@ laicap-shift = lattifai.cli.caption:main_shift
11
12
  [lai_run.cli]
12
13
  alignment = lattifai.cli
13
14
  caption = lattifai.cli
15
+ diarization = lattifai.cli
14
16
  transcribe = lattifai.cli
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 Lattifai.
3
+ Copyright (c) 2025 LattifAI.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,56 +0,0 @@
1
- lattifai/__init__.py,sha256=K46XVINrXgjGehO8uXByTIbUnBCdB7QwsvVNWzKbdeU,2364
2
- lattifai/audio2.py,sha256=WPAhcaEoIMRQBf2QZe-0yyAbgyyiqUVAthJ-z54R9Wc,7761
3
- lattifai/client.py,sha256=Wkz7Q1XvCQ9KxD0uZ_M1ix457ZbgIG1gAxxt8nMBUj4,22147
4
- lattifai/errors.py,sha256=dFQ_7c8rwuHrq2pDPjpzA755tAV3t8daXXFbHmWblbs,11015
5
- lattifai/logging.py,sha256=MbUEeOUFlF92pA9v532DiPPWKl03S7UHCJ6Z652cf0w,2860
6
- lattifai/mixin.py,sha256=2QnWMG2E_cyZaa98Wtdf6Duo8w3DTIiD-EEfrmtDhGk,23388
7
- lattifai/types.py,sha256=SjYBfwrCBOXlICvH04niFQJ7OzTx7oTaa_npfRkB67U,659
8
- lattifai/utils.py,sha256=TqOPrd_Et7KxrbfI_JbBNIGZ5-oGJY8ZUyJMPDTih1I,3848
9
- lattifai/alignment/__init__.py,sha256=ehpkKfjNIYUx7_M-RWD_8Efcrzd9bE-NSm0QgMMVLW0,178
10
- lattifai/alignment/lattice1_aligner.py,sha256=soBRZ98jRIju-wN5eqYUmQfF56KiEUxVGw0UvtRcx4A,4464
11
- lattifai/alignment/lattice1_worker.py,sha256=XGICEzLygspqC7SMj5s45M7VNSj-l2zemkawdKKjssw,9233
12
- lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
13
- lattifai/alignment/segmenter.py,sha256=-FKtIwv9Z4fU9Fs08jhL9VyREVSYcfcwuTqb8jxCiuo,6228
14
- lattifai/alignment/tokenizer.py,sha256=WilqU9Ecdkl_cW86IkB1mh_PFlHN-35Jsreiyse2r-8,22355
15
- lattifai/caption/__init__.py,sha256=6MM_2j6CaqwZ81LfSy4di2EP0ykvheRjMZKAYDx2rQs,477
16
- lattifai/caption/caption.py,sha256=NNkBJbSdfXe4CwlCvMplrd4UOxlZyxq5Cs5g-dReB1E,46974
17
- lattifai/caption/gemini_reader.py,sha256=GqY2w78xGYCMDP5kD5WGS8jK0gntel2SK-EPpPKTrwU,15138
18
- lattifai/caption/gemini_writer.py,sha256=sYPxYEmVQcEan5WVGgSrcraxs3QJRQRh8CJkl2yUQ1s,6515
19
- lattifai/caption/supervision.py,sha256=DRrM8lfKU_x9aVBcLG6xnT0xIJrnc8jzHpzcSwQOg8c,905
20
- lattifai/caption/text_parser.py,sha256=XDb8KTt031uJ1hg6dpbINglGOTX-6pBcghbg3DULM1I,4633
21
- lattifai/cli/__init__.py,sha256=dIUmrpN-OwR4h6BqMhXp87_5ZwgO41ShPru_iZGnpQs,463
22
- lattifai/cli/alignment.py,sha256=uKMTE95_JMikfbyCcwLbQxms-EQmZXEj7oYugiupk9I,5890
23
- lattifai/cli/app_installer.py,sha256=0xBQnJZKhyx4JT_PkHXkZ0XlAWxCGz9o0Jjq99poKew,5680
24
- lattifai/cli/caption.py,sha256=ucgYxJ43ab71nGpZBAiVn8QA0DAVht2QMZFE5IdgxP0,6853
25
- lattifai/cli/server.py,sha256=Vo6_ANgwu7WtC5h4BebQLmhqLNpqzPoYrPQPANpP7rw,1142
26
- lattifai/cli/transcribe.py,sha256=6uJfvtB1o_u1uQwxt4fje_koyfN93mGaFLlskmjqx2c,7406
27
- lattifai/cli/youtube.py,sha256=9_erdIkhX8pCiy7BRzNstEiO9saM-VKZ1WVqvbXbmrc,5267
28
- lattifai/config/__init__.py,sha256=Z8OudvS6fgfLNLu_2fvoXartQiYCECOnNfzDt-PfCN4,543
29
- lattifai/config/alignment.py,sha256=z0b9tg67ftDI90j9Td3qmXFg3WZjSbgszjzeTdwrjZA,3291
30
- lattifai/config/caption.py,sha256=nmfdsJ-18l4UmapdVgxF1ARJbA4aOr7jek1bmse2F_E,6787
31
- lattifai/config/client.py,sha256=I1JqLQlsQNU5ouovTumr-PP_8GWC9DI_e9B5UwsDZws,1492
32
- lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
33
- lattifai/config/media.py,sha256=5JOPjifXDM2WWQERySDZen4-7YfgQNcYM2NkkKp0LjQ,13610
34
- lattifai/config/transcription.py,sha256=bzghOGgcNWzTnDYd_cqCOB7GT8OnzHDiyam7LSixqxM,2901
35
- lattifai/diarization/__init__.py,sha256=MgBDQ1ehL2qDnZprEp8KqON7CmbG-qaP37gzBsV0jzk,119
36
- lattifai/diarization/lattifai.py,sha256=SE2BpIZ3_deKyhXdBqe77bsDLXIUV9AQV34gfINv7_s,2657
37
- lattifai/server/app.py,sha256=UpHsKJHtK1-sdp5mtDPBSxEl8xYTbe7cVO8dLp9Xiuo,15380
38
- lattifai/transcription/__init__.py,sha256=mEoMTbs5jAgtXQn1jTjlFY_GUr-S0WmPn8uZ6WZCkU0,2643
39
- lattifai/transcription/base.py,sha256=59b4nQHFMyTRyyzBJTM8ZpEuUy1KjwA2o6rNfrNluKY,3911
40
- lattifai/transcription/gemini.py,sha256=1VNi9gl-Kpkw3ljZcOZG5oq_OY8fMC9Xv4kOwyQpI0Q,7992
41
- lattifai/transcription/lattifai.py,sha256=h0nhXST0qljhyndf80IEddM7Y_N1jiS28YoaE536eME,3483
42
- lattifai/transcription/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
43
- lattifai/transcription/prompts/__init__.py,sha256=G9b42COaCYv3sPPNkHsGDLOMBuVGKt4mXGYal_BYtYQ,1351
44
- lattifai/transcription/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
45
- lattifai/transcription/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnKzqVCakr-fTp2Xk38UOsUquvruNX-LU,4600
46
- lattifai/workflow/__init__.py,sha256=GOT9jptXwpIMiNRqJ_LToEt_5Dt0k7XXbLkFzhrl31o,548
47
- lattifai/workflow/agents.py,sha256=yEOnxnhcTvr1iOhCorNvp8B76P6nQsLRXJCu_rCYFfM,38
48
- lattifai/workflow/base.py,sha256=8QoVIBZwJfr5mppJbtUFafHv5QR9lL-XrULjTWD0oBg,6257
49
- lattifai/workflow/file_manager.py,sha256=d106KHLY8A9amLy5h1vR32e4od8mmJGqMD-iDyiRPLI,32917
50
- lattifai/workflow/youtube.py,sha256=n8L1c6tl8FuYzAzKZ-B76zf5yZsvVggZEJ9mPdbEWGQ,22989
51
- lattifai-1.0.4.dist-info/licenses/LICENSE,sha256=_IkHdwOWLAWcE1M_tIpDoRWdNSJwFdtIqI-XSkK3yPU,1066
52
- lattifai-1.0.4.dist-info/METADATA,sha256=uijheuHzvh-AhZwIFCGTc2_UX-x3ZXxhnlPsvcaMQ4c,24582
53
- lattifai-1.0.4.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
54
- lattifai-1.0.4.dist-info/entry_points.txt,sha256=F8Akof3VtKtrbnYSav1umgoo9Xbv34rUcKn-ioRfeGQ,474
55
- lattifai-1.0.4.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
56
- lattifai-1.0.4.dist-info/RECORD,,