lattifai 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
1
  """LattifAI speaker diarization implementation."""
2
2
 
3
3
  import logging
4
- from collections import defaultdict
5
- from typing import List, Optional, Tuple
4
+ from dataclasses import dataclass
5
+ from typing import Callable, List, Optional, Tuple
6
6
 
7
- import torch
8
- from tgt import Interval, IntervalTier, TextGrid
7
+ import numpy as np
8
+ from lattifai_core.diarization import DiarizationOutput
9
+ from tgt import TextGrid
9
10
 
10
11
  from lattifai.audio2 import AudioData
11
12
  from lattifai.caption import Supervision
@@ -60,7 +61,7 @@ class LattifAIDiarizer:
60
61
  num_speakers: Optional[int] = None,
61
62
  min_speakers: Optional[int] = None,
62
63
  max_speakers: Optional[int] = None,
63
- ) -> TextGrid:
64
+ ) -> DiarizationOutput:
64
65
  """Perform speaker diarization on the input audio."""
65
66
  return self.diarizer.diarize(
66
67
  input_media,
@@ -73,11 +74,16 @@ class LattifAIDiarizer:
73
74
  self,
74
75
  input_media: AudioData,
75
76
  alignments: List[Supervision],
76
- diarization: Optional[TextGrid] = None,
77
+ diarization: Optional[DiarizationOutput] = None,
77
78
  num_speakers: Optional[int] = None,
78
79
  min_speakers: Optional[int] = None,
79
80
  max_speakers: Optional[int] = None,
80
- ) -> Tuple[TextGrid, List[Supervision]]:
81
+ alignment_fn: Optional[Callable] = None,
82
+ transcribe_fn: Optional[Callable] = None,
83
+ separate_fn: Optional[Callable] = None,
84
+ debug: bool = False,
85
+ output_path: Optional[str] = None,
86
+ ) -> Tuple[DiarizationOutput, List[Supervision]]:
81
87
  """Diarize the given media input and return alignments with refined speaker labels."""
82
88
  return self.diarizer.diarize_with_alignments(
83
89
  input_media,
@@ -86,4 +92,9 @@ class LattifAIDiarizer:
86
92
  num_speakers=num_speakers,
87
93
  min_speakers=min_speakers,
88
94
  max_speakers=max_speakers,
95
+ alignment_fn=alignment_fn,
96
+ transcribe_fn=transcribe_fn,
97
+ separate_fn=separate_fn,
98
+ debug=debug,
99
+ output_path=output_path,
89
100
  )
lattifai/mixin.py CHANGED
@@ -184,7 +184,9 @@ class LattifAIClientMixin:
184
184
  from lattifai.utils import _resolve_model_path
185
185
 
186
186
  if transcription_config is not None:
187
- transcription_config.lattice_model_path = _resolve_model_path(alignment_config.model_name)
187
+ transcription_config.lattice_model_path = _resolve_model_path(
188
+ alignment_config.model_name, getattr(alignment_config, "model_hub", "huggingface")
189
+ )
188
190
 
189
191
  # Set client_wrapper for all configs
190
192
  alignment_config.client_wrapper = self
@@ -380,6 +382,7 @@ class LattifAIClientMixin:
380
382
  media_file: Union[str, Path, AudioData],
381
383
  source_lang: Optional[str],
382
384
  is_async: bool = False,
385
+ output_dir: Optional[Path] = None,
383
386
  ) -> Caption:
384
387
  """
385
388
  Get captions by downloading or transcribing.
@@ -406,6 +409,9 @@ class LattifAIClientMixin:
406
409
  safe_print(colorful.green(" ✓ Transcription completed."))
407
410
 
408
411
  if "gemini" in self.transcriber.name.lower():
412
+ safe_print(colorful.yellow("🔍 Gemini raw output:"))
413
+ safe_print(colorful.yellow(f"{transcription[:1000]}...")) # Print first 1000 chars
414
+
409
415
  # write to temp file and use Caption read
410
416
  # On Windows, we need to close the file before writing to it
411
417
  tmp_file = tempfile.NamedTemporaryFile(
@@ -428,6 +434,18 @@ class LattifAIClientMixin:
428
434
  # Clean up temp file
429
435
  if tmp_path.exists():
430
436
  tmp_path.unlink()
437
+ else:
438
+ safe_print(colorful.yellow(f"🔍 {self.transcriber.name} raw output:"))
439
+ if isinstance(transcription, Caption):
440
+ safe_print(colorful.yellow(f"Caption with {len(transcription.transcription)} segments"))
441
+ if transcription.transcription:
442
+ safe_print(colorful.yellow(f"First segment: {transcription.transcription[0].text}"))
443
+
444
+ if output_dir:
445
+ # Generate transcript file path
446
+ transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
447
+ await asyncio.to_thread(self.transcriber.write, transcription, transcript_file, encoding="utf-8")
448
+ safe_print(colorful.green(f" ✓ Transcription saved to: {transcript_file}"))
431
449
 
432
450
  return transcription
433
451
 
@@ -70,7 +70,7 @@ def create_transcriber(
70
70
  raise ValueError(
71
71
  f"Cannot determine transcriber for model_name='{transcription_config.model_name}'. "
72
72
  f"Supported patterns: \n"
73
- f" - Gemini API models: 'gemini-2.5-pro', 'gemini-3-pro-preview'\n"
73
+ f" - Gemini API models: 'gemini-2.5-pro', 'gemini-3-pro-preview', 'gemini-3-flash-preview'\n"
74
74
  f" - Local HF models: 'nvidia/parakeet-*', 'iic/SenseVoiceSmall', etc.\n"
75
75
  f"Please specify a valid model_name."
76
76
  )
@@ -2,10 +2,12 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from pathlib import Path
5
- from typing import Optional, Union
5
+ from typing import List, Optional, Union
6
+
7
+ import numpy as np
6
8
 
7
9
  from lattifai.audio2 import AudioData
8
- from lattifai.caption import Caption
10
+ from lattifai.caption import Caption, Supervision
9
11
  from lattifai.config import TranscriptionConfig
10
12
  from lattifai.logging import get_logger
11
13
 
@@ -96,6 +98,23 @@ class BaseTranscriber(ABC):
96
98
  language: Optional language code for transcription.
97
99
  """
98
100
 
101
@abstractmethod
def transcribe_numpy(
    self,
    audio: Union[np.ndarray, List[np.ndarray]],
    language: Optional[str] = None,
) -> Union[Supervision, List[Supervision]]:
    """
    Transcribe in-memory audio from a numpy array (or list of arrays).

    Args:
        audio: Audio data as numpy array (shape: [samples]),
            or a list of such arrays for batch processing.
        language: Optional language code for transcription.

    Returns:
        Supervision object with transcription info, or a list of
        Supervision objects when a list of arrays is passed.
    """
117
+
99
118
  @abstractmethod
100
119
  def write(self, transcript: Union[str, Caption], output_file: Path, encoding: str = "utf-8") -> Path:
101
120
  """
@@ -2,12 +2,14 @@
2
2
 
3
3
  import asyncio
4
4
  from pathlib import Path
5
- from typing import Optional, Union
5
+ from typing import List, Optional, Union
6
6
 
7
+ import numpy as np
7
8
  from google import genai
8
9
  from google.genai.types import GenerateContentConfig, Part, ThinkingConfig
9
10
 
10
11
  from lattifai.audio2 import AudioData
12
+ from lattifai.caption import Supervision
11
13
  from lattifai.config import TranscriptionConfig
12
14
  from lattifai.transcription.base import BaseTranscriber
13
15
  from lattifai.transcription.prompts import get_prompt_loader
@@ -118,6 +120,130 @@ class GeminiTranscriber(BaseTranscriber):
118
120
  self.logger.error(f"Gemini transcription failed: {str(e)}")
119
121
  raise RuntimeError(f"Gemini transcription failed: {str(e)}")
120
122
 
123
def transcribe_numpy(
    self,
    audio: Union[np.ndarray, List[np.ndarray]],
    language: Optional[str] = None,
) -> Union[Supervision, List[Supervision]]:
    """
    Transcribe audio from a numpy array (or list of arrays) and return Supervision.

    The array is written to a temporary WAV file, sent through
    ``_transcribe_with_simple_prompt`` and the temporary file is removed
    afterwards, whether or not transcription succeeded.

    Note: Gemini API does not support word-level alignment. The returned
    Supervision will contain only the full transcription text without alignment.

    Args:
        audio: Audio data as numpy array (shape: [samples]),
            or a list of such arrays for batch processing. A 2D array is
            transposed before writing, i.e. it is treated as
            (channels, samples). The audio is written at 16000 Hz.
        language: Optional language code for transcription.

    Returns:
        Supervision object (or list of Supervision objects) with transcription text (no alignment).

    Raises:
        ValueError: If the array is not 1D or 2D.
        RuntimeError: If transcription fails, or if this method is called
            from a thread that already runs an event loop (``asyncio.run``
            cannot be nested).
    """
    # Batch input: transcribe each array independently, preserving order.
    if isinstance(audio, list):
        return [self.transcribe_numpy(arr, language=language) for arr in audio]

    # Use default sample rate of 16000 Hz
    sample_rate = 16000

    if self.config.verbose:
        self.logger.info(f"🎤 Starting Gemini transcription for numpy array (sample_rate={sample_rate})")

    # Normalize to 2D; reject everything else (including 0-D scalars).
    audio_array = audio
    if audio_array.ndim == 1:
        audio_array = audio_array.reshape(1, -1)
    elif audio_array.ndim != 2:
        raise ValueError(f"Audio array must be 1D or 2D, got shape {audio_array.shape}")

    import tempfile

    import soundfile as sf

    # Reserve a temp path and close the handle right away: writing by name
    # to a still-open NamedTemporaryFile is not portable (fails on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        tmp_path = Path(tmp_file.name)

    try:
        # The write happens inside the try so a failed write cannot leak the file.
        # Transpose to (samples, channels) for soundfile.
        sf.write(str(tmp_path), audio_array.T, sample_rate)

        # Transcribe using simple ASR prompt
        transcript = asyncio.run(self._transcribe_with_simple_prompt(tmp_path, language=language))

        # Create Supervision object from transcript
        duration = audio_array.shape[-1] / sample_rate
        return Supervision(
            id="gemini-transcription",
            recording_id="numpy-array",
            start=0.0,
            duration=duration,
            text=transcript,
            speaker=None,
            alignment=None,  # Gemini does not provide word-level alignment
        )
    finally:
        # Clean up temporary file
        if tmp_path.exists():
            tmp_path.unlink()
197
+
198
async def _transcribe_with_simple_prompt(self, media_file: Path, language: Optional[str] = None) -> str:
    """
    Transcribe audio using a simple ASR prompt instead of complex instructions.

    Both the upload and the generation request are synchronous client
    calls, so they are run in worker threads to keep the event loop free.

    Args:
        media_file: Path to audio file
        language: Optional language code; when given it is interpolated
            into the system prompt.

    Returns:
        Transcribed text, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If the response contains no text.
    """
    client = self._get_client()

    # Upload audio file
    if self.config.verbose:
        self.logger.info("📤 Uploading audio file to Gemini...")
    uploaded_file = await asyncio.to_thread(client.files.upload, file=str(media_file))

    # Simple ASR prompt
    system_prompt = f"Transcribe the audio in {language}." if language else "Transcribe the audio."

    # Create simple generation config
    simple_config = GenerateContentConfig(
        system_instruction=system_prompt,
        response_modalities=["TEXT"],
    )

    contents = Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type)
    response = await asyncio.to_thread(
        client.models.generate_content,
        model=self.config.model_name,
        contents=contents,
        config=simple_config,
    )

    if not response.text:
        raise RuntimeError("Empty response from Gemini API")

    transcript = response.text.strip()

    if self.config.verbose:
        self.logger.info(f"✅ Transcription completed: {len(transcript)} characters")

    return transcript
246
+
121
247
  def _get_transcription_prompt(self) -> str:
122
248
  """Get (and cache) transcription system prompt from prompts module."""
123
249
  if self._system_prompt is not None:
@@ -1,10 +1,12 @@
1
1
  """Transcription module with config-driven architecture."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Optional, Union
4
+ from typing import List, Optional, Union
5
+
6
+ import numpy as np
5
7
 
6
8
  from lattifai.audio2 import AudioData
7
- from lattifai.caption import Caption
9
+ from lattifai.caption import Caption, Supervision
8
10
  from lattifai.config import TranscriptionConfig
9
11
  from lattifai.transcription.base import BaseTranscriber
10
12
  from lattifai.transcription.prompts import get_prompt_loader # noqa: F401
@@ -74,6 +76,32 @@ class LattifAITranscriber(BaseTranscriber):
74
76
 
75
77
  return caption
76
78
 
79
def transcribe_numpy(
    self,
    audio: Union[np.ndarray, List[np.ndarray]],
    language: Optional[str] = None,
) -> Union[Supervision, List[Supervision]]:
    """
    Run the core LattifAI transcriber on in-memory audio.

    The core transcriber is created on first use and cached on the
    instance as ``self._transcriber``.

    Args:
        audio: Audio data as numpy array (shape: [samples]),
            or a list of such arrays for batch processing.
        language: Optional language code for transcription.

    Returns:
        Supervision object (or list of Supervision objects) with transcription and alignment info.
    """
    core = self._transcriber
    if core is None:
        from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber

        core = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
        self._transcriber = core

    # The core transcriber receives the input unchanged (single array or list).
    outputs = core.transcribe(
        audio,
        language=language,
        return_hypotheses=True,
        progress_bar=False,
        timestamps=True,
    )
    # NOTE(review): only element 0 of the core return value is handed back —
    # presumably the core call returns a tuple whose first item is the
    # result; confirm this does not drop items for list input.
    return outputs[0]
104
+
77
105
  def write(
78
106
  self, transcript: Caption, output_file: Path, encoding: str = "utf-8", cache_audio_events: bool = True
79
107
  ) -> Path:
lattifai/utils.py CHANGED
@@ -1,10 +1,9 @@
1
1
  """Shared utility helpers for the LattifAI SDK."""
2
2
 
3
- import os
4
3
  import sys
5
4
  from datetime import datetime, timedelta
6
5
  from pathlib import Path
7
- from typing import Any, Optional, Type
6
+ from typing import Optional
8
7
 
9
8
  from lattifai.errors import ModelLoadError
10
9
 
@@ -88,42 +87,74 @@ def _create_cache_marker(cache_dir: Path) -> None:
88
87
  marker_path.touch()
89
88
 
90
89
 
91
- def _resolve_model_path(model_name_or_path: str) -> str:
92
- """Resolve model path, downloading from Hugging Face when necessary."""
90
+ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface") -> str:
91
+ """Resolve model path, downloading from the specified model hub when necessary.
92
+
93
+ Args:
94
+ model_name_or_path: Local path or remote model identifier.
95
+ model_hub: Which hub to use for downloads. Supported: "huggingface", "modelscope".
96
+ """
93
97
  if Path(model_name_or_path).expanduser().exists():
94
98
  return str(Path(model_name_or_path).expanduser())
95
99
 
96
- from huggingface_hub import snapshot_download
97
- from huggingface_hub.constants import HF_HUB_CACHE
98
- from huggingface_hub.errors import LocalEntryNotFoundError
100
+ # Normalize hub name
101
+ hub = (model_hub or "huggingface").lower()
99
102
 
100
- # Determine cache directory for this model
101
- cache_dir = Path(HF_HUB_CACHE) / f'models--{model_name_or_path.replace("/", "--")}'
103
+ if hub not in ("huggingface", "modelscope"):
104
+ raise ValueError(f"Unsupported model_hub: {model_hub}. Supported: 'huggingface', 'modelscope'.")
102
105
 
103
- # Check if we have a valid cached version
104
- if _is_cache_valid(cache_dir):
105
- # Return the snapshot path (latest version)
106
- snapshots_dir = cache_dir / "snapshots"
107
- if snapshots_dir.exists():
108
- snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
109
- if snapshot_dirs:
110
- # Return the most recent snapshot
111
- latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
112
- return str(latest_snapshot)
106
+ # If local path exists, return it regardless of hub
107
+ if Path(model_name_or_path).expanduser().exists():
108
+ return str(Path(model_name_or_path).expanduser())
109
+
110
+ if hub == "huggingface":
111
+ from huggingface_hub import snapshot_download
112
+ from huggingface_hub.constants import HF_HUB_CACHE
113
+ from huggingface_hub.errors import LocalEntryNotFoundError
114
+
115
+ # Determine cache directory for this model
116
+ cache_dir = Path(HF_HUB_CACHE) / f'models--{model_name_or_path.replace("/", "--")}'
117
+
118
+ # Check if we have a valid cached version
119
+ if _is_cache_valid(cache_dir):
120
+ # Return the snapshot path (latest version)
121
+ snapshots_dir = cache_dir / "snapshots"
122
+ if snapshots_dir.exists():
123
+ snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
124
+ if snapshot_dirs:
125
+ # Return the most recent snapshot
126
+ latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
127
+ return str(latest_snapshot)
113
128
 
114
- try:
115
- downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
116
- _create_cache_marker(cache_dir)
117
- return downloaded_path
118
- except LocalEntryNotFoundError:
119
129
  try:
120
- os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
121
130
  downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
122
131
  _create_cache_marker(cache_dir)
123
132
  return downloaded_path
124
- except Exception as e: # pragma: no cover - bubble up for caller context
125
- raise ModelLoadError(model_name_or_path, original_error=e)
126
- except Exception as e: # pragma: no cover - unexpected download issue
133
+ except LocalEntryNotFoundError:
134
+ # Fall back to modelscope if HF entry not found
135
+ try:
136
+ from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
137
+
138
+ downloaded_path = ms_snapshot(model_name_or_path)
139
+ return downloaded_path
140
+ except Exception as e: # pragma: no cover - bubble up for caller context
141
+ raise ModelLoadError(model_name_or_path, original_error=e)
142
+ except Exception as e: # pragma: no cover - unexpected download issue
143
+ import colorful
144
+
145
+ print(colorful.red | f"Error downloading from Hugging Face Hub: {e}. Trying ModelScope...")
146
+ from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
147
+
148
+ downloaded_path = ms_snapshot(model_name_or_path)
149
+ return downloaded_path
150
+
151
+ # modelscope path
152
+ from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
153
+
154
+ try:
155
+ downloaded_path = ms_snapshot(model_name_or_path)
156
+ return downloaded_path
157
+ except Exception as e: # pragma: no cover
127
158
  raise ModelLoadError(model_name_or_path, original_error=e)
128
159
 
129
160
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai
3
- Version: 1.0.5
3
+ Version: 1.1.0
4
4
  Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
5
5
  Author-email: Lattifai Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>
@@ -50,7 +50,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
50
50
  Requires-Python: <3.15,>=3.10
51
51
  Description-Content-Type: text/markdown
52
52
  License-File: LICENSE
53
- Requires-Dist: lattifai-core>=0.5.1
53
+ Requires-Dist: lattifai-core>=0.6.0
54
54
  Requires-Dist: lattifai-run>=1.0.1
55
55
  Requires-Dist: python-dotenv
56
56
  Requires-Dist: lhotse>=1.26.0
@@ -61,11 +61,12 @@ Requires-Dist: tgt
61
61
  Requires-Dist: onnx>=1.16.0
62
62
  Requires-Dist: onnxruntime
63
63
  Requires-Dist: msgpack
64
+ Requires-Dist: scipy!=1.16.3
64
65
  Requires-Dist: g2p-phonemizer>=0.4.0
65
66
  Requires-Dist: av
66
67
  Requires-Dist: wtpsplit>=2.1.7
67
68
  Requires-Dist: OmniSenseVoice>=0.4.2
68
- Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc3
69
+ Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc4
69
70
  Requires-Dist: pyannote-audio-notorchdeps>=4.0.2
70
71
  Requires-Dist: questionary>=2.0
71
72
  Requires-Dist: yt-dlp
@@ -113,9 +114,6 @@ Dynamic: license-file
113
114
 
114
115
  Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](https://huggingface.co/Lattifai/Lattice-1) model.
115
116
 
116
- > **⚠️ Note on Current Limitations**:
117
- > 1. **Memory Usage**: We are aware of high memory consumption and are actively working on further optimizations.
118
-
119
117
  ## Table of Contents
120
118
 
121
119
  - [Installation](#installation)
@@ -136,7 +134,7 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
136
134
  - [Advanced Features](#advanced-features)
137
135
  - [Word-Level Alignment](#word-level-alignment)
138
136
  - [Smart Sentence Splitting](#smart-sentence-splitting)
139
- - [Speaker Diarization](#speaker-diarization-wip)
137
+ - [Speaker Diarization](#speaker-diarization)
140
138
  - [YAML Configuration Files](#yaml-configuration-files)
141
139
  - [Supported Formats](#supported-formats)
142
140
  - [Roadmap](#roadmap)
@@ -708,9 +706,7 @@ caption = client.alignment(
708
706
  )
709
707
  ```
710
708
 
711
- ### Speaker Diarization (WIP)
712
-
713
- **Note:** This feature is currently under development and not yet fully available.
709
+ ### Speaker Diarization
714
710
 
715
711
  Speaker diarization automatically identifies and labels different speakers in audio. When enabled, the system will:
716
712
  - Detect speaker changes in the audio
@@ -721,6 +717,24 @@ Speaker diarization automatically identifies and labels different speakers in au
721
717
  - **Existing speaker labels in subtitles**: If your input captions already contain speaker names (e.g., `[Alice]`, `>> Bob:`, or `SPEAKER_01:`), the system will preserve them as much as possible during alignment
722
718
  - **Gemini Transcriber**: When using Gemini models for transcription (e.g., `gemini-2.5-pro`), the model can intelligently identify and extract speaker names from dialogue context, making it easier to generate speaker-aware transcripts
723
719
 
720
+ **CLI:**
721
+ ```bash
722
+ # Enable speaker diarization during alignment
723
+ lai alignment align audio.wav subtitle.srt output.srt \
724
+ diarization.enabled=true
725
+
726
+ # With additional diarization settings
727
+ lai alignment align audio.wav subtitle.srt output.srt \
728
+ diarization.enabled=true \
729
+ diarization.device=cuda \
730
+ diarization.min_speakers=2 \
731
+ diarization.max_speakers=4
732
+
733
+ # For YouTube videos with diarization
734
+ lai alignment youtube "https://youtube.com/watch?v=VIDEO_ID" \
735
+ diarization.enabled=true
736
+ ```
737
+
724
738
  **Python SDK:**
725
739
  ```python
726
740
  from lattifai import LattifAI, DiarizationConfig
@@ -1,44 +1,45 @@
1
1
  lattifai/__init__.py,sha256=7y1R5IGw0Sgvl1tfqxEK7e-ozW0wVB-q_JZgv6YyrMQ,2751
2
2
  lattifai/audio2.py,sha256=BKMCzkuEmBFAWOEnzgLxeK8TBPTFbjzr1esOfe3MQoo,17460
3
- lattifai/client.py,sha256=OXDGsWVeOMEjmXI795pvnK3L-ZLn_sfUwG0i7uJ1JkY,22492
3
+ lattifai/client.py,sha256=yD8yIcw3xfvPqBzUQTJj5h9sFYbg4BSHBR5gwO-wGF8,18875
4
4
  lattifai/errors.py,sha256=LyWRGVhQ6Ak2CYn9FBYAPRgQ7_VHpxzNsXI31HXD--s,11291
5
5
  lattifai/logging.py,sha256=MbUEeOUFlF92pA9v532DiPPWKl03S7UHCJ6Z652cf0w,2860
6
- lattifai/mixin.py,sha256=yj3H1SSQSQrhUeqKhQmRRELRr5fp2mb2ovkK9p8Vwn4,23858
6
+ lattifai/mixin.py,sha256=t8A3J1DVr7SBEv2FYK3JrGEFqxAV3NktfGPiOQdxTB4,24990
7
7
  lattifai/types.py,sha256=SjYBfwrCBOXlICvH04niFQJ7OzTx7oTaa_npfRkB67U,659
8
- lattifai/utils.py,sha256=ZYEUaoTBCwzv4PBBD-woeiDSTx8T1a1vXHIT0g1YmRI,5345
8
+ lattifai/utils.py,sha256=N4k2cvoT2CgHsGY0sb-3l-eKUMLzGirPzVa53v8euh4,6658
9
9
  lattifai/alignment/__init__.py,sha256=ehpkKfjNIYUx7_M-RWD_8Efcrzd9bE-NSm0QgMMVLW0,178
10
- lattifai/alignment/lattice1_aligner.py,sha256=DpN_it7ETZgz6uH3I90Y926bvjhFRdL6dycxz5S_tkI,5142
11
- lattifai/alignment/lattice1_worker.py,sha256=1yYK_xLOL_xHZTVGgNb957R7HhHnl6xwrXUcN372ZIY,12407
10
+ lattifai/alignment/lattice1_aligner.py,sha256=5c_bWv0v6jOX8H4gO3-mAKXn4Ux4vjHIUtAY4d8yMak,6110
11
+ lattifai/alignment/lattice1_worker.py,sha256=U_EbDsbisWWKIyfxtx9q4deAyml_hni8DpznZo5zBko,12975
12
12
  lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
13
13
  lattifai/alignment/segmenter.py,sha256=mzWEQC6hWZtI2mR2WU59W7qLHa7KXy7fdU6991kyUuQ,6276
14
- lattifai/alignment/tokenizer.py,sha256=oqgy5L9wU0_AMyUVNArEtPIDXm7WdvNNfJuB2ZJBpqI,22394
14
+ lattifai/alignment/tokenizer.py,sha256=GNLZbkvZ066PJGUznJVgxZUwSzslD6mz8YsI2Cry6RI,22400
15
15
  lattifai/caption/__init__.py,sha256=6MM_2j6CaqwZ81LfSy4di2EP0ykvheRjMZKAYDx2rQs,477
16
- lattifai/caption/caption.py,sha256=Ljt-6K89AauIK05hdDqjV6G03mkTTJL2UE9ukt-tck0,52502
16
+ lattifai/caption/caption.py,sha256=mZYobxuZ8tkJUkZMVvRTrNeGTdmIZYSXTEySQdaGQd8,54595
17
17
  lattifai/caption/gemini_reader.py,sha256=GqY2w78xGYCMDP5kD5WGS8jK0gntel2SK-EPpPKTrwU,15138
18
18
  lattifai/caption/gemini_writer.py,sha256=sYPxYEmVQcEan5WVGgSrcraxs3QJRQRh8CJkl2yUQ1s,6515
19
19
  lattifai/caption/supervision.py,sha256=DRrM8lfKU_x9aVBcLG6xnT0xIJrnc8jzHpzcSwQOg8c,905
20
20
  lattifai/caption/text_parser.py,sha256=XDb8KTt031uJ1hg6dpbINglGOTX-6pBcghbg3DULM1I,4633
21
- lattifai/cli/__init__.py,sha256=dIUmrpN-OwR4h6BqMhXp87_5ZwgO41ShPru_iZGnpQs,463
21
+ lattifai/cli/__init__.py,sha256=LafsAf8YfDcfTeJ1IevFcyLm-mNbxpOOnm33OFKtpDM,523
22
22
  lattifai/cli/alignment.py,sha256=06em-Uaf6NhSz1ce4dwT2r8n56NrtibR7ZsSkmc18Kc,5954
23
23
  lattifai/cli/app_installer.py,sha256=gAndH3Yo97fGRDe2CQnGtOgZZ4k3_v5ftcUo5g6xbSA,5884
24
- lattifai/cli/caption.py,sha256=p0VY6orf3D77tr30NQka7A84kwEmYiZrCDB6FbTgoFM,6312
24
+ lattifai/cli/caption.py,sha256=4qQ9DFhxcfaeFMY0TB5I42x4W_gOo2zY6kjXnHnFDms,6313
25
+ lattifai/cli/diarization.py,sha256=cDz1p6RUC-ySPzzGYHWff0L6EWHTUPrci7DaVxwZrVc,3933
25
26
  lattifai/cli/server.py,sha256=sXMfOSse9-V79slXUU8FDLeqtI5U9zeU-5YpjTIGyVw,1186
26
- lattifai/cli/transcribe.py,sha256=W42SVhnOQ0EndMk-Lu38BiG1LuMcJnzre9X83M6kBZ4,8137
27
+ lattifai/cli/transcribe.py,sha256=_vHzrdaGiPepQWATqvEDYDjwzfVLAd2i8RjOLkvdb0w,8218
27
28
  lattifai/cli/youtube.py,sha256=-EIDSS1Iel3_6qD9M2CZZHwKOvgdkIa1cMY4rX7xwVo,5331
28
29
  lattifai/config/__init__.py,sha256=Z8OudvS6fgfLNLu_2fvoXartQiYCECOnNfzDt-PfCN4,543
29
- lattifai/config/alignment.py,sha256=v6SuryAVNET9hgH_ZidYN2QhZqpEDnNhR-rogSSSfAg,4039
30
+ lattifai/config/alignment.py,sha256=vLiH150YWvBUiVkFOIO-nPXCB-b8fP9iSZgS79k1Qbg,4586
30
31
  lattifai/config/caption.py,sha256=AYOyUJ1xZsX8CBZy3GpLitbcCAHcZ9LwXui_v3vtuso,6786
31
32
  lattifai/config/client.py,sha256=I1JqLQlsQNU5ouovTumr-PP_8GWC9DI_e9B5UwsDZws,1492
32
33
  lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
33
34
  lattifai/config/media.py,sha256=cjM8eGeZ7ELhmy4cCqHAyogeHItaVqMrPzSwwIx79HY,14856
34
- lattifai/config/transcription.py,sha256=bzghOGgcNWzTnDYd_cqCOB7GT8OnzHDiyam7LSixqxM,2901
35
+ lattifai/config/transcription.py,sha256=_gPJD6cob_jWNdf841nBHhAqJGCxS6PfSyvx2W_vPcM,3082
35
36
  lattifai/diarization/__init__.py,sha256=MgBDQ1ehL2qDnZprEp8KqON7CmbG-qaP37gzBsV0jzk,119
36
- lattifai/diarization/lattifai.py,sha256=SE2BpIZ3_deKyhXdBqe77bsDLXIUV9AQV34gfINv7_s,2657
37
+ lattifai/diarization/lattifai.py,sha256=tCnFL6ywITqeKR8YoCsYvyJxNoIwoC6GsnI9zkXNB-Q,3128
37
38
  lattifai/server/app.py,sha256=wXYgXc_yGQACtUJdhkfhLsTOQjhhIhDQRiVRny7Ogcs,15455
38
- lattifai/transcription/__init__.py,sha256=mEoMTbs5jAgtXQn1jTjlFY_GUr-S0WmPn8uZ6WZCkU0,2643
39
- lattifai/transcription/base.py,sha256=59b4nQHFMyTRyyzBJTM8ZpEuUy1KjwA2o6rNfrNluKY,3911
40
- lattifai/transcription/gemini.py,sha256=1VNi9gl-Kpkw3ljZcOZG5oq_OY8fMC9Xv4kOwyQpI0Q,7992
41
- lattifai/transcription/lattifai.py,sha256=h0nhXST0qljhyndf80IEddM7Y_N1jiS28YoaE536eME,3483
39
+ lattifai/transcription/__init__.py,sha256=vMHciyCEPKhhfM3KjMCeDqnyxU1oghF8g5o5SvpnT_4,2669
40
+ lattifai/transcription/base.py,sha256=v_b1_JGYiBqeMmwns0wHCJ7UOm6j9k-76Uzbr-qmzrs,4467
41
+ lattifai/transcription/gemini.py,sha256=LJSQt9nGqQdEG6ZFXoHWltumyMEM7-Ezy8ss0iPJb7k,12414
42
+ lattifai/transcription/lattifai.py,sha256=EKEdCafgdRWKw_084eD07BqGh2_D-qo3ig3H5X3XYGg,4621
42
43
  lattifai/transcription/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
43
44
  lattifai/transcription/prompts/__init__.py,sha256=G9b42COaCYv3sPPNkHsGDLOMBuVGKt4mXGYal_BYtYQ,1351
44
45
  lattifai/transcription/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
@@ -48,9 +49,9 @@ lattifai/workflow/agents.py,sha256=yEOnxnhcTvr1iOhCorNvp8B76P6nQsLRXJCu_rCYFfM,3
48
49
  lattifai/workflow/base.py,sha256=8QoVIBZwJfr5mppJbtUFafHv5QR9lL-XrULjTWD0oBg,6257
49
50
  lattifai/workflow/file_manager.py,sha256=IUWW838ta83kfwM4gpW83gsD_Tx-pa-L_RWKjiefQbQ,33017
50
51
  lattifai/workflow/youtube.py,sha256=ON9z0UUk16ThQzdhdgyOiwBmewZOcxfT05dsl3aKYqw,23840
51
- lattifai-1.0.5.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
52
- lattifai-1.0.5.dist-info/METADATA,sha256=cTg6ivcixFAv-464qk0R2v19LdEgGkETcNvRzycFSKk,26117
53
- lattifai-1.0.5.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
54
- lattifai-1.0.5.dist-info/entry_points.txt,sha256=F8Akof3VtKtrbnYSav1umgoo9Xbv34rUcKn-ioRfeGQ,474
55
- lattifai-1.0.5.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
56
- lattifai-1.0.5.dist-info/RECORD,,
52
+ lattifai-1.1.0.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
53
+ lattifai-1.1.0.dist-info/METADATA,sha256=MXg4IXWA38Y9c_RT9S_6NnCZFc_4-Yic_zZDHlS1TeY,26400
54
+ lattifai-1.1.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
55
+ lattifai-1.1.0.dist-info/entry_points.txt,sha256=nHZri2VQkPYEl0tQ0dMYTpVGlCOgVWlDG_JtDR3QXF8,545
56
+ lattifai-1.1.0.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
57
+ lattifai-1.1.0.dist-info/RECORD,,
@@ -1,6 +1,7 @@
1
1
  [console_scripts]
2
2
  lai-align = lattifai.cli.alignment:main
3
3
  lai-app-install = lattifai.cli.app_installer:main
4
+ lai-diarize = lattifai.cli.diarization:main
4
5
  lai-server = lattifai.cli.server:main
5
6
  lai-transcribe = lattifai.cli.transcribe:main
6
7
  lai-youtube = lattifai.cli.youtube:main
@@ -11,4 +12,5 @@ laicap-shift = lattifai.cli.caption:main_shift
11
12
  [lai_run.cli]
12
13
  alignment = lattifai.cli
13
14
  caption = lattifai.cli
15
+ diarization = lattifai.cli
14
16
  transcribe = lattifai.cli