lattifai 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. lattifai/__init__.py +0 -24
  2. lattifai/alignment/__init__.py +10 -1
  3. lattifai/alignment/lattice1_aligner.py +66 -58
  4. lattifai/alignment/lattice1_worker.py +1 -6
  5. lattifai/alignment/punctuation.py +38 -0
  6. lattifai/alignment/segmenter.py +1 -1
  7. lattifai/alignment/sentence_splitter.py +350 -0
  8. lattifai/alignment/text_align.py +440 -0
  9. lattifai/alignment/tokenizer.py +91 -220
  10. lattifai/caption/__init__.py +82 -6
  11. lattifai/caption/caption.py +335 -1143
  12. lattifai/caption/formats/__init__.py +199 -0
  13. lattifai/caption/formats/base.py +211 -0
  14. lattifai/caption/formats/gemini.py +722 -0
  15. lattifai/caption/formats/json.py +194 -0
  16. lattifai/caption/formats/lrc.py +309 -0
  17. lattifai/caption/formats/nle/__init__.py +9 -0
  18. lattifai/caption/formats/nle/audition.py +561 -0
  19. lattifai/caption/formats/nle/avid.py +423 -0
  20. lattifai/caption/formats/nle/fcpxml.py +549 -0
  21. lattifai/caption/formats/nle/premiere.py +589 -0
  22. lattifai/caption/formats/pysubs2.py +642 -0
  23. lattifai/caption/formats/sbv.py +147 -0
  24. lattifai/caption/formats/tabular.py +338 -0
  25. lattifai/caption/formats/textgrid.py +193 -0
  26. lattifai/caption/formats/ttml.py +652 -0
  27. lattifai/caption/formats/vtt.py +469 -0
  28. lattifai/caption/parsers/__init__.py +9 -0
  29. lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
  30. lattifai/caption/standardize.py +636 -0
  31. lattifai/caption/utils.py +474 -0
  32. lattifai/cli/__init__.py +2 -1
  33. lattifai/cli/caption.py +108 -1
  34. lattifai/cli/transcribe.py +4 -9
  35. lattifai/cli/youtube.py +4 -1
  36. lattifai/client.py +48 -84
  37. lattifai/config/__init__.py +11 -1
  38. lattifai/config/alignment.py +9 -2
  39. lattifai/config/caption.py +267 -23
  40. lattifai/config/media.py +20 -0
  41. lattifai/diarization/__init__.py +41 -1
  42. lattifai/mixin.py +36 -18
  43. lattifai/transcription/base.py +6 -1
  44. lattifai/transcription/lattifai.py +19 -54
  45. lattifai/utils.py +81 -13
  46. lattifai/workflow/__init__.py +28 -4
  47. lattifai/workflow/file_manager.py +2 -5
  48. lattifai/youtube/__init__.py +43 -0
  49. lattifai/youtube/client.py +1170 -0
  50. lattifai/youtube/types.py +23 -0
  51. lattifai-1.2.2.dist-info/METADATA +615 -0
  52. lattifai-1.2.2.dist-info/RECORD +76 -0
  53. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
  54. lattifai/caption/gemini_reader.py +0 -371
  55. lattifai/caption/gemini_writer.py +0 -173
  56. lattifai/cli/app_installer.py +0 -142
  57. lattifai/cli/server.py +0 -44
  58. lattifai/server/app.py +0 -427
  59. lattifai/workflow/youtube.py +0 -577
  60. lattifai-1.2.0.dist-info/METADATA +0 -1133
  61. lattifai-1.2.0.dist-info/RECORD +0 -57
  62. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
  63. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
  64. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
lattifai/diarization/__init__.py CHANGED
@@ -1,4 +1,44 @@
- """Speaker diarization module for LattifAI."""
+ """Speaker diarization module for LattifAI.
+
+ This module provides multi-speaker identification and labeling capabilities
+ using pyannote.audio-based diarization models. It can identify who spoke
+ when in an audio file and optionally match detected speakers with existing
+ speaker labels from input captions.
+
+ Key Components:
+ LattifAIDiarizer: Main diarization class that wraps pyannote.audio
+ pipelines for speaker segmentation and clustering.
+
+ Features:
+ - Automatic speaker detection with configurable min/max speaker counts
+ - Speaker label preservation from input captions (e.g., "Alice:", ">> Bob:")
+ - Integration with alignment results to assign speakers to words/segments
+ - Support for pre-computed diarization results (avoid reprocessing)
+
+ Configuration:
+ Use DiarizationConfig to control:
+ - enabled: Whether to run diarization
+ - min_speakers/max_speakers: Constrain speaker count detection
+ - device: GPU/CPU device selection
+ - debug: Enable verbose output
+
+ Example:
+ >>> from lattifai import LattifAI
+ >>> from lattifai.config import DiarizationConfig
+ >>> client = LattifAI(diarization_config=DiarizationConfig(enabled=True))
+ >>> caption = client.alignment(audio="speech.wav", input_caption="transcript.srt")
+ >>> for seg in caption.supervisions:
+ ... print(f"{seg.speaker}: {seg.text}")
+
+ Performance Notes:
+ - Diarization adds ~10-30% processing time to alignment
+ - GPU acceleration recommended for longer audio files
+ - Results are cached when output_path is provided
+
+ See Also:
+ - lattifai.config.DiarizationConfig: Configuration options
+ - lattifai.client.LattifAI.speaker_diarization: Direct diarization method
+ """

  from .lattifai import LattifAIDiarizer

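As a quick illustration of the configuration surface described in the new docstring, here is a minimal sketch that constrains the speaker count and picks a device. It relies only on the DiarizationConfig fields named above (enabled, min_speakers/max_speakers, device, debug) and the alignment call from the docstring example; treat it as a sketch, not the full API.

    # Minimal sketch, assuming DiarizationConfig exposes the fields listed in the docstring above.
    from lattifai import LattifAI
    from lattifai.config import DiarizationConfig

    config = DiarizationConfig(
        enabled=True,       # run diarization during alignment
        min_speakers=2,     # constrain automatic speaker-count detection
        max_speakers=4,
        device="cuda",      # GPU recommended for longer audio files
        debug=False,
    )
    client = LattifAI(diarization_config=config)
    caption = client.alignment(audio="speech.wav", input_caption="transcript.srt")
    for seg in caption.supervisions:
        print(f"{seg.speaker}: {seg.text}")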
lattifai/mixin.py CHANGED
@@ -220,19 +220,16 @@ class LattifAIClientMixin:
  def downloader(self):
  """Lazy load YouTube downloader."""
  if self._downloader is None:
- from .workflow.youtube import YouTubeDownloader
+ from .youtube import YouTubeDownloader

  self._downloader = YouTubeDownloader()
  return self._downloader

  def _prepare_youtube_output_dir(self, output_dir: Optional["Pathlike"]) -> Path:
  """Prepare and return output directory for YouTube downloads."""
- if output_dir is None:
- output_dir = Path(tempfile.gettempdir()) / "lattifai_youtube"
- else:
- output_dir = Path(output_dir).expanduser()
- output_dir.mkdir(parents=True, exist_ok=True)
- return output_dir
+ output_path = Path(output_dir).expanduser() if output_dir else Path(tempfile.gettempdir()) / "lattifai_youtube"
+ output_path.mkdir(parents=True, exist_ok=True)
+ return output_path

  def _determine_media_format(self, media_format: Optional[str]) -> str:
  """Determine media format from parameter or config."""
@@ -242,11 +239,11 @@ class LattifAIClientMixin:
  self, output_caption_path: Optional["Pathlike"], media_file: str, output_dir: Path
  ) -> Path:
  """Generate output caption path if not provided."""
- if not output_caption_path:
- media_name = Path(media_file).stem
- output_format = self.caption_config.output_format or "srt"
- output_caption_path = output_dir / f"{media_name}_LattifAI.{output_format}"
- return Path(output_caption_path)
+ if output_caption_path:
+ return Path(output_caption_path)
+ media_name = Path(media_file).stem
+ output_format = self.caption_config.output_format or "srt"
+ return output_dir / f"{media_name}_LattifAI.{output_format}"

  def _validate_transcription_setup(self) -> None:
  """Validate that transcription is properly configured if requested."""
@@ -290,12 +287,12 @@ class LattifAIClientMixin:
  diarization_file = Path(str(input_caption)).with_suffix(".SpkDiar")
  if diarization_file.exists():
  if verbose:
- safe_print(colorful.cyan(f"📖 Step 1b: Reading speaker diarization from {diarization_file}"))
+ safe_print(colorful.cyan(f"📖 Step1b: Reading speaker diarization from {diarization_file}"))
  caption.read_speaker_diarization(diarization_file)
  events_file = Path(str(input_caption)).with_suffix(".AED")
  if events_file.exists():
  if verbose:
- safe_print(colorful.cyan(f"📖 Step 1c: Reading audio events from {events_file}"))
+ safe_print(colorful.cyan(f"📖 Step1c: Reading audio events from {events_file}"))
  from tgt import read_textgrid

  caption.audio_events = read_textgrid(events_file)
@@ -332,6 +329,8 @@ class LattifAIClientMixin:
  result = caption.write(
  output_caption_path,
  include_speaker_in_text=self.caption_config.include_speaker_in_text,
+ word_level=self.caption_config.word_level,
+ karaoke_config=self.caption_config.karaoke,
  )
  diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
  if not diarization_file.exists() and caption.speaker_diarization:
@@ -353,14 +352,22 @@ class LattifAIClientMixin:
  output_dir: Path,
  media_format: str,
  force_overwrite: bool,
+ audio_track_id: Optional[str] = "original",
+ quality: str = "best",
  ) -> str:
  """Download media from YouTube (async implementation)."""
  safe_print(colorful.cyan("📥 Downloading media from YouTube..."))
+ if audio_track_id:
+ safe_print(colorful.cyan(f" Audio track: {audio_track_id}"))
+ if quality != "best":
+ safe_print(colorful.cyan(f" Quality: {quality}"))
  media_file = await self.downloader.download_media(
  url=url,
  output_dir=str(output_dir),
  media_format=media_format,
  force_overwrite=force_overwrite,
+ audio_track_id=audio_track_id,
+ quality=quality,
  )
  safe_print(colorful.green(f" ✓ Media downloaded: {media_file}"))
  return media_file
@@ -371,11 +378,15 @@ class LattifAIClientMixin:
  output_dir: Path,
  media_format: str,
  force_overwrite: bool,
+ audio_track_id: Optional[str] = "original",
+ quality: str = "best",
  ) -> str:
  """Download media from YouTube (sync wrapper)."""
  import asyncio

- return asyncio.run(self._download_media(url, output_dir, media_format, force_overwrite))
+ return asyncio.run(
+ self._download_media(url, output_dir, media_format, force_overwrite, audio_track_id, quality)
+ )

  def _transcribe(
  self,
@@ -404,6 +415,14 @@ class LattifAIClientMixin:
  # Transcription mode: use Transcriber to transcribe
  self._validate_transcription_setup()

+ if output_dir:
+ # Generate transcript file path
+ transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
+ if transcript_file.exists():
+ safe_print(colorful.cyan(f" Using existing transcript file: {transcript_file}"))
+ transcription = self._read_caption(transcript_file, normalize_text=False)
+ return transcription
+
  safe_print(colorful.cyan(f"🎤 Transcribing({self.transcriber.name}) media: {str(media_file)} ..."))
  transcription = await self.transcriber.transcribe_file(media_file, language=source_lang)
  safe_print(colorful.green(" ✓ Transcription completed."))
@@ -442,8 +461,6 @@ class LattifAIClientMixin:
  safe_print(colorful.yellow(f"First segment: {transcription.transcription[0].text}"))

  if output_dir:
- # Generate transcript file path
- transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
  await asyncio.to_thread(self.transcriber.write, transcription, transcript_file, encoding="utf-8")
  safe_print(colorful.green(f" ✓ Transcription saved to: {transcript_file}"))

@@ -479,11 +496,12 @@ class LattifAIClientMixin:
  """
  import asyncio

- from lattifai.workflow.youtube import TRANSCRIBE_CHOICE
+ from lattifai.workflow.file_manager import TRANSCRIBE_CHOICE

  transcriber_name = self.transcriber.name

  async def _async_impl():
+ nonlocal use_transcription # Allow modification of outer variable
  # First check if caption input_path is already provided
  if self.caption_config.input_path:
  caption_path = Path(self.caption_config.input_path)
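Two small behavioral details in the hunks above are easy to miss: output directories default to a lattifai_youtube folder under the system temp dir, and caption paths fall back to a "<media stem>_LattifAI.<format>" name. The standalone sketch below restates that naming rule; the helper itself is hypothetical, only the f-string pattern and the "srt" default come from the diff.

    # Hypothetical standalone helper restating the rule in _generate_output_caption_path above.
    from pathlib import Path

    def caption_path_for(media_file: str, output_dir: Path, output_format: str = "srt") -> Path:
        # In the real method an explicitly provided path wins; otherwise this pattern is used.
        return output_dir / f"{Path(media_file).stem}_LattifAI.{output_format}"

    print(caption_path_for("talk.mp4", Path("/tmp/lattifai_youtube")))
    # /tmp/lattifai_youtube/talk_LattifAI.srt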
lattifai/transcription/base.py CHANGED
@@ -41,8 +41,13 @@ class BaseTranscriber(ABC):
  self.logger = get_logger("transcription")

  @property
+ @abstractmethod
  def name(self) -> str:
- """Human-readable name of the transcriber."""
+ """Human-readable name of the transcriber.
+
+ Returns:
+ str: Identifier for the transcriber (e.g., 'gemini', 'parakeet').
+ """

  @property
  def file_name(self) -> str:
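With name now declared @abstractmethod, every concrete transcriber has to override the property. A minimal hypothetical subclass (class name and return value invented purely for illustration) would look like:

    from lattifai.transcription.base import BaseTranscriber

    class EchoTranscriber(BaseTranscriber):  # hypothetical, for illustration only
        @property
        def name(self) -> str:
            # Short identifier in the spirit of 'gemini' or 'parakeet' from the docstring.
            return "echo"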
lattifai/transcription/lattifai.py CHANGED
@@ -9,7 +9,6 @@ from lattifai.audio2 import AudioData
  from lattifai.caption import Caption, Supervision
  from lattifai.config import TranscriptionConfig
  from lattifai.transcription.base import BaseTranscriber
- from lattifai.transcription.prompts import get_prompt_loader # noqa: F401


  class LattifAITranscriber(BaseTranscriber):
@@ -20,61 +19,42 @@ class LattifAITranscriber(BaseTranscriber):
  Note: This transcriber only supports local file transcription, not URLs.
  """

- # Transcriber metadata
  file_suffix = ".ass"
  supports_url = False

- def __init__(
- self,
- transcription_config: TranscriptionConfig,
- ):
+ def __init__(self, transcription_config: TranscriptionConfig):
  """
- Initialize Gemini transcriber.
+ Initialize LattifAI transcriber.

  Args:
- transcription_config: Transcription configuration. If None, uses default.
+ transcription_config: Transcription configuration.
  """
- super().__init__(
- config=transcription_config,
- )
-
- self._system_prompt: Optional[str] = None
+ super().__init__(config=transcription_config)
  self._transcriber = None

  @property
  def name(self) -> str:
- return f"{self.config.model_name}"
-
- async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
- """
- URL transcription not supported for LattifAI local models.
-
- This method exists to satisfy the BaseTranscriber interface but
- will never be called because supports_url = False and the base
- class checks this flag before calling this method.
-
- Args:
- url: URL to transcribe (not supported)
- language: Optional language code (not used)
- """
- raise NotImplementedError(
- f"{self.__class__.__name__} does not support URL transcription. "
- f"Please download the file first and use transcribe_file()."
- )
+ return self.config.model_name

- async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
+ def _ensure_transcriber(self):
+ """Lazily initialize the core transcriber."""
  if self._transcriber is None:
  from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber

  self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
+ return self._transcriber

- transcription, audio_events = self._transcriber.transcribe(media_file, language=language, num_workers=2)
- caption = Caption.from_transcription_results(
- transcription=transcription,
- audio_events=audio_events,
+ async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
+ """URL transcription not supported for LattifAI local models."""
+ raise NotImplementedError(
+ f"{self.__class__.__name__} does not support URL transcription. "
+ "Please download the file first and use transcribe_file()."
  )

- return caption
+ async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
+ transcriber = self._ensure_transcriber()
+ transcription, audio_events = transcriber.transcribe(media_file, language=language, num_workers=2)
+ return Caption.from_transcription_results(transcription=transcription, audio_events=audio_events)

  def transcribe_numpy(
  self,
@@ -92,13 +72,8 @@ class LattifAITranscriber(BaseTranscriber):
  Returns:
  Supervision object (or list of Supervision objects) with transcription and alignment info.
  """
- if self._transcriber is None:
- from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber
-
- self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
-
- # Delegate to core transcriber which handles both single arrays and lists
- return self._transcriber.transcribe(
+ transcriber = self._ensure_transcriber()
+ return transcriber.transcribe(
  audio, language=language, return_hypotheses=True, progress_bar=False, timestamps=True
  )[0]

@@ -119,13 +94,3 @@
  write_to_file(transcript.audio_events, events_file, format="long")

  return output_file
-
- def _get_transcription_prompt(self) -> str:
- """Get (and cache) transcription system prompt from prompts module."""
- if self._system_prompt is not None:
- return self._system_prompt
-
- base_prompt = "" # TODO
-
- self._system_prompt = base_prompt
- return self._system_prompt
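The main refactor above moves the one-time load of the core model into _ensure_transcriber(), which transcribe_file() and transcribe_numpy() now share. The sketch below isolates that lazy-initialization pattern; the import and from_pretrained call are copied from the diff, while the wrapper class around them is invented for illustration.

    # Sketch of the lazy-load pattern consolidated into _ensure_transcriber().
    class _LazyTranscriberHolder:  # hypothetical wrapper, for illustration only
        def __init__(self, config):
            self._config = config
            self._transcriber = None

        def get(self):
            if self._transcriber is None:
                # Deferred import: lattifai_core is only touched when transcription is requested.
                from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber

                self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self._config)
            return self._transcriber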
lattifai/utils.py CHANGED
@@ -44,6 +44,49 @@ def safe_print(text: str, **kwargs) -> None:
  print(text.encode("utf-8", errors="replace").decode("utf-8"), **kwargs)


+ def _get_cache_marker_path(cache_dir: Path) -> Path:
+ """Get the path for the cache marker file with current date."""
+ today = datetime.now().strftime("%Y%m%d")
+ return cache_dir / f".done{today}"
+
+
+ def _is_cache_valid(cache_dir: Path) -> bool:
+ """Check if cached model is valid (exists and not older than 1 days)."""
+ if not cache_dir.exists():
+ return False
+
+ # Find any .done* marker files
+ marker_files = list(cache_dir.glob(".done*"))
+ if not marker_files:
+ return False
+
+ # Get the most recent marker file
+ latest_marker = max(marker_files, key=lambda p: p.stat().st_mtime)
+
+ # Extract date from marker filename (format: .doneYYYYMMDD)
+ try:
+ date_str = latest_marker.name.replace(".done", "")
+ marker_date = datetime.strptime(date_str, "%Y%m%d")
+ # Check if marker is older than 1 days
+ if datetime.now() - marker_date > timedelta(days=7):
+ return False
+ return True
+ except (ValueError, IndexError):
+ # Invalid marker file format, treat as invalid cache
+ return False
+
+
+ def _create_cache_marker(cache_dir: Path) -> None:
+ """Create a cache marker file with current date and clean old markers."""
+ # Remove old marker files
+ for old_marker in cache_dir.glob(".done*"):
+ old_marker.unlink(missing_ok=True)
+
+ # Create new marker file
+ marker_path = _get_cache_marker_path(cache_dir)
+ marker_path.touch()
+
+
  def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface") -> str:
  """Resolve model path, downloading from the specified model hub when necessary.

@@ -51,21 +94,17 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
  model_name_or_path: Local path or remote model identifier.
  model_hub: Which hub to use for downloads. Supported: "huggingface", "modelscope".
  """
- if Path(model_name_or_path).expanduser().exists():
- return str(Path(model_name_or_path).expanduser())
+ local_path = Path(model_name_or_path).expanduser()
+ if local_path.exists():
+ return str(local_path)

- # Normalize hub name
  hub = (model_hub or "huggingface").lower()
-
  if hub not in ("huggingface", "modelscope"):
  raise ValueError(f"Unsupported model_hub: {model_hub}. Supported: 'huggingface', 'modelscope'.")

- # If local path exists, return it regardless of hub
- if Path(model_name_or_path).expanduser().exists():
- return str(Path(model_name_or_path).expanduser())
-
  if hub == "huggingface":
  from huggingface_hub import HfApi, snapshot_download
+ from huggingface_hub.constants import HF_HUB_CACHE
  from huggingface_hub.errors import LocalEntryNotFoundError

  # Support repo_id@revision syntax
@@ -74,6 +113,20 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
  if "@" in model_name_or_path:
  hf_repo_id, revision = model_name_or_path.split("@", 1)

+ # Determine cache directory for this model
+ cache_dir = Path(HF_HUB_CACHE) / f'models--{hf_repo_id.replace("/", "--")}'
+
+ # Check if we have a valid cached version
+ if _is_cache_valid(cache_dir):
+ # Return the snapshot path (latest version)
+ snapshots_dir = cache_dir / "snapshots"
+ if snapshots_dir.exists():
+ snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
+ if snapshot_dirs:
+ # Return the most recent snapshot
+ latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
+ return str(latest_snapshot)
+
  # If no specific revision/commit is provided, try to fetch the real latest SHA
  # to bypass Hugging Face's model_info (metadata) sync lag.
  if not revision:
@@ -91,6 +144,7 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")

  try:
  downloaded_path = snapshot_download(repo_id=hf_repo_id, repo_type="model", revision=revision)
+ _create_cache_marker(cache_dir)
  return downloaded_path
  except LocalEntryNotFoundError:
  # Fall back to modelscope if HF entry not found
@@ -113,8 +167,23 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
  # modelscope path
  from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot

+ # Determine cache directory for ModelScope
+ # ModelScope uses ~/.cache/modelscope/hub/models/{org}/{model} structure
+ modelscope_cache = Path.home() / ".cache" / "modelscope" / "hub" / "models"
+ cache_dir = modelscope_cache / model_name_or_path
+
+ # Check if we have a valid cached version
+ if _is_cache_valid(cache_dir):
+ # Return the cached path directly
+ if cache_dir.exists():
+ return str(cache_dir)
+
  try:
  downloaded_path = ms_snapshot(model_name_or_path)
+ # Create cache marker after successful download
+ if downloaded_path:
+ actual_cache_dir = Path(downloaded_path)
+ _create_cache_marker(actual_cache_dir)
  return downloaded_path
  except Exception as e: # pragma: no cover
  raise ModelLoadError(model_name_or_path, original_error=e)
@@ -127,9 +196,8 @@ def _select_device(device: Optional[str]) -> str:

  import torch

- detected = "cpu"
  if torch.backends.mps.is_available():
- detected = "mps"
- elif torch.cuda.is_available():
- detected = "cuda"
- return detected
+ return "mps"
+ if torch.cuda.is_available():
+ return "cuda"
+ return "cpu"
lattifai/workflow/__init__.py CHANGED
@@ -1,8 +1,34 @@
- """
- LattifAI Agentic Workflows
+ """LattifAI Agentic Workflows.

  This module provides agentic workflow capabilities for automated processing
  of multimedia content through intelligent agent-based pipelines.
+
+ Key Components:
+ WorkflowAgent: Abstract base class for implementing workflow agents.
+ Provides step-based execution with retry logic, state management,
+ and consistent logging.
+
+ WorkflowStep: Defines individual workflow steps with timing and
+ execution status tracking.
+
+ WorkflowResult: Encapsulates workflow execution results including
+ status, outputs, errors, and timing information.
+
+ FileExistenceManager: Handles file existence conflicts during workflows,
+ supporting interactive and automatic resolution modes.
+
+ Example:
+ >>> from lattifai.workflow import WorkflowAgent, WorkflowStep, WorkflowResult
+ >>> class MyWorkflow(WorkflowAgent):
+ ... def define_steps(self):
+ ... return [WorkflowStep("download"), WorkflowStep("process")]
+ ... def execute_step(self, step, context):
+ ... # Implementation
+ ... pass
+
+ See Also:
+ - lattifai.client.LattifAI: Main client that orchestrates workflows
+ - lattifai.youtube: YouTube-specific workflow integration
  """

  # Import transcript processing functionality
@@ -10,13 +36,11 @@ of multimedia content through intelligent agent-based pipelines.

  from .base import WorkflowAgent, WorkflowResult, WorkflowStep
  from .file_manager import TRANSCRIBE_CHOICE, FileExistenceManager
- from .youtube import YouTubeDownloader

  __all__ = [
  "WorkflowAgent",
  "WorkflowStep",
  "WorkflowResult",
  "FileExistenceManager",
- "YouTubeDownloader",
  "TRANSCRIBE_CHOICE",
  ]
lattifai/workflow/file_manager.py CHANGED
@@ -1,6 +1,4 @@
- """.
- File existence management utilities for video processing workflows
- """
+ """File existence management utilities for video processing workflows."""

  import asyncio
  import os
@@ -187,8 +185,7 @@ class FileExistenceManager:
  if not files:
  return "proceed"

- emoji, label = FileExistenceManager.FILE_TYPE_INFO.get(file_type, ("📄", file_type.capitalize()))
- del emoji # Unused variable
+ _, label = FileExistenceManager.FILE_TYPE_INFO.get(file_type, ("📄", file_type.capitalize()))

  # Header with warning color
  safe_print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
lattifai/youtube/__init__.py CHANGED
@@ -0,0 +1,43 @@
+ """YouTube Data Acquisition Module.
+
+ This module provides YouTube video metadata extraction, media download,
+ and caption retrieval functionality powered by yt-dlp.
+
+ Key Components:
+ YoutubeLoader: Lightweight loader for fetching video metadata and
+ caption content in memory. Use this for quick metadata lookups
+ or when you don't need to save files to disk.
+
+ YouTubeDownloader: Full-featured downloader for media files and
+ captions with disk persistence. Supports various output formats
+ and quality settings.
+
+ VideoMetadata: Dataclass containing video information (title, duration,
+ channel, upload date, available captions, etc.).
+
+ CaptionTrack: Represents a single caption track with language code,
+ format, and content retrieval methods.
+
+ Features:
+ - Proxy and cookie support for geo-restricted content
+ - Automatic caption format detection (manual vs auto-generated)
+ - Multiple audio/video format options
+ - Async and sync download APIs
+
+ Example:
+ >>> from lattifai.youtube import YoutubeLoader, VideoMetadata
+ >>> loader = YoutubeLoader()
+ >>> metadata = loader.get_metadata("https://youtube.com/watch?v=...")
+ >>> print(metadata.title, metadata.duration)
+
+ Requirements:
+ yt-dlp must be installed: `pip install yt-dlp`
+
+ See Also:
+ - lattifai.client.LattifAI.youtube: High-level YouTube workflow method
+ """
+
+ from .client import YouTubeDownloader, YoutubeLoader
+ from .types import CaptionTrack, VideoMetadata
+
+ __all__ = ["YoutubeLoader", "YouTubeDownloader", "VideoMetadata", "CaptionTrack"]