lattifai 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. lattifai/__init__.py +0 -24
  2. lattifai/alignment/__init__.py +10 -1
  3. lattifai/alignment/lattice1_aligner.py +66 -58
  4. lattifai/alignment/lattice1_worker.py +1 -6
  5. lattifai/alignment/punctuation.py +38 -0
  6. lattifai/alignment/segmenter.py +1 -1
  7. lattifai/alignment/sentence_splitter.py +350 -0
  8. lattifai/alignment/text_align.py +440 -0
  9. lattifai/alignment/tokenizer.py +91 -220
  10. lattifai/caption/__init__.py +82 -6
  11. lattifai/caption/caption.py +335 -1143
  12. lattifai/caption/formats/__init__.py +199 -0
  13. lattifai/caption/formats/base.py +211 -0
  14. lattifai/caption/formats/gemini.py +722 -0
  15. lattifai/caption/formats/json.py +194 -0
  16. lattifai/caption/formats/lrc.py +309 -0
  17. lattifai/caption/formats/nle/__init__.py +9 -0
  18. lattifai/caption/formats/nle/audition.py +561 -0
  19. lattifai/caption/formats/nle/avid.py +423 -0
  20. lattifai/caption/formats/nle/fcpxml.py +549 -0
  21. lattifai/caption/formats/nle/premiere.py +589 -0
  22. lattifai/caption/formats/pysubs2.py +642 -0
  23. lattifai/caption/formats/sbv.py +147 -0
  24. lattifai/caption/formats/tabular.py +338 -0
  25. lattifai/caption/formats/textgrid.py +193 -0
  26. lattifai/caption/formats/ttml.py +652 -0
  27. lattifai/caption/formats/vtt.py +469 -0
  28. lattifai/caption/parsers/__init__.py +9 -0
  29. lattifai/caption/{text_parser.py → parsers/text_parser.py} +4 -2
  30. lattifai/caption/standardize.py +636 -0
  31. lattifai/caption/utils.py +474 -0
  32. lattifai/cli/__init__.py +2 -1
  33. lattifai/cli/caption.py +108 -1
  34. lattifai/cli/transcribe.py +4 -9
  35. lattifai/cli/youtube.py +4 -1
  36. lattifai/client.py +48 -84
  37. lattifai/config/__init__.py +11 -1
  38. lattifai/config/alignment.py +9 -2
  39. lattifai/config/caption.py +267 -23
  40. lattifai/config/media.py +20 -0
  41. lattifai/diarization/__init__.py +41 -1
  42. lattifai/mixin.py +36 -18
  43. lattifai/transcription/base.py +6 -1
  44. lattifai/transcription/lattifai.py +19 -54
  45. lattifai/utils.py +81 -13
  46. lattifai/workflow/__init__.py +28 -4
  47. lattifai/workflow/file_manager.py +2 -5
  48. lattifai/youtube/__init__.py +43 -0
  49. lattifai/youtube/client.py +1170 -0
  50. lattifai/youtube/types.py +23 -0
  51. lattifai-1.2.2.dist-info/METADATA +615 -0
  52. lattifai-1.2.2.dist-info/RECORD +76 -0
  53. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/entry_points.txt +1 -2
  54. lattifai/caption/gemini_reader.py +0 -371
  55. lattifai/caption/gemini_writer.py +0 -173
  56. lattifai/cli/app_installer.py +0 -142
  57. lattifai/cli/server.py +0 -44
  58. lattifai/server/app.py +0 -427
  59. lattifai/workflow/youtube.py +0 -577
  60. lattifai-1.2.0.dist-info/METADATA +0 -1133
  61. lattifai-1.2.0.dist-info/RECORD +0 -57
  62. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/WHEEL +0 -0
  63. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/licenses/LICENSE +0 -0
  64. {lattifai-1.2.0.dist-info → lattifai-1.2.2.dist-info}/top_level.txt +0 -0
lattifai/diarization/__init__.py CHANGED
@@ -1,4 +1,44 @@
- """Speaker diarization module for LattifAI."""
+ """Speaker diarization module for LattifAI.
+
+ This module provides multi-speaker identification and labeling capabilities
+ using pyannote.audio-based diarization models. It can identify who spoke
+ when in an audio file and optionally match detected speakers with existing
+ speaker labels from input captions.
+
+ Key Components:
+ LattifAIDiarizer: Main diarization class that wraps pyannote.audio
+ pipelines for speaker segmentation and clustering.
+
+ Features:
+ - Automatic speaker detection with configurable min/max speaker counts
+ - Speaker label preservation from input captions (e.g., "Alice:", ">> Bob:")
+ - Integration with alignment results to assign speakers to words/segments
+ - Support for pre-computed diarization results (avoid reprocessing)
+
+ Configuration:
+ Use DiarizationConfig to control:
+ - enabled: Whether to run diarization
+ - min_speakers/max_speakers: Constrain speaker count detection
+ - device: GPU/CPU device selection
+ - debug: Enable verbose output
+
+ Example:
+ >>> from lattifai import LattifAI
+ >>> from lattifai.config import DiarizationConfig
+ >>> client = LattifAI(diarization_config=DiarizationConfig(enabled=True))
+ >>> caption = client.alignment(audio="speech.wav", input_caption="transcript.srt")
+ >>> for seg in caption.supervisions:
+ ... print(f"{seg.speaker}: {seg.text}")
+
+ Performance Notes:
+ - Diarization adds ~10-30% processing time to alignment
+ - GPU acceleration recommended for longer audio files
+ - Results are cached when output_path is provided
+
+ See Also:
+ - lattifai.config.DiarizationConfig: Configuration options
+ - lattifai.client.LattifAI.speaker_diarization: Direct diarization method
+ """

  from .lattifai import LattifAIDiarizer

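As a quick illustration of the configuration surface described in the new docstring, here is a minimal sketch that constrains the speaker count and picks a device. It relies only on the DiarizationConfig fields named above (enabled, min_speakers/max_speakers, device, debug) and the alignment call from the docstring example; treat it as a sketch, not the full API.

    # Minimal sketch, assuming DiarizationConfig exposes the fields listed in the docstring above.
    from lattifai import LattifAI
    from lattifai.config import DiarizationConfig

    config = DiarizationConfig(
        enabled=True,       # run diarization during alignment
        min_speakers=2,     # constrain automatic speaker-count detection
        max_speakers=4,
        device="cuda",      # GPU recommended for longer audio files
        debug=False,
    )
    client = LattifAI(diarization_config=config)
    caption = client.alignment(audio="speech.wav", input_caption="transcript.srt")
    for seg in caption.supervisions:
        print(f"{seg.speaker}: {seg.text}")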
lattifai/mixin.py CHANGED
@@ -220,19 +220,16 @@ class LattifAIClientMixin:
  def downloader(self):
  """Lazy load YouTube downloader."""
  if self._downloader is None:
- from .workflow.youtube import YouTubeDownloader
+ from .youtube import YouTubeDownloader

  self._downloader = YouTubeDownloader()
  return self._downloader

  def _prepare_youtube_output_dir(self, output_dir: Optional["Pathlike"]) -> Path:
  """Prepare and return output directory for YouTube downloads."""
- if output_dir is None:
- output_dir = Path(tempfile.gettempdir()) / "lattifai_youtube"
- else:
- output_dir = Path(output_dir).expanduser()
- output_dir.mkdir(parents=True, exist_ok=True)
- return output_dir
+ output_path = Path(output_dir).expanduser() if output_dir else Path(tempfile.gettempdir()) / "lattifai_youtube"
+ output_path.mkdir(parents=True, exist_ok=True)
+ return output_path

  def _determine_media_format(self, media_format: Optional[str]) -> str:
  """Determine media format from parameter or config."""
@@ -242,11 +239,11 @@ class LattifAIClientMixin:
  self, output_caption_path: Optional["Pathlike"], media_file: str, output_dir: Path
  ) -> Path:
  """Generate output caption path if not provided."""
- if not output_caption_path:
- media_name = Path(media_file).stem
- output_format = self.caption_config.output_format or "srt"
- output_caption_path = output_dir / f"{media_name}_LattifAI.{output_format}"
- return Path(output_caption_path)
+ if output_caption_path:
+ return Path(output_caption_path)
+ media_name = Path(media_file).stem
+ output_format = self.caption_config.output_format or "srt"
+ return output_dir / f"{media_name}_LattifAI.{output_format}"

  def _validate_transcription_setup(self) -> None:
  """Validate that transcription is properly configured if requested."""
@@ -290,12 +287,12 @@ class LattifAIClientMixin:
  diarization_file = Path(str(input_caption)).with_suffix(".SpkDiar")
  if diarization_file.exists():
  if verbose:
- safe_print(colorful.cyan(f"📖 Step 1b: Reading speaker diarization from {diarization_file}"))
+ safe_print(colorful.cyan(f"📖 Step1b: Reading speaker diarization from {diarization_file}"))
  caption.read_speaker_diarization(diarization_file)
  events_file = Path(str(input_caption)).with_suffix(".AED")
  if events_file.exists():
  if verbose:
- safe_print(colorful.cyan(f"📖 Step 1c: Reading audio events from {events_file}"))
+ safe_print(colorful.cyan(f"📖 Step1c: Reading audio events from {events_file}"))
  from tgt import read_textgrid

  caption.audio_events = read_textgrid(events_file)
@@ -332,6 +329,8 @@ class LattifAIClientMixin:
  result = caption.write(
  output_caption_path,
  include_speaker_in_text=self.caption_config.include_speaker_in_text,
+ word_level=self.caption_config.word_level,
+ karaoke_config=self.caption_config.karaoke,
  )
  diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
  if not diarization_file.exists() and caption.speaker_diarization:
@@ -353,14 +352,22 @@ class LattifAIClientMixin:
  output_dir: Path,
  media_format: str,
  force_overwrite: bool,
+ audio_track_id: Optional[str] = "original",
+ quality: str = "best",
  ) -> str:
  """Download media from YouTube (async implementation)."""
  safe_print(colorful.cyan("📥 Downloading media from YouTube..."))
+ if audio_track_id:
+ safe_print(colorful.cyan(f" Audio track: {audio_track_id}"))
+ if quality != "best":
+ safe_print(colorful.cyan(f" Quality: {quality}"))
  media_file = await self.downloader.download_media(
  url=url,
  output_dir=str(output_dir),
  media_format=media_format,
  force_overwrite=force_overwrite,
+ audio_track_id=audio_track_id,
+ quality=quality,
  )
  safe_print(colorful.green(f" ✓ Media downloaded: {media_file}"))
  return media_file
@@ -371,11 +378,15 @@ class LattifAIClientMixin:
  output_dir: Path,
  media_format: str,
  force_overwrite: bool,
+ audio_track_id: Optional[str] = "original",
+ quality: str = "best",
  ) -> str:
  """Download media from YouTube (sync wrapper)."""
  import asyncio

- return asyncio.run(self._download_media(url, output_dir, media_format, force_overwrite))
+ return asyncio.run(
+ self._download_media(url, output_dir, media_format, force_overwrite, audio_track_id, quality)
+ )

  def _transcribe(
  self,
@@ -404,6 +415,14 @@ class LattifAIClientMixin:
  # Transcription mode: use Transcriber to transcribe
  self._validate_transcription_setup()

+ if output_dir:
+ # Generate transcript file path
+ transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
+ if transcript_file.exists():
+ safe_print(colorful.cyan(f" Using existing transcript file: {transcript_file}"))
+ transcription = self._read_caption(transcript_file, normalize_text=False)
+ return transcription
+
  safe_print(colorful.cyan(f"🎤 Transcribing({self.transcriber.name}) media: {str(media_file)} ..."))
  transcription = await self.transcriber.transcribe_file(media_file, language=source_lang)
  safe_print(colorful.green(" ✓ Transcription completed."))
@@ -442,8 +461,6 @@ class LattifAIClientMixin:
  safe_print(colorful.yellow(f"First segment: {transcription.transcription[0].text}"))

  if output_dir:
- # Generate transcript file path
- transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
  await asyncio.to_thread(self.transcriber.write, transcription, transcript_file, encoding="utf-8")
  safe_print(colorful.green(f" ✓ Transcription saved to: {transcript_file}"))

@@ -479,11 +496,12 @@ class LattifAIClientMixin:
  """
  import asyncio

- from lattifai.workflow.youtube import TRANSCRIBE_CHOICE
+ from lattifai.workflow.file_manager import TRANSCRIBE_CHOICE

  transcriber_name = self.transcriber.name

  async def _async_impl():
+ nonlocal use_transcription # Allow modification of outer variable
  # First check if caption input_path is already provided
  if self.caption_config.input_path:
  caption_path = Path(self.caption_config.input_path)
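Two small behavioral details in the hunks above are easy to miss: output directories default to a lattifai_youtube folder under the system temp dir, and caption paths fall back to a "<media stem>_LattifAI.<format>" name. The standalone sketch below restates that naming rule; the helper itself is hypothetical, only the f-string pattern and the "srt" default come from the diff.

    # Hypothetical standalone helper restating the rule in _generate_output_caption_path above.
    from pathlib import Path

    def caption_path_for(media_file: str, output_dir: Path, output_format: str = "srt") -> Path:
        # In the real method an explicitly provided path wins; otherwise this pattern is used.
        return output_dir / f"{Path(media_file).stem}_LattifAI.{output_format}"

    print(caption_path_for("talk.mp4", Path("/tmp/lattifai_youtube")))
    # /tmp/lattifai_youtube/talk_LattifAI.srt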
lattifai/transcription/base.py CHANGED
@@ -41,8 +41,13 @@ class BaseTranscriber(ABC):
  self.logger = get_logger("transcription")

  @property
+ @abstractmethod
  def name(self) -> str:
- """Human-readable name of the transcriber."""
+ """Human-readable name of the transcriber.
+
+ Returns:
+ str: Identifier for the transcriber (e.g., 'gemini', 'parakeet').
+ """

  @property
  def file_name(self) -> str:
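With name now declared @abstractmethod, every concrete transcriber has to override the property. A minimal hypothetical subclass (class name and return value invented purely for illustration) would look like:

    from lattifai.transcription.base import BaseTranscriber

    class EchoTranscriber(BaseTranscriber):  # hypothetical, for illustration only
        @property
        def name(self) -> str:
            # Short identifier in the spirit of 'gemini' or 'parakeet' from the docstring.
            return "echo"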
lattifai/transcription/lattifai.py CHANGED
@@ -9,7 +9,6 @@ from lattifai.audio2 import AudioData
  from lattifai.caption import Caption, Supervision
  from lattifai.config import TranscriptionConfig
  from lattifai.transcription.base import BaseTranscriber
- from lattifai.transcription.prompts import get_prompt_loader # noqa: F401


  class LattifAITranscriber(BaseTranscriber):
@@ -20,61 +19,42 @@ class LattifAITranscriber(BaseTranscriber):
  Note: This transcriber only supports local file transcription, not URLs.
  """

- # Transcriber metadata
  file_suffix = ".ass"
  supports_url = False

- def __init__(
- self,
- transcription_config: TranscriptionConfig,
- ):
+ def __init__(self, transcription_config: TranscriptionConfig):
  """
- Initialize Gemini transcriber.
+ Initialize LattifAI transcriber.

  Args:
- transcription_config: Transcription configuration. If None, uses default.
+ transcription_config: Transcription configuration.
  """
- super().__init__(
- config=transcription_config,
- )
-
- self._system_prompt: Optional[str] = None
+ super().__init__(config=transcription_config)
  self._transcriber = None

  @property
  def name(self) -> str:
- return f"{self.config.model_name}"
-
- async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
- """
- URL transcription not supported for LattifAI local models.
-
- This method exists to satisfy the BaseTranscriber interface but
- will never be called because supports_url = False and the base
- class checks this flag before calling this method.
-
- Args:
- url: URL to transcribe (not supported)
- language: Optional language code (not used)
- """
- raise NotImplementedError(
- f"{self.__class__.__name__} does not support URL transcription. "
- f"Please download the file first and use transcribe_file()."
- )
+ return self.config.model_name

- async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
+ def _ensure_transcriber(self):
+ """Lazily initialize the core transcriber."""
  if self._transcriber is None:
  from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber

  self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
+ return self._transcriber

- transcription, audio_events = self._transcriber.transcribe(media_file, language=language, num_workers=2)
- caption = Caption.from_transcription_results(
- transcription=transcription,
- audio_events=audio_events,
+ async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
+ """URL transcription not supported for LattifAI local models."""
+ raise NotImplementedError(
+ f"{self.__class__.__name__} does not support URL transcription. "
+ "Please download the file first and use transcribe_file()."
  )

- return caption
+ async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
+ transcriber = self._ensure_transcriber()
+ transcription, audio_events = transcriber.transcribe(media_file, language=language, num_workers=2)
+ return Caption.from_transcription_results(transcription=transcription, audio_events=audio_events)

  def transcribe_numpy(
  self,
@@ -92,13 +72,8 @@ class LattifAITranscriber(BaseTranscriber):
  Returns:
  Supervision object (or list of Supervision objects) with transcription and alignment info.
  """
- if self._transcriber is None:
- from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber
-
- self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
-
- # Delegate to core transcriber which handles both single arrays and lists
- return self._transcriber.transcribe(
+ transcriber = self._ensure_transcriber()
+ return transcriber.transcribe(
  audio, language=language, return_hypotheses=True, progress_bar=False, timestamps=True
  )[0]

@@ -119,13 +94,3 @@
  write_to_file(transcript.audio_events, events_file, format="long")

  return output_file
-
- def _get_transcription_prompt(self) -> str:
- """Get (and cache) transcription system prompt from prompts module."""
- if self._system_prompt is not None:
- return self._system_prompt
-
- base_prompt = "" # TODO
-
- self._system_prompt = base_prompt
- return self._system_prompt
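The main refactor above moves the one-time load of the core model into _ensure_transcriber(), which transcribe_file() and transcribe_numpy() now share. The sketch below isolates that lazy-initialization pattern; the import and from_pretrained call are copied from the diff, while the wrapper class around them is invented for illustration.

    # Sketch of the lazy-load pattern consolidated into _ensure_transcriber().
    class _LazyTranscriberHolder:  # hypothetical wrapper, for illustration only
        def __init__(self, config):
            self._config = config
            self._transcriber = None

        def get(self):
            if self._transcriber is None:
                # Deferred import: lattifai_core is only touched when transcription is requested.
                from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber

                self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self._config)
            return self._transcriber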
lattifai/utils.py CHANGED
@@ -44,6 +44,49 @@ def safe_print(text: str, **kwargs) -> None:
  print(text.encode("utf-8", errors="replace").decode("utf-8"), **kwargs)


+ def _get_cache_marker_path(cache_dir: Path) -> Path:
+ """Get the path for the cache marker file with current date."""
+ today = datetime.now().strftime("%Y%m%d")
+ return cache_dir / f".done{today}"
+
+
+ def _is_cache_valid(cache_dir: Path) -> bool:
+ """Check if cached model is valid (exists and not older than 1 days)."""
+ if not cache_dir.exists():
+ return False
+
+ # Find any .done* marker files
+ marker_files = list(cache_dir.glob(".done*"))
+ if not marker_files:
+ return False
+
+ # Get the most recent marker file
+ latest_marker = max(marker_files, key=lambda p: p.stat().st_mtime)
+
+ # Extract date from marker filename (format: .doneYYYYMMDD)
+ try:
+ date_str = latest_marker.name.replace(".done", "")
+ marker_date = datetime.strptime(date_str, "%Y%m%d")
+ # Check if marker is older than 1 days
+ if datetime.now() - marker_date > timedelta(days=7):
+ return False
+ return True
+ except (ValueError, IndexError):
+ # Invalid marker file format, treat as invalid cache
+ return False
+
+
+ def _create_cache_marker(cache_dir: Path) -> None:
+ """Create a cache marker file with current date and clean old markers."""
+ # Remove old marker files
+ for old_marker in cache_dir.glob(".done*"):
+ old_marker.unlink(missing_ok=True)
+
+ # Create new marker file
+ marker_path = _get_cache_marker_path(cache_dir)
+ marker_path.touch()
+
+
  def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface") -> str:
  """Resolve model path, downloading from the specified model hub when necessary.

@@ -51,21 +94,17 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
  model_name_or_path: Local path or remote model identifier.
  model_hub: Which hub to use for downloads. Supported: "huggingface", "modelscope".
  """
- if Path(model_name_or_path).expanduser().exists():
- return str(Path(model_name_or_path).expanduser())
+ local_path = Path(model_name_or_path).expanduser()
+ if local_path.exists():
+ return str(local_path)

- # Normalize hub name
  hub = (model_hub or "huggingface").lower()
-
  if hub not in ("huggingface", "modelscope"):
  raise ValueError(f"Unsupported model_hub: {model_hub}. Supported: 'huggingface', 'modelscope'.")

- # If local path exists, return it regardless of hub
- if Path(model_name_or_path).expanduser().exists():
- return str(Path(model_name_or_path).expanduser())
-
  if hub == "huggingface":
  from huggingface_hub import HfApi, snapshot_download
+ from huggingface_hub.constants import HF_HUB_CACHE
  from huggingface_hub.errors import LocalEntryNotFoundError

  # Support repo_id@revision syntax
@@ -74,6 +113,20 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
  if "@" in model_name_or_path:
  hf_repo_id, revision = model_name_or_path.split("@", 1)

+ # Determine cache directory for this model
+ cache_dir = Path(HF_HUB_CACHE) / f'models--{hf_repo_id.replace("/", "--")}'
+
+ # Check if we have a valid cached version
+ if _is_cache_valid(cache_dir):
+ # Return the snapshot path (latest version)
+ snapshots_dir = cache_dir / "snapshots"
+ if snapshots_dir.exists():
+ snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
+ if snapshot_dirs:
+ # Return the most recent snapshot
+ latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
+ return str(latest_snapshot)
+
  # If no specific revision/commit is provided, try to fetch the real latest SHA
  # to bypass Hugging Face's model_info (metadata) sync lag.
  if not revision:
@@ -91,6 +144,7 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")

  try:
  downloaded_path = snapshot_download(repo_id=hf_repo_id, repo_type="model", revision=revision)
+ _create_cache_marker(cache_dir)
  return downloaded_path
  except LocalEntryNotFoundError:
  # Fall back to modelscope if HF entry not found
@@ -113,8 +167,23 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
  # modelscope path
  from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot

+ # Determine cache directory for ModelScope
+ # ModelScope uses ~/.cache/modelscope/hub/models/{org}/{model} structure
+ modelscope_cache = Path.home() / ".cache" / "modelscope" / "hub" / "models"
+ cache_dir = modelscope_cache / model_name_or_path
+
+ # Check if we have a valid cached version
+ if _is_cache_valid(cache_dir):
+ # Return the cached path directly
+ if cache_dir.exists():
+ return str(cache_dir)
+
  try:
  downloaded_path = ms_snapshot(model_name_or_path)
+ # Create cache marker after successful download
+ if downloaded_path:
+ actual_cache_dir = Path(downloaded_path)
+ _create_cache_marker(actual_cache_dir)
  return downloaded_path
  except Exception as e: # pragma: no cover
  raise ModelLoadError(model_name_or_path, original_error=e)
@@ -127,9 +196,8 @@ def _select_device(device: Optional[str]) -> str:

  import torch

- detected = "cpu"
  if torch.backends.mps.is_available():
- detected = "mps"
- elif torch.cuda.is_available():
- detected = "cuda"
- return detected
+ return "mps"
+ if torch.cuda.is_available():
+ return "cuda"
+ return "cpu"
lattifai/workflow/__init__.py CHANGED
@@ -1,8 +1,34 @@
- """
- LattifAI Agentic Workflows
+ """LattifAI Agentic Workflows.

  This module provides agentic workflow capabilities for automated processing
  of multimedia content through intelligent agent-based pipelines.
+
+ Key Components:
+ WorkflowAgent: Abstract base class for implementing workflow agents.
+ Provides step-based execution with retry logic, state management,
+ and consistent logging.
+
+ WorkflowStep: Defines individual workflow steps with timing and
+ execution status tracking.
+
+ WorkflowResult: Encapsulates workflow execution results including
+ status, outputs, errors, and timing information.
+
+ FileExistenceManager: Handles file existence conflicts during workflows,
+ supporting interactive and automatic resolution modes.
+
+ Example:
+ >>> from lattifai.workflow import WorkflowAgent, WorkflowStep, WorkflowResult
+ >>> class MyWorkflow(WorkflowAgent):
+ ... def define_steps(self):
+ ... return [WorkflowStep("download"), WorkflowStep("process")]
+ ... def execute_step(self, step, context):
+ ... # Implementation
+ ... pass
+
+ See Also:
+ - lattifai.client.LattifAI: Main client that orchestrates workflows
+ - lattifai.youtube: YouTube-specific workflow integration
  """

  # Import transcript processing functionality
@@ -10,13 +36,11 @@ of multimedia content through intelligent agent-based pipelines.

  from .base import WorkflowAgent, WorkflowResult, WorkflowStep
  from .file_manager import TRANSCRIBE_CHOICE, FileExistenceManager
- from .youtube import YouTubeDownloader

  __all__ = [
  "WorkflowAgent",
  "WorkflowStep",
  "WorkflowResult",
  "FileExistenceManager",
- "YouTubeDownloader",
  "TRANSCRIBE_CHOICE",
  ]
lattifai/workflow/file_manager.py CHANGED
@@ -1,6 +1,4 @@
- """.
- File existence management utilities for video processing workflows
- """
+ """File existence management utilities for video processing workflows."""

  import asyncio
  import os
@@ -187,8 +185,7 @@ class FileExistenceManager:
  if not files:
  return "proceed"

- emoji, label = FileExistenceManager.FILE_TYPE_INFO.get(file_type, ("📄", file_type.capitalize()))
- del emoji # Unused variable
+ _, label = FileExistenceManager.FILE_TYPE_INFO.get(file_type, ("📄", file_type.capitalize()))

  # Header with warning color
  safe_print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
lattifai/youtube/__init__.py CHANGED
@@ -0,0 +1,43 @@
+ """YouTube Data Acquisition Module.
+
+ This module provides YouTube video metadata extraction, media download,
+ and caption retrieval functionality powered by yt-dlp.
+
+ Key Components:
+ YoutubeLoader: Lightweight loader for fetching video metadata and
+ caption content in memory. Use this for quick metadata lookups
+ or when you don't need to save files to disk.
+
+ YouTubeDownloader: Full-featured downloader for media files and
+ captions with disk persistence. Supports various output formats
+ and quality settings.
+
+ VideoMetadata: Dataclass containing video information (title, duration,
+ channel, upload date, available captions, etc.).
+
+ CaptionTrack: Represents a single caption track with language code,
+ format, and content retrieval methods.
+
+ Features:
+ - Proxy and cookie support for geo-restricted content
+ - Automatic caption format detection (manual vs auto-generated)
+ - Multiple audio/video format options
+ - Async and sync download APIs
+
+ Example:
+ >>> from lattifai.youtube import YoutubeLoader, VideoMetadata
+ >>> loader = YoutubeLoader()
+ >>> metadata = loader.get_metadata("https://youtube.com/watch?v=...")
+ >>> print(metadata.title, metadata.duration)
+
+ Requirements:
+ yt-dlp must be installed: `pip install yt-dlp`
+
+ See Also:
+ - lattifai.client.LattifAI.youtube: High-level YouTube workflow method
+ """
+
+ from .client import YouTubeDownloader, YoutubeLoader
+ from .types import CaptionTrack, VideoMetadata
+
+ __all__ = ["YoutubeLoader", "YouTubeDownloader", "VideoMetadata", "CaptionTrack"]