lattifai 1.0.5__py3-none-any.whl → 1.2.0__py3-none-any.whl

@@ -1,11 +1,12 @@
 """LattifAI speaker diarization implementation."""
 
 import logging
-from collections import defaultdict
-from typing import List, Optional, Tuple
+from dataclasses import dataclass
+from typing import Callable, List, Optional, Tuple
 
-import torch
-from tgt import Interval, IntervalTier, TextGrid
+import numpy as np
+from lattifai_core.diarization import DiarizationOutput
+from tgt import TextGrid
 
 from lattifai.audio2 import AudioData
 from lattifai.caption import Supervision
@@ -60,7 +61,7 @@ class LattifAIDiarizer:
         num_speakers: Optional[int] = None,
         min_speakers: Optional[int] = None,
         max_speakers: Optional[int] = None,
-    ) -> TextGrid:
+    ) -> DiarizationOutput:
         """Perform speaker diarization on the input audio."""
         return self.diarizer.diarize(
             input_media,
@@ -73,11 +74,16 @@ class LattifAIDiarizer:
         self,
         input_media: AudioData,
         alignments: List[Supervision],
-        diarization: Optional[TextGrid] = None,
+        diarization: Optional[DiarizationOutput] = None,
         num_speakers: Optional[int] = None,
         min_speakers: Optional[int] = None,
         max_speakers: Optional[int] = None,
-    ) -> Tuple[TextGrid, List[Supervision]]:
+        alignment_fn: Optional[Callable] = None,
+        transcribe_fn: Optional[Callable] = None,
+        separate_fn: Optional[Callable] = None,
+        debug: bool = False,
+        output_path: Optional[str] = None,
+    ) -> Tuple[DiarizationOutput, List[Supervision]]:
         """Diarize the given media input and return alignments with refined speaker labels."""
         return self.diarizer.diarize_with_alignments(
             input_media,
@@ -86,4 +92,9 @@ class LattifAIDiarizer:
             num_speakers=num_speakers,
             min_speakers=min_speakers,
             max_speakers=max_speakers,
+            alignment_fn=alignment_fn,
+            transcribe_fn=transcribe_fn,
+            separate_fn=separate_fn,
+            debug=debug,
+            output_path=output_path,
         )
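
The new keyword hooks let callers inject their own alignment, transcription, and separation callables, while debug and output_path control debug artifacts. A minimal usage sketch; the diarizer construction, the hook signature, and the output path below are assumptions, not part of this diff:

    import numpy as np

    diarizer = LattifAIDiarizer()  # construction details assumed

    def my_transcribe_fn(segment: np.ndarray):
        # Hypothetical hook: return a transcript for one audio segment.
        ...

    diarization, supervisions = diarizer.diarize_with_alignments(
        input_media,                     # an AudioData instance
        alignments,                      # existing List[Supervision]
        num_speakers=2,
        transcribe_fn=my_transcribe_fn,  # new in 1.2.0
        debug=True,                      # new in 1.2.0
        output_path="debug_out",         # new in 1.2.0; expected format assumed
    )
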
lattifai/mixin.py CHANGED
@@ -184,7 +184,9 @@ class LattifAIClientMixin:
         from lattifai.utils import _resolve_model_path
 
         if transcription_config is not None:
-            transcription_config.lattice_model_path = _resolve_model_path(alignment_config.model_name)
+            transcription_config.lattice_model_path = _resolve_model_path(
+                alignment_config.model_name, getattr(alignment_config, "model_hub", "huggingface")
+            )
 
         # Set client_wrapper for all configs
         alignment_config.client_wrapper = self
@@ -380,6 +382,7 @@ class LattifAIClientMixin:
         media_file: Union[str, Path, AudioData],
         source_lang: Optional[str],
         is_async: bool = False,
+        output_dir: Optional[Path] = None,
     ) -> Caption:
         """
         Get captions by downloading or transcribing.
@@ -406,6 +409,9 @@ class LattifAIClientMixin:
         safe_print(colorful.green(" ✓ Transcription completed."))
 
         if "gemini" in self.transcriber.name.lower():
+            safe_print(colorful.yellow("🔍 Gemini raw output:"))
+            safe_print(colorful.yellow(f"{transcription[:1000]}..."))  # Print first 1000 chars
+
             # write to temp file and use Caption read
             # On Windows, we need to close the file before writing to it
             tmp_file = tempfile.NamedTemporaryFile(
@@ -428,6 +434,18 @@ class LattifAIClientMixin:
             # Clean up temp file
             if tmp_path.exists():
                 tmp_path.unlink()
+        else:
+            safe_print(colorful.yellow(f"🔍 {self.transcriber.name} raw output:"))
+            if isinstance(transcription, Caption):
+                safe_print(colorful.yellow(f"Caption with {len(transcription.transcription)} segments"))
+                if transcription.transcription:
+                    safe_print(colorful.yellow(f"First segment: {transcription.transcription[0].text}"))
+
+            if output_dir:
+                # Generate transcript file path
+                transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
+                await asyncio.to_thread(self.transcriber.write, transcription, transcript_file, encoding="utf-8")
+                safe_print(colorful.green(f" ✓ Transcription saved to: {transcript_file}"))
 
         return transcription
 
@@ -473,10 +491,13 @@ class LattifAIClientMixin:
                 safe_print(colorful.green(f"📄 Using provided caption file: {caption_path}"))
                 return str(caption_path)
             else:
-                raise FileNotFoundError(f"Provided caption path does not exist: {caption_path}")
-
-        # Generate transcript file path
-        transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
+                safe_print(colorful.red(f"Provided caption path does not exist: {caption_path}, use transcription"))
+                use_transcription = True
+                transcript_file = caption_path
+                caption_path.parent.mkdir(parents=True, exist_ok=True)
+        else:
+            # Generate transcript file path
+            transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
 
         if use_transcription:
             # Transcription mode: use Transcriber to transcribe
@@ -70,7 +70,7 @@ def create_transcriber(
     raise ValueError(
         f"Cannot determine transcriber for model_name='{transcription_config.model_name}'. "
         f"Supported patterns: \n"
-        f"  - Gemini API models: 'gemini-2.5-pro', 'gemini-3-pro-preview'\n"
+        f"  - Gemini API models: 'gemini-2.5-pro', 'gemini-3-pro-preview', 'gemini-3-flash-preview'\n"
         f"  - Local HF models: 'nvidia/parakeet-*', 'iic/SenseVoiceSmall', etc.\n"
         f"Please specify a valid model_name."
     )
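
With the added pattern, a config naming the new Flash preview model resolves to the Gemini transcriber. A quick sketch (the keyword argument is inferred from the parameter name used in the message above):

    config = TranscriptionConfig(model_name="gemini-3-flash-preview")
    transcriber = create_transcriber(transcription_config=config)
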
@@ -2,10 +2,12 @@
 
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
+
+import numpy as np
 
 from lattifai.audio2 import AudioData
-from lattifai.caption import Caption
+from lattifai.caption import Caption, Supervision
 from lattifai.config import TranscriptionConfig
 from lattifai.logging import get_logger
 
@@ -96,6 +98,23 @@ class BaseTranscriber(ABC):
             language: Optional language code for transcription.
         """
 
+    @abstractmethod
+    def transcribe_numpy(
+        self,
+        audio: Union[np.ndarray, List[np.ndarray]],
+        language: Optional[str] = None,
+    ) -> Union[Supervision, List[Supervision]]:
+        """
+        Transcribe audio from a numpy array (or list of arrays) and return Supervision.
+
+        Args:
+            audio: Audio data as numpy array (shape: [samples]),
+                or a list of such arrays for batch processing.
+            language: Optional language code for transcription.
+
+        Returns:
+            Supervision object (or list of Supervision objects) with transcription info.
+        """
+
     @abstractmethod
     def write(self, transcript: Union[str, Caption], output_file: Path, encoding: str = "utf-8") -> Path:
         """
@@ -2,12 +2,14 @@
 
 import asyncio
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
 
+import numpy as np
 from google import genai
 from google.genai.types import GenerateContentConfig, Part, ThinkingConfig
 
 from lattifai.audio2 import AudioData
+from lattifai.caption import Supervision
 from lattifai.config import TranscriptionConfig
 from lattifai.transcription.base import BaseTranscriber
 from lattifai.transcription.prompts import get_prompt_loader
@@ -118,6 +120,130 @@ class GeminiTranscriber(BaseTranscriber):
         self.logger.error(f"Gemini transcription failed: {str(e)}")
         raise RuntimeError(f"Gemini transcription failed: {str(e)}")
 
+    def transcribe_numpy(
+        self,
+        audio: Union[np.ndarray, List[np.ndarray]],
+        language: Optional[str] = None,
+    ) -> Union[Supervision, List[Supervision]]:
+        """
+        Transcribe audio from a numpy array (or list of arrays) and return Supervision.
+
+        Note: Gemini API does not support word-level alignment. The returned
+        Supervision will contain only the full transcription text without alignment.
+
+        Args:
+            audio: Audio data as numpy array (shape: [samples]),
+                or a list of such arrays for batch processing.
+            language: Optional language code for transcription.
+
+        Returns:
+            Supervision object (or list of Supervision objects) with transcription text (no alignment).
+
+        Raises:
+            ValueError: If API key not provided
+            RuntimeError: If transcription fails
+        """
+        # Handle batch processing
+        if isinstance(audio, list):
+            return [self.transcribe_numpy(arr, language=language) for arr in audio]
+
+        audio_array = audio
+        # Use default sample rate of 16000 Hz
+        sample_rate = 16000
+
+        if self.config.verbose:
+            self.logger.info(f"🎤 Starting Gemini transcription for numpy array (sample_rate={sample_rate})")
+
+        # Ensure audio is in the correct shape
+        if audio_array.ndim == 1:
+            audio_array = audio_array.reshape(1, -1)
+        elif audio_array.ndim > 2:
+            raise ValueError(f"Audio array must be 1D or 2D, got shape {audio_array.shape}")
+
+        # Save numpy array to temporary file
+        import tempfile
+
+        import soundfile as sf
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            # Transpose to (samples, channels) for soundfile
+            sf.write(tmp_file.name, audio_array.T, sample_rate)
+            tmp_path = Path(tmp_file.name)
+
+        try:
+            # Transcribe using simple ASR prompt
+            import asyncio
+
+            transcript = asyncio.run(self._transcribe_with_simple_prompt(tmp_path, language=language))
+
+            # Create Supervision object from transcript
+            duration = audio_array.shape[-1] / sample_rate
+            supervision = Supervision(
+                id="gemini-transcription",
+                recording_id="numpy-array",
+                start=0.0,
+                duration=duration,
+                text=transcript,
+                speaker=None,
+                alignment=None,  # Gemini does not provide word-level alignment
+            )
+
+            return supervision
+
+        finally:
+            # Clean up temporary file
+            if tmp_path.exists():
+                tmp_path.unlink()
+
+    async def _transcribe_with_simple_prompt(self, media_file: Path, language: Optional[str] = None) -> str:
+        """
+        Transcribe audio using a simple ASR prompt instead of complex instructions.
+
+        Args:
+            media_file: Path to audio file
+            language: Optional language code
+
+        Returns:
+            Transcribed text
+        """
+        client = self._get_client()
+
+        # Upload audio file
+        if self.config.verbose:
+            self.logger.info("📤 Uploading audio file to Gemini...")
+        uploaded_file = client.files.upload(file=str(media_file))
+
+        # Simple ASR prompt
+        system_prompt = "Transcribe the audio."
+        if language:
+            system_prompt = f"Transcribe the audio in {language}."
+
+        # Create simple generation config
+        simple_config = GenerateContentConfig(
+            system_instruction=system_prompt,
+            response_modalities=["TEXT"],
+        )
+
+        contents = Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type)
+        response = await asyncio.get_event_loop().run_in_executor(
+            None,
+            lambda: client.models.generate_content(
+                model=self.config.model_name,
+                contents=contents,
+                config=simple_config,
+            ),
+        )
+
+        if not response.text:
+            raise RuntimeError("Empty response from Gemini API")
+
+        transcript = response.text.strip()
+
+        if self.config.verbose:
+            self.logger.info(f"✅ Transcription completed: {len(transcript)} characters")
+
+        return transcript
+
     def _get_transcription_prompt(self) -> str:
         """Get (and cache) transcription system prompt from prompts module."""
         if self._system_prompt is not None:
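
A short usage sketch for the new numpy entry point. The constructor call below is assumed; the 16 kHz default, the batch behavior, and the absent alignment come from the code above:

    import numpy as np

    transcriber = GeminiTranscriber(TranscriptionConfig(model_name="gemini-2.5-pro"))  # construction assumed

    clip = np.zeros(16000, dtype=np.float32)           # one second at the assumed 16 kHz default
    sup = transcriber.transcribe_numpy(clip, language="en")
    print(sup.text, sup.duration)                      # sup.alignment is None for Gemini

    sups = transcriber.transcribe_numpy([clip, clip])  # list in, list of Supervisions out
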
@@ -1,10 +1,12 @@
 """Transcription module with config-driven architecture."""
 
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
+
+import numpy as np
 
 from lattifai.audio2 import AudioData
-from lattifai.caption import Caption
+from lattifai.caption import Caption, Supervision
 from lattifai.config import TranscriptionConfig
 from lattifai.transcription.base import BaseTranscriber
 from lattifai.transcription.prompts import get_prompt_loader  # noqa: F401
@@ -74,6 +76,32 @@ class LattifAITranscriber(BaseTranscriber):
 
         return caption
 
+    def transcribe_numpy(
+        self,
+        audio: Union[np.ndarray, List[np.ndarray]],
+        language: Optional[str] = None,
+    ) -> Union[Supervision, List[Supervision]]:
+        """
+        Transcribe audio from a numpy array (or list of arrays) and return Supervision.
+
+        Args:
+            audio: Audio data as numpy array (shape: [samples]),
+                or a list of such arrays for batch processing.
+            language: Optional language code for transcription.
+
+        Returns:
+            Supervision object (or list of Supervision objects) with transcription and alignment info.
+        """
+        if self._transcriber is None:
+            from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber
+
+            self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
+
+        # Delegate to core transcriber, which handles both single arrays and lists
+        return self._transcriber.transcribe(
+            audio, language=language, return_hypotheses=True, progress_bar=False, timestamps=True
+        )[0]
+
     def write(
         self, transcript: Caption, output_file: Path, encoding: str = "utf-8", cache_audio_events: bool = True
     ) -> Path:
lattifai/utils.py CHANGED
@@ -1,10 +1,9 @@
 """Shared utility helpers for the LattifAI SDK."""
 
-import os
 import sys
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Optional, Type
+from typing import Optional
 
 from lattifai.errors import ModelLoadError
 
@@ -45,85 +44,79 @@ def safe_print(text: str, **kwargs) -> None:
     print(text.encode("utf-8", errors="replace").decode("utf-8"), **kwargs)
 
 
-def _get_cache_marker_path(cache_dir: Path) -> Path:
-    """Get the path for the cache marker file with current date."""
-    today = datetime.now().strftime("%Y%m%d")
-    return cache_dir / f".done{today}"
+def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface") -> str:
+    """Resolve model path, downloading from the specified model hub when necessary.
 
+    Args:
+        model_name_or_path: Local path or remote model identifier.
+        model_hub: Which hub to use for downloads. Supported: "huggingface", "modelscope".
+    """
+    if Path(model_name_or_path).expanduser().exists():
+        return str(Path(model_name_or_path).expanduser())
 
-def _is_cache_valid(cache_dir: Path) -> bool:
-    """Check if cached model is valid (exists and not older than 1 days)."""
-    if not cache_dir.exists():
-        return False
-
-    # Find any .done* marker files
-    marker_files = list(cache_dir.glob(".done*"))
-    if not marker_files:
-        return False
+    # Normalize hub name
+    hub = (model_hub or "huggingface").lower()
 
-    # Get the most recent marker file
-    latest_marker = max(marker_files, key=lambda p: p.stat().st_mtime)
+    if hub not in ("huggingface", "modelscope"):
+        raise ValueError(f"Unsupported model_hub: {model_hub}. Supported: 'huggingface', 'modelscope'.")
 
-    # Extract date from marker filename (format: .doneYYYYMMDD)
-    try:
-        date_str = latest_marker.name.replace(".done", "")
-        marker_date = datetime.strptime(date_str, "%Y%m%d")
-        # Check if marker is older than 1 days
-        if datetime.now() - marker_date > timedelta(days=1):
-            return False
-        return True
-    except (ValueError, IndexError):
-        # Invalid marker file format, treat as invalid cache
-        return False
-
-
-def _create_cache_marker(cache_dir: Path) -> None:
-    """Create a cache marker file with current date and clean old markers."""
-    # Remove old marker files
-    for old_marker in cache_dir.glob(".done*"):
-        old_marker.unlink(missing_ok=True)
-
-    # Create new marker file
-    marker_path = _get_cache_marker_path(cache_dir)
-    marker_path.touch()
-
-
-def _resolve_model_path(model_name_or_path: str) -> str:
-    """Resolve model path, downloading from Hugging Face when necessary."""
+    # If local path exists, return it regardless of hub
     if Path(model_name_or_path).expanduser().exists():
         return str(Path(model_name_or_path).expanduser())
 
-    from huggingface_hub import snapshot_download
-    from huggingface_hub.constants import HF_HUB_CACHE
-    from huggingface_hub.errors import LocalEntryNotFoundError
+    if hub == "huggingface":
+        from huggingface_hub import HfApi, snapshot_download
+        from huggingface_hub.errors import LocalEntryNotFoundError
 
-    # Determine cache directory for this model
-    cache_dir = Path(HF_HUB_CACHE) / f'models--{model_name_or_path.replace("/", "--")}'
+        # Support repo_id@revision syntax
+        hf_repo_id = model_name_or_path
+        revision = None
+        if "@" in model_name_or_path:
+            hf_repo_id, revision = model_name_or_path.split("@", 1)
 
-    # Check if we have a valid cached version
-    if _is_cache_valid(cache_dir):
-        # Return the snapshot path (latest version)
-        snapshots_dir = cache_dir / "snapshots"
-        if snapshots_dir.exists():
-            snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
-            if snapshot_dirs:
-                # Return the most recent snapshot
-                latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
-                return str(latest_snapshot)
+        # If no specific revision/commit is provided, try to fetch the real latest SHA
+        # to bypass Hugging Face's model_info (metadata) sync lag.
+        if not revision:
+            try:
+                api = HfApi()
+                refs = api.list_repo_refs(repo_id=hf_repo_id, repo_type="model")
+                # Look for the default branch (usually 'main')
+                for branch in refs.branches:
+                    if branch.name == "main":
+                        revision = branch.target_commit
+                        break
+            except Exception:
+                # Fallback to default behavior if API call fails
+                revision = None
 
-    try:
-        downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
-        _create_cache_marker(cache_dir)
-        return downloaded_path
-    except LocalEntryNotFoundError:
         try:
-            os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
-            downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
-            _create_cache_marker(cache_dir)
+            downloaded_path = snapshot_download(repo_id=hf_repo_id, repo_type="model", revision=revision)
             return downloaded_path
-        except Exception as e:  # pragma: no cover - bubble up for caller context
-            raise ModelLoadError(model_name_or_path, original_error=e)
-    except Exception as e:  # pragma: no cover - unexpected download issue
+        except LocalEntryNotFoundError:
+            # Fall back to modelscope if HF entry not found
+            try:
+                from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
+
+                downloaded_path = ms_snapshot(model_name_or_path)
+                return downloaded_path
+            except Exception as e:  # pragma: no cover - bubble up for caller context
+                raise ModelLoadError(model_name_or_path, original_error=e)
+        except Exception as e:  # pragma: no cover - unexpected download issue
+            import colorful
+
+            print(colorful.red | f"Error downloading from Hugging Face Hub: {e}. Trying ModelScope...")
+            from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
+
+            downloaded_path = ms_snapshot(model_name_or_path)
+            return downloaded_path
+
+    # modelscope path
+    from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot
+
+    try:
+        downloaded_path = ms_snapshot(model_name_or_path)
+        return downloaded_path
+    except Exception as e:  # pragma: no cover
         raise ModelLoadError(model_name_or_path, original_error=e)
 
 
@@ -429,79 +429,77 @@ class YouTubeDownloader:
             result = await loop.run_in_executor(
                 None, lambda: subprocess.run(ytdlp_options, capture_output=True, text=True, check=True)
             )
-
             # Only log success message, not full yt-dlp output
             self.logger.debug(f"yt-dlp output: {result.stdout.strip()}")
-
-            # Find the downloaded transcript file
-            caption_patterns = [
-                f"{video_id}.*vtt",
-                f"{video_id}.*srt",
-                f"{video_id}.*sub",
-                f"{video_id}.*sbv",
-                f"{video_id}.*ssa",
-                f"{video_id}.*ass",
-            ]
-
-            caption_files = []
-            for pattern in caption_patterns:
-                _caption_files = list(target_dir.glob(pattern))
-                for caption_file in _caption_files:
-                    self.logger.info(f"📥 Downloaded caption: {caption_file}")
-                caption_files.extend(_caption_files)
-
-            if not caption_files:
-                self.logger.warning("No caption available for this video")
-                return None
-
-            # If only one caption file, return it directly
-            if len(caption_files) == 1:
-                self.logger.info(f"✅ Using caption: {caption_files[0]}")
-                return str(caption_files[0])
-
-            # Multiple caption files found, let user choose
-            if FileExistenceManager.is_interactive_mode():
-                self.logger.info(f"📋 Found {len(caption_files)} caption files")
-                caption_choice = FileExistenceManager.prompt_file_selection(
-                    file_type="caption",
-                    files=[str(f) for f in caption_files],
-                    operation="use",
-                    transcriber_name=transcriber_name,
-                )
-
-                if caption_choice == "cancel":
-                    raise RuntimeError("Caption selection cancelled by user")
-                elif caption_choice == TRANSCRIBE_CHOICE:
-                    return caption_choice
-                elif caption_choice:
-                    self.logger.info(f"✅ Selected caption: {caption_choice}")
-                    return caption_choice
-                else:
-                    # Fallback to first file
-                    self.logger.info(f"✅ Using first caption: {caption_files[0]}")
-                    return str(caption_files[0])
-            else:
-                # Non-interactive mode: use first file
-                self.logger.info(f"✅ Using first caption: {caption_files[0]}")
-                return str(caption_files[0])
-
         except subprocess.CalledProcessError as e:
             error_msg = e.stderr.strip() if e.stderr else str(e)
 
             # Check for specific error conditions
             if "No automatic or manual captions found" in error_msg:
                 self.logger.warning("No captions available for this video")
-                return None
             elif "HTTP Error 429" in error_msg or "Too Many Requests" in error_msg:
                 self.logger.error("YouTube rate limit exceeded. Please try again later or use a different method.")
-                raise RuntimeError(
+                self.logger.error(
                     "YouTube rate limit exceeded (HTTP 429). "
                     "Try again later or use --cookies option with authenticated cookies. "
                     "See: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp"
                 )
             else:
                 self.logger.error(f"Failed to download transcript: {error_msg}")
-                raise RuntimeError(f"Failed to download transcript: {error_msg}")
+
+        # Find the downloaded transcript file
+        caption_patterns = [
+            f"{video_id}.*vtt",
+            f"{video_id}.*srt",
+            f"{video_id}.*sub",
+            f"{video_id}.*sbv",
+            f"{video_id}.*ssa",
+            f"{video_id}.*ass",
+        ]
+
+        caption_files = []
+        for pattern in caption_patterns:
+            _caption_files = list(target_dir.glob(pattern))
+            for caption_file in _caption_files:
+                self.logger.info(f"📥 Downloaded caption: {caption_file}")
+            caption_files.extend(_caption_files)
+
+        # If only one caption file, return it directly
+        if len(caption_files) == 1:
+            self.logger.info(f"✅ Using caption: {caption_files[0]}")
+            return str(caption_files[0])
+
+        # Multiple caption files found, let user choose
+        if FileExistenceManager.is_interactive_mode():
+            self.logger.info(f"📋 Found {len(caption_files)} caption files")
+            caption_choice = FileExistenceManager.prompt_file_selection(
+                file_type="caption",
+                files=[str(f) for f in caption_files],
+                operation="use",
+                transcriber_name=transcriber_name,
+            )
+
+            if caption_choice == "cancel":
+                raise RuntimeError("Caption selection cancelled by user")
+            elif caption_choice == TRANSCRIBE_CHOICE:
+                return caption_choice
+            elif caption_choice:
+                self.logger.info(f"✅ Selected caption: {caption_choice}")
+                return caption_choice
+            elif caption_files:
+                # Fallback to first file
+                self.logger.info(f"✅ Using first caption: {caption_files[0]}")
+                return str(caption_files[0])
+            else:
+                self.logger.warning("No caption files available after download")
+                return None
+        elif caption_files:
+            # Non-interactive mode: use first file
+            self.logger.info(f"✅ Using first caption: {caption_files[0]}")
+            return str(caption_files[0])
+        else:
+            self.logger.warning("No caption files available after download")
+            return None
 
     async def list_available_captions(self, url: str) -> List[Dict[str, Any]]:
         """