PyPI - lattifai - Versions diffs - 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

lattifai 1.2.2py3-none-any.whl → 1.3.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

lattifai/_init.py +20 -0
lattifai/alignment/__init__.py +2 -3
lattifai/alignment/lattice1_aligner.py +117 -4
lattifai/alignment/lattice1_worker.py +47 -4
lattifai/alignment/segmenter.py +3 -2
lattifai/alignment/text_align.py +2 -1
lattifai/alignment/tokenizer.py +56 -29
lattifai/audio2.py +162 -183
lattifai/cli/alignment.py +5 -0
lattifai/cli/caption.py +6 -6
lattifai/cli/transcribe.py +1 -5
lattifai/cli/youtube.py +3 -0
lattifai/client.py +41 -12
lattifai/config/__init__.py +21 -3
lattifai/config/alignment.py +7 -0
lattifai/config/caption.py +13 -243
lattifai/config/client.py +16 -0
lattifai/config/event.py +102 -0
lattifai/config/transcription.py +25 -1
lattifai/data/__init__.py +8 -0
lattifai/data/caption.py +228 -0
lattifai/errors.py +78 -53
lattifai/event/__init__.py +65 -0
lattifai/event/lattifai.py +166 -0
lattifai/mixin.py +22 -17
lattifai/transcription/base.py +2 -1
lattifai/transcription/gemini.py +147 -16
lattifai/transcription/lattifai.py +8 -11
lattifai/types.py +1 -1
lattifai/youtube/client.py +143 -48
{lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/METADATA +117 -54
lattifai-1.3.0.dist-info/RECORD +57 -0
lattifai/__init__.py +0 -88
lattifai/alignment/sentence_splitter.py +0 -350
lattifai/caption/__init__.py +0 -96
lattifai/caption/caption.py +0 -661
lattifai/caption/formats/__init__.py +0 -199
lattifai/caption/formats/base.py +0 -211
lattifai/caption/formats/gemini.py +0 -722
lattifai/caption/formats/json.py +0 -194
lattifai/caption/formats/lrc.py +0 -309
lattifai/caption/formats/nle/__init__.py +0 -9
lattifai/caption/formats/nle/audition.py +0 -561
lattifai/caption/formats/nle/avid.py +0 -423
lattifai/caption/formats/nle/fcpxml.py +0 -549
lattifai/caption/formats/nle/premiere.py +0 -589
lattifai/caption/formats/pysubs2.py +0 -642
lattifai/caption/formats/sbv.py +0 -147
lattifai/caption/formats/tabular.py +0 -338
lattifai/caption/formats/textgrid.py +0 -193
lattifai/caption/formats/ttml.py +0 -652
lattifai/caption/formats/vtt.py +0 -469
lattifai/caption/parsers/__init__.py +0 -9
lattifai/caption/parsers/text_parser.py +0 -147
lattifai/caption/standardize.py +0 -636
lattifai/caption/supervision.py +0 -34
lattifai/caption/utils.py +0 -474
lattifai-1.2.2.dist-info/RECORD +0 -76
{lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
{lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +0 -0
{lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
{lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0

lattifai/youtube/client.py CHANGED Viewed

@@ -15,7 +15,8 @@ try:
 except ImportError:
     yt_dlp = None
-from ..config.caption import CAPTION_FORMATS
+from lattifai.caption.config import CAPTION_FORMATS
 from ..errors import LattifAIError
 from ..workflow.base import setup_workflow_logger
 from ..workflow.file_manager import TRANSCRIBE_CHOICE, FileExistenceManager
@@ -49,6 +50,12 @@ class YoutubeLoader:
         if yt_dlp is None:
             raise ImportError("yt-dlp is required. Install with `pip install yt-dlp`")
+        # Auto-load from environment if not specified
+        if proxy is None:
+            proxy = os.getenv("YOUTUBE_PROXY")
+        if cookies is None:
+            cookies = os.getenv("YOUTUBE_COOKIE_FILE") or os.getenv("YOUTUBE_COOKIE_BROWSER")
         self.proxy = proxy
         self.cookies = cookies
@@ -64,14 +71,29 @@ class YoutubeLoader:
         if self.proxy:
             self._base_opts["proxy"] = self.proxy
+            logger.info(f"🌐 Using proxy: {self.proxy}")
+        # Cookie configuration
         if self.cookies:
-            self._base_opts["cookiefile"] = self.cookies
+            # Check if it's a browser name (chrome, firefox, safari, etc.)
+            browser_names = ["chrome", "firefox", "safari", "edge", "opera", "brave"]
+            if self.cookies.lower() in browser_names:
+                # Use cookies from browser directly
+                self._base_opts["cookiesfrombrowser"] = (self.cookies.lower(),)
+                logger.info(f"🍪 Using cookies from browser: {self.cookies}")
+            else:
+                # Use cookie file
+                cookie_path = Path(self.cookies).expanduser()
+                if cookie_path.exists():
+                    self._base_opts["cookiefile"] = str(cookie_path)
+                    logger.info(f"🍪 Using cookie file: {cookie_path}")
+                else:
+                    logger.warning(f"⚠️ Cookie file not found: {cookie_path}")
+                    logger.warning("💡 Tip: Run 'yt-dlp --cookies-from-browser chrome' to extract cookies")
-        # Strategy: Prefer Android client to avoid PO Token issues on Web
-        # But for captions, sometimes Web is needed.
-        # We start with a robust default.
-        self._base_opts["extractor_args"] = {"youtube": {"player_client": ["android", "web"]}}
+        # Note: player_client configuration is removed to avoid format availability issues
+        # with certain videos. Let yt-dlp automatically select the best client.
+        # Previous config caused "Requested format is not available" errors for some videos.
     def get_video_info(self, video_id: str) -> Dict[str, Any]:
         """
@@ -136,8 +158,23 @@ class YoutubeLoader:
         except yt_dlp.utils.DownloadError as e:
             msg = str(e)
-            if "Sign in to confirm" in msg or "Private video" in msg:
-                raise VideoUnavailableError(f"Video {video_id} is unavailable: {msg}")
+            if "Sign in to confirm" in msg or "not a bot" in msg:
+                # Bot detection error - provide helpful guidance
+                error_msg = (
+                    f"🤖 YouTube Bot Detection: Video {video_id} requires authentication.\n\n"
+                    "Solutions:\n"
+                    "1. Use browser cookies (recommended):\n"
+                    "   loader = YoutubeLoader(cookies='chrome')  # or 'firefox', 'safari'\n\n"
+                    "2. Export cookie file:\n"
+                    "   yt-dlp --cookies-from-browser chrome --cookies cookies.txt <video_url>\n"
+                    "   loader = YoutubeLoader(cookies='cookies.txt')\n\n"
+                    "3. Environment variable:\n"
+                    "   export YOUTUBE_COOKIE_BROWSER=chrome\n\n"
+                    f"Original error: {msg}"
+                )
+                raise VideoUnavailableError(error_msg) from e
+            elif "Private video" in msg:
+                raise VideoUnavailableError(f"Video {video_id} is private") from e
             raise YouTubeError(f"yt-dlp failed: {msg}") from e
         except Exception as e:
             raise YouTubeError(f"Unexpected error: {str(e)}") from e
@@ -192,8 +229,8 @@ class YoutubeLoader:
         return "Unknown"
     def _find_best_format(self, formats: List[Dict]) -> Optional[Dict]:
-        # Prefer json3, then vtt
-        priority = ["json3", "vtt", "ttml", "srv3", "srv2", "srv1"]
+        # Prefer json3 (best precision), srv3 (word-level timing), then vtt
+        priority = ["json3", "srv3", "vtt", "ttml", "srv2", "srv1"]
         for fmt_ext in priority:
             for f in formats:
@@ -234,18 +271,11 @@ class YoutubeLoader:
         """
         url = f"https://www.youtube.com/watch?v={video_id}"
-        # Use default yt-dlp config to get DASH formats with separate audio streams
+        # Use base opts (includes proxy and cookie config) + DASH manifest
         opts = {
-            "quiet": True,
-            "no_warnings": True,
-            "skip_download": True,
-            "extract_flat": False,
+            **self._base_opts,
             "youtube_include_dash_manifest": True,
         }
-        if self.proxy:
-            opts["proxy"] = self.proxy
-        if self.cookies:
-            opts["cookiefile"] = self.cookies
         try:
             with yt_dlp.YoutubeDL(opts) as ydl:
@@ -253,18 +283,57 @@ class YoutubeLoader:
                 # Get all formats and filter for audio-only (no video track)
                 formats = info.get("formats", [])
+                def is_direct_url(url: str) -> bool:
+                    """Check if URL is a direct stream URL (not HLS manifest)"""
+                    if not url:
+                        return False
+                    # HLS manifests contain these patterns
+                    hls_patterns = ["manifest.googlevideo.com", "/hls_playlist/", ".m3u8"]
+                    return not any(p in url for p in hls_patterns)
                 audio_formats = [
                     f
                     for f in formats
                     if f.get("acodec") not in (None, "none")
                     and f.get("vcodec") in (None, "none")
                     and f.get("url")  # Must have a direct URL
+                    and is_direct_url(f.get("url"))  # Exclude HLS manifests
                 ]
                 if not audio_formats:
-                    raise YouTubeError(
-                        "No audio-only formats available. " "YouTube may require authentication for this video."
-                    )
+                    # Fallback: If no audio-only formats, use lowest resolution video with audio
+                    # This happens with HLS-only videos (e.g., protected content)
+                    logger.warning("No audio-only formats found. Falling back to lowest resolution video with audio.")
+                    audio_formats = [
+                        f
+                        for f in formats
+                        if f.get("acodec") not in (None, "none")
+                        and f.get("vcodec") not in (None, "none")
+                        and f.get("url")
+                        and is_direct_url(f.get("url"))  # Exclude HLS manifests
+                    ]
+                    # Sort by resolution (lowest first) for minimal bandwidth
+                    audio_formats.sort(key=lambda f: f.get("height") or f.get("width") or 9999)
+                if not audio_formats:
+                    # Check if there are HLS-only formats (common for Shorts)
+                    # HLS can still work with server-side streaming (same IP)
+                    hls_with_audio = [f for f in formats if f.get("acodec") not in (None, "none") and f.get("url")]
+                    if hls_with_audio:
+                        logger.warning("Only HLS streams available. Returning HLS URL for server-side streaming.")
+                        # Sort: prefer audio-only, then by resolution (lowest first)
+                        hls_with_audio.sort(
+                            key=lambda f: (
+                                0 if f.get("vcodec") in (None, "none") else 1,
+                                f.get("height") or f.get("width") or 9999,
+                            )
+                        )
+                        audio_formats = hls_with_audio
+                    else:
+                        raise YouTubeError(
+                            "No formats with audio available. YouTube may require authentication for this video."
+                        )
                 # Filter by audio_track_id if specified (for multi-language audio)
                 if audio_track_id:
@@ -314,17 +383,30 @@ class YoutubeLoader:
                 audio_formats.sort(key=score_format, reverse=True)
                 best = audio_formats[0]
+                # Check if selected format is HLS (requires server-side streaming)
+                best_url = best.get("url", "")
+                is_hls = not is_direct_url(best_url)
                 return {
-                    "url": best.get("url"),
+                    "url": best_url,
                     "mime_type": best.get("ext", format_preference),
                     "bitrate": best.get("abr") or best.get("tbr"),
+                    "sample_rate": best.get("asr"),  # Audio sample rate
                     "content_length": best.get("filesize") or best.get("filesize_approx"),
                     "format_id": best.get("format_id"),
                     "ext": best.get("ext"),
+                    "is_hls": is_hls,  # True = use server streaming, False = use proxy
                 }
         except yt_dlp.utils.DownloadError as e:
-            raise YouTubeError(f"Failed to get audio URL: {str(e)}") from e
+            msg = str(e)
+            if "Sign in to confirm" in msg or "not a bot" in msg:
+                raise YouTubeError(
+                    f"🤖 YouTube Bot Detection: Cookie configuration required to access this video. "
+                    f"Reference: YoutubeLoader(cookies='chrome') or set environment variable YOUTUBE_COOKIE_BROWSER=chrome. "
+                    f"Original error: {msg}"
+                ) from e
+            raise YouTubeError(f"Failed to get audio URL: {msg}") from e
         except Exception as e:
             raise YouTubeError(f"Unexpected error getting audio URL: {str(e)}") from e
@@ -346,18 +428,12 @@ class YoutubeLoader:
         """
         url = f"https://www.youtube.com/watch?v={video_id}"
-        # Use default yt-dlp config to get all available formats
+        # Use base opts (includes proxy and cookie config) + DASH and HLS manifests
         opts = {
-            "quiet": True,
-            "no_warnings": True,
-            "skip_download": True,
-            "extract_flat": False,
+            **self._base_opts,
             "youtube_include_dash_manifest": True,
+            "youtube_include_hls_manifest": True,
         }
-        if self.proxy:
-            opts["proxy"] = self.proxy
-        if self.cookies:
-            opts["cookiefile"] = self.cookies
         try:
             with yt_dlp.YoutubeDL(opts) as ydl:
@@ -366,27 +442,34 @@ class YoutubeLoader:
                 # Get all formats
                 formats = info.get("formats", [])
+                def is_direct_url(url: str) -> bool:
+                    """Check if URL is a direct stream URL (not HLS manifest)"""
+                    if not url:
+                        return False
+                    hls_patterns = ["manifest.googlevideo.com", "/hls_playlist/", ".m3u8"]
+                    return not any(p in url for p in hls_patterns)
                 # Filter for video formats:
                 # - Must have video codec
-                # - Must have direct URL (not manifest/playlist)
-                # - Exclude HLS/DASH manifests (protocol contains m3u8 or dash)
-                def is_direct_video(f: Dict) -> bool:
+                # - Must have a URL
+                # - Prefer direct URLs (DASH) over HLS manifests
+                def is_usable_video(f: Dict) -> bool:
                     if f.get("vcodec") in (None, "none"):
                         return False
-                    url = f.get("url", "")
-                    protocol = f.get("protocol", "")
-                    # Exclude HLS manifests
-                    if "m3u8" in protocol or ".m3u8" in url or "manifest.googlevideo.com" in url:
-                        return False
-                    # Exclude DASH manifests
-                    if "dash" in protocol:
+                    if not f.get("url"):
                         return False
                     return True
-                video_formats = [f for f in formats if is_direct_video(f)]
+                # First try: direct URLs only (exclude HLS)
+                video_formats = [f for f in formats if is_usable_video(f) and is_direct_url(f.get("url", ""))]
+                # Fallback: include HLS if no direct formats
                 if not video_formats:
-                    raise YouTubeError("No direct video formats available (only HLS/DASH manifests found)")
+                    logger.warning("No direct video URLs found. Falling back to HLS formats.")
+                    video_formats = [f for f in formats if is_usable_video(f)]
+                if not video_formats:
+                    raise YouTubeError("No video formats available")
                 # Parse target height from quality parameter
                 target_height = None
@@ -413,15 +496,19 @@ class YoutubeLoader:
                 video_formats.sort(key=score_format, reverse=True)
                 best = video_formats[0]
+                # Check if selected format is HLS
+                best_url = best.get("url", "")
+                is_hls = not is_direct_url(best_url)
                 # Log selection for debugging
                 logger.info(
                     f"Selected video format: {best.get('format_id')} "
                     f"({best.get('width')}x{best.get('height')}, "
-                    f"vcodec={best.get('vcodec')}, acodec={best.get('acodec')})"
+                    f"vcodec={best.get('vcodec')}, acodec={best.get('acodec')}, is_hls={is_hls})"
                 )
                 return {
-                    "url": best.get("url"),
+                    "url": best_url,
                     "mime_type": best.get("ext", format_preference),
                     "width": best.get("width"),
                     "height": best.get("height"),
@@ -432,10 +519,18 @@ class YoutubeLoader:
                     "content_length": best.get("filesize") or best.get("filesize_approx"),
                     "format_id": best.get("format_id"),
                     "ext": best.get("ext"),
+                    "is_hls": is_hls,
                 }
         except yt_dlp.utils.DownloadError as e:
-            raise YouTubeError(f"Failed to get video URL: {str(e)}") from e
+            msg = str(e)
+            if "Sign in to confirm" in msg or "not a bot" in msg:
+                raise YouTubeError(
+                    f"🤖 YouTube Bot Detection: Cookie configuration required to access this video. "
+                    f"Reference: YoutubeLoader(cookies='chrome') or set environment variable YOUTUBE_COOKIE_BROWSER=chrome. "
+                    f"Original error: {msg}"
+                ) from e
+            raise YouTubeError(f"Failed to get video URL: {msg}") from e
         except Exception as e:
             raise YouTubeError(f"Unexpected error getting video URL: {str(e)}") from e

{lattifai-1.2.2.dist-info → lattifai-1.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattifai
-Version: 1.2.2
+Version: 1.3.0
 Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
 Author-email: Lattifai Technologies <tech@lattifai.com>
 Maintainer-email: Lattice <tech@lattifai.com>
@@ -50,49 +50,40 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <3.15,>=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: lattifai[core]
-Requires-Dist: lattifai[alignment]
-Requires-Dist: lattifai[transcription]
-Requires-Dist: lattifai[workflow]
-Requires-Dist: lattifai[server]
-Provides-Extra: core
-Requires-Dist: k2py>=0.2.1; extra == "core"
-Requires-Dist: lattifai-core>=0.6.0; extra == "core"
-Requires-Dist: lattifai-run>=1.0.1; extra == "core"
-Requires-Dist: python-dotenv; extra == "core"
-Requires-Dist: msgpack; extra == "core"
-Requires-Dist: scipy!=1.16.3; extra == "core"
-Requires-Dist: av; extra == "core"
-Provides-Extra: alignment
-Requires-Dist: lhotse>=1.26.0; extra == "alignment"
-Requires-Dist: colorful>=0.5.6; extra == "alignment"
-Requires-Dist: pysubs2; extra == "alignment"
-Requires-Dist: praatio; extra == "alignment"
-Requires-Dist: tgt; extra == "alignment"
-Requires-Dist: onnx>=1.16.0; extra == "alignment"
-Requires-Dist: onnxruntime; extra == "alignment"
-Requires-Dist: g2p-phonemizer>=0.4.0; extra == "alignment"
-Requires-Dist: wtpsplit>=2.1.7; extra == "alignment"
-Requires-Dist: modelscope>=1.33.0; extra == "alignment"
-Requires-Dist: error-align-fix>=0.1.4; extra == "alignment"
+Requires-Dist: python-dotenv
+Requires-Dist: colorful>=0.5.6
+Requires-Dist: lattifai-run>=1.0.1
+Requires-Dist: lattifai-captions[splitting]>=0.1.6
+Requires-Dist: lattifai-core-hq>=0.6.4
+Requires-Dist: g2p-phonemizer>=0.4.0
+Requires-Dist: error-align-fix>=0.1.4
+Requires-Dist: lhotse>=1.26.0
+Requires-Dist: k2py==0.2.4
+Requires-Dist: onnxruntime
+Requires-Dist: av
+Requires-Dist: msgpack
+Provides-Extra: event
+Requires-Dist: pyannote-audio-notorchdeps>=4.0.2; extra == "event"
+Provides-Extra: diarization
+Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc4; extra == "diarization"
+Requires-Dist: pyannote-audio-notorchdeps>=4.0.2; extra == "diarization"
 Provides-Extra: transcription
 Requires-Dist: OmniSenseVoice>=0.4.2; extra == "transcription"
 Requires-Dist: nemo_toolkit_asr[asr]>=2.7.0rc4; extra == "transcription"
 Requires-Dist: google-genai>=1.22.0; extra == "transcription"
 Requires-Dist: pyannote-audio-notorchdeps>=4.0.2; extra == "transcription"
-Provides-Extra: workflow
-Requires-Dist: questionary>=2.0; extra == "workflow"
-Requires-Dist: yt-dlp; extra == "workflow"
-Requires-Dist: pycryptodome; extra == "workflow"
-Provides-Extra: server
-Requires-Dist: fastapi>=0.111.0; extra == "server"
-Requires-Dist: uvicorn>=0.30.0; extra == "server"
-Requires-Dist: python-multipart>=0.0.9; extra == "server"
-Requires-Dist: jinja2>=3.1.4; extra == "server"
+Provides-Extra: youtube
+Requires-Dist: questionary>=2.0; extra == "youtube"
+Requires-Dist: yt-dlp; extra == "youtube"
+Requires-Dist: pycryptodome; extra == "youtube"
 Provides-Extra: dev
+Requires-Dist: black; extra == "dev"
 Requires-Dist: pytest>=8.0.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
+Provides-Extra: all
+Requires-Dist: lattifai[transcription]; extra == "all"
+Requires-Dist: lattifai[youtube]; extra == "all"
 Dynamic: license-file
 <div align="center">
@@ -104,13 +95,13 @@ Dynamic: license-file
 </div>
 <p align="center">
-   🌐 <a href="https://lattifai.com"><b>Official Website</b></a> &nbsp&nbsp | &nbsp&nbsp 🖥️ <a href="https://github.com/lattifai/lattifai-python">GitHub</a> &nbsp&nbsp | &nbsp&nbsp 🤗 <a href="https://huggingface.co/Lattifai/Lattice-1">Model</a> &nbsp&nbsp | &nbsp&nbsp 📑 <a href="https://lattifai.com/blogs">Blog</a> &nbsp&nbsp | &nbsp&nbsp <a href="https://discord.gg/kvF4WsBRK8"><img src="https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white" alt="Discord" style="vertical-align: middle;"></a>
+   🌐 <a href="https://lattifai.com"><b>Official Website</b></a> &nbsp;&nbsp; | &nbsp;&nbsp; 🖥️ <a href="https://github.com/lattifai/lattifai-python">GitHub</a> &nbsp;&nbsp; | &nbsp;&nbsp; 🤗 <a href="https://huggingface.co/LattifAI/Lattice-1">Model</a> &nbsp;&nbsp; | &nbsp;&nbsp; 📑 <a href="https://lattifai.com/blogs">Blog</a> &nbsp;&nbsp; | &nbsp;&nbsp; <a href="https://discord.gg/kvF4WsBRK8"><img src="https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white" alt="Discord" style="vertical-align: middle;"></a>
 </p>
 # LattifAI: Precision Alignment, Infinite Possibilities
-Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](https://huggingface.co/Lattifai/Lattice-1) model.
+Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](https://huggingface.co/LattifAI/Lattice-1) model.
 ## Table of Contents
@@ -120,6 +111,7 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
 - [CLI Reference](#cli-reference)
 - [Python SDK](#python-sdk)
 - [Advanced Features](#advanced-features)
+- [Text Processing](#text-processing)
 - [Supported Formats & Languages](#supported-formats--languages)
 - [Roadmap](#roadmap)
 - [Development](#development)
@@ -130,7 +122,7 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
 | Feature | Description |
 |---------|-------------|
-| **Forced Alignment** | Word-level and segment-level audio-text synchronization powered by [Lattice-1](https://huggingface.co/Lattifai/Lattice-1) |
+| **Forced Alignment** | Word-level and segment-level audio-text synchronization powered by [Lattice-1](https://huggingface.co/LattifAI/Lattice-1) |
 | **Multi-Model Transcription** | Gemini (100+ languages), Parakeet (24 languages), SenseVoice (5 languages) |
 | **Speaker Diarization** | Multi-speaker identification with label preservation |
 | **Streaming Mode** | Process audio up to 20 hours with minimal memory |
@@ -138,10 +130,10 @@ Advanced forced alignment and subtitle generation powered by [ 🤗 Lattice-1](h
 ### Alignment Models
-| Model | Languages | Description |
-|-------|-----------|-------------|
-| **Lattice-1** | English, Chinese, German | Production model with mixed-language alignment support |
-| **Lattice-1-Alpha** | English | Initial release with English forced alignment |
+| Model | Links | Languages | Description |
+|-------|-------|-----------|-------------|
+| **Lattice-1** | [🤗 HF](https://huggingface.co/LattifAI/Lattice-1) • [🤖 MS](https://modelscope.cn/models/LattifAI/Lattice-1) | English, Chinese, German | Production model with mixed-language alignment support |
+| **Lattice-1-Alpha** | [🤗 HF](https://huggingface.co/LattifAI/Lattice-1-Alpha) • [🤖 MS](https://modelscope.cn/models/LattifAI/Lattice-1-Alpha) | English | Initial release with English forced alignment |
 **Model Hub**: Models can be downloaded from `huggingface` (default) or `modelscope` (recommended for users in China):
@@ -151,7 +143,8 @@ lai alignment align audio.wav caption.srt output.srt alignment.model_hub=modelsc
 ```
 ```python
-from lattifai import LattifAI, AlignmentConfig
+from lattifai.client import LattifAI
+from lattifai.config import AlignmentConfig
 client = LattifAI(alignment_config=AlignmentConfig(model_hub="modelscope"))
 ```
@@ -173,16 +166,34 @@ uvx --from lattifai lai --help
 # Or create a project
 mkdir my-project && cd my-project
-uv init --bare && uv add lattifai
+uv init --bare && uv add "lattifai[all]"
 uv run lai alignment align audio.wav caption.srt output.srt
 ```
 ### Using pip
 ```bash
-pip install lattifai
+# Full installation (recommended)
+pip install "lattifai[all]"
 ```
+### Installation Options
+| Extra | Command | Includes |
+|-------|---------|----------|
+| (base) | `pip install lattifai` | Forced alignment (Lattice-1, k2py, ONNX, captions) |
+| `all` | `pip install "lattifai[all]"` | Base + transcription + youtube |
+| `transcription` | `pip install "lattifai[transcription]"` | ASR models (Gemini, Parakeet, SenseVoice) |
+| `youtube` | `pip install "lattifai[youtube]"` | YouTube download (yt-dlp) |
+| `diarization` | `pip install "lattifai[diarization]"` | Speaker diarization (NeMo, pyannote) |
+| `event` | `pip install "lattifai[event]"` | Audio event detection |
+**Note:** Base installation includes full alignment functionality. Use `[all]` for transcription and YouTube features.
+### Caption Format Support
+Caption/subtitle format parsing is provided by [lattifai-captions](https://github.com/lattifai/captions), a separate package supporting 30+ formats (SRT, VTT, ASS, TTML, TextGrid, NLE formats, etc.). It is a required dependency of the base package, so it is installed automatically with `lattifai` or `lattifai[all]`.
 ### API Keys
 **LattifAI API Key (Required)** - Get your free key at [lattifai.com/dashboard/api-keys](https://lattifai.com/dashboard/api-keys)
@@ -220,7 +231,7 @@ lai alignment youtube "https://youtube.com/watch?v=VIDEO_ID"
 ### Python SDK
 ```python
-from lattifai import LattifAI
+from lattifai.client import LattifAI
 client = LattifAI()
 caption = client.alignment(
@@ -319,8 +330,8 @@ lai transcribe align audio.wav output.srt \
 ### Configuration Objects
 ```python
-from lattifai import (
-    LattifAI,
+from lattifai.client import LattifAI
+from lattifai.config import (
     ClientConfig,
     AlignmentConfig,
     CaptionConfig,
@@ -365,7 +376,8 @@ caption = client.youtube(
 | `include_speaker_in_text` | `True` | Include speaker labels in text output |
 ```python
-from lattifai import LattifAI, CaptionConfig
+from lattifai.client import LattifAI
+from lattifai.config import CaptionConfig
 client = LattifAI(
     caption_config=CaptionConfig(
@@ -396,6 +408,9 @@ caption = client.alignment(
 ### Word-Level Alignment
 ```python
+from lattifai.client import LattifAI
+from lattifai.config import CaptionConfig
 client = LattifAI(caption_config=CaptionConfig(word_level=True))
 caption = client.alignment(
     input_media="audio.wav",
@@ -420,7 +435,8 @@ Automatically identify and label different speakers in audio.
 - Gemini transcription → Names extracted from context (e.g., "Hi, I'm Alice" → `Alice`)
 ```python
-from lattifai import LattifAI, DiarizationConfig
+from lattifai.client import LattifAI
+from lattifai.config import DiarizationConfig
 client = LattifAI(
     diarization_config=DiarizationConfig(
@@ -453,6 +469,51 @@ Input Caption → Reader → Tokenizer
 ---
+## Text Processing
+The tokenizer handles various text patterns for forced alignment.
+### Bracket/Caption Handling
+Visual captions and annotations in brackets are treated specially - they get **two pronunciation paths** so the aligner can choose:
+1. **Silence path** - skip when content doesn't appear in audio
+2. **Inner text pronunciation** - match if someone actually says the words
+| Bracket Type | Symbol | Example | Alignment Behavior |
+|--------------|--------|---------|-------------------|
+| Half-width square | `[]` | `[APPLAUSE]` | Skip or match "applause" |
+| Half-width paren | `()` | `(music)` | Skip or match "music" |
+| Full-width square | `【】` | `【笑声】` | Skip or match "笑声" |
+| Full-width paren | `（）` | `（音乐）` | Skip or match "音乐" |
+| Angle brackets | `<>` | `<intro>` | Skip or match "intro" |
+| Book title marks | `《》` | `《开场白》` | Skip or match "开场白" |
+This allows proper handling of:
+- **Visual descriptions**: `[Barret adjusts the camera and smiles]` → skipped if not spoken
+- **Sound effects**: `[APPLAUSE]`, `(music)` → matched if audible
+- **Chinese annotations**: `【笑声】`, `（鼓掌）` → flexible alignment
+### Multilingual Text
+| Pattern | Handling | Example |
+|---------|----------|---------|
+| CJK characters | Split individually | `你好` → `["你", "好"]` |
+| Latin words | Grouped with accents | `Kühlschrank` → `["Kühlschrank"]` |
+| Contractions | Kept together | `I'm`, `don't`, `we'll` |
+| Punctuation | Attached to words | `Hello,` `world!` |
+### Speaker Labels
+Recognized speaker patterns are preserved during alignment:
+| Format | Example | Output |
+|--------|---------|--------|
+| Arrow prefix | `>> Alice:` or `&gt;&gt; Alice:` | `[Alice]` |
+| LattifAI format | `[SPEAKER_01]:` | `[SPEAKER_01]` |
+| Uppercase name | `SPEAKER NAME:` | `[SPEAKER NAME]` |
+---
 ## Supported Formats & Languages
 ### Media Formats
@@ -461,7 +522,9 @@ Input Caption → Reader → Tokenizer
 |------|---------|
 | **Audio** | WAV, MP3, M4A, AAC, FLAC, OGG, OPUS, AIFF, and more |
 | **Video** | MP4, MKV, MOV, WEBM, AVI, and more |
-| **Caption** | SRT, VTT, ASS, SSA, JSON, TextGrid, TSV, CSV, LRC, TTML, and more |
+| **Caption** | SRT, VTT, ASS, SSA, SRV3, JSON, TextGrid, TSV, CSV, LRC, TTML, and more |
+> **Note**: Caption format handling is provided by [lattifai-captions](https://github.com/lattifai/captions), which is automatically installed as a dependency. For standalone caption processing without alignment features, run `pip install lattifai-captions`.
 ### JSON Format
@@ -515,8 +578,8 @@ WEBVTT
 **Writing**: Use `word_level=True` with `karaoke_config` to output YouTube VTT style:
 ```python
-from lattifai import Caption
-from lattifai.config.caption import KaraokeConfig
+from lattifai.caption import Caption
+from lattifai.caption.config import KaraokeConfig
 caption = Caption.read("input.vtt")
 caption.write(
@@ -584,7 +647,7 @@ cd lattifai-python
 uv sync && source .venv/bin/activate
 # Or pip
-pip install -e ".[test]"
+pip install -e ".[all,dev]"
 # Run tests
 pytest

lattifai 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

lattifai 1.2.2py3-none-any.whl → 1.3.0py3-none-any.whl