PyPI - lattifai - Versions diffs - 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl - Mend

lattifai 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

lattifai/__init__.py +10 -0
lattifai/alignment/lattice1_aligner.py +33 -13
lattifai/alignment/lattice1_worker.py +121 -50
lattifai/alignment/segmenter.py +3 -2
lattifai/alignment/tokenizer.py +3 -3
lattifai/audio2.py +269 -70
lattifai/caption/caption.py +161 -3
lattifai/cli/alignment.py +2 -1
lattifai/cli/app_installer.py +35 -33
lattifai/cli/caption.py +8 -18
lattifai/cli/server.py +3 -1
lattifai/cli/transcribe.py +53 -38
lattifai/cli/youtube.py +1 -0
lattifai/client.py +16 -11
lattifai/config/alignment.py +23 -2
lattifai/config/caption.py +1 -1
lattifai/config/media.py +23 -3
lattifai/errors.py +7 -3
lattifai/mixin.py +26 -15
lattifai/server/app.py +2 -1
lattifai/utils.py +37 -0
lattifai/workflow/file_manager.py +15 -13
lattifai/workflow/youtube.py +16 -1
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/METADATA +65 -15
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/RECORD +29 -29
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/licenses/LICENSE +1 -1
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/WHEEL +0 -0
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/entry_points.txt +0 -0
{lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/top_level.txt +0 -0

lattifai/cli/app_installer.py CHANGED Viewed

@@ -5,6 +5,8 @@ import subprocess
 import sys
 from pathlib import Path
+from lattifai.utils import safe_print
 def check_command_exists(cmd: str) -> bool:
     """Check if a command exists in PATH."""
@@ -19,17 +21,17 @@ def install_nodejs():
     """Install Node.js based on the operating system."""
     system = platform.system().lower()
-    print("📦 Node.js not found. Installing Node.js...\n")
+    safe_print("📦 Node.js not found. Installing Node.js...\n")
     try:
         if system == "darwin":  # macOS
             # Check if Homebrew is installed
             if check_command_exists("brew"):
-                print("🍺 Using Homebrew to install Node.js...")
+                safe_print("🍺 Using Homebrew to install Node.js...")
                 subprocess.run(["brew", "install", "node"], check=True)
-                print("✓ Node.js installed via Homebrew\n")
+                safe_print("✓ Node.js installed via Homebrew\n")
             else:
-                print("❌ Homebrew not found.")
+                safe_print("❌ Homebrew not found.")
                 print("   Please install Homebrew first:")
                 print(
                     '   /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
@@ -40,46 +42,46 @@ def install_nodejs():
         elif system == "linux":
             # Try common package managers
             if check_command_exists("apt"):
-                print("🐧 Using apt to install Node.js...")
+                safe_print("🐧 Using apt to install Node.js...")
                 subprocess.run(["sudo", "apt", "update"], check=True)
                 subprocess.run(["sudo", "apt", "install", "-y", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via apt\n")
+                safe_print("✓ Node.js installed via apt\n")
             elif check_command_exists("yum"):
-                print("🐧 Using yum to install Node.js...")
+                safe_print("🐧 Using yum to install Node.js...")
                 subprocess.run(["sudo", "yum", "install", "-y", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via yum\n")
+                safe_print("✓ Node.js installed via yum\n")
             elif check_command_exists("dnf"):
-                print("🐧 Using dnf to install Node.js...")
+                safe_print("🐧 Using dnf to install Node.js...")
                 subprocess.run(["sudo", "dnf", "install", "-y", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via dnf\n")
+                safe_print("✓ Node.js installed via dnf\n")
             elif check_command_exists("pacman"):
-                print("🐧 Using pacman to install Node.js...")
+                safe_print("🐧 Using pacman to install Node.js...")
                 subprocess.run(["sudo", "pacman", "-S", "--noconfirm", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via pacman\n")
+                safe_print("✓ Node.js installed via pacman\n")
             else:
-                print("❌ No supported package manager found (apt/yum/dnf/pacman).")
+                safe_print("❌ No supported package manager found (apt/yum/dnf/pacman).")
                 print("   Please install Node.js manually from: https://nodejs.org/")
                 sys.exit(1)
         elif system == "windows":
-            print("❌ Automatic installation on Windows is not supported.")
+            safe_print("❌ Automatic installation on Windows is not supported.")
             print("   Please download and install Node.js from: https://nodejs.org/")
             print("   Then run this command again.")
             sys.exit(1)
         else:
-            print(f"❌ Unsupported operating system: {system}")
+            safe_print(f"❌ Unsupported operating system: {system}")
             print("   Please install Node.js manually from: https://nodejs.org/")
             sys.exit(1)
         # Verify installation
         if not check_command_exists("npm"):
-            print("❌ Node.js installation verification failed.")
+            safe_print("❌ Node.js installation verification failed.")
             print("   Please restart your terminal and try again.")
             sys.exit(1)
     except subprocess.CalledProcessError as e:
-        print(f"\n❌ Error during Node.js installation: {e}")
+        safe_print(f"\n❌ Error during Node.js installation: {e}")
         print("   Please install Node.js manually from: https://nodejs.org/")
         sys.exit(1)
@@ -90,49 +92,49 @@ def main():
     app_dir = Path(__file__).parent.parent.parent.parent / "app"
     if not app_dir.exists():
-        print(f"❌ Error: app directory not found at {app_dir}")
+        safe_print(f"❌ Error: app directory not found at {app_dir}")
         print("   Make sure you're in the lattifai-python repository.")
         sys.exit(1)
-    print("🚀 Installing lai-app (LattifAI Web Application)...\n")
+    safe_print("🚀 Installing lai-app (LattifAI Web Application)...\n")
     # Check if npm is installed, if not, install Node.js
     if not check_command_exists("npm"):
         install_nodejs()
     else:
         npm_version = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=True).stdout.strip()
-        print(f"✓ npm is already installed (v{npm_version})\n")
+        safe_print(f"✓ npm is already installed (v{npm_version})\n")
     # Change to app directory and run installation
     try:
-        print(f"📁 Working directory: {app_dir}\n")
+        safe_print(f"📁 Working directory: {app_dir}\n")
         # Install dependencies
-        print("📦 Installing dependencies...")
+        safe_print("📦 Installing dependencies...")
         subprocess.run(["npm", "install"], cwd=app_dir, check=True)
-        print("✓ Dependencies installed\n")
+        safe_print("✓ Dependencies installed\n")
         # Build the application
-        print("🔨 Building application...")
+        safe_print("🔨 Building application...")
         subprocess.run(["npm", "run", "build"], cwd=app_dir, check=True)
-        print("✓ Application built\n")
+        safe_print("✓ Application built\n")
         # Link globally
-        print("🔗 Linking lai-app command globally...")
+        safe_print("🔗 Linking lai-app command globally...")
         subprocess.run(["npm", "link"], cwd=app_dir, check=True)
-        print("✓ lai-app command linked globally\n")
+        safe_print("✓ lai-app command linked globally\n")
-        print("=" * 60)
-        print("✅ lai-app installed successfully!")
-        print("=" * 60)
-        print("\n🎉 You can now run:")
+        safe_print("=" * 60)
+        safe_print("✅ lai-app installed successfully!")
+        safe_print("=" * 60)
+        safe_print("\n🎉 You can now run:")
         print("   lai-app              # Start the web application")
         print("   lai-app --help       # Show help")
         print("   lai-app --port 8080  # Use custom port")
-        print("\n📖 For more information, see app/CLI_USAGE.md\n")
+        safe_print("\n📖 For more information, see app/CLI_USAGE.md\n")
     except subprocess.CalledProcessError as e:
-        print(f"\n❌ Error during installation: {e}")
+        safe_print(f"\n❌ Error during installation: {e}")
         sys.exit(1)

lattifai/cli/caption.py CHANGED Viewed

@@ -7,6 +7,7 @@ from lhotse.utils import Pathlike
 from typing_extensions import Annotated
 from lattifai.config import CaptionConfig
+from lattifai.utils import safe_print
 @run.cli.entrypoint(name="convert", namespace="caption")
@@ -55,7 +56,7 @@ def convert(
     caption = Caption.read(input_path, normalize_text=normalize_text)
     caption.write(output_path, include_speaker_in_text=include_speaker_in_text)
-    print(f"✅ Converted {input_path} -> {output_path}")
+    safe_print(f"✅ Converted {input_path} -> {output_path}")
     return output_path
@@ -63,7 +64,6 @@ def convert(
 def normalize(
     input_path: Pathlike,
     output_path: Pathlike,
-    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
 ):
     """
     Normalize caption text by cleaning HTML entities and whitespace.
@@ -81,9 +81,6 @@ def normalize(
     Args:
         input_path: Path to input caption file to normalize
         output_path: Path to output caption file (defaults to overwriting input file)
-        caption: Caption configuration for text normalization.
-            Fields: input_format, output_format, normalize_text (automatically enabled),
-                    encoding
     Examples:
         # Normalize and save to new file (positional arguments)
@@ -92,13 +89,9 @@ def normalize(
         # Normalize with format conversion
         lai caption normalize input.vtt output.srt
-        # Normalize with custom caption config
-        lai caption normalize input.srt output.srt \\
-            caption.encoding=utf-8
         # Using keyword arguments (traditional syntax)
-        lai caption normalize \\
-            input_path=input.srt \\
+        lai caption normalize \
+            input_path=input.srt \
             output_path=output.srt
     """
     from pathlib import Path
@@ -112,9 +105,9 @@ def normalize(
     caption_obj.write(output_path, include_speaker_in_text=True)
     if output_path == input_path:
-        print(f"✅ Normalized {input_path} (in-place)")
+        safe_print(f"✅ Normalized {input_path} (in-place)")
     else:
-        print(f"✅ Normalized {input_path} -> {output_path}")
+        safe_print(f"✅ Normalized {input_path} -> {output_path}")
     return output_path
@@ -124,7 +117,6 @@ def shift(
     input_path: Pathlike,
     output_path: Pathlike,
     seconds: float,
-    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
 ):
     """
     Shift caption timestamps by a specified number of seconds.
@@ -140,8 +132,6 @@ def shift(
         output_path: Path to output caption file (can be same as input for in-place modification)
         seconds: Number of seconds to shift timestamps. Positive values delay captions,
                  negative values advance them earlier.
-        caption: Caption configuration for reading/writing.
-            Fields: input_format, output_format, encoding
     Examples:
         # Delay captions by 2 seconds (positional arguments)
@@ -181,9 +171,9 @@ def shift(
         direction = f"advanced by {abs(seconds)}s"
     if output_path == input_path:
-        print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
+        safe_print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
     else:
-        print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
+        safe_print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
     return output_path

lattifai/cli/server.py CHANGED Viewed

@@ -4,6 +4,8 @@ import os
 import colorful
 import uvicorn
+from lattifai.utils import safe_print
 def main():
     """Launch the LattifAI Web Interface."""
@@ -29,7 +31,7 @@ def main():
     args = parser.parse_args()
-    print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
+    safe_print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
     print(colorful.cyan(f"Server running at http://localhost:{args.port}"))
     print(colorful.yellow(f"Host: {args.host}"))
     print(colorful.yellow(f"Auto-reload: {'disabled' if args.no_reload else 'enabled'}"))

lattifai/cli/transcribe.py CHANGED Viewed

@@ -3,10 +3,8 @@
 from typing import Optional
 import nemo_run as run
-from lhotse.utils import Pathlike
 from typing_extensions import Annotated
-from lattifai.audio2 import AudioLoader, ChannelSelectorType
 from lattifai.cli.alignment import align as alignment_align
 from lattifai.config import (
     AlignmentConfig,
@@ -23,9 +21,8 @@ from lattifai.utils import _resolve_model_path
 def transcribe(
     input: Optional[str] = None,
     output_caption: Optional[str] = None,
-    output_dir: Optional[Pathlike] = None,
-    media_format: str = "mp3",
-    channel_selector: Optional[ChannelSelectorType] = "average",
+    media: Annotated[Optional[MediaConfig], run.Config[MediaConfig]] = None,
+    client: Annotated[Optional[ClientConfig], run.Config[ClientConfig]] = None,
     transcription: Annotated[Optional[TranscriptionConfig], run.Config[TranscriptionConfig]] = None,
 ):
     """
@@ -39,11 +36,8 @@ def transcribe(
     Args:
         input: Path to input audio/video file or YouTube URL (can be provided as positional argument)
         output_caption: Path for output caption file (can be provided as positional argument)
-        output_dir: Directory for output files when using YouTube URL
-        media_format: Media format for YouTube downloads (default: mp3)
-        channel_selector: Audio channel selection strategy (default: average)
-            Options: average, left, right, or an integer channel index.
-            Note: Ignored when input is a URL and Gemini transcriber is used.
+        media: Media configuration for input/output handling.
+            Fields: input_path, output_dir, media_format, channel_selector, streaming_chunk_secs
         transcription: Transcription service configuration.
             Fields: model_name, device, language, gemini_api_key
@@ -67,6 +61,11 @@ def transcribe(
         lai transcribe run audio.wav output.srt \\
             transcription.language=zh
+        # With MediaConfig settings
+        lai transcribe run audio.wav output.srt \\
+            media.channel_selector=left \\
+            media.streaming_chunk_secs=30.0
         # Full configuration with keyword arguments
         lai transcribe run \\
             input=audio.wav \\
@@ -78,68 +77,84 @@ def transcribe(
     from pathlib import Path
     import colorful
+    from lattifai_core.client import SyncAPIClient
+    from lattifai.audio2 import AudioLoader
     from lattifai.transcription import create_transcriber
+    from lattifai.utils import safe_print
-    # Initialize transcription config with defaults
+    # Initialize configs with defaults
+    client_config = client or ClientConfig()
     transcription_config = transcription or TranscriptionConfig()
+    media_config = media or MediaConfig()
+    # Initialize client wrapper to properly set client_wrapper
+    client_wrapper = SyncAPIClient(config=client_config)
+    transcription_config.client_wrapper = client_wrapper
+    # Initialize client wrapper to properly set client_wrapper
+    client_wrapper = SyncAPIClient(config=client_config)
+    transcription_config.client_wrapper = client_wrapper
     # Validate input is required
-    if not input:
-        raise ValueError("Input is required. Provide input as positional argument (file path or URL).")
+    if not input and not media_config.input_path:
+        raise ValueError("Input is required. Provide input as positional argument or media.input_path.")
+    # Assign input to media_config if provided
+    if input:
+        media_config.set_input_path(input)
     # Detect if input is a URL
-    is_url = input.startswith(("http://", "https://"))
+    is_url = media_config.is_input_remote()
     # Prepare output paths
     if is_url:
-        # For URLs, use output_dir
-        if output_dir:
-            output_path = Path(str(output_dir)).expanduser()
-            output_path.mkdir(parents=True, exist_ok=True)
-        else:
-            output_path = Path.cwd()
+        # For URLs, use output_dir from media_config or current directory
+        output_path = media_config.output_dir
     else:
         # For files, use input path directory
-        input_path = Path(str(input))
-        output_path = input_path.parent
+        output_path = Path(media_config.input_path).parent
     # Create transcriber
     if not transcription_config.lattice_model_path:
-        transcription_config.lattice_model_path = _resolve_model_path("Lattifai/Lattice-1")
+        transcription_config.lattice_model_path = _resolve_model_path("LattifAI/Lattice-1")
     transcriber = create_transcriber(transcription_config=transcription_config)
-    print(colorful.cyan(f"🎤 Starting transcription with {transcriber.name}..."))
-    print(colorful.cyan(f"    Input: {input}"))
+    safe_print(colorful.cyan(f"🎤 Starting transcription with {transcriber.name}..."))
+    safe_print(colorful.cyan(f"    Input: {media_config.input_path}"))
     # Perform transcription
     if is_url and transcriber.supports_url:
         # Check if transcriber supports URL directly
-        print(colorful.cyan("    Transcribing from URL directly..."))
-        transcript = asyncio.run(transcriber.transcribe(input))
+        safe_print(colorful.cyan("    Transcribing from URL directly..."))
+        transcript = asyncio.run(transcriber.transcribe(media_config.input_path))
     else:
         if is_url:
             # Download media first, then transcribe
-            print(colorful.cyan("    Downloading media from URL..."))
+            safe_print(colorful.cyan("    Downloading media from URL..."))
             from lattifai.workflow.youtube import YouTubeDownloader
             downloader = YouTubeDownloader()
             input_path = asyncio.run(
                 downloader.download_media(
-                    url=input,
+                    url=media_config.input_path,
                     output_dir=str(output_path),
-                    media_format=media_format,
-                    force_overwrite=False,
+                    media_format=media_config.normalize_format(),
+                    force_overwrite=media_config.force_overwrite,
                 )
             )
-            print(colorful.cyan(f"    Media downloaded to: {input_path}"))
+            safe_print(colorful.cyan(f"    Media downloaded to: {input_path}"))
         else:
-            input_path = Path(str(input))
+            input_path = Path(media_config.input_path)
-        print(colorful.cyan("    Loading audio..."))
+        safe_print(colorful.cyan("    Loading audio..."))
         # For files, load audio first
         audio_loader = AudioLoader(device=transcription_config.device)
-        media_audio = audio_loader(input_path, channel_selector=channel_selector)
+        media_audio = audio_loader(
+            input_path,
+            channel_selector=media_config.channel_selector,
+            streaming_chunk_secs=media_config.streaming_chunk_secs,
+        )
         transcript = asyncio.run(transcriber.transcribe(media_audio))
     # Determine output caption path
@@ -153,14 +168,14 @@ def transcribe(
             final_output = output_path / f"youtube_LattifAI_{transcriber.name}.{output_format}"
         else:
             # For files, use input filename with suffix
-            final_output = Path(str(input)).with_suffix(".LattifAI.srt")
+            final_output = Path(media_config.input_path).with_suffix(".LattifAI.srt")
-    print(colorful.cyan(f"   Output: {final_output}"))
+    safe_print(colorful.cyan(f"   Output: {final_output}"))
     # Write output
     transcriber.write(transcript, final_output, encoding="utf-8", cache_audio_events=False)
-    print(colorful.green(f"🎉 Transcription completed: {final_output}"))
+    safe_print(colorful.green(f"🎉 Transcription completed: {final_output}"))
     return transcript

lattifai/cli/youtube.py CHANGED Viewed

@@ -117,6 +117,7 @@ def youtube(
         force_overwrite=media_config.force_overwrite,
         split_sentence=caption_config.split_sentence,
         channel_selector=media_config.channel_selector,
+        streaming_chunk_secs=media_config.streaming_chunk_secs,
     )

lattifai/client.py CHANGED Viewed

@@ -18,6 +18,7 @@ from lattifai.errors import (
     LatticeEncodingError,
 )
 from lattifai.mixin import LattifAIClientMixin
+from lattifai.utils import safe_print
 if TYPE_CHECKING:
     from lattifai.diarization import LattifAIDiarizer  # noqa: F401
@@ -91,6 +92,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         input_caption_format: Optional[InputCaptionFormat] = None,
         split_sentence: Optional[bool] = None,
         channel_selector: Optional[str | int] = "average",
+        streaming_chunk_secs: Optional[float] = None,
     ) -> Caption:
         try:
             # Step 1: Get caption
@@ -100,6 +102,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
                 media_audio = self.audio_loader(
                     input_media,
                     channel_selector=channel_selector,
+                    streaming_chunk_secs=streaming_chunk_secs,
                 )
             if not input_caption:
@@ -113,7 +116,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
             alignment_strategy = self.aligner.config.strategy
             if alignment_strategy != "entire" or caption.transcription:
-                print(colorful.cyan(f"🔄 Using segmented alignment strategy: {alignment_strategy}"))
+                safe_print(colorful.cyan(f"🔄 Using segmented alignment strategy: {alignment_strategy}"))
                 if caption.supervisions and alignment_strategy == "transcription":
                     # raise NotImplementedError("Transcription-based alignment is not yet implemented.")
@@ -126,7 +129,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
                     if not caption.transcription:
                         import asyncio
-                        print(colorful.cyan("📝 Transcribing media for alignment..."))
+                        safe_print(colorful.cyan("📝 Transcribing media for alignment..."))
                         if output_caption_path:
                             transcript_file = (
                                 Path(str(output_caption_path)).parent
@@ -223,11 +226,11 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
                         continue
                     offset = round(start, 4)
-                    emission = self.aligner.emission(
-                        media_audio.tensor[
-                            :, int(start * media_audio.sampling_rate) : int(end * media_audio.sampling_rate)
-                        ]
-                    )
+                    # Extract audio slice
+                    audio_slice_ndarray = media_audio.ndarray[
+                        :, int(start * media_audio.sampling_rate) : int(end * media_audio.sampling_rate)
+                    ]
+                    emission = self.aligner.emission(audio_slice_ndarray)
                     # Align segment
                     _supervisions, _alignments = self.aligner.alignment(
@@ -259,7 +262,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
             # Step 5: Speaker diarization
             if self.diarization_config.enabled and self.diarizer:
-                print(colorful.cyan("🗣️  Performing speaker diarization..."))
+                safe_print(colorful.cyan("🗣️  Performing speaker diarization..."))
                 caption = self.speaker_diarization(
                     input_media=media_audio,
                     caption=caption,
@@ -308,7 +311,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         if output_caption_path:
             diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
             if diarization_file.exists():
-                print(colorful.cyan(f"Reading existing speaker diarization from {diarization_file}"))
+                safe_print(colorful.cyan(f"Reading existing speaker diarization from {diarization_file}"))
                 caption.read_speaker_diarization(diarization_file)
         diarization, alignments = self.diarizer.diarize_with_alignments(
@@ -433,12 +436,13 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         split_sentence: Optional[bool] = None,
         use_transcription: bool = False,
         channel_selector: Optional[str | int] = "average",
+        streaming_chunk_secs: Optional[float] = None,
     ) -> Caption:
         # Prepare output directory and media format
         output_dir = self._prepare_youtube_output_dir(output_dir)
         media_format = self._determine_media_format(media_format)
-        print(colorful.cyan(f"🎬 Starting YouTube workflow for: {url}"))
+        safe_print(colorful.cyan(f"🎬 Starting YouTube workflow for: {url}"))
         # Step 1: Download media
         media_file = self._download_media_sync(url, output_dir, media_format, force_overwrite)
@@ -460,7 +464,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         output_caption_path = self._generate_output_caption_path(output_caption_path, media_file, output_dir)
         # Step 4: Perform alignment
-        print(colorful.cyan("🔗 Performing forced alignment..."))
+        safe_print(colorful.cyan("🔗 Performing forced alignment..."))
         caption: Caption = self.alignment(
             input_media=media_audio,
@@ -468,6 +472,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
             output_caption_path=output_caption_path,
             split_sentence=split_sentence,
             channel_selector=channel_selector,
+            streaming_chunk_secs=streaming_chunk_secs,
         )
         return caption

lattifai/config/alignment.py CHANGED Viewed

@@ -18,8 +18,8 @@ class AlignmentConfig:
     """
     # Alignment configuration
-    model_name: str = "Lattifai/Lattice-1"
-    """Model identifier or path to local model directory (e.g., 'Lattifai/Lattice-1')."""
+    model_name: str = "LattifAI/Lattice-1"
+    """Model identifier or path to local model directory (e.g., 'LattifAI/Lattice-1')."""
     device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
     """Computation device: 'cpu' for CPU, 'cuda' for NVIDIA GPU, 'mps' for Apple Silicon."""
@@ -58,6 +58,27 @@ class AlignmentConfig:
     Default: 4.0 seconds. Useful for detecting scene changes or natural breaks in content.
     """
+    # Beam search parameters for forced alignment
+    search_beam: int = 200
+    """Search beam size for beam search decoding. Larger values explore more hypotheses but are slower.
+    Default: 200. Typical range: 20-500.
+    """
+    output_beam: int = 80
+    """Output beam size for keeping top hypotheses. Should be smaller than search_beam.
+    Default: 80. Typical range: 10-200.
+    """
+    min_active_states: int = 400
+    """Minimum number of active states during decoding. Controls memory and search space.
+    Default: 400. Typical range: 30-1000.
+    """
+    max_active_states: int = 10000
+    """Maximum number of active states during decoding. Prevents excessive memory usage.
+    Default: 10000. Typical range: 1000-20000.
+    """
     client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
     """Reference to the SyncAPIClient instance. Auto-set during client initialization."""

lattifai/config/caption.py CHANGED Viewed

@@ -48,7 +48,7 @@ class CaptionConfig:
     include_speaker_in_text: bool = True
     """Preserve speaker labels in caption text content."""
-    normalize_text: bool = False
+    normalize_text: bool = True
     """Clean HTML entities and normalize whitespace in caption text."""
     split_sentence: bool = False

lattifai/config/media.py CHANGED Viewed

@@ -52,12 +52,23 @@ class MediaConfig:
     sample_rate: Optional[int] = None
     """Audio sample rate in Hz (e.g., 16000, 44100)."""
-    channels: Optional[int] = None
-    """Number of audio channels (1=mono, 2=stereo)."""
     channel_selector: Optional[str | int] = "average"
     """Audio channel selection strategy: 'average', 'left', 'right', or channel index."""
+    # Audio Streaming Configuration
+    streaming_chunk_secs: Optional[float] = 600.0
+    """Duration in seconds of each audio chunk for streaming mode.
+    When set to a value (e.g., 600.0), enables streaming mode for processing very long audio files (>1 hour).
+    Audio is processed in chunks to keep memory usage low (<4GB peak), suitable for 20+ hour files.
+    When None, disables streaming and loads entire audio into memory.
+    Valid range: 1-1800 seconds (minimum 1 second, maximum 30 minutes).
+    Default: 600 seconds (10 minutes).
+    Recommended: Use 60 seconds or larger for optimal performance.
+    - Smaller chunks: Lower memory usage, more frequent I/O
+    - Larger chunks: Better alignment context, higher memory usage
+    Note: Streaming may add slight processing overhead but enables handling arbitrarily long files.
+    """
     # Output / download configuration
     output_dir: Path = field(default_factory=lambda: Path.cwd())
     """Directory for output files (default: current working directory)."""
@@ -87,12 +98,21 @@ class MediaConfig:
         self._normalize_media_format()
         self._process_input_path()
         self._process_output_path()
+        self._validate_streaming_config()
     def _setup_output_directory(self) -> None:
         """Ensure output directory exists and is valid."""
         resolved_output_dir = self._ensure_dir(self.output_dir)
         self.output_dir = resolved_output_dir
+    def _validate_streaming_config(self) -> None:
+        """Validate streaming configuration parameters."""
+        if self.streaming_chunk_secs is not None:
+            if not 1.0 <= self.streaming_chunk_secs <= 1800.0:
+                raise ValueError(
+                    f"streaming_chunk_secs must be between 1 and 1800 seconds (1 second to 30 minutes), got {self.streaming_chunk_secs}. Recommended: 60 seconds or larger."
+                )
     def _validate_default_formats(self) -> None:
         """Validate default audio and video formats."""
         self.default_audio_format = self._normalize_format(self.default_audio_format)

lattifai 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

lattifai 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl