PyPI - lattifai - Versions diffs - 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

lattifai/__init__.py +42 -27
lattifai/alignment/__init__.py +6 -0
lattifai/alignment/lattice1_aligner.py +119 -0
lattifai/{workers/lattice1_alpha.py → alignment/lattice1_worker.py} +33 -132
lattifai/{tokenizer → alignment}/phonemizer.py +1 -1
lattifai/alignment/segmenter.py +166 -0
lattifai/{tokenizer → alignment}/tokenizer.py +186 -112
lattifai/audio2.py +211 -0
lattifai/caption/__init__.py +20 -0
lattifai/caption/caption.py +1275 -0
lattifai/{io → caption}/supervision.py +1 -0
lattifai/{io → caption}/text_parser.py +53 -10
lattifai/cli/__init__.py +17 -0
lattifai/cli/alignment.py +153 -0
lattifai/cli/caption.py +204 -0
lattifai/cli/server.py +19 -0
lattifai/cli/transcribe.py +197 -0
lattifai/cli/youtube.py +128 -0
lattifai/client.py +455 -246
lattifai/config/__init__.py +20 -0
lattifai/config/alignment.py +73 -0
lattifai/config/caption.py +178 -0
lattifai/config/client.py +46 -0
lattifai/config/diarization.py +67 -0
lattifai/config/media.py +335 -0
lattifai/config/transcription.py +84 -0
lattifai/diarization/__init__.py +5 -0
lattifai/diarization/lattifai.py +89 -0
lattifai/errors.py +41 -34
lattifai/logging.py +116 -0
lattifai/mixin.py +552 -0
lattifai/server/app.py +420 -0
lattifai/transcription/__init__.py +76 -0
lattifai/transcription/base.py +108 -0
lattifai/transcription/gemini.py +219 -0
lattifai/transcription/lattifai.py +103 -0
lattifai/types.py +30 -0
lattifai/utils.py +3 -31
lattifai/workflow/__init__.py +22 -0
lattifai/workflow/agents.py +6 -0
lattifai/{workflows → workflow}/file_manager.py +81 -57
lattifai/workflow/youtube.py +564 -0
lattifai-1.0.0.dist-info/METADATA +736 -0
lattifai-1.0.0.dist-info/RECORD +52 -0
{lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
lattifai-1.0.0.dist-info/entry_points.txt +13 -0
lattifai/base_client.py +0 -126
lattifai/bin/__init__.py +0 -3
lattifai/bin/agent.py +0 -324
lattifai/bin/align.py +0 -295
lattifai/bin/cli_base.py +0 -25
lattifai/bin/subtitle.py +0 -210
lattifai/io/__init__.py +0 -43
lattifai/io/reader.py +0 -86
lattifai/io/utils.py +0 -15
lattifai/io/writer.py +0 -102
lattifai/tokenizer/__init__.py +0 -3
lattifai/workers/__init__.py +0 -3
lattifai/workflows/__init__.py +0 -34
lattifai/workflows/agents.py +0 -12
lattifai/workflows/gemini.py +0 -167
lattifai/workflows/prompts/README.md +0 -22
lattifai/workflows/prompts/gemini/README.md +0 -24
lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
lattifai/workflows/youtube.py +0 -931
lattifai-0.4.6.dist-info/METADATA +0 -806
lattifai-0.4.6.dist-info/RECORD +0 -39
lattifai-0.4.6.dist-info/entry_points.txt +0 -3
/lattifai/{io → caption}/gemini_reader.py +0 -0
/lattifai/{io → caption}/gemini_writer.py +0 -0
/lattifai/{workflows → transcription}/prompts/__init__.py +0 -0
/lattifai/{workflows → workflow}/base.py +0 -0
{lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +0 -0
{lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0

lattifai/{workflows → workflow}/file_manager.py RENAMED Viewed

@@ -17,6 +17,9 @@ except ImportError:  # pragma: no cover - optional dependency
     questionary = None
+TRANSCRIBE_CHOICE = "transcribe"
 class FileExistenceManager:
     """Utility class for handling file existence checks and user confirmations"""
@@ -24,37 +27,37 @@ class FileExistenceManager:
         "media": ("🎬", "Media"),
         # 'audio': ('📱', 'Audio'),
         # 'video': ('🎬', 'Video'),
-        "subtitle": ("📝", "Subtitle"),
+        "caption": ("📝", "Caption"),
     }
     @staticmethod
     def check_existing_files(
         video_id: str,
-        output_dir: str,
+        output_path: str,
         media_formats: List[str] = None,
-        subtitle_formats: List[str] = None,
+        caption_formats: List[str] = None,
     ) -> Dict[str, List[str]]:
         """
         Enhanced version to check for existing media files with customizable formats
         Args:
             video_id: Video ID from any platform
-            output_dir: Output directory to check
+            output_path: Output directory to check
             media_formats: List of media formats to check (audio and video combined)
-            subtitle_formats: List of subtitle formats to check
+            caption_formats: List of caption formats to check
         Returns:
-            Dictionary with 'media', 'subtitle' keys containing lists of existing files
+            Dictionary with 'media', 'caption' keys containing lists of existing files
         """
-        output_path = Path(output_dir).expanduser()
-        existing_files = {"media": [], "subtitle": []}
+        output_path = Path(output_path).expanduser()
+        existing_files = {"media": [], "caption": []}
         if not output_path.exists():
             return existing_files
         # Default formats - combine audio and video formats
         media_formats = media_formats or ["mp3", "wav", "m4a", "aac", "opus", "mp4", "webm", "mkv", "avi"]
-        subtitle_formats = subtitle_formats or ["md", "srt", "vtt", "ass", "ssa", "sub", "sbv", "txt"]
+        caption_formats = caption_formats or ["md", "srt", "vtt", "ass", "ssa", "sub", "sbv", "txt"]
         # Check for media files (audio and video)
         for ext in set(media_formats):  # Remove duplicates
@@ -69,44 +72,41 @@ class FileExistenceManager:
                 if file_path not in existing_files["media"]:
                     existing_files["media"].append(file_path)
-        # Check for subtitle files
-        for ext in set(subtitle_formats):  # Remove duplicates
-            # Check multiple naming patterns for subtitle files
+        # Check for caption files
+        for ext in set(caption_formats):  # Remove duplicates
+            # Check multiple naming patterns for caption files
             # Pattern 1: Simple pattern like {video_id}.vtt
-            subtitle_file = output_path / f"{video_id}.{ext}"
-            if subtitle_file.exists():
-                existing_files["subtitle"].append(str(subtitle_file))
+            caption_file = output_path / f"{video_id}.{ext}"
+            if caption_file.exists():
+                existing_files["caption"].append(str(caption_file))
             # Pattern 2: With language/track suffix like {video_id}.en-trackid.vtt
-            for sub_file in output_path.glob(f"{video_id}.*.{ext}"):
+            for sub_file in output_path.glob(f"{video_id}*.{ext}"):
                 file_path = str(sub_file)
-                if file_path not in existing_files["subtitle"]:
-                    existing_files["subtitle"].append(file_path)
-        if "md" in subtitle_formats:
-            # Gemini-specific pattern: {video_id}_Gemini.md
-            gemini_subtitle_file = output_path / f"{video_id}_Gemini.md"
-            if gemini_subtitle_file.exists():
-                existing_files["subtitle"].append(str(gemini_subtitle_file))
+                if file_path not in existing_files["caption"]:
+                    existing_files["caption"].append(file_path)
         return existing_files
     @staticmethod
-    def prompt_user_confirmation(existing_files: Dict[str, List[str]], operation: str = "download") -> str:
+    def prompt_user_confirmation(
+        existing_files: Dict[str, List[str]], operation: str = "download", transcriber_name: str = None
+    ) -> str:
         """
         Prompt user for confirmation when files already exist (legacy, confirms all files together)
         Args:
             existing_files: Dictionary of existing files
             operation: Type of operation (e.g., "download", "generate")
+            transcriber_name: Name of the transcriber to display (e.g., "Gemini_2.5_Pro")
         Returns:
             User choice: 'use' (use existing), 'overwrite' (regenerate), or 'cancel'
         """
         has_media = bool(existing_files.get("media", []))
-        has_subtitle = bool(existing_files.get("subtitle", []))
+        has_caption = bool(existing_files.get("caption", []))
-        if not has_media and not has_subtitle:
+        if not has_media and not has_caption:
             return "proceed"  # No existing files, proceed normally
         # Header with warning color
@@ -116,24 +116,39 @@ class FileExistenceManager:
         file_paths = []
         if has_media:
             file_paths.extend(existing_files["media"])
-        if has_subtitle:
-            file_paths.extend(existing_files["subtitle"])
+        if has_caption:
+            file_paths.extend(existing_files["caption"])
         # Create display options with emojis
-        options = []
-        for file_path in file_paths:
+        options, shift_length = [], 0
+        for file_path in sorted(file_paths):
             # Determine emoji based on file type
             if has_media and file_path in existing_files["media"]:
                 display_text = f'{colorful.green("•")} 🎬 Media file: {file_path}'
+                shift_length = len("Media file:")
             else:
-                display_text = f'{colorful.green("•")} 📝 Subtitle file: {file_path}'
+                display_text = f'{colorful.green("•")} 📝 Caption file: {file_path}'
+                shift_length = len("Caption file:")
             options.append((display_text, file_path))
-        # Add overwrite and cancel options
+        # Add overwrite and cancel options with aligned spacing
+        overwrite_text, overwrite_op = "Overwrite existing files (re-generate or download)", "overwrite"
+        if transcriber_name:
+            options.append(
+                (
+                    f'{colorful.green("•")} 🔄 {" " * shift_length} {overwrite_text}',
+                    overwrite_op,
+                )
+            )
+            overwrite_text, overwrite_op = f"Transcribe with {transcriber_name}", TRANSCRIBE_CHOICE
         options.extend(
             [
-                ("                  Overwrite existing files (re-generate or download)", "overwrite"),
-                ("                  Cancel operation", "cancel"),
+                (
+                    f'{colorful.green("•")} 🔄 {" " * shift_length} {overwrite_text}',
+                    overwrite_op,
+                ),
+                (f'{colorful.green("•")} ❌ {" " * shift_length} Cancel operation', "cancel"),
             ]
         )
@@ -143,6 +158,8 @@ class FileExistenceManager:
         if choice == "overwrite":
             print(f'{colorful.yellow("🔄 Overwriting existing files")}')
+        elif choice == TRANSCRIBE_CHOICE:
+            print(f'{colorful.magenta(f"✨ Will transcribe with {transcriber_name}")}')
         elif choice == "cancel":
             print(f'{colorful.red("❌ Operation cancelled")}')
         elif choice in file_paths:
@@ -158,7 +175,7 @@ class FileExistenceManager:
         Prompt user for confirmation for a specific file type
         Args:
-            file_type: Type of file ('audio', 'video', 'subtitle', 'gemini')
+            file_type: Type of file ('audio', 'video', 'caption', 'gemini')
             files: List of existing files of this type
             operation: Type of operation (e.g., "download", "generate")
@@ -174,21 +191,21 @@ class FileExistenceManager:
         # Header with warning color
         print(f'\n{colorful.bold_yellow(f"⚠️  Existing {label} files found:")}')
-        for file_path in files:
+        for file_path in sorted(files):
             print(f'   {colorful.green("•")} {file_path}')
-        prompt_message = f"What would you like to do with {label.lower()} files?"
+        prompt_message = f"What would you like to do with {label} files?"
         options = [
-            (f"Use existing {label.lower()} files (skip {operation})", "use"),
-            (f"Overwrite {label.lower()} files (re-{operation})", "overwrite"),
+            (f"Use existing {label} files (skip {operation})", "use"),
+            (f"Overwrite {label} files (re-{operation})", "overwrite"),
             ("Cancel operation", "cancel"),
         ]
         choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default="use")
         if choice == "use":
-            print(f'{colorful.green(f"✅ Using existing {label.lower()} files")}')
+            print(f'{colorful.green(f"✅ Using existing {label} files")}')
         elif choice == "overwrite":
-            print(f'{colorful.yellow(f"🔄 Overwriting {label.lower()} files")}')
+            print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
         elif choice == "cancel":
             print(f'{colorful.red("❌ Operation cancelled")}')
@@ -196,19 +213,23 @@ class FileExistenceManager:
     @staticmethod
     def prompt_file_selection(
-        file_type: str, files: List[str], operation: str = "use", enable_gemini: bool = False
+        file_type: str,
+        files: List[str],
+        operation: str = "use",
+        transcriber_name: str = None,
     ) -> str:
         """
         Prompt user to select a specific file from a list, or choose to overwrite/cancel
         Args:
-            file_type: Type of file (e.g., 'gemini transcript', 'subtitle')
+            file_type: Type of file (e.g., 'gemini transcript', 'caption')
             files: List of existing files to choose from
             operation: Type of operation (e.g., "transcribe", "download")
-            enable_gemini: If True, adds "Transcribe with Gemini" option
+            transcriber_name: Name of the transcriber to display (e.g., "Gemini_2.5_Pro").
+                If provided, adds transcribe option for the transcriber.
         Returns:
-            Selected file path, 'overwrite' to regenerate, 'gemini' to transcribe with Gemini, or 'cancel' to abort
+            Selected file path, 'overwrite' to regenerate, 'gemini' to transcribe with transcriber, or 'cancel' to abort
         """
         if not files:
             return "proceed"
@@ -228,16 +249,17 @@ class FileExistenceManager:
         # Create options with full file paths
         options = []
-        for i, file_path in enumerate(files, 1):
+        for i, file_path in enumerate(sorted(files), 1):
             # Display full path for clarity
             options.append((f"{colorful.cyan(file_path)}", file_path))
-        # Add Gemini transcription option if enabled
-        if enable_gemini:
-            options.append((colorful.magenta("✨ Transcribe with Gemini 2.5 Pro"), "gemini"))
-        # Add overwrite and cancel options
-        options.append((colorful.yellow(f"Overwrite (re-{operation} or download)"), "overwrite"))
+        # Add transcription or overwrite option
+        if transcriber_name:
+            transcribe_text = f"✨ Transcribe with {transcriber_name}"
+            options.append((colorful.magenta(transcribe_text), TRANSCRIBE_CHOICE))
+        else:
+            overwrite_text = f"Overwrite (re-{operation} or download)"
+            options.append((colorful.yellow(overwrite_text), "overwrite"))
         options.append((colorful.red("Cancel operation"), "cancel"))
         prompt_message = colorful.bold_black_on_cyan(f"Select which {file_type} to use:")
@@ -246,9 +268,11 @@ class FileExistenceManager:
         if choice == "cancel":
             print(f'{colorful.red("❌ Operation cancelled")}')
         elif choice == "overwrite":
-            print(f'{colorful.yellow(f"🔄 Overwriting all {file_type} files")}')
-        elif choice == "gemini":
-            print(f'{colorful.magenta("✨ Will transcribe with Gemini 2.5 Pro")}')
+            overwrite_msg = f"🔄 Overwriting all {file_type} files"
+            print(f"{colorful.yellow(overwrite_msg)}")
+        elif choice == TRANSCRIBE_CHOICE:
+            transcribe_msg = f"✨ Will transcribe with {transcriber_name}"
+            print(f"{colorful.magenta(transcribe_msg)}")
         else:
             print(f'{colorful.green(f"✅ Using: {choice}")}')
@@ -269,7 +293,7 @@ class FileExistenceManager:
             Dictionary mapping file type to user choice ('use', 'overwrite', 'proceed', or 'cancel')
         """
         ordered_types = []
-        for preferred in ["media", "audio", "video", "subtitle"]:
+        for preferred in ["media", "audio", "video", "caption"]:
             if preferred not in ordered_types:
                 ordered_types.append(preferred)
         for file_type in existing_files.keys():

lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl