PyPI - karaoke-gen - Versions diffs - 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl - Mend

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

karaoke_gen/audio_fetcher.py +766 -33
karaoke_gen/audio_processor.py +4 -0
karaoke_gen/instrumental_review/static/index.html +37 -14
karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
karaoke_gen/karaoke_gen.py +18 -14
karaoke_gen/lyrics_processor.py +97 -6
karaoke_gen/utils/cli_args.py +6 -5
karaoke_gen/utils/gen_cli.py +30 -5
karaoke_gen/utils/remote_cli.py +269 -15
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +106 -4
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +24 -24
lyrics_transcriber/core/controller.py +76 -2
lyrics_transcriber/frontend/package.json +1 -1
lyrics_transcriber/frontend/src/App.tsx +6 -4
lyrics_transcriber/frontend/src/api.ts +25 -10
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
lyrics_transcriber/frontend/web_assets/index.html +1 -1
lyrics_transcriber/output/countdown_processor.py +39 -0
lyrics_transcriber/transcribers/audioshake.py +96 -7
lyrics_transcriber/types.py +14 -12
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0

lyrics_transcriber/frontend/web_assets/index.html CHANGED Viewed

@@ -10,7 +10,7 @@
     <link rel="icon" type="image/png" sizes="512x512" href="/android-chrome-512x512.png" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Nomad Karaoke: Lyrics Review</title>
-    <script type="module" crossorigin src="/assets/index-COYImAcx.js"></script>
+    <script type="module" crossorigin src="/assets/index-BECn1o8Q.js"></script>
   </head>
   <body>
     <div id="root"></div>

lyrics_transcriber/output/countdown_processor.py CHANGED Viewed

@@ -265,3 +265,42 @@ class CountdownProcessor:
         return countdown_segment
+    def has_countdown(self, correction_result: CorrectionResult) -> bool:
+        """
+        Check if a CorrectionResult already has a countdown segment.
+        This is used to detect if countdown padding was applied to corrections
+        that were loaded from a saved JSON file (where the padding state is not
+        explicitly stored).
+        Args:
+            correction_result: The correction result to check
+        Returns:
+            True if the first segment is a countdown, False otherwise
+        """
+        if not correction_result.corrected_segments:
+            return False
+        first_segment = correction_result.corrected_segments[0]
+        return first_segment.text == self.COUNTDOWN_TEXT
+    def create_padded_audio_only(self, audio_filepath: str) -> str:
+        """
+        Create a padded audio file without modifying the correction result.
+        This is used when loading existing corrections that already have countdown
+        timestamps, but we need to create the padded audio file for video rendering.
+        Args:
+            audio_filepath: Path to original audio file
+        Returns:
+            Path to padded audio file
+        Raises:
+            FileNotFoundError: If input audio file doesn't exist
+            RuntimeError: If ffmpeg command fails
+        """
+        return self._create_padded_audio(audio_filepath)

lyrics_transcriber/transcribers/audioshake.py CHANGED Viewed

@@ -2,12 +2,92 @@ from dataclasses import dataclass
 import requests
 import time
 import os
-from typing import Dict, Optional, Any, Union
+import tempfile
+from typing import Dict, Optional, Any, Union, Tuple
 from pathlib import Path
+from pydub import AudioSegment
 from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
 from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
 from lyrics_transcriber.utils.word_utils import WordUtils
+# Lossy formats that should be uploaded directly (transcoding would cause quality loss)
+LOSSY_FORMATS = {'.mp3', '.aac', '.ogg', '.m4a', '.wma', '.opus'}
+# Lossless formats that are already compressed and can be uploaded directly
+LOSSLESS_COMPRESSED_FORMATS = {'.flac', '.alac'}
+# Uncompressed formats that should be converted to FLAC for efficient upload
+UNCOMPRESSED_FORMATS = {'.wav', '.aiff', '.aif', '.pcm'}
+class AudioUploadOptimizer:
+    """Optimizes audio files for upload by converting uncompressed formats to FLAC."""
+    def __init__(self, logger):
+        self.logger = logger
+    def prepare_for_upload(self, filepath: str) -> Tuple[str, Optional[str]]:
+        """
+        Prepare audio file for optimal upload.
+        Returns:
+            Tuple of (filepath_to_upload, temp_file_to_cleanup)
+            - If no conversion needed, returns (original_filepath, None)
+            - If converted, returns (temp_flac_filepath, temp_flac_filepath)
+        """
+        ext = os.path.splitext(filepath)[1].lower()
+        # Lossy formats: upload directly (transcoding would lose quality)
+        if ext in LOSSY_FORMATS:
+            self.logger.info(f"Uploading lossy format ({ext}) directly to preserve quality")
+            return filepath, None
+        # Already compressed lossless: upload directly
+        if ext in LOSSLESS_COMPRESSED_FORMATS:
+            self.logger.info(f"Uploading lossless compressed format ({ext}) directly")
+            return filepath, None
+        # Uncompressed formats: convert to FLAC for smaller upload
+        if ext in UNCOMPRESSED_FORMATS:
+            self.logger.info(f"Converting uncompressed format ({ext}) to FLAC for efficient upload")
+            return self._convert_to_flac(filepath)
+        # Unknown format: try to upload directly
+        self.logger.warning(f"Unknown audio format ({ext}), uploading directly")
+        return filepath, None
+    def _convert_to_flac(self, filepath: str) -> Tuple[str, str]:
+        """Convert audio file to FLAC format."""
+        ext = os.path.splitext(filepath)[1].lower()
+        # Load audio based on format
+        if ext == '.wav':
+            audio = AudioSegment.from_wav(filepath)
+        elif ext in {'.aiff', '.aif'}:
+            audio = AudioSegment.from_file(filepath, format='aiff')
+        else:
+            audio = AudioSegment.from_file(filepath)
+        # Create temp file for FLAC output
+        with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
+            flac_path = temp_flac.name
+            audio.export(flac_path, format="flac")
+        # Log size reduction
+        original_size = os.path.getsize(filepath)
+        flac_size = os.path.getsize(flac_path)
+        reduction_pct = (1 - flac_size / original_size) * 100
+        self.logger.info(f"Converted to FLAC: {original_size / 1024 / 1024:.1f}MB → {flac_size / 1024 / 1024:.1f}MB ({reduction_pct:.0f}% smaller)")
+        return flac_path, flac_path
+    def cleanup(self, temp_filepath: Optional[str]) -> None:
+        """Clean up temporary file if it exists."""
+        if temp_filepath and os.path.exists(temp_filepath):
+            try:
+                os.unlink(temp_filepath)
+                self.logger.debug(f"Cleaned up temporary file: {temp_filepath}")
+            except OSError as e:
+                self.logger.warning(f"Failed to clean up temporary file {temp_filepath}: {e}")
 @dataclass
 class AudioShakeConfig:
@@ -162,11 +242,13 @@ class AudioShakeTranscriber(BaseTranscriber):
         config: Optional[AudioShakeConfig] = None,
         logger: Optional[Any] = None,
         api_client: Optional[AudioShakeAPI] = None,
+        upload_optimizer: Optional[AudioUploadOptimizer] = None,
     ):
         """Initialize AudioShake transcriber."""
         super().__init__(cache_dir=cache_dir, logger=logger)
         self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
         self.api = api_client or AudioShakeAPI(self.config, self.logger)
+        self.upload_optimizer = upload_optimizer or AudioUploadOptimizer(self.logger)
     def get_name(self) -> str:
         return "AudioShake"
@@ -195,14 +277,21 @@ class AudioShakeTranscriber(BaseTranscriber):
         """Starts the transcription task and returns the task ID."""
         self.logger.debug(f"Entering start_transcription() for {audio_filepath}")
-        # Upload file and create task
-        file_url = self.api.upload_file(audio_filepath)
-        self.logger.debug(f"File uploaded successfully. File URL: {file_url}")
+        # Optimize file format for upload (convert WAV to FLAC, etc.)
+        upload_filepath, temp_filepath = self.upload_optimizer.prepare_for_upload(audio_filepath)
+        try:
+            # Upload file and create task
+            file_url = self.api.upload_file(upload_filepath)
+            self.logger.debug(f"File uploaded successfully. File URL: {file_url}")
-        task_id = self.api.create_task(file_url)
-        self.logger.debug(f"Task created successfully. Task ID: {task_id}")
+            task_id = self.api.create_task(file_url)
+            self.logger.debug(f"Task created successfully. Task ID: {task_id}")
-        return task_id
+            return task_id
+        finally:
+            # Clean up any temporary file created during optimization
+            self.upload_optimizer.cleanup(temp_filepath)
     def get_transcription_result(self, task_id: str) -> Dict[str, Any]:
         """Gets the raw results for a previously started task."""

lyrics_transcriber/types.py CHANGED Viewed

@@ -363,30 +363,32 @@ class AnchorSequence:
     def from_dict(cls, data: Dict[str, Any]) -> "AnchorSequence":
         """Create AnchorSequence from dictionary."""
         # Handle both old and new dictionary formats
-        if "words" in data:
-            # Old format - convert to new format without setting _words
-            # This ensures to_dict() always returns the new format
-            words = data["words"]
+        # Check for new format keys FIRST (they take priority even if old keys also present)
+        if "transcribed_word_ids" in data:
+            # New format - use existing IDs
             return cls(
                 id=data.get("id", WordUtils.generate_id()),
-                transcribed_word_ids=[WordUtils.generate_id() for _ in words],
+                transcribed_word_ids=data["transcribed_word_ids"],
                 transcription_position=data["transcription_position"],
                 reference_positions=data["reference_positions"],
-                reference_word_ids={source: [WordUtils.generate_id() for _ in words]
-                                   for source in data["reference_positions"].keys()},
+                reference_word_ids=data["reference_word_ids"],
                 confidence=data["confidence"],
-                # Don't set _words - this ensures we always use the new format
             )
-        else:
-            # New format
+        elif "words" in data:
+            # Old format only - convert to new format by generating IDs
+            # This ensures to_dict() always returns the new format
+            words = data["words"]
             return cls(
                 id=data.get("id", WordUtils.generate_id()),
-                transcribed_word_ids=data["transcribed_word_ids"],
+                transcribed_word_ids=[WordUtils.generate_id() for _ in words],
                 transcription_position=data["transcription_position"],
                 reference_positions=data["reference_positions"],
-                reference_word_ids=data["reference_word_ids"],
+                reference_word_ids={source: [WordUtils.generate_id() for _ in words]
+                                   for source in data["reference_positions"].keys()},
                 confidence=data["confidence"],
             )
+        else:
+            raise ValueError("AnchorSequence.from_dict requires either 'transcribed_word_ids' or 'words' key")
 @dataclass

{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL RENAMED Viewed

File without changes

{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl