PyPI - lyrics-transcriber - Versions diffs - 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl - Mend

lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

lyrics_transcriber/__init__.py +2 -1
lyrics_transcriber/cli/cli_main.py +33 -12
lyrics_transcriber/core/config.py +35 -0
lyrics_transcriber/core/controller.py +85 -121
lyrics_transcriber/correction/anchor_sequence.py +471 -0
lyrics_transcriber/correction/corrector.py +237 -33
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +30 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
lyrics_transcriber/correction/handlers/repeat.py +71 -0
lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
lyrics_transcriber/correction/handlers/word_operations.py +135 -0
lyrics_transcriber/correction/phrase_analyzer.py +426 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +5 -81
lyrics_transcriber/lyrics/genius.py +5 -2
lyrics_transcriber/lyrics/spotify.py +3 -3
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +37 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +219 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +503 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +101 -193
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +91 -0
lyrics_transcriber/output/segment_resizer.py +416 -0
lyrics_transcriber/output/subtitles.py +328 -302
lyrics_transcriber/output/video.py +219 -0
lyrics_transcriber/review/__init__.py +1 -0
lyrics_transcriber/review/server.py +138 -0
lyrics_transcriber/transcribers/audioshake.py +3 -2
lyrics_transcriber/transcribers/base_transcriber.py +5 -42
lyrics_transcriber/transcribers/whisper.py +3 -4
lyrics_transcriber/types.py +454 -0
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/METADATA +14 -3
lyrics_transcriber-0.32.2.dist-info/RECORD +86 -0
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/WHEEL +1 -1
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/correction/base_strategy.py +0 -29
lyrics_transcriber/correction/strategy_diff.py +0 -263
lyrics_transcriber-0.30.1.dist-info/RECORD +0 -25
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/LICENSE +0 -0

lyrics_transcriber/output/video.py ADDED Viewed

@@ -0,0 +1,219 @@
+import logging
+import os
+import json
+import subprocess
+from typing import List, Optional, Tuple
+class VideoGenerator:
+    """Handles generation of video files with lyrics overlay."""
+    def __init__(
+        self,
+        output_dir: str,
+        cache_dir: str,
+        video_resolution: Tuple[int, int],
+        styles: dict,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """Set up a generator that renders lyric videos.
+        Args:
+            output_dir: Directory that receives the finished video files
+            cache_dir: Directory used for intermediate/temporary files
+            video_resolution: (width, height) of the rendered video; every value must be positive
+            styles: Styling configuration; only its "karaoke" section is read here
+            logger: Logger to use; falls back to this module's logger
+        Raises:
+            ValueError: If any resolution dimension is not greater than 0
+            FileNotFoundError: If a background image is configured but is not an existing file
+        """
+        # Reject the first non-positive dimension we come across
+        for dimension in video_resolution:
+            if not dimension > 0:
+                raise ValueError("Video resolution dimensions must be greater than 0")
+        self.output_dir = output_dir
+        self.cache_dir = cache_dir
+        self.video_resolution = video_resolution
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = logging.getLogger(__name__)
+        # Background settings live under the "karaoke" key; a missing section means "all defaults"
+        karaoke = styles.get("karaoke", {})
+        self.background_image = karaoke.get("background_image")
+        self.background_color = karaoke.get("background_color", "black")
+        # Fail early on a configured-but-missing image rather than at render time
+        image_configured = bool(self.background_image)
+        if image_configured and not os.path.isfile(self.background_image):
+            raise FileNotFoundError(f"Video background image not found: {self.background_image}")
+    def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
+        """Generate an MKV video with lyrics overlay.
+        Args:
+            ass_path: Path to ASS subtitles file
+            audio_path: Path to audio file
+            output_prefix: Prefix for output filename; " (With Vocals).mkv" is appended
+        Returns:
+            Path to generated video file
+        Raises:
+            FileNotFoundError: If the subtitles or audio file does not exist
+            Exception: Any error raised while copying the subtitles or running FFmpeg is logged and re-raised
+        """
+        import shutil
+        self.logger.info("Generating video with lyrics overlay")
+        output_path = self._get_output_path(f"{output_prefix} (With Vocals)", "mkv")
+        # Check input files exist before running FFmpeg
+        if not os.path.isfile(ass_path):
+            raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
+        if not os.path.isfile(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+        # FFmpeg is given a temporary copy of the ASS file with a fixed, safe filename
+        temp_ass_path = os.path.join(self.cache_dir, "temp_subtitles.ass")
+        try:
+            shutil.copy2(ass_path, temp_ass_path)
+            self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
+            cmd = self._build_ffmpeg_command(temp_ass_path, audio_path, output_path)
+            self._run_ffmpeg_command(cmd)
+            self.logger.info(f"Video generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to generate video: {str(e)}")
+            raise
+        finally:
+            # Single cleanup point for success and failure alike
+            try:
+                os.remove(temp_ass_path)
+            except FileNotFoundError:
+                # The copy itself failed, so there is nothing to remove
+                pass
+            except OSError as cleanup_error:
+                # Best effort: a leftover temp file must not mask the real outcome
+                self.logger.warning(f"Could not remove temporary ASS file {temp_ass_path}: {cleanup_error}")
+    def _get_output_path(self, output_prefix: str, extension: str) -> str:
+        """Return the path of ``<output_prefix>.<extension>`` inside the output directory."""
+        filename = "{}.{}".format(output_prefix, extension)
+        return os.path.join(self.output_dir, filename)
+    def _resize_background_image(self, input_path: str) -> str:
+        """Resize background image to match target resolution and save to temp file.
+        The image is scaled down to fit inside the target resolution while keeping its
+        aspect ratio, then padded (centered) to the exact target size.
+        Args:
+            input_path: Path to the source background image
+        Returns:
+            ``input_path`` unchanged when ffprobe reports it already has the target
+            resolution, otherwise the path of the resized copy in the cache directory
+        Raises:
+            subprocess.CalledProcessError: If the FFmpeg resize command fails
+        """
+        target_width, target_height = self.video_resolution
+        # Get current image dimensions using ffprobe
+        try:
+            probe_cmd = [
+                "ffprobe",
+                "-v",
+                "error",
+                "-select_streams",
+                "v:0",
+                "-show_entries",
+                "stream=width,height",
+                "-of",
+                "json",
+                input_path,
+            ]
+            probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
+            probe_data = json.loads(probe_output)
+            current_width = probe_data["streams"][0]["width"]
+            current_height = probe_data["streams"][0]["height"]
+            # If dimensions already match, return original path
+            if current_width == target_width and current_height == target_height:
+                self.logger.debug("Background image already at target resolution")
+                return input_path
+        except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError, IndexError) as e:
+            # IndexError covers a successful probe that reports no video stream ("streams": [])
+            self.logger.warning(f"Failed to get image dimensions: {e}")
+            # Continue with resize attempt if probe fails
+        temp_path = os.path.join(self.cache_dir, "resized_background.png")
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            input_path,
+            "-vf",
+            f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease,"
+            f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2",
+            temp_path,
+        ]
+        try:
+            subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
+            return temp_path
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"Failed to resize background image: {e.output}")
+            raise
+    def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
+        """Assemble the FFmpeg argument list that renders the lyrics video.
+        The video input is either the configured background image (resized and looped)
+        or a solid-colour lavfi source; the audio file is the second input and the ASS
+        subtitles are applied as a video filter.
+        Args:
+            ass_path: Path to the ASS subtitles file handed to the ``ass`` filter
+            audio_path: Path to the audio input
+            output_path: Path FFmpeg writes the video to (last argument)
+        Returns:
+            The complete command as a list of arguments
+        """
+        width, height = self.video_resolution
+        # fmt: off
+        # Video input: background image if configured, otherwise a generated colour source
+        if self.background_image:
+            resized_bg = self._resize_background_image(self.background_image)
+            self.logger.debug(f"Using resized background image: {resized_bg}")
+            background_args = ["-loop", "1", "-i", resized_bg]
+        else:
+            self.logger.debug(
+                f"Using solid {self.background_color} background "
+                f"with resolution: {width}x{height}"
+            )
+            background_args = ["-f", "lavfi", "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"]
+        # Audio input, subtitle filter and encoder settings
+        encode_args = [
+            "-i", audio_path,
+            "-c:a", "flac",
+            "-vf", f"ass={ass_path}",
+            "-c:v", self._get_video_codec(),
+            "-preset", "slow",
+            "-b:v", "5000k",
+            "-minrate", "5000k",
+            "-maxrate", "20000k",
+            "-bufsize", "10000k",
+            "-shortest",
+            "-y",
+        ]
+        # fmt: on
+        # Global options and the 30 fps rate come first, the output path comes last
+        return [
+            "ffmpeg",
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-r",
+            "30",
+            *background_args,
+            *encode_args,
+            output_path,
+        ]
+    def _get_video_codec(self) -> str:
+        """Choose the video encoder: h264_videotoolbox when FFmpeg lists it, otherwise libx264."""
+        selected_codec = "libx264"
+        try:
+            # The hardware encoder is used only if its name appears in FFmpeg's codec listing
+            if "h264_videotoolbox" in subprocess.getoutput("ffmpeg -codecs"):
+                self.logger.info("Using hardware accelerated h264_videotoolbox")
+                selected_codec = "h264_videotoolbox"
+        except Exception as e:
+            self.logger.warning(f"Error checking for hardware acceleration: {e}")
+        return selected_codec
+    def _run_ffmpeg_command(self, cmd: List[str]) -> None:
+        """Execute FFmpeg command with output handling.
+        Args:
+            cmd: Complete command line as a list of arguments
+        Raises:
+            subprocess.CalledProcessError: If the command exits with a non-zero status;
+                its combined stdout/stderr is logged before re-raising
+        """
+        self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
+        try:
+            # stderr is merged into stdout so a failure's e.output carries FFmpeg's error text
+            subprocess.check_output(cmd, universal_newlines=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"FFmpeg error: {e.output}")
+            raise

lyrics_transcriber/review/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .server import start_review_server, complete_review

lyrics_transcriber/review/server.py ADDED Viewed

@@ -0,0 +1,138 @@
+import logging
+from fastapi import FastAPI, Body
+from fastapi.middleware.cors import CORSMiddleware
+from typing import Optional, Dict, Any
+from ..types import CorrectionResult
+import time
+import subprocess
+import os
+import atexit
+import urllib.parse
+# Module-level logger for the review server
+logger = logging.getLogger(__name__)
+# FastAPI application that the review API endpoints in this module are registered on
+app = FastAPI()
+# Configure CORS for development
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["http://localhost:5173"],  # Vite's default dev server port
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Global state for the review process
+# Correction result currently under review; None means no review is in progress
+current_review: Optional[CorrectionResult] = None
+# Set to True by the /api/complete handler; start_review_server polls it to know when to return
+review_completed = False
+# Handle of the Vite dev server process launched by start_vite_server, if any
+vite_process: Optional[subprocess.Popen] = None
+def start_vite_server():
+    """Start the Vite development server.
+    Runs ``npm run dev`` in the ``lyrics-analyzer`` directory located two levels above
+    this file, stores the process handle in the module-level ``vite_process`` and
+    registers it for termination at interpreter exit.
+    Returns:
+        The ``subprocess.Popen`` handle of the started process
+    """
+    global vite_process
+    # Get the path to the lyrics-analyzer directory relative to this file
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    vite_dir = os.path.abspath(os.path.join(current_dir, "../../lyrics-analyzer"))
+    logger.info(f"Starting Vite dev server in {vite_dir}")
+    # Discard the dev server's output: nothing reads these streams, and an unread PIPE
+    # can fill up and block the child process once the OS pipe buffer is full
+    process = subprocess.Popen(["npm", "run", "dev"], cwd=vite_dir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    vite_process = process
+    # Bind the exit handler to this specific process so repeated calls each clean up their own server
+    atexit.register(process.terminate)
+    # Wait a bit for the server to start
+    time.sleep(2)  # Adjust this if needed
+    return process
+@app.get("/api/correction-data")
+async def get_correction_data():
+    """Return the correction data under review as a dict, or an error dict when no review is active."""
+    review = current_review
+    if review is not None:
+        return review.to_dict()
+    return {"error": "No review in progress"}
+@app.post("/api/complete")
+async def complete_review(updated_data: Dict[str, Any] = Body(...)):
+    """
+    Mark the review as complete and update the correction data.
+    Args:
+        updated_data: The complete correction result data with any modifications
+    Returns:
+        {"status": "success"} when the data was accepted, otherwise
+        {"status": "error", "message": <exception text>}
+    """
+    global review_completed, current_review
+    logger.info("Received updated correction data")
+    try:
+        # Update the current review with modified data
+        # We use from_dict to ensure the data is properly structured
+        current_review = CorrectionResult.from_dict(updated_data)
+        logger.info(f"Successfully updated correction data with {len(current_review.corrections)} corrections")
+        # Set the flag last: start_review_server's wait loop exits once it is True and then returns current_review
+        review_completed = True
+        return {"status": "success"}
+    except Exception as e:
+        # Failures are reported in the response body instead of being raised;
+        # review_completed is not set on this path, so the waiting loop keeps running
+        logger.error(f"Failed to update correction data: {str(e)}")
+        return {"status": "error", "message": str(e)}
+def start_review_server(correction_result: CorrectionResult) -> CorrectionResult:
+    """
+    Start the review server and wait for completion.
+    Launches the Vite dev server, serves the FastAPI app on 127.0.0.1:8000 from a
+    daemon thread, opens the review UI in the browser and then blocks until the
+    /api/complete endpoint marks the review as completed. There is no timeout.
+    Args:
+        correction_result: The correction result to review
+    Returns:
+        The potentially modified correction result after review
+    """
+    import uvicorn
+    import webbrowser
+    from threading import Thread
+    global current_review, review_completed
+    current_review = correction_result
+    review_completed = False
+    logger.info("Starting review server...")
+    # Start Vite dev server
+    vite_proc = start_vite_server()
+    logger.info("Vite dev server started")
+    try:
+        # Start FastAPI server in a separate thread
+        server_thread = Thread(target=uvicorn.run, args=(app,), kwargs={"host": "127.0.0.1", "port": 8000, "log_level": "info"}, daemon=True)
+        server_thread.start()
+        logger.info("Server thread started")
+        # Open browser
+        base_api_url = "http://localhost:8000/api"
+        encoded_api_url = urllib.parse.quote(base_api_url, safe="")
+        webbrowser.open(f"http://localhost:5173?baseApiUrl={encoded_api_url}")
+        logger.info("Opened browser for review")
+        # Wait for review to complete
+        while not review_completed:
+            time.sleep(0.1)
+    finally:
+        # Clean up Vite server, also when startup fails or the wait is interrupted (e.g. Ctrl+C)
+        if vite_proc:
+            vite_proc.terminate()
+            try:
+                vite_proc.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                # The process ignored terminate(); force it down
+                vite_proc.kill()
+    logger.info("Review completed, returning results")
+    return current_review

lyrics_transcriber/transcribers/audioshake.py CHANGED Viewed

@@ -4,7 +4,8 @@ import time
 import os
 from typing import Dict, Optional, Any, Union
 from pathlib import Path
-from .base_transcriber import BaseTranscriber, TranscriptionData, LyricsSegment, Word, TranscriptionError
+from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
+from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
 @dataclass
@@ -182,7 +183,7 @@ class AudioShakeTranscriber(BaseTranscriber):
         for line in transcription_data.get("lines", []):
             words = [
                 Word(
-                    text=word["text"],
+                    text=word["text"].strip(" "),
                     start_time=word.get("start", 0.0),
                     end_time=word.get("end", 0.0),
                 )

lyrics_transcriber/transcribers/base_transcriber.py CHANGED Viewed

@@ -1,61 +1,24 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from typing import Dict, Any, Optional, Protocol, List, Union
+from typing import Dict, Any, Optional, Union
 from pathlib import Path
 import logging
 import os
 import json
 import hashlib
-from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsSegment, Word
-@dataclass
-class TranscriptionData:
-    """Structured container for transcription results."""
-    segments: List[LyricsSegment]
-    words: List[Word]
-    text: str
-    source: str  # e.g., "whisper", "audioshake"
-    metadata: Optional[Dict[str, Any]] = None
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert TranscriptionData to dictionary for JSON serialization."""
-        return {
-            "segments": [segment.to_dict() for segment in self.segments],
-            "words": [word.to_dict() for word in self.words],
-            "text": self.text,
-            "source": self.source,
-            "metadata": self.metadata,
-        }
-@dataclass
-class TranscriptionResult:
-    name: str
-    priority: int
-    result: TranscriptionData
-class LoggerProtocol(Protocol):
-    """Protocol for logger interface."""
-    def debug(self, msg: str) -> None: ...
-    def info(self, msg: str) -> None: ...
-    def warning(self, msg: str) -> None: ...
-    def error(self, msg: str) -> None: ...
+from lyrics_transcriber.types import TranscriptionData
 class TranscriptionError(Exception):
     """Base exception for transcription errors."""
-    pass
+    def __init__(self, message: str):
+        super().__init__(message)
 class BaseTranscriber(ABC):
     """Base class for all transcription services."""
-    def __init__(self, cache_dir: Union[str, Path], logger: Optional[LoggerProtocol] = None):
+    def __init__(self, cache_dir: Union[str, Path], logger: Optional[logging.Logger] = None):
         """
         Initialize transcriber with cache directory and logger.

lyrics_transcriber/transcribers/whisper.py CHANGED Viewed

@@ -9,7 +9,8 @@ import time
 from typing import Optional, Dict, Any, Protocol, Union
 from pathlib import Path
 from pydub import AudioSegment
-from .base_transcriber import BaseTranscriber, TranscriptionData, LyricsSegment, Word, TranscriptionError
+from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
+from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
 @dataclass
@@ -194,7 +195,7 @@ class WhisperTranscriber(BaseTranscriber):
     def _initialize_storage(self) -> FileStorageProtocol:
         """Initialize storage client."""
-        from ..storage.dropbox import DropboxHandler, DropboxConfig
+        from lyrics_transcriber.storage.dropbox import DropboxHandler, DropboxConfig
         # Create config using os.getenv directly
         config = DropboxConfig(
@@ -313,8 +314,6 @@ class WhisperTranscriber(BaseTranscriber):
     def _validate_response(self, raw_data: Dict[str, Any]) -> None:
         """Validate the response contains required fields."""
-        if not isinstance(raw_data, dict):
-            raise TranscriptionError(f"Invalid response format: {raw_data}")
         if "segments" not in raw_data:
             raise TranscriptionError("Response missing required 'segments' field")
         if "transcription" not in raw_data:

lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl