PyPI - lyrics-transcriber - Versions diffs - 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl - Mend

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

lyrics_transcriber/cli/{main.py → cli_main.py} +15 -3
lyrics_transcriber/core/controller.py +129 -95
lyrics_transcriber/correction/base_strategy.py +29 -0
lyrics_transcriber/correction/corrector.py +52 -0
lyrics_transcriber/correction/strategy_diff.py +263 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
lyrics_transcriber/lyrics/genius.py +70 -0
lyrics_transcriber/lyrics/spotify.py +82 -0
lyrics_transcriber/output/generator.py +158 -97
lyrics_transcriber/output/subtitles.py +12 -12
lyrics_transcriber/storage/dropbox.py +110 -134
lyrics_transcriber/transcribers/audioshake.py +170 -105
lyrics_transcriber/transcribers/base_transcriber.py +186 -0
lyrics_transcriber/transcribers/whisper.py +268 -133
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +1 -1
lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
lyrics_transcriber/core/corrector.py +0 -56
lyrics_transcriber/core/fetcher.py +0 -143
lyrics_transcriber/storage/tokens.py +0 -116
lyrics_transcriber/transcribers/base.py +0 -31
lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0

lyrics_transcriber/output/generator.py CHANGED Viewed

@@ -1,9 +1,42 @@
+from dataclasses import dataclass
 import os
 import logging
-from typing import Dict, Any, Optional
+from typing import Dict, Any, List, Optional, Tuple
 import subprocess
 from datetime import timedelta
+from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
 from .subtitles import create_styled_subtitles, LyricsScreen, LyricsLine, LyricSegment
+from ..correction.corrector import CorrectionResult
+@dataclass
+class OutputGeneratorConfig:
+    """Configuration for output generation."""
+    output_dir: str
+    cache_dir: str
+    video_resolution: str = "360p"
+    video_background_image: Optional[str] = None
+    video_background_color: str = "black"
+    def __post_init__(self):
+        """Validate configuration after initialization."""
+        if not self.output_dir:
+            raise ValueError("output_dir must be provided")
+        if not self.cache_dir:
+            raise ValueError("cache_dir must be provided")
+        if self.video_background_image and not os.path.isfile(self.video_background_image):
+            raise FileNotFoundError(f"Video background image not found: {self.video_background_image}")
+@dataclass
+class OutputPaths:
+    """Holds paths for generated output files."""
+    lrc: Optional[str] = None
+    ass: Optional[str] = None
+    video: Optional[str] = None
 class OutputGenerator:
@@ -11,59 +44,56 @@ class OutputGenerator:
     def __init__(
         self,
+        config: OutputGeneratorConfig,
         logger: Optional[logging.Logger] = None,
-        output_dir: Optional[str] = None,
-        cache_dir: str = "/tmp/lyrics-transcriber-cache/",
-        video_resolution: str = "360p",
-        video_background_image: Optional[str] = None,
-        video_background_color: str = "black",
     ):
+        """
+        Initialize OutputGenerator with configuration.
+        Args:
+            config: OutputGeneratorConfig instance with required paths
+            logger: Optional logger instance
+        """
+        self.config = config
         self.logger = logger or logging.getLogger(__name__)
-        self.output_dir = output_dir
-        self.cache_dir = cache_dir
-        # Video settings
-        self.video_resolution = video_resolution
-        self.video_background_image = video_background_image
-        self.video_background_color = video_background_color
+        # Log the configured directories
+        self.logger.debug(f"Initialized OutputGenerator with output_dir: {self.config.output_dir}")
+        self.logger.debug(f"Using cache_dir: {self.config.cache_dir}")
         # Set video resolution parameters
-        self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(video_resolution)
-        # Validate video background if provided
-        if self.video_background_image and not os.path.isfile(self.video_background_image):
-            raise FileNotFoundError(f"Video background image not found: {self.video_background_image}")
+        self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
     def generate_outputs(
-        self, transcription_data: Dict[str, Any], output_prefix: str, audio_filepath: str, render_video: bool = False
-    ) -> Dict[str, str]:
-        """
-        Generate all requested output formats.
+        self,
+        transcription_corrected: CorrectionResult,
+        lyrics_results: List[LyricsData],
+        output_prefix: str,
+        audio_filepath: str,
+        render_video: bool = False,
+    ) -> OutputPaths:
+        """Generate all requested output formats."""
+        outputs = OutputPaths()
-        Args:
-            transcription_data: Dictionary containing transcription segments with timing
-            output_prefix: Prefix for output filenames
-            audio_filepath: Path to the source audio file
-            render_video: Whether to generate video output
+        try:
+            # Generate plain lyrics files for each provider
+            for lyrics_data in lyrics_results:
+                provider_name = lyrics_data.metadata.source.title()
+                self.write_plain_lyrics(lyrics_data, f"{output_prefix} (Lyrics {provider_name})")
-        Returns:
-            Dictionary of output paths for each format
-        """
-        outputs = {}
+            if transcription_corrected:
+                # Write corrected lyrics as plain text
+                self.write_plain_lyrics_from_correction(transcription_corrected, f"{output_prefix} (Lyrics Corrected)")
-        try:
-            # Generate LRC
-            lrc_path = self.generate_lrc(transcription_data, output_prefix)
-            outputs["lrc"] = lrc_path
+                # Generate LRC
+                outputs.lrc = self.generate_lrc(transcription_corrected, output_prefix)
-            # Generate ASS
-            ass_path = self.generate_ass(transcription_data, output_prefix)
-            outputs["ass"] = ass_path
+                # Generate ASS
+                outputs.ass = self.generate_ass(transcription_corrected, output_prefix)
-            # Generate video if requested
-            if render_video:
-                video_path = self.generate_video(ass_path, audio_filepath, output_prefix)
-                outputs["video"] = video_path
+                # Generate video if requested
+                if render_video:
+                    outputs.video = self.generate_video(outputs.ass, audio_filepath, output_prefix)
         except Exception as e:
             self.logger.error(f"Error generating outputs: {str(e)}")
@@ -71,19 +101,17 @@ class OutputGenerator:
         return outputs
-    def generate_lrc(self, transcription_data: Dict[str, Any], output_prefix: str) -> str:
+    def _get_output_path(self, output_prefix: str, extension: str) -> str:
+        """Generate full output path for a file."""
+        return os.path.join(self.config.output_dir or self.config.cache_dir, f"{output_prefix}.{extension}")
+    def generate_lrc(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
         """Generate LRC format lyrics file."""
         self.logger.info("Generating LRC format lyrics")
-        output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.lrc")
+        output_path = self._get_output_path(output_prefix, "lrc")
         try:
-            with open(output_path, "w", encoding="utf-8") as f:
-                for segment in transcription_data["segments"]:
-                    start_time = self._format_lrc_timestamp(segment["start"])
-                    line = f"[{start_time}]{segment['text']}\n"
-                    f.write(line)
+            self._write_lrc_file(output_path, transcription_data.segments)
             self.logger.info(f"LRC file generated: {output_path}")
             return output_path
@@ -91,24 +119,21 @@ class OutputGenerator:
             self.logger.error(f"Failed to generate LRC file: {str(e)}")
             raise
-    def generate_ass(self, transcription_data: Dict[str, Any], output_prefix: str) -> str:
+    def _write_lrc_file(self, output_path: str, segments: list) -> None:
+        """Write LRC file content."""
+        with open(output_path, "w", encoding="utf-8") as f:
+            for segment in segments:
+                start_time = self._format_lrc_timestamp(segment.start_time)
+                line = f"[{start_time}]{segment.text}\n"
+                f.write(line)
+    def generate_ass(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
         """Generate ASS format subtitles file."""
         self.logger.info("Generating ASS format subtitles")
-        output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.ass")
+        output_path = self._get_output_path(output_prefix, "ass")
         try:
-            with open(output_path, "w", encoding="utf-8") as f:
-                # Write ASS header
-                f.write(self._get_ass_header())
-                # Write events
-                for segment in transcription_data["segments"]:
-                    start_time = self._format_ass_timestamp(segment["start"])
-                    end_time = self._format_ass_timestamp(segment["end"])
-                    line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment['text']}\n"
-                    f.write(line)
+            self._write_ass_file(output_path, transcription_data.segments)
             self.logger.info(f"ASS file generated: {output_path}")
             return output_path
@@ -116,51 +141,57 @@ class OutputGenerator:
             self.logger.error(f"Failed to generate ASS file: {str(e)}")
             raise
+    def _write_ass_file(self, output_path: str, segments: list) -> None:
+        """Write ASS file content."""
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.write(self._get_ass_header())
+            for segment in segments:
+                # Change from ts/end_ts to start_time/end_time
+                start_time = self._format_ass_timestamp(segment.start_time)
+                end_time = self._format_ass_timestamp(segment.end_time)
+                line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment.text}\n"
+                f.write(line)
     def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
         """Generate MP4 video with lyrics overlay."""
         self.logger.info("Generating video with lyrics overlay")
-        output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.mp4")
-        width, height = self.video_resolution_num
+        output_path = self._get_output_path(output_prefix, "mp4")
         try:
-            # Prepare FFmpeg command
-            cmd = [
-                "ffmpeg",
-                "-y",
-                "-f",
-                "lavfi",
-                "-i",
-                f"color=c={self.video_background_color}:s={width}x{height}",
-                "-i",
-                audio_path,
-                "-vf",
-                f"ass={ass_path}",
-                "-c:v",
-                "libx264",
-                "-c:a",
-                "aac",
-                "-shortest",
-                output_path,
-            ]
-            # If background image provided, use it instead of solid color
-            if self.video_background_image:
-                cmd[3:6] = ["-i", self.video_background_image]
-            self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
-            subprocess.run(cmd, check=True)
+            cmd = self._build_ffmpeg_command(ass_path, audio_path, output_path)
+            self._run_ffmpeg_command(cmd)
             self.logger.info(f"Video generated: {output_path}")
             return output_path
-        except subprocess.CalledProcessError as e:
-            self.logger.error(f"FFmpeg error: {str(e)}")
-            raise
         except Exception as e:
             self.logger.error(f"Failed to generate video: {str(e)}")
             raise
+    def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> list:
+        """Build FFmpeg command for video generation."""
+        width, height = self.video_resolution_num
+        cmd = ["ffmpeg", "-y"]
+        # Input source (background)
+        if self.config.video_background_image:
+            cmd.extend(["-i", self.config.video_background_image])
+        else:
+            cmd.extend(["-f", "lavfi", "-i", f"color=c={self.config.video_background_color}:s={width}x{height}"])
+        # Add audio and subtitle inputs
+        cmd.extend(["-i", audio_path, "-vf", f"ass={ass_path}", "-c:v", "libx264", "-c:a", "aac", "-shortest", output_path])
+        return cmd
+    def _run_ffmpeg_command(self, cmd: list) -> None:
+        """Execute FFmpeg command."""
+        self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
+        try:
+            subprocess.run(cmd, check=True)
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"FFmpeg error: {str(e)}")
+            raise
     def _get_video_params(self, resolution: str) -> tuple:
         """Get video parameters based on resolution setting."""
         match resolution:
@@ -208,3 +239,33 @@ Style: Default,Arial,{self.font_size},&H00FFFFFF,&H000000FF,&H00000000,&H0000000
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 """
+    def write_plain_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
+        """Write plain text lyrics file."""
+        self.logger.info("Writing plain lyrics file")
+        output_path = self._get_output_path(output_prefix, "txt")
+        try:
+            with open(output_path, "w", encoding="utf-8") as f:
+                f.write(lyrics_data.lyrics)
+            self.logger.info(f"Plain lyrics file generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to write plain lyrics file: {str(e)}")
+            raise
+    def write_plain_lyrics_from_correction(self, correction_result: CorrectionResult, output_prefix: str) -> str:
+        """Write corrected lyrics as plain text file."""
+        self.logger.info("Writing corrected lyrics file")
+        output_path = self._get_output_path(output_prefix, "txt")
+        try:
+            with open(output_path, "w", encoding="utf-8") as f:
+                f.write(correction_result.text)
+            self.logger.info(f"Corrected lyrics file generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to write corrected lyrics file: {str(e)}")
+            raise

lyrics_transcriber/output/subtitles.py CHANGED Viewed

@@ -18,10 +18,10 @@ Functions for generating ASS subtitles from lyric data
 class LyricSegmentIterator:
     def __init__(self, lyrics_segments: List[str]):
         self._segments = lyrics_segments
-        self._current_segment = None
+        self._current_segment = 0
     def __iter__(self):
-        self._current_sement = 0
+        self._current_segment = 0
         return self
     def __next__(self):
@@ -49,17 +49,17 @@ class LyricSegment:
     def to_ass(self) -> str:
         """Render this segment as part of an ASS event line"""
         duration = (self.end_ts - self.ts).total_seconds() * 100
-        return f"{{\kf{duration}}}{self.text}"
+        return rf"{{\kf{duration}}}{self.text}"
     def to_dict(self) -> dict:
-        return {"text": self.text, "ts": str(self.ts), "end_ts": str(self.end_ts) if self.end_ts else None}
+        return {"text": self.text, "ts": self.ts.total_seconds(), "end_ts": self.end_ts.total_seconds() if self.end_ts else None}
     @classmethod
     def from_dict(cls, data: dict) -> "LyricSegment":
         return cls(
             text=data["text"],
-            ts=timedelta(seconds=float(data["ts"])),
-            end_ts=timedelta(seconds=float(data["end_ts"])) if data["end_ts"] else None,
+            ts=timedelta(seconds=data["ts"]),
+            end_ts=timedelta(seconds=data["end_ts"]) if data["end_ts"] is not None else None,
         )
@@ -73,7 +73,7 @@ class LyricsLine:
     @property
     def end_ts(self) -> Optional[timedelta]:
-        return self.segments[-1].end_ts
+        return self.segments[-1].end_ts if self.segments else None
     @ts.setter
     def ts(self, value):
@@ -105,7 +105,7 @@ class LyricsLine:
         """Decorate line with karaoke tags"""
         # Prefix the tag with centisecs prior to line in screen
         start_time = (self.ts - screen_start_ts).total_seconds() * 100
-        line = f"{{\k{start_time}}}"
+        line = rf"{{\k{start_time}}}"
         prev_end: Optional[timedelta] = None
         for s in self.segments:
             if prev_end is not None and prev_end < s.ts:
@@ -168,10 +168,10 @@ class LyricsScreen:
         events = []
         for i, line in enumerate(self.lines):
             y_position = self.get_line_y(i)
             # if self.logger:
             #     self.logger.debug(f"Creating ASS event for line {i + 1} at y-position: {y_position}")
             event = line.as_ass_event(self.start_ts, self.end_ts, style, y_position)
             events.append(event)
         return events
@@ -188,12 +188,12 @@ class LyricsScreen:
         return LyricsScreen(new_lines, start_ts)
     def to_dict(self) -> dict:
-        return {"lines": [line.to_dict() for line in self.lines], "start_ts": str(self.start_ts) if self.start_ts else None}
+        return {"lines": [line.to_dict() for line in self.lines], "start_ts": self.start_ts.total_seconds() if self.start_ts else None}
     @classmethod
     def from_dict(cls, data: dict) -> "LyricsScreen":
         lines = [LyricsLine.from_dict(line_data) for line_data in data["lines"]]
-        start_ts = timedelta(seconds=float(data["start_ts"])) if data["start_ts"] else None
+        start_ts = timedelta(seconds=data["start_ts"]) if data["start_ts"] is not None else None
         return cls(lines=lines, start_ts=start_ts)

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl