PyPI - lyrics-transcriber - Versions diffs - 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl - Mend

lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

lyrics_transcriber/output/subtitles.py CHANGED Viewed

@@ -5,7 +5,7 @@ import subprocess
 import json
 from lyrics_transcriber.output.ass.section_screen import SectionScreen
-from lyrics_transcriber.types import LyricsSegment
+from lyrics_transcriber.types import LyricsSegment, Word
 from lyrics_transcriber.output.ass import LyricsScreen, LyricsLine
 from lyrics_transcriber.output.ass.ass import ASS
 from lyrics_transcriber.output.ass.style import Style
@@ -25,6 +25,7 @@ class SubtitlesGenerator:
         font_size: int,
         line_height: int,
         styles: dict,
+        subtitle_offset_ms: int = 0,
         logger: Optional[logging.Logger] = None,
     ):
         """Initialize SubtitleGenerator.
@@ -34,13 +35,39 @@ class SubtitlesGenerator:
             video_resolution: Tuple of (width, height) for video resolution
             font_size: Font size for subtitles
             line_height: Line height for subtitle positioning
+            styles: Dictionary of style configurations
+            subtitle_offset_ms: Offset for subtitle timing in milliseconds
             logger: Optional logger instance
         """
         self.output_dir = output_dir
         self.video_resolution = video_resolution
         self.font_size = font_size
         self.styles = styles
-        self.config = ScreenConfig(line_height=line_height, video_width=video_resolution[0], video_height=video_resolution[1])
+        self.subtitle_offset_ms = subtitle_offset_ms
+        # Create ScreenConfig with potential overrides from styles
+        karaoke_styles = styles.get("karaoke", {})
+        config_params = {
+            "line_height": line_height,
+            "video_width": video_resolution[0],
+            "video_height": video_resolution[1]
+        }
+        # Add any overrides from styles
+        screen_config_props = [
+            "max_visible_lines",
+            "top_padding",
+            "screen_gap_threshold",
+            "post_roll_time",
+            "fade_in_ms",
+            "fade_out_ms"
+        ]
+        for prop in screen_config_props:
+            if prop in karaoke_styles:
+                config_params[prop] = karaoke_styles[prop]
+        self.config = ScreenConfig(**config_params)
         self.logger = logger or logging.getLogger(__name__)
     def _get_output_path(self, output_prefix: str, extension: str) -> str:
@@ -91,6 +118,33 @@ class SubtitlesGenerator:
         """Create screens from segments with detailed logging."""
         self.logger.debug("Creating screens from segments")
+        # Apply timing offset to segments if needed
+        if self.subtitle_offset_ms != 0:
+            self.logger.info(f"Subtitle offset: {self.subtitle_offset_ms}ms")
+            offset_seconds = self.subtitle_offset_ms / 1000.0
+            segments = [
+                LyricsSegment(
+                    id=seg.id,  # Preserve original segment ID
+                    text=seg.text,
+                    words=[
+                        Word(
+                            id=word.id,  # Preserve original word ID
+                            text=word.text,
+                            start_time=max(0, word.start_time + offset_seconds),
+                            end_time=word.end_time + offset_seconds,
+                            confidence=word.confidence,
+                            created_during_correction=getattr(word, "created_during_correction", False),  # Preserve correction flag
+                        )
+                        for word in seg.words
+                    ],
+                    start_time=max(0, seg.start_time + offset_seconds),
+                    end_time=seg.end_time + offset_seconds,
+                )
+                for seg in segments
+            ]
+            self.logger.info(f"Applied {self.subtitle_offset_ms}ms offset to segment timings")
         # Create section screens and get instrumental boundaries
         section_screens = self._create_section_screens(segments, song_duration)
         instrumental_times = self._get_instrumental_times(section_screens)

lyrics_transcriber/output/video.py CHANGED Viewed

@@ -88,6 +88,52 @@ class VideoGenerator:
                     pass
             raise
+    def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
+        """Generate lower resolution MP4 preview video with lyrics overlay.
+        Args:
+            ass_path: Path to ASS subtitles file
+            audio_path: Path to audio file
+            output_prefix: Prefix for output filename
+        Returns:
+            Path to generated preview video file
+        """
+        self.logger.info("Generating preview video with lyrics overlay")
+        output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")
+        # Check input files exist before running FFmpeg
+        if not os.path.isfile(ass_path):
+            raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
+        if not os.path.isfile(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+        try:
+            # Create a temporary copy of the ASS file with a safe filename
+            temp_ass_path = os.path.join(self.cache_dir, "temp_preview_subtitles.ass")
+            import shutil
+            shutil.copy2(ass_path, temp_ass_path)
+            self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
+            cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
+            self._run_ffmpeg_command(cmd)
+            self.logger.info(f"Preview video generated: {output_path}")
+            # Clean up temporary file
+            os.remove(temp_ass_path)
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to generate preview video: {str(e)}")
+            # Clean up temporary file in case of error
+            if "temp_ass_path" in locals():
+                try:
+                    os.remove(temp_ass_path)
+                except:
+                    pass
+            raise
     def _get_output_path(self, output_prefix: str, extension: str) -> str:
         """Generate full output path for a file."""
         return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
@@ -181,7 +227,7 @@ class VideoGenerator:
             "-vf", f"ass={ass_path}",  # Add subtitles
             "-c:v", self._get_video_codec(),
             # Video quality settings
-            "-preset", "slow",  # Better compression efficiency
+            "-preset", "fast",  # Better compression efficiency
             "-b:v", "5000k",  # Base video bitrate
             "-minrate", "5000k",  # Minimum bitrate
             "-maxrate", "20000k",  # Maximum bitrate
@@ -196,6 +242,66 @@ class VideoGenerator:
         return cmd
+    def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
+        """Build FFmpeg command for preview video generation with optimized settings."""
+        # Use 360p resolution for preview
+        width, height = 640, 360
+        # fmt: off
+        cmd = [
+            "ffmpeg",
+            "-hide_banner",
+            "-loglevel", "error",
+            "-r", "30",  # Set frame rate to 30 fps
+        ]
+        # Input source (background)
+        if self.background_image:
+            # Resize background image first
+            resized_bg = self._resize_background_image(self.background_image)
+            self.logger.debug(f"Using resized background image: {resized_bg}")
+            cmd.extend([
+                "-loop", "1",  # Loop the image
+                "-i", resized_bg,
+            ])
+        else:
+            self.logger.debug(
+                f"Using solid {self.background_color} background "
+                f"with resolution: {width}x{height}"
+            )
+            cmd.extend([
+                "-f", "lavfi",
+                "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
+            ])
+        # Add audio input and subtitle overlay
+        cmd.extend([
+            "-i", audio_path,
+            "-c:a", "aac",  # Use AAC for audio
+            "-b:a", "128k",  # Audio bitrate
+            "-vf", f"ass={ass_path}",  # Add subtitles
+            "-c:v", "libx264",  # Use H.264 codec
+            "-profile:v", "baseline",  # Most compatible H.264 profile
+            "-level", "3.0",  # Compatibility level
+            "-pix_fmt", "yuv420p",  # Required for browser compatibility
+            "-preset", "ultrafast",
+            "-b:v", "1000k",  # Slightly higher bitrate
+            "-maxrate", "1500k",
+            "-bufsize", "2000k",
+            "-movflags", "+faststart+frag_keyframe+empty_moov",  # Enhanced streaming flags
+            "-g", "30",  # Keyframe every 30 frames (1 second)
+            "-keyint_min", "30",  # Minimum keyframe interval
+            "-sc_threshold", "0",  # Disable scene change detection
+            "-shortest",
+            "-y"
+        ])
+        # fmt: on
+        # Add output path
+        cmd.append(output_path)
+        return cmd
     def _get_video_codec(self) -> str:
         """Determine the best available video codec."""
         # try:

lyrics_transcriber/review/__init__.py CHANGED Viewed

@@ -1 +0,0 @@
-from .server import start_review_server, complete_review

lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl