PyPI - lyrics-transcriber - Versions diffs - 0.30.0__py3-none-any.whl → 0.32.1__py3-none-any.whl - Mend

lyrics-transcriber 0.30.0py3-none-any.whl → 0.32.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

lyrics_transcriber/__init__.py +2 -1
lyrics_transcriber/cli/{main.py → cli_main.py} +47 -14
lyrics_transcriber/core/config.py +35 -0
lyrics_transcriber/core/controller.py +164 -166
lyrics_transcriber/correction/anchor_sequence.py +471 -0
lyrics_transcriber/correction/corrector.py +256 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +30 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
lyrics_transcriber/correction/handlers/repeat.py +71 -0
lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
lyrics_transcriber/correction/handlers/word_operations.py +135 -0
lyrics_transcriber/correction/phrase_analyzer.py +426 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
lyrics_transcriber/lyrics/genius.py +73 -0
lyrics_transcriber/lyrics/spotify.py +82 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +37 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +219 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +503 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +140 -171
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +91 -0
lyrics_transcriber/output/segment_resizer.py +416 -0
lyrics_transcriber/output/subtitles.py +328 -302
lyrics_transcriber/output/video.py +219 -0
lyrics_transcriber/review/__init__.py +1 -0
lyrics_transcriber/review/server.py +138 -0
lyrics_transcriber/storage/dropbox.py +110 -134
lyrics_transcriber/transcribers/audioshake.py +171 -105
lyrics_transcriber/transcribers/base_transcriber.py +149 -0
lyrics_transcriber/transcribers/whisper.py +267 -133
lyrics_transcriber/types.py +454 -0
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/METADATA +14 -3
lyrics_transcriber-0.32.1.dist-info/RECORD +86 -0
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/WHEEL +1 -1
lyrics_transcriber-0.32.1.dist-info/entry_points.txt +4 -0
lyrics_transcriber/core/corrector.py +0 -56
lyrics_transcriber/core/fetcher.py +0 -143
lyrics_transcriber/storage/tokens.py +0 -116
lyrics_transcriber/transcribers/base.py +0 -31
lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/LICENSE +0 -0

lyrics_transcriber/output/ass/config.py ADDED Viewed

@@ -0,0 +1,37 @@
+from dataclasses import dataclass
+class ScreenConfig:
+    """Configuration for screen timing and layout."""
+    def __init__(self, line_height: int = 50, max_visible_lines: int = 4, top_padding: int = None, video_width: int = 640, video_height: int = 360):
+        # Screen layout
+        self.max_visible_lines = max_visible_lines
+        self.line_height = line_height
+        self.top_padding = top_padding if top_padding is not None else line_height
+        self.video_height = video_height
+        self.video_width = video_width
+        # Timing configuration
+        self.screen_gap_threshold = 5.0
+        self.post_roll_time = 1.0
+        self.fade_in_ms = 200
+        self.fade_out_ms = 300
+@dataclass
+class LineTimingInfo:
+    """Timing information for a single line.
+    ``fade_in_time`` and ``end_time`` are used as the Start and End of the
+    line's main ASS event; ``fade_in_time`` is interpreted as seconds when the
+    karaoke tags are built.
+    """
+    fade_in_time: float  # start of the line's main event, in seconds
+    end_time: float  # end of the line's main event
+    fade_out_time: float  # presumably when the fade-out begins - TODO confirm with the code that fills this in
+    clear_time: float  # presumably when the line is fully cleared - TODO confirm with the code that fills this in
+@dataclass
+class LineState:
+    """Complete state for a single line: its text, its timing and where it sits vertically."""
+    text: str  # the line's text
+    timing: LineTimingInfo  # timing points for this line
+    y_position: int  # vertical coordinate used when positioning the line (logged as pixels)

lyrics_transcriber/output/ass/constants.py ADDED Viewed

@@ -0,0 +1,23 @@
+# Alignment constants
+ALIGN_BOTTOM_LEFT = 1
+ALIGN_BOTTOM_CENTER = 2
+ALIGN_BOTTOM_RIGHT = 3
+ALIGN_MIDDLE_LEFT = 4
+ALIGN_MIDDLE_CENTER = 5
+ALIGN_MIDDLE_RIGHT = 6
+ALIGN_TOP_LEFT = 7
+ALIGN_TOP_CENTER = 8
+ALIGN_TOP_RIGHT = 9
+# Legacy alignment mapping
+LEGACY_ALIGNMENT_TO_REGULAR = {
+    "1": ALIGN_BOTTOM_LEFT,
+    "2": ALIGN_BOTTOM_CENTER,
+    "3": ALIGN_BOTTOM_RIGHT,
+    "5": ALIGN_TOP_LEFT,
+    "6": ALIGN_TOP_CENTER,
+    "7": ALIGN_TOP_RIGHT,
+    "9": ALIGN_MIDDLE_LEFT,
+    "10": ALIGN_MIDDLE_CENTER,
+    "11": ALIGN_MIDDLE_RIGHT,
+}

lyrics_transcriber/output/ass/event.py ADDED Viewed

@@ -0,0 +1,94 @@
+class Event:
+    aliases = {}
+    formatters = None
+    order = [
+        "Layer",
+        "Start",
+        "End",
+        "Style",
+        "Name",
+        "MarginL",
+        "MarginR",
+        "MarginV",
+        "Effect",
+        "Text",
+    ]
+    # Constructor
+    def __init__(self):
+        self.type = None
+        self.Layer = 0
+        self.Start = 0.0
+        self.End = 0.0
+        self.Style = None
+        self.Name = ""
+        self.MarginL = 0
+        self.MarginR = 0
+        self.MarginV = 0
+        self.Effect = ""
+        self.Text = ""
+    def set(self, attribute_name, value, *args):
+        if hasattr(self, attribute_name) and attribute_name[0].isupper():
+            setattr(
+                self,
+                attribute_name,
+                self.formatters[attribute_name][0](value, *args),
+            )
+    def get(self, attribute_name, *args):
+        if hasattr(self, attribute_name) and attribute_name[0].isupper():
+            return self.formatters[attribute_name][1](getattr(self, attribute_name), *args)
+        return None
+    def copy(self, other=None):
+        if other is None:
+            other = self.__class__()
+            target = other
+            source = self
+        else:
+            target = other
+            source = self
+        # Copy all attributes
+        target.type = source.type
+        target.Layer = source.Layer
+        target.Start = source.Start
+        target.End = source.End
+        target.Style = source.Style
+        target.Name = source.Name
+        target.MarginL = source.MarginL
+        target.MarginR = source.MarginR
+        target.MarginV = source.MarginV
+        target.Effect = source.Effect
+        target.Text = source.Text
+        return target
+    def equals(self, other):
+        return (
+            self.type == other.type
+            and self.Layer == other.Layer
+            and self.Start == other.Start
+            and self.End == other.End
+            and self.Style is other.Style
+            and self.Name == other.Name
+            and self.MarginL == other.MarginL
+            and self.MarginR == other.MarginR
+            and self.MarginV == other.MarginV
+            and self.Effect == other.Effect
+            and self.Text == other.Text
+        )
+    def same_style(self, other):
+        return (
+            self.type == other.type
+            and self.Layer == other.Layer
+            and self.Style is other.Style
+            and self.Name == other.Name
+            and self.MarginL == other.MarginL
+            and self.MarginR == other.MarginR
+            and self.MarginV == other.MarginV
+            and self.Effect == other.Effect
+        )

lyrics_transcriber/output/ass/formatters.py ADDED Viewed

@@ -0,0 +1,132 @@
+import re
+class Formatters:
+    __re_color_format = re.compile(r"&H([0-9a-fA-F]{8}|[0-9a-fA-F]{6})", re.U)
+    __re_tag_number = re.compile(r"^\s*([\+\-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+))", re.U)
+    @classmethod
+    def same(cls, val, *args):
+        return val
+    @classmethod
+    def color_to_str(cls, val, *args):
+        return "&H{0:02X}{1:02X}{2:02X}{3:02X}".format(255 - val[3], val[2], val[1], val[0])
+    @classmethod
+    def str_to_color(cls, val, *args):
+        match = cls.__re_color_format.search(val)
+        if match:
+            hex_val = "{0:>08s}".format(match.group(1))
+            return (
+                int(hex_val[6:8], 16),  # Red
+                int(hex_val[4:6], 16),  # Green
+                int(hex_val[2:4], 16),  # Blue
+                255 - int(hex_val[0:2], 16),  # Alpha
+            )
+        # Return white (255, 255, 255, 255) for invalid input
+        return (255, 255, 255, 255)
+    @classmethod
+    def n1bool_to_str(cls, val, *args):
+        if val:
+            return "-1"
+        return "0"
+    @classmethod
+    def str_to_n1bool(cls, val, *args):
+        try:
+            val = int(val, 10)
+        except ValueError:
+            return False
+        return val != 0
+    @classmethod
+    def integer_to_str(cls, val, *args):
+        return str(int(val))
+    @classmethod
+    def str_to_integer(cls, val, *args):
+        try:
+            return int(val, 10)
+        except ValueError:
+            return 0
+    @classmethod
+    def number_to_str(cls, val, *args):
+        if int(val) == val:
+            return str(int(val))
+            # No decimal
+        return str(val)
+    @classmethod
+    def str_to_number(cls, val, *args):
+        try:
+            return float(val)
+        except ValueError:
+            return 0.0
+    @classmethod
+    def timecode_to_str_generic(
+        cls,
+        timecode,
+        decimal_length=2,
+        seconds_length=2,
+        minutes_length=2,
+        hours_length=1,
+    ):
+        if decimal_length > 0:
+            total_length = seconds_length + decimal_length + 1
+        else:
+            total_length = seconds_length
+        tc_parts = [
+            "{{0:0{0:d}d}}".format(hours_length).format(int(timecode // 3600)),
+            "{{0:0{0:d}d}}".format(minutes_length).format(int((timecode // 60) % 60)),
+            "{{0:0{0:d}.{1:d}f}}".format(total_length, decimal_length).format(timecode % 60),
+        ]
+        return ":".join(tc_parts)
+    @classmethod
+    def timecode_to_str(cls, val, *args):
+        return cls.timecode_to_str_generic(val, 2)
+    @classmethod
+    def str_to_timecode(cls, val, *args):
+        time = 0.0
+        mult = 1
+        for t in reversed(val.split(":")):
+            time += float(t) * mult
+            mult *= 60
+        return time
+    @classmethod
+    def style_to_str(cls, val, *args):
+        if val is None:
+            return ""
+        return val.Name
+    @classmethod
+    def str_to_style(cls, val, style_map, style_constructor, *args):
+        if val in style_map:
+            return style_map[val]
+        # Create fake
+        style = style_constructor()
+        style.fake = True
+        style.Name = val
+        # Add to map (will not be included in global style list, but allows for duplicate "fake" styles to reference the same object)
+        style_map[style.Name] = style
+        # Return the new style
+        return style
+    @classmethod
+    def tag_argument_to_number(cls, arg, default_value=None):
+        match = cls.__re_tag_number.match(arg)
+        if match is None:
+            return default_value
+        return float(match.group(1))

lyrics_transcriber/output/ass/lyrics_line.py ADDED Viewed

@@ -0,0 +1,219 @@
+from dataclasses import dataclass
+from typing import Optional, Tuple, List
+import logging
+from datetime import timedelta
+from PIL import Image, ImageDraw, ImageFont
+import os
+from lyrics_transcriber.types import LyricsSegment
+from lyrics_transcriber.output.ass.event import Event
+from lyrics_transcriber.output.ass.style import Style
+from lyrics_transcriber.output.ass.config import LineState, ScreenConfig
+@dataclass
+class LyricsLine:
+    """Represents a single line of lyrics with timing and karaoke information."""
+    segment: LyricsSegment
+    screen_config: ScreenConfig
+    logger: Optional[logging.Logger] = None
+    previous_end_time: Optional[float] = None
+    def __post_init__(self):
+        """Ensure logger is initialized"""
+        if self.logger is None:
+            self.logger = logging.getLogger(__name__)
+    def _get_font(self, style: Style) -> ImageFont.FreeTypeFont:
+        """Get the font for text measurements."""
+        # ASS renders fonts about 70% of their actual size
+        ASS_FONT_SCALE = 0.70
+        # Scale down the font size to match ASS rendering
+        adjusted_size = int(style.Fontsize * ASS_FONT_SCALE)
+        self.logger.debug(f"Adjusting font size from {style.Fontsize} to {adjusted_size} to match ASS rendering")
+        try:
+            # Use the Fontpath property from Style class
+            if style.Fontpath and os.path.exists(style.Fontpath):
+                return ImageFont.truetype(style.Fontpath, size=adjusted_size)
+            self.logger.warning(f"Could not load font {style.Fontpath}, using default")
+            return ImageFont.load_default()
+        except (OSError, AttributeError) as e:
+            self.logger.warning(f"Font error ({e}), using default")
+            return ImageFont.load_default()
+    def _get_text_dimensions(self, text: str, font: ImageFont.FreeTypeFont) -> Tuple[int, int]:
+        """Get the pixel dimensions of rendered text."""
+        # Create an image the same size as the video frame
+        img = Image.new("RGB", (self.screen_config.video_width, self.screen_config.video_height), color="black")
+        draw = ImageDraw.Draw(img)
+        # Get the bounding box
+        bbox = draw.textbbox((0, 0), text, font=font)
+        width = bbox[2] - bbox[0]
+        height = bbox[3] - bbox[1]
+        self.logger.debug(f"Text dimensions for '{text}': width={width}px, height={height}px")
+        self.logger.debug(f"Video dimensions: {self.screen_config.video_width}x{self.screen_config.video_height}")
+        return width, height
+    # fmt: off
+    def _create_lead_in_text(self, state: LineState) -> Tuple[str, bool]:
+        """Create lead-in indicator text if needed.
+        Returns:
+            Tuple of (text, has_lead_in)
+        """
+        has_lead_in = (self.previous_end_time is None or
+                      self.segment.start_time - self.previous_end_time >= 5.0)
+        if not has_lead_in:
+            return "", False
+        # Add a hyphen with karaoke timing for the last 2 seconds before the line
+        lead_in_start = max(state.timing.fade_in_time, self.segment.start_time - 2.0)
+        gap_before_highlight = int((lead_in_start - state.timing.fade_in_time) * 100)
+        highlight_duration = int((self.segment.start_time - lead_in_start) * 100)
+        text = ""
+        # Add initial gap if needed
+        if gap_before_highlight > 0:
+            text += f"{{\\k{gap_before_highlight}}}"
+        # Add the hyphen with highlight
+        text += f"{{\\kf{highlight_duration}}}→ "
+        return text, True
+    def _create_lead_in_event(self, state: LineState, style: Style, video_width: int, config: ScreenConfig) -> Optional[Event]:
+        """Create a separate event for the lead-in indicator if needed."""
+        if not (self.previous_end_time is None or
+                self.segment.start_time - self.previous_end_time >= 5.0):
+            return None
+        self.logger.debug(f"Creating lead-in indicator for line: '{self.segment.text}'")
+        # Calculate all timing points
+        line_start = self.segment.start_time
+        appear_time = line_start - 3.0  # Start 3 seconds before line
+        fade_in_end = appear_time + 0.8  # 800ms fade in
+        fade_out_start = line_start - 0.3  # Start fade 300ms before reaching final position
+        fade_out_end = line_start + 0.2  # Complete fade 200ms after line starts (500ms total fade)
+        self.logger.debug(f"Timing calculations:")
+        self.logger.debug(f"  Line starts at: {line_start:.2f}s")
+        self.logger.debug(f"  Rectangle appears at: {appear_time:.2f}s")
+        self.logger.debug(f"  Fade in completes at: {fade_in_end:.2f}s")
+        self.logger.debug(f"  Fade out starts at: {fade_out_start:.2f}s")
+        self.logger.debug(f"  Rectangle reaches final position at: {line_start:.2f}s")
+        self.logger.debug(f"  Rectangle fully faded out at: {fade_out_end:.2f}s")
+        # Calculate dimensions and positions
+        font = self._get_font(style)
+        main_text = self.segment.text
+        main_width, main_height = self._get_text_dimensions(main_text, font)
+        rect_width = int(self.screen_config.video_width * 0.035)  # 3.5% of video width
+        rect_height = int(self.screen_config.video_height * 0.04)  # 4% of video height
+        text_left = self.screen_config.video_width//2 - main_width//2
+        self.logger.debug(f"Position calculations:")
+        self.logger.debug(f"  Video dimensions: {self.screen_config.video_width}x{self.screen_config.video_height}")
+        self.logger.debug(f"  Main text width: {main_width}px")
+        self.logger.debug(f"  Main text height: {main_height}px")
+        self.logger.debug(f"  Rectangle dimensions: {rect_width}x{rect_height}px")
+        self.logger.debug(f"  Text left edge: {text_left}px")
+        self.logger.debug(f"  Vertical position: {state.y_position}px")
+        # Create main indicator event
+        main_event = Event()
+        main_event.type = "Dialogue"
+        main_event.Layer = 0
+        main_event.Style = style
+        main_event.Start = appear_time
+        main_event.End = fade_out_end
+        # Calculate movement duration in milliseconds
+        move_duration = int((line_start - appear_time) * 1000)
+        # Create indicator rectangle aligned to bottom
+        main_text = (
+            f"{{\\an8}}"  # center-bottom alignment
+            f"{{\\move(0,{state.y_position + main_height},{text_left},{state.y_position + main_height},0,{move_duration})}}"  # Move until line start
+            f"{{\\c&HF77070&}}"  # Same color as karaoke highlight
+            f"{{\\alpha&H4D&}}"  # 70% opacity (FF=0%, 00=100%)
+            f"{{\\fad(800,500)}}"  # 800ms fade in, 500ms fade out
+            f"{{\\p1}}m {-rect_width} {-rect_height} l 0 {-rect_height} 0 0 {-rect_width} 0{{\\p0}}"  # Draw up from bottom
+        )
+        main_event.Text = main_text
+        return [main_event]
+    def create_ass_events(
+        self,
+        state: LineState,
+        style: Style,
+        config: ScreenConfig,
+        previous_end_time: Optional[float] = None
+    ) -> List[Event]:
+        """Create ASS events for this line. Returns [main_event] or [lead_in_event, main_event]."""
+        self.previous_end_time = previous_end_time
+        events = []
+        # Create lead-in event if needed
+        lead_in_event = self._create_lead_in_event(state, style, config.video_width, config)
+        if lead_in_event:
+            events.extend(lead_in_event)
+        # Create main lyrics event
+        main_event = Event()
+        main_event.type = "Dialogue"
+        main_event.Layer = 0
+        main_event.Style = style
+        main_event.Start = state.timing.fade_in_time
+        main_event.End = state.timing.end_time
+        # Use absolute positioning
+        x_pos = config.video_width // 2  # Center horizontally
+        # Main lyrics text with positioning and fade
+        text = (
+            f"{{\\an8}}{{\\pos({x_pos},{state.y_position})}}"
+            f"{{\\fad({config.fade_in_ms},{config.fade_out_ms})}}"
+        )
+        # Add the main lyrics text with karaoke timing
+        text += self._create_ass_text(timedelta(seconds=state.timing.fade_in_time))
+        main_event.Text = text
+        events.append(main_event)
+        return events
+    def _create_ass_text(self, start_ts: timedelta) -> str:
+        """Create the ASS text with karaoke timing tags."""
+        # Initial delay before first word
+        first_word_time = self.segment.start_time
+        # Add initial delay for regular lines
+        start_time = max(0, (first_word_time - start_ts.total_seconds()) * 100)
+        text = r"{\k" + str(int(round(start_time))) + r"}"
+        prev_end_time = first_word_time
+        for word in self.segment.words:
+            # Add gap between words if needed
+            gap = word.start_time - prev_end_time
+            if gap > 0.1:  # Only add gap if significant
+                text += r"{\k" + str(int(round(gap * 100))) + r"}"
+            # Add the word with its duration
+            duration = int(round((word.end_time - word.start_time) * 100))
+            text += r"{\kf" + str(duration) + r"}" + word.text + " "
+            prev_end_time = word.end_time  # Track the actual end time of the word
+        return text.rstrip()
+    def __str__(self):
+        return f"{{{self.segment.text}}}"

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.32.1__py3-none-any.whl

lyrics-transcriber 0.30.0py3-none-any.whl → 0.32.1py3-none-any.whl