PyPI - lyrics-transcriber - Versions diffs - 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl - Mend

lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

lyrics_transcriber/__init__.py +2 -1
lyrics_transcriber/cli/cli_main.py +33 -12
lyrics_transcriber/core/config.py +35 -0
lyrics_transcriber/core/controller.py +85 -121
lyrics_transcriber/correction/anchor_sequence.py +471 -0
lyrics_transcriber/correction/corrector.py +237 -33
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +30 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
lyrics_transcriber/correction/handlers/repeat.py +71 -0
lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
lyrics_transcriber/correction/handlers/word_operations.py +135 -0
lyrics_transcriber/correction/phrase_analyzer.py +426 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +5 -81
lyrics_transcriber/lyrics/genius.py +5 -2
lyrics_transcriber/lyrics/spotify.py +3 -3
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +37 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +219 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +503 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +101 -193
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +91 -0
lyrics_transcriber/output/segment_resizer.py +416 -0
lyrics_transcriber/output/subtitles.py +328 -302
lyrics_transcriber/output/video.py +219 -0
lyrics_transcriber/review/__init__.py +1 -0
lyrics_transcriber/review/server.py +138 -0
lyrics_transcriber/transcribers/audioshake.py +3 -2
lyrics_transcriber/transcribers/base_transcriber.py +5 -42
lyrics_transcriber/transcribers/whisper.py +3 -4
lyrics_transcriber/types.py +454 -0
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/METADATA +14 -3
lyrics_transcriber-0.32.2.dist-info/RECORD +86 -0
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/WHEEL +1 -1
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/correction/base_strategy.py +0 -29
lyrics_transcriber/correction/strategy_diff.py +0 -263
lyrics_transcriber-0.30.1.dist-info/RECORD +0 -25
{lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/LICENSE +0 -0

lyrics_transcriber/output/subtitles.py CHANGED Viewed

@@ -1,305 +1,331 @@
-from dataclasses import dataclass, field
-from datetime import timedelta
-from typing import Dict, List, Optional, Tuple
-import json
-import itertools
-from pathlib import Path
-from enum import IntEnum
+import os
 import logging
+from typing import List, Optional, Tuple, Union
+import subprocess
+import json
-from . import ass
-"""
-Functions for generating ASS subtitles from lyric data
-"""
-class LyricSegmentIterator:
-    def __init__(self, lyrics_segments: List[str]):
-        self._segments = lyrics_segments
-        self._current_segment = 0
-    def __iter__(self):
-        self._current_segment = 0
-        return self
-    def __next__(self):
-        if self._current_segment >= len(self._segments):
-            raise StopIteration
-        val = self._segments[self._current_segment]
-        self._current_segment += 1
-        return val
-    def __len__(self):
-        return len(self._segments)
-@dataclass
-class LyricSegment:
-    text: str
-    ts: timedelta
-    end_ts: Optional[timedelta] = None
-    def adjust_timestamps(self, adjustment) -> "LyricSegment":
-        ts = self.ts + adjustment
-        end_ts = self.end_ts + adjustment if self.end_ts else None
-        return LyricSegment(self.text, ts, end_ts)
-    def to_ass(self) -> str:
-        """Render this segment as part of an ASS event line"""
-        duration = (self.end_ts - self.ts).total_seconds() * 100
-        return rf"{{\kf{duration}}}{self.text}"
-    def to_dict(self) -> dict:
-        return {"text": self.text, "ts": self.ts.total_seconds(), "end_ts": self.end_ts.total_seconds() if self.end_ts else None}
-    @classmethod
-    def from_dict(cls, data: dict) -> "LyricSegment":
-        return cls(
-            text=data["text"],
-            ts=timedelta(seconds=data["ts"]),
-            end_ts=timedelta(seconds=data["end_ts"]) if data["end_ts"] is not None else None,
-        )
-@dataclass
-class LyricsLine:
-    segments: List[LyricSegment] = field(default_factory=list)
-    @property
-    def ts(self) -> Optional[timedelta]:
-        return self.segments[0].ts if len(self.segments) else None
-    @property
-    def end_ts(self) -> Optional[timedelta]:
-        return self.segments[-1].end_ts if self.segments else None
-    @ts.setter
-    def ts(self, value):
-        self.segments[0].ts = value
-    @end_ts.setter
-    def end_ts(self, value):
-        self.segments[-1].end_ts = value
-    def __str__(self):
-        return "".join([f"{{{s.text}}}" for s in self.segments])
-    def as_ass_event(self, screen_start: timedelta, screen_end: timedelta, style: ass.ASS.Style, y_position: int):
-        e = ass.ASS.Event()
-        e.type = "Dialogue"
-        e.Layer = 0
-        e.Style = style
-        e.Start = screen_start.total_seconds()
-        e.End = screen_end.total_seconds()
-        e.MarginV = y_position
-        e.Text = self.decorate_ass_line(self.segments, screen_start)
-        # Set alignment to top-center
-        e.Text = "{\\an8}" + e.Text
-        return e
-    def decorate_ass_line(self, segments, screen_start_ts: timedelta):
-        """Decorate line with karaoke tags"""
-        # Prefix the tag with centisecs prior to line in screen
-        start_time = (self.ts - screen_start_ts).total_seconds() * 100
-        line = rf"{{\k{start_time}}}"
-        prev_end: Optional[timedelta] = None
-        for s in self.segments:
-            if prev_end is not None and prev_end < s.ts:
-                blank_segment = LyricSegment("", prev_end, s.ts)
-                line += blank_segment.to_ass()
-            line += s.to_ass()
-            prev_end = s.end_ts
-        return line
-    def adjust_timestamps(self, adjustment) -> "LyricsLine":
-        new_segments = [s.adjust_timestamps(adjustment) for s in self.segments]
-        start_ts = self.ts + adjustment if self.ts else None
-        return LyricsLine(new_segments)
-    def to_dict(self) -> dict:
-        return {"segments": [segment.to_dict() for segment in self.segments]}
-    @classmethod
-    def from_dict(cls, data: dict) -> "LyricsLine":
-        segments = [LyricSegment.from_dict(segment_data) for segment_data in data["segments"]]
-        return cls(segments=segments)
-@dataclass
-class LyricsScreen:
-    lines: List[LyricsLine] = field(default_factory=list)
-    start_ts: Optional[timedelta] = None
-    video_size: Tuple[int, int] = None
-    line_height: int = None
-    logger: logging.Logger = None
-    @property
-    def end_ts(self) -> timedelta:
-        return self.lines[-1].end_ts
-    def get_line_y(self, line_num: int) -> int:
-        _, h = self.video_size
-        line_count = len(self.lines)
-        total_height = line_count * self.line_height
-        # Calculate the top margin to center the lyrics block
-        top_margin = (h - total_height) / 2
-        # Calculate the y-position for this specific line
-        line_y = top_margin + (line_num * self.line_height)
-        # if self.logger:
-        #     self.logger.debug(f"Line {line_num + 1} positioning:")
-        #     self.logger.debug(f"  Video height: {h}")
-        #     self.logger.debug(f"  Total lines: {line_count}")
-        #     self.logger.debug(f"  Line height: {self.line_height}")
-        #     self.logger.debug(f"  Total lyrics height: {total_height}")
-        #     self.logger.debug(f"  Top margin: {top_margin}")
-        #     self.logger.debug(f"  Line y: {line_y}")
-        return int(line_y)
-    def as_ass_events(self, style: ass.ASS.Style) -> List[ass.ASS.Event]:
-        events = []
-        for i, line in enumerate(self.lines):
-            y_position = self.get_line_y(i)
-            # if self.logger:
-            #     self.logger.debug(f"Creating ASS event for line {i + 1} at y-position: {y_position}")
-            event = line.as_ass_event(self.start_ts, self.end_ts, style, y_position)
-            events.append(event)
-        return events
-    def __str__(self):
-        lines = [f"{self.start_ts} - {self.end_ts}:"]
-        for line in self.lines:
-            lines.append(f"\t{line}")
-        return "\n".join(lines)
-    def adjust_timestamps(self, adjustment: timedelta) -> "LyricsScreen":
-        new_lines = [l.adjust_timestamps(adjustment) for l in self.lines]
-        start_ts = self.start_ts + adjustment if self.start_ts else None
-        return LyricsScreen(new_lines, start_ts)
-    def to_dict(self) -> dict:
-        return {"lines": [line.to_dict() for line in self.lines], "start_ts": self.start_ts.total_seconds() if self.start_ts else None}
-    @classmethod
-    def from_dict(cls, data: dict) -> "LyricsScreen":
-        lines = [LyricsLine.from_dict(line_data) for line_data in data["lines"]]
-        start_ts = timedelta(seconds=data["start_ts"]) if data["start_ts"] is not None else None
-        return cls(lines=lines, start_ts=start_ts)
-class LyricsObjectJSONEncoder(json.JSONEncoder):
-    def default(self, o):
-        if isinstance(o, (LyricSegment, LyricsLine, LyricsScreen)):
-            return o.to_dict()
-        return super().default(o)
-def set_segment_end_times(screens: List[LyricsScreen], song_duration_seconds: int) -> List[LyricsScreen]:
-    """
-    Infer end times of lines for screens where they are not already set.
-    """
-    segments = list(itertools.chain.from_iterable([l.segments for s in screens for l in s.lines]))
-    for i, segment in enumerate(segments):
-        if not segment.end_ts:
-            if i == len(segments) - 1:
-                segment.end_ts = timedelta(seconds=song_duration_seconds)
+from lyrics_transcriber.output.ass.section_screen import SectionScreen
+from lyrics_transcriber.types import LyricsSegment
+from lyrics_transcriber.output.ass import LyricsScreen, LyricsLine
+from lyrics_transcriber.output.ass.ass import ASS
+from lyrics_transcriber.output.ass.style import Style
+from lyrics_transcriber.output.ass.constants import ALIGN_TOP_CENTER
+from lyrics_transcriber.output.ass import LyricsScreen
+from lyrics_transcriber.output.ass.section_detector import SectionDetector
+from lyrics_transcriber.output.ass.config import ScreenConfig
+class SubtitlesGenerator:
+    """Handles generation of subtitle files in various formats."""
+    def __init__(
+        self,
+        output_dir: str,
+        video_resolution: Tuple[int, int],
+        font_size: int,
+        line_height: int,
+        styles: dict,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """Initialize SubtitleGenerator.
+        Args:
+            output_dir: Directory where output files will be written
+            video_resolution: Tuple of (width, height) for video resolution
+            font_size: Font size for subtitles
+            line_height: Line height for subtitle positioning
+            logger: Optional logger instance
+        """
+        self.output_dir = output_dir
+        self.video_resolution = video_resolution
+        self.font_size = font_size
+        self.styles = styles
+        self.config = ScreenConfig(line_height=line_height, video_width=video_resolution[0], video_height=video_resolution[1])
+        self.logger = logger or logging.getLogger(__name__)
+    def _get_output_path(self, output_prefix: str, extension: str) -> str:
+        """Generate full output path for a file."""
+        return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
+    def _get_audio_duration(self, audio_filepath: str, segments: Optional[List[LyricsSegment]] = None) -> float:
+        """Get audio duration using ffprobe."""
+        try:
+            probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", audio_filepath]
+            probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
+            probe_data = json.loads(probe_output)
+            duration = float(probe_data["format"]["duration"])
+            self.logger.debug(f"Detected audio duration: {duration:.2f}s")
+            return duration
+        except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e:
+            self.logger.error(f"Failed to get audio duration: {e}")
+            # Fallback to last segment end time plus buffer
+            if segments:
+                duration = segments[-1].end_time + 30.0
+                self.logger.warning(f"Using fallback duration: {duration:.2f}s")
+                return duration
+            return 0.0
+    def generate_ass(self, segments: List[LyricsSegment], output_prefix: str, audio_filepath: str) -> str:
+        self.logger.info("Generating ASS format subtitles")
+        output_path = self._get_output_path(f"{output_prefix} (Karaoke)", "ass")
+        try:
+            self.logger.debug(f"Processing {len(segments)} segments")
+            song_duration = self._get_audio_duration(audio_filepath, segments)
+            screens = self._create_screens(segments, song_duration)
+            self.logger.debug(f"Created {len(screens)} initial screens")
+            lyric_subtitles_ass = self._create_styled_subtitles(screens, self.video_resolution, self.font_size)
+            self.logger.debug("Created styled subtitles")
+            lyric_subtitles_ass.write(output_path)
+            self.logger.info(f"ASS file generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to generate ASS file: {str(e)}", exc_info=True)
+            raise
+    def _create_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[LyricsScreen]:
+        """Create screens from segments with detailed logging."""
+        self.logger.debug("Creating screens from segments")
+        # Create section screens and get instrumental boundaries
+        section_screens = self._create_section_screens(segments, song_duration)
+        instrumental_times = self._get_instrumental_times(section_screens)
+        # Create regular lyric screens
+        lyric_screens = self._create_lyric_screens(segments, instrumental_times)
+        # Merge and process all screens
+        all_screens = self._merge_and_process_screens(section_screens, lyric_screens)
+        # Log final results
+        self._log_final_screens(all_screens)
+        return all_screens
+    def _create_section_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[SectionScreen]:
+        """Create section screens using SectionDetector."""
+        section_detector = SectionDetector(logger=self.logger)
+        return section_detector.process_segments(segments, self.video_resolution, self.config.line_height, song_duration)
+    def _get_instrumental_times(self, section_screens: List[SectionScreen]) -> List[Tuple[float, float]]:
+        """Extract instrumental section time boundaries."""
+        instrumental_times = [
+            (s.start_time, s.end_time) for s in section_screens if isinstance(s, SectionScreen) and s.section_type == "INSTRUMENTAL"
+        ]
+        self.logger.debug(f"Found {len(instrumental_times)} instrumental sections:")
+        for start, end in instrumental_times:
+            self.logger.debug(f"  {start:.2f}s - {end:.2f}s")
+        return instrumental_times
+    def _create_lyric_screens(self, segments: List[LyricsSegment], instrumental_times: List[Tuple[float, float]]) -> List[LyricsScreen]:
+        """Create regular lyric screens, handling instrumental boundaries."""
+        screens: List[LyricsScreen] = []
+        current_screen: Optional[LyricsScreen] = None
+        for i, segment in enumerate(segments):
+            self.logger.debug(f"Processing segment {i}: {segment.start_time:.2f}s - {segment.end_time:.2f}s")
+            # Skip segments in instrumental sections
+            if self._is_in_instrumental_section(segment, instrumental_times):
+                continue
+            # Check if we need a new screen
+            if self._should_start_new_screen(current_screen, segment, instrumental_times):
+                # fmt: off
+                current_screen = LyricsScreen(
+                    video_size=self.video_resolution,
+                    line_height=self.config.line_height,
+                    config=self.config,
+                    logger=self.logger
+                )
+                # fmt: on
+                screens.append(current_screen)
+                self.logger.debug("  Created new screen")
+            # Add line to current screen
+            line = LyricsLine(logger=self.logger, segment=segment, screen_config=self.config)
+            current_screen.lines.append(line)
+            self.logger.debug(f"  Added line to screen (now has {len(current_screen.lines)} lines)")
+        return screens
+    def _is_in_instrumental_section(self, segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]) -> bool:
+        """Check if a segment falls within any instrumental section."""
+        for inst_start, inst_end in instrumental_times:
+            if segment.start_time >= inst_start and segment.start_time < inst_end:
+                self.logger.debug(f"  Skipping segment - falls within instrumental {inst_start:.2f}s - {inst_end:.2f}s")
+                return True
+        return False
+    def _should_start_new_screen(
+        self, current_screen: Optional[LyricsScreen], segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]
+    ) -> bool:
+        """Determine if a new screen should be started."""
+        if current_screen is None:
+            return True
+        if len(current_screen.lines) >= self.config.max_visible_lines:
+            return True
+        # Check if this segment is first after any instrumental section
+        if current_screen.lines:
+            prev_segment = current_screen.lines[-1].segment
+            for inst_start, inst_end in instrumental_times:
+                if prev_segment.end_time <= inst_start and segment.start_time >= inst_end:
+                    self.logger.debug(f"  Forcing new screen - first segment after instrumental {inst_start:.2f}s - {inst_end:.2f}s")
+                    return True
+        return False
+    def _merge_and_process_screens(
+        self, section_screens: List[SectionScreen], lyric_screens: List[LyricsScreen]
+    ) -> List[Union[SectionScreen, LyricsScreen]]:
+        """Merge section and lyric screens in chronological order."""
+        # Sort all screens by start time
+        return sorted(section_screens + lyric_screens, key=lambda s: s.start_ts)
+    def _log_final_screens(self, screens: List[Union[SectionScreen, LyricsScreen]]) -> None:
+        """Log details of all final screens."""
+        self.logger.debug("Final screens created:")
+        for i, screen in enumerate(screens):
+            self.logger.debug(f"Screen {i + 1}:")
+            if isinstance(screen, SectionScreen):
+                self.logger.debug(f"  Section: {screen.section_type}")
+                self.logger.debug(f"  Text: {screen.text}")
+                self.logger.debug(f"  Time: {screen.start_time:.2f}s - {screen.end_time:.2f}s")
             else:
-                next_segment = segments[i + 1]
-                segment.end_ts = next_segment.ts
-    return screens
-def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
-    """
-    Set start times for screens to the end times of the previous screen.
-    """
-    prev_screen = None
-    for screen in screens:
-        if prev_screen is None:
-            screen.start_ts = timedelta()
-        else:
-            screen.start_ts = prev_screen.end_ts + timedelta(seconds=0.1)
-        prev_screen = screen
-    return screens
-def create_styled_subtitles(
-    lyric_screens: List[LyricsScreen],
-    resolution,
-    fontsize,
-) -> ass.ASS:
-    a = ass.ASS()
-    a.set_resolution(resolution)
-    a.styles_format = [
-        "Name",  # The name of the Style. Case sensitive. Cannot include commas.
-        "Fontname",  # The fontname as used by Windows. Case-sensitive.
-        "Fontsize",  # Font size
-        "PrimaryColour",  # This is the colour that a subtitle will normally appear in.
-        "SecondaryColour",  # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
-        "OutlineColour",  # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
-        "BackColour",  # This is the colour of the subtitle outline or shadow, if these are used
-        "Bold",  # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
-        "Italic",  # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
-        "Underline",  # [-1 or 0]
-        "StrikeOut",  # [-1 or 0]
-        "ScaleX",  # Modifies the width of the font. [percent]
-        "ScaleY",  # Modifies the height of the font. [percent]
-        "Spacing",  # Extra space between characters. [pixels]
-        "Angle",  # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
-        "BorderStyle",  # 1=Outline + drop shadow, 3=Opaque box
-        "Outline",  # If BorderStyle is 1,  then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
-        "Shadow",  # If BorderStyle is 1,  then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
-        "Alignment",  # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
-        "MarginL",  # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen.The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
-        "MarginR",  # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
-        "MarginV",  # MarginV. This defines the vertical Left Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred
-        "Encoding",  #
-    ]
-    style = ass.ASS.Style()
-    style.type = "Style"
-    style.Name = "Nomad"
-    style.Fontname = "Avenir Next Bold"
-    style.Fontsize = fontsize
-    style.PrimaryColour = (112, 112, 247, 255)
-    style.SecondaryColour = (255, 255, 255, 255)
-    style.OutlineColour = (26, 58, 235, 255)
-    style.BackColour = (0, 255, 0, 255)  # (26, 58, 235, 255)
-    style.Bold = False
-    style.Italic = False
-    style.Underline = False
-    style.StrikeOut = False
-    style.ScaleX = 100
-    style.ScaleY = 100
-    style.Spacing = 0
-    style.Angle = 0.0
-    style.BorderStyle = 1
-    style.Outline = 1
-    style.Shadow = 0
-    style.Alignment = ass.ASS.ALIGN_TOP_CENTER
-    style.MarginL = 0
-    style.MarginR = 0
-    style.MarginV = 0
-    style.Encoding = 0
-    a.add_style(style)
-    a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
-    for screen in lyric_screens:
-        [a.add(event) for event in screen.as_ass_events(style)]
-    return a
+                self.logger.debug(f"  Number of lines: {len(screen.lines)}")
+                for j, line in enumerate(screen.lines):
+                    self.logger.debug(f"    Line {j + 1} ({line.segment.start_time:.2f}s - {line.segment.end_time:.2f}s): {line}")
+    def _create_styled_ass_instance(self, resolution, fontsize):
+        a = ASS()
+        a.set_resolution(resolution)
+        a.styles_format = [
+            "Name",  # The name of the Style. Case sensitive. Cannot include commas.
+            "Fontname",  # The fontname as used by Windows. Case-sensitive.
+            "Fontpath",  # The path to the font file.
+            "Fontsize",  # Font size
+            "PrimaryColour",  # This is the colour that a subtitle will normally appear in.
+            "SecondaryColour",  # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
+            "OutlineColour",  # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
+            "BackColour",  # This is the colour of the subtitle outline or shadow, if these are used
+            "Bold",  # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
+            "Italic",  # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
+            "Underline",  # [-1 or 0]
+            "StrikeOut",  # [-1 or 0]
+            "ScaleX",  # Modifies the width of the font. [percent]
+            "ScaleY",  # Modifies the height of the font. [percent]
+            "Spacing",  # Extra space between characters. [pixels]
+            "Angle",  # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
+            "BorderStyle",  # 1=Outline + drop shadow, 3=Opaque box
+            "Outline",  # If BorderStyle is 1,  then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
+            "Shadow",  # If BorderStyle is 1,  then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
+            "Alignment",  # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
+            "MarginL",  # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen.The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
+            "MarginR",  # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
+            "MarginV",  # MarginV. This defines the vertical Left Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred
+            "Encoding",  #
+        ]
+        # Get font settings from styles
+        karaoke_styles = self.styles.get("karaoke", {})
+        font_path = karaoke_styles.get("font_path")
+        style = Style()
+        style.type = "Style"
+        style.Name = self.styles["karaoke"]["ass_name"]
+        style.Fontname = self.styles["karaoke"]["font"]
+        style.Fontpath = font_path
+        style.Fontsize = fontsize
+        style.Alignment = ALIGN_TOP_CENTER
+        # Convert color strings to tuples of integers
+        def parse_color(color_str):
+            return tuple(int(x.strip()) for x in color_str.split(","))
+        style.PrimaryColour = parse_color(self.styles["karaoke"]["primary_color"])
+        style.SecondaryColour = parse_color(self.styles["karaoke"]["secondary_color"])
+        style.OutlineColour = parse_color(self.styles["karaoke"]["outline_color"])
+        style.BackColour = parse_color(self.styles["karaoke"]["back_color"])
+        # Convert boolean strings to integers (-1 for True, 0 for False)
+        def parse_bool(value):
+            return -1 if value else 0
+        style.Bold = parse_bool(self.styles["karaoke"]["bold"])
+        style.Italic = parse_bool(self.styles["karaoke"]["italic"])
+        style.Underline = parse_bool(self.styles["karaoke"]["underline"])
+        style.StrikeOut = parse_bool(self.styles["karaoke"]["strike_out"])
+        # Convert numeric strings to appropriate types
+        style.ScaleX = int(self.styles["karaoke"]["scale_x"])
+        style.ScaleY = int(self.styles["karaoke"]["scale_y"])
+        style.Spacing = int(self.styles["karaoke"]["spacing"])
+        style.Angle = float(self.styles["karaoke"]["angle"])
+        style.BorderStyle = int(self.styles["karaoke"]["border_style"])
+        style.Outline = int(self.styles["karaoke"]["outline"])
+        style.Shadow = int(self.styles["karaoke"]["shadow"])
+        style.MarginL = int(self.styles["karaoke"]["margin_l"])
+        style.MarginR = int(self.styles["karaoke"]["margin_r"])
+        style.MarginV = int(self.styles["karaoke"]["margin_v"])
+        style.Encoding = int(self.styles["karaoke"]["encoding"])
+        a.add_style(style)
+        a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
+        return a, style
+    def _create_styled_subtitles(
+        self,
+        screens: List[Union[SectionScreen, LyricsScreen]],
+        resolution: Tuple[int, int],
+        fontsize: int,
+    ) -> ASS:
+        """Create styled ASS subtitles from all screens."""
+        ass_file, style = self._create_styled_ass_instance(resolution, fontsize)
+        active_lines = []
+        previous_instrumental_end = None
+        for screen in screens:
+            if isinstance(screen, SectionScreen):
+                # Create section marker events (returns tuple of ([event], []))
+                section_events, _ = screen.as_ass_events(style=style)
+                for event in section_events:  # Now we're iterating over the list of events
+                    ass_file.add(event)
+                previous_instrumental_end = screen.end_time
+                active_lines = []
+                self.logger.debug(f"Found instrumental section ending at {screen.end_time:.2f}s")
+                continue
+            # Process screen and get its events
+            self.logger.debug(f"Processing screen with instrumental_end={previous_instrumental_end}")
+            # fmt: off
+            events, active_lines = screen.as_ass_events(
+                style=style,
+                previous_active_lines=active_lines,
+                previous_instrumental_end=previous_instrumental_end
+            )
+            # fmt: on
+            # Only reset instrumental end after we've processed the first post-instrumental screen
+            if previous_instrumental_end is not None:
+                self.logger.debug("Clearing instrumental end time after processing post-instrumental screen")
+                previous_instrumental_end = None
+            # Add all events to ASS file
+            for event in events:
+                ass_file.add(event)
+        return ass_file

lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl