PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl - Mend

karaoke-gen: 0.57.0 (py3-none-any.whl) → 0.71.27 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1965 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.27.dist-info/METADATA +610 -0
karaoke_gen-0.71.27.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/output/subtitles.py ADDED Viewed

@@ -0,0 +1,397 @@
+import os
+import logging
+from typing import List, Optional, Tuple, Union
+import subprocess
+import json
+from lyrics_transcriber.output.ass.section_screen import SectionScreen
+from lyrics_transcriber.types import LyricsSegment, Word
+from lyrics_transcriber.output.ass import LyricsScreen, LyricsLine
+from lyrics_transcriber.output.ass.ass import ASS
+from lyrics_transcriber.output.ass.style import Style
+from lyrics_transcriber.output.ass.constants import ALIGN_TOP_CENTER
+from lyrics_transcriber.output.ass import LyricsScreen
+from lyrics_transcriber.output.ass.section_detector import SectionDetector
+from lyrics_transcriber.output.ass.config import ScreenConfig
+class SubtitlesGenerator:
+    """Handles generation of subtitle files in various formats."""
+    def __init__(
+        self,
+        output_dir: str,
+        video_resolution: Tuple[int, int],
+        font_size: int,
+        line_height: int,
+        styles: dict,
+        subtitle_offset_ms: int = 0,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """Set up a SubtitlesGenerator and build its ScreenConfig.
+
+        Args:
+            output_dir: Directory where output files will be written.
+            video_resolution: Tuple of (width, height) for the video.
+            font_size: Font size for subtitles.
+            line_height: Line height passed on to ScreenConfig.
+            styles: Dictionary of style configurations. Its optional
+                "karaoke" entry may override selected ScreenConfig fields.
+            subtitle_offset_ms: Offset for subtitle timing in milliseconds.
+            logger: Optional logger; a module-level logger is used when omitted.
+        """
+        self.output_dir = output_dir
+        self.video_resolution = video_resolution
+        self.font_size = font_size
+        self.styles = styles
+        self.subtitle_offset_ms = subtitle_offset_ms
+        # ScreenConfig fields that the "karaoke" style section is allowed to override.
+        overridable_fields = (
+            "max_visible_lines",
+            "top_padding",
+            "screen_gap_threshold",
+            "post_roll_time",
+            "fade_in_ms",
+            "fade_out_ms",
+            "lead_in_color",
+            "text_case_transform",
+            # Lead-in indicator configuration options
+            "lead_in_enabled",
+            "lead_in_width_percent",
+            "lead_in_height_percent",
+            "lead_in_opacity_percent",
+            "lead_in_outline_thickness",
+            "lead_in_outline_color",
+            "lead_in_gap_threshold",
+            "lead_in_horiz_offset_percent",
+            "lead_in_vert_offset_percent",
+        )
+        karaoke_overrides = styles.get("karaoke", {})
+        # Mandatory settings first, then whichever overrides the style actually supplies.
+        screen_kwargs = {
+            "line_height": line_height,
+            "video_width": video_resolution[0],
+            "video_height": video_resolution[1],
+        }
+        screen_kwargs.update(
+            {name: karaoke_overrides[name] for name in overridable_fields if name in karaoke_overrides}
+        )
+        self.config = ScreenConfig(**screen_kwargs)
+        self.logger = logger or logging.getLogger(__name__)
+    def _get_output_path(self, output_prefix: str, extension: str) -> str:
+        """Return the path of ``<output_prefix>.<extension>`` inside the output directory."""
+        filename = "{}.{}".format(output_prefix, extension)
+        return os.path.join(self.output_dir, filename)
+    def _get_audio_duration(self, audio_filepath: str, segments: Optional[List[LyricsSegment]] = None) -> float:
+        """Return the duration of an audio file in seconds, as reported by ffprobe.
+
+        If ffprobe cannot be started, exits with an error, or prints output
+        that does not contain a usable duration, the failure is logged and a
+        fallback estimate is returned instead of raising.
+
+        Args:
+            audio_filepath: Path of the audio file to probe.
+            segments: Optional lyric segments used for the fallback estimate.
+
+        Returns:
+            The probed duration. On failure, the end time of the last segment
+            plus 30 seconds, or 0.0 when no segments were supplied.
+        """
+        probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", audio_filepath]
+        try:
+            probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
+            duration = float(json.loads(probe_output)["format"]["duration"])
+        except (subprocess.CalledProcessError, OSError, ValueError, KeyError, TypeError) as e:
+            # OSError: ffprobe is missing or cannot be executed.
+            # ValueError: invalid JSON (JSONDecodeError is a subclass) or a non-numeric duration.
+            # KeyError / TypeError: the JSON does not have the expected shape.
+            self.logger.error(f"Failed to get audio duration: {e}")
+            # Fallback to last segment end time plus buffer
+            if segments:
+                duration = segments[-1].end_time + 30.0
+                self.logger.warning(f"Using fallback duration: {duration:.2f}s")
+                return duration
+            return 0.0
+        self.logger.debug(f"Detected audio duration: {duration:.2f}s")
+        return duration
+    def generate_ass(self, segments: List[LyricsSegment], output_prefix: str, audio_filepath: str) -> str:
+        """Write the karaoke ASS subtitle file for the given segments.
+
+        Args:
+            segments: Lyric segments to render.
+            output_prefix: File name prefix; " (Karaoke).ass" is appended to it.
+            audio_filepath: Audio file whose duration is used when building screens.
+
+        Returns:
+            Path of the written ASS file.
+
+        Raises:
+            Exception: Any error raised while building or writing the file is
+                logged with its traceback and then re-raised unchanged.
+        """
+        log = self.logger
+        log.info("Generating ASS format subtitles")
+        ass_path = self._get_output_path(f"{output_prefix} (Karaoke)", "ass")
+        try:
+            log.debug(f"Processing {len(segments)} segments")
+            duration = self._get_audio_duration(audio_filepath, segments)
+            all_screens = self._create_screens(segments, duration)
+            log.debug(f"Created {len(all_screens)} initial screens")
+            styled = self._create_styled_subtitles(all_screens, self.video_resolution, self.font_size)
+            log.debug("Created styled subtitles")
+            styled.write(ass_path)
+            log.info(f"ASS file generated: {ass_path}")
+        except Exception as err:
+            log.error(f"Failed to generate ASS file: {err}", exc_info=True)
+            raise
+        return ass_path
+    def _create_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[LyricsScreen]:
+        """Build the chronologically ordered list of screens for a song.
+
+        When ``subtitle_offset_ms`` is non-zero, every segment and word is
+        copied with its timings shifted by that offset before any screen is
+        built; the objects passed in by the caller are left untouched. Shifted
+        start and end times are both clamped at 0, so a negative offset can
+        never produce a negative timestamp or an end time that precedes its
+        start time.
+
+        Args:
+            segments: Lyric segments to lay out.
+            song_duration: Song length in seconds, forwarded to section detection.
+
+        Returns:
+            Section screens and lyric screens merged in start-time order.
+        """
+        self.logger.debug("Creating screens from segments")
+        # Apply timing offset to segments if needed
+        if self.subtitle_offset_ms != 0:
+            self.logger.info(f"Subtitle offset: {self.subtitle_offset_ms}ms")
+            offset_seconds = self.subtitle_offset_ms / 1000.0
+
+            def shifted(timestamp):
+                # Clamping start and end identically keeps end >= start for every
+                # word and segment, even when the offset is negative.
+                return max(0, timestamp + offset_seconds)
+
+            segments = [
+                LyricsSegment(
+                    id=seg.id,  # Preserve original segment ID
+                    text=seg.text,
+                    words=[
+                        Word(
+                            id=word.id,  # Preserve original word ID
+                            text=word.text,
+                            start_time=shifted(word.start_time),
+                            end_time=shifted(word.end_time),
+                            confidence=word.confidence,
+                            created_during_correction=getattr(word, "created_during_correction", False),  # Preserve correction flag
+                        )
+                        for word in seg.words
+                    ],
+                    start_time=shifted(seg.start_time),
+                    end_time=shifted(seg.end_time),
+                )
+                for seg in segments
+            ]
+            self.logger.info(f"Applied {self.subtitle_offset_ms}ms offset to segment timings")
+        # Create section screens and get instrumental boundaries
+        section_screens = self._create_section_screens(segments, song_duration)
+        instrumental_times = self._get_instrumental_times(section_screens)
+        # Create regular lyric screens
+        lyric_screens = self._create_lyric_screens(segments, instrumental_times)
+        # Merge and process all screens
+        all_screens = self._merge_and_process_screens(section_screens, lyric_screens)
+        # Log final results
+        self._log_final_screens(all_screens)
+        return all_screens
+    def _create_section_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[SectionScreen]:
+        """Run a SectionDetector over the segments and return the section screens it produces."""
+        detector = SectionDetector(logger=self.logger)
+        section_screens = detector.process_segments(
+            segments,
+            self.video_resolution,
+            self.config.line_height,
+            song_duration,
+        )
+        return section_screens
+    def _get_instrumental_times(self, section_screens: List[SectionScreen]) -> List[Tuple[float, float]]:
+        """Collect the (start, end) time of every INSTRUMENTAL section screen.
+
+        Entries that are not SectionScreen instances, or whose section type is
+        not "INSTRUMENTAL", are ignored. The boundaries found are logged at
+        debug level.
+        """
+        boundaries = []
+        for screen in section_screens:
+            if isinstance(screen, SectionScreen) and screen.section_type == "INSTRUMENTAL":
+                boundaries.append((screen.start_time, screen.end_time))
+        self.logger.debug(f"Found {len(boundaries)} instrumental sections:")
+        for begin, finish in boundaries:
+            self.logger.debug(f"  {begin:.2f}s - {finish:.2f}s")
+        return boundaries
+    def _create_lyric_screens(self, segments: List[LyricsSegment], instrumental_times: List[Tuple[float, float]]) -> List[LyricsScreen]:
+        """Group segments into lyric screens, honouring instrumental boundaries.
+
+        Segments that start inside an instrumental section are dropped. Each
+        remaining segment becomes one line, appended to the current screen or
+        to a fresh screen when ``_should_start_new_screen`` asks for one.
+
+        Args:
+            segments: Lyric segments in playback order.
+            instrumental_times: (start, end) boundaries of instrumental sections.
+
+        Returns:
+            The lyric screens in the order they were created.
+        """
+        lyric_screens: List[LyricsScreen] = []
+        active_screen: Optional[LyricsScreen] = None
+        for index, seg in enumerate(segments):
+            self.logger.debug(f"Processing segment {index}: {seg.start_time:.2f}s - {seg.end_time:.2f}s")
+            # Segments inside an instrumental section never get a line.
+            if self._is_in_instrumental_section(seg, instrumental_times):
+                continue
+            # The first kept segment always opens a screen, so active_screen is set below.
+            if self._should_start_new_screen(active_screen, seg, instrumental_times):
+                active_screen = LyricsScreen(
+                    video_size=self.video_resolution,
+                    line_height=self.config.line_height,
+                    config=self.config,
+                    logger=self.logger,
+                )
+                lyric_screens.append(active_screen)
+                self.logger.debug("  Created new screen")
+            new_line = LyricsLine(logger=self.logger, segment=seg, screen_config=self.config)
+            active_screen.lines.append(new_line)
+            self.logger.debug(f"  Added line to screen (now has {len(active_screen.lines)} lines)")
+        return lyric_screens
+    def _is_in_instrumental_section(self, segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]) -> bool:
+        """Return True when the segment starts inside an instrumental section.
+
+        Only the segment's start time is tested; each section's start is
+        inclusive and its end is exclusive.
+        """
+        for gap_start, gap_end in instrumental_times:
+            if not (gap_start <= segment.start_time < gap_end):
+                continue
+            self.logger.debug(f"  Skipping segment - falls within instrumental {gap_start:.2f}s - {gap_end:.2f}s")
+            return True
+        return False
+    def _should_start_new_screen(
+        self, current_screen: Optional[LyricsScreen], segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]
+    ) -> bool:
+        """Decide whether the segment has to open a new lyric screen.
+
+        A new screen is needed when there is no current screen, when the
+        current screen already holds ``config.max_visible_lines`` lines, or
+        when an instrumental section lies between the last line on the current
+        screen and this segment.
+        """
+        if current_screen is None or len(current_screen.lines) >= self.config.max_visible_lines:
+            return True
+        if not current_screen.lines:
+            return False
+        # Is this the first segment after an instrumental section?
+        last_shown = current_screen.lines[-1].segment
+        for gap_start, gap_end in instrumental_times:
+            if last_shown.end_time <= gap_start and segment.start_time >= gap_end:
+                self.logger.debug(f"  Forcing new screen - first segment after instrumental {gap_start:.2f}s - {gap_end:.2f}s")
+                return True
+        return False
+    def _merge_and_process_screens(
+        self, section_screens: List[SectionScreen], lyric_screens: List[LyricsScreen]
+    ) -> List[Union[SectionScreen, LyricsScreen]]:
+        """Return a new list holding both kinds of screen, ordered by ``start_ts``.
+
+        The sort is stable, so screens with equal start timestamps keep their
+        relative order (section screens ahead of lyric screens). Neither input
+        list is modified.
+        """
+        combined = section_screens + lyric_screens
+        combined.sort(key=lambda screen: screen.start_ts)
+        return combined
+    def _log_final_screens(self, screens: List[Union[SectionScreen, LyricsScreen]]) -> None:
+        """Write a debug-level summary of every screen.
+
+        Section screens are reported with their type, text and time range; any
+        other screen is reported with its line count followed by one entry per
+        line.
+        """
+        debug = self.logger.debug
+        debug("Final screens created:")
+        for number, screen in enumerate(screens, start=1):
+            debug(f"Screen {number}:")
+            if not isinstance(screen, SectionScreen):
+                debug(f"  Number of lines: {len(screen.lines)}")
+                for line_number, line in enumerate(screen.lines, start=1):
+                    debug(f"    Line {line_number} ({line.segment.start_time:.2f}s - {line.segment.end_time:.2f}s): {line}")
+                continue
+            debug(f"  Section: {screen.section_type}")
+            debug(f"  Text: {screen.text}")
+            debug(f"  Time: {screen.start_time:.2f}s - {screen.end_time:.2f}s")
+    def _create_styled_ass_instance(self, resolution, fontsize):
+        a = ASS()
+        a.set_resolution(resolution)
+        a.styles_format = [
+            "Name",  # The name of the Style. Case sensitive. Cannot include commas.
+            "Fontname",  # The fontname as used by Windows. Case-sensitive.
+            "Fontpath",  # The path to the font file.
+            "Fontsize",  # Font size
+            "PrimaryColour",  # This is the colour that a subtitle will normally appear in.
+            "SecondaryColour",  # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
+            "OutlineColour",  # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
+            "BackColour",  # This is the colour of the subtitle outline or shadow, if these are used
+            "Bold",  # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
+            "Italic",  # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
+            "Underline",  # [-1 or 0]
+            "StrikeOut",  # [-1 or 0]
+            "ScaleX",  # Modifies the width of the font. [percent]
+            "ScaleY",  # Modifies the height of the font. [percent]
+            "Spacing",  # Extra space between characters. [pixels]
+            "Angle",  # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
+            "BorderStyle",  # 1=Outline + drop shadow, 3=Opaque box
+            "Outline",  # If BorderStyle is 1,  then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
+            "Shadow",  # If BorderStyle is 1,  then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
+            "Alignment",  # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
+            "MarginL",  # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen.The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
+            "MarginR",  # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
+            "MarginV",  # MarginV. This defines the vertical Left Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred
+            "Encoding",  #
+        ]
+        # Get font settings from styles
+        karaoke_styles = self.styles.get("karaoke", {})
+        font_path = karaoke_styles.get("font_path")
+        style = Style()
+        style.type = "Style"
+        style.Name = self.styles["karaoke"]["ass_name"]
+        style.Fontname = self.styles["karaoke"]["font"]
+        style.Fontpath = font_path
+        style.Fontsize = fontsize
+        style.Alignment = ALIGN_TOP_CENTER
+        # Convert color strings to tuples of integers
+        def parse_color(color_str):
+            return tuple(int(x.strip()) for x in color_str.split(","))
+        style.PrimaryColour = parse_color(self.styles["karaoke"]["primary_color"])
+        style.SecondaryColour = parse_color(self.styles["karaoke"]["secondary_color"])
+        style.OutlineColour = parse_color(self.styles["karaoke"]["outline_color"])
+        style.BackColour = parse_color(self.styles["karaoke"]["back_color"])
+        # Convert boolean strings to integers (-1 for True, 0 for False)
+        def parse_bool(value):
+            return -1 if value else 0
+        style.Bold = parse_bool(self.styles["karaoke"]["bold"])
+        style.Italic = parse_bool(self.styles["karaoke"]["italic"])
+        style.Underline = parse_bool(self.styles["karaoke"]["underline"])
+        style.StrikeOut = parse_bool(self.styles["karaoke"]["strike_out"])
+        # Convert numeric strings to appropriate types
+        style.ScaleX = int(self.styles["karaoke"]["scale_x"])
+        style.ScaleY = int(self.styles["karaoke"]["scale_y"])
+        style.Spacing = int(self.styles["karaoke"]["spacing"])
+        style.Angle = float(self.styles["karaoke"]["angle"])
+        style.BorderStyle = int(self.styles["karaoke"]["border_style"])
+        style.Outline = int(self.styles["karaoke"]["outline"])
+        style.Shadow = int(self.styles["karaoke"]["shadow"])
+        style.MarginL = int(self.styles["karaoke"]["margin_l"])
+        style.MarginR = int(self.styles["karaoke"]["margin_r"])
+        style.MarginV = int(self.styles["karaoke"]["margin_v"])
+        style.Encoding = int(self.styles["karaoke"]["encoding"])
+        a.add_style(style)
+        a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
+        return a, style
+    def _create_styled_subtitles(
+        self,
+        screens: List[Union[SectionScreen, LyricsScreen]],
+        resolution: Tuple[int, int],
+        fontsize: int,
+    ) -> ASS:
+        """Create styled ASS subtitles from all screens."""
+        ass_file, style = self._create_styled_ass_instance(resolution, fontsize)
+        active_lines = []
+        previous_instrumental_end = None
+        for screen in screens:
+            if isinstance(screen, SectionScreen):
+                # Create section marker events (returns tuple of ([event], []))
+                section_events, _ = screen.as_ass_events(style=style)
+                for event in section_events:  # Now we're iterating over the list of events
+                    ass_file.add(event)
+                previous_instrumental_end = screen.end_time
+                active_lines = []
+                self.logger.debug(f"Found instrumental section ending at {screen.end_time:.2f}s")
+                continue
+            # Process screen and get its events
+            self.logger.debug(f"Processing screen with instrumental_end={previous_instrumental_end}")
+            # fmt: off
+            events, active_lines = screen.as_ass_events(
+                style=style,
+                previous_active_lines=active_lines,
+                previous_instrumental_end=previous_instrumental_end
+            )
+            # fmt: on
+            # Only reset instrumental end after we've processed the first post-instrumental screen
+            if previous_instrumental_end is not None:
+                self.logger.debug("Clearing instrumental end time after processing post-instrumental screen")
+                previous_instrumental_end = None
+            # Add all events to ASS file
+            for event in events:
+                ass_file.add(event)
+        return ass_file

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.27py3-none-any.whl