PyPI - karaoke-gen - Versions diffs - 0.75.54__py3-none-any.whl - Mend

karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show

karaoke_gen/__init__.py +38 -0
karaoke_gen/audio_fetcher.py +1614 -0
karaoke_gen/audio_processor.py +790 -0
karaoke_gen/config.py +83 -0
karaoke_gen/file_handler.py +387 -0
karaoke_gen/instrumental_review/__init__.py +45 -0
karaoke_gen/instrumental_review/analyzer.py +408 -0
karaoke_gen/instrumental_review/editor.py +322 -0
karaoke_gen/instrumental_review/models.py +171 -0
karaoke_gen/instrumental_review/server.py +475 -0
karaoke_gen/instrumental_review/static/index.html +1529 -0
karaoke_gen/instrumental_review/waveform.py +409 -0
karaoke_gen/karaoke_finalise/__init__.py +1 -0
karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
karaoke_gen/karaoke_gen.py +1026 -0
karaoke_gen/lyrics_processor.py +474 -0
karaoke_gen/metadata.py +160 -0
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/__init__.py +18 -0
karaoke_gen/utils/bulk_cli.py +492 -0
karaoke_gen/utils/cli_args.py +432 -0
karaoke_gen/utils/gen_cli.py +978 -0
karaoke_gen/utils/remote_cli.py +3268 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen/video_generator.py +424 -0
karaoke_gen-0.75.54.dist-info/METADATA +718 -0
karaoke_gen-0.75.54.dist-info/RECORD +287 -0
karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +594 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +919 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +214 -0
lyrics_transcriber/frontend/src/api.ts +254 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +306 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +379 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +650 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0

lyrics_transcriber/output/ass/config.py ADDED Viewed

@@ -0,0 +1,180 @@
+from dataclasses import dataclass
+class ScreenConfig:
+    """Configuration for screen timing and layout.
+    Lead-in Indicator Configuration:
+        lead_in_enabled: bool - Enable/disable the lead-in indicator entirely (default: True)
+        lead_in_width_percent: float - Width as percentage of screen width (default: 3.5)
+        lead_in_height_percent: float - Height as percentage of screen height (default: 4.0)
+        lead_in_opacity_percent: float - Opacity percentage, 0-100 (default: 70.0)
+        lead_in_outline_thickness: int - Outline thickness in pixels, 0 for no outline (default: 0)
+        lead_in_outline_color: str - Outline color in RGB format "R, G, B" (default: "0, 0, 0")
+        lead_in_gap_threshold: float - Minimum gap in seconds to show lead-in (default: 5.0)
+        lead_in_color: str - Fill color in RGB format "R, G, B" (default: "112, 112, 247")
+        lead_in_horiz_offset_percent: float - Horizontal offset as percentage of screen width, can be negative (default: 0.0)
+        lead_in_vert_offset_percent: float - Vertical offset as percentage of screen height, can be negative (default: 0.0)
+    Example JSON configuration:
+        {
+          "karaoke": {
+            "lead_in_enabled": true,
+            "lead_in_width_percent": 4.0,
+            "lead_in_height_percent": 5.0,
+            "lead_in_opacity_percent": 80,
+            "lead_in_outline_thickness": 2,
+            "lead_in_outline_color": "255, 255, 255",
+            "lead_in_gap_threshold": 3.0,
+            "lead_in_color": "230, 139, 33",
+            "lead_in_horiz_offset_percent": -2.0,
+            "lead_in_vert_offset_percent": 1.0
+          }
+        }
+    """
+    def __init__(
+        self,
+        line_height: int = 50,
+        max_visible_lines: int = 4,
+        top_padding: int = None,
+        video_width: int = 640,
+        video_height: int = 360,
+        screen_gap_threshold: float = 5.0,
+        post_roll_time: float = 1.0,
+        fade_in_ms: int = 200,
+        fade_out_ms: int = 300,
+        lead_in_color: str = "112, 112, 247",  # Default blue color in RGB format
+        text_case_transform: str = "none",  # Options: "none", "uppercase", "lowercase", "propercase"
+        # New lead-in indicator configuration options
+        lead_in_enabled: bool = True,
+        lead_in_width_percent: float = 3.5,
+        lead_in_height_percent: float = 4.0,
+        lead_in_opacity_percent: float = 70.0,
+        lead_in_outline_thickness: int = 0,
+        lead_in_outline_color: str = "0, 0, 0",
+        lead_in_gap_threshold: float = 5.0,
+        lead_in_horiz_offset_percent: float = 0.0,
+        lead_in_vert_offset_percent: float = 0.0,
+    ):
+        # Screen layout
+        self.max_visible_lines = max_visible_lines
+        self.line_height = line_height
+        self.top_padding = top_padding if top_padding is not None else line_height
+        self.video_height = video_height
+        self.video_width = video_width
+        # Timing configuration
+        self.screen_gap_threshold = screen_gap_threshold
+        self.post_roll_time = post_roll_time
+        self.fade_in_ms = fade_in_ms
+        self.fade_out_ms = fade_out_ms
+        # Lead-in configuration
+        self.lead_in_color = lead_in_color
+        self.lead_in_enabled = lead_in_enabled
+        self.lead_in_width_percent = lead_in_width_percent
+        self.lead_in_height_percent = lead_in_height_percent
+        self.lead_in_opacity_percent = lead_in_opacity_percent
+        self.lead_in_outline_thickness = lead_in_outline_thickness
+        self.lead_in_outline_color = lead_in_outline_color
+        self.lead_in_gap_threshold = lead_in_gap_threshold
+        self.lead_in_horiz_offset_percent = lead_in_horiz_offset_percent
+        self.lead_in_vert_offset_percent = lead_in_vert_offset_percent
+        # Text formatting configuration
+        self.text_case_transform = text_case_transform
+    def get_lead_in_color_ass_format(self) -> str:
+        """Convert RGB lead-in color to ASS format.
+        Accepts either:
+        - RGB format: "112, 112, 247"
+        - ASS format: "&HF77070&" (for backward compatibility)
+        Returns ASS format color string.
+        """
+        color_str = self.lead_in_color.strip()
+        # If already in ASS format, return as-is
+        if color_str.startswith("&H") and color_str.endswith("&"):
+            return color_str
+        # Parse RGB format "R, G, B" or "R, G, B, A"
+        try:
+            parts = [int(x.strip()) for x in color_str.split(",")]
+            if len(parts) == 3:
+                r, g, b = parts
+                a = 255  # Default full opacity
+            elif len(parts) == 4:
+                r, g, b, a = parts
+            else:
+                raise ValueError(f"Invalid color format: {color_str}")
+            # Convert to ASS format: &H{alpha}{blue}{green}{red}&
+            # Note: alpha is inverted in ASS (255-a)
+            return f"&H{255-a:02X}{b:02X}{g:02X}{r:02X}&"
+        except (ValueError, TypeError) as e:
+            # Fallback to default blue if parsing fails
+            return "&HF77070&"
+    def get_lead_in_outline_color_ass_format(self) -> str:
+        """Convert RGB lead-in outline color to ASS format.
+        Accepts either:
+        - RGB format: "0, 0, 0"
+        - ASS format: "&H000000&" (for backward compatibility)
+        Returns ASS format color string.
+        """
+        color_str = self.lead_in_outline_color.strip()
+        # If already in ASS format, return as-is
+        if color_str.startswith("&H") and color_str.endswith("&"):
+            return color_str
+        # Parse RGB format "R, G, B" or "R, G, B, A"
+        try:
+            parts = [int(x.strip()) for x in color_str.split(",")]
+            if len(parts) == 3:
+                r, g, b = parts
+                a = 255  # Default full opacity
+            elif len(parts) == 4:
+                r, g, b, a = parts
+            else:
+                raise ValueError(f"Invalid color format: {color_str}")
+            # Convert to ASS format: &H{alpha}{blue}{green}{red}&
+            # Note: alpha is inverted in ASS (255-a)
+            return f"&H{255-a:02X}{b:02X}{g:02X}{r:02X}&"
+        except (ValueError, TypeError) as e:
+            # Fallback to default black if parsing fails
+            return "&H000000&"
+    def get_lead_in_opacity_ass_format(self) -> str:
+        """Convert opacity percentage to ASS alpha format.
+        Returns ASS alpha value (e.g., &H4D& for 70% opacity).
+        """
+        # ASS alpha is inverted: 0=opaque, 255=transparent
+        # Convert percentage to alpha value
+        alpha = int((100 - self.lead_in_opacity_percent) / 100 * 255)
+        return f"&H{alpha:02X}&"
+@dataclass
+class LineTimingInfo:
+    """Timing information for a single line."""
+    # NOTE(review): the four values are presumably timestamps in seconds on the
+    # song timeline - units are not established in this file, confirm against the code that builds them.
+    fade_in_time: float  # presumably when the line starts fading in
+    end_time: float  # presumably when the line's lyrics end
+    fade_out_time: float  # presumably when the line starts fading out
+    clear_time: float  # presumably when the line is fully cleared from the screen
+@dataclass
+class LineState:
+    """Complete state for a single line."""
+    text: str  # text of the line
+    timing: LineTimingInfo  # fade-in / end / fade-out / clear times for the line
+    y_position: int  # NOTE(review): presumably the vertical position in pixels - confirm against callers

lyrics_transcriber/output/ass/constants.py ADDED Viewed

@@ -0,0 +1,23 @@
+# Alignment constants
+ALIGN_BOTTOM_LEFT = 1
+ALIGN_BOTTOM_CENTER = 2
+ALIGN_BOTTOM_RIGHT = 3
+ALIGN_MIDDLE_LEFT = 4
+ALIGN_MIDDLE_CENTER = 5
+ALIGN_MIDDLE_RIGHT = 6
+ALIGN_TOP_LEFT = 7
+ALIGN_TOP_CENTER = 8
+ALIGN_TOP_RIGHT = 9
+# Legacy alignment mapping
+LEGACY_ALIGNMENT_TO_REGULAR = {
+    "1": ALIGN_BOTTOM_LEFT,
+    "2": ALIGN_BOTTOM_CENTER,
+    "3": ALIGN_BOTTOM_RIGHT,
+    "5": ALIGN_TOP_LEFT,
+    "6": ALIGN_TOP_CENTER,
+    "7": ALIGN_TOP_RIGHT,
+    "9": ALIGN_MIDDLE_LEFT,
+    "10": ALIGN_MIDDLE_CENTER,
+    "11": ALIGN_MIDDLE_RIGHT,
+}

lyrics_transcriber/output/ass/event.py ADDED Viewed

@@ -0,0 +1,94 @@
+class Event:
+    aliases = {}
+    formatters = None
+    order = [
+        "Layer",
+        "Start",
+        "End",
+        "Style",
+        "Name",
+        "MarginL",
+        "MarginR",
+        "MarginV",
+        "Effect",
+        "Text",
+    ]
+    # Constructor
+    def __init__(self):
+        self.type = None
+        self.Layer = 0
+        self.Start = 0.0
+        self.End = 0.0
+        self.Style = None
+        self.Name = ""
+        self.MarginL = 0
+        self.MarginR = 0
+        self.MarginV = 0
+        self.Effect = ""
+        self.Text = ""
+    def set(self, attribute_name, value, *args):
+        if hasattr(self, attribute_name) and attribute_name[0].isupper():
+            setattr(
+                self,
+                attribute_name,
+                self.formatters[attribute_name][0](value, *args),
+            )
+    def get(self, attribute_name, *args):
+        if hasattr(self, attribute_name) and attribute_name[0].isupper():
+            return self.formatters[attribute_name][1](getattr(self, attribute_name), *args)
+        return None
+    def copy(self, other=None):
+        if other is None:
+            other = self.__class__()
+            target = other
+            source = self
+        else:
+            target = other
+            source = self
+        # Copy all attributes
+        target.type = source.type
+        target.Layer = source.Layer
+        target.Start = source.Start
+        target.End = source.End
+        target.Style = source.Style
+        target.Name = source.Name
+        target.MarginL = source.MarginL
+        target.MarginR = source.MarginR
+        target.MarginV = source.MarginV
+        target.Effect = source.Effect
+        target.Text = source.Text
+        return target
+    def equals(self, other):
+        return (
+            self.type == other.type
+            and self.Layer == other.Layer
+            and self.Start == other.Start
+            and self.End == other.End
+            and self.Style is other.Style
+            and self.Name == other.Name
+            and self.MarginL == other.MarginL
+            and self.MarginR == other.MarginR
+            and self.MarginV == other.MarginV
+            and self.Effect == other.Effect
+            and self.Text == other.Text
+        )
+    def same_style(self, other):
+        return (
+            self.type == other.type
+            and self.Layer == other.Layer
+            and self.Style is other.Style
+            and self.Name == other.Name
+            and self.MarginL == other.MarginL
+            and self.MarginR == other.MarginR
+            and self.MarginV == other.MarginV
+            and self.Effect == other.Effect
+        )

lyrics_transcriber/output/ass/formatters.py ADDED Viewed

@@ -0,0 +1,132 @@
+import re
+class Formatters:
+    __re_color_format = re.compile(r"&H([0-9a-fA-F]{8}|[0-9a-fA-F]{6})", re.U)
+    __re_tag_number = re.compile(r"^\s*([\+\-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+))", re.U)
+    @classmethod
+    def same(cls, val, *args):
+        return val
+    @classmethod
+    def color_to_str(cls, val, *args):
+        return "&H{0:02X}{1:02X}{2:02X}{3:02X}".format(255 - val[3], val[2], val[1], val[0])
+    @classmethod
+    def str_to_color(cls, val, *args):
+        match = cls.__re_color_format.search(val)
+        if match:
+            hex_val = "{0:>08s}".format(match.group(1))
+            return (
+                int(hex_val[6:8], 16),  # Red
+                int(hex_val[4:6], 16),  # Green
+                int(hex_val[2:4], 16),  # Blue
+                255 - int(hex_val[0:2], 16),  # Alpha
+            )
+        # Return white (255, 255, 255, 255) for invalid input
+        return (255, 255, 255, 255)
+    @classmethod
+    def n1bool_to_str(cls, val, *args):
+        if val:
+            return "-1"
+        return "0"
+    @classmethod
+    def str_to_n1bool(cls, val, *args):
+        try:
+            val = int(val, 10)
+        except ValueError:
+            return False
+        return val != 0
+    @classmethod
+    def integer_to_str(cls, val, *args):
+        return str(int(val))
+    @classmethod
+    def str_to_integer(cls, val, *args):
+        try:
+            return int(val, 10)
+        except ValueError:
+            return 0
+    @classmethod
+    def number_to_str(cls, val, *args):
+        if int(val) == val:
+            return str(int(val))
+            # No decimal
+        return str(val)
+    @classmethod
+    def str_to_number(cls, val, *args):
+        try:
+            return float(val)
+        except ValueError:
+            return 0.0
+    @classmethod
+    def timecode_to_str_generic(
+        cls,
+        timecode,
+        decimal_length=2,
+        seconds_length=2,
+        minutes_length=2,
+        hours_length=1,
+    ):
+        if decimal_length > 0:
+            total_length = seconds_length + decimal_length + 1
+        else:
+            total_length = seconds_length
+        tc_parts = [
+            "{{0:0{0:d}d}}".format(hours_length).format(int(timecode // 3600)),
+            "{{0:0{0:d}d}}".format(minutes_length).format(int((timecode // 60) % 60)),
+            "{{0:0{0:d}.{1:d}f}}".format(total_length, decimal_length).format(timecode % 60),
+        ]
+        return ":".join(tc_parts)
+    @classmethod
+    def timecode_to_str(cls, val, *args):
+        return cls.timecode_to_str_generic(val, 2)
+    @classmethod
+    def str_to_timecode(cls, val, *args):
+        time = 0.0
+        mult = 1
+        for t in reversed(val.split(":")):
+            time += float(t) * mult
+            mult *= 60
+        return time
+    @classmethod
+    def style_to_str(cls, val, *args):
+        if val is None:
+            return ""
+        return val.Name
+    @classmethod
+    def str_to_style(cls, val, style_map, style_constructor, *args):
+        if val in style_map:
+            return style_map[val]
+        # Create fake
+        style = style_constructor()
+        style.fake = True
+        style.Name = val
+        # Add to map (will not be included in global style list, but allows for duplicate "fake" styles to reference the same object)
+        style_map[style.Name] = style
+        # Return the new style
+        return style
+    @classmethod
+    def tag_argument_to_number(cls, arg, default_value=None):
+        match = cls.__re_tag_number.match(arg)
+        if match is None:
+            return default_value
+        return float(match.group(1))

lyrics_transcriber/output/ass/lyrics_line.py ADDED Viewed

@@ -0,0 +1,265 @@
+from dataclasses import dataclass
+from typing import Optional, Tuple, List
+import logging
+from datetime import timedelta
+from PIL import Image, ImageDraw, ImageFont
+import os
+from lyrics_transcriber.types import LyricsSegment
+from lyrics_transcriber.output.ass.event import Event
+from lyrics_transcriber.output.ass.style import Style
+from lyrics_transcriber.output.ass.config import LineState, ScreenConfig
+@dataclass
+class LyricsLine:
+    """Represents a single line of lyrics with timing and karaoke information."""
+    segment: LyricsSegment
+    screen_config: ScreenConfig
+    logger: Optional[logging.Logger] = None
+    previous_end_time: Optional[float] = None
+    def __post_init__(self):
+        """Ensure logger is initialized"""
+        if self.logger is None:
+            self.logger = logging.getLogger(__name__)
+    def _get_font(self, style: Style) -> ImageFont.FreeTypeFont:
+        """Get the font for text measurements."""
+        # ASS renders fonts about 70% of their actual size
+        ASS_FONT_SCALE = 0.70
+        # Scale down the font size to match ASS rendering
+        adjusted_size = int(style.Fontsize * ASS_FONT_SCALE)
+        self.logger.debug(f"Adjusting font size from {style.Fontsize} to {adjusted_size} to match ASS rendering")
+        try:
+            # Use the Fontpath property from Style class
+            if style.Fontpath and os.path.exists(style.Fontpath):
+                return ImageFont.truetype(style.Fontpath, size=adjusted_size)
+            self.logger.warning(f"Could not load font {style.Fontpath}, using default")
+            return ImageFont.load_default()
+        except (OSError, AttributeError) as e:
+            self.logger.warning(f"Font error ({e}), using default")
+            return ImageFont.load_default()
+    def _get_text_dimensions(self, text: str, font: ImageFont.FreeTypeFont) -> Tuple[int, int]:
+        """Get the pixel dimensions of rendered text."""
+        # Create an image the same size as the video frame
+        img = Image.new("RGB", (self.screen_config.video_width, self.screen_config.video_height), color="black")
+        draw = ImageDraw.Draw(img)
+        # Get the bounding box
+        bbox = draw.textbbox((0, 0), text, font=font)
+        width = bbox[2] - bbox[0]
+        height = bbox[3] - bbox[1]
+        self.logger.debug(f"Text dimensions for '{text}': width={width}px, height={height}px")
+        self.logger.debug(f"Video dimensions: {self.screen_config.video_width}x{self.screen_config.video_height}")
+        return width, height
+    # fmt: off
+    def _create_lead_in_text(self, state: LineState) -> Tuple[str, bool]:
+        """Create lead-in indicator text if needed.
+        Returns:
+            Tuple of (text, has_lead_in)
+        """
+        has_lead_in = (self.previous_end_time is None or
+                      self.segment.start_time - self.previous_end_time >= self.screen_config.lead_in_gap_threshold)
+        if not has_lead_in:
+            return "", False
+        # Add a hyphen with karaoke timing for the last 2 seconds before the line
+        lead_in_start = max(state.timing.fade_in_time, self.segment.start_time - 2.0)
+        gap_before_highlight = int((lead_in_start - state.timing.fade_in_time) * 100)
+        highlight_duration = int((self.segment.start_time - lead_in_start) * 100)
+        text = ""
+        # Add initial gap if needed
+        if gap_before_highlight > 0:
+            text += f"{{\\k{gap_before_highlight}}}"
+        # Add the hyphen with highlight
+        text += f"{{\\kf{highlight_duration}}}→ "
+        return text, True
+    def _create_lead_in_event(self, state: LineState, style: Style, video_width: int, config: ScreenConfig) -> Optional[List[Event]]:
+        """Create a separate event for the lead-in indicator if needed.
+        The indicator is a filled rectangle that slides in from the left edge and
+        reaches its final position at the moment the line starts.
+        Returns:
+            None when lead-in is disabled or the gap before this line is too short,
+            otherwise a one-element list holding the indicator event.
+        """
+        # NOTE(review): `video_width` is never read in this method; widths come from
+        # self.screen_config.video_width instead.
+        # Check if lead-in is enabled
+        if not config.lead_in_enabled:
+            return None
+        # Check if there's a sufficient gap to show lead-in
+        if not (self.previous_end_time is None or
+                self.segment.start_time - self.previous_end_time >= config.lead_in_gap_threshold):
+            return None
+        self.logger.debug(f"Creating lead-in indicator for line: '{self.segment.text}'")
+        # Calculate all timing points
+        line_start = self.segment.start_time
+        # NOTE(review): appear_time is negative when the line starts within the first
+        # 3 seconds; confirm the event writer copes with a negative Start.
+        appear_time = line_start - 3.0  # Start 3 seconds before line
+        fade_in_end = appear_time + 0.8  # 800ms fade in
+        fade_out_start = line_start - 0.3  # Start fade 300ms before reaching final position
+        fade_out_end = line_start + 0.2  # Complete fade 200ms after line starts (500ms total fade)
+        self.logger.debug(f"Timing calculations:")
+        self.logger.debug(f"  Line starts at: {line_start:.2f}s")
+        self.logger.debug(f"  Rectangle appears at: {appear_time:.2f}s")
+        self.logger.debug(f"  Fade in completes at: {fade_in_end:.2f}s")
+        self.logger.debug(f"  Fade out starts at: {fade_out_start:.2f}s")
+        self.logger.debug(f"  Rectangle reaches final position at: {line_start:.2f}s")
+        self.logger.debug(f"  Rectangle fully faded out at: {fade_out_end:.2f}s")
+        # Calculate dimensions and positions using configurable percentages
+        font = self._get_font(style)
+        # Apply case transformation to match the actual rendered text
+        main_text = self._apply_case_transform(self.segment.text)
+        main_width, main_height = self._get_text_dimensions(main_text, font)
+        rect_width = int(self.screen_config.video_width * (config.lead_in_width_percent / 100))
+        rect_height = int(self.screen_config.video_height * (config.lead_in_height_percent / 100))
+        # Calculate where the left edge of the centered text will be
+        text_left = self.screen_config.video_width//2 - main_width//2
+        # Apply horizontal offset if configured
+        horizontal_offset = int(self.screen_config.video_width * (config.lead_in_horiz_offset_percent / 100))
+        final_x_position = text_left + horizontal_offset
+        # Apply vertical offset if configured
+        vertical_offset = int(self.screen_config.video_height * (config.lead_in_vert_offset_percent / 100))
+        final_y_position = state.y_position + main_height + vertical_offset
+        self.logger.debug(f"Position calculations:")
+        self.logger.debug(f"  Video dimensions: {self.screen_config.video_width}x{self.screen_config.video_height}")
+        self.logger.debug(f"  Original text: '{self.segment.text}'")
+        self.logger.debug(f"  Transformed text: '{main_text}'")
+        self.logger.debug(f"  Main text width: {main_width}px")
+        self.logger.debug(f"  Main text height: {main_height}px")
+        self.logger.debug(f"  Rectangle dimensions: {rect_width}x{rect_height}px (from {config.lead_in_width_percent}% x {config.lead_in_height_percent}%)")
+        self.logger.debug(f"  Text left edge: {text_left}px")
+        self.logger.debug(f"  Horizontal offset: {horizontal_offset}px ({config.lead_in_horiz_offset_percent}% of screen width)")
+        self.logger.debug(f"  Final X position: {final_x_position}px")
+        self.logger.debug(f"  Vertical offset: {vertical_offset}px ({config.lead_in_vert_offset_percent}% of screen height)")
+        self.logger.debug(f"  Final Y position: {final_y_position}px")
+        self.logger.debug(f"  Vertical position: {state.y_position}px")
+        # Create main indicator event
+        main_event = Event()
+        main_event.type = "Dialogue"
+        main_event.Layer = 0
+        main_event.Style = style
+        main_event.Start = appear_time
+        main_event.End = fade_out_end
+        # Calculate movement duration in milliseconds
+        move_duration = int((line_start - appear_time) * 1000)
+        # Build the indicator rectangle text with configurable styling
+        # (main_text is reused here: from this point it holds ASS tags, not the lyric text)
+        main_text = (
+            f"{{\\an8}}"  # top-center alignment (ASS numpad-style \an8)
+            f"{{\\move(0,{final_y_position},{final_x_position},{final_y_position},0,{move_duration})}}"  # Move until line start
+            f"{{\\c{config.get_lead_in_color_ass_format()}}}"  # Configurable lead-in color in ASS format
+            f"{{\\alpha{config.get_lead_in_opacity_ass_format()}}}"  # Configurable opacity
+            f"{{\\fad(800,500)}}"  # 800ms fade in, 500ms fade out
+        )
+        # Add outline if thickness > 0
+        if config.lead_in_outline_thickness > 0:
+            main_text += (
+                f"{{\\3c{config.get_lead_in_outline_color_ass_format()}}}"  # Outline color
+                f"{{\\bord{config.lead_in_outline_thickness}}}"  # Outline thickness
+            )
+        else:
+            main_text += f"{{\\bord0}}"  # No outline
+        # Add the rectangle shape: corners run from (-rect_width, -rect_height) to (0, 0),
+        # i.e. the shape extends up and to the left of its origin
+        main_text += f"{{\\p1}}m {-rect_width} {-rect_height} l 0 {-rect_height} 0 0 {-rect_width} 0{{\\p0}}"  # Draw up from bottom
+        main_event.Text = main_text
+        # Returned as a list because the caller extend()s its event list with it
+        return [main_event]
+    def create_ass_events(
+        self,
+        state: LineState,
+        style: Style,
+        config: ScreenConfig,
+        previous_end_time: Optional[float] = None
+    ) -> List[Event]:
+        """Create ASS events for this line. Returns [main_event] or [lead_in_event, main_event]."""
+        self.previous_end_time = previous_end_time
+        events = []
+        # Create lead-in event if needed
+        lead_in_event = self._create_lead_in_event(state, style, config.video_width, config)
+        if lead_in_event:
+            events.extend(lead_in_event)
+        # Create main lyrics event
+        main_event = Event()
+        main_event.type = "Dialogue"
+        main_event.Layer = 0
+        main_event.Style = style
+        main_event.Start = state.timing.fade_in_time
+        main_event.End = state.timing.end_time
+        # Use absolute positioning
+        x_pos = config.video_width // 2  # Center horizontally
+        # Main lyrics text with positioning and fade
+        text = (
+            f"{{\\an8}}{{\\pos({x_pos},{state.y_position})}}"
+            f"{{\\fad({config.fade_in_ms},{config.fade_out_ms})}}"
+        )
+        # Add the main lyrics text with karaoke timing
+        text += self._create_ass_text(timedelta(seconds=state.timing.fade_in_time))
+        main_event.Text = text
+        events.append(main_event)
+        return events
+    def _apply_case_transform(self, text: str) -> str:
+        """Apply case transformation to text based on screen config setting."""
+        transform = getattr(self.screen_config, 'text_case_transform', 'none')
+        if transform == "uppercase":
+            return text.upper()
+        elif transform == "lowercase":
+            return text.lower()
+        elif transform == "propercase":
+            return text.title()
+        else:  # "none" or any other value
+            return text
+    def _create_ass_text(self, start_ts: timedelta) -> str:
+        """Create the ASS text with karaoke timing tags."""
+        # Initial delay before first word
+        first_word_time = self.segment.start_time
+        # Add initial delay for regular lines
+        start_time = max(0, (first_word_time - start_ts.total_seconds()) * 100)
+        text = r"{\k" + str(int(round(start_time))) + r"}"
+        prev_end_time = first_word_time
+        for word in self.segment.words:
+            # Add gap between words if needed
+            gap = word.start_time - prev_end_time
+            if gap > 0.1:  # Only add gap if significant
+                text += r"{\k" + str(int(round(gap * 100))) + r"}"
+            # Add the word with its duration
+            duration = int(round((word.end_time - word.start_time) * 100))
+            # Apply case transformation to the word text
+            transformed_text = self._apply_case_transform(word.text)
+            text += r"{\kf" + str(duration) + r"}" + transformed_text + " "
+            prev_end_time = word.end_time  # Track the actual end time of the word
+        return text.rstrip()
+    def __str__(self):
+        return f"{{{self.segment.text}}}"