PyPI - karaoke-gen - Versions diffs - 0.75.54__py3-none-any.whl - Mend

karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show

karaoke_gen/__init__.py +38 -0
karaoke_gen/audio_fetcher.py +1614 -0
karaoke_gen/audio_processor.py +790 -0
karaoke_gen/config.py +83 -0
karaoke_gen/file_handler.py +387 -0
karaoke_gen/instrumental_review/__init__.py +45 -0
karaoke_gen/instrumental_review/analyzer.py +408 -0
karaoke_gen/instrumental_review/editor.py +322 -0
karaoke_gen/instrumental_review/models.py +171 -0
karaoke_gen/instrumental_review/server.py +475 -0
karaoke_gen/instrumental_review/static/index.html +1529 -0
karaoke_gen/instrumental_review/waveform.py +409 -0
karaoke_gen/karaoke_finalise/__init__.py +1 -0
karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
karaoke_gen/karaoke_gen.py +1026 -0
karaoke_gen/lyrics_processor.py +474 -0
karaoke_gen/metadata.py +160 -0
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/__init__.py +18 -0
karaoke_gen/utils/bulk_cli.py +492 -0
karaoke_gen/utils/cli_args.py +432 -0
karaoke_gen/utils/gen_cli.py +978 -0
karaoke_gen/utils/remote_cli.py +3268 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen/video_generator.py +424 -0
karaoke_gen-0.75.54.dist-info/METADATA +718 -0
karaoke_gen-0.75.54.dist-info/RECORD +287 -0
karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +594 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +919 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +214 -0
lyrics_transcriber/frontend/src/api.ts +254 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +306 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +379 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +650 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0

lyrics_transcriber/output/countdown_processor.py ADDED Viewed

@@ -0,0 +1,306 @@
+"""Handles adding countdown intro to songs that start too quickly for karaoke singers."""
+import logging
+import os
+import subprocess
+from typing import List, Optional, Tuple
+from copy import deepcopy
+from lyrics_transcriber.types import CorrectionResult, LyricsSegment, Word
+from lyrics_transcriber.utils.word_utils import WordUtils
+class CountdownProcessor:
+    """
+    Processes corrected lyrics and audio to add countdown intro for songs that start too quickly.
+    For songs where vocals start within the first 3 seconds, this processor:
+    - Adds 3 seconds of silence to the start of the audio file
+    - Shifts all timestamps in corrected lyrics by 3 seconds
+    - Adds a countdown segment "3... 2... 1..." spanning 0.1s to 2.9s
+    """
+    # Configuration constants
+    COUNTDOWN_THRESHOLD_SECONDS = 3.0  # Trigger countdown if first word is within this time
+    COUNTDOWN_PADDING_SECONDS = 3.0    # Amount of silence to add
+    COUNTDOWN_START_TIME = 0.1         # When countdown text starts
+    COUNTDOWN_END_TIME = 2.9           # When countdown text ends
+    COUNTDOWN_TEXT = "3... 2... 1..."  # The countdown text to display
+    def __init__(
+        self,
+        cache_dir: str,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """
+        Initialize CountdownProcessor.
+        Args:
+            cache_dir: Directory for temporary files (padded audio)
+            logger: Optional logger instance
+        """
+        self.cache_dir = cache_dir
+        self.logger = logger or logging.getLogger(__name__)
+        # Ensure cache directory exists
+        os.makedirs(self.cache_dir, exist_ok=True)
+    def process(
+        self,
+        correction_result: CorrectionResult,
+        audio_filepath: str,
+    ) -> Tuple[CorrectionResult, str, bool, float]:
+        """
+        Process correction result and audio file, adding countdown if needed.
+        Args:
+            correction_result: The CorrectionResult to potentially modify
+            audio_filepath: Path to the original audio file
+        Returns:
+            Tuple of:
+            - potentially modified CorrectionResult
+            - potentially padded audio filepath
+            - whether padding was added (bool)
+            - amount of padding in seconds (float)
+        """
+        # Check if countdown is needed
+        if not self._needs_countdown(correction_result):
+            self.logger.info(
+                f"First word starts after {self.COUNTDOWN_THRESHOLD_SECONDS}s - "
+                "no countdown needed"
+            )
+            return correction_result, audio_filepath, False, 0.0
+        self.logger.info(
+            f"First word starts within {self.COUNTDOWN_THRESHOLD_SECONDS}s - "
+            "adding countdown intro"
+        )
+        # Create padded audio file
+        padded_audio_path = self._create_padded_audio(audio_filepath)
+        # Create modified correction result with adjusted timestamps
+        modified_result = self._add_countdown_to_result(correction_result)
+        self.logger.info(
+            f"Countdown intro added successfully. "
+            f"Padded audio: {os.path.basename(padded_audio_path)}"
+        )
+        return modified_result, padded_audio_path, True, self.COUNTDOWN_PADDING_SECONDS
+    def _needs_countdown(self, correction_result: CorrectionResult) -> bool:
+        """
+        Check if the song needs a countdown intro.
+        Args:
+            correction_result: The correction result to check
+        Returns:
+            True if first word starts within threshold, False otherwise
+        """
+        if not correction_result.corrected_segments:
+            return False
+        # Find the first segment with words
+        for segment in correction_result.corrected_segments:
+            if segment.words:
+                first_word_start = segment.words[0].start_time
+                return first_word_start < self.COUNTDOWN_THRESHOLD_SECONDS
+        return False
+    def _create_padded_audio(self, audio_filepath: str) -> str:
+        """
+        Create a new audio file with silence prepended.
+        Args:
+            audio_filepath: Path to original audio file
+        Returns:
+            Path to padded audio file
+        Raises:
+            FileNotFoundError: If input audio file doesn't exist
+            RuntimeError: If ffmpeg command fails
+        """
+        if not os.path.isfile(audio_filepath):
+            raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
+        # Create output path in cache directory
+        # Always use .flac extension since we encode with FLAC codec for quality
+        basename = os.path.basename(audio_filepath)
+        name, _ = os.path.splitext(basename)
+        padded_filename = f"{name}_padded.flac"
+        padded_filepath = os.path.join(self.cache_dir, padded_filename)
+        self.logger.info(f"Creating padded audio file: {padded_filename}")
+        # Build ffmpeg command to prepend silence
+        # We use the anullsrc filter to generate silence and concat it with the original audio
+        cmd = [
+            "ffmpeg",
+            "-y",  # Overwrite output file if it exists
+            "-hide_banner",
+            "-loglevel", "error",
+            "-f", "lavfi",
+            "-t", str(self.COUNTDOWN_PADDING_SECONDS),
+            "-i", f"anullsrc=channel_layout=stereo:sample_rate=44100",
+            "-i", audio_filepath,
+            "-filter_complex", "[0:a][1:a]concat=n=2:v=0:a=1[out]",
+            "-map", "[out]",
+            "-c:a", "flac",  # Use FLAC to preserve quality
+            padded_filepath,
+        ]
+        try:
+            self.logger.debug(f"Running ffmpeg command: {' '.join(cmd)}")
+            output = subprocess.check_output(
+                cmd,
+                stderr=subprocess.STDOUT,
+                universal_newlines=True
+            )
+            self.logger.debug(f"ffmpeg output: {output}")
+            if not os.path.isfile(padded_filepath):
+                raise RuntimeError(
+                    f"ffmpeg command succeeded but output file not created: {padded_filepath}"
+                )
+            return padded_filepath
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"Failed to create padded audio: {e.output}")
+            raise RuntimeError(f"ffmpeg command failed: {e.output}")
+    def _add_countdown_to_result(self, correction_result: CorrectionResult) -> CorrectionResult:
+        """
+        Create a new CorrectionResult with countdown segment and adjusted timestamps.
+        Args:
+            correction_result: The original correction result
+        Returns:
+            A new CorrectionResult with countdown and shifted timestamps
+        """
+        # Deep copy the result to avoid modifying the original
+        modified_result = deepcopy(correction_result)
+        # Shift all timestamps in corrected_segments
+        self._shift_segments_timestamps(
+            modified_result.corrected_segments,
+            self.COUNTDOWN_PADDING_SECONDS
+        )
+        # Shift timestamps in resized_segments if they exist
+        if modified_result.resized_segments:
+            self._shift_segments_timestamps(
+                modified_result.resized_segments,
+                self.COUNTDOWN_PADDING_SECONDS
+            )
+        # Create and prepend countdown segment
+        countdown_segment = self._create_countdown_segment()
+        modified_result.corrected_segments.insert(0, countdown_segment)
+        # Also add to resized_segments if present
+        if modified_result.resized_segments:
+            modified_result.resized_segments.insert(0, countdown_segment)
+        self.logger.debug(
+            f"Added countdown segment and shifted {len(modified_result.corrected_segments)} segments "
+            f"by {self.COUNTDOWN_PADDING_SECONDS}s"
+        )
+        return modified_result
+    def _shift_segments_timestamps(
+        self,
+        segments: List[LyricsSegment],
+        offset_seconds: float
+    ) -> None:
+        """
+        Shift all timestamps in segments by the given offset (in-place).
+        Args:
+            segments: List of segments to modify
+            offset_seconds: Amount to shift timestamps (in seconds)
+        """
+        for segment in segments:
+            # Shift segment timestamps
+            segment.start_time += offset_seconds
+            segment.end_time += offset_seconds
+            # Shift all word timestamps
+            for word in segment.words:
+                word.start_time += offset_seconds
+                word.end_time += offset_seconds
+    def _create_countdown_segment(self) -> LyricsSegment:
+        """
+        Create a countdown segment with the countdown text.
+        Returns:
+            A LyricsSegment containing the countdown
+        """
+        # Create a single word for the countdown text
+        countdown_word = Word(
+            id=WordUtils.generate_id(),
+            text=self.COUNTDOWN_TEXT,
+            start_time=self.COUNTDOWN_START_TIME,
+            end_time=self.COUNTDOWN_END_TIME,
+            confidence=1.0,
+            created_during_correction=True,
+        )
+        # Create the segment
+        countdown_segment = LyricsSegment(
+            id=WordUtils.generate_id(),
+            text=self.COUNTDOWN_TEXT,
+            words=[countdown_word],
+            start_time=self.COUNTDOWN_START_TIME,
+            end_time=self.COUNTDOWN_END_TIME,
+        )
+        return countdown_segment
+    def has_countdown(self, correction_result: CorrectionResult) -> bool:
+        """
+        Check if a CorrectionResult already has a countdown segment.
+        This is used to detect if countdown padding was applied to corrections
+        that were loaded from a saved JSON file (where the padding state is not
+        explicitly stored).
+        Args:
+            correction_result: The correction result to check
+        Returns:
+            True if the first segment is a countdown, False otherwise
+        """
+        if not correction_result.corrected_segments:
+            return False
+        first_segment = correction_result.corrected_segments[0]
+        return first_segment.text == self.COUNTDOWN_TEXT
+    def create_padded_audio_only(self, audio_filepath: str) -> str:
+        """
+        Create a padded audio file without modifying the correction result.
+        This is used when loading existing corrections that already have countdown
+        timestamps, but we need to create the padded audio file for video rendering.
+        Args:
+            audio_filepath: Path to original audio file
+        Returns:
+            Path to padded audio file
+        Raises:
+            FileNotFoundError: If input audio file doesn't exist
+            RuntimeError: If ffmpeg command fails
+        """
+        return self._create_padded_audio(audio_filepath)

lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/arial.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/georgia.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/fonts/verdana.ttf ADDED Viewed

Binary file

lyrics_transcriber/output/generator.py ADDED Viewed

@@ -0,0 +1,257 @@
+from dataclasses import dataclass
+import os
+import logging
+from typing import List, Optional
+import json
+from lyrics_transcriber.types import LyricsData
+from lyrics_transcriber.correction.corrector import CorrectionResult
+from lyrics_transcriber.output.plain_text import PlainTextGenerator
+from lyrics_transcriber.output.lyrics_file import LyricsFileGenerator
+from lyrics_transcriber.output.subtitles import SubtitlesGenerator
+from lyrics_transcriber.output.video import VideoGenerator
+from lyrics_transcriber.output.segment_resizer import SegmentResizer
+from lyrics_transcriber.output.cdg import CDGGenerator
+from lyrics_transcriber.core.config import OutputConfig
+@dataclass
+class OutputPaths:
+    """Holds paths for generated output files."""
+    lrc: Optional[str] = None  # LRC file path, set from LyricsFileGenerator.generate_lrc()
+    ass: Optional[str] = None  # ASS subtitle path, set from SubtitlesGenerator.generate_ass()
+    video: Optional[str] = None  # Video path, set from VideoGenerator.generate_video() or generate_preview_video()
+    original_txt: Optional[str] = None  # Uncorrected transcription text, set from PlainTextGenerator.write_original_transcription()
+    corrected_txt: Optional[str] = None  # Corrected lyrics text, set from PlainTextGenerator.write_corrected_lyrics()
+    corrections_json: Optional[str] = None  # Corrections JSON path, set from OutputGenerator.write_corrections_data()
+    cdg: Optional[str] = None  # First element of the tuple returned by CDGGenerator.generate_cdg()
+    mp3: Optional[str] = None  # Second element of the tuple returned by CDGGenerator.generate_cdg()
+    cdg_zip: Optional[str] = None  # Third element of the tuple returned by CDGGenerator.generate_cdg()
+class OutputGenerator:
+    """Handles generation of various lyrics output formats."""
+    def __init__(
+        self,
+        config: OutputConfig,
+        logger: Optional[logging.Logger] = None,
+        preview_mode: bool = False,
+    ):
+        """
+        Initialize OutputGenerator with configuration.
+        Args:
+            config: OutputConfig instance with required paths and settings
+            logger: Optional logger instance
+            preview_mode: Boolean indicating if the generator is in preview mode
+        """
+        self.config = config
+        self.logger = logger or logging.getLogger(__name__)
+        self.logger.info(f"Initializing OutputGenerator with config: {self.config}")
+        # Load output styles from JSON if provided
+        if self.config.output_styles_json and os.path.exists(self.config.output_styles_json):
+            try:
+                with open(self.config.output_styles_json, "r") as f:
+                    self.config.styles = json.load(f)
+                self.logger.debug(f"Loaded output styles from: {self.config.output_styles_json}")
+            except Exception as e:
+                if self.config.render_video or self.config.generate_cdg:
+                    # Only raise error for video/CDG since they require styles
+                    raise ValueError(f"Failed to load output styles file: {str(e)}")
+                else:
+                    # For other outputs, just log warning and continue with empty styles
+                    self.logger.warning(f"Failed to load output styles file: {str(e)}")
+                    self.config.styles = {}
+        else:
+            # No styles file provided or doesn't exist
+            if self.config.render_video or self.config.generate_cdg:
+                raise ValueError(f"Output styles file required for video/CDG generation but not found: {self.config.output_styles_json}")
+            else:
+                self.config.styles = {}
+        # Set video resolution parameters
+        self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
+        self.logger.info(f"Video resolution: {self.video_resolution_num}, font size: {self.font_size}, line height: {self.line_height}")
+        # Initialize generators
+        self.plain_text = PlainTextGenerator(self.config.output_dir, self.logger)
+        self.lyrics_file = LyricsFileGenerator(self.config.output_dir, self.logger)
+        if self.config.generate_cdg:
+            self.cdg = CDGGenerator(self.config.output_dir, self.logger)
+        self.preview_mode = preview_mode
+        if self.config.render_video:
+            # Apply preview mode scaling if needed
+            if self.preview_mode:
+                # Scale down from 4K (2160p) to 360p - factor of 1/6
+                scale_factor = 1 / 6
+                # Scale down top padding for preview if it exists
+                if "karaoke" in self.config.styles and "top_padding" in self.config.styles["karaoke"]:
+                    self.logger.info(f"Preview mode: Found top_padding: {self.config.styles['karaoke']['top_padding']}")
+                    original_padding = self.config.styles["karaoke"]["top_padding"]
+                    if original_padding is not None:
+                        # Scale down from 4K (2160p) to 360p - factor of 1/6
+                        self.config.styles["karaoke"]["top_padding"] = original_padding * scale_factor
+                        self.logger.info(f"Preview mode: Scaled down top_padding to: {self.config.styles['karaoke']['top_padding']}")
+                # Scale down font size for preview if it exists
+                if "karaoke" in self.config.styles and "font_size" in self.config.styles["karaoke"]:
+                    self.logger.info(f"Preview mode: Found font_size: {self.config.styles['karaoke']['font_size']}")
+                    original_font_size = self.config.styles["karaoke"]["font_size"]
+                    if original_font_size is not None:
+                        # Scale down from 4K (2160p) to 360p - factor of 1/6
+                        self.font_size = original_font_size * scale_factor
+                        self.config.styles["karaoke"]["font_size"] = self.font_size
+                        self.logger.info(f"Preview mode: Scaled down font_size to: {self.font_size}")
+        # Get max_line_length from styles if available, otherwise use config default
+        max_line_length = self.config.styles.get("karaoke", {}).get("max_line_length", self.config.default_max_line_length)
+        self.logger.info(f"Using max_line_length: {max_line_length}")
+        self.segment_resizer = SegmentResizer(max_line_length=max_line_length, logger=self.logger)
+        if self.config.render_video:
+            # Initialize subtitle generator with potentially scaled values
+            self.subtitle = SubtitlesGenerator(
+                output_dir=self.config.output_dir,
+                video_resolution=self.video_resolution_num,
+                font_size=self.font_size,
+                line_height=self.line_height,
+                styles=self.config.styles,
+                subtitle_offset_ms=self.config.subtitle_offset_ms,
+                logger=self.logger,
+            )
+            self.video = VideoGenerator(
+                output_dir=self.config.output_dir,
+                cache_dir=self.config.cache_dir,
+                video_resolution=self.video_resolution_num,
+                styles=self.config.styles,
+                logger=self.logger,
+            )
+        # Log the configured directories
+        self.logger.debug(f"Initialized OutputGenerator with output_dir: {self.config.output_dir}")
+        self.logger.debug(f"Using cache_dir: {self.config.cache_dir}")
+    def generate_outputs(
+        self,
+        transcription_corrected: Optional[CorrectionResult],
+        lyrics_results: dict[str, LyricsData],
+        output_prefix: str,
+        audio_filepath: str,
+        artist: Optional[str] = None,
+        title: Optional[str] = None,
+    ) -> OutputPaths:
+        """Generate all requested output formats."""
+        outputs = OutputPaths()
+        try:
+            # Only process transcription-related outputs if we have transcription data
+            if transcription_corrected:
+                # Resize corrected segments
+                resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
+                transcription_corrected.resized_segments = resized_segments
+                # For preview, we only need to generate ASS and video
+                if self.preview_mode:
+                    # Generate ASS subtitles for preview
+                    outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
+                    # Generate preview video
+                    outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
+                    return outputs
+                # Normal output generation (non-preview mode)
+                # Generate plain lyrics files for each provider
+                for name, lyrics_data in lyrics_results.items():
+                    self.plain_text.write_lyrics(lyrics_data, output_prefix)
+                # Write original (uncorrected) transcription
+                outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
+                outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
+                # Write corrected lyrics as plain text
+                outputs.corrected_txt = self.plain_text.write_corrected_lyrics(resized_segments, output_prefix)
+                # Generate LRC using LyricsFileGenerator
+                outputs.lrc = self.lyrics_file.generate_lrc(resized_segments, output_prefix)
+                # Generate CDG file if requested
+                if self.config.generate_cdg:
+                    outputs.cdg, outputs.mp3, outputs.cdg_zip = self.cdg.generate_cdg(
+                        segments=resized_segments,
+                        audio_file=audio_filepath,
+                        title=title or output_prefix,
+                        artist=artist or "",
+                        cdg_styles=self.config.styles["cdg"],
+                    )
+                # Generate video if requested
+                if self.config.render_video:
+                    # Generate ASS subtitles
+                    outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix, audio_filepath)
+                    outputs.video = self.video.generate_video(outputs.ass, audio_filepath, output_prefix)
+            return outputs
+        except Exception as e:
+            self.logger.error(f"Failed to generate outputs: {str(e)}")
+            raise
+    def _get_output_path(self, output_prefix: str, extension: str) -> str:
+        """Generate full output path for a file."""
+        return os.path.join(self.config.output_dir or self.config.cache_dir, f"{output_prefix}.{extension}")
+    def _get_video_params(self, resolution: str) -> tuple:
+        """Get video parameters: (width, height), font_size, line_height based on video resolution config."""
+        # Get resolution dimensions
+        resolution_map = {
+            "4k": (3840, 2160),
+            "1080p": (1920, 1080),
+            "720p": (1280, 720),
+            "360p": (640, 360),
+        }
+        if resolution not in resolution_map:
+            raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
+        resolution_dims = resolution_map[resolution]
+        # Default font sizes for each resolution
+        default_font_sizes = {
+            "4k": 250,
+            "1080p": 120,
+            "720p": 100,
+            "360p": 40,
+        }
+        # Get font size from styles if available, otherwise use default
+        font_size = self.config.styles.get("karaoke", {}).get("font_size", default_font_sizes[resolution])
+        # Line height matches font size for all except 360p
+        line_height = 50 if resolution == "360p" else font_size
+        return resolution_dims, font_size, line_height
+    def write_corrections_data(self, correction_result: CorrectionResult, output_prefix: str) -> str:
+        """Write corrections data to JSON file."""
+        self.logger.info("Writing corrections data JSON")
+        output_path = self._get_output_path(f"{output_prefix} (Lyrics Corrections)", "json")
+        try:
+            with open(output_path, "w", encoding="utf-8") as f:
+                json.dump(correction_result.to_dict(), f, indent=2, ensure_ascii=False)
+            self.logger.info(f"Corrections data JSON generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to write corrections data JSON: {str(e)}")
+            raise

lyrics_transcriber/output/lrc_to_cdg.py ADDED Viewed

@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+import logging
+import argparse
+import json
+import sys
+from pathlib import Path
+from lyrics_transcriber.output.cdg import CDGGenerator
+logger = logging.getLogger(__name__)  # Module-level logger: used by cli_main() and passed on to CDGGenerator
+def cli_main():
+    """Command-line interface entry point for the lrc2cdg tool."""
+    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    parser = argparse.ArgumentParser(description="Convert LRC file to CDG")
+    parser.add_argument("lrc_file", help="Path to the LRC file")
+    parser.add_argument("audio_file", help="Path to the audio file")
+    parser.add_argument("--title", required=True, help="Title of the song")
+    parser.add_argument("--artist", required=True, help="Artist of the song")
+    parser.add_argument("--style_params_json", required=True, help="Path to JSON file containing CDG style configuration")
+    args = parser.parse_args()
+    try:
+        with open(args.style_params_json, "r") as f:
+            style_params = json.loads(f.read())
+            cdg_styles = style_params["cdg"]
+    except FileNotFoundError:
+        logger.error(f"Style configuration file not found: {args.style_params_json}")
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        logger.error(f"Invalid JSON in style configuration file: {e}")
+        sys.exit(1)
+    try:
+        output_dir = str(Path(args.lrc_file).parent)
+        generator = CDGGenerator(output_dir=output_dir, logger=logger)
+        cdg_file, mp3_file, zip_file = generator.generate_cdg_from_lrc(
+            lrc_file=args.lrc_file,
+            audio_file=args.audio_file,
+            title=args.title,
+            artist=args.artist,
+            cdg_styles=cdg_styles,
+        )
+        logger.info(f"Generated files:\nCDG: {cdg_file}\nMP3: {mp3_file}\nZIP: {zip_file}")
+    except ValueError as e:
+        logger.error(f"Invalid style configuration: {e}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Error generating CDG: {e}")
+        sys.exit(1)
+if __name__ == "__main__":  # Run the CLI only when this file is executed as a script, not on import
+    cli_main()