PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl - Mend

karaoke-gen 0.57.0 py3-none-any.whl → 0.71.23 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268)

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1815 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.23.dist-info/METADATA +610 -0
karaoke_gen-0.71.23.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0

karaoke_gen/pipeline/stages/render.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""
+Video rendering pipeline stage.
+This stage handles:
+- Rendering karaoke video with synchronized lyrics
+- Using the OutputGenerator from lyrics_transcriber
+- Combining audio, video, and synchronized lyrics
+This stage runs after transcription is complete and corrections
+have been applied.
+"""
+import logging
+import os
+from typing import Any, Dict, List, Optional
+from karaoke_gen.pipeline.base import PipelineStage, StageResult, StageStatus
+from karaoke_gen.pipeline.context import PipelineContext
+class RenderStage(PipelineStage):
+    """
+    Video rendering stage.
+    Renders the karaoke video with synchronized lyrics overlay.
+    Uses OutputGenerator from lyrics_transcriber.
+    """
+    def __init__(
+        self,
+        logger: Optional[logging.Logger] = None,
+        render_bounding_boxes: bool = False,
+    ):
+        """
+        Initialize the render stage.
+        Args:
+            logger: Logger instance
+            render_bounding_boxes: If True, render debug bounding boxes
+        """
+        self.logger = logger or logging.getLogger(__name__)
+        self.render_bounding_boxes = render_bounding_boxes
+    @property
+    def name(self) -> str:
+        return "render"
+    @property
+    def required_inputs(self) -> List[str]:
+        # Requires transcription output
+        return ["transcription"]
+    @property
+    def optional_inputs(self) -> List[str]:
+        return ["separation"]
+    @property
+    def output_keys(self) -> List[str]:
+        return [
+            "with_vocals_video_path",  # Path to rendered video with vocals
+            "lrc_path",                 # Path to LRC file
+            "ass_path",                 # Path to ASS subtitle file
+        ]
+    async def execute(self, context: PipelineContext) -> StageResult:
+        """
+        Execute video rendering.
+        Args:
+            context: Pipeline context with transcription outputs
+        Returns:
+            StageResult with rendered video path
+        """
+        import time
+        start_time = time.time()
+        try:
+            context.update_progress(self.name, 0, "Starting video rendering")
+            context.log("INFO", f"Rendering video for: {context.artist} - {context.title}")
+            # Get transcription outputs
+            transcription = context.stage_outputs.get("transcription", {})
+            corrections_result = transcription.get("corrections_result")
+            if not corrections_result:
+                context.log("WARNING", "No corrections result available for rendering")
+                return StageResult(
+                    status=StageStatus.SKIPPED,
+                    outputs={},
+                    error_message="No corrections result available",
+                )
+            # Import OutputGenerator
+            from lyrics_transcriber import OutputGenerator, OutputConfig
+            # Build output config
+            output_config = OutputConfig(
+                output_dir=context.output_dir,
+                cache_dir=os.path.join(context.output_dir, "cache"),
+                video_resolution="4k",  # Default to 4K
+            )
+            # Apply style params if available
+            if context.style_params:
+                output_config = self._apply_style_params(output_config, context.style_params)
+            context.update_progress(self.name, 20, "Initializing video generator")
+            # Create OutputGenerator
+            generator = OutputGenerator(
+                config=output_config,
+                logger=self.logger,
+            )
+            context.update_progress(self.name, 40, "Rendering video with lyrics")
+            # Generate video
+            result = generator.generate_video(
+                result=corrections_result,
+                output_prefix=context.base_name,
+                audio_file=context.input_audio_path,
+            )
+            outputs = {}
+            if result:
+                outputs["with_vocals_video_path"] = result.get("video_path")
+                outputs["lrc_path"] = result.get("lrc_path")
+                outputs["ass_path"] = result.get("ass_path")
+            context.update_progress(self.name, 100, "Video rendering complete")
+            duration = time.time() - start_time
+            context.log("INFO", f"Video rendering completed in {duration:.1f}s")
+            return StageResult(
+                status=StageStatus.COMPLETED,
+                outputs=outputs,
+                duration_seconds=duration,
+            )
+        except Exception as e:
+            duration = time.time() - start_time
+            context.log("ERROR", f"Video rendering failed: {str(e)}")
+            return StageResult(
+                status=StageStatus.FAILED,
+                error_message=str(e),
+                error_details={"exception_type": type(e).__name__},
+                duration_seconds=duration,
+            )
+    def _apply_style_params(self, config, style_params: Dict[str, Any]):
+        """Apply style parameters to output config."""
+        # Apply karaoke style settings if present
+        karaoke_params = style_params.get("karaoke", {})
+        if karaoke_params.get("background_image"):
+            config.background_image = karaoke_params["background_image"]
+        if karaoke_params.get("font_path"):
+            config.font_path = karaoke_params["font_path"]
+        # Add more style mappings as needed
+        return config

karaoke_gen/pipeline/stages/screens.py ADDED Viewed

@@ -0,0 +1,139 @@
+"""
+Title and end screen generation pipeline stage.
+This stage handles the generation of:
+- Title screen video (intro)
+- End screen video (outro)
+- Corresponding PNG/JPG images
+These are generated using the video_generator module.
+"""
+import logging
+import os
+from typing import Any, Dict, List, Optional
+from karaoke_gen.pipeline.base import PipelineStage, StageResult, StageStatus
+from karaoke_gen.pipeline.context import PipelineContext
+class ScreensStage(PipelineStage):
+    """
+    Title and end screen generation stage.
+    Generates title and end screen videos/images using configured
+    style parameters.
+    """
+    def __init__(
+        self,
+        output_png: bool = True,
+        output_jpg: bool = True,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """
+        Initialize the screens stage.
+        Args:
+            output_png: Generate PNG format images
+            output_jpg: Generate JPG format images
+            logger: Logger instance
+        """
+        self.output_png = output_png
+        self.output_jpg = output_jpg
+        self.logger = logger or logging.getLogger(__name__)
+    @property
+    def name(self) -> str:
+        return "screens"
+    @property
+    def required_inputs(self) -> List[str]:
+        return []
+    @property
+    def output_keys(self) -> List[str]:
+        return [
+            "title_video_path",    # Path to title screen video
+            "title_png_path",      # Path to title screen PNG
+            "title_jpg_path",      # Path to title screen JPG
+            "end_video_path",      # Path to end screen video
+            "end_png_path",        # Path to end screen PNG
+            "end_jpg_path",        # Path to end screen JPG
+        ]
+    async def execute(self, context: PipelineContext) -> StageResult:
+        """
+        Execute screen generation.
+        Args:
+            context: Pipeline context with style params
+        Returns:
+            StageResult with screen file paths
+        """
+        import time
+        start_time = time.time()
+        try:
+            context.update_progress(self.name, 0, "Generating title and end screens")
+            context.log("INFO", f"Generating screens for: {context.artist} - {context.title}")
+            from karaoke_gen.video_generator import VideoGenerator
+            # Create video generator
+            generator = VideoGenerator(
+                artist=context.artist,
+                title=context.title,
+                output_dir=context.output_dir,
+                style_params=context.style_params,
+                logger=self.logger,
+            )
+            outputs = {}
+            context.update_progress(self.name, 25, "Generating title screen")
+            # Generate title screen
+            title_result = generator.generate_title_screen(
+                output_png=self.output_png,
+                output_jpg=self.output_jpg,
+            )
+            if title_result:
+                outputs["title_video_path"] = title_result.get("video_path")
+                outputs["title_png_path"] = title_result.get("png_path")
+                outputs["title_jpg_path"] = title_result.get("jpg_path")
+            context.update_progress(self.name, 75, "Generating end screen")
+            # Generate end screen
+            end_result = generator.generate_end_screen(
+                output_png=self.output_png,
+                output_jpg=self.output_jpg,
+            )
+            if end_result:
+                outputs["end_video_path"] = end_result.get("video_path")
+                outputs["end_png_path"] = end_result.get("png_path")
+                outputs["end_jpg_path"] = end_result.get("jpg_path")
+            context.update_progress(self.name, 100, "Screen generation complete")
+            duration = time.time() - start_time
+            context.log("INFO", f"Screen generation completed in {duration:.1f}s")
+            return StageResult(
+                status=StageStatus.COMPLETED,
+                outputs=outputs,
+                duration_seconds=duration,
+            )
+        except Exception as e:
+            duration = time.time() - start_time
+            context.log("ERROR", f"Screen generation failed: {str(e)}")
+            return StageResult(
+                status=StageStatus.FAILED,
+                error_message=str(e),
+                error_details={"exception_type": type(e).__name__},
+                duration_seconds=duration,
+            )

karaoke_gen/pipeline/stages/separation.py ADDED Viewed

@@ -0,0 +1,191 @@
+"""
+Audio separation pipeline stage.
+This stage handles the separation of audio into stems:
+- Clean instrumental (vocals removed)
+- Vocals
+- Backing vocals and lead vocals (optional)
+- Other stems (drums, bass, guitar, etc.)
+- Combined instrumental with backing vocals
+The stage delegates to AudioProcessor but provides a consistent
+pipeline interface.
+"""
+import logging
+import os
+from typing import Any, Dict, List, Optional
+from karaoke_gen.pipeline.base import PipelineStage, StageResult, StageStatus
+from karaoke_gen.pipeline.context import PipelineContext
+class SeparationStage(PipelineStage):
+    """
+    Audio separation stage.
+    Separates audio into stems using configured models.
+    Supports both local processing and remote API.
+    """
+    def __init__(
+        self,
+        model_file_dir: str = "/tmp/audio-separator-models/",
+        lossless_output_format: str = "flac",
+        clean_instrumental_model: str = "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
+        backing_vocals_models: Optional[List[str]] = None,
+        other_stems_models: Optional[List[str]] = None,
+        logger: Optional[logging.Logger] = None,
+        log_level: int = logging.INFO,
+        skip_separation: bool = False,
+    ):
+        """
+        Initialize the separation stage.
+        Args:
+            model_file_dir: Directory for model files
+            lossless_output_format: Output format (flac, wav, etc.)
+            clean_instrumental_model: Model for clean instrumental separation
+            backing_vocals_models: Models for backing vocals separation
+            other_stems_models: Models for other stems (drums, bass, etc.)
+            logger: Logger instance
+            log_level: Logging level
+            skip_separation: If True, skip separation (for testing)
+        """
+        self.model_file_dir = model_file_dir
+        self.lossless_output_format = lossless_output_format
+        self.clean_instrumental_model = clean_instrumental_model
+        self.backing_vocals_models = backing_vocals_models or [
+            "mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt"
+        ]
+        self.other_stems_models = other_stems_models or ["htdemucs_6s.yaml"]
+        self.logger = logger or logging.getLogger(__name__)
+        self.log_level = log_level
+        self.skip_separation = skip_separation
+    @property
+    def name(self) -> str:
+        return "separation"
+    @property
+    def required_inputs(self) -> List[str]:
+        # No required inputs from other stages - uses context.input_audio_path
+        return []
+    @property
+    def output_keys(self) -> List[str]:
+        return [
+            "clean_instrumental",  # Dict with 'instrumental' and 'vocals' paths
+            "other_stems",         # Dict mapping model -> stems dict
+            "backing_vocals",      # Dict mapping model -> backing/lead vocals
+            "combined_instrumentals",  # Dict mapping model -> combined path
+        ]
+    async def execute(self, context: PipelineContext) -> StageResult:
+        """
+        Execute audio separation.
+        Args:
+            context: Pipeline context with input audio path
+        Returns:
+            StageResult with separated stem paths
+        """
+        import time
+        start_time = time.time()
+        try:
+            context.update_progress(self.name, 0, "Starting audio separation")
+            context.log("INFO", f"Separating audio: {context.input_audio_path}")
+            if self.skip_separation:
+                context.log("INFO", "Skipping audio separation (skip_separation=True)")
+                return StageResult(
+                    status=StageStatus.SKIPPED,
+                    outputs={},
+                )
+            # Create AudioProcessor instance
+            from karaoke_gen.audio_processor import AudioProcessor
+            processor = AudioProcessor(
+                logger=self.logger,
+                log_level=self.log_level,
+                log_formatter=None,
+                model_file_dir=self.model_file_dir,
+                lossless_output_format=self.lossless_output_format,
+                clean_instrumental_model=self.clean_instrumental_model,
+                backing_vocals_models=self.backing_vocals_models,
+                other_stems_models=self.other_stems_models,
+                ffmpeg_base_command="ffmpeg -y -hide_banner -nostats -loglevel error",
+            )
+            context.update_progress(self.name, 10, "Processing audio separation")
+            # Run the separation
+            result = processor.process_audio_separation(
+                audio_file=context.input_audio_path,
+                artist_title=context.base_name,
+                track_output_dir=context.output_dir,
+            )
+            context.update_progress(self.name, 90, "Audio separation complete")
+            duration = time.time() - start_time
+            context.log("INFO", f"Audio separation completed in {duration:.1f}s")
+            return StageResult(
+                status=StageStatus.COMPLETED,
+                outputs=result,
+                duration_seconds=duration,
+            )
+        except Exception as e:
+            duration = time.time() - start_time
+            context.log("ERROR", f"Audio separation failed: {str(e)}")
+            return StageResult(
+                status=StageStatus.FAILED,
+                error_message=str(e),
+                error_details={"exception_type": type(e).__name__},
+                duration_seconds=duration,
+            )
+    def apply_countdown_padding(
+        self,
+        context: PipelineContext,
+        separation_result: Dict[str, Any],
+        padding_seconds: float,
+    ) -> Dict[str, Any]:
+        """
+        Apply countdown padding to instrumental files.
+        This is called after transcription determines the padding amount
+        needed to synchronize with padded vocals.
+        Args:
+            context: Pipeline context
+            separation_result: Original separation result
+            padding_seconds: Amount of padding to apply
+        Returns:
+            Updated separation result with padded file paths
+        """
+        from karaoke_gen.audio_processor import AudioProcessor
+        processor = AudioProcessor(
+            logger=self.logger,
+            log_level=self.log_level,
+            log_formatter=None,
+            model_file_dir=self.model_file_dir,
+            lossless_output_format=self.lossless_output_format,
+            clean_instrumental_model=self.clean_instrumental_model,
+            backing_vocals_models=self.backing_vocals_models,
+            other_stems_models=self.other_stems_models,
+            ffmpeg_base_command="ffmpeg -y -hide_banner -nostats -loglevel error",
+        )
+        return processor.apply_countdown_padding_to_instrumentals(
+            separation_result=separation_result,
+            padding_seconds=padding_seconds,
+            artist_title=context.base_name,
+            track_output_dir=context.output_dir,
+        )

karaoke_gen/pipeline/stages/transcription.py ADDED Viewed

@@ -0,0 +1,191 @@
+"""
+Lyrics transcription pipeline stage.
+This stage handles:
+- Transcription of lyrics from audio (using AudioShake API)
+- Fetching lyrics from online sources (Genius, Spotify, etc.)
+- Synchronization of lyrics with audio timing
+- Generation of LRC, ASS, and corrected text files
+Note: Video rendering is handled by the RenderStage, not here.
+"""
+import logging
+import os
+from typing import Any, Dict, List, Optional
+from karaoke_gen.pipeline.base import PipelineStage, StageResult, StageStatus
+from karaoke_gen.pipeline.context import PipelineContext
+class TranscriptionStage(PipelineStage):
+    """
+    Lyrics transcription stage.
+    Transcribes and synchronizes lyrics from audio.
+    Does NOT render video - that's handled by RenderStage.
+    """
+    def __init__(
+        self,
+        style_params_json: Optional[str] = None,
+        lyrics_file: Optional[str] = None,
+        skip_transcription: bool = False,
+        skip_transcription_review: bool = False,
+        subtitle_offset_ms: int = 0,
+        lyrics_artist: Optional[str] = None,
+        lyrics_title: Optional[str] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """
+        Initialize the transcription stage.
+        Args:
+            style_params_json: Path to style parameters JSON file
+            lyrics_file: Path to existing lyrics file (optional)
+            skip_transcription: If True, skip automatic transcription
+            skip_transcription_review: If True, skip interactive review
+            subtitle_offset_ms: Offset for subtitle timing in milliseconds
+            lyrics_artist: Override artist name for lyrics search
+            lyrics_title: Override title for lyrics search
+            logger: Logger instance
+        """
+        self.style_params_json = style_params_json
+        self.lyrics_file = lyrics_file
+        self.skip_transcription = skip_transcription
+        self.skip_transcription_review = skip_transcription_review
+        self.subtitle_offset_ms = subtitle_offset_ms
+        self.lyrics_artist = lyrics_artist
+        self.lyrics_title = lyrics_title
+        self.logger = logger or logging.getLogger(__name__)
+    @property
+    def name(self) -> str:
+        return "transcription"
+    @property
+    def required_inputs(self) -> List[str]:
+        # No required inputs from other stages - uses context.input_audio_path
+        return []
+    @property
+    def optional_inputs(self) -> List[str]:
+        # Can use separation output for vocals-only transcription
+        return ["separation"]
+    @property
+    def output_keys(self) -> List[str]:
+        return [
+            "lrc_filepath",        # Path to LRC lyrics file
+            "ass_filepath",        # Path to ASS subtitle file
+            "corrected_txt_path",  # Path to corrected text file
+            "corrections_result",  # Full corrections JSON data
+            "countdown_padding_seconds",  # Countdown padding applied (if any)
+        ]
+    async def execute(self, context: PipelineContext) -> StageResult:
+        """
+        Execute lyrics transcription.
+        Args:
+            context: Pipeline context with input audio path
+        Returns:
+            StageResult with lyrics file paths
+        """
+        import time
+        start_time = time.time()
+        try:
+            context.update_progress(self.name, 0, "Starting lyrics transcription")
+            context.log("INFO", f"Transcribing lyrics for: {context.artist} - {context.title}")
+            if self.skip_transcription:
+                context.log("INFO", "Skipping transcription (skip_transcription=True)")
+                return StageResult(
+                    status=StageStatus.SKIPPED,
+                    outputs={},
+                )
+            # Get style params from context or use instance value
+            style_params_json = self.style_params_json
+            if not style_params_json and context.style_params:
+                # Write style params to temp file
+                import json
+                import tempfile
+                with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+                    json.dump(context.style_params, f)
+                    style_params_json = f.name
+                    context.add_temp_path(style_params_json)
+            # Create LyricsProcessor instance
+            # Note: render_video=False because we handle rendering in RenderStage
+            from karaoke_gen.lyrics_processor import LyricsProcessor
+            processor = LyricsProcessor(
+                logger=self.logger,
+                style_params_json=style_params_json,
+                lyrics_file=self.lyrics_file,
+                skip_transcription=self.skip_transcription,
+                skip_transcription_review=self.skip_transcription_review,
+                render_video=False,  # Don't render video here
+                subtitle_offset_ms=self.subtitle_offset_ms,
+            )
+            context.update_progress(self.name, 20, "Running transcription")
+            # Run transcription
+            result = processor.transcribe_lyrics(
+                input_audio_wav=context.input_audio_path,
+                artist=context.artist,
+                title=context.title,
+                track_output_dir=context.output_dir,
+                lyrics_artist=self.lyrics_artist or context.artist,
+                lyrics_title=self.lyrics_title or context.title,
+            )
+            # Build output dictionary
+            outputs = {}
+            if result.get("lrc_filepath"):
+                outputs["lrc_filepath"] = result["lrc_filepath"]
+            if result.get("ass_filepath"):
+                outputs["ass_filepath"] = result["ass_filepath"]
+            if result.get("corrected_txt_path"):
+                outputs["corrected_txt_path"] = result["corrected_txt_path"]
+            # Get corrections data if available
+            if hasattr(processor, 'corrections_result'):
+                outputs["corrections_result"] = processor.corrections_result
+            # Check for countdown padding
+            lyrics_dir = os.path.join(context.output_dir, "lyrics")
+            countdown_file = os.path.join(lyrics_dir, "countdown_padding_seconds.txt")
+            if os.path.exists(countdown_file):
+                with open(countdown_file, 'r') as f:
+                    try:
+                        outputs["countdown_padding_seconds"] = float(f.read().strip())
+                    except ValueError:
+                        pass
+            context.update_progress(self.name, 100, "Transcription complete")
+            duration = time.time() - start_time
+            context.log("INFO", f"Lyrics transcription completed in {duration:.1f}s")
+            return StageResult(
+                status=StageStatus.COMPLETED,
+                outputs=outputs,
+                duration_seconds=duration,
+            )
+        except Exception as e:
+            duration = time.time() - start_time
+            context.log("ERROR", f"Lyrics transcription failed: {str(e)}")
+            return StageResult(
+                status=StageStatus.FAILED,
+                error_message=str(e),
+                error_details={"exception_type": type(e).__name__},
+                duration_seconds=duration,
+            )

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.23py3-none-any.whl