PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl - Mend

karaoke-gen: 0.57.0 (py3-none-any.whl) → 0.71.27 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268)

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1965 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.27.dist-info/METADATA +610 -0
karaoke_gen-0.71.27.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0

karaoke_gen/audio_processor.py CHANGED

@@ -12,6 +12,14 @@ import psutil
 from datetime import datetime
 from pydub import AudioSegment
+# Try to import the remote API client if available
+try:
+    from audio_separator.remote import AudioSeparatorAPIClient
+    REMOTE_API_AVAILABLE = True
+except ImportError:
+    REMOTE_API_AVAILABLE = False
+    AudioSeparatorAPIClient = None
 # Placeholder class or functions for audio processing
 class AudioProcessor:
@@ -44,6 +52,60 @@ class AudioProcessor:
             self.logger.info(f"File already exists, skipping creation: {file_path}")
         return exists
+    def pad_audio_file(self, input_audio, output_audio, padding_seconds):
+        """
+        Add silence to the start of an audio file using ffmpeg.
+        This ensures the instrumental tracks are synchronized with vocals when
+        countdown padding has been applied by the LyricsTranscriber.
+        Args:
+            input_audio: Path to input audio file
+            output_audio: Path for output padded audio file
+            padding_seconds: Amount of silence to add in seconds (e.g., 3.0)
+        Raises:
+            Exception: If ffmpeg command fails
+        """
+        self.logger.info(f"Padding audio file with {padding_seconds}s of silence: {input_audio}")
+        # Use ffmpeg to prepend silence to the audio file
+        # This matches the approach used in LyricsTranscriber for vocal padding
+        cmd = [
+            "ffmpeg",
+            "-y",  # Overwrite output file
+            "-hide_banner",
+            "-loglevel", "error",
+            "-f", "lavfi",
+            "-t", str(padding_seconds),
+            "-i", f"anullsrc=channel_layout=stereo:sample_rate=44100",
+            "-i", input_audio,
+            "-filter_complex", "[0:a][1:a]concat=n=2:v=0:a=1[out]",
+            "-map", "[out]",
+            "-c:a", self.lossless_output_format.lower(),
+            output_audio,
+        ]
+        try:
+            import subprocess
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minute timeout
+                check=True
+            )
+            self.logger.info(f"Successfully padded audio file: {output_audio}")
+        except subprocess.CalledProcessError as e:
+            error_msg = f"Failed to pad audio file {input_audio}: {e.stderr}"
+            self.logger.error(error_msg)
+            raise Exception(error_msg)
+        except subprocess.TimeoutExpired:
+            error_msg = f"Timeout while padding audio file {input_audio}"
+            self.logger.error(error_msg)
+            raise Exception(error_msg)
     def separate_audio(self, audio_file, model_name, artist_title, track_output_dir, instrumental_path, vocals_path):
         if audio_file is None or not os.path.isfile(audio_file):
             raise Exception("Error: Invalid audio source provided.")
@@ -104,9 +166,36 @@ class AudioProcessor:
         self.logger.info(f"Separation complete! Output file(s): {vocals_path} {instrumental_path}")
     def process_audio_separation(self, audio_file, artist_title, track_output_dir):
+        # Check if we should use remote API
+        remote_api_url = os.environ.get("AUDIO_SEPARATOR_API_URL")
+        if remote_api_url:
+            if not REMOTE_API_AVAILABLE:
+                self.logger.warning("AUDIO_SEPARATOR_API_URL is set but remote API client is not available. "
+                                  "Please ensure audio-separator is updated to a version that includes remote API support. "
+                                  "Falling back to local processing.")
+            else:
+                self.logger.info(f"Using remote audio separator API at: {remote_api_url}")
+                try:
+                    return self._process_audio_separation_remote(audio_file, artist_title, track_output_dir, remote_api_url)
+                except Exception as e:
+                    error_str = str(e)
+                    # Don't fall back for download failures - these indicate API issues that should be fixed
+                    if ("no files were downloaded" in error_str or
+                        "failed to produce essential" in error_str):
+                        self.logger.error(f"Remote API processing failed with download/file organization issue: {error_str}")
+                        self.logger.error("This indicates an audio-separator API issue that should be fixed. Not falling back to local processing.")
+                        raise e
+                    else:
+                        # Fall back for other types of errors (network issues, etc.)
+                        self.logger.error(f"Remote API processing failed: {error_str}")
+                        self.logger.info("Falling back to local audio separation")
+        else:
+            self.logger.info("AUDIO_SEPARATOR_API_URL not set, using local audio separation. "
+                           "Set this environment variable to use remote GPU processing.")
         from audio_separator.separator import Separator
-        self.logger.info(f"Starting audio separation process for {artist_title}")
+        self.logger.info(f"Starting local audio separation process for {artist_title}")
         # Define lock file path in system temp directory
         lock_file_path = os.path.join(tempfile.gettempdir(), "audio_separator.lock")
@@ -202,35 +291,6 @@ class AudioProcessor:
             )
             self._normalize_audio_files(result, artist_title, track_output_dir)
-            # Create Audacity LOF file
-            lof_path = os.path.join(stems_dir, f"{artist_title} (Audacity).lof")
-            first_model = list(result["backing_vocals"].keys())[0]
-            files_to_include = [
-                audio_file,  # Original audio
-                result["clean_instrumental"]["instrumental"],  # Clean instrumental
-                result["backing_vocals"][first_model]["backing_vocals"],  # Backing vocals
-                result["combined_instrumentals"][first_model],  # Combined instrumental+BV
-            ]
-            # Convert to absolute paths
-            files_to_include = [os.path.abspath(f) for f in files_to_include]
-            with open(lof_path, "w") as lof:
-                for file_path in files_to_include:
-                    lof.write(f'file "{file_path}"\n')
-            self.logger.info(f"Created Audacity LOF file: {lof_path}")
-            result["audacity_lof"] = lof_path
-            # Launch Audacity with multiple tracks
-            if sys.platform == "darwin":  # Check if we're on macOS
-                if lof_path and os.path.exists(lof_path):
-                    self.logger.info(f"Launching Audacity with LOF file: {lof_path}")
-                    os.system(f'open -a Audacity "{lof_path}"')
-                else:
-                    self.logger.debug("Audacity LOF file not available or not found")
             self.logger.info("Audio separation, combination, and normalization process completed")
             return result
         finally:
@@ -242,6 +302,258 @@ class AudioProcessor:
             except OSError:
                 pass
+    def _process_audio_separation_remote(self, audio_file, artist_title, track_output_dir, remote_api_url):
+        """Process audio separation using remote API with proper two-stage workflow."""
+        self.logger.info(f"Starting remote audio separation process for {artist_title}")
+        # Initialize the API client
+        api_client = AudioSeparatorAPIClient(remote_api_url, self.logger)
+        stems_dir = self._create_stems_directory(track_output_dir)
+        result = {"clean_instrumental": {}, "other_stems": {}, "backing_vocals": {}, "combined_instrumentals": {}}
+        if os.environ.get("KARAOKE_GEN_SKIP_AUDIO_SEPARATION"):
+            return result
+        try:
+            # Stage 1: Process original song with clean instrumental model + other stems models
+            stage1_models = []
+            if self.clean_instrumental_model:
+                stage1_models.append(self.clean_instrumental_model)
+            stage1_models.extend(self.other_stems_models)
+            self.logger.info(f"Stage 1: Submitting audio separation job with models: {stage1_models}")
+            # Submit the first stage job
+            stage1_result = api_client.separate_audio_and_wait(
+                audio_file,
+                models=stage1_models,
+                timeout=1800,  # 30 minutes timeout
+                poll_interval=15,  # Check every 15 seconds
+                download=True,
+                output_dir=stems_dir,
+                output_format=self.lossless_output_format.lower()
+            )
+            if stage1_result["status"] != "completed":
+                raise Exception(f"Stage 1 remote audio separation failed: {stage1_result.get('error', 'Unknown error')}")
+            self.logger.info(f"Stage 1 completed. Downloaded {len(stage1_result['downloaded_files'])} files")
+            # Check if we actually got the expected files for Stage 1
+            if len(stage1_result["downloaded_files"]) == 0:
+                error_msg = ("Stage 1 audio separation completed successfully but no files were downloaded. "
+                           "This indicates a filename encoding or API issue in the audio-separator remote service. "
+                           f"Expected files for models {stage1_models} but got 0.")
+                self.logger.error(error_msg)
+                raise Exception(error_msg)
+            # Organize the stage 1 results
+            result = self._organize_stage1_remote_results(
+                stage1_result["downloaded_files"], artist_title, track_output_dir, stems_dir
+            )
+            # Validate that we got the essential clean instrumental outputs
+            if not result["clean_instrumental"].get("vocals") or not result["clean_instrumental"].get("instrumental"):
+                missing = []
+                if not result["clean_instrumental"].get("vocals"):
+                    missing.append("clean vocals")
+                if not result["clean_instrumental"].get("instrumental"):
+                    missing.append("clean instrumental")
+                error_msg = (f"Stage 1 completed but failed to produce essential clean instrumental outputs: {', '.join(missing)}. "
+                           "This may indicate a model naming or file organization issue in the remote API.")
+                self.logger.error(error_msg)
+                raise Exception(error_msg)
+            # Stage 2: Process clean vocals with backing vocals models (if we have both)
+            if result["clean_instrumental"].get("vocals") and self.backing_vocals_models:
+                self.logger.info(f"Stage 2: Processing clean vocals for backing vocals separation...")
+                vocals_path = result["clean_instrumental"]["vocals"]
+                stage2_result = api_client.separate_audio_and_wait(
+                    vocals_path,
+                    models=self.backing_vocals_models,
+                    timeout=900,  # 15 minutes timeout for backing vocals
+                    poll_interval=10,
+                    download=True,
+                    output_dir=stems_dir,
+                    output_format=self.lossless_output_format.lower()
+                )
+                if stage2_result["status"] == "completed":
+                    self.logger.info(f"Stage 2 completed. Downloaded {len(stage2_result['downloaded_files'])} files")
+                    # Check if we actually got the expected files
+                    if len(stage2_result["downloaded_files"]) == 0:
+                        error_msg = ("Stage 2 backing vocals separation completed successfully but no files were downloaded. "
+                                   "This indicates a filename encoding or API issue in the audio-separator remote service. "
+                                   "Expected 2 files (lead vocals + backing vocals) but got 0.")
+                        self.logger.error(error_msg)
+                        raise Exception(error_msg)
+                    # Organize the stage 2 results (backing vocals)
+                    backing_vocals_result = self._organize_stage2_remote_results(
+                        stage2_result["downloaded_files"], artist_title, stems_dir
+                    )
+                    result["backing_vocals"] = backing_vocals_result
+                else:
+                    error_msg = f"Stage 2 backing vocals separation failed: {stage2_result.get('error', 'Unknown error')}"
+                    self.logger.error(error_msg)
+                    raise Exception(error_msg)
+            else:
+                result["backing_vocals"] = {}
+            # Generate combined instrumentals
+            if result["clean_instrumental"].get("instrumental") and result["backing_vocals"]:
+                result["combined_instrumentals"] = self._generate_combined_instrumentals(
+                    result["clean_instrumental"]["instrumental"], result["backing_vocals"], artist_title, track_output_dir
+                )
+            else:
+                result["combined_instrumentals"] = {}
+            # Normalize audio files
+            self._normalize_audio_files(result, artist_title, track_output_dir)
+            self.logger.info("Remote audio separation, combination, and normalization process completed")
+            return result
+        except Exception as e:
+            self.logger.error(f"Error during remote audio separation: {str(e)}")
+            raise e
+    def _organize_stage1_remote_results(self, downloaded_files, artist_title, track_output_dir, stems_dir):
+        """Organize stage 1 separation results (clean instrumental + other stems)."""
+        result = {"clean_instrumental": {}, "other_stems": {}}
+        for file_path in downloaded_files:
+            filename = os.path.basename(file_path)
+            self.logger.debug(f"Stage 1 - Processing downloaded file: {filename}")
+            # Determine which model and stem type this file represents
+            model_name = None
+            stem_type = None
+            # Extract model name and stem type from filename
+            # Expected format: "audio_(StemType)_modelname.ext"
+            if "_(Vocals)_" in filename:
+                stem_type = "Vocals"
+                model_name = filename.split("_(Vocals)_")[1].split(".")[0]
+            elif "_(Instrumental)_" in filename:
+                stem_type = "Instrumental"
+                model_name = filename.split("_(Instrumental)_")[1].split(".")[0]
+            elif "_(Drums)_" in filename:
+                stem_type = "Drums"
+                model_name = filename.split("_(Drums)_")[1].split(".")[0]
+            elif "_(Bass)_" in filename:
+                stem_type = "Bass"
+                model_name = filename.split("_(Bass)_")[1].split(".")[0]
+            elif "_(Other)_" in filename:
+                stem_type = "Other"
+                model_name = filename.split("_(Other)_")[1].split(".")[0]
+            elif "_(Guitar)_" in filename:
+                stem_type = "Guitar"
+                model_name = filename.split("_(Guitar)_")[1].split(".")[0]
+            elif "_(Piano)_" in filename:
+                stem_type = "Piano"
+                model_name = filename.split("_(Piano)_")[1].split(".")[0]
+            else:
+                # Try to extract stem type from parentheses
+                import re
+                match = re.search(r'_\(([^)]+)\)_([^.]+)', filename)
+                if match:
+                    stem_type = match.group(1)
+                    model_name = match.group(2)
+                else:
+                    self.logger.warning(f"Could not parse stem type and model from filename: {filename}")
+                    continue
+            # Check if this model name matches the clean instrumental model
+            is_clean_instrumental_model = (
+                model_name == self.clean_instrumental_model or
+                self.clean_instrumental_model.startswith(model_name) or
+                model_name.startswith(self.clean_instrumental_model.split('.')[0])
+            )
+            if is_clean_instrumental_model:
+                if stem_type == "Vocals":
+                    target_path = os.path.join(stems_dir, f"{artist_title} (Vocals {self.clean_instrumental_model}).{self.lossless_output_format}")
+                    shutil.move(file_path, target_path)
+                    result["clean_instrumental"]["vocals"] = target_path
+                elif stem_type == "Instrumental":
+                    target_path = os.path.join(track_output_dir, f"{artist_title} (Instrumental {self.clean_instrumental_model}).{self.lossless_output_format}")
+                    shutil.move(file_path, target_path)
+                    result["clean_instrumental"]["instrumental"] = target_path
+            elif any(model_name == os_model or os_model.startswith(model_name) or model_name.startswith(os_model.split('.')[0]) for os_model in self.other_stems_models):
+                # Find the matching other stems model
+                matching_os_model = None
+                for os_model in self.other_stems_models:
+                    if model_name == os_model or os_model.startswith(model_name) or model_name.startswith(os_model.split('.')[0]):
+                        matching_os_model = os_model
+                        break
+                if matching_os_model:
+                    if matching_os_model not in result["other_stems"]:
+                        result["other_stems"][matching_os_model] = {}
+                    target_path = os.path.join(stems_dir, f"{artist_title} ({stem_type} {matching_os_model}).{self.lossless_output_format}")
+                    shutil.move(file_path, target_path)
+                    result["other_stems"][matching_os_model][stem_type] = target_path
+        return result
+    def _organize_stage2_remote_results(self, downloaded_files, artist_title, stems_dir):
+        """Organize stage 2 separation results (backing vocals)."""
+        result = {}
+        for file_path in downloaded_files:
+            filename = os.path.basename(file_path)
+            self.logger.debug(f"Stage 2 - Processing downloaded file: {filename}")
+            # Determine which model and stem type this file represents
+            model_name = None
+            stem_type = None
+            # Extract model name and stem type from filename
+            if "_(Vocals)_" in filename:
+                stem_type = "Vocals"
+                model_name = filename.split("_(Vocals)_")[1].split(".")[0]
+            elif "_(Instrumental)_" in filename:
+                stem_type = "Instrumental"
+                model_name = filename.split("_(Instrumental)_")[1].split(".")[0]
+            else:
+                # Try to extract stem type from parentheses
+                import re
+                match = re.search(r'_\(([^)]+)\)_([^.]+)', filename)
+                if match:
+                    stem_type = match.group(1)
+                    model_name = match.group(2)
+                else:
+                    self.logger.warning(f"Could not parse stem type and model from filename: {filename}")
+                    continue
+            # Find the matching backing vocals model
+            matching_bv_model = None
+            for bv_model in self.backing_vocals_models:
+                if model_name == bv_model or bv_model.startswith(model_name) or model_name.startswith(bv_model.split('.')[0]):
+                    matching_bv_model = bv_model
+                    break
+            if matching_bv_model:
+                if matching_bv_model not in result:
+                    result[matching_bv_model] = {}
+                if stem_type == "Vocals":
+                    target_path = os.path.join(stems_dir, f"{artist_title} (Lead Vocals {matching_bv_model}).{self.lossless_output_format}")
+                    shutil.move(file_path, target_path)
+                    result[matching_bv_model]["lead_vocals"] = target_path
+                elif stem_type == "Instrumental":
+                    target_path = os.path.join(stems_dir, f"{artist_title} (Backing Vocals {matching_bv_model}).{self.lossless_output_format}")
+                    shutil.move(file_path, target_path)
+                    result[matching_bv_model]["backing_vocals"] = target_path
+        return result
     def _create_stems_directory(self, track_output_dir):
         stems_dir = os.path.join(track_output_dir, "stems")
         os.makedirs(stems_dir, exist_ok=True)
@@ -399,3 +711,68 @@ class AudioProcessor:
         self.logger.info(f"Normalized audio saved, replacing: {output_path}")
         self.logger.debug(f"Original peak: {peak_amplitude} dB, Applied gain: {gain_db} dB")
+    def apply_countdown_padding_to_instrumentals(self, separation_result, padding_seconds, artist_title, track_output_dir):
+        """
+        Apply countdown padding to all instrumental audio files.
+        When LyricsTranscriber adds countdown padding to vocals, this method ensures
+        all instrumental tracks are padded by the same amount to maintain synchronization.
+        Args:
+            separation_result: Dictionary containing paths to separated audio files
+            padding_seconds: Amount of padding to apply (e.g., 3.0)
+            artist_title: Artist and title string for naming padded files
+            track_output_dir: Output directory for padded files
+        Returns:
+            Dictionary with updated paths to padded instrumental files
+        """
+        self.logger.info(
+            f"Applying {padding_seconds}s countdown padding to all instrumental files to match vocal padding"
+        )
+        padded_result = {
+            "clean_instrumental": {},
+            "other_stems": {},
+            "backing_vocals": {},
+            "combined_instrumentals": {},
+        }
+        # Pad clean instrumental
+        if "clean_instrumental" in separation_result and separation_result["clean_instrumental"].get("instrumental"):
+            original_instrumental = separation_result["clean_instrumental"]["instrumental"]
+            # Insert "Padded" before the file extension
+            base, ext = os.path.splitext(original_instrumental)
+            padded_instrumental = f"{base} (Padded){ext}"
+            if not self._file_exists(padded_instrumental):
+                self.logger.info(f"Padding clean instrumental: {original_instrumental}")
+                self.pad_audio_file(original_instrumental, padded_instrumental, padding_seconds)
+            padded_result["clean_instrumental"]["instrumental"] = padded_instrumental
+            padded_result["clean_instrumental"]["vocals"] = separation_result["clean_instrumental"].get("vocals")
+        # Pad combined instrumentals (instrumental + backing vocals)
+        if "combined_instrumentals" in separation_result:
+            for model, combined_path in separation_result["combined_instrumentals"].items():
+                base, ext = os.path.splitext(combined_path)
+                padded_combined = f"{base} (Padded){ext}"
+                if not self._file_exists(padded_combined):
+                    self.logger.info(f"Padding combined instrumental ({model}): {combined_path}")
+                    self.pad_audio_file(combined_path, padded_combined, padding_seconds)
+                padded_result["combined_instrumentals"][model] = padded_combined
+        # Copy over other stems and backing vocals without padding
+        # (these are typically not used in final output, but preserve the structure)
+        padded_result["other_stems"] = separation_result.get("other_stems", {})
+        padded_result["backing_vocals"] = separation_result.get("backing_vocals", {})
+        self.logger.info(
+            f"✓ Countdown padding applied to {len(padded_result['combined_instrumentals']) + 1} instrumental file(s)"
+        )
+        return padded_result

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl