PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl - Mend

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1965 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.27.dist-info/METADATA +610 -0
karaoke_gen-0.71.27.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/output/video.py ADDED Viewed

@@ -0,0 +1,544 @@
+import logging
+import os
+import json
+import subprocess
+from typing import List, Optional, Tuple
+class VideoGenerator:
+    """Handles generation of video files with lyrics overlay."""
+    def __init__(
+        self,
+        output_dir: str,
+        cache_dir: str,
+        video_resolution: Tuple[int, int],
+        styles: dict,
+        logger: Optional[logging.Logger] = None,
+    ):
+        """Set up a generator that renders lyrics videos through FFmpeg.
+        Args:
+            output_dir: Folder that receives the finished video files
+            cache_dir: Folder used for intermediate/temporary files
+            video_resolution: Target (width, height); every dimension must be greater than 0
+            styles: Styling configuration; its "karaoke" section supplies the background settings
+            logger: Logger to use; falls back to this module's logger when omitted
+        Raises:
+            ValueError: If any resolution dimension is not greater than 0
+            FileNotFoundError: If a background image is configured but is not an existing file
+        """
+        if any(not (dimension > 0) for dimension in video_resolution):
+            raise ValueError("Video resolution dimensions must be greater than 0")
+        self.output_dir, self.cache_dir = output_dir, cache_dir
+        self.video_resolution = video_resolution
+        self.styles = styles
+        self.logger = logger if logger else logging.getLogger(__name__)
+        # Background comes from the "karaoke" style section; solid black is the fallback color
+        karaoke_section = styles.get("karaoke", {})
+        self.background_image = karaoke_section.get("background_image")
+        self.background_color = karaoke_section.get("background_color", "black")
+        # A configured image has to exist on disk before any encoding is attempted
+        if self.background_image:
+            if not os.path.isfile(self.background_image):
+                raise FileNotFoundError(f"Video background image not found: {self.background_image}")
+        # Probe for NVENC once up front, then derive encoder settings from the result
+        self.nvenc_available = self.detect_nvenc_support()
+        self.configure_hardware_acceleration()
+    def detect_nvenc_support(self):
+        """Detect if NVENC hardware encoding is available.
+        Runs short FFmpeg test encodes with the h264_nvenc encoder; NVENC counts as
+        available as soon as one of them exits with return code 0. The other probes
+        (nvidia-smi, FFmpeg encoder list, hwaccels, ldconfig) only feed the logs and
+        never influence the result.
+        Returns:
+            True if a test encode succeeded, False otherwise (including on timeout or error)
+        """
+        try:
+            self.logger.info("🔍 Detecting NVENC hardware acceleration...")
+            self._log_nvidia_gpu_info()
+            self._log_ffmpeg_nvenc_encoders()
+            # Test h264_nvenc encoder
+            test_cmd = [
+                "ffmpeg", "-hide_banner", "-loglevel", "error",
+                "-f", "lavfi", "-i", "testsrc=duration=1:size=320x240:rate=1",
+                "-c:v", "h264_nvenc", "-f", "null", "-"
+            ]
+            self.logger.debug(f"Testing NVENC: {' '.join(test_cmd)}")
+            result = subprocess.run(test_cmd, capture_output=True, text=True, timeout=30)
+            if result.returncode == 0:
+                self.logger.info("  ✓ NVENC encoding available")
+                return True
+            # NVENC test failed - log details at debug level
+            self.logger.debug(f"NVENC test failed (return code {result.returncode})")
+            self.logger.debug(f"NVENC test stderr: {result.stderr}")
+            # Try alternative test
+            alt_test_cmd = [
+                "ffmpeg", "-hide_banner", "-loglevel", "error",
+                "-f", "lavfi", "-i", "color=red:size=320x240:duration=0.1",
+                "-c:v", "h264_nvenc", "-preset", "fast", "-f", "null", "-"
+            ]
+            alt_result = subprocess.run(alt_test_cmd, capture_output=True, text=True, timeout=30)
+            if alt_result.returncode == 0:
+                self.logger.info("  ✓ NVENC encoding available")
+                return True
+            self.logger.debug(f"Alternative NVENC test also failed: {alt_result.stderr}")
+            self._log_nvenc_failure_diagnostics()
+            self.logger.info("  ✗ NVENC not available")
+            return False
+        except subprocess.TimeoutExpired:
+            # Only the two test encodes can reach this handler; the log-only probes
+            # swallow their own exceptions
+            self.logger.debug("NVENC detection timed out")
+            self.logger.info("  ✗ NVENC not available (timeout)")
+            return False
+        except Exception as e:
+            self.logger.debug(f"Failed to detect NVENC support: {e}")
+            import traceback
+            self.logger.debug(f"Full traceback: {traceback.format_exc()}")
+            self.logger.info("  ✗ NVENC not available (error)")
+            return False
+    def _log_nvidia_gpu_info(self):
+        """Log the GPU name and driver version reported by nvidia-smi (log-only probe)."""
+        try:
+            nvidia_smi_cmd = ["nvidia-smi", "--query-gpu=name,driver_version", "--format=csv,noheader"]
+            nvidia_result = subprocess.run(nvidia_smi_cmd, capture_output=True, text=True, timeout=10)
+            if nvidia_result.returncode == 0:
+                gpu_info = nvidia_result.stdout.strip()
+                self.logger.info(f"  ✓ NVIDIA GPU detected: {gpu_info}")
+            else:
+                self.logger.debug(f"nvidia-smi failed: {nvidia_result.stderr}")
+        except Exception as e:
+            self.logger.debug(f"nvidia-smi not available: {e}")
+    def _log_ffmpeg_nvenc_encoders(self):
+        """Log which NVENC encoders appear in FFmpeg's encoder list (log-only probe)."""
+        try:
+            encoders_cmd = ["ffmpeg", "-hide_banner", "-encoders"]
+            encoders_result = subprocess.run(encoders_cmd, capture_output=True, text=True, timeout=10)
+            if encoders_result.returncode == 0:
+                encoder_lines = encoders_result.stdout.split('\n')
+                nvenc_encoders = [line for line in encoder_lines if 'nvenc' in line.lower()]
+                if nvenc_encoders:
+                    self.logger.debug(f"Found NVENC encoders: {[e.strip() for e in nvenc_encoders]}")
+                else:
+                    self.logger.debug("No NVENC encoders found in FFmpeg encoder list")
+        except Exception as e:
+            self.logger.debug(f"Error listing FFmpeg encoders: {e}")
+    def _log_nvenc_failure_diagnostics(self):
+        """Log FFmpeg hwaccels and CUDA library presence after the NVENC tests failed."""
+        # Check CUDA availability for debug info
+        try:
+            cuda_test_cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-hwaccels"]
+            cuda_result = subprocess.run(cuda_test_cmd, capture_output=True, text=True, timeout=10)
+            if cuda_result.returncode == 0:
+                hwaccels = cuda_result.stdout.strip()
+                self.logger.debug(f"FFmpeg hardware accelerators: {hwaccels}")
+                if 'cuda' in hwaccels:
+                    self.logger.debug("CUDA found in FFmpeg but NVENC still not working")
+        except Exception as e:
+            self.logger.debug(f"Error checking CUDA: {e}")
+        # Check CUDA libraries for debug info
+        try:
+            ldconfig_cmd = ["ldconfig", "-p"]
+            ldconfig_result = subprocess.run(ldconfig_cmd, capture_output=True, text=True, timeout=10)
+            if ldconfig_result.returncode == 0:
+                has_libcuda = "libcuda.so.1" in ldconfig_result.stdout
+                has_nvenc_lib = "libnvidia-encode.so" in ldconfig_result.stdout
+                self.logger.debug(f"CUDA libraries: libcuda.so.1={has_libcuda}, libnvidia-encode.so={has_nvenc_lib}")
+                if not has_libcuda:
+                    self.logger.debug("Missing libcuda.so.1 - may need nvidia/cuda:*-devel image")
+        except Exception as e:
+            self.logger.debug(f"Error checking CUDA libraries: {e}")
+    def configure_hardware_acceleration(self):
+        """Pick the video encoder and FFmpeg hwaccel flags from the NVENC probe result."""
+        if not self.nvenc_available:
+            # No usable NVENC: encode with libx264 and pass no extra hwaccel flags
+            self.video_encoder, self.hwaccel_flags = "libx264", []
+            self.logger.info("🔧 Using software encoding (libx264) for video")
+            return
+        self.video_encoder = "h264_nvenc"
+        self.hwaccel_flags = [
+            "-hwaccel", "cuda",
+            "-hwaccel_output_format", "cuda",
+        ]
+        self.logger.info("🚀 Using NVENC hardware acceleration for video encoding")
+    def get_nvenc_settings(self, quality_mode="high", is_preview=False):
+        """Return extra FFmpeg arguments for NVENC encoding of subtitle overlay content.
+        Args:
+            quality_mode: "high" selects the high-quality profile; any other value selects the balanced one
+            is_preview: When true, the fast preview profile is used regardless of quality_mode
+        Returns:
+            List of FFmpeg arguments; empty when NVENC is not available
+        """
+        if not self.nvenc_available:
+            return []
+        if is_preview:
+            # Preview profile: fastest preset, low-latency tuning, variable bitrate
+            return ["-preset", "p1", "-tune", "ll", "-rc", "vbr"]
+        if quality_mode != "high":
+            # Balanced profile: p4 preset with variable bitrate
+            return ["-preset", "p4", "-rc", "vbr"]
+        # High-quality profile for final output, written as (flag, value) pairs
+        high_quality_pairs = (
+            ("-preset", "p4"),       # Balanced preset
+            ("-tune", "hq"),         # High quality
+            ("-rc", "vbr"),          # Variable bitrate
+            ("-cq", "18"),           # Constant quality target
+            ("-spatial-aq", "1"),    # Spatial adaptive quantization
+            ("-temporal-aq", "1"),   # Temporal adaptive quantization
+        )
+        return [token for pair in high_quality_pairs for token in pair]
+    def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
+        """Generate MKV video with lyrics overlay.
+        The subtitles are copied to a uniquely named temporary file in the cache
+        directory and that copy is handed to FFmpeg. The copy is removed again
+        whether or not encoding succeeds.
+        Args:
+            ass_path: Path to ASS subtitles file
+            audio_path: Path to audio file
+            output_prefix: Prefix for output filename
+        Returns:
+            Path to generated video file ("<output_prefix> (With Vocals).mkv" in the output directory)
+        Raises:
+            FileNotFoundError: If the subtitles file or the audio file does not exist
+        """
+        import shutil
+        import time
+        self.logger.info("Generating video with lyrics overlay")
+        output_path = self._get_output_path(f"{output_prefix} (With Vocals)", "mkv")
+        # Check input files exist before running FFmpeg
+        if not os.path.isfile(ass_path):
+            raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
+        if not os.path.isfile(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+        # Temporary copy gets a name made of alphanumerics/underscores plus a millisecond
+        # timestamp (presumably to keep filter-special characters out of the path - TODO confirm)
+        safe_prefix = "".join(c if c.isalnum() else "_" for c in output_prefix)
+        timestamp = int(time.time() * 1000)
+        temp_ass_path = os.path.join(self.cache_dir, f"temp_subtitles_{safe_prefix}_{timestamp}.ass")
+        try:
+            shutil.copy2(ass_path, temp_ass_path)
+            self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
+            cmd = self._build_ffmpeg_command(temp_ass_path, audio_path, output_path)
+            self._run_ffmpeg_command(cmd)
+            self.logger.info(f"Video generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to generate video: {str(e)}")
+            raise
+        finally:
+            # Best-effort cleanup on both paths; a leftover temp file must not mask the
+            # real outcome (success or the original exception)
+            try:
+                os.remove(temp_ass_path)
+            except FileNotFoundError:
+                pass
+            except OSError as cleanup_error:
+                self.logger.debug(f"Could not remove temporary ASS file {temp_ass_path}: {cleanup_error}")
+    def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
+        """Generate lower resolution MP4 preview video with lyrics overlay.
+        The preview is written to the cache directory. The subtitles are copied to a
+        uniquely named temporary file there and that copy is handed to FFmpeg; the
+        copy is removed again whether or not encoding succeeds.
+        Args:
+            ass_path: Path to ASS subtitles file
+            audio_path: Path to audio file
+            output_prefix: Prefix for output filename
+        Returns:
+            Path to generated preview video file ("<output_prefix>_preview.mp4" in the cache directory)
+        Raises:
+            FileNotFoundError: If the subtitles file or the audio file does not exist
+        """
+        import shutil
+        import time
+        self.logger.info("Generating preview video with lyrics overlay")
+        output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")
+        # Check input files exist before running FFmpeg
+        if not os.path.isfile(ass_path):
+            raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
+        if not os.path.isfile(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+        # Temporary copy gets a name made of alphanumerics/underscores plus a millisecond
+        # timestamp (presumably to keep filter-special characters out of the path - TODO confirm)
+        safe_prefix = "".join(c if c.isalnum() else "_" for c in output_prefix)
+        timestamp = int(time.time() * 1000)
+        temp_ass_path = os.path.join(self.cache_dir, f"temp_preview_subtitles_{safe_prefix}_{timestamp}.ass")
+        try:
+            shutil.copy2(ass_path, temp_ass_path)
+            self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
+            cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
+            self._run_ffmpeg_command(cmd)
+            self.logger.info(f"Preview video generated: {output_path}")
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to generate preview video: {str(e)}")
+            raise
+        finally:
+            # Best-effort cleanup on both paths; a leftover temp file must not mask the
+            # real outcome (success or the original exception)
+            try:
+                os.remove(temp_ass_path)
+            except FileNotFoundError:
+                pass
+            except OSError as cleanup_error:
+                self.logger.debug(f"Could not remove temporary ASS file {temp_ass_path}: {cleanup_error}")
+    def _get_output_path(self, output_prefix: str, extension: str) -> str:
+        """Return the path of "<output_prefix>.<extension>" inside the output directory."""
+        filename = "{}.{}".format(output_prefix, extension)
+        return os.path.join(self.output_dir, filename)
+    def _resize_background_image(self, input_path: str) -> str:
+        """Resize background image to match target resolution and save to temp file.
+        The image is probed with ffprobe first; when it already has the target
+        dimensions the original path is returned untouched. Otherwise FFmpeg scales
+        it to fit inside the target size (keeping the aspect ratio) and pads it,
+        centered, to the exact resolution.
+        Args:
+            input_path: Path to the background image
+        Returns:
+            input_path if the image already matches the target resolution, otherwise the
+            path of "resized_background.png" in the cache directory
+        Raises:
+            subprocess.CalledProcessError: If the FFmpeg resize command fails
+        """
+        target_width, target_height = self.video_resolution
+        # Get current image dimensions using ffprobe
+        try:
+            probe_cmd = [
+                "ffprobe",
+                "-v",
+                "error",
+                "-select_streams",
+                "v:0",
+                "-show_entries",
+                "stream=width,height",
+                "-of",
+                "json",
+                input_path,
+            ]
+            probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
+            probe_data = json.loads(probe_output)
+            current_width = probe_data["streams"][0]["width"]
+            current_height = probe_data["streams"][0]["height"]
+            # If dimensions already match, return original path
+            if current_width == target_width and current_height == target_height:
+                self.logger.debug("Background image already at target resolution")
+                return input_path
+        except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError, IndexError) as e:
+            # IndexError covers a probe result with an empty "streams" list
+            self.logger.warning(f"Failed to get image dimensions: {e}")
+            # Continue with resize attempt if probe fails
+        temp_path = os.path.join(self.cache_dir, "resized_background.png")
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            input_path,
+            "-vf",
+            f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease,"
+            f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2",
+            temp_path,
+        ]
+        try:
+            subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
+            return temp_path
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"Failed to resize background image: {e.output}")
+            raise
+    def _build_ass_filter(self, ass_path: str) -> str:
+        """Compose the FFmpeg "ass" filter string, adding fontsdir when a font file is configured.
+        Args:
+            ass_path: Path to the ASS subtitles file, inserted into the filter as-is
+        Returns:
+            "ass=<ass_path>", extended with ":fontsdir=<dir>" when the "karaoke" style
+            section names a font_path that is an existing file
+        """
+        base_filter = f"ass={ass_path}"
+        # The directory holding the configured font file is what gets passed as fontsdir
+        configured_font = self.styles.get("karaoke", {}).get("font_path")
+        if not (configured_font and os.path.isfile(configured_font)):
+            return base_filter
+        full_filter = f"{base_filter}:fontsdir={os.path.dirname(configured_font)}"
+        self.logger.info(f"Returning ASS filter with fonts dir: {full_filter}")
+        return full_filter
+    def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
+        """Build FFmpeg command for video generation with hardware acceleration when available."""
+        width, height = self.video_resolution
+        cmd = [
+            "ffmpeg",
+            "-hide_banner",
+            "-loglevel", "error",
+            "-r", "30",  # Set frame rate to 30 fps
+        ]
+        # Add hardware acceleration flags if available
+        cmd.extend(self.hwaccel_flags)
+        # Input source (background)
+        if self.background_image:
+            # Resize background image first
+            resized_bg = self._resize_background_image(self.background_image)
+            self.logger.debug(f"Using resized background image: {resized_bg}")
+            cmd.extend([
+                "-loop", "1",  # Loop the image
+                "-i", resized_bg,
+            ])
+        else:
+            self.logger.debug(
+                f"Using solid {self.background_color} background "
+                f"with resolution: {width}x{height}"
+            )
+            cmd.extend([
+                "-f", "lavfi",
+                "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
+            ])
+        cmd.extend([
+            "-i", audio_path,
+            "-c:a", "flac",  # Re-encode audio as FLAC
+            "-vf", self._build_ass_filter(ass_path),  # Add subtitles with font directories
+            "-c:v", self.video_encoder,
+        ])
+        # Add encoder-specific settings
+        if self.nvenc_available:
+            # NVENC settings optimized for subtitle content
+            cmd.extend(self.get_nvenc_settings("high", is_preview=False))
+            # Use higher bitrate for NVENC as it's more efficient
+            cmd.extend([
+                "-b:v", "8000k",      # Higher base bitrate for NVENC
+                "-maxrate", "15000k", # Reasonable max for 4K
+                "-bufsize", "16000k", # Buffer size
+            ])
+            self.logger.debug("Using NVENC encoding for high-quality video generation")
+        else:
+            # Software encoding fallback settings
+            cmd.extend([
+                "-preset", "fast",     # Better compression efficiency
+                "-b:v", "5000k",       # Base video bitrate
+                "-minrate", "5000k",   # Minimum bitrate
+                "-maxrate", "20000k",  # Maximum bitrate
+                "-bufsize", "10000k",  # Buffer size (2x base rate)
+            ])
+            self.logger.debug("Using software encoding for video generation")
+        cmd.extend([
+            "-shortest",  # End encoding after shortest stream
+            "-y",         # Overwrite output without asking
+        ])
+        # Add output path
+        cmd.append(output_path)
+        return cmd
+    def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
+        """Build FFmpeg command for preview video generation with hardware acceleration when available."""
+        # Use even lower resolution for preview (480x270 instead of 640x360 for faster encoding)
+        width, height = 480, 270
+        cmd = [
+            "ffmpeg",
+            "-hide_banner",
+            "-loglevel", "error",
+            "-r", "24",  # Reduced frame rate to 24 fps for faster encoding
+        ]
+        # Add hardware acceleration flags if available
+        cmd.extend(self.hwaccel_flags)
+        # Input source (background) - simplified for preview
+        if self.background_image:
+            # For preview, use the original image without resizing to save time
+            self.logger.debug(f"Using original background image for preview: {self.background_image}")
+            cmd.extend([
+                "-loop", "1",  # Loop the image
+                "-i", self.background_image,
+            ])
+            # Build video filter with scaling and ASS subtitles
+            video_filter = f"scale={width}:{height}:force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2,{self._build_ass_filter(ass_path)}"
+        else:
+            self.logger.debug(
+                f"Using solid {self.background_color} background "
+                f"with resolution: {width}x{height}"
+            )
+            cmd.extend([
+                "-f", "lavfi",
+                "-i", f"color=c={self.background_color}:s={width}x{height}:r=24",
+            ])
+            # Build video filter with just ASS subtitles (no scaling needed)
+            video_filter = self._build_ass_filter(ass_path)
+        cmd.extend([
+            "-i", audio_path,
+            "-vf", video_filter,    # Apply the video filter
+            "-c:a", "aac",          # Use AAC for audio compatibility
+            "-b:a", "96k",          # Reduced audio bitrate for faster encoding
+            "-c:v", self.video_encoder,
+        ])
+        # Add encoder-specific settings for preview with maximum speed priority
+        if self.nvenc_available:
+            # NVENC settings optimized for maximum speed
+            cmd.extend([
+                "-preset", "p1",       # Fastest NVENC preset
+                "-tune", "ll",         # Low latency
+                "-rc", "cbr",          # Constant bitrate for speed
+                "-b:v", "800k",        # Lower bitrate for speed
+                "-profile:v", "baseline", # Most compatible profile
+                "-level", "3.1",       # Lower level for speed
+            ])
+            self.logger.debug("Using NVENC encoding with maximum speed settings for preview video generation")
+        else:
+            # Software encoding with maximum speed priority
+            cmd.extend([
+                "-profile:v", "baseline",  # Most compatible H.264 profile
+                "-level", "3.0",           # Compatibility level
+                "-preset", "superfast",    # Even faster than ultrafast for preview
+                "-tune", "fastdecode",     # Optimize for fast decoding
+                "-b:v", "600k",            # Lower base bitrate for speed
+                "-maxrate", "800k",        # Lower max bitrate
+                "-bufsize", "1200k",       # Smaller buffer size
+                "-crf", "28",              # Higher CRF for faster encoding (lower quality but faster)
+            ])
+            self.logger.debug("Using software encoding with maximum speed settings for preview video generation")
+        cmd.extend([
+            "-pix_fmt", "yuv420p",  # Required for browser compatibility
+            "-movflags", "+faststart+frag_keyframe+empty_moov+dash",  # Enhanced streaming with dash for faster start
+            "-g", "48",             # Keyframe every 48 frames (2 seconds at 24fps) - fewer keyframes for speed
+            "-keyint_min", "48",    # Minimum keyframe interval
+            "-sc_threshold", "0",   # Disable scene change detection for speed
+            "-threads", "0",        # Use all available CPU threads
+            "-shortest",            # End encoding after shortest stream
+            "-y"                    # Overwrite output without asking
+        ])
+        # Add output path
+        cmd.append(output_path)
+        return cmd
+    def _get_video_codec(self) -> str:
+        """Determine the best available video codec (legacy method - use video_encoder instead)."""
+        # This method is kept for backwards compatibility but is deprecated
+        # The new hardware acceleration system uses self.video_encoder instead
+        self.logger.warning("_get_video_codec is deprecated, use self.video_encoder instead")
+        return self.video_encoder
+    def _run_ffmpeg_command(self, cmd: List[str]) -> None:
+        """Execute FFmpeg command with output handling."""
+        self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
+        try:
+            output = subprocess.check_output(cmd, universal_newlines=True, stderr=subprocess.STDOUT)
+            self.logger.debug(f"FFmpeg output: {output}")
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"FFmpeg error: {e.output}")
+            raise

lyrics_transcriber/review/__init__.py ADDED Viewed

File without changes

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.27py3-none-any.whl