karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,351 @@
1
+ import os
2
+ import sys
3
+ import logging
4
+ import subprocess
5
+ import shutil
6
+
7
+
8
class VideoBackgroundProcessor:
    """
    Handles video background processing for karaoke videos.
    Responsible for scaling, looping/trimming, darkening, and subtitle rendering.

    ffmpeg invocations are assembled as shell command strings on top of
    ``ffmpeg_base_command`` and executed with ``shell=True``.
    """

    def __init__(self, logger, ffmpeg_base_command):
        """
        Initialize the VideoBackgroundProcessor.

        Probes for NVENC support immediately and stores the resulting
        encoder configuration on the instance.

        Args:
            logger: Logger instance for output
            ffmpeg_base_command: Base ffmpeg command with common flags
        """
        self.logger = logger
        self.ffmpeg_base_command = ffmpeg_base_command

        # Detect and configure hardware acceleration
        self.nvenc_available = self.detect_nvenc_support()
        self.configure_hardware_acceleration()

    def detect_nvenc_support(self):
        """
        Detect if NVENC hardware encoding is available.

        Runs four checks in order and stops at the first failure:
        ``nvidia-smi`` succeeds, ffmpeg lists an nvenc encoder,
        ``ldconfig -p`` lists ``libcuda.so.1``, and a one-second test encode
        with ``h264_nvenc`` exits with code 0.

        Returns:
            bool: True only if every check passed. Any unexpected error is
            logged at debug level and reported as False.
        """
        try:
            self.logger.info("🔍 Detecting NVENC hardware acceleration...")

            # Check for nvidia-smi (indicates NVIDIA driver presence)
            try:
                nvidia_smi_result = subprocess.run(
                    ["nvidia-smi", "--query-gpu=name,driver_version", "--format=csv,noheader"],
                    capture_output=True,
                    text=True,
                    timeout=10,
                )
                if nvidia_smi_result.returncode == 0:
                    gpu_info = nvidia_smi_result.stdout.strip()
                    self.logger.info(f" ✓ NVIDIA GPU detected: {gpu_info}")
                else:
                    self.logger.debug(f"nvidia-smi failed: {nvidia_smi_result.stderr}")
                    self.logger.info(" ✗ NVENC not available (no NVIDIA GPU)")
                    return False
            except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.CalledProcessError) as e:
                self.logger.debug(f"nvidia-smi not available: {e}")
                self.logger.info(" ✗ NVENC not available (no NVIDIA GPU)")
                return False

            # Check for NVENC encoders in FFmpeg
            try:
                # The pipeline's exit status is grep's, so 0 means a match was found.
                encoders_cmd = f"{self.ffmpeg_base_command} -hide_banner -encoders 2>/dev/null | grep nvenc"
                encoders_result = subprocess.run(encoders_cmd, shell=True, capture_output=True, text=True, timeout=10)
                if encoders_result.returncode == 0 and "nvenc" in encoders_result.stdout:
                    nvenc_encoders = [line.strip() for line in encoders_result.stdout.split("\n") if "nvenc" in line]
                    self.logger.debug(f"Found NVENC encoders: {nvenc_encoders}")
                else:
                    self.logger.debug("No NVENC encoders found in FFmpeg")
                    self.logger.info(" ✗ NVENC not available (no FFmpeg support)")
                    return False
            except Exception as e:
                self.logger.debug(f"Failed to check FFmpeg NVENC encoders: {e}")
                self.logger.info(" ✗ NVENC not available")
                return False

            # Check for libcuda.so.1 (critical for NVENC)
            try:
                libcuda_check = subprocess.run(["ldconfig", "-p"], capture_output=True, text=True, timeout=10)
                if libcuda_check.returncode == 0 and "libcuda.so.1" in libcuda_check.stdout:
                    self.logger.debug("libcuda.so.1 found in system libraries")
                else:
                    self.logger.debug("libcuda.so.1 NOT found - may need nvidia/cuda:*-devel image")
                    self.logger.info(" ✗ NVENC not available (missing CUDA libraries)")
                    return False
            except Exception as e:
                self.logger.debug(f"Failed to check for libcuda.so.1: {e}")
                self.logger.info(" ✗ NVENC not available")
                return False

            # Test h264_nvenc encoder
            test_cmd = f"{self.ffmpeg_base_command} -hide_banner -loglevel error -f lavfi -i testsrc=duration=1:size=320x240:rate=1 -c:v h264_nvenc -f null -"
            self.logger.debug(f"Testing NVENC: {test_cmd}")

            try:
                result = subprocess.run(test_cmd, shell=True, capture_output=True, text=True, timeout=30)

                if result.returncode == 0:
                    self.logger.info(" ✓ NVENC encoding available")
                    return True
                else:
                    self.logger.debug(f"NVENC test failed (exit code {result.returncode}): {result.stderr}")
                    self.logger.info(" ✗ NVENC not available")
                    return False

            except subprocess.TimeoutExpired:
                self.logger.debug("NVENC test timed out")
                self.logger.info(" ✗ NVENC not available (timeout)")
                return False

        except Exception as e:
            self.logger.debug(f"Failed to detect NVENC support: {e}")
            self.logger.info(" ✗ NVENC not available (error)")
            return False

    def configure_hardware_acceleration(self):
        """
        Configure hardware acceleration settings based on detected capabilities.

        Sets ``video_encoder``, ``hwaccel_decode_flags`` and ``scale_filter``
        on the instance according to ``self.nvenc_available``.
        """
        if self.nvenc_available:
            self.video_encoder = "h264_nvenc"
            self.hwaccel_decode_flags = "-hwaccel cuda"
            self.scale_filter = "scale"
            self.logger.info("🚀 Using NVENC hardware acceleration for video encoding")
        else:
            self.video_encoder = "libx264"
            self.hwaccel_decode_flags = ""
            self.scale_filter = "scale"
            self.logger.info("🔧 Using software encoding (libx264) for video")

    def get_nvenc_quality_settings(self):
        """Get NVENC settings for high quality encoding."""
        return "-preset p4 -tune hq -rc vbr -cq 18 -spatial-aq 1 -temporal-aq 1 -b:v 8000k -maxrate 15000k -bufsize 16000k"

    def get_audio_duration(self, audio_path):
        """
        Get duration of audio file in seconds using ffprobe.

        Args:
            audio_path: Path to audio file

        Returns:
            float: Duration in seconds

        Raises:
            Exception: Whatever ffprobe execution or float parsing raised;
                the error is logged and re-raised unchanged.
        """
        try:
            cmd = [
                "ffprobe",
                "-v",
                "error",
                "-show_entries",
                "format=duration",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                audio_path,
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            duration = float(result.stdout.strip())
            self.logger.info(f"Audio duration: {duration:.2f} seconds")
            return duration
        except Exception as e:
            self.logger.error(f"Failed to get audio duration: {e}")
            raise

    def escape_filter_path(self, path):
        """
        Escape a file path for use in ffmpeg filter expressions.

        Args:
            path: File path to escape

        Returns:
            str: Escaped path
        """
        # Escape backslashes and colons for ffmpeg filter syntax.
        # Backslashes must be doubled first so the ones added for colons are
        # not doubled again.
        # NOTE(review): ffmpeg's filtergraph syntax also treats ' , ; [ ] as
        # special and they are not escaped here - confirm callers never pass
        # paths containing them.
        escaped = path.replace("\\", "\\\\").replace(":", "\\:")
        return escaped

    def build_video_filter(self, ass_subtitles_path, darkness_percent, fonts_dir=None):
        """
        Build the video filter chain for scaling, darkening, and subtitles.

        Args:
            ass_subtitles_path: Path to ASS subtitle file
            darkness_percent: Darkness overlay percentage (0-100)
            fonts_dir: Optional fonts directory for ASS rendering; ignored
                unless it is an existing directory

        Returns:
            str: FFmpeg filter string
        """
        filters = []

        # Scale to 4K with intelligent cropping (not stretching)
        # force_original_aspect_ratio=increase ensures we scale up to fill the frame
        # then crop to exact 4K dimensions
        filters.append("scale=w=3840:h=2160:force_original_aspect_ratio=increase,crop=3840:2160")

        # Add darkening overlay if requested (before ASS subtitles)
        if darkness_percent > 0:
            # Convert percentage (0-100) to alpha (0.0-1.0)
            alpha = darkness_percent / 100.0
            filters.append(f"drawbox=x=0:y=0:w=iw:h=ih:color=black@{alpha:.2f}:t=fill")

        # Add ASS subtitle filter
        ass_escaped = self.escape_filter_path(ass_subtitles_path)
        ass_filter = f"ass={ass_escaped}"

        # Add fonts directory if provided
        if fonts_dir and os.path.isdir(fonts_dir):
            fonts_escaped = self.escape_filter_path(fonts_dir)
            ass_filter += f":fontsdir={fonts_escaped}"

        filters.append(ass_filter)

        # Combine all filters with commas
        return ",".join(filters)

    @staticmethod
    def _shell_quote(value):
        """Quote a single argument for embedding in a ``shell=True`` command string."""
        if os.name == "nt":
            # shlex.quote targets POSIX shells only; keep plain double quotes on Windows.
            return f'"{value}"'

        import shlex  # local import: the file-level import block does not provide shlex

        # Single-quoting stops the shell from expanding $, backticks and
        # embedded double quotes that can appear in media file names.
        return shlex.quote(str(value))

    def _build_ffmpeg_commands(self, video_path, audio_path, output_path, vf_filter):
        """Build the (gpu_command, cpu_command) pair of ffmpeg shell command strings."""
        quote = self._shell_quote

        # Use -stream_loop -1 to loop video indefinitely, -shortest to cut when audio ends
        base_inputs = f"-stream_loop -1 -i {quote(video_path)} -i {quote(audio_path)}"
        quoted_filter = quote(vf_filter)
        quoted_output = quote(output_path)

        # GPU-accelerated version
        gpu_command = (
            f"{self.ffmpeg_base_command} {self.hwaccel_decode_flags} {base_inputs} "
            f"-c:a flac -vf {quoted_filter} -c:v {self.video_encoder} "
            f"{self.get_nvenc_quality_settings()} -shortest -y {quoted_output}"
        )

        # Software fallback version
        cpu_command = (
            f"{self.ffmpeg_base_command} {base_inputs} "
            f"-c:a flac -vf {quoted_filter} -c:v libx264 -preset fast "
            f"-b:v 5000k -minrate 5000k -maxrate 20000k -bufsize 10000k "
            f"-shortest -y {quoted_output}"
        )

        return gpu_command, cpu_command

    def execute_command_with_fallback(self, gpu_command, cpu_command, description, timeout=600):
        """
        Execute GPU command with automatic fallback to CPU if it fails.

        The GPU command is attempted only when NVENC was detected and the two
        commands differ. Any GPU failure (non-zero exit, timeout, exception)
        is logged as a warning and the CPU command is run instead.

        Args:
            gpu_command: Command to try with GPU acceleration
            cpu_command: Fallback command for CPU encoding
            description: Description for logging
            timeout: Per-attempt time limit in seconds (default 600)

        Raises:
            Exception: If the CPU command exits non-zero, times out, or
                cannot be executed
        """
        self.logger.info(f"{description}")

        # Try GPU-accelerated command first if available
        if self.nvenc_available and gpu_command != cpu_command:
            self.logger.debug(f"Attempting hardware-accelerated encoding: {gpu_command}")
            try:
                result = subprocess.run(gpu_command, shell=True, capture_output=True, text=True, timeout=timeout)
            except subprocess.TimeoutExpired:
                self.logger.warning("✗ Hardware acceleration timed out, falling back to software encoding")
            except Exception as e:
                self.logger.warning(f"✗ Hardware acceleration failed with exception: {e}, falling back to software encoding")
            else:
                if result.returncode == 0:
                    self.logger.info("✓ Hardware acceleration successful")
                    return

                self.logger.warning(f"✗ Hardware acceleration failed (exit code {result.returncode})")
                self.logger.warning(f"GPU Command: {gpu_command}")

                if result.stderr:
                    self.logger.warning(f"FFmpeg STDERR: {result.stderr}")
                if result.stdout:
                    self.logger.warning(f"FFmpeg STDOUT: {result.stdout}")
                self.logger.info("Falling back to software encoding...")

        # Use CPU command (either as fallback or primary method)
        self.logger.debug(f"Running software encoding: {cpu_command}")
        try:
            result = subprocess.run(cpu_command, shell=True, capture_output=True, text=True, timeout=timeout)
        except subprocess.TimeoutExpired as e:
            error_msg = "Software encoding timed out"
            self.logger.error(error_msg)
            raise Exception(f"{error_msg}: {cpu_command}") from e
        except Exception as e:
            error_msg = f"Software encoding failed with exception: {e}"
            self.logger.error(error_msg)
            raise Exception(f"{error_msg}: {cpu_command}") from e

        # Checked outside the try so this failure is never mistaken for an
        # execution error and wrapped a second time.
        if result.returncode != 0:
            error_msg = f"Software encoding failed with exit code {result.returncode}"
            self.logger.error(error_msg)
            self.logger.error(f"CPU Command: {cpu_command}")
            if result.stderr:
                self.logger.error(f"FFmpeg STDERR: {result.stderr}")
            if result.stdout:
                self.logger.error(f"FFmpeg STDOUT: {result.stdout}")
            raise Exception(f"{error_msg}: {cpu_command}")

        self.logger.info("✓ Software encoding successful")

    def process_video_background(
        self, video_path, audio_path, ass_subtitles_path, output_path, darkness_percent=0, audio_duration=None
    ):
        """
        Process video background with scaling, looping/trimming, darkening, and subtitle rendering.

        Args:
            video_path: Path to input video file
            audio_path: Path to audio file (used for duration and audio track)
            ass_subtitles_path: Path to ASS subtitle file
            output_path: Path to output video file
            darkness_percent: Darkness overlay percentage (0-100), default 0
            audio_duration: Optional pre-calculated audio duration (will calculate if not provided)

        Returns:
            str: Path to output file

        Raises:
            FileNotFoundError: If the video, audio or subtitle file does not exist
            ValueError: If darkness_percent is outside 0-100
            Exception: If video processing fails
        """
        self.logger.info(f"Processing video background: {video_path}")
        self.logger.info(f" Output: {output_path}")
        self.logger.info(f" Darkness: {darkness_percent}%")

        # Validate inputs
        if not os.path.isfile(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")
        if not os.path.isfile(ass_subtitles_path):
            raise FileNotFoundError(f"ASS subtitle file not found: {ass_subtitles_path}")

        # Validate darkness parameter
        if not 0 <= darkness_percent <= 100:
            raise ValueError(f"Darkness percentage must be between 0 and 100, got {darkness_percent}")

        # Get audio duration if not provided.
        # The value is not used in the commands below (-shortest does the
        # trimming), but the probe still fails early on unreadable audio.
        if audio_duration is None:
            audio_duration = self.get_audio_duration(audio_path)

        # Check for optional fonts directory (matching video.py behavior)
        fonts_dir = os.environ.get("KARAOKE_FONTS_DIR")

        # Build video filter chain
        vf_filter = self.build_video_filter(ass_subtitles_path, darkness_percent, fonts_dir)

        # Build commands for GPU and CPU encoding
        gpu_command, cpu_command = self._build_ffmpeg_commands(video_path, audio_path, output_path, vf_filter)

        # Execute with fallback
        self.execute_command_with_fallback(
            gpu_command, cpu_command, "Rendering video with background, subtitles, and effects"
        )

        # Verify output was created
        if not os.path.isfile(output_path):
            raise Exception(f"Output video file was not created: {output_path}")

        self.logger.info(f"✓ Video background processing complete: {output_path}")
        return output_path
351
+