karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,544 @@
1
+ import logging
2
+ import os
3
+ import json
4
+ import subprocess
5
+ from typing import List, Optional, Tuple
6
+
7
+
8
+ class VideoGenerator:
9
+ """Handles generation of video files with lyrics overlay."""
10
+
11
def __init__(
    self,
    output_dir: str,
    cache_dir: str,
    video_resolution: Tuple[int, int],
    styles: dict,
    logger: Optional[logging.Logger] = None,
):
    """Store rendering configuration and choose the video encoder.

    Args:
        output_dir: Directory that generated output files are written to
        cache_dir: Directory used for temporary files
        video_resolution: (width, height) of the rendered video
        styles: Styling configuration; its "karaoke" section provides
            "background_image" and "background_color"
        logger: Logger to use; falls back to this module's logger

    Raises:
        ValueError: If a resolution dimension is not greater than 0
        FileNotFoundError: If a background image is configured but is not a file
    """
    # Reject the first non-positive dimension before touching any state.
    for dimension in video_resolution:
        if not dimension > 0:
            raise ValueError("Video resolution dimensions must be greater than 0")

    self.output_dir = output_dir
    self.cache_dir = cache_dir
    self.video_resolution = video_resolution
    self.styles = styles
    self.logger = logger or logging.getLogger(__name__)

    # Background settings live under the "karaoke" key; colour defaults to black.
    karaoke_section = styles.get("karaoke", {})
    self.background_image = karaoke_section.get("background_image")
    self.background_color = karaoke_section.get("background_color", "black")

    # A configured background image must exist on disk.
    image_configured = bool(self.background_image)
    if image_configured and not os.path.isfile(self.background_image):
        raise FileNotFoundError(f"Video background image not found: {self.background_image}")

    # Probe for NVENC once, then derive encoder name and hwaccel flags from it.
    self.nvenc_available = self.detect_nvenc_support()
    self.configure_hardware_acceleration()
49
+
50
def detect_nvenc_support(self):
    """Detect if NVENC hardware encoding is available.

    The verdict comes only from actually encoding a tiny synthetic clip with
    ``h264_nvenc`` (two attempts with different sources). The ``nvidia-smi``,
    ``ffmpeg -encoders``, ``ffmpeg -hwaccels`` and ``ldconfig -p`` calls are
    logged for diagnostics and never influence the result.

    Returns:
        True if one of the two ``h264_nvenc`` test encodes exits with code 0,
        otherwise False. A timeout or any other error during the test encodes
        is logged and reported as False; this method does not raise.
    """
    import traceback

    def probe(cmd, timeout):
        # All probes share the same capture/text settings; only cmd and timeout vary.
        return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)

    try:
        self.logger.info("🔍 Detecting NVENC hardware acceleration...")

        # Diagnostic: is an NVIDIA GPU visible to the driver tools?
        try:
            nvidia_result = probe(
                ["nvidia-smi", "--query-gpu=name,driver_version", "--format=csv,noheader"], 10
            )
            if nvidia_result.returncode == 0:
                self.logger.info(f" ✓ NVIDIA GPU detected: {nvidia_result.stdout.strip()}")
            else:
                self.logger.debug(f"nvidia-smi failed: {nvidia_result.stderr}")
        except Exception as e:
            self.logger.debug(f"nvidia-smi not available: {e}")

        # Diagnostic: does this FFmpeg build list any NVENC encoders?
        try:
            encoders_result = probe(["ffmpeg", "-hide_banner", "-encoders"], 10)
            if encoders_result.returncode == 0:
                nvenc_encoders = [
                    line for line in encoders_result.stdout.split('\n') if 'nvenc' in line.lower()
                ]
                if nvenc_encoders:
                    self.logger.debug(f"Found NVENC encoders: {[e.strip() for e in nvenc_encoders]}")
                else:
                    self.logger.debug("No NVENC encoders found in FFmpeg encoder list")
        except Exception as e:
            self.logger.debug(f"Error listing FFmpeg encoders: {e}")

        # Decisive test 1: encode one second of test pattern with h264_nvenc.
        test_cmd = [
            "ffmpeg", "-hide_banner", "-loglevel", "error",
            "-f", "lavfi", "-i", "testsrc=duration=1:size=320x240:rate=1",
            "-c:v", "h264_nvenc", "-f", "null", "-"
        ]
        self.logger.debug(f"Testing NVENC: {' '.join(test_cmd)}")
        result = probe(test_cmd, 30)
        if result.returncode == 0:
            self.logger.info(" ✓ NVENC encoding available")
            return True

        # NVENC test failed - log details at debug level
        self.logger.debug(f"NVENC test failed (return code {result.returncode})")
        self.logger.debug(f"NVENC test stderr: {result.stderr}")

        # Decisive test 2: a shorter solid-colour source with an explicit preset.
        alt_test_cmd = [
            "ffmpeg", "-hide_banner", "-loglevel", "error",
            "-f", "lavfi", "-i", "color=red:size=320x240:duration=0.1",
            "-c:v", "h264_nvenc", "-preset", "fast", "-f", "null", "-"
        ]
        alt_result = probe(alt_test_cmd, 30)
        if alt_result.returncode == 0:
            self.logger.info(" ✓ NVENC encoding available")
            return True

        self.logger.debug(f"Alternative NVENC test also failed: {alt_result.stderr}")

        # Diagnostic: which hardware accelerators does FFmpeg report?
        try:
            cuda_result = probe(["ffmpeg", "-hide_banner", "-loglevel", "error", "-hwaccels"], 10)
            if cuda_result.returncode == 0:
                hwaccels = cuda_result.stdout.strip()
                self.logger.debug(f"FFmpeg hardware accelerators: {hwaccels}")
                if 'cuda' in hwaccels:
                    self.logger.debug("CUDA found in FFmpeg but NVENC still not working")
        except Exception as e:
            self.logger.debug(f"Error checking CUDA: {e}")

        # Diagnostic: are the CUDA / NVENC shared libraries registered with the loader?
        try:
            ldconfig_result = probe(["ldconfig", "-p"], 10)
            if ldconfig_result.returncode == 0:
                has_libcuda = "libcuda.so.1" in ldconfig_result.stdout
                has_nvenc_lib = "libnvidia-encode.so" in ldconfig_result.stdout
                self.logger.debug(f"CUDA libraries: libcuda.so.1={has_libcuda}, libnvidia-encode.so={has_nvenc_lib}")
                if not has_libcuda:
                    self.logger.debug("Missing libcuda.so.1 - may need nvidia/cuda:*-devel image")
        except Exception as e:
            self.logger.debug(f"Error checking CUDA libraries: {e}")

        self.logger.info(" ✗ NVENC not available")
        return False

    except subprocess.TimeoutExpired:
        # Only the two decisive test encodes can reach this handler; the
        # diagnostic probes swallow their own exceptions above.
        self.logger.debug("NVENC detection timed out")
        self.logger.info(" ✗ NVENC not available (timeout)")
        return False
    except Exception as e:
        self.logger.debug(f"Failed to detect NVENC support: {e}")
        self.logger.debug(f"Full traceback: {traceback.format_exc()}")
        self.logger.info(" ✗ NVENC not available (error)")
        return False
157
+
158
def configure_hardware_acceleration(self):
    """Set ``video_encoder`` and ``hwaccel_flags`` from ``self.nvenc_available``."""
    if not self.nvenc_available:
        # Software path: libx264 with no hwaccel input flags.
        self.video_encoder = "libx264"
        self.hwaccel_flags = []
        self.logger.info("🔧 Using software encoding (libx264) for video")
        return

    # Hardware path: NVENC encoder plus CUDA hwaccel input flags.
    self.video_encoder = "h264_nvenc"
    self.hwaccel_flags = ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"]
    self.logger.info("🚀 Using NVENC hardware acceleration for video encoding")
168
+
169
def get_nvenc_settings(self, quality_mode="high", is_preview=False):
    """Return the extra FFmpeg encoder arguments to use with NVENC.

    Args:
        quality_mode: "high" selects the high-quality argument set; any other
            value selects the balanced set
        is_preview: When true, the fast preview set is returned and
            quality_mode is ignored

    Returns:
        A list of FFmpeg arguments; empty when NVENC is not available
    """
    if not self.nvenc_available:
        return []

    if is_preview:
        # Fastest preset, low-latency tuning, variable bitrate.
        return ["-preset", "p1", "-tune", "ll", "-rc", "vbr"]

    # Both non-preview modes start from the balanced p4 preset.
    settings = ["-preset", "p4"]
    if quality_mode == "high":
        settings += [
            "-tune", "hq",         # High quality tuning
            "-rc", "vbr",          # Variable bitrate
            "-cq", "18",           # Constant quality target
            "-spatial-aq", "1",    # Spatial adaptive quantization
            "-temporal-aq", "1",   # Temporal adaptive quantization
        ]
    else:
        settings += ["-rc", "vbr"]
    return settings
197
+
198
def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
    """Generate the "(With Vocals)" MKV video with lyrics overlay.

    The subtitles file is copied into the cache directory under a sanitized,
    timestamped name and that copy is what FFmpeg is given. The copy is removed
    afterwards whether or not generation succeeded; failing to remove it is
    logged as a warning and never changes the outcome.

    Args:
        ass_path: Path to ASS subtitles file
        audio_path: Path to audio file
        output_prefix: Prefix for output filename

    Returns:
        Path to generated video file

    Raises:
        FileNotFoundError: If ass_path or audio_path is not an existing file
        Exception: Any error raised while copying the subtitles or running
            FFmpeg is logged and re-raised unchanged
    """
    import shutil
    import time

    self.logger.info("Generating video with lyrics overlay")
    output_path = self._get_output_path(f"{output_prefix} (With Vocals)", "mkv")

    # Check input files exist before running FFmpeg
    if not os.path.isfile(ass_path):
        raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    temp_ass_path = None
    try:
        # Create a temporary copy of the ASS file with a unique filename
        safe_prefix = "".join(c if c.isalnum() else "_" for c in output_prefix)
        timestamp = int(time.time() * 1000)
        temp_ass_path = os.path.join(self.cache_dir, f"temp_subtitles_{safe_prefix}_{timestamp}.ass")

        shutil.copy2(ass_path, temp_ass_path)
        self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")

        cmd = self._build_ffmpeg_command(temp_ass_path, audio_path, output_path)
        self._run_ffmpeg_command(cmd)
        self.logger.info(f"Video generated: {output_path}")
        return output_path

    except Exception as e:
        self.logger.error(f"Failed to generate video: {str(e)}")
        raise

    finally:
        # Single cleanup path for success and failure. Cleanup is best-effort:
        # a leftover temp file must not turn a finished render into an error.
        if temp_ass_path is not None:
            try:
                os.remove(temp_ass_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                self.logger.warning(f"Could not remove temporary ASS file {temp_ass_path}: {cleanup_error}")
248
+
249
def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
    """Generate an MP4 preview video with lyrics overlay in the cache directory.

    The subtitles file is copied into the cache directory under a sanitized,
    timestamped name and that copy is what FFmpeg is given. The copy is removed
    afterwards whether or not generation succeeded; failing to remove it is
    logged as a warning and never changes the outcome.

    Args:
        ass_path: Path to ASS subtitles file
        audio_path: Path to audio file
        output_prefix: Prefix for output filename

    Returns:
        Path to generated preview video file
        (``<cache_dir>/<output_prefix>_preview.mp4``)

    Raises:
        FileNotFoundError: If ass_path or audio_path is not an existing file
        Exception: Any error raised while copying the subtitles or running
            FFmpeg is logged and re-raised unchanged
    """
    import shutil
    import time

    self.logger.info("Generating preview video with lyrics overlay")
    output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")

    # Check input files exist before running FFmpeg
    if not os.path.isfile(ass_path):
        raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    temp_ass_path = None
    try:
        # Create a temporary copy of the ASS file with a unique filename
        safe_prefix = "".join(c if c.isalnum() else "_" for c in output_prefix)
        timestamp = int(time.time() * 1000)
        temp_ass_path = os.path.join(self.cache_dir, f"temp_preview_subtitles_{safe_prefix}_{timestamp}.ass")

        shutil.copy2(ass_path, temp_ass_path)
        self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")

        cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
        self._run_ffmpeg_command(cmd)
        self.logger.info(f"Preview video generated: {output_path}")
        return output_path

    except Exception as e:
        self.logger.error(f"Failed to generate preview video: {str(e)}")
        raise

    finally:
        # Single cleanup path for success and failure. Cleanup is best-effort:
        # a leftover temp file must not turn a finished render into an error.
        if temp_ass_path is not None:
            try:
                os.remove(temp_ass_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                self.logger.warning(f"Could not remove temporary ASS file {temp_ass_path}: {cleanup_error}")
299
+
300
def _get_output_path(self, output_prefix: str, extension: str) -> str:
    """Return ``<output_dir>/<output_prefix>.<extension>``."""
    filename = "{}.{}".format(output_prefix, extension)
    return os.path.join(self.output_dir, filename)
303
+
304
def _resize_background_image(self, input_path: str) -> str:
    """Resize background image to match target resolution and save to temp file.

    ffprobe is asked for the image's current dimensions first. If they already
    equal the target resolution the original path is returned untouched. If
    the probe fails or returns nothing usable, a warning is logged and the
    resize is attempted anyway.

    Args:
        input_path: Path to the background image

    Returns:
        input_path when the image is already at the target resolution,
        otherwise ``<cache_dir>/resized_background.png``

    Raises:
        subprocess.CalledProcessError: If the FFmpeg resize command fails
    """
    target_width, target_height = self.video_resolution

    # Get current image dimensions using ffprobe
    try:
        probe_cmd = [
            "ffprobe",
            "-v",
            "error",
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=width,height",
            "-of",
            "json",
            input_path,
        ]
        probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
        probe_data = json.loads(probe_output)
        current_width = probe_data["streams"][0]["width"]
        current_height = probe_data["streams"][0]["height"]

        # If dimensions already match, return original path
        if current_width == target_width and current_height == target_height:
            self.logger.debug("Background image already at target resolution")
            return input_path

    except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError, IndexError) as e:
        # IndexError covers a well-formed reply whose "streams" list is empty.
        self.logger.warning(f"Failed to get image dimensions: {e}")
        # Continue with resize attempt if probe fails

    # Fit inside the target size while keeping aspect ratio, then pad the
    # remainder so the output is exactly target_width x target_height.
    temp_path = os.path.join(self.cache_dir, "resized_background.png")
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        input_path,
        "-vf",
        f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease,"
        f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2",
        temp_path,
    ]

    try:
        subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
        return temp_path
    except subprocess.CalledProcessError as e:
        self.logger.error(f"Failed to resize background image: {e.output}")
        raise
354
+
355
def _build_ass_filter(self, ass_path: str) -> str:
    """Return the FFmpeg ``ass=`` filter string for a subtitle file.

    When ``self.styles["karaoke"]["font_path"]`` names an existing file, the
    directory holding that file is appended as ``:fontsdir=<dir>`` and the
    resulting filter is logged at info level.

    Args:
        ass_path: Path of the ASS subtitle file to render.

    Returns:
        ``ass=<ass_path>``, optionally followed by ``:fontsdir=<dir>``.
    """
    configured_font = self.styles.get("karaoke", {}).get("font_path")

    # No usable font file configured: the bare filter is all we need.
    if not configured_font or not os.path.isfile(configured_font):
        return f"ass={ass_path}"

    fonts_dir = os.path.dirname(configured_font)
    filter_spec = f"ass={ass_path}:fontsdir={fonts_dir}"
    self.logger.info(f"Returning ASS filter with fonts dir: {filter_spec}")
    return filter_spec
369
+
370
def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
    """Assemble the FFmpeg argument list for the full-quality video.

    The command combines a background (the resized background image when
    ``self.background_image`` is set, otherwise a solid colour source at
    ``self.video_resolution``), the audio track re-encoded as FLAC, and the
    subtitles rendered through the filter from ``_build_ass_filter``.
    Rate-control options depend on ``self.nvenc_available``.

    Args:
        ass_path: Path of the ASS subtitle file.
        audio_path: Path of the audio input.
        output_path: Destination path, placed last in the command.

    Returns:
        The complete FFmpeg command as a list of arguments.
    """
    width, height = self.video_resolution

    # Global options and the 30 fps rate flag, followed by whatever
    # hardware-acceleration flags are configured.
    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-r", "30", *self.hwaccel_flags]

    # First input: the background.
    if not self.background_image:
        self.logger.debug(
            f"Using solid {self.background_color} background "
            f"with resolution: {width}x{height}"
        )
        cmd += ["-f", "lavfi", "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"]
    else:
        # The image is brought to the target resolution before being looped.
        resized_bg = self._resize_background_image(self.background_image)
        self.logger.debug(f"Using resized background image: {resized_bg}")
        cmd += ["-loop", "1", "-i", resized_bg]

    # Second input (audio), FLAC audio codec, subtitle filter, video encoder.
    subtitle_filter = self._build_ass_filter(ass_path)
    cmd += [
        "-i", audio_path,
        "-c:a", "flac",
        "-vf", subtitle_filter,
        "-c:v", self.video_encoder,
    ]

    # Encoder-specific rate control.
    if self.nvenc_available:
        cmd += self.get_nvenc_settings("high", is_preview=False)
        cmd += ["-b:v", "8000k", "-maxrate", "15000k", "-bufsize", "16000k"]
        self.logger.debug("Using NVENC encoding for high-quality video generation")
    else:
        cmd += [
            "-preset", "fast",
            "-b:v", "5000k",
            "-minrate", "5000k",
            "-maxrate", "20000k",
            "-bufsize", "10000k",
        ]
        self.logger.debug("Using software encoding for video generation")

    # Stop at the shortest stream, overwrite without prompting, then the output.
    cmd += ["-shortest", "-y", output_path]
    return cmd
441
+
442
def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
    """Assemble the FFmpeg argument list for a fast, low-resolution preview.

    The preview is fixed at 480x270 and 24 fps with 96k AAC audio. When
    ``self.background_image`` is set the original image is fed in directly
    and scaled/padded inside the filter chain; otherwise a solid colour
    source of the preview size is used. Encoder options depend on
    ``self.nvenc_available``.

    Args:
        ass_path: Path of the ASS subtitle file.
        audio_path: Path of the audio input.
        output_path: Destination path, placed last in the command.

    Returns:
        The complete FFmpeg command as a list of arguments.
    """
    width, height = 480, 270

    # Global options and the 24 fps rate flag, followed by whatever
    # hardware-acceleration flags are configured.
    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-r", "24", *self.hwaccel_flags]

    if self.background_image:
        # The image is not pre-resized for previews; scaling happens in the filter.
        self.logger.debug(f"Using original background image for preview: {self.background_image}")
        cmd += ["-loop", "1", "-i", self.background_image]
        scale_and_pad = (
            f"scale={width}:{height}:force_original_aspect_ratio=decrease,"
            f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2"
        )
        video_filter = f"{scale_and_pad},{self._build_ass_filter(ass_path)}"
    else:
        self.logger.debug(
            f"Using solid {self.background_color} background "
            f"with resolution: {width}x{height}"
        )
        cmd += ["-f", "lavfi", "-i", f"color=c={self.background_color}:s={width}x{height}:r=24"]
        # The colour source is already preview-sized, so only subtitles are applied.
        video_filter = self._build_ass_filter(ass_path)

    cmd += [
        "-i", audio_path,
        "-vf", video_filter,
        "-c:a", "aac",
        "-b:a", "96k",
        "-c:v", self.video_encoder,
    ]

    if self.nvenc_available:
        # NVENC: fastest preset (p1), low-latency tune, constant bitrate.
        cmd += [
            "-preset", "p1",
            "-tune", "ll",
            "-rc", "cbr",
            "-b:v", "800k",
            "-profile:v", "baseline",
            "-level", "3.1",
        ]
        self.logger.debug("Using NVENC encoding with maximum speed settings for preview video generation")
    else:
        # Software: baseline profile with the "superfast" preset (the second
        # fastest x264 preset; only "ultrafast" is quicker).
        cmd += [
            "-profile:v", "baseline",
            "-level", "3.0",
            "-preset", "superfast",
            "-tune", "fastdecode",
            "-b:v", "600k",
            "-maxrate", "800k",
            "-bufsize", "1200k",
            "-crf", "28",
        ]
        self.logger.debug("Using software encoding with maximum speed settings for preview video generation")

    # Browser-friendly pixel format, streaming-oriented container flags,
    # a fixed 48-frame GOP (2 seconds at 24 fps) with scene-cut detection off,
    # automatic thread count, then the usual -shortest / -y and the output.
    cmd += [
        "-pix_fmt", "yuv420p",
        "-movflags", "+faststart+frag_keyframe+empty_moov+dash",
        "-g", "48",
        "-keyint_min", "48",
        "-sc_threshold", "0",
        "-threads", "0",
        "-shortest",
        "-y",
        output_path,
    ]
    return cmd
528
+
529
def _get_video_codec(self) -> str:
    """Return ``self.video_encoder`` (deprecated accessor).

    Retained for backwards compatibility; each call logs a warning pointing
    callers at ``self.video_encoder``.

    Returns:
        The current value of ``self.video_encoder``.
    """
    deprecation_notice = "_get_video_codec is deprecated, use self.video_encoder instead"
    self.logger.warning(deprecation_notice)
    return self.video_encoder
535
+
536
def _run_ffmpeg_command(self, cmd: List[str]) -> None:
    """Run a command, logging its combined stdout/stderr.

    The command line is logged at debug level before execution. On success
    the captured output is logged at debug level; on a non-zero exit status
    the output is logged at error level and the exception is re-raised.

    Args:
        cmd: The command and its arguments.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    command_line = " ".join(cmd)
    self.logger.debug(f"Running FFmpeg command: {command_line}")
    try:
        # stderr is folded into stdout so a single string holds everything.
        captured = subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
    except subprocess.CalledProcessError as exc:
        self.logger.error(f"FFmpeg error: {exc.output}")
        raise
    else:
        self.logger.debug(f"FFmpeg output: {captured}")
File without changes