karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,718 @@
1
+ Metadata-Version: 2.4
2
+ Name: karaoke-gen
3
+ Version: 0.75.54
4
+ Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Andrew Beveridge
8
+ Author-email: andrew@beveridge.uk
9
+ Requires-Python: >=3.10,<3.14
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: argparse (>=1.4.0)
17
+ Requires-Dist: attrs (>=24.2.0)
18
+ Requires-Dist: audio-separator[cpu] (>=0.34.0)
19
+ Requires-Dist: beautifulsoup4 (>=4)
20
+ Requires-Dist: cattrs (>=24.1.2)
21
+ Requires-Dist: dropbox (>=12)
22
+ Requires-Dist: fastapi (>=0.104.0)
23
+ Requires-Dist: fetch-lyrics-from-genius (>=0.1)
24
+ Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
25
+ Requires-Dist: flacfetch (>=0.9.0)
26
+ Requires-Dist: fonttools (>=4.55)
27
+ Requires-Dist: google-api-python-client
28
+ Requires-Dist: google-auth
29
+ Requires-Dist: google-auth-httplib2
30
+ Requires-Dist: google-auth-oauthlib
31
+ Requires-Dist: google-cloud-firestore (>=2.14.0)
32
+ Requires-Dist: google-cloud-run (>=0.10.0)
33
+ Requires-Dist: google-cloud-secret-manager (>=2.18.0)
34
+ Requires-Dist: google-cloud-storage (>=2.14.0)
35
+ Requires-Dist: google-cloud-tasks (>=2.16.0)
36
+ Requires-Dist: httpx (>=0.25.0)
37
+ Requires-Dist: jiwer (>=3.0.0)
38
+ Requires-Dist: karaoke-lyrics-processor (>=0.6)
39
+ Requires-Dist: kbputils (>=0.0.16,<0.0.17)
40
+ Requires-Dist: langchain (>=0.3.0)
41
+ Requires-Dist: langchain-anthropic (>=0.2.0)
42
+ Requires-Dist: langchain-core (>=0.3.0)
43
+ Requires-Dist: langchain-ollama (>=0.2.0)
44
+ Requires-Dist: langchain-openai (>=0.2.0)
45
+ Requires-Dist: langfuse (>=3.0.0)
46
+ Requires-Dist: langgraph (>=0.2.0)
47
+ Requires-Dist: lyrics-converter (>=0.2.1)
48
+ Requires-Dist: lyricsgenius (>=3)
49
+ Requires-Dist: matplotlib (>=3)
50
+ Requires-Dist: metaphone (>=0.6)
51
+ Requires-Dist: mutagen (>=1.47)
52
+ Requires-Dist: nest-asyncio (>=1.5)
53
+ Requires-Dist: nltk (>=3.9)
54
+ Requires-Dist: numpy (>=2)
55
+ Requires-Dist: ollama (>=0.4.7)
56
+ Requires-Dist: openai (>=1.63.2)
57
+ Requires-Dist: opentelemetry-api (>=1.20.0)
58
+ Requires-Dist: opentelemetry-exporter-gcp-trace (>=1.6.0)
59
+ Requires-Dist: opentelemetry-instrumentation-fastapi (>=0.41b0)
60
+ Requires-Dist: opentelemetry-instrumentation-httpx (>=0.41b0)
61
+ Requires-Dist: opentelemetry-instrumentation-logging (>=0.41b0)
62
+ Requires-Dist: opentelemetry-resourcedetector-gcp (>=1.6.0a0)
63
+ Requires-Dist: opentelemetry-sdk (>=1.20.0)
64
+ Requires-Dist: pillow (>=10.1)
65
+ Requires-Dist: psutil (>=7.0.0,<8.0.0)
66
+ Requires-Dist: pydantic (>=2.5.0)
67
+ Requires-Dist: pydantic-settings (>=2.1.0)
68
+ Requires-Dist: pydub (>=0.25.1)
69
+ Requires-Dist: pyinstaller (>=6.3)
70
+ Requires-Dist: pyperclip
71
+ Requires-Dist: pytest-asyncio
72
+ Requires-Dist: python-dotenv (>=1.0.0)
73
+ Requires-Dist: python-levenshtein (>=0.26)
74
+ Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
75
+ Requires-Dist: python-slugify (>=8)
76
+ Requires-Dist: requests (>=2)
77
+ Requires-Dist: shortuuid (>=1.0.13)
78
+ Requires-Dist: spacy (>=3.8.7)
79
+ Requires-Dist: spacy-syllables (>=3)
80
+ Requires-Dist: srsly (>=2.5.1)
81
+ Requires-Dist: syllables (>=1)
82
+ Requires-Dist: syrics (>=0)
83
+ Requires-Dist: thefuzz (>=0.22)
84
+ Requires-Dist: toml (>=0.10)
85
+ Requires-Dist: torch (>=2.7)
86
+ Requires-Dist: tqdm (>=4.67)
87
+ Requires-Dist: transformers (>=4.47)
88
+ Requires-Dist: uvicorn[standard] (>=0.24.0)
89
+ Requires-Dist: yt-dlp (>=2024.0.0)
90
+ Project-URL: Documentation, https://github.com/nomadkaraoke/karaoke-gen/blob/main/README.md
91
+ Project-URL: Homepage, https://github.com/nomadkaraoke/karaoke-gen
92
+ Project-URL: Repository, https://github.com/nomadkaraoke/karaoke-gen
93
+ Description-Content-Type: text/markdown
94
+
95
+ # Karaoke Generator 🎶 🎥 🚀
96
+
97
+ ![PyPI - Version](https://img.shields.io/pypi/v/karaoke-gen)
98
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/karaoke-gen)
99
+ ![Tests](https://github.com/nomadkaraoke/karaoke-gen/workflows/Test%20and%20Publish/badge.svg)
100
+ ![Test Coverage](https://codecov.io/gh/nomadkaraoke/karaoke-gen/branch/main/graph/badge.svg)
101
+
102
+ Generate professional karaoke videos with instrumental audio and synchronized lyrics. Available as a **local CLI** (`karaoke-gen`) or **cloud-based CLI** (`karaoke-gen-remote`) that offloads processing to Google Cloud.
103
+
104
+ ## ✨ Two Ways to Generate Karaoke
105
+
106
+ ### 1. Local CLI (`karaoke-gen`)
107
+ Run all processing locally on your machine. Requires GPU for optimal audio separation performance.
108
+
109
+ ```bash
110
+ karaoke-gen "ABBA" "Waterloo"
111
+ ```
112
+
113
+ ### 2. Remote CLI (`karaoke-gen-remote`)
114
+ Offload all processing to a cloud backend. No GPU required - just authenticate and submit jobs.
115
+
116
+ ```bash
117
+ karaoke-gen-remote ./song.flac "ABBA" "Waterloo"
118
+ ```
119
+
120
+ Both CLIs produce identical outputs: 4K karaoke videos, CDG+MP3 packages, audio stems, and more.
121
+
122
+ ---
123
+
124
+ ## 🎯 Features
125
+
126
+ ### Core Pipeline
127
+ - **Audio Separation**: AI-powered vocal/instrumental separation using MDX and Demucs models
128
+ - **Lyrics Transcription**: Word-level timestamps via AudioShake API
129
+ - **Lyrics Correction**: Match transcription against online lyrics (Genius, Spotify, Musixmatch)
130
+ - **Human Review**: Interactive UI for correcting lyrics before final render
131
+ - **Video Rendering**: High-quality 4K karaoke videos with customizable styles
132
+ - **Multiple Outputs**: MP4 (4K lossless/lossy, 720p), MKV, CDG+MP3, TXT+MP3
133
+
134
+ ### Distribution Features
135
+ - **YouTube Upload**: Automatic upload to your YouTube channel
136
+ - **Dropbox Integration**: Organize output in brand-coded folders
137
+ - **Google Drive**: Upload to public share folders
138
+ - **Discord Notifications**: Webhook notifications on completion
139
+
140
+ ---
141
+
142
+ ## 📦 Installation
143
+
144
+ ```bash
145
+ pip install karaoke-gen
146
+ ```
147
+
148
+ This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
149
+
150
+ ### Requirements
151
+ - Python 3.10-3.13
152
+ - FFmpeg
153
+ - For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
154
+
155
+ ### Transcription Provider Setup
156
+
157
+ **Transcription is required** for creating karaoke videos with synchronized lyrics. The system needs word-level timing data to display lyrics in sync with the music.
158
+
159
+ #### Option 1: AudioShake (Recommended)
160
+ Commercial service with high-quality transcription. Best for production use.
161
+
162
+ ```bash
163
+ export AUDIOSHAKE_API_TOKEN="your_audioshake_token"
164
+ ```
165
+
166
+ Get an API key at [https://www.audioshake.ai/](https://www.audioshake.ai/) (available to business customers only at the time of writing).
167
+
168
+ #### Option 2: Whisper via RunPod
169
+ Open-source alternative using OpenAI's Whisper model on RunPod infrastructure.
170
+
171
+ ```bash
172
+ export RUNPOD_API_KEY="your_runpod_key"
173
+ export WHISPER_RUNPOD_ID="your_whisper_endpoint_id"
174
+ ```
175
+
176
+ Set up a Whisper endpoint at [https://www.runpod.io/](https://www.runpod.io/)
177
+
178
+ #### Without Transcription (Instrumental Only)
179
+ If you don't need synchronized lyrics, use the `--skip-lyrics` flag:
180
+
181
+ ```bash
182
+ karaoke-gen --skip-lyrics "Artist" "Title"
183
+ ```
184
+
185
+ This creates an instrumental-only karaoke video without lyrics overlay.
186
+
187
+ > **Note:** See `lyrics_transcriber_temp/README.md` for detailed transcription provider configuration options.
188
+
189
+ ---
190
+
191
+ ## 🖥️ Local CLI (`karaoke-gen`)
192
+
193
+ ### Basic Usage
194
+
195
+ ```bash
196
+ # Generate from local audio file
197
+ karaoke-gen ./song.mp3 "Artist Name" "Song Title"
198
+
199
+ # Search and download audio automatically
200
+ karaoke-gen "Rick Astley" "Never Gonna Give You Up"
201
+
202
+ # Process from YouTube URL
203
+ karaoke-gen "https://www.youtube.com/watch?v=dQw4w9WgXcQ" "Rick Astley" "Never Gonna Give You Up"
204
+ ```
205
+
206
+ ### Remote Audio Separation (Optional)
207
+
208
+ Offload just the GPU-intensive audio separation to Modal.com while keeping other processing local:
209
+
210
+ ```bash
211
+ export AUDIO_SEPARATOR_API_URL="https://USERNAME--audio-separator-api.modal.run"
212
+ karaoke-gen "Artist" "Title"
213
+ ```
214
+
215
+ ### Key Options
216
+
217
+ ```bash
218
+ # Custom styling
219
+ karaoke-gen --style_params_json="./styles.json" "Artist" "Title"
220
+
221
+ # Generate CDG and TXT packages
222
+ karaoke-gen --enable_cdg --enable_txt "Artist" "Title"
223
+
224
+ # YouTube upload
225
+ karaoke-gen --enable_youtube_upload --youtube_description_file="./desc.txt" "Artist" "Title"
226
+
227
+ # Full production run
228
+ karaoke-gen \
229
+ --style_params_json="./branding.json" \
230
+ --enable_cdg \
231
+ --enable_txt \
232
+ --brand_prefix="BRAND" \
233
+ --enable_youtube_upload \
234
+ --youtube_description_file="./description.txt" \
235
+ "Artist" "Title"
236
+ ```
237
+
238
+ ### Full Options Reference
239
+
240
+ ```bash
241
+ karaoke-gen --help
242
+ ```
243
+
244
+ ---
245
+
246
+ ## ☁️ Remote CLI (`karaoke-gen-remote`)
247
+
248
+ The remote CLI submits jobs to a Google Cloud backend that handles all processing. You don't need a GPU or any audio processing libraries installed locally.
249
+
250
+ ### Setup
251
+
252
+ 1. **Set the backend URL:**
253
+ ```bash
254
+ export KARAOKE_GEN_URL="https://api.nomadkaraoke.com" # Or your own backend
255
+ ```
256
+
257
+ 2. **Authenticate with Google Cloud:**
258
+ ```bash
259
+ gcloud auth login
260
+ ```
261
+
262
+ ### Basic Usage
263
+
264
+ ```bash
265
+ # Submit a job
266
+ karaoke-gen-remote ./song.flac "ABBA" "Waterloo"
267
+
268
+ # The CLI will:
269
+ # 1. Upload your audio file
270
+ # 2. Monitor processing progress
271
+ # 3. Open lyrics review UI when ready
272
+ # 4. Prompt for instrumental selection
273
+ # 5. Download all outputs when complete
274
+ ```
275
+
276
+ ### Job Management
277
+
278
+ ```bash
279
+ # List all jobs
280
+ karaoke-gen-remote --list
281
+
282
+ # Resume monitoring an existing job
283
+ karaoke-gen-remote --resume abc12345
284
+
285
+ # Cancel a running job
286
+ karaoke-gen-remote --cancel abc12345
287
+
288
+ # Delete a job and its files
289
+ karaoke-gen-remote --delete abc12345
290
+ ```
291
+
292
+ ### Full Production Run
293
+
294
+ ```bash
295
+ karaoke-gen-remote \
296
+ --style_params_json="./karaoke-styles.json" \
297
+ --enable_cdg \
298
+ --enable_txt \
299
+ --brand_prefix=NOMAD \
300
+ --enable_youtube_upload \
301
+ --youtube_description_file="./youtube-description.txt" \
302
+ ./song.flac "Artist" "Title"
303
+ ```
304
+
305
+ ### Environment Variables
306
+
307
+ | Variable | Description | Default |
308
+ |----------|-------------|---------|
309
+ | `KARAOKE_GEN_URL` | Backend service URL | Required |
310
+ | `KARAOKE_GEN_AUTH_TOKEN` | Admin auth token (for protected endpoints) | Optional |
311
+ | `REVIEW_UI_URL` | Lyrics review UI URL | `https://lyrics.nomadkaraoke.com` |
312
+ | `POLL_INTERVAL` | Seconds between status polls | `5` |
313
+
314
+ **Note:** The `REVIEW_UI_URL` defaults to the hosted lyrics review UI. For local development, set it to `http://localhost:5173` if you're running the frontend dev server.
315
+
316
+ ### Authentication
317
+
318
+ The backend uses token-based authentication for admin operations (bulk delete, internal worker triggers). For basic job submission and monitoring, authentication is optional.
319
+
320
+ **For admin access:**
321
+ ```bash
322
+ export KARAOKE_GEN_AUTH_TOKEN="your-admin-token"
323
+ ```
324
+
325
+ The token must match one of the tokens configured in the backend's `ADMIN_TOKENS` environment variable.
326
+
327
+ ### Non-Interactive Mode
328
+
329
+ For automated/CI usage:
330
+
331
+ ```bash
332
+ karaoke-gen-remote -y ./song.flac "Artist" "Title"
333
+ ```
334
+
335
+ The `-y` flag auto-accepts the default corrections and selects the clean instrumental.
336
+
337
+ ---
338
+
339
+ ## 🎨 Style Configuration
340
+
341
+ Create a `styles.json` file to customize the karaoke video appearance:
342
+
343
+ ```json
344
+ {
345
+ "intro": {
346
+ "video_duration": 5,
347
+ "background_image": "/path/to/title-background.png",
348
+ "font": "/path/to/Font.ttf",
349
+ "artist_color": "#ffdf6b",
350
+ "title_color": "#ffffff"
351
+ },
352
+ "karaoke": {
353
+ "background_image": "/path/to/karaoke-background.png",
354
+ "font_path": "/path/to/Font.ttf"
355
+ },
356
+ "end": {
357
+ "background_image": "/path/to/end-background.png"
358
+ },
359
+ "cdg": {
360
+ "font_path": "/path/to/Font.ttf",
361
+ "instrumental_background": "/path/to/cdg-background.png"
362
+ }
363
+ }
364
+ ```
365
+
366
+ When using `karaoke-gen-remote`, all referenced files are automatically uploaded with your job.
367
+
368
+ ---
369
+
370
+ ## 📤 Output Files
371
+
372
+ A completed job produces:
373
+
374
+ ```
375
+ BRAND-1234 - Artist - Title/
376
+ ├── Artist - Title (Final Karaoke Lossless 4k).mp4 # ProRes 4K
376
+ ├── Artist - Title (Final Karaoke Lossless 4k).mkv # FLAC audio 4K
377
+ ├── Artist - Title (Final Karaoke Lossy 4k).mp4 # H.264 4K
378
+ ├── Artist - Title (Final Karaoke Lossy 720p).mp4 # H.264 720p
379
+ ├── Artist - Title (Final Karaoke CDG).zip # CDG+MP3 package
380
+ ├── Artist - Title (Final Karaoke TXT).zip # TXT+MP3 package
381
+ ├── Artist - Title (Karaoke).cdg # Individual CDG
382
+ ├── Artist - Title (Karaoke).mp3 # Karaoke audio
383
+ ├── Artist - Title (Karaoke).lrc # LRC lyrics
384
+ ├── Artist - Title (Karaoke).ass # ASS subtitles
385
+ ├── Artist - Title (Title).mov # Title screen video
386
+ ├── Artist - Title (End).mov # End screen video
387
+ ├── Artist - Title (Instrumental...).flac # Clean instrumental
388
+ ├── Artist - Title (Instrumental +BV...).flac # With backing vocals
389
+ └── stems/ # All audio stems
390
+ ├── ...Vocals....flac
391
+ ├── ...Bass....flac
392
+ ├── ...Drums....flac
393
+ └── ...
395
+ ```
396
+
397
+ ---
398
+
399
+ ## 🏗️ Deploy Your Own Backend
400
+
401
+ The cloud backend runs on Google Cloud Platform using:
402
+ - **Cloud Run**: Serverless API hosting
403
+ - **Firestore**: Job state management
404
+ - **Cloud Storage**: File uploads and outputs
405
+ - **Modal.com**: GPU-accelerated audio separation
406
+ - **AudioShake**: Lyrics transcription API
407
+
408
+ ### Prerequisites
409
+
410
+ - Google Cloud account with billing enabled
411
+ - [Pulumi CLI](https://www.pulumi.com/docs/install/)
412
+ - Modal.com account (for audio separation)
413
+ - AudioShake API key
414
+
415
+ ### Infrastructure Setup
416
+
417
+ ```bash
418
+ cd infrastructure
419
+
420
+ # Install dependencies
421
+ pip install -r requirements.txt
422
+
423
+ # Login to Pulumi
424
+ pulumi login
425
+
426
+ # Create a stack
427
+ pulumi stack init prod
428
+
429
+ # Configure GCP project
430
+ pulumi config set gcp:project your-project-id
431
+ pulumi config set gcp:region us-central1
432
+
433
+ # Deploy infrastructure
434
+ pulumi up
435
+ ```
436
+
437
+ This creates:
438
+ - Firestore database
439
+ - Cloud Storage bucket
440
+ - Artifact Registry
441
+ - Service account with IAM roles
442
+ - Secret Manager secrets (you add values)
443
+
444
+ ### Add Secret Values
445
+
446
+ ```bash
447
+ # AudioShake API key
448
+ echo -n "your-audioshake-key" | gcloud secrets versions add audioshake-api-key --data-file=-
449
+
450
+ # Genius API key
451
+ echo -n "your-genius-key" | gcloud secrets versions add genius-api-key --data-file=-
452
+
453
+ # Modal API URL
454
+ echo -n "https://your-modal-url" | gcloud secrets versions add audio-separator-api-url --data-file=-
455
+
456
+ # YouTube OAuth credentials (JSON)
457
+ gcloud secrets versions add youtube-oauth-credentials --data-file=./youtube-creds.json
458
+
459
+ # Dropbox OAuth credentials (JSON)
460
+ gcloud secrets versions add dropbox-oauth-credentials --data-file=./dropbox-creds.json
461
+
462
+ # Google Drive service account (JSON)
463
+ gcloud secrets versions add gdrive-service-account --data-file=./gdrive-sa.json
464
+ ```
465
+
466
+ ### Deploy Cloud Run
467
+
468
+ ```bash
469
+ # Build and deploy
470
+ gcloud builds submit --config=cloudbuild.yaml
471
+
472
+ # Get outputs from Pulumi
473
+ SA_EMAIL=$(pulumi stack output service_account_email)
474
+ BUCKET_NAME=$(pulumi stack output bucket_name)
475
+
476
+ # Deploy Cloud Run service
477
+ gcloud run deploy karaoke-backend \
478
+ --image us-central1-docker.pkg.dev/YOUR-PROJECT/karaoke-repo/karaoke-backend:latest \
479
+ --platform managed \
480
+ --region us-central1 \
481
+ --allow-unauthenticated \
482
+ --service-account $SA_EMAIL \
483
+ --memory 2Gi \
484
+ --cpu 2 \
485
+ --timeout 600 \
486
+ --set-env-vars="GOOGLE_CLOUD_PROJECT=YOUR-PROJECT,GCS_BUCKET_NAME=$BUCKET_NAME"
487
+ ```
488
+
489
+ ### Point CLI to Your Backend
490
+
491
+ ```bash
492
+ export KARAOKE_GEN_URL="https://your-backend.run.app"
493
+ karaoke-gen-remote ./song.flac "Artist" "Title"
494
+ ```
495
+
496
+ ---
497
+
498
+ ## 🔌 Backend API Reference
499
+
500
+ The backend exposes a REST API for job management.
501
+
502
+ ### Job Submission
503
+
504
+ **POST** `/api/jobs/upload`
505
+
506
+ Submit a new karaoke generation job with audio file and options.
507
+
508
+ ```bash
509
+ curl -X POST "https://api.example.com/api/jobs/upload" \
510
+ -F "file=@song.flac" \
511
+ -F "artist=ABBA" \
512
+ -F "title=Waterloo" \
513
+ -F "enable_cdg=true" \
514
+ -F "enable_txt=true" \
515
+ -F "brand_prefix=NOMAD" \
516
+ -F "style_params=@styles.json" \
517
+ -F "style_karaoke_background=@background.png"
518
+ ```
519
+
520
+ ### Job Status
521
+
522
+ **GET** `/api/jobs/{job_id}`
523
+
524
+ Get job status and details.
525
+
526
+ ```bash
527
+ curl "https://api.example.com/api/jobs/abc12345"
528
+ ```
529
+
530
+ ### List Jobs
531
+
532
+ **GET** `/api/jobs`
533
+
534
+ List all jobs, with an optional status filter.
535
+
536
+ ```bash
537
+ curl "https://api.example.com/api/jobs?status=complete&limit=10"
538
+ ```
539
+
540
+ ### Cancel Job
541
+
542
+ **POST** `/api/jobs/{job_id}/cancel`
543
+
544
+ Cancel a running job.
545
+
546
+ ```bash
547
+ curl -X POST "https://api.example.com/api/jobs/abc12345/cancel" \
548
+ -H "Content-Type: application/json" \
549
+ -d '{"reason": "User cancelled"}'
550
+ ```
551
+
552
+ ### Delete Job
553
+
554
+ **DELETE** `/api/jobs/{job_id}`
555
+
556
+ Delete a job and its files.
557
+
558
+ ```bash
559
+ curl -X DELETE "https://api.example.com/api/jobs/abc12345?delete_files=true"
560
+ ```
561
+
562
+ ### Lyrics Review
563
+
564
+ **GET** `/api/review/{job_id}/correction-data`
565
+
566
+ Get correction data for lyrics review.
567
+
568
+ **POST** `/api/review/{job_id}/complete`
569
+
570
+ Submit corrected lyrics and trigger video rendering.
571
+
572
+ ### Instrumental Selection
573
+
574
+ **GET** `/api/jobs/{job_id}/instrumental-options`
575
+
576
+ Get available instrumental options.
577
+
578
+ **POST** `/api/jobs/{job_id}/select-instrumental`
579
+
580
+ Submit instrumental selection (clean or with_backing).
581
+
582
+ ```bash
583
+ curl -X POST "https://api.example.com/api/jobs/abc12345/select-instrumental" \
584
+ -H "Content-Type: application/json" \
585
+ -d '{"selection": "clean"}'
586
+ ```
587
+
588
+ ### Download Files
589
+
590
+ **GET** `/api/jobs/{job_id}/download-urls`
591
+
592
+ Get download URLs for all output files.
593
+
594
+ **GET** `/api/jobs/{job_id}/download/{category}/{file_key}`
595
+
596
+ Stream a specific file as a download.
597
+
598
+ ### Health Check
599
+
600
+ **GET** `/api/health`
601
+
602
+ Check backend health status.
603
+
604
+ ---
605
+
606
+ ## 🔧 Troubleshooting
607
+
608
+ ### "No suitable files found for processing"
609
+
610
+ This error occurs during the finalisation step when the `(With Vocals).mkv` file is missing. This file is created during lyrics transcription.
611
+
612
+ **Most common cause:** No transcription provider configured.
613
+
614
+ **Quick fix:**
615
+ 1. Check if transcription providers are configured:
616
+ ```bash
617
+ echo $AUDIOSHAKE_API_TOKEN
618
+ echo $RUNPOD_API_KEY
619
+ ```
620
+
621
+ 2. If both are empty, set up a provider (see [Transcription Provider Setup](#transcription-provider-setup))
622
+
623
+ 3. Or use `--skip-lyrics` for instrumental-only karaoke:
624
+ ```bash
625
+ karaoke-gen --skip-lyrics "Artist" "Title"
626
+ ```
627
+
628
+ **Other causes:**
629
+ - Invalid API credentials - verify your tokens are correct and active
630
+ - API service unavailable - check service status pages
631
+ - Network connectivity issues - ensure you can reach the API endpoints
632
+ - Transcription timeout - try again or use a different provider
633
+
634
+ ### Transcription Fails Silently
635
+
636
+ If karaoke-gen runs without errors but produces no synchronized lyrics:
637
+
638
+ 1. **Check logs** - Run with `--log_level debug` for detailed output:
639
+ ```bash
640
+ karaoke-gen --log_level debug "Artist" "Title"
641
+ ```
642
+
643
+ 2. **Verify environment variables** - Ensure API tokens are exported in your shell:
644
+ ```bash
645
+ # Check if set
646
+ printenv | grep -E "(AUDIOSHAKE|RUNPOD|WHISPER)"
647
+
648
+ # Set in current session
649
+ export AUDIOSHAKE_API_TOKEN="your_token"
650
+ ```
651
+
652
+ 3. **Test API connectivity** - Verify you can reach the transcription service
653
+
654
+ ### "No lyrics found from any source"
655
+
656
+ This warning means no reference lyrics were fetched from online sources (Genius, Spotify, Musixmatch). The transcription will still work, but auto-correction may be less accurate.
657
+
658
+ **To fix:**
659
+ - Set `GENIUS_API_TOKEN` for Genius lyrics
660
+ - Set `SPOTIFY_COOKIE_SP_DC` for Spotify lyrics
661
+ - Set `RAPIDAPI_KEY` for Musixmatch lyrics
662
+ - Or provide lyrics manually with `--lyrics_file /path/to/lyrics.txt`
663
+
664
+ ### Video Quality Issues
665
+
666
+ If the output video has quality problems:
667
+ - Ensure FFmpeg is properly installed: `ffmpeg -version`
668
+ - Check available codecs: `ffmpeg -codecs`
669
+ - For 4K output, ensure sufficient disk space (10GB+ per track)
670
+
671
+ ---
672
+
673
+ ## 🧪 Development
674
+
675
+ ### Running Tests
676
+
677
+ ```bash
678
+ # Run all tests
679
+ pytest tests/ backend/tests/ -v
680
+
681
+ # Run only unit tests
682
+ pytest tests/unit/ -v
683
+
684
+ # Run with coverage
685
+ pytest tests/unit/ -v --cov=karaoke_gen --cov-report=term-missing
686
+ ```
687
+
688
+ ### Project Structure
689
+
690
+ ```
691
+ karaoke-gen/
692
+ ├── karaoke_gen/ # Core CLI package
693
+ │ ├── utils/
694
+ │ │ ├── gen_cli.py # Local CLI (karaoke-gen)
695
+ │ │ └── remote_cli.py # Remote CLI (karaoke-gen-remote)
696
+ │ ├── karaoke_finalise/ # Video encoding, packaging, distribution
697
+ │ └── style_loader.py # Unified style configuration
698
+ ├── backend/ # Cloud backend (FastAPI)
699
+ │ ├── api/routes/ # API endpoints
700
+ │ ├── workers/ # Background processing workers
701
+ │ └── services/ # Business logic services
702
+ ├── infrastructure/ # Pulumi IaC for GCP
703
+ ├── docs/ # Documentation
704
+ └── tests/ # Test suite
705
+ ```
706
+
707
+ ---
708
+
709
+ ## 📄 License
710
+
711
+ MIT
712
+
713
+ ---
714
+
715
+ ## 🤝 Contributing
716
+
717
+ Contributions are welcome! Please see our contributing guidelines.
718
+