karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1815 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.23.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.23.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,610 @@
1
+ Metadata-Version: 2.4
2
+ Name: karaoke-gen
3
+ Version: 0.71.23
4
+ Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Andrew Beveridge
8
+ Author-email: andrew@beveridge.uk
9
+ Requires-Python: >=3.10,<3.14
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: argparse (>=1.4.0)
17
+ Requires-Dist: attrs (>=24.2.0)
18
+ Requires-Dist: audio-separator[cpu] (>=0.34.0)
19
+ Requires-Dist: beautifulsoup4 (>=4)
20
+ Requires-Dist: cattrs (>=24.1.2)
21
+ Requires-Dist: dropbox (>=12)
22
+ Requires-Dist: fastapi (>=0.104.0)
23
+ Requires-Dist: fetch-lyrics-from-genius (>=0.1)
24
+ Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
25
+ Requires-Dist: flacfetch (>=0.3)
26
+ Requires-Dist: fonttools (>=4.55)
27
+ Requires-Dist: google-api-python-client
28
+ Requires-Dist: google-auth
29
+ Requires-Dist: google-auth-httplib2
30
+ Requires-Dist: google-auth-oauthlib
31
+ Requires-Dist: google-cloud-firestore (>=2.14.0)
32
+ Requires-Dist: google-cloud-secret-manager (>=2.18.0)
33
+ Requires-Dist: google-cloud-storage (>=2.14.0)
34
+ Requires-Dist: httpx (>=0.25.0)
35
+ Requires-Dist: jiwer (>=3.0.0)
36
+ Requires-Dist: karaoke-lyrics-processor (>=0.6)
37
+ Requires-Dist: kbputils (>=0.0.16,<0.0.17)
38
+ Requires-Dist: langchain (>=0.3.0)
39
+ Requires-Dist: langchain-anthropic (>=0.2.0)
40
+ Requires-Dist: langchain-core (>=0.3.0)
41
+ Requires-Dist: langchain-ollama (>=0.2.0)
42
+ Requires-Dist: langchain-openai (>=0.2.0)
43
+ Requires-Dist: langfuse (>=3.0.0)
44
+ Requires-Dist: langgraph (>=0.2.0)
45
+ Requires-Dist: lyrics-converter (>=0.2.1)
46
+ Requires-Dist: lyricsgenius (>=3)
47
+ Requires-Dist: matplotlib (>=3)
48
+ Requires-Dist: metaphone (>=0.6)
49
+ Requires-Dist: nltk (>=3.9)
50
+ Requires-Dist: numpy (>=2)
51
+ Requires-Dist: ollama (>=0.4.7)
52
+ Requires-Dist: openai (>=1.63.2)
53
+ Requires-Dist: opentelemetry-api (>=1.20.0)
54
+ Requires-Dist: opentelemetry-exporter-gcp-trace (>=1.6.0)
55
+ Requires-Dist: opentelemetry-instrumentation-fastapi (>=0.41b0)
56
+ Requires-Dist: opentelemetry-instrumentation-httpx (>=0.41b0)
57
+ Requires-Dist: opentelemetry-instrumentation-logging (>=0.41b0)
58
+ Requires-Dist: opentelemetry-resourcedetector-gcp (>=1.6.0a0)
59
+ Requires-Dist: opentelemetry-sdk (>=1.20.0)
60
+ Requires-Dist: pillow (>=10.1)
61
+ Requires-Dist: psutil (>=7.0.0,<8.0.0)
62
+ Requires-Dist: pydantic (>=2.5.0)
63
+ Requires-Dist: pydantic-settings (>=2.1.0)
64
+ Requires-Dist: pydub (>=0.25.1)
65
+ Requires-Dist: pyinstaller (>=6.3)
66
+ Requires-Dist: pyperclip
67
+ Requires-Dist: pytest-asyncio
68
+ Requires-Dist: python-dotenv (>=1.0.0)
69
+ Requires-Dist: python-levenshtein (>=0.26)
70
+ Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
71
+ Requires-Dist: python-slugify (>=8)
72
+ Requires-Dist: requests (>=2)
73
+ Requires-Dist: shortuuid (>=1.0.13)
74
+ Requires-Dist: spacy (>=3.8.7)
75
+ Requires-Dist: spacy-syllables (>=3)
76
+ Requires-Dist: srsly (>=2.5.1)
77
+ Requires-Dist: syllables (>=1)
78
+ Requires-Dist: syrics (>=0)
79
+ Requires-Dist: thefuzz (>=0.22)
80
+ Requires-Dist: toml (>=0.10)
81
+ Requires-Dist: torch (>=2.7)
82
+ Requires-Dist: tqdm (>=4.67)
83
+ Requires-Dist: transformers (>=4.47)
84
+ Requires-Dist: uvicorn[standard] (>=0.24.0)
85
+ Project-URL: Documentation, https://github.com/nomadkaraoke/karaoke-gen/blob/main/README.md
86
+ Project-URL: Homepage, https://github.com/nomadkaraoke/karaoke-gen
87
+ Project-URL: Repository, https://github.com/nomadkaraoke/karaoke-gen
88
+ Description-Content-Type: text/markdown
89
+
90
+ # Karaoke Generator 🎶 🎥 🚀
91
+
92
+ ![PyPI - Version](https://img.shields.io/pypi/v/karaoke-gen)
93
+ ![Python Version](https://img.shields.io/badge/python-3.10+-blue)
94
+ ![Tests](https://github.com/nomadkaraoke/karaoke-gen/workflows/Test%20and%20Publish/badge.svg)
95
+ ![Test Coverage](https://codecov.io/gh/nomadkaraoke/karaoke-gen/branch/main/graph/badge.svg)
96
+
97
+ Generate professional karaoke videos with instrumental audio and synchronized lyrics. Available as a **local CLI** (`karaoke-gen`) or **cloud-based CLI** (`karaoke-gen-remote`) that offloads processing to Google Cloud.
98
+
99
+ ## ✨ Two Ways to Generate Karaoke
100
+
101
+ ### 1. Local CLI (`karaoke-gen`)
102
+ Run all processing locally on your machine. A GPU is recommended for optimal audio separation performance.
103
+
104
+ ```bash
105
+ karaoke-gen "ABBA" "Waterloo"
106
+ ```
107
+
108
+ ### 2. Remote CLI (`karaoke-gen-remote`)
109
+ Offload all processing to a cloud backend. No GPU required - just authenticate and submit jobs.
110
+
111
+ ```bash
112
+ karaoke-gen-remote ./song.flac "ABBA" "Waterloo"
113
+ ```
114
+
115
+ Both CLIs produce identical outputs: 4K karaoke videos, CDG+MP3 packages, audio stems, and more.
116
+
117
+ ---
118
+
119
+ ## 🎯 Features
120
+
121
+ ### Core Pipeline
122
+ - **Audio Separation**: AI-powered vocal/instrumental separation using MDX and Demucs models
123
+ - **Lyrics Transcription**: Word-level timestamps via AudioShake API
124
+ - **Lyrics Correction**: Match transcription against online lyrics (Genius, Spotify, Musixmatch)
125
+ - **Human Review**: Interactive UI for correcting lyrics before final render
126
+ - **Video Rendering**: High-quality 4K karaoke videos with customizable styles
127
+ - **Multiple Outputs**: MP4 (4K lossless/lossy, 720p), MKV, CDG+MP3, TXT+MP3
128
+
129
+ ### Distribution Features
130
+ - **YouTube Upload**: Automatic upload to your YouTube channel
131
+ - **Dropbox Integration**: Organize output in brand-coded folders
132
+ - **Google Drive**: Upload to public share folders
133
+ - **Discord Notifications**: Webhook notifications on completion
134
+
135
+ ---
136
+
137
+ ## 📦 Installation
138
+
139
+ ```bash
140
+ pip install karaoke-gen
141
+ ```
142
+
143
+ This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
144
+
145
+ ### Requirements
146
+ - Python 3.10+
147
+ - FFmpeg
148
+ - For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
149
+
150
+ ---
151
+
152
+ ## 🖥️ Local CLI (`karaoke-gen`)
153
+
154
+ ### Basic Usage
155
+
156
+ ```bash
157
+ # Generate from local audio file
158
+ karaoke-gen ./song.mp3 "Artist Name" "Song Title"
159
+
160
+ # Search and download audio automatically
161
+ karaoke-gen "Rick Astley" "Never Gonna Give You Up"
162
+
163
+ # Process from YouTube URL
164
+ karaoke-gen "https://www.youtube.com/watch?v=dQw4w9WgXcQ" "Rick Astley" "Never Gonna Give You Up"
165
+ ```
166
+
167
+ ### Remote Audio Separation (Optional)
168
+
169
+ Offload just the GPU-intensive audio separation to Modal.com while keeping other processing local:
170
+
171
+ ```bash
172
+ export AUDIO_SEPARATOR_API_URL="https://USERNAME--audio-separator-api.modal.run"
173
+ karaoke-gen "Artist" "Title"
174
+ ```
175
+
176
+ ### Key Options
177
+
178
+ ```bash
179
+ # Custom styling
180
+ karaoke-gen --style_params_json="./styles.json" "Artist" "Title"
181
+
182
+ # Generate CDG and TXT packages
183
+ karaoke-gen --enable_cdg --enable_txt "Artist" "Title"
184
+
185
+ # YouTube upload
186
+ karaoke-gen --enable_youtube_upload --youtube_description_file="./desc.txt" "Artist" "Title"
187
+
188
+ # Full production run
189
+ karaoke-gen \
190
+ --style_params_json="./branding.json" \
191
+ --enable_cdg \
192
+ --enable_txt \
193
+ --brand_prefix="BRAND" \
194
+ --enable_youtube_upload \
195
+ --youtube_description_file="./description.txt" \
196
+ "Artist" "Title"
197
+ ```
198
+
199
+ ### Full Options Reference
200
+
201
+ ```bash
202
+ karaoke-gen --help
203
+ ```
204
+
205
+ ---
206
+
207
+ ## ☁️ Remote CLI (`karaoke-gen-remote`)
208
+
209
+ The remote CLI submits jobs to a Google Cloud backend that handles all processing. You don't need a GPU or any audio processing libraries installed locally.
210
+
211
+ ### Setup
212
+
213
+ 1. **Set the backend URL:**
214
+ ```bash
215
+ export KARAOKE_GEN_URL="https://api.nomadkaraoke.com" # Or your own backend
216
+ ```
217
+
218
+ 2. **Authenticate with Google Cloud:**
219
+ ```bash
220
+ gcloud auth login
221
+ ```
222
+
223
+ ### Basic Usage
224
+
225
+ ```bash
226
+ # Submit a job
227
+ karaoke-gen-remote ./song.flac "ABBA" "Waterloo"
228
+
229
+ # The CLI will:
230
+ # 1. Upload your audio file
231
+ # 2. Monitor processing progress
232
+ # 3. Open lyrics review UI when ready
233
+ # 4. Prompt for instrumental selection
234
+ # 5. Download all outputs when complete
235
+ ```
236
+
237
+ ### Job Management
238
+
239
+ ```bash
240
+ # List all jobs
241
+ karaoke-gen-remote --list
242
+
243
+ # Resume monitoring an existing job
244
+ karaoke-gen-remote --resume abc12345
245
+
246
+ # Cancel a running job
247
+ karaoke-gen-remote --cancel abc12345
248
+
249
+ # Delete a job and its files
250
+ karaoke-gen-remote --delete abc12345
251
+ ```
252
+
253
+ ### Full Production Run
254
+
255
+ ```bash
256
+ karaoke-gen-remote \
257
+ --style_params_json="./karaoke-styles.json" \
258
+ --enable_cdg \
259
+ --enable_txt \
260
+ --brand_prefix=NOMAD \
261
+ --enable_youtube_upload \
262
+ --youtube_description_file="./youtube-description.txt" \
263
+ ./song.flac "Artist" "Title"
264
+ ```
265
+
266
+ ### Environment Variables
267
+
268
+ | Variable | Description | Default |
269
+ |----------|-------------|---------|
270
+ | `KARAOKE_GEN_URL` | Backend service URL | Required |
271
+ | `KARAOKE_GEN_AUTH_TOKEN` | Admin auth token (for protected endpoints) | Optional |
272
+ | `REVIEW_UI_URL` | Lyrics review UI URL | `https://lyrics.nomadkaraoke.com` |
273
+ | `POLL_INTERVAL` | Seconds between status polls | `5` |
274
+
275
+ ### Authentication
276
+
277
+ The backend uses token-based authentication for admin operations (bulk delete, internal worker triggers). For basic job submission and monitoring, authentication is optional.
278
+
279
+ **For admin access:**
280
+ ```bash
281
+ export KARAOKE_GEN_AUTH_TOKEN="your-admin-token"
282
+ ```
283
+
284
+ The token must match one of the tokens configured in the backend's `ADMIN_TOKENS` environment variable.
285
+
286
+ ### Non-Interactive Mode
287
+
288
+ For automated/CI usage:
289
+
290
+ ```bash
291
+ karaoke-gen-remote -y ./song.flac "Artist" "Title"
292
+ ```
293
+
294
+ The `-y` flag auto-accepts the default corrections and selects the clean instrumental.
295
+
296
+ ---
297
+
298
+ ## 🎨 Style Configuration
299
+
300
+ Create a `styles.json` file to customize the karaoke video appearance:
301
+
302
+ ```json
303
+ {
304
+ "intro": {
305
+ "video_duration": 5,
306
+ "background_image": "/path/to/title-background.png",
307
+ "font": "/path/to/Font.ttf",
308
+ "artist_color": "#ffdf6b",
309
+ "title_color": "#ffffff"
310
+ },
311
+ "karaoke": {
312
+ "background_image": "/path/to/karaoke-background.png",
313
+ "font_path": "/path/to/Font.ttf"
314
+ },
315
+ "end": {
316
+ "background_image": "/path/to/end-background.png"
317
+ },
318
+ "cdg": {
319
+ "font_path": "/path/to/Font.ttf",
320
+ "instrumental_background": "/path/to/cdg-background.png"
321
+ }
322
+ }
323
+ ```
324
+
325
+ When using `karaoke-gen-remote`, all referenced files are automatically uploaded with your job.
326
+
327
+ ---
328
+
329
+ ## 📤 Output Files
330
+
331
+ A completed job produces:
332
+
333
+ ```
334
+ BRAND-1234 - Artist - Title/
335
+ ├── Artist - Title (Final Karaoke Lossless 4k).mp4 # ProRes 4K
336
+ ├── Artist - Title (Final Karaoke Lossless 4k).mkv # FLAC audio 4K
337
+ ├── Artist - Title (Final Karaoke Lossy 4k).mp4 # H.264 4K
338
+ ├── Artist - Title (Final Karaoke Lossy 720p).mp4 # H.264 720p
339
+ ├── Artist - Title (Final Karaoke CDG).zip # CDG+MP3 package
340
+ ├── Artist - Title (Final Karaoke TXT).zip # TXT+MP3 package
341
+ ├── Artist - Title (Karaoke).cdg # Individual CDG
342
+ ├── Artist - Title (Karaoke).mp3 # Karaoke audio
343
+ ├── Artist - Title (Karaoke).lrc # LRC lyrics
344
+ ├── Artist - Title (Karaoke).ass # ASS subtitles
345
+ ├── Artist - Title (Title).mov # Title screen video
346
+ ├── Artist - Title (End).mov # End screen video
347
+ ├── Artist - Title (Instrumental...).flac # Clean instrumental
348
+ ├── Artist - Title (Instrumental +BV...).flac # With backing vocals
349
+ └── stems/ # All audio stems
350
+ ├── ...Vocals....flac
351
+ ├── ...Bass....flac
352
+ ├── ...Drums....flac
353
+ └── ...
354
+ ```
355
+
356
+ ---
357
+
358
+ ## 🏗️ Deploy Your Own Backend
359
+
360
+ The cloud backend runs on Google Cloud Platform using:
361
+ - **Cloud Run**: Serverless API hosting
362
+ - **Firestore**: Job state management
363
+ - **Cloud Storage**: File uploads and outputs
364
+ - **Modal.com**: GPU-accelerated audio separation
365
+ - **AudioShake**: Lyrics transcription API
366
+
367
+ ### Prerequisites
368
+
369
+ - Google Cloud account with billing enabled
370
+ - [Pulumi CLI](https://www.pulumi.com/docs/install/)
371
+ - Modal.com account (for audio separation)
372
+ - AudioShake API key
373
+
374
+ ### Infrastructure Setup
375
+
376
+ ```bash
377
+ cd infrastructure
378
+
379
+ # Install dependencies
380
+ pip install -r requirements.txt
381
+
382
+ # Login to Pulumi
383
+ pulumi login
384
+
385
+ # Create a stack
386
+ pulumi stack init prod
387
+
388
+ # Configure GCP project
389
+ pulumi config set gcp:project your-project-id
390
+ pulumi config set gcp:region us-central1
391
+
392
+ # Deploy infrastructure
393
+ pulumi up
394
+ ```
395
+
396
+ This creates:
397
+ - Firestore database
398
+ - Cloud Storage bucket
399
+ - Artifact Registry
400
+ - Service account with IAM roles
401
+ - Secret Manager secrets (created without values; you add them in the next step)
402
+
403
+ ### Add Secret Values
404
+
405
+ ```bash
406
+ # AudioShake API key
407
+ echo -n "your-audioshake-key" | gcloud secrets versions add audioshake-api-key --data-file=-
408
+
409
+ # Genius API key
410
+ echo -n "your-genius-key" | gcloud secrets versions add genius-api-key --data-file=-
411
+
412
+ # Modal API URL
413
+ echo -n "https://your-modal-url" | gcloud secrets versions add audio-separator-api-url --data-file=-
414
+
415
+ # YouTube OAuth credentials (JSON)
416
+ gcloud secrets versions add youtube-oauth-credentials --data-file=./youtube-creds.json
417
+
418
+ # Dropbox OAuth credentials (JSON)
419
+ gcloud secrets versions add dropbox-oauth-credentials --data-file=./dropbox-creds.json
420
+
421
+ # Google Drive service account (JSON)
422
+ gcloud secrets versions add gdrive-service-account --data-file=./gdrive-sa.json
423
+ ```
424
+
425
+ ### Deploy Cloud Run
426
+
427
+ ```bash
428
+ # Build and deploy
429
+ gcloud builds submit --config=cloudbuild.yaml
430
+
431
+ # Get outputs from Pulumi
432
+ SA_EMAIL=$(pulumi stack output service_account_email)
433
+ BUCKET_NAME=$(pulumi stack output bucket_name)
434
+
435
+ # Deploy Cloud Run service
436
+ gcloud run deploy karaoke-backend \
437
+ --image us-central1-docker.pkg.dev/YOUR-PROJECT/karaoke-repo/karaoke-backend:latest \
438
+ --platform managed \
439
+ --region us-central1 \
440
+ --allow-unauthenticated \
441
+ --service-account $SA_EMAIL \
442
+ --memory 2Gi \
443
+ --cpu 2 \
444
+ --timeout 600 \
445
+ --set-env-vars="GOOGLE_CLOUD_PROJECT=YOUR-PROJECT,GCS_BUCKET_NAME=$BUCKET_NAME"
446
+ ```
447
+
448
+ ### Point CLI to Your Backend
449
+
450
+ ```bash
451
+ export KARAOKE_GEN_URL="https://your-backend.run.app"
452
+ karaoke-gen-remote ./song.flac "Artist" "Title"
453
+ ```
454
+
455
+ ---
456
+
457
+ ## 🔌 Backend API Reference
458
+
459
+ The backend exposes a REST API for job management.
460
+
461
+ ### Job Submission
462
+
463
+ **POST** `/api/jobs/upload`
464
+
465
+ Submit a new karaoke generation job as a multipart form upload containing the audio file and its options.
466
+
467
+ ```bash
468
+ curl -X POST "https://api.example.com/api/jobs/upload" \
469
+ -F "file=@song.flac" \
470
+ -F "artist=ABBA" \
471
+ -F "title=Waterloo" \
472
+ -F "enable_cdg=true" \
473
+ -F "enable_txt=true" \
474
+ -F "brand_prefix=NOMAD" \
475
+ -F "style_params=@styles.json" \
476
+ -F "style_karaoke_background=@background.png"
477
+ ```
478
+
479
+ ### Job Status
480
+
481
+ **GET** `/api/jobs/{job_id}`
482
+
483
+ Get job status and details.
484
+
485
+ ```bash
486
+ curl "https://api.example.com/api/jobs/abc12345"
487
+ ```
488
+
489
+ ### List Jobs
490
+
491
+ **GET** `/api/jobs`
492
+
493
+ List jobs, optionally filtered by `status` and capped with `limit`.
494
+
495
+ ```bash
496
+ curl "https://api.example.com/api/jobs?status=complete&limit=10"
497
+ ```
498
+
499
+ ### Cancel Job
500
+
501
+ **POST** `/api/jobs/{job_id}/cancel`
502
+
503
+ Cancel a running job.
504
+
505
+ ```bash
506
+ curl -X POST "https://api.example.com/api/jobs/abc12345/cancel" \
507
+ -H "Content-Type: application/json" \
508
+ -d '{"reason": "User cancelled"}'
509
+ ```
510
+
511
+ ### Delete Job
512
+
513
+ **DELETE** `/api/jobs/{job_id}`
514
+
515
+ Delete a job and, when `delete_files=true` is passed, its files.
516
+
517
+ ```bash
518
+ curl -X DELETE "https://api.example.com/api/jobs/abc12345?delete_files=true"
519
+ ```
520
+
521
+ ### Lyrics Review
522
+
523
+ **GET** `/api/review/{job_id}/correction-data`
524
+
525
+ Get correction data for lyrics review.
526
+
527
+ **POST** `/api/review/{job_id}/complete`
528
+
529
+ Submit corrected lyrics and trigger video rendering.
530
+
531
+ ### Instrumental Selection
532
+
533
+ **GET** `/api/jobs/{job_id}/instrumental-options`
534
+
535
+ Get available instrumental options.
536
+
537
+ **POST** `/api/jobs/{job_id}/select-instrumental`
538
+
539
+ Submit the instrumental selection: `clean` or `with_backing`.
540
+
541
+ ```bash
542
+ curl -X POST "https://api.example.com/api/jobs/abc12345/select-instrumental" \
543
+ -H "Content-Type: application/json" \
544
+ -d '{"selection": "clean"}'
545
+ ```
546
+
547
+ ### Download Files
548
+
549
+ **GET** `/api/jobs/{job_id}/download-urls`
550
+
551
+ Get download URLs for all output files.
552
+
553
+ **GET** `/api/jobs/{job_id}/download/{category}/{file_key}`
554
+
555
+ Stream a specific file as a download.
556
+
557
+ ### Health Check
558
+
559
+ **GET** `/api/health`
560
+
561
+ Check backend health status.
562
+
563
+ ---
564
+
565
+ ## 🧪 Development
566
+
567
+ ### Running Tests
568
+
569
+ ```bash
570
+ # Run all tests
571
+ pytest tests/ backend/tests/ -v
572
+
573
+ # Run only unit tests
574
+ pytest tests/unit/ -v
575
+
576
+ # Run with coverage
577
+ pytest tests/unit/ -v --cov=karaoke_gen --cov-report=term-missing
578
+ ```
579
+
580
+ ### Project Structure
581
+
582
+ ```
583
+ karaoke-gen/
584
+ ├── karaoke_gen/ # Core CLI package
585
+ │ ├── utils/
586
+ │ │ ├── gen_cli.py # Local CLI (karaoke-gen)
587
+ │ │ └── remote_cli.py # Remote CLI (karaoke-gen-remote)
588
+ │ ├── karaoke_finalise/ # Video encoding, packaging, distribution
589
+ │ └── style_loader.py # Unified style configuration
590
+ ├── backend/ # Cloud backend (FastAPI)
591
+ │ ├── api/routes/ # API endpoints
592
+ │ ├── workers/ # Background processing workers
593
+ │ └── services/ # Business logic services
594
+ ├── infrastructure/ # Pulumi IaC for GCP
595
+ ├── docs/ # Documentation
596
+ └── tests/ # Test suite
597
+ ```
598
+
599
+ ---
600
+
601
+ ## 📄 License
602
+
603
+ MIT
604
+
605
+ ---
606
+
607
+ ## 🤝 Contributing
608
+
609
+ Contributions are welcome! Please see our contributing guidelines.
610
+