karaoke-gen 0.75.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of karaoke-gen might be problematic. Click here for more details.
- karaoke_gen/__init__.py +38 -0
- karaoke_gen/audio_fetcher.py +1614 -0
- karaoke_gen/audio_processor.py +790 -0
- karaoke_gen/config.py +83 -0
- karaoke_gen/file_handler.py +387 -0
- karaoke_gen/instrumental_review/__init__.py +45 -0
- karaoke_gen/instrumental_review/analyzer.py +408 -0
- karaoke_gen/instrumental_review/editor.py +322 -0
- karaoke_gen/instrumental_review/models.py +171 -0
- karaoke_gen/instrumental_review/server.py +475 -0
- karaoke_gen/instrumental_review/static/index.html +1529 -0
- karaoke_gen/instrumental_review/waveform.py +409 -0
- karaoke_gen/karaoke_finalise/__init__.py +1 -0
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
- karaoke_gen/karaoke_gen.py +1026 -0
- karaoke_gen/lyrics_processor.py +474 -0
- karaoke_gen/metadata.py +160 -0
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
- karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
- karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/__init__.py +18 -0
- karaoke_gen/utils/bulk_cli.py +492 -0
- karaoke_gen/utils/cli_args.py +432 -0
- karaoke_gen/utils/gen_cli.py +978 -0
- karaoke_gen/utils/remote_cli.py +3268 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen/video_generator.py +424 -0
- karaoke_gen-0.75.54.dist-info/METADATA +718 -0
- karaoke_gen-0.75.54.dist-info/RECORD +287 -0
- karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
- karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
- karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +594 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +919 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +214 -0
- lyrics_transcriber/frontend/src/api.ts +254 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +306 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +379 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +650 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: karaoke-gen
|
|
3
|
+
Version: 0.75.54
|
|
4
|
+
Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Andrew Beveridge
|
|
8
|
+
Author-email: andrew@beveridge.uk
|
|
9
|
+
Requires-Python: >=3.10,<3.14
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: argparse (>=1.4.0)
|
|
17
|
+
Requires-Dist: attrs (>=24.2.0)
|
|
18
|
+
Requires-Dist: audio-separator[cpu] (>=0.34.0)
|
|
19
|
+
Requires-Dist: beautifulsoup4 (>=4)
|
|
20
|
+
Requires-Dist: cattrs (>=24.1.2)
|
|
21
|
+
Requires-Dist: dropbox (>=12)
|
|
22
|
+
Requires-Dist: fastapi (>=0.104.0)
|
|
23
|
+
Requires-Dist: fetch-lyrics-from-genius (>=0.1)
|
|
24
|
+
Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
|
|
25
|
+
Requires-Dist: flacfetch (>=0.9.0)
|
|
26
|
+
Requires-Dist: fonttools (>=4.55)
|
|
27
|
+
Requires-Dist: google-api-python-client
|
|
28
|
+
Requires-Dist: google-auth
|
|
29
|
+
Requires-Dist: google-auth-httplib2
|
|
30
|
+
Requires-Dist: google-auth-oauthlib
|
|
31
|
+
Requires-Dist: google-cloud-firestore (>=2.14.0)
|
|
32
|
+
Requires-Dist: google-cloud-run (>=0.10.0)
|
|
33
|
+
Requires-Dist: google-cloud-secret-manager (>=2.18.0)
|
|
34
|
+
Requires-Dist: google-cloud-storage (>=2.14.0)
|
|
35
|
+
Requires-Dist: google-cloud-tasks (>=2.16.0)
|
|
36
|
+
Requires-Dist: httpx (>=0.25.0)
|
|
37
|
+
Requires-Dist: jiwer (>=3.0.0)
|
|
38
|
+
Requires-Dist: karaoke-lyrics-processor (>=0.6)
|
|
39
|
+
Requires-Dist: kbputils (>=0.0.16,<0.0.17)
|
|
40
|
+
Requires-Dist: langchain (>=0.3.0)
|
|
41
|
+
Requires-Dist: langchain-anthropic (>=0.2.0)
|
|
42
|
+
Requires-Dist: langchain-core (>=0.3.0)
|
|
43
|
+
Requires-Dist: langchain-ollama (>=0.2.0)
|
|
44
|
+
Requires-Dist: langchain-openai (>=0.2.0)
|
|
45
|
+
Requires-Dist: langfuse (>=3.0.0)
|
|
46
|
+
Requires-Dist: langgraph (>=0.2.0)
|
|
47
|
+
Requires-Dist: lyrics-converter (>=0.2.1)
|
|
48
|
+
Requires-Dist: lyricsgenius (>=3)
|
|
49
|
+
Requires-Dist: matplotlib (>=3)
|
|
50
|
+
Requires-Dist: metaphone (>=0.6)
|
|
51
|
+
Requires-Dist: mutagen (>=1.47)
|
|
52
|
+
Requires-Dist: nest-asyncio (>=1.5)
|
|
53
|
+
Requires-Dist: nltk (>=3.9)
|
|
54
|
+
Requires-Dist: numpy (>=2)
|
|
55
|
+
Requires-Dist: ollama (>=0.4.7)
|
|
56
|
+
Requires-Dist: openai (>=1.63.2)
|
|
57
|
+
Requires-Dist: opentelemetry-api (>=1.20.0)
|
|
58
|
+
Requires-Dist: opentelemetry-exporter-gcp-trace (>=1.6.0)
|
|
59
|
+
Requires-Dist: opentelemetry-instrumentation-fastapi (>=0.41b0)
|
|
60
|
+
Requires-Dist: opentelemetry-instrumentation-httpx (>=0.41b0)
|
|
61
|
+
Requires-Dist: opentelemetry-instrumentation-logging (>=0.41b0)
|
|
62
|
+
Requires-Dist: opentelemetry-resourcedetector-gcp (>=1.6.0a0)
|
|
63
|
+
Requires-Dist: opentelemetry-sdk (>=1.20.0)
|
|
64
|
+
Requires-Dist: pillow (>=10.1)
|
|
65
|
+
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
|
66
|
+
Requires-Dist: pydantic (>=2.5.0)
|
|
67
|
+
Requires-Dist: pydantic-settings (>=2.1.0)
|
|
68
|
+
Requires-Dist: pydub (>=0.25.1)
|
|
69
|
+
Requires-Dist: pyinstaller (>=6.3)
|
|
70
|
+
Requires-Dist: pyperclip
|
|
71
|
+
Requires-Dist: pytest-asyncio
|
|
72
|
+
Requires-Dist: python-dotenv (>=1.0.0)
|
|
73
|
+
Requires-Dist: python-levenshtein (>=0.26)
|
|
74
|
+
Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
|
|
75
|
+
Requires-Dist: python-slugify (>=8)
|
|
76
|
+
Requires-Dist: requests (>=2)
|
|
77
|
+
Requires-Dist: shortuuid (>=1.0.13)
|
|
78
|
+
Requires-Dist: spacy (>=3.8.7)
|
|
79
|
+
Requires-Dist: spacy-syllables (>=3)
|
|
80
|
+
Requires-Dist: srsly (>=2.5.1)
|
|
81
|
+
Requires-Dist: syllables (>=1)
|
|
82
|
+
Requires-Dist: syrics (>=0)
|
|
83
|
+
Requires-Dist: thefuzz (>=0.22)
|
|
84
|
+
Requires-Dist: toml (>=0.10)
|
|
85
|
+
Requires-Dist: torch (>=2.7)
|
|
86
|
+
Requires-Dist: tqdm (>=4.67)
|
|
87
|
+
Requires-Dist: transformers (>=4.47)
|
|
88
|
+
Requires-Dist: uvicorn[standard] (>=0.24.0)
|
|
89
|
+
Requires-Dist: yt-dlp (>=2024.0.0)
|
|
90
|
+
Project-URL: Documentation, https://github.com/nomadkaraoke/karaoke-gen/blob/main/README.md
|
|
91
|
+
Project-URL: Homepage, https://github.com/nomadkaraoke/karaoke-gen
|
|
92
|
+
Project-URL: Repository, https://github.com/nomadkaraoke/karaoke-gen
|
|
93
|
+
Description-Content-Type: text/markdown
|
|
94
|
+
|
|
95
|
+
# Karaoke Generator 🎶 🎥 🚀
|
|
96
|
+
|
|
97
|
+

|
|
98
|
+

|
|
99
|
+

|
|
100
|
+

|
|
101
|
+
|
|
102
|
+
Generate professional karaoke videos with instrumental audio and synchronized lyrics. Available as a **local CLI** (`karaoke-gen`) or **cloud-based CLI** (`karaoke-gen-remote`) that offloads processing to Google Cloud.
|
|
103
|
+
|
|
104
|
+
## ✨ Two Ways to Generate Karaoke
|
|
105
|
+
|
|
106
|
+
### 1. Local CLI (`karaoke-gen`)
|
|
107
|
+
Run all processing locally on your machine. Requires GPU for optimal audio separation performance.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
karaoke-gen "ABBA" "Waterloo"
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### 2. Remote CLI (`karaoke-gen-remote`)
|
|
114
|
+
Offload all processing to a cloud backend. No GPU required - just authenticate and submit jobs.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
karaoke-gen-remote ./song.flac "ABBA" "Waterloo"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Both CLIs produce identical outputs: 4K karaoke videos, CDG+MP3 packages, audio stems, and more.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## 🎯 Features
|
|
125
|
+
|
|
126
|
+
### Core Pipeline
|
|
127
|
+
- **Audio Separation**: AI-powered vocal/instrumental separation using MDX and Demucs models
|
|
128
|
+
- **Lyrics Transcription**: Word-level timestamps via AudioShake API
|
|
129
|
+
- **Lyrics Correction**: Match transcription against online lyrics (Genius, Spotify, Musixmatch)
|
|
130
|
+
- **Human Review**: Interactive UI for correcting lyrics before final render
|
|
131
|
+
- **Video Rendering**: High-quality 4K karaoke videos with customizable styles
|
|
132
|
+
- **Multiple Outputs**: MP4 (4K lossless/lossy, 720p), MKV, CDG+MP3, TXT+MP3
|
|
133
|
+
|
|
134
|
+
### Distribution Features
|
|
135
|
+
- **YouTube Upload**: Automatic upload to your YouTube channel
|
|
136
|
+
- **Dropbox Integration**: Organize output in brand-coded folders
|
|
137
|
+
- **Google Drive**: Upload to public share folders
|
|
138
|
+
- **Discord Notifications**: Webhook notifications on completion
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## 📦 Installation
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
pip install karaoke-gen
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
|
|
149
|
+
|
|
150
|
+
### Requirements
|
|
151
|
+
- Python 3.10-3.13
|
|
152
|
+
- FFmpeg
|
|
153
|
+
- For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
|
|
154
|
+
|
|
155
|
+
### Transcription Provider Setup
|
|
156
|
+
|
|
157
|
+
**Transcription is required** for creating karaoke videos with synchronized lyrics. The system needs word-level timing data to display lyrics in sync with the music.
|
|
158
|
+
|
|
159
|
+
#### Option 1: AudioShake (Recommended)
|
|
160
|
+
Commercial service with high-quality transcription. Best for production use.
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
export AUDIOSHAKE_API_TOKEN="your_audioshake_token"
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Get an API key at [https://www.audioshake.ai/](https://www.audioshake.ai/) (available to business customers only at the time of writing).
|
|
167
|
+
|
|
168
|
+
#### Option 2: Whisper via RunPod
|
|
169
|
+
Open-source alternative using OpenAI's Whisper model on RunPod infrastructure.
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
export RUNPOD_API_KEY="your_runpod_key"
|
|
173
|
+
export WHISPER_RUNPOD_ID="your_whisper_endpoint_id"
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Set up a Whisper endpoint at [https://www.runpod.io/](https://www.runpod.io/)
|
|
177
|
+
|
|
178
|
+
#### Without Transcription (Instrumental Only)
|
|
179
|
+
If you don't need synchronized lyrics, use the `--skip-lyrics` flag:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
karaoke-gen --skip-lyrics "Artist" "Title"
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
This creates an instrumental-only karaoke video without lyrics overlay.
|
|
186
|
+
|
|
187
|
+
> **Note:** See `lyrics_transcriber_temp/README.md` for detailed transcription provider configuration options.
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## 🖥️ Local CLI (`karaoke-gen`)
|
|
192
|
+
|
|
193
|
+
### Basic Usage
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
# Generate from local audio file
|
|
197
|
+
karaoke-gen ./song.mp3 "Artist Name" "Song Title"
|
|
198
|
+
|
|
199
|
+
# Search and download audio automatically
|
|
200
|
+
karaoke-gen "Rick Astley" "Never Gonna Give You Up"
|
|
201
|
+
|
|
202
|
+
# Process from YouTube URL
|
|
203
|
+
karaoke-gen "https://www.youtube.com/watch?v=dQw4w9WgXcQ" "Rick Astley" "Never Gonna Give You Up"
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Remote Audio Separation (Optional)
|
|
207
|
+
|
|
208
|
+
Offload just the GPU-intensive audio separation to Modal.com while keeping other processing local:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
export AUDIO_SEPARATOR_API_URL="https://USERNAME--audio-separator-api.modal.run"
|
|
212
|
+
karaoke-gen "Artist" "Title"
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Key Options
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Custom styling
|
|
219
|
+
karaoke-gen --style_params_json="./styles.json" "Artist" "Title"
|
|
220
|
+
|
|
221
|
+
# Generate CDG and TXT packages
|
|
222
|
+
karaoke-gen --enable_cdg --enable_txt "Artist" "Title"
|
|
223
|
+
|
|
224
|
+
# YouTube upload
|
|
225
|
+
karaoke-gen --enable_youtube_upload --youtube_description_file="./desc.txt" "Artist" "Title"
|
|
226
|
+
|
|
227
|
+
# Full production run
|
|
228
|
+
karaoke-gen \
|
|
229
|
+
--style_params_json="./branding.json" \
|
|
230
|
+
--enable_cdg \
|
|
231
|
+
--enable_txt \
|
|
232
|
+
--brand_prefix="BRAND" \
|
|
233
|
+
--enable_youtube_upload \
|
|
234
|
+
--youtube_description_file="./description.txt" \
|
|
235
|
+
"Artist" "Title"
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Full Options Reference
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
karaoke-gen --help
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## ☁️ Remote CLI (`karaoke-gen-remote`)
|
|
247
|
+
|
|
248
|
+
The remote CLI submits jobs to a Google Cloud backend that handles all processing. You don't need a GPU or any audio processing libraries installed locally.
|
|
249
|
+
|
|
250
|
+
### Setup
|
|
251
|
+
|
|
252
|
+
1. **Set the backend URL:**
|
|
253
|
+
```bash
|
|
254
|
+
export KARAOKE_GEN_URL="https://api.nomadkaraoke.com" # Or your own backend
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
2. **Authenticate with Google Cloud:**
|
|
258
|
+
```bash
|
|
259
|
+
gcloud auth login
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Basic Usage
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
# Submit a job
|
|
266
|
+
karaoke-gen-remote ./song.flac "ABBA" "Waterloo"
|
|
267
|
+
|
|
268
|
+
# The CLI will:
|
|
269
|
+
# 1. Upload your audio file
|
|
270
|
+
# 2. Monitor processing progress
|
|
271
|
+
# 3. Open lyrics review UI when ready
|
|
272
|
+
# 4. Prompt for instrumental selection
|
|
273
|
+
# 5. Download all outputs when complete
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Job Management
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
# List all jobs
|
|
280
|
+
karaoke-gen-remote --list
|
|
281
|
+
|
|
282
|
+
# Resume monitoring an existing job
|
|
283
|
+
karaoke-gen-remote --resume abc12345
|
|
284
|
+
|
|
285
|
+
# Cancel a running job
|
|
286
|
+
karaoke-gen-remote --cancel abc12345
|
|
287
|
+
|
|
288
|
+
# Delete a job and its files
|
|
289
|
+
karaoke-gen-remote --delete abc12345
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Full Production Run
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
karaoke-gen-remote \
|
|
296
|
+
--style_params_json="./karaoke-styles.json" \
|
|
297
|
+
--enable_cdg \
|
|
298
|
+
--enable_txt \
|
|
299
|
+
--brand_prefix=NOMAD \
|
|
300
|
+
--enable_youtube_upload \
|
|
301
|
+
--youtube_description_file="./youtube-description.txt" \
|
|
302
|
+
./song.flac "Artist" "Title"
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Environment Variables
|
|
306
|
+
|
|
307
|
+
| Variable | Description | Default |
|
|
308
|
+
|----------|-------------|---------|
|
|
309
|
+
| `KARAOKE_GEN_URL` | Backend service URL | Required |
|
|
310
|
+
| `KARAOKE_GEN_AUTH_TOKEN` | Admin auth token (for protected endpoints) | Optional |
|
|
311
|
+
| `REVIEW_UI_URL` | Lyrics review UI URL | `https://lyrics.nomadkaraoke.com` |
|
|
312
|
+
| `POLL_INTERVAL` | Seconds between status polls | `5` |
|
|
313
|
+
|
|
314
|
+
**Note:** The `REVIEW_UI_URL` defaults to the hosted lyrics review UI. For local development, set it to `http://localhost:5173` if you're running the frontend dev server.
|
|
315
|
+
|
|
316
|
+
### Authentication
|
|
317
|
+
|
|
318
|
+
The backend uses token-based authentication for admin operations (bulk delete, internal worker triggers). For basic job submission and monitoring, authentication is optional.
|
|
319
|
+
|
|
320
|
+
**For admin access:**
|
|
321
|
+
```bash
|
|
322
|
+
export KARAOKE_GEN_AUTH_TOKEN="your-admin-token"
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
The token must match one of the tokens configured in the backend's `ADMIN_TOKENS` environment variable.
|
|
326
|
+
|
|
327
|
+
### Non-Interactive Mode
|
|
328
|
+
|
|
329
|
+
For automated/CI usage:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
karaoke-gen-remote -y ./song.flac "Artist" "Title"
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
The `-y` flag auto-accepts the default corrections and selects the clean instrumental.
|
|
336
|
+
|
|
337
|
+
---
|
|
338
|
+
|
|
339
|
+
## 🎨 Style Configuration
|
|
340
|
+
|
|
341
|
+
Create a `styles.json` file to customize the karaoke video appearance:
|
|
342
|
+
|
|
343
|
+
```json
|
|
344
|
+
{
|
|
345
|
+
"intro": {
|
|
346
|
+
"video_duration": 5,
|
|
347
|
+
"background_image": "/path/to/title-background.png",
|
|
348
|
+
"font": "/path/to/Font.ttf",
|
|
349
|
+
"artist_color": "#ffdf6b",
|
|
350
|
+
"title_color": "#ffffff"
|
|
351
|
+
},
|
|
352
|
+
"karaoke": {
|
|
353
|
+
"background_image": "/path/to/karaoke-background.png",
|
|
354
|
+
"font_path": "/path/to/Font.ttf"
|
|
355
|
+
},
|
|
356
|
+
"end": {
|
|
357
|
+
"background_image": "/path/to/end-background.png"
|
|
358
|
+
},
|
|
359
|
+
"cdg": {
|
|
360
|
+
"font_path": "/path/to/Font.ttf",
|
|
361
|
+
"instrumental_background": "/path/to/cdg-background.png"
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
When using `karaoke-gen-remote`, all referenced files are automatically uploaded with your job.
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
## 📤 Output Files
|
|
371
|
+
|
|
372
|
+
A completed job produces:
|
|
373
|
+
|
|
374
|
+
```
|
|
375
|
+
BRAND-1234 - Artist - Title/
|
|
376
|
+
├── Artist - Title (Final Karaoke Lossless 4k).mp4 # ProRes 4K
|
|
377
|
+
├── Artist - Title (Final Karaoke Lossless 4k).mkv # FLAC audio 4K
|
|
378
|
+
├── Artist - Title (Final Karaoke Lossy 4k).mp4 # H.264 4K
|
|
379
|
+
├── Artist - Title (Final Karaoke Lossy 720p).mp4 # H.264 720p
|
|
380
|
+
├── Artist - Title (Final Karaoke CDG).zip # CDG+MP3 package
|
|
381
|
+
├── Artist - Title (Final Karaoke TXT).zip # TXT+MP3 package
|
|
382
|
+
├── Artist - Title (Karaoke).cdg # Individual CDG
|
|
383
|
+
├── Artist - Title (Karaoke).mp3 # Karaoke audio
|
|
384
|
+
├── Artist - Title (Karaoke).lrc # LRC lyrics
|
|
385
|
+
├── Artist - Title (Karaoke).ass # ASS subtitles
|
|
386
|
+
├── Artist - Title (Title).mov # Title screen video
|
|
387
|
+
├── Artist - Title (End).mov # End screen video
|
|
388
|
+
├── Artist - Title (Instrumental...).flac # Clean instrumental
|
|
389
|
+
├── Artist - Title (Instrumental +BV...).flac # With backing vocals
|
|
390
|
+
└── stems/ # All audio stems
|
|
391
|
+
    ├── ...Vocals....flac
|
|
392
|
+
    ├── ...Bass....flac
|
|
393
|
+
    ├── ...Drums....flac
|
|
394
|
+
    └── ...
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
---
|
|
398
|
+
|
|
399
|
+
## 🏗️ Deploy Your Own Backend
|
|
400
|
+
|
|
401
|
+
The cloud backend runs on Google Cloud Platform using:
|
|
402
|
+
- **Cloud Run**: Serverless API hosting
|
|
403
|
+
- **Firestore**: Job state management
|
|
404
|
+
- **Cloud Storage**: File uploads and outputs
|
|
405
|
+
- **Modal.com**: GPU-accelerated audio separation
|
|
406
|
+
- **AudioShake**: Lyrics transcription API
|
|
407
|
+
|
|
408
|
+
### Prerequisites
|
|
409
|
+
|
|
410
|
+
- Google Cloud account with billing enabled
|
|
411
|
+
- [Pulumi CLI](https://www.pulumi.com/docs/install/)
|
|
412
|
+
- Modal.com account (for audio separation)
|
|
413
|
+
- AudioShake API key
|
|
414
|
+
|
|
415
|
+
### Infrastructure Setup
|
|
416
|
+
|
|
417
|
+
```bash
|
|
418
|
+
cd infrastructure
|
|
419
|
+
|
|
420
|
+
# Install dependencies
|
|
421
|
+
pip install -r requirements.txt
|
|
422
|
+
|
|
423
|
+
# Login to Pulumi
|
|
424
|
+
pulumi login
|
|
425
|
+
|
|
426
|
+
# Create a stack
|
|
427
|
+
pulumi stack init prod
|
|
428
|
+
|
|
429
|
+
# Configure GCP project
|
|
430
|
+
pulumi config set gcp:project your-project-id
|
|
431
|
+
pulumi config set gcp:region us-central1
|
|
432
|
+
|
|
433
|
+
# Deploy infrastructure
|
|
434
|
+
pulumi up
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
This creates:
|
|
438
|
+
- Firestore database
|
|
439
|
+
- Cloud Storage bucket
|
|
440
|
+
- Artifact Registry
|
|
441
|
+
- Service account with IAM roles
|
|
442
|
+
- Secret Manager secrets (you add values)
|
|
443
|
+
|
|
444
|
+
### Add Secret Values
|
|
445
|
+
|
|
446
|
+
```bash
|
|
447
|
+
# AudioShake API key
|
|
448
|
+
echo -n "your-audioshake-key" | gcloud secrets versions add audioshake-api-key --data-file=-
|
|
449
|
+
|
|
450
|
+
# Genius API key
|
|
451
|
+
echo -n "your-genius-key" | gcloud secrets versions add genius-api-key --data-file=-
|
|
452
|
+
|
|
453
|
+
# Modal API URL
|
|
454
|
+
echo -n "https://your-modal-url" | gcloud secrets versions add audio-separator-api-url --data-file=-
|
|
455
|
+
|
|
456
|
+
# YouTube OAuth credentials (JSON)
|
|
457
|
+
gcloud secrets versions add youtube-oauth-credentials --data-file=./youtube-creds.json
|
|
458
|
+
|
|
459
|
+
# Dropbox OAuth credentials (JSON)
|
|
460
|
+
gcloud secrets versions add dropbox-oauth-credentials --data-file=./dropbox-creds.json
|
|
461
|
+
|
|
462
|
+
# Google Drive service account (JSON)
|
|
463
|
+
gcloud secrets versions add gdrive-service-account --data-file=./gdrive-sa.json
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
### Deploy Cloud Run
|
|
467
|
+
|
|
468
|
+
```bash
|
|
469
|
+
# Build and deploy
|
|
470
|
+
gcloud builds submit --config=cloudbuild.yaml
|
|
471
|
+
|
|
472
|
+
# Get outputs from Pulumi
|
|
473
|
+
SA_EMAIL=$(pulumi stack output service_account_email)
|
|
474
|
+
BUCKET_NAME=$(pulumi stack output bucket_name)
|
|
475
|
+
|
|
476
|
+
# Deploy Cloud Run service
|
|
477
|
+
gcloud run deploy karaoke-backend \
|
|
478
|
+
--image us-central1-docker.pkg.dev/YOUR-PROJECT/karaoke-repo/karaoke-backend:latest \
|
|
479
|
+
--platform managed \
|
|
480
|
+
--region us-central1 \
|
|
481
|
+
--allow-unauthenticated \
|
|
482
|
+
--service-account $SA_EMAIL \
|
|
483
|
+
--memory 2Gi \
|
|
484
|
+
--cpu 2 \
|
|
485
|
+
--timeout 600 \
|
|
486
|
+
--set-env-vars="GOOGLE_CLOUD_PROJECT=YOUR-PROJECT,GCS_BUCKET_NAME=$BUCKET_NAME"
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
### Point CLI to Your Backend
|
|
490
|
+
|
|
491
|
+
```bash
|
|
492
|
+
export KARAOKE_GEN_URL="https://your-backend.run.app"
|
|
493
|
+
karaoke-gen-remote ./song.flac "Artist" "Title"
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
---
|
|
497
|
+
|
|
498
|
+
## 🔌 Backend API Reference
|
|
499
|
+
|
|
500
|
+
The backend exposes a REST API for job management.
|
|
501
|
+
|
|
502
|
+
### Job Submission
|
|
503
|
+
|
|
504
|
+
**POST** `/api/jobs/upload`
|
|
505
|
+
|
|
506
|
+
Submit a new karaoke generation job with audio file and options.
|
|
507
|
+
|
|
508
|
+
```bash
|
|
509
|
+
curl -X POST "https://api.example.com/api/jobs/upload" \
|
|
510
|
+
-F "file=@song.flac" \
|
|
511
|
+
-F "artist=ABBA" \
|
|
512
|
+
-F "title=Waterloo" \
|
|
513
|
+
-F "enable_cdg=true" \
|
|
514
|
+
-F "enable_txt=true" \
|
|
515
|
+
-F "brand_prefix=NOMAD" \
|
|
516
|
+
-F "style_params=@styles.json" \
|
|
517
|
+
-F "style_karaoke_background=@background.png"
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
### Job Status
|
|
521
|
+
|
|
522
|
+
**GET** `/api/jobs/{job_id}`
|
|
523
|
+
|
|
524
|
+
Get job status and details.
|
|
525
|
+
|
|
526
|
+
```bash
|
|
527
|
+
curl "https://api.example.com/api/jobs/abc12345"
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
### List Jobs
|
|
531
|
+
|
|
532
|
+
**GET** `/api/jobs`
|
|
533
|
+
|
|
534
|
+
List all jobs with optional status filter.
|
|
535
|
+
|
|
536
|
+
```bash
|
|
537
|
+
curl "https://api.example.com/api/jobs?status=complete&limit=10"
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
### Cancel Job
|
|
541
|
+
|
|
542
|
+
**POST** `/api/jobs/{job_id}/cancel`
|
|
543
|
+
|
|
544
|
+
Cancel a running job.
|
|
545
|
+
|
|
546
|
+
```bash
|
|
547
|
+
curl -X POST "https://api.example.com/api/jobs/abc12345/cancel" \
|
|
548
|
+
-H "Content-Type: application/json" \
|
|
549
|
+
-d '{"reason": "User cancelled"}'
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
### Delete Job
|
|
553
|
+
|
|
554
|
+
**DELETE** `/api/jobs/{job_id}`
|
|
555
|
+
|
|
556
|
+
Delete a job and its files.
|
|
557
|
+
|
|
558
|
+
```bash
|
|
559
|
+
curl -X DELETE "https://api.example.com/api/jobs/abc12345?delete_files=true"
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
### Lyrics Review
|
|
563
|
+
|
|
564
|
+
**GET** `/api/review/{job_id}/correction-data`
|
|
565
|
+
|
|
566
|
+
Get correction data for lyrics review.
|
|
567
|
+
|
|
568
|
+
**POST** `/api/review/{job_id}/complete`
|
|
569
|
+
|
|
570
|
+
Submit corrected lyrics and trigger video rendering.
|
|
571
|
+
|
|
572
|
+
### Instrumental Selection
|
|
573
|
+
|
|
574
|
+
**GET** `/api/jobs/{job_id}/instrumental-options`
|
|
575
|
+
|
|
576
|
+
Get available instrumental options.
|
|
577
|
+
|
|
578
|
+
**POST** `/api/jobs/{job_id}/select-instrumental`
|
|
579
|
+
|
|
580
|
+
Submit instrumental selection (clean or with_backing).
|
|
581
|
+
|
|
582
|
+
```bash
|
|
583
|
+
curl -X POST "https://api.example.com/api/jobs/abc12345/select-instrumental" \
|
|
584
|
+
-H "Content-Type: application/json" \
|
|
585
|
+
-d '{"selection": "clean"}'
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
### Download Files
|
|
589
|
+
|
|
590
|
+
**GET** `/api/jobs/{job_id}/download-urls`
|
|
591
|
+
|
|
592
|
+
Get download URLs for all output files.
|
|
593
|
+
|
|
594
|
+
**GET** `/api/jobs/{job_id}/download/{category}/{file_key}`
|
|
595
|
+
|
|
596
|
+
Stream download a specific file.
|
|
597
|
+
|
|
598
|
+
### Health Check
|
|
599
|
+
|
|
600
|
+
**GET** `/api/health`
|
|
601
|
+
|
|
602
|
+
Check backend health status.
|
|
603
|
+
|
|
604
|
+
---
|
|
605
|
+
|
|
606
|
+
## 🔧 Troubleshooting
|
|
607
|
+
|
|
608
|
+
### "No suitable files found for processing"
|
|
609
|
+
|
|
610
|
+
This error occurs during the finalisation step when the `(With Vocals).mkv` file is missing. This file is created during lyrics transcription.
|
|
611
|
+
|
|
612
|
+
**Most common cause:** No transcription provider configured.
|
|
613
|
+
|
|
614
|
+
**Quick fix:**
|
|
615
|
+
1. Check if transcription providers are configured:
|
|
616
|
+
```bash
|
|
617
|
+
echo $AUDIOSHAKE_API_TOKEN
|
|
618
|
+
echo $RUNPOD_API_KEY
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
2. If both are empty, set up a provider (see [Transcription Provider Setup](#transcription-provider-setup))
|
|
622
|
+
|
|
623
|
+
3. Or use `--skip-lyrics` for instrumental-only karaoke:
|
|
624
|
+
```bash
|
|
625
|
+
karaoke-gen --skip-lyrics "Artist" "Title"
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
**Other causes:**
|
|
629
|
+
- Invalid API credentials - verify your tokens are correct and active
|
|
630
|
+
- API service unavailable - check service status pages
|
|
631
|
+
- Network connectivity issues - ensure you can reach the API endpoints
|
|
632
|
+
- Transcription timeout - try again or use a different provider
|
|
633
|
+
|
|
634
|
+
### Transcription Fails Silently
|
|
635
|
+
|
|
636
|
+
If karaoke-gen runs without errors but produces no synchronized lyrics:
|
|
637
|
+
|
|
638
|
+
1. **Check logs** - Run with `--log_level debug` for detailed output:
|
|
639
|
+
```bash
|
|
640
|
+
karaoke-gen --log_level debug "Artist" "Title"
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
2. **Verify environment variables** - Ensure API tokens are exported in your shell:
|
|
644
|
+
```bash
|
|
645
|
+
# Check if set
|
|
646
|
+
printenv | grep -E "(AUDIOSHAKE|RUNPOD|WHISPER)"
|
|
647
|
+
|
|
648
|
+
# Set in current session
|
|
649
|
+
export AUDIOSHAKE_API_TOKEN="your_token"
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
3. **Test API connectivity** - Verify you can reach the transcription service
|
|
653
|
+
|
|
654
|
+
### "No lyrics found from any source"
|
|
655
|
+
|
|
656
|
+
This warning means no reference lyrics were fetched from online sources (Genius, Spotify, Musixmatch). The transcription will still work, but auto-correction may be less accurate.
|
|
657
|
+
|
|
658
|
+
**To fix:**
|
|
659
|
+
- Set `GENIUS_API_TOKEN` for Genius lyrics
|
|
660
|
+
- Set `SPOTIFY_COOKIE_SP_DC` for Spotify lyrics
|
|
661
|
+
- Set `RAPIDAPI_KEY` for Musixmatch lyrics
|
|
662
|
+
- Or provide lyrics manually with `--lyrics_file /path/to/lyrics.txt`
|
|
663
|
+
|
|
664
|
+
### Video Quality Issues
|
|
665
|
+
|
|
666
|
+
If the output video has quality problems:
|
|
667
|
+
- Ensure FFmpeg is properly installed: `ffmpeg -version`
|
|
668
|
+
- Check available codecs: `ffmpeg -codecs`
|
|
669
|
+
- For 4K output, ensure sufficient disk space (10GB+ per track)
|
|
670
|
+
|
|
671
|
+
---
|
|
672
|
+
|
|
673
|
+
## 🧪 Development
|
|
674
|
+
|
|
675
|
+
### Running Tests
|
|
676
|
+
|
|
677
|
+
```bash
|
|
678
|
+
# Run all tests
|
|
679
|
+
pytest tests/ backend/tests/ -v
|
|
680
|
+
|
|
681
|
+
# Run only unit tests
|
|
682
|
+
pytest tests/unit/ -v
|
|
683
|
+
|
|
684
|
+
# Run with coverage
|
|
685
|
+
pytest tests/unit/ -v --cov=karaoke_gen --cov-report=term-missing
|
|
686
|
+
```
|
|
687
|
+
|
|
688
|
+
### Project Structure
|
|
689
|
+
|
|
690
|
+
```
|
|
691
|
+
karaoke-gen/
|
|
692
|
+
├── karaoke_gen/ # Core CLI package
|
|
693
|
+
│   ├── utils/
|
|
694
|
+
│   │   ├── gen_cli.py # Local CLI (karaoke-gen)
|
|
695
|
+
│   │   └── remote_cli.py # Remote CLI (karaoke-gen-remote)
|
|
696
|
+
│   ├── karaoke_finalise/ # Video encoding, packaging, distribution
|
|
697
|
+
│   └── style_loader.py # Unified style configuration
|
|
698
|
+
├── backend/ # Cloud backend (FastAPI)
|
|
699
|
+
│   ├── api/routes/ # API endpoints
|
|
700
|
+
│   ├── workers/ # Background processing workers
|
|
701
|
+
│   └── services/ # Business logic services
|
|
702
|
+
├── infrastructure/ # Pulumi IaC for GCP
|
|
703
|
+
├── docs/ # Documentation
|
|
704
|
+
└── tests/ # Test suite
|
|
705
|
+
```
|
|
706
|
+
|
|
707
|
+
---
|
|
708
|
+
|
|
709
|
+
## 📄 License
|
|
710
|
+
|
|
711
|
+
MIT
|
|
712
|
+
|
|
713
|
+
---
|
|
714
|
+
|
|
715
|
+
## 🤝 Contributing
|
|
716
|
+
|
|
717
|
+
Contributions are welcome! Please see our contributing guidelines.
|
|
718
|
+
|