karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287)
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen/config.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Configuration utilities for karaoke generation.
3
+
4
+ This module provides configuration loading and setup functions.
5
+ Style loading is delegated to the unified style_loader module.
6
+ """
7
+ import os
8
+ import sys
9
+ import logging
10
+
11
+ # Import from the unified style loader module
12
+ from .style_loader import (
13
+ # Re-export defaults for backwards compatibility
14
+ DEFAULT_STYLE_PARAMS,
15
+ DEFAULT_INTRO_STYLE as _DEFAULT_INTRO,
16
+ DEFAULT_END_STYLE as _DEFAULT_END,
17
+ # Functions
18
+ load_style_params_from_file,
19
+ apply_style_overrides,
20
+ get_intro_format as _get_intro_format,
21
+ get_end_format as _get_end_format,
22
+ get_video_durations,
23
+ get_existing_images,
24
+ )
25
+
26
+
27
def load_style_params(style_params_json, style_overrides, logger):
    """
    Build the style parameter dictionary used by the local CLI.

    The JSON file is read through ``load_style_params_from_file`` with
    ``exit_on_error=True``; overrides are applied only when
    ``style_overrides`` is truthy.

    Args:
        style_params_json: Path to style JSON file, or None for defaults.
        style_overrides: Dict of "section.key" -> value overrides.
        logger: Logger handed to the style loader helpers.

    Returns:
        Dictionary of style parameters.
    """
    params = load_style_params_from_file(style_params_json, logger=logger, exit_on_error=True)

    # Nothing to override: hand back the loaded parameters untouched.
    if not style_overrides:
        return params

    apply_style_overrides(params, style_overrides, logger)
    return params
51
+
52
+
53
def setup_title_format(style_params):
    """
    Return the title (intro) format dictionary for ``style_params``.

    Kept for backwards compatibility; the work is done by
    style_loader.get_intro_format().
    """
    title_format = _get_intro_format(style_params)
    return title_format
61
+
62
+
63
def setup_end_format(style_params):
    """
    Return the end-screen format dictionary for ``style_params``.

    Kept for backwards compatibility; the work is done by
    style_loader.get_end_format().
    """
    end_format = _get_end_format(style_params)
    return end_format
71
+
72
+
73
def setup_ffmpeg_command(log_level):
    """
    Build the base ffmpeg command string for the given log level.

    Args:
        log_level: A ``logging`` level; ``logging.DEBUG`` selects ffmpeg's
            ``verbose`` log level, anything else selects ``fatal``.

    Returns:
        Command prefix of the form
        ``"<ffmpeg> -hide_banner -nostats -loglevel <level>"``.
    """
    # Default: rely on the system-installed FFmpeg binary (Mac/Linux, or a
    # non-frozen Windows install).
    ffmpeg_path = "ffmpeg"

    if getattr(sys, "frozen", False):
        # PyInstaller exposes its unpack directory as sys._MEIPASS. Other
        # freezers set sys.frozen without it, so look it up defensively and
        # fall back to the system ffmpeg instead of raising AttributeError.
        bundle_dir = getattr(sys, "_MEIPASS", None)
        if bundle_dir:
            ffmpeg_path = os.path.join(bundle_dir, "ffmpeg.exe")

    ffmpeg_log_level = "verbose" if log_level == logging.DEBUG else "fatal"
    return f"{ffmpeg_path} -hide_banner -nostats -loglevel {ffmpeg_log_level}"
karaoke_gen/file_handler.py ADDED
@@ -0,0 +1,387 @@
1
+ import os
2
+ import glob
3
+ import logging
4
+ import shutil
5
+ import tempfile
6
+ from .utils import sanitize_filename
7
+
8
+ try:
9
+ import yt_dlp
10
+ YT_DLP_AVAILABLE = True
11
+ except ImportError:
12
+ YT_DLP_AVAILABLE = False
13
+
14
+
15
+ # File handling helpers: copying local media, downloading via yt-dlp, and ffmpeg-based conversion
16
+ class FileHandler:
17
def __init__(self, logger, ffmpeg_base_command, create_track_subfolders, dry_run):
    """
    Remember the collaborators and flags the file helpers rely on.

    Args:
        logger: Logger used for all progress and error messages.
        ffmpeg_base_command: Command prefix prepended to every ffmpeg call.
        create_track_subfolders: Whether output goes into a per-track folder.
        dry_run: Flag stored for methods that skip side effects when set.
    """
    (
        self.logger,
        self.ffmpeg_base_command,
        self.create_track_subfolders,
        self.dry_run,
    ) = (logger, ffmpeg_base_command, create_track_subfolders, dry_run)
22
+
23
def _file_exists(self, file_path):
    """Return True if ``file_path`` is an existing regular file, logging when it is."""
    if not os.path.isfile(file_path):
        return False

    self.logger.info(f"File already exists, skipping creation: {file_path}")
    return True
29
+
30
+ # Media acquisition: local copies, AudioFetcher results, and URL downloads
31
def copy_input_media(self, input_media, output_filename_no_extension):
    """
    Copy a local media file next to the other outputs.

    The destination is ``output_filename_no_extension`` plus the source
    file's own extension. When source and destination resolve to the same
    absolute path nothing is copied.

    Args:
        input_media: Path to the existing local media file.
        output_filename_no_extension: Destination path without extension.

    Returns:
        ``input_media`` when no copy was needed, otherwise the path of the
        newly written copy.
    """
    log = self.logger
    log.debug(f"Copying media from local path {input_media} to filename {output_filename_no_extension} + existing extension")

    _, source_extension = os.path.splitext(input_media)
    copied_file_name = output_filename_no_extension + source_extension
    log.debug(f"Target filename: {copied_file_name}")

    # Guard: copying a file onto itself would fail, so short-circuit.
    already_in_place = os.path.abspath(input_media) == os.path.abspath(copied_file_name)
    if already_in_place:
        log.info("Source and destination are the same file, skipping copy")
        return input_media

    log.debug(f"Copying {input_media} to {copied_file_name}")
    shutil.copy2(input_media, copied_file_name)
    return copied_file_name
46
+
47
def download_audio_from_fetcher_result(self, filepath, output_filename_no_extension):
    """
    Place a file downloaded via the AudioFetcher at the expected location.

    The file is copied (the original is left in place) to
    ``output_filename_no_extension`` plus the downloaded file's extension.

    Args:
        filepath: Path to the downloaded audio file from AudioFetcher
        output_filename_no_extension: Desired output filename without extension

    Returns:
        Path to the audio file at the target location, or None when
        ``filepath`` does not exist.
    """
    if os.path.isfile(filepath):
        # Keep whatever extension the fetcher produced.
        extension = os.path.splitext(filepath)[1]
        target_path = f"{output_filename_no_extension}{extension}"

        if os.path.abspath(filepath) != os.path.abspath(target_path):
            self.logger.debug(f"Copying downloaded file from {filepath} to {target_path}")
            shutil.copy2(filepath, target_path)
        else:
            # Already where it needs to be; nothing to copy.
            self.logger.debug(f"Downloaded file already at target location: {target_path}")

        return target_path

    self.logger.error(f"Downloaded file not found: {filepath}")
    return None
79
+
80
def download_video(self, url, output_filename_no_extension, cookies_str=None):
    """
    Download audio from a URL (YouTube, etc.) using yt-dlp.

    The best available audio is requested and extracted via yt-dlp's
    FFmpegExtractAudio post-processor. Because post-processing can change
    the extension, the resulting file is located by probing the known
    audio extensions next to ``output_filename_no_extension``.

    Args:
        url: URL to download from (YouTube, Vimeo, etc.)
        output_filename_no_extension: Output filename without extension
        cookies_str: Optional cookies string for authenticated downloads;
            written to a temporary file that is removed afterwards.

    Returns:
        Path to downloaded audio file, or None if failed
    """
    if not YT_DLP_AVAILABLE:
        self.logger.error("yt-dlp is not installed. Install with: pip install yt-dlp")
        return None

    self.logger.info(f"Downloading audio from URL: {url}")

    # Single source of truth for the extensions we accept as the result.
    audio_extensions = ('m4a', 'opus', 'webm', 'mp3', 'flac', 'wav', 'ogg', 'aac')

    # Configure yt-dlp options
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_filename_no_extension + '.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'best',
            'preferredquality': '0',  # Best quality
        }],
        'quiet': True,
        'no_warnings': True,
        'extract_flat': False,
        # Anti-detection options
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'retries': 3,
        'fragment_retries': 3,
        'http_headers': {
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        },
    }

    def _discard(path):
        """Best-effort removal of the temporary cookie file."""
        try:
            os.unlink(path)
        except OSError as e:
            self.logger.debug(f"Could not remove temporary cookies file {path}: {e}")

    # Handle cookies if provided. The file is created with delete=False so
    # yt-dlp can reopen it by name; we are responsible for removing it.
    cookie_file_path = None
    if cookies_str:
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as cookie_file:
                # Record the path BEFORE writing so a failed write cannot
                # leave an orphaned temp file behind.
                cookie_file_path = cookie_file.name
                cookie_file.write(cookies_str)
            ydl_opts['cookiefile'] = cookie_file_path
        except Exception as e:
            self.logger.warning(f"Failed to write cookies file: {e}")
            if cookie_file_path is not None:
                _discard(cookie_file_path)
            # Continue without cookies rather than aborting the download.
            cookie_file_path = None

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)

            if info is None:
                self.logger.error("Failed to extract info from URL")
                return None

            # Find the downloaded file.
            # The actual filename might differ from template due to post-processing
            downloaded_file = None

            # Fast path: exact, lower-case extensions.
            for ext in audio_extensions:
                candidate = f"{output_filename_no_extension}.{ext}"
                if os.path.exists(candidate):
                    downloaded_file = candidate
                    break

            if downloaded_file is None:
                # Fallback: scan siblings sharing the prefix. The prefix is
                # escaped because titles frequently contain glob
                # metacharacters such as "[Live]", and the extension check
                # is case-insensitive so e.g. ".MP3" is still recognised.
                dotted_extensions = tuple(f".{ext}" for ext in audio_extensions)
                pattern = f"{glob.escape(output_filename_no_extension)}.*"
                for match in sorted(glob.glob(pattern)):
                    if match.lower().endswith(dotted_extensions):
                        downloaded_file = match
                        break

            if downloaded_file and os.path.exists(downloaded_file):
                self.logger.info(f"Successfully downloaded: {downloaded_file}")
                return downloaded_file

            self.logger.error("Downloaded file not found after yt-dlp completed")
            return None

    except yt_dlp.DownloadError as e:
        self.logger.error(f"yt-dlp download error: {e}")
        return None
    except Exception as e:
        self.logger.error(f"Failed to download from URL: {e}")
        return None
    finally:
        # Clean up cookie file if we created one
        if cookie_file_path is not None:
            _discard(cookie_file_path)
187
+
188
def extract_metadata_from_url(self, url):
    """
    Extract metadata (artist, title) from a URL without downloading.

    Uses yt-dlp to fetch the info dict, then derives artist/title:
    an "Artist - Title" video title is split on the first " - ";
    otherwise the uploader/channel/artist field is used as the artist.
    Common suffixes such as "(Official Video)" are stripped from the title.

    Args:
        url: URL to extract metadata from

    Returns:
        Dict with 'artist', 'title', 'duration', and 'raw_info', or None if failed
    """
    import re

    if not YT_DLP_AVAILABLE:
        self.logger.error("yt-dlp is not installed. Install with: pip install yt-dlp")
        return None

    self.logger.info(f"Extracting metadata from URL: {url}")

    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'extract_flat': False,
        'skip_download': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

            if info is None:
                self.logger.error("Failed to extract metadata from URL")
                return None

            # The key may be present with a None value; normalise to '' so
            # the string handling below cannot raise TypeError.
            raw_title = info.get('title') or ''
            uploader = info.get('uploader', '') or info.get('channel', '') or info.get('artist', '')
            duration = info.get('duration', 0)

            # Attempt to parse "Artist - Title" format from title
            artist = None
            title = raw_title
            if ' - ' in raw_title:
                artist_part, _, title_part = raw_title.partition(' - ')
                artist = artist_part.strip()
                title = title_part.strip()

            # Fall back to uploader as artist if not found in title
            if not artist:
                artist = uploader

            # Clean up title (remove common suffixes like "(Official Video)")
            title_cleanup_patterns = [
                '(official video)', '(official music video)', '(official audio)',
                '(lyric video)', '(lyrics)', '(visualizer)', '(music video)',
                '[official video]', '[official music video]', '[official audio]',
                '(hd)', '(4k)', '(remastered)', '| official video', '| official audio',
            ]
            for pattern in title_cleanup_patterns:
                # Match case-insensitively against the ORIGINAL title so the
                # cut position is valid for it. Indexing via title.lower()
                # is wrong when lowercasing changes the string length
                # (e.g. "İ" lowercases to two code points).
                found = re.search(re.escape(pattern), title, flags=re.IGNORECASE)
                if found:
                    title = title[:found.start()].strip()

            return {
                'artist': artist,
                'title': title,
                'duration': duration,
                'raw_info': info,
            }

    except Exception as e:
        self.logger.error(f"Failed to extract metadata from URL: {e}")
        return None
265
+
266
def extract_still_image_from_video(self, input_filename, output_filename_no_extension):
    """
    Grab a single frame at the 30 second mark as a PNG using ffmpeg.

    ffmpeg is invoked with an argument list rather than through a shell,
    so filenames containing quotes, ``$`` or backticks are passed through
    verbatim instead of being interpreted by the shell.

    Args:
        input_filename: Path to the source video.
        output_filename_no_extension: Output path; ".png" is appended.

    Returns:
        The output PNG path. ffmpeg's exit status is not checked, so the
        file is not guaranteed to exist.
    """
    import shlex
    import subprocess

    output_filename = output_filename_no_extension + ".png"
    self.logger.info("Extracting still image from position 30s input media")

    # Human-readable form of the command, kept for the debug log only.
    ffmpeg_command = f'{self.ffmpeg_base_command} -i "{input_filename}" -ss 00:00:30 -vframes 1 "{output_filename}"'
    self.logger.debug(f"Running command: {ffmpeg_command}")

    # Non-POSIX splitting on Windows keeps backslashes in the ffmpeg path.
    base_args = shlex.split(self.ffmpeg_base_command, posix=(os.name != "nt"))
    ffmpeg_args = base_args + ["-i", input_filename, "-ss", "00:00:30", "-vframes", "1", output_filename]
    try:
        subprocess.run(ffmpeg_args, check=False)
    except OSError as e:
        # Mirrors the previous os.system behaviour: a missing ffmpeg binary
        # is reported but does not raise.
        self.logger.error(f"Failed to run ffmpeg: {e}")

    return output_filename
273
+
274
def convert_to_wav(self, input_filename, output_filename_no_extension):
    """
    Convert input audio to WAV format, with input validation.

    The input must exist, be non-empty, and contain at least one audio
    stream according to ffprobe. ffprobe and ffmpeg are invoked with
    argument lists (no shell), so unusual characters in filenames are
    passed through verbatim.

    Args:
        input_filename: Path to the source media file.
        output_filename_no_extension: Output path; ".wav" is appended.

    Returns:
        The output WAV path. With ``dry_run`` set, ffmpeg is not run.
        ffmpeg is called with ``-n``, so an existing output is not
        overwritten.

    Raises:
        Exception: If the input is missing, empty, or has no audio stream
            (also raised when ffprobe cannot be executed).
    """
    import shlex
    import subprocess

    # Validate input file exists and is readable
    if not os.path.isfile(input_filename):
        raise Exception(f"Input audio file not found: {input_filename}")

    if os.path.getsize(input_filename) == 0:
        raise Exception(f"Input audio file is empty: {input_filename}")

    # Validate input file format using ffprobe
    probe_args = [
        "ffprobe", "-v", "error",
        "-show_entries", "stream=codec_type",
        "-of", "default=noprint_wrappers=1",
        input_filename,
    ]
    try:
        probe_output = subprocess.run(
            probe_args, stdout=subprocess.PIPE, universal_newlines=True, check=False
        ).stdout
    except OSError:
        # ffprobe unavailable: behave as before (empty output -> rejected).
        probe_output = ""

    if "codec_type=audio" not in probe_output:
        raise Exception(f"No valid audio stream found in file: {input_filename}")

    output_filename = output_filename_no_extension + ".wav"
    self.logger.info("Converting input media to audio WAV file")

    # Human-readable form of the command, kept for the debug log only.
    ffmpeg_command = f'{self.ffmpeg_base_command} -n -i "{input_filename}" "{output_filename}"'
    self.logger.debug(f"Running command: {ffmpeg_command}")

    if not self.dry_run:
        # Non-POSIX splitting on Windows keeps backslashes in the ffmpeg path.
        base_args = shlex.split(self.ffmpeg_base_command, posix=(os.name != "nt"))
        try:
            subprocess.run(base_args + ["-n", "-i", input_filename, output_filename], check=False)
        except OSError as e:
            # Mirrors the previous os.system behaviour: report, don't raise.
            self.logger.error(f"Failed to run ffmpeg: {e}")

    return output_filename
297
+
298
def setup_output_paths(self, output_dir, artist, title):
    """Compute (and create if needed) the output directory for a track.

    Args:
        output_dir: Base output directory.
        artist: Artist name, or None.
        title: Track title, or None.

    Returns:
        A ``(track_output_dir, artist_title)`` tuple. ``artist_title`` is
        "<artist> - <title>" when both are given, otherwise whichever one was
        provided (after sanitizing). ``track_output_dir`` is ``output_dir``
        itself, or a subfolder named ``artist_title`` when
        ``self.create_track_subfolders`` is set.

    Raises:
        ValueError: If both artist and title are None.
    """
    if title is None and artist is None:
        raise ValueError("Error: At least title or artist must be provided")

    if artist is None:
        # Only the title is known: use it alone for the path name.
        artist_title = sanitize_filename(title)
    elif title is None:
        # Only the artist is known: use it alone rather than formatting None into the name.
        artist_title = sanitize_filename(artist)
    else:
        sanitized_artist = sanitize_filename(artist)
        sanitized_title = sanitize_filename(title)
        artist_title = f"{sanitized_artist} - {sanitized_title}"

    track_output_dir = output_dir
    if self.create_track_subfolders:
        track_output_dir = os.path.join(output_dir, f"{artist_title}")

    if not os.path.exists(track_output_dir):
        self.logger.debug(f"Output dir {track_output_dir} did not exist, creating")
        # exist_ok avoids a crash if another process creates the directory
        # between the existence check and this call.
        os.makedirs(track_output_dir, exist_ok=True)

    return track_output_dir, artist_title
320
+
321
def backup_existing_outputs(self, track_output_dir, artist, title):
    """
    Backup existing outputs to a versioned folder.

    Moves the "(With Vocals)", "(Karaoke)" and "(Final Karaoke*)" outputs and
    the "lyrics" directory into the first unused "version-N" subfolder of
    track_output_dir. In dry-run mode the moves are logged but not performed.

    Args:
        track_output_dir: The directory containing the track outputs
        artist: The artist name
        title: The track title

    Returns:
        The path to the original input audio file

    Raises:
        Exception: If no WAV file can be found in track_output_dir.
    """
    self.logger.info(f"Backing up existing outputs for {artist} - {title}")

    # Sanitize artist and title for filenames
    sanitized_artist = sanitize_filename(artist)
    sanitized_title = sanitize_filename(title)
    base_name = f"{sanitized_artist} - {sanitized_title}"

    # Directory and base name are literal text, not patterns: escape them so
    # glob metacharacters in names (e.g. "Song [Live]") still match on disk.
    escaped_dir = glob.escape(track_output_dir)
    escaped_base_name = glob.escape(base_name)

    # Find the next available version number
    version_num = 1
    while os.path.exists(os.path.join(track_output_dir, f"version-{version_num}")):
        version_num += 1

    version_dir = os.path.join(track_output_dir, f"version-{version_num}")
    self.logger.info(f"Creating backup directory: {version_dir}")
    os.makedirs(version_dir, exist_ok=True)

    # Find the input audio file (we'll need this for re-running the transcription)
    input_audio_wav = os.path.join(track_output_dir, f"{base_name}.wav")
    if not os.path.exists(input_audio_wav):
        self.logger.warning(f"Input audio file not found: {input_audio_wav}")
        # Try to find any WAV file
        wav_files = glob.glob(os.path.join(escaped_dir, "*.wav"))
        if wav_files:
            input_audio_wav = wav_files[0]
            self.logger.info(f"Using alternative input audio file: {input_audio_wav}")
        else:
            raise Exception(f"No input audio file found in {track_output_dir}")

    # List of file patterns to move (the trailing wildcards are intentional)
    file_patterns = [
        f"{escaped_base_name} (With Vocals).*",
        f"{escaped_base_name} (Karaoke).*",
        f"{escaped_base_name} (Final Karaoke*).*",
    ]

    # Move files matching patterns to version directory
    for pattern in file_patterns:
        for file_path in glob.glob(os.path.join(escaped_dir, pattern)):
            if os.path.isfile(file_path):
                dest_path = os.path.join(version_dir, os.path.basename(file_path))
                self.logger.info(f"Moving {file_path} to {dest_path}")
                if not self.dry_run:
                    shutil.move(file_path, dest_path)

    # Also backup the lyrics directory
    lyrics_dir = os.path.join(track_output_dir, "lyrics")
    if os.path.exists(lyrics_dir):
        lyrics_backup_dir = os.path.join(version_dir, "lyrics")
        self.logger.info(f"Backing up lyrics directory to {lyrics_backup_dir}")
        if not self.dry_run:
            # A single move relocates the directory and removes the original
            # in one step, replacing the previous copy-then-delete pair.
            shutil.move(lyrics_dir, lyrics_backup_dir)

    return input_audio_wav
@@ -0,0 +1,45 @@
1
+ """
2
+ Instrumental Review Module - Shared core for both local and remote CLI.
3
+
4
+ This module provides audio analysis and editing functionality for instrumental
5
+ selection in karaoke generation. It's designed to be:
6
+ - Pure Python with no cloud dependencies (GCS, etc.)
7
+ - Reusable by both local CLI (karaoke-gen) and remote backend (Cloud Run)
8
+ - Easy to test without mocking cloud services
9
+
10
+ Classes:
11
+ AudioAnalyzer: Analyzes backing vocals audio for audible content
12
+ AudioEditor: Creates custom instrumentals by muting regions
13
+ WaveformGenerator: Generates waveform visualization images
14
+ InstrumentalReviewServer: Local HTTP server for browser-based review
15
+
16
+ Models:
17
+ AnalysisResult: Result of audio analysis
18
+ AudibleSegment: A detected segment of audible content
19
+ MuteRegion: A region to mute in the backing vocals
20
+ RecommendedSelection: Enum of selection recommendations
21
+ """
22
+
23
+ from .models import (
24
+ AnalysisResult,
25
+ AudibleSegment,
26
+ MuteRegion,
27
+ RecommendedSelection,
28
+ )
29
+ from .analyzer import AudioAnalyzer
30
+ from .editor import AudioEditor
31
+ from .waveform import WaveformGenerator
32
+ from .server import InstrumentalReviewServer
33
+
34
+ __all__ = [
35
+ # Models
36
+ "AnalysisResult",
37
+ "AudibleSegment",
38
+ "MuteRegion",
39
+ "RecommendedSelection",
40
+ # Classes
41
+ "AudioAnalyzer",
42
+ "AudioEditor",
43
+ "WaveformGenerator",
44
+ "InstrumentalReviewServer",
45
+ ]