karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
@@ -12,6 +12,14 @@ import psutil
12
12
  from datetime import datetime
13
13
  from pydub import AudioSegment
14
14
 
15
# Optional dependency: the remote API client only ships with newer
# audio-separator releases. When it cannot be imported, the flag is False and
# the client name is bound to None so later references do not raise NameError.
try:
    from audio_separator.remote import AudioSeparatorAPIClient
except ImportError:
    AudioSeparatorAPIClient = None
    REMOTE_API_AVAILABLE = False
else:
    REMOTE_API_AVAILABLE = True
22
+
15
23
 
16
24
  # Placeholder class or functions for audio processing
17
25
  class AudioProcessor:
@@ -44,6 +52,60 @@ class AudioProcessor:
44
52
  self.logger.info(f"File already exists, skipping creation: {file_path}")
45
53
  return exists
46
54
 
55
def pad_audio_file(self, input_audio, output_audio, padding_seconds):
    """
    Add silence to the start of an audio file using ffmpeg.

    This ensures the instrumental tracks are synchronized with vocals when
    countdown padding has been applied by the LyricsTranscriber.

    Args:
        input_audio: Path to input audio file
        output_audio: Path for output padded audio file
        padding_seconds: Amount of silence to add in seconds (e.g., 3.0)

    Raises:
        Exception: If ffmpeg exits with a non-zero status or does not finish
            within the 5 minute timeout. The underlying subprocess error is
            chained as ``__cause__`` so the original traceback is preserved.
    """
    import subprocess

    self.logger.info(f"Padding audio file with {padding_seconds}s of silence: {input_audio}")

    # Use ffmpeg to prepend silence to the audio file
    # This matches the approach used in LyricsTranscriber for vocal padding
    cmd = [
        "ffmpeg",
        "-y",  # Overwrite output file
        "-hide_banner",
        "-loglevel", "error",
        # Input 0: generated silence, limited to padding_seconds by "-t"
        "-f", "lavfi",
        "-t", str(padding_seconds),
        "-i", "anullsrc=channel_layout=stereo:sample_rate=44100",
        # Input 1: the audio to be padded
        "-i", input_audio,
        # Concatenate silence followed by the original audio into one stream
        "-filter_complex", "[0:a][1:a]concat=n=2:v=0:a=1[out]",
        "-map", "[out]",
        # NOTE(review): the output format name is passed as the codec name;
        # this holds for "flac" - confirm for any other configured format.
        "-c:a", self.lossless_output_format.lower(),
        output_audio,
    ]

    try:
        subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
            check=True,
        )
    except subprocess.CalledProcessError as e:
        error_msg = f"Failed to pad audio file {input_audio}: {e.stderr}"
        self.logger.error(error_msg)
        raise Exception(error_msg) from e
    except subprocess.TimeoutExpired as e:
        error_msg = f"Timeout while padding audio file {input_audio}"
        self.logger.error(error_msg)
        raise Exception(error_msg) from e

    self.logger.info(f"Successfully padded audio file: {output_audio}")
108
+
47
109
  def separate_audio(self, audio_file, model_name, artist_title, track_output_dir, instrumental_path, vocals_path):
48
110
  if audio_file is None or not os.path.isfile(audio_file):
49
111
  raise Exception("Error: Invalid audio source provided.")
@@ -104,9 +166,36 @@ class AudioProcessor:
104
166
  self.logger.info(f"Separation complete! Output file(s): {vocals_path} {instrumental_path}")
105
167
 
106
168
  def process_audio_separation(self, audio_file, artist_title, track_output_dir):
169
+ # Check if we should use remote API
170
+ remote_api_url = os.environ.get("AUDIO_SEPARATOR_API_URL")
171
+ if remote_api_url:
172
+ if not REMOTE_API_AVAILABLE:
173
+ self.logger.warning("AUDIO_SEPARATOR_API_URL is set but remote API client is not available. "
174
+ "Please ensure audio-separator is updated to a version that includes remote API support. "
175
+ "Falling back to local processing.")
176
+ else:
177
+ self.logger.info(f"Using remote audio separator API at: {remote_api_url}")
178
+ try:
179
+ return self._process_audio_separation_remote(audio_file, artist_title, track_output_dir, remote_api_url)
180
+ except Exception as e:
181
+ error_str = str(e)
182
+ # Don't fall back for download failures - these indicate API issues that should be fixed
183
+ if ("no files were downloaded" in error_str or
184
+ "failed to produce essential" in error_str):
185
+ self.logger.error(f"Remote API processing failed with download/file organization issue: {error_str}")
186
+ self.logger.error("This indicates an audio-separator API issue that should be fixed. Not falling back to local processing.")
187
+ raise e
188
+ else:
189
+ # Fall back for other types of errors (network issues, etc.)
190
+ self.logger.error(f"Remote API processing failed: {error_str}")
191
+ self.logger.info("Falling back to local audio separation")
192
+ else:
193
+ self.logger.info("AUDIO_SEPARATOR_API_URL not set, using local audio separation. "
194
+ "Set this environment variable to use remote GPU processing.")
195
+
107
196
  from audio_separator.separator import Separator
108
197
 
109
- self.logger.info(f"Starting audio separation process for {artist_title}")
198
+ self.logger.info(f"Starting local audio separation process for {artist_title}")
110
199
 
111
200
  # Define lock file path in system temp directory
112
201
  lock_file_path = os.path.join(tempfile.gettempdir(), "audio_separator.lock")
@@ -202,35 +291,6 @@ class AudioProcessor:
202
291
  )
203
292
  self._normalize_audio_files(result, artist_title, track_output_dir)
204
293
 
205
- # Create Audacity LOF file
206
- lof_path = os.path.join(stems_dir, f"{artist_title} (Audacity).lof")
207
- first_model = list(result["backing_vocals"].keys())[0]
208
-
209
- files_to_include = [
210
- audio_file, # Original audio
211
- result["clean_instrumental"]["instrumental"], # Clean instrumental
212
- result["backing_vocals"][first_model]["backing_vocals"], # Backing vocals
213
- result["combined_instrumentals"][first_model], # Combined instrumental+BV
214
- ]
215
-
216
- # Convert to absolute paths
217
- files_to_include = [os.path.abspath(f) for f in files_to_include]
218
-
219
- with open(lof_path, "w") as lof:
220
- for file_path in files_to_include:
221
- lof.write(f'file "{file_path}"\n')
222
-
223
- self.logger.info(f"Created Audacity LOF file: {lof_path}")
224
- result["audacity_lof"] = lof_path
225
-
226
- # Launch Audacity with multiple tracks
227
- if sys.platform == "darwin": # Check if we're on macOS
228
- if lof_path and os.path.exists(lof_path):
229
- self.logger.info(f"Launching Audacity with LOF file: {lof_path}")
230
- os.system(f'open -a Audacity "{lof_path}"')
231
- else:
232
- self.logger.debug("Audacity LOF file not available or not found")
233
-
234
294
  self.logger.info("Audio separation, combination, and normalization process completed")
235
295
  return result
236
296
  finally:
@@ -242,6 +302,258 @@ class AudioProcessor:
242
302
  except OSError:
243
303
  pass
244
304
 
305
def _process_audio_separation_remote(self, audio_file, artist_title, track_output_dir, remote_api_url):
    """Run the two-stage audio separation workflow through the remote API.

    Stage 1 submits the original audio with the clean instrumental model plus
    the other-stems models. Stage 2 (only when backing vocals models are
    configured) submits the clean vocals produced by stage 1. Afterwards the
    combined instrumentals are generated and the audio files are normalized.

    Returns:
        dict with the keys "clean_instrumental", "other_stems",
        "backing_vocals" and "combined_instrumentals". When the
        KARAOKE_GEN_SKIP_AUDIO_SEPARATION environment variable is set, the
        dict is returned with all four values empty.

    Raises:
        Exception: If a stage does not report status "completed", downloads
            no files, or stage 1 lacks the clean vocals / clean instrumental.
            Every exception raised inside the workflow is logged and re-raised.
    """
    self.logger.info(f"Starting remote audio separation process for {artist_title}")

    # Initialize the API client
    client = AudioSeparatorAPIClient(remote_api_url, self.logger)

    stems_dir = self._create_stems_directory(track_output_dir)
    result = {"clean_instrumental": {}, "other_stems": {}, "backing_vocals": {}, "combined_instrumentals": {}}

    if os.environ.get("KARAOKE_GEN_SKIP_AUDIO_SEPARATION"):
        return result

    try:
        # ---- Stage 1: original song -> clean instrumental model + other stems models
        stage1_models = [self.clean_instrumental_model] if self.clean_instrumental_model else []
        stage1_models.extend(self.other_stems_models)

        self.logger.info(f"Stage 1: Submitting audio separation job with models: {stage1_models}")

        stage1 = client.separate_audio_and_wait(
            audio_file,
            models=stage1_models,
            timeout=1800,  # 30 minutes timeout
            poll_interval=15,  # Check every 15 seconds
            download=True,
            output_dir=stems_dir,
            output_format=self.lossless_output_format.lower(),
        )

        if stage1["status"] != "completed":
            raise Exception(f"Stage 1 remote audio separation failed: {stage1.get('error', 'Unknown error')}")

        stage1_files = stage1["downloaded_files"]
        self.logger.info(f"Stage 1 completed. Downloaded {len(stage1_files)} files")

        # A "completed" job with nothing downloaded is treated as a hard failure.
        # The "no files were downloaded" wording is matched by the caller.
        if len(stage1_files) == 0:
            error_msg = ("Stage 1 audio separation completed successfully but no files were downloaded. "
                         "This indicates a filename encoding or API issue in the audio-separator remote service. "
                         f"Expected files for models {stage1_models} but got 0.")
            self.logger.error(error_msg)
            raise Exception(error_msg)

        # Organize the stage 1 results
        result = self._organize_stage1_remote_results(stage1_files, artist_title, track_output_dir, stems_dir)
        clean = result["clean_instrumental"]

        # Both clean vocals and clean instrumental are required to continue.
        # The "failed to produce essential" wording is matched by the caller.
        required_outputs = (("vocals", "clean vocals"), ("instrumental", "clean instrumental"))
        missing = [label for key, label in required_outputs if not clean.get(key)]
        if missing:
            error_msg = (f"Stage 1 completed but failed to produce essential clean instrumental outputs: {', '.join(missing)}. "
                         "This may indicate a model naming or file organization issue in the remote API.")
            self.logger.error(error_msg)
            raise Exception(error_msg)

        # ---- Stage 2: clean vocals -> backing vocals models (if configured)
        if clean.get("vocals") and self.backing_vocals_models:
            self.logger.info("Stage 2: Processing clean vocals for backing vocals separation...")

            stage2 = client.separate_audio_and_wait(
                clean["vocals"],
                models=self.backing_vocals_models,
                timeout=900,  # 15 minutes timeout for backing vocals
                poll_interval=10,
                download=True,
                output_dir=stems_dir,
                output_format=self.lossless_output_format.lower(),
            )

            if stage2["status"] != "completed":
                error_msg = f"Stage 2 backing vocals separation failed: {stage2.get('error', 'Unknown error')}"
                self.logger.error(error_msg)
                raise Exception(error_msg)

            stage2_files = stage2["downloaded_files"]
            self.logger.info(f"Stage 2 completed. Downloaded {len(stage2_files)} files")

            # Check if we actually got the expected files
            if len(stage2_files) == 0:
                error_msg = ("Stage 2 backing vocals separation completed successfully but no files were downloaded. "
                             "This indicates a filename encoding or API issue in the audio-separator remote service. "
                             "Expected 2 files (lead vocals + backing vocals) but got 0.")
                self.logger.error(error_msg)
                raise Exception(error_msg)

            # Organize the stage 2 results (backing vocals)
            result["backing_vocals"] = self._organize_stage2_remote_results(stage2_files, artist_title, stems_dir)
        else:
            result["backing_vocals"] = {}

        # ---- Combined instrumentals need both the clean instrumental and backing vocals
        if clean.get("instrumental") and result["backing_vocals"]:
            result["combined_instrumentals"] = self._generate_combined_instrumentals(
                clean["instrumental"], result["backing_vocals"], artist_title, track_output_dir
            )
        else:
            result["combined_instrumentals"] = {}

        # Normalize audio files
        self._normalize_audio_files(result, artist_title, track_output_dir)

        self.logger.info("Remote audio separation, combination, and normalization process completed")
        return result

    except Exception as e:
        self.logger.error(f"Error during remote audio separation: {str(e)}")
        raise e
423
+
424
def _organize_stage1_remote_results(self, downloaded_files, artist_title, track_output_dir, stems_dir):
    """Organize stage 1 separation results (clean instrumental + other stems).

    Each downloaded file is expected to be named
    ``<audio>_(<StemType>)_<modelname>.<ext>``. The stem type and model name
    are parsed from the filename, the model name is matched against
    ``self.clean_instrumental_model`` and ``self.other_stems_models``, and the
    file is moved to its final, human-readable location.

    Args:
        downloaded_files: Iterable of paths to the downloaded stem files.
        artist_title: Prefix used when building the target filenames.
        track_output_dir: Directory that receives the clean instrumental.
        stems_dir: Directory that receives every other stem.

    Returns:
        ``{"clean_instrumental": {"vocals": path, "instrumental": path},
        "other_stems": {model: {stem_type: path}}}`` containing only the
        entries for files that were actually moved.
    """
    # Function-scope import: the original imported ``re`` lazily inside the
    # per-file loop; importing once here keeps the block self-contained.
    import re

    # Order matters: the first marker found wins, exactly like the original
    # if/elif chain.
    known_stem_types = ("Vocals", "Instrumental", "Drums", "Bass", "Other", "Guitar", "Piano")
    generic_pattern = re.compile(r'_\(([^)]+)\)_([^.]+)')

    def model_matches(candidate, configured):
        # The filename carries the model name cut at its first ".", so accept
        # an exact match or a prefix match in either direction.
        return (
            candidate == configured
            or configured.startswith(candidate)
            or candidate.startswith(configured.split('.')[0])
        )

    result = {"clean_instrumental": {}, "other_stems": {}}

    for file_path in downloaded_files:
        filename = os.path.basename(file_path)
        self.logger.debug(f"Stage 1 - Processing downloaded file: {filename}")

        # Extract model name and stem type from filename
        stem_type = None
        model_name = None
        for known in known_stem_types:
            marker = f"_({known})_"
            if marker in filename:
                stem_type = known
                model_name = filename.split(marker)[1].split(".")[0]
                break
        else:
            # Unknown stem type: fall back to whatever sits in the parentheses.
            match = generic_pattern.search(filename)
            if match:
                stem_type = match.group(1)
                model_name = match.group(2)

        # An empty model name would prefix-match every configured model
        # (``"x".startswith("")`` is True), so treat it as unparseable.
        if not stem_type or not model_name:
            self.logger.warning(f"Could not parse stem type and model from filename: {filename}")
            continue

        if model_matches(model_name, self.clean_instrumental_model):
            if stem_type == "Vocals":
                target_path = os.path.join(stems_dir, f"{artist_title} (Vocals {self.clean_instrumental_model}).{self.lossless_output_format}")
                shutil.move(file_path, target_path)
                result["clean_instrumental"]["vocals"] = target_path
            elif stem_type == "Instrumental":
                target_path = os.path.join(track_output_dir, f"{artist_title} (Instrumental {self.clean_instrumental_model}).{self.lossless_output_format}")
                shutil.move(file_path, target_path)
                result["clean_instrumental"]["instrumental"] = target_path
            else:
                self.logger.debug(f"Stage 1 - Ignoring {stem_type} stem from clean instrumental model: {filename}")
            continue

        # Find the first matching other-stems model (single pass).
        matching_os_model = next(
            (os_model for os_model in self.other_stems_models if model_matches(model_name, os_model)),
            None,
        )
        if matching_os_model:
            target_path = os.path.join(stems_dir, f"{artist_title} ({stem_type} {matching_os_model}).{self.lossless_output_format}")
            shutil.move(file_path, target_path)
            result["other_stems"].setdefault(matching_os_model, {})[stem_type] = target_path
        else:
            # Previously such files were dropped without any trace in the log.
            self.logger.warning(f"Stage 1 - No configured model matches '{model_name}' for file: {filename}")

    return result
504
+
505
def _organize_stage2_remote_results(self, downloaded_files, artist_title, stems_dir):
    """Organize stage 2 separation results (backing vocals).

    Each downloaded file is expected to be named
    ``<audio>_(<StemType>)_<modelname>.<ext>``. Files whose model name matches
    an entry of ``self.backing_vocals_models`` are moved into ``stems_dir``:
    the "Vocals" stem becomes the lead vocals and the "Instrumental" stem
    becomes the backing vocals.

    Args:
        downloaded_files: Iterable of paths to the downloaded stem files.
        artist_title: Prefix used when building the target filenames.
        stems_dir: Directory that receives the renamed stems.

    Returns:
        ``{model: {"lead_vocals": path, "backing_vocals": path}}``. A matched
        model gets an entry even when the stem type is neither "Vocals" nor
        "Instrumental" (the entry then has no path for that file).
    """
    # Function-scope import: the original imported ``re`` lazily inside the
    # per-file loop; importing once here keeps the block self-contained.
    import re

    generic_pattern = re.compile(r'_\(([^)]+)\)_([^.]+)')

    def model_matches(candidate, configured):
        # The filename carries the model name cut at its first ".", so accept
        # an exact match or a prefix match in either direction.
        return (
            candidate == configured
            or configured.startswith(candidate)
            or candidate.startswith(configured.split('.')[0])
        )

    result = {}

    for file_path in downloaded_files:
        filename = os.path.basename(file_path)
        self.logger.debug(f"Stage 2 - Processing downloaded file: {filename}")

        # Extract model name and stem type from filename; "Vocals" takes
        # precedence over "Instrumental" when both markers are present.
        stem_type = None
        model_name = None
        for known in ("Vocals", "Instrumental"):
            marker = f"_({known})_"
            if marker in filename:
                stem_type = known
                model_name = filename.split(marker)[1].split(".")[0]
                break
        else:
            # Unknown stem type: fall back to whatever sits in the parentheses.
            match = generic_pattern.search(filename)
            if match:
                stem_type = match.group(1)
                model_name = match.group(2)

        # An empty model name would prefix-match every configured model
        # (``"x".startswith("")`` is True), so treat it as unparseable.
        if not stem_type or not model_name:
            self.logger.warning(f"Could not parse stem type and model from filename: {filename}")
            continue

        # Find the first matching backing vocals model
        matching_bv_model = next(
            (bv_model for bv_model in self.backing_vocals_models if model_matches(model_name, bv_model)),
            None,
        )
        if not matching_bv_model:
            # Previously such files were dropped without any trace in the log.
            self.logger.warning(f"Stage 2 - No configured model matches '{model_name}' for file: {filename}")
            continue

        model_result = result.setdefault(matching_bv_model, {})

        if stem_type == "Vocals":
            target_path = os.path.join(stems_dir, f"{artist_title} (Lead Vocals {matching_bv_model}).{self.lossless_output_format}")
            shutil.move(file_path, target_path)
            model_result["lead_vocals"] = target_path
        elif stem_type == "Instrumental":
            target_path = os.path.join(stems_dir, f"{artist_title} (Backing Vocals {matching_bv_model}).{self.lossless_output_format}")
            shutil.move(file_path, target_path)
            model_result["backing_vocals"] = target_path

    return result
556
+
245
557
  def _create_stems_directory(self, track_output_dir):
246
558
  stems_dir = os.path.join(track_output_dir, "stems")
247
559
  os.makedirs(stems_dir, exist_ok=True)
@@ -399,3 +711,68 @@ class AudioProcessor:
399
711
 
400
712
  self.logger.info(f"Normalized audio saved, replacing: {output_path}")
401
713
  self.logger.debug(f"Original peak: {peak_amplitude} dB, Applied gain: {gain_db} dB")
714
+
715
def apply_countdown_padding_to_instrumentals(self, separation_result, padding_seconds, artist_title, track_output_dir):
    """
    Apply countdown padding to all instrumental audio files.

    When LyricsTranscriber adds countdown padding to vocals, this method ensures
    all instrumental tracks are padded by the same amount to maintain synchronization.

    A padded copy is written next to each source file as
    "<name> (Padded)<ext>"; a padded file that already exists is reused
    rather than regenerated.

    Args:
        separation_result: Dictionary containing paths to separated audio files
        padding_seconds: Amount of padding to apply (e.g., 3.0)
        artist_title: Artist and title string (not used by this method; kept
            so existing callers keep working)
        track_output_dir: Output directory (not used by this method; kept so
            existing callers keep working)

    Returns:
        Dictionary with updated paths to padded instrumental files. The
        "other_stems" and "backing_vocals" entries are passed through
        unpadded.
    """
    self.logger.info(
        f"Applying {padding_seconds}s countdown padding to all instrumental files to match vocal padding"
    )

    padded_result = {
        "clean_instrumental": {},
        "other_stems": {},
        "backing_vocals": {},
        "combined_instrumentals": {},
    }

    # Treat a missing or None entry as "nothing to pad" instead of failing.
    clean_entry = separation_result.get("clean_instrumental") or {}
    combined_entry = separation_result.get("combined_instrumentals") or {}

    # Pad clean instrumental
    original_instrumental = clean_entry.get("instrumental")
    if original_instrumental:
        # Insert "Padded" before the file extension
        base, ext = os.path.splitext(original_instrumental)
        padded_instrumental = f"{base} (Padded){ext}"

        if not self._file_exists(padded_instrumental):
            self.logger.info(f"Padding clean instrumental: {original_instrumental}")
            self.pad_audio_file(original_instrumental, padded_instrumental, padding_seconds)

        padded_result["clean_instrumental"]["instrumental"] = padded_instrumental
        padded_result["clean_instrumental"]["vocals"] = clean_entry.get("vocals")

    # Pad combined instrumentals (instrumental + backing vocals)
    for model, combined_path in combined_entry.items():
        base, ext = os.path.splitext(combined_path)
        padded_combined = f"{base} (Padded){ext}"

        if not self._file_exists(padded_combined):
            self.logger.info(f"Padding combined instrumental ({model}): {combined_path}")
            self.pad_audio_file(combined_path, padded_combined, padding_seconds)

        padded_result["combined_instrumentals"][model] = padded_combined

    # Copy over other stems and backing vocals without padding
    # (these are typically not used in final output, but preserve the structure)
    padded_result["other_stems"] = separation_result.get("other_stems", {})
    padded_result["backing_vocals"] = separation_result.get("backing_vocals", {})

    # Count the clean instrumental only when one was actually handled; the
    # previous unconditional "+ 1" over-reported when it was absent.
    padded_count = len(padded_result["combined_instrumentals"]) + (1 if original_instrumental else 0)
    self.logger.info(
        f"✓ Countdown padding applied to {padded_count} instrumental file(s)"
    )

    return padded_result