karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,475 @@
1
+ """
2
+ Local FastAPI server for instrumental review.
3
+
4
+ This module provides a local HTTP server that serves the instrumental review
5
+ UI for local CLI usage. It provides the same API endpoints as the cloud backend
6
+ to enable UI reuse.
7
+
8
+ Similar pattern to LyricsTranscriber's ReviewServer.
9
+ """
10
+
11
+ import logging
12
+ import os
13
+ from pathlib import Path
14
+ import socket
15
+ import threading
16
+ import webbrowser
17
+ from typing import List, Optional
18
+
19
+ from fastapi import FastAPI, HTTPException, UploadFile, File
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+ from fastapi.responses import FileResponse, HTMLResponse
22
+ from pydantic import BaseModel
23
+ import shutil
24
+ import tempfile
25
+ import uvicorn
26
+
27
+ from pydub import AudioSegment
28
+
29
+ from karaoke_gen.instrumental_review import (
30
+ AnalysisResult,
31
+ AudioAnalyzer,
32
+ AudioEditor,
33
+ MuteRegion,
34
+ WaveformGenerator,
35
+ )
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
+ # Request/Response Models
41
class MuteRegionRequest(BaseModel):
    """One time span, in seconds, sent by the client as part of a mute request."""

    start_seconds: float  # where the span begins
    end_seconds: float  # where the span ends
44
+
45
+
46
class CreateCustomRequest(BaseModel):
    """Body of the create-custom-instrumental request: the spans to mute."""

    mute_regions: List[MuteRegionRequest]  # an empty list is rejected by the route handler
48
+
49
+
50
class SelectionRequest(BaseModel):
    """Body of the select-instrumental request."""

    # The route handler accepts "clean", "with_backing", "custom",
    # "uploaded" or "original"; anything else is answered with HTTP 400.
    selection: str
52
+
53
+
54
+ class InstrumentalReviewServer:
55
+ """
56
+ Local FastAPI server for instrumental review UI.
57
+
58
+ This server provides a web interface for reviewing and selecting
59
+ instrumental tracks in the local CLI workflow. It serves the same
60
+ API endpoints as the cloud backend to enable UI reuse.
61
+ """
62
+
63
def __init__(
    self,
    output_dir: str,
    base_name: str,
    analysis: AnalysisResult,
    waveform_path: str,
    backing_vocals_path: str,
    clean_instrumental_path: str,
    with_backing_path: Optional[str] = None,
    original_audio_path: Optional[str] = None,
):
    """
    Store the review inputs and prepare (but do not start) the server state.

    Args:
        output_dir: Directory containing the audio files
        base_name: Base name for output files (e.g., "Artist - Title")
        analysis: Analysis result from AudioAnalyzer
        waveform_path: Path to the waveform image
        backing_vocals_path: Path to the backing vocals audio file
        clean_instrumental_path: Path to the clean instrumental audio file
        with_backing_path: Path to the instrumental with backing vocals
        original_audio_path: Path to the original audio file (with vocals)
    """
    # Synchronisation primitives; the selection event is set by the
    # select-instrumental route once the user has made a choice.
    self._shutdown_event = threading.Event()
    self._selection_event = threading.Event()

    # Nothing is running yet: the app and its thread are created later.
    self._server_thread: Optional[threading.Thread] = None
    self._app: Optional[FastAPI] = None

    # Results produced during the review session.
    self.selection: Optional[str] = None
    self.uploaded_instrumental_path: Optional[str] = None
    self.custom_instrumental_path: Optional[str] = None

    # Inputs supplied by the caller, stored unchanged.
    self.output_dir, self.base_name = output_dir, base_name
    self.analysis = analysis
    self.waveform_path = waveform_path
    self.backing_vocals_path = backing_vocals_path
    self.clean_instrumental_path = clean_instrumental_path
    self.with_backing_path = with_backing_path
    self.original_audio_path = original_audio_path
103
+
104
def _create_app(self) -> FastAPI:
    """Create and configure the FastAPI application.

    The interactive docs pages are disabled (``docs_url`` and ``redoc_url``
    are ``None``), CORS middleware is installed, and the routes are attached
    through ``self._register_routes``.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(title="Instrumental Review", docs_url=None, redoc_url=None)

    # Any origin may call this API, but without credentials: a wildcard
    # origin must not be combined with allow_credentials=True (see the
    # FastAPI CORS documentation), and none of the route handlers visible
    # in this module read cookies or authorization headers.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=False,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    self._register_routes(app)

    return app
121
+
122
def _register_routes(self, app: FastAPI) -> None:
    """Register the review page and its API routes on ``app``.

    Routes attached:
        GET  /                                           frontend HTML
        GET  /api/jobs/local/instrumental-analysis       analysis summary and URLs
        GET  /api/jobs/local/waveform-data               amplitude data for the backing vocals
        GET  /api/audio/{stem_type}                      one of the audio files
        GET  /api/waveform                               waveform PNG
        POST /api/jobs/local/create-custom-instrumental  build a muted instrumental
        POST /api/jobs/local/upload-instrumental         accept a user-supplied instrumental
        POST /api/jobs/local/select-instrumental         record the final choice

    Args:
        app: The FastAPI application the handlers are attached to.
    """

    # Extensions accepted by the upload route. A tuple (not a set) keeps the
    # order in the error message stable between runs.
    allowed_upload_extensions = (".flac", ".mp3", ".wav", ".m4a", ".ogg")

    # Extension -> MIME type for streamed audio. Every uploadable extension
    # has an entry so uploaded .m4a/.ogg files are not served as
    # application/octet-stream.
    audio_content_types = {
        ".flac": "audio/flac",
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".m4a": "audio/mp4",
        ".ogg": "audio/ogg",
    }

    valid_selections = ("clean", "with_backing", "custom", "uploaded", "original")

    @app.get("/")
    async def serve_frontend():
        """Serve the frontend HTML."""
        return HTMLResponse(content=self._get_frontend_html())

    @app.get("/api/jobs/local/instrumental-analysis")
    async def get_analysis():
        """Get analysis data for the instrumental review."""
        # Split on the FIRST " - " only, so a title that itself contains
        # " - " (e.g. "Artist - Song - Live") is kept whole.
        artist, separator, title = self.base_name.partition(" - ")
        if not separator:
            artist, title = "", self.base_name

        return {
            "job_id": "local",
            "artist": artist,
            "title": title,
            "status": "awaiting_instrumental_selection",
            "analysis": {
                "has_audible_content": self.analysis.has_audible_content,
                "total_duration_seconds": self.analysis.total_duration_seconds,
                "audible_segments": [
                    {
                        "start_seconds": seg.start_seconds,
                        "end_seconds": seg.end_seconds,
                        "duration_seconds": seg.duration_seconds,
                        "avg_amplitude_db": seg.avg_amplitude_db,
                        "peak_amplitude_db": seg.peak_amplitude_db,
                    }
                    for seg in self.analysis.audible_segments
                ],
                "recommended_selection": self.analysis.recommended_selection.value,
                "total_audible_duration_seconds": self.analysis.total_audible_duration_seconds,
                "audible_percentage": self.analysis.audible_percentage,
                "silence_threshold_db": self.analysis.silence_threshold_db,
            },
            # A URL is advertised only for files that are currently known.
            "audio_urls": {
                "clean_instrumental": "/api/audio/clean_instrumental" if self.clean_instrumental_path else None,
                "backing_vocals": "/api/audio/backing_vocals" if self.backing_vocals_path else None,
                "with_backing": "/api/audio/with_backing" if self.with_backing_path else None,
                "custom_instrumental": "/api/audio/custom_instrumental" if self.custom_instrumental_path else None,
                "uploaded_instrumental": "/api/audio/uploaded_instrumental" if self.uploaded_instrumental_path else None,
                "original": "/api/audio/original" if self.original_audio_path else None,
            },
            "waveform_url": "/api/waveform" if self.waveform_path else None,
            "has_custom_instrumental": self.custom_instrumental_path is not None,
            "has_uploaded_instrumental": self.uploaded_instrumental_path is not None,
            "has_original": self.original_audio_path is not None,
        }

    @app.get("/api/jobs/local/waveform-data")
    async def get_waveform_data(num_points: int = 600):
        """Get waveform amplitude data for client-side rendering."""
        if num_points <= 0 or num_points > 10000:
            raise HTTPException(
                status_code=400,
                detail="num_points must be between 1 and 10000"
            )

        if not self.backing_vocals_path or not os.path.exists(self.backing_vocals_path):
            raise HTTPException(status_code=404, detail="Backing vocals file not found")

        try:
            generator = WaveformGenerator()
            amplitudes, duration = generator.generate_data_only(self.backing_vocals_path, num_points)
            return {"amplitudes": amplitudes, "duration": duration}
        except Exception as e:
            logger.exception(f"Error generating waveform data: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/audio/{stem_type}")
    async def stream_audio(stem_type: str):
        """Stream audio file."""
        # Built per request because the custom/uploaded paths change while
        # the server is running.
        path_map = {
            "clean_instrumental": self.clean_instrumental_path,
            "backing_vocals": self.backing_vocals_path,
            "with_backing": self.with_backing_path,
            "custom_instrumental": self.custom_instrumental_path,
            "uploaded_instrumental": self.uploaded_instrumental_path,
            "original": self.original_audio_path,
        }

        audio_path = path_map.get(stem_type)
        if not audio_path or not os.path.exists(audio_path):
            raise HTTPException(status_code=404, detail=f"Audio file not found: {stem_type}")

        ext = os.path.splitext(audio_path)[1].lower()
        content_type = audio_content_types.get(ext, "application/octet-stream")

        return FileResponse(audio_path, media_type=content_type)

    @app.get("/api/waveform")
    async def get_waveform_image():
        """Serve waveform image."""
        if not self.waveform_path or not os.path.exists(self.waveform_path):
            raise HTTPException(status_code=404, detail="Waveform image not found")
        return FileResponse(self.waveform_path, media_type="image/png")

    @app.post("/api/jobs/local/create-custom-instrumental")
    async def create_custom_instrumental(request: CreateCustomRequest):
        """Create a custom instrumental with muted regions."""
        if not request.mute_regions:
            raise HTTPException(status_code=400, detail="No mute regions provided")

        try:
            mute_regions = [
                MuteRegion(
                    start_seconds=r.start_seconds,
                    end_seconds=r.end_seconds,
                )
                for r in request.mute_regions
            ]

            editor = AudioEditor()
            output_path = os.path.join(
                self.output_dir,
                f"{self.base_name} (Instrumental Custom).flac"
            )

            result = editor.create_custom_instrumental(
                clean_instrumental_path=self.clean_instrumental_path,
                backing_vocals_path=self.backing_vocals_path,
                mute_regions=mute_regions,
                output_path=output_path,
            )

            # Remember the file so the audio and analysis routes expose it.
            self.custom_instrumental_path = result.output_path

            return {
                "status": "success",
                "custom_instrumental_url": "/api/audio/custom_instrumental",
                "statistics": {
                    "mute_regions_applied": len(result.mute_regions_applied),
                    "total_muted_duration_seconds": result.total_muted_duration_seconds,
                    "output_duration_seconds": result.output_duration_seconds,
                },
            }
        except Exception as e:
            logger.exception(f"Error creating custom instrumental: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.post("/api/jobs/local/upload-instrumental")
    async def upload_instrumental(file: UploadFile = File(...)):
        """Upload a custom instrumental audio file."""
        ext = os.path.splitext(file.filename or "")[1].lower()
        if ext not in allowed_upload_extensions:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid file type. Allowed: {', '.join(allowed_upload_extensions)}"
            )

        tmp_path = None
        file_moved = False
        try:
            # Save to a temp file first so the upload can be validated
            # before anything is written into the output directory.
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
                shutil.copyfileobj(file.file, tmp)
                tmp_path = tmp.name

            uploaded_audio = AudioSegment.from_file(tmp_path)
            uploaded_duration = len(uploaded_audio) / 1000.0  # ms to seconds

            expected_duration = self.analysis.total_duration_seconds
            duration_diff = abs(uploaded_duration - expected_duration)

            if duration_diff > 0.5:
                raise HTTPException(
                    status_code=400,
                    detail=f"Duration mismatch: uploaded file is {uploaded_duration:.2f}s, "
                           f"expected {expected_duration:.2f}s (±0.5s allowed)"
                )

            output_path = os.path.join(
                self.output_dir,
                f"{self.base_name} (Instrumental Uploaded){ext}"
            )
            shutil.move(tmp_path, output_path)
            file_moved = True
            self.uploaded_instrumental_path = output_path

            return {
                "status": "success",
                "uploaded_instrumental_url": "/api/audio/uploaded_instrumental",
                "duration_seconds": uploaded_duration,
                "filename": file.filename,
            }
        except HTTPException:
            # Validation failures already carry the right status code.
            raise
        except Exception as e:
            logger.exception(f"Error uploading instrumental: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e
        finally:
            # Clean up the temp file if it was never moved into place.
            if tmp_path and not file_moved and os.path.exists(tmp_path):
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass  # Best effort cleanup

    @app.post("/api/jobs/local/select-instrumental")
    async def select_instrumental(request: SelectionRequest):
        """Submit instrumental selection."""
        if request.selection not in valid_selections:
            raise HTTPException(status_code=400, detail=f"Invalid selection: {request.selection}")

        # Record the choice, then signal the selection event.
        self.selection = request.selection
        self._selection_event.set()

        return {"status": "success", "selection": request.selection}
340
+
341
@staticmethod
def _get_static_dir() -> Path:
    """Return the ``static`` directory located next to this module file."""
    module_dir = Path(__file__).parent
    return module_dir.joinpath("static")
345
+
346
def _get_frontend_html(self) -> str:
    """Return the frontend page, read from ``static/index.html``.

    When that file does not exist, a small built-in error page is returned
    instead so the browser still shows something meaningful.
    """
    index_path = self._get_static_dir() / "index.html"

    if not index_path.exists():
        # Fallback error message if file is missing
        return """<!DOCTYPE html>
<html>
<head><title>Error</title></head>
<body style="background:#1a1a1a;color:#fff;font-family:sans-serif;padding:2rem;">
<h1>Frontend assets not found</h1>
<p>The static/index.html file is missing from the instrumental_review module.</p>
</body>
</html>"""

    return index_path.read_text(encoding="utf-8")
361
+
362
+ @staticmethod
363
+ def _is_port_available(host: str, port: int) -> bool:
364
+ """Check if a port is available for binding."""
365
+ try:
366
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
367
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
368
+ sock.bind((host, port))
369
+ return True
370
+ except OSError:
371
+ return False
372
+
373
@staticmethod
def _find_available_port(host: str, preferred_port: int, max_attempts: int = 100) -> int:
    """
    Find an available port, starting with the preferred port.

    The preferred port is always probed first, followed by the ports
    directly above it (never beyond 65535) until ``max_attempts`` ports
    have been probed in total. If none of them is free, the operating
    system is asked to pick any free port.

    Args:
        host: Host to bind to
        preferred_port: The preferred port to try first
        max_attempts: Maximum number of ports to try

    Returns:
        An available port number. Availability is a snapshot; the port is
        not reserved for the caller.

    Raises:
        RuntimeError: If no available port could be found, i.e. every
            probed port was busy and the OS-assigned fallback bind failed
            as well.
    """
    # Preferred port first, then its successors, capped at the highest
    # valid TCP port.
    candidates = [preferred_port]
    candidates.extend(
        range(preferred_port + 1, min(preferred_port + max_attempts, 65536))
    )
    for candidate in candidates:
        if InstrumentalReviewServer._is_port_available(host, candidate):
            return candidate

    # Last resort: let the OS assign a port. A failure here is converted to
    # the documented RuntimeError, keeping the OSError as its cause.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as fallback:
            fallback.bind((host, 0))
            return fallback.getsockname()[1]
    except OSError as exc:
        raise RuntimeError(
            f"Could not find an available port on {host} "
            f"(starting from {preferred_port})"
        ) from exc
405
+
406
def start_and_open_browser(self, port: int = 8765) -> str:
    """
    Start server, open browser, and block until selection is submitted.

    The uvicorn server runs in a daemon thread bound to 127.0.0.1. This
    method does not stop the server once a selection has been made.

    Args:
        port: Preferred port to run the server on. If unavailable, will
            automatically find an available port.

    Returns:
        The user's selection: one of the values accepted by the
        select-instrumental endpoint ("clean", "with_backing", "custom",
        "uploaded" or "original").

    Raises:
        RuntimeError: If the server thread exits before the server has
            finished starting (for example because the port was taken
            between the availability check and the real bind).
    """
    import time

    self._app = self._create_app()

    # Find an available port (handles concurrent CLI instances)
    host = "127.0.0.1"
    actual_port = self._find_available_port(host, port)
    if actual_port != port:
        logger.info(f"Port {port} in use, using port {actual_port} instead")

    # Run uvicorn in a separate thread
    config = uvicorn.Config(
        self._app,
        host=host,
        port=actual_port,
        log_level="warning",
    )
    server = uvicorn.Server(config)

    self._server_thread = threading.Thread(target=server.run, daemon=True)
    self._server_thread.start()

    # Wait until uvicorn reports that it is serving instead of sleeping a
    # fixed interval. If the thread dies first, fail loudly: otherwise the
    # browser would open a dead URL and the wait below would never return.
    startup_deadline = time.monotonic() + 5.0
    while not server.started:
        if not self._server_thread.is_alive():
            raise RuntimeError(
                f"Instrumental review server failed to start on {host}:{actual_port}"
            )
        if time.monotonic() >= startup_deadline:
            # Slow startup: carry on as before rather than aborting.
            logger.warning(
                "Instrumental review server has not reported startup yet; continuing"
            )
            break
        time.sleep(0.05)

    url = f"http://localhost:{actual_port}/"
    logger.info(f"Instrumental review server started at {url}")

    # Open browser
    webbrowser.open(url)

    # Wait for selection
    logger.info("Waiting for instrumental selection...")
    self._selection_event.wait()

    # Give a moment for response to be sent
    time.sleep(0.5)

    return self.get_selection()
458
+
459
def stop(self) -> None:
    """Request server shutdown by setting the shutdown event.

    NOTE(review): none of the code visible alongside this method waits on
    ``_shutdown_event``; confirm that something consumes it, otherwise this
    call has no effect on the running server.
    """
    shutdown_flag = self._shutdown_event
    shutdown_flag.set()
462
+
463
def get_selection(self) -> str:
    """Return the stored selection.

    Raises:
        ValueError: If no selection has been stored yet (it is ``None``).
    """
    chosen = self.selection
    if chosen is not None:
        return chosen
    raise ValueError("No selection has been made")
468
+
469
def get_custom_instrumental_path(self) -> Optional[str]:
    """Return the stored custom instrumental path, or ``None`` if unset."""
    custom_path = self.custom_instrumental_path
    return custom_path
472
+
473
def get_uploaded_instrumental_path(self) -> Optional[str]:
    """Return the stored uploaded instrumental path, or ``None`` if unset."""
    uploaded_path = self.uploaded_instrumental_path
    return uploaded_path