karaoke-gen 0.71.23__py3-none-any.whl → 0.71.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1181 @@
1
+ """
2
+ Local FastAPI server for instrumental review.
3
+
4
+ This module provides a local HTTP server that serves the instrumental review
5
+ UI for local CLI usage. It provides the same API endpoints as the cloud backend
6
+ to enable UI reuse.
7
+
8
+ Similar pattern to LyricsTranscriber's ReviewServer.
9
+ """
10
+
11
+ import logging
12
+ import os
13
+ import threading
14
+ import webbrowser
15
+ from typing import List, Optional
16
+
17
+ from fastapi import FastAPI, HTTPException
18
+ from fastapi.middleware.cors import CORSMiddleware
19
+ from fastapi.responses import FileResponse, HTMLResponse
20
+ from pydantic import BaseModel
21
+ import uvicorn
22
+
23
+ from karaoke_gen.instrumental_review import (
24
+ AnalysisResult,
25
+ AudioAnalyzer,
26
+ AudioEditor,
27
+ MuteRegion,
28
+ WaveformGenerator,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ # Request/Response Models
35
class MuteRegionRequest(BaseModel):
    """Request payload describing one time span to mute, in seconds."""

    # Offset at which the muted span begins.
    start_seconds: float
    # Offset at which the muted span ends.
    end_seconds: float
38
+
39
+
40
class CreateCustomRequest(BaseModel):
    """Request body for the create-custom-instrumental endpoint."""

    # Spans to mute; the endpoint rejects an empty list with HTTP 400.
    mute_regions: List[MuteRegionRequest]
42
+
43
+
44
class SelectionRequest(BaseModel):
    """Request body for the select-instrumental endpoint."""

    # The endpoint accepts "clean", "with_backing" or "custom".
    selection: str
46
+
47
+
48
+ class InstrumentalReviewServer:
49
+ """
50
+ Local FastAPI server for instrumental review UI.
51
+
52
+ This server provides a web interface for reviewing and selecting
53
+ instrumental tracks in the local CLI workflow. It serves the same
54
+ API endpoints as the cloud backend to enable UI reuse.
55
+ """
56
+
57
def __init__(
    self,
    output_dir: str,
    base_name: str,
    analysis: AnalysisResult,
    waveform_path: str,
    backing_vocals_path: str,
    clean_instrumental_path: str,
    with_backing_path: Optional[str] = None,
):
    """
    Store everything the review session needs; no server is started here.

    Args:
        output_dir: Directory containing the audio files
        base_name: Base name for output files (e.g., "Artist - Title")
        analysis: Analysis result from AudioAnalyzer
        waveform_path: Path to the waveform image
        backing_vocals_path: Path to the backing vocals audio file
        clean_instrumental_path: Path to the clean instrumental audio file
        with_backing_path: Path to the instrumental with backing vocals
    """
    # Server plumbing: the app and thread start out unset.
    self._app: Optional[FastAPI] = None
    self._server_thread: Optional[threading.Thread] = None
    # Set by the select-instrumental route once the user has chosen.
    self._selection_event = threading.Event()
    self._shutdown_event = threading.Event()

    # Results produced during the session.
    self.selection: Optional[str] = None
    self.custom_instrumental_path: Optional[str] = None

    # Inputs supplied by the caller.
    self.with_backing_path = with_backing_path
    self.clean_instrumental_path = clean_instrumental_path
    self.backing_vocals_path = backing_vocals_path
    self.waveform_path = waveform_path
    self.analysis = analysis
    self.base_name = base_name
    self.output_dir = output_dir
93
+
94
def _create_app(self) -> FastAPI:
    """
    Create and configure the FastAPI application.

    The app is built without the interactive docs endpoints, gets a
    permissive CORS policy, and has all review routes registered on it.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(title="Instrumental Review", docs_url=None, redoc_url=None)

    # Configure CORS.
    # A wildcard origin must not be combined with credentialed requests
    # (the CORS spec forbids it and FastAPI's docs say to list origins
    # explicitly in that case). None of the routes registered here read
    # cookies or auth headers, so credentials are disabled and the
    # wildcard stays.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=False,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Register routes
    self._register_routes(app)

    return app
111
+
112
def _register_routes(self, app: FastAPI) -> None:
    """
    Register the review UI page and its API routes on ``app``.

    Routes:
        GET  /                                          frontend HTML
        GET  /api/jobs/local/instrumental-analysis      analysis summary
        GET  /api/jobs/local/waveform-data              amplitude data
        GET  /api/audio/{stem_type}                     audio file
        GET  /api/waveform                              waveform image
        POST /api/jobs/local/create-custom-instrumental build custom mix
        POST /api/jobs/local/select-instrumental        record final choice

    Args:
        app: The FastAPI application to attach the routes to.
    """

    @app.get("/")
    async def serve_frontend():
        """Serve the frontend HTML."""
        return HTMLResponse(content=self._get_frontend_html())

    @app.get("/api/jobs/local/instrumental-analysis")
    async def get_analysis():
        """Get analysis data for the instrumental review."""
        # Split on the FIRST " - " only, so a title that itself contains
        # the separator ("Artist - Song - Live") is kept whole.
        artist, separator, title = self.base_name.partition(" - ")
        if not separator:
            artist, title = "", self.base_name

        return {
            "job_id": "local",
            "artist": artist,
            "title": title,
            "status": "awaiting_instrumental_selection",
            "analysis": {
                "has_audible_content": self.analysis.has_audible_content,
                "total_duration_seconds": self.analysis.total_duration_seconds,
                "audible_segments": [
                    {
                        "start_seconds": seg.start_seconds,
                        "end_seconds": seg.end_seconds,
                        "duration_seconds": seg.duration_seconds,
                        "avg_amplitude_db": seg.avg_amplitude_db,
                        "peak_amplitude_db": seg.peak_amplitude_db,
                    }
                    for seg in self.analysis.audible_segments
                ],
                "recommended_selection": self.analysis.recommended_selection.value,
                "total_audible_duration_seconds": self.analysis.total_audible_duration_seconds,
                "audible_percentage": self.analysis.audible_percentage,
                "silence_threshold_db": self.analysis.silence_threshold_db,
            },
            # A URL is only advertised when the matching path is set.
            "audio_urls": {
                "clean_instrumental": "/api/audio/clean_instrumental" if self.clean_instrumental_path else None,
                "backing_vocals": "/api/audio/backing_vocals" if self.backing_vocals_path else None,
                "with_backing": "/api/audio/with_backing" if self.with_backing_path else None,
                "custom_instrumental": "/api/audio/custom_instrumental" if self.custom_instrumental_path else None,
            },
            "waveform_url": "/api/waveform" if self.waveform_path else None,
            "has_custom_instrumental": self.custom_instrumental_path is not None,
        }

    @app.get("/api/jobs/local/waveform-data")
    async def get_waveform_data(num_points: int = 600):
        """Get waveform amplitude data for client-side rendering."""
        # Validate num_points parameter
        if num_points <= 0 or num_points > 10000:
            raise HTTPException(
                status_code=400,
                detail="num_points must be between 1 and 10000"
            )

        if not self.backing_vocals_path or not os.path.exists(self.backing_vocals_path):
            raise HTTPException(status_code=404, detail="Backing vocals file not found")

        try:
            generator = WaveformGenerator()
            amplitudes, duration = generator.generate_data_only(self.backing_vocals_path, num_points)
            return {"amplitudes": amplitudes, "duration": duration}
        except Exception as e:
            logger.exception(f"Error generating waveform data: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.get("/api/audio/{stem_type}")
    async def stream_audio(stem_type: str):
        """Stream audio file."""
        # Only these fixed keys resolve to a file; anything else is a 404.
        path_map = {
            "clean_instrumental": self.clean_instrumental_path,
            "backing_vocals": self.backing_vocals_path,
            "with_backing": self.with_backing_path,
            "custom_instrumental": self.custom_instrumental_path,
        }

        audio_path = path_map.get(stem_type)
        if not audio_path or not os.path.exists(audio_path):
            raise HTTPException(status_code=404, detail=f"Audio file not found: {stem_type}")

        # Determine content type
        ext = os.path.splitext(audio_path)[1].lower()
        content_types = {
            ".flac": "audio/flac",
            ".mp3": "audio/mpeg",
            ".wav": "audio/wav",
        }
        content_type = content_types.get(ext, "application/octet-stream")

        return FileResponse(audio_path, media_type=content_type)

    @app.get("/api/waveform")
    async def get_waveform_image():
        """Serve waveform image."""
        if not self.waveform_path or not os.path.exists(self.waveform_path):
            raise HTTPException(status_code=404, detail="Waveform image not found")
        return FileResponse(self.waveform_path, media_type="image/png")

    @app.post("/api/jobs/local/create-custom-instrumental")
    async def create_custom_instrumental(request: CreateCustomRequest):
        """Create a custom instrumental with muted regions."""
        if not request.mute_regions:
            raise HTTPException(status_code=400, detail="No mute regions provided")

        try:
            mute_regions = [
                MuteRegion(
                    start_seconds=r.start_seconds,
                    end_seconds=r.end_seconds,
                )
                for r in request.mute_regions
            ]

            editor = AudioEditor()
            output_path = os.path.join(
                self.output_dir,
                f"{self.base_name} (Instrumental Custom).flac"
            )

            result = editor.create_custom_instrumental(
                clean_instrumental_path=self.clean_instrumental_path,
                backing_vocals_path=self.backing_vocals_path,
                mute_regions=mute_regions,
                output_path=output_path,
            )

            # Remember the file so it can be streamed and selected later.
            self.custom_instrumental_path = result.output_path

            return {
                "status": "success",
                "custom_instrumental_url": "/api/audio/custom_instrumental",
                "statistics": {
                    "mute_regions_applied": len(result.mute_regions_applied),
                    "total_muted_duration_seconds": result.total_muted_duration_seconds,
                    "output_duration_seconds": result.output_duration_seconds,
                },
            }
        except Exception as e:
            logger.exception(f"Error creating custom instrumental: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e

    @app.post("/api/jobs/local/select-instrumental")
    async def select_instrumental(request: SelectionRequest):
        """Submit instrumental selection."""
        if request.selection not in ("clean", "with_backing", "custom"):
            raise HTTPException(status_code=400, detail=f"Invalid selection: {request.selection}")

        # "custom" is only meaningful once a custom instrumental has been
        # created; reject it here rather than signalling the waiting
        # caller with a selection that has no file behind it.
        if request.selection == "custom" and not self.custom_instrumental_path:
            raise HTTPException(
                status_code=400,
                detail="Custom instrumental has not been created",
            )

        self.selection = request.selection
        # Signal that a selection has been recorded.
        self._selection_event.set()

        return {"status": "success", "selection": request.selection}
262
+
263
+ def _get_frontend_html(self) -> str:
264
+ """Return the complete frontend HTML with all features."""
265
+ return '''<!DOCTYPE html>
266
+ <html lang="en">
267
+ <head>
268
+ <meta charset="UTF-8">
269
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
270
+ <title>Instrumental Review</title>
271
+ <style>
272
+ :root {
273
+ --bg: #0a0a0a;
274
+ --card: #18181b;
275
+ --card-border: #27272a;
276
+ --text: #fafafa;
277
+ --text-muted: #a1a1aa;
278
+ --primary: #3b82f6;
279
+ --primary-hover: #2563eb;
280
+ --secondary: #27272a;
281
+ --secondary-hover: #3f3f46;
282
+ --success: #22c55e;
283
+ --warning: #eab308;
284
+ --danger: #ef4444;
285
+ --badge-bg: #27272a;
286
+ }
287
+
288
+ * { box-sizing: border-box; margin: 0; padding: 0; }
289
+
290
+ body {
291
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
292
+ background: var(--bg);
293
+ color: var(--text);
294
+ line-height: 1.6;
295
+ min-height: 100vh;
296
+ }
297
+
298
+ .container {
299
+ max-width: 1200px;
300
+ margin: 0 auto;
301
+ padding: 2rem;
302
+ }
303
+
304
+ header {
305
+ background: var(--card);
306
+ border-bottom: 1px solid var(--card-border);
307
+ padding: 1rem 2rem;
308
+ margin-bottom: 2rem;
309
+ }
310
+
311
+ header h1 {
312
+ font-size: 1.25rem;
313
+ font-weight: 600;
314
+ }
315
+
316
+ .page-title {
317
+ font-size: 1.875rem;
318
+ font-weight: 700;
319
+ margin-bottom: 0.5rem;
320
+ }
321
+
322
+ .page-subtitle {
323
+ color: var(--text-muted);
324
+ margin-bottom: 2rem;
325
+ }
326
+
327
+ .card {
328
+ background: var(--card);
329
+ border: 1px solid var(--card-border);
330
+ border-radius: 0.5rem;
331
+ margin-bottom: 1.5rem;
332
+ }
333
+
334
+ .card-header {
335
+ padding: 1.25rem 1.5rem;
336
+ border-bottom: 1px solid var(--card-border);
337
+ }
338
+
339
+ .card-title {
340
+ font-size: 1.125rem;
341
+ font-weight: 600;
342
+ display: flex;
343
+ align-items: center;
344
+ gap: 0.5rem;
345
+ }
346
+
347
+ .card-description {
348
+ color: var(--text-muted);
349
+ font-size: 0.875rem;
350
+ margin-top: 0.25rem;
351
+ }
352
+
353
+ .card-content {
354
+ padding: 1.5rem;
355
+ }
356
+
357
+ .badge {
358
+ display: inline-flex;
359
+ align-items: center;
360
+ padding: 0.25rem 0.75rem;
361
+ border-radius: 9999px;
362
+ font-size: 0.75rem;
363
+ font-weight: 500;
364
+ background: var(--badge-bg);
365
+ border: 1px solid var(--card-border);
366
+ }
367
+
368
+ .badge-success { background: rgba(34, 197, 94, 0.2); color: var(--success); border-color: var(--success); }
369
+ .badge-warning { background: rgba(234, 179, 8, 0.2); color: var(--warning); border-color: var(--warning); }
370
+
371
+ .flex { display: flex; }
372
+ .flex-wrap { flex-wrap: wrap; }
373
+ .gap-2 { gap: 0.5rem; }
374
+ .gap-4 { gap: 1rem; }
375
+ .items-center { align-items: center; }
376
+ .justify-between { justify-content: space-between; }
377
+
378
+ .grid { display: grid; }
379
+ .grid-cols-2 { grid-template-columns: repeat(2, 1fr); }
380
+ @media (max-width: 1024px) { .grid-cols-2 { grid-template-columns: 1fr; } }
381
+
382
+ .btn {
383
+ display: inline-flex;
384
+ align-items: center;
385
+ justify-content: center;
386
+ padding: 0.5rem 1rem;
387
+ border-radius: 0.375rem;
388
+ font-size: 0.875rem;
389
+ font-weight: 500;
390
+ cursor: pointer;
391
+ border: none;
392
+ transition: background 0.2s;
393
+ }
394
+
395
+ .btn-primary {
396
+ background: var(--primary);
397
+ color: white;
398
+ }
399
+ .btn-primary:hover { background: var(--primary-hover); }
400
+ .btn-primary:disabled { opacity: 0.5; cursor: not-allowed; }
401
+
402
+ .btn-secondary {
403
+ background: var(--secondary);
404
+ color: var(--text);
405
+ }
406
+ .btn-secondary:hover { background: var(--secondary-hover); }
407
+
408
+ .btn-outline {
409
+ background: transparent;
410
+ border: 1px solid var(--card-border);
411
+ color: var(--text);
412
+ }
413
+ .btn-outline:hover { background: var(--secondary); }
414
+ .btn-outline.active { background: var(--primary); border-color: var(--primary); }
415
+
416
+ .btn-sm { padding: 0.375rem 0.75rem; font-size: 0.8125rem; }
417
+ .btn-lg { padding: 0.75rem 1.5rem; font-size: 1rem; }
418
+
419
+ .btn-danger { background: var(--danger); color: white; }
420
+ .btn-success { background: var(--success); color: white; }
421
+
422
+ /* Waveform */
423
+ #waveform-container {
424
+ position: relative;
425
+ background: #1a1a2e;
426
+ border-radius: 0.5rem;
427
+ overflow: hidden;
428
+ }
429
+
430
+ #waveform-canvas {
431
+ display: block;
432
+ width: 100%;
433
+ cursor: crosshair;
434
+ }
435
+
436
+ .time-axis {
437
+ display: flex;
438
+ justify-content: space-between;
439
+ padding: 0.5rem 1rem;
440
+ font-size: 0.75rem;
441
+ color: var(--text-muted);
442
+ background: rgba(0,0,0,0.3);
443
+ }
444
+
445
+ /* Audio Player */
446
+ .audio-player {
447
+ background: var(--secondary);
448
+ padding: 1rem;
449
+ border-radius: 0.5rem;
450
+ }
451
+
452
+ .audio-player audio {
453
+ width: 100%;
454
+ }
455
+
456
+ .audio-label {
457
+ font-size: 0.875rem;
458
+ color: var(--text-muted);
459
+ margin-bottom: 0.5rem;
460
+ }
461
+
462
+ /* Region List */
463
+ .region-list {
464
+ max-height: 200px;
465
+ overflow-y: auto;
466
+ }
467
+
468
+ .region-item {
469
+ display: flex;
470
+ align-items: center;
471
+ justify-content: space-between;
472
+ padding: 0.75rem;
473
+ background: var(--secondary);
474
+ border-radius: 0.375rem;
475
+ margin-bottom: 0.5rem;
476
+ }
477
+
478
+ .region-item:last-child { margin-bottom: 0; }
479
+
480
+ /* Radio Group */
481
+ .radio-group { display: flex; flex-direction: column; gap: 1rem; }
482
+
483
+ .radio-item {
484
+ display: flex;
485
+ align-items: flex-start;
486
+ gap: 0.75rem;
487
+ padding: 1rem;
488
+ background: var(--secondary);
489
+ border-radius: 0.5rem;
490
+ cursor: pointer;
491
+ border: 2px solid transparent;
492
+ transition: border-color 0.2s;
493
+ }
494
+
495
+ .radio-item:hover { border-color: var(--card-border); }
496
+ .radio-item.selected { border-color: var(--primary); }
497
+
498
+ .radio-item input { margin-top: 0.25rem; }
499
+ .radio-item-content { flex: 1; }
500
+ .radio-item-title { font-weight: 500; }
501
+ .radio-item-desc { font-size: 0.875rem; color: var(--text-muted); }
502
+
503
+ /* Alert */
504
+ .alert {
505
+ padding: 1rem;
506
+ border-radius: 0.5rem;
507
+ margin-bottom: 1.5rem;
508
+ }
509
+
510
+ .alert-error {
511
+ background: rgba(239, 68, 68, 0.2);
512
+ border: 1px solid var(--danger);
513
+ color: var(--danger);
514
+ }
515
+
516
+ .alert-success {
517
+ background: rgba(34, 197, 94, 0.2);
518
+ border: 1px solid var(--success);
519
+ color: var(--success);
520
+ }
521
+
522
+ /* Loading */
523
+ .loading {
524
+ display: flex;
525
+ align-items: center;
526
+ justify-content: center;
527
+ padding: 4rem;
528
+ }
529
+
530
+ .spinner {
531
+ width: 2rem;
532
+ height: 2rem;
533
+ border: 3px solid var(--card-border);
534
+ border-top-color: var(--primary);
535
+ border-radius: 50%;
536
+ animation: spin 1s linear infinite;
537
+ }
538
+
539
+ @keyframes spin { to { transform: rotate(360deg); } }
540
+
541
+ /* Segment markers */
542
+ .segment-marker {
543
+ position: absolute;
544
+ background: rgba(234, 179, 8, 0.3);
545
+ border-left: 1px solid var(--warning);
546
+ border-right: 1px solid var(--warning);
547
+ pointer-events: none;
548
+ }
549
+
550
+ .mute-region {
551
+ position: absolute;
552
+ background: rgba(239, 68, 68, 0.4);
553
+ border: 1px dashed var(--danger);
554
+ pointer-events: none;
555
+ }
556
+
557
+ .playhead {
558
+ position: absolute;
559
+ width: 2px;
560
+ background: var(--primary);
561
+ pointer-events: none;
562
+ }
563
+
564
+ .hidden { display: none !important; }
565
+ </style>
566
+ </head>
567
+ <body>
568
+ <header>
569
+ <h1>🎤 Karaoke Generator</h1>
570
+ </header>
571
+
572
+ <main class="container" id="main-content">
573
+ <div class="loading" id="loading">
574
+ <div class="spinner"></div>
575
+ </div>
576
+ </main>
577
+
578
+ <script>
579
+ // State
580
+ let analysisData = null;
581
+ let waveformData = null;
582
+ let muteRegions = [];
583
+ let currentTime = 0;
584
+ let isSelectionMode = false;
585
+ let selectionStart = null;
586
+ let activeAudio = 'backing';
587
+ let selectedOption = 'clean';
588
+ let hasCustom = false;
589
+
590
+ const API_BASE = '/api/jobs/local';
591
+
592
+ // Initialize
593
+ async function init() {
594
+ try {
595
+ // Fetch analysis
596
+ const analysisRes = await fetch(`${API_BASE}/instrumental-analysis`);
597
+ if (!analysisRes.ok) throw new Error('Failed to load analysis');
598
+ analysisData = await analysisRes.json();
599
+
600
+ // Fetch waveform data
601
+ const waveformRes = await fetch(`${API_BASE}/waveform-data?num_points=800`);
602
+ if (waveformRes.ok) {
603
+ waveformData = await waveformRes.json();
604
+ }
605
+
606
+ // Set initial selection based on recommendation
607
+ selectedOption = analysisData.analysis.recommended_selection === 'clean' ? 'clean' : 'with_backing';
608
+
609
+ render();
610
+ } catch (error) {
611
+ showError(error.message);
612
+ }
613
+ }
614
+
615
+ function render() {
616
+ const main = document.getElementById('main-content');
617
+ main.innerHTML = `
618
+ <h1 class="page-title">Select Instrumental Track</h1>
619
+ <p class="page-subtitle">${analysisData.artist || ''} ${analysisData.artist && analysisData.title ? '-' : ''} ${analysisData.title || ''}</p>
620
+
621
+ <div id="error-container"></div>
622
+
623
+ <!-- Analysis Summary -->
624
+ <div class="card">
625
+ <div class="card-header">
626
+ <div class="card-title">
627
+ <span>🎵</span> Backing Vocals Analysis
628
+ </div>
629
+ <div class="card-description">Automated analysis of the backing vocals stem</div>
630
+ </div>
631
+ <div class="card-content">
632
+ <div class="flex flex-wrap gap-4 items-center">
633
+ <div class="flex items-center gap-2">
634
+ ${analysisData.analysis.has_audible_content
635
+ ? '<span style="color: var(--warning)">🔊</span> Backing vocals detected'
636
+ : '<span style="color: var(--success)">🔇</span> No backing vocals detected'}
637
+ </div>
638
+ ${analysisData.analysis.has_audible_content ? `
639
+ <span class="badge">${analysisData.analysis.audible_segments.length} segments</span>
640
+ <span class="badge">${analysisData.analysis.audible_percentage.toFixed(1)}% of track</span>
641
+ <span class="badge">${analysisData.analysis.total_audible_duration_seconds.toFixed(1)}s total</span>
642
+ ` : ''}
643
+ <span class="badge ${analysisData.analysis.recommended_selection === 'clean' ? 'badge-success' : 'badge-warning'}">
644
+ Recommended: ${analysisData.analysis.recommended_selection === 'clean' ? 'Clean Instrumental' : 'Review Needed'}
645
+ </span>
646
+ </div>
647
+ </div>
648
+ </div>
649
+
650
+ <!-- Waveform -->
651
+ ${waveformData ? `
652
+ <div class="card">
653
+ <div class="card-header">
654
+ <div class="card-title">Backing Vocals Waveform</div>
655
+ <div class="card-description">
656
+ Click to seek • ${isSelectionMode ? 'Click and drag to select mute region' : 'Click "Add Mute Region" to start selecting'}
657
+ </div>
658
+ </div>
659
+ <div class="card-content">
660
+ <div id="waveform-container">
661
+ <canvas id="waveform-canvas" width="800" height="150"></canvas>
662
+ <div id="segments-overlay"></div>
663
+ <div id="mute-overlay"></div>
664
+ <div id="playhead" class="playhead hidden"></div>
665
+ </div>
666
+ <div class="time-axis">
667
+ <span>0:00</span>
668
+ <span>${formatTime(waveformData.duration / 4)}</span>
669
+ <span>${formatTime(waveformData.duration / 2)}</span>
670
+ <span>${formatTime(waveformData.duration * 3 / 4)}</span>
671
+ <span>${formatTime(waveformData.duration)}</span>
672
+ </div>
673
+ </div>
674
+ </div>
675
+ ` : ''}
676
+
677
+ <!-- Audio & Regions Grid -->
678
+ <div class="grid grid-cols-2 gap-4">
679
+ <!-- Audio Player -->
680
+ <div class="card">
681
+ <div class="card-header">
682
+ <div class="card-title">Audio Preview</div>
683
+ <div class="card-description">Listen to different instrumental options</div>
684
+ </div>
685
+ <div class="card-content">
686
+ <div class="flex flex-wrap gap-2" style="margin-bottom: 1rem;">
687
+ <button class="btn btn-sm ${activeAudio === 'backing' ? 'btn-primary' : 'btn-outline'}" onclick="setActiveAudio('backing')">Backing Vocals</button>
688
+ <button class="btn btn-sm ${activeAudio === 'clean' ? 'btn-primary' : 'btn-outline'}" onclick="setActiveAudio('clean')">Clean Instrumental</button>
689
+ ${analysisData.audio_urls.with_backing ? `
690
+ <button class="btn btn-sm ${activeAudio === 'with_backing' ? 'btn-primary' : 'btn-outline'}" onclick="setActiveAudio('with_backing')">With Backing</button>
691
+ ` : ''}
692
+ ${hasCustom ? `
693
+ <button class="btn btn-sm ${activeAudio === 'custom' ? 'btn-primary' : 'btn-outline'}" onclick="setActiveAudio('custom')">Custom</button>
694
+ ` : ''}
695
+ </div>
696
+ <div class="audio-player">
697
+ <div class="audio-label">${getAudioLabel()}</div>
698
+ <audio id="audio-player" controls src="${getAudioUrl()}" ontimeupdate="onTimeUpdate(this)"></audio>
699
+ </div>
700
+ </div>
701
+ </div>
702
+
703
+ <!-- Region Selector -->
704
+ <div class="card">
705
+ <div class="card-header">
706
+ <div class="card-title">Mute Regions</div>
707
+ <div class="card-description">Select sections of backing vocals to mute</div>
708
+ </div>
709
+ <div class="card-content">
710
+ <div class="flex gap-2" style="margin-bottom: 1rem;">
711
+ <button class="btn btn-sm ${isSelectionMode ? 'btn-primary' : 'btn-outline'}" onclick="toggleSelectionMode()">
712
+ ${isSelectionMode ? '✓ Selecting...' : '+ Add Mute Region'}
713
+ </button>
714
+ ${muteRegions.length > 0 ? `
715
+ <button class="btn btn-sm btn-outline" onclick="clearAllRegions()">Clear All</button>
716
+ ` : ''}
717
+ </div>
718
+
719
+ ${muteRegions.length > 0 ? `
720
+ <div class="region-list">
721
+ ${muteRegions.map((r, i) => `
722
+ <div class="region-item">
723
+ <span>${formatTime(r.start_seconds)} - ${formatTime(r.end_seconds)} (${(r.end_seconds - r.start_seconds).toFixed(1)}s)</span>
724
+ <div class="flex gap-2">
725
+ <button class="btn btn-sm btn-outline" onclick="seekTo(${r.start_seconds})">▶</button>
726
+ <button class="btn btn-sm btn-danger" onclick="removeRegion(${i})">×</button>
727
+ </div>
728
+ </div>
729
+ `).join('')}
730
+ </div>
731
+ ` : `
732
+ <p style="color: var(--text-muted); font-size: 0.875rem;">
733
+ No mute regions selected. Click "Add Mute Region" then click and drag on the waveform to select sections to mute.
734
+ </p>
735
+ `}
736
+
737
+ ${analysisData.analysis.audible_segments.length > 0 ? `
738
+ <div style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid var(--card-border);">
739
+ <div style="font-size: 0.875rem; color: var(--text-muted); margin-bottom: 0.5rem;">Quick mute detected segments:</div>
740
+ <div class="flex flex-wrap gap-2">
741
+ ${analysisData.analysis.audible_segments.slice(0, 5).map((seg, i) => `
742
+ <button class="btn btn-sm btn-outline" onclick="addSegmentAsRegion(${i})">
743
+ ${formatTime(seg.start_seconds)} - ${formatTime(seg.end_seconds)}
744
+ </button>
745
+ `).join('')}
746
+ ${analysisData.analysis.audible_segments.length > 5 ? `<span class="badge">+${analysisData.analysis.audible_segments.length - 5} more</span>` : ''}
747
+ </div>
748
+ </div>
749
+ ` : ''}
750
+ </div>
751
+ </div>
752
+ </div>
753
+
754
+ <!-- Create Custom Button -->
755
+ ${muteRegions.length > 0 && !hasCustom ? `
756
+ <div class="card">
757
+ <div class="card-content">
758
+ <div class="flex items-center justify-between">
759
+ <div>
760
+ <p style="font-weight: 500;">Ready to create custom instrumental</p>
761
+ <p style="font-size: 0.875rem; color: var(--text-muted);">${muteRegions.length} region${muteRegions.length > 1 ? 's' : ''} will be muted</p>
762
+ </div>
763
+ <button class="btn btn-primary" id="create-custom-btn" onclick="createCustomInstrumental()">
764
+ Create Custom Instrumental
765
+ </button>
766
+ </div>
767
+ </div>
768
+ </div>
769
+ ` : ''}
770
+
771
+ <!-- Selection Options -->
772
+ <div class="card">
773
+ <div class="card-header">
774
+ <div class="card-title">Final Selection</div>
775
+ <div class="card-description">Choose which instrumental to use for your karaoke video</div>
776
+ </div>
777
+ <div class="card-content">
778
+ <div class="radio-group">
779
+ <label class="radio-item ${selectedOption === 'clean' ? 'selected' : ''}" onclick="setSelection('clean')">
780
+ <input type="radio" name="selection" value="clean" ${selectedOption === 'clean' ? 'checked' : ''}>
781
+ <div class="radio-item-content">
782
+ <div class="radio-item-title">Clean Instrumental</div>
783
+ <div class="radio-item-desc">Use the instrumental with no backing vocals at all</div>
784
+ </div>
785
+ ${analysisData.analysis.recommended_selection === 'clean' ? '<span class="badge badge-success">✓ Recommended</span>' : ''}
786
+ </label>
787
+
788
+ <label class="radio-item ${selectedOption === 'with_backing' ? 'selected' : ''}" onclick="setSelection('with_backing')">
789
+ <input type="radio" name="selection" value="with_backing" ${selectedOption === 'with_backing' ? 'checked' : ''}>
790
+ <div class="radio-item-content">
791
+ <div class="radio-item-title">Instrumental with Backing Vocals</div>
792
+ <div class="radio-item-desc">Use the instrumental with all backing vocals included</div>
793
+ </div>
794
+ </label>
795
+
796
+ ${hasCustom ? `
797
+ <label class="radio-item ${selectedOption === 'custom' ? 'selected' : ''}" onclick="setSelection('custom')">
798
+ <input type="radio" name="selection" value="custom" ${selectedOption === 'custom' ? 'checked' : ''}>
799
+ <div class="radio-item-content">
800
+ <div class="radio-item-title">Custom Instrumental</div>
801
+ <div class="radio-item-desc">Use your custom instrumental with ${muteRegions.length} muted region${muteRegions.length > 1 ? 's' : ''}</div>
802
+ </div>
803
+ <span class="badge">Custom</span>
804
+ </label>
805
+ ` : ''}
806
+ </div>
807
+ </div>
808
+ </div>
809
+
810
+ <!-- Submit Button -->
811
+ <button class="btn btn-primary btn-lg" id="submit-btn" onclick="submitSelection()" style="width: 100%; max-width: 400px; margin-top: 1rem;">
812
+ ✓ Confirm Selection & Continue
813
+ </button>
814
+ `;
815
+
816
+ // Initialize waveform after render
817
+ if (waveformData) {
818
+ setTimeout(drawWaveform, 0);
819
+ setupWaveformInteraction();
820
+ }
821
+ }
822
+
823
+ function drawWaveform() {
824
+ const canvas = document.getElementById('waveform-canvas');
825
+ if (!canvas || !waveformData) return;
826
+
827
+ const ctx = canvas.getContext('2d');
828
+ const { amplitudes, duration } = waveformData;
829
+ const width = canvas.width;
830
+ const height = canvas.height;
831
+ const centerY = height / 2;
832
+
833
+ // Clear canvas
834
+ ctx.fillStyle = '#1a1a2e';
835
+ ctx.fillRect(0, 0, width, height);
836
+
837
+ // Draw threshold line
838
+ ctx.strokeStyle = 'rgba(255, 255, 255, 0.2)';
839
+ ctx.setLineDash([5, 5]);
840
+ ctx.beginPath();
841
+ ctx.moveTo(0, centerY);
842
+ ctx.lineTo(width, centerY);
843
+ ctx.stroke();
844
+ ctx.setLineDash([]);
845
+
846
+ // Draw waveform
847
+ const barWidth = width / amplitudes.length;
848
+
849
+ amplitudes.forEach((amp, i) => {
850
+ const x = i * barWidth;
851
+ const barHeight = Math.max(2, amp * height);
852
+ const y = centerY - barHeight / 2;
853
+
854
+ // Check if this point is in a muted region
855
+ const time = (i / amplitudes.length) * duration;
856
+ const inMuteRegion = muteRegions.some(r => time >= r.start_seconds && time <= r.end_seconds);
857
+
858
+ // Check if in audible segment
859
+ const inAudibleSegment = analysisData.analysis.audible_segments.some(
860
+ s => time >= s.start_seconds && time <= s.end_seconds
861
+ );
862
+
863
+ if (inMuteRegion) {
864
+ ctx.fillStyle = '#ef4444';
865
+ } else if (inAudibleSegment) {
866
+ ctx.fillStyle = '#ec4899';
867
+ } else {
868
+ ctx.fillStyle = '#60a5fa';
869
+ }
870
+
871
+ ctx.fillRect(x, y, Math.max(1, barWidth - 1), barHeight);
872
+ });
873
+
874
+ // Draw playhead
875
+ const playhead = document.getElementById('playhead');
876
+ if (playhead && currentTime > 0) {
877
+ const x = (currentTime / duration) * width;
878
+ playhead.style.left = `${x}px`;
879
+ playhead.style.height = `${height}px`;
880
+ playhead.classList.remove('hidden');
881
+ }
882
+ }
883
+
884
+ function setupWaveformInteraction() {
885
+ const canvas = document.getElementById('waveform-canvas');
886
+ if (!canvas) return;
887
+
888
+ let isDragging = false;
889
+ let dragStart = 0;
890
+
891
+ canvas.onmousedown = (e) => {
892
+ const rect = canvas.getBoundingClientRect();
893
+ const x = e.clientX - rect.left;
894
+ const time = (x / rect.width) * waveformData.duration;
895
+
896
+ if (isSelectionMode) {
897
+ isDragging = true;
898
+ dragStart = time;
899
+ selectionStart = time;
900
+ } else {
901
+ seekTo(time);
902
+ }
903
+ };
904
+
905
+ canvas.onmousemove = (e) => {
906
+ if (!isDragging || !isSelectionMode) return;
907
+ // Visual feedback could be added here
908
+ };
909
+
910
+ canvas.onmouseup = (e) => {
911
+ if (!isDragging || !isSelectionMode) return;
912
+
913
+ const rect = canvas.getBoundingClientRect();
914
+ const x = e.clientX - rect.left;
915
+ const time = (x / rect.width) * waveformData.duration;
916
+
917
+ const start = Math.min(dragStart, time);
918
+ const end = Math.max(dragStart, time);
919
+
920
+ if (end - start > 0.1) {
921
+ addRegion(start, end);
922
+ }
923
+
924
+ isDragging = false;
925
+ isSelectionMode = false;
926
+ render();
927
+ };
928
+
929
+ canvas.onmouseleave = () => {
930
+ if (isDragging) {
931
+ isDragging = false;
932
+ }
933
+ };
934
+ }
935
+
936
+ function formatTime(seconds) {
937
+ const mins = Math.floor(seconds / 60);
938
+ const secs = Math.floor(seconds % 60);
939
+ return `${mins}:${secs.toString().padStart(2, '0')}`;
940
+ }
941
+
942
/**
 * Resolve the API endpoint for the currently active audio track.
 *
 * @returns {string} Endpoint for `activeAudio`, or the backing-vocals
 *     endpoint when `activeAudio` has no usable entry.
 */
function getAudioUrl() {
    const endpointByTrack = {
        backing: '/api/audio/backing_vocals',
        clean: '/api/audio/clean_instrumental',
        with_backing: '/api/audio/with_backing',
        custom: '/api/audio/custom_instrumental'
    };

    const endpoint = endpointByTrack[activeAudio];
    if (endpoint) {
        return endpoint;
    }
    return endpointByTrack.backing;
}
951
+
952
/**
 * Resolve the human-readable label for the currently active audio track.
 *
 * @returns {string} Label for `activeAudio`, or 'Audio' when `activeAudio`
 *     has no usable entry.
 */
function getAudioLabel() {
    const labelByTrack = {
        backing: 'Backing Vocals Only',
        clean: 'Clean Instrumental',
        with_backing: 'Instrumental + Backing Vocals',
        custom: 'Custom Instrumental'
    };

    const label = labelByTrack[activeAudio];
    if (label) {
        return label;
    }
    return 'Audio';
}
961
+
962
/**
 * Switch the active audio track and re-render the UI.
 *
 * @param {string} type - Track key stored in `activeAudio` (the keys used by
 *     getAudioUrl are 'backing', 'clean', 'with_backing' and 'custom').
 */
function setActiveAudio(type) {
    activeAudio = type;
    render();
}
966
+
967
/**
 * Record the player's position and move the playhead marker to match.
 *
 * The marker is only updated once waveform data is loaded and both the
 * playhead and canvas elements exist.
 *
 * @param {HTMLMediaElement} audio - Element whose `currentTime` is read.
 */
function onTimeUpdate(audio) {
    currentTime = audio.currentTime;

    if (!waveformData) {
        return;
    }

    const marker = document.getElementById('playhead');
    const waveformCanvas = document.getElementById('waveform-canvas');
    if (!marker || !waveformCanvas) {
        return;
    }

    // Position is the played fraction of the track scaled to the canvas width.
    const left = (currentTime / waveformData.duration) * waveformCanvas.width;
    marker.style.left = `${left}px`;
    marker.style.height = `${waveformCanvas.height}px`;
    marker.classList.remove('hidden');
}
980
+
981
/**
 * Jump the audio player to `time` and start playback.
 *
 * Does nothing when the player element is missing or `time` is not a finite
 * number (assigning a non-finite value to `currentTime` throws).
 *
 * @param {number} time - Target position in seconds.
 */
function seekTo(time) {
    const audio = document.getElementById('audio-player');
    if (!audio || !Number.isFinite(time)) {
        return;
    }

    audio.currentTime = time;

    // play() returns a promise in current browsers; it rejects when playback
    // is blocked or interrupted. Swallow that here so it does not surface as
    // an unhandled rejection; the user can simply press play.
    const playback = audio.play();
    if (playback && typeof playback.catch === 'function') {
        playback.catch(() => {});
    }
}
988
+
989
/**
 * Flip region-selection mode on or off and re-render the UI.
 */
function toggleSelectionMode() {
    const nextMode = !isSelectionMode;
    isSelectionMode = nextMode;
    render();
}
993
+
994
/**
 * Add a mute region, keeping `muteRegions` sorted by start time and free of
 * overlaps. Does not re-render; callers do that themselves.
 *
 * @param {number} start - Region start in seconds.
 * @param {number} end - Region end in seconds.
 */
function addRegion(start, end) {
    const region = { start_seconds: start, end_seconds: end };
    muteRegions.push(region);

    // mergeOverlappingRegions relies on ascending start order.
    muteRegions.sort((left, right) => left.start_seconds - right.start_seconds);
    mergeOverlappingRegions();
}
1000
+
1001
/**
 * Turn one of the analysis' audible segments into a mute region.
 *
 * Does nothing when no segment exists at `index`.
 *
 * @param {number} index - Position in `analysisData.analysis.audible_segments`.
 */
function addSegmentAsRegion(index) {
    const segment = analysisData.analysis.audible_segments[index];
    if (!segment) {
        return;
    }

    addRegion(segment.start_seconds, segment.end_seconds);
    render();
}
1008
+
1009
/**
 * Delete the mute region at `index` and re-render the UI.
 *
 * @param {number} index - Position of the region within `muteRegions`.
 */
function removeRegion(index) {
    const removeCount = 1;
    muteRegions.splice(index, removeCount);
    render();
}
1013
+
1014
/**
 * Drop every mute region, reset the `hasCustom` flag, and re-render the UI.
 */
function clearAllRegions() {
    hasCustom = false;
    muteRegions = [];
    render();
}
1019
+
1020
/**
 * Collapse overlapping or touching entries of `muteRegions` into single
 * regions.
 *
 * Each region is compared only with the most recently kept one, so the list
 * must already be sorted by `start_seconds` (addRegion sorts before calling).
 * A kept region is extended in place when a later region overlaps it.
 */
function mergeOverlappingRegions() {
    if (muteRegions.length < 2) {
        return;
    }

    const [firstRegion, ...remaining] = muteRegions;
    const combined = [firstRegion];

    remaining.forEach((region) => {
        const previous = combined[combined.length - 1];

        if (region.start_seconds <= previous.end_seconds) {
            // Overlap (or shared boundary): grow the previous region.
            previous.end_seconds = Math.max(previous.end_seconds, region.end_seconds);
        } else {
            combined.push(region);
        }
    });

    muteRegions = combined;
}
1036
+
1037
/**
 * Remember which instrumental option is selected and re-render the UI.
 *
 * @param {string} value - Option key stored in `selectedOption`.
 */
function setSelection(value) {
    selectedOption = value;
    render();
}
1041
+
1042
/**
 * Ask the server to build a custom instrumental from the current mute regions.
 *
 * While the request is in flight the create button is disabled and relabelled.
 * On success the custom track becomes both the selected option and the active
 * audio, and the UI is re-rendered. On failure the error is shown and the
 * button is restored.
 */
async function createCustomInstrumental() {
    const btn = document.getElementById('create-custom-btn');
    if (btn) {
        btn.disabled = true;
        btn.textContent = 'Creating...';
    }

    try {
        const response = await fetch(`${API_BASE}/create-custom-instrumental`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ mute_regions: muteRegions })
        });

        if (!response.ok) {
            // The error body is not guaranteed to be JSON, and `detail` is not
            // guaranteed to be a string (validation errors can be structured),
            // so parse defensively and fall back to the generic message.
            let detail = null;
            try {
                detail = (await response.json()).detail;
            } catch (parseError) {
                detail = null;
            }
            const detailText = typeof detail === 'string'
                ? detail
                : (detail ? JSON.stringify(detail) : '');
            throw new Error(detailText || 'Failed to create custom instrumental');
        }

        hasCustom = true;
        selectedOption = 'custom';
        activeAudio = 'custom';
        render();
    } catch (error) {
        showError(error.message);
        if (btn) {
            btn.disabled = false;
            btn.textContent = 'Create Custom Instrumental';
        }
    }
}
1073
+
1074
/**
 * Send the chosen instrumental option to the server.
 *
 * While the request is in flight the submit button is disabled and relabelled.
 * On success the main content is replaced with a confirmation message. On
 * failure the error is shown and the button is restored.
 */
async function submitSelection() {
    const btn = document.getElementById('submit-btn');
    if (btn) {
        btn.disabled = true;
        btn.textContent = 'Submitting...';
    }

    try {
        const response = await fetch(`${API_BASE}/select-instrumental`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ selection: selectedOption })
        });

        if (!response.ok) {
            // The error body is not guaranteed to be JSON, and `detail` is not
            // guaranteed to be a string (validation errors can be structured),
            // so parse defensively and fall back to the generic message.
            let detail = null;
            try {
                detail = (await response.json()).detail;
            } catch (parseError) {
                detail = null;
            }
            const detailText = typeof detail === 'string'
                ? detail
                : (detail ? JSON.stringify(detail) : '');
            throw new Error(detailText || 'Failed to submit selection');
        }

        // Show success message
        document.getElementById('main-content').innerHTML = `
            <div class="alert alert-success" style="max-width: 600px; margin: 4rem auto; text-align: center;">
                <h2 style="margin-bottom: 1rem;">✓ Selection Submitted!</h2>
                <p>You selected: <strong>${selectedOption === 'clean' ? 'Clean Instrumental' : selectedOption === 'with_backing' ? 'Instrumental with Backing Vocals' : 'Custom Instrumental'}</strong></p>
                <p style="margin-top: 1rem; color: var(--text-muted);">You can close this window now.</p>
            </div>
        `;
    } catch (error) {
        showError(error.message);
        if (btn) {
            btn.disabled = false;
            btn.textContent = '✓ Confirm Selection & Continue';
        }
    }
}
1109
+
1110
/**
 * Display an error banner inside the error container.
 *
 * The message is inserted as text, never as markup: it can contain
 * server-provided detail, which must not be interpreted as HTML.
 * Does nothing when the container element is missing.
 *
 * @param {string} message - Text to show to the user.
 */
function showError(message) {
    const container = document.getElementById('error-container');
    if (!container) {
        return;
    }

    const banner = document.createElement('div');
    banner.className = 'alert alert-error';
    banner.textContent = message;

    // Replace any previous banner rather than stacking them.
    container.textContent = '';
    container.appendChild(banner);
}
1116
+
1117
+ // Start
1118
+ init();
1119
+ </script>
1120
+ </body>
1121
+ </html>'''
1122
+
1123
def start_and_open_browser(self, port: int = 8765) -> str:
    """
    Start server, open browser, and block until selection is submitted.

    The uvicorn server runs in a daemon thread bound to 127.0.0.1. Once the
    selection event fires (or the wait is interrupted), the server is asked
    to exit so the port is released before this method returns.

    Args:
        port: Port to run the server on

    Returns:
        The user's selection ("clean", "with_backing", or "custom")

    Raises:
        RuntimeError: If the server thread exits before reporting startup
            (for example because the port could not be bound).
        ValueError: Propagated from ``get_selection`` if no selection was
            recorded.
    """
    import time

    self._app = self._create_app()

    # Run uvicorn in a separate thread
    config = uvicorn.Config(
        self._app,
        host="127.0.0.1",
        port=port,
        log_level="warning",
    )
    server = uvicorn.Server(config)

    self._server_thread = threading.Thread(target=server.run, daemon=True)
    self._server_thread.start()

    # Wait for the server to report startup instead of sleeping a fixed
    # interval. If the thread dies first, fail loudly: otherwise the wait
    # on the selection event below would block forever with no server.
    startup_timeout = 5.0
    poll_interval = 0.05
    deadline = time.monotonic() + startup_timeout
    while not server.started:
        if not self._server_thread.is_alive():
            raise RuntimeError(
                f"Instrumental review server failed to start on port {port}"
            )
        if time.monotonic() >= deadline:
            # Best effort: carry on as the fixed-sleep version would have.
            logger.warning(
                "Instrumental review server did not report startup within "
                f"{startup_timeout} seconds; continuing anyway"
            )
            break
        time.sleep(poll_interval)

    url = f"http://localhost:{port}/"
    logger.info(f"Instrumental review server started at {url}")

    # Open browser
    webbrowser.open(url)

    try:
        # Wait for selection
        logger.info("Waiting for instrumental selection...")
        self._selection_event.wait()

        # Give a moment for response to be sent
        time.sleep(0.5)
    finally:
        # Ask uvicorn to exit so the thread ends and the port is freed.
        # The join is bounded so a lingering connection cannot hang us.
        server.should_exit = True
        self._server_thread.join(timeout=5)

    return self.get_selection()
1168
+
1169
def stop(self) -> None:
    """Signal shutdown by setting the server's shutdown event."""
    shutdown_event = self._shutdown_event
    shutdown_event.set()
1172
+
1173
def get_selection(self) -> str:
    """
    Return the recorded selection.

    Raises:
        ValueError: If ``self.selection`` is still ``None``.
    """
    chosen = self.selection
    if chosen is not None:
        return chosen
    raise ValueError("No selection has been made")
1178
+
1179
def get_custom_instrumental_path(self) -> Optional[str]:
    """Return the stored custom instrumental path, or ``None`` if unset."""
    custom_path = self.custom_instrumental_path
    return custom_path