karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +835 -0
- backend/api/routes/audio_search.py +913 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2112 -0
- backend/api/routes/health.py +409 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1629 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1513 -0
- backend/config.py +172 -0
- backend/main.py +157 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +502 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +853 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/langfuse_preloader.py +98 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +371 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +109 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +356 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +283 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/utils/test_data.py +27 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +535 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
backend/requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# DEPRECATED: This file is no longer used for Docker builds or CI.
|
|
2
|
+
# Docker and CI both use poetry/pyproject.toml directly.
|
|
3
|
+
# This file is kept for reference only.
|
|
4
|
+
|
|
5
|
+
fastapi>=0.104.0,<1.0.0
|
|
6
|
+
uvicorn[standard]>=0.24.0,<1.0.0
|
|
7
|
+
pydantic>=2.5.0,<3.0.0
|
|
8
|
+
pydantic-settings>=2.1.0,<3.0.0
|
|
9
|
+
python-multipart>=0.0.6,<1.0.0
|
|
10
|
+
httpx>=0.25.0,<1.0.0
|
|
11
|
+
google-cloud-firestore>=2.14.0,<3.0.0
|
|
12
|
+
google-cloud-storage>=2.14.0,<3.0.0
|
|
13
|
+
google-cloud-secret-manager>=2.18.0,<3.0.0
|
|
14
|
+
google-cloud-tasks>=2.14.0,<3.0.0
|
|
15
|
+
google-cloud-run>=0.10.0,<1.0.0
|
|
16
|
+
python-dotenv>=1.0.0,<2.0.0
|
|
17
|
+
|
|
18
|
+
# Audio processing (reuses karaoke_gen library)
|
|
19
|
+
audio-separator[remote]>=0.18.0 # Remote Modal API client
|
|
20
|
+
pydub>=0.25.1 # Audio manipulation (used by karaoke_gen)
|
|
21
|
+
yt-dlp>=2024.0.0 # YouTube downloads (future use)
|
|
22
|
+
|
|
23
|
+
# Cloud storage native APIs
|
|
24
|
+
dropbox>=11.0.0 # Dropbox Python SDK for native API access
|
|
25
|
+
# google-api-python-client already installed via karaoke_gen for YouTube
|
|
26
|
+
|
|
27
|
+
# karaoke-gen dependencies (will be installed from parent dir in Dockerfile)
|
|
28
|
+
# Using explicit dependencies instead of -e ../
|
|
29
|
+
|
backend/run_tests.sh
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/bin/bash
# Backend Test Runner
# Runs integration tests against the deployed Cloud Run service.
#
# Steps:
#   1. Check that gcloud and pytest are available (pytest is installed on demand).
#   2. Check that gcloud can mint an identity token.
#   3. Run the fast tests, then optionally the slow ones.

set -e

# All test paths below are relative to the backend directory, so anchor the
# working directory to this script's location rather than the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "🧪 Backend Integration Test Suite"
echo "=================================="
echo ""

# Check prerequisites
echo "Checking prerequisites..."
if ! command -v gcloud &> /dev/null; then
    echo "❌ gcloud CLI not found. Please install Google Cloud SDK."
    exit 1
fi

if ! command -v pytest &> /dev/null; then
    echo "❌ pytest not found. Installing test dependencies..."
    pip install -r tests/requirements.txt
fi

# Verify authentication
echo "Verifying authentication..."
if ! gcloud auth print-identity-token &> /dev/null; then
    echo "❌ Not authenticated with gcloud. Run: gcloud auth login"
    exit 1
fi

echo "✅ Prerequisites met"
echo ""

# Run tests
echo "Running integration tests..."
echo ""

# Run fast tests (no slow marker)
echo "📝 Running fast tests..."
pytest tests/test_api_integration.py -v -m "not slow" --tb=short

echo ""
echo "✅ Fast tests complete!"
echo ""

# Ask if user wants to run slow tests.
# `read` returns non-zero on EOF (e.g. CI or a closed stdin); under `set -e`
# that would abort the script with a failure even though the fast tests
# passed, so treat "no answer" as "no".
read -p "Run slow/integration tests? These test actual job processing (5-10 min). [y/N] " -n 1 -r || REPLY=""
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo "🐌 Running slow integration tests..."
    pytest tests/test_api_integration.py -v -m "slow" --tb=short
    echo ""
    echo "✅ All tests complete!"
else
    echo "⏭️ Skipping slow tests"
fi

echo ""
echo "=================================="
echo "Test run complete! 🎉"
|
|
60
|
+
|
|
File without changes
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Audio analysis service for analyzing backing vocals.
|
|
3
|
+
|
|
4
|
+
This service wraps the shared karaoke_gen.instrumental_review module
|
|
5
|
+
to provide GCS-integrated audio analysis capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import tempfile
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from backend.services.storage_service import StorageService
|
|
14
|
+
from karaoke_gen.instrumental_review import (
|
|
15
|
+
AudioAnalyzer,
|
|
16
|
+
AnalysisResult,
|
|
17
|
+
WaveformGenerator,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AudioAnalysisService:
    """
    Service for analyzing backing vocals audio files stored in GCS.

    This service acts as a thin wrapper around the shared AudioAnalyzer
    and WaveformGenerator classes, handling GCS download/upload operations.

    The actual analysis logic is in the shared karaoke_gen.instrumental_review
    module, ensuring feature parity between local and remote workflows.
    """

    # File name given to the downloaded audio inside each temporary directory.
    _LOCAL_AUDIO_FILENAME = "backing_vocals.flac"

    def __init__(
        self,
        storage_service: Optional[StorageService] = None,
        silence_threshold_db: float = -40.0,
        min_segment_duration_ms: int = 100,
    ):
        """
        Initialize the audio analysis service.

        Args:
            storage_service: GCS storage service. If not provided, a new
                instance will be created.
            silence_threshold_db: Threshold for considering audio as silent.
                Default is -40.0 dB.
            min_segment_duration_ms: Minimum duration for audible segments.
                Default is 100ms.
        """
        self.storage_service = storage_service or StorageService()
        self.analyzer = AudioAnalyzer(
            silence_threshold_db=silence_threshold_db,
            min_segment_duration_ms=min_segment_duration_ms,
        )
        self.waveform_generator = WaveformGenerator()

    def _download_audio(self, gcs_audio_path: str, temp_dir: str) -> str:
        """
        Download the audio file from GCS into ``temp_dir``.

        Args:
            gcs_audio_path: Path to the audio file in GCS
            temp_dir: Existing local directory to download into

        Returns:
            Local path of the downloaded file
        """
        local_audio_path = os.path.join(temp_dir, self._LOCAL_AUDIO_FILENAME)
        self.storage_service.download_file(gcs_audio_path, local_audio_path)
        return local_audio_path

    def analyze_backing_vocals(
        self,
        gcs_audio_path: str,
        job_id: str,
    ) -> AnalysisResult:
        """
        Analyze a backing vocals audio file from GCS.

        This method:
        1. Downloads the audio file from GCS to a temp file
        2. Runs the analysis using the shared AudioAnalyzer
        3. Returns the analysis result

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging

        Returns:
            AnalysisResult containing analysis data
        """
        logger.info(f"[{job_id}] Analyzing backing vocals: {gcs_audio_path}")

        # The temp directory (and the downloaded audio) is removed on exit.
        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)
            result = self.analyzer.analyze(local_audio_path)

        logger.info(
            f"[{job_id}] Analysis complete: "
            f"has_audible={result.has_audible_content}, "
            f"segments={result.segment_count}, "
            f"recommendation={result.recommended_selection.value}"
        )

        return result

    def analyze_and_generate_waveform(
        self,
        gcs_audio_path: str,
        job_id: str,
        gcs_waveform_destination: str,
    ) -> tuple[AnalysisResult, str]:
        """
        Analyze backing vocals and generate a waveform image.

        This method:
        1. Downloads the audio file from GCS
        2. Runs analysis using AudioAnalyzer
        3. Generates waveform image using WaveformGenerator
        4. Uploads the waveform image to GCS
        5. Returns analysis result and waveform GCS path

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging
            gcs_waveform_destination: Where to upload the waveform image in GCS

        Returns:
            Tuple of (AnalysisResult, waveform_gcs_path)
        """
        logger.info(f"[{job_id}] Analyzing and generating waveform: {gcs_audio_path}")

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # Run analysis
            result = self.analyzer.analyze(local_audio_path)

            # Generate waveform
            local_waveform_path = os.path.join(temp_dir, "waveform.png")
            self.waveform_generator.generate(
                audio_path=local_audio_path,
                output_path=local_waveform_path,
                segments=result.audible_segments,
                show_time_axis=True,
                silence_threshold_db=self.analyzer.silence_threshold_db,
            )

            # Upload while the temp file still exists.
            self.storage_service.upload_file(
                local_waveform_path,
                gcs_waveform_destination,
            )

        logger.info(
            f"[{job_id}] Analysis and waveform generation complete. "
            f"Waveform uploaded to: {gcs_waveform_destination}"
        )

        return result, gcs_waveform_destination

    def get_waveform_data(
        self,
        gcs_audio_path: str,
        job_id: str,
        num_points: int = 500,
    ) -> tuple[list[float], float]:
        """
        Get waveform data (amplitude envelope) for client-side rendering.

        This is useful when the frontend wants to render the waveform
        itself using Canvas or SVG, rather than displaying a pre-generated
        image.

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging
            num_points: Number of data points to return

        Returns:
            Tuple of (amplitude_values, duration_seconds)
        """
        logger.info(f"[{job_id}] Getting waveform data: {gcs_audio_path}")

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # Generate waveform data
            amplitudes, duration = self.waveform_generator.generate_data_only(
                audio_path=local_audio_path,
                num_points=num_points,
            )

        return amplitudes, duration

    def generate_waveform_with_mutes(
        self,
        gcs_audio_path: str,
        job_id: str,
        gcs_waveform_destination: str,
        mute_regions: list,
    ) -> str:
        """
        Generate a waveform image with mute regions highlighted.

        This is useful for showing the user which regions will be muted
        in the custom instrumental.

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging
            gcs_waveform_destination: Where to upload the waveform image
            mute_regions: List of MuteRegion objects to highlight

        Returns:
            GCS path to the uploaded waveform image
        """
        logger.info(
            f"[{job_id}] Generating waveform with {len(mute_regions)} mute regions"
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # First run analysis to get segments
            result = self.analyzer.analyze(local_audio_path)

            # Generate waveform with mute regions. The analyzer's threshold is
            # forwarded so the image agrees with the segments computed above,
            # matching analyze_and_generate_waveform().
            local_waveform_path = os.path.join(temp_dir, "waveform_with_mutes.png")
            self.waveform_generator.generate(
                audio_path=local_audio_path,
                output_path=local_waveform_path,
                segments=result.audible_segments,
                mute_regions=mute_regions,
                show_time_axis=True,
                silence_threshold_db=self.analyzer.silence_threshold_db,
            )

            # Upload to GCS
            self.storage_service.upload_file(
                local_waveform_path,
                gcs_waveform_destination,
            )

        return gcs_waveform_destination
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Audio editing service for creating custom instrumentals.
|
|
3
|
+
|
|
4
|
+
This service wraps the shared karaoke_gen.instrumental_review module
|
|
5
|
+
to provide GCS-integrated audio editing capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import tempfile
|
|
11
|
+
from typing import List, Optional
|
|
12
|
+
|
|
13
|
+
from backend.services.storage_service import StorageService
|
|
14
|
+
from karaoke_gen.instrumental_review import (
|
|
15
|
+
AudioEditor,
|
|
16
|
+
MuteRegion,
|
|
17
|
+
)
|
|
18
|
+
from karaoke_gen.instrumental_review.models import CustomInstrumentalResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AudioEditingService:
    """
    Service for creating custom instrumental tracks with GCS integration.

    This service acts as a thin wrapper around the shared AudioEditor
    class, handling GCS download/upload operations.

    The actual editing logic is in the shared karaoke_gen.instrumental_review
    module, ensuring feature parity between local and remote workflows.
    """

    def __init__(
        self,
        storage_service: Optional[StorageService] = None,
        output_format: str = "flac",
    ):
        """
        Initialize the audio editing service.

        Args:
            storage_service: GCS storage service. If not provided, a new
                instance will be created.
            output_format: Output audio format. Default is "flac".
        """
        self.storage_service = storage_service or StorageService()
        self.editor = AudioEditor(output_format=output_format)

    def _download_to(self, gcs_path: str, temp_dir: str, filename: str) -> str:
        """
        Download ``gcs_path`` into ``temp_dir`` under ``filename``.

        Args:
            gcs_path: Path of the object in GCS
            temp_dir: Existing local directory to download into
            filename: File name to use for the local copy

        Returns:
            Local path of the downloaded file
        """
        local_path = os.path.join(temp_dir, filename)
        self.storage_service.download_file(gcs_path, local_path)
        return local_path

    def create_custom_instrumental(
        self,
        gcs_clean_instrumental_path: str,
        gcs_backing_vocals_path: str,
        mute_regions: List[MuteRegion],
        gcs_output_path: str,
        job_id: str,
    ) -> CustomInstrumentalResult:
        """
        Create a custom instrumental by muting regions of backing vocals.

        This method:
        1. Downloads clean instrumental and backing vocals from GCS
        2. Applies mute regions to backing vocals
        3. Combines with clean instrumental
        4. Uploads result to GCS

        Args:
            gcs_clean_instrumental_path: Path to clean instrumental in GCS
            gcs_backing_vocals_path: Path to backing vocals in GCS
            mute_regions: List of regions to mute in backing vocals
            gcs_output_path: Where to upload the custom instrumental in GCS
            job_id: Job ID for logging

        Returns:
            CustomInstrumentalResult with details about the created file;
            its ``output_path`` is overwritten with ``gcs_output_path``.
        """
        logger.info(
            f"[{job_id}] Creating custom instrumental with "
            f"{len(mute_regions)} mute regions"
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            # Download input files
            logger.debug(f"[{job_id}] Downloading clean instrumental")
            local_clean_path = self._download_to(
                gcs_clean_instrumental_path, temp_dir, "clean_instrumental.flac"
            )

            logger.debug(f"[{job_id}] Downloading backing vocals")
            local_backing_path = self._download_to(
                gcs_backing_vocals_path, temp_dir, "backing_vocals.flac"
            )

            local_output_path = os.path.join(temp_dir, "custom_instrumental.flac")

            # Create custom instrumental using shared editor
            result = self.editor.create_custom_instrumental(
                clean_instrumental_path=local_clean_path,
                backing_vocals_path=local_backing_path,
                mute_regions=mute_regions,
                output_path=local_output_path,
            )

            # Upload result to GCS
            logger.debug(f"[{job_id}] Uploading custom instrumental to GCS")
            self.storage_service.upload_file(local_output_path, gcs_output_path)

            # The local path is about to disappear with the temp directory,
            # so report the GCS location instead.
            result.output_path = gcs_output_path

        logger.info(
            f"[{job_id}] Custom instrumental created: {gcs_output_path}, "
            f"muted {result.total_muted_duration_seconds:.1f}s across "
            f"{len(result.mute_regions_applied)} regions"
        )

        return result

    def create_preview(
        self,
        gcs_clean_instrumental_path: str,
        gcs_backing_vocals_path: str,
        mute_regions: List[MuteRegion],
        gcs_preview_path: str,
        job_id: str,
        preview_duration_seconds: Optional[float] = None,
    ) -> str:
        """
        Create a preview of the custom instrumental.

        This is similar to create_custom_instrumental but optimized for
        quick preview generation (optionally truncated duration).

        Args:
            gcs_clean_instrumental_path: Path to clean instrumental in GCS
            gcs_backing_vocals_path: Path to backing vocals in GCS
            mute_regions: List of regions to mute
            gcs_preview_path: Where to upload the preview in GCS
            job_id: Job ID for logging
            preview_duration_seconds: Optional max duration for preview.
                ``None``, zero and negative values leave the preview
                untruncated.

        Returns:
            GCS path to the uploaded preview
        """
        logger.info(f"[{job_id}] Creating preview with {len(mute_regions)} mute regions")

        with tempfile.TemporaryDirectory() as temp_dir:
            # Download input files
            local_clean_path = self._download_to(
                gcs_clean_instrumental_path, temp_dir, "clean_instrumental.flac"
            )
            local_backing_path = self._download_to(
                gcs_backing_vocals_path, temp_dir, "backing_vocals.flac"
            )
            local_preview_path = os.path.join(temp_dir, "preview.flac")

            # Generate preview
            preview = self.editor.preview_with_mutes(
                clean_instrumental_path=local_clean_path,
                backing_vocals_path=local_backing_path,
                mute_regions=mute_regions,
            )

            # Optionally truncate. A negative limit is ignored: used as a
            # slice bound it would cut audio off the END of the preview
            # rather than cap its length.
            if preview_duration_seconds:
                if preview_duration_seconds > 0:
                    max_ms = int(preview_duration_seconds * 1000)
                    preview = preview[:max_ms]
                else:
                    logger.warning(
                        f"[{job_id}] Ignoring invalid preview_duration_seconds "
                        f"({preview_duration_seconds}); preview will not be truncated"
                    )

            # Export and upload
            preview.export(local_preview_path, format="flac")
            self.storage_service.upload_file(local_preview_path, gcs_preview_path)

        logger.info(f"[{job_id}] Preview created: {gcs_preview_path}")

        return gcs_preview_path

    def mute_backing_vocals_only(
        self,
        gcs_backing_vocals_path: str,
        mute_regions: List[MuteRegion],
        gcs_output_path: str,
        job_id: str,
    ) -> str:
        """
        Apply mute regions to backing vocals without combining with instrumental.

        This is useful for creating an edited backing vocals track that
        can be combined with the instrumental later.

        Args:
            gcs_backing_vocals_path: Path to backing vocals in GCS
            mute_regions: List of regions to mute
            gcs_output_path: Where to upload the edited backing vocals
            job_id: Job ID for logging

        Returns:
            GCS path to the uploaded edited backing vocals
        """
        logger.info(
            f"[{job_id}] Muting backing vocals with {len(mute_regions)} regions"
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            local_backing_path = self._download_to(
                gcs_backing_vocals_path, temp_dir, "backing_vocals.flac"
            )
            local_output_path = os.path.join(temp_dir, "muted_backing.flac")

            self.editor.apply_mute_to_single_track(
                audio_path=local_backing_path,
                mute_regions=mute_regions,
                output_path=local_output_path,
            )

            self.storage_service.upload_file(local_output_path, gcs_output_path)

        logger.info(f"[{job_id}] Muted backing vocals: {gcs_output_path}")

        return gcs_output_path

    def validate_mute_regions(
        self,
        mute_regions: List[MuteRegion],
        total_duration_seconds: float,
    ) -> List[str]:
        """
        Validate mute regions for consistency.

        A region is reported when a bound is NaN, when it starts before 0,
        when it does not end after it starts, or when it starts past the end
        of the audio. A region that merely ends past the end of the audio is
        not an error; a warning is logged instead.

        Args:
            mute_regions: List of mute regions to validate
            total_duration_seconds: Total duration of the audio

        Returns:
            List of validation error messages (empty if valid)
        """
        import math

        errors = []

        for i, region in enumerate(mute_regions):
            start = region.start_seconds
            end = region.end_seconds

            # Every comparison against NaN is False, so without this check a
            # NaN bound would pass all of the tests below unnoticed.
            if math.isnan(start) or math.isnan(end):
                errors.append(
                    f"Region {i}: start_seconds ({start}) and end_seconds ({end}) "
                    f"must be valid numbers"
                )
                continue

            if start < 0:
                errors.append(
                    f"Region {i}: start_seconds ({start}) cannot be negative"
                )

            if end <= start:
                errors.append(
                    f"Region {i}: end_seconds ({end}) must be after "
                    f"start_seconds ({start})"
                )

            if start > total_duration_seconds:
                errors.append(
                    f"Region {i}: start_seconds ({start}) exceeds "
                    f"audio duration ({total_duration_seconds})"
                )

            if end > total_duration_seconds:
                # Not an error, but log a warning - the region will be clamped
                logger.warning(
                    f"Region {i}: end_seconds ({end}) exceeds "
                    f"audio duration ({total_duration_seconds}), will be clamped"
                )

        return errors
|