karaoke-gen 0.86.7__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/style_loader.py +3 -1
  148. karaoke_gen/utils/__init__.py +163 -8
  149. karaoke_gen/video_background_processor.py +9 -4
  150. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +2 -1
  151. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +187 -42
  152. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  153. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  154. lyrics_transcriber/correction/corrector.py +192 -130
  155. lyrics_transcriber/correction/operations.py +24 -9
  156. lyrics_transcriber/frontend/package-lock.json +2 -2
  157. lyrics_transcriber/frontend/package.json +1 -1
  158. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  159. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  160. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  161. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  162. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  163. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  164. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  165. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  168. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  170. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  171. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  172. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  173. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  174. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  175. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  176. lyrics_transcriber/frontend/src/theme.ts +42 -15
  177. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  178. lyrics_transcriber/frontend/vite.config.js +5 -0
  179. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  180. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  181. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  182. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  183. lyrics_transcriber/output/generator.py +17 -3
  184. lyrics_transcriber/output/video.py +60 -95
  185. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  186. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  187. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  188. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,29 @@
1
+ # DEPRECATED: This file is no longer used for Docker builds or CI.
2
+ # Docker and CI both use poetry/pyproject.toml directly.
3
+ # This file is kept for reference only.
4
+
5
+ fastapi>=0.104.0,<1.0.0
6
+ uvicorn[standard]>=0.24.0,<1.0.0
7
+ pydantic>=2.5.0,<3.0.0
8
+ pydantic-settings>=2.1.0,<3.0.0
9
+ python-multipart>=0.0.6,<1.0.0
10
+ httpx>=0.25.0,<1.0.0
11
+ google-cloud-firestore>=2.14.0,<3.0.0
12
+ google-cloud-storage>=2.14.0,<3.0.0
13
+ google-cloud-secret-manager>=2.18.0,<3.0.0
14
+ google-cloud-tasks>=2.14.0,<3.0.0
15
+ google-cloud-run>=0.10.0,<1.0.0
16
+ python-dotenv>=1.0.0,<2.0.0
17
+
18
+ # Audio processing (reuses karaoke_gen library)
19
+ audio-separator[remote]>=0.18.0 # Remote Modal API client
20
+ pydub>=0.25.1 # Audio manipulation (used by karaoke_gen)
21
+ yt-dlp>=2024.0.0 # YouTube downloads (future use)
22
+
23
+ # Cloud storage native APIs
24
+ dropbox>=11.0.0 # Dropbox Python SDK for native API access
25
+ # google-api-python-client already installed via karaoke_gen for YouTube
26
+
27
+ # karaoke-gen dependencies (will be installed from parent dir in Dockerfile)
28
+ # Using explicit dependencies instead of -e ../
29
+
backend/run_tests.sh ADDED
@@ -0,0 +1,60 @@
#!/bin/bash
# Backend Test Runner
# Runs integration tests against the deployed Cloud Run service.
#
# Flow:
#   1. Check that gcloud and pytest are available (pytest is installed from
#      tests/requirements.txt when missing).
#   2. Check that gcloud can mint an identity token.
#   3. Run the tests not marked "slow".
#   4. Optionally (interactive prompt) run the tests marked "slow".

set -e

# All paths below (tests/requirements.txt, tests/test_api_integration.py) are
# relative to this script's directory, so switch there first. This lets the
# script be launched from any working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "🧪 Backend Integration Test Suite"
echo "=================================="
echo ""

# Check prerequisites
echo "Checking prerequisites..."
if ! command -v gcloud &> /dev/null; then
    echo "❌ gcloud CLI not found. Please install Google Cloud SDK."
    exit 1
fi

if ! command -v pytest &> /dev/null; then
    echo "❌ pytest not found. Installing test dependencies..."
    pip install -r tests/requirements.txt
fi

# Verify authentication
echo "Verifying authentication..."
if ! gcloud auth print-identity-token &> /dev/null; then
    echo "❌ Not authenticated with gcloud. Run: gcloud auth login"
    exit 1
fi

echo "✅ Prerequisites met"
echo ""

# Run tests
echo "Running integration tests..."
echo ""

# Run fast tests (no slow marker)
echo "📝 Running fast tests..."
pytest tests/test_api_integration.py -v -m "not slow" --tb=short

echo ""
echo "✅ Fast tests complete!"
echo ""

# Ask if user wants to run slow tests.
# `read` exits non-zero at EOF (stdin closed or not a terminal, e.g. in CI).
# Without the fallback, `set -e` would abort the script with a failure even
# though the fast tests passed; treat "no answer" as "no".
read -p "Run slow/integration tests? These test actual job processing (5-10 min). [y/N] " -n 1 -r || REPLY=""
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo "🐌 Running slow integration tests..."
    pytest tests/test_api_integration.py -v -m "slow" --tb=short
    echo ""
    echo "✅ All tests complete!"
else
    echo "⏭️ Skipping slow tests"
fi

echo ""
echo "=================================="
echo "Test run complete! 🎉"
File without changes
@@ -0,0 +1,243 @@
1
+ """
2
+ Audio analysis service for analyzing backing vocals.
3
+
4
+ This service wraps the shared karaoke_gen.instrumental_review module
5
+ to provide GCS-integrated audio analysis capabilities.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ import tempfile
11
+ from typing import Optional
12
+
13
+ from backend.services.storage_service import StorageService
14
+ from karaoke_gen.instrumental_review import (
15
+ AudioAnalyzer,
16
+ AnalysisResult,
17
+ WaveformGenerator,
18
+ )
19
+
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
class AudioAnalysisService:
    """
    Service for analyzing backing vocals audio files stored in GCS.

    This service acts as a thin wrapper around the shared AudioAnalyzer
    and WaveformGenerator classes, handling GCS download/upload operations.

    The actual analysis logic is in the shared karaoke_gen.instrumental_review
    module, ensuring feature parity between local and remote workflows.

    Every public method works on a private temporary directory that is
    removed when the method returns, so no local files outlive a call.
    """

    def __init__(
        self,
        storage_service: Optional[StorageService] = None,
        silence_threshold_db: float = -40.0,
        min_segment_duration_ms: int = 100,
    ):
        """
        Initialize the audio analysis service.

        Args:
            storage_service: GCS storage service. If not provided, a new
                instance will be created.
            silence_threshold_db: Threshold for considering audio as silent.
                Default is -40.0 dB.
            min_segment_duration_ms: Minimum duration for audible segments.
                Default is 100ms.
        """
        self.storage_service = storage_service or StorageService()
        self.analyzer = AudioAnalyzer(
            silence_threshold_db=silence_threshold_db,
            min_segment_duration_ms=min_segment_duration_ms,
        )
        self.waveform_generator = WaveformGenerator()

    def _download_audio(self, gcs_audio_path: str, temp_dir: str) -> str:
        """
        Download an audio file from GCS into a local directory.

        Args:
            gcs_audio_path: Path to the audio file in GCS
            temp_dir: Existing local directory to download into

        Returns:
            Local path of the downloaded file
        """
        local_audio_path = os.path.join(temp_dir, "backing_vocals.flac")
        self.storage_service.download_file(gcs_audio_path, local_audio_path)
        return local_audio_path

    def analyze_backing_vocals(
        self,
        gcs_audio_path: str,
        job_id: str,
    ) -> AnalysisResult:
        """
        Analyze a backing vocals audio file from GCS.

        This method:
        1. Downloads the audio file from GCS to a temp file
        2. Runs the analysis using the shared AudioAnalyzer
        3. Returns the analysis result

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging

        Returns:
            AnalysisResult containing analysis data
        """
        logger.info(f"[{job_id}] Analyzing backing vocals: {gcs_audio_path}")

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # Run analysis
            result = self.analyzer.analyze(local_audio_path)

        logger.info(
            f"[{job_id}] Analysis complete: "
            f"has_audible={result.has_audible_content}, "
            f"segments={result.segment_count}, "
            f"recommendation={result.recommended_selection.value}"
        )

        return result

    def analyze_and_generate_waveform(
        self,
        gcs_audio_path: str,
        job_id: str,
        gcs_waveform_destination: str,
    ) -> tuple[AnalysisResult, str]:
        """
        Analyze backing vocals and generate a waveform image.

        This method:
        1. Downloads the audio file from GCS
        2. Runs analysis using AudioAnalyzer
        3. Generates waveform image using WaveformGenerator
        4. Uploads the waveform image to GCS
        5. Returns analysis result and waveform GCS path

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging
            gcs_waveform_destination: Where to upload the waveform image in GCS

        Returns:
            Tuple of (AnalysisResult, waveform_gcs_path)
        """
        logger.info(f"[{job_id}] Analyzing and generating waveform: {gcs_audio_path}")

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # Run analysis
            result = self.analyzer.analyze(local_audio_path)

            # Generate waveform, drawn with the same silence threshold the
            # analyzer used so the image agrees with the detected segments.
            local_waveform_path = os.path.join(temp_dir, "waveform.png")
            self.waveform_generator.generate(
                audio_path=local_audio_path,
                output_path=local_waveform_path,
                segments=result.audible_segments,
                show_time_axis=True,
                silence_threshold_db=self.analyzer.silence_threshold_db,
            )

            # Upload waveform to GCS (must happen before the temp dir is removed)
            self.storage_service.upload_file(
                local_waveform_path,
                gcs_waveform_destination
            )

        logger.info(
            f"[{job_id}] Analysis and waveform generation complete. "
            f"Waveform uploaded to: {gcs_waveform_destination}"
        )

        return result, gcs_waveform_destination

    def get_waveform_data(
        self,
        gcs_audio_path: str,
        job_id: str,
        num_points: int = 500,
    ) -> tuple[list[float], float]:
        """
        Get waveform data (amplitude envelope) for client-side rendering.

        This is useful when the frontend wants to render the waveform
        itself using Canvas or SVG, rather than displaying a pre-generated
        image.

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging
            num_points: Number of data points to return

        Returns:
            Tuple of (amplitude_values, duration_seconds)
        """
        logger.info(f"[{job_id}] Getting waveform data: {gcs_audio_path}")

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # Generate waveform data
            amplitudes, duration = self.waveform_generator.generate_data_only(
                audio_path=local_audio_path,
                num_points=num_points,
            )

        return amplitudes, duration

    def generate_waveform_with_mutes(
        self,
        gcs_audio_path: str,
        job_id: str,
        gcs_waveform_destination: str,
        mute_regions: list,
    ) -> str:
        """
        Generate a waveform image with mute regions highlighted.

        This is useful for showing the user which regions will be muted
        in the custom instrumental.

        Args:
            gcs_audio_path: Path to the audio file in GCS
            job_id: Job ID for logging
            gcs_waveform_destination: Where to upload the waveform image
            mute_regions: List of MuteRegion objects to highlight

        Returns:
            GCS path to the uploaded waveform image
        """
        logger.info(
            f"[{job_id}] Generating waveform with {len(mute_regions)} mute regions"
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            local_audio_path = self._download_audio(gcs_audio_path, temp_dir)

            # First run analysis to get segments
            result = self.analyzer.analyze(local_audio_path)

            # Generate waveform with mute regions. The analyzer's threshold is
            # passed through (as in analyze_and_generate_waveform) so that a
            # non-default threshold is not silently replaced by the
            # generator's own default when drawing.
            local_waveform_path = os.path.join(temp_dir, "waveform_with_mutes.png")
            self.waveform_generator.generate(
                audio_path=local_audio_path,
                output_path=local_waveform_path,
                segments=result.audible_segments,
                mute_regions=mute_regions,
                show_time_axis=True,
                silence_threshold_db=self.analyzer.silence_threshold_db,
            )

            # Upload to GCS (must happen before the temp dir is removed)
            self.storage_service.upload_file(
                local_waveform_path,
                gcs_waveform_destination
            )

        return gcs_waveform_destination
@@ -0,0 +1,278 @@
1
+ """
2
+ Audio editing service for creating custom instrumentals.
3
+
4
+ This service wraps the shared karaoke_gen.instrumental_review module
5
+ to provide GCS-integrated audio editing capabilities.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ import tempfile
11
+ from typing import List, Optional
12
+
13
+ from backend.services.storage_service import StorageService
14
+ from karaoke_gen.instrumental_review import (
15
+ AudioEditor,
16
+ MuteRegion,
17
+ )
18
+ from karaoke_gen.instrumental_review.models import CustomInstrumentalResult
19
+
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
class AudioEditingService:
    """
    Service for creating custom instrumental tracks with GCS integration.

    This service acts as a thin wrapper around the shared AudioEditor
    class, handling GCS download/upload operations.

    The actual editing logic is in the shared karaoke_gen.instrumental_review
    module, ensuring feature parity between local and remote workflows.

    Every method that touches audio works inside a private temporary
    directory that is removed when the method returns.
    """

    def __init__(
        self,
        storage_service: Optional[StorageService] = None,
        output_format: str = "flac",
    ):
        """
        Initialize the audio editing service.

        Args:
            storage_service: GCS storage service. If not provided, a new
                instance will be created.
            output_format: Output audio format. Default is "flac".
        """
        self.storage_service = storage_service or StorageService()
        self.editor = AudioEditor(output_format=output_format)

    def _download(self, gcs_path: str, temp_dir: str, filename: str) -> str:
        """
        Download a GCS object into a local directory.

        Args:
            gcs_path: Path to the object in GCS
            temp_dir: Existing local directory to download into
            filename: File name to give the local copy

        Returns:
            Local path of the downloaded file
        """
        local_path = os.path.join(temp_dir, filename)
        self.storage_service.download_file(gcs_path, local_path)
        return local_path

    def create_custom_instrumental(
        self,
        gcs_clean_instrumental_path: str,
        gcs_backing_vocals_path: str,
        mute_regions: List[MuteRegion],
        gcs_output_path: str,
        job_id: str,
    ) -> CustomInstrumentalResult:
        """
        Create a custom instrumental by muting regions of backing vocals.

        This method:
        1. Downloads clean instrumental and backing vocals from GCS
        2. Applies mute regions to backing vocals
        3. Combines with clean instrumental
        4. Uploads result to GCS

        Args:
            gcs_clean_instrumental_path: Path to clean instrumental in GCS
            gcs_backing_vocals_path: Path to backing vocals in GCS
            mute_regions: List of regions to mute in backing vocals
            gcs_output_path: Where to upload the custom instrumental in GCS
            job_id: Job ID for logging

        Returns:
            CustomInstrumentalResult with details about the created file;
            its output_path is set to gcs_output_path.
        """
        logger.info(
            f"[{job_id}] Creating custom instrumental with "
            f"{len(mute_regions)} mute regions"
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            local_output_path = os.path.join(temp_dir, "custom_instrumental.flac")

            # Download input files
            logger.debug(f"[{job_id}] Downloading clean instrumental")
            local_clean_path = self._download(
                gcs_clean_instrumental_path, temp_dir, "clean_instrumental.flac"
            )

            logger.debug(f"[{job_id}] Downloading backing vocals")
            local_backing_path = self._download(
                gcs_backing_vocals_path, temp_dir, "backing_vocals.flac"
            )

            # Create custom instrumental using shared editor
            result = self.editor.create_custom_instrumental(
                clean_instrumental_path=local_clean_path,
                backing_vocals_path=local_backing_path,
                mute_regions=mute_regions,
                output_path=local_output_path,
            )

            # Upload result to GCS
            logger.debug(f"[{job_id}] Uploading custom instrumental to GCS")
            self.storage_service.upload_file(local_output_path, gcs_output_path)

            # The local path is about to disappear with the temp dir, so
            # report the GCS location to the caller instead.
            result.output_path = gcs_output_path

        logger.info(
            f"[{job_id}] Custom instrumental created: {gcs_output_path}, "
            f"muted {result.total_muted_duration_seconds:.1f}s across "
            f"{len(result.mute_regions_applied)} regions"
        )

        return result

    def create_preview(
        self,
        gcs_clean_instrumental_path: str,
        gcs_backing_vocals_path: str,
        mute_regions: List[MuteRegion],
        gcs_preview_path: str,
        job_id: str,
        preview_duration_seconds: Optional[float] = None,
    ) -> str:
        """
        Create a preview of the custom instrumental.

        This is similar to create_custom_instrumental but optimized for
        quick preview generation (optionally truncated duration).

        Args:
            gcs_clean_instrumental_path: Path to clean instrumental in GCS
            gcs_backing_vocals_path: Path to backing vocals in GCS
            mute_regions: List of regions to mute
            gcs_preview_path: Where to upload the preview in GCS
            job_id: Job ID for logging
            preview_duration_seconds: Optional max duration for preview.
                None, zero or a negative value means "do not truncate".

        Returns:
            GCS path to the uploaded preview
        """
        logger.info(f"[{job_id}] Creating preview with {len(mute_regions)} mute regions")

        with tempfile.TemporaryDirectory() as temp_dir:
            local_preview_path = os.path.join(temp_dir, "preview.flac")

            # Download input files
            local_clean_path = self._download(
                gcs_clean_instrumental_path, temp_dir, "clean_instrumental.flac"
            )
            local_backing_path = self._download(
                gcs_backing_vocals_path, temp_dir, "backing_vocals.flac"
            )

            # Generate preview
            preview = self.editor.preview_with_mutes(
                clean_instrumental_path=local_clean_path,
                backing_vocals_path=local_backing_path,
                mute_regions=mute_regions,
            )

            # Optionally truncate. Only a positive limit is applied: a
            # negative value would become a negative slice bound and cut
            # audio off the END of the preview instead of capping its length.
            if preview_duration_seconds and preview_duration_seconds > 0:
                max_ms = int(preview_duration_seconds * 1000)
                preview = preview[:max_ms]

            # Export and upload
            preview.export(local_preview_path, format="flac")
            self.storage_service.upload_file(local_preview_path, gcs_preview_path)

        logger.info(f"[{job_id}] Preview created: {gcs_preview_path}")

        return gcs_preview_path

    def mute_backing_vocals_only(
        self,
        gcs_backing_vocals_path: str,
        mute_regions: List[MuteRegion],
        gcs_output_path: str,
        job_id: str,
    ) -> str:
        """
        Apply mute regions to backing vocals without combining with instrumental.

        This is useful for creating an edited backing vocals track that
        can be combined with the instrumental later.

        Args:
            gcs_backing_vocals_path: Path to backing vocals in GCS
            mute_regions: List of regions to mute
            gcs_output_path: Where to upload the edited backing vocals
            job_id: Job ID for logging

        Returns:
            GCS path to the uploaded edited backing vocals
        """
        logger.info(
            f"[{job_id}] Muting backing vocals with {len(mute_regions)} regions"
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            local_output_path = os.path.join(temp_dir, "muted_backing.flac")
            local_backing_path = self._download(
                gcs_backing_vocals_path, temp_dir, "backing_vocals.flac"
            )

            self.editor.apply_mute_to_single_track(
                audio_path=local_backing_path,
                mute_regions=mute_regions,
                output_path=local_output_path,
            )

            self.storage_service.upload_file(local_output_path, gcs_output_path)

        logger.info(f"[{job_id}] Muted backing vocals: {gcs_output_path}")

        return gcs_output_path

    def validate_mute_regions(
        self,
        mute_regions: List[MuteRegion],
        total_duration_seconds: float,
    ) -> List[str]:
        """
        Validate mute regions for consistency.

        A region is reported as invalid when its start is negative, its end
        is not after its start, or its start lies beyond the audio duration.
        An end beyond the audio duration is only logged as a warning.

        Args:
            mute_regions: List of mute regions to validate
            total_duration_seconds: Total duration of the audio

        Returns:
            List of validation error messages (empty if valid)
        """
        errors = []

        for i, region in enumerate(mute_regions):
            if region.start_seconds < 0:
                errors.append(
                    f"Region {i}: start_seconds ({region.start_seconds}) cannot be negative"
                )

            if region.end_seconds <= region.start_seconds:
                errors.append(
                    f"Region {i}: end_seconds ({region.end_seconds}) must be after "
                    f"start_seconds ({region.start_seconds})"
                )

            if region.start_seconds > total_duration_seconds:
                errors.append(
                    f"Region {i}: start_seconds ({region.start_seconds}) exceeds "
                    f"audio duration ({total_duration_seconds})"
                )

            if region.end_seconds > total_duration_seconds:
                # Not an error, but log a warning - the region will be clamped
                logger.warning(
                    f"Region {i}: end_seconds ({region.end_seconds}) exceeds "
                    f"audio duration ({total_duration_seconds}), will be clamped"
                )

        return errors