karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +835 -0
- backend/api/routes/audio_search.py +913 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2112 -0
- backend/api/routes/health.py +409 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1629 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1513 -0
- backend/config.py +172 -0
- backend/main.py +157 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +502 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +853 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/langfuse_preloader.py +98 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +371 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +109 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +356 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +283 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/utils/test_data.py +27 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +535 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom metrics for Cloud Monitoring.
|
|
3
|
+
|
|
4
|
+
This module provides application-level metrics for tracking job processing,
|
|
5
|
+
worker performance, and external API usage. Metrics can be viewed in:
|
|
6
|
+
1. Cloud Logging (via structured log entries)
|
|
7
|
+
2. Cloud Monitoring (via log-based metrics or OpenTelemetry)
|
|
8
|
+
|
|
9
|
+
The metrics service uses a pragmatic approach:
|
|
10
|
+
- Always emits metrics as structured log entries (works immediately)
|
|
11
|
+
- Uses the same JSON format as Cloud Logging
|
|
12
|
+
- Can be enhanced with OpenTelemetry metrics exporters when available
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
from backend.services.metrics import metrics
|
|
16
|
+
|
|
17
|
+
# Record a job completion
|
|
18
|
+
metrics.record_job_completed("abc123", source="upload")
|
|
19
|
+
|
|
20
|
+
# Record worker duration
|
|
21
|
+
metrics.record_worker_duration("audio", 45.2, success=True)
|
|
22
|
+
|
|
23
|
+
# Record external API call
|
|
24
|
+
with metrics.time_external_api("modal"):
|
|
25
|
+
response = await modal_client.separate_audio(...)
|
|
26
|
+
"""
|
|
27
|
+
import logging
|
|
28
|
+
import time
|
|
29
|
+
from contextlib import contextmanager
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
from typing import Any, Dict, Optional
|
|
32
|
+
|
|
33
|
+
from backend.services.tracing import get_current_trace_id, get_current_span_id
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger("metrics")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class MetricLabels:
    """Bag of optional label values that can be attached to a metric.

    Every field defaults to ``None``; only fields that were explicitly
    populated are reported by :meth:`to_dict`.
    """

    job_id: Optional[str] = None
    worker: Optional[str] = None
    status: Optional[str] = None
    source: Optional[str] = None
    api: Optional[str] = None
    operation: Optional[str] = None
    bucket: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the populated labels as a plain dict, dropping ``None`` values."""
        populated: Dict[str, Any] = {}
        for label_name, label_value in vars(self).items():
            if label_value is None:
                continue
            populated[label_name] = label_value
        return populated
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class MetricsService:
    """
    Application metrics service.

    Emits metrics as structured log entries that can be:
    1. Queried directly in Cloud Logging
    2. Converted to Cloud Monitoring metrics via log-based metrics
    3. Exported via OpenTelemetry (when configured)
    """

    def __init__(self):
        """Initialize the metrics service."""
        self._logger = logging.getLogger("metrics")
        # Ensure metrics logger outputs at INFO level
        self._logger.setLevel(logging.INFO)

    def _emit_metric(self, metric_name: str, metric_type: str, value: float, labels: Dict[str, Any]) -> None:
        """
        Emit a metric as a structured log entry.

        The log format is designed to be easily parsed by Cloud Logging
        and converted to log-based metrics.

        Args:
            metric_name: Name of the metric (e.g., "jobs_total")
            metric_type: Type of metric (counter, histogram, gauge)
            value: Metric value
            labels: Metric labels/dimensions. They are passed to the logger
                through ``extra``, so keys must not collide with built-in
                ``LogRecord`` attribute names (e.g. "name", "message").
        """
        # Add trace context if available
        trace_id = get_current_trace_id()
        span_id = get_current_span_id()

        # Build metric entry
        metric_entry = {
            "metric_name": metric_name,
            "metric_type": metric_type,
            "metric_value": value,
            **labels,
        }

        if trace_id:
            metric_entry["trace_id"] = trace_id
        if span_id:
            metric_entry["span_id"] = span_id

        # Emit as structured log entry
        # Use INFO level so metrics always show up
        self._logger.info(
            f"METRIC {metric_name}={value}",
            extra=metric_entry
        )

    # =========================================
    # Job Metrics
    # =========================================

    def record_job_created(self, job_id: str, source: str = "unknown") -> None:
        """
        Record a new job creation.

        Args:
            job_id: Job ID
            source: Job source (upload, url, search)
        """
        self._emit_metric(
            metric_name="jobs_total",
            metric_type="counter",
            value=1,
            labels={"job_id": job_id, "status": "created", "source": source}
        )

    def record_job_completed(self, job_id: str, source: str = "unknown") -> None:
        """
        Record a job completion.

        Args:
            job_id: Job ID
            source: Job source (upload, url, search)
        """
        self._emit_metric(
            metric_name="jobs_total",
            metric_type="counter",
            value=1,
            labels={"job_id": job_id, "status": "completed", "source": source}
        )

    def record_job_failed(self, job_id: str, source: str = "unknown", error: Optional[str] = None) -> None:
        """
        Record a job failure.

        Args:
            job_id: Job ID
            source: Job source (upload, url, search)
            error: Optional error message (truncated to 200 characters)
        """
        labels = {"job_id": job_id, "status": "failed", "source": source}
        if error:
            labels["error"] = error[:200]  # Truncate long errors
        self._emit_metric(
            metric_name="jobs_total",
            metric_type="counter",
            value=1,
            labels=labels
        )

    def record_job_duration(self, job_id: str, duration_seconds: float, source: str = "unknown") -> None:
        """
        Record total job processing duration.

        Args:
            job_id: Job ID
            duration_seconds: Total processing time in seconds
            source: Job source (upload, url, search)
        """
        self._emit_metric(
            metric_name="job_duration_seconds",
            metric_type="histogram",
            value=duration_seconds,
            labels={"job_id": job_id, "source": source}
        )

    # =========================================
    # Worker Metrics
    # =========================================

    def record_worker_started(self, worker: str, job_id: str) -> None:
        """
        Record a worker invocation start.

        Args:
            worker: Worker name (audio, lyrics, screens, video, render_video)
            job_id: Job ID
        """
        self._emit_metric(
            metric_name="worker_invocations_total",
            metric_type="counter",
            value=1,
            labels={"worker": worker, "job_id": job_id, "status": "started"}
        )

    def record_worker_duration(self, worker: str, duration_seconds: float, success: bool, job_id: Optional[str] = None) -> None:
        """
        Record worker processing duration.

        Emits a ``job_stage_duration_seconds`` histogram followed by a
        ``worker_invocations_total`` counter carrying the outcome.

        Args:
            worker: Worker name
            duration_seconds: Processing time in seconds
            success: Whether worker completed successfully
            job_id: Optional job ID
        """
        success_label = str(success).lower()

        labels = {
            "worker": worker,
            "success": success_label,
        }
        if job_id:
            labels["job_id"] = job_id

        self._emit_metric(
            metric_name="job_stage_duration_seconds",
            metric_type="histogram",
            value=duration_seconds,
            labels=labels
        )

        # Also emit a counter for success/failure tracking.
        # The counter always carries a job_id label ("unknown" when absent).
        self._emit_metric(
            metric_name="worker_invocations_total",
            metric_type="counter",
            value=1,
            labels={"worker": worker, "success": success_label, "job_id": job_id or "unknown"}
        )

    # =========================================
    # GCS Metrics
    # =========================================

    def record_gcs_operation(
        self,
        operation: str,
        bucket: str,
        success: bool,
        size_bytes: Optional[int] = None,
        duration_seconds: Optional[float] = None,
        job_id: Optional[str] = None,
    ) -> None:
        """
        Record a GCS operation.

        Always emits a ``gcs_operations_total`` counter; the size and
        duration histograms are emitted only when the value is provided.

        Args:
            operation: Operation type (upload, download, delete)
            bucket: GCS bucket name
            success: Whether operation succeeded
            size_bytes: Optional file size in bytes
            duration_seconds: Optional operation duration
            job_id: Optional job ID
        """
        labels = {
            "operation": operation,
            "bucket": bucket,
            "success": str(success).lower(),
        }
        if job_id:
            labels["job_id"] = job_id

        self._emit_metric(
            metric_name="gcs_operations_total",
            metric_type="counter",
            value=1,
            labels=labels
        )

        if size_bytes is not None:
            self._emit_metric(
                metric_name="gcs_operation_bytes",
                metric_type="histogram",
                value=size_bytes,
                labels=dict(labels)
            )

        if duration_seconds is not None:
            self._emit_metric(
                metric_name="gcs_operation_duration_seconds",
                metric_type="histogram",
                value=duration_seconds,
                labels=dict(labels)
            )

    # =========================================
    # External API Metrics
    # =========================================

    def record_external_api_call(
        self,
        api: str,
        success: bool,
        duration_seconds: float,
        job_id: Optional[str] = None,
        error: Optional[str] = None,
    ) -> None:
        """
        Record an external API call.

        Emits an ``external_api_calls_total`` counter (with job_id/error
        labels when given) and an ``external_api_duration_seconds``
        histogram labelled only by api and success.

        Args:
            api: API name (modal, audioshake, genius, spotify)
            success: Whether call succeeded
            duration_seconds: API call duration
            job_id: Optional job ID
            error: Optional error message (truncated to 100 characters)
        """
        success_label = str(success).lower()

        labels = {
            "api": api,
            "success": success_label,
        }
        if job_id:
            labels["job_id"] = job_id
        if error:
            labels["error"] = error[:100]  # Truncate

        self._emit_metric(
            metric_name="external_api_calls_total",
            metric_type="counter",
            value=1,
            labels=labels
        )

        self._emit_metric(
            metric_name="external_api_duration_seconds",
            metric_type="histogram",
            value=duration_seconds,
            labels={"api": api, "success": success_label}
        )

    @contextmanager
    def time_external_api(self, api: str, job_id: Optional[str] = None):
        """
        Context manager to time an external API call.

        The call is assumed successful unless the body raises or the
        caller invokes ``timer.set_success(False)``. The metric is
        recorded on exit in every case and any exception is re-raised.

        Usage:
            with metrics.time_external_api("modal", job_id) as timer:
                response = await client.call_api()
                timer.set_success(True)

        Args:
            api: API name
            job_id: Optional job ID

        Yields:
            Timer object with set_success() method
        """
        timer = _ApiTimer()
        # perf_counter is monotonic, so the duration cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        try:
            yield timer
        except BaseException as e:
            # BaseException (not Exception) so that task cancellation and
            # interrupts are not reported as successful calls.
            timer.set_success(False)
            # Fall back to the class name when the exception has no message.
            timer.error = str(e) or type(e).__name__
            raise
        finally:
            duration = time.perf_counter() - start_time
            self.record_external_api_call(
                api=api,
                success=timer.success,
                duration_seconds=duration,
                job_id=job_id,
                error=timer.error,
            )

    @contextmanager
    def time_worker(self, worker: str, job_id: str):
        """
        Context manager to time a worker execution.

        Records a "started" invocation on entry and the duration plus
        outcome on exit. The outcome is success unless the body raises or
        the caller invokes ``timer.set_success(False)``.

        Usage:
            with metrics.time_worker("audio", job_id) as timer:
                await process_audio()
                timer.set_success(True)

        Args:
            worker: Worker name
            job_id: Job ID

        Yields:
            Timer object with set_success() method
        """
        timer = _ApiTimer()
        self.record_worker_started(worker, job_id)
        # Monotonic clock: immune to wall-clock adjustments.
        start_time = time.perf_counter()

        try:
            yield timer
        except BaseException:
            # Includes cancellation/interrupts, which must count as failures.
            timer.set_success(False)
            raise
        finally:
            duration = time.perf_counter() - start_time
            self.record_worker_duration(
                worker=worker,
                duration_seconds=duration,
                success=timer.success,
                job_id=job_id,
            )


class _ApiTimer:
    """Helper class for tracking the success state of a timed call."""

    def __init__(self):
        self.success = True  # Assume success unless set otherwise
        self.error: Optional[str] = None

    def set_success(self, success: bool) -> None:
        """Override the recorded outcome of the timed call."""
        self.success = success
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
# Global metrics instance
|
|
412
|
+
metrics = MetricsService()
|
|
413
|
+
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""NLTK resource preloader for container startup.
|
|
2
|
+
|
|
3
|
+
Loads NLTK data at container startup to avoid slow downloads during request processing.
|
|
4
|
+
Cloud Run's ephemeral filesystem means NLTK data must be re-downloaded on each cold start,
|
|
5
|
+
which can take 30-100+ seconds for cmudict.
|
|
6
|
+
|
|
7
|
+
See docs/archive/2026-01-08-performance-investigation.md for background.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import time
|
|
12
|
+
from typing import Optional, Dict, Any
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Singleton storage for preloaded resources
|
|
17
|
+
_preloaded_resources: Dict[str, Any] = {}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def preload_nltk_cmudict() -> None:
    """Preload NLTK's CMU Pronouncing Dictionary at startup.

    The cmudict is used by SyllablesMatchHandler for syllable counting.
    Without preloading, each SyllablesMatchHandler init downloads ~30MB,
    which took 50-100+ seconds in Cloud Run.

    The parsed dictionary is stored in the module-level cache under the
    "cmudict" key; calling this again once cached is a no-op.

    Raises:
        Exception: Any failure to import NLTK, download the corpus or
            parse it is logged with its traceback and re-raised.
    """
    if "cmudict" in _preloaded_resources:
        logger.info("NLTK cmudict already preloaded")
        return

    logger.info("Preloading NLTK cmudict...")
    start_time = time.perf_counter()

    try:
        import nltk
        from nltk.corpus import cmudict

        try:
            # Raises LookupError if the corpus has not been downloaded yet.
            # Keep the result so the corpus is parsed only once.
            cmu_dict = cmudict.dict()
        except LookupError:
            logger.info("Downloading NLTK cmudict data...")
            nltk.download("cmudict", quiet=True)
            cmu_dict = cmudict.dict()

        _preloaded_resources["cmudict"] = cmu_dict

        elapsed = time.perf_counter() - start_time
        logger.info(f"NLTK cmudict preloaded in {elapsed:.2f}s ({len(cmu_dict)} entries)")

    except Exception as e:
        # logger.exception keeps the traceback alongside the same message.
        logger.exception("Failed to preload NLTK cmudict: %s", e)
        raise
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_preloaded_cmudict() -> Optional[Dict]:
    """Look up the cached CMU dictionary.

    Returns:
        The dictionary stored under the "cmudict" key of the preload
        cache, or None when nothing has been stored there.
    """
    try:
        return _preloaded_resources["cmudict"]
    except KeyError:
        return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def is_cmudict_preloaded() -> bool:
    """Report whether the preload cache currently holds a "cmudict" entry."""
    already_cached = "cmudict" in _preloaded_resources
    return already_cached
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def preload_nltk_punkt() -> None:
    """Preload NLTK's punkt tokenizer (optional, used for sentence tokenization).

    Best-effort: any failure is logged as a warning and swallowed. The
    "punkt" cache entry is set only once the tokenizer has actually run
    successfully, so a failed download is not reported as preloaded.
    """
    if "punkt" in _preloaded_resources:
        logger.info("NLTK punkt already preloaded")
        return

    logger.info("Preloading NLTK punkt tokenizer...")
    start_time = time.perf_counter()

    try:
        import nltk
        from nltk.tokenize import word_tokenize

        try:
            # Test it works - raises LookupError if the data is missing
            word_tokenize("test")
        except LookupError:
            logger.info("Downloading NLTK punkt data...")
            nltk.download("punkt", quiet=True)
            nltk.download("punkt_tab", quiet=True)
            # The download results are not inspected here, so verify the
            # tokenizer now works instead of assuming the data arrived.
            word_tokenize("test")

        _preloaded_resources["punkt"] = True

        elapsed = time.perf_counter() - start_time
        logger.info(f"NLTK punkt preloaded in {elapsed:.2f}s")

    except Exception as e:
        logger.warning(f"Failed to preload NLTK punkt (non-critical): {e}")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def preload_all_nltk_resources() -> None:
    """Preload all NLTK resources used by the application.

    cmudict is required, so failures from preloading it propagate to the
    caller. punkt is optional: an unexpected error is logged and ignored.
    """
    preload_nltk_cmudict()
    # punkt is optional and less critical
    try:
        preload_nltk_punkt()
    except Exception:
        # Stay best-effort, but leave a trace instead of failing silently.
        logger.warning("Unexpected error while preloading NLTK punkt (non-critical)", exc_info=True)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def clear_preloaded_resources() -> None:
    """Empty the preload cache in place. Useful for testing."""
    for cached_key in list(_preloaded_resources):
        del _preloaded_resources[cached_key]
|