karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
backend/config.py ADDED
@@ -0,0 +1,172 @@
1
+ """
2
+ Configuration management for the karaoke generation backend.
3
+ """
4
+ import os
5
+ import logging
6
+ from typing import Optional, Dict
7
+ from pydantic_settings import BaseSettings
8
+ from google.cloud import secretmanager
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class Settings(BaseSettings):
15
+ """Application settings."""
16
+
17
+ # Google Cloud
18
+ google_cloud_project: str = os.getenv("GOOGLE_CLOUD_PROJECT", "")
19
+ gcs_bucket_name: str = os.getenv("GCS_BUCKET_NAME", "karaoke-gen-storage")
20
+ gcs_temp_bucket: str = os.getenv("GCS_TEMP_BUCKET", "karaoke-gen-temp")
21
+ gcs_output_bucket: str = os.getenv("GCS_OUTPUT_BUCKET", "karaoke-gen-outputs")
22
+ firestore_collection: str = os.getenv("FIRESTORE_COLLECTION", "jobs")
23
+
24
+ # Audio Separator API (for GPU processing)
25
+ audio_separator_api_url: Optional[str] = os.getenv("AUDIO_SEPARATOR_API_URL")
26
+
27
+ # External APIs (can be set via env or Secret Manager)
28
+ audioshake_api_key: Optional[str] = os.getenv("AUDIOSHAKE_API_KEY")
29
+ genius_api_key: Optional[str] = os.getenv("GENIUS_API_KEY")
30
+ spotify_cookie: Optional[str] = os.getenv("SPOTIFY_COOKIE_SP_DC")
31
+ rapidapi_key: Optional[str] = os.getenv("RAPIDAPI_KEY")
32
+
33
+ # Authentication
34
+ admin_tokens: Optional[str] = os.getenv("ADMIN_TOKENS") # Comma-separated list
35
+
36
+ # Application
37
+ environment: str = os.getenv("ENVIRONMENT", "development")
38
+ log_level: str = os.getenv("LOG_LEVEL", "INFO")
39
+
40
+ # Processing
41
+ max_concurrent_jobs: int = int(os.getenv("MAX_CONCURRENT_JOBS", "5"))
42
+ job_timeout_seconds: int = int(os.getenv("JOB_TIMEOUT_SECONDS", "3600"))
43
+
44
+ # Agentic AI Correction (for lyrics correction via LLM)
45
+ # When enabled, uses Gemini via Vertex AI for intelligent lyrics correction
46
+ use_agentic_ai: bool = os.getenv("USE_AGENTIC_AI", "true").lower() in ("true", "1", "yes")
47
+ agentic_ai_model: str = os.getenv("AGENTIC_AI_MODEL", "vertexai/gemini-3-flash-preview")
48
+ # Timeout for agentic correction in seconds. If correction takes longer, abort and
49
+ # use uncorrected transcription - human review will fix any issues.
50
+ agentic_correction_timeout_seconds: int = int(os.getenv("AGENTIC_CORRECTION_TIMEOUT_SECONDS", "180"))
51
+
52
+ # Cloud Tasks (for scalable worker coordination)
53
+ # When enabled, workers are triggered via Cloud Tasks for guaranteed delivery
54
+ # When disabled (default), workers are triggered via direct HTTP (for development)
55
+ enable_cloud_tasks: bool = os.getenv("ENABLE_CLOUD_TASKS", "false").lower() in ("true", "1", "yes")
56
+ gcp_region: str = os.getenv("GCP_REGION", "us-central1")
57
+
58
+ # Cloud Run Jobs (for long-running video encoding)
59
+ # When enabled AND enable_cloud_tasks is true, video worker uses Cloud Run Jobs
60
+ # instead of Cloud Tasks. This supports encoding times >30 minutes (up to 24 hours).
61
+ # Default is false - Cloud Tasks is sufficient for most videos (15-20 min).
62
+ use_cloud_run_jobs_for_video: bool = os.getenv("USE_CLOUD_RUN_JOBS_FOR_VIDEO", "false").lower() in ("true", "1", "yes")
63
+
64
+ # GCE Encoding Worker (for high-performance video encoding)
65
+ # When enabled, video encoding is offloaded to a dedicated C4 GCE instance
66
+ # with faster CPU (Intel Granite Rapids 3.9 GHz) instead of Cloud Run.
67
+ # This provides 2-3x faster encoding times for CPU-bound FFmpeg libx264 encoding.
68
+ use_gce_encoding: bool = os.getenv("USE_GCE_ENCODING", "false").lower() in ("true", "1", "yes")
69
+ encoding_worker_url: Optional[str] = os.getenv("ENCODING_WORKER_URL") # e.g., http://136.119.50.148:8080
70
+ encoding_worker_api_key: Optional[str] = os.getenv("ENCODING_WORKER_API_KEY")
71
+
72
+ # GCE Preview Encoding (for faster preview video generation)
73
+ # When enabled, preview video encoding during lyrics review is offloaded to the GCE worker.
74
+ # This reduces preview generation time from 60+ seconds to ~15-20 seconds.
75
+ # Requires use_gce_encoding to be enabled and the GCE worker to support /encode-preview endpoint.
76
+ use_gce_preview_encoding: bool = os.getenv("USE_GCE_PREVIEW_ENCODING", "false").lower() in ("true", "1", "yes")
77
+
78
+ # Storage paths
79
+ temp_dir: str = os.getenv("TEMP_DIR", "/tmp/karaoke-gen")
80
+
81
+ # Worker logs storage mode
82
+ # When enabled, worker logs are stored in a Firestore subcollection (jobs/{job_id}/logs)
83
+ # instead of an embedded array. This avoids the 1MB document size limit.
84
+ # Default is true for new deployments.
85
+ use_log_subcollection: bool = os.getenv("USE_LOG_SUBCOLLECTION", "true").lower() in ("true", "1", "yes")
86
+
87
+ # Flacfetch remote service (for torrent downloads)
88
+ # When configured, audio search uses the remote flacfetch HTTP API instead of local flacfetch.
89
+ # This is required for torrent downloads since Cloud Run doesn't support BitTorrent.
90
+ flacfetch_api_url: Optional[str] = os.getenv("FLACFETCH_API_URL") # e.g., http://10.0.0.5:8080
91
+ flacfetch_api_key: Optional[str] = os.getenv("FLACFETCH_API_KEY")
92
+
93
+ # Default distribution settings (can be overridden per-request)
94
+ default_dropbox_path: Optional[str] = os.getenv("DEFAULT_DROPBOX_PATH")
95
+ default_gdrive_folder_id: Optional[str] = os.getenv("DEFAULT_GDRIVE_FOLDER_ID")
96
+ # Strip whitespace/newlines from webhook URL - common issue when env vars are set with trailing newlines
97
+ default_discord_webhook_url: Optional[str] = (
98
+ os.getenv("DEFAULT_DISCORD_WEBHOOK_URL", "").strip() or None
99
+ )
100
+
101
+ # Default values for web service jobs (YouTube/Dropbox distribution)
102
+ default_enable_youtube_upload: bool = os.getenv("DEFAULT_ENABLE_YOUTUBE_UPLOAD", "false").lower() in ("true", "1", "yes")
103
+ default_brand_prefix: Optional[str] = os.getenv("DEFAULT_BRAND_PREFIX")
104
+ default_youtube_description: str = os.getenv(
105
+ "DEFAULT_YOUTUBE_DESCRIPTION",
106
+ "Karaoke video created with Nomad Karaoke (https://nomadkaraoke.com)\n\n"
107
+ "AI-powered vocal separation and synchronized lyrics.\n\n"
108
+ "#karaoke #music #singing #instrumental #lyrics"
109
+ )
110
+
111
+ # Secret Manager cache
112
+ _secret_cache: Dict[str, str] = {}
113
+
114
+ class Config:
115
+ env_file = ".env"
116
+ case_sensitive = False
117
+
118
+ def get_secret(self, secret_id: str) -> Optional[str]:
119
+ """
120
+ Get a secret from Google Secret Manager.
121
+
122
+ Caches secrets in memory to avoid repeated API calls.
123
+ Checks the corresponding environment variable first (e.g., AUDIOSHAKE_API_KEY), then queries Secret Manager.
124
+
125
+ Args:
126
+ secret_id: Secret name (e.g., "audioshake-api-key")
127
+
128
+ Returns:
129
+ Secret value or None if not found
130
+ """
131
+ # Check cache first
132
+ if secret_id in self._secret_cache:
133
+ return self._secret_cache[secret_id]
134
+
135
+ # Check environment variable (development mode)
136
+ env_var = secret_id.upper().replace('-', '_')
137
+ env_value = os.getenv(env_var)
138
+ if env_value:
139
+ logger.debug(f"Using {secret_id} from environment variable")
140
+ self._secret_cache[secret_id] = env_value
141
+ return env_value
142
+
143
+ # Try Secret Manager (production mode)
144
+ if not self.google_cloud_project:
145
+ logger.warning(f"Cannot fetch secret {secret_id}: No GCP project configured")
146
+ return None
147
+
148
+ try:
149
+ client = secretmanager.SecretManagerServiceClient()
150
+ name = f"projects/{self.google_cloud_project}/secrets/{secret_id}/versions/latest"
151
+ response = client.access_secret_version(request={"name": name})
152
+ # Strip whitespace/newlines - common issue when secrets are created with trailing newlines
153
+ secret_value = response.payload.data.decode('UTF-8').strip()
154
+
155
+ # Cache it
156
+ self._secret_cache[secret_id] = secret_value
157
+ logger.info(f"Loaded secret {secret_id} from Secret Manager")
158
+ return secret_value
159
+
160
+ except Exception as e:
161
+ logger.error(f"Failed to load secret {secret_id} from Secret Manager: {e}")
162
+ return None
163
+
164
+
165
+ # Global settings instance
166
+ settings = Settings()
167
+
168
+
169
+ def get_settings() -> Settings:
170
+ """Get the global settings instance."""
171
+ return settings
172
+
backend/main.py ADDED
@@ -0,0 +1,157 @@
1
+ """
2
+ FastAPI application entry point for karaoke generation backend.
3
+ """
4
+ import logging
5
+ from contextlib import asynccontextmanager
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+
9
+ from backend.config import settings
10
+ from backend.api.routes import health, jobs, internal, file_upload, review, auth, audio_search, themes, users, admin
11
+ from backend.services.tracing import setup_tracing, instrument_app, get_current_trace_id
12
+ from backend.services.structured_logging import setup_structured_logging
13
+ from backend.services.spacy_preloader import preload_spacy_model
14
+ from backend.services.nltk_preloader import preload_all_nltk_resources
15
+ from backend.services.langfuse_preloader import preload_langfuse_handler
16
+ from backend.middleware.audit_logging import AuditLoggingMiddleware
17
+
18
+
19
+ from backend.version import VERSION
20
+
21
+
22
+ # Configure structured logging (JSON in Cloud Run, human-readable locally)
23
+ # This must happen before any logging calls
24
+ setup_structured_logging()
25
+ logger = logging.getLogger(__name__)
26
+
27
+ # Initialize OpenTelemetry tracing (must happen before app creation)
28
+ tracing_enabled = setup_tracing(
29
+ service_name="karaoke-backend",
30
+ service_version=VERSION,
31
+ enable_in_dev=False, # Set to True to enable tracing locally
32
+ )
33
+
34
+
35
+ async def validate_credentials_on_startup():
36
+ """Validate OAuth credentials on startup and send alerts if needed."""
37
+ try:
38
+ from backend.services.credential_manager import get_credential_manager, CredentialStatus
39
+
40
+ manager = get_credential_manager()
41
+ results = manager.check_all_credentials()
42
+
43
+ invalid_services = [
44
+ result for result in results.values()
45
+ if result.status in (CredentialStatus.INVALID, CredentialStatus.EXPIRED)
46
+ ]
47
+
48
+ if invalid_services:
49
+ logger.warning(f"Some OAuth credentials need attention:")
50
+ for result in invalid_services:
51
+ logger.warning(f" - {result.service}: {result.message}")
52
+
53
+ # Try to send Discord alert
54
+ discord_url = settings.get_secret("discord-alert-webhook") if hasattr(settings, 'get_secret') else None
55
+ if discord_url:
56
+ manager.send_credential_alert(invalid_services, discord_url)
57
+ logger.info("Sent credential alert to Discord")
58
+ else:
59
+ logger.info("All OAuth credentials validated successfully")
60
+
61
+ except Exception as e:
62
+ logger.error(f"Failed to validate credentials on startup: {e}")
63
+
64
+
65
+ @asynccontextmanager
66
+ async def lifespan(app: FastAPI):
67
+ """Lifespan event handler for startup and shutdown."""
68
+ # Startup
69
+ logger.info("Starting karaoke generation backend")
70
+ logger.info(f"Environment: {settings.environment}")
71
+ logger.info(f"GCS Bucket: {settings.gcs_bucket_name}")
72
+ logger.info(f"Tracing enabled: {tracing_enabled}")
73
+
74
+ # Preload NLP models and resources to avoid cold start delays
75
+ # See docs/archive/2026-01-08-performance-investigation.md for background
76
+
77
+ # 1. SpaCy model (60+ second delay without preload)
78
+ try:
79
+ preload_spacy_model("en_core_web_sm")
80
+ except Exception as e:
81
+ logger.warning(f"SpaCy preload failed (will load lazily): {e}")
82
+
83
+ # 2. NLTK cmudict (50-100+ second delay without preload)
84
+ try:
85
+ preload_all_nltk_resources()
86
+ except Exception as e:
87
+ logger.warning(f"NLTK preload failed (will load lazily): {e}")
88
+
89
+ # 3. Langfuse callback handler (200+ second delay without preload)
90
+ try:
91
+ preload_langfuse_handler()
92
+ except Exception as e:
93
+ logger.warning(f"Langfuse preload failed (will initialize lazily): {e}")
94
+
95
+ # Validate OAuth credentials (non-fatal: failures are logged and startup continues)
96
+ try:
97
+ await validate_credentials_on_startup()
98
+ except Exception as e:
99
+ logger.error(f"Credential validation failed: {e}")
100
+
101
+ yield
102
+
103
+ # Shutdown
104
+ logger.info("Shutting down karaoke generation backend")
105
+
106
+
107
+ # Create FastAPI app
108
+ app = FastAPI(
109
+ title="Karaoke Generator API",
110
+ description="Backend API for web-based karaoke video generation",
111
+ version=VERSION,
112
+ lifespan=lifespan
113
+ )
114
+
115
+ # Instrument FastAPI with OpenTelemetry (adds automatic spans for all requests)
116
+ if tracing_enabled:
117
+ instrument_app(app)
118
+
119
+ # Configure CORS
120
+ app.add_middleware(
121
+ CORSMiddleware,
122
+ allow_origins=["*"], # TODO: restrict to the known frontend origins for production; a wildcard should not be combined with allow_credentials=True
123
+ allow_credentials=True,
124
+ allow_methods=["*"],
125
+ allow_headers=["*"],
126
+ )
127
+
128
+ # Add audit logging middleware (captures all requests with request_id for correlation)
129
+ app.add_middleware(AuditLoggingMiddleware)
130
+
131
+ # Include routers
132
+ app.include_router(health.router, prefix="/api")
133
+ app.include_router(jobs.router, prefix="/api")
134
+ app.include_router(file_upload.router, prefix="/api") # File upload endpoint
135
+ app.include_router(internal.router, prefix="/api") # Internal worker endpoints
136
+ app.include_router(review.router, prefix="/api") # Review UI compatibility endpoints
137
+ app.include_router(auth.router, prefix="/api") # OAuth credential management
138
+ app.include_router(audio_search.router, prefix="/api") # Audio search (artist+title mode)
139
+ app.include_router(themes.router, prefix="/api") # Theme selection for styles
140
+ app.include_router(users.router, prefix="/api") # User auth, credits, and Stripe webhooks
141
+ app.include_router(admin.router, prefix="/api") # Admin dashboard and management
142
+
143
+
144
+ @app.get("/")
145
+ async def root():
146
+ """Root endpoint."""
147
+ return {
148
+ "service": "karaoke-gen-backend",
149
+ "version": VERSION,
150
+ "status": "running"
151
+ }
152
+
153
+
154
+ if __name__ == "__main__":
155
+ import uvicorn
156
+ uvicorn.run(app, host="0.0.0.0", port=8080)
157
+
backend/middleware/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Middleware package for FastAPI application."""
2
+
3
+ from backend.middleware.audit_logging import AuditLoggingMiddleware
4
+
5
+ __all__ = ["AuditLoggingMiddleware"]
backend/middleware/audit_logging.py ADDED
@@ -0,0 +1,124 @@
1
+ """
2
+ Audit logging middleware for tracking all HTTP requests.
3
+
4
+ This middleware captures request metadata and logs it to Cloud Logging
5
+ for audit trail and user activity investigation purposes.
6
+ """
7
+ import logging
8
+ import time
9
+ import uuid
10
+ from starlette.middleware.base import BaseHTTPMiddleware
11
+ from starlette.requests import Request
12
+ from starlette.responses import Response
13
+
14
+
15
+ logger = logging.getLogger("audit")
16
+
17
+ # Endpoints to exclude from audit logging (high-frequency health checks)
18
+ EXCLUDED_PATHS = {
19
+ "/",
20
+ "/api/health",
21
+ "/api/health/detailed",
22
+ "/api/readiness",
23
+ "/healthz",
24
+ "/ready",
25
+ }
26
+
27
+
28
+ class AuditLoggingMiddleware(BaseHTTPMiddleware):
29
+ """
30
+ Middleware that logs HTTP requests for audit purposes (paths in EXCLUDED_PATHS are skipped).
31
+
32
+ Captures:
33
+ - request_id: UUID for correlating with auth logs
34
+ - method: HTTP method
35
+ - path: Request path
36
+ - status_code: Response status
37
+ - latency_ms: Request duration
38
+ - client_ip: Client IP (from X-Forwarded-For for proxied requests)
39
+ - user_agent: Browser/client identifier
40
+
41
+ The request_id is stored in request.state and added to response headers,
42
+ allowing correlation with auth logs that capture user_email.
43
+ """
44
+
45
+ async def dispatch(self, request: Request, call_next) -> Response:
46
+ # Skip excluded paths (health checks)
47
+ if request.url.path in EXCLUDED_PATHS:
48
+ return await call_next(request)
49
+
50
+ # Generate unique request ID for correlation
51
+ request_id = str(uuid.uuid4())
52
+ request.state.request_id = request_id
53
+
54
+ # Capture start time
55
+ start_time = time.time()
56
+
57
+ # Extract client info
58
+ client_ip = self._get_client_ip(request)
59
+ user_agent = request.headers.get("user-agent", "")
60
+
61
+ # Process request
62
+ try:
63
+ response = await call_next(request)
64
+ except Exception:
65
+ # Log failed requests too (exception() auto-includes stack trace)
66
+ latency_ms = int((time.time() - start_time) * 1000)
67
+ logger.exception(
68
+ "request_audit_error",
69
+ extra={
70
+ "request_id": request_id,
71
+ "method": request.method,
72
+ "path": request.url.path,
73
+ "query_string": str(request.query_params) if request.query_params else None,
74
+ "latency_ms": latency_ms,
75
+ "client_ip": client_ip,
76
+ "user_agent": user_agent[:200] if user_agent else None,
77
+ "audit_type": "http_request",
78
+ }
79
+ )
80
+ raise
81
+
82
+ # Calculate latency
83
+ latency_ms = int((time.time() - start_time) * 1000)
84
+
85
+ # Log audit entry (WARNING for responses with status >= 400, INFO otherwise)
86
+ log_level = logging.WARNING if response.status_code >= 400 else logging.INFO
87
+ logger.log(
88
+ log_level,
89
+ "request_audit",
90
+ extra={
91
+ "request_id": request_id,
92
+ "method": request.method,
93
+ "path": request.url.path,
94
+ "query_string": str(request.query_params) if request.query_params else None,
95
+ "status_code": response.status_code,
96
+ "latency_ms": latency_ms,
97
+ "client_ip": client_ip,
98
+ "user_agent": user_agent[:200] if user_agent else None,
99
+ "audit_type": "http_request",
100
+ }
101
+ )
102
+
103
+ # Add request_id to response headers for debugging/correlation
104
+ response.headers["X-Request-ID"] = request_id
105
+
106
+ return response
107
+
108
+ def _get_client_ip(self, request: Request) -> str:
109
+ """
110
+ Extract client IP address, handling proxy scenarios.
111
+
112
+ Cloud Run and other proxies set X-Forwarded-For header.
113
+ """
114
+ # Check X-Forwarded-For for proxy scenarios (Cloud Run, load balancers)
115
+ forwarded = request.headers.get("x-forwarded-for", "")
116
+ if forwarded:
117
+ # First IP is the original client
118
+ return forwarded.split(",")[0].strip()
119
+
120
+ # Fall back to direct connection
121
+ if request.client:
122
+ return request.client.host
123
+
124
+ return "unknown"
File without changes