karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
backend/models/job.py ADDED
@@ -0,0 +1,519 @@
1
+ """
2
+ Job data models for karaoke generation.
3
+
4
+ This module defines the complete state machine for karaoke generation jobs,
5
+ mirroring the 8-stage CLI workflow with human-in-the-loop interaction points.
6
+ """
7
+ from typing import Optional, Dict, Any, List
8
+ from datetime import datetime
9
+ from enum import Enum
10
+ from pydantic import BaseModel, Field, validator
11
+
12
+ from karaoke_gen.utils import normalize_text
13
+
14
+
15
class JobStatus(str, Enum):
    """
    All states a karaoke generation job can be in.

    Members are plain strings (the class mixes in ``str``), so they compare
    equal to their lowercase value.

    Workflow overview - 8 main stages, 3 of which wait on a human:
        1. Input & Setup (may include audio source selection)
        2. Parallel Processing (audio + lyrics)
        3. Title/End Screen Generation
        4. Countdown Padding Synchronization
        5. Human Review (BLOCKING)
        6. Instrumental Selection (BLOCKING)
        7. Video Finalization
        8. Distribution
    """

    # --- Initial state -----------------------------------------------------
    # Job has been created and is queued for processing.
    PENDING = "pending"

    # --- Audio search (artist+title search mode) ---------------------------
    # Looking for audio sources via flacfetch.
    SEARCHING_AUDIO = "searching_audio"
    # WAITING FOR USER: an audio source has to be selected.
    AWAITING_AUDIO_SELECTION = "awaiting_audio_selection"
    # Fetching the selected audio from its source.
    DOWNLOADING_AUDIO = "downloading_audio"

    # Downloading from a URL, or processing an upload.
    DOWNLOADING = "downloading"

    # --- Stage 2a: audio separation (parallel track 1) ---------------------
    # Clean instrumental separation (Modal API).
    SEPARATING_STAGE1 = "separating_stage1"
    # Backing vocals separation (Modal API).
    SEPARATING_STAGE2 = "separating_stage2"
    # All audio stems are ready.
    AUDIO_COMPLETE = "audio_complete"

    # --- Stage 2b: lyrics processing (parallel track 2) --------------------
    # AudioShake API transcription.
    TRANSCRIBING = "transcribing"
    # Automatic lyrics correction.
    CORRECTING = "correcting"
    # Corrections JSON is ready.
    LYRICS_COMPLETE = "lyrics_complete"

    # --- Stage 3: title/end screens ----------------------------------------
    # Creating the title and end screen videos.
    GENERATING_SCREENS = "generating_screens"

    # --- Stage 4: countdown padding (automatic) ----------------------------
    # Synchronizing countdown padding.
    APPLYING_PADDING = "applying_padding"

    # --- Stage 5: human review (BLOCKING) ----------------------------------
    # WAITING FOR USER: lyrics review needed.
    AWAITING_REVIEW = "awaiting_review"
    # The user is actively reviewing lyrics.
    IN_REVIEW = "in_review"
    # The user submitted corrected lyrics.
    REVIEW_COMPLETE = "review_complete"

    # --- Stage 5.5: post-review render with corrected lyrics ---------------
    # OutputGenerator is producing with_vocals.mkv.
    RENDERING_VIDEO = "rendering_video"

    # --- Stage 6: instrumental selection (BLOCKING) ------------------------
    # WAITING FOR USER.
    AWAITING_INSTRUMENTAL_SELECTION = "awaiting_instrumental_selection"
    # The user made a selection.
    INSTRUMENTAL_SELECTED = "instrumental_selected"

    # --- Stage 7: video generation and finalization ------------------------
    # Creating the initial karaoke video.
    GENERATING_VIDEO = "generating_video"
    # Multi-format video encoding (Cloud Build).
    ENCODING = "encoding"
    # CDG/TXT ZIP generation.
    PACKAGING = "packaging"

    # --- Stage 8: distribution (optional) ----------------------------------
    # YouTube/Dropbox upload.
    UPLOADING = "uploading"
    # Discord/Email notifications.
    NOTIFYING = "notifying"

    # --- Terminal states ---------------------------------------------------
    # All processing finished successfully.
    COMPLETE = "complete"
    # Prep-only job completed (stops after review).
    PREP_COMPLETE = "prep_complete"
    # An unrecoverable error occurred.
    FAILED = "failed"
    # The user cancelled the job.
    CANCELLED = "cancelled"

    # --- Legacy compatibility (will be removed) ----------------------------
    QUEUED = "queued"  # Deprecated: use PENDING
    PROCESSING = "processing"  # Deprecated: use specific states
    READY_FOR_FINALIZATION = "ready_for_finalization"  # Deprecated
    FINALIZING = "finalizing"  # Deprecated: use ENCODING/PACKAGING
    ERROR = "error"  # Deprecated: use FAILED
88
+
89
+
90
+ # Valid state transitions
91
STATE_TRANSITIONS = {
    # A fresh job starts by downloading (file upload), by searching for audio
    # (artist+title search), or jumps straight to instrumental selection
    # (finalise-only).
    JobStatus.PENDING: [
        JobStatus.DOWNLOADING,
        JobStatus.SEARCHING_AUDIO,
        JobStatus.AWAITING_INSTRUMENTAL_SELECTION,
        JobStatus.FAILED,
        JobStatus.CANCELLED,
    ],

    # ---- Audio search flow (artist+title search mode) ----
    JobStatus.SEARCHING_AUDIO: [
        JobStatus.AWAITING_AUDIO_SELECTION,
        JobStatus.DOWNLOADING_AUDIO,
        JobStatus.FAILED,
    ],
    JobStatus.AWAITING_AUDIO_SELECTION: [
        JobStatus.DOWNLOADING_AUDIO,
        JobStatus.FAILED,
        JobStatus.CANCELLED,
    ],
    JobStatus.DOWNLOADING_AUDIO: [JobStatus.DOWNLOADING, JobStatus.FAILED],

    # After the download, audio and lyrics are processed in parallel; screen
    # generation follows once both tracks are complete.
    JobStatus.DOWNLOADING: [
        JobStatus.SEPARATING_STAGE1,
        JobStatus.TRANSCRIBING,
        JobStatus.GENERATING_SCREENS,
        JobStatus.FAILED,
    ],

    # ---- Audio separation flow ----
    JobStatus.SEPARATING_STAGE1: [JobStatus.SEPARATING_STAGE2, JobStatus.FAILED],
    JobStatus.SEPARATING_STAGE2: [JobStatus.AUDIO_COMPLETE, JobStatus.FAILED],
    JobStatus.AUDIO_COMPLETE: [JobStatus.GENERATING_SCREENS, JobStatus.FAILED],

    # ---- Lyrics flow ----
    JobStatus.TRANSCRIBING: [JobStatus.CORRECTING, JobStatus.FAILED],
    JobStatus.CORRECTING: [JobStatus.LYRICS_COMPLETE, JobStatus.FAILED],
    JobStatus.LYRICS_COMPLETE: [JobStatus.GENERATING_SCREENS, JobStatus.FAILED],

    # ---- Post-parallel processing ----
    JobStatus.GENERATING_SCREENS: [
        JobStatus.APPLYING_PADDING,
        JobStatus.AWAITING_REVIEW,
        JobStatus.AWAITING_INSTRUMENTAL_SELECTION,
        JobStatus.FAILED,
    ],
    JobStatus.APPLYING_PADDING: [JobStatus.AWAITING_REVIEW, JobStatus.FAILED],

    # ---- Human review flow ----
    # From AWAITING_REVIEW the job may go straight to REVIEW_COMPLETE (quick
    # review) or pass through IN_REVIEW (editing).
    JobStatus.AWAITING_REVIEW: [
        JobStatus.IN_REVIEW,
        JobStatus.REVIEW_COMPLETE,
        JobStatus.FAILED,
        JobStatus.CANCELLED,
    ],
    JobStatus.IN_REVIEW: [
        JobStatus.REVIEW_COMPLETE,
        JobStatus.AWAITING_REVIEW,
        JobStatus.FAILED,
    ],
    # PREP_COMPLETE is the exit used by prep-only jobs.
    JobStatus.REVIEW_COMPLETE: [
        JobStatus.RENDERING_VIDEO,
        JobStatus.PREP_COMPLETE,
        JobStatus.FAILED,
    ],

    # ---- Video rendering (post-review) ----
    JobStatus.RENDERING_VIDEO: [
        JobStatus.AWAITING_INSTRUMENTAL_SELECTION,
        JobStatus.PREP_COMPLETE,
        JobStatus.FAILED,
    ],

    # ---- Instrumental selection flow ----
    JobStatus.AWAITING_INSTRUMENTAL_SELECTION: [
        JobStatus.INSTRUMENTAL_SELECTED,
        JobStatus.FAILED,
        JobStatus.CANCELLED,
    ],
    JobStatus.INSTRUMENTAL_SELECTED: [JobStatus.GENERATING_VIDEO, JobStatus.FAILED],

    # ---- Video generation flow ----
    JobStatus.GENERATING_VIDEO: [JobStatus.ENCODING, JobStatus.FAILED],
    JobStatus.ENCODING: [JobStatus.PACKAGING, JobStatus.COMPLETE, JobStatus.FAILED],
    JobStatus.PACKAGING: [JobStatus.UPLOADING, JobStatus.COMPLETE, JobStatus.FAILED],

    # ---- Distribution flow ----
    JobStatus.UPLOADING: [JobStatus.NOTIFYING, JobStatus.COMPLETE, JobStatus.FAILED],
    JobStatus.NOTIFYING: [JobStatus.COMPLETE, JobStatus.FAILED],

    # ---- Terminal states ----
    # COMPLETE is the only state with no outgoing transitions.
    JobStatus.COMPLETE: [],
    # A finalise-only run continues from PREP_COMPLETE.
    JobStatus.PREP_COMPLETE: [
        JobStatus.AWAITING_INSTRUMENTAL_SELECTION,
        JobStatus.FAILED,
    ],
    # FAILED and CANCELLED can be retried by resuming from a checkpoint.
    JobStatus.FAILED: [
        JobStatus.DOWNLOADING,  # Retry from beginning (if input audio exists)
        JobStatus.INSTRUMENTAL_SELECTED,  # Retry from video generation
        JobStatus.REVIEW_COMPLETE,  # Retry from render stage
        JobStatus.LYRICS_COMPLETE,  # Retry from screens generation
    ],
    JobStatus.CANCELLED: [
        JobStatus.DOWNLOADING,  # Retry from beginning (if input audio exists)
        JobStatus.INSTRUMENTAL_SELECTED,  # Retry from video generation
        JobStatus.REVIEW_COMPLETE,  # Retry from render stage
        JobStatus.LYRICS_COMPLETE,  # Retry from screens generation
    ],

    # ---- Legacy states (kept for backward compatibility) ----
    JobStatus.QUEUED: [JobStatus.PENDING],
    JobStatus.PROCESSING: [JobStatus.SEPARATING_STAGE1, JobStatus.TRANSCRIBING],
    JobStatus.READY_FOR_FINALIZATION: [JobStatus.GENERATING_VIDEO],
    JobStatus.FINALIZING: [JobStatus.ENCODING],
    JobStatus.ERROR: [JobStatus.FAILED],
}
164
+
165
+
166
class TimelineEvent(BaseModel):
    """One entry in a job's progress timeline."""

    # Status recorded for this event, stored as a plain string.
    status: str
    # When the event happened, stored as a string.
    timestamp: str
    # Optional progress value attached to the event.
    progress: Optional[int] = None
    # Optional free-text description of the event.
    message: Optional[str] = None
172
+
173
+
174
class LogEntry(BaseModel):
    """A single worker log line, kept for debugging and monitoring."""

    # When the line was logged, stored as a string.
    timestamp: str
    # Severity: DEBUG, INFO, WARNING, ERROR.
    level: str
    # Originating worker: audio, lyrics, screens, video, render.
    worker: str
    # The log text itself.
    message: str
180
+
181
+
182
class Job(BaseModel):
    """
    Full record of a karaoke generation job.

    Covers the job's whole lifecycle: from submission, through all 8
    stages, to completion or failure.
    """

    job_id: str
    status: JobStatus
    # Percentage (0-100) shown in the UI.
    progress: int = 0
    created_at: datetime
    updated_at: datetime

    # ------------------------------------------------------------------
    # Input data
    # ------------------------------------------------------------------
    # YouTube URL (if provided).
    url: Optional[str] = None
    artist: Optional[str] = None
    title: Optional[str] = None
    # Original uploaded filename.
    filename: Optional[str] = None
    # GCS path to the uploaded file.
    input_media_gcs_path: Optional[str] = None

    # ------------------------------------------------------------------
    # User preferences
    # ------------------------------------------------------------------
    # Generate CDG+MP3 package (requires style config).
    enable_cdg: bool = False
    # Generate TXT+MP3 package (requires style config).
    enable_txt: bool = False
    # Upload to YouTube.
    enable_youtube_upload: bool = False
    # YouTube video description.
    youtube_description: Optional[str] = None
    # Webhook for notifications.
    webhook_url: Optional[str] = None
    # Email for notifications.
    user_email: Optional[str] = None
    # Skip interactive steps (lyrics review, instrumental selection).
    non_interactive: bool = False

    # ------------------------------------------------------------------
    # Theme configuration (pre-made themes from GCS)
    # ------------------------------------------------------------------
    # Theme identifier (e.g., "nomad", "default").
    theme_id: Optional[str] = None
    color_overrides: Dict[str, str] = Field(default_factory=dict)
    """
    User color overrides applied on top of theme. Keys:
    - artist_color: Hex color for artist name (#RRGGBB)
    - title_color: Hex color for song title
    - sung_lyrics_color: Hex color for highlighted lyrics
    - unsung_lyrics_color: Hex color for unhighlighted lyrics
    """

    # ------------------------------------------------------------------
    # Style configuration (uploaded files - used when theme_id is not set)
    # ------------------------------------------------------------------
    # GCS path to style_params.json.
    style_params_gcs_path: Optional[str] = None
    style_assets: Dict[str, str] = Field(default_factory=dict)
    """
    GCS paths to style asset files:
    {
    "intro_background": "gs://bucket/jobs/{job_id}/style/intro_bg.png",
    "karaoke_background": "gs://bucket/jobs/{job_id}/style/karaoke_bg.png",
    "end_background": "gs://bucket/jobs/{job_id}/style/end_bg.png",
    "font": "gs://bucket/jobs/{job_id}/style/font.ttf",
    "cdg_instrumental_background": "gs://bucket/jobs/{job_id}/style/cdg_instr.png",
    "cdg_title_background": "gs://bucket/jobs/{job_id}/style/cdg_title.png",
    "cdg_outro_background": "gs://bucket/jobs/{job_id}/style/cdg_outro.png"
    }
    """

    # ------------------------------------------------------------------
    # Finalisation configuration
    # ------------------------------------------------------------------
    # Brand code prefix (e.g., "NOMAD").
    brand_prefix: Optional[str] = None
    # Discord notification webhook.
    discord_webhook_url: Optional[str] = None
    # YouTube description template text.
    youtube_description_template: Optional[str] = None

    # ------------------------------------------------------------------
    # Distribution configuration (native API - for remote CLI)
    # ------------------------------------------------------------------
    # Dropbox folder path for organized output (e.g., "/Karaoke/Tracks-Organized").
    dropbox_path: Optional[str] = None
    # Google Drive folder ID for public share uploads.
    gdrive_folder_id: Optional[str] = None

    # Legacy distribution configuration (rclone - for local CLI backward compat).
    # Deprecated: use dropbox_path instead.
    organised_dir_rclone_root: Optional[str] = None

    # ------------------------------------------------------------------
    # Lyrics configuration (overrides for search/transcription)
    # ------------------------------------------------------------------
    # Override artist name for lyrics search.
    lyrics_artist: Optional[str] = None
    # Override title for lyrics search.
    lyrics_title: Optional[str] = None
    # GCS path to user-provided lyrics file.
    lyrics_file_gcs_path: Optional[str] = None
    # Offset for subtitle timing (positive = delay).
    subtitle_offset_ms: int = 0

    # ------------------------------------------------------------------
    # Audio separation model configuration
    # ------------------------------------------------------------------
    # Model for clean instrumental separation.
    clean_instrumental_model: Optional[str] = None
    # Models for backing vocals separation.
    backing_vocals_models: Optional[List[str]] = None
    # Models for other stems (bass, drums, etc.).
    other_stems_models: Optional[List[str]] = None

    # Existing instrumental configuration (Batch 3).
    # GCS path to user-provided instrumental file.
    existing_instrumental_gcs_path: Optional[str] = None

    # ------------------------------------------------------------------
    # Audio search configuration (Batch 5 - artist+title search mode)
    # ------------------------------------------------------------------
    # Artist name used for audio search.
    audio_search_artist: Optional[str] = None
    # Title used for audio search.
    audio_search_title: Optional[str] = None
    # Auto-select best audio source (skip selection).
    auto_download: bool = False

    # ------------------------------------------------------------------
    # Two-phase workflow configuration (Batch 6)
    # ------------------------------------------------------------------
    # Stop after review, don't run finalisation.
    prep_only: bool = False
    # Skip prep, run only finalisation (requires uploaded prep outputs).
    finalise_only: bool = False
    # Preserve existing brand code instead of generating new one.
    keep_brand_code: Optional[str] = None

    # ------------------------------------------------------------------
    # Review authentication (Batch 7)
    # ------------------------------------------------------------------
    # Job-scoped token for lyrics review UI access
    # (generated when entering AWAITING_REVIEW).
    review_token: Optional[str] = None
    # Token expiry time (optional, for extra security).
    review_token_expires_at: Optional[datetime] = None
    # Job-scoped token for instrumental review UI access
    # (generated when entering AWAITING_INSTRUMENTAL_SELECTION).
    instrumental_token: Optional[str] = None
    # Token expiry time.
    instrumental_token_expires_at: Optional[datetime] = None

    # ------------------------------------------------------------------
    # Processing state
    # ------------------------------------------------------------------
    # Local output directory (temp).
    track_output_dir: Optional[str] = None
    # Hash for deduplication.
    audio_hash: Optional[str] = None

    # State-specific data (JSON field for stage-specific metadata).
    state_data: Dict[str, Any] = Field(default_factory=dict)
    """
    Stage-specific metadata. Examples:
    - audio_complete: {"stems": {"clean": "gs://...", "backing": "gs://..."}}
    - lyrics_complete: {"corrections_url": "gs://...", "audio_url": "gs://..."}
    - review_complete: {"corrected_lyrics": {...}}
    - instrumental_selected: {"selection": "clean" | "with_backing"}
    - encoding: {"build_id": "...", "progress": 45}
    """

    # Timeline tracking.
    timeline: List[TimelineEvent] = Field(default_factory=list)

    # Worker logs for debugging (limited to last N entries to avoid document
    # size issues).
    worker_logs: List[LogEntry] = Field(default_factory=list)

    # File URLs (GCS storage).
    file_urls: Dict[str, Any] = Field(default_factory=dict)
    """
    File storage URLs. Structure:
    {
    "input": "gs://bucket/jobs/{job_id}/input.flac",
    "stems": {
    "instrumental_clean": "gs://...",
    "instrumental_with_backing": "gs://...",
    "vocals": "gs://...",
    "backing_vocals": "gs://...",
    "lead_vocals": "gs://...",
    "bass": "gs://...",
    "drums": "gs://...",
    "guitar": "gs://...",
    "piano": "gs://...",
    "other": "gs://..."
    },
    "lyrics": {
    "corrections": "gs://bucket/jobs/{job_id}/lyrics/corrections.json",
    "audio": "gs://bucket/jobs/{job_id}/lyrics/audio.flac",
    "lrc": "gs://...",
    "ass": "gs://..."
    },
    "screens": {
    "title": "gs://bucket/jobs/{job_id}/screens/title.mov",
    "end": "gs://bucket/jobs/{job_id}/screens/end.mov"
    },
    "videos": {
    "with_vocals": "gs://bucket/jobs/{job_id}/videos/with_vocals.mkv"
    },
    "finals": {
    "lossless_4k_mp4": "gs://...",
    "lossless_4k_mkv": "gs://...",
    "lossy_4k_mp4": "gs://...",
    "lossy_720p_mp4": "gs://..."
    },
    "packages": {
    "cdg_zip": "gs://...",
    "txt_zip": "gs://..."
    },
    "youtube": {
    "url": "https://youtube.com/watch?v=...",
    "video_id": "..."
    }
    }
    """

    # Results (for backward compatibility, will be deprecated).
    output_files: Dict[str, str] = Field(default_factory=dict)
    download_urls: Dict[str, str] = Field(default_factory=dict)

    # ------------------------------------------------------------------
    # Error handling
    # ------------------------------------------------------------------
    error_message: Optional[str] = None
    # Structured error information.
    error_details: Optional[Dict[str, Any]] = None
    # Number of retry attempts.
    retry_count: int = 0

    # Worker tracking.
    worker_ids: Dict[str, str] = Field(default_factory=dict)
    """
    IDs of background workers/jobs:
    {
    "audio_worker": "cloud-run-request-id",
    "lyrics_worker": "cloud-run-request-id",
    "video_encoder": "cloud-build-id",
    "distribution": "cloud-run-request-id"
    }
    """

    # Request metadata (captured at job creation for tracking and filtering).
    request_metadata: Dict[str, Any] = Field(default_factory=dict)
    """
    Metadata captured from the original API request.
    Used for tracking, filtering, and operational management.

    Standard fields:
    {
    "client_ip": "192.168.1.1", # IP address of the client
    "user_agent": "karaoke-gen-remote/0.71.0", # User-Agent header
    "environment": "test", # From X-Environment header (test/production/development)
    "client_id": "cli-user-123", # From X-Client-ID header (customer/user identifier)
    "server_version": "0.71.0", # Server version at job creation
    "created_from": "upload", # "upload" (file) or "url" (YouTube URL)
    "custom_headers": { # All X-* headers for extensibility
    "X-Environment": "test",
    "X-Client-ID": "cli-user-123",
    "X-Request-ID": "abc-123"
    }
    }

    Use cases:
    - Filter test vs production jobs
    - Track jobs by customer/client
    - Debug issues with specific clients
    - Bulk cleanup of test jobs
    """

    # NOTE: status transitions are not validated on this model. That check
    # lives in JobManager.validate_state_transition(), which is called before
    # status updates, because Firestore updates happen directly without
    # reconstructing the model.

    class Config:
        use_enum_values = True
404
+
405
+
406
class JobCreate(BaseModel):
    """
    Payload used to create a job.

    Either `url` OR a file upload is required (the file upload is handled
    separately). Artist and title are optional - they will be auto-detected
    from YouTube if not provided.
    """

    url: Optional[str] = None
    artist: Optional[str] = None
    title: Optional[str] = None
    # Original uploaded filename.
    filename: Optional[str] = None

    # ---- Optional preferences ----
    # Requires style config.
    enable_cdg: bool = False
    # Requires style config.
    enable_txt: bool = False
    enable_youtube_upload: bool = False
    youtube_description: Optional[str] = None
    webhook_url: Optional[str] = None
    user_email: Optional[str] = None
    # Skip interactive steps (lyrics review, instrumental selection).
    non_interactive: bool = False

    # ---- Theme configuration (pre-made themes from GCS) ----
    # Theme identifier (e.g., "nomad", "default").
    theme_id: Optional[str] = None
    color_overrides: Dict[str, str] = Field(default_factory=dict)
    """
    User color overrides applied on top of theme. Keys:
    - artist_color: Hex color for artist name (#RRGGBB)
    - title_color: Hex color for song title
    - sung_lyrics_color: Hex color for highlighted lyrics
    - unsung_lyrics_color: Hex color for unhighlighted lyrics
    """

    # ---- Style configuration (populated after file upload, or from theme) ----
    style_params_gcs_path: Optional[str] = None
    style_assets: Dict[str, str] = Field(default_factory=dict)

    # ---- Finalisation configuration ----
    brand_prefix: Optional[str] = None
    discord_webhook_url: Optional[str] = None
    youtube_description_template: Optional[str] = None

    # ---- Distribution configuration (native API - for remote CLI) ----
    # Dropbox folder path for organized output.
    dropbox_path: Optional[str] = None
    # Google Drive folder ID for public share uploads.
    gdrive_folder_id: Optional[str] = None

    # Legacy (rclone - deprecated, use dropbox_path instead).
    organised_dir_rclone_root: Optional[str] = None

    # ---- Lyrics configuration (overrides for search/transcription) ----
    # Override artist name for lyrics search.
    lyrics_artist: Optional[str] = None
    # Override title for lyrics search.
    lyrics_title: Optional[str] = None
    # GCS path to user-provided lyrics file.
    lyrics_file_gcs_path: Optional[str] = None
    # Offset for subtitle timing (positive = delay).
    subtitle_offset_ms: int = 0

    # ---- Audio separation model configuration ----
    # Model for clean instrumental separation.
    clean_instrumental_model: Optional[str] = None
    # Models for backing vocals separation.
    backing_vocals_models: Optional[List[str]] = None
    # Models for other stems (bass, drums, etc.).
    other_stems_models: Optional[List[str]] = None

    # ---- Existing instrumental configuration (Batch 3) ----
    # GCS path to user-provided instrumental file.
    existing_instrumental_gcs_path: Optional[str] = None

    # ---- Audio search configuration (Batch 5 - artist+title search mode) ----
    # Artist name used for audio search.
    audio_search_artist: Optional[str] = None
    # Title used for audio search.
    audio_search_title: Optional[str] = None
    # Auto-select best audio source (skip selection).
    auto_download: bool = False

    # ---- Two-phase workflow configuration (Batch 6) ----
    # Stop after review, don't run finalisation.
    prep_only: bool = False
    # Skip prep, run only finalisation.
    finalise_only: bool = False
    # Preserve existing brand code instead of generating new one.
    keep_brand_code: Optional[str] = None

    # ---- Request metadata (set by API endpoint from request headers) ----
    request_metadata: Dict[str, Any] = Field(default_factory=dict)
    """
    Populated by the API endpoint with request context:
    - client_ip: Client IP address
    - user_agent: User-Agent header
    - environment: From X-Environment header (test/production/development)
    - client_id: From X-Client-ID header
    - server_version: Current server version
    - custom_headers: All X-* headers
    """

    @validator('url')
    def validate_url(cls, v):
        """Reject a blank URL string; otherwise return the URL stripped.

        Values that are not strings (including None) are returned unchanged.
        """
        if not isinstance(v, str):
            return v
        trimmed = v.strip()
        if not trimmed:
            raise ValueError("Field cannot be empty string")
        return trimmed

    @validator('artist', 'title')
    def normalize_artist_title(cls, v):
        """Standardize Unicode characters in the artist/title text.

        Keeps stored data consistent by converting:
        - Curly quotes -> straight quotes
        - Various dashes -> hyphen
        - Unusual whitespace -> regular space

        Values that are not strings (including None) are returned unchanged;
        a blank string is rejected.
        """
        if not isinstance(v, str):
            return v
        if not v.strip():
            raise ValueError("Field cannot be empty string")
        # normalize_text takes care of both stripping and Unicode normalization.
        return normalize_text(v)
512
+
513
+
514
class JobResponse(BaseModel):
    """Response body carrying a status, a job id and a message."""

    status: str
    job_id: str
    message: str
519
+
@@ -0,0 +1,123 @@
1
+ """
2
+ API request models for karaoke generation endpoints.
3
+ """
4
+ from typing import Optional, Dict, Any, List
5
+ from pydantic import BaseModel, HttpUrl, validator
6
+
7
+
8
class URLSubmissionRequest(BaseModel):
    """Payload for submitting a job from a remote URL (YouTube, etc.)."""

    # Source location and optional track identity
    url: HttpUrl
    artist: Optional[str] = None  # Auto-detected when omitted
    title: Optional[str] = None  # Auto-detected when omitted

    # Optional output and delivery preferences
    enable_cdg: bool = False  # Needs a style config
    enable_txt: bool = False  # Needs a style config
    enable_youtube_upload: Optional[bool] = None  # None defers to the server default
    youtube_description: Optional[str] = None
    webhook_url: Optional[str] = None
    user_email: Optional[str] = None
21
+
22
+
23
class UploadSubmissionRequest(BaseModel):
    """Payload for submitting a job from an uploaded file.

    Unlike URL submissions, artist and title are mandatory here.
    """

    # Track identity (required)
    artist: str
    title: str

    # Optional output and delivery preferences
    enable_cdg: bool = False  # Needs a style config
    enable_txt: bool = False  # Needs a style config
    enable_youtube_upload: Optional[bool] = None  # None defers to the server default
    youtube_description: Optional[str] = None
    webhook_url: Optional[str] = None
    user_email: Optional[str] = None
35
+
36
+
37
class CorrectionsSubmission(BaseModel):
    """
    Corrected lyrics submitted once a human has reviewed them.

    This is the critical human-in-the-loop interaction point; the
    corrections payload originates from the lyrics-transcriber review
    interface.
    """

    corrections: Dict[str, Any]  # Complete corrections JSON sent by the frontend
    user_notes: Optional[str] = None  # Free-form reviewer notes, if any

    @validator('corrections')
    def validate_corrections_format(cls, v):
        """Ensure the payload has every mandatory top-level key.

        Keys are checked in the order 'lines', 'metadata'; the first one
        found missing is named in the error.

        Raises:
            ValueError: If 'lines' or 'metadata' is absent from the payload.
        """
        absent = next(
            (key for key in ('lines', 'metadata') if key not in v),
            None,
        )
        if absent is not None:
            raise ValueError(f"Corrections must include '{absent}' field")
        return v
55
+
56
+
57
class InstrumentalSelection(BaseModel):
    """
    Request carrying the user's choice of instrumental audio.

    This is the second critical human-in-the-loop interaction point. The
    user picks the clean instrumental, the instrumental with backing
    vocals, or a custom instrumental (created via the
    create-custom-instrumental endpoint).
    """

    selection: str  # One of "clean", "with_backing", "custom"

    @validator('selection')
    def validate_selection(cls, v):
        """Accept only the known instrumental options.

        Raises:
            ValueError: If the value is not one of the permitted options.
        """
        allowed = ['clean', 'with_backing', 'custom']
        if v in allowed:
            return v
        raise ValueError(f"Selection must be one of: {allowed}")
74
+
75
+
76
class MuteRegionRequest(BaseModel):
    """A region to mute in the backing vocals.

    ``start_seconds`` must be a finite, non-negative number and
    ``end_seconds`` must be a finite number strictly greater than
    ``start_seconds``.
    """

    start_seconds: float
    end_seconds: float

    @validator('start_seconds')
    def validate_start(cls, v):
        """Reject NaN, infinite, or negative start times.

        Raises:
            ValueError: If the value is not finite or is below zero.
        """
        import math

        # NaN compares False against everything, so `v < 0` alone would
        # let it through; infinities are rejected for the same reason
        # (they cannot describe a real position in the track).
        if not math.isfinite(v):
            raise ValueError("start_seconds must be a finite number")
        if v < 0:
            raise ValueError("start_seconds must be non-negative")
        return v

    @validator('end_seconds')
    def validate_end(cls, v, values):
        """Reject non-finite end times and ends not after the start.

        Raises:
            ValueError: If the value is not finite, or is less than or
                equal to the already-validated ``start_seconds``.
        """
        import math

        if not math.isfinite(v):
            raise ValueError("end_seconds must be a finite number")
        # Only compare when start_seconds is available in `values`
        # (presumably it is missing when its own validation failed).
        if 'start_seconds' in values and v <= values['start_seconds']:
            raise ValueError("end_seconds must be greater than start_seconds")
        return v
92
+
93
+
94
class CreateCustomInstrumentalRequest(BaseModel):
    """
    Request to build a custom instrumental with parts of the backing vocals muted.

    Each entry in mute_regions is a time range of the backing vocals track
    to silence before it is mixed with the clean instrumental.
    """

    mute_regions: List[MuteRegionRequest]

    @validator('mute_regions')
    def validate_regions(cls, v):
        """Require a non-empty list of mute regions.

        Raises:
            ValueError: If no regions were supplied.
        """
        if v:
            return v
        raise ValueError("At least one mute region is required")
108
+
109
+
110
class StartReviewRequest(BaseModel):
    """Request to mark a job as in-review (the user opened the interface).

    It declares no fields: no body is needed, the request just triggers
    the state transition.
    """
113
+
114
+
115
class CancelJobRequest(BaseModel):
    """Request asking for a job to be cancelled."""

    reason: Optional[str] = None  # Optional; defaults to None when not supplied
118
+
119
+
120
class RetryJobRequest(BaseModel):
    """Request asking for a failed job to be retried."""

    from_stage: Optional[str] = None  # Optional: stage to restart from
123
+