PyPI - karaoke-gen - Versions diffs - 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl - Mend

karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197) hide show

backend/.coveragerc +20 -0
backend/.gitignore +37 -0
backend/Dockerfile +43 -0
backend/Dockerfile.base +74 -0
backend/README.md +242 -0
backend/__init__.py +0 -0
backend/api/__init__.py +0 -0
backend/api/dependencies.py +457 -0
backend/api/routes/__init__.py +0 -0
backend/api/routes/admin.py +835 -0
backend/api/routes/audio_search.py +913 -0
backend/api/routes/auth.py +348 -0
backend/api/routes/file_upload.py +2112 -0
backend/api/routes/health.py +409 -0
backend/api/routes/internal.py +435 -0
backend/api/routes/jobs.py +1629 -0
backend/api/routes/review.py +652 -0
backend/api/routes/themes.py +162 -0
backend/api/routes/users.py +1513 -0
backend/config.py +172 -0
backend/main.py +157 -0
backend/middleware/__init__.py +5 -0
backend/middleware/audit_logging.py +124 -0
backend/models/__init__.py +0 -0
backend/models/job.py +519 -0
backend/models/requests.py +123 -0
backend/models/theme.py +153 -0
backend/models/user.py +254 -0
backend/models/worker_log.py +164 -0
backend/pyproject.toml +29 -0
backend/quick-check.sh +93 -0
backend/requirements.txt +29 -0
backend/run_tests.sh +60 -0
backend/services/__init__.py +0 -0
backend/services/audio_analysis_service.py +243 -0
backend/services/audio_editing_service.py +278 -0
backend/services/audio_search_service.py +702 -0
backend/services/auth_service.py +630 -0
backend/services/credential_manager.py +792 -0
backend/services/discord_service.py +172 -0
backend/services/dropbox_service.py +301 -0
backend/services/email_service.py +1093 -0
backend/services/encoding_interface.py +454 -0
backend/services/encoding_service.py +502 -0
backend/services/firestore_service.py +512 -0
backend/services/flacfetch_client.py +573 -0
backend/services/gce_encoding/README.md +72 -0
backend/services/gce_encoding/__init__.py +22 -0
backend/services/gce_encoding/main.py +589 -0
backend/services/gce_encoding/requirements.txt +16 -0
backend/services/gdrive_service.py +356 -0
backend/services/job_logging.py +258 -0
backend/services/job_manager.py +853 -0
backend/services/job_notification_service.py +271 -0
backend/services/langfuse_preloader.py +98 -0
backend/services/local_encoding_service.py +590 -0
backend/services/local_preview_encoding_service.py +407 -0
backend/services/lyrics_cache_service.py +216 -0
backend/services/metrics.py +413 -0
backend/services/nltk_preloader.py +122 -0
backend/services/packaging_service.py +287 -0
backend/services/rclone_service.py +106 -0
backend/services/spacy_preloader.py +65 -0
backend/services/storage_service.py +209 -0
backend/services/stripe_service.py +371 -0
backend/services/structured_logging.py +254 -0
backend/services/template_service.py +330 -0
backend/services/theme_service.py +469 -0
backend/services/tracing.py +543 -0
backend/services/user_service.py +721 -0
backend/services/worker_service.py +558 -0
backend/services/youtube_service.py +112 -0
backend/services/youtube_upload_service.py +445 -0
backend/tests/__init__.py +4 -0
backend/tests/conftest.py +224 -0
backend/tests/emulator/__init__.py +7 -0
backend/tests/emulator/conftest.py +109 -0
backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
backend/tests/emulator/test_emulator_integration.py +356 -0
backend/tests/emulator/test_style_loading_direct.py +436 -0
backend/tests/emulator/test_worker_logs_direct.py +229 -0
backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
backend/tests/requirements-test.txt +10 -0
backend/tests/requirements.txt +6 -0
backend/tests/test_admin_email_endpoints.py +411 -0
backend/tests/test_api_integration.py +460 -0
backend/tests/test_api_routes.py +93 -0
backend/tests/test_audio_analysis_service.py +294 -0
backend/tests/test_audio_editing_service.py +386 -0
backend/tests/test_audio_search.py +1398 -0
backend/tests/test_audio_services.py +378 -0
backend/tests/test_auth_firestore.py +231 -0
backend/tests/test_config_extended.py +68 -0
backend/tests/test_credential_manager.py +377 -0
backend/tests/test_dependencies.py +54 -0
backend/tests/test_discord_service.py +244 -0
backend/tests/test_distribution_services.py +820 -0
backend/tests/test_dropbox_service.py +472 -0
backend/tests/test_email_service.py +492 -0
backend/tests/test_emulator_integration.py +322 -0
backend/tests/test_encoding_interface.py +412 -0
backend/tests/test_file_upload.py +1739 -0
backend/tests/test_flacfetch_client.py +632 -0
backend/tests/test_gdrive_service.py +524 -0
backend/tests/test_instrumental_api.py +431 -0
backend/tests/test_internal_api.py +343 -0
backend/tests/test_job_creation_regression.py +583 -0
backend/tests/test_job_manager.py +356 -0
backend/tests/test_job_manager_notifications.py +329 -0
backend/tests/test_job_notification_service.py +443 -0
backend/tests/test_jobs_api.py +283 -0
backend/tests/test_local_encoding_service.py +423 -0
backend/tests/test_local_preview_encoding_service.py +567 -0
backend/tests/test_main.py +87 -0
backend/tests/test_models.py +918 -0
backend/tests/test_packaging_service.py +382 -0
backend/tests/test_requests.py +201 -0
backend/tests/test_routes_jobs.py +282 -0
backend/tests/test_routes_review.py +337 -0
backend/tests/test_services.py +556 -0
backend/tests/test_services_extended.py +112 -0
backend/tests/test_spacy_preloader.py +119 -0
backend/tests/test_storage_service.py +448 -0
backend/tests/test_style_upload.py +261 -0
backend/tests/test_template_service.py +295 -0
backend/tests/test_theme_service.py +516 -0
backend/tests/test_unicode_sanitization.py +522 -0
backend/tests/test_upload_api.py +256 -0
backend/tests/test_validate.py +156 -0
backend/tests/test_video_worker_orchestrator.py +847 -0
backend/tests/test_worker_log_subcollection.py +509 -0
backend/tests/test_worker_logging.py +365 -0
backend/tests/test_workers.py +1116 -0
backend/tests/test_workers_extended.py +178 -0
backend/tests/test_youtube_service.py +247 -0
backend/tests/test_youtube_upload_service.py +568 -0
backend/utils/test_data.py +27 -0
backend/validate.py +173 -0
backend/version.py +27 -0
backend/workers/README.md +597 -0
backend/workers/__init__.py +11 -0
backend/workers/audio_worker.py +618 -0
backend/workers/lyrics_worker.py +683 -0
backend/workers/render_video_worker.py +483 -0
backend/workers/screens_worker.py +535 -0
backend/workers/style_helper.py +198 -0
backend/workers/video_worker.py +1277 -0
backend/workers/video_worker_orchestrator.py +701 -0
backend/workers/worker_logging.py +278 -0
karaoke_gen/instrumental_review/static/index.html +7 -4
karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
karaoke_gen/utils/__init__.py +163 -8
karaoke_gen/video_background_processor.py +9 -4
{karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
{karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
lyrics_transcriber/correction/agentic/agent.py +17 -6
lyrics_transcriber/correction/agentic/providers/config.py +9 -5
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
lyrics_transcriber/correction/anchor_sequence.py +151 -37
lyrics_transcriber/correction/corrector.py +192 -130
lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
lyrics_transcriber/correction/operations.py +24 -9
lyrics_transcriber/correction/phrase_analyzer.py +18 -0
lyrics_transcriber/frontend/package-lock.json +2 -2
lyrics_transcriber/frontend/package.json +1 -1
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
lyrics_transcriber/frontend/src/theme.ts +42 -15
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
lyrics_transcriber/frontend/vite.config.js +5 -0
lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
lyrics_transcriber/frontend/web_assets/index.html +6 -2
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
lyrics_transcriber/output/generator.py +17 -3
lyrics_transcriber/output/video.py +60 -95
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
{karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
{karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
{karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0

backend/models/job.py ADDED Viewed

@@ -0,0 +1,519 @@
+"""
+Job data models for karaoke generation.
+This module defines the complete state machine for karaoke generation jobs,
+mirroring the 8-stage CLI workflow with human-in-the-loop interaction points.
+"""
+from typing import Optional, Dict, Any, List
+from datetime import datetime
+from enum import Enum
+from pydantic import BaseModel, Field, validator
+from karaoke_gen.utils import normalize_text
+class JobStatus(str, Enum):
+    """
+    Job status enumeration - Complete state machine.
+    The workflow has 8 main stages with 3 human interaction points:
+    1. Input & Setup (may include audio source selection)
+    2. Parallel Processing (audio + lyrics)
+    3. Title/End Screen Generation
+    4. Countdown Padding Synchronization
+    5. Human Review (BLOCKING)
+    6. Instrumental Selection (BLOCKING)
+    7. Video Finalization
+    8. Distribution
+    """
+    # Initial states
+    PENDING = "pending"                           # Job created, queued for processing
+    # Audio search states (for artist+title search mode)
+    SEARCHING_AUDIO = "searching_audio"           # Searching for audio sources via flacfetch
+    AWAITING_AUDIO_SELECTION = "awaiting_audio_selection"  # ⚠️ WAITING FOR USER - select audio source
+    DOWNLOADING_AUDIO = "downloading_audio"       # Downloading selected audio from source
+    DOWNLOADING = "downloading"                   # Downloading from URL or processing upload
+    # Stage 2a: Audio separation (parallel track 1)
+    SEPARATING_STAGE1 = "separating_stage1"      # Clean instrumental separation (Modal API)
+    SEPARATING_STAGE2 = "separating_stage2"      # Backing vocals separation (Modal API)
+    AUDIO_COMPLETE = "audio_complete"            # All audio stems ready
+    # Stage 2b: Lyrics processing (parallel track 2)
+    TRANSCRIBING = "transcribing"                # AudioShake API transcription
+    CORRECTING = "correcting"                     # Automatic lyrics correction
+    LYRICS_COMPLETE = "lyrics_complete"           # Corrections JSON ready
+    # Stage 3: Title/End screens
+    GENERATING_SCREENS = "generating_screens"     # Creating title and end screen videos
+    # Stage 4: Countdown padding (automatic)
+    APPLYING_PADDING = "applying_padding"         # Synchronizing countdown padding
+    # Stage 5: Human review (BLOCKING)
+    AWAITING_REVIEW = "awaiting_review"          # ⚠️ WAITING FOR USER - lyrics review needed
+    IN_REVIEW = "in_review"                      # User is actively reviewing lyrics
+    REVIEW_COMPLETE = "review_complete"          # User submitted corrected lyrics
+    # Stage 5.5: Render video with corrected lyrics (post-review)
+    RENDERING_VIDEO = "rendering_video"          # Using OutputGenerator to create with_vocals.mkv
+    # Stage 6: Instrumental selection (BLOCKING)
+    AWAITING_INSTRUMENTAL_SELECTION = "awaiting_instrumental_selection"  # ⚠️ WAITING FOR USER
+    INSTRUMENTAL_SELECTED = "instrumental_selected"  # User made selection
+    # Stage 7: Video generation and finalization
+    GENERATING_VIDEO = "generating_video"        # Creating initial karaoke video
+    ENCODING = "encoding"                        # Multi-format video encoding (Cloud Build)
+    PACKAGING = "packaging"                      # CDG/TXT ZIP generation
+    # Stage 8: Distribution (optional)
+    UPLOADING = "uploading"                      # YouTube/Dropbox upload
+    NOTIFYING = "notifying"                      # Discord/Email notifications
+    # End states (only COMPLETE has an empty transition list in STATE_TRANSITIONS)
+    COMPLETE = "complete"                        # All processing finished successfully
+    PREP_COMPLETE = "prep_complete"             # Prep-only job completed (stops after review); STATE_TRANSITIONS lets a finalise-only run continue from here
+    FAILED = "failed"                           # Job failed; STATE_TRANSITIONS still permits retry from selected checkpoints
+    CANCELLED = "cancelled"                      # User cancelled the job; STATE_TRANSITIONS permits the same retry targets as FAILED
+    # Legacy compatibility (will be removed)
+    QUEUED = "queued"                           # Deprecated: use PENDING
+    PROCESSING = "processing"                    # Deprecated: use specific states
+    READY_FOR_FINALIZATION = "ready_for_finalization"  # Deprecated
+    FINALIZING = "finalizing"                    # Deprecated: use ENCODING/PACKAGING
+    ERROR = "error"                             # Deprecated: use FAILED
+# Valid state transitions: maps each JobStatus to the list of statuses it may move to next
+STATE_TRANSITIONS = {
+    # PENDING can go to DOWNLOADING (file upload), SEARCHING_AUDIO (artist+title search), or AWAITING_INSTRUMENTAL_SELECTION (finalise-only)
+    JobStatus.PENDING: [JobStatus.DOWNLOADING, JobStatus.SEARCHING_AUDIO, JobStatus.AWAITING_INSTRUMENTAL_SELECTION, JobStatus.FAILED, JobStatus.CANCELLED],
+    # Audio search flow (for artist+title search mode)
+    JobStatus.SEARCHING_AUDIO: [JobStatus.AWAITING_AUDIO_SELECTION, JobStatus.DOWNLOADING_AUDIO, JobStatus.FAILED],
+    JobStatus.AWAITING_AUDIO_SELECTION: [JobStatus.DOWNLOADING_AUDIO, JobStatus.FAILED, JobStatus.CANCELLED],
+    JobStatus.DOWNLOADING_AUDIO: [JobStatus.DOWNLOADING, JobStatus.FAILED],
+    # DOWNLOADING allows parallel processing (audio + lyrics) and then screens when both complete
+    JobStatus.DOWNLOADING: [JobStatus.SEPARATING_STAGE1, JobStatus.TRANSCRIBING, JobStatus.GENERATING_SCREENS, JobStatus.FAILED],
+    # Audio separation flow
+    JobStatus.SEPARATING_STAGE1: [JobStatus.SEPARATING_STAGE2, JobStatus.FAILED],
+    JobStatus.SEPARATING_STAGE2: [JobStatus.AUDIO_COMPLETE, JobStatus.FAILED],
+    JobStatus.AUDIO_COMPLETE: [JobStatus.GENERATING_SCREENS, JobStatus.FAILED],
+    # Lyrics flow
+    JobStatus.TRANSCRIBING: [JobStatus.CORRECTING, JobStatus.FAILED],
+    JobStatus.CORRECTING: [JobStatus.LYRICS_COMPLETE, JobStatus.FAILED],
+    JobStatus.LYRICS_COMPLETE: [JobStatus.GENERATING_SCREENS, JobStatus.FAILED],
+    # Post-parallel processing
+    JobStatus.GENERATING_SCREENS: [JobStatus.APPLYING_PADDING, JobStatus.AWAITING_REVIEW, JobStatus.AWAITING_INSTRUMENTAL_SELECTION, JobStatus.FAILED],
+    JobStatus.APPLYING_PADDING: [JobStatus.AWAITING_REVIEW, JobStatus.FAILED],
+    # Human review flow
+    # AWAITING_REVIEW can go directly to REVIEW_COMPLETE (quick review) or to IN_REVIEW (editing)
+    JobStatus.AWAITING_REVIEW: [JobStatus.IN_REVIEW, JobStatus.REVIEW_COMPLETE, JobStatus.FAILED, JobStatus.CANCELLED],
+    JobStatus.IN_REVIEW: [JobStatus.REVIEW_COMPLETE, JobStatus.AWAITING_REVIEW, JobStatus.FAILED],
+    JobStatus.REVIEW_COMPLETE: [JobStatus.RENDERING_VIDEO, JobStatus.PREP_COMPLETE, JobStatus.FAILED],  # PREP_COMPLETE for prep-only jobs
+    # Video rendering (post-review)
+    JobStatus.RENDERING_VIDEO: [JobStatus.AWAITING_INSTRUMENTAL_SELECTION, JobStatus.PREP_COMPLETE, JobStatus.FAILED],
+    # Instrumental selection flow
+    JobStatus.AWAITING_INSTRUMENTAL_SELECTION: [JobStatus.INSTRUMENTAL_SELECTED, JobStatus.FAILED, JobStatus.CANCELLED],
+    JobStatus.INSTRUMENTAL_SELECTED: [JobStatus.GENERATING_VIDEO, JobStatus.FAILED],
+    # Video generation flow
+    JobStatus.GENERATING_VIDEO: [JobStatus.ENCODING, JobStatus.FAILED],
+    JobStatus.ENCODING: [JobStatus.PACKAGING, JobStatus.COMPLETE, JobStatus.FAILED],
+    JobStatus.PACKAGING: [JobStatus.UPLOADING, JobStatus.COMPLETE, JobStatus.FAILED],
+    # Distribution flow
+    JobStatus.UPLOADING: [JobStatus.NOTIFYING, JobStatus.COMPLETE, JobStatus.FAILED],
+    JobStatus.NOTIFYING: [JobStatus.COMPLETE, JobStatus.FAILED],
+    # End states - only COMPLETE has no outgoing transitions
+    # FAILED and CANCELLED allow retry transitions to resume from checkpoints
+    # PREP_COMPLETE allows finalise-only continuation (or FAILED)
+    JobStatus.COMPLETE: [],
+    JobStatus.PREP_COMPLETE: [JobStatus.AWAITING_INSTRUMENTAL_SELECTION, JobStatus.FAILED],  # Finalise-only continues from here
+    JobStatus.FAILED: [
+        JobStatus.DOWNLOADING,            # Retry from beginning (if input audio exists)
+        JobStatus.INSTRUMENTAL_SELECTED,  # Retry from video generation
+        JobStatus.REVIEW_COMPLETE,        # Retry from render stage
+        JobStatus.LYRICS_COMPLETE,        # Retry from screens generation
+    ],
+    JobStatus.CANCELLED: [
+        JobStatus.DOWNLOADING,            # Retry from beginning (if input audio exists)
+        JobStatus.INSTRUMENTAL_SELECTED,  # Retry from video generation
+        JobStatus.REVIEW_COMPLETE,        # Retry from render stage
+        JobStatus.LYRICS_COMPLETE,        # Retry from screens generation
+    ],
+    # Legacy states (for backward compatibility); none of these lists include CANCELLED
+    JobStatus.QUEUED: [JobStatus.PENDING],
+    JobStatus.PROCESSING: [JobStatus.SEPARATING_STAGE1, JobStatus.TRANSCRIBING],
+    JobStatus.READY_FOR_FINALIZATION: [JobStatus.GENERATING_VIDEO],
+    JobStatus.FINALIZING: [JobStatus.ENCODING],
+    JobStatus.ERROR: [JobStatus.FAILED],
+}
+class TimelineEvent(BaseModel):
+    """Timeline event for job progress tracking."""
+    status: str  # Plain string; presumably a JobStatus value, but not validated against the enum here
+    timestamp: str  # Stored as a string; the format is not enforced by this model
+    progress: Optional[int] = None  # Optional; NOTE(review): presumably 0-100 like Job.progress - confirm
+    message: Optional[str] = None  # Optional; defaults to None
+class LogEntry(BaseModel):
+    """Worker log entry for debugging and monitoring."""
+    timestamp: str  # Stored as a string; the format is not enforced by this model
+    level: str  # DEBUG, INFO, WARNING, ERROR (plain string; the allowed set is not validated here)
+    worker: str  # audio, lyrics, screens, video, render (plain string; the allowed set is not validated here)
+    message: str  # Required log text
+class Job(BaseModel):
+    """
+    Complete job data model.
+    Tracks the full lifecycle of a karaoke generation job from submission
+    through all 8 stages to completion or failure.
+    """
+    job_id: str
+    status: JobStatus
+    progress: int = 0  # 0-100 percentage for UI display
+    created_at: datetime
+    updated_at: datetime
+    # Input data
+    url: Optional[str] = None                    # YouTube URL (if provided)
+    artist: Optional[str] = None
+    title: Optional[str] = None
+    filename: Optional[str] = None               # Original uploaded filename
+    input_media_gcs_path: Optional[str] = None   # GCS path to uploaded file
+    # User preferences
+    enable_cdg: bool = False                     # Generate CDG+MP3 package (requires style config)
+    enable_txt: bool = False                     # Generate TXT+MP3 package (requires style config)
+    enable_youtube_upload: bool = False          # Upload to YouTube
+    youtube_description: Optional[str] = None    # YouTube video description
+    webhook_url: Optional[str] = None            # Webhook for notifications
+    user_email: Optional[str] = None             # Email for notifications
+    non_interactive: bool = False                # Skip interactive steps (lyrics review, instrumental selection)
+    # Theme configuration (pre-made themes from GCS)
+    theme_id: Optional[str] = None               # Theme identifier (e.g., "nomad", "default")
+    color_overrides: Dict[str, str] = Field(default_factory=dict)
+    """
+    User color overrides applied on top of theme. Keys:
+    - artist_color: Hex color for artist name (#RRGGBB)
+    - title_color: Hex color for song title
+    - sung_lyrics_color: Hex color for highlighted lyrics
+    - unsung_lyrics_color: Hex color for unhighlighted lyrics
+    """
+    # Style configuration (uploaded files - used when theme_id is not set)
+    style_params_gcs_path: Optional[str] = None  # GCS path to style_params.json
+    style_assets: Dict[str, str] = Field(default_factory=dict)
+    """
+    GCS paths to style asset files:
+    {
+        "intro_background": "gs://bucket/jobs/{job_id}/style/intro_bg.png",
+        "karaoke_background": "gs://bucket/jobs/{job_id}/style/karaoke_bg.png",
+        "end_background": "gs://bucket/jobs/{job_id}/style/end_bg.png",
+        "font": "gs://bucket/jobs/{job_id}/style/font.ttf",
+        "cdg_instrumental_background": "gs://bucket/jobs/{job_id}/style/cdg_instr.png",
+        "cdg_title_background": "gs://bucket/jobs/{job_id}/style/cdg_title.png",
+        "cdg_outro_background": "gs://bucket/jobs/{job_id}/style/cdg_outro.png"
+    }
+    """
+    # Finalisation configuration
+    brand_prefix: Optional[str] = None           # Brand code prefix (e.g., "NOMAD")
+    discord_webhook_url: Optional[str] = None    # Discord notification webhook
+    youtube_description_template: Optional[str] = None  # YouTube description template text
+    # Distribution configuration (native API - for remote CLI)
+    dropbox_path: Optional[str] = None           # Dropbox folder path for organized output (e.g., "/Karaoke/Tracks-Organized")
+    gdrive_folder_id: Optional[str] = None       # Google Drive folder ID for public share uploads
+    # Legacy distribution configuration (rclone - for local CLI backward compat)
+    organised_dir_rclone_root: Optional[str] = None  # Deprecated: use dropbox_path instead
+    # Lyrics configuration (overrides for search/transcription)
+    lyrics_artist: Optional[str] = None          # Override artist name for lyrics search
+    lyrics_title: Optional[str] = None           # Override title for lyrics search
+    lyrics_file_gcs_path: Optional[str] = None   # GCS path to user-provided lyrics file
+    subtitle_offset_ms: int = 0                  # Offset for subtitle timing (positive = delay)
+    # Audio separation model configuration
+    clean_instrumental_model: Optional[str] = None   # Model for clean instrumental separation
+    backing_vocals_models: Optional[List[str]] = None  # Models for backing vocals separation
+    other_stems_models: Optional[List[str]] = None     # Models for other stems (bass, drums, etc.)
+    # Existing instrumental configuration (Batch 3)
+    existing_instrumental_gcs_path: Optional[str] = None  # GCS path to user-provided instrumental file
+    # Audio search configuration (Batch 5 - artist+title search mode)
+    audio_search_artist: Optional[str] = None     # Artist name used for audio search
+    audio_search_title: Optional[str] = None      # Title used for audio search
+    auto_download: bool = False                    # Auto-select best audio source (skip selection)
+    # Two-phase workflow configuration (Batch 6)
+    prep_only: bool = False                      # Stop after review, don't run finalisation
+    finalise_only: bool = False                  # Skip prep, run only finalisation (requires uploaded prep outputs)
+    keep_brand_code: Optional[str] = None        # Preserve existing brand code instead of generating new one
+    # Review authentication (Batch 7)
+    review_token: Optional[str] = None           # Job-scoped token for lyrics review UI access (generated when entering AWAITING_REVIEW)
+    review_token_expires_at: Optional[datetime] = None  # Token expiry time (optional, for extra security)
+    instrumental_token: Optional[str] = None     # Job-scoped token for instrumental review UI access (generated when entering AWAITING_INSTRUMENTAL_SELECTION)
+    instrumental_token_expires_at: Optional[datetime] = None  # Token expiry time
+    # Processing state
+    track_output_dir: Optional[str] = None       # Local output directory (temp)
+    audio_hash: Optional[str] = None             # Hash for deduplication
+    # State-specific data (JSON field for stage-specific metadata)
+    state_data: Dict[str, Any] = Field(default_factory=dict)
+    """
+    Stage-specific metadata. Examples:
+    - audio_complete: {"stems": {"clean": "gs://...", "backing": "gs://..."}}
+    - lyrics_complete: {"corrections_url": "gs://...", "audio_url": "gs://..."}
+    - review_complete: {"corrected_lyrics": {...}}
+    - instrumental_selected: {"selection": "clean" | "with_backing"}
+    - encoding: {"build_id": "...", "progress": 45}
+    """
+    # Timeline tracking
+    timeline: List[TimelineEvent] = Field(default_factory=list)
+    # Worker logs for debugging (intended to hold only the last N entries to avoid document size issues; NOTE(review): this model does not enforce the cap - presumably the writer trims, confirm)
+    worker_logs: List[LogEntry] = Field(default_factory=list)
+    # File URLs (GCS storage)
+    file_urls: Dict[str, Any] = Field(default_factory=dict)
+    """
+    File storage URLs. Structure:
+    {
+        "input": "gs://bucket/jobs/{job_id}/input.flac",
+        "stems": {
+            "instrumental_clean": "gs://...",
+            "instrumental_with_backing": "gs://...",
+            "vocals": "gs://...",
+            "backing_vocals": "gs://...",
+            "lead_vocals": "gs://...",
+            "bass": "gs://...",
+            "drums": "gs://...",
+            "guitar": "gs://...",
+            "piano": "gs://...",
+            "other": "gs://..."
+        },
+        "lyrics": {
+            "corrections": "gs://bucket/jobs/{job_id}/lyrics/corrections.json",
+            "audio": "gs://bucket/jobs/{job_id}/lyrics/audio.flac",
+            "lrc": "gs://...",
+            "ass": "gs://..."
+        },
+        "screens": {
+            "title": "gs://bucket/jobs/{job_id}/screens/title.mov",
+            "end": "gs://bucket/jobs/{job_id}/screens/end.mov"
+        },
+        "videos": {
+            "with_vocals": "gs://bucket/jobs/{job_id}/videos/with_vocals.mkv"
+        },
+        "finals": {
+            "lossless_4k_mp4": "gs://...",
+            "lossless_4k_mkv": "gs://...",
+            "lossy_4k_mp4": "gs://...",
+            "lossy_720p_mp4": "gs://..."
+        },
+        "packages": {
+            "cdg_zip": "gs://...",
+            "txt_zip": "gs://..."
+        },
+        "youtube": {
+            "url": "https://youtube.com/watch?v=...",
+            "video_id": "..."
+        }
+    }
+    """
+    # Results (for backward compatibility, will be deprecated)
+    output_files: Dict[str, str] = Field(default_factory=dict)
+    download_urls: Dict[str, str] = Field(default_factory=dict)
+    # Error handling
+    error_message: Optional[str] = None
+    error_details: Optional[Dict[str, Any]] = None  # Structured error information
+    retry_count: int = 0                            # Number of retry attempts
+    # Worker tracking
+    worker_ids: Dict[str, str] = Field(default_factory=dict)
+    """
+    IDs of background workers/jobs:
+    {
+        "audio_worker": "cloud-run-request-id",
+        "lyrics_worker": "cloud-run-request-id",
+        "video_encoder": "cloud-build-id",
+        "distribution": "cloud-run-request-id"
+    }
+    """
+    # Request metadata (captured at job creation for tracking and filtering)
+    request_metadata: Dict[str, Any] = Field(default_factory=dict)
+    """
+    Metadata captured from the original API request.
+    Used for tracking, filtering, and operational management.
+    Standard fields:
+    {
+        "client_ip": "192.168.1.1",           # IP address of the client
+        "user_agent": "karaoke-gen-remote/0.71.0",  # User-Agent header
+        "environment": "test",                 # From X-Environment header (test/production/development)
+        "client_id": "cli-user-123",          # From X-Client-ID header (customer/user identifier)
+        "server_version": "0.71.0",           # Server version at job creation
+        "created_from": "upload",              # "upload" (file) or "url" (YouTube URL)
+        "custom_headers": {                    # All X-* headers for extensibility
+            "X-Environment": "test",
+            "X-Client-ID": "cli-user-123",
+            "X-Request-ID": "abc-123"
+        }
+    }
+    Use cases:
+    - Filter test vs production jobs
+    - Track jobs by customer/client
+    - Debug issues with specific clients
+    - Bulk cleanup of test jobs
+    """
+    # Note: Status transition validation is handled by JobManager.validate_state_transition()
+    # which is called before status updates. The Job model does not validate transitions
+    # because Firestore updates happen directly without reconstructing the model.
+    class Config:
+        use_enum_values = True  # Pydantic option: populate enum-typed fields (e.g. status) with the enum's value rather than the Enum member
+class JobCreate(BaseModel):
+    """
+    Job creation request.
+    Either `url` OR file upload is required (file upload handled separately).
+    Artist and title are optional - will be auto-detected from YouTube if not provided.
+    """
+    url: Optional[str] = None
+    artist: Optional[str] = None
+    title: Optional[str] = None
+    filename: Optional[str] = None  # Original uploaded filename
+    # Optional preferences
+    enable_cdg: bool = False  # Requires style config
+    enable_txt: bool = False  # Requires style config
+    enable_youtube_upload: bool = False
+    youtube_description: Optional[str] = None
+    webhook_url: Optional[str] = None
+    user_email: Optional[str] = None
+    non_interactive: bool = False  # Skip interactive steps (lyrics review, instrumental selection)
+    # Theme configuration (pre-made themes from GCS)
+    theme_id: Optional[str] = None               # Theme identifier (e.g., "nomad", "default")
+    color_overrides: Dict[str, str] = Field(default_factory=dict)
+    """
+    User color overrides applied on top of theme. Keys:
+    - artist_color: Hex color for artist name (#RRGGBB)
+    - title_color: Hex color for song title
+    - sung_lyrics_color: Hex color for highlighted lyrics
+    - unsung_lyrics_color: Hex color for unhighlighted lyrics
+    """
+    # Style configuration (will be populated after file upload, or from theme)
+    style_params_gcs_path: Optional[str] = None
+    style_assets: Dict[str, str] = Field(default_factory=dict)
+    # Finalisation configuration
+    brand_prefix: Optional[str] = None
+    discord_webhook_url: Optional[str] = None
+    youtube_description_template: Optional[str] = None
+    # Distribution configuration (native API - for remote CLI)
+    dropbox_path: Optional[str] = None           # Dropbox folder path for organized output
+    gdrive_folder_id: Optional[str] = None       # Google Drive folder ID for public share uploads
+    # Legacy (rclone - deprecated, use dropbox_path instead)
+    organised_dir_rclone_root: Optional[str] = None
+    # Lyrics configuration (overrides for search/transcription)
+    lyrics_artist: Optional[str] = None          # Override artist name for lyrics search
+    lyrics_title: Optional[str] = None           # Override title for lyrics search
+    lyrics_file_gcs_path: Optional[str] = None   # GCS path to user-provided lyrics file
+    subtitle_offset_ms: int = 0                  # Offset for subtitle timing (positive = delay)
+    # Audio separation model configuration
+    clean_instrumental_model: Optional[str] = None   # Model for clean instrumental separation
+    backing_vocals_models: Optional[List[str]] = None  # Models for backing vocals separation
+    other_stems_models: Optional[List[str]] = None     # Models for other stems (bass, drums, etc.)
+    # Existing instrumental configuration (Batch 3)
+    existing_instrumental_gcs_path: Optional[str] = None  # GCS path to user-provided instrumental file
+    # Audio search configuration (Batch 5 - artist+title search mode)
+    audio_search_artist: Optional[str] = None     # Artist name used for audio search
+    audio_search_title: Optional[str] = None      # Title used for audio search
+    auto_download: bool = False                    # Auto-select best audio source (skip selection)
+    # Two-phase workflow configuration (Batch 6)
+    prep_only: bool = False                      # Stop after review, don't run finalisation
+    finalise_only: bool = False                  # Skip prep, run only finalisation
+    keep_brand_code: Optional[str] = None        # Preserve existing brand code instead of generating new one
+    # Request metadata (set by API endpoint from request headers)
+    request_metadata: Dict[str, Any] = Field(default_factory=dict)
+    """
+    Populated by the API endpoint with request context:
+    - client_ip: Client IP address
+    - user_agent: User-Agent header
+    - environment: From X-Environment header (test/production/development)
+    - client_id: From X-Client-ID header
+    - server_version: Current server version
+    - custom_headers: All X-* headers
+    """
+    @validator('url')
+    def validate_url(cls, v):
+        """Validate URL is not empty."""
+        if v is not None and isinstance(v, str) and not v.strip():  # Rejects "" and whitespace-only strings; no URL syntax check is done here
+            raise ValueError("Field cannot be empty string")
+        return v.strip() if isinstance(v, str) else v  # Strings are returned with surrounding whitespace stripped; other values pass through unchanged
+    @validator('artist', 'title')
+    def normalize_artist_title(cls, v):
+        """Normalize artist/title text to standardize Unicode characters.
+        This ensures consistent data storage by converting:
+        - Curly quotes -> straight quotes
+        - Various dashes -> hyphen
+        - Unusual whitespace -> regular space
+        """
+        if v is not None and isinstance(v, str):
+            if not v.strip():  # Same empty/whitespace-only rejection as validate_url
+                raise ValueError("Field cannot be empty string")
+            # normalize_text handles stripping and Unicode normalization
+            return normalize_text(v)
+        return v  # Non-string values (including None) are returned unchanged
+class JobResponse(BaseModel):
+    """Job response model."""
+    status: str  # Required plain string; not typed as JobStatus, so not validated against the enum here
+    job_id: str  # Required
+    message: str  # Required

backend/models/requests.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""
+API request models for karaoke generation endpoints.
+"""
+from typing import Optional, Dict, Any, List
+from pydantic import BaseModel, HttpUrl, validator
+class URLSubmissionRequest(BaseModel):
+    """Request to submit a job from a URL (YouTube, etc.).
+    Only `url` is required; every other field has a default.
+    """
+    url: HttpUrl  # Required; checked by pydantic's HttpUrl type
+    artist: Optional[str] = None  # Auto-detected if not provided
+    title: Optional[str] = None   # Auto-detected if not provided
+    # Optional preferences
+    enable_cdg: bool = False  # Requires style config
+    enable_txt: bool = False  # Requires style config
+    enable_youtube_upload: Optional[bool] = None  # None = use server default
+    youtube_description: Optional[str] = None  # Presumably the description text for the YouTube upload; confirm
+    webhook_url: Optional[str] = None  # NOTE(review): plain string, not validated as a URL here (unlike `url`)
+    user_email: Optional[str] = None  # NOTE(review): plain string, no email-format validation in this model
+class UploadSubmissionRequest(BaseModel):
+    """Request to submit a job from an uploaded file.
+    `artist` and `title` are required here; every other field has a default.
+    The uploaded file itself is not part of this model.
+    """
+    artist: str  # Required
+    title: str  # Required
+    # Optional preferences
+    enable_cdg: bool = False  # Requires style config
+    enable_txt: bool = False  # Requires style config
+    enable_youtube_upload: Optional[bool] = None  # None = use server default
+    youtube_description: Optional[str] = None  # Presumably the description text for the YouTube upload; confirm
+    webhook_url: Optional[str] = None  # NOTE(review): plain string, not validated as a URL in this model
+    user_email: Optional[str] = None  # NOTE(review): plain string, no email-format validation in this model
+class CorrectionsSubmission(BaseModel):
+    """
+    Request to submit corrected lyrics after human review.
+    This is the critical human-in-the-loop interaction point.
+    The corrections data comes from the lyrics-transcriber review interface.
+    """
+    corrections: Dict[str, Any]  # Full corrections JSON from frontend
+    user_notes: Optional[str] = None  # Optional notes from reviewer
+    @validator('corrections')
+    def validate_corrections_format(cls, v):
+        """Validate corrections has required fields."""
+        required_fields = ['lines', 'metadata']
+        for field in required_fields:
+            if field not in v:
+                raise ValueError(f"Corrections must include '{field}' field")
+        return v
+class InstrumentalSelection(BaseModel):
+    """
+    Request to select instrumental audio option.
+    This is the second critical human-in-the-loop interaction point.
+    User chooses between clean instrumental, instrumental with backing vocals,
+    or a custom instrumental (created via create-custom-instrumental endpoint).
+    """
+    selection: str  # "clean", "with_backing", or "custom"
+    @validator('selection')
+    def validate_selection(cls, v):
+        """Validate selection is a valid option."""
+        valid_options = ['clean', 'with_backing', 'custom']
+        if v not in valid_options:
+            raise ValueError(f"Selection must be one of: {valid_options}")
+        return v
+class MuteRegionRequest(BaseModel):
+    """A region to mute in the backing vocals."""
+    start_seconds: float
+    end_seconds: float
+    @validator('start_seconds')
+    def validate_start(cls, v):
+        if v < 0:
+            raise ValueError("start_seconds must be non-negative")
+        return v
+    @validator('end_seconds')
+    def validate_end(cls, v, values):
+        if 'start_seconds' in values and v <= values['start_seconds']:
+            raise ValueError("end_seconds must be greater than start_seconds")
+        return v
+class CreateCustomInstrumentalRequest(BaseModel):
+    """
+    Request to create a custom instrumental with muted backing vocal regions.
+    The mute_regions specify time ranges in the backing vocals track that
+    should be silenced before mixing with the clean instrumental.
+    """
+    mute_regions: List[MuteRegionRequest]
+    @validator('mute_regions')
+    def validate_regions(cls, v):
+        if not v:
+            raise ValueError("At least one mute region is required")
+        return v
+class StartReviewRequest(BaseModel):
+    """Request to mark job as in-review (user opened interface)."""
+    pass  # No body needed, just triggers state transition
+class CancelJobRequest(BaseModel):
+    """Request to cancel a job.
+    The only field is optional, so an empty body is valid.
+    """
+    reason: Optional[str] = None  # Optional cancellation reason; defaults to None
+class RetryJobRequest(BaseModel):
+    """Request to retry a failed job.
+    The only field is optional, so an empty body is valid.
+    """
+    # NOTE(review): stage names are not validated by this model; confirm the accepted values with the handler
+    from_stage: Optional[str] = None  # Optional: restart from specific stage

karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl