karaoke-gen 0.96.0__py3-none-any.whl → 0.101.0__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (58)
  1. backend/api/routes/admin.py +696 -92
  2. backend/api/routes/audio_search.py +29 -8
  3. backend/api/routes/file_upload.py +99 -22
  4. backend/api/routes/health.py +65 -0
  5. backend/api/routes/internal.py +6 -0
  6. backend/api/routes/jobs.py +28 -1
  7. backend/api/routes/review.py +13 -6
  8. backend/api/routes/tenant.py +120 -0
  9. backend/api/routes/users.py +472 -51
  10. backend/main.py +31 -2
  11. backend/middleware/__init__.py +7 -1
  12. backend/middleware/tenant.py +192 -0
  13. backend/models/job.py +19 -3
  14. backend/models/tenant.py +208 -0
  15. backend/models/user.py +18 -0
  16. backend/services/email_service.py +253 -6
  17. backend/services/encoding_service.py +128 -31
  18. backend/services/firestore_service.py +6 -0
  19. backend/services/job_manager.py +44 -2
  20. backend/services/langfuse_preloader.py +98 -0
  21. backend/services/nltk_preloader.py +122 -0
  22. backend/services/spacy_preloader.py +65 -0
  23. backend/services/stripe_service.py +133 -11
  24. backend/services/tenant_service.py +285 -0
  25. backend/services/user_service.py +85 -7
  26. backend/tests/emulator/conftest.py +22 -1
  27. backend/tests/emulator/test_made_for_you_integration.py +167 -0
  28. backend/tests/test_admin_job_files.py +337 -0
  29. backend/tests/test_admin_job_reset.py +384 -0
  30. backend/tests/test_admin_job_update.py +326 -0
  31. backend/tests/test_email_service.py +233 -0
  32. backend/tests/test_impersonation.py +223 -0
  33. backend/tests/test_job_creation_regression.py +4 -0
  34. backend/tests/test_job_manager.py +171 -9
  35. backend/tests/test_jobs_api.py +11 -1
  36. backend/tests/test_made_for_you.py +2086 -0
  37. backend/tests/test_models.py +139 -0
  38. backend/tests/test_spacy_preloader.py +119 -0
  39. backend/tests/test_tenant_api.py +350 -0
  40. backend/tests/test_tenant_middleware.py +345 -0
  41. backend/tests/test_tenant_models.py +406 -0
  42. backend/tests/test_tenant_service.py +418 -0
  43. backend/utils/test_data.py +27 -0
  44. backend/workers/screens_worker.py +16 -6
  45. backend/workers/video_worker.py +8 -3
  46. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/METADATA +1 -1
  47. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/RECORD +58 -39
  48. lyrics_transcriber/correction/agentic/agent.py +17 -6
  49. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -43
  50. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  51. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  52. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  53. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  54. lyrics_transcriber/frontend/src/api.ts +13 -5
  55. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +90 -57
  56. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/WHEEL +0 -0
  57. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/entry_points.txt +0 -0
  58. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/licenses/LICENSE +0 -0
@@ -15,11 +15,13 @@ from fastapi import APIRouter, Depends, HTTPException
  from pydantic import BaseModel
 
  from backend.api.dependencies import require_admin
- from backend.services.auth_service import UserType
+ from backend.services.auth_service import UserType, AuthResult
  from backend.services.user_service import get_user_service, UserService, USERS_COLLECTION
  from backend.services.job_manager import JobManager
  from backend.services.flacfetch_client import get_flacfetch_client, FlacfetchServiceError
+ from backend.services.storage_service import StorageService
  from backend.models.job import JobStatus
+ from backend.utils.test_data import is_test_email
  from karaoke_gen.utils import sanitize_filename
 
 
@@ -55,18 +57,91 @@ class AdminStatsOverview(BaseModel):
      total_beta_testers: int
 
 
+ class FileInfo(BaseModel):
+     """Information about a single file with signed download URL."""
+     name: str
+     path: str  # GCS path (gs://bucket/...)
+     download_url: str  # Signed URL for download
+     category: str  # e.g., "stems", "lyrics", "finals"
+     file_key: str  # e.g., "instrumental_clean", "lrc"
+
+
+ class JobFilesResponse(BaseModel):
+     """Response containing all files for a job with signed download URLs."""
+     job_id: str
+     artist: Optional[str]
+     title: Optional[str]
+     files: List[FileInfo]
+     total_files: int
+
+
+ class JobUpdateRequest(BaseModel):
+     """Request model for updating job fields."""
+     # Editable text fields
+     artist: Optional[str] = None
+     title: Optional[str] = None
+     user_email: Optional[str] = None
+     theme_id: Optional[str] = None
+     brand_prefix: Optional[str] = None
+     discord_webhook_url: Optional[str] = None
+     youtube_description: Optional[str] = None
+     youtube_description_template: Optional[str] = None
+     customer_email: Optional[str] = None
+     customer_notes: Optional[str] = None
+
+     # Editable boolean fields
+     enable_cdg: Optional[bool] = None
+     enable_txt: Optional[bool] = None
+     enable_youtube_upload: Optional[bool] = None
+     non_interactive: Optional[bool] = None
+     prep_only: Optional[bool] = None
+
+
+ class JobUpdateResponse(BaseModel):
+     """Response from job update endpoint."""
+     status: str
+     job_id: str
+     updated_fields: List[str]
+     message: str
+
+
+ # Fields that are allowed to be updated via PATCH endpoint
+ EDITABLE_JOB_FIELDS = {
+     "artist",
+     "title",
+     "user_email",
+     "theme_id",
+     "brand_prefix",
+     "discord_webhook_url",
+     "youtube_description",
+     "youtube_description_template",
+     "customer_email",
+     "customer_notes",
+     "enable_cdg",
+     "enable_txt",
+     "enable_youtube_upload",
+     "non_interactive",
+     "prep_only",
+ }
+
+
  # =============================================================================
  # Admin Stats Endpoints
  # =============================================================================
 
  @router.get("/stats/overview", response_model=AdminStatsOverview)
  async def get_admin_stats_overview(
+     exclude_test: bool = True,
      auth_data: Tuple[str, UserType, int] = Depends(require_admin),
      user_service: UserService = Depends(get_user_service),
  ):
      """
      Get overview statistics for admin dashboard.
 
+     Args:
+         exclude_test: If True (default), exclude test data (users with @inbox.testmail.app emails
+             and jobs created by test users) from all counts.
+
      Includes:
      - User counts (total, active in 7d, active in 30d)
      - Job counts (total, by status, recent)
@@ -81,48 +156,10 @@ async def get_admin_stats_overview(
      seven_days_ago = now - timedelta(days=7)
      thirty_days_ago = now - timedelta(days=30)
 
-     # Helper function to get count using aggregation
-     def get_count(query) -> int:
-         try:
-             agg_query = aggregation.AggregationQuery(query)
-             agg_query.count(alias="count")
-             results = agg_query.get()
-             return results[0][0].value if results else 0
-         except Exception as e:
-             logger.warning(f"Aggregation query failed: {e}")
-             return 0
-
-     # User statistics
      users_collection = db.collection(USERS_COLLECTION)
-
-     total_users = get_count(users_collection)
-
-     active_users_7d = get_count(
-         users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
-     )
-
-     active_users_30d = get_count(
-         users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
-     )
-
-     total_beta_testers = get_count(
-         users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
-     )
-
-     # Job statistics
      jobs_collection = db.collection("jobs")
 
-     total_jobs = get_count(jobs_collection)
-
-     jobs_last_7d = get_count(
-         jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
-     )
-
-     jobs_last_30d = get_count(
-         jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
-     )
-
-     # Jobs by status - map multiple statuses to simplified categories
+     # Jobs by status category mapping
      processing_statuses = [
          "downloading", "downloading_audio", "searching_audio", "awaiting_audio_selection",
          "separating_stage1", "separating_stage2", "transcribing", "correcting",
@@ -131,63 +168,166 @@ async def get_admin_stats_overview(
          "uploading", "notifying"
      ]
 
-     jobs_by_status = JobsByStatusResponse(
-         pending=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
-         ),
-         processing=sum(
-             get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
-             for status in processing_statuses
-         ),
-         awaiting_review=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
-         ) + get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
-         ),
-         awaiting_instrumental=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
-         ),
-         complete=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
-         ) + get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
-         ),
-         failed=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
-         ),
-         cancelled=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
-         ),
-     )
+     # Limits for streaming queries - these are safety limits to prevent memory issues
+     # If hit, stats may be incomplete so we log a warning
+     USERS_STREAM_LIMIT = 2000
+     JOBS_STREAM_LIMIT = 10000
+
+     if exclude_test:
+         # When excluding test data, we must stream and filter in Python
+         # because Firestore doesn't support "not ends with" queries
+
+         # Stream all users and filter
+         all_users = []
+         users_fetched = 0
+         for doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
+             users_fetched += 1
+             user_data = doc.to_dict()
+             email = user_data.get("email", "")
+             if not is_test_email(email):
+                 all_users.append(user_data)
+
+         if users_fetched >= USERS_STREAM_LIMIT:
+             logger.warning(f"Users stream hit limit ({USERS_STREAM_LIMIT}), stats may be incomplete")
+
+         # Calculate user stats from filtered list
+         total_users = len(all_users)
+         active_users_7d = sum(
+             1 for u in all_users
+             if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= seven_days_ago
+         )
+         active_users_30d = sum(
+             1 for u in all_users
+             if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= thirty_days_ago
+         )
+         total_beta_testers = sum(1 for u in all_users if u.get("is_beta_tester"))
 
-     # Credit statistics - sum credits added in last 30 days
-     # This is more expensive, so we'll just estimate from users
-     total_credits_issued_30d = 0
-     try:
-         # Get all users and sum recent credit transactions
-         users_docs = users_collection.limit(500).stream()
-         for user_doc in users_docs:
-             user_data = user_doc.to_dict()
+         # Calculate credits from filtered users
+         total_credits_issued_30d = 0
+         for user_data in all_users:
              transactions = user_data.get("credit_transactions", [])
             for txn in transactions:
-                 txn_date = txn.get("created_at")
-                 if txn_date:
-                     # Handle both datetime and string formats
-                     if isinstance(txn_date, str):
-                         try:
-                             txn_date = datetime.fromisoformat(txn_date.replace("Z", "+00:00"))
-                         except Exception:
-                             continue
-                     if isinstance(txn_date, datetime):
-                         txn_date = txn_date.replace(tzinfo=None)
-                     else:
-                         continue
-                     if txn_date >= thirty_days_ago:
+                 txn_date = _normalize_datetime(txn.get("created_at"))
+                 if txn_date and txn_date >= thirty_days_ago:
+                     amount = txn.get("amount", 0)
+                     if amount > 0:
+                         total_credits_issued_30d += amount
+
+         # Stream all jobs and filter by user_email
+         all_jobs = []
+         jobs_fetched = 0
+         for doc in jobs_collection.limit(JOBS_STREAM_LIMIT).stream():
+             jobs_fetched += 1
+             job_data = doc.to_dict()
+             user_email = job_data.get("user_email", "")
+             if not is_test_email(user_email):
+                 all_jobs.append(job_data)
+
+         if jobs_fetched >= JOBS_STREAM_LIMIT:
+             logger.warning(f"Jobs stream hit limit ({JOBS_STREAM_LIMIT}), stats may be incomplete")
+
+         # Calculate job stats from filtered list
+         total_jobs = len(all_jobs)
+         jobs_last_7d = sum(
+             1 for j in all_jobs
+             if j.get("created_at") and _normalize_datetime(j["created_at"]) >= seven_days_ago
+         )
+         jobs_last_30d = sum(
+             1 for j in all_jobs
+             if j.get("created_at") and _normalize_datetime(j["created_at"]) >= thirty_days_ago
+         )
+
+         # Jobs by status
+         jobs_by_status = JobsByStatusResponse(
+             pending=sum(1 for j in all_jobs if j.get("status") == "pending"),
+             processing=sum(1 for j in all_jobs if j.get("status") in processing_statuses),
+             awaiting_review=sum(1 for j in all_jobs if j.get("status") in ["awaiting_review", "in_review"]),
+             awaiting_instrumental=sum(1 for j in all_jobs if j.get("status") == "awaiting_instrumental_selection"),
+             complete=sum(1 for j in all_jobs if j.get("status") in ["complete", "prep_complete"]),
+             failed=sum(1 for j in all_jobs if j.get("status") == "failed"),
+             cancelled=sum(1 for j in all_jobs if j.get("status") == "cancelled"),
+         )
+     else:
+         # When including test data, use efficient aggregation queries
+         def get_count(query) -> int:
+             try:
+                 agg_query = aggregation.AggregationQuery(query)
+                 agg_query.count(alias="count")
+                 results = agg_query.get()
+                 return results[0][0].value if results else 0
+             except Exception as e:
+                 logger.warning(f"Aggregation query failed: {e}")
+                 return 0
+
+         # User statistics
+         total_users = get_count(users_collection)
+         active_users_7d = get_count(
+             users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
+         )
+         active_users_30d = get_count(
+             users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
+         )
+         total_beta_testers = get_count(
+             users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
+         )
+
+         # Job statistics
+         total_jobs = get_count(jobs_collection)
+         jobs_last_7d = get_count(
+             jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
+         )
+         jobs_last_30d = get_count(
+             jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
+         )
+
+         # Jobs by status
+         jobs_by_status = JobsByStatusResponse(
+             pending=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
+             ),
+             processing=sum(
+                 get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
+                 for status in processing_statuses
+             ),
+             awaiting_review=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
+             ) + get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
+             ),
+             awaiting_instrumental=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
+             ),
+             complete=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
+             ) + get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
+             ),
+             failed=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
+             ),
+             cancelled=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
+             ),
+         )
+
+         # Credit statistics - sum credits added in last 30 days
+         total_credits_issued_30d = 0
+         try:
+             users_fetched = 0
+             for user_doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
+                 users_fetched += 1
+                 user_data = user_doc.to_dict()
+                 transactions = user_data.get("credit_transactions", [])
+                 for txn in transactions:
+                     txn_date = _normalize_datetime(txn.get("created_at"))
+                     if txn_date and txn_date >= thirty_days_ago:
                          amount = txn.get("amount", 0)
-                         if amount > 0:  # Only count additions, not deductions
+                         if amount > 0:
                              total_credits_issued_30d += amount
-     except Exception as e:
-         logger.warning(f"Error calculating credits: {e}")
+             if users_fetched >= USERS_STREAM_LIMIT:
+                 logger.warning(f"Credit calculation hit user limit ({USERS_STREAM_LIMIT}), total may be incomplete")
+         except Exception as e:
+             logger.warning(f"Error calculating credits: {e}")
 
      return AdminStatsOverview(
          total_users=total_users,
@@ -202,6 +342,21 @@ async def get_admin_stats_overview(
      )
 
 
+ def _normalize_datetime(dt_value) -> Optional[datetime]:
+     """Normalize datetime values from Firestore (can be datetime or ISO string)."""
+     if dt_value is None:
+         return None
+     if isinstance(dt_value, datetime):
+         return dt_value.replace(tzinfo=None)
+     if isinstance(dt_value, str):
+         try:
+             parsed = datetime.fromisoformat(dt_value.replace("Z", "+00:00"))
+             return parsed.replace(tzinfo=None)
+         except Exception:
+             return None
+     return None
+
+
  # =============================================================================
  # Audio Search Management Models
  # =============================================================================
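_normalize_datetime exists because Firestore can hand back either timezone-aware datetime objects or ISO-8601 strings for the same field, while the cutoffs above (seven_days_ago, thirty_days_ago) are naive datetimes; comparing aware to naive raises TypeError. A standalone sketch of the behavior, mirroring the helper with stdlib only:

from datetime import datetime, timezone

def _normalize_datetime(dt_value):
    # Mirror of the helper in the diff above, reproduced here for illustration.
    if dt_value is None:
        return None
    if isinstance(dt_value, datetime):
        return dt_value.replace(tzinfo=None)
    if isinstance(dt_value, str):
        try:
            return datetime.fromisoformat(dt_value.replace("Z", "+00:00")).replace(tzinfo=None)
        except Exception:
            return None
    return None

aware = datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
print(_normalize_datetime(aware))                   # 2024-05-01 12:00:00 (naive)
print(_normalize_datetime("2024-05-01T12:00:00Z"))  # 2024-05-01 12:00:00 (naive)
print(_normalize_datetime("not-a-date"))            # None

One caveat: replace(tzinfo=None) drops the offset without converting, so a non-UTC timestamp would keep its local wall time; Firestore timestamps are UTC in practice, so the naive comparisons hold.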
@@ -273,6 +428,7 @@ class CacheStatsResponse(BaseModel):
  async def list_audio_searches(
      limit: int = 50,
      status_filter: Optional[str] = None,
+     exclude_test: bool = True,
      auth_data: Tuple[str, UserType, int] = Depends(require_admin),
      user_service: UserService = Depends(get_user_service),
  ):
@@ -287,6 +443,7 @@ async def list_audio_searches(
      Args:
          limit: Maximum number of jobs to return (default 50)
          status_filter: Optional filter by job status (e.g., 'awaiting_audio_selection')
+         exclude_test: If True (default), exclude jobs from test users
      """
      from google.cloud.firestore_v1 import FieldFilter
 
@@ -306,6 +463,11 @@
 
      for doc in query.stream():
          data = doc.to_dict()
+
+         # Filter out test users if exclude_test is True
+         if exclude_test and is_test_email(data.get("user_email", "")):
+             continue
+
          state_data = data.get("state_data", {})
          audio_results = state_data.get("audio_search_results", [])
 
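The is_test_email helper comes from the new backend/utils/test_data.py (+27 lines in this release) and is not shown in this diff. Based on the stats docstring above, which defines test users by their @inbox.testmail.app addresses, it plausibly reduces to a suffix check; a hypothetical sketch, not the actual implementation:

# Hypothetical reconstruction -- the real backend/utils/test_data.py is not
# included in this diff; only the @inbox.testmail.app convention is documented.
TEST_EMAIL_SUFFIX = "@inbox.testmail.app"  # assumed constant name

def is_test_email(email: str) -> bool:
    """Return True if the address belongs to a synthetic test account."""
    return bool(email) and email.strip().lower().endswith(TEST_EMAIL_SUFFIX)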
@@ -609,6 +771,374 @@ class SendCompletionEmailResponse(BaseModel):
      message: str
 
 
+ # =============================================================================
+ # Job Files Endpoint
+ # =============================================================================
+
+ def _extract_files_recursive(
+     file_urls: Dict[str, Any],
+     storage: StorageService,
+     category: str = "",
+     expiration_minutes: int = 120,
+ ) -> List[FileInfo]:
+     """
+     Recursively extract files from nested file_urls structure.
+
+     Only includes entries that are GCS paths (gs://...).
+     Skips non-GCS entries like YouTube URLs.
+
+     Args:
+         file_urls: Dictionary of file URLs (may be nested)
+         storage: StorageService instance for generating signed URLs
+         category: Current category name (for nested calls)
+         expiration_minutes: How long signed URLs should be valid
+
+     Returns:
+         List of FileInfo objects with signed download URLs
+     """
+     files = []
+
+     for key, value in file_urls.items():
+         if isinstance(value, dict):
+             # Nested structure - recurse with key as category
+             nested_files = _extract_files_recursive(
+                 value,
+                 storage,
+                 category=key if not category else f"{category}.{key}",
+                 expiration_minutes=expiration_minutes,
+             )
+             files.extend(nested_files)
+         elif isinstance(value, str) and value.startswith("gs://"):
+             # GCS path - generate signed URL
+             try:
+                 signed_url = storage.generate_signed_url(value, expiration_minutes=expiration_minutes)
+                 # Extract filename from path
+                 name = value.split("/")[-1] if "/" in value else value
+                 files.append(FileInfo(
+                     name=name,
+                     path=value,
+                     download_url=signed_url,
+                     category=category,
+                     file_key=key,
+                 ))
+             except Exception as e:
+                 # Log but don't fail - file might not exist
+                 logger.warning(f"Failed to generate signed URL for {value}: {e}")
+         # Skip non-GCS values (e.g., youtube URLs, video IDs)
+
+     return files
+
+
+ @router.get("/jobs/{job_id}/files", response_model=JobFilesResponse)
+ async def get_job_files(
+     job_id: str,
+     auth_data: Tuple[str, UserType, int] = Depends(require_admin),
+ ):
+     """
+     Get all files for a job with signed download URLs.
+
+     Returns a list of all files associated with the job, including:
+     - Input audio file
+     - Stem separation results (vocals, instrumentals, etc.)
+     - Lyrics files (LRC, ASS, corrections JSON)
+     - Screen files (title, end screens)
+     - Video files (with/without vocals)
+     - Final output files (various formats)
+     - Package files (CDG, TXT zips)
+
+     Each file includes a signed URL that's valid for 2 hours.
+     Non-GCS entries (like YouTube URLs) are excluded.
+
+     Requires admin authentication.
+     """
+     job_manager = JobManager()
+     job = job_manager.get_job(job_id)
+
+     if not job:
+         raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+     # Extract all files with signed URLs
+     storage = StorageService()
+     file_urls = job.file_urls or {}
+
+     files = _extract_files_recursive(file_urls, storage)
+
+     return JobFilesResponse(
+         job_id=job.job_id,
+         artist=job.artist,
+         title=job.title,
+         files=files,
+         total_files=len(files),
+     )
+
+
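A sketch of how a client might consume the new files endpoint. The base URL, the /api/admin prefix, and the bearer-token header below are illustrative assumptions; only the route path and the response shape come from the diff:

import requests

BASE = "https://example.invalid/api/admin"  # hypothetical deployment URL/prefix
HEADERS = {"Authorization": "Bearer <admin-session-token>"}  # assumed auth scheme

resp = requests.get(f"{BASE}/jobs/abc123/files", headers=HEADERS)
resp.raise_for_status()
payload = resp.json()
print(payload["total_files"])
for f in payload["files"]:
    # category/file_key mirror the nested file_urls keys, e.g. "stems" / "instrumental_clean"
    print(f["category"], f["file_key"], f["download_url"][:60])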
+ @router.patch("/jobs/{job_id}", response_model=JobUpdateResponse)
+ async def update_job(
+     job_id: str,
+     request: Dict[str, Any],
+     auth_data: AuthResult = Depends(require_admin),
+ ):
+     """
+     Update editable fields of a job (admin only).
+
+     This endpoint allows admins to update certain job fields without
+     affecting the job's processing state. It's useful for:
+     - Correcting artist/title typos
+     - Changing user assignment
+     - Updating delivery settings (email, theme, etc.)
+
+     Editable fields:
+     - artist, title: Track metadata
+     - user_email: Job owner
+     - theme_id: Visual theme
+     - enable_cdg, enable_txt, enable_youtube_upload: Output options
+     - customer_email, customer_notes: Made-for-you order info
+     - brand_prefix: Brand code prefix
+     - non_interactive, prep_only: Workflow options
+     - discord_webhook_url: Notification URL
+     - youtube_description, youtube_description_template: YouTube settings
+
+     Non-editable fields (will return 400 error):
+     - job_id, status, progress: System-managed
+     - created_at, updated_at: Timestamps
+     - state_data, file_urls, timeline: Processing state
+     - worker_logs, worker_ids: Audit/tracking data
+
+     For status changes, use the reset endpoint instead.
+     """
+     admin_email = auth_data.user_email or "unknown"
+
+     # Check for non-editable fields in request
+     non_editable_fields = set(request.keys()) - EDITABLE_JOB_FIELDS
+     if non_editable_fields:
+         raise HTTPException(
+             status_code=400,
+             detail=f"The following fields are not editable: {', '.join(sorted(non_editable_fields))}. "
+                    f"Editable fields are: {', '.join(sorted(EDITABLE_JOB_FIELDS))}"
+         )
+
+     # Filter to only include provided fields (non-None values)
+     updates = {k: v for k, v in request.items() if v is not None}
+
+     if not updates:
+         raise HTTPException(
+             status_code=400,
+             detail="No valid fields provided for update. "
+                    f"Editable fields are: {', '.join(sorted(EDITABLE_JOB_FIELDS))}"
+         )
+
+     job_manager = JobManager()
+     job = job_manager.get_job(job_id)
+
+     if not job:
+         raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+     # Perform the update
+     success = job_manager.update_job(job_id, updates)
+
+     if not success:
+         raise HTTPException(
+             status_code=500,
+             detail="Failed to update job. Please try again."
+         )
+
+     # Log the admin action
+     logger.info(
+         f"Admin {admin_email} updated job {job_id}. "
+         f"Updated fields: {list(updates.keys())}"
+     )
+
+     return JobUpdateResponse(
+         status="success",
+         job_id=job_id,
+         updated_fields=list(updates.keys()),
+         message=f"Successfully updated {len(updates)} field(s)",
+     )
+
+
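Driving the PATCH endpoint might look like the sketch below (same hypothetical base URL and auth as the earlier example). Note that because the handler takes a raw Dict and drops None values, a field can be changed but never cleared to null through this endpoint.

import requests

BASE = "https://example.invalid/api/admin"
HEADERS = {"Authorization": "Bearer <admin-session-token>"}

# Only keys in EDITABLE_JOB_FIELDS are accepted; any other key returns 400.
resp = requests.patch(
    f"{BASE}/jobs/abc123",
    headers=HEADERS,
    json={"artist": "Example Artist", "title": "Example Title", "enable_cdg": True},
)
print(resp.json())  # {"status": "success", "updated_fields": [...], ...}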
+ # =============================================================================
+ # Job Reset Endpoint
+ # =============================================================================
+
+ class JobResetRequest(BaseModel):
+     """Request model for resetting a job to a specific state."""
+     target_state: str
+
+
+ class JobResetResponse(BaseModel):
+     """Response from job reset endpoint."""
+     status: str
+     job_id: str
+     previous_status: str
+     new_status: str
+     message: str
+     cleared_data: List[str]
+
+
+ # States that are allowed as reset targets
+ ALLOWED_RESET_STATES = {
+     "pending",
+     "awaiting_audio_selection",
+     "awaiting_review",
+     "awaiting_instrumental_selection",
+ }
+
+ # State data keys to clear for each reset target
+ # Keys not in this mapping are preserved
+ STATE_DATA_CLEAR_KEYS = {
+     "pending": [
+         "audio_search_results",
+         "audio_search_count",
+         "remote_search_id",
+         "audio_selection",
+         "review_complete",
+         "corrected_lyrics",
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+     "awaiting_audio_selection": [
+         "audio_selection",
+         "review_complete",
+         "corrected_lyrics",
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+     "awaiting_review": [
+         "review_complete",
+         "corrected_lyrics",
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+     "awaiting_instrumental_selection": [
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+ }
+
+
+ @router.post("/jobs/{job_id}/reset", response_model=JobResetResponse)
+ async def reset_job(
+     job_id: str,
+     request: JobResetRequest,
+     auth_data: AuthResult = Depends(require_admin),
+     user_service: UserService = Depends(get_user_service),
+ ):
+     """
+     Reset a job to a specific state for re-processing (admin only).
+
+     This endpoint allows admins to reset a job back to specific workflow
+     checkpoints to re-do parts of the processing. This is useful for:
+     - Re-running audio search after flacfetch updates
+     - Re-reviewing lyrics after corrections
+     - Re-selecting instrumental after hearing the result
+     - Restarting a failed job from the beginning
+
+     Allowed target states:
+     - pending: Restart from the beginning (clears all processing data)
+     - awaiting_audio_selection: Re-select audio source
+     - awaiting_review: Re-review lyrics (preserves audio stems)
+     - awaiting_instrumental_selection: Re-select instrumental (preserves review)
+
+     State data is cleared based on the target state to ensure a clean
+     re-processing from that point forward.
+     """
+     admin_email = auth_data.user_email or "unknown"
+     target_state = request.target_state.lower()
+
+     # Validate target state
+     if target_state not in ALLOWED_RESET_STATES:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Invalid target state '{target_state}'. "
+                    f"Allowed states are: {', '.join(sorted(ALLOWED_RESET_STATES))}"
+         )
+
+     job_manager = JobManager()
+     job = job_manager.get_job(job_id)
+
+     if not job:
+         raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+     previous_status = job.status
+
+     # Build update payload
+     updates = {
+         "status": target_state,
+         "progress": 0,
+         "message": f"Job reset to {target_state} by admin",
+         "updated_at": datetime.utcnow().isoformat(),
+     }
+
+     # Clear state data keys based on target state
+     keys_to_clear = STATE_DATA_CLEAR_KEYS.get(target_state, [])
+     cleared_keys = []
+     current_state_data = job.state_data or {}
+
+     for key in keys_to_clear:
+         if key in current_state_data:
+             cleared_keys.append(key)
+
+     # Add timeline event
+     timeline_event = {
+         "status": target_state,
+         "timestamp": datetime.utcnow().isoformat(),
+         "message": f"Admin reset from {previous_status} to {target_state}",
+     }
+
+     # Perform the update with state_data clearing
+     # We need to set the cleared keys to DELETE_FIELD
+     success = job_manager.update_job(job_id, updates)
+
+     if not success:
+         raise HTTPException(
+             status_code=500,
+             detail="Failed to reset job. Please try again."
+         )
+
+     # Clear the state data keys separately using direct Firestore update
+     from google.cloud.firestore_v1 import DELETE_FIELD, ArrayUnion
+
+     job_ref = user_service.db.collection("jobs").document(job_id)
+
+     if cleared_keys:
+         clear_updates = {}
+         for key in cleared_keys:
+             clear_updates[f"state_data.{key}"] = DELETE_FIELD
+
+         # Add timeline event
+         clear_updates["timeline"] = ArrayUnion([timeline_event])
+
+         job_ref.update(clear_updates)
+     else:
+         # Just add timeline event
+         job_ref.update({
+             "timeline": ArrayUnion([timeline_event])
+         })
+
+     # Log the admin action
+     logger.info(
+         f"Admin {admin_email} reset job {job_id} from {previous_status} to {target_state}. "
+         f"Cleared state_data keys: {cleared_keys}"
+     )
+
+     return JobResetResponse(
+         status="success",
+         job_id=job_id,
+         previous_status=previous_status,
+         new_status=target_state,
+         message=f"Job reset from {previous_status} to {target_state}",
+         cleared_data=cleared_keys,
+     )
+
+
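The reset targets form a hierarchy: each later checkpoint's clear-list is a suffix of the "pending" list, so resetting further back always clears at least as much state. A sketch of invoking the endpoint, under the same hypothetical client assumptions as the earlier examples:

import requests

BASE = "https://example.invalid/api/admin"
HEADERS = {"Authorization": "Bearer <admin-session-token>"}

# Send the job back to lyrics review; per STATE_DATA_CLEAR_KEYS the review,
# instrumental selection, and render progress are cleared while stems are kept.
resp = requests.post(
    f"{BASE}/jobs/abc123/reset",
    headers=HEADERS,
    json={"target_state": "awaiting_review"},
)
body = resp.json()
print(body["previous_status"], "->", body["new_status"], "cleared:", body["cleared_data"])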
  @router.get("/jobs/{job_id}/completion-message", response_model=CompletionMessageResponse)
  async def get_job_completion_message(
      job_id: str,
@@ -740,3 +1270,77 @@ async def send_job_completion_email(
          status_code=500,
          detail="Failed to send email. Check email service configuration."
      )
+
+
+ # =============================================================================
+ # User Impersonation
+ # =============================================================================
+
+ class ImpersonateUserResponse(BaseModel):
+     """Response from impersonate user endpoint."""
+     session_token: str
+     user_email: str
+     message: str
+
+
+ @router.post("/users/{email}/impersonate", response_model=ImpersonateUserResponse)
+ async def impersonate_user(
+     email: str,
+     auth_data: Tuple[str, UserType, int] = Depends(require_admin),
+     user_service: UserService = Depends(get_user_service),
+ ):
+     """
+     Create a session token to impersonate a user (admin only).
+
+     This allows admins to view the application exactly as a specific user would see it.
+     The admin's original session remains valid and can be restored client-side.
+
+     Security:
+     - Only admins can impersonate
+     - Creates a real session (auditable in Firestore)
+     - Impersonation is logged for security audit
+
+     Args:
+         email: Email of the user to impersonate
+
+     Returns:
+         session_token: A valid session token for the target user
+         user_email: The impersonated user's email
+         message: Success message
+     """
+     admin_email = auth_data[0]
+     target_email = email.lower()
+
+     # Cannot impersonate yourself
+     if target_email == admin_email.lower():
+         raise HTTPException(
+             status_code=400,
+             detail="Cannot impersonate yourself"
+         )
+
+     # Verify target user exists
+     target_user = user_service.get_user(target_email)
+     if not target_user:
+         raise HTTPException(
+             status_code=404,
+             detail=f"User {target_email} not found"
+         )
+
+     # Create a real session for the target user
+     session = user_service.create_session(
+         user_email=target_email,
+         ip_address=None,  # Not tracking IP for impersonation
+         user_agent=f"Impersonation by {admin_email}",
+     )
+
+     # Log impersonation for audit trail
+     logger.info(
+         f"IMPERSONATION: Admin {admin_email} started impersonating user {target_email}. "
+         f"Session token prefix: {session.token[:12]}..."
+     )
+
+     return ImpersonateUserResponse(
+         session_token=session.token,
+         user_email=target_email,
+         message=f"Now impersonating {target_email}",
+     )
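To close, a sketch of the impersonation flow from an admin client, under the same hypothetical URL and auth assumptions as above. Ending impersonation is purely a client-side token swap; nothing in this endpoint invalidates the minted session, which matches the docstring's "restored client-side":

import requests

BASE = "https://example.invalid/api/admin"
ADMIN_HEADERS = {"Authorization": "Bearer <admin-session-token>"}

# Mint a real session for the target user; the admin's own session stays valid.
resp = requests.post(f"{BASE}/users/someone@example.com/impersonate", headers=ADMIN_HEADERS)
token = resp.json()["session_token"]

# Act as the user, then simply switch back to the admin token when done.
user_headers = {"Authorization": f"Bearer {token}"}
me = requests.get("https://example.invalid/api/users/me", headers=user_headers)  # assumed route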