PyPI - karaoke-gen - Versions diffs - 0.76.20__py3-none-any.whl → 0.81.1__py3-none-any.whl - Mend

karaoke-gen 0.76.20__py3-none-any.whl → 0.81.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

lyrics_transcriber/core/controller.py CHANGED Viewed

@@ -7,6 +7,7 @@ from lyrics_transcriber.types import LyricsData, TranscriptionResult, Correction
 from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
 from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
 from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
+from lyrics_transcriber.transcribers.local_whisper import LocalWhisperTranscriber, LocalWhisperConfig
 from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
 from lyrics_transcriber.lyrics.genius import GeniusProvider
 from lyrics_transcriber.lyrics.spotify import SpotifyProvider
@@ -206,6 +207,34 @@ class LyricsTranscriber:
         else:
             self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
+        # Local Whisper - lowest priority, fallback when cloud services unavailable
+        if self.transcriber_config.enable_local_whisper:
+            # Check if whisper-timestamped is available
+            try:
+                import whisper_timestamped  # noqa: F401
+                self.logger.debug("Initializing LocalWhisper transcriber")
+                transcribers["local_whisper"] = {
+                    "instance": LocalWhisperTranscriber(
+                        cache_dir=self.output_config.cache_dir,
+                        config=LocalWhisperConfig(
+                            model_size=self.transcriber_config.local_whisper_model_size,
+                            device=self.transcriber_config.local_whisper_device,
+                            cache_dir=self.transcriber_config.local_whisper_cache_dir,
+                            language=self.transcriber_config.local_whisper_language,
+                        ),
+                        logger=self.logger,
+                    ),
+                    "priority": 3,  # Local Whisper has lowest priority (fallback)
+                }
+            except ImportError:
+                self.logger.debug(
+                    "Skipping LocalWhisper transcriber - whisper-timestamped not installed. "
+                    "Install with: pip install karaoke-gen[local-whisper]"
+                )
+        else:
+            self.logger.debug("Skipping LocalWhisper transcriber - disabled via enable_local_whisper=False")
         return transcribers
     def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
@@ -442,7 +471,7 @@ class LyricsTranscriber:
         # Whisper/RunPod status
         has_runpod_key = bool(self.transcriber_config.runpod_api_key)
         has_whisper_id = bool(self.transcriber_config.whisper_runpod_id)
         if has_runpod_key and has_whisper_id:
             self.logger.debug("  - Whisper (RunPod): CONFIGURED (API key and endpoint ID provided)")
         elif has_runpod_key:
@@ -452,6 +481,19 @@ class LyricsTranscriber:
         else:
             self.logger.debug("  - Whisper (RunPod): NOT CONFIGURED (missing RUNPOD_API_KEY and WHISPER_RUNPOD_ID)")
+        # Local Whisper status
+        if self.transcriber_config.enable_local_whisper:
+            try:
+                import whisper_timestamped  # noqa: F401
+                self.logger.debug(
+                    f"  - LocalWhisper: AVAILABLE (model={self.transcriber_config.local_whisper_model_size}, "
+                    f"device={self.transcriber_config.local_whisper_device or 'auto'})"
+                )
+            except ImportError:
+                self.logger.debug("  - LocalWhisper: ENABLED but whisper-timestamped not installed")
+        else:
+            self.logger.debug("  - LocalWhisper: DISABLED (enable_local_whisper=False)")
     def correct_lyrics(self) -> None:
         """Run lyrics correction using transcription and internet lyrics."""
         self.logger.info("Starting lyrics correction process")

lyrics_transcriber/correction/agentic/providers/config.py CHANGED Viewed

@@ -18,6 +18,10 @@ class ProviderConfig:
     privacy_mode: bool
     cache_dir: str
+    # GCP/Vertex AI settings
+    gcp_project_id: Optional[str] = None
+    gcp_location: str = "us-central1"
     request_timeout_seconds: float = 30.0
     max_retries: int = 2
     retry_backoff_base_seconds: float = 0.2
@@ -46,6 +50,8 @@ class ProviderConfig:
             openrouter_api_key=os.getenv("OPENROUTER_API_KEY"),
             privacy_mode=os.getenv("PRIVACY_MODE", "false").lower() in {"1", "true", "yes"},
             cache_dir=cache_dir,
+            gcp_project_id=os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCP_PROJECT_ID"),
+            gcp_location=os.getenv("GCP_LOCATION", "us-central1"),
             request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "30.0")),
             max_retries=int(os.getenv("AGENTIC_MAX_RETRIES", "2")),
             retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "0.2")),

lyrics_transcriber/correction/agentic/providers/model_factory.py CHANGED Viewed

@@ -155,6 +155,8 @@ class ModelFactory:
                 return self._create_openai_model(model_name, callbacks, config)
             elif provider == "anthropic":
                 return self._create_anthropic_model(model_name, callbacks, config)
+            elif provider in ("vertexai", "google"):
+                return self._create_vertexai_model(model_name, callbacks, config)
             else:
                 raise ValueError(f"Unsupported provider: {provider}")
         except ImportError as e:
@@ -197,7 +199,7 @@ class ModelFactory:
     ) -> Any:
         """Create ChatAnthropic model."""
         from langchain_anthropic import ChatAnthropic
         model = ChatAnthropic(
             model=model_name,
             timeout=config.request_timeout_seconds,
@@ -207,3 +209,24 @@ class ModelFactory:
         logger.debug(f"🤖 Created Anthropic model: {model_name}")
         return model
+    def _create_vertexai_model(
+        self, model_name: str, callbacks: List[Any], config: ProviderConfig
+    ) -> Any:
+        """Create ChatVertexAI model for Google Gemini via Vertex AI.
+        Uses Application Default Credentials (ADC) for authentication.
+        In Cloud Run, this uses the service account automatically.
+        Locally, run: gcloud auth application-default login
+        Args:
+            model_name: Model identifier, passed to ChatVertexAI as ``model``.
+            callbacks: Callback handlers forwarded to the model unchanged.
+            config: Provider settings; only ``gcp_project_id``, ``gcp_location``
+                and ``max_retries`` are read here.
+        Returns:
+            The constructed ChatVertexAI instance.
+        """
+        # Imported inside the method rather than at module top; the dispatching
+        # code has an `except ImportError` handler, presumably so a missing
+        # optional dependency is reported there - TODO confirm.
+        from langchain_google_vertexai import ChatVertexAI
+        # NOTE(review): config.request_timeout_seconds is not forwarded here,
+        # whereas the Anthropic factory passes timeout=config.request_timeout_seconds.
+        # Confirm whether that omission is intentional.
+        model = ChatVertexAI(
+            model=model_name,
+            project=config.gcp_project_id,
+            location=config.gcp_location,
+            max_retries=config.max_retries,
+            callbacks=callbacks,
+        )
+        logger.debug(f"🤖 Created Vertex AI model: {model_name} (project={config.gcp_project_id})")
+        return model

lyrics_transcriber/correction/agentic/router.py CHANGED Viewed

@@ -5,6 +5,9 @@ from typing import Dict, Any
 from .providers.config import ProviderConfig
+# Default model for cloud deployments - Gemini 3 Flash via Vertex AI
+DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"
 class ModelRouter:
     """Rules-based routing by gap type/length/uncertainty (scaffold)."""
@@ -14,22 +17,23 @@ class ModelRouter:
     def choose_model(self, gap_type: str, uncertainty: float) -> str:
         """Choose appropriate model based on gap characteristics.
         Returns model identifier in format "provider/model" for LangChain:
-        - "ollama/gpt-oss:latest" for local Ollama models
-        - "openai/gpt-4" for OpenAI models
+        - "vertexai/gemini-3-flash-preview" for Gemini via Vertex AI (default)
+        - "ollama/llama3.2:latest" for local Ollama models
+        - "openai/gpt-4" for OpenAI models
         - "anthropic/claude-3-sonnet-20240229" for Anthropic models
         """
-        # Simple baseline per technical guidance
+        # Check for explicit model override from environment
+        env_model = os.getenv("AGENTIC_AI_MODEL")
+        if env_model:
+            return env_model
+        # Privacy mode: use local Ollama
         if self._config.privacy_mode:
-            # Use the actual model from env, or default to a common Ollama model
-            return os.getenv("AGENTIC_AI_MODEL", "ollama/gpt-oss:latest")
-        # For high-uncertainty gaps, use Claude (best reasoning)
-        if uncertainty > 0.5:
-            return "anthropic/claude-3-sonnet-20240229"
-        # Default to GPT-4 for general cases
-        return "openai/gpt-4"
+            return "ollama/llama3.2:latest"
+        # Default to Gemini 3 Flash for all cases (fast, cost-effective)
+        return DEFAULT_CLOUD_MODEL

lyrics_transcriber/frontend/.gitignore CHANGED Viewed

@@ -21,3 +21,4 @@ dist-ssr
 *.njsproj
 *.sln
 *.sw?
+test-results/

lyrics_transcriber/frontend/e2e/agentic-corrections.spec.ts ADDED Viewed

@@ -0,0 +1,207 @@
+import { test, expect } from '@playwright/test';
+import * as path from 'path';
+import { fileURLToPath } from 'url';
+// Get __dirname equivalent in ESM
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+/**
+ * E2E tests for the agentic correction workflow in the lyrics transcriber frontend.
+ *
+ * These tests verify:
+ * 1. The UI loads in read-only mode and shows the Load File button
+ * 2. Fixture correction data can be loaded through the file chooser
+ * 3. The corrected transcription (including the corrected word) is rendered
+ * 4. The Review Mode toggle and batch actions work when the toggle is available
+ */
+/**
+ * Load the agentic-correction fixture through the app's "Load File" button.
+ * Resolves once the fixture text "Hello," is visible on the page.
+ */
+async function loadFixtureData(page: import('@playwright/test').Page) {
+  const fixturePath = path.join(__dirname, 'fixtures', 'agentic-correction-data.json');
+  // Arm the file-chooser listener first, then click, and wait for both.
+  const [fileChooser] = await Promise.all([
+    page.waitForEvent('filechooser'),
+    page.getByRole('button', { name: /load file/i }).click(),
+  ]);
+  // Feed the fixture to the chooser the click opened.
+  await fileChooser.setFiles(fixturePath);
+  // Data counts as loaded when expected fixture content appears.
+  await expect(page.getByText('Hello,')).toBeVisible({ timeout: 5000 });
+}
+test.describe('Agentic Correction Workflow', () => {
+  // Every test starts on the app shell with its heading rendered.
+  test.beforeEach(async ({ page }) => {
+    await page.goto('/');
+    const appHeading = page.getByText('Lyrics Correction Review');
+    await expect(appHeading).toBeVisible();
+  });
+  test('should load the app in read-only mode', async ({ page }) => {
+    const readOnlyNotice = page.getByText('Running in read-only mode');
+    const loadFileButton = page.getByRole('button', { name: /load file/i });
+    // Both the read-only alert and the file loader must be on screen.
+    await expect(readOnlyNotice).toBeVisible();
+    await expect(loadFileButton).toBeVisible();
+  });
+  test('should load correction data from JSON file', async ({ page }) => {
+    await loadFixtureData(page);
+    const loadingMessage = page.getByText('Loading Lyrics Correction Review...');
+    const fixtureWord = page.getByText('Hello,');
+    // The loading state must be gone and fixture content shown.
+    await expect(loadingMessage).not.toBeVisible();
+    await expect(fixtureWord).toBeVisible();
+  });
+  test('should render transcription view after loading data', async ({ page }) => {
+    await loadFixtureData(page);
+    const fixtureWord = page.getByText('Hello,');
+    const transcriptionHeader = page.getByText('Corrected Transcription');
+    // Transcription content first, then the section header.
+    await expect(fixtureWord).toBeVisible();
+    await expect(transcriptionHeader).toBeVisible();
+  });
+});
+test.describe('UI Components', () => {
+  test('should show Load File button on initial load', async ({ page }) => {
+    await page.goto('/');
+    // The file loader button and its upload glyph should both be rendered.
+    await expect(page.getByRole('button', { name: /load file/i })).toBeVisible();
+    await expect(page.locator('svg[data-testid="UploadFileIcon"]')).toBeVisible();
+  });
+  test('should show read-only mode banner', async ({ page }) => {
+    await page.goto('/');
+    const banner = page.getByRole('alert');
+    const bannerText = page.getByText(/read-only mode/i);
+    // An alert element exists and the read-only wording is shown.
+    await expect(banner).toBeVisible();
+    await expect(bannerText).toBeVisible();
+  });
+  test('should have correction metrics component', async ({ page }) => {
+    await page.goto('/');
+    // Only checks that at least one MUI Paper surface is rendered.
+    const firstPaper = page.locator('.MuiPaper-root').first();
+    await expect(firstPaper).toBeVisible();
+  });
+});
+test.describe('File Upload Flow', () => {
+  test('should open file dialog when clicking Load File', async ({ page }) => {
+    await page.goto('/');
+    // Start waiting BEFORE the click: waitForEvent only observes events emitted
+    // after it is called, so registering it after the click would miss the
+    // chooser and merely sit out its timeout.
+    const fileChooserPromise = page.waitForEvent('filechooser', { timeout: 5000 });
+    // Click the button
+    await page.getByRole('button', { name: /load file/i }).click();
+    // Resolves with the FileChooser when the dialog opened; rejects (failing
+    // the test) if no chooser appears within the timeout.
+    const fileChooser = await fileChooserPromise;
+    expect(fileChooser).toBeTruthy();
+  });
+});
+test.describe('Review Mode', () => {
+  // Every test starts on the app shell with its heading rendered.
+  test.beforeEach(async ({ page }) => {
+    await page.goto('/');
+    await expect(page.getByText('Lyrics Correction Review')).toBeVisible();
+  });
+  test('should show Review Mode toggle when agentic data is loaded', async ({ page }) => {
+    await loadFixtureData(page);
+    // Wait for content to load
+    await expect(page.getByText('Hello,')).toBeVisible();
+    // The Review Mode toggle should be visible when agentic corrections are present
+    // It will appear as "Review Off" chip initially (only in non-read-only mode with agentic data)
+    // Note: In read-only mode, the toggle won't appear
+    const reviewChip = page.getByText(/Review Off|Review Mode/i);
+    const chipCount = await reviewChip.count();
+    // Report "skipped" instead of passing without asserting anything when the
+    // toggle is absent (expected when files are loaded in read-only mode).
+    test.skip(chipCount === 0, 'Review toggle is not rendered in read-only mode');
+    await expect(reviewChip.first()).toBeVisible();
+  });
+  test('should show batch actions panel when Review Mode is enabled', async ({ page }) => {
+    await loadFixtureData(page);
+    // Wait for content to load
+    await expect(page.getByText('Hello,')).toBeVisible();
+    // Find the Review Mode toggle (only visible in non-read-only mode)
+    const reviewToggle = page.getByText(/Review Off/i);
+    // isVisible() answers immediately; its timeout option is ignored, so none is passed.
+    const toggleVisible = await reviewToggle.isVisible().catch(() => false);
+    test.skip(!toggleVisible, 'Review toggle is not rendered in read-only mode');
+    await reviewToggle.click();
+    // Wait for the batch actions panel to appear
+    await expect(page.getByRole('button', { name: /Accept High Confidence/i })).toBeVisible({ timeout: 5000 });
+    await expect(page.getByRole('button', { name: /Accept All/i })).toBeVisible();
+    await expect(page.getByRole('button', { name: /Revert All/i })).toBeVisible();
+  });
+  test('should render corrected words with original text preview', async ({ page }) => {
+    await loadFixtureData(page);
+    // Wait for transcription content to load
+    await expect(page.getByText('Hello,')).toBeVisible();
+    // Verify correction-related content is rendered
+    // The corrected word "now" should be visible (from fixture: "you're" -> "now")
+    await expect(page.getByText('now')).toBeVisible();
+  });
+  test('should toggle Review Mode on and off', async ({ page }) => {
+    await loadFixtureData(page);
+    // Wait for content to load
+    await expect(page.getByText('Hello,')).toBeVisible();
+    // Find the Review toggle (only visible in non-read-only mode)
+    const reviewOff = page.getByText(/Review Off/i);
+    // isVisible() answers immediately; its timeout option is ignored, so none is passed.
+    const toggleVisible = await reviewOff.isVisible().catch(() => false);
+    test.skip(!toggleVisible, 'Review toggle is not rendered in read-only mode');
+    // Click to enable Review Mode
+    await reviewOff.click();
+    // Should now show "Review Mode" label (in filled state)
+    await expect(page.getByText(/Review Mode/i).first()).toBeVisible();
+    // Click to disable Review Mode
+    await page.getByText(/Review Mode/i).first().click();
+    // Should show "Review Off" again
+    await expect(page.getByText(/Review Off/i)).toBeVisible();
+  });
+});

lyrics_transcriber/frontend/e2e/fixtures/agentic-correction-data.json ADDED Viewed

@@ -0,0 +1,226 @@
+{
+  "original_segments": [
+    {
+      "id": "seg-1",
+      "text": "Hello, is it me you're looking for?",
+      "words": [
+        {"id": "w1", "text": "Hello,", "start_time": 0.0, "end_time": 0.5},
+        {"id": "w2", "text": "is", "start_time": 0.5, "end_time": 0.7},
+        {"id": "w3", "text": "it", "start_time": 0.7, "end_time": 0.9},
+        {"id": "w4", "text": "me", "start_time": 0.9, "end_time": 1.1},
+        {"id": "w5", "text": "you're", "start_time": 1.1, "end_time": 1.3},
+        {"id": "w6", "text": "looking", "start_time": 1.3, "end_time": 1.6},
+        {"id": "w7", "text": "for?", "start_time": 1.6, "end_time": 1.9}
+      ],
+      "start_time": 0.0,
+      "end_time": 1.9
+    },
+    {
+      "id": "seg-2",
+      "text": "I can see it in your eyes",
+      "words": [
+        {"id": "w8", "text": "I", "start_time": 2.0, "end_time": 2.2},
+        {"id": "w9", "text": "can", "start_time": 2.2, "end_time": 2.4},
+        {"id": "w10", "text": "see", "start_time": 2.4, "end_time": 2.6},
+        {"id": "w11", "text": "it", "start_time": 2.6, "end_time": 2.8},
+        {"id": "w12", "text": "in", "start_time": 2.8, "end_time": 3.0},
+        {"id": "w13", "text": "your", "start_time": 3.0, "end_time": 3.2},
+        {"id": "w14", "text": "eyes", "start_time": 3.2, "end_time": 3.5}
+      ],
+      "start_time": 2.0,
+      "end_time": 3.5
+    },
+    {
+      "id": "seg-3",
+      "text": "I can see it in your smile",
+      "words": [
+        {"id": "w15", "text": "I", "start_time": 4.0, "end_time": 4.2},
+        {"id": "w16", "text": "can", "start_time": 4.2, "end_time": 4.4},
+        {"id": "w17", "text": "see", "start_time": 4.4, "end_time": 4.6},
+        {"id": "w18", "text": "it", "start_time": 4.6, "end_time": 4.8},
+        {"id": "w19", "text": "in", "start_time": 4.8, "end_time": 5.0},
+        {"id": "w20", "text": "your", "start_time": 5.0, "end_time": 5.2},
+        {"id": "w21", "text": "smile", "start_time": 5.2, "end_time": 5.5}
+      ],
+      "start_time": 4.0,
+      "end_time": 5.5
+    }
+  ],
+  "reference_lyrics": {
+    "genius": {
+      "segments": [
+        {
+          "id": "ref-seg-1",
+          "text": "Hello, is it me you're looking for?",
+          "words": [
+            {"id": "rw1", "text": "Hello,", "start_time": null, "end_time": null},
+            {"id": "rw2", "text": "is", "start_time": null, "end_time": null},
+            {"id": "rw3", "text": "it", "start_time": null, "end_time": null},
+            {"id": "rw4", "text": "me", "start_time": null, "end_time": null},
+            {"id": "rw5", "text": "you're", "start_time": null, "end_time": null},
+            {"id": "rw6", "text": "looking", "start_time": null, "end_time": null},
+            {"id": "rw7", "text": "for?", "start_time": null, "end_time": null}
+          ],
+          "start_time": null,
+          "end_time": null
+        }
+      ],
+      "metadata": {
+        "source": "genius",
+        "track_name": "Hello",
+        "artist_names": "Lionel Richie",
+        "album_name": "Can't Slow Down",
+        "duration_ms": 280000,
+        "explicit": false,
+        "language": "en",
+        "is_synced": false,
+        "lyrics_provider": "Genius",
+        "lyrics_provider_id": "12345",
+        "provider_metadata": {}
+      },
+      "source": "genius"
+    }
+  },
+  "anchor_sequences": [
+    {
+      "id": "anchor-1",
+      "transcribed_word_ids": ["w1", "w2", "w3", "w4"],
+      "transcription_position": 0,
+      "reference_positions": {"genius": 0},
+      "reference_word_ids": {"genius": ["rw1", "rw2", "rw3", "rw4"]},
+      "confidence": 0.95
+    }
+  ],
+  "gap_sequences": [
+    {
+      "id": "gap-1",
+      "transcribed_word_ids": ["w5", "w6", "w7"],
+      "transcription_position": 4,
+      "preceding_anchor_id": "anchor-1",
+      "following_anchor_id": null,
+      "reference_word_ids": {"genius": ["rw5", "rw6", "rw7"]}
+    }
+  ],
+  "resized_segments": [],
+  "corrections_made": 3,
+  "confidence": 0.85,
+  "corrections": [
+    {
+      "id": "corr-1",
+      "handler": "AgenticCorrector",
+      "original_word": "you're",
+      "corrected_word": "now",
+      "segment_id": "seg-1",
+      "word_id": "w5",
+      "corrected_word_id": "cw5",
+      "source": "genius",
+      "confidence": 0.92,
+      "reason": "Transcription error - misheard word, reference lyrics confirm it should be 'now' [SOUND_ALIKE] (confidence: 92%)",
+      "alternatives": {"your": 0.65},
+      "is_deletion": false,
+      "split_index": null,
+      "split_total": null,
+      "reference_positions": {"genius": 4},
+      "length": 1
+    },
+    {
+      "id": "corr-2",
+      "handler": "AgenticCorrector",
+      "original_word": "it",
+      "corrected_word": "",
+      "segment_id": "seg-2",
+      "word_id": "w11",
+      "corrected_word_id": null,
+      "source": "genius",
+      "confidence": 0.88,
+      "reason": "Extra filler word detected, not in reference lyrics [EXTRA_WORDS] (confidence: 88%)",
+      "alternatives": {},
+      "is_deletion": true,
+      "split_index": null,
+      "split_total": null,
+      "reference_positions": {},
+      "length": 1
+    },
+    {
+      "id": "corr-3",
+      "handler": "AgenticCorrector",
+      "original_word": "I",
+      "corrected_word": "",
+      "segment_id": "seg-3",
+      "word_id": "w15",
+      "corrected_word_id": null,
+      "source": "genius",
+      "confidence": 0.75,
+      "reason": "Extra word at start of line, not in reference [EXTRA_WORDS] (confidence: 75%)",
+      "alternatives": {},
+      "is_deletion": true,
+      "split_index": null,
+      "split_total": null,
+      "reference_positions": {},
+      "length": 1
+    }
+  ],
+  "corrected_segments": [
+    {
+      "id": "seg-1",
+      "text": "Hello, is it me you're looking for?",
+      "words": [
+        {"id": "w1", "text": "Hello,", "start_time": 0.0, "end_time": 0.5},
+        {"id": "w2", "text": "is", "start_time": 0.5, "end_time": 0.7},
+        {"id": "w3", "text": "it", "start_time": 0.7, "end_time": 0.9},
+        {"id": "w4", "text": "me", "start_time": 0.9, "end_time": 1.1},
+        {"id": "cw5", "text": "now", "start_time": 1.1, "end_time": 1.3, "created_during_correction": true},
+        {"id": "w6", "text": "looking", "start_time": 1.3, "end_time": 1.6},
+        {"id": "w7", "text": "for?", "start_time": 1.6, "end_time": 1.9}
+      ],
+      "start_time": 0.0,
+      "end_time": 1.9
+    },
+    {
+      "id": "seg-2",
+      "text": "I can see in your eyes",
+      "words": [
+        {"id": "w8", "text": "I", "start_time": 2.0, "end_time": 2.2},
+        {"id": "w9", "text": "can", "start_time": 2.2, "end_time": 2.4},
+        {"id": "w10", "text": "see", "start_time": 2.4, "end_time": 2.6},
+        {"id": "w12", "text": "in", "start_time": 2.8, "end_time": 3.0},
+        {"id": "w13", "text": "your", "start_time": 3.0, "end_time": 3.2},
+        {"id": "w14", "text": "eyes", "start_time": 3.2, "end_time": 3.5}
+      ],
+      "start_time": 2.0,
+      "end_time": 3.5
+    },
+    {
+      "id": "seg-3",
+      "text": "can see it in your smile",
+      "words": [
+        {"id": "w16", "text": "can", "start_time": 4.2, "end_time": 4.4},
+        {"id": "w17", "text": "see", "start_time": 4.4, "end_time": 4.6},
+        {"id": "w18", "text": "it", "start_time": 4.6, "end_time": 4.8},
+        {"id": "w19", "text": "in", "start_time": 4.8, "end_time": 5.0},
+        {"id": "w20", "text": "your", "start_time": 5.0, "end_time": 5.2},
+        {"id": "w21", "text": "smile", "start_time": 5.2, "end_time": 5.5}
+      ],
+      "start_time": 4.0,
+      "end_time": 5.5
+    }
+  ],
+  "metadata": {
+    "anchor_sequences_count": 1,
+    "gap_sequences_count": 1,
+    "total_words": 21,
+    "correction_ratio": 0.143,
+    "audio_filepath": "/path/to/test-audio.mp3",
+    "audio_hash": "test-audio-hash-123",
+    "available_handlers": [
+      {"id": "agentic", "name": "AgenticCorrector", "description": "AI-powered agentic correction", "enabled": true},
+      {"id": "sound_alike", "name": "SoundAlikeHandler", "description": "Fixes sound-alike errors", "enabled": true}
+    ],
+    "enabled_handlers": ["AgenticCorrector", "SoundAlikeHandler"]
+  },
+  "correction_steps": [],
+  "word_id_map": {
+    "w5": "cw5"
+  },
+  "segment_id_map": {}
+}

lyrics_transcriber/frontend/package.json CHANGED Viewed

@@ -11,7 +11,10 @@
     "lint": "eslint .",
     "preview": "vite preview",
     "predeploy": "npm run build-prod",
-    "deploy": "gh-pages -d dist"
+    "deploy": "gh-pages -d dist",
+    "test": "playwright test",
+    "test:ui": "playwright test --ui",
+    "test:headed": "playwright test --headed"
   },
   "dependencies": {
     "@emotion/react": "^11.14.0",

lyrics_transcriber/frontend/playwright.config.ts CHANGED Viewed

@@ -61,7 +61,7 @@ export default defineConfig({
   // Run local dev server before starting tests
   webServer: {
-    command: 'yarn dev',
+    command: 'npm run dev',
     url: 'http://localhost:5173',
     reuseExistingServer: !process.env.CI,
     timeout: 120000,

karaoke-gen 0.76.20__py3-none-any.whl → 0.81.1__py3-none-any.whl

karaoke-gen 0.76.20__py3-none-any.whl → 0.81.1__py3-none-any.whl