karaoke-gen 0.76.20__py3-none-any.whl → 0.81.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. karaoke_gen/instrumental_review/static/index.html +179 -16
  2. karaoke_gen/karaoke_gen.py +5 -4
  3. karaoke_gen/lyrics_processor.py +25 -6
  4. {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.81.1.dist-info}/METADATA +79 -3
  5. {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.81.1.dist-info}/RECORD +26 -23
  6. lyrics_transcriber/core/config.py +8 -0
  7. lyrics_transcriber/core/controller.py +43 -1
  8. lyrics_transcriber/correction/agentic/providers/config.py +6 -0
  9. lyrics_transcriber/correction/agentic/providers/model_factory.py +24 -1
  10. lyrics_transcriber/correction/agentic/router.py +17 -13
  11. lyrics_transcriber/frontend/.gitignore +1 -0
  12. lyrics_transcriber/frontend/e2e/agentic-corrections.spec.ts +207 -0
  13. lyrics_transcriber/frontend/e2e/fixtures/agentic-correction-data.json +226 -0
  14. lyrics_transcriber/frontend/package.json +4 -1
  15. lyrics_transcriber/frontend/playwright.config.ts +1 -1
  16. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +34 -30
  17. lyrics_transcriber/frontend/src/components/Header.tsx +141 -34
  18. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +120 -3
  19. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +11 -1
  20. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +122 -35
  21. lyrics_transcriber/frontend/src/components/shared/types.ts +6 -0
  22. lyrics_transcriber/output/generator.py +50 -3
  23. lyrics_transcriber/transcribers/local_whisper.py +260 -0
  24. {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.81.1.dist-info}/WHEEL +0 -0
  25. {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.81.1.dist-info}/entry_points.txt +0 -0
  26. {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.81.1.dist-info}/licenses/LICENSE +0 -0
@@ -7,6 +7,7 @@ from lyrics_transcriber.types import LyricsData, TranscriptionResult, Correction
7
7
  from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
8
8
  from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
9
9
  from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
10
+ from lyrics_transcriber.transcribers.local_whisper import LocalWhisperTranscriber, LocalWhisperConfig
10
11
  from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
11
12
  from lyrics_transcriber.lyrics.genius import GeniusProvider
12
13
  from lyrics_transcriber.lyrics.spotify import SpotifyProvider
@@ -206,6 +207,34 @@ class LyricsTranscriber:
206
207
  else:
207
208
  self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
208
209
 
210
+ # Local Whisper - lowest priority, fallback when cloud services unavailable
211
+ if self.transcriber_config.enable_local_whisper:
212
+ # Check if whisper-timestamped is available
213
+ try:
214
+ import whisper_timestamped # noqa: F401
215
+
216
+ self.logger.debug("Initializing LocalWhisper transcriber")
217
+ transcribers["local_whisper"] = {
218
+ "instance": LocalWhisperTranscriber(
219
+ cache_dir=self.output_config.cache_dir,
220
+ config=LocalWhisperConfig(
221
+ model_size=self.transcriber_config.local_whisper_model_size,
222
+ device=self.transcriber_config.local_whisper_device,
223
+ cache_dir=self.transcriber_config.local_whisper_cache_dir,
224
+ language=self.transcriber_config.local_whisper_language,
225
+ ),
226
+ logger=self.logger,
227
+ ),
228
+ "priority": 3, # Local Whisper has lowest priority (fallback)
229
+ }
230
+ except ImportError:
231
+ self.logger.debug(
232
+ "Skipping LocalWhisper transcriber - whisper-timestamped not installed. "
233
+ "Install with: pip install karaoke-gen[local-whisper]"
234
+ )
235
+ else:
236
+ self.logger.debug("Skipping LocalWhisper transcriber - disabled via enable_local_whisper=False")
237
+
209
238
  return transcribers
210
239
 
211
240
  def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
@@ -442,7 +471,7 @@ class LyricsTranscriber:
442
471
  # Whisper/RunPod status
443
472
  has_runpod_key = bool(self.transcriber_config.runpod_api_key)
444
473
  has_whisper_id = bool(self.transcriber_config.whisper_runpod_id)
445
-
474
+
446
475
  if has_runpod_key and has_whisper_id:
447
476
  self.logger.debug(" - Whisper (RunPod): CONFIGURED (API key and endpoint ID provided)")
448
477
  elif has_runpod_key:
@@ -452,6 +481,19 @@ class LyricsTranscriber:
452
481
  else:
453
482
  self.logger.debug(" - Whisper (RunPod): NOT CONFIGURED (missing RUNPOD_API_KEY and WHISPER_RUNPOD_ID)")
454
483
 
484
+ # Local Whisper status
485
+ if self.transcriber_config.enable_local_whisper:
486
+ try:
487
+ import whisper_timestamped # noqa: F401
488
+ self.logger.debug(
489
+ f" - LocalWhisper: AVAILABLE (model={self.transcriber_config.local_whisper_model_size}, "
490
+ f"device={self.transcriber_config.local_whisper_device or 'auto'})"
491
+ )
492
+ except ImportError:
493
+ self.logger.debug(" - LocalWhisper: ENABLED but whisper-timestamped not installed")
494
+ else:
495
+ self.logger.debug(" - LocalWhisper: DISABLED (enable_local_whisper=False)")
496
+
455
497
  def correct_lyrics(self) -> None:
456
498
  """Run lyrics correction using transcription and internet lyrics."""
457
499
  self.logger.info("Starting lyrics correction process")
@@ -18,6 +18,10 @@ class ProviderConfig:
18
18
  privacy_mode: bool
19
19
  cache_dir: str
20
20
 
21
+ # GCP/Vertex AI settings
22
+ gcp_project_id: Optional[str] = None
23
+ gcp_location: str = "us-central1"
24
+
21
25
  request_timeout_seconds: float = 30.0
22
26
  max_retries: int = 2
23
27
  retry_backoff_base_seconds: float = 0.2
@@ -46,6 +50,8 @@ class ProviderConfig:
46
50
  openrouter_api_key=os.getenv("OPENROUTER_API_KEY"),
47
51
  privacy_mode=os.getenv("PRIVACY_MODE", "false").lower() in {"1", "true", "yes"},
48
52
  cache_dir=cache_dir,
53
+ gcp_project_id=os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCP_PROJECT_ID"),
54
+ gcp_location=os.getenv("GCP_LOCATION", "us-central1"),
49
55
  request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "30.0")),
50
56
  max_retries=int(os.getenv("AGENTIC_MAX_RETRIES", "2")),
51
57
  retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "0.2")),
@@ -155,6 +155,8 @@ class ModelFactory:
155
155
  return self._create_openai_model(model_name, callbacks, config)
156
156
  elif provider == "anthropic":
157
157
  return self._create_anthropic_model(model_name, callbacks, config)
158
+ elif provider in ("vertexai", "google"):
159
+ return self._create_vertexai_model(model_name, callbacks, config)
158
160
  else:
159
161
  raise ValueError(f"Unsupported provider: {provider}")
160
162
  except ImportError as e:
@@ -197,7 +199,7 @@ class ModelFactory:
197
199
  ) -> Any:
198
200
  """Create ChatAnthropic model."""
199
201
  from langchain_anthropic import ChatAnthropic
200
-
202
+
201
203
  model = ChatAnthropic(
202
204
  model=model_name,
203
205
  timeout=config.request_timeout_seconds,
@@ -207,3 +209,24 @@ class ModelFactory:
207
209
  logger.debug(f"🤖 Created Anthropic model: {model_name}")
208
210
  return model
209
211
 
212
def _create_vertexai_model(
    self, model_name: str, callbacks: List[Any], config: ProviderConfig
) -> Any:
    """Build a ChatVertexAI chat model (Google Gemini served through Vertex AI).

    Authentication relies on Application Default Credentials (ADC):
    in Cloud Run the service account is used automatically, and for
    local use you first run ``gcloud auth application-default login``.

    Args:
        model_name: Model identifier passed to ChatVertexAI as ``model``.
        callbacks: Callback handlers attached to the created model.
        config: Provider settings; supplies the GCP project id, the GCP
            location and the retry count.

    Returns:
        The constructed ChatVertexAI instance.
    """
    # Imported inside the method, so the package is only required when
    # this provider is actually selected.
    from langchain_google_vertexai import ChatVertexAI

    vertex_kwargs = {
        "model": model_name,
        "project": config.gcp_project_id,
        "location": config.gcp_location,
        "max_retries": config.max_retries,
        "callbacks": callbacks,
    }
    vertex_model = ChatVertexAI(**vertex_kwargs)

    logger.debug(f"🤖 Created Vertex AI model: {model_name} (project={config.gcp_project_id})")
    return vertex_model
232
+
@@ -5,6 +5,9 @@ from typing import Dict, Any
5
5
 
6
6
  from .providers.config import ProviderConfig
7
7
 
8
+ # Default model for cloud deployments - Gemini 3 Flash via Vertex AI
9
+ DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"
10
+
8
11
 
9
12
  class ModelRouter:
10
13
  """Rules-based routing by gap type/length/uncertainty (scaffold)."""
@@ -14,22 +17,23 @@ class ModelRouter:
14
17
 
15
18
  def choose_model(self, gap_type: str, uncertainty: float) -> str:
16
19
  """Choose appropriate model based on gap characteristics.
17
-
20
+
18
21
  Returns model identifier in format "provider/model" for LangChain:
19
- - "ollama/gpt-oss:latest" for local Ollama models
20
- - "openai/gpt-4" for OpenAI models
22
+ - "vertexai/gemini-3-flash-preview" for Gemini via Vertex AI (default)
23
+ - "ollama/llama3.2:latest" for local Ollama models
24
+ - "openai/gpt-4" for OpenAI models
21
25
  - "anthropic/claude-3-sonnet-20240229" for Anthropic models
22
26
  """
23
- # Simple baseline per technical guidance
27
+ # Check for explicit model override from environment
28
+ env_model = os.getenv("AGENTIC_AI_MODEL")
29
+ if env_model:
30
+ return env_model
31
+
32
+ # Privacy mode: use local Ollama
24
33
  if self._config.privacy_mode:
25
- # Use the actual model from env, or default to a common Ollama model
26
- return os.getenv("AGENTIC_AI_MODEL", "ollama/gpt-oss:latest")
27
-
28
- # For high-uncertainty gaps, use Claude (best reasoning)
29
- if uncertainty > 0.5:
30
- return "anthropic/claude-3-sonnet-20240229"
31
-
32
- # Default to GPT-4 for general cases
33
- return "openai/gpt-4"
34
+ return "ollama/llama3.2:latest"
35
+
36
+ # Default to Gemini 3 Flash for all cases (fast, cost-effective)
37
+ return DEFAULT_CLOUD_MODEL
34
38
 
35
39
 
@@ -21,3 +21,4 @@ dist-ssr
21
21
  *.njsproj
22
22
  *.sln
23
23
  *.sw?
24
+ test-results/
@@ -0,0 +1,207 @@
1
+ import { test, expect } from '@playwright/test';
2
+ import * as path from 'path';
3
+ import { fileURLToPath } from 'url';
4
+
5
+ // Get __dirname equivalent in ESM
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = path.dirname(__filename);
8
+
9
+ /**
10
+ * E2E tests for the agentic correction workflow in the lyrics transcriber frontend.
11
+ *
12
+ * These tests verify:
13
+ * 1. The UI loads correctly with agentic correction data
14
+ * 2. The AgenticCorrectionMetrics panel displays correctly
15
+ * 3. Corrected words are highlighted and clickable
16
+ * 4. The CorrectionDetailCard shows proper information
17
+ */
18
+
19
/**
 * Loads the agentic-correction fixture into the app through the "Load File" button.
 *
 * Resolves once the fixture's first word ("Hello,") is visible on the page.
 */
async function loadFixtureData(page: import('@playwright/test').Page) {
  // The fixture sits in the "fixtures" folder next to this spec file.
  const fixtureFile = path.join(__dirname, 'fixtures', 'agentic-correction-data.json');

  // Arm the file-chooser listener and click together so the event cannot be missed.
  const [chooser] = await Promise.all([
    page.waitForEvent('filechooser'),
    page.getByRole('button', { name: /load file/i }).click(),
  ]);
  await chooser.setFiles(fixtureFile);

  // Data counts as loaded once expected fixture content has rendered.
  await expect(page.getByText('Hello,')).toBeVisible({ timeout: 5000 });
}
36
+
37
test.describe('Agentic Correction Workflow', () => {
  // Each test starts from a freshly opened app whose title has rendered.
  test.beforeEach(async ({ page }) => {
    await page.goto('/');
    await expect(page.getByText('Lyrics Correction Review')).toBeVisible();
  });

  test('should load the app in read-only mode', async ({ page }) => {
    const readOnlyNotice = page.getByText('Running in read-only mode');
    const loadFileButton = page.getByRole('button', { name: /load file/i });

    // Both the read-only alert and the Load File button must be on screen.
    await expect(readOnlyNotice).toBeVisible();
    await expect(loadFileButton).toBeVisible();
  });

  test('should load correction data from JSON file', async ({ page }) => {
    await loadFixtureData(page);

    // The loading placeholder is gone and fixture content is shown instead.
    const loadingPlaceholder = page.getByText('Loading Lyrics Correction Review...');
    await expect(loadingPlaceholder).not.toBeVisible();
    await expect(page.getByText('Hello,')).toBeVisible();
  });

  test('should render transcription view after loading data', async ({ page }) => {
    await loadFixtureData(page);

    // First the transcription content, then the section header.
    for (const expectedText of ['Hello,', 'Corrected Transcription']) {
      await expect(page.getByText(expectedText)).toBeVisible();
    }
  });
});
74
+
75
test.describe('UI Components', () => {
  test('should show Load File button on initial load', async ({ page }) => {
    await page.goto('/');

    // The button itself, plus the upload icon.
    await expect(page.getByRole('button', { name: /load file/i })).toBeVisible();
    await expect(page.locator('svg[data-testid="UploadFileIcon"]')).toBeVisible();
  });

  test('should show read-only mode banner', async ({ page }) => {
    await page.goto('/');

    const alertBanner = page.getByRole('alert');
    const readOnlyText = page.getByText(/read-only mode/i);

    await expect(alertBanner).toBeVisible();
    await expect(readOnlyText).toBeVisible();
  });

  test('should have correction metrics component', async ({ page }) => {
    await page.goto('/');

    // The page structure is built from Paper components; the first one must be visible.
    const firstPaper = page.locator('.MuiPaper-root').first();
    await expect(firstPaper).toBeVisible();
  });
});
104
+
105
test.describe('File Upload Flow', () => {
  test('should open file dialog when clicking Load File', async ({ page }) => {
    await page.goto('/');

    // Start waiting BEFORE the click: the 'filechooser' event fires while the
    // click is being handled, so a wait registered afterwards would only ever
    // see a (non-existent) second event and run into its timeout.
    const fileChooserPromise = page.waitForEvent('filechooser', { timeout: 5000 });

    // Click the button
    await page.getByRole('button', { name: /load file/i }).click();

    // Rejects (and fails the test) if no file chooser opened within the timeout.
    const fileChooser = await fileChooserPromise;

    // Verify file chooser was triggered
    expect(fileChooser).toBeTruthy();
  });
});
128
+
129
test.describe('Review Mode', () => {
  // Reason reported whenever the toggle is missing; the toggle is only
  // rendered in edit mode, not when a file is loaded in read-only mode.
  const NO_TOGGLE_REASON = 'Review Mode toggle is not rendered in read-only mode';

  test.beforeEach(async ({ page }) => {
    await page.goto('/');
    await expect(page.getByText('Lyrics Correction Review')).toBeVisible();
  });

  test('should show Review Mode toggle when agentic data is loaded', async ({ page }) => {
    await loadFixtureData(page);

    // Wait for content to load
    await expect(page.getByText('Hello,')).toBeVisible();

    // The toggle appears as a "Review Off" chip initially (only in
    // non-read-only mode with agentic data).
    const reviewChip = page.getByText(/Review Off|Review Mode/i);

    // Report the run as skipped instead of passing without any assertion.
    test.skip((await reviewChip.count()) === 0, NO_TOGGLE_REASON);

    await expect(reviewChip.first()).toBeVisible();
  });

  test('should show batch actions panel when Review Mode is enabled', async ({ page }) => {
    await loadFixtureData(page);

    // Wait for content to load
    await expect(page.getByText('Hello,')).toBeVisible();

    // Find the Review Mode toggle (only visible in non-read-only mode).
    // isVisible() returns immediately; it does not take a meaningful timeout.
    const reviewToggle = page.getByText(/Review Off/i);
    const toggleShown = await reviewToggle.isVisible().catch(() => false);
    test.skip(!toggleShown, NO_TOGGLE_REASON);

    await reviewToggle.click();

    // Wait for the batch actions panel to appear
    await expect(page.getByRole('button', { name: /Accept High Confidence/i })).toBeVisible({ timeout: 5000 });
    await expect(page.getByRole('button', { name: /Accept All/i })).toBeVisible();
    await expect(page.getByRole('button', { name: /Revert All/i })).toBeVisible();
  });

  test('should render corrected words with original text preview', async ({ page }) => {
    await loadFixtureData(page);

    // Wait for transcription content to load
    await expect(page.getByText('Hello,')).toBeVisible();

    // The corrected word "now" should be visible (from fixture: "you're" -> "now")
    await expect(page.getByText('now')).toBeVisible();
  });

  test('should toggle Review Mode on and off', async ({ page }) => {
    await loadFixtureData(page);

    // Wait for content to load
    await expect(page.getByText('Hello,')).toBeVisible();

    // Find the Review toggle (only visible in non-read-only mode)
    const reviewOff = page.getByText(/Review Off/i);
    const toggleShown = await reviewOff.isVisible().catch(() => false);
    test.skip(!toggleShown, NO_TOGGLE_REASON);

    // Click to enable Review Mode
    await reviewOff.click();

    // Should now show "Review Mode" label (in filled state)
    await expect(page.getByText(/Review Mode/i).first()).toBeVisible();

    // Click to disable Review Mode
    await page.getByText(/Review Mode/i).first().click();

    // Should show "Review Off" again
    await expect(page.getByText(/Review Off/i)).toBeVisible();
  });
});
@@ -0,0 +1,226 @@
1
+ {
2
+ "original_segments": [
3
+ {
4
+ "id": "seg-1",
5
+ "text": "Hello, is it me you're looking for?",
6
+ "words": [
7
+ {"id": "w1", "text": "Hello,", "start_time": 0.0, "end_time": 0.5},
8
+ {"id": "w2", "text": "is", "start_time": 0.5, "end_time": 0.7},
9
+ {"id": "w3", "text": "it", "start_time": 0.7, "end_time": 0.9},
10
+ {"id": "w4", "text": "me", "start_time": 0.9, "end_time": 1.1},
11
+ {"id": "w5", "text": "you're", "start_time": 1.1, "end_time": 1.3},
12
+ {"id": "w6", "text": "looking", "start_time": 1.3, "end_time": 1.6},
13
+ {"id": "w7", "text": "for?", "start_time": 1.6, "end_time": 1.9}
14
+ ],
15
+ "start_time": 0.0,
16
+ "end_time": 1.9
17
+ },
18
+ {
19
+ "id": "seg-2",
20
+ "text": "I can see it in your eyes",
21
+ "words": [
22
+ {"id": "w8", "text": "I", "start_time": 2.0, "end_time": 2.2},
23
+ {"id": "w9", "text": "can", "start_time": 2.2, "end_time": 2.4},
24
+ {"id": "w10", "text": "see", "start_time": 2.4, "end_time": 2.6},
25
+ {"id": "w11", "text": "it", "start_time": 2.6, "end_time": 2.8},
26
+ {"id": "w12", "text": "in", "start_time": 2.8, "end_time": 3.0},
27
+ {"id": "w13", "text": "your", "start_time": 3.0, "end_time": 3.2},
28
+ {"id": "w14", "text": "eyes", "start_time": 3.2, "end_time": 3.5}
29
+ ],
30
+ "start_time": 2.0,
31
+ "end_time": 3.5
32
+ },
33
+ {
34
+ "id": "seg-3",
35
+ "text": "I can see it in your smile",
36
+ "words": [
37
+ {"id": "w15", "text": "I", "start_time": 4.0, "end_time": 4.2},
38
+ {"id": "w16", "text": "can", "start_time": 4.2, "end_time": 4.4},
39
+ {"id": "w17", "text": "see", "start_time": 4.4, "end_time": 4.6},
40
+ {"id": "w18", "text": "it", "start_time": 4.6, "end_time": 4.8},
41
+ {"id": "w19", "text": "in", "start_time": 4.8, "end_time": 5.0},
42
+ {"id": "w20", "text": "your", "start_time": 5.0, "end_time": 5.2},
43
+ {"id": "w21", "text": "smile", "start_time": 5.2, "end_time": 5.5}
44
+ ],
45
+ "start_time": 4.0,
46
+ "end_time": 5.5
47
+ }
48
+ ],
49
+ "reference_lyrics": {
50
+ "genius": {
51
+ "segments": [
52
+ {
53
+ "id": "ref-seg-1",
54
+ "text": "Hello, is it me you're looking for?",
55
+ "words": [
56
+ {"id": "rw1", "text": "Hello,", "start_time": null, "end_time": null},
57
+ {"id": "rw2", "text": "is", "start_time": null, "end_time": null},
58
+ {"id": "rw3", "text": "it", "start_time": null, "end_time": null},
59
+ {"id": "rw4", "text": "me", "start_time": null, "end_time": null},
60
+ {"id": "rw5", "text": "you're", "start_time": null, "end_time": null},
61
+ {"id": "rw6", "text": "looking", "start_time": null, "end_time": null},
62
+ {"id": "rw7", "text": "for?", "start_time": null, "end_time": null}
63
+ ],
64
+ "start_time": null,
65
+ "end_time": null
66
+ }
67
+ ],
68
+ "metadata": {
69
+ "source": "genius",
70
+ "track_name": "Hello",
71
+ "artist_names": "Lionel Richie",
72
+ "album_name": "Can't Slow Down",
73
+ "duration_ms": 280000,
74
+ "explicit": false,
75
+ "language": "en",
76
+ "is_synced": false,
77
+ "lyrics_provider": "Genius",
78
+ "lyrics_provider_id": "12345",
79
+ "provider_metadata": {}
80
+ },
81
+ "source": "genius"
82
+ }
83
+ },
84
+ "anchor_sequences": [
85
+ {
86
+ "id": "anchor-1",
87
+ "transcribed_word_ids": ["w1", "w2", "w3", "w4"],
88
+ "transcription_position": 0,
89
+ "reference_positions": {"genius": 0},
90
+ "reference_word_ids": {"genius": ["rw1", "rw2", "rw3", "rw4"]},
91
+ "confidence": 0.95
92
+ }
93
+ ],
94
+ "gap_sequences": [
95
+ {
96
+ "id": "gap-1",
97
+ "transcribed_word_ids": ["w5", "w6", "w7"],
98
+ "transcription_position": 4,
99
+ "preceding_anchor_id": "anchor-1",
100
+ "following_anchor_id": null,
101
+ "reference_word_ids": {"genius": ["rw5", "rw6", "rw7"]}
102
+ }
103
+ ],
104
+ "resized_segments": [],
105
+ "corrections_made": 3,
106
+ "confidence": 0.85,
107
+ "corrections": [
108
+ {
109
+ "id": "corr-1",
110
+ "handler": "AgenticCorrector",
111
+ "original_word": "you're",
112
+ "corrected_word": "now",
113
+ "segment_id": "seg-1",
114
+ "word_id": "w5",
115
+ "corrected_word_id": "cw5",
116
+ "source": "genius",
117
+ "confidence": 0.92,
118
+ "reason": "Transcription error - misheard word, reference lyrics confirm it should be 'now' [SOUND_ALIKE] (confidence: 92%)",
119
+ "alternatives": {"your": 0.65},
120
+ "is_deletion": false,
121
+ "split_index": null,
122
+ "split_total": null,
123
+ "reference_positions": {"genius": 4},
124
+ "length": 1
125
+ },
126
+ {
127
+ "id": "corr-2",
128
+ "handler": "AgenticCorrector",
129
+ "original_word": "it",
130
+ "corrected_word": "",
131
+ "segment_id": "seg-2",
132
+ "word_id": "w11",
133
+ "corrected_word_id": null,
134
+ "source": "genius",
135
+ "confidence": 0.88,
136
+ "reason": "Extra filler word detected, not in reference lyrics [EXTRA_WORDS] (confidence: 88%)",
137
+ "alternatives": {},
138
+ "is_deletion": true,
139
+ "split_index": null,
140
+ "split_total": null,
141
+ "reference_positions": {},
142
+ "length": 1
143
+ },
144
+ {
145
+ "id": "corr-3",
146
+ "handler": "AgenticCorrector",
147
+ "original_word": "I",
148
+ "corrected_word": "",
149
+ "segment_id": "seg-3",
150
+ "word_id": "w15",
151
+ "corrected_word_id": null,
152
+ "source": "genius",
153
+ "confidence": 0.75,
154
+ "reason": "Extra word at start of line, not in reference [EXTRA_WORDS] (confidence: 75%)",
155
+ "alternatives": {},
156
+ "is_deletion": true,
157
+ "split_index": null,
158
+ "split_total": null,
159
+ "reference_positions": {},
160
+ "length": 1
161
+ }
162
+ ],
163
+ "corrected_segments": [
164
+ {
165
+ "id": "seg-1",
166
+ "text": "Hello, is it me now looking for?",
167
+ "words": [
168
+ {"id": "w1", "text": "Hello,", "start_time": 0.0, "end_time": 0.5},
169
+ {"id": "w2", "text": "is", "start_time": 0.5, "end_time": 0.7},
170
+ {"id": "w3", "text": "it", "start_time": 0.7, "end_time": 0.9},
171
+ {"id": "w4", "text": "me", "start_time": 0.9, "end_time": 1.1},
172
+ {"id": "cw5", "text": "now", "start_time": 1.1, "end_time": 1.3, "created_during_correction": true},
173
+ {"id": "w6", "text": "looking", "start_time": 1.3, "end_time": 1.6},
174
+ {"id": "w7", "text": "for?", "start_time": 1.6, "end_time": 1.9}
175
+ ],
176
+ "start_time": 0.0,
177
+ "end_time": 1.9
178
+ },
179
+ {
180
+ "id": "seg-2",
181
+ "text": "I can see in your eyes",
182
+ "words": [
183
+ {"id": "w8", "text": "I", "start_time": 2.0, "end_time": 2.2},
184
+ {"id": "w9", "text": "can", "start_time": 2.2, "end_time": 2.4},
185
+ {"id": "w10", "text": "see", "start_time": 2.4, "end_time": 2.6},
186
+ {"id": "w12", "text": "in", "start_time": 2.8, "end_time": 3.0},
187
+ {"id": "w13", "text": "your", "start_time": 3.0, "end_time": 3.2},
188
+ {"id": "w14", "text": "eyes", "start_time": 3.2, "end_time": 3.5}
189
+ ],
190
+ "start_time": 2.0,
191
+ "end_time": 3.5
192
+ },
193
+ {
194
+ "id": "seg-3",
195
+ "text": "can see it in your smile",
196
+ "words": [
197
+ {"id": "w16", "text": "can", "start_time": 4.2, "end_time": 4.4},
198
+ {"id": "w17", "text": "see", "start_time": 4.4, "end_time": 4.6},
199
+ {"id": "w18", "text": "it", "start_time": 4.6, "end_time": 4.8},
200
+ {"id": "w19", "text": "in", "start_time": 4.8, "end_time": 5.0},
201
+ {"id": "w20", "text": "your", "start_time": 5.0, "end_time": 5.2},
202
+ {"id": "w21", "text": "smile", "start_time": 5.2, "end_time": 5.5}
203
+ ],
204
+ "start_time": 4.0,
205
+ "end_time": 5.5
206
+ }
207
+ ],
208
+ "metadata": {
209
+ "anchor_sequences_count": 1,
210
+ "gap_sequences_count": 1,
211
+ "total_words": 21,
212
+ "correction_ratio": 0.143,
213
+ "audio_filepath": "/path/to/test-audio.mp3",
214
+ "audio_hash": "test-audio-hash-123",
215
+ "available_handlers": [
216
+ {"id": "agentic", "name": "AgenticCorrector", "description": "AI-powered agentic correction", "enabled": true},
217
+ {"id": "sound_alike", "name": "SoundAlikeHandler", "description": "Fixes sound-alike errors", "enabled": true}
218
+ ],
219
+ "enabled_handlers": ["AgenticCorrector", "SoundAlikeHandler"]
220
+ },
221
+ "correction_steps": [],
222
+ "word_id_map": {
223
+ "w5": "cw5"
224
+ },
225
+ "segment_id_map": {}
226
+ }
@@ -11,7 +11,10 @@
11
11
  "lint": "eslint .",
12
12
  "preview": "vite preview",
13
13
  "predeploy": "npm run build-prod",
14
- "deploy": "gh-pages -d dist"
14
+ "deploy": "gh-pages -d dist",
15
+ "test": "playwright test",
16
+ "test:ui": "playwright test --ui",
17
+ "test:headed": "playwright test --headed"
15
18
  },
16
19
  "dependencies": {
17
20
  "@emotion/react": "^11.14.0",
@@ -61,7 +61,7 @@ export default defineConfig({
61
61
 
62
62
  // Run local dev server before starting tests
63
63
  webServer: {
64
- command: 'yarn dev',
64
+ command: 'npm run dev',
65
65
  url: 'http://localhost:5173',
66
66
  reuseExistingServer: !process.env.CI,
67
67
  timeout: 120000,