karaoke-gen: karaoke_gen-0.75.53-py3-none-any.whl → karaoke_gen-0.81.1-py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (50)
  1. karaoke_gen/audio_fetcher.py +218 -0
  2. karaoke_gen/instrumental_review/static/index.html +179 -16
  3. karaoke_gen/karaoke_gen.py +191 -25
  4. karaoke_gen/lyrics_processor.py +39 -31
  5. karaoke_gen/utils/__init__.py +26 -0
  6. karaoke_gen/utils/cli_args.py +9 -1
  7. karaoke_gen/utils/gen_cli.py +1 -1
  8. karaoke_gen/utils/remote_cli.py +33 -6
  9. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/METADATA +80 -4
  10. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/RECORD +50 -43
  11. lyrics_transcriber/core/config.py +8 -0
  12. lyrics_transcriber/core/controller.py +43 -1
  13. lyrics_transcriber/correction/agentic/providers/config.py +6 -0
  14. lyrics_transcriber/correction/agentic/providers/model_factory.py +24 -1
  15. lyrics_transcriber/correction/agentic/router.py +17 -13
  16. lyrics_transcriber/frontend/.gitignore +1 -0
  17. lyrics_transcriber/frontend/e2e/agentic-corrections.spec.ts +207 -0
  18. lyrics_transcriber/frontend/e2e/fixtures/agentic-correction-data.json +226 -0
  19. lyrics_transcriber/frontend/index.html +5 -1
  20. lyrics_transcriber/frontend/package-lock.json +4553 -0
  21. lyrics_transcriber/frontend/package.json +7 -1
  22. lyrics_transcriber/frontend/playwright.config.ts +69 -0
  23. lyrics_transcriber/frontend/public/nomad-karaoke-logo.svg +5 -0
  24. lyrics_transcriber/frontend/src/App.tsx +88 -59
  25. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +55 -21
  26. lyrics_transcriber/frontend/src/components/AppHeader.tsx +65 -0
  27. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +39 -35
  28. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +9 -9
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -1
  30. lyrics_transcriber/frontend/src/components/EditWordList.tsx +1 -1
  31. lyrics_transcriber/frontend/src/components/Header.tsx +96 -3
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +120 -3
  33. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +22 -21
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  35. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +12 -2
  36. lyrics_transcriber/frontend/src/components/WordDivider.tsx +3 -3
  37. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +122 -35
  38. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +2 -2
  39. lyrics_transcriber/frontend/src/components/shared/constants.ts +15 -5
  40. lyrics_transcriber/frontend/src/components/shared/types.ts +6 -0
  41. lyrics_transcriber/frontend/src/main.tsx +1 -7
  42. lyrics_transcriber/frontend/src/theme.ts +337 -135
  43. lyrics_transcriber/frontend/vite.config.ts +5 -0
  44. lyrics_transcriber/frontend/yarn.lock +1005 -1046
  45. lyrics_transcriber/output/generator.py +50 -3
  46. lyrics_transcriber/review/server.py +1 -1
  47. lyrics_transcriber/transcribers/local_whisper.py +260 -0
  48. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/WHEEL +0 -0
  49. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/entry_points.txt +0 -0
  50. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
- import { Typography, Box } from '@mui/material'
1
+ import { Typography, Box, useMediaQuery, useTheme } from '@mui/material'
2
2
  import { WordComponent } from './Word'
3
3
  import { useWordClick } from '../hooks/useWordClick'
4
4
  import {
@@ -16,6 +16,7 @@ import React from 'react'
16
16
  import ContentCopyIcon from '@mui/icons-material/ContentCopy'
17
17
  import IconButton from '@mui/material/IconButton'
18
18
  import { getWordsFromIds } from '../utils/wordUtils'
19
+ import CorrectedWordWithActions from '../../CorrectedWordWithActions'
19
20
 
20
21
  export interface HighlightedTextProps {
21
22
  text?: string
@@ -36,6 +37,12 @@ export interface HighlightedTextProps {
36
37
  gaps?: GapSequence[]
37
38
  flashingHandler?: string | null
38
39
  corrections?: WordCorrection[]
40
+ // Review mode props for agentic corrections
41
+ reviewMode?: boolean
42
+ onRevertCorrection?: (wordId: string) => void
43
+ onEditCorrection?: (wordId: string) => void
44
+ onAcceptCorrection?: (wordId: string) => void
45
+ onShowCorrectionDetail?: (wordId: string) => void
39
46
  }
40
47
 
41
48
  export function HighlightedText({
@@ -57,7 +64,15 @@ export function HighlightedText({
57
64
  gaps = [],
58
65
  flashingHandler,
59
66
  corrections = [],
67
+ reviewMode = false,
68
+ onRevertCorrection,
69
+ onEditCorrection,
70
+ onAcceptCorrection,
71
+ onShowCorrectionDetail,
60
72
  }: HighlightedTextProps) {
73
+ const theme = useTheme()
74
+ const isMobile = useMediaQuery(theme.breakpoints.down('sm'))
75
+
61
76
  const { handleWordClick } = useWordClick({
62
77
  mode,
63
78
  onElementClick,
@@ -157,43 +172,83 @@ export function HighlightedText({
157
172
 
158
173
  const renderContent = () => {
159
174
  if (wordPositions && !segments) {
160
- return wordPositions.map((wordPos, index) => (
161
- <React.Fragment key={wordPos.word.id}>
162
- <WordComponent
163
- key={`${wordPos.word.id}-${index}`}
164
- word={wordPos.word.text}
165
- shouldFlash={shouldWordFlash(wordPos)}
166
- isAnchor={wordPos.type === 'anchor'}
167
- isCorrectedGap={wordPos.isCorrected}
168
- isUncorrectedGap={wordPos.type === 'gap' && !wordPos.isCorrected}
169
- isCurrentlyPlaying={shouldHighlightWord(wordPos)}
170
- onClick={() => handleWordClick(
171
- wordPos.word.text,
172
- wordPos.word.id,
173
- wordPos.type === 'anchor' ? wordPos.sequence as AnchorSequence : undefined,
174
- wordPos.type === 'gap' ? wordPos.sequence as GapSequence : undefined
175
- )}
176
- correction={(() => {
177
- const correction = corrections?.find(c =>
178
- c.corrected_word_id === wordPos.word.id ||
179
- c.word_id === wordPos.word.id
180
- );
181
- return correction ? {
175
+ return wordPositions.map((wordPos, index) => {
176
+ // Find correction for this word
177
+ const correction = corrections?.find(c =>
178
+ c.corrected_word_id === wordPos.word.id ||
179
+ c.word_id === wordPos.word.id
180
+ );
181
+
182
+ // Use CorrectedWordWithActions for agentic corrections
183
+ if (correction && correction.handler === 'AgenticCorrector') {
184
+ return (
185
+ <React.Fragment key={wordPos.word.id}>
186
+ <CorrectedWordWithActions
187
+ word={wordPos.word.text}
188
+ originalWord={correction.original_word}
189
+ correction={{
190
+ originalWord: correction.original_word,
191
+ handler: correction.handler,
192
+ confidence: correction.confidence,
193
+ source: correction.source,
194
+ reason: correction.reason
195
+ }}
196
+ shouldFlash={shouldWordFlash(wordPos)}
197
+ showActions={reviewMode && !isMobile}
198
+ onRevert={() => onRevertCorrection?.(wordPos.word.id)}
199
+ onEdit={() => onEditCorrection?.(wordPos.word.id)}
200
+ onAccept={() => onAcceptCorrection?.(wordPos.word.id)}
201
+ onClick={() => {
202
+ if (isMobile) {
203
+ onShowCorrectionDetail?.(wordPos.word.id)
204
+ } else {
205
+ handleWordClick(
206
+ wordPos.word.text,
207
+ wordPos.word.id,
208
+ wordPos.type === 'anchor' ? wordPos.sequence as AnchorSequence : undefined,
209
+ wordPos.type === 'gap' ? wordPos.sequence as GapSequence : undefined
210
+ )
211
+ }
212
+ }}
213
+ />
214
+ {index < wordPositions.length - 1 && ' '}
215
+ </React.Fragment>
216
+ );
217
+ }
218
+
219
+ // Default rendering with WordComponent
220
+ return (
221
+ <React.Fragment key={wordPos.word.id}>
222
+ <WordComponent
223
+ key={`${wordPos.word.id}-${index}`}
224
+ word={wordPos.word.text}
225
+ shouldFlash={shouldWordFlash(wordPos)}
226
+ isAnchor={wordPos.type === 'anchor'}
227
+ isCorrectedGap={wordPos.isCorrected}
228
+ isUncorrectedGap={wordPos.type === 'gap' && !wordPos.isCorrected}
229
+ isCurrentlyPlaying={shouldHighlightWord(wordPos)}
230
+ onClick={() => handleWordClick(
231
+ wordPos.word.text,
232
+ wordPos.word.id,
233
+ wordPos.type === 'anchor' ? wordPos.sequence as AnchorSequence : undefined,
234
+ wordPos.type === 'gap' ? wordPos.sequence as GapSequence : undefined
235
+ )}
236
+ correction={correction ? {
182
237
  originalWord: correction.original_word,
183
238
  handler: correction.handler,
184
239
  confidence: correction.confidence,
185
240
  source: correction.source,
186
241
  reason: correction.reason
187
- } : null;
188
- })()}
189
- />
190
- {index < wordPositions.length - 1 && ' '}
191
- </React.Fragment>
192
- ))
242
+ } : null}
243
+ />
244
+ {index < wordPositions.length - 1 && ' '}
245
+ </React.Fragment>
246
+ );
247
+ })
193
248
  } else if (segments) {
194
249
  return segments.map((segment) => (
195
- <Box key={segment.id} sx={{
196
- display: 'flex',
250
+ <Box key={segment.id} sx={{
251
+ display: 'flex',
197
252
  alignItems: 'flex-start',
198
253
  mb: 0
199
254
  }}>
@@ -212,12 +267,44 @@ export function HighlightedText({
212
267
 
213
268
  const sequence = wordPos?.type === 'gap' ? wordPos.sequence as GapSequence : undefined;
214
269
 
215
- // Find correction information for the tooltip
216
- const correction = corrections?.find(c =>
217
- c.corrected_word_id === word.id ||
270
+ // Find correction information
271
+ const correction = corrections?.find(c =>
272
+ c.corrected_word_id === word.id ||
218
273
  c.word_id === word.id
219
274
  );
220
-
275
+
276
+ // Use CorrectedWordWithActions for agentic corrections
277
+ if (correction && correction.handler === 'AgenticCorrector') {
278
+ return (
279
+ <React.Fragment key={word.id}>
280
+ <CorrectedWordWithActions
281
+ word={word.text}
282
+ originalWord={correction.original_word}
283
+ correction={{
284
+ originalWord: correction.original_word,
285
+ handler: correction.handler,
286
+ confidence: correction.confidence,
287
+ source: correction.source,
288
+ reason: correction.reason
289
+ }}
290
+ shouldFlash={shouldWordFlash(wordPos || { word: word.text, id: word.id })}
291
+ showActions={reviewMode && !isMobile}
292
+ onRevert={() => onRevertCorrection?.(word.id)}
293
+ onEdit={() => onEditCorrection?.(word.id)}
294
+ onAccept={() => onAcceptCorrection?.(word.id)}
295
+ onClick={() => {
296
+ if (isMobile) {
297
+ onShowCorrectionDetail?.(word.id)
298
+ } else {
299
+ handleWordClick(word.text, word.id, anchor, sequence)
300
+ }
301
+ }}
302
+ />
303
+ {wordIndex < segment.words.length - 1 && ' '}
304
+ </React.Fragment>
305
+ );
306
+ }
307
+
221
308
  const correctionInfo = correction ? {
222
309
  originalWord: correction.original_word,
223
310
  handler: correction.handler,
@@ -41,14 +41,14 @@ export const WordComponent = React.memo(function Word({
41
41
  borderRadius: '2px',
42
42
  color: isCurrentlyPlaying ? '#ffffff' : 'inherit',
43
43
  textDecoration: correction ? 'underline dotted' : 'none',
44
- textDecorationColor: correction ? '#666' : 'inherit',
44
+ textDecorationColor: correction ? '#666666' : 'inherit', // slate-500 for dark mode
45
45
  textUnderlineOffset: '2px',
46
46
  fontSize: '0.85rem',
47
47
  lineHeight: 1.2
48
48
  }}
49
49
  sx={{
50
50
  '&:hover': {
51
- backgroundColor: '#e0e0e0'
51
+ backgroundColor: 'rgba(248, 250, 252, 0.08)' // slate-50 hover for dark mode
52
52
  }
53
53
  }}
54
54
  onClick={onClick}
@@ -1,11 +1,21 @@
1
1
  import { keyframes } from '@mui/system'
2
2
 
3
+ // Dark theme colors matching karaoke-gen globals.css
3
4
  export const COLORS = {
4
- anchor: '#e3f2fd', // Pale blue
5
- corrected: '#e8f5e9', // Pale green
6
- uncorrectedGap: '#fff3e0', // Pale orange
7
- highlighted: '#ffeb3b', // or any color you prefer for highlighting
8
- playing: '#1976d2', // Blue
5
+ anchor: 'rgba(59, 130, 246, 0.25)', // Blue tint for dark mode
6
+ corrected: 'rgba(34, 197, 94, 0.25)', // Green tint for dark mode
7
+ uncorrectedGap: 'rgba(249, 115, 22, 0.25)', // Orange tint for dark mode
8
+ highlighted: 'rgba(251, 191, 36, 0.4)', // Amber highlight for dark mode
9
+ playing: '#3b82f6', // Blue-500
10
+ // Text colors (matching karaoke-gen)
11
+ textPrimary: '#e5e5e5', // matches karaoke-gen --text
12
+ textSecondary: '#888888', // matches karaoke-gen --text-muted
13
+ textMuted: '#666666',
14
+ // Background colors (matching karaoke-gen globals.css)
15
+ background: '#0f0f0f', // matches karaoke-gen --bg
16
+ backgroundPaper: '#1a1a1a', // matches karaoke-gen --card
17
+ backgroundElevated: '#252525', // matches karaoke-gen --secondary
18
+ border: '#2a2a2a', // matches karaoke-gen --card-border
9
19
  } as const
10
20
 
11
21
  export const flashAnimation = keyframes`
@@ -85,6 +85,12 @@ export interface TranscriptionViewProps {
85
85
  anchors?: AnchorSequence[]
86
86
  flashingHandler?: string | null
87
87
  onDataChange?: (updatedData: CorrectionData) => void
88
+ // Review mode props for agentic corrections
89
+ reviewMode?: boolean
90
+ onRevertCorrection?: (wordId: string) => void
91
+ onEditCorrection?: (wordId: string) => void
92
+ onAcceptCorrection?: (wordId: string) => void
93
+ onShowCorrectionDetail?: (wordId: string) => void
88
94
  }
89
95
 
90
96
  // Add LinePosition type here since it's used in multiple places
@@ -1,8 +1,5 @@
1
1
  import ReactDOM from 'react-dom/client'
2
- import { ThemeProvider } from '@mui/material/styles'
3
- import CssBaseline from '@mui/material/CssBaseline'
4
2
  import App from './App'
5
- import theme from './theme'
6
3
  // Import version from package.json
7
4
  import packageJson from '../package.json'
8
5
 
@@ -10,8 +7,5 @@ import packageJson from '../package.json'
10
7
  console.log(`🎵 Lyrics Transcriber Frontend v${packageJson.version}`)
11
8
 
12
9
  ReactDOM.createRoot(document.getElementById('root')!).render(
13
- <ThemeProvider theme={theme}>
14
- <CssBaseline />
15
- <App />
16
- </ThemeProvider>
10
+ <App />
17
11
  )