lyrics-transcriber 0.34.0__py3-none-any.whl → 0.34.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. lyrics_transcriber/correction/handlers/syllables_match.py +22 -2
  2. lyrics_transcriber/frontend/.gitignore +23 -0
  3. lyrics_transcriber/frontend/README.md +50 -0
  4. lyrics_transcriber/frontend/dist/assets/index-DqFgiUni.js +245 -0
  5. lyrics_transcriber/frontend/dist/index.html +13 -0
  6. lyrics_transcriber/frontend/dist/vite.svg +1 -0
  7. lyrics_transcriber/frontend/eslint.config.js +28 -0
  8. lyrics_transcriber/frontend/index.html +13 -0
  9. lyrics_transcriber/frontend/package-lock.json +4260 -0
  10. lyrics_transcriber/frontend/package.json +37 -0
  11. lyrics_transcriber/frontend/public/vite.svg +1 -0
  12. lyrics_transcriber/frontend/src/App.tsx +192 -0
  13. lyrics_transcriber/frontend/src/api.ts +59 -0
  14. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +155 -0
  15. lyrics_transcriber/frontend/src/components/DebugPanel.tsx +311 -0
  16. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +297 -0
  17. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  18. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +450 -0
  19. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +287 -0
  20. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +157 -0
  21. lyrics_transcriber/frontend/src/components/constants.ts +19 -0
  22. lyrics_transcriber/frontend/src/components/styles.ts +13 -0
  23. lyrics_transcriber/frontend/src/main.tsx +6 -0
  24. lyrics_transcriber/frontend/src/types.ts +158 -0
  25. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  26. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  27. lyrics_transcriber/frontend/tsconfig.json +25 -0
  28. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  29. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  30. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  31. lyrics_transcriber/frontend/vite.config.js +6 -0
  32. lyrics_transcriber/frontend/vite.config.ts +7 -0
  33. lyrics_transcriber/review/server.py +18 -29
  34. {lyrics_transcriber-0.34.0.dist-info → lyrics_transcriber-0.34.2.dist-info}/METADATA +1 -1
  35. {lyrics_transcriber-0.34.0.dist-info → lyrics_transcriber-0.34.2.dist-info}/RECORD +38 -7
  36. {lyrics_transcriber-0.34.0.dist-info → lyrics_transcriber-0.34.2.dist-info}/LICENSE +0 -0
  37. {lyrics_transcriber-0.34.0.dist-info → lyrics_transcriber-0.34.2.dist-info}/WHEEL +0 -0
  38. {lyrics_transcriber-0.34.0.dist-info → lyrics_transcriber-0.34.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,450 @@
1
+ import LockIcon from '@mui/icons-material/Lock'
2
+ import UploadFileIcon from '@mui/icons-material/UploadFile'
3
+ import { Box, Button, Grid, Typography, useMediaQuery, useTheme } from '@mui/material'
4
+ import { useCallback, useState } from 'react'
5
+ import { ApiClient } from '../api'
6
+ import { CorrectionData, LyricsData, HighlightInfo, AnchorMatchInfo, GapSequence, AnchorSequence, LyricsSegment, WordCorrection } from '../types'
7
+ import CorrectionMetrics from './CorrectionMetrics'
8
+ import DetailsModal from './DetailsModal'
9
+ import ReferenceView from './ReferenceView'
10
+ import TranscriptionView from './TranscriptionView'
11
+ import DebugPanel from './DebugPanel'
12
+
13
/**
 * Payload handed to the word-click handler when a single word is clicked.
 */
interface WordClickInfo {
    /**
     * Index of the clicked word. The click handler subtracts a gap's
     * `transcription_position` from it to index into that gap's `words`,
     * so both values share the same position scale.
     */
    wordIndex: number
    /** Category of the clicked word. */
    type:
        | 'anchor'
        | 'gap'
        | 'other'
    /** Anchor sequence associated with the word, when one was supplied. */
    anchor?: AnchorSequence
    /** Gap sequence associated with the word, when one was supplied. */
    gap?: GapSequence
}
19
+
20
/**
 * Props accepted by the `LyricsAnalyzer` component.
 */
interface LyricsAnalyzerProps {
    /** Correction result used to seed the component's editable state. */
    data: CorrectionData
    /** Click handler for the "Load File" button (rendered in read-only mode). */
    onFileLoad: () => void
    /** Callback for showing metadata; not consumed by `LyricsAnalyzer` itself. */
    onShowMetadata: () => void
    /** Client used to submit the review; `null` disables submission. */
    apiClient: ApiClient | null
    /** When true, shows the "View Only Mode" banner and hides the finish button. */
    isReadOnly: boolean
}
27
+
28
/**
 * Content shown in the details modal, discriminated by `type`:
 * either an anchor sequence or a gap sequence, each extended with the
 * position it was opened from (gaps also carry the clicked word).
 */
export type ModalContent =
    | {
        type: 'anchor'
        data: LyricsData['anchor_sequences'][0] & {
            position: number
        }
    }
    | {
        type: 'gap'
        data: LyricsData['gap_sequences'][0] & {
            position: number
            word: string
        }
    }
40
+
41
/** Category of elements currently being flashed; `null` means nothing is flashing. */
export type FlashType =
    | 'anchor'
    | 'corrected'
    | 'uncorrected'
    | 'word'
    | null
42
+
43
/**
 * Returns a detached deep copy of the correction payload, ready to be
 * submitted.
 *
 * The copy is produced by a JSON round trip, so the caller's object is never
 * mutated and anything JSON cannot represent is dropped or converted exactly
 * as `JSON.stringify` would do when the payload is sent.
 *
 * An earlier version additionally walked the clone and re-parsed every number
 * (`parseFloat(value.toFixed(1))` / `parseFloat(value.toString())`) in an
 * attempt to "force" a decimal point onto integers. JavaScript has a single
 * number type, so that pass always wrote back the identical value and had no
 * effect on the serialized output; it has been removed.
 *
 * @param data - Correction data to copy.
 * @returns A deep clone of `data` that shares no references with it.
 */
function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
    return JSON.parse(JSON.stringify(data)) as CorrectionData
}
68
+
69
/**
 * Review screen for a lyrics correction result.
 *
 * Renders the correction metrics, a debug panel, the transcription and
 * reference views side by side, and a details modal. Manual edits made in the
 * modal are applied to a local copy of the data (`data` state); "Finish
 * Review" submits that copy through `apiClient`.
 *
 * @param data - Initial correction data; copied into local state.
 * @param onFileLoad - Click handler for the "Load File" button (read-only mode).
 * @param apiClient - Client used to submit corrections; `null` disables submission.
 * @param isReadOnly - Shows the view-only banner and hides the finish button.
 */
export default function LyricsAnalyzer({ data: initialData, onFileLoad, apiClient, isReadOnly }: LyricsAnalyzerProps) {
    const [modalContent, setModalContent] = useState<ModalContent | null>(null)
    const [flashingType, setFlashingType] = useState<FlashType>(null)
    const [highlightInfo, setHighlightInfo] = useState<HighlightInfo | null>(null)
    const [currentSource, setCurrentSource] = useState<'genius' | 'spotify'>('genius')
    const [anchorMatchInfo, setAnchorMatchInfo] = useState<AnchorMatchInfo[]>([])
    // Successfully applied manual edits, keyed by the edited word's position.
    const [manualCorrections, setManualCorrections] = useState<Map<number, string[]>>(new Map())
    const [isReviewComplete, setIsReviewComplete] = useState(false)
    // Working copy of the correction data; every manual edit is applied here.
    const [data, setData] = useState(initialData)
    const theme = useTheme()
    const isMobile = useMediaQuery(theme.breakpoints.down('md'))

    // Briefly highlights all elements of the given type.
    // The state is cleared first and re-set two animation frames later so a
    // repeated flash of the same type produces a fresh state transition.
    // NOTE(review): the 1200ms reset timer is never cancelled, so a timer from
    // an earlier flash can end a later flash early — consider tracking it in a ref.
    const handleFlash = useCallback((type: FlashType, info?: HighlightInfo) => {
        setFlashingType(null)
        setHighlightInfo(null)

        requestAnimationFrame(() => {
            requestAnimationFrame(() => {
                setFlashingType(type)
                if (info) {
                    setHighlightInfo(info)
                }
                setTimeout(() => {
                    setFlashingType(null)
                    setHighlightInfo(null)
                }, 1200)
            })
        })
    }, [])

    // Opens the details modal for a clicked word that belongs to a gap.
    const handleWordClick = useCallback((info: WordClickInfo) => {
        console.group('Word Click Debug Info')
        console.log('Clicked word info:', JSON.stringify(info, null, 2))

        if (info.type === 'gap' && info.gap) {
            console.log('Gap sequence:', JSON.stringify(info.gap, null, 2))
            const modalData = {
                type: 'gap' as const,
                data: {
                    ...info.gap,
                    // Previously computed as start + (wordIndex - start), which is just wordIndex.
                    position: info.wordIndex,
                    word: info.gap.words[info.wordIndex - info.gap.transcription_position]
                }
            }
            setModalContent(modalData)
            console.log('Set modal content:', JSON.stringify(modalData, null, 2))
        }

        console.groupEnd()
    }, [])

    // Applies a manual correction for the word at `position` to the working
    // copy: the containing gap, the matching corrected segment and
    // `corrected_text` are all updated together. Nothing is recorded or changed
    // unless both the gap and the segment word can be located.
    const handleUpdateCorrection = useCallback((position: number, updatedWords: string[]) => {
        console.group('handleUpdateCorrection Debug')
        try {
            console.log('Position:', position)
            console.log('Updated words:', updatedWords)

            if (updatedWords.length === 0) {
                // Without a replacement word the edit would write `undefined` as the word text.
                console.error('No replacement words supplied for position:', position)
                return
            }

            // Create a deep clone of the data so React state is never mutated in place
            const newData = JSON.parse(JSON.stringify(data))

            // Find the gap that contains this position
            const gapIndex = newData.gap_sequences.findIndex(
                (gap: GapSequence) =>
                    position >= gap.transcription_position &&
                    position < gap.transcription_position + gap.words.length
            )
            if (gapIndex === -1) {
                console.error('Could not find gap containing position:', position)
                return
            }

            const originalGap = newData.gap_sequences[gapIndex]
            const wordIndexInGap = position - originalGap.transcription_position
            console.log('Found gap at index:', gapIndex, 'word index in gap:', wordIndexInGap)

            // Find the corresponding segment by counting words
            let currentPosition = 0
            let segmentIndex = -1
            let wordIndex = -1
            for (let i = 0; i < newData.corrected_segments.length; i++) {
                const candidate = newData.corrected_segments[i]
                if (position >= currentPosition && position < currentPosition + candidate.words.length) {
                    segmentIndex = i
                    wordIndex = position - currentPosition
                    break
                }
                currentPosition += candidate.words.length
            }

            console.log('Segment search:', {
                position,
                segmentIndex,
                wordIndex,
                totalSegments: newData.corrected_segments.length
            })

            if (segmentIndex === -1 || wordIndex === -1) {
                console.error('Could not find matching segment for position:', position)
                return
            }

            const segment = newData.corrected_segments[segmentIndex]
            const timingWord = segment.words[wordIndex]

            console.log('Found matching segment:', {
                text: segment.text,
                wordCount: segment.words.length,
                wordIndex,
                word: timingWord?.text
            })

            if (!timingWord) {
                console.error('Could not find timing word in segment')
                return
            }

            // Create a new correction
            const newCorrection: WordCorrection = {
                original_word: originalGap.words[wordIndexInGap],
                corrected_word: updatedWords.join(' '),
                // NOTE(review): kept at 0 as before; `segmentIndex` is available here
                // if the consumer expects the real segment index — confirm.
                segment_index: 0,
                original_position: position,
                source: 'manual',
                confidence: 1.0,
                reason: 'Manual correction during review',
                alternatives: {},
                is_deletion: false,
                length: updatedWords.length,
                reference_positions: {}
            }

            // Update gap sequence. Only a previous manual correction for this same
            // word is replaced; manual corrections of other words in the gap are kept.
            const newWords = [...originalGap.words]
            newWords[wordIndexInGap] = updatedWords[0]
            newData.gap_sequences[gapIndex] = {
                ...originalGap,
                words: newWords,
                text: newWords.join(' '),
                corrections: originalGap.corrections
                    .filter((c: WordCorrection) =>
                        !(c.source === 'manual' && c.original_position === position))
                    .concat([newCorrection])
            }

            // Update segment
            const newSegmentWords = [...segment.words]
            newSegmentWords[wordIndex] = {
                ...timingWord,
                text: updatedWords[0],
                confidence: 1.0
            }
            newData.corrected_segments[segmentIndex] = {
                ...segment,
                words: newSegmentWords,
                text: newSegmentWords.map(word => word.text).join(' ')
            }

            console.log('Updated both gap and segment')

            // Update the corrected_text field
            newData.corrected_text = newData.corrected_segments
                .map((seg: LyricsSegment) => seg.text)
                .join('\n')

            // Record the edit only now that it has actually been applied.
            setManualCorrections(prev => {
                const next = new Map(prev)
                next.set(position, updatedWords)
                return next
            })
            setData(newData)
        } finally {
            // Always close the console group, including on early returns.
            console.groupEnd()
        }
    }, [data])

    // Submits the review. `data` already contains every manual edit (gap,
    // segment and corrected_text), so it is sent as-is; re-applying the edits
    // here would duplicate corrections and could drop words from multi-word gaps.
    // When nothing was edited, the untouched initial data is submitted.
    const handleFinishReview = useCallback(async () => {
        if (!apiClient) return

        const hasManualCorrections = manualCorrections.size > 0
        if (hasManualCorrections) {
            console.log('Manual corrections found:', Array.from(manualCorrections.entries()))
        } else {
            console.log('No manual corrections, submitting original data')
        }

        const dataToSubmit = normalizeDataForSubmission(hasManualCorrections ? data : initialData)

        try {
            console.log('Data being sent to API:', dataToSubmit)
            await apiClient.submitCorrections(dataToSubmit)
            setIsReviewComplete(true)
        } catch (error) {
            // Leave the review open so the user can retry.
            console.error('Failed to submit corrections:', error)
        }
    }, [apiClient, data, initialData, manualCorrections])

    return (
        <Box>
            {isReadOnly && (
                <Box sx={{ display: 'flex', alignItems: 'center', mb: 2, color: 'text.secondary' }}>
                    <LockIcon sx={{ mr: 1 }} />
                    <Typography variant="body2">
                        View Only Mode
                    </Typography>
                </Box>
            )}
            <Box sx={{
                display: 'flex',
                flexDirection: isMobile ? 'column' : 'row',
                gap: 2,
                justifyContent: 'space-between',
                alignItems: isMobile ? 'stretch' : 'center',
                mb: 3
            }}>
                <Typography variant="h4" sx={{ fontSize: isMobile ? '1.75rem' : '2.125rem' }}>
                    Lyrics Correction Review
                </Typography>
                {isReadOnly && (
                    <Button
                        variant="outlined"
                        startIcon={<UploadFileIcon />}
                        onClick={onFileLoad}
                        fullWidth={isMobile}
                    >
                        Load File
                    </Button>
                )}
            </Box>

            <Box sx={{ mb: 3 }}>
                <CorrectionMetrics
                    // Anchor metrics
                    anchorCount={data.metadata.anchor_sequences_count}
                    multiSourceAnchors={data.anchor_sequences.filter(anchor =>
                        Object.keys(anchor.reference_positions).length > 1).length}
                    singleSourceMatches={{
                        spotify: data.anchor_sequences.filter(anchor =>
                            Object.keys(anchor.reference_positions).length === 1 &&
                            'spotify' in anchor.reference_positions).length,
                        genius: data.anchor_sequences.filter(anchor =>
                            Object.keys(anchor.reference_positions).length === 1 &&
                            'genius' in anchor.reference_positions).length
                    }}
                    // Gap metrics
                    correctedGapCount={data.gap_sequences.filter(gap =>
                        gap.corrections?.length > 0).length}
                    uncorrectedGapCount={data.gap_sequences.filter(gap =>
                        !gap.corrections?.length).length}
                    uncorrectedGaps={data.gap_sequences
                        .filter(gap => !gap.corrections?.length)
                        .map(gap => ({
                            position: gap.transcription_position,
                            length: gap.length
                        }))}
                    // Correction details
                    replacedCount={data.gap_sequences.reduce((count, gap) =>
                        count + (gap.corrections?.filter(c => !c.is_deletion && !c.split_total).length ?? 0), 0)}
                    addedCount={data.gap_sequences.reduce((count, gap) =>
                        count + (gap.corrections?.filter(c => c.split_total).length ?? 0), 0)}
                    deletedCount={data.gap_sequences.reduce((count, gap) =>
                        count + (gap.corrections?.filter(c => c.is_deletion).length ?? 0), 0)}
                    onMetricClick={{
                        anchor: () => handleFlash('anchor'),
                        corrected: () => handleFlash('corrected'),
                        uncorrected: () => handleFlash('uncorrected')
                    }}
                />
            </Box>

            <DebugPanel
                data={data}
                currentSource={currentSource}
                anchorMatchInfo={anchorMatchInfo}
            />

            <Grid container spacing={2} direction={isMobile ? 'column' : 'row'}>
                <Grid item xs={12} md={6}>
                    <TranscriptionView
                        data={data}
                        onElementClick={setModalContent}
                        onWordClick={handleWordClick}
                        flashingType={flashingType}
                        highlightInfo={highlightInfo}
                    />
                </Grid>
                <Grid item xs={12} md={6}>
                    <ReferenceView
                        referenceTexts={data.reference_texts}
                        anchors={data.anchor_sequences}
                        gaps={data.gap_sequences}
                        onElementClick={setModalContent}
                        onWordClick={handleWordClick}
                        flashingType={flashingType}
                        corrected_segments={data.corrected_segments}
                        currentSource={currentSource}
                        onSourceChange={setCurrentSource}
                        onDebugInfoUpdate={setAnchorMatchInfo}
                    />
                </Grid>
            </Grid>

            <DetailsModal
                open={modalContent !== null}
                content={modalContent}
                onClose={() => setModalContent(null)}
                onUpdateCorrection={handleUpdateCorrection}
                isReadOnly={isReadOnly}
            />

            {!isReadOnly && apiClient && (
                <Box sx={{ mt: 2 }}>
                    <Button
                        variant="contained"
                        onClick={handleFinishReview}
                        disabled={isReviewComplete}
                    >
                        {isReviewComplete ? 'Review Complete' : 'Finish Review'}
                    </Button>
                </Box>
            )}
        </Box>
    )
}