lyrics-transcriber 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. lyrics_transcriber/cli/cli_main.py +2 -0
  2. lyrics_transcriber/core/config.py +1 -1
  3. lyrics_transcriber/core/controller.py +35 -2
  4. lyrics_transcriber/correction/corrector.py +8 -8
  5. lyrics_transcriber/correction/handlers/base.py +4 -0
  6. lyrics_transcriber/correction/handlers/extend_anchor.py +9 -0
  7. lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
  8. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
  9. lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
  10. lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
  11. lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +182 -0
  12. lyrics_transcriber/frontend/dist/index.html +1 -1
  13. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +18 -7
  14. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +28 -27
  15. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +108 -12
  16. lyrics_transcriber/frontend/src/components/EditModal.tsx +10 -2
  17. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +145 -141
  18. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +7 -2
  19. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +24 -12
  20. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +8 -15
  21. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
  22. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +36 -51
  23. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +17 -19
  24. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +41 -33
  25. lyrics_transcriber/frontend/src/components/shared/types.ts +6 -6
  26. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +146 -0
  27. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +24 -25
  28. lyrics_transcriber/frontend/src/types.ts +24 -23
  29. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  30. lyrics_transcriber/lyrics/base_lyrics_provider.py +1 -0
  31. lyrics_transcriber/lyrics/file_provider.py +89 -0
  32. lyrics_transcriber/output/cdg.py +32 -6
  33. lyrics_transcriber/output/video.py +17 -7
  34. lyrics_transcriber/review/server.py +24 -8
  35. {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/METADATA +1 -1
  36. {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/RECORD +39 -38
  37. {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/entry_points.txt +1 -0
  38. lyrics_transcriber/frontend/dist/assets/index-CQCER5Fo.js +0 -181
  39. lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
  40. {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/LICENSE +0 -0
  41. {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/WHEEL +0 -0
@@ -8,13 +8,12 @@ import React from 'react'
8
8
  import ContentCopyIcon from '@mui/icons-material/ContentCopy';
9
9
  import IconButton from '@mui/material/IconButton';
10
10
 
11
- interface HighlightedTextProps {
11
+ export interface HighlightedTextProps {
12
12
  // Input can be either raw text or pre-processed word positions
13
13
  text?: string
14
14
  wordPositions?: TranscriptionWordPosition[]
15
15
  // Common props
16
16
  anchors: AnchorSequence[]
17
- gaps: GapSequence[]
18
17
  highlightInfo: HighlightInfo | null
19
18
  mode: InteractionMode
20
19
  onElementClick: (content: ModalContent) => void
@@ -22,7 +21,7 @@ interface HighlightedTextProps {
22
21
  flashingType: FlashType
23
22
  // Reference-specific props
24
23
  isReference?: boolean
25
- currentSource?: 'genius' | 'spotify'
24
+ currentSource?: string
26
25
  preserveSegments?: boolean
27
26
  linePositions?: LinePosition[]
28
27
  currentTime?: number
@@ -51,42 +50,37 @@ export function HighlightedText({
51
50
  currentSource
52
51
  })
53
52
 
54
- const shouldWordFlash = (wordPos: TranscriptionWordPosition | { word: string; index: number }): boolean => {
53
+ const shouldWordFlash = (wordPos: TranscriptionWordPosition | { word: string; id: string }): boolean => {
55
54
  if (!flashingType) return false
56
55
 
57
56
  if ('type' in wordPos) {
58
57
  // Handle TranscriptionWordPosition
59
- const hasCorrections = wordPos.type === 'gap' &&
60
- Boolean((wordPos.sequence as GapSequence)?.corrections?.length)
58
+ const gap = wordPos.sequence as GapSequence
59
+ const isCorrected = wordPos.type === 'gap' &&
60
+ gap?.corrections?.some(correction =>
61
+ correction.word_id === wordPos.word.id
62
+ )
61
63
 
62
64
  return Boolean(
63
65
  (flashingType === 'anchor' && wordPos.type === 'anchor') ||
64
- (flashingType === 'corrected' && hasCorrections) ||
65
- (flashingType === 'uncorrected' && wordPos.type === 'gap' && !hasCorrections) ||
66
+ (flashingType === 'corrected' && isCorrected) ||
67
+ (flashingType === 'uncorrected' && wordPos.type === 'gap' && !isCorrected) ||
66
68
  (flashingType === 'word' && highlightInfo?.type === 'anchor' &&
67
- wordPos.type === 'anchor' && wordPos.sequence && (
68
- (wordPos.sequence as AnchorSequence).transcription_position === highlightInfo.transcriptionIndex ||
69
- (isReference && currentSource &&
70
- (wordPos.sequence as AnchorSequence).reference_positions[currentSource] === highlightInfo.referenceIndices?.[currentSource])
71
- ))
69
+ wordPos.type === 'anchor' && wordPos.sequence &&
70
+ highlightInfo.word_ids?.includes(wordPos.word.id))
72
71
  )
73
72
  } else {
74
73
  // Handle reference word
75
- const thisWordIndex = wordPos.index
76
- const anchor = anchors.find(a => {
77
- const position = isReference
78
- ? a.reference_positions[currentSource!]
79
- : a.transcription_position
80
- if (position === undefined) return false
81
- return thisWordIndex >= position && thisWordIndex < position + a.length
82
- })
74
+ if (!currentSource) return false
75
+
76
+ const anchor = anchors?.find(a =>
77
+ a?.reference_word_ids?.[currentSource]?.includes(wordPos.id)
78
+ )
83
79
 
84
80
  return Boolean(
85
81
  (flashingType === 'anchor' && anchor) ||
86
- (flashingType === 'word' && highlightInfo?.type === 'anchor' && anchor && (
87
- anchor.transcription_position === highlightInfo.transcriptionIndex ||
88
- (isReference && currentSource && anchor.reference_positions[currentSource] === highlightInfo.referenceIndices?.[currentSource])
89
- ))
82
+ (flashingType === 'word' && highlightInfo?.type === 'anchor' &&
83
+ highlightInfo.reference_word_ids?.[currentSource]?.includes(wordPos.id))
90
84
  )
91
85
  }
92
86
  }
@@ -103,7 +97,7 @@ export function HighlightedText({
103
97
  const renderContent = () => {
104
98
  if (wordPositions) {
105
99
  return wordPositions.map((wordPos, index) => (
106
- <React.Fragment key={`${wordPos.word.text}-${index}`}>
100
+ <React.Fragment key={wordPos.word.id}>
107
101
  <Word
108
102
  word={wordPos.word.text}
109
103
  shouldFlash={shouldWordFlash(wordPos)}
@@ -113,7 +107,7 @@ export function HighlightedText({
113
107
  isUncorrectedGap={wordPos.type === 'gap' && !(wordPos.sequence as GapSequence)?.corrections?.length}
114
108
  onClick={() => handleWordClick(
115
109
  wordPos.word.text,
116
- wordPos.position,
110
+ wordPos.word.id,
117
111
  wordPos.type === 'anchor' ? wordPos.sequence as AnchorSequence : undefined,
118
112
  wordPos.type === 'gap' ? wordPos.sequence as GapSequence : undefined
119
113
  )}
@@ -123,12 +117,12 @@ export function HighlightedText({
123
117
  ))
124
118
  } else if (text) {
125
119
  const lines = text.split('\n')
126
- let globalWordIndex = 0
120
+ let wordCount = 0
127
121
 
128
122
  return lines.map((line, lineIndex) => {
129
- const currentLinePosition = linePositions?.find((pos: LinePosition) => pos.position === globalWordIndex)
123
+ const currentLinePosition = linePositions?.find(pos => pos.position === wordCount)
130
124
  if (currentLinePosition?.isEmpty) {
131
- globalWordIndex++
125
+ wordCount++
132
126
  return (
133
127
  <Box key={`empty-${lineIndex}`} sx={{ display: 'flex', alignItems: 'flex-start' }}>
134
128
  <Typography
@@ -168,7 +162,7 @@ export function HighlightedText({
168
162
  paddingTop: '4px',
169
163
  }}
170
164
  >
171
- {lineIndex}
165
+ {currentLinePosition?.lineNumber ?? lineIndex}
172
166
  </Typography>
173
167
  <IconButton
174
168
  size="small"
@@ -189,32 +183,24 @@ export function HighlightedText({
189
183
  return <span key={`space-${lineIndex}-${wordIndex}`}> </span>
190
184
  }
191
185
 
192
- const position = globalWordIndex++
193
- const anchor = anchors.find(a => {
194
- const refPos = a.reference_positions[currentSource!]
195
- if (refPos === undefined) return false
196
- return position >= refPos && position < refPos + a.length
197
- })
198
-
199
- // Create a mock TranscriptionWordPosition for highlighting
200
- const wordPos: TranscriptionWordPosition = {
201
- word: { text: word },
202
- position,
203
- type: anchor ? 'anchor' : 'other',
204
- sequence: anchor,
205
- isInRange: true
206
- }
186
+ // Generate word ID based on position in the reference text
187
+ const wordId = `${currentSource}-word-${wordCount}`
188
+ wordCount++
189
+
190
+ // Find if this word is part of any anchor sequence
191
+ const anchor = currentSource ? anchors?.find(a =>
192
+ a?.reference_word_ids?.[currentSource]?.includes(wordId)
193
+ ) : undefined
207
194
 
208
195
  return (
209
196
  <Word
210
- key={`${word}-${lineIndex}-${wordIndex}`}
197
+ key={wordId}
211
198
  word={word}
212
- shouldFlash={shouldWordFlash({ word, index: position })}
213
- isCurrentlyPlaying={shouldHighlightWord(wordPos)}
199
+ shouldFlash={shouldWordFlash({ word, id: wordId })}
214
200
  isAnchor={Boolean(anchor)}
215
201
  isCorrectedGap={false}
216
202
  isUncorrectedGap={false}
217
- onClick={() => handleWordClick(word, position, anchor, undefined)}
203
+ onClick={() => handleWordClick(word, wordId, anchor, undefined)}
218
204
  />
219
205
  )
220
206
  })}
@@ -223,7 +209,6 @@ export function HighlightedText({
223
209
  )
224
210
  })
225
211
  }
226
-
227
212
  return null
228
213
  }
229
214
 
@@ -1,28 +1,26 @@
1
1
  import { Box, Button } from '@mui/material'
2
2
 
3
- interface SourceSelectorProps {
4
- currentSource: 'genius' | 'spotify'
5
- onSourceChange: (source: 'genius' | 'spotify') => void
3
+ export interface SourceSelectorProps {
4
+ currentSource: string
5
+ onSourceChange: (source: string) => void
6
+ availableSources: string[]
6
7
  }
7
8
 
8
- export function SourceSelector({ currentSource, onSourceChange }: SourceSelectorProps) {
9
+ export function SourceSelector({ currentSource, onSourceChange, availableSources }: SourceSelectorProps) {
9
10
  return (
10
11
  <Box>
11
- <Button
12
- size="small"
13
- variant={currentSource === 'genius' ? 'contained' : 'outlined'}
14
- onClick={() => onSourceChange('genius')}
15
- sx={{ mr: 1 }}
16
- >
17
- Genius
18
- </Button>
19
- <Button
20
- size="small"
21
- variant={currentSource === 'spotify' ? 'contained' : 'outlined'}
22
- onClick={() => onSourceChange('spotify')}
23
- >
24
- Spotify
25
- </Button>
12
+ {availableSources.map((source) => (
13
+ <Button
14
+ key={source}
15
+ size="small"
16
+ variant={currentSource === source ? 'contained' : 'outlined'}
17
+ onClick={() => onSourceChange(source)}
18
+ sx={{ mr: 1 }}
19
+ >
20
+ {/* Capitalize first letter of source */}
21
+ {source.charAt(0).toUpperCase() + source.slice(1)}
22
+ </Button>
23
+ ))}
26
24
  </Box>
27
25
  )
28
26
  }
@@ -3,12 +3,22 @@ import { AnchorSequence, GapSequence, InteractionMode } from '../../../types'
3
3
  import { ModalContent } from '../../LyricsAnalyzer'
4
4
  import { WordClickInfo } from '../types'
5
5
 
6
- interface UseWordClickProps {
6
+ // Define debug info type
7
+ interface WordDebugInfo {
8
+ wordSplitInfo?: {
9
+ text: string
10
+ startIndex: number
11
+ endIndex: number
12
+ }
13
+ nearbyAnchors?: AnchorSequence[]
14
+ }
15
+
16
+ export interface UseWordClickProps {
7
17
  mode: InteractionMode
8
18
  onElementClick: (content: ModalContent) => void
9
19
  onWordClick?: (info: WordClickInfo) => void
10
20
  isReference?: boolean
11
- currentSource?: 'genius' | 'spotify'
21
+ currentSource?: string
12
22
  }
13
23
 
14
24
  export function useWordClick({
@@ -20,72 +30,69 @@ export function useWordClick({
20
30
  }: UseWordClickProps) {
21
31
  const handleWordClick = useCallback((
22
32
  word: string,
23
- position: number,
33
+ wordId: string,
24
34
  anchor?: AnchorSequence,
25
35
  gap?: GapSequence,
26
- debugInfo?: any
36
+ debugInfo?: WordDebugInfo
27
37
  ) => {
28
38
  console.log(JSON.stringify({
29
39
  debug: {
30
40
  clickedWord: word,
31
- position,
41
+ wordId,
32
42
  isReference,
33
43
  currentSource,
34
44
  wordInfo: debugInfo?.wordSplitInfo,
35
45
  nearbyAnchors: debugInfo?.nearbyAnchors,
36
46
  anchorInfo: anchor && {
37
- transcriptionPos: anchor.transcription_position,
47
+ wordIds: anchor.word_ids,
38
48
  length: anchor.length,
39
49
  words: anchor.words,
40
- refPositions: anchor.reference_positions
50
+ referenceWordIds: anchor.reference_word_ids
41
51
  },
42
52
  gapInfo: gap && {
43
- transcriptionPos: gap.transcription_position,
53
+ wordIds: gap.word_ids,
44
54
  length: gap.length,
45
55
  words: gap.words,
46
56
  corrections: gap.corrections.map(c => ({
57
+ original_word: c.original_word,
58
+ corrected_word: c.corrected_word,
59
+ word_id: c.word_id,
47
60
  length: c.length,
48
- refPositions: c.reference_positions
61
+ is_deletion: c.is_deletion,
62
+ split_index: c.split_index,
63
+ split_total: c.split_total
49
64
  }))
50
65
  },
51
66
  belongsToAnchor: anchor && (
52
67
  isReference
53
- ? position >= (anchor.reference_positions[currentSource!] ?? -1) &&
54
- position < ((anchor.reference_positions[currentSource!] ?? -1) + anchor.length)
55
- : position >= anchor.transcription_position &&
56
- position < (anchor.transcription_position + anchor.length)
68
+ ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
69
+ : anchor.word_ids.includes(wordId)
57
70
  ),
58
71
  belongsToGap: gap && (
59
72
  isReference
60
- ? gap.corrections[0]?.reference_positions?.[currentSource!] !== undefined &&
61
- position >= (gap.corrections[0].reference_positions![currentSource!]) &&
62
- position < (gap.corrections[0].reference_positions![currentSource!] + gap.corrections[0].length)
63
- : position >= gap.transcription_position &&
64
- position < (gap.transcription_position + gap.length)
65
- )
73
+ ? gap.corrections.some(c => c.word_id === wordId)
74
+ : gap.word_ids.includes(wordId)
75
+ ),
76
+ wordIndexInGap: gap && gap.words.indexOf(word),
77
+ hasMatchingCorrection: gap && gap.corrections.some(c => c.word_id === wordId)
66
78
  }
67
79
  }, null, 2))
68
80
 
69
81
  const belongsToAnchor = anchor && (
70
82
  isReference
71
- ? position >= (anchor.reference_positions[currentSource!] ?? -1) &&
72
- position < ((anchor.reference_positions[currentSource!] ?? -1) + anchor.length)
73
- : position >= anchor.transcription_position &&
74
- position < (anchor.transcription_position + anchor.length)
83
+ ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
84
+ : anchor.word_ids.includes(wordId)
75
85
  )
76
86
 
77
87
  const belongsToGap = gap && (
78
88
  isReference
79
- ? gap.corrections[0]?.reference_positions?.[currentSource!] !== undefined &&
80
- position >= (gap.corrections[0].reference_positions![currentSource!]) &&
81
- position < (gap.corrections[0].reference_positions![currentSource!] + gap.corrections[0].length)
82
- : position >= gap.transcription_position &&
83
- position < (gap.transcription_position + gap.length)
89
+ ? gap.corrections.some(c => c.word_id === wordId)
90
+ : gap.word_ids.includes(wordId)
84
91
  )
85
92
 
86
93
  if (mode === 'highlight' || mode === 'edit') {
87
94
  onWordClick?.({
88
- wordIndex: position,
95
+ word_id: wordId,
89
96
  type: belongsToAnchor ? 'anchor' : belongsToGap ? 'gap' : 'other',
90
97
  anchor: belongsToAnchor ? anchor : undefined,
91
98
  gap: belongsToGap ? gap : undefined
@@ -96,7 +103,7 @@ export function useWordClick({
96
103
  type: 'anchor',
97
104
  data: {
98
105
  ...anchor,
99
- position,
106
+ wordId,
100
107
  word
101
108
  }
102
109
  })
@@ -105,16 +112,17 @@ export function useWordClick({
105
112
  type: 'gap',
106
113
  data: {
107
114
  ...gap,
108
- position,
115
+ wordId,
109
116
  word
110
117
  }
111
118
  })
112
119
  } else if (!isReference) {
113
120
  // Create synthetic gap for non-sequence words (transcription view only)
114
121
  const syntheticGap: GapSequence = {
122
+ id: `synthetic-${wordId}`,
115
123
  text: word,
116
124
  words: [word],
117
- transcription_position: position,
125
+ word_ids: [wordId],
118
126
  length: 1,
119
127
  corrections: [],
120
128
  preceding_anchor: null,
@@ -125,7 +133,7 @@ export function useWordClick({
125
133
  type: 'gap',
126
134
  data: {
127
135
  ...syntheticGap,
128
- position: 0,
136
+ wordId,
129
137
  word
130
138
  }
131
139
  })
@@ -6,7 +6,7 @@ export type FlashType = 'anchor' | 'corrected' | 'uncorrected' | 'word' | null
6
6
 
7
7
  // Common word click handling
8
8
  export interface WordClickInfo {
9
- wordIndex: number
9
+ word_id: string
10
10
  type: 'anchor' | 'gap' | 'other'
11
11
  anchor?: AnchorSequence
12
12
  gap?: GapSequence
@@ -29,13 +29,13 @@ export interface BaseWordPosition {
29
29
 
30
30
  // Transcription-specific word position with timing info
31
31
  export interface TranscriptionWordPosition extends BaseWordPosition {
32
- position: number
33
- isInRange: boolean
34
32
  word: {
33
+ id: string
35
34
  text: string
36
35
  start_time?: number
37
36
  end_time?: number
38
37
  }
38
+ isInRange: boolean
39
39
  }
40
40
 
41
41
  // Reference-specific word position with simple string word
@@ -81,8 +81,8 @@ export interface ReferenceViewProps extends BaseViewProps {
81
81
  referenceTexts: Record<string, string>
82
82
  anchors: LyricsData['anchor_sequences']
83
83
  gaps: LyricsData['gap_sequences']
84
- currentSource: 'genius' | 'spotify'
85
- onSourceChange: (source: 'genius' | 'spotify') => void
84
+ currentSource: string
85
+ onSourceChange: (source: string) => void
86
86
  corrected_segments: LyricsSegment[]
87
87
  }
88
88
 
@@ -93,7 +93,7 @@ export interface HighlightedTextProps extends BaseViewProps {
93
93
  anchors: AnchorSequence[]
94
94
  gaps: GapSequence[]
95
95
  isReference?: boolean
96
- currentSource?: 'genius' | 'spotify'
96
+ currentSource?: string
97
97
  preserveSegments?: boolean
98
98
  linePositions?: LinePosition[]
99
99
  }
@@ -0,0 +1,146 @@
1
+ import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
2
+ import { nanoid } from 'nanoid';
3
+
4
+ // Define server-side types just for this file
5
+ interface ServerData {
6
+ transcription_position: number;
7
+ length: number;
8
+ words: string[];
9
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
10
+ [key: string]: any;
11
+ }
12
+
13
+ export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
14
+ // Create a deep clone to avoid modifying the original
15
+ const normalized = JSON.parse(JSON.stringify(data));
16
+
17
+ // Preserve floating point numbers with original precision
18
+ const preserveFloats = (obj: Record<string, unknown>): void => {
19
+ for (const key in obj) {
20
+ const value = obj[key];
21
+ if (typeof value === 'number') {
22
+ // Handle integers and floats differently
23
+ let formatted: string;
24
+ if (Number.isInteger(value)) {
25
+ formatted = value.toFixed(1); // Force decimal point for integers
26
+ } else {
27
+ formatted = value.toString(); // Keep original precision for floats
28
+ }
29
+ obj[key] = parseFloat(formatted);
30
+ } else if (typeof value === 'object' && value !== null) {
31
+ preserveFloats(value as Record<string, unknown>);
32
+ }
33
+ }
34
+ };
35
+ preserveFloats(normalized);
36
+ return normalized;
37
+ }
38
+
39
+ // Helper function to find word IDs for a sequence based on original positions
40
+ function findWordIdsForSequence(
41
+ segments: LyricsSegment[],
42
+ sequence: ServerData
43
+ ): string[] {
44
+ const allWords = segments.flatMap(s => s.words);
45
+ const startIndex = sequence.transcription_position;
46
+ const endIndex = startIndex + sequence.length;
47
+
48
+ console.log('Finding word IDs for sequence:', JSON.stringify({
49
+ position: sequence.transcription_position,
50
+ length: sequence.length,
51
+ words: allWords.slice(startIndex, endIndex).map(w => w.text)
52
+ }));
53
+
54
+ return allWords.slice(startIndex, endIndex).map(word => word.id);
55
+ }
56
+
57
+ // Helper function to find word ID for a correction
58
+ function findWordIdForCorrection(
59
+ segments: LyricsSegment[],
60
+ correction: { original_word: string; }
61
+ ): string {
62
+ for (const segment of segments) {
63
+ const word = segment.words.find(w => w.text === correction.original_word);
64
+ if (word) return word.id;
65
+ }
66
+ return nanoid(); // Fallback if word not found
67
+ }
68
+
69
+ // Helper function to find word IDs in reference text
70
+ function findReferenceWordIds(
71
+ referenceSource: string,
72
+ sequence: ServerData
73
+ ): string[] {
74
+ const referencePosition = sequence.reference_positions?.[referenceSource];
75
+ if (typeof referencePosition !== 'number') {
76
+ return [];
77
+ }
78
+
79
+ // Generate IDs in the same format as HighlightedText
80
+ const wordIds = Array.from({ length: sequence.length },
81
+ (_, i) => `${referenceSource}-word-${referencePosition + i}`
82
+ );
83
+
84
+ return wordIds;
85
+ }
86
+
87
+ export function initializeDataWithIds(data: CorrectionData): CorrectionData {
88
+ const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;
89
+
90
+ // Initialize segment and word IDs
91
+ newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
92
+ ...segment,
93
+ id: segment.id || nanoid(),
94
+ words: segment.words.map((word: Word) => ({
95
+ ...word,
96
+ id: word.id || nanoid()
97
+ }))
98
+ }));
99
+
100
+ console.log('Segments after ID initialization:', JSON.stringify({
101
+ segmentCount: newData.corrected_segments.length,
102
+ totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
103
+ sampleWords: newData.corrected_segments[0].words.map(w => ({ id: w.id, text: w.text }))
104
+ }));
105
+
106
+ // Update anchor sequences with word IDs based on positions
107
+ newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
108
+ const serverAnchor = anchor as unknown as ServerData;
109
+
110
+ // Get reference word IDs for each source
111
+ const referenceWordIds: Record<string, string[]> = {};
112
+ Object.keys(data.reference_texts || {}).forEach(source => {
113
+ referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
114
+ });
115
+
116
+ console.log('Processing anchor with references:', JSON.stringify({
117
+ words: anchor.words,
118
+ reference_positions: serverAnchor.reference_positions,
119
+ reference_word_ids: referenceWordIds
120
+ }));
121
+
122
+ return {
123
+ ...anchor,
124
+ id: anchor.id || nanoid(),
125
+ word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
126
+ reference_word_ids: referenceWordIds
127
+ } as AnchorSequence;
128
+ });
129
+
130
+ // Update gap sequences to use word IDs
131
+ newData.gap_sequences = newData.gap_sequences.map((gap) => {
132
+ const serverGap = gap as unknown as ServerData;
133
+ return {
134
+ ...gap,
135
+ id: gap.id || nanoid(),
136
+ word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
137
+ corrections: gap.corrections.map((correction: WordCorrection) => ({
138
+ ...correction,
139
+ id: correction.id || nanoid(),
140
+ word_id: correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction)
141
+ }))
142
+ } as GapSequence;
143
+ });
144
+
145
+ return newData;
146
+ }
@@ -4,40 +4,37 @@ import { LinePosition } from '../types'
4
4
  export function calculateReferenceLinePositions(
5
5
  corrected_segments: LyricsSegment[],
6
6
  anchors: LyricsData['anchor_sequences'],
7
- currentSource: 'genius' | 'spotify'
7
+ currentSource: string
8
8
  ): { linePositions: LinePosition[] } {
9
9
  const linePositions: LinePosition[] = []
10
10
  let currentReferencePosition = 0
11
11
 
12
12
  // First, find all anchor sequences that cover entire lines
13
- const fullLineAnchors = anchors.map(anchor => {
14
- const referencePos = anchor.reference_positions[currentSource]
15
- if (referencePos === undefined) return null
13
+ const fullLineAnchors = anchors?.map(anchor => {
14
+ // Add null checks for anchor and reference_word_ids
15
+ if (!anchor?.reference_word_ids?.[currentSource]) return null
16
16
 
17
- return {
18
- referenceStart: referencePos,
19
- referenceLength: anchor.length,
20
- transcriptionLine: corrected_segments.findIndex((segment, segmentIndex) => {
21
- const words = segment.words
22
- if (!words.length) return false
23
-
24
- // Calculate the absolute position of the first and last words in this segment
25
- let absolutePosition = 0
26
- for (let i = 0; i < segmentIndex; i++) {
27
- absolutePosition += corrected_segments[i].words.length
28
- }
17
+ const referenceWordIds = anchor.reference_word_ids[currentSource]
18
+ if (!referenceWordIds?.length) return null
29
19
 
30
- const firstWordPosition = absolutePosition
31
- const lastWordPosition = absolutePosition + words.length - 1
20
+ return {
21
+ referenceWordIds,
22
+ transcriptionLine: corrected_segments.findIndex((segment) => {
23
+ const wordIds = segment.words.map(w => w.id)
24
+ if (!wordIds.length) return false
32
25
 
33
- return firstWordPosition >= anchor.transcription_position &&
34
- lastWordPosition < anchor.transcription_position + anchor.length
26
+ // Check if all word IDs in this segment are part of the anchor
27
+ return wordIds.every(id => anchor.word_ids?.includes(id))
35
28
  })
36
29
  }
37
- }).filter((a): a is NonNullable<typeof a> => a !== null)
30
+ })?.filter((a): a is NonNullable<typeof a> => a !== null) ?? []
38
31
 
39
- // Sort by reference position to process in order
40
- fullLineAnchors.sort((a, b) => a.referenceStart - b.referenceStart)
32
+ // Sort by first reference word ID to process in order
33
+ fullLineAnchors.sort((a, b) => {
34
+ const firstIdA = a.referenceWordIds[0]
35
+ const firstIdB = b.referenceWordIds[0]
36
+ return firstIdA.localeCompare(firstIdB)
37
+ })
41
38
 
42
39
  // Add line positions with padding
43
40
  let currentLine = 0
@@ -55,10 +52,12 @@ export function calculateReferenceLinePositions(
55
52
 
56
53
  // Add the actual line position
57
54
  linePositions.push({
58
- position: anchor.referenceStart,
59
- lineNumber: currentLine
55
+ position: currentReferencePosition,
56
+ lineNumber: currentLine,
57
+ isEmpty: false
60
58
  })
61
59
  currentLine++
60
+ currentReferencePosition++
62
61
  })
63
62
 
64
63
  // Add any remaining lines after the last anchor