lyrics-transcriber 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/cli/cli_main.py +2 -0
- lyrics_transcriber/core/config.py +1 -1
- lyrics_transcriber/core/controller.py +35 -2
- lyrics_transcriber/correction/corrector.py +8 -8
- lyrics_transcriber/correction/handlers/base.py +4 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +9 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
- lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
- lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
- lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +182 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +18 -7
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +28 -27
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +108 -12
- lyrics_transcriber/frontend/src/components/EditModal.tsx +10 -2
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +145 -141
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +7 -2
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +24 -12
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +8 -15
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +36 -51
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +17 -19
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +41 -33
- lyrics_transcriber/frontend/src/components/shared/types.ts +6 -6
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +146 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +24 -25
- lyrics_transcriber/frontend/src/types.ts +24 -23
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/lyrics/base_lyrics_provider.py +1 -0
- lyrics_transcriber/lyrics/file_provider.py +89 -0
- lyrics_transcriber/output/cdg.py +32 -6
- lyrics_transcriber/output/video.py +17 -7
- lyrics_transcriber/review/server.py +24 -8
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/METADATA +1 -1
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/RECORD +39 -38
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/frontend/dist/assets/index-CQCER5Fo.js +0 -181
- lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/WHEEL +0 -0
@@ -8,13 +8,12 @@ import React from 'react'
|
|
8
8
|
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
|
9
9
|
import IconButton from '@mui/material/IconButton';
|
10
10
|
|
11
|
-
interface HighlightedTextProps {
|
11
|
+
export interface HighlightedTextProps {
|
12
12
|
// Input can be either raw text or pre-processed word positions
|
13
13
|
text?: string
|
14
14
|
wordPositions?: TranscriptionWordPosition[]
|
15
15
|
// Common props
|
16
16
|
anchors: AnchorSequence[]
|
17
|
-
gaps: GapSequence[]
|
18
17
|
highlightInfo: HighlightInfo | null
|
19
18
|
mode: InteractionMode
|
20
19
|
onElementClick: (content: ModalContent) => void
|
@@ -22,7 +21,7 @@ interface HighlightedTextProps {
|
|
22
21
|
flashingType: FlashType
|
23
22
|
// Reference-specific props
|
24
23
|
isReference?: boolean
|
25
|
-
currentSource?:
|
24
|
+
currentSource?: string
|
26
25
|
preserveSegments?: boolean
|
27
26
|
linePositions?: LinePosition[]
|
28
27
|
currentTime?: number
|
@@ -51,42 +50,37 @@ export function HighlightedText({
|
|
51
50
|
currentSource
|
52
51
|
})
|
53
52
|
|
54
|
-
const shouldWordFlash = (wordPos: TranscriptionWordPosition | { word: string;
|
53
|
+
const shouldWordFlash = (wordPos: TranscriptionWordPosition | { word: string; id: string }): boolean => {
|
55
54
|
if (!flashingType) return false
|
56
55
|
|
57
56
|
if ('type' in wordPos) {
|
58
57
|
// Handle TranscriptionWordPosition
|
59
|
-
const
|
60
|
-
|
58
|
+
const gap = wordPos.sequence as GapSequence
|
59
|
+
const isCorrected = wordPos.type === 'gap' &&
|
60
|
+
gap?.corrections?.some(correction =>
|
61
|
+
correction.word_id === wordPos.word.id
|
62
|
+
)
|
61
63
|
|
62
64
|
return Boolean(
|
63
65
|
(flashingType === 'anchor' && wordPos.type === 'anchor') ||
|
64
|
-
(flashingType === 'corrected' &&
|
65
|
-
(flashingType === 'uncorrected' && wordPos.type === 'gap' && !
|
66
|
+
(flashingType === 'corrected' && isCorrected) ||
|
67
|
+
(flashingType === 'uncorrected' && wordPos.type === 'gap' && !isCorrected) ||
|
66
68
|
(flashingType === 'word' && highlightInfo?.type === 'anchor' &&
|
67
|
-
wordPos.type === 'anchor' && wordPos.sequence &&
|
68
|
-
|
69
|
-
(isReference && currentSource &&
|
70
|
-
(wordPos.sequence as AnchorSequence).reference_positions[currentSource] === highlightInfo.referenceIndices?.[currentSource])
|
71
|
-
))
|
69
|
+
wordPos.type === 'anchor' && wordPos.sequence &&
|
70
|
+
highlightInfo.word_ids?.includes(wordPos.word.id))
|
72
71
|
)
|
73
72
|
} else {
|
74
73
|
// Handle reference word
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
if (position === undefined) return false
|
81
|
-
return thisWordIndex >= position && thisWordIndex < position + a.length
|
82
|
-
})
|
74
|
+
if (!currentSource) return false
|
75
|
+
|
76
|
+
const anchor = anchors?.find(a =>
|
77
|
+
a?.reference_word_ids?.[currentSource]?.includes(wordPos.id)
|
78
|
+
)
|
83
79
|
|
84
80
|
return Boolean(
|
85
81
|
(flashingType === 'anchor' && anchor) ||
|
86
|
-
(flashingType === 'word' && highlightInfo?.type === 'anchor' &&
|
87
|
-
|
88
|
-
(isReference && currentSource && anchor.reference_positions[currentSource] === highlightInfo.referenceIndices?.[currentSource])
|
89
|
-
))
|
82
|
+
(flashingType === 'word' && highlightInfo?.type === 'anchor' &&
|
83
|
+
highlightInfo.reference_word_ids?.[currentSource]?.includes(wordPos.id))
|
90
84
|
)
|
91
85
|
}
|
92
86
|
}
|
@@ -103,7 +97,7 @@ export function HighlightedText({
|
|
103
97
|
const renderContent = () => {
|
104
98
|
if (wordPositions) {
|
105
99
|
return wordPositions.map((wordPos, index) => (
|
106
|
-
<React.Fragment key={
|
100
|
+
<React.Fragment key={wordPos.word.id}>
|
107
101
|
<Word
|
108
102
|
word={wordPos.word.text}
|
109
103
|
shouldFlash={shouldWordFlash(wordPos)}
|
@@ -113,7 +107,7 @@ export function HighlightedText({
|
|
113
107
|
isUncorrectedGap={wordPos.type === 'gap' && !(wordPos.sequence as GapSequence)?.corrections?.length}
|
114
108
|
onClick={() => handleWordClick(
|
115
109
|
wordPos.word.text,
|
116
|
-
wordPos.
|
110
|
+
wordPos.word.id,
|
117
111
|
wordPos.type === 'anchor' ? wordPos.sequence as AnchorSequence : undefined,
|
118
112
|
wordPos.type === 'gap' ? wordPos.sequence as GapSequence : undefined
|
119
113
|
)}
|
@@ -123,12 +117,12 @@ export function HighlightedText({
|
|
123
117
|
))
|
124
118
|
} else if (text) {
|
125
119
|
const lines = text.split('\n')
|
126
|
-
let
|
120
|
+
let wordCount = 0
|
127
121
|
|
128
122
|
return lines.map((line, lineIndex) => {
|
129
|
-
const currentLinePosition = linePositions?.find(
|
123
|
+
const currentLinePosition = linePositions?.find(pos => pos.position === wordCount)
|
130
124
|
if (currentLinePosition?.isEmpty) {
|
131
|
-
|
125
|
+
wordCount++
|
132
126
|
return (
|
133
127
|
<Box key={`empty-${lineIndex}`} sx={{ display: 'flex', alignItems: 'flex-start' }}>
|
134
128
|
<Typography
|
@@ -168,7 +162,7 @@ export function HighlightedText({
|
|
168
162
|
paddingTop: '4px',
|
169
163
|
}}
|
170
164
|
>
|
171
|
-
{lineIndex}
|
165
|
+
{currentLinePosition?.lineNumber ?? lineIndex}
|
172
166
|
</Typography>
|
173
167
|
<IconButton
|
174
168
|
size="small"
|
@@ -189,32 +183,24 @@ export function HighlightedText({
|
|
189
183
|
return <span key={`space-${lineIndex}-${wordIndex}`}> </span>
|
190
184
|
}
|
191
185
|
|
192
|
-
|
193
|
-
const
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
const wordPos: TranscriptionWordPosition = {
|
201
|
-
word: { text: word },
|
202
|
-
position,
|
203
|
-
type: anchor ? 'anchor' : 'other',
|
204
|
-
sequence: anchor,
|
205
|
-
isInRange: true
|
206
|
-
}
|
186
|
+
// Generate word ID based on position in the reference text
|
187
|
+
const wordId = `${currentSource}-word-${wordCount}`
|
188
|
+
wordCount++
|
189
|
+
|
190
|
+
// Find if this word is part of any anchor sequence
|
191
|
+
const anchor = currentSource ? anchors?.find(a =>
|
192
|
+
a?.reference_word_ids?.[currentSource]?.includes(wordId)
|
193
|
+
) : undefined
|
207
194
|
|
208
195
|
return (
|
209
196
|
<Word
|
210
|
-
key={
|
197
|
+
key={wordId}
|
211
198
|
word={word}
|
212
|
-
shouldFlash={shouldWordFlash({ word,
|
213
|
-
isCurrentlyPlaying={shouldHighlightWord(wordPos)}
|
199
|
+
shouldFlash={shouldWordFlash({ word, id: wordId })}
|
214
200
|
isAnchor={Boolean(anchor)}
|
215
201
|
isCorrectedGap={false}
|
216
202
|
isUncorrectedGap={false}
|
217
|
-
onClick={() => handleWordClick(word,
|
203
|
+
onClick={() => handleWordClick(word, wordId, anchor, undefined)}
|
218
204
|
/>
|
219
205
|
)
|
220
206
|
})}
|
@@ -223,7 +209,6 @@ export function HighlightedText({
|
|
223
209
|
)
|
224
210
|
})
|
225
211
|
}
|
226
|
-
|
227
212
|
return null
|
228
213
|
}
|
229
214
|
|
@@ -1,28 +1,26 @@
|
|
1
1
|
import { Box, Button } from '@mui/material'
|
2
2
|
|
3
|
-
interface SourceSelectorProps {
|
4
|
-
currentSource:
|
5
|
-
onSourceChange: (source:
|
3
|
+
export interface SourceSelectorProps {
|
4
|
+
currentSource: string
|
5
|
+
onSourceChange: (source: string) => void
|
6
|
+
availableSources: string[]
|
6
7
|
}
|
7
8
|
|
8
|
-
export function SourceSelector({ currentSource, onSourceChange }: SourceSelectorProps) {
|
9
|
+
export function SourceSelector({ currentSource, onSourceChange, availableSources }: SourceSelectorProps) {
|
9
10
|
return (
|
10
11
|
<Box>
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
>
|
24
|
-
Spotify
|
25
|
-
</Button>
|
12
|
+
{availableSources.map((source) => (
|
13
|
+
<Button
|
14
|
+
key={source}
|
15
|
+
size="small"
|
16
|
+
variant={currentSource === source ? 'contained' : 'outlined'}
|
17
|
+
onClick={() => onSourceChange(source)}
|
18
|
+
sx={{ mr: 1 }}
|
19
|
+
>
|
20
|
+
{/* Capitalize first letter of source */}
|
21
|
+
{source.charAt(0).toUpperCase() + source.slice(1)}
|
22
|
+
</Button>
|
23
|
+
))}
|
26
24
|
</Box>
|
27
25
|
)
|
28
26
|
}
|
@@ -3,12 +3,22 @@ import { AnchorSequence, GapSequence, InteractionMode } from '../../../types'
|
|
3
3
|
import { ModalContent } from '../../LyricsAnalyzer'
|
4
4
|
import { WordClickInfo } from '../types'
|
5
5
|
|
6
|
-
|
6
|
+
// Define debug info type
|
7
|
+
interface WordDebugInfo {
|
8
|
+
wordSplitInfo?: {
|
9
|
+
text: string
|
10
|
+
startIndex: number
|
11
|
+
endIndex: number
|
12
|
+
}
|
13
|
+
nearbyAnchors?: AnchorSequence[]
|
14
|
+
}
|
15
|
+
|
16
|
+
export interface UseWordClickProps {
|
7
17
|
mode: InteractionMode
|
8
18
|
onElementClick: (content: ModalContent) => void
|
9
19
|
onWordClick?: (info: WordClickInfo) => void
|
10
20
|
isReference?: boolean
|
11
|
-
currentSource?:
|
21
|
+
currentSource?: string
|
12
22
|
}
|
13
23
|
|
14
24
|
export function useWordClick({
|
@@ -20,72 +30,69 @@ export function useWordClick({
|
|
20
30
|
}: UseWordClickProps) {
|
21
31
|
const handleWordClick = useCallback((
|
22
32
|
word: string,
|
23
|
-
|
33
|
+
wordId: string,
|
24
34
|
anchor?: AnchorSequence,
|
25
35
|
gap?: GapSequence,
|
26
|
-
debugInfo?:
|
36
|
+
debugInfo?: WordDebugInfo
|
27
37
|
) => {
|
28
38
|
console.log(JSON.stringify({
|
29
39
|
debug: {
|
30
40
|
clickedWord: word,
|
31
|
-
|
41
|
+
wordId,
|
32
42
|
isReference,
|
33
43
|
currentSource,
|
34
44
|
wordInfo: debugInfo?.wordSplitInfo,
|
35
45
|
nearbyAnchors: debugInfo?.nearbyAnchors,
|
36
46
|
anchorInfo: anchor && {
|
37
|
-
|
47
|
+
wordIds: anchor.word_ids,
|
38
48
|
length: anchor.length,
|
39
49
|
words: anchor.words,
|
40
|
-
|
50
|
+
referenceWordIds: anchor.reference_word_ids
|
41
51
|
},
|
42
52
|
gapInfo: gap && {
|
43
|
-
|
53
|
+
wordIds: gap.word_ids,
|
44
54
|
length: gap.length,
|
45
55
|
words: gap.words,
|
46
56
|
corrections: gap.corrections.map(c => ({
|
57
|
+
original_word: c.original_word,
|
58
|
+
corrected_word: c.corrected_word,
|
59
|
+
word_id: c.word_id,
|
47
60
|
length: c.length,
|
48
|
-
|
61
|
+
is_deletion: c.is_deletion,
|
62
|
+
split_index: c.split_index,
|
63
|
+
split_total: c.split_total
|
49
64
|
}))
|
50
65
|
},
|
51
66
|
belongsToAnchor: anchor && (
|
52
67
|
isReference
|
53
|
-
?
|
54
|
-
|
55
|
-
: position >= anchor.transcription_position &&
|
56
|
-
position < (anchor.transcription_position + anchor.length)
|
68
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
69
|
+
: anchor.word_ids.includes(wordId)
|
57
70
|
),
|
58
71
|
belongsToGap: gap && (
|
59
72
|
isReference
|
60
|
-
? gap.corrections
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
)
|
73
|
+
? gap.corrections.some(c => c.word_id === wordId)
|
74
|
+
: gap.word_ids.includes(wordId)
|
75
|
+
),
|
76
|
+
wordIndexInGap: gap && gap.words.indexOf(word),
|
77
|
+
hasMatchingCorrection: gap && gap.corrections.some(c => c.word_id === wordId)
|
66
78
|
}
|
67
79
|
}, null, 2))
|
68
80
|
|
69
81
|
const belongsToAnchor = anchor && (
|
70
82
|
isReference
|
71
|
-
?
|
72
|
-
|
73
|
-
: position >= anchor.transcription_position &&
|
74
|
-
position < (anchor.transcription_position + anchor.length)
|
83
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
84
|
+
: anchor.word_ids.includes(wordId)
|
75
85
|
)
|
76
86
|
|
77
87
|
const belongsToGap = gap && (
|
78
88
|
isReference
|
79
|
-
? gap.corrections
|
80
|
-
|
81
|
-
position < (gap.corrections[0].reference_positions![currentSource!] + gap.corrections[0].length)
|
82
|
-
: position >= gap.transcription_position &&
|
83
|
-
position < (gap.transcription_position + gap.length)
|
89
|
+
? gap.corrections.some(c => c.word_id === wordId)
|
90
|
+
: gap.word_ids.includes(wordId)
|
84
91
|
)
|
85
92
|
|
86
93
|
if (mode === 'highlight' || mode === 'edit') {
|
87
94
|
onWordClick?.({
|
88
|
-
|
95
|
+
word_id: wordId,
|
89
96
|
type: belongsToAnchor ? 'anchor' : belongsToGap ? 'gap' : 'other',
|
90
97
|
anchor: belongsToAnchor ? anchor : undefined,
|
91
98
|
gap: belongsToGap ? gap : undefined
|
@@ -96,7 +103,7 @@ export function useWordClick({
|
|
96
103
|
type: 'anchor',
|
97
104
|
data: {
|
98
105
|
...anchor,
|
99
|
-
|
106
|
+
wordId,
|
100
107
|
word
|
101
108
|
}
|
102
109
|
})
|
@@ -105,16 +112,17 @@ export function useWordClick({
|
|
105
112
|
type: 'gap',
|
106
113
|
data: {
|
107
114
|
...gap,
|
108
|
-
|
115
|
+
wordId,
|
109
116
|
word
|
110
117
|
}
|
111
118
|
})
|
112
119
|
} else if (!isReference) {
|
113
120
|
// Create synthetic gap for non-sequence words (transcription view only)
|
114
121
|
const syntheticGap: GapSequence = {
|
122
|
+
id: `synthetic-${wordId}`,
|
115
123
|
text: word,
|
116
124
|
words: [word],
|
117
|
-
|
125
|
+
word_ids: [wordId],
|
118
126
|
length: 1,
|
119
127
|
corrections: [],
|
120
128
|
preceding_anchor: null,
|
@@ -125,7 +133,7 @@ export function useWordClick({
|
|
125
133
|
type: 'gap',
|
126
134
|
data: {
|
127
135
|
...syntheticGap,
|
128
|
-
|
136
|
+
wordId,
|
129
137
|
word
|
130
138
|
}
|
131
139
|
})
|
@@ -6,7 +6,7 @@ export type FlashType = 'anchor' | 'corrected' | 'uncorrected' | 'word' | null
|
|
6
6
|
|
7
7
|
// Common word click handling
|
8
8
|
export interface WordClickInfo {
|
9
|
-
|
9
|
+
word_id: string
|
10
10
|
type: 'anchor' | 'gap' | 'other'
|
11
11
|
anchor?: AnchorSequence
|
12
12
|
gap?: GapSequence
|
@@ -29,13 +29,13 @@ export interface BaseWordPosition {
|
|
29
29
|
|
30
30
|
// Transcription-specific word position with timing info
|
31
31
|
export interface TranscriptionWordPosition extends BaseWordPosition {
|
32
|
-
position: number
|
33
|
-
isInRange: boolean
|
34
32
|
word: {
|
33
|
+
id: string
|
35
34
|
text: string
|
36
35
|
start_time?: number
|
37
36
|
end_time?: number
|
38
37
|
}
|
38
|
+
isInRange: boolean
|
39
39
|
}
|
40
40
|
|
41
41
|
// Reference-specific word position with simple string word
|
@@ -81,8 +81,8 @@ export interface ReferenceViewProps extends BaseViewProps {
|
|
81
81
|
referenceTexts: Record<string, string>
|
82
82
|
anchors: LyricsData['anchor_sequences']
|
83
83
|
gaps: LyricsData['gap_sequences']
|
84
|
-
currentSource:
|
85
|
-
onSourceChange: (source:
|
84
|
+
currentSource: string
|
85
|
+
onSourceChange: (source: string) => void
|
86
86
|
corrected_segments: LyricsSegment[]
|
87
87
|
}
|
88
88
|
|
@@ -93,7 +93,7 @@ export interface HighlightedTextProps extends BaseViewProps {
|
|
93
93
|
anchors: AnchorSequence[]
|
94
94
|
gaps: GapSequence[]
|
95
95
|
isReference?: boolean
|
96
|
-
currentSource?:
|
96
|
+
currentSource?: string
|
97
97
|
preserveSegments?: boolean
|
98
98
|
linePositions?: LinePosition[]
|
99
99
|
}
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
|
2
|
+
import { nanoid } from 'nanoid';
|
3
|
+
|
4
|
+
/**
 * Minimal shape of a position-based sequence (anchor or gap) as this file
 * reads it. Declared locally because only the helpers below rely on these
 * positional fields.
 */
interface ServerData {
    /** Index of the sequence's first word in the flattened list of segment words. */
    transcription_position: number;
    /** Number of words the sequence spans. */
    length: number;
    /** Word texts belonging to the sequence. */
    words: string[];
    /** Start index of the sequence in each reference text, keyed by source name. */
    reference_positions?: Record<string, number>;
    // Any other server-provided field is passed through untyped.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    [key: string]: any;
}
|
12
|
+
|
13
|
+
/**
 * Returns a deep, JSON-safe copy of the correction data for submission.
 *
 * The copy is produced by a JSON round trip, so the caller's object is never
 * mutated and anything JSON cannot represent (e.g. `undefined` properties,
 * functions) is dropped from the result.
 *
 * Note on numbers: JavaScript has a single numeric type, so an integer-valued
 * number cannot be made to "keep its decimal point" (`1.0` and `1` are the same
 * value and both serialize as `1`). The previous implementation walked the
 * clone and re-parsed every number through `toFixed`/`toString`, which always
 * produced the identical value; that pass has been removed because it had no
 * effect on the output.
 *
 * @param data - Correction data to copy.
 * @returns A structurally equal deep clone of `data`.
 */
export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
    return JSON.parse(JSON.stringify(data)) as CorrectionData;
}
|
38
|
+
|
39
|
+
/**
 * Collects the IDs of the words covered by a sequence, using the sequence's
 * `transcription_position` and `length` as a window over all segment words
 * flattened in segment order.
 *
 * @param segments - Segments whose words are flattened to form the word list.
 * @param sequence - Sequence providing the start position and word count.
 * @returns IDs of the words inside the window (empty if the window is out of range).
 */
function findWordIdsForSequence(
    segments: LyricsSegment[],
    sequence: ServerData
): string[] {
    const { transcription_position: firstIndex, length: wordTotal } = sequence;
    const coveredWords = segments
        .flatMap(segment => segment.words)
        .slice(firstIndex, firstIndex + wordTotal);

    // Debug trace of the window being resolved.
    const trace = {
        position: firstIndex,
        length: wordTotal,
        words: coveredWords.map(entry => entry.text)
    };
    console.log('Finding word IDs for sequence:', JSON.stringify(trace));

    return coveredWords.map(entry => entry.id);
}
|
56
|
+
|
57
|
+
/**
 * Resolves the word ID a correction refers to by looking for the first word,
 * in segment order, whose text equals the correction's `original_word`.
 *
 * NOTE(review): matching is by text only, so a word that occurs more than once
 * always resolves to its first occurrence — confirm this is acceptable for
 * callers that need a specific occurrence.
 *
 * @param segments - Segments to search.
 * @param correction - Correction carrying the original word text.
 * @returns The matching word's ID, or a freshly generated ID when no word matches.
 */
function findWordIdForCorrection(
    segments: LyricsSegment[],
    correction: { original_word: string; }
): string {
    const target = correction.original_word;
    const match = segments
        .flatMap(segment => segment.words)
        .find(candidate => candidate.text === target);

    // No word with that text: fall back to a brand-new ID.
    return match ? match.id : nanoid();
}
|
68
|
+
|
69
|
+
/**
 * Builds the reference-text word IDs covered by a sequence for one source.
 *
 * IDs follow the `<source>-word-<index>` pattern, starting at the sequence's
 * `reference_positions[referenceSource]` and spanning `sequence.length` words.
 * The pattern must stay in sync with the IDs HighlightedText generates.
 *
 * @param referenceSource - Name of the reference source.
 * @param sequence - Sequence carrying `reference_positions` and `length`.
 * @returns The generated IDs, or an empty array when the sequence has no
 *          numeric position for that source.
 */
function findReferenceWordIds(
    referenceSource: string,
    sequence: ServerData
): string[] {
    const start = sequence.reference_positions?.[referenceSource];

    if (typeof start === 'number') {
        const toWordId = (_: unknown, offset: number): string =>
            `${referenceSource}-word-${start + offset}`;
        return Array.from({ length: sequence.length }, toWordId);
    }

    return [];
}
|
86
|
+
|
87
|
+
/**
 * Returns a deep copy of the correction data in which every segment, word,
 * anchor sequence, gap sequence and correction carries an ID, and in which
 * sequences reference their words by ID.
 *
 * - Segments, words, anchors, gaps and corrections keep an existing `id`;
 *   otherwise a new one is generated.
 * - Anchor `word_ids` and `reference_word_ids` are always recomputed from the
 *   anchor's positional fields.
 *   NOTE(review): unlike gaps, existing anchor `word_ids` are not preserved —
 *   confirm this asymmetry is intended.
 * - Gap `word_ids` and correction `word_id` are kept when present and derived
 *   otherwise.
 *
 * The input object is not mutated.
 *
 * @param data - Correction data as received.
 * @returns A new `CorrectionData` with IDs populated.
 */
export function initializeDataWithIds(data: CorrectionData): CorrectionData {
    const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;

    // Initialize segment and word IDs
    newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
        ...segment,
        id: segment.id || nanoid(),
        words: segment.words.map((word: Word) => ({
            ...word,
            id: word.id || nanoid()
        }))
    }));

    console.log('Segments after ID initialization:', JSON.stringify({
        segmentCount: newData.corrected_segments.length,
        totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
        // Guard the sample: with no segments there is no first element, and an
        // unguarded access here would throw from what is only a debug log.
        sampleWords: newData.corrected_segments[0]?.words.map(w => ({ id: w.id, text: w.text })) ?? []
    }));

    // Update anchor sequences with word IDs based on positions
    newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
        const serverAnchor = anchor as unknown as ServerData;

        // Get reference word IDs for each source
        const referenceWordIds: Record<string, string[]> = {};
        Object.keys(data.reference_texts || {}).forEach(source => {
            referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
        });

        console.log('Processing anchor with references:', JSON.stringify({
            words: anchor.words,
            reference_positions: serverAnchor.reference_positions,
            reference_word_ids: referenceWordIds
        }));

        return {
            ...anchor,
            id: anchor.id || nanoid(),
            word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
            reference_word_ids: referenceWordIds
        } as AnchorSequence;
    });

    // Update gap sequences to use word IDs
    newData.gap_sequences = newData.gap_sequences.map((gap) => {
        const serverGap = gap as unknown as ServerData;
        return {
            ...gap,
            id: gap.id || nanoid(),
            word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
            corrections: gap.corrections.map((correction: WordCorrection) => ({
                ...correction,
                id: correction.id || nanoid(),
                word_id: correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction)
            }))
        } as GapSequence;
    });

    return newData;
}
|
@@ -4,40 +4,37 @@ import { LinePosition } from '../types'
|
|
4
4
|
export function calculateReferenceLinePositions(
|
5
5
|
corrected_segments: LyricsSegment[],
|
6
6
|
anchors: LyricsData['anchor_sequences'],
|
7
|
-
currentSource:
|
7
|
+
currentSource: string
|
8
8
|
): { linePositions: LinePosition[] } {
|
9
9
|
const linePositions: LinePosition[] = []
|
10
10
|
let currentReferencePosition = 0
|
11
11
|
|
12
12
|
// First, find all anchor sequences that cover entire lines
|
13
|
-
const fullLineAnchors = anchors
|
14
|
-
|
15
|
-
if (
|
13
|
+
const fullLineAnchors = anchors?.map(anchor => {
|
14
|
+
// Add null checks for anchor and reference_word_ids
|
15
|
+
if (!anchor?.reference_word_ids?.[currentSource]) return null
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
referenceLength: anchor.length,
|
20
|
-
transcriptionLine: corrected_segments.findIndex((segment, segmentIndex) => {
|
21
|
-
const words = segment.words
|
22
|
-
if (!words.length) return false
|
23
|
-
|
24
|
-
// Calculate the absolute position of the first and last words in this segment
|
25
|
-
let absolutePosition = 0
|
26
|
-
for (let i = 0; i < segmentIndex; i++) {
|
27
|
-
absolutePosition += corrected_segments[i].words.length
|
28
|
-
}
|
17
|
+
const referenceWordIds = anchor.reference_word_ids[currentSource]
|
18
|
+
if (!referenceWordIds?.length) return null
|
29
19
|
|
30
|
-
|
31
|
-
|
20
|
+
return {
|
21
|
+
referenceWordIds,
|
22
|
+
transcriptionLine: corrected_segments.findIndex((segment) => {
|
23
|
+
const wordIds = segment.words.map(w => w.id)
|
24
|
+
if (!wordIds.length) return false
|
32
25
|
|
33
|
-
|
34
|
-
|
26
|
+
// Check if all word IDs in this segment are part of the anchor
|
27
|
+
return wordIds.every(id => anchor.word_ids?.includes(id))
|
35
28
|
})
|
36
29
|
}
|
37
|
-
})
|
30
|
+
})?.filter((a): a is NonNullable<typeof a> => a !== null) ?? []
|
38
31
|
|
39
|
-
// Sort by reference
|
40
|
-
fullLineAnchors.sort((a, b) =>
|
32
|
+
// Sort by first reference word ID to process in order
|
33
|
+
fullLineAnchors.sort((a, b) => {
|
34
|
+
const firstIdA = a.referenceWordIds[0]
|
35
|
+
const firstIdB = b.referenceWordIds[0]
|
36
|
+
return firstIdA.localeCompare(firstIdB)
|
37
|
+
})
|
41
38
|
|
42
39
|
// Add line positions with padding
|
43
40
|
let currentLine = 0
|
@@ -55,10 +52,12 @@ export function calculateReferenceLinePositions(
|
|
55
52
|
|
56
53
|
// Add the actual line position
|
57
54
|
linePositions.push({
|
58
|
-
position:
|
59
|
-
lineNumber: currentLine
|
55
|
+
position: currentReferencePosition,
|
56
|
+
lineNumber: currentLine,
|
57
|
+
isEmpty: false
|
60
58
|
})
|
61
59
|
currentLine++
|
60
|
+
currentReferencePosition++
|
62
61
|
})
|
63
62
|
|
64
63
|
// Add any remaining lines after the last anchor
|