lyrics-transcriber 0.36.1__py3-none-any.whl → 0.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +22 -2
- lyrics_transcriber/correction/corrector.py +8 -8
- lyrics_transcriber/correction/handlers/base.py +4 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +9 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
- lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
- lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
- lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +182 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +1 -2
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +76 -70
- lyrics_transcriber/frontend/src/components/EditModal.tsx +10 -2
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +128 -125
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -3
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +24 -12
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +8 -15
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +34 -52
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +39 -31
- lyrics_transcriber/frontend/src/components/shared/types.ts +3 -3
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +146 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +23 -24
- lyrics_transcriber/frontend/src/types.ts +25 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/output/cdg.py +32 -6
- lyrics_transcriber/output/video.py +17 -7
- lyrics_transcriber/review/server.py +24 -8
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/METADATA +1 -1
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/RECORD +33 -33
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/frontend/dist/assets/index-ztlAYPYT.js +0 -181
- lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/WHEEL +0 -0
@@ -14,7 +14,6 @@ export interface HighlightedTextProps {
|
|
14
14
|
wordPositions?: TranscriptionWordPosition[]
|
15
15
|
// Common props
|
16
16
|
anchors: AnchorSequence[]
|
17
|
-
gaps: GapSequence[]
|
18
17
|
highlightInfo: HighlightInfo | null
|
19
18
|
mode: InteractionMode
|
20
19
|
onElementClick: (content: ModalContent) => void
|
@@ -51,45 +50,37 @@ export function HighlightedText({
|
|
51
50
|
currentSource
|
52
51
|
})
|
53
52
|
|
54
|
-
const shouldWordFlash = (wordPos: TranscriptionWordPosition | { word: string;
|
53
|
+
const shouldWordFlash = (wordPos: TranscriptionWordPosition | { word: string; id: string }): boolean => {
|
55
54
|
if (!flashingType) return false
|
56
55
|
|
57
56
|
if ('type' in wordPos) {
|
58
57
|
// Handle TranscriptionWordPosition
|
59
|
-
const
|
60
|
-
|
58
|
+
const gap = wordPos.sequence as GapSequence
|
59
|
+
const isCorrected = wordPos.type === 'gap' &&
|
60
|
+
gap?.corrections?.some(correction =>
|
61
|
+
correction.word_id === wordPos.word.id
|
62
|
+
)
|
61
63
|
|
62
64
|
return Boolean(
|
63
65
|
(flashingType === 'anchor' && wordPos.type === 'anchor') ||
|
64
|
-
(flashingType === 'corrected' &&
|
65
|
-
(flashingType === 'uncorrected' && wordPos.type === 'gap' && !
|
66
|
+
(flashingType === 'corrected' && isCorrected) ||
|
67
|
+
(flashingType === 'uncorrected' && wordPos.type === 'gap' && !isCorrected) ||
|
66
68
|
(flashingType === 'word' && highlightInfo?.type === 'anchor' &&
|
67
|
-
wordPos.type === 'anchor' && wordPos.sequence &&
|
68
|
-
|
69
|
-
(isReference && currentSource &&
|
70
|
-
(wordPos.sequence as AnchorSequence).reference_positions[currentSource as keyof typeof highlightInfo.referenceIndices] ===
|
71
|
-
highlightInfo.referenceIndices?.[currentSource as keyof typeof highlightInfo.referenceIndices])
|
72
|
-
))
|
69
|
+
wordPos.type === 'anchor' && wordPos.sequence &&
|
70
|
+
highlightInfo.word_ids?.includes(wordPos.word.id))
|
73
71
|
)
|
74
72
|
} else {
|
75
73
|
// Handle reference word
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
if (position === undefined) return false
|
82
|
-
return thisWordIndex >= position && thisWordIndex < position + a.length
|
83
|
-
})
|
74
|
+
if (!currentSource) return false
|
75
|
+
|
76
|
+
const anchor = anchors?.find(a =>
|
77
|
+
a?.reference_word_ids?.[currentSource]?.includes(wordPos.id)
|
78
|
+
)
|
84
79
|
|
85
80
|
return Boolean(
|
86
81
|
(flashingType === 'anchor' && anchor) ||
|
87
|
-
(flashingType === 'word' && highlightInfo?.type === 'anchor' &&
|
88
|
-
|
89
|
-
(isReference && currentSource &&
|
90
|
-
anchor.reference_positions[currentSource as keyof typeof highlightInfo.referenceIndices] ===
|
91
|
-
highlightInfo.referenceIndices?.[currentSource as keyof typeof highlightInfo.referenceIndices])
|
92
|
-
))
|
82
|
+
(flashingType === 'word' && highlightInfo?.type === 'anchor' &&
|
83
|
+
highlightInfo.reference_word_ids?.[currentSource]?.includes(wordPos.id))
|
93
84
|
)
|
94
85
|
}
|
95
86
|
}
|
@@ -106,7 +97,7 @@ export function HighlightedText({
|
|
106
97
|
const renderContent = () => {
|
107
98
|
if (wordPositions) {
|
108
99
|
return wordPositions.map((wordPos, index) => (
|
109
|
-
<React.Fragment key={
|
100
|
+
<React.Fragment key={wordPos.word.id}>
|
110
101
|
<Word
|
111
102
|
word={wordPos.word.text}
|
112
103
|
shouldFlash={shouldWordFlash(wordPos)}
|
@@ -116,7 +107,7 @@ export function HighlightedText({
|
|
116
107
|
isUncorrectedGap={wordPos.type === 'gap' && !(wordPos.sequence as GapSequence)?.corrections?.length}
|
117
108
|
onClick={() => handleWordClick(
|
118
109
|
wordPos.word.text,
|
119
|
-
wordPos.
|
110
|
+
wordPos.word.id,
|
120
111
|
wordPos.type === 'anchor' ? wordPos.sequence as AnchorSequence : undefined,
|
121
112
|
wordPos.type === 'gap' ? wordPos.sequence as GapSequence : undefined
|
122
113
|
)}
|
@@ -126,12 +117,12 @@ export function HighlightedText({
|
|
126
117
|
))
|
127
118
|
} else if (text) {
|
128
119
|
const lines = text.split('\n')
|
129
|
-
let
|
120
|
+
let wordCount = 0
|
130
121
|
|
131
122
|
return lines.map((line, lineIndex) => {
|
132
|
-
const currentLinePosition = linePositions?.find(
|
123
|
+
const currentLinePosition = linePositions?.find(pos => pos.position === wordCount)
|
133
124
|
if (currentLinePosition?.isEmpty) {
|
134
|
-
|
125
|
+
wordCount++
|
135
126
|
return (
|
136
127
|
<Box key={`empty-${lineIndex}`} sx={{ display: 'flex', alignItems: 'flex-start' }}>
|
137
128
|
<Typography
|
@@ -171,7 +162,7 @@ export function HighlightedText({
|
|
171
162
|
paddingTop: '4px',
|
172
163
|
}}
|
173
164
|
>
|
174
|
-
{lineIndex}
|
165
|
+
{currentLinePosition?.lineNumber ?? lineIndex}
|
175
166
|
</Typography>
|
176
167
|
<IconButton
|
177
168
|
size="small"
|
@@ -192,32 +183,24 @@ export function HighlightedText({
|
|
192
183
|
return <span key={`space-${lineIndex}-${wordIndex}`}> </span>
|
193
184
|
}
|
194
185
|
|
195
|
-
|
196
|
-
const
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
const wordPos: TranscriptionWordPosition = {
|
204
|
-
word: { text: word },
|
205
|
-
position,
|
206
|
-
type: anchor ? 'anchor' : 'other',
|
207
|
-
sequence: anchor,
|
208
|
-
isInRange: true
|
209
|
-
}
|
186
|
+
// Generate word ID based on position in the reference text
|
187
|
+
const wordId = `${currentSource}-word-${wordCount}`
|
188
|
+
wordCount++
|
189
|
+
|
190
|
+
// Find if this word is part of any anchor sequence
|
191
|
+
const anchor = currentSource ? anchors?.find(a =>
|
192
|
+
a?.reference_word_ids?.[currentSource]?.includes(wordId)
|
193
|
+
) : undefined
|
210
194
|
|
211
195
|
return (
|
212
196
|
<Word
|
213
|
-
key={
|
197
|
+
key={wordId}
|
214
198
|
word={word}
|
215
|
-
shouldFlash={shouldWordFlash({ word,
|
216
|
-
isCurrentlyPlaying={shouldHighlightWord(wordPos)}
|
199
|
+
shouldFlash={shouldWordFlash({ word, id: wordId })}
|
217
200
|
isAnchor={Boolean(anchor)}
|
218
201
|
isCorrectedGap={false}
|
219
202
|
isUncorrectedGap={false}
|
220
|
-
onClick={() => handleWordClick(word,
|
203
|
+
onClick={() => handleWordClick(word, wordId, anchor, undefined)}
|
221
204
|
/>
|
222
205
|
)
|
223
206
|
})}
|
@@ -226,7 +209,6 @@ export function HighlightedText({
|
|
226
209
|
)
|
227
210
|
})
|
228
211
|
}
|
229
|
-
|
230
212
|
return null
|
231
213
|
}
|
232
214
|
|
@@ -3,6 +3,16 @@ import { AnchorSequence, GapSequence, InteractionMode } from '../../../types'
|
|
3
3
|
import { ModalContent } from '../../LyricsAnalyzer'
|
4
4
|
import { WordClickInfo } from '../types'
|
5
5
|
|
6
|
+
// Define debug info type
|
7
|
+
interface WordDebugInfo {
|
8
|
+
wordSplitInfo?: {
|
9
|
+
text: string
|
10
|
+
startIndex: number
|
11
|
+
endIndex: number
|
12
|
+
}
|
13
|
+
nearbyAnchors?: AnchorSequence[]
|
14
|
+
}
|
15
|
+
|
6
16
|
export interface UseWordClickProps {
|
7
17
|
mode: InteractionMode
|
8
18
|
onElementClick: (content: ModalContent) => void
|
@@ -20,72 +30,69 @@ export function useWordClick({
|
|
20
30
|
}: UseWordClickProps) {
|
21
31
|
const handleWordClick = useCallback((
|
22
32
|
word: string,
|
23
|
-
|
33
|
+
wordId: string,
|
24
34
|
anchor?: AnchorSequence,
|
25
35
|
gap?: GapSequence,
|
26
|
-
debugInfo?:
|
36
|
+
debugInfo?: WordDebugInfo
|
27
37
|
) => {
|
28
38
|
console.log(JSON.stringify({
|
29
39
|
debug: {
|
30
40
|
clickedWord: word,
|
31
|
-
|
41
|
+
wordId,
|
32
42
|
isReference,
|
33
43
|
currentSource,
|
34
44
|
wordInfo: debugInfo?.wordSplitInfo,
|
35
45
|
nearbyAnchors: debugInfo?.nearbyAnchors,
|
36
46
|
anchorInfo: anchor && {
|
37
|
-
|
47
|
+
wordIds: anchor.word_ids,
|
38
48
|
length: anchor.length,
|
39
49
|
words: anchor.words,
|
40
|
-
|
50
|
+
referenceWordIds: anchor.reference_word_ids
|
41
51
|
},
|
42
52
|
gapInfo: gap && {
|
43
|
-
|
53
|
+
wordIds: gap.word_ids,
|
44
54
|
length: gap.length,
|
45
55
|
words: gap.words,
|
46
56
|
corrections: gap.corrections.map(c => ({
|
57
|
+
original_word: c.original_word,
|
58
|
+
corrected_word: c.corrected_word,
|
59
|
+
word_id: c.word_id,
|
47
60
|
length: c.length,
|
48
|
-
|
61
|
+
is_deletion: c.is_deletion,
|
62
|
+
split_index: c.split_index,
|
63
|
+
split_total: c.split_total
|
49
64
|
}))
|
50
65
|
},
|
51
66
|
belongsToAnchor: anchor && (
|
52
67
|
isReference
|
53
|
-
?
|
54
|
-
|
55
|
-
: position >= anchor.transcription_position &&
|
56
|
-
position < (anchor.transcription_position + anchor.length)
|
68
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
69
|
+
: anchor.word_ids.includes(wordId)
|
57
70
|
),
|
58
71
|
belongsToGap: gap && (
|
59
72
|
isReference
|
60
|
-
? gap.corrections
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
)
|
73
|
+
? gap.corrections.some(c => c.word_id === wordId)
|
74
|
+
: gap.word_ids.includes(wordId)
|
75
|
+
),
|
76
|
+
wordIndexInGap: gap && gap.words.indexOf(word),
|
77
|
+
hasMatchingCorrection: gap && gap.corrections.some(c => c.word_id === wordId)
|
66
78
|
}
|
67
79
|
}, null, 2))
|
68
80
|
|
69
81
|
const belongsToAnchor = anchor && (
|
70
82
|
isReference
|
71
|
-
?
|
72
|
-
|
73
|
-
: position >= anchor.transcription_position &&
|
74
|
-
position < (anchor.transcription_position + anchor.length)
|
83
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
84
|
+
: anchor.word_ids.includes(wordId)
|
75
85
|
)
|
76
86
|
|
77
87
|
const belongsToGap = gap && (
|
78
88
|
isReference
|
79
|
-
? gap.corrections
|
80
|
-
|
81
|
-
position < (gap.corrections[0].reference_positions![currentSource!] + gap.corrections[0].length)
|
82
|
-
: position >= gap.transcription_position &&
|
83
|
-
position < (gap.transcription_position + gap.length)
|
89
|
+
? gap.corrections.some(c => c.word_id === wordId)
|
90
|
+
: gap.word_ids.includes(wordId)
|
84
91
|
)
|
85
92
|
|
86
93
|
if (mode === 'highlight' || mode === 'edit') {
|
87
94
|
onWordClick?.({
|
88
|
-
|
95
|
+
word_id: wordId,
|
89
96
|
type: belongsToAnchor ? 'anchor' : belongsToGap ? 'gap' : 'other',
|
90
97
|
anchor: belongsToAnchor ? anchor : undefined,
|
91
98
|
gap: belongsToGap ? gap : undefined
|
@@ -96,7 +103,7 @@ export function useWordClick({
|
|
96
103
|
type: 'anchor',
|
97
104
|
data: {
|
98
105
|
...anchor,
|
99
|
-
|
106
|
+
wordId,
|
100
107
|
word
|
101
108
|
}
|
102
109
|
})
|
@@ -105,16 +112,17 @@ export function useWordClick({
|
|
105
112
|
type: 'gap',
|
106
113
|
data: {
|
107
114
|
...gap,
|
108
|
-
|
115
|
+
wordId,
|
109
116
|
word
|
110
117
|
}
|
111
118
|
})
|
112
119
|
} else if (!isReference) {
|
113
120
|
// Create synthetic gap for non-sequence words (transcription view only)
|
114
121
|
const syntheticGap: GapSequence = {
|
122
|
+
id: `synthetic-${wordId}`,
|
115
123
|
text: word,
|
116
124
|
words: [word],
|
117
|
-
|
125
|
+
word_ids: [wordId],
|
118
126
|
length: 1,
|
119
127
|
corrections: [],
|
120
128
|
preceding_anchor: null,
|
@@ -125,7 +133,7 @@ export function useWordClick({
|
|
125
133
|
type: 'gap',
|
126
134
|
data: {
|
127
135
|
...syntheticGap,
|
128
|
-
|
136
|
+
wordId,
|
129
137
|
word
|
130
138
|
}
|
131
139
|
})
|
@@ -6,7 +6,7 @@ export type FlashType = 'anchor' | 'corrected' | 'uncorrected' | 'word' | null
|
|
6
6
|
|
7
7
|
// Common word click handling
|
8
8
|
export interface WordClickInfo {
|
9
|
-
|
9
|
+
word_id: string
|
10
10
|
type: 'anchor' | 'gap' | 'other'
|
11
11
|
anchor?: AnchorSequence
|
12
12
|
gap?: GapSequence
|
@@ -29,13 +29,13 @@ export interface BaseWordPosition {
|
|
29
29
|
|
30
30
|
// Transcription-specific word position with timing info
|
31
31
|
export interface TranscriptionWordPosition extends BaseWordPosition {
|
32
|
-
position: number
|
33
|
-
isInRange: boolean
|
34
32
|
word: {
|
33
|
+
id: string
|
35
34
|
text: string
|
36
35
|
start_time?: number
|
37
36
|
end_time?: number
|
38
37
|
}
|
38
|
+
isInRange: boolean
|
39
39
|
}
|
40
40
|
|
41
41
|
// Reference-specific word position with simple string word
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
|
2
|
+
import { nanoid } from 'nanoid';
|
3
|
+
|
4
|
+
// Define server-side types just for this file
|
5
|
+
interface ServerData {
|
6
|
+
transcription_position: number;
|
7
|
+
length: number;
|
8
|
+
words: string[];
|
9
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
10
|
+
[key: string]: any;
|
11
|
+
}
|
12
|
+
|
13
|
+
export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
|
14
|
+
// Create a deep clone to avoid modifying the original
|
15
|
+
const normalized = JSON.parse(JSON.stringify(data));
|
16
|
+
|
17
|
+
// Preserve floating point numbers with original precision
|
18
|
+
const preserveFloats = (obj: Record<string, unknown>): void => {
|
19
|
+
for (const key in obj) {
|
20
|
+
const value = obj[key];
|
21
|
+
if (typeof value === 'number') {
|
22
|
+
// Handle integers and floats differently
|
23
|
+
let formatted: string;
|
24
|
+
if (Number.isInteger(value)) {
|
25
|
+
formatted = value.toFixed(1); // Force decimal point for integers
|
26
|
+
} else {
|
27
|
+
formatted = value.toString(); // Keep original precision for floats
|
28
|
+
}
|
29
|
+
obj[key] = parseFloat(formatted);
|
30
|
+
} else if (typeof value === 'object' && value !== null) {
|
31
|
+
preserveFloats(value as Record<string, unknown>);
|
32
|
+
}
|
33
|
+
}
|
34
|
+
};
|
35
|
+
preserveFloats(normalized);
|
36
|
+
return normalized;
|
37
|
+
}
|
38
|
+
|
39
|
+
// Helper function to find word IDs for a sequence based on original positions
|
40
|
+
function findWordIdsForSequence(
|
41
|
+
segments: LyricsSegment[],
|
42
|
+
sequence: ServerData
|
43
|
+
): string[] {
|
44
|
+
const allWords = segments.flatMap(s => s.words);
|
45
|
+
const startIndex = sequence.transcription_position;
|
46
|
+
const endIndex = startIndex + sequence.length;
|
47
|
+
|
48
|
+
console.log('Finding word IDs for sequence:', JSON.stringify({
|
49
|
+
position: sequence.transcription_position,
|
50
|
+
length: sequence.length,
|
51
|
+
words: allWords.slice(startIndex, endIndex).map(w => w.text)
|
52
|
+
}));
|
53
|
+
|
54
|
+
return allWords.slice(startIndex, endIndex).map(word => word.id);
|
55
|
+
}
|
56
|
+
|
57
|
+
// Helper function to find word ID for a correction
|
58
|
+
function findWordIdForCorrection(
|
59
|
+
segments: LyricsSegment[],
|
60
|
+
correction: { original_word: string; }
|
61
|
+
): string {
|
62
|
+
for (const segment of segments) {
|
63
|
+
const word = segment.words.find(w => w.text === correction.original_word);
|
64
|
+
if (word) return word.id;
|
65
|
+
}
|
66
|
+
return nanoid(); // Fallback if word not found
|
67
|
+
}
|
68
|
+
|
69
|
+
// Helper function to find word IDs in reference text
|
70
|
+
function findReferenceWordIds(
|
71
|
+
referenceSource: string,
|
72
|
+
sequence: ServerData
|
73
|
+
): string[] {
|
74
|
+
const referencePosition = sequence.reference_positions?.[referenceSource];
|
75
|
+
if (typeof referencePosition !== 'number') {
|
76
|
+
return [];
|
77
|
+
}
|
78
|
+
|
79
|
+
// Generate IDs in the same format as HighlightedText
|
80
|
+
const wordIds = Array.from({ length: sequence.length },
|
81
|
+
(_, i) => `${referenceSource}-word-${referencePosition + i}`
|
82
|
+
);
|
83
|
+
|
84
|
+
return wordIds;
|
85
|
+
}
|
86
|
+
|
87
|
+
export function initializeDataWithIds(data: CorrectionData): CorrectionData {
|
88
|
+
const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;
|
89
|
+
|
90
|
+
// Initialize segment and word IDs
|
91
|
+
newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
|
92
|
+
...segment,
|
93
|
+
id: segment.id || nanoid(),
|
94
|
+
words: segment.words.map((word: Word) => ({
|
95
|
+
...word,
|
96
|
+
id: word.id || nanoid()
|
97
|
+
}))
|
98
|
+
}));
|
99
|
+
|
100
|
+
console.log('Segments after ID initialization:', JSON.stringify({
|
101
|
+
segmentCount: newData.corrected_segments.length,
|
102
|
+
totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
|
103
|
+
sampleWords: newData.corrected_segments[0].words.map(w => ({ id: w.id, text: w.text }))
|
104
|
+
}));
|
105
|
+
|
106
|
+
// Update anchor sequences with word IDs based on positions
|
107
|
+
newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
|
108
|
+
const serverAnchor = anchor as unknown as ServerData;
|
109
|
+
|
110
|
+
// Get reference word IDs for each source
|
111
|
+
const referenceWordIds: Record<string, string[]> = {};
|
112
|
+
Object.keys(data.reference_texts || {}).forEach(source => {
|
113
|
+
referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
|
114
|
+
});
|
115
|
+
|
116
|
+
console.log('Processing anchor with references:', JSON.stringify({
|
117
|
+
words: anchor.words,
|
118
|
+
reference_positions: serverAnchor.reference_positions,
|
119
|
+
reference_word_ids: referenceWordIds
|
120
|
+
}));
|
121
|
+
|
122
|
+
return {
|
123
|
+
...anchor,
|
124
|
+
id: anchor.id || nanoid(),
|
125
|
+
word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
|
126
|
+
reference_word_ids: referenceWordIds
|
127
|
+
} as AnchorSequence;
|
128
|
+
});
|
129
|
+
|
130
|
+
// Update gap sequences to use word IDs
|
131
|
+
newData.gap_sequences = newData.gap_sequences.map((gap) => {
|
132
|
+
const serverGap = gap as unknown as ServerData;
|
133
|
+
return {
|
134
|
+
...gap,
|
135
|
+
id: gap.id || nanoid(),
|
136
|
+
word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
|
137
|
+
corrections: gap.corrections.map((correction: WordCorrection) => ({
|
138
|
+
...correction,
|
139
|
+
id: correction.id || nanoid(),
|
140
|
+
word_id: correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction)
|
141
|
+
}))
|
142
|
+
} as GapSequence;
|
143
|
+
});
|
144
|
+
|
145
|
+
return newData;
|
146
|
+
}
|
@@ -10,34 +10,31 @@ export function calculateReferenceLinePositions(
|
|
10
10
|
let currentReferencePosition = 0
|
11
11
|
|
12
12
|
// First, find all anchor sequences that cover entire lines
|
13
|
-
const fullLineAnchors = anchors
|
14
|
-
|
15
|
-
if (
|
13
|
+
const fullLineAnchors = anchors?.map(anchor => {
|
14
|
+
// Add null checks for anchor and reference_word_ids
|
15
|
+
if (!anchor?.reference_word_ids?.[currentSource]) return null
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
referenceLength: anchor.length,
|
20
|
-
transcriptionLine: corrected_segments.findIndex((segment, segmentIndex) => {
|
21
|
-
const words = segment.words
|
22
|
-
if (!words.length) return false
|
23
|
-
|
24
|
-
// Calculate the absolute position of the first and last words in this segment
|
25
|
-
let absolutePosition = 0
|
26
|
-
for (let i = 0; i < segmentIndex; i++) {
|
27
|
-
absolutePosition += corrected_segments[i].words.length
|
28
|
-
}
|
17
|
+
const referenceWordIds = anchor.reference_word_ids[currentSource]
|
18
|
+
if (!referenceWordIds?.length) return null
|
29
19
|
|
30
|
-
|
31
|
-
|
20
|
+
return {
|
21
|
+
referenceWordIds,
|
22
|
+
transcriptionLine: corrected_segments.findIndex((segment) => {
|
23
|
+
const wordIds = segment.words.map(w => w.id)
|
24
|
+
if (!wordIds.length) return false
|
32
25
|
|
33
|
-
|
34
|
-
|
26
|
+
// Check if all word IDs in this segment are part of the anchor
|
27
|
+
return wordIds.every(id => anchor.word_ids?.includes(id))
|
35
28
|
})
|
36
29
|
}
|
37
|
-
})
|
30
|
+
})?.filter((a): a is NonNullable<typeof a> => a !== null) ?? []
|
38
31
|
|
39
|
-
// Sort by reference
|
40
|
-
fullLineAnchors.sort((a, b) =>
|
32
|
+
// Sort by first reference word ID to process in order
|
33
|
+
fullLineAnchors.sort((a, b) => {
|
34
|
+
const firstIdA = a.referenceWordIds[0]
|
35
|
+
const firstIdB = b.referenceWordIds[0]
|
36
|
+
return firstIdA.localeCompare(firstIdB)
|
37
|
+
})
|
41
38
|
|
42
39
|
// Add line positions with padding
|
43
40
|
let currentLine = 0
|
@@ -55,10 +52,12 @@ export function calculateReferenceLinePositions(
|
|
55
52
|
|
56
53
|
// Add the actual line position
|
57
54
|
linePositions.push({
|
58
|
-
position:
|
59
|
-
lineNumber: currentLine
|
55
|
+
position: currentReferencePosition,
|
56
|
+
lineNumber: currentLine,
|
57
|
+
isEmpty: false
|
60
58
|
})
|
61
59
|
currentLine++
|
60
|
+
currentReferencePosition++
|
62
61
|
})
|
63
62
|
|
64
63
|
// Add any remaining lines after the last anchor
|
@@ -1,4 +1,5 @@
|
|
1
1
|
export interface Word {
|
2
|
+
id: string
|
2
3
|
text: string
|
3
4
|
start_time: number
|
4
5
|
end_time: number
|
@@ -6,6 +7,7 @@ export interface Word {
|
|
6
7
|
}
|
7
8
|
|
8
9
|
export interface LyricsSegment {
|
10
|
+
id: string
|
9
11
|
text: string
|
10
12
|
words: Word[]
|
11
13
|
start_time: number
|
@@ -13,10 +15,11 @@ export interface LyricsSegment {
|
|
13
15
|
}
|
14
16
|
|
15
17
|
export interface WordCorrection {
|
18
|
+
id: string
|
16
19
|
original_word: string
|
17
20
|
corrected_word: string
|
18
|
-
|
19
|
-
|
21
|
+
segment_id: string
|
22
|
+
word_id: string
|
20
23
|
source: string
|
21
24
|
confidence: number
|
22
25
|
reason: string
|
@@ -24,7 +27,7 @@ export interface WordCorrection {
|
|
24
27
|
is_deletion: boolean
|
25
28
|
split_index?: number
|
26
29
|
split_total?: number
|
27
|
-
reference_positions?: Record<string,
|
30
|
+
reference_positions?: Record<string, string>
|
28
31
|
length: number
|
29
32
|
}
|
30
33
|
|
@@ -36,26 +39,35 @@ export interface PhraseScore {
|
|
36
39
|
}
|
37
40
|
|
38
41
|
export interface AnchorSequence {
|
42
|
+
id: string
|
39
43
|
words: string[]
|
40
44
|
text: string
|
41
45
|
length: number
|
42
|
-
|
43
|
-
|
46
|
+
word_ids: string[]
|
47
|
+
reference_word_ids: Record<string, string[]>
|
44
48
|
confidence: number
|
45
49
|
phrase_score: PhraseScore
|
46
50
|
total_score: number
|
47
51
|
}
|
48
52
|
|
53
|
+
export interface AnchorReference {
|
54
|
+
text: string
|
55
|
+
word_ids: string[]
|
56
|
+
confidence: number
|
57
|
+
}
|
58
|
+
|
49
59
|
export interface GapSequence {
|
50
|
-
|
60
|
+
id: string
|
51
61
|
text: string
|
62
|
+
words: string[]
|
63
|
+
word_ids: string[]
|
52
64
|
length: number
|
53
|
-
transcription_position: number
|
54
|
-
preceding_anchor: AnchorSequence | null
|
55
|
-
following_anchor: AnchorSequence | null
|
56
|
-
reference_words: Record<string, string[]>
|
57
|
-
reference_words_original?: Record<string, string[]>
|
58
65
|
corrections: WordCorrection[]
|
66
|
+
preceding_anchor: AnchorReference | null
|
67
|
+
following_anchor: AnchorReference | null
|
68
|
+
reference_words: {
|
69
|
+
[source: string]: string[]
|
70
|
+
}
|
59
71
|
}
|
60
72
|
|
61
73
|
export interface LyricsData {
|
@@ -98,10 +110,8 @@ export interface CorrectionData {
|
|
98
110
|
}
|
99
111
|
|
100
112
|
export interface HighlightInfo {
|
101
|
-
|
102
|
-
|
103
|
-
referenceIndices: Record<string, number>
|
104
|
-
referenceLength?: number
|
113
|
+
word_ids?: string[]
|
114
|
+
reference_word_ids?: Record<string, string[]>
|
105
115
|
type: 'single' | 'gap' | 'anchor'
|
106
116
|
}
|
107
117
|
|
@@ -1 +1 @@
|
|
1
|
-
{"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/
|
1
|
+
{"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/initializedatawithids.tsx","./src/components/shared/utils/referencelinecalculator.ts"],"version":"5.6.3"}
|