lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +22 -2
- lyrics_transcriber/correction/corrector.py +8 -8
- lyrics_transcriber/correction/handlers/base.py +4 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +22 -2
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
- lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
- lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
- lyrics_transcriber/correction/handlers/word_operations.py +8 -2
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js +26696 -0
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +3 -2
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +1 -2
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +76 -70
- lyrics_transcriber/frontend/src/components/EditModal.tsx +11 -2
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +154 -128
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +42 -4
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +59 -15
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +71 -16
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +16 -19
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +72 -57
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +113 -41
- lyrics_transcriber/frontend/src/components/shared/types.ts +6 -3
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +202 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +23 -24
- lyrics_transcriber/frontend/src/types.ts +25 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +4 -0
- lyrics_transcriber/frontend/vite.config.ts +4 -0
- lyrics_transcriber/lyrics/genius.py +41 -12
- lyrics_transcriber/output/cdg.py +33 -6
- lyrics_transcriber/output/cdgmaker/composer.py +839 -534
- lyrics_transcriber/output/video.py +17 -7
- lyrics_transcriber/review/server.py +22 -8
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/METADATA +3 -2
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/RECORD +41 -40
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/frontend/dist/assets/index-ztlAYPYT.js +0 -181
- lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/WHEEL +0 -0
@@ -3,12 +3,23 @@ import { AnchorSequence, GapSequence, InteractionMode } from '../../../types'
|
|
3
3
|
import { ModalContent } from '../../LyricsAnalyzer'
|
4
4
|
import { WordClickInfo } from '../types'
|
5
5
|
|
6
|
+
/**
 * Optional diagnostic details a caller may attach to a word click.
 * The click handler copies both fields into its debug console entry
 * (as `wordInfo` and `nearbyAnchors`).
 */
interface WordDebugInfo {
    /** Anchor sequences reported alongside the clicked word. */
    nearbyAnchors?: AnchorSequence[]
    /** Text plus start/end indices for the clicked word (presumably its span in the source text — confirm with the caller). */
    wordSplitInfo?: {
        text: string
        startIndex: number
        endIndex: number
    }
}
|
15
|
+
|
6
16
|
/** Options accepted by the `useWordClick` hook. */
export interface UseWordClickProps {
    /** Current interaction mode; the hook branches on 'highlight' / 'edit' versus 'details'. */
    mode: InteractionMode
    /** Invoked in 'details' mode with the anchor or gap payload for the clicked word. */
    onElementClick: (content: ModalContent) => void
    /** Invoked in 'highlight' / 'edit' mode with the clicked word's ID and its anchor/gap classification. */
    onWordClick?: (info: WordClickInfo) => void
    /** When true, membership checks use the reference-side IDs/positions instead of transcription word IDs. */
    isReference?: boolean
    /** Reference source key used to index `reference_word_ids` and `reference_positions`. */
    currentSource?: string
    /** Gaps searched when a reference word is clicked, to find one whose correction points at that word. The hook defaults this to an empty list. */
    gaps?: GapSequence[]
}
|
13
24
|
|
14
25
|
export function useWordClick({
|
@@ -16,87 +27,147 @@ export function useWordClick({
|
|
16
27
|
onElementClick,
|
17
28
|
onWordClick,
|
18
29
|
isReference,
|
19
|
-
currentSource
|
30
|
+
currentSource,
|
31
|
+
gaps = []
|
20
32
|
}: UseWordClickProps) {
|
21
33
|
const handleWordClick = useCallback((
|
22
34
|
word: string,
|
23
|
-
|
35
|
+
wordId: string,
|
24
36
|
anchor?: AnchorSequence,
|
25
37
|
gap?: GapSequence,
|
26
|
-
debugInfo?:
|
38
|
+
debugInfo?: WordDebugInfo
|
27
39
|
) => {
|
28
40
|
console.log(JSON.stringify({
|
29
41
|
debug: {
|
30
42
|
clickedWord: word,
|
31
|
-
|
43
|
+
wordId,
|
32
44
|
isReference,
|
33
45
|
currentSource,
|
34
46
|
wordInfo: debugInfo?.wordSplitInfo,
|
35
47
|
nearbyAnchors: debugInfo?.nearbyAnchors,
|
36
48
|
anchorInfo: anchor && {
|
37
|
-
|
49
|
+
wordIds: anchor.word_ids,
|
38
50
|
length: anchor.length,
|
39
51
|
words: anchor.words,
|
40
|
-
|
52
|
+
referenceWordIds: anchor.reference_word_ids,
|
53
|
+
matchesWordId: isReference
|
54
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
55
|
+
: anchor.word_ids.includes(wordId)
|
41
56
|
},
|
42
57
|
gapInfo: gap && {
|
43
|
-
|
58
|
+
wordIds: gap.word_ids,
|
44
59
|
length: gap.length,
|
45
60
|
words: gap.words,
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
61
|
+
referenceWords: gap.reference_words,
|
62
|
+
corrections: gap.corrections,
|
63
|
+
matchesWordId: isReference
|
64
|
+
? gap.reference_words[currentSource!]?.includes(wordId)
|
65
|
+
: gap.word_ids.includes(wordId)
|
50
66
|
},
|
51
67
|
belongsToAnchor: anchor && (
|
52
68
|
isReference
|
53
|
-
?
|
54
|
-
|
55
|
-
: position >= anchor.transcription_position &&
|
56
|
-
position < (anchor.transcription_position + anchor.length)
|
69
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
70
|
+
: anchor.word_ids.includes(wordId)
|
57
71
|
),
|
58
72
|
belongsToGap: gap && (
|
59
73
|
isReference
|
60
|
-
? gap.corrections
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
)
|
74
|
+
? gap.corrections.some(c => c.word_id === wordId)
|
75
|
+
: gap.word_ids.includes(wordId)
|
76
|
+
),
|
77
|
+
wordIndexInGap: gap && gap.words.indexOf(word),
|
78
|
+
hasMatchingCorrection: gap && gap.corrections.some(c => c.word_id === wordId)
|
66
79
|
}
|
67
80
|
}, null, 2))
|
68
81
|
|
82
|
+
// For reference view clicks, find the corresponding gap
|
83
|
+
if (isReference && currentSource) {
|
84
|
+
// Extract position from wordId (e.g., "genius-word-3" -> 3)
|
85
|
+
const position = parseInt(wordId.split('-').pop() || '', 10);
|
86
|
+
|
87
|
+
// Find gap that has a correction matching this reference position
|
88
|
+
const matchingGap = gaps?.find(g =>
|
89
|
+
g.corrections.some(c => {
|
90
|
+
const refPosition = c.reference_positions?.[currentSource];
|
91
|
+
return typeof refPosition === 'number' && refPosition === position;
|
92
|
+
})
|
93
|
+
);
|
94
|
+
|
95
|
+
if (matchingGap) {
|
96
|
+
console.log('Found matching gap for reference click:', {
|
97
|
+
position,
|
98
|
+
gap: matchingGap
|
99
|
+
});
|
100
|
+
gap = matchingGap;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
69
104
|
const belongsToAnchor = anchor && (
|
70
105
|
isReference
|
71
|
-
?
|
72
|
-
|
73
|
-
: position >= anchor.transcription_position &&
|
74
|
-
position < (anchor.transcription_position + anchor.length)
|
106
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
107
|
+
: anchor.word_ids.includes(wordId)
|
75
108
|
)
|
76
109
|
|
77
110
|
const belongsToGap = gap && (
|
78
111
|
isReference
|
79
|
-
? gap.corrections
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
112
|
+
? gap.corrections.some(c => {
|
113
|
+
const refPosition = c.reference_positions?.[currentSource!];
|
114
|
+
const clickedPosition = parseInt(wordId.split('-').pop() || '', 10);
|
115
|
+
return typeof refPosition === 'number' && refPosition === clickedPosition;
|
116
|
+
})
|
117
|
+
: gap.word_ids.includes(wordId)
|
84
118
|
)
|
85
119
|
|
86
120
|
if (mode === 'highlight' || mode === 'edit') {
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
121
|
+
if (belongsToAnchor && anchor) {
|
122
|
+
onWordClick?.({
|
123
|
+
word_id: wordId,
|
124
|
+
type: 'anchor',
|
125
|
+
anchor,
|
126
|
+
gap: undefined
|
127
|
+
})
|
128
|
+
} else if (belongsToGap && gap) {
|
129
|
+
// Create highlight info that includes both transcription and reference IDs
|
130
|
+
const referenceWords: Record<string, string[]> = {};
|
131
|
+
|
132
|
+
// For each correction in the gap, add its reference positions
|
133
|
+
gap.corrections.forEach(correction => {
|
134
|
+
Object.entries(correction.reference_positions || {}).forEach(([source, position]) => {
|
135
|
+
if (typeof position === 'number') {
|
136
|
+
const refId = `${source}-word-${position}`;
|
137
|
+
if (!referenceWords[source]) {
|
138
|
+
referenceWords[source] = [];
|
139
|
+
}
|
140
|
+
if (!referenceWords[source].includes(refId)) {
|
141
|
+
referenceWords[source].push(refId);
|
142
|
+
}
|
143
|
+
}
|
144
|
+
});
|
145
|
+
});
|
146
|
+
|
147
|
+
onWordClick?.({
|
148
|
+
word_id: wordId,
|
149
|
+
type: 'gap',
|
150
|
+
anchor: undefined,
|
151
|
+
gap: {
|
152
|
+
...gap,
|
153
|
+
reference_words: referenceWords // Use reference_words instead of reference_word_ids
|
154
|
+
}
|
155
|
+
})
|
156
|
+
} else {
|
157
|
+
onWordClick?.({
|
158
|
+
word_id: wordId,
|
159
|
+
type: 'other',
|
160
|
+
anchor: undefined,
|
161
|
+
gap: undefined
|
162
|
+
})
|
163
|
+
}
|
93
164
|
} else if (mode === 'details') {
|
94
165
|
if (belongsToAnchor && anchor) {
|
95
166
|
onElementClick({
|
96
167
|
type: 'anchor',
|
97
168
|
data: {
|
98
169
|
...anchor,
|
99
|
-
|
170
|
+
wordId,
|
100
171
|
word
|
101
172
|
}
|
102
173
|
})
|
@@ -105,16 +176,17 @@ export function useWordClick({
|
|
105
176
|
type: 'gap',
|
106
177
|
data: {
|
107
178
|
...gap,
|
108
|
-
|
179
|
+
wordId,
|
109
180
|
word
|
110
181
|
}
|
111
182
|
})
|
112
183
|
} else if (!isReference) {
|
113
184
|
// Create synthetic gap for non-sequence words (transcription view only)
|
114
185
|
const syntheticGap: GapSequence = {
|
186
|
+
id: `synthetic-${wordId}`,
|
115
187
|
text: word,
|
116
188
|
words: [word],
|
117
|
-
|
189
|
+
word_ids: [wordId],
|
118
190
|
length: 1,
|
119
191
|
corrections: [],
|
120
192
|
preceding_anchor: null,
|
@@ -125,13 +197,13 @@ export function useWordClick({
|
|
125
197
|
type: 'gap',
|
126
198
|
data: {
|
127
199
|
...syntheticGap,
|
128
|
-
|
200
|
+
wordId,
|
129
201
|
word
|
130
202
|
}
|
131
203
|
})
|
132
204
|
}
|
133
205
|
}
|
134
|
-
}, [mode, onWordClick, onElementClick, isReference, currentSource])
|
206
|
+
}, [mode, onWordClick, onElementClick, isReference, currentSource, gaps])
|
135
207
|
|
136
208
|
return { handleWordClick }
|
137
209
|
}
|
@@ -6,7 +6,7 @@ export type FlashType = 'anchor' | 'corrected' | 'uncorrected' | 'word' | null
|
|
6
6
|
|
7
7
|
// Common word click handling
|
8
8
|
export interface WordClickInfo {
|
9
|
-
|
9
|
+
word_id: string
|
10
10
|
type: 'anchor' | 'gap' | 'other'
|
11
11
|
anchor?: AnchorSequence
|
12
12
|
gap?: GapSequence
|
@@ -29,13 +29,16 @@ export interface BaseWordPosition {
|
|
29
29
|
|
30
30
|
// Transcription-specific word position with timing info
|
31
31
|
export interface TranscriptionWordPosition extends BaseWordPosition {
|
32
|
-
position: number
|
33
|
-
isInRange: boolean
|
34
32
|
word: {
|
33
|
+
id: string
|
35
34
|
text: string
|
36
35
|
start_time?: number
|
37
36
|
end_time?: number
|
38
37
|
}
|
38
|
+
type: 'anchor' | 'gap' | 'other'
|
39
|
+
sequence?: AnchorSequence | GapSequence
|
40
|
+
isInRange: boolean
|
41
|
+
isCorrected?: boolean
|
39
42
|
}
|
40
43
|
|
41
44
|
// Reference-specific word position with simple string word
|
@@ -0,0 +1,202 @@
|
|
1
|
+
import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
|
2
|
+
import { nanoid } from 'nanoid';
|
3
|
+
|
4
|
+
/**
 * Loose, file-local view of an anchor or gap sequence as the helpers below read it.
 * Only the position-based fields are typed; every other property of the payload
 * (for example `reference_positions`, read by findReferenceWordIds) is reached
 * through the permissive index signature.
 */
interface ServerData {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    [key: string]: any;
    /** Words of the sequence. */
    words: string[];
    /** Number of words the sequence spans. */
    length: number;
    /** Index of the sequence's first word within the flattened list of segment words. */
    transcription_position: number;
}
|
12
|
+
|
13
|
+
/**
 * Return a detached deep copy of the correction data, ready to be submitted.
 *
 * The copy is produced with a JSON round-trip, so the caller's object is never
 * mutated and the result contains only JSON-representable values.
 *
 * Note on numbers: an earlier version of this function walked every numeric
 * field and passed it through `toFixed(1)` / `toString()` followed by
 * `parseFloat()` in an attempt to "force a decimal point" on integers. That
 * round-trip always yields the very same number (JavaScript has a single
 * number type, and `JSON.stringify(5)` is `"5"` regardless), so the walk had
 * no effect on the returned data and has been removed. If the receiver needs
 * `5.0` on the wire, that has to be handled when serializing, not here.
 *
 * @param data - Correction data to copy.
 * @returns A deep copy of `data` that shares no object references with it.
 */
export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
    return JSON.parse(JSON.stringify(data)) as CorrectionData;
}
|
38
|
+
|
39
|
+
/**
 * Resolve the IDs of the words a sequence covers.
 *
 * All segment words are flattened into one list and the window
 * [transcription_position, transcription_position + length) is taken from it.
 * A debug line describing the window is written to the console.
 *
 * @param segments - Segments whose words already carry IDs.
 * @param sequence - Sequence supplying `transcription_position` and `length`.
 * @returns IDs of the words inside the window, in order.
 */
function findWordIdsForSequence(
    segments: LyricsSegment[],
    sequence: ServerData
): string[] {
    const { transcription_position: firstIndex, length: wordCount } = sequence;
    const covered = segments
        .flatMap(segment => segment.words)
        .slice(firstIndex, firstIndex + wordCount);

    const summary = JSON.stringify({
        position: firstIndex,
        length: wordCount,
        words: covered.map(entry => entry.text)
    });
    console.log('Finding word IDs for sequence:', summary);

    return covered.map(entry => entry.id);
}
|
56
|
+
|
57
|
+
/**
 * Debug helper: print which word (if any) was matched for a correction.
 *
 * Logs the word being searched for, every word in the segments with its ID,
 * the matched ID, and the text of the word carrying that ID (null when no ID
 * was supplied).
 */
const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
    const flattened = segments.flatMap(segment => segment.words);

    let matchedWord: string | null | undefined = null;
    if (foundId) {
        matchedWord = flattened.find(candidate => candidate.id === foundId)?.text;
    }

    const allWordsWithIds = flattened.map(({ text, id }) => ({ text, id }));

    console.log('Word ID Assignment:', {
        searchingFor: correction.original_word,
        allWordsWithIds,
        matchedId: foundId,
        matchedWord
    });
};
|
70
|
+
|
71
|
+
/**
 * Determine which word ID a correction applies to.
 *
 * Resolution order:
 *   1. If `original_position` is a number and the word at that index of the
 *      flattened word list has the same text as `original_word`, use its ID.
 *   2. Otherwise use the first word (in segment order) whose text equals
 *      `original_word`, and emit a console warning about the text-based match.
 *   3. Otherwise generate a fresh ID with nanoid.
 *
 * Every outcome is reported through logWordMatching.
 *
 * @param segments - Segments whose words already carry IDs.
 * @param correction - The correction's original word and, optionally, its position.
 * @returns The matched word's ID, or a newly generated ID when nothing matched.
 */
function findWordIdForCorrection(
    segments: LyricsSegment[],
    correction: {
        original_word: string;
        original_position?: number;
    }
): string {
    const { original_word: target, original_position: index } = correction;
    const flatWords = segments.flatMap(segment => segment.words);

    // Preferred path: the position identifies the word and its text agrees.
    if (typeof index === 'number') {
        const candidate = flatWords[index];
        if (candidate && candidate.text === target) {
            logWordMatching(segments, correction, candidate.id);
            return candidate.id;
        }
    }

    // Fallback: first word with identical text (ambiguous for repeated words, hence the warning).
    const textMatch = flatWords.find(entry => entry.text === target);
    if (textMatch) {
        console.warn(
            'Warning: Had to find word by text match rather than position.',
            correction.original_word,
            'Consider using position information for more accurate matching.'
        );
        logWordMatching(segments, correction, textMatch.id);
        return textMatch.id;
    }

    // Nothing matched: hand back a brand-new ID.
    const generatedId = nanoid();
    logWordMatching(segments, correction, null);
    console.log('Generated new ID:', generatedId, 'for word:', correction.original_word);
    return generatedId;
}
|
109
|
+
|
110
|
+
/**
 * Build the reference-side word IDs a sequence covers in one reference source.
 *
 * IDs have the shape `<source>-word-<n>`, where n runs from the sequence's
 * reference position for that source across `sequence.length` consecutive
 * positions. (The original comment notes this mirrors the IDs HighlightedText
 * assigns — that component is not visible here, so confirm before changing the format.)
 *
 * @param referenceSource - Key of the reference source.
 * @param sequence - Sequence carrying `reference_positions` and `length`.
 * @returns The generated IDs, or an empty array when the sequence has no
 *          numeric position for `referenceSource`.
 */
function findReferenceWordIds(
    referenceSource: string,
    sequence: ServerData
): string[] {
    const firstPosition = sequence.reference_positions?.[referenceSource];

    return typeof firstPosition === 'number'
        ? Array.from(
            { length: sequence.length },
            (_unused, offset) => `${referenceSource}-word-${firstPosition + offset}`
        )
        : [];
}
|
127
|
+
|
128
|
+
/**
 * Return a deep copy of the correction data in which segments, words, anchors,
 * gaps and corrections all carry IDs.
 *
 * - Segments and words keep an existing `id`; otherwise one is generated with nanoid.
 * - Anchors get `word_ids` derived from their transcription position and length
 *   (any existing value is overwritten) and `reference_word_ids` built for every
 *   source listed in `data.reference_texts`.
 * - Gaps keep existing `word_ids`; otherwise they are derived from position and length.
 * - Corrections keep existing `id` / `word_id`; otherwise an ID is generated and the
 *   word ID is resolved with findWordIdForCorrection.
 *
 * The input object is not modified. Progress is written to the console for debugging.
 *
 * @param data - Correction data, possibly without IDs.
 * @returns A new CorrectionData object with IDs populated.
 */
export function initializeDataWithIds(data: CorrectionData): CorrectionData {
    const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;

    // Initialize segment and word IDs
    newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
        ...segment,
        id: segment.id || nanoid(),
        words: segment.words.map((word: Word) => ({
            ...word,
            id: word.id || nanoid()
        }))
    }));

    // There may be no segments at all; the sample is only for the debug log,
    // so it must not throw when the first segment is missing.
    const firstSegment = newData.corrected_segments[0];
    console.log('Segments after ID initialization:', JSON.stringify({
        segmentCount: newData.corrected_segments.length,
        totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
        sampleWords: firstSegment ? firstSegment.words.map(w => ({ id: w.id, text: w.text })) : []
    }));

    // Update anchor sequences with word IDs based on positions
    newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
        const serverAnchor = anchor as unknown as ServerData;

        // Get reference word IDs for each source
        const referenceWordIds: Record<string, string[]> = {};
        Object.keys(data.reference_texts || {}).forEach(source => {
            referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
        });

        console.log('Processing anchor with references:', JSON.stringify({
            words: anchor.words,
            reference_positions: serverAnchor.reference_positions,
            reference_word_ids: referenceWordIds
        }));

        return {
            ...anchor,
            id: anchor.id || nanoid(),
            word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
            reference_word_ids: referenceWordIds
        } as AnchorSequence;
    });

    // Update gap sequences to use word IDs
    newData.gap_sequences = newData.gap_sequences.map((gap) => {
        const serverGap = gap as unknown as ServerData;

        // Resolve the position-derived IDs once; they feed both the debug log
        // and the fallback for gaps that arrive without word_ids.
        const foundWordIds = findWordIdsForSequence(newData.corrected_segments, serverGap);
        console.log('Processing gap sequence:', {
            words: gap.words,
            word_ids: gap.word_ids,
            corrections: gap.corrections,
            foundWordIds
        });

        return {
            ...gap,
            id: gap.id || nanoid(),
            word_ids: gap.word_ids || foundWordIds,
            corrections: gap.corrections.map((correction: WordCorrection) => {
                const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
                console.log('Correction word ID assignment:', {
                    original_word: correction.original_word,
                    corrected_word: correction.corrected_word,
                    assigned_id: wordId
                });
                return {
                    ...correction,
                    id: correction.id || nanoid(),
                    word_id: wordId
                };
            })
        } as GapSequence;
    });

    return newData;
}
|
@@ -10,34 +10,31 @@ export function calculateReferenceLinePositions(
|
|
10
10
|
let currentReferencePosition = 0
|
11
11
|
|
12
12
|
// First, find all anchor sequences that cover entire lines
|
13
|
-
const fullLineAnchors = anchors
|
14
|
-
|
15
|
-
if (
|
13
|
+
const fullLineAnchors = anchors?.map(anchor => {
|
14
|
+
// Add null checks for anchor and reference_word_ids
|
15
|
+
if (!anchor?.reference_word_ids?.[currentSource]) return null
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
referenceLength: anchor.length,
|
20
|
-
transcriptionLine: corrected_segments.findIndex((segment, segmentIndex) => {
|
21
|
-
const words = segment.words
|
22
|
-
if (!words.length) return false
|
23
|
-
|
24
|
-
// Calculate the absolute position of the first and last words in this segment
|
25
|
-
let absolutePosition = 0
|
26
|
-
for (let i = 0; i < segmentIndex; i++) {
|
27
|
-
absolutePosition += corrected_segments[i].words.length
|
28
|
-
}
|
17
|
+
const referenceWordIds = anchor.reference_word_ids[currentSource]
|
18
|
+
if (!referenceWordIds?.length) return null
|
29
19
|
|
30
|
-
|
31
|
-
|
20
|
+
return {
|
21
|
+
referenceWordIds,
|
22
|
+
transcriptionLine: corrected_segments.findIndex((segment) => {
|
23
|
+
const wordIds = segment.words.map(w => w.id)
|
24
|
+
if (!wordIds.length) return false
|
32
25
|
|
33
|
-
|
34
|
-
|
26
|
+
// Check if all word IDs in this segment are part of the anchor
|
27
|
+
return wordIds.every(id => anchor.word_ids?.includes(id))
|
35
28
|
})
|
36
29
|
}
|
37
|
-
})
|
30
|
+
})?.filter((a): a is NonNullable<typeof a> => a !== null) ?? []
|
38
31
|
|
39
|
-
// Sort by reference
|
40
|
-
fullLineAnchors.sort((a, b) =>
|
32
|
+
// Sort by first reference word ID to process in order
|
33
|
+
fullLineAnchors.sort((a, b) => {
|
34
|
+
const firstIdA = a.referenceWordIds[0]
|
35
|
+
const firstIdB = b.referenceWordIds[0]
|
36
|
+
return firstIdA.localeCompare(firstIdB)
|
37
|
+
})
|
41
38
|
|
42
39
|
// Add line positions with padding
|
43
40
|
let currentLine = 0
|
@@ -55,10 +52,12 @@ export function calculateReferenceLinePositions(
|
|
55
52
|
|
56
53
|
// Add the actual line position
|
57
54
|
linePositions.push({
|
58
|
-
position:
|
59
|
-
lineNumber: currentLine
|
55
|
+
position: currentReferencePosition,
|
56
|
+
lineNumber: currentLine,
|
57
|
+
isEmpty: false
|
60
58
|
})
|
61
59
|
currentLine++
|
60
|
+
currentReferencePosition++
|
62
61
|
})
|
63
62
|
|
64
63
|
// Add any remaining lines after the last anchor
|
@@ -1,4 +1,5 @@
|
|
1
1
|
export interface Word {
|
2
|
+
id: string
|
2
3
|
text: string
|
3
4
|
start_time: number
|
4
5
|
end_time: number
|
@@ -6,6 +7,7 @@ export interface Word {
|
|
6
7
|
}
|
7
8
|
|
8
9
|
export interface LyricsSegment {
|
10
|
+
id: string
|
9
11
|
text: string
|
10
12
|
words: Word[]
|
11
13
|
start_time: number
|
@@ -13,10 +15,11 @@ export interface LyricsSegment {
|
|
13
15
|
}
|
14
16
|
|
15
17
|
export interface WordCorrection {
|
18
|
+
id: string
|
16
19
|
original_word: string
|
17
20
|
corrected_word: string
|
18
|
-
|
19
|
-
|
21
|
+
segment_id: string
|
22
|
+
word_id: string
|
20
23
|
source: string
|
21
24
|
confidence: number
|
22
25
|
reason: string
|
@@ -24,7 +27,7 @@ export interface WordCorrection {
|
|
24
27
|
is_deletion: boolean
|
25
28
|
split_index?: number
|
26
29
|
split_total?: number
|
27
|
-
reference_positions?: Record<string,
|
30
|
+
reference_positions?: Record<string, string>
|
28
31
|
length: number
|
29
32
|
}
|
30
33
|
|
@@ -36,26 +39,35 @@ export interface PhraseScore {
|
|
36
39
|
}
|
37
40
|
|
38
41
|
export interface AnchorSequence {
|
42
|
+
id: string
|
39
43
|
words: string[]
|
40
44
|
text: string
|
41
45
|
length: number
|
42
|
-
|
43
|
-
|
46
|
+
word_ids: string[]
|
47
|
+
reference_word_ids: Record<string, string[]>
|
44
48
|
confidence: number
|
45
49
|
phrase_score: PhraseScore
|
46
50
|
total_score: number
|
47
51
|
}
|
48
52
|
|
53
|
+
/**
 * Compact description of an anchor, used as the type of a gap's
 * `preceding_anchor` and `following_anchor`.
 */
export interface AnchorReference {
    /** Text of the anchor. */
    text: string
    /** Word IDs associated with the anchor. */
    word_ids: string[]
    /** Confidence value reported for the anchor. */
    confidence: number
}
|
58
|
+
|
49
59
|
export interface GapSequence {
|
50
|
-
|
60
|
+
id: string
|
51
61
|
text: string
|
62
|
+
words: string[]
|
63
|
+
word_ids: string[]
|
52
64
|
length: number
|
53
|
-
transcription_position: number
|
54
|
-
preceding_anchor: AnchorSequence | null
|
55
|
-
following_anchor: AnchorSequence | null
|
56
|
-
reference_words: Record<string, string[]>
|
57
|
-
reference_words_original?: Record<string, string[]>
|
58
65
|
corrections: WordCorrection[]
|
66
|
+
preceding_anchor: AnchorReference | null
|
67
|
+
following_anchor: AnchorReference | null
|
68
|
+
reference_words: {
|
69
|
+
[source: string]: string[]
|
70
|
+
}
|
59
71
|
}
|
60
72
|
|
61
73
|
export interface LyricsData {
|
@@ -98,10 +110,8 @@ export interface CorrectionData {
|
|
98
110
|
}
|
99
111
|
|
100
112
|
export interface HighlightInfo {
|
101
|
-
|
102
|
-
|
103
|
-
referenceIndices: Record<string, number>
|
104
|
-
referenceLength?: number
|
113
|
+
word_ids?: string[]
|
114
|
+
reference_word_ids?: Record<string, string[]>
|
105
115
|
type: 'single' | 'gap' | 'anchor'
|
106
116
|
}
|
107
117
|
|
@@ -1 +1 @@
|
|
1
|
-
{"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/
|
1
|
+
{"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/initializedatawithids.tsx","./src/components/shared/utils/referencelinecalculator.ts"],"version":"5.6.3"}
|