lyrics-transcriber 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/correction/handlers/extend_anchor.py +13 -2
- lyrics_transcriber/correction/handlers/word_operations.py +8 -2
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js +26696 -0
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +3 -2
- lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +36 -13
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +41 -1
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +48 -16
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +71 -16
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +45 -12
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +83 -19
- lyrics_transcriber/frontend/src/components/shared/types.ts +3 -0
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +65 -9
- lyrics_transcriber/frontend/vite.config.js +4 -0
- lyrics_transcriber/frontend/vite.config.ts +4 -0
- lyrics_transcriber/lyrics/genius.py +41 -12
- lyrics_transcriber/output/cdg.py +1 -0
- lyrics_transcriber/output/cdgmaker/composer.py +839 -534
- lyrics_transcriber/review/server.py +10 -12
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.39.0.dist-info}/METADATA +3 -2
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.39.0.dist-info}/RECORD +27 -26
- lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +0 -182
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.39.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.39.0.dist-info}/WHEEL +0 -0
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.39.0.dist-info}/entry_points.txt +0 -0
@@ -19,6 +19,7 @@ export interface UseWordClickProps {
|
|
19
19
|
onWordClick?: (info: WordClickInfo) => void
|
20
20
|
isReference?: boolean
|
21
21
|
currentSource?: string
|
22
|
+
gaps?: GapSequence[]
|
22
23
|
}
|
23
24
|
|
24
25
|
export function useWordClick({
|
@@ -26,7 +27,8 @@ export function useWordClick({
|
|
26
27
|
onElementClick,
|
27
28
|
onWordClick,
|
28
29
|
isReference,
|
29
|
-
currentSource
|
30
|
+
currentSource,
|
31
|
+
gaps = []
|
30
32
|
}: UseWordClickProps) {
|
31
33
|
const handleWordClick = useCallback((
|
32
34
|
word: string,
|
@@ -47,21 +49,20 @@ export function useWordClick({
|
|
47
49
|
wordIds: anchor.word_ids,
|
48
50
|
length: anchor.length,
|
49
51
|
words: anchor.words,
|
50
|
-
referenceWordIds: anchor.reference_word_ids
|
52
|
+
referenceWordIds: anchor.reference_word_ids,
|
53
|
+
matchesWordId: isReference
|
54
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
55
|
+
: anchor.word_ids.includes(wordId)
|
51
56
|
},
|
52
57
|
gapInfo: gap && {
|
53
58
|
wordIds: gap.word_ids,
|
54
59
|
length: gap.length,
|
55
60
|
words: gap.words,
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
is_deletion: c.is_deletion,
|
62
|
-
split_index: c.split_index,
|
63
|
-
split_total: c.split_total
|
64
|
-
}))
|
61
|
+
referenceWords: gap.reference_words,
|
62
|
+
corrections: gap.corrections,
|
63
|
+
matchesWordId: isReference
|
64
|
+
? gap.reference_words[currentSource!]?.includes(wordId)
|
65
|
+
: gap.word_ids.includes(wordId)
|
65
66
|
},
|
66
67
|
belongsToAnchor: anchor && (
|
67
68
|
isReference
|
@@ -78,6 +79,28 @@ export function useWordClick({
|
|
78
79
|
}
|
79
80
|
}, null, 2))
|
80
81
|
|
82
|
+
// For reference view clicks, find the corresponding gap
|
83
|
+
if (isReference && currentSource) {
|
84
|
+
// Extract position from wordId (e.g., "genius-word-3" -> 3)
|
85
|
+
const position = parseInt(wordId.split('-').pop() || '', 10);
|
86
|
+
|
87
|
+
// Find gap that has a correction matching this reference position
|
88
|
+
const matchingGap = gaps?.find(g =>
|
89
|
+
g.corrections.some(c => {
|
90
|
+
const refPosition = c.reference_positions?.[currentSource];
|
91
|
+
return typeof refPosition === 'number' && refPosition === position;
|
92
|
+
})
|
93
|
+
);
|
94
|
+
|
95
|
+
if (matchingGap) {
|
96
|
+
console.log('Found matching gap for reference click:', {
|
97
|
+
position,
|
98
|
+
gap: matchingGap
|
99
|
+
});
|
100
|
+
gap = matchingGap;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
81
104
|
const belongsToAnchor = anchor && (
|
82
105
|
isReference
|
83
106
|
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
@@ -86,17 +109,58 @@ export function useWordClick({
|
|
86
109
|
|
87
110
|
const belongsToGap = gap && (
|
88
111
|
isReference
|
89
|
-
? gap.corrections.some(c =>
|
112
|
+
? gap.corrections.some(c => {
|
113
|
+
const refPosition = c.reference_positions?.[currentSource!];
|
114
|
+
const clickedPosition = parseInt(wordId.split('-').pop() || '', 10);
|
115
|
+
return typeof refPosition === 'number' && refPosition === clickedPosition;
|
116
|
+
})
|
90
117
|
: gap.word_ids.includes(wordId)
|
91
118
|
)
|
92
119
|
|
93
120
|
if (mode === 'highlight' || mode === 'edit') {
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
121
|
+
if (belongsToAnchor && anchor) {
|
122
|
+
onWordClick?.({
|
123
|
+
word_id: wordId,
|
124
|
+
type: 'anchor',
|
125
|
+
anchor,
|
126
|
+
gap: undefined
|
127
|
+
})
|
128
|
+
} else if (belongsToGap && gap) {
|
129
|
+
// Create highlight info that includes both transcription and reference IDs
|
130
|
+
const referenceWords: Record<string, string[]> = {};
|
131
|
+
|
132
|
+
// For each correction in the gap, add its reference positions
|
133
|
+
gap.corrections.forEach(correction => {
|
134
|
+
Object.entries(correction.reference_positions || {}).forEach(([source, position]) => {
|
135
|
+
if (typeof position === 'number') {
|
136
|
+
const refId = `${source}-word-${position}`;
|
137
|
+
if (!referenceWords[source]) {
|
138
|
+
referenceWords[source] = [];
|
139
|
+
}
|
140
|
+
if (!referenceWords[source].includes(refId)) {
|
141
|
+
referenceWords[source].push(refId);
|
142
|
+
}
|
143
|
+
}
|
144
|
+
});
|
145
|
+
});
|
146
|
+
|
147
|
+
onWordClick?.({
|
148
|
+
word_id: wordId,
|
149
|
+
type: 'gap',
|
150
|
+
anchor: undefined,
|
151
|
+
gap: {
|
152
|
+
...gap,
|
153
|
+
reference_words: referenceWords // Use reference_words instead of reference_word_ids
|
154
|
+
}
|
155
|
+
})
|
156
|
+
} else {
|
157
|
+
onWordClick?.({
|
158
|
+
word_id: wordId,
|
159
|
+
type: 'other',
|
160
|
+
anchor: undefined,
|
161
|
+
gap: undefined
|
162
|
+
})
|
163
|
+
}
|
100
164
|
} else if (mode === 'details') {
|
101
165
|
if (belongsToAnchor && anchor) {
|
102
166
|
onElementClick({
|
@@ -139,7 +203,7 @@ export function useWordClick({
|
|
139
203
|
})
|
140
204
|
}
|
141
205
|
}
|
142
|
-
}, [mode, onWordClick, onElementClick, isReference, currentSource])
|
206
|
+
}, [mode, onWordClick, onElementClick, isReference, currentSource, gaps])
|
143
207
|
|
144
208
|
return { handleWordClick }
|
145
209
|
}
|
@@ -35,7 +35,10 @@ export interface TranscriptionWordPosition extends BaseWordPosition {
|
|
35
35
|
start_time?: number
|
36
36
|
end_time?: number
|
37
37
|
}
|
38
|
+
type: 'anchor' | 'gap' | 'other'
|
39
|
+
sequence?: AnchorSequence | GapSequence
|
38
40
|
isInRange: boolean
|
41
|
+
isCorrected?: boolean
|
39
42
|
}
|
40
43
|
|
41
44
|
// Reference-specific word position with simple string word
|
@@ -54,16 +54,57 @@ function findWordIdsForSequence(
|
|
54
54
|
return allWords.slice(startIndex, endIndex).map(word => word.id);
|
55
55
|
}
|
56
56
|
|
57
|
-
//
|
57
|
+
// Add this at the top of the file
|
58
|
+
const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
|
59
|
+
const allWords = segments.flatMap(s => s.words);
|
60
|
+
console.log('Word ID Assignment:', {
|
61
|
+
searchingFor: correction.original_word,
|
62
|
+
allWordsWithIds: allWords.map(w => ({
|
63
|
+
text: w.text,
|
64
|
+
id: w.id
|
65
|
+
})),
|
66
|
+
matchedId: foundId,
|
67
|
+
matchedWord: foundId ? allWords.find(w => w.id === foundId)?.text : null
|
68
|
+
});
|
69
|
+
};
|
70
|
+
|
71
|
+
// Modify findWordIdForCorrection to include logging
|
58
72
|
function findWordIdForCorrection(
|
59
73
|
segments: LyricsSegment[],
|
60
|
-
correction: {
|
74
|
+
correction: {
|
75
|
+
original_word: string;
|
76
|
+
original_position?: number;
|
77
|
+
}
|
61
78
|
): string {
|
79
|
+
const allWords = segments.flatMap(s => s.words);
|
80
|
+
|
81
|
+
// If we have position information, use it to find the exact word
|
82
|
+
if (typeof correction.original_position === 'number') {
|
83
|
+
const word = allWords[correction.original_position];
|
84
|
+
if (word && word.text === correction.original_word) {
|
85
|
+
logWordMatching(segments, correction, word.id);
|
86
|
+
return word.id;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
// Fallback to finding by text (but log a warning)
|
62
91
|
for (const segment of segments) {
|
63
92
|
const word = segment.words.find(w => w.text === correction.original_word);
|
64
|
-
if (word)
|
93
|
+
if (word) {
|
94
|
+
console.warn(
|
95
|
+
'Warning: Had to find word by text match rather than position.',
|
96
|
+
correction.original_word,
|
97
|
+
'Consider using position information for more accurate matching.'
|
98
|
+
);
|
99
|
+
logWordMatching(segments, correction, word.id);
|
100
|
+
return word.id;
|
101
|
+
}
|
65
102
|
}
|
66
|
-
|
103
|
+
|
104
|
+
const newId = nanoid();
|
105
|
+
logWordMatching(segments, correction, null);
|
106
|
+
console.log('Generated new ID:', newId, 'for word:', correction.original_word);
|
107
|
+
return newId;
|
67
108
|
}
|
68
109
|
|
69
110
|
// Helper function to find word IDs in reference text
|
@@ -130,15 +171,30 @@ export function initializeDataWithIds(data: CorrectionData): CorrectionData {
|
|
130
171
|
// Update gap sequences to use word IDs
|
131
172
|
newData.gap_sequences = newData.gap_sequences.map((gap) => {
|
132
173
|
const serverGap = gap as unknown as ServerData;
|
174
|
+
console.log('Processing gap sequence:', {
|
175
|
+
words: gap.words,
|
176
|
+
word_ids: gap.word_ids,
|
177
|
+
corrections: gap.corrections,
|
178
|
+
foundWordIds: findWordIdsForSequence(newData.corrected_segments, serverGap)
|
179
|
+
});
|
180
|
+
|
133
181
|
return {
|
134
182
|
...gap,
|
135
183
|
id: gap.id || nanoid(),
|
136
184
|
word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
|
137
|
-
corrections: gap.corrections.map((correction: WordCorrection) =>
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
185
|
+
corrections: gap.corrections.map((correction: WordCorrection) => {
|
186
|
+
const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
|
187
|
+
console.log('Correction word ID assignment:', {
|
188
|
+
original_word: correction.original_word,
|
189
|
+
corrected_word: correction.corrected_word,
|
190
|
+
assigned_id: wordId
|
191
|
+
});
|
192
|
+
return {
|
193
|
+
...correction,
|
194
|
+
id: correction.id || nanoid(),
|
195
|
+
word_id: wordId
|
196
|
+
};
|
197
|
+
})
|
142
198
|
} as GapSequence;
|
143
199
|
});
|
144
200
|
|
@@ -82,19 +82,48 @@ class GeniusProvider(BaseLyricsProvider):
|
|
82
82
|
|
83
83
|
def _clean_lyrics(self, lyrics: str) -> str:
|
84
84
|
"""Clean and process lyrics from Genius to remove unwanted content."""
|
85
|
+
self.logger.debug("Starting lyrics cleaning process")
|
86
|
+
original = lyrics
|
85
87
|
|
86
88
|
lyrics = lyrics.replace("\\n", "\n")
|
87
89
|
lyrics = re.sub(r"You might also like", "", lyrics)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
lyrics = re.sub(
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
lyrics = re.sub(r"^
|
98
|
-
|
99
|
-
|
90
|
+
if original != lyrics:
|
91
|
+
self.logger.debug("Removed 'You might also like' text")
|
92
|
+
|
93
|
+
original = lyrics
|
94
|
+
lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
|
95
|
+
if original != lyrics:
|
96
|
+
self.logger.debug("Removed song name and 'Lyrics' prefix")
|
97
|
+
|
98
|
+
original = lyrics
|
99
|
+
lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
|
100
|
+
if original != lyrics:
|
101
|
+
self.logger.debug("Removed contributors count and 'Lyrics' text")
|
102
|
+
|
103
|
+
original = lyrics
|
104
|
+
lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
|
105
|
+
if original != lyrics:
|
106
|
+
self.logger.debug("Removed ticket sales text")
|
107
|
+
|
108
|
+
original = lyrics
|
109
|
+
lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
|
110
|
+
if original != lyrics:
|
111
|
+
self.logger.debug("Removed numbered embed marker")
|
112
|
+
|
113
|
+
original = lyrics
|
114
|
+
lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
|
115
|
+
if original != lyrics:
|
116
|
+
self.logger.debug("Removed 'Embed' suffix from word")
|
117
|
+
|
118
|
+
original = lyrics
|
119
|
+
lyrics = re.sub(r"^Embed$", r"", lyrics)
|
120
|
+
if original != lyrics:
|
121
|
+
self.logger.debug("Removed standalone 'Embed' text")
|
122
|
+
|
123
|
+
original = lyrics
|
124
|
+
lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics)
|
125
|
+
if original != lyrics:
|
126
|
+
self.logger.debug("Removed lines containing square brackets")
|
127
|
+
|
128
|
+
self.logger.debug("Completed lyrics cleaning process")
|
100
129
|
return lyrics
|
lyrics_transcriber/output/cdg.py
CHANGED
@@ -189,6 +189,7 @@ class CDGGenerator:
|
|
189
189
|
"""Compose CDG using KaraokeComposer."""
|
190
190
|
kc = KaraokeComposer.from_file(toml_file)
|
191
191
|
kc.compose()
|
192
|
+
kc.create_mp4(height=1080, fps=30)
|
192
193
|
|
193
194
|
def _find_cdg_zip(self, artist: str, title: str) -> str:
|
194
195
|
"""Find the generated CDG ZIP file."""
|