lyrics-transcriber 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-D0Gr3Ep7.js} +16509 -9038
- lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +281 -63
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +249 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +320 -266
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +120 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +174 -52
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +158 -114
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +39 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +134 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +67 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +1 -1
- lyrics_transcriber/output/generator.py +22 -8
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +27 -1
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/RECORD +75 -61
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/entry_points.txt +0 -0
@@ -1,202 +0,0 @@
|
|
1
|
-
import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
|
2
|
-
import { nanoid } from 'nanoid';
|
3
|
-
|
4
|
-
// Server-side shape of an anchor/gap sequence. Used only inside this file to
// read the positional fields that are not exposed on the client-side types.
interface ServerData {
    // Index of the sequence's first word within the flattened transcription words
    transcription_position: number;
    // Number of words covered by the sequence
    length: number;
    words: string[];
    // Start index of the sequence inside each reference text, keyed by source name.
    // Declared explicitly so lookups are type-checked instead of falling through
    // to the catch-all index signature below.
    reference_positions?: Record<string, number>;
    // Any other server-provided field; `unknown` forces callers to narrow before use
    [key: string]: unknown;
}
|
12
|
-
|
13
|
-
/**
 * Produces a detached, JSON-safe copy of the correction data for submission.
 *
 * The copy is made with a JSON round trip, so the caller's object is never
 * mutated and the result contains only JSON-representable values.
 *
 * Note on number formatting: a JavaScript number carries no integer/float
 * distinction, so `1` and `1.0` are the same value and serialize identically.
 * Re-parsing a number from its own string form (`toFixed(1)` / `toString()`)
 * yields the same value again, so no per-number pass is performed here; if the
 * server needs floats it has to coerce them on its side.
 *
 * @param data - Correction data to copy.
 * @returns A deep clone of `data`.
 */
export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
    // Deep clone to avoid handing the caller's (possibly state-owned) object out
    return JSON.parse(JSON.stringify(data)) as CorrectionData;
}
|
38
|
-
|
39
|
-
/**
 * Collects the IDs of the transcription words covered by a sequence.
 *
 * The words of all segments are flattened in order, and the sequence selects
 * `length` words starting at `transcription_position` in that flat list.
 *
 * @param segments - Segments whose words are searched, in order.
 * @param sequence - Server sequence providing `transcription_position` and `length`.
 * @returns IDs of the covered words; empty when the range is invalid or out of bounds.
 */
function findWordIdsForSequence(
    segments: LyricsSegment[],
    sequence: ServerData
): string[] {
    const start = sequence.transcription_position;
    const count = sequence.length;

    // Array.prototype.slice interprets negative bounds as offsets from the END
    // of the array, which would silently return IDs of unrelated words. Treat a
    // negative start or a non-positive length as "covers nothing" instead.
    // (NaN/undefined fail both comparisons and also yield an empty result.)
    const isValidRange = start >= 0 && count > 0;
    const covered = isValidRange
        ? segments.flatMap(s => s.words).slice(start, start + count)
        : [];

    console.log('Finding word IDs for sequence:', JSON.stringify({
        position: sequence.transcription_position,
        length: sequence.length,
        words: covered.map(w => w.text)
    }));

    return covered.map(w => w.id);
}
|
56
|
-
|
57
|
-
// Debug helper: prints which word (if any) was matched for a correction,
// alongside the full list of candidate words and their IDs.
const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
    const flatWords = segments.flatMap(segment => segment.words);

    // Every candidate word, reduced to the two fields relevant for matching
    const candidates = flatWords.map(candidate => ({
        text: candidate.text,
        id: candidate.id
    }));

    // Text of the matched word; null when no ID was found at all
    const matchedText = foundId
        ? flatWords.find(candidate => candidate.id === foundId)?.text
        : null;

    console.log('Word ID Assignment:', {
        searchingFor: correction.original_word,
        allWordsWithIds: candidates,
        matchedId: foundId,
        matchedWord: matchedText
    });
};
|
70
|
-
|
71
|
-
/**
 * Resolves the ID of the word a correction applies to.
 *
 * Lookup order:
 *  1. the word at `original_position` in the flattened word list, accepted
 *     only if its text equals `original_word`;
 *  2. the first word (in segment order) whose text equals `original_word`;
 *  3. a freshly generated ID when nothing matches.
 *
 * Every outcome is reported through `logWordMatching`.
 */
function findWordIdForCorrection(
    segments: LyricsSegment[],
    correction: {
        original_word: string;
        original_position?: number;
    }
): string {
    const flatWords = segments.flatMap(segment => segment.words);
    const { original_word: targetText, original_position: targetIndex } = correction;

    // 1. Positional lookup, trusted only when the text agrees
    if (typeof targetIndex === 'number') {
        const positional = flatWords[targetIndex];
        if (positional && positional.text === targetText) {
            logWordMatching(segments, correction, positional.id);
            return positional.id;
        }
    }

    // 2. Text-only fallback: first occurrence across all segments
    const byText = flatWords.find(candidate => candidate.text === targetText);
    if (byText) {
        console.warn(
            'Warning: Had to find word by text match rather than position.',
            correction.original_word,
            'Consider using position information for more accurate matching.'
        );
        logWordMatching(segments, correction, byText.id);
        return byText.id;
    }

    // 3. Nothing matched: mint a new ID
    const generatedId = nanoid();
    logWordMatching(segments, correction, null);
    console.log('Generated new ID:', generatedId, 'for word:', correction.original_word);
    return generatedId;
}
|
109
|
-
|
110
|
-
/**
 * Builds the reference-text word IDs covered by a sequence for one source.
 *
 * IDs have the form `<source>-word-<index>`, starting at the sequence's
 * position in that source and running for `sequence.length` words. The
 * original note states this mirrors the ID format used by HighlightedText.
 *
 * @returns The generated IDs, or an empty list when the sequence has no
 *          numeric position for `referenceSource`.
 */
function findReferenceWordIds(
    referenceSource: string,
    sequence: ServerData
): string[] {
    const firstIndex = sequence.reference_positions?.[referenceSource];

    if (typeof firstIndex === 'number') {
        const toWordId = (_: unknown, offset: number) => `${referenceSource}-word-${firstIndex + offset}`;
        return Array.from({ length: sequence.length }, toWordId);
    }

    // No position recorded for this source
    return [];
}
|
127
|
-
|
128
|
-
/**
 * Returns a deep copy of the correction data in which every segment, word,
 * anchor sequence, gap sequence and correction carries an ID.
 *
 * - Existing IDs are kept; missing ones are generated with `nanoid()`.
 * - Anchor sequences get `word_ids` (derived from their transcription
 *   position) and `reference_word_ids` for each key of `data.reference_texts`.
 * - Gap sequences keep their own `word_ids` when present, otherwise they are
 *   derived from the transcription position; each correction gets a `word_id`.
 *
 * The input object is not mutated.
 *
 * @param data - Correction data as received from the server.
 * @returns The ID-complete copy.
 */
export function initializeDataWithIds(data: CorrectionData): CorrectionData {
    const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;

    // Initialize segment and word IDs
    newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
        ...segment,
        id: segment.id || nanoid(),
        words: segment.words.map((word: Word) => ({
            ...word,
            id: word.id || nanoid()
        }))
    }));

    // The sample is taken from the first segment; guard against an empty
    // segment list so a debug log can never crash initialization.
    const sampleWords = (newData.corrected_segments[0]?.words ?? []).map(w => ({ id: w.id, text: w.text }));

    console.log('Segments after ID initialization:', JSON.stringify({
        segmentCount: newData.corrected_segments.length,
        totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
        sampleWords
    }));

    // Update anchor sequences with word IDs based on positions
    newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
        // Positional fields live on the server-side shape, not the client type
        const serverAnchor = anchor as unknown as ServerData;

        // Get reference word IDs for each source
        const referenceWordIds: Record<string, string[]> = {};
        Object.keys(data.reference_texts || {}).forEach(source => {
            referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
        });

        console.log('Processing anchor with references:', JSON.stringify({
            words: anchor.words,
            reference_positions: serverAnchor.reference_positions,
            reference_word_ids: referenceWordIds
        }));

        return {
            ...anchor,
            id: anchor.id || nanoid(),
            word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
            reference_word_ids: referenceWordIds
        } as AnchorSequence;
    });

    // Update gap sequences to use word IDs
    newData.gap_sequences = newData.gap_sequences.map((gap) => {
        const serverGap = gap as unknown as ServerData;

        // Resolve the position-based IDs once; they serve both the debug log
        // and the fallback below.
        const foundWordIds = findWordIdsForSequence(newData.corrected_segments, serverGap);

        console.log('Processing gap sequence:', {
            words: gap.words,
            word_ids: gap.word_ids,
            corrections: gap.corrections,
            foundWordIds
        });

        return {
            ...gap,
            id: gap.id || nanoid(),
            word_ids: gap.word_ids || foundWordIds,
            corrections: gap.corrections.map((correction: WordCorrection) => {
                const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
                console.log('Correction word ID assignment:', {
                    original_word: correction.original_word,
                    corrected_word: correction.corrected_word,
                    assigned_id: wordId
                });
                return {
                    ...correction,
                    id: correction.id || nanoid(),
                    word_id: wordId
                };
            })
        } as GapSequence;
    });

    return newData;
}
|
File without changes
|
{lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/entry_points.txt
RENAMED
File without changes
|