lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. lyrics_transcriber/cli/cli_main.py +7 -0
  2. lyrics_transcriber/core/config.py +1 -0
  3. lyrics_transcriber/core/controller.py +30 -52
  4. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  5. lyrics_transcriber/correction/corrector.py +224 -107
  6. lyrics_transcriber/correction/handlers/base.py +28 -10
  7. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  8. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  9. lyrics_transcriber/correction/handlers/llm.py +290 -0
  10. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  11. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  12. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  13. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  14. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  15. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  16. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  17. lyrics_transcriber/correction/text_utils.py +3 -7
  18. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  19. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  20. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  21. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  22. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  23. lyrics_transcriber/frontend/dist/index.html +1 -1
  24. lyrics_transcriber/frontend/package.json +6 -2
  25. lyrics_transcriber/frontend/src/App.tsx +18 -2
  26. lyrics_transcriber/frontend/src/api.ts +103 -6
  27. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  28. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  30. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  31. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  33. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  35. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  36. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  37. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  38. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  39. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  40. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  41. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  42. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  43. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  44. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  45. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  47. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  48. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  49. lyrics_transcriber/frontend/src/types.js +2 -0
  50. lyrics_transcriber/frontend/src/types.ts +70 -49
  51. lyrics_transcriber/frontend/src/validation.ts +132 -0
  52. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  53. lyrics_transcriber/frontend/yarn.lock +3752 -0
  54. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  55. lyrics_transcriber/lyrics/file_provider.py +6 -5
  56. lyrics_transcriber/lyrics/genius.py +5 -2
  57. lyrics_transcriber/lyrics/spotify.py +58 -21
  58. lyrics_transcriber/output/ass/config.py +16 -5
  59. lyrics_transcriber/output/cdg.py +8 -8
  60. lyrics_transcriber/output/generator.py +29 -14
  61. lyrics_transcriber/output/plain_text.py +15 -10
  62. lyrics_transcriber/output/segment_resizer.py +16 -3
  63. lyrics_transcriber/output/subtitles.py +56 -2
  64. lyrics_transcriber/output/video.py +107 -1
  65. lyrics_transcriber/review/__init__.py +0 -1
  66. lyrics_transcriber/review/server.py +337 -164
  67. lyrics_transcriber/transcribers/audioshake.py +3 -0
  68. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  69. lyrics_transcriber/transcribers/whisper.py +11 -1
  70. lyrics_transcriber/types.py +151 -105
  71. lyrics_transcriber/utils/word_utils.py +27 -0
  72. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  73. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
  74. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  75. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  76. lyrics_transcriber/frontend/package-lock.json +0 -4260
  77. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  78. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  79. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,202 +0,0 @@
1
- import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
2
- import { nanoid } from 'nanoid';
3
-
4
/**
 * Server-side (position-based) view of an anchor or gap sequence.
 *
 * Only used inside this file, to read positional fields off sequences before
 * they are converted to ID-based references.
 */
interface ServerData {
    /** Index of the sequence's first word in the flattened list of transcription words. */
    transcription_position: number;
    /** Number of words the sequence spans. */
    length: number;
    /** Words of the sequence as plain strings (presumably the word texts; confirm against the server payload). */
    words: string[];
    /**
     * Start index of the sequence within each reference source, keyed by source
     * name. Declared explicitly so reads are type-checked instead of falling
     * through to the `any` index signature below.
     */
    reference_positions?: Record<string, number>;
    // Remaining server fields are not modelled here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    [key: string]: any;
}
12
-
13
/**
 * Returns a deep copy of the correction data, detached from the caller's object.
 *
 * The copy is produced by a JSON round-trip, so it contains exactly what
 * `JSON.stringify` would send: `undefined` properties are dropped and
 * non-finite numbers become `null`.
 *
 * An earlier version additionally walked the clone and re-parsed every number
 * (`parseFloat(n.toFixed(1))` for integers, `parseFloat(n.toString())`
 * otherwise) in order to "preserve floats". JavaScript has a single number
 * type and both expressions evaluate to the input value, so that pass changed
 * nothing and has been removed. Whether a value is serialised as `1` or `1.0`
 * cannot be controlled from here.
 *
 * @param data Correction data to copy.
 * @returns A structurally equal deep copy of `data`.
 */
export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
    return JSON.parse(JSON.stringify(data)) as CorrectionData;
}
38
-
39
/**
 * Collects the IDs of the transcription words covered by a position-based
 * sequence.
 *
 * Words from all segments are flattened in order; the sequence selects
 * `length` words starting at `transcription_position`. Positions outside the
 * word list are ignored, so the result may be shorter than `length`.
 *
 * @param segments Segments whose words already carry IDs.
 * @param sequence Position-based sequence received from the server.
 * @returns IDs of the covered words, in transcription order.
 */
function findWordIdsForSequence(
    segments: LyricsSegment[],
    sequence: ServerData
): string[] {
    const allWords = segments.flatMap(s => s.words);

    // Clamp the window: Array.prototype.slice treats a negative start as an
    // offset from the END of the array, which would return unrelated words.
    const startIndex = Math.max(0, sequence.transcription_position);
    const endIndex = Math.max(startIndex, sequence.transcription_position + sequence.length);
    const sequenceWords = allWords.slice(startIndex, endIndex);

    console.log('Finding word IDs for sequence:', JSON.stringify({
        position: sequence.transcription_position,
        length: sequence.length,
        words: sequenceWords.map(w => w.text)
    }));

    return sequenceWords.map(word => word.id);
}
56
-
57
/**
 * Debug helper: logs which word ID (if any) was selected for a correction,
 * alongside every word/ID pair currently present in the segments.
 *
 * @param segments Segments to list words from.
 * @param correction Correction being resolved; only `original_word` is logged.
 * @param foundId ID that was matched, or `null` when nothing matched.
 */
const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
    const flattened = segments.flatMap(segment => segment.words);
    const idTable = flattened.map(({ text, id }) => ({ text, id }));
    // Resolve the matched word's text only when an ID was actually found.
    const matchedWord = foundId
        ? flattened.find(candidate => candidate.id === foundId)?.text
        : null;

    console.log('Word ID Assignment:', {
        searchingFor: correction.original_word,
        allWordsWithIds: idTable,
        matchedId: foundId,
        matchedWord: matchedWord
    });
};
70
-
71
/**
 * Resolves the ID of the transcription word that a correction refers to.
 *
 * Resolution order:
 *  1. the word at `original_position` in the flattened word list, if its text
 *     equals `original_word`;
 *  2. otherwise the first word (in segment order) whose text equals
 *     `original_word`, with a console warning;
 *  3. otherwise a freshly generated ID.
 *
 * Every outcome is reported through `logWordMatching`.
 *
 * @param segments Segments whose words already carry IDs.
 * @param correction Correction to resolve.
 * @returns The matched word's ID, or a new ID when no word matches.
 */
function findWordIdForCorrection(
    segments: LyricsSegment[],
    correction: {
        original_word: string;
        original_position?: number;
    }
): string {
    const { original_word: targetText, original_position: position } = correction;
    const flatWords = segments.flatMap(segment => segment.words);

    // Preferred path: exact position, accepted only if the text agrees.
    const positional = typeof position === 'number' ? flatWords[position] : undefined;
    if (positional && positional.text === targetText) {
        logWordMatching(segments, correction, positional.id);
        return positional.id;
    }

    // Fallback: first word anywhere with the same text.
    const textual = flatWords.find(candidate => candidate.text === targetText);
    if (textual) {
        console.warn(
            'Warning: Had to find word by text match rather than position.',
            targetText,
            'Consider using position information for more accurate matching.'
        );
        logWordMatching(segments, correction, textual.id);
        return textual.id;
    }

    // Nothing matched: mint a new ID so the correction still has one.
    const freshId = nanoid();
    logWordMatching(segments, correction, null);
    console.log('Generated new ID:', freshId, 'for word:', targetText);
    return freshId;
}
109
-
110
/**
 * Builds the IDs of the reference-text words that a sequence covers in one
 * reference source.
 *
 * IDs have the form `<source>-word-<index>`, one per word of the sequence,
 * starting at the sequence's position in that source.
 *
 * @param referenceSource Name of the reference source.
 * @param sequence Position-based sequence received from the server.
 * @returns The generated IDs, or an empty array when the sequence has no
 *          numeric position for `referenceSource`.
 */
function findReferenceWordIds(
    referenceSource: string,
    sequence: ServerData
): string[] {
    const firstIndex = sequence.reference_positions?.[referenceSource];
    const { length } = sequence;

    return typeof firstIndex === 'number'
        ? Array.from({ length }, (_unused, offset) => `${referenceSource}-word-${firstIndex + offset}`)
        : [];
}
127
-
128
/**
 * Returns a deep copy of `data` in which segments, words, anchor sequences,
 * gap sequences and corrections all carry IDs, and sequences reference words
 * by ID.
 *
 * Existing IDs are kept; missing (falsy) ones are generated with `nanoid`.
 * Anchor word IDs are always derived from the anchor's transcription position;
 * gap word IDs and correction word IDs are only derived when not already set.
 * The input object is not modified.
 *
 * @param data Correction data as received from the server.
 * @returns A new `CorrectionData` with IDs populated.
 */
export function initializeDataWithIds(data: CorrectionData): CorrectionData {
    // JSON round-trip clone so the caller's object is never mutated.
    const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;

    // Initialize segment and word IDs
    newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
        ...segment,
        id: segment.id || nanoid(),
        words: segment.words.map((word: Word) => ({
            ...word,
            id: word.id || nanoid()
        }))
    }));

    console.log('Segments after ID initialization:', JSON.stringify({
        segmentCount: newData.corrected_segments.length,
        totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
        // Guarded: with no segments there is no first segment to sample from.
        sampleWords: (newData.corrected_segments[0]?.words ?? []).map(w => ({ id: w.id, text: w.text }))
    }));

    // The set of reference sources is the same for every anchor.
    const referenceSources = Object.keys(data.reference_texts || {});

    // Update anchor sequences with word IDs based on positions
    newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
        const serverAnchor = anchor as unknown as ServerData;

        // Get reference word IDs for each source
        const referenceWordIds: Record<string, string[]> = {};
        for (const source of referenceSources) {
            referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
        }

        console.log('Processing anchor with references:', JSON.stringify({
            words: anchor.words,
            reference_positions: serverAnchor.reference_positions,
            reference_word_ids: referenceWordIds
        }));

        return {
            ...anchor,
            id: anchor.id || nanoid(),
            word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
            reference_word_ids: referenceWordIds
        } as AnchorSequence;
    });

    // Update gap sequences to use word IDs
    newData.gap_sequences = newData.gap_sequences.map((gap) => {
        const serverGap = gap as unknown as ServerData;
        // Looked up once and shared by the log line and the fallback below.
        const foundWordIds = findWordIdsForSequence(newData.corrected_segments, serverGap);

        console.log('Processing gap sequence:', {
            words: gap.words,
            word_ids: gap.word_ids,
            corrections: gap.corrections,
            foundWordIds: foundWordIds
        });

        return {
            ...gap,
            id: gap.id || nanoid(),
            word_ids: gap.word_ids || foundWordIds,
            corrections: gap.corrections.map((correction: WordCorrection) => {
                const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
                console.log('Correction word ID assignment:', {
                    original_word: correction.original_word,
                    corrected_word: correction.corrected_word,
                    assigned_id: wordId
                });
                return {
                    ...correction,
                    id: correction.id || nanoid(),
                    word_id: wordId
                };
            })
        } as GapSequence;
    });

    return newData;
}