lyrics-transcriber 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-D0Gr3Ep7.js} +16509 -9038
- lyrics_transcriber/frontend/dist/assets/index-D0Gr3Ep7.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +14 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +281 -63
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +249 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +320 -266
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +120 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +174 -52
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +158 -114
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +39 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +134 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +67 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +1 -1
- lyrics_transcriber/output/generator.py +22 -8
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +27 -1
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/RECORD +75 -61
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.43.0.dist-info}/entry_points.txt +0 -0
lyrics_transcriber/types.py
CHANGED
@@ -1,16 +1,20 @@
|
|
1
|
-
from dataclasses import dataclass, asdict, field
|
2
|
-
from typing import Any, Dict, List, Optional, Set,
|
1
|
+
from dataclasses import dataclass, asdict, field, fields
|
2
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
3
3
|
from enum import Enum
|
4
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
4
5
|
|
5
6
|
|
6
7
|
@dataclass
|
7
8
|
class Word:
|
8
9
|
"""Represents a single word with its timing (in seconds) and confidence information."""
|
9
10
|
|
11
|
+
id: str # New: Unique identifier for each word
|
10
12
|
text: str
|
11
13
|
start_time: float
|
12
14
|
end_time: float
|
13
15
|
confidence: Optional[float] = None
|
16
|
+
# New: Track if this word was created during correction
|
17
|
+
created_during_correction: bool = False
|
14
18
|
|
15
19
|
def to_dict(self) -> Dict[str, Any]:
|
16
20
|
"""Convert Word to dictionary for JSON serialization."""
|
@@ -24,10 +28,12 @@ class Word:
|
|
24
28
|
def from_dict(cls, data: Dict[str, Any]) -> "Word":
|
25
29
|
"""Create Word from dictionary."""
|
26
30
|
return cls(
|
31
|
+
id=data["id"],
|
27
32
|
text=data["text"],
|
28
33
|
start_time=data["start_time"],
|
29
34
|
end_time=data["end_time"],
|
30
35
|
confidence=data.get("confidence"), # Use get() since confidence is optional
|
36
|
+
created_during_correction=data.get("created_during_correction", False),
|
31
37
|
)
|
32
38
|
|
33
39
|
|
@@ -35,6 +41,7 @@ class Word:
|
|
35
41
|
class LyricsSegment:
|
36
42
|
"""Represents a segment/line of lyrics with timing information in seconds."""
|
37
43
|
|
44
|
+
id: str # New: Unique identifier for each segment
|
38
45
|
text: str
|
39
46
|
words: List[Word]
|
40
47
|
start_time: float
|
@@ -43,6 +50,7 @@ class LyricsSegment:
|
|
43
50
|
def to_dict(self) -> Dict[str, Any]:
|
44
51
|
"""Convert LyricsSegment to dictionary for JSON serialization."""
|
45
52
|
return {
|
53
|
+
"id": self.id,
|
46
54
|
"text": self.text,
|
47
55
|
"words": [word.to_dict() for word in self.words],
|
48
56
|
"start_time": self.start_time,
|
@@ -53,6 +61,7 @@ class LyricsSegment:
|
|
53
61
|
def from_dict(cls, data: Dict[str, Any]) -> "LyricsSegment":
|
54
62
|
"""Create LyricsSegment from dictionary."""
|
55
63
|
return cls(
|
64
|
+
id=data["id"],
|
56
65
|
text=data["text"],
|
57
66
|
words=[Word.from_dict(w) for w in data["words"]],
|
58
67
|
start_time=data["start_time"],
|
@@ -80,31 +89,59 @@ class LyricsMetadata:
|
|
80
89
|
lyrics_provider_id: Optional[str] = None
|
81
90
|
|
82
91
|
# Provider-specific metadata
|
83
|
-
provider_metadata: Dict[str, Any] =
|
92
|
+
provider_metadata: Dict[str, Any] = field(default_factory=dict)
|
84
93
|
|
85
94
|
def to_dict(self) -> Dict[str, Any]:
|
86
95
|
"""Convert metadata to dictionary for JSON serialization."""
|
87
96
|
return asdict(self)
|
88
97
|
|
98
|
+
@classmethod
|
99
|
+
def from_dict(cls, data: Dict[str, Any]) -> "LyricsMetadata":
|
100
|
+
"""Create LyricsMetadata from dictionary."""
|
101
|
+
return cls(
|
102
|
+
source=data["source"],
|
103
|
+
track_name=data["track_name"],
|
104
|
+
artist_names=data["artist_names"],
|
105
|
+
album_name=data.get("album_name"),
|
106
|
+
duration_ms=data.get("duration_ms"),
|
107
|
+
explicit=data.get("explicit"),
|
108
|
+
language=data.get("language"),
|
109
|
+
is_synced=data.get("is_synced", False),
|
110
|
+
lyrics_provider=data.get("lyrics_provider"),
|
111
|
+
lyrics_provider_id=data.get("lyrics_provider_id"),
|
112
|
+
provider_metadata=data.get("provider_metadata", {}),
|
113
|
+
)
|
114
|
+
|
89
115
|
|
90
116
|
@dataclass
|
91
117
|
class LyricsData:
|
92
118
|
"""Standardized response format for all lyrics providers."""
|
93
119
|
|
94
|
-
lyrics: str
|
95
120
|
segments: List[LyricsSegment]
|
96
121
|
metadata: LyricsMetadata
|
97
122
|
source: str # e.g., "genius", "spotify", etc.
|
98
123
|
|
124
|
+
def get_full_text(self) -> str:
|
125
|
+
"""Get the full lyrics text by joining all segment texts."""
|
126
|
+
return "\n".join(segment.text for segment in self.segments)
|
127
|
+
|
99
128
|
def to_dict(self) -> Dict[str, Any]:
|
100
129
|
"""Convert result to dictionary for JSON serialization."""
|
101
130
|
return {
|
102
|
-
"lyrics": self.lyrics,
|
103
131
|
"segments": [segment.to_dict() for segment in self.segments],
|
104
132
|
"metadata": self.metadata.to_dict(),
|
105
133
|
"source": self.source,
|
106
134
|
}
|
107
135
|
|
136
|
+
@classmethod
|
137
|
+
def from_dict(cls, data: Dict[str, Any]) -> "LyricsData":
|
138
|
+
"""Create LyricsData from dictionary."""
|
139
|
+
return cls(
|
140
|
+
segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
|
141
|
+
metadata=LyricsMetadata.from_dict(data["metadata"]),
|
142
|
+
source=data["source"],
|
143
|
+
)
|
144
|
+
|
108
145
|
|
109
146
|
@dataclass
|
110
147
|
class WordCorrection:
|
@@ -112,12 +149,12 @@ class WordCorrection:
|
|
112
149
|
|
113
150
|
original_word: str
|
114
151
|
corrected_word: str # Empty string indicates word should be deleted
|
115
|
-
segment_index: int
|
116
152
|
original_position: int
|
117
153
|
source: str # e.g., "spotify", "genius"
|
118
|
-
confidence: Optional[float]
|
119
154
|
reason: str # e.g., "matched_in_3_sources", "high_confidence_match"
|
120
|
-
|
155
|
+
segment_index: int = 0 # Default to 0 since it's often not needed
|
156
|
+
confidence: Optional[float] = None
|
157
|
+
alternatives: Dict[str, int] = field(default_factory=dict) # Other possible corrections and their occurrence counts
|
121
158
|
is_deletion: bool = False # New field to explicitly mark deletions
|
122
159
|
# New fields for handling word splits
|
123
160
|
split_index: Optional[int] = None # Position in the split sequence (0-based)
|
@@ -127,14 +164,22 @@ class WordCorrection:
|
|
127
164
|
# New fields to match TypeScript interface
|
128
165
|
reference_positions: Optional[Dict[str, int]] = None # Maps source to position in reference text
|
129
166
|
length: int = 1 # Default to 1 for single-word corrections
|
167
|
+
handler: Optional[str] = None # Name of the correction handler that created this correction
|
168
|
+
# New ID fields for tracking word identity through corrections
|
169
|
+
word_id: Optional[str] = None # ID of the original word being corrected
|
170
|
+
corrected_word_id: Optional[str] = None # ID of the new word after correction
|
130
171
|
|
131
172
|
def to_dict(self) -> Dict[str, Any]:
|
173
|
+
"""Convert to dictionary representation."""
|
132
174
|
return asdict(self)
|
133
175
|
|
134
176
|
@classmethod
|
135
177
|
def from_dict(cls, data: Dict[str, Any]) -> "WordCorrection":
|
136
178
|
"""Create WordCorrection from dictionary."""
|
137
|
-
|
179
|
+
# Filter out any keys that aren't part of the dataclass
|
180
|
+
valid_fields = {f.name for f in fields(cls)}
|
181
|
+
filtered_data = {k: v for k, v in data.items() if k in valid_fields}
|
182
|
+
return cls(**filtered_data)
|
138
183
|
|
139
184
|
|
140
185
|
@dataclass
|
@@ -157,6 +202,17 @@ class TranscriptionData:
|
|
157
202
|
"metadata": self.metadata,
|
158
203
|
}
|
159
204
|
|
205
|
+
@classmethod
|
206
|
+
def from_dict(cls, data: Dict[str, Any]) -> "TranscriptionData":
|
207
|
+
"""Create TranscriptionData from dictionary."""
|
208
|
+
return cls(
|
209
|
+
segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
|
210
|
+
words=[Word.from_dict(w) for w in data["words"]],
|
211
|
+
text=data["text"],
|
212
|
+
source=data["source"],
|
213
|
+
metadata=data.get("metadata"),
|
214
|
+
)
|
215
|
+
|
160
216
|
|
161
217
|
@dataclass
|
162
218
|
class TranscriptionResult:
|
@@ -207,29 +263,33 @@ class PhraseScore:
|
|
207
263
|
class AnchorSequence:
|
208
264
|
"""Represents a sequence of words that appears in both transcribed and reference lyrics."""
|
209
265
|
|
210
|
-
|
266
|
+
id: str # Unique identifier for this anchor sequence
|
267
|
+
transcribed_word_ids: List[str] # IDs of Word objects from the transcription
|
211
268
|
transcription_position: int # Starting position in transcribed text
|
212
269
|
reference_positions: Dict[str, int] # Source -> position mapping
|
270
|
+
reference_word_ids: Dict[str, List[str]] # Source -> list of Word IDs from reference
|
213
271
|
confidence: float
|
214
272
|
|
215
273
|
@property
|
216
274
|
def text(self) -> str:
|
217
275
|
"""Get the sequence as a space-separated string."""
|
276
|
+
# This property might need to be updated to look up words from parent object
|
277
|
+
# For now, keeping it for backwards compatibility
|
218
278
|
return " ".join(self.words)
|
219
279
|
|
220
280
|
@property
|
221
281
|
def length(self) -> int:
|
222
282
|
"""Get the number of words in the sequence."""
|
223
|
-
return len(self.words)
|
283
|
+
return len(self.transcribed_word_ids)
|
224
284
|
|
225
285
|
def to_dict(self) -> Dict[str, Any]:
|
226
286
|
"""Convert the anchor sequence to a JSON-serializable dictionary."""
|
227
287
|
return {
|
228
|
-
"
|
229
|
-
"
|
230
|
-
"length": self.length,
|
288
|
+
"id": self.id,
|
289
|
+
"transcribed_word_ids": self.transcribed_word_ids,
|
231
290
|
"transcription_position": self.transcription_position,
|
232
291
|
"reference_positions": self.reference_positions,
|
292
|
+
"reference_word_ids": self.reference_word_ids,
|
233
293
|
"confidence": self.confidence,
|
234
294
|
}
|
235
295
|
|
@@ -237,9 +297,11 @@ class AnchorSequence:
|
|
237
297
|
def from_dict(cls, data: Dict[str, Any]) -> "AnchorSequence":
|
238
298
|
"""Create AnchorSequence from dictionary."""
|
239
299
|
return cls(
|
240
|
-
|
300
|
+
id=data.get("id", WordUtils.generate_id()), # Generate ID if not present in old data
|
301
|
+
transcribed_word_ids=data["transcribed_word_ids"],
|
241
302
|
transcription_position=data["transcription_position"],
|
242
303
|
reference_positions=data["reference_positions"],
|
304
|
+
reference_word_ids=data["reference_word_ids"],
|
243
305
|
confidence=data["confidence"],
|
244
306
|
)
|
245
307
|
|
@@ -284,115 +346,94 @@ class ScoredAnchor:
|
|
284
346
|
class GapSequence:
|
285
347
|
"""Represents a sequence of words between anchor sequences in transcribed lyrics."""
|
286
348
|
|
287
|
-
|
349
|
+
id: str # Unique identifier for this gap sequence
|
350
|
+
transcribed_word_ids: List[str] # IDs of Word objects from the transcription
|
288
351
|
transcription_position: int # Original starting position in transcription
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
reference_words_original: Dict[str, List[str]]
|
293
|
-
corrections: List[WordCorrection] = field(default_factory=list)
|
352
|
+
preceding_anchor_id: Optional[str] # ID of preceding AnchorSequence
|
353
|
+
following_anchor_id: Optional[str] # ID of following AnchorSequence
|
354
|
+
reference_word_ids: Dict[str, List[str]] # Source -> list of Word IDs from reference
|
294
355
|
_corrected_positions: Set[int] = field(default_factory=set, repr=False)
|
295
356
|
_position_offset: int = field(default=0, repr=False) # Track cumulative position changes
|
296
357
|
|
297
|
-
def add_correction(self, correction: WordCorrection) -> None:
|
298
|
-
"""Add a correction and mark its position as corrected."""
|
299
|
-
self.corrections.append(correction)
|
300
|
-
relative_pos = correction.original_position - self.transcription_position
|
301
|
-
self._corrected_positions.add(relative_pos)
|
302
|
-
|
303
|
-
# Update position offset based on correction type
|
304
|
-
if correction.is_deletion:
|
305
|
-
self._position_offset -= 1
|
306
|
-
elif correction.split_total:
|
307
|
-
self._position_offset += correction.split_total - 1
|
308
|
-
|
309
|
-
# Update corrected position for the correction
|
310
|
-
correction.corrected_position = correction.original_position + self._position_offset
|
311
|
-
|
312
|
-
def get_corrected_position(self, original_position: int) -> int:
|
313
|
-
"""Convert an original position to its corrected position."""
|
314
|
-
offset = sum(
|
315
|
-
-1 if c.is_deletion else (c.split_total - 1 if c.split_total else 0)
|
316
|
-
for c in self.corrections
|
317
|
-
if c.original_position < original_position
|
318
|
-
)
|
319
|
-
return original_position + offset
|
320
|
-
|
321
|
-
@property
|
322
|
-
def corrected_length(self) -> int:
|
323
|
-
"""Get the length after applying all corrections."""
|
324
|
-
return self.length + self._position_offset
|
325
|
-
|
326
|
-
def is_word_corrected(self, relative_position: int) -> bool:
|
327
|
-
"""Check if a word at the given position (relative to gap start) has been corrected."""
|
328
|
-
return relative_position in self._corrected_positions
|
329
|
-
|
330
|
-
@property
|
331
|
-
def uncorrected_words(self) -> List[Tuple[int, str]]:
|
332
|
-
"""Get list of (position, word) tuples for words that haven't been corrected yet."""
|
333
|
-
return [(i, word) for i, word in enumerate(self.words) if i not in self._corrected_positions]
|
334
|
-
|
335
|
-
@property
|
336
|
-
def is_fully_corrected(self) -> bool:
|
337
|
-
"""Check if all words in the gap have been corrected."""
|
338
|
-
return len(self._corrected_positions) == self.length
|
339
|
-
|
340
|
-
def __hash__(self):
|
341
|
-
# Hash based on words and position
|
342
|
-
return hash((self.words, self.transcription_position))
|
343
|
-
|
344
|
-
def __eq__(self, other):
|
345
|
-
if not isinstance(other, GapSequence):
|
346
|
-
return NotImplemented
|
347
|
-
return self.words == other.words and self.transcription_position == other.transcription_position
|
348
|
-
|
349
358
|
@property
|
350
359
|
def text(self) -> str:
|
351
360
|
"""Get the sequence as a space-separated string."""
|
361
|
+
# This property might need to be updated to look up words from parent object
|
352
362
|
return " ".join(self.words)
|
353
363
|
|
354
364
|
@property
|
355
365
|
def length(self) -> int:
|
356
366
|
"""Get the number of words in the sequence."""
|
357
|
-
return len(self.words)
|
358
|
-
|
359
|
-
@property
|
360
|
-
def was_corrected(self) -> bool:
|
361
|
-
"""Check if this gap has any corrections."""
|
362
|
-
return len(self.corrections) > 0
|
367
|
+
return len(self.transcribed_word_ids)
|
363
368
|
|
364
369
|
def to_dict(self) -> Dict[str, Any]:
|
365
370
|
"""Convert the gap sequence to a JSON-serializable dictionary."""
|
366
371
|
return {
|
367
|
-
"
|
368
|
-
"
|
369
|
-
"length": self.length,
|
372
|
+
"id": self.id,
|
373
|
+
"transcribed_word_ids": self.transcribed_word_ids,
|
370
374
|
"transcription_position": self.transcription_position,
|
371
|
-
"
|
372
|
-
"
|
373
|
-
"
|
374
|
-
"reference_words_original": self.reference_words_original,
|
375
|
-
"corrections": [c.to_dict() for c in self.corrections],
|
375
|
+
"preceding_anchor_id": self.preceding_anchor_id,
|
376
|
+
"following_anchor_id": self.following_anchor_id,
|
377
|
+
"reference_word_ids": self.reference_word_ids,
|
376
378
|
}
|
377
379
|
|
378
380
|
@classmethod
|
379
381
|
def from_dict(cls, data: Dict[str, Any]) -> "GapSequence":
|
380
382
|
"""Create GapSequence from dictionary."""
|
381
383
|
gap = cls(
|
382
|
-
|
384
|
+
id=data.get("id", WordUtils.generate_id()), # Generate ID if not present in old data
|
385
|
+
transcribed_word_ids=data["transcribed_word_ids"],
|
383
386
|
transcription_position=data["transcription_position"],
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
reference_words_original=data.get("reference_words_original", {}),
|
387
|
+
preceding_anchor_id=data["preceding_anchor_id"],
|
388
|
+
following_anchor_id=data["following_anchor_id"],
|
389
|
+
reference_word_ids=data["reference_word_ids"],
|
388
390
|
)
|
389
|
-
# Add any corrections from the data
|
390
|
-
if "corrections" in data:
|
391
|
-
for correction_data in data["corrections"]:
|
392
|
-
gap.add_correction(WordCorrection.from_dict(correction_data))
|
393
391
|
return gap
|
394
392
|
|
395
393
|
|
394
|
+
@dataclass
|
395
|
+
class CorrectionStep:
|
396
|
+
"""Represents a single correction operation with enough info to replay/undo."""
|
397
|
+
|
398
|
+
handler_name: str
|
399
|
+
affected_word_ids: List[str] # IDs of words modified/deleted
|
400
|
+
affected_segment_ids: List[str] # IDs of segments modified
|
401
|
+
corrections: List[WordCorrection]
|
402
|
+
# State before and after for affected segments
|
403
|
+
segments_before: List[LyricsSegment]
|
404
|
+
segments_after: List[LyricsSegment]
|
405
|
+
# For splits/merges
|
406
|
+
created_word_ids: List[str] = field(default_factory=list) # New words created
|
407
|
+
deleted_word_ids: List[str] = field(default_factory=list) # Words removed
|
408
|
+
|
409
|
+
def to_dict(self) -> Dict[str, Any]:
|
410
|
+
"""Convert CorrectionStep to dictionary for JSON serialization."""
|
411
|
+
return {
|
412
|
+
"handler_name": self.handler_name,
|
413
|
+
"affected_word_ids": self.affected_word_ids,
|
414
|
+
"affected_segment_ids": self.affected_segment_ids,
|
415
|
+
"corrections": [c.to_dict() for c in self.corrections],
|
416
|
+
"segments_before": [s.to_dict() for s in self.segments_before],
|
417
|
+
"segments_after": [s.to_dict() for s in self.segments_after],
|
418
|
+
"created_word_ids": self.created_word_ids,
|
419
|
+
"deleted_word_ids": self.deleted_word_ids,
|
420
|
+
}
|
421
|
+
|
422
|
+
@classmethod
|
423
|
+
def from_dict(cls, data: Dict[str, Any]) -> "CorrectionStep":
|
424
|
+
"""Create CorrectionStep from dictionary."""
|
425
|
+
return cls(
|
426
|
+
handler_name=data["handler_name"],
|
427
|
+
affected_word_ids=data["affected_word_ids"],
|
428
|
+
affected_segment_ids=data["affected_segment_ids"],
|
429
|
+
corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
|
430
|
+
segments_before=[LyricsSegment.from_dict(s) for s in data["segments_before"]],
|
431
|
+
segments_after=[LyricsSegment.from_dict(s) for s in data["segments_after"]],
|
432
|
+
created_word_ids=data["created_word_ids"],
|
433
|
+
deleted_word_ids=data["deleted_word_ids"],
|
434
|
+
)
|
435
|
+
|
436
|
+
|
396
437
|
@dataclass
|
397
438
|
class CorrectionResult:
|
398
439
|
"""Container for correction results with detailed correction information."""
|
@@ -402,7 +443,6 @@ class CorrectionResult:
|
|
402
443
|
|
403
444
|
# Corrected data
|
404
445
|
corrected_segments: List[LyricsSegment]
|
405
|
-
corrected_text: str
|
406
446
|
|
407
447
|
# Correction details
|
408
448
|
corrections: List[WordCorrection]
|
@@ -410,29 +450,34 @@ class CorrectionResult:
|
|
410
450
|
confidence: float
|
411
451
|
|
412
452
|
# Debug/analysis information
|
413
|
-
|
414
|
-
reference_texts: Dict[str, str]
|
453
|
+
reference_lyrics: Dict[str, LyricsData] # Maps source to LyricsData
|
415
454
|
anchor_sequences: List[AnchorSequence]
|
416
455
|
gap_sequences: List[GapSequence]
|
417
456
|
resized_segments: List[LyricsSegment]
|
418
457
|
|
419
458
|
metadata: Dict[str, Any]
|
420
459
|
|
460
|
+
# Correction history
|
461
|
+
correction_steps: List[CorrectionStep]
|
462
|
+
word_id_map: Dict[str, str] # Maps original word IDs to corrected word IDs
|
463
|
+
segment_id_map: Dict[str, str] # Maps original segment IDs to corrected segment IDs
|
464
|
+
|
421
465
|
def to_dict(self) -> Dict[str, Any]:
|
422
466
|
"""Convert the correction result to a JSON-serializable dictionary."""
|
423
467
|
return {
|
424
|
-
"transcribed_text": self.transcribed_text,
|
425
468
|
"original_segments": [s.to_dict() for s in self.original_segments],
|
426
|
-
"
|
469
|
+
"reference_lyrics": {source: lyrics.to_dict() for source, lyrics in self.reference_lyrics.items()},
|
427
470
|
"anchor_sequences": [a.to_dict() for a in self.anchor_sequences],
|
428
471
|
"gap_sequences": [g.to_dict() for g in self.gap_sequences],
|
429
472
|
"resized_segments": [s.to_dict() for s in self.resized_segments],
|
430
|
-
"corrected_text": self.corrected_text,
|
431
473
|
"corrections_made": self.corrections_made,
|
432
474
|
"confidence": self.confidence,
|
433
475
|
"corrections": [c.to_dict() for c in self.corrections],
|
434
476
|
"corrected_segments": [s.to_dict() for s in self.corrected_segments],
|
435
477
|
"metadata": self.metadata,
|
478
|
+
"correction_steps": [step.to_dict() for step in self.correction_steps],
|
479
|
+
"word_id_map": self.word_id_map,
|
480
|
+
"segment_id_map": self.segment_id_map,
|
436
481
|
}
|
437
482
|
|
438
483
|
@classmethod
|
@@ -441,14 +486,15 @@ class CorrectionResult:
|
|
441
486
|
return cls(
|
442
487
|
original_segments=[LyricsSegment.from_dict(s) for s in data["original_segments"]],
|
443
488
|
corrected_segments=[LyricsSegment.from_dict(s) for s in data["corrected_segments"]],
|
444
|
-
corrected_text=data["corrected_text"],
|
445
489
|
corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
|
446
490
|
corrections_made=data["corrections_made"],
|
447
491
|
confidence=data["confidence"],
|
448
|
-
|
449
|
-
reference_texts=data["reference_texts"],
|
492
|
+
reference_lyrics={source: LyricsData.from_dict(lyrics) for source, lyrics in data["reference_lyrics"].items()},
|
450
493
|
anchor_sequences=[AnchorSequence.from_dict(a) for a in data["anchor_sequences"]],
|
451
494
|
gap_sequences=[GapSequence.from_dict(g) for g in data["gap_sequences"]],
|
452
495
|
resized_segments=[LyricsSegment.from_dict(s) for s in data["resized_segments"]],
|
453
496
|
metadata=data["metadata"],
|
497
|
+
correction_steps=[CorrectionStep.from_dict(step) for step in data["correction_steps"]],
|
498
|
+
word_id_map=data["word_id_map"],
|
499
|
+
segment_id_map=data["segment_id_map"],
|
454
500
|
)
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import random
|
2
|
+
import string
|
3
|
+
|
4
|
+
|
5
|
+
class WordUtils:
|
6
|
+
"""Utility class for word-related operations."""
|
7
|
+
|
8
|
+
_used_ids = set() # Keep track of used IDs
|
9
|
+
_id_length = 6 # Length of generated IDs
|
10
|
+
|
11
|
+
@classmethod
|
12
|
+
def generate_id(cls) -> str:
|
13
|
+
"""Generate a unique ID for words/segments.
|
14
|
+
|
15
|
+
Uses a combination of letters and numbers to create a 6-character ID (see _id_length).
|
16
|
+
With 36 possible characters (26 letters + 10 digits), this gives us
|
17
|
+
36^6 = ~2.2 billion possible combinations, which is more than enough
|
18
|
+
for our use case while being much shorter than UUID.
|
19
|
+
"""
|
20
|
+
while True:
|
21
|
+
# Generate random string of letters and numbers
|
22
|
+
new_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=cls._id_length))
|
23
|
+
|
24
|
+
# Make sure it's unique for this session
|
25
|
+
if new_id not in cls._used_ids:
|
26
|
+
cls._used_ids.add(new_id)
|
27
|
+
return new_id
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.41.0
|
3
|
+
Version: 0.43.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
License: MIT
|
6
6
|
Author: Andrew Beveridge
|
@@ -19,10 +19,12 @@ Requires-Dist: karaoke-lyrics-processor (>=0.4)
|
|
19
19
|
Requires-Dist: lyricsgenius (>=3)
|
20
20
|
Requires-Dist: metaphone (>=0.6)
|
21
21
|
Requires-Dist: nltk (>=3.9)
|
22
|
+
Requires-Dist: ollama (>=0.4.7,<0.5.0)
|
22
23
|
Requires-Dist: pydub (>=0.25)
|
23
24
|
Requires-Dist: python-dotenv (>=1)
|
24
25
|
Requires-Dist: python-levenshtein (>=0.26)
|
25
26
|
Requires-Dist: python-slugify (>=8)
|
27
|
+
Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
|
26
28
|
Requires-Dist: spacy (>=3.8)
|
27
29
|
Requires-Dist: spacy-syllables (>=3)
|
28
30
|
Requires-Dist: syllables (>=1)
|