lyrics-transcriber 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. lyrics_transcriber/core/controller.py +30 -52
  2. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  3. lyrics_transcriber/correction/corrector.py +224 -107
  4. lyrics_transcriber/correction/handlers/base.py +28 -10
  5. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  6. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  7. lyrics_transcriber/correction/handlers/llm.py +290 -0
  8. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  9. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  10. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  11. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  12. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  13. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  14. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  15. lyrics_transcriber/correction/text_utils.py +3 -7
  16. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  17. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  18. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  19. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  20. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  21. lyrics_transcriber/frontend/dist/index.html +1 -1
  22. lyrics_transcriber/frontend/package.json +6 -2
  23. lyrics_transcriber/frontend/src/App.tsx +18 -2
  24. lyrics_transcriber/frontend/src/api.ts +103 -6
  25. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  26. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  27. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  28. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  29. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  30. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  31. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  32. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  33. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  34. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  35. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  36. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  37. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  38. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  39. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  40. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  41. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  42. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  43. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  44. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  45. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  47. lyrics_transcriber/frontend/src/types.js +2 -0
  48. lyrics_transcriber/frontend/src/types.ts +70 -49
  49. lyrics_transcriber/frontend/src/validation.ts +132 -0
  50. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  51. lyrics_transcriber/frontend/yarn.lock +3752 -0
  52. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  53. lyrics_transcriber/lyrics/file_provider.py +6 -5
  54. lyrics_transcriber/lyrics/genius.py +5 -2
  55. lyrics_transcriber/lyrics/spotify.py +58 -21
  56. lyrics_transcriber/output/ass/config.py +16 -5
  57. lyrics_transcriber/output/cdg.py +1 -1
  58. lyrics_transcriber/output/generator.py +22 -8
  59. lyrics_transcriber/output/plain_text.py +15 -10
  60. lyrics_transcriber/output/segment_resizer.py +16 -3
  61. lyrics_transcriber/output/subtitles.py +27 -1
  62. lyrics_transcriber/output/video.py +107 -1
  63. lyrics_transcriber/review/__init__.py +0 -1
  64. lyrics_transcriber/review/server.py +337 -164
  65. lyrics_transcriber/transcribers/audioshake.py +3 -0
  66. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  67. lyrics_transcriber/transcribers/whisper.py +11 -1
  68. lyrics_transcriber/types.py +151 -105
  69. lyrics_transcriber/utils/word_utils.py +27 -0
  70. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  71. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +74 -61
  72. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  73. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  74. lyrics_transcriber/frontend/package-lock.json +0 -4260
  75. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  76. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  77. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
lyrics_transcriber/types.py
@@ -1,16 +1,20 @@
-from dataclasses import dataclass, asdict, field
-from typing import Any, Dict, List, Optional, Set, Protocol, Tuple
+from dataclasses import dataclass, asdict, field, fields
+from typing import Any, Dict, List, Optional, Set, Tuple
 from enum import Enum
+from lyrics_transcriber.utils.word_utils import WordUtils


 @dataclass
 class Word:
     """Represents a single word with its timing (in seconds) and confidence information."""

+    id: str  # New: Unique identifier for each word
     text: str
     start_time: float
     end_time: float
     confidence: Optional[float] = None
+    # New: Track if this word was created during correction
+    created_during_correction: bool = False

     def to_dict(self) -> Dict[str, Any]:
         """Convert Word to dictionary for JSON serialization."""
@@ -24,10 +28,12 @@ class Word:
     def from_dict(cls, data: Dict[str, Any]) -> "Word":
         """Create Word from dictionary."""
         return cls(
+            id=data["id"],
             text=data["text"],
             start_time=data["start_time"],
             end_time=data["end_time"],
             confidence=data.get("confidence"),  # Use get() since confidence is optional
+            created_during_correction=data.get("created_during_correction", False),
         )
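Word now carries a required id plus a created_during_correction flag. A minimal usage sketch (the values are illustrative, and it assumes to_dict() emits the same keys that from_dict() reads):

    from lyrics_transcriber.types import Word
    from lyrics_transcriber.utils.word_utils import WordUtils

    # Words now need an explicit id; created_during_correction defaults to False
    # for words that came straight from the transcription.
    word = Word(
        id=WordUtils.generate_id(),
        text="hello",
        start_time=12.34,
        end_time=12.78,
        confidence=0.97,
    )

    restored = Word.from_dict(word.to_dict())
    assert restored.id == word.id
    assert restored.created_during_correction is False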
@@ -35,6 +41,7 @@ class Word:
 class LyricsSegment:
     """Represents a segment/line of lyrics with timing information in seconds."""

+    id: str  # New: Unique identifier for each segment
     text: str
     words: List[Word]
     start_time: float
@@ -43,6 +50,7 @@ class LyricsSegment:
     def to_dict(self) -> Dict[str, Any]:
         """Convert LyricsSegment to dictionary for JSON serialization."""
         return {
+            "id": self.id,
             "text": self.text,
             "words": [word.to_dict() for word in self.words],
             "start_time": self.start_time,
@@ -53,6 +61,7 @@ class LyricsSegment:
     def from_dict(cls, data: Dict[str, Any]) -> "LyricsSegment":
         """Create LyricsSegment from dictionary."""
         return cls(
+            id=data["id"],
             text=data["text"],
             words=[Word.from_dict(w) for w in data["words"]],
             start_time=data["start_time"],
@@ -80,31 +89,59 @@ class LyricsMetadata:
     lyrics_provider_id: Optional[str] = None

     # Provider-specific metadata
-    provider_metadata: Dict[str, Any] = None
+    provider_metadata: Dict[str, Any] = field(default_factory=dict)

     def to_dict(self) -> Dict[str, Any]:
         """Convert metadata to dictionary for JSON serialization."""
         return asdict(self)

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsMetadata":
+        """Create LyricsMetadata from dictionary."""
+        return cls(
+            source=data["source"],
+            track_name=data["track_name"],
+            artist_names=data["artist_names"],
+            album_name=data.get("album_name"),
+            duration_ms=data.get("duration_ms"),
+            explicit=data.get("explicit"),
+            language=data.get("language"),
+            is_synced=data.get("is_synced", False),
+            lyrics_provider=data.get("lyrics_provider"),
+            lyrics_provider_id=data.get("lyrics_provider_id"),
+            provider_metadata=data.get("provider_metadata", {}),
+        )
+

 @dataclass
 class LyricsData:
     """Standardized response format for all lyrics providers."""

-    lyrics: str
     segments: List[LyricsSegment]
     metadata: LyricsMetadata
     source: str  # e.g., "genius", "spotify", etc.

+    def get_full_text(self) -> str:
+        """Get the full lyrics text by joining all segment texts."""
+        return "\n".join(segment.text for segment in self.segments)
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert result to dictionary for JSON serialization."""
         return {
-            "lyrics": self.lyrics,
             "segments": [segment.to_dict() for segment in self.segments],
             "metadata": self.metadata.to_dict(),
             "source": self.source,
         }

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsData":
+        """Create LyricsData from dictionary."""
+        return cls(
+            segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
+            metadata=LyricsMetadata.from_dict(data["metadata"]),
+            source=data["source"],
+        )
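With the lyrics string removed from LyricsData, the full text is now derived from the segments via get_full_text(). A small sketch of how a caller that previously read .lyrics might adapt (the helper name is hypothetical):

    from lyrics_transcriber.types import LyricsData

    def first_line(lyrics: LyricsData) -> str:
        """Return the first line of the lyrics, replacing reads of the removed .lyrics string."""
        full_text = lyrics.get_full_text()  # segment texts joined with newlines
        return full_text.splitlines()[0] if full_text else ""

    # Round-tripping through the new serialization pair also works:
    # LyricsData.from_dict(lyrics.to_dict())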
@@ -112,12 +149,12 @@ class WordCorrection:

     original_word: str
     corrected_word: str  # Empty string indicates word should be deleted
-    segment_index: int
     original_position: int
     source: str  # e.g., "spotify", "genius"
-    confidence: Optional[float]
     reason: str  # e.g., "matched_in_3_sources", "high_confidence_match"
-    alternatives: Dict[str, int]  # Other possible corrections and their occurrence counts
+    segment_index: int = 0  # Default to 0 since it's often not needed
+    confidence: Optional[float] = None
+    alternatives: Dict[str, int] = field(default_factory=dict)  # Other possible corrections and their occurrence counts
     is_deletion: bool = False  # New field to explicitly mark deletions
     # New fields for handling word splits
     split_index: Optional[int] = None  # Position in the split sequence (0-based)
@@ -127,14 +164,22 @@ class WordCorrection:
     # New fields to match TypeScript interface
     reference_positions: Optional[Dict[str, int]] = None  # Maps source to position in reference text
     length: int = 1  # Default to 1 for single-word corrections
+    handler: Optional[str] = None  # Name of the correction handler that created this correction
+    # New ID fields for tracking word identity through corrections
+    word_id: Optional[str] = None  # ID of the original word being corrected
+    corrected_word_id: Optional[str] = None  # ID of the new word after correction

     def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary representation."""
         return asdict(self)

     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "WordCorrection":
         """Create WordCorrection from dictionary."""
-        return cls(**data)
+        # Filter out any keys that aren't part of the dataclass
+        valid_fields = {f.name for f in fields(cls)}
+        filtered_data = {k: v for k, v in data.items() if k in valid_fields}
+        return cls(**filtered_data)
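The rewritten WordCorrection.from_dict() filters the incoming dict against the dataclass fields, so payloads saved by older versions no longer break construction. A hedged sketch; the legacy key shown is hypothetical:

    from lyrics_transcriber.types import WordCorrection

    data = {
        "original_word": "wold",
        "corrected_word": "world",
        "original_position": 3,
        "source": "genius",
        "reason": "high_confidence_match",
        "some_removed_field": "ignored",  # hypothetical key from an older schema
    }

    # 0.41.0's cls(**data) would raise TypeError on the unknown key;
    # the new from_dict() drops it and applies the new defaults.
    correction = WordCorrection.from_dict(data)
    assert correction.segment_index == 0
    assert correction.alternatives == {}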
@@ -157,6 +202,17 @@ class TranscriptionData:
             "metadata": self.metadata,
         }

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "TranscriptionData":
+        """Create TranscriptionData from dictionary."""
+        return cls(
+            segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
+            words=[Word.from_dict(w) for w in data["words"]],
+            text=data["text"],
+            source=data["source"],
+            metadata=data.get("metadata"),
+        )
+

 @dataclass
 class TranscriptionResult:
@@ -207,29 +263,33 @@ class PhraseScore:
 class AnchorSequence:
     """Represents a sequence of words that appears in both transcribed and reference lyrics."""

-    words: List[str]
+    id: str  # Unique identifier for this anchor sequence
+    transcribed_word_ids: List[str]  # IDs of Word objects from the transcription
     transcription_position: int  # Starting position in transcribed text
     reference_positions: Dict[str, int]  # Source -> position mapping
+    reference_word_ids: Dict[str, List[str]]  # Source -> list of Word IDs from reference
     confidence: float

     @property
     def text(self) -> str:
         """Get the sequence as a space-separated string."""
+        # This property might need to be updated to look up words from parent object
+        # For now, keeping it for backwards compatibility
         return " ".join(self.words)

     @property
     def length(self) -> int:
         """Get the number of words in the sequence."""
-        return len(self.words)
+        return len(self.transcribed_word_ids)

     def to_dict(self) -> Dict[str, Any]:
         """Convert the anchor sequence to a JSON-serializable dictionary."""
         return {
-            "words": self.words,
-            "text": self.text,
-            "length": self.length,
+            "id": self.id,
+            "transcribed_word_ids": self.transcribed_word_ids,
             "transcription_position": self.transcription_position,
             "reference_positions": self.reference_positions,
+            "reference_word_ids": self.reference_word_ids,
             "confidence": self.confidence,
         }

@@ -237,9 +297,11 @@ class AnchorSequence:
     def from_dict(cls, data: Dict[str, Any]) -> "AnchorSequence":
         """Create AnchorSequence from dictionary."""
         return cls(
-            words=data["words"],
+            id=data.get("id", WordUtils.generate_id()),  # Generate ID if not present in old data
+            transcribed_word_ids=data["transcribed_word_ids"],
             transcription_position=data["transcription_position"],
             reference_positions=data["reference_positions"],
+            reference_word_ids=data["reference_word_ids"],
            confidence=data["confidence"],
         )
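AnchorSequence (and GapSequence below) now stores word IDs rather than the words themselves, so rendering an anchor's text means resolving those IDs against the transcription's Word objects. A rough helper sketch; the comments in the diff suggest the package may grow its own parent-object lookup, so this is illustrative only:

    from typing import Dict
    from lyrics_transcriber.types import AnchorSequence, Word

    def anchor_text(anchor: AnchorSequence, words_by_id: Dict[str, Word]) -> str:
        """Resolve an anchor's word IDs back to text using an id -> Word index."""
        return " ".join(words_by_id[word_id].text for word_id in anchor.transcribed_word_ids)

    # The index can be built from any segment list, e.g.:
    # words_by_id = {w.id: w for segment in segments for w in segment.words}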
@@ -284,115 +346,94 @@ class ScoredAnchor:
 class GapSequence:
     """Represents a sequence of words between anchor sequences in transcribed lyrics."""

-    words: Tuple[str, ...]
+    id: str  # Unique identifier for this gap sequence
+    transcribed_word_ids: List[str]  # IDs of Word objects from the transcription
     transcription_position: int  # Original starting position in transcription
-    preceding_anchor: Optional[AnchorSequence]
-    following_anchor: Optional[AnchorSequence]
-    reference_words: Dict[str, List[str]]
-    reference_words_original: Dict[str, List[str]]
-    corrections: List[WordCorrection] = field(default_factory=list)
+    preceding_anchor_id: Optional[str]  # ID of preceding AnchorSequence
+    following_anchor_id: Optional[str]  # ID of following AnchorSequence
+    reference_word_ids: Dict[str, List[str]]  # Source -> list of Word IDs from reference
     _corrected_positions: Set[int] = field(default_factory=set, repr=False)
     _position_offset: int = field(default=0, repr=False)  # Track cumulative position changes

-    def add_correction(self, correction: WordCorrection) -> None:
-        """Add a correction and mark its position as corrected."""
-        self.corrections.append(correction)
-        relative_pos = correction.original_position - self.transcription_position
-        self._corrected_positions.add(relative_pos)
-
-        # Update position offset based on correction type
-        if correction.is_deletion:
-            self._position_offset -= 1
-        elif correction.split_total:
-            self._position_offset += correction.split_total - 1
-
-        # Update corrected position for the correction
-        correction.corrected_position = correction.original_position + self._position_offset
-
-    def get_corrected_position(self, original_position: int) -> int:
-        """Convert an original position to its corrected position."""
-        offset = sum(
-            -1 if c.is_deletion else (c.split_total - 1 if c.split_total else 0)
-            for c in self.corrections
-            if c.original_position < original_position
-        )
-        return original_position + offset
-
-    @property
-    def corrected_length(self) -> int:
-        """Get the length after applying all corrections."""
-        return self.length + self._position_offset
-
-    def is_word_corrected(self, relative_position: int) -> bool:
-        """Check if a word at the given position (relative to gap start) has been corrected."""
-        return relative_position in self._corrected_positions
-
-    @property
-    def uncorrected_words(self) -> List[Tuple[int, str]]:
-        """Get list of (position, word) tuples for words that haven't been corrected yet."""
-        return [(i, word) for i, word in enumerate(self.words) if i not in self._corrected_positions]
-
-    @property
-    def is_fully_corrected(self) -> bool:
-        """Check if all words in the gap have been corrected."""
-        return len(self._corrected_positions) == self.length
-
-    def __hash__(self):
-        # Hash based on words and position
-        return hash((self.words, self.transcription_position))
-
-    def __eq__(self, other):
-        if not isinstance(other, GapSequence):
-            return NotImplemented
-        return self.words == other.words and self.transcription_position == other.transcription_position
-
     @property
     def text(self) -> str:
         """Get the sequence as a space-separated string."""
+        # This property might need to be updated to look up words from parent object
         return " ".join(self.words)

     @property
     def length(self) -> int:
         """Get the number of words in the sequence."""
-        return len(self.words)
-
-    @property
-    def was_corrected(self) -> bool:
-        """Check if this gap has any corrections."""
-        return len(self.corrections) > 0
+        return len(self.transcribed_word_ids)

     def to_dict(self) -> Dict[str, Any]:
         """Convert the gap sequence to a JSON-serializable dictionary."""
         return {
-            "words": self.words,
-            "text": self.text,
-            "length": self.length,
+            "id": self.id,
+            "transcribed_word_ids": self.transcribed_word_ids,
             "transcription_position": self.transcription_position,
-            "preceding_anchor": self.preceding_anchor.to_dict() if self.preceding_anchor else None,
-            "following_anchor": self.following_anchor.to_dict() if self.following_anchor else None,
-            "reference_words": self.reference_words,
-            "reference_words_original": self.reference_words_original,
-            "corrections": [c.to_dict() for c in self.corrections],
+            "preceding_anchor_id": self.preceding_anchor_id,
+            "following_anchor_id": self.following_anchor_id,
+            "reference_word_ids": self.reference_word_ids,
         }

     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "GapSequence":
         """Create GapSequence from dictionary."""
         gap = cls(
-            words=tuple(data["words"]),
+            id=data.get("id", WordUtils.generate_id()),  # Generate ID if not present in old data
+            transcribed_word_ids=data["transcribed_word_ids"],
             transcription_position=data["transcription_position"],
-            preceding_anchor=AnchorSequence.from_dict(data["preceding_anchor"]) if data["preceding_anchor"] else None,
-            following_anchor=AnchorSequence.from_dict(data["following_anchor"]) if data["following_anchor"] else None,
-            reference_words=data["reference_words"],
-            reference_words_original=data.get("reference_words_original", {}),
+            preceding_anchor_id=data["preceding_anchor_id"],
+            following_anchor_id=data["following_anchor_id"],
+            reference_word_ids=data["reference_word_ids"],
         )
-        # Add any corrections from the data
-        if "corrections" in data:
-            for correction_data in data["corrections"]:
-                gap.add_correction(WordCorrection.from_dict(correction_data))
         return gap


+@dataclass
+class CorrectionStep:
+    """Represents a single correction operation with enough info to replay/undo."""
+
+    handler_name: str
+    affected_word_ids: List[str]  # IDs of words modified/deleted
+    affected_segment_ids: List[str]  # IDs of segments modified
+    corrections: List[WordCorrection]
+    # State before and after for affected segments
+    segments_before: List[LyricsSegment]
+    segments_after: List[LyricsSegment]
+    # For splits/merges
+    created_word_ids: List[str] = field(default_factory=list)  # New words created
+    deleted_word_ids: List[str] = field(default_factory=list)  # Words removed
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert CorrectionStep to dictionary for JSON serialization."""
+        return {
+            "handler_name": self.handler_name,
+            "affected_word_ids": self.affected_word_ids,
+            "affected_segment_ids": self.affected_segment_ids,
+            "corrections": [c.to_dict() for c in self.corrections],
+            "segments_before": [s.to_dict() for s in self.segments_before],
+            "segments_after": [s.to_dict() for s in self.segments_after],
+            "created_word_ids": self.created_word_ids,
+            "deleted_word_ids": self.deleted_word_ids,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "CorrectionStep":
+        """Create CorrectionStep from dictionary."""
+        return cls(
+            handler_name=data["handler_name"],
+            affected_word_ids=data["affected_word_ids"],
+            affected_segment_ids=data["affected_segment_ids"],
+            corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
+            segments_before=[LyricsSegment.from_dict(s) for s in data["segments_before"]],
+            segments_after=[LyricsSegment.from_dict(s) for s in data["segments_after"]],
+            created_word_ids=data["created_word_ids"],
+            deleted_word_ids=data["deleted_word_ids"],
+        )
@@ -402,7 +443,6 @@ class CorrectionResult:

     # Corrected data
     corrected_segments: List[LyricsSegment]
-    corrected_text: str

     # Correction details
     corrections: List[WordCorrection]
@@ -410,29 +450,34 @@ class CorrectionResult:
     confidence: float

     # Debug/analysis information
-    transcribed_text: str
-    reference_texts: Dict[str, str]
+    reference_lyrics: Dict[str, LyricsData]  # Maps source to LyricsData
     anchor_sequences: List[AnchorSequence]
     gap_sequences: List[GapSequence]
     resized_segments: List[LyricsSegment]

     metadata: Dict[str, Any]

+    # Correction history
+    correction_steps: List[CorrectionStep]
+    word_id_map: Dict[str, str]  # Maps original word IDs to corrected word IDs
+    segment_id_map: Dict[str, str]  # Maps original segment IDs to corrected segment IDs
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert the correction result to a JSON-serializable dictionary."""
         return {
-            "transcribed_text": self.transcribed_text,
             "original_segments": [s.to_dict() for s in self.original_segments],
-            "reference_texts": self.reference_texts,
+            "reference_lyrics": {source: lyrics.to_dict() for source, lyrics in self.reference_lyrics.items()},
             "anchor_sequences": [a.to_dict() for a in self.anchor_sequences],
             "gap_sequences": [g.to_dict() for g in self.gap_sequences],
             "resized_segments": [s.to_dict() for s in self.resized_segments],
-            "corrected_text": self.corrected_text,
             "corrections_made": self.corrections_made,
             "confidence": self.confidence,
             "corrections": [c.to_dict() for c in self.corrections],
             "corrected_segments": [s.to_dict() for s in self.corrected_segments],
             "metadata": self.metadata,
+            "correction_steps": [step.to_dict() for step in self.correction_steps],
+            "word_id_map": self.word_id_map,
+            "segment_id_map": self.segment_id_map,
         }

     @classmethod
@@ -441,14 +486,15 @@ class CorrectionResult:
         return cls(
             original_segments=[LyricsSegment.from_dict(s) for s in data["original_segments"]],
             corrected_segments=[LyricsSegment.from_dict(s) for s in data["corrected_segments"]],
-            corrected_text=data["corrected_text"],
             corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
             corrections_made=data["corrections_made"],
             confidence=data["confidence"],
-            transcribed_text=data["transcribed_text"],
-            reference_texts=data["reference_texts"],
+            reference_lyrics={source: LyricsData.from_dict(lyrics) for source, lyrics in data["reference_lyrics"].items()},
             anchor_sequences=[AnchorSequence.from_dict(a) for a in data["anchor_sequences"]],
             gap_sequences=[GapSequence.from_dict(g) for g in data["gap_sequences"]],
             resized_segments=[LyricsSegment.from_dict(s) for s in data["resized_segments"]],
             metadata=data["metadata"],
+            correction_steps=[CorrectionStep.from_dict(step) for step in data["correction_steps"]],
+            word_id_map=data["word_id_map"],
+            segment_id_map=data["segment_id_map"],
         )
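Because every nested type now has a from_dict() counterpart, a saved CorrectionResult can be reloaded wholesale. A sketch assuming a hypothetical corrections.json previously written from to_dict():

    import json
    from lyrics_transcriber.types import CorrectionResult

    # Hypothetical file written earlier via json.dump(result.to_dict(), f).
    with open("corrections.json") as f:
        result = CorrectionResult.from_dict(json.load(f))

    print(f"{result.corrections_made} corrections, confidence {result.confidence:.2f}")
    for source, lyrics in result.reference_lyrics.items():
        # reference_texts (plain strings) became reference_lyrics (LyricsData objects).
        print(source, len(lyrics.get_full_text().splitlines()), "lines")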
lyrics_transcriber/utils/word_utils.py (new file)
@@ -0,0 +1,27 @@
+import random
+import string
+
+
+class WordUtils:
+    """Utility class for word-related operations."""
+
+    _used_ids = set()  # Keep track of used IDs
+    _id_length = 6  # Length of generated IDs
+
+    @classmethod
+    def generate_id(cls) -> str:
+        """Generate a unique ID for words/segments.
+
+        Uses a combination of letters and numbers to create an 8-character ID.
+        With 36 possible characters (26 letters + 10 digits), this gives us
+        36^8 = ~2.8 trillion possible combinations, which is more than enough
+        for our use case while being much shorter than UUID.
+        """
+        while True:
+            # Generate random string of letters and numbers
+            new_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=cls._id_length))
+
+            # Make sure it's unique for this session
+            if new_id not in cls._used_ids:
+                cls._used_ids.add(new_id)
+                return new_id
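A small usage sketch for the new ID helper; uniqueness is only tracked per process via the class-level _used_ids set:

    from lyrics_transcriber.utils.word_utils import WordUtils

    # Each ID is _id_length (currently 6) characters drawn from a-z and 0-9,
    # deduplicated against the session-wide _used_ids set.
    ids = [WordUtils.generate_id() for _ in range(1000)]
    assert len(set(ids)) == 1000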
{lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lyrics-transcriber
-Version: 0.41.0
+Version: 0.42.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 License: MIT
 Author: Andrew Beveridge
@@ -19,10 +19,12 @@ Requires-Dist: karaoke-lyrics-processor (>=0.4)
 Requires-Dist: lyricsgenius (>=3)
 Requires-Dist: metaphone (>=0.6)
 Requires-Dist: nltk (>=3.9)
+Requires-Dist: ollama (>=0.4.7,<0.5.0)
 Requires-Dist: pydub (>=0.25)
 Requires-Dist: python-dotenv (>=1)
 Requires-Dist: python-levenshtein (>=0.26)
 Requires-Dist: python-slugify (>=8)
+Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
 Requires-Dist: spacy (>=3.8)
 Requires-Dist: spacy-syllables (>=3)
 Requires-Dist: syllables (>=1)