lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. lyrics_transcriber/cli/cli_main.py +7 -0
  2. lyrics_transcriber/core/config.py +1 -0
  3. lyrics_transcriber/core/controller.py +30 -52
  4. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  5. lyrics_transcriber/correction/corrector.py +224 -107
  6. lyrics_transcriber/correction/handlers/base.py +28 -10
  7. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  8. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  9. lyrics_transcriber/correction/handlers/llm.py +290 -0
  10. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  11. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  12. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  13. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  14. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  15. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  16. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  17. lyrics_transcriber/correction/text_utils.py +3 -7
  18. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  19. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  20. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  21. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  22. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  23. lyrics_transcriber/frontend/dist/index.html +1 -1
  24. lyrics_transcriber/frontend/package.json +6 -2
  25. lyrics_transcriber/frontend/src/App.tsx +18 -2
  26. lyrics_transcriber/frontend/src/api.ts +103 -6
  27. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  28. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  30. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  31. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  33. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  35. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  36. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  37. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  38. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  39. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  40. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  41. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  42. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  43. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  44. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  45. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  47. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  48. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  49. lyrics_transcriber/frontend/src/types.js +2 -0
  50. lyrics_transcriber/frontend/src/types.ts +70 -49
  51. lyrics_transcriber/frontend/src/validation.ts +132 -0
  52. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  53. lyrics_transcriber/frontend/yarn.lock +3752 -0
  54. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  55. lyrics_transcriber/lyrics/file_provider.py +6 -5
  56. lyrics_transcriber/lyrics/genius.py +5 -2
  57. lyrics_transcriber/lyrics/spotify.py +58 -21
  58. lyrics_transcriber/output/ass/config.py +16 -5
  59. lyrics_transcriber/output/cdg.py +8 -8
  60. lyrics_transcriber/output/generator.py +29 -14
  61. lyrics_transcriber/output/plain_text.py +15 -10
  62. lyrics_transcriber/output/segment_resizer.py +16 -3
  63. lyrics_transcriber/output/subtitles.py +56 -2
  64. lyrics_transcriber/output/video.py +107 -1
  65. lyrics_transcriber/review/__init__.py +0 -1
  66. lyrics_transcriber/review/server.py +337 -164
  67. lyrics_transcriber/transcribers/audioshake.py +3 -0
  68. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  69. lyrics_transcriber/transcribers/whisper.py +11 -1
  70. lyrics_transcriber/types.py +151 -105
  71. lyrics_transcriber/utils/word_utils.py +27 -0
  72. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  73. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
  74. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  75. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  76. lyrics_transcriber/frontend/package-lock.json +0 -4260
  77. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  78. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  79. {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,38 +1,55 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Any
2
2
  from lyrics_transcriber.types import WordCorrection, GapSequence
3
+ from lyrics_transcriber.utils.word_utils import WordUtils
3
4
 
4
5
 
5
6
  class WordOperations:
6
7
  """Utility class for common word manipulation operations used by correction handlers."""
7
8
 
8
9
  @staticmethod
9
- def calculate_reference_positions(gap: GapSequence, sources: Optional[List[str]] = None) -> Dict[str, int]:
10
+ def calculate_reference_positions(
11
+ gap: GapSequence, sources: Optional[List[str]] = None, anchor_sequences: Optional[List[Any]] = None
12
+ ) -> Dict[str, int]:
10
13
  """Calculate reference positions for given sources based on preceding anchor.
11
14
 
12
15
  Args:
13
- gap: The gap sequence containing the preceding anchor
16
+ gap: The gap sequence containing the preceding anchor ID
14
17
  sources: Optional list of sources to calculate positions for. If None, uses all sources.
18
+ anchor_sequences: List of anchor sequences to look up preceding anchor
15
19
 
16
20
  Returns:
17
21
  Dictionary mapping source names to their reference positions
18
22
  """
19
23
  reference_positions = {}
20
- if gap.preceding_anchor:
21
- # If no sources specified, use all sources from reference words
22
- sources_to_check = sources or list(gap.reference_words.keys())
23
-
24
- for source in sources_to_check:
25
- if source in gap.preceding_anchor.reference_positions:
26
- # Calculate base position from anchor
27
- anchor_pos = gap.preceding_anchor.reference_positions[source]
28
- base_ref_pos = anchor_pos + len(gap.preceding_anchor.words)
29
-
30
- # Calculate word offset within the gap
31
- word_offset = gap.words.index(gap.words[gap.transcription_position - gap.transcription_position])
32
-
33
- # Add word offset to base position
34
- ref_pos = base_ref_pos + word_offset
35
- reference_positions[source] = ref_pos
24
+
25
+ if not gap.preceding_anchor_id or not anchor_sequences:
26
+ return reference_positions
27
+
28
+ # Find the preceding anchor in the sequences
29
+ preceding_anchor = next(
30
+ (scored_anchor.anchor for scored_anchor in anchor_sequences if scored_anchor.anchor.id == gap.preceding_anchor_id), None
31
+ )
32
+
33
+ if not preceding_anchor:
34
+ return reference_positions
35
+
36
+ # If no sources specified, use all sources from reference words
37
+ sources_to_check = sources or list(gap.reference_word_ids.keys())
38
+
39
+ for source in sources_to_check:
40
+ # Get reference positions from the anchor
41
+ if source in preceding_anchor.reference_positions:
42
+ # Calculate base position from anchor
43
+ anchor_pos = preceding_anchor.reference_positions[source]
44
+ base_ref_pos = anchor_pos + len(preceding_anchor.reference_word_ids[source])
45
+
46
+ # Calculate word offset within the gap
47
+ word_offset = 0
48
+
49
+ # Add word offset to base position
50
+ ref_pos = base_ref_pos + word_offset
51
+ reference_positions[source] = ref_pos
52
+
36
53
  return reference_positions
37
54
 
38
55
  @staticmethod
@@ -43,7 +60,10 @@ class WordOperations:
43
60
  source: str,
44
61
  confidence: float,
45
62
  reason: str,
63
+ handler: str,
46
64
  reference_positions: Optional[Dict[str, int]] = None,
65
+ original_word_id: Optional[str] = None,
66
+ corrected_word_id: Optional[str] = None,
47
67
  ) -> WordCorrection:
48
68
  """Creates a correction for replacing a single word with another word."""
49
69
  return WordCorrection(
@@ -56,7 +76,10 @@ class WordOperations:
56
76
  reason=reason,
57
77
  alternatives={},
58
78
  reference_positions=reference_positions,
59
- length=1, # Single word replacement
79
+ length=1,
80
+ handler=handler,
81
+ word_id=original_word_id,
82
+ corrected_word_id=corrected_word_id if corrected_word_id is not None else (WordUtils.generate_id() if corrected_word else None),
60
83
  )
61
84
 
62
85
  @staticmethod
@@ -67,11 +90,19 @@ class WordOperations:
67
90
  source: str,
68
91
  confidence: float,
69
92
  reason: str,
93
+ handler: str,
70
94
  reference_positions: Optional[Dict[str, int]] = None,
95
+ original_word_id: Optional[str] = None,
96
+ corrected_word_ids: Optional[List[str]] = None,
71
97
  ) -> List[WordCorrection]:
72
98
  """Creates corrections for splitting a single word into multiple words."""
73
99
  corrections = []
74
- for split_idx, ref_word in enumerate(reference_words):
100
+
101
+ # Generate word IDs if none provided
102
+ if corrected_word_ids is None:
103
+ corrected_word_ids = [WordUtils.generate_id() for _ in reference_words]
104
+
105
+ for split_idx, (ref_word, word_id) in enumerate(zip(reference_words, corrected_word_ids)):
75
106
  corrections.append(
76
107
  WordCorrection(
77
108
  original_word=original_word,
@@ -86,6 +117,9 @@ class WordOperations:
86
117
  split_total=len(reference_words),
87
118
  reference_positions=reference_positions,
88
119
  length=1, # Each split word is length 1
120
+ handler=handler,
121
+ word_id=WordUtils.generate_id(), # Generate new ID for each split
122
+ corrected_word_id=word_id,
89
123
  )
90
124
  )
91
125
  return corrections
@@ -99,10 +133,16 @@ class WordOperations:
99
133
  confidence: float,
100
134
  combine_reason: str,
101
135
  delete_reason: str,
136
+ handler: str,
102
137
  reference_positions: Optional[Dict[str, int]] = None,
138
+ original_word_ids: Optional[List[str]] = None,
139
+ corrected_word_id: Optional[str] = None,
103
140
  ) -> List[WordCorrection]:
104
141
  """Creates corrections for combining multiple words into a single word."""
105
142
  corrections = []
143
+ word_ids = original_word_ids or [None] * len(original_words)
144
+
145
+ final_word_id = corrected_word_id or WordUtils.generate_id()
106
146
 
107
147
  # First word gets replaced
108
148
  corrections.append(
@@ -117,11 +157,14 @@ class WordOperations:
117
157
  alternatives={},
118
158
  reference_positions=reference_positions,
119
159
  length=len(original_words), # Combined word spans all original words
160
+ handler=handler,
161
+ word_id=WordUtils.generate_id(), # Generate new ID for combined word
162
+ corrected_word_id=final_word_id,
120
163
  )
121
164
  )
122
165
 
123
166
  # Additional words get marked for deletion
124
- for i, word in enumerate(original_words[1:], start=1):
167
+ for i, (word, word_id) in enumerate(zip(original_words[1:], word_ids[1:]), start=1):
125
168
  corrections.append(
126
169
  WordCorrection(
127
170
  original_word=word,
@@ -135,6 +178,9 @@ class WordOperations:
135
178
  is_deletion=True,
136
179
  reference_positions=reference_positions,
137
180
  length=1, # Deleted words are length 1
181
+ handler=handler,
182
+ word_id=WordUtils.generate_id(), # Generate new ID for each deleted word
183
+ corrected_word_id=None, # Deleted words don't need a corrected ID
138
184
  )
139
185
  )
140
186
 
@@ -10,19 +10,15 @@ def clean_text(text: str) -> str:
10
10
  Returns:
11
11
  Cleaned text with:
12
12
  - All text converted to lowercase
13
- - Hyphens and slashes converted to spaces
14
- - All other punctuation removed
15
13
  - Multiple spaces/whitespace collapsed to single space
16
14
  - Leading/trailing whitespace removed
15
+ - Punctuation removed (except for internal hyphens/slashes in words)
17
16
  """
18
17
  # Convert to lowercase
19
18
  text = text.lower()
20
19
 
21
- # Replace hyphens and slashes with spaces first
22
- text = text.replace("-", " ").replace("/", " ")
23
-
24
- # Remove remaining punctuation
25
- text = re.sub(r"[^\w\s]", "", text)
20
+ # Remove punctuation except hyphens and slashes that are between word characters
21
+ text = re.sub(r"(?<!\w)[^\w\s]|[^\w\s](?!\w)", "", text)
26
22
 
27
23
  # Normalize whitespace (collapse multiple spaces, remove leading/trailing)
28
24
  text = " ".join(text.split())