lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/cli/cli_main.py +7 -0
- lyrics_transcriber/core/config.py +1 -0
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
- lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +8 -8
- lyrics_transcriber/output/generator.py +29 -14
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +56 -2
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,38 +1,55 @@
|
|
1
|
-
from typing import List, Optional, Dict
|
1
|
+
from typing import List, Optional, Dict, Any
|
2
2
|
from lyrics_transcriber.types import WordCorrection, GapSequence
|
3
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
3
4
|
|
4
5
|
|
5
6
|
class WordOperations:
|
6
7
|
"""Utility class for common word manipulation operations used by correction handlers."""
|
7
8
|
|
8
9
|
@staticmethod
|
9
|
-
def calculate_reference_positions(
|
10
|
+
def calculate_reference_positions(
|
11
|
+
gap: GapSequence, sources: Optional[List[str]] = None, anchor_sequences: Optional[List[Any]] = None
|
12
|
+
) -> Dict[str, int]:
|
10
13
|
"""Calculate reference positions for given sources based on preceding anchor.
|
11
14
|
|
12
15
|
Args:
|
13
|
-
gap: The gap sequence containing the preceding anchor
|
16
|
+
gap: The gap sequence containing the preceding anchor ID
|
14
17
|
sources: Optional list of sources to calculate positions for. If None, uses all sources.
|
18
|
+
anchor_sequences: List of anchor sequences to look up preceding anchor
|
15
19
|
|
16
20
|
Returns:
|
17
21
|
Dictionary mapping source names to their reference positions
|
18
22
|
"""
|
19
23
|
reference_positions = {}
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
24
|
+
|
25
|
+
if not gap.preceding_anchor_id or not anchor_sequences:
|
26
|
+
return reference_positions
|
27
|
+
|
28
|
+
# Find the preceding anchor in the sequences
|
29
|
+
preceding_anchor = next(
|
30
|
+
(scored_anchor.anchor for scored_anchor in anchor_sequences if scored_anchor.anchor.id == gap.preceding_anchor_id), None
|
31
|
+
)
|
32
|
+
|
33
|
+
if not preceding_anchor:
|
34
|
+
return reference_positions
|
35
|
+
|
36
|
+
# If no sources specified, use all sources from reference words
|
37
|
+
sources_to_check = sources or list(gap.reference_word_ids.keys())
|
38
|
+
|
39
|
+
for source in sources_to_check:
|
40
|
+
# Get reference positions from the anchor
|
41
|
+
if source in preceding_anchor.reference_positions:
|
42
|
+
# Calculate base position from anchor
|
43
|
+
anchor_pos = preceding_anchor.reference_positions[source]
|
44
|
+
base_ref_pos = anchor_pos + len(preceding_anchor.reference_word_ids[source])
|
45
|
+
|
46
|
+
# Calculate word offset within the gap
|
47
|
+
word_offset = 0
|
48
|
+
|
49
|
+
# Add word offset to base position
|
50
|
+
ref_pos = base_ref_pos + word_offset
|
51
|
+
reference_positions[source] = ref_pos
|
52
|
+
|
36
53
|
return reference_positions
|
37
54
|
|
38
55
|
@staticmethod
|
@@ -43,7 +60,10 @@ class WordOperations:
|
|
43
60
|
source: str,
|
44
61
|
confidence: float,
|
45
62
|
reason: str,
|
63
|
+
handler: str,
|
46
64
|
reference_positions: Optional[Dict[str, int]] = None,
|
65
|
+
original_word_id: Optional[str] = None,
|
66
|
+
corrected_word_id: Optional[str] = None,
|
47
67
|
) -> WordCorrection:
|
48
68
|
"""Creates a correction for replacing a single word with another word."""
|
49
69
|
return WordCorrection(
|
@@ -56,7 +76,10 @@ class WordOperations:
|
|
56
76
|
reason=reason,
|
57
77
|
alternatives={},
|
58
78
|
reference_positions=reference_positions,
|
59
|
-
length=1,
|
79
|
+
length=1,
|
80
|
+
handler=handler,
|
81
|
+
word_id=original_word_id,
|
82
|
+
corrected_word_id=corrected_word_id if corrected_word_id is not None else (WordUtils.generate_id() if corrected_word else None),
|
60
83
|
)
|
61
84
|
|
62
85
|
@staticmethod
|
@@ -67,11 +90,19 @@ class WordOperations:
|
|
67
90
|
source: str,
|
68
91
|
confidence: float,
|
69
92
|
reason: str,
|
93
|
+
handler: str,
|
70
94
|
reference_positions: Optional[Dict[str, int]] = None,
|
95
|
+
original_word_id: Optional[str] = None,
|
96
|
+
corrected_word_ids: Optional[List[str]] = None,
|
71
97
|
) -> List[WordCorrection]:
|
72
98
|
"""Creates corrections for splitting a single word into multiple words."""
|
73
99
|
corrections = []
|
74
|
-
|
100
|
+
|
101
|
+
# Generate word IDs if none provided
|
102
|
+
if corrected_word_ids is None:
|
103
|
+
corrected_word_ids = [WordUtils.generate_id() for _ in reference_words]
|
104
|
+
|
105
|
+
for split_idx, (ref_word, word_id) in enumerate(zip(reference_words, corrected_word_ids)):
|
75
106
|
corrections.append(
|
76
107
|
WordCorrection(
|
77
108
|
original_word=original_word,
|
@@ -86,6 +117,9 @@ class WordOperations:
|
|
86
117
|
split_total=len(reference_words),
|
87
118
|
reference_positions=reference_positions,
|
88
119
|
length=1, # Each split word is length 1
|
120
|
+
handler=handler,
|
121
|
+
word_id=WordUtils.generate_id(), # Generate new ID for each split
|
122
|
+
corrected_word_id=word_id,
|
89
123
|
)
|
90
124
|
)
|
91
125
|
return corrections
|
@@ -99,10 +133,16 @@ class WordOperations:
|
|
99
133
|
confidence: float,
|
100
134
|
combine_reason: str,
|
101
135
|
delete_reason: str,
|
136
|
+
handler: str,
|
102
137
|
reference_positions: Optional[Dict[str, int]] = None,
|
138
|
+
original_word_ids: Optional[List[str]] = None,
|
139
|
+
corrected_word_id: Optional[str] = None,
|
103
140
|
) -> List[WordCorrection]:
|
104
141
|
"""Creates corrections for combining multiple words into a single word."""
|
105
142
|
corrections = []
|
143
|
+
word_ids = original_word_ids or [None] * len(original_words)
|
144
|
+
|
145
|
+
final_word_id = corrected_word_id or WordUtils.generate_id()
|
106
146
|
|
107
147
|
# First word gets replaced
|
108
148
|
corrections.append(
|
@@ -117,11 +157,14 @@ class WordOperations:
|
|
117
157
|
alternatives={},
|
118
158
|
reference_positions=reference_positions,
|
119
159
|
length=len(original_words), # Combined word spans all original words
|
160
|
+
handler=handler,
|
161
|
+
word_id=WordUtils.generate_id(), # Generate new ID for combined word
|
162
|
+
corrected_word_id=final_word_id,
|
120
163
|
)
|
121
164
|
)
|
122
165
|
|
123
166
|
# Additional words get marked for deletion
|
124
|
-
for i, word in enumerate(original_words[1:], start=1):
|
167
|
+
for i, (word, word_id) in enumerate(zip(original_words[1:], word_ids[1:]), start=1):
|
125
168
|
corrections.append(
|
126
169
|
WordCorrection(
|
127
170
|
original_word=word,
|
@@ -135,6 +178,9 @@ class WordOperations:
|
|
135
178
|
is_deletion=True,
|
136
179
|
reference_positions=reference_positions,
|
137
180
|
length=1, # Deleted words are length 1
|
181
|
+
handler=handler,
|
182
|
+
word_id=WordUtils.generate_id(), # Generate new ID for each deleted word
|
183
|
+
corrected_word_id=None, # Deleted words don't need a corrected ID
|
138
184
|
)
|
139
185
|
)
|
140
186
|
|
@@ -10,19 +10,15 @@ def clean_text(text: str) -> str:
|
|
10
10
|
Returns:
|
11
11
|
Cleaned text with:
|
12
12
|
- All text converted to lowercase
|
13
|
-
- Hyphens and slashes converted to spaces
|
14
|
-
- All other punctuation removed
|
15
13
|
- Multiple spaces/whitespace collapsed to single space
|
16
14
|
- Leading/trailing whitespace removed
|
15
|
+
- Punctuation removed (except for internal hyphens/slashes in words)
|
17
16
|
"""
|
18
17
|
# Convert to lowercase
|
19
18
|
text = text.lower()
|
20
19
|
|
21
|
-
#
|
22
|
-
text =
|
23
|
-
|
24
|
-
# Remove remaining punctuation
|
25
|
-
text = re.sub(r"[^\w\s]", "", text)
|
20
|
+
# Remove punctuation except hyphens and slashes that are between word characters
|
21
|
+
text = re.sub(r"(?<!\w)[^\w\s]|[^\w\s](?!\w)", "", text)
|
26
22
|
|
27
23
|
# Normalize whitespace (collapse multiple spaces, remove leading/trailing)
|
28
24
|
text = " ".join(text.split())
|
Binary file
|