lyrics-transcriber 0.37.0__py3-none-any.whl → 0.40.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/correction/handlers/extend_anchor.py +13 -2
- lyrics_transcriber/correction/handlers/word_operations.py +8 -2
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js +26696 -0
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +3 -2
- lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +36 -13
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +41 -1
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +48 -16
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +71 -16
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +45 -12
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +83 -19
- lyrics_transcriber/frontend/src/components/shared/types.ts +3 -0
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +65 -9
- lyrics_transcriber/frontend/vite.config.js +4 -0
- lyrics_transcriber/frontend/vite.config.ts +4 -0
- lyrics_transcriber/lyrics/genius.py +41 -12
- lyrics_transcriber/output/cdg.py +106 -29
- lyrics_transcriber/output/cdgmaker/composer.py +822 -528
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/review/server.py +10 -12
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/METADATA +3 -2
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/RECORD +28 -26
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +0 -182
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/WHEEL +0 -0
@@ -19,6 +19,7 @@ export interface UseWordClickProps {
|
|
19
19
|
onWordClick?: (info: WordClickInfo) => void
|
20
20
|
isReference?: boolean
|
21
21
|
currentSource?: string
|
22
|
+
gaps?: GapSequence[]
|
22
23
|
}
|
23
24
|
|
24
25
|
export function useWordClick({
|
@@ -26,7 +27,8 @@ export function useWordClick({
|
|
26
27
|
onElementClick,
|
27
28
|
onWordClick,
|
28
29
|
isReference,
|
29
|
-
currentSource
|
30
|
+
currentSource,
|
31
|
+
gaps = []
|
30
32
|
}: UseWordClickProps) {
|
31
33
|
const handleWordClick = useCallback((
|
32
34
|
word: string,
|
@@ -47,21 +49,20 @@ export function useWordClick({
|
|
47
49
|
wordIds: anchor.word_ids,
|
48
50
|
length: anchor.length,
|
49
51
|
words: anchor.words,
|
50
|
-
referenceWordIds: anchor.reference_word_ids
|
52
|
+
referenceWordIds: anchor.reference_word_ids,
|
53
|
+
matchesWordId: isReference
|
54
|
+
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
55
|
+
: anchor.word_ids.includes(wordId)
|
51
56
|
},
|
52
57
|
gapInfo: gap && {
|
53
58
|
wordIds: gap.word_ids,
|
54
59
|
length: gap.length,
|
55
60
|
words: gap.words,
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
is_deletion: c.is_deletion,
|
62
|
-
split_index: c.split_index,
|
63
|
-
split_total: c.split_total
|
64
|
-
}))
|
61
|
+
referenceWords: gap.reference_words,
|
62
|
+
corrections: gap.corrections,
|
63
|
+
matchesWordId: isReference
|
64
|
+
? gap.reference_words[currentSource!]?.includes(wordId)
|
65
|
+
: gap.word_ids.includes(wordId)
|
65
66
|
},
|
66
67
|
belongsToAnchor: anchor && (
|
67
68
|
isReference
|
@@ -78,6 +79,28 @@ export function useWordClick({
|
|
78
79
|
}
|
79
80
|
}, null, 2))
|
80
81
|
|
82
|
+
// For reference view clicks, find the corresponding gap
|
83
|
+
if (isReference && currentSource) {
|
84
|
+
// Extract position from wordId (e.g., "genius-word-3" -> 3)
|
85
|
+
const position = parseInt(wordId.split('-').pop() || '', 10);
|
86
|
+
|
87
|
+
// Find gap that has a correction matching this reference position
|
88
|
+
const matchingGap = gaps?.find(g =>
|
89
|
+
g.corrections.some(c => {
|
90
|
+
const refPosition = c.reference_positions?.[currentSource];
|
91
|
+
return typeof refPosition === 'number' && refPosition === position;
|
92
|
+
})
|
93
|
+
);
|
94
|
+
|
95
|
+
if (matchingGap) {
|
96
|
+
console.log('Found matching gap for reference click:', {
|
97
|
+
position,
|
98
|
+
gap: matchingGap
|
99
|
+
});
|
100
|
+
gap = matchingGap;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
81
104
|
const belongsToAnchor = anchor && (
|
82
105
|
isReference
|
83
106
|
? anchor.reference_word_ids[currentSource!]?.includes(wordId)
|
@@ -86,17 +109,58 @@ export function useWordClick({
|
|
86
109
|
|
87
110
|
const belongsToGap = gap && (
|
88
111
|
isReference
|
89
|
-
? gap.corrections.some(c =>
|
112
|
+
? gap.corrections.some(c => {
|
113
|
+
const refPosition = c.reference_positions?.[currentSource!];
|
114
|
+
const clickedPosition = parseInt(wordId.split('-').pop() || '', 10);
|
115
|
+
return typeof refPosition === 'number' && refPosition === clickedPosition;
|
116
|
+
})
|
90
117
|
: gap.word_ids.includes(wordId)
|
91
118
|
)
|
92
119
|
|
93
120
|
if (mode === 'highlight' || mode === 'edit') {
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
121
|
+
if (belongsToAnchor && anchor) {
|
122
|
+
onWordClick?.({
|
123
|
+
word_id: wordId,
|
124
|
+
type: 'anchor',
|
125
|
+
anchor,
|
126
|
+
gap: undefined
|
127
|
+
})
|
128
|
+
} else if (belongsToGap && gap) {
|
129
|
+
// Create highlight info that includes both transcription and reference IDs
|
130
|
+
const referenceWords: Record<string, string[]> = {};
|
131
|
+
|
132
|
+
// For each correction in the gap, add its reference positions
|
133
|
+
gap.corrections.forEach(correction => {
|
134
|
+
Object.entries(correction.reference_positions || {}).forEach(([source, position]) => {
|
135
|
+
if (typeof position === 'number') {
|
136
|
+
const refId = `${source}-word-${position}`;
|
137
|
+
if (!referenceWords[source]) {
|
138
|
+
referenceWords[source] = [];
|
139
|
+
}
|
140
|
+
if (!referenceWords[source].includes(refId)) {
|
141
|
+
referenceWords[source].push(refId);
|
142
|
+
}
|
143
|
+
}
|
144
|
+
});
|
145
|
+
});
|
146
|
+
|
147
|
+
onWordClick?.({
|
148
|
+
word_id: wordId,
|
149
|
+
type: 'gap',
|
150
|
+
anchor: undefined,
|
151
|
+
gap: {
|
152
|
+
...gap,
|
153
|
+
reference_words: referenceWords // Use reference_words instead of reference_word_ids
|
154
|
+
}
|
155
|
+
})
|
156
|
+
} else {
|
157
|
+
onWordClick?.({
|
158
|
+
word_id: wordId,
|
159
|
+
type: 'other',
|
160
|
+
anchor: undefined,
|
161
|
+
gap: undefined
|
162
|
+
})
|
163
|
+
}
|
100
164
|
} else if (mode === 'details') {
|
101
165
|
if (belongsToAnchor && anchor) {
|
102
166
|
onElementClick({
|
@@ -139,7 +203,7 @@ export function useWordClick({
|
|
139
203
|
})
|
140
204
|
}
|
141
205
|
}
|
142
|
-
}, [mode, onWordClick, onElementClick, isReference, currentSource])
|
206
|
+
}, [mode, onWordClick, onElementClick, isReference, currentSource, gaps])
|
143
207
|
|
144
208
|
return { handleWordClick }
|
145
209
|
}
|
@@ -35,7 +35,10 @@ export interface TranscriptionWordPosition extends BaseWordPosition {
|
|
35
35
|
start_time?: number
|
36
36
|
end_time?: number
|
37
37
|
}
|
38
|
+
type: 'anchor' | 'gap' | 'other'
|
39
|
+
sequence?: AnchorSequence | GapSequence
|
38
40
|
isInRange: boolean
|
41
|
+
isCorrected?: boolean
|
39
42
|
}
|
40
43
|
|
41
44
|
// Reference-specific word position with simple string word
|
@@ -54,16 +54,57 @@ function findWordIdsForSequence(
|
|
54
54
|
return allWords.slice(startIndex, endIndex).map(word => word.id);
|
55
55
|
}
|
56
56
|
|
57
|
-
//
|
57
|
+
// Add this at the top of the file
|
58
|
+
const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
|
59
|
+
const allWords = segments.flatMap(s => s.words);
|
60
|
+
console.log('Word ID Assignment:', {
|
61
|
+
searchingFor: correction.original_word,
|
62
|
+
allWordsWithIds: allWords.map(w => ({
|
63
|
+
text: w.text,
|
64
|
+
id: w.id
|
65
|
+
})),
|
66
|
+
matchedId: foundId,
|
67
|
+
matchedWord: foundId ? allWords.find(w => w.id === foundId)?.text : null
|
68
|
+
});
|
69
|
+
};
|
70
|
+
|
71
|
+
// Modify findWordIdForCorrection to include logging
|
58
72
|
function findWordIdForCorrection(
|
59
73
|
segments: LyricsSegment[],
|
60
|
-
correction: {
|
74
|
+
correction: {
|
75
|
+
original_word: string;
|
76
|
+
original_position?: number;
|
77
|
+
}
|
61
78
|
): string {
|
79
|
+
const allWords = segments.flatMap(s => s.words);
|
80
|
+
|
81
|
+
// If we have position information, use it to find the exact word
|
82
|
+
if (typeof correction.original_position === 'number') {
|
83
|
+
const word = allWords[correction.original_position];
|
84
|
+
if (word && word.text === correction.original_word) {
|
85
|
+
logWordMatching(segments, correction, word.id);
|
86
|
+
return word.id;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
// Fallback to finding by text (but log a warning)
|
62
91
|
for (const segment of segments) {
|
63
92
|
const word = segment.words.find(w => w.text === correction.original_word);
|
64
|
-
if (word)
|
93
|
+
if (word) {
|
94
|
+
console.warn(
|
95
|
+
'Warning: Had to find word by text match rather than position.',
|
96
|
+
correction.original_word,
|
97
|
+
'Consider using position information for more accurate matching.'
|
98
|
+
);
|
99
|
+
logWordMatching(segments, correction, word.id);
|
100
|
+
return word.id;
|
101
|
+
}
|
65
102
|
}
|
66
|
-
|
103
|
+
|
104
|
+
const newId = nanoid();
|
105
|
+
logWordMatching(segments, correction, null);
|
106
|
+
console.log('Generated new ID:', newId, 'for word:', correction.original_word);
|
107
|
+
return newId;
|
67
108
|
}
|
68
109
|
|
69
110
|
// Helper function to find word IDs in reference text
|
@@ -130,15 +171,30 @@ export function initializeDataWithIds(data: CorrectionData): CorrectionData {
|
|
130
171
|
// Update gap sequences to use word IDs
|
131
172
|
newData.gap_sequences = newData.gap_sequences.map((gap) => {
|
132
173
|
const serverGap = gap as unknown as ServerData;
|
174
|
+
console.log('Processing gap sequence:', {
|
175
|
+
words: gap.words,
|
176
|
+
word_ids: gap.word_ids,
|
177
|
+
corrections: gap.corrections,
|
178
|
+
foundWordIds: findWordIdsForSequence(newData.corrected_segments, serverGap)
|
179
|
+
});
|
180
|
+
|
133
181
|
return {
|
134
182
|
...gap,
|
135
183
|
id: gap.id || nanoid(),
|
136
184
|
word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
|
137
|
-
corrections: gap.corrections.map((correction: WordCorrection) =>
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
185
|
+
corrections: gap.corrections.map((correction: WordCorrection) => {
|
186
|
+
const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
|
187
|
+
console.log('Correction word ID assignment:', {
|
188
|
+
original_word: correction.original_word,
|
189
|
+
corrected_word: correction.corrected_word,
|
190
|
+
assigned_id: wordId
|
191
|
+
});
|
192
|
+
return {
|
193
|
+
...correction,
|
194
|
+
id: correction.id || nanoid(),
|
195
|
+
word_id: wordId
|
196
|
+
};
|
197
|
+
})
|
142
198
|
} as GapSequence;
|
143
199
|
});
|
144
200
|
|
@@ -82,19 +82,48 @@ class GeniusProvider(BaseLyricsProvider):
|
|
82
82
|
|
83
83
|
def _clean_lyrics(self, lyrics: str) -> str:
|
84
84
|
"""Clean and process lyrics from Genius to remove unwanted content."""
|
85
|
+
self.logger.debug("Starting lyrics cleaning process")
|
86
|
+
original = lyrics
|
85
87
|
|
86
88
|
lyrics = lyrics.replace("\\n", "\n")
|
87
89
|
lyrics = re.sub(r"You might also like", "", lyrics)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
lyrics = re.sub(
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
lyrics = re.sub(r"^
|
98
|
-
|
99
|
-
|
90
|
+
if original != lyrics:
|
91
|
+
self.logger.debug("Removed 'You might also like' text")
|
92
|
+
|
93
|
+
original = lyrics
|
94
|
+
lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
|
95
|
+
if original != lyrics:
|
96
|
+
self.logger.debug("Removed song name and 'Lyrics' prefix")
|
97
|
+
|
98
|
+
original = lyrics
|
99
|
+
lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
|
100
|
+
if original != lyrics:
|
101
|
+
self.logger.debug("Removed contributors count and 'Lyrics' text")
|
102
|
+
|
103
|
+
original = lyrics
|
104
|
+
lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
|
105
|
+
if original != lyrics:
|
106
|
+
self.logger.debug("Removed ticket sales text")
|
107
|
+
|
108
|
+
original = lyrics
|
109
|
+
lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
|
110
|
+
if original != lyrics:
|
111
|
+
self.logger.debug("Removed numbered embed marker")
|
112
|
+
|
113
|
+
original = lyrics
|
114
|
+
lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
|
115
|
+
if original != lyrics:
|
116
|
+
self.logger.debug("Removed 'Embed' suffix from word")
|
117
|
+
|
118
|
+
original = lyrics
|
119
|
+
lyrics = re.sub(r"^Embed$", r"", lyrics)
|
120
|
+
if original != lyrics:
|
121
|
+
self.logger.debug("Removed standalone 'Embed' text")
|
122
|
+
|
123
|
+
original = lyrics
|
124
|
+
lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics)
|
125
|
+
if original != lyrics:
|
126
|
+
self.logger.debug("Removed lines containing square brackets")
|
127
|
+
|
128
|
+
self.logger.debug("Completed lyrics cleaning process")
|
100
129
|
return lyrics
|
lyrics_transcriber/output/cdg.py
CHANGED
@@ -9,6 +9,7 @@ import os
|
|
9
9
|
import zipfile
|
10
10
|
import shutil
|
11
11
|
|
12
|
+
from lyrics_transcriber.output.cdgmaker.cdg import CDG_VISIBLE_WIDTH
|
12
13
|
from lyrics_transcriber.output.cdgmaker.composer import KaraokeComposer
|
13
14
|
from lyrics_transcriber.output.cdgmaker.render import get_wrapped_text
|
14
15
|
from lyrics_transcriber.types import LyricsSegment
|
@@ -110,7 +111,7 @@ class CDGGenerator:
|
|
110
111
|
# Convert time from seconds to centiseconds
|
111
112
|
timestamp = int(word.start_time * 100)
|
112
113
|
lyrics_data.append({"timestamp": timestamp, "text": word.text.upper()}) # CDG format expects uppercase text
|
113
|
-
|
114
|
+
self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")
|
114
115
|
|
115
116
|
# Sort by timestamp to ensure correct order
|
116
117
|
lyrics_data.sort(key=lambda x: x["timestamp"])
|
@@ -189,6 +190,7 @@ class CDGGenerator:
|
|
189
190
|
"""Compose CDG using KaraokeComposer."""
|
190
191
|
kc = KaraokeComposer.from_file(toml_file)
|
191
192
|
kc.compose()
|
193
|
+
kc.create_mp4(height=1080, fps=30)
|
192
194
|
|
193
195
|
def _find_cdg_zip(self, artist: str, title: str) -> str:
|
194
196
|
"""Find the generated CDG ZIP file."""
|
@@ -337,20 +339,20 @@ class CDGGenerator:
|
|
337
339
|
formatted_lyrics = []
|
338
340
|
|
339
341
|
for i, lyric in enumerate(lyrics_data):
|
340
|
-
|
342
|
+
self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")
|
341
343
|
|
342
344
|
if i == 0 or lyric["timestamp"] - lyrics_data[i - 1]["timestamp"] >= cdg_styles["lead_in_threshold"]:
|
343
345
|
lead_in_start = lyric["timestamp"] - cdg_styles["lead_in_total"]
|
344
|
-
|
346
|
+
self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
|
345
347
|
for j, symbol in enumerate(cdg_styles["lead_in_symbols"]):
|
346
348
|
sync_time = lead_in_start + j * cdg_styles["lead_in_duration"]
|
347
349
|
sync_times.append(sync_time)
|
348
350
|
formatted_lyrics.append(symbol)
|
349
|
-
|
351
|
+
self.logger.debug(f" Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")
|
350
352
|
|
351
353
|
sync_times.append(lyric["timestamp"])
|
352
354
|
formatted_lyrics.append(lyric["text"])
|
353
|
-
|
355
|
+
self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")
|
354
356
|
|
355
357
|
formatted_text = self.format_lyrics(
|
356
358
|
formatted_lyrics,
|
@@ -472,24 +474,29 @@ class CDGGenerator:
|
|
472
474
|
page_number = 1
|
473
475
|
|
474
476
|
for i, text in enumerate(lyrics_data):
|
475
|
-
|
477
|
+
self.logger.debug(f"format_lyrics: Processing text {i}: '{text}' (sync time: {sync_times[i]})")
|
476
478
|
|
477
479
|
if text.startswith("/"):
|
478
480
|
if current_line:
|
479
|
-
wrapped_lines = get_wrapped_text(current_line.strip(), font,
|
481
|
+
wrapped_lines = get_wrapped_text(current_line.strip(), font, CDG_VISIBLE_WIDTH).split("\n")
|
480
482
|
for wrapped_line in wrapped_lines:
|
481
483
|
formatted_lyrics.append(wrapped_line)
|
482
484
|
lines_on_page += 1
|
483
|
-
|
485
|
+
self.logger.debug(f"format_lyrics: Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
|
486
|
+
# Add empty line after punctuation immediately
|
487
|
+
if wrapped_line.endswith(("!", "?", ".")) and not wrapped_line == "~":
|
488
|
+
formatted_lyrics.append("~")
|
489
|
+
lines_on_page += 1
|
490
|
+
self.logger.debug(f"format_lyrics: Added empty line after punctuation. Lines on page now: {lines_on_page}")
|
484
491
|
if lines_on_page == 4:
|
485
492
|
lines_on_page = 0
|
486
493
|
page_number += 1
|
487
|
-
|
494
|
+
self.logger.debug(f"format_lyrics: Page full. New page number: {page_number}")
|
488
495
|
current_line = ""
|
489
496
|
text = text[1:]
|
490
497
|
|
491
498
|
current_line += text + " "
|
492
|
-
|
499
|
+
self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
|
493
500
|
|
494
501
|
is_last_before_instrumental = any(
|
495
502
|
inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
|
@@ -497,33 +504,103 @@ class CDGGenerator:
|
|
497
504
|
|
498
505
|
if is_last_before_instrumental or i == len(lyrics_data) - 1:
|
499
506
|
if current_line:
|
500
|
-
wrapped_lines = get_wrapped_text(current_line.strip(), font,
|
507
|
+
wrapped_lines = get_wrapped_text(current_line.strip(), font, CDG_VISIBLE_WIDTH).split("\n")
|
501
508
|
for wrapped_line in wrapped_lines:
|
502
509
|
formatted_lyrics.append(wrapped_line)
|
503
510
|
lines_on_page += 1
|
504
|
-
|
511
|
+
self.logger.debug(
|
512
|
+
f"format_lyrics: Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}"
|
513
|
+
)
|
505
514
|
if lines_on_page == 4:
|
506
515
|
lines_on_page = 0
|
507
516
|
page_number += 1
|
508
|
-
|
517
|
+
self.logger.debug(f"format_lyrics: Page full. New page number: {page_number}")
|
509
518
|
current_line = ""
|
510
519
|
|
511
520
|
if is_last_before_instrumental:
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
521
|
+
self.logger.debug(f"format_lyrics: is_last_before_instrumental: True lines_on_page: {lines_on_page}")
|
522
|
+
# Calculate remaining lines needed to reach next full page
|
523
|
+
remaining_lines = 4 - (lines_on_page % 4) if lines_on_page % 4 != 0 else 0
|
524
|
+
if remaining_lines > 0:
|
525
|
+
formatted_lyrics.extend(["~"] * remaining_lines)
|
526
|
+
self.logger.debug(f"format_lyrics: Added {remaining_lines} empty lines to complete current page")
|
527
|
+
|
528
|
+
# Reset the counter and increment page
|
516
529
|
lines_on_page = 0
|
517
530
|
page_number += 1
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
531
|
+
self.logger.debug(f"format_lyrics: Reset lines_on_page to 0. New page number: {page_number}")
|
532
|
+
|
533
|
+
return "\n".join(formatted_lyrics)
|
534
|
+
|
535
|
+
def generate_cdg_from_lrc(
|
536
|
+
self,
|
537
|
+
lrc_file: str,
|
538
|
+
audio_file: str,
|
539
|
+
title: str,
|
540
|
+
artist: str,
|
541
|
+
cdg_styles: dict,
|
542
|
+
) -> Tuple[str, str, str]:
|
543
|
+
"""Generate a CDG file from an LRC file and audio file.
|
544
|
+
|
545
|
+
Args:
|
546
|
+
lrc_file: Path to the LRC file
|
547
|
+
audio_file: Path to the audio file
|
548
|
+
title: Title of the song
|
549
|
+
artist: Artist name
|
550
|
+
cdg_styles: Dictionary containing CDG style parameters
|
551
|
+
|
552
|
+
Returns:
|
553
|
+
Tuple containing paths to (cdg_file, mp3_file, zip_file)
|
554
|
+
"""
|
555
|
+
self._validate_and_setup_font(cdg_styles)
|
556
|
+
|
557
|
+
# Parse LRC file and convert to lyrics_data format
|
558
|
+
lyrics_data = self._parse_lrc(lrc_file)
|
559
|
+
|
560
|
+
toml_file = self._create_toml_file(
|
561
|
+
audio_file=audio_file,
|
562
|
+
title=title,
|
563
|
+
artist=artist,
|
564
|
+
lyrics_data=lyrics_data,
|
565
|
+
cdg_styles=cdg_styles,
|
566
|
+
)
|
567
|
+
|
568
|
+
try:
|
569
|
+
self._compose_cdg(toml_file)
|
570
|
+
output_zip = self._find_cdg_zip(artist, title)
|
571
|
+
self._extract_cdg_files(output_zip)
|
572
|
+
|
573
|
+
cdg_file = self._get_cdg_path(artist, title)
|
574
|
+
mp3_file = self._get_mp3_path(artist, title)
|
575
|
+
|
576
|
+
self._verify_output_files(cdg_file, mp3_file)
|
577
|
+
|
578
|
+
self.logger.info("CDG file generated successfully")
|
579
|
+
return cdg_file, mp3_file, output_zip
|
580
|
+
|
581
|
+
except Exception as e:
|
582
|
+
self.logger.error(f"Error composing CDG: {e}")
|
583
|
+
raise
|
584
|
+
|
585
|
+
def _parse_lrc(self, lrc_file: str) -> List[dict]:
|
586
|
+
"""Parse LRC file and extract timestamps and lyrics."""
|
587
|
+
with open(lrc_file, "r", encoding="utf-8") as f:
|
588
|
+
content = f.read()
|
589
|
+
|
590
|
+
# Extract timestamps and lyrics
|
591
|
+
pattern = r"\[(\d{2}):(\d{2})\.(\d{3})\](\d+:)?(/?.*)"
|
592
|
+
matches = re.findall(pattern, content)
|
593
|
+
|
594
|
+
if not matches:
|
595
|
+
raise ValueError(f"No valid lyrics found in the LRC file: {lrc_file}")
|
596
|
+
|
597
|
+
lyrics = []
|
598
|
+
for match in matches:
|
599
|
+
minutes, seconds, milliseconds = map(int, match[:3])
|
600
|
+
timestamp = (minutes * 60 + seconds) * 100 + int(milliseconds / 10) # Convert to centiseconds
|
601
|
+
text = match[4].strip().upper()
|
602
|
+
if text: # Only add non-empty lyrics
|
603
|
+
lyrics.append({"timestamp": timestamp, "text": text})
|
604
|
+
|
605
|
+
self.logger.info(f"Found {len(lyrics)} lyric lines")
|
606
|
+
return lyrics
|