lyrics-transcriber 0.37.0__py3-none-any.whl → 0.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29):
  1. lyrics_transcriber/correction/handlers/extend_anchor.py +13 -2
  2. lyrics_transcriber/correction/handlers/word_operations.py +8 -2
  3. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js +26696 -0
  4. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +1 -0
  5. lyrics_transcriber/frontend/dist/index.html +1 -1
  6. lyrics_transcriber/frontend/package.json +3 -2
  7. lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -0
  8. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +36 -13
  9. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +41 -1
  10. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +48 -16
  11. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +71 -16
  12. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +11 -7
  13. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +45 -12
  14. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +83 -19
  15. lyrics_transcriber/frontend/src/components/shared/types.ts +3 -0
  16. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +65 -9
  17. lyrics_transcriber/frontend/vite.config.js +4 -0
  18. lyrics_transcriber/frontend/vite.config.ts +4 -0
  19. lyrics_transcriber/lyrics/genius.py +41 -12
  20. lyrics_transcriber/output/cdg.py +106 -29
  21. lyrics_transcriber/output/cdgmaker/composer.py +822 -528
  22. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  23. lyrics_transcriber/review/server.py +10 -12
  24. {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/METADATA +3 -2
  25. {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/RECORD +28 -26
  26. {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/entry_points.txt +1 -0
  27. lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +0 -182
  28. {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/LICENSE +0 -0
  29. {lyrics_transcriber-0.37.0.dist-info → lyrics_transcriber-0.40.0.dist-info}/WHEEL +0 -0
@@ -19,6 +19,7 @@ export interface UseWordClickProps {
19
19
  onWordClick?: (info: WordClickInfo) => void
20
20
  isReference?: boolean
21
21
  currentSource?: string
22
+ gaps?: GapSequence[]
22
23
  }
23
24
 
24
25
  export function useWordClick({
@@ -26,7 +27,8 @@ export function useWordClick({
26
27
  onElementClick,
27
28
  onWordClick,
28
29
  isReference,
29
- currentSource
30
+ currentSource,
31
+ gaps = []
30
32
  }: UseWordClickProps) {
31
33
  const handleWordClick = useCallback((
32
34
  word: string,
@@ -47,21 +49,20 @@ export function useWordClick({
47
49
  wordIds: anchor.word_ids,
48
50
  length: anchor.length,
49
51
  words: anchor.words,
50
- referenceWordIds: anchor.reference_word_ids
52
+ referenceWordIds: anchor.reference_word_ids,
53
+ matchesWordId: isReference
54
+ ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
55
+ : anchor.word_ids.includes(wordId)
51
56
  },
52
57
  gapInfo: gap && {
53
58
  wordIds: gap.word_ids,
54
59
  length: gap.length,
55
60
  words: gap.words,
56
- corrections: gap.corrections.map(c => ({
57
- original_word: c.original_word,
58
- corrected_word: c.corrected_word,
59
- word_id: c.word_id,
60
- length: c.length,
61
- is_deletion: c.is_deletion,
62
- split_index: c.split_index,
63
- split_total: c.split_total
64
- }))
61
+ referenceWords: gap.reference_words,
62
+ corrections: gap.corrections,
63
+ matchesWordId: isReference
64
+ ? gap.reference_words[currentSource!]?.includes(wordId)
65
+ : gap.word_ids.includes(wordId)
65
66
  },
66
67
  belongsToAnchor: anchor && (
67
68
  isReference
@@ -78,6 +79,28 @@ export function useWordClick({
78
79
  }
79
80
  }, null, 2))
80
81
 
82
+ // For reference view clicks, find the corresponding gap
83
+ if (isReference && currentSource) {
84
+ // Extract position from wordId (e.g., "genius-word-3" -> 3)
85
+ const position = parseInt(wordId.split('-').pop() || '', 10);
86
+
87
+ // Find gap that has a correction matching this reference position
88
+ const matchingGap = gaps?.find(g =>
89
+ g.corrections.some(c => {
90
+ const refPosition = c.reference_positions?.[currentSource];
91
+ return typeof refPosition === 'number' && refPosition === position;
92
+ })
93
+ );
94
+
95
+ if (matchingGap) {
96
+ console.log('Found matching gap for reference click:', {
97
+ position,
98
+ gap: matchingGap
99
+ });
100
+ gap = matchingGap;
101
+ }
102
+ }
103
+
81
104
  const belongsToAnchor = anchor && (
82
105
  isReference
83
106
  ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
@@ -86,17 +109,58 @@ export function useWordClick({
86
109
 
87
110
  const belongsToGap = gap && (
88
111
  isReference
89
- ? gap.corrections.some(c => c.word_id === wordId)
112
+ ? gap.corrections.some(c => {
113
+ const refPosition = c.reference_positions?.[currentSource!];
114
+ const clickedPosition = parseInt(wordId.split('-').pop() || '', 10);
115
+ return typeof refPosition === 'number' && refPosition === clickedPosition;
116
+ })
90
117
  : gap.word_ids.includes(wordId)
91
118
  )
92
119
 
93
120
  if (mode === 'highlight' || mode === 'edit') {
94
- onWordClick?.({
95
- word_id: wordId,
96
- type: belongsToAnchor ? 'anchor' : belongsToGap ? 'gap' : 'other',
97
- anchor: belongsToAnchor ? anchor : undefined,
98
- gap: belongsToGap ? gap : undefined
99
- })
121
+ if (belongsToAnchor && anchor) {
122
+ onWordClick?.({
123
+ word_id: wordId,
124
+ type: 'anchor',
125
+ anchor,
126
+ gap: undefined
127
+ })
128
+ } else if (belongsToGap && gap) {
129
+ // Create highlight info that includes both transcription and reference IDs
130
+ const referenceWords: Record<string, string[]> = {};
131
+
132
+ // For each correction in the gap, add its reference positions
133
+ gap.corrections.forEach(correction => {
134
+ Object.entries(correction.reference_positions || {}).forEach(([source, position]) => {
135
+ if (typeof position === 'number') {
136
+ const refId = `${source}-word-${position}`;
137
+ if (!referenceWords[source]) {
138
+ referenceWords[source] = [];
139
+ }
140
+ if (!referenceWords[source].includes(refId)) {
141
+ referenceWords[source].push(refId);
142
+ }
143
+ }
144
+ });
145
+ });
146
+
147
+ onWordClick?.({
148
+ word_id: wordId,
149
+ type: 'gap',
150
+ anchor: undefined,
151
+ gap: {
152
+ ...gap,
153
+ reference_words: referenceWords // Use reference_words instead of reference_word_ids
154
+ }
155
+ })
156
+ } else {
157
+ onWordClick?.({
158
+ word_id: wordId,
159
+ type: 'other',
160
+ anchor: undefined,
161
+ gap: undefined
162
+ })
163
+ }
100
164
  } else if (mode === 'details') {
101
165
  if (belongsToAnchor && anchor) {
102
166
  onElementClick({
@@ -139,7 +203,7 @@ export function useWordClick({
139
203
  })
140
204
  }
141
205
  }
142
- }, [mode, onWordClick, onElementClick, isReference, currentSource])
206
+ }, [mode, onWordClick, onElementClick, isReference, currentSource, gaps])
143
207
 
144
208
  return { handleWordClick }
145
209
  }
@@ -35,7 +35,10 @@ export interface TranscriptionWordPosition extends BaseWordPosition {
35
35
  start_time?: number
36
36
  end_time?: number
37
37
  }
38
+ type: 'anchor' | 'gap' | 'other'
39
+ sequence?: AnchorSequence | GapSequence
38
40
  isInRange: boolean
41
+ isCorrected?: boolean
39
42
  }
40
43
 
41
44
  // Reference-specific word position with simple string word
@@ -54,16 +54,57 @@ function findWordIdsForSequence(
54
54
  return allWords.slice(startIndex, endIndex).map(word => word.id);
55
55
  }
56
56
 
57
- // Helper function to find word ID for a correction
57
+ // Add this at the top of the file
58
+ const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
59
+ const allWords = segments.flatMap(s => s.words);
60
+ console.log('Word ID Assignment:', {
61
+ searchingFor: correction.original_word,
62
+ allWordsWithIds: allWords.map(w => ({
63
+ text: w.text,
64
+ id: w.id
65
+ })),
66
+ matchedId: foundId,
67
+ matchedWord: foundId ? allWords.find(w => w.id === foundId)?.text : null
68
+ });
69
+ };
70
+
71
+ // Modify findWordIdForCorrection to include logging
58
72
  function findWordIdForCorrection(
59
73
  segments: LyricsSegment[],
60
- correction: { original_word: string; }
74
+ correction: {
75
+ original_word: string;
76
+ original_position?: number;
77
+ }
61
78
  ): string {
79
+ const allWords = segments.flatMap(s => s.words);
80
+
81
+ // If we have position information, use it to find the exact word
82
+ if (typeof correction.original_position === 'number') {
83
+ const word = allWords[correction.original_position];
84
+ if (word && word.text === correction.original_word) {
85
+ logWordMatching(segments, correction, word.id);
86
+ return word.id;
87
+ }
88
+ }
89
+
90
+ // Fallback to finding by text (but log a warning)
62
91
  for (const segment of segments) {
63
92
  const word = segment.words.find(w => w.text === correction.original_word);
64
- if (word) return word.id;
93
+ if (word) {
94
+ console.warn(
95
+ 'Warning: Had to find word by text match rather than position.',
96
+ correction.original_word,
97
+ 'Consider using position information for more accurate matching.'
98
+ );
99
+ logWordMatching(segments, correction, word.id);
100
+ return word.id;
101
+ }
65
102
  }
66
- return nanoid(); // Fallback if word not found
103
+
104
+ const newId = nanoid();
105
+ logWordMatching(segments, correction, null);
106
+ console.log('Generated new ID:', newId, 'for word:', correction.original_word);
107
+ return newId;
67
108
  }
68
109
 
69
110
  // Helper function to find word IDs in reference text
@@ -130,15 +171,30 @@ export function initializeDataWithIds(data: CorrectionData): CorrectionData {
130
171
  // Update gap sequences to use word IDs
131
172
  newData.gap_sequences = newData.gap_sequences.map((gap) => {
132
173
  const serverGap = gap as unknown as ServerData;
174
+ console.log('Processing gap sequence:', {
175
+ words: gap.words,
176
+ word_ids: gap.word_ids,
177
+ corrections: gap.corrections,
178
+ foundWordIds: findWordIdsForSequence(newData.corrected_segments, serverGap)
179
+ });
180
+
133
181
  return {
134
182
  ...gap,
135
183
  id: gap.id || nanoid(),
136
184
  word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
137
- corrections: gap.corrections.map((correction: WordCorrection) => ({
138
- ...correction,
139
- id: correction.id || nanoid(),
140
- word_id: correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction)
141
- }))
185
+ corrections: gap.corrections.map((correction: WordCorrection) => {
186
+ const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
187
+ console.log('Correction word ID assignment:', {
188
+ original_word: correction.original_word,
189
+ corrected_word: correction.corrected_word,
190
+ assigned_id: wordId
191
+ });
192
+ return {
193
+ ...correction,
194
+ id: correction.id || nanoid(),
195
+ word_id: wordId
196
+ };
197
+ })
142
198
  } as GapSequence;
143
199
  });
144
200
 
@@ -3,4 +3,8 @@ import react from '@vitejs/plugin-react';
3
3
  // https://vite.dev/config/
4
4
  export default defineConfig({
5
5
  plugins: [react()],
6
+ build: {
7
+ minify: false,
8
+ sourcemap: true,
9
+ }
6
10
  });
@@ -4,4 +4,8 @@ import react from '@vitejs/plugin-react'
4
4
  // https://vite.dev/config/
5
5
  export default defineConfig({
6
6
  plugins: [react()],
7
+ build: {
8
+ minify: false,
9
+ sourcemap: true,
10
+ }
7
11
  })
@@ -82,19 +82,48 @@ class GeniusProvider(BaseLyricsProvider):
82
82
 
83
83
  def _clean_lyrics(self, lyrics: str) -> str:
84
84
  """Clean and process lyrics from Genius to remove unwanted content."""
85
+ self.logger.debug("Starting lyrics cleaning process")
86
+ original = lyrics
85
87
 
86
88
  lyrics = lyrics.replace("\\n", "\n")
87
89
  lyrics = re.sub(r"You might also like", "", lyrics)
88
- lyrics = re.sub(
89
- r".*?Lyrics([A-Z])", r"\1", lyrics
90
- ) # Remove the song name and word "Lyrics" if this has a non-newline char at the start
91
- lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics) # Remove this example: 27 ContributorsSex Bomb Lyrics
92
- lyrics = re.sub(
93
- r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
94
- ) # Remove this example: See Tom Jones LiveGet tickets as low as $71
95
- lyrics = re.sub(r"[0-9]+Embed$", "", lyrics) # Remove the word "Embed" at end of line with preceding numbers if found
96
- lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
97
- lyrics = re.sub(r"^Embed$", r"", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
98
- lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics) # Remove lines containing square brackets
99
- # add any additional cleaning rules here
90
+ if original != lyrics:
91
+ self.logger.debug("Removed 'You might also like' text")
92
+
93
+ original = lyrics
94
+ lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
95
+ if original != lyrics:
96
+ self.logger.debug("Removed song name and 'Lyrics' prefix")
97
+
98
+ original = lyrics
99
+ lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
100
+ if original != lyrics:
101
+ self.logger.debug("Removed contributors count and 'Lyrics' text")
102
+
103
+ original = lyrics
104
+ lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
105
+ if original != lyrics:
106
+ self.logger.debug("Removed ticket sales text")
107
+
108
+ original = lyrics
109
+ lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
110
+ if original != lyrics:
111
+ self.logger.debug("Removed numbered embed marker")
112
+
113
+ original = lyrics
114
+ lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
115
+ if original != lyrics:
116
+ self.logger.debug("Removed 'Embed' suffix from word")
117
+
118
+ original = lyrics
119
+ lyrics = re.sub(r"^Embed$", r"", lyrics)
120
+ if original != lyrics:
121
+ self.logger.debug("Removed standalone 'Embed' text")
122
+
123
+ original = lyrics
124
+ lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics)
125
+ if original != lyrics:
126
+ self.logger.debug("Removed lines containing square brackets")
127
+
128
+ self.logger.debug("Completed lyrics cleaning process")
100
129
  return lyrics
@@ -9,6 +9,7 @@ import os
9
9
  import zipfile
10
10
  import shutil
11
11
 
12
+ from lyrics_transcriber.output.cdgmaker.cdg import CDG_VISIBLE_WIDTH
12
13
  from lyrics_transcriber.output.cdgmaker.composer import KaraokeComposer
13
14
  from lyrics_transcriber.output.cdgmaker.render import get_wrapped_text
14
15
  from lyrics_transcriber.types import LyricsSegment
@@ -110,7 +111,7 @@ class CDGGenerator:
110
111
  # Convert time from seconds to centiseconds
111
112
  timestamp = int(word.start_time * 100)
112
113
  lyrics_data.append({"timestamp": timestamp, "text": word.text.upper()}) # CDG format expects uppercase text
113
- # self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")
114
+ self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")
114
115
 
115
116
  # Sort by timestamp to ensure correct order
116
117
  lyrics_data.sort(key=lambda x: x["timestamp"])
@@ -189,6 +190,7 @@ class CDGGenerator:
189
190
  """Compose CDG using KaraokeComposer."""
190
191
  kc = KaraokeComposer.from_file(toml_file)
191
192
  kc.compose()
193
+ kc.create_mp4(height=1080, fps=30)
192
194
 
193
195
  def _find_cdg_zip(self, artist: str, title: str) -> str:
194
196
  """Find the generated CDG ZIP file."""
@@ -337,20 +339,20 @@ class CDGGenerator:
337
339
  formatted_lyrics = []
338
340
 
339
341
  for i, lyric in enumerate(lyrics_data):
340
- # self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")
342
+ self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")
341
343
 
342
344
  if i == 0 or lyric["timestamp"] - lyrics_data[i - 1]["timestamp"] >= cdg_styles["lead_in_threshold"]:
343
345
  lead_in_start = lyric["timestamp"] - cdg_styles["lead_in_total"]
344
- # self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
346
+ self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
345
347
  for j, symbol in enumerate(cdg_styles["lead_in_symbols"]):
346
348
  sync_time = lead_in_start + j * cdg_styles["lead_in_duration"]
347
349
  sync_times.append(sync_time)
348
350
  formatted_lyrics.append(symbol)
349
- # self.logger.debug(f" Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")
351
+ self.logger.debug(f" Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")
350
352
 
351
353
  sync_times.append(lyric["timestamp"])
352
354
  formatted_lyrics.append(lyric["text"])
353
- # self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")
355
+ self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")
354
356
 
355
357
  formatted_text = self.format_lyrics(
356
358
  formatted_lyrics,
@@ -472,24 +474,29 @@ class CDGGenerator:
472
474
  page_number = 1
473
475
 
474
476
  for i, text in enumerate(lyrics_data):
475
- # self.logger.debug(f"Processing text {i}: '{text}' (sync time: {sync_times[i]})")
477
+ self.logger.debug(f"format_lyrics: Processing text {i}: '{text}' (sync time: {sync_times[i]})")
476
478
 
477
479
  if text.startswith("/"):
478
480
  if current_line:
479
- wrapped_lines = get_wrapped_text(current_line.strip(), font, self.cdg_visible_width).split("\n")
481
+ wrapped_lines = get_wrapped_text(current_line.strip(), font, CDG_VISIBLE_WIDTH).split("\n")
480
482
  for wrapped_line in wrapped_lines:
481
483
  formatted_lyrics.append(wrapped_line)
482
484
  lines_on_page += 1
483
- # self.logger.debug(f"Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
485
+ self.logger.debug(f"format_lyrics: Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
486
+ # Add empty line after punctuation immediately
487
+ if wrapped_line.endswith(("!", "?", ".")) and not wrapped_line == "~":
488
+ formatted_lyrics.append("~")
489
+ lines_on_page += 1
490
+ self.logger.debug(f"format_lyrics: Added empty line after punctuation. Lines on page now: {lines_on_page}")
484
491
  if lines_on_page == 4:
485
492
  lines_on_page = 0
486
493
  page_number += 1
487
- # self.logger.debug(f"Page full. New page number: {page_number}")
494
+ self.logger.debug(f"format_lyrics: Page full. New page number: {page_number}")
488
495
  current_line = ""
489
496
  text = text[1:]
490
497
 
491
498
  current_line += text + " "
492
- # self.logger.debug(f"Current line: '{current_line}'")
499
+ self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
493
500
 
494
501
  is_last_before_instrumental = any(
495
502
  inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
@@ -497,33 +504,103 @@ class CDGGenerator:
497
504
 
498
505
  if is_last_before_instrumental or i == len(lyrics_data) - 1:
499
506
  if current_line:
500
- wrapped_lines = get_wrapped_text(current_line.strip(), font, self.cdg_visible_width).split("\n")
507
+ wrapped_lines = get_wrapped_text(current_line.strip(), font, CDG_VISIBLE_WIDTH).split("\n")
501
508
  for wrapped_line in wrapped_lines:
502
509
  formatted_lyrics.append(wrapped_line)
503
510
  lines_on_page += 1
504
- # self.logger.debug(f"Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}")
511
+ self.logger.debug(
512
+ f"format_lyrics: Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}"
513
+ )
505
514
  if lines_on_page == 4:
506
515
  lines_on_page = 0
507
516
  page_number += 1
508
- # self.logger.debug(f"Page full. New page number: {page_number}")
517
+ self.logger.debug(f"format_lyrics: Page full. New page number: {page_number}")
509
518
  current_line = ""
510
519
 
511
520
  if is_last_before_instrumental:
512
- blank_lines_needed = 4 - lines_on_page
513
- if blank_lines_needed < 4:
514
- formatted_lyrics.extend(["~"] * blank_lines_needed)
515
- # self.logger.debug(f"Added {blank_lines_needed} empty lines before instrumental. Lines on page was {lines_on_page}")
521
+ self.logger.debug(f"format_lyrics: is_last_before_instrumental: True lines_on_page: {lines_on_page}")
522
+ # Calculate remaining lines needed to reach next full page
523
+ remaining_lines = 4 - (lines_on_page % 4) if lines_on_page % 4 != 0 else 0
524
+ if remaining_lines > 0:
525
+ formatted_lyrics.extend(["~"] * remaining_lines)
526
+ self.logger.debug(f"format_lyrics: Added {remaining_lines} empty lines to complete current page")
527
+
528
+ # Reset the counter and increment page
516
529
  lines_on_page = 0
517
530
  page_number += 1
518
- # self.logger.debug(f"Reset lines_on_page to 0. New page number: {page_number}")
519
-
520
- final_lyrics = []
521
- for line in formatted_lyrics:
522
- final_lyrics.append(line)
523
- if line.endswith(("!", "?", ".")) and not line == "~":
524
- final_lyrics.append("~")
525
- # self.logger.debug("Added empty line after punctuation")
526
-
527
- result = "\n".join(final_lyrics)
528
- # self.logger.debug(f"Final formatted lyrics:\n{result}")
529
- return result
531
+ self.logger.debug(f"format_lyrics: Reset lines_on_page to 0. New page number: {page_number}")
532
+
533
+ return "\n".join(formatted_lyrics)
534
+
535
+ def generate_cdg_from_lrc(
536
+ self,
537
+ lrc_file: str,
538
+ audio_file: str,
539
+ title: str,
540
+ artist: str,
541
+ cdg_styles: dict,
542
+ ) -> Tuple[str, str, str]:
543
+ """Generate a CDG file from an LRC file and audio file.
544
+
545
+ Args:
546
+ lrc_file: Path to the LRC file
547
+ audio_file: Path to the audio file
548
+ title: Title of the song
549
+ artist: Artist name
550
+ cdg_styles: Dictionary containing CDG style parameters
551
+
552
+ Returns:
553
+ Tuple containing paths to (cdg_file, mp3_file, zip_file)
554
+ """
555
+ self._validate_and_setup_font(cdg_styles)
556
+
557
+ # Parse LRC file and convert to lyrics_data format
558
+ lyrics_data = self._parse_lrc(lrc_file)
559
+
560
+ toml_file = self._create_toml_file(
561
+ audio_file=audio_file,
562
+ title=title,
563
+ artist=artist,
564
+ lyrics_data=lyrics_data,
565
+ cdg_styles=cdg_styles,
566
+ )
567
+
568
+ try:
569
+ self._compose_cdg(toml_file)
570
+ output_zip = self._find_cdg_zip(artist, title)
571
+ self._extract_cdg_files(output_zip)
572
+
573
+ cdg_file = self._get_cdg_path(artist, title)
574
+ mp3_file = self._get_mp3_path(artist, title)
575
+
576
+ self._verify_output_files(cdg_file, mp3_file)
577
+
578
+ self.logger.info("CDG file generated successfully")
579
+ return cdg_file, mp3_file, output_zip
580
+
581
+ except Exception as e:
582
+ self.logger.error(f"Error composing CDG: {e}")
583
+ raise
584
+
585
+ def _parse_lrc(self, lrc_file: str) -> List[dict]:
586
+ """Parse LRC file and extract timestamps and lyrics."""
587
+ with open(lrc_file, "r", encoding="utf-8") as f:
588
+ content = f.read()
589
+
590
+ # Extract timestamps and lyrics
591
+ pattern = r"\[(\d{2}):(\d{2})\.(\d{3})\](\d+:)?(/?.*)"
592
+ matches = re.findall(pattern, content)
593
+
594
+ if not matches:
595
+ raise ValueError(f"No valid lyrics found in the LRC file: {lrc_file}")
596
+
597
+ lyrics = []
598
+ for match in matches:
599
+ minutes, seconds, milliseconds = map(int, match[:3])
600
+ timestamp = (minutes * 60 + seconds) * 100 + int(milliseconds / 10) # Convert to centiseconds
601
+ text = match[4].strip().upper()
602
+ if text: # Only add non-empty lyrics
603
+ lyrics.append({"timestamp": timestamp, "text": text})
604
+
605
+ self.logger.info(f"Found {len(lyrics)} lyric lines")
606
+ return lyrics