lyrics-transcriber: lyrics_transcriber-0.36.1-py3-none-any.whl → lyrics_transcriber-0.39.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. lyrics_transcriber/core/controller.py +22 -2
  2. lyrics_transcriber/correction/corrector.py +8 -8
  3. lyrics_transcriber/correction/handlers/base.py +4 -0
  4. lyrics_transcriber/correction/handlers/extend_anchor.py +22 -2
  5. lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
  6. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
  7. lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
  8. lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
  9. lyrics_transcriber/correction/handlers/word_operations.py +8 -2
  10. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js +26696 -0
  11. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +1 -0
  12. lyrics_transcriber/frontend/dist/index.html +1 -1
  13. lyrics_transcriber/frontend/package.json +3 -2
  14. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +1 -2
  15. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +76 -70
  16. lyrics_transcriber/frontend/src/components/EditModal.tsx +11 -2
  17. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +154 -128
  18. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +42 -4
  19. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +59 -15
  20. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +71 -16
  21. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +16 -19
  22. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
  23. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +72 -57
  24. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +113 -41
  25. lyrics_transcriber/frontend/src/components/shared/types.ts +6 -3
  26. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +202 -0
  27. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +23 -24
  28. lyrics_transcriber/frontend/src/types.ts +25 -15
  29. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  30. lyrics_transcriber/frontend/vite.config.js +4 -0
  31. lyrics_transcriber/frontend/vite.config.ts +4 -0
  32. lyrics_transcriber/lyrics/genius.py +41 -12
  33. lyrics_transcriber/output/cdg.py +33 -6
  34. lyrics_transcriber/output/cdgmaker/composer.py +839 -534
  35. lyrics_transcriber/output/video.py +17 -7
  36. lyrics_transcriber/review/server.py +22 -8
  37. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/METADATA +3 -2
  38. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/RECORD +41 -40
  39. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/entry_points.txt +1 -0
  40. lyrics_transcriber/frontend/dist/assets/index-ztlAYPYT.js +0 -181
  41. lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
  42. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/LICENSE +0 -0
  43. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.39.0.dist-info}/WHEEL +0 -0
@@ -82,19 +82,48 @@ class GeniusProvider(BaseLyricsProvider):
82
82
 
83
83
  def _clean_lyrics(self, lyrics: str) -> str:
84
84
  """Clean and process lyrics from Genius to remove unwanted content."""
85
+ self.logger.debug("Starting lyrics cleaning process")
86
+ original = lyrics
85
87
 
86
88
  lyrics = lyrics.replace("\\n", "\n")
87
89
  lyrics = re.sub(r"You might also like", "", lyrics)
88
- lyrics = re.sub(
89
- r".*?Lyrics([A-Z])", r"\1", lyrics
90
- ) # Remove the song name and word "Lyrics" if this has a non-newline char at the start
91
- lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics) # Remove this example: 27 ContributorsSex Bomb Lyrics
92
- lyrics = re.sub(
93
- r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
94
- ) # Remove this example: See Tom Jones LiveGet tickets as low as $71
95
- lyrics = re.sub(r"[0-9]+Embed$", "", lyrics) # Remove the word "Embed" at end of line with preceding numbers if found
96
- lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
97
- lyrics = re.sub(r"^Embed$", r"", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
98
- lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics) # Remove lines containing square brackets
99
- # add any additional cleaning rules here
90
+ if original != lyrics:
91
+ self.logger.debug("Removed 'You might also like' text")
92
+
93
+ original = lyrics
94
+ lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
95
+ if original != lyrics:
96
+ self.logger.debug("Removed song name and 'Lyrics' prefix")
97
+
98
+ original = lyrics
99
+ lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
100
+ if original != lyrics:
101
+ self.logger.debug("Removed contributors count and 'Lyrics' text")
102
+
103
+ original = lyrics
104
+ lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
105
+ if original != lyrics:
106
+ self.logger.debug("Removed ticket sales text")
107
+
108
+ original = lyrics
109
+ lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
110
+ if original != lyrics:
111
+ self.logger.debug("Removed numbered embed marker")
112
+
113
+ original = lyrics
114
+ lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
115
+ if original != lyrics:
116
+ self.logger.debug("Removed 'Embed' suffix from word")
117
+
118
+ original = lyrics
119
+ lyrics = re.sub(r"^Embed$", r"", lyrics)
120
+ if original != lyrics:
121
+ self.logger.debug("Removed standalone 'Embed' text")
122
+
123
+ original = lyrics
124
+ lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics)
125
+ if original != lyrics:
126
+ self.logger.debug("Removed lines containing square brackets")
127
+
128
+ self.logger.debug("Completed lyrics cleaning process")
100
129
  return lyrics
@@ -28,6 +28,28 @@ class CDGGenerator:
28
28
  self.logger = logger or logging.getLogger(__name__)
29
29
  self.cdg_visible_width = 280
30
30
 
31
+ def _sanitize_filename(self, filename: str) -> str:
32
+ """Replace or remove characters that are unsafe for filenames."""
33
+ if not filename:
34
+ return ""
35
+ # Replace problematic characters with underscores
36
+ for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
37
+ filename = filename.replace(char, "_")
38
+ # Remove any trailing spaces
39
+ filename = filename.rstrip(" ")
40
+ return filename
41
+
42
+ def _get_safe_filename(self, artist: str, title: str, suffix: str = "", ext: str = "") -> str:
43
+ """Create a safe filename from artist and title."""
44
+ safe_artist = self._sanitize_filename(artist)
45
+ safe_title = self._sanitize_filename(title)
46
+ base = f"{safe_artist} - {safe_title}"
47
+ if suffix:
48
+ base += f" ({suffix})"
49
+ if ext:
50
+ base += f".{ext}"
51
+ return base
52
+
31
53
  def generate_cdg(
32
54
  self,
33
55
  segments: List[LyricsSegment],
@@ -103,7 +125,8 @@ class CDGGenerator:
103
125
  cdg_styles: dict,
104
126
  ) -> str:
105
127
  """Create TOML configuration file for CDG generation."""
106
- toml_file = os.path.join(self.output_dir, f"{artist} - {title} (Karaoke CDG).toml")
128
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "toml")
129
+ toml_file = os.path.join(self.output_dir, safe_filename)
107
130
  self.logger.debug(f"Generating TOML file: {toml_file}")
108
131
 
109
132
  self.generate_toml(
@@ -166,11 +189,12 @@ class CDGGenerator:
166
189
  """Compose CDG using KaraokeComposer."""
167
190
  kc = KaraokeComposer.from_file(toml_file)
168
191
  kc.compose()
192
+ kc.create_mp4(height=1080, fps=30)
169
193
 
170
194
  def _find_cdg_zip(self, artist: str, title: str) -> str:
171
195
  """Find the generated CDG ZIP file."""
172
- expected_zip = f"{artist} - {title} (Karaoke CDG).zip"
173
- output_zip = os.path.join(self.output_dir, expected_zip)
196
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "zip")
197
+ output_zip = os.path.join(self.output_dir, safe_filename)
174
198
 
175
199
  self.logger.info(f"Looking for CDG ZIP file in output directory: {output_zip}")
176
200
 
@@ -191,11 +215,13 @@ class CDGGenerator:
191
215
 
192
216
  def _get_cdg_path(self, artist: str, title: str) -> str:
193
217
  """Get the path to the CDG file."""
194
- return os.path.join(self.output_dir, f"{artist} - {title} (Karaoke CDG).cdg")
218
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "cdg")
219
+ return os.path.join(self.output_dir, safe_filename)
195
220
 
196
221
  def _get_mp3_path(self, artist: str, title: str) -> str:
197
222
  """Get the path to the MP3 file."""
198
- return os.path.join(self.output_dir, f"{artist} - {title} (Karaoke CDG).mp3")
223
+ safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "mp3")
224
+ return os.path.join(self.output_dir, safe_filename)
199
225
 
200
226
  def _verify_output_files(self, cdg_file: str, mp3_file: str) -> None:
201
227
  """Verify that the required output files exist."""
@@ -349,11 +375,12 @@ class CDGGenerator:
349
375
  cdg_styles: dict,
350
376
  ) -> dict:
351
377
  """Create TOML data structure."""
378
+ safe_output_name = self._get_safe_filename(artist, title, "Karaoke CDG")
352
379
  return {
353
380
  "title": title,
354
381
  "artist": artist,
355
382
  "file": audio_file,
356
- "outname": output_name,
383
+ "outname": safe_output_name,
357
384
  "clear_mode": cdg_styles["clear_mode"],
358
385
  "sync_offset": cdg_styles["sync_offset"],
359
386
  "background": cdg_styles["background_color"],