karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,384 @@
1
+ import logging
2
+ import re
3
+ from typing import Optional, Dict, Any
4
+ import requests
5
+ import lyricsgenius
6
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata
7
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
8
+
9
+
10
+ class GeniusProvider(BaseLyricsProvider):
11
+ """Handles fetching lyrics from Genius."""
12
+
13
def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
    """Store the API credentials and, if applicable, build a lyricsgenius client.

    Args:
        config: Provider configuration; ``genius_api_token`` and
            ``rapidapi_key`` are read from it.
        logger: Optional logger, forwarded to the base class. When given, its
            effective level decides whether the lyricsgenius client is verbose.
    """
    super().__init__(config, logger)
    self.api_token = config.genius_api_token
    self.rapidapi_key = config.rapidapi_key
    self.client = None

    # The lyricsgenius client is only created when a token is available and
    # no RapidAPI key has been configured; otherwise self.client stays None.
    if not self.api_token or self.rapidapi_key:
        return

    debug_logging = logger.getEffectiveLevel() == logging.DEBUG if logger else False
    self.client = lyricsgenius.Genius(
        self.api_token,
        verbose=debug_logging,
        remove_section_headers=True,  # Drop [Chorus], [Verse], etc.
        skip_non_songs=True,  # Ignore track listings and other non-song results
        timeout=10,  # Per-request timeout
        retries=3,  # Retry failed requests this many times
        sleep_time=1,  # Short pause between requests to be gentle on the API
    )
29
+
30
+ def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
31
+ """Fetch raw song data from Genius API or RapidAPI."""
32
+ # Try RapidAPI first if available
33
+ if self.rapidapi_key:
34
+ self.logger.info(f"Trying RapidAPI for {artist} - {title}")
35
+ result = self._fetch_from_rapidapi(artist, title)
36
+ if result:
37
+ return result
38
+
39
+ # Fall back to direct Genius API
40
+ if not self.client:
41
+ self.logger.warning("No Genius API token provided and RapidAPI failed")
42
+ return None
43
+
44
+ self.logger.info(f"Searching Genius for {artist} - {title}")
45
+ try:
46
+ song = self.client.search_song(title, artist)
47
+ if song:
48
+ self.logger.info("Found lyrics on Genius")
49
+ return song.to_dict()
50
+ except Exception as e:
51
+ self.logger.error(f"Error fetching from Genius: {str(e)}")
52
+ return None
53
+
54
def _fetch_from_rapidapi(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
    """Fetch lyrics and basic song metadata through the RapidAPI Genius endpoints.

    Two GET requests are issued: a search for "<artist> <title>", followed by
    a lyrics lookup for the first search hit that carries an id.

    Args:
        artist: Artist name used in the search query.
        title: Song title used in the search query.

    Returns:
        A dict with ``title``, ``primary_artist``, ``lyrics``, ``id``, ``url``,
        ``release_date_for_display`` and a ``_rapidapi_source`` marker, or None
        when nothing usable was found or any error occurred (errors are logged).
    """
    try:
        request_headers = {
            "x-rapidapi-key": self.rapidapi_key,
            "x-rapidapi-host": "genius-song-lyrics1.p.rapidapi.com"
        }

        # --- Request 1: search for the song ---
        query_params = {
            "q": f"{artist} {title}",
            "per_page": "10",
            "page": "1"
        }
        self.logger.debug(f"Making RapidAPI search request for '{artist} {title}'")
        search_response = requests.get(
            "https://genius-song-lyrics1.p.rapidapi.com/search/",
            headers=request_headers,
            params=query_params,
            timeout=10,
        )
        search_response.raise_for_status()
        search_payload = search_response.json()

        if not search_payload.get("hits"):
            self.logger.warning("No search results from RapidAPI")
            return None

        # The first hit whose result has a truthy id is taken as the match
        candidates = (hit.get("result", {}) for hit in search_payload["hits"])
        chosen = next((candidate for candidate in candidates if candidate.get("id")), None)
        if not chosen:
            self.logger.warning("No valid song ID found in RapidAPI search results")
            return None

        song_id = chosen["id"]
        self.logger.debug(f"Found song ID: {song_id}")

        # --- Request 2: fetch the lyrics for that id ---
        self.logger.debug(f"Making RapidAPI lyrics request for song ID {song_id}")
        lyrics_response = requests.get(
            "https://genius-song-lyrics1.p.rapidapi.com/song/lyrics/",
            headers=request_headers,
            params={"id": str(song_id)},
            timeout=10,
        )
        lyrics_response.raise_for_status()

        # The lyrics text sits somewhere inside a nested payload
        lyrics_text = self._extract_lyrics_from_rapidapi_response(lyrics_response.json())
        if not lyrics_text:
            self.logger.warning("No lyrics found in RapidAPI response")
            return None

        # Build a small, self-contained result instead of passing the whole
        # search hit along; the marker lets the converter pick the right format.
        song_record = {
            "title": chosen.get("title", ""),
            "primary_artist": chosen.get("primary_artist", {}),
            "lyrics": lyrics_text,
            "id": song_id,
            "url": chosen.get("url", ""),
            "release_date_for_display": chosen.get("release_date_for_display", ""),
            "_rapidapi_source": True
        }

        self.logger.info("Successfully fetched lyrics from RapidAPI")
        return song_record

    except requests.exceptions.RequestException as exc:
        self.logger.error(f"RapidAPI request failed: {str(exc)}")
        return None
    except Exception as exc:
        self.logger.error(f"Error fetching from RapidAPI: {str(exc)}")
        return None
133
+
134
+ def _extract_lyrics_from_rapidapi_response(self, lyrics_data: Dict[str, Any]) -> Optional[str]:
135
+ """Extract lyrics text from RapidAPI response structure."""
136
+ try:
137
+ # Log the actual response structure for debugging
138
+ self.logger.debug(f"RapidAPI response structure: {lyrics_data}")
139
+
140
+ # Try different possible response structures
141
+
142
+ # Structure 1: lyrics.lyrics.body.html (the actual RapidAPI structure)
143
+ nested_lyrics = lyrics_data.get("lyrics", {}).get("lyrics", {})
144
+ if isinstance(nested_lyrics, dict):
145
+ html_content = nested_lyrics.get("body", {}).get("html")
146
+ if html_content:
147
+ return self._clean_html_lyrics(html_content)
148
+
149
+ # Structure 2: lyrics.lyrics (simple string)
150
+ if isinstance(lyrics_data.get("lyrics", {}).get("lyrics"), str):
151
+ return lyrics_data["lyrics"]["lyrics"]
152
+
153
+ # Structure 3: lyrics.body.html (HTML content)
154
+ html_content = lyrics_data.get("lyrics", {}).get("body", {}).get("html")
155
+ if html_content:
156
+ return self._clean_html_lyrics(html_content)
157
+
158
+ # Structure 4: Direct lyrics field
159
+ if isinstance(lyrics_data.get("lyrics"), str):
160
+ return lyrics_data["lyrics"]
161
+
162
+ # Structure 5: body.html at top level
163
+ if lyrics_data.get("body", {}).get("html"):
164
+ return self._clean_html_lyrics(lyrics_data["body"]["html"])
165
+
166
+ # Structure 6: Check if lyrics is a dict with other possible keys
167
+ lyrics_obj = lyrics_data.get("lyrics", {})
168
+ if isinstance(lyrics_obj, dict):
169
+ # Try common alternative keys
170
+ for key in ["text", "content", "plain", "body"]:
171
+ if key in lyrics_obj:
172
+ content = lyrics_obj[key]
173
+ if isinstance(content, str):
174
+ return content
175
+ elif isinstance(content, dict) and "html" in content:
176
+ return self._clean_html_lyrics(content["html"])
177
+ elif isinstance(content, dict) and "text" in content:
178
+ return content["text"]
179
+
180
+ self.logger.warning(f"Unknown RapidAPI response structure: {list(lyrics_data.keys())}")
181
+ if "lyrics" in lyrics_data:
182
+ self.logger.warning(f"Lyrics object structure: {lyrics_data['lyrics']}")
183
+ return None
184
+
185
+ except Exception as e:
186
+ self.logger.error(f"Error extracting lyrics from RapidAPI response: {str(e)}")
187
+ return None
188
+
189
+ def _clean_html_lyrics(self, html_content: str) -> str:
190
+ """Clean HTML content to extract plain text lyrics."""
191
+ import re
192
+
193
+ if not html_content:
194
+ return ""
195
+
196
+ # Remove HTML tags while preserving line breaks
197
+ text = re.sub(r'<br\s*/?>', '\n', html_content) # Convert <br> to newlines
198
+ text = re.sub(r'<[^>]+>', '', text) # Remove all other HTML tags
199
+
200
+ # Decode HTML entities
201
+ text = text.replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
202
+ text = text.replace('&quot;', '"').replace('&#x27;', "'").replace('&nbsp;', ' ')
203
+
204
+ # Remove section markers but keep the lyrics content
205
+ # Instead of removing entire lines, just remove the square bracket markers
206
+ text = re.sub(r'\[Verse \d+\]', '', text)
207
+ text = re.sub(r'\[Pre-Chorus\]', '', text)
208
+ text = re.sub(r'\[Chorus\]', '', text)
209
+ text = re.sub(r'\[Refrain\]', '', text)
210
+ text = re.sub(r'\[Outro\]', '', text)
211
+ text = re.sub(r'\[Bridge\]', '', text)
212
+ text = re.sub(r'\[Intro\]', '', text)
213
+
214
+ # Clean up multiple consecutive newlines
215
+ text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)
216
+
217
+ # Clean up leading/trailing whitespace
218
+ text = text.strip()
219
+
220
+ return text
221
+
222
def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
    """Turn a raw song dict into the standardized LyricsData form.

    The ``_rapidapi_source`` marker in ``raw_data`` selects the converter:
    when truthy the RapidAPI converter is used, otherwise the lyricsgenius one.
    """
    converter = (
        self._convert_rapidapi_format
        if raw_data.get("_rapidapi_source", False)
        else self._convert_lyricsgenius_format
    )
    return converter(raw_data)
231
+
232
def _convert_lyricsgenius_format(self, raw_data: Dict[str, Any]) -> LyricsData:
    """Convert a lyricsgenius song dict to the standardized format.

    Optional fields are read defensively: ``album``, ``verified_annotations_by``
    and ``verified_contributors`` may be missing *or* present with a None
    value, and both cases are treated as "no data" instead of raising.

    Args:
        raw_data: Song dict as returned by the lyricsgenius client's ``to_dict()``.

    Returns:
        LyricsData with unsynced segments built from the cleaned lyrics and
        metadata describing the Genius entry.
    """
    # Clean the lyrics before processing
    lyrics = self._clean_lyrics(raw_data.get("lyrics", ""))

    # Build an ISO-style date only when year, month and day are all present
    release_date = None
    if release_components := raw_data.get("release_date_components"):
        year = release_components.get("year")
        month = release_components.get("month")
        day = release_components.get("day")
        if all(x is not None for x in (year, month, day)):
            release_date = f"{year}-{month:02d}-{day:02d}"

    # `.get(key, default)` only covers a missing key; an explicit None value
    # would otherwise reach `.get("name")` / `len()` and raise.
    album = raw_data.get("album")
    album_name = album.get("name") if isinstance(album, dict) else None
    verified_annotations_by = raw_data.get("verified_annotations_by") or []
    verified_contributors = raw_data.get("verified_contributors") or []

    # Create metadata object
    metadata = LyricsMetadata(
        source="genius",
        track_name=raw_data.get("title", ""),
        artist_names=raw_data.get("artist_names", ""),
        album_name=album_name,
        lyrics_provider="genius",
        lyrics_provider_id=str(raw_data.get("id")),
        is_synced=False,  # Genius doesn't provide synced lyrics
        provider_metadata={
            "genius_id": raw_data.get("id"),
            "release_date": release_date,
            "page_url": raw_data.get("url"),
            "annotation_count": raw_data.get("annotation_count"),
            "lyrics_state": raw_data.get("lyrics_state"),
            "lyrics_owner_id": raw_data.get("lyrics_owner_id"),
            "pyongs_count": raw_data.get("pyongs_count"),
            "verified_annotations": len(verified_annotations_by),
            "verified_contributors": len(verified_contributors),
            "external_urls": {"genius": raw_data.get("url")},
            "api_source": "lyricsgenius",
        },
    )

    # Create segments with words from cleaned lyrics
    segments = self._create_segments_with_words(lyrics, is_synced=False)

    # Create result object with segments
    return LyricsData(source="genius", segments=segments, metadata=metadata)
275
+
276
def _convert_rapidapi_format(self, raw_data: Dict[str, Any]) -> LyricsData:
    """Convert a RapidAPI song dict to the standardized format.

    Args:
        raw_data: Song dict carrying the ``_rapidapi_source`` marker.

    Returns:
        LyricsData with unsynced segments built from the cleaned lyrics and
        metadata whose ``api_source`` is ``"rapidapi"``.
    """
    cleaned_lyrics = self._clean_lyrics(raw_data.get("lyrics", ""))

    # The artist name lives inside the primary_artist object
    artist_name = raw_data.get("primary_artist", {}).get("name", "")

    # An album name is only read when a truthy album entry is present
    album_entry = raw_data.get("album")
    album_name = album_entry.get("name") if album_entry else None

    song_id = raw_data.get("id")
    page_url = raw_data.get("url")

    provider_details = {
        "genius_id": song_id,
        # Passed through as the display string, not reformatted
        "release_date": raw_data.get("release_date_for_display"),
        "page_url": page_url,
        "annotation_count": raw_data.get("annotation_count"),
        "lyrics_state": raw_data.get("lyrics_state"),
        "pyongs_count": raw_data.get("pyongs_count"),
        "external_urls": {"genius": page_url},
        "api_source": "rapidapi",
    }

    metadata = LyricsMetadata(
        source="genius",
        track_name=raw_data.get("title", ""),
        artist_names=artist_name,
        album_name=album_name,
        lyrics_provider="genius",
        lyrics_provider_id=str(song_id),
        is_synced=False,  # Genius doesn't provide synced lyrics
        provider_metadata=provider_details,
    )

    # Segments (with words) are derived from the cleaned text
    segments = self._create_segments_with_words(cleaned_lyrics, is_synced=False)

    return LyricsData(source="genius", segments=segments, metadata=metadata)
314
+
315
+ def _clean_lyrics(self, lyrics: str) -> str:
316
+ """Clean and process lyrics from Genius to remove unwanted content."""
317
+ self.logger.debug("Starting lyrics cleaning process")
318
+
319
+ # Handle unexpected input types
320
+ if not isinstance(lyrics, str):
321
+ self.logger.warning(f"Expected string for lyrics, got {type(lyrics)}: {repr(lyrics)}")
322
+ if lyrics is None:
323
+ return ""
324
+ # Try to convert to string
325
+ try:
326
+ lyrics = str(lyrics)
327
+ except Exception as e:
328
+ self.logger.error(f"Failed to convert lyrics to string: {e}")
329
+ return ""
330
+
331
+ original = lyrics
332
+
333
+ lyrics = lyrics.replace("\\n", "\n")
334
+ lyrics = re.sub(r"You might also like", "", lyrics)
335
+ if original != lyrics:
336
+ self.logger.debug("Removed 'You might also like' text")
337
+
338
+ original = lyrics
339
+ lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
340
+ if original != lyrics:
341
+ self.logger.debug("Removed song name and 'Lyrics' prefix")
342
+
343
+ original = lyrics
344
+ lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
345
+ if original != lyrics:
346
+ self.logger.debug("Removed contributors count and 'Lyrics' text")
347
+
348
+ original = lyrics
349
+ lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
350
+ if original != lyrics:
351
+ self.logger.debug("Removed ticket sales text")
352
+
353
+ original = lyrics
354
+ lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
355
+ if original != lyrics:
356
+ self.logger.debug("Removed numbered embed marker")
357
+
358
+ original = lyrics
359
+ lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
360
+ if original != lyrics:
361
+ self.logger.debug("Removed 'Embed' suffix from word")
362
+
363
+ original = lyrics
364
+ lyrics = re.sub(r"^Embed$", r"", lyrics)
365
+ if original != lyrics:
366
+ self.logger.debug("Removed standalone 'Embed' text")
367
+
368
+ # Remove section markers but keep the lyrics content (for non-HTML lyrics)
369
+ # Instead of removing entire lines, just remove the square bracket markers
370
+ original = lyrics
371
+ lyrics = re.sub(r'\[Verse \d+\]', '', lyrics)
372
+ lyrics = re.sub(r'\[Pre-Chorus\]', '', lyrics)
373
+ lyrics = re.sub(r'\[Chorus\]', '', lyrics)
374
+ lyrics = re.sub(r'\[Refrain\]', '', lyrics)
375
+ lyrics = re.sub(r'\[Outro\]', '', lyrics)
376
+ lyrics = re.sub(r'\[Bridge\]', '', lyrics)
377
+ lyrics = re.sub(r'\[Intro\]', '', lyrics)
378
+ if original != lyrics:
379
+ self.logger.debug("Removed section markers while preserving lyrics content")
380
+
381
+ # Remove common LyricsGenius page elements
382
+
383
+ self.logger.debug("Completed lyrics cleaning process")
384
+ return lyrics
@@ -0,0 +1,231 @@
1
+ import logging
2
+ import re
3
+ from typing import Optional, Dict, Any, List
4
+ import requests
5
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
6
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
7
+ from lyrics_transcriber.utils.word_utils import WordUtils
8
+
9
+
10
class LRCLIBProvider(BaseLyricsProvider):
    """Handles fetching lyrics from LRCLIB.

    Lookup strategy: when the duration of the local audio file can be read, an
    exact-signature lookup (``/api/get``) is attempted first. If that yields
    nothing, or no duration is available, the search endpoint (``/api/search``)
    is queried and its first result is used.
    """

    BASE_URL = "https://lrclib.net"
    USER_AGENT = "lyrics-transcriber (https://github.com/nomadkaraoke/python-lyrics-transcriber)"

    # Timeout (seconds) applied to every HTTP request made to LRCLIB.
    REQUEST_TIMEOUT = 15

    # Length (seconds) given to a lyric line that has no later timestamp to
    # bound it (typically the last line of the song).
    DEFAULT_LINE_DURATION = 3.0

    # One LRC line: "[mm:ss.xx] text". The fractional part is optional and the
    # text may be empty -- an empty timestamped line marks a gap and is still
    # needed to bound the end of the preceding lyric line.
    _LRC_LINE_PATTERN = re.compile(r"\[(\d+):(\d+)(?:\.(\d+))?\]\s*(.*)")

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Initialise the provider.

        Args:
            config: Provider configuration, forwarded to the base class.
            logger: Optional logger, forwarded to the base class.
        """
        super().__init__(config, logger)
        self.duration = None  # Set by _fetch_data_from_source once the audio duration is known

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw song data from the LRCLIB API.

        Args:
            artist: Artist name to look up.
            title: Track title to look up.

        Returns:
            The raw LRCLIB record for the track, or None when nothing was found.
        """
        self.logger.info(f"Searching LRCLIB for {artist} - {title}")

        # Try to get duration from audio file if available
        duration = self._get_track_duration()
        self.duration = duration

        if duration:
            # Try exact match with duration first
            result = self._fetch_with_duration(artist, title, "", duration)
            if result:
                return result

        # Fall back to search API if exact match fails or duration unavailable
        result = self._fetch_from_search(artist, title)
        if result:
            return result

        self.logger.warning(f"No lyrics found on LRCLIB for {artist} - {title}")
        return None

    def _get_track_duration(self) -> Optional[int]:
        """Get track duration in whole seconds from the audio file.

        Returns:
            The duration truncated to an int, or None when no audio file path
            is set or the file's duration cannot be determined.
        """
        # NOTE(review): audio_filepath is not assigned in this class; presumably
        # the base class provides it -- confirm.
        if not self.audio_filepath:
            return None

        try:
            # Imported lazily so a missing/broken mutagen install only disables
            # the duration-based lookup instead of the whole provider.
            import mutagen

            audio = mutagen.File(self.audio_filepath)
            if audio and audio.info:
                duration = int(audio.info.length)
                self.logger.debug(f"Track duration: {duration} seconds")
                return duration
        except Exception as e:
            # Best effort: the caller falls back to the search endpoint.
            self.logger.warning(f"Could not determine track duration: {str(e)}")

        return None

    def _fetch_with_duration(self, artist: str, title: str, album: str, duration: int) -> Optional[Dict[str, Any]]:
        """Fetch lyrics using the exact signature endpoint.

        Args:
            artist: Artist name.
            title: Track title.
            album: Album name; omitted from the request when empty.
            duration: Track duration in seconds.

        Returns:
            The decoded JSON record, or None on a 404 or any request error.
        """
        try:
            url = f"{self.BASE_URL}/api/get"
            params = {
                "artist_name": artist,
                "track_name": title,
                "duration": duration,
            }
            # album_name is optional for this endpoint. An empty value would
            # still be transmitted as "album_name=" and may constrain the match,
            # so only send it when the caller actually supplied an album.
            if album:
                params["album_name"] = album

            headers = {"User-Agent": self.USER_AGENT}

            self.logger.debug(f"Making LRCLIB request with duration {duration}s")
            response = requests.get(url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT)

            if response.status_code == 404:
                self.logger.debug("Track not found with exact duration")
                return None

            response.raise_for_status()
            data = response.json()

            self.logger.info("Successfully fetched lyrics from LRCLIB")
            return data

        except requests.exceptions.RequestException as e:
            self.logger.error(f"LRCLIB request failed: {str(e)}")
            return None
        except Exception as e:
            self.logger.error(f"Error fetching from LRCLIB: {str(e)}")
            return None

    def _fetch_from_search(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch lyrics using the search endpoint.

        Args:
            artist: Artist name.
            title: Track title.

        Returns:
            The first search result, or None when the search returned nothing
            or the request failed.
        """
        try:
            url = f"{self.BASE_URL}/api/search"
            params = {
                "track_name": title,
                "artist_name": artist,
            }
            headers = {"User-Agent": self.USER_AGENT}

            self.logger.debug("Making LRCLIB search request")
            response = requests.get(url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()

            results = response.json()

            if not results:
                self.logger.debug("No search results from LRCLIB")
                return None

            # Return the first (best) match
            best_match = results[0]
            self.logger.info(f"Found lyrics via LRCLIB search: {best_match.get('trackName')} by {best_match.get('artistName')}")
            return best_match

        except requests.exceptions.RequestException as e:
            self.logger.error(f"LRCLIB search request failed: {str(e)}")
            return None
        except Exception as e:
            self.logger.error(f"Error searching LRCLIB: {str(e)}")
            return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert LRCLIB's raw API response to the standardized format.

        Synced (LRC) lyrics are preferred; plain lyrics are used when no synced
        lyrics are present; otherwise the result has no segments.

        Args:
            raw_data: One track record as returned by LRCLIB.

        Returns:
            LyricsData with source "lrclib", the parsed segments and metadata.
        """
        # Check if track is instrumental
        is_instrumental = raw_data.get("instrumental", False)

        # Either field may be missing or null; treat whitespace-only as absent.
        synced_lyrics = raw_data.get("syncedLyrics", "")
        plain_lyrics = raw_data.get("plainLyrics", "")
        has_synced = bool(synced_lyrics and synced_lyrics.strip())
        has_plain = bool(plain_lyrics and plain_lyrics.strip())

        record_id = raw_data.get("id")
        duration_seconds = raw_data.get("duration")
        # The duration may arrive as a float; normalise to integer milliseconds.
        duration_ms = int(round(duration_seconds * 1000)) if duration_seconds else None

        # Create metadata object
        metadata = LyricsMetadata(
            source="lrclib",
            track_name=raw_data.get("trackName", ""),
            artist_names=raw_data.get("artistName", ""),
            album_name=raw_data.get("albumName"),
            duration_ms=duration_ms,
            is_synced=has_synced,
            lyrics_provider="lrclib",
            lyrics_provider_id=str(record_id) if record_id else None,
            provider_metadata={
                "lrclib_id": record_id,
                "duration": duration_seconds,
                "instrumental": is_instrumental,
                "has_synced_lyrics": has_synced,
                "has_plain_lyrics": has_plain,
            },
        )

        # Create segments based on whether we have synced or plain lyrics
        if has_synced:
            segments = self._parse_synced_lyrics(synced_lyrics)
        elif has_plain:
            # NOTE(review): _create_segments_with_words is not defined in this
            # class; presumably inherited from BaseLyricsProvider -- confirm.
            segments = self._create_segments_with_words(plain_lyrics, is_synced=False)
        else:
            # Empty segments for instrumental tracks
            segments = []

        return LyricsData(source="lrclib", segments=segments, metadata=metadata)

    @staticmethod
    def _lrc_timestamp_to_seconds(minutes: str, seconds: str, fraction: Optional[str]) -> float:
        """Convert the captured parts of an LRC timestamp to seconds.

        The fraction is scaled by its own digit count, so both centiseconds
        ("12" -> 0.12) and milliseconds ("123" -> 0.123) are read correctly.
        """
        fractional = int(fraction) / (10 ** len(fraction)) if fraction else 0.0
        return int(minutes) * 60 + int(seconds) + fractional

    def _parse_synced_lyrics(self, synced_lyrics: str) -> List[LyricsSegment]:
        """Parse LRC format synced lyrics into segments with timing.

        Each timestamped line with text becomes one segment. A segment ends at
        the next timestamped line (including empty gap markers) when that
        timestamp is later than its own start; otherwise it lasts
        DEFAULT_LINE_DURATION seconds. The segment's time span is divided
        evenly between its whitespace-separated words.

        Args:
            synced_lyrics: Lyrics text in LRC format ("[mm:ss.xx] text" lines).

        Returns:
            The segments in input order; lines without a timestamp and
            timestamped lines without text produce no segment.
        """
        # First pass: collect every timestamped line as (start_seconds, text).
        # Untimestamped lines (blank lines, metadata tags) are dropped here so
        # they cannot hide the next real timestamp from the previous line.
        timed_lines = []
        for raw_line in synced_lyrics.strip().split("\n"):
            match = self._LRC_LINE_PATTERN.match(raw_line.strip())
            if not match:
                continue
            minutes, seconds, fraction, text = match.groups()
            start = self._lrc_timestamp_to_seconds(minutes, seconds, fraction)
            timed_lines.append((start, text.strip()))

        # Second pass: build a segment for each line that carries text.
        segments = []
        for index, (start_time, text) in enumerate(timed_lines):
            # Gap markers only serve to bound the previous line.
            if not text:
                continue

            end_time = start_time + self.DEFAULT_LINE_DURATION
            if index + 1 < len(timed_lines):
                next_start = timed_lines[index + 1][0]
                # Ignore duplicate/out-of-order timestamps, which would give
                # zero or negative durations.
                if next_start > start_time:
                    end_time = next_start

            # text is non-empty after strip(), so split() yields >= 1 word.
            word_texts = text.split()
            word_duration = (end_time - start_time) / len(word_texts)

            words = [
                Word(
                    id=WordUtils.generate_id(),
                    text=word_text,
                    start_time=start_time + (position * word_duration),
                    end_time=start_time + ((position + 1) * word_duration),
                    confidence=1.0,
                    created_during_correction=False,
                )
                for position, word_text in enumerate(word_texts)
            ]

            segments.append(
                LyricsSegment(
                    id=WordUtils.generate_id(),
                    text=text,
                    words=words,
                    start_time=start_time,
                    end_time=end_time,
                )
            )

        return segments
231
+