PyPI - karaoke-gen - Versions diffs - 0.75.54__py3-none-any.whl - Mend

karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show

karaoke_gen/__init__.py +38 -0
karaoke_gen/audio_fetcher.py +1614 -0
karaoke_gen/audio_processor.py +790 -0
karaoke_gen/config.py +83 -0
karaoke_gen/file_handler.py +387 -0
karaoke_gen/instrumental_review/__init__.py +45 -0
karaoke_gen/instrumental_review/analyzer.py +408 -0
karaoke_gen/instrumental_review/editor.py +322 -0
karaoke_gen/instrumental_review/models.py +171 -0
karaoke_gen/instrumental_review/server.py +475 -0
karaoke_gen/instrumental_review/static/index.html +1529 -0
karaoke_gen/instrumental_review/waveform.py +409 -0
karaoke_gen/karaoke_finalise/__init__.py +1 -0
karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
karaoke_gen/karaoke_gen.py +1026 -0
karaoke_gen/lyrics_processor.py +474 -0
karaoke_gen/metadata.py +160 -0
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/__init__.py +18 -0
karaoke_gen/utils/bulk_cli.py +492 -0
karaoke_gen/utils/cli_args.py +432 -0
karaoke_gen/utils/gen_cli.py +978 -0
karaoke_gen/utils/remote_cli.py +3268 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen/video_generator.py +424 -0
karaoke_gen-0.75.54.dist-info/METADATA +718 -0
karaoke_gen-0.75.54.dist-info/RECORD +287 -0
karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +594 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +919 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +214 -0
lyrics_transcriber/frontend/src/api.ts +254 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +306 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +379 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +650 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0

lyrics_transcriber/lyrics/genius.py ADDED Viewed

@@ -0,0 +1,384 @@
+import logging
+import re
+from typing import Optional, Dict, Any
+import requests
+import lyricsgenius
+from lyrics_transcriber.types import LyricsData, LyricsMetadata
+from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
+class GeniusProvider(BaseLyricsProvider):
+    """Handles fetching lyrics from Genius."""
+    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
+        super().__init__(config, logger)
+        self.api_token = config.genius_api_token
+        self.rapidapi_key = config.rapidapi_key
+        self.client = None
+        # Only initialize lyricsgenius client if rapidapi_key is not set
+        if self.api_token and not self.rapidapi_key:
+            self.client = lyricsgenius.Genius(
+                self.api_token,
+                verbose=(logger.getEffectiveLevel() == logging.DEBUG if logger else False),
+                remove_section_headers=True,  # Remove [Chorus], [Verse], etc.
+                skip_non_songs=True,  # Skip track listings and other non-song results
+                timeout=10,  # Reasonable timeout for requests
+                retries=3,  # Number of retries for failed requests
+                sleep_time=1,  # Small delay between requests to be nice to the API
+            )
+    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """Fetch raw song data from Genius API or RapidAPI."""
+        # Try RapidAPI first if available
+        if self.rapidapi_key:
+            self.logger.info(f"Trying RapidAPI for {artist} - {title}")
+            result = self._fetch_from_rapidapi(artist, title)
+            if result:
+                return result
+        # Fall back to direct Genius API
+        if not self.client:
+            self.logger.warning("No Genius API token provided and RapidAPI failed")
+            return None
+        self.logger.info(f"Searching Genius for {artist} - {title}")
+        try:
+            song = self.client.search_song(title, artist)
+            if song:
+                self.logger.info("Found lyrics on Genius")
+                return song.to_dict()
+        except Exception as e:
+            self.logger.error(f"Error fetching from Genius: {str(e)}")
+        return None
+    def _fetch_from_rapidapi(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """Fetch song data using RapidAPI."""
+        try:
+            # Step 1: Search for the song
+            search_url = "https://genius-song-lyrics1.p.rapidapi.com/search/"
+            search_params = {
+                "q": f"{artist} {title}",
+                "per_page": "10",
+                "page": "1"
+            }
+            headers = {
+                "x-rapidapi-key": self.rapidapi_key,
+                "x-rapidapi-host": "genius-song-lyrics1.p.rapidapi.com"
+            }
+            self.logger.debug(f"Making RapidAPI search request for '{artist} {title}'")
+            search_response = requests.get(search_url, headers=headers, params=search_params, timeout=10)
+            search_response.raise_for_status()
+            search_data = search_response.json()
+            # Find the best match from search results
+            if not search_data.get("hits"):
+                self.logger.warning("No search results from RapidAPI")
+                return None
+            best_match = None
+            for hit in search_data["hits"]:
+                result = hit.get("result", {})
+                if result.get("id"):
+                    best_match = result
+                    break
+            if not best_match:
+                self.logger.warning("No valid song ID found in RapidAPI search results")
+                return None
+            song_id = best_match["id"]
+            self.logger.debug(f"Found song ID: {song_id}")
+            # Step 2: Fetch lyrics using the song ID
+            lyrics_url = "https://genius-song-lyrics1.p.rapidapi.com/song/lyrics/"
+            lyrics_params = {"id": str(song_id)}
+            self.logger.debug(f"Making RapidAPI lyrics request for song ID {song_id}")
+            lyrics_response = requests.get(lyrics_url, headers=headers, params=lyrics_params, timeout=10)
+            lyrics_response.raise_for_status()
+            lyrics_data = lyrics_response.json()
+            # Extract lyrics from the nested response structure
+            lyrics_text = self._extract_lyrics_from_rapidapi_response(lyrics_data)
+            if not lyrics_text:
+                self.logger.warning("No lyrics found in RapidAPI response")
+                return None
+            # Create a clean RapidAPI-only response structure
+            # Don't mix search metadata (which contains Genius fields) with our clean structure
+            rapidapi_response = {
+                "title": best_match.get("title", ""),
+                "primary_artist": best_match.get("primary_artist", {}),
+                "lyrics": lyrics_text,
+                "id": song_id,
+                "url": best_match.get("url", ""),
+                "release_date_for_display": best_match.get("release_date_for_display", ""),
+                # Mark this as RapidAPI source
+                "_rapidapi_source": True
+            }
+            self.logger.info("Successfully fetched lyrics from RapidAPI")
+            return rapidapi_response
+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"RapidAPI request failed: {str(e)}")
+            return None
+        except Exception as e:
+            self.logger.error(f"Error fetching from RapidAPI: {str(e)}")
+            return None
+    def _extract_lyrics_from_rapidapi_response(self, lyrics_data: Dict[str, Any]) -> Optional[str]:
+        """Extract lyrics text from RapidAPI response structure."""
+        try:
+            # Log the actual response structure for debugging
+            self.logger.debug(f"RapidAPI response structure: {lyrics_data}")
+            # Try different possible response structures
+            # Structure 1: lyrics.lyrics.body.html (the actual RapidAPI structure)
+            nested_lyrics = lyrics_data.get("lyrics", {}).get("lyrics", {})
+            if isinstance(nested_lyrics, dict):
+                html_content = nested_lyrics.get("body", {}).get("html")
+                if html_content:
+                    return self._clean_html_lyrics(html_content)
+            # Structure 2: lyrics.lyrics (simple string)
+            if isinstance(lyrics_data.get("lyrics", {}).get("lyrics"), str):
+                return lyrics_data["lyrics"]["lyrics"]
+            # Structure 3: lyrics.body.html (HTML content)
+            html_content = lyrics_data.get("lyrics", {}).get("body", {}).get("html")
+            if html_content:
+                return self._clean_html_lyrics(html_content)
+            # Structure 4: Direct lyrics field
+            if isinstance(lyrics_data.get("lyrics"), str):
+                return lyrics_data["lyrics"]
+            # Structure 5: body.html at top level
+            if lyrics_data.get("body", {}).get("html"):
+                return self._clean_html_lyrics(lyrics_data["body"]["html"])
+            # Structure 6: Check if lyrics is a dict with other possible keys
+            lyrics_obj = lyrics_data.get("lyrics", {})
+            if isinstance(lyrics_obj, dict):
+                # Try common alternative keys
+                for key in ["text", "content", "plain", "body"]:
+                    if key in lyrics_obj:
+                        content = lyrics_obj[key]
+                        if isinstance(content, str):
+                            return content
+                        elif isinstance(content, dict) and "html" in content:
+                            return self._clean_html_lyrics(content["html"])
+                        elif isinstance(content, dict) and "text" in content:
+                            return content["text"]
+            self.logger.warning(f"Unknown RapidAPI response structure: {list(lyrics_data.keys())}")
+            if "lyrics" in lyrics_data:
+                self.logger.warning(f"Lyrics object structure: {lyrics_data['lyrics']}")
+            return None
+        except Exception as e:
+            self.logger.error(f"Error extracting lyrics from RapidAPI response: {str(e)}")
+            return None
+    def _clean_html_lyrics(self, html_content: str) -> str:
+        """Clean HTML content to extract plain text lyrics."""
+        import re
+        if not html_content:
+            return ""
+        # Remove HTML tags while preserving line breaks
+        text = re.sub(r'<br\s*/?>', '\n', html_content)  # Convert <br> to newlines
+        text = re.sub(r'<[^>]+>', '', text)  # Remove all other HTML tags
+        # Decode HTML entities
+        text = text.replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
+        text = text.replace('&quot;', '"').replace('&#x27;', "'").replace('&nbsp;', ' ')
+        # Remove section markers but keep the lyrics content
+        # Instead of removing entire lines, just remove the square bracket markers
+        text = re.sub(r'\[Verse \d+\]', '', text)
+        text = re.sub(r'\[Pre-Chorus\]', '', text)
+        text = re.sub(r'\[Chorus\]', '', text)
+        text = re.sub(r'\[Refrain\]', '', text)
+        text = re.sub(r'\[Outro\]', '', text)
+        text = re.sub(r'\[Bridge\]', '', text)
+        text = re.sub(r'\[Intro\]', '', text)
+        # Clean up multiple consecutive newlines
+        text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)
+        # Clean up leading/trailing whitespace
+        text = text.strip()
+        return text
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """Convert Genius's raw API response to standardized format."""
+        # Use our explicit source marker for detection
+        is_rapidapi = raw_data.get("_rapidapi_source", False)
+        if is_rapidapi:
+            return self._convert_rapidapi_format(raw_data)
+        else:
+            return self._convert_lyricsgenius_format(raw_data)
+    def _convert_lyricsgenius_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """Convert lyricsgenius format to standardized format."""
+        # Clean the lyrics before processing
+        lyrics = self._clean_lyrics(raw_data.get("lyrics", ""))
+        # Extract release date components if available
+        release_date = None
+        if release_components := raw_data.get("release_date_components"):
+            year = release_components.get("year")
+            month = release_components.get("month")
+            day = release_components.get("day")
+            if all(x is not None for x in (year, month, day)):
+                release_date = f"{year}-{month:02d}-{day:02d}"
+        # Create metadata object
+        metadata = LyricsMetadata(
+            source="genius",
+            track_name=raw_data.get("title", ""),
+            artist_names=raw_data.get("artist_names", ""),
+            album_name=raw_data.get("album", {}).get("name"),
+            lyrics_provider="genius",
+            lyrics_provider_id=str(raw_data.get("id")),
+            is_synced=False,  # Genius doesn't provide synced lyrics
+            provider_metadata={
+                "genius_id": raw_data.get("id"),
+                "release_date": release_date,
+                "page_url": raw_data.get("url"),
+                "annotation_count": raw_data.get("annotation_count"),
+                "lyrics_state": raw_data.get("lyrics_state"),
+                "lyrics_owner_id": raw_data.get("lyrics_owner_id"),
+                "pyongs_count": raw_data.get("pyongs_count"),
+                "verified_annotations": len(raw_data.get("verified_annotations_by", [])),
+                "verified_contributors": len(raw_data.get("verified_contributors", [])),
+                "external_urls": {"genius": raw_data.get("url")},
+                "api_source": "lyricsgenius",
+            },
+        )
+        # Create segments with words from cleaned lyrics
+        segments = self._create_segments_with_words(lyrics, is_synced=False)
+        # Create result object with segments
+        return LyricsData(source="genius", segments=segments, metadata=metadata)
+    def _convert_rapidapi_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """Convert RapidAPI format to standardized format."""
+        # Clean the lyrics before processing
+        lyrics = self._clean_lyrics(raw_data.get("lyrics", ""))
+        # Extract artist name from primary_artist
+        primary_artist = raw_data.get("primary_artist", {})
+        artist_name = primary_artist.get("name", "")
+        # Extract release date from release_date_for_display
+        release_date = raw_data.get("release_date_for_display")
+        # Create metadata object
+        metadata = LyricsMetadata(
+            source="genius",
+            track_name=raw_data.get("title", ""),
+            artist_names=artist_name,
+            album_name=raw_data.get("album", {}).get("name") if raw_data.get("album") else None,
+            lyrics_provider="genius",
+            lyrics_provider_id=str(raw_data.get("id")),
+            is_synced=False,  # Genius doesn't provide synced lyrics
+            provider_metadata={
+                "genius_id": raw_data.get("id"),
+                "release_date": release_date,
+                "page_url": raw_data.get("url"),
+                "annotation_count": raw_data.get("annotation_count"),
+                "lyrics_state": raw_data.get("lyrics_state"),
+                "pyongs_count": raw_data.get("pyongs_count"),
+                "external_urls": {"genius": raw_data.get("url")},
+                "api_source": "rapidapi",
+            },
+        )
+        # Create segments with words from cleaned lyrics
+        segments = self._create_segments_with_words(lyrics, is_synced=False)
+        # Create result object with segments
+        return LyricsData(source="genius", segments=segments, metadata=metadata)
+    def _clean_lyrics(self, lyrics: str) -> str:
+        """Clean and process lyrics from Genius to remove unwanted content."""
+        self.logger.debug("Starting lyrics cleaning process")
+        # Handle unexpected input types
+        if not isinstance(lyrics, str):
+            self.logger.warning(f"Expected string for lyrics, got {type(lyrics)}: {repr(lyrics)}")
+            if lyrics is None:
+                return ""
+            # Try to convert to string
+            try:
+                lyrics = str(lyrics)
+            except Exception as e:
+                self.logger.error(f"Failed to convert lyrics to string: {e}")
+                return ""
+        original = lyrics
+        lyrics = lyrics.replace("\\n", "\n")
+        lyrics = re.sub(r"You might also like", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed 'You might also like' text")
+        original = lyrics
+        lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed song name and 'Lyrics' prefix")
+        original = lyrics
+        lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed contributors count and 'Lyrics' text")
+        original = lyrics
+        lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed ticket sales text")
+        original = lyrics
+        lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed numbered embed marker")
+        original = lyrics
+        lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed 'Embed' suffix from word")
+        original = lyrics
+        lyrics = re.sub(r"^Embed$", r"", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed standalone 'Embed' text")
+        # Remove section markers but keep the lyrics content (for non-HTML lyrics)
+        # Instead of removing entire lines, just remove the square bracket markers
+        original = lyrics
+        lyrics = re.sub(r'\[Verse \d+\]', '', lyrics)
+        lyrics = re.sub(r'\[Pre-Chorus\]', '', lyrics)
+        lyrics = re.sub(r'\[Chorus\]', '', lyrics)
+        lyrics = re.sub(r'\[Refrain\]', '', lyrics)
+        lyrics = re.sub(r'\[Outro\]', '', lyrics)
+        lyrics = re.sub(r'\[Bridge\]', '', lyrics)
+        lyrics = re.sub(r'\[Intro\]', '', lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed section markers while preserving lyrics content")
+        # Remove common LyricsGenius page elements
+        self.logger.debug("Completed lyrics cleaning process")
+        return lyrics

lyrics_transcriber/lyrics/lrclib.py ADDED Viewed

@@ -0,0 +1,231 @@
+import logging
+import re
+from typing import Optional, Dict, Any, List
+import requests
+from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
+from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
+from lyrics_transcriber.utils.word_utils import WordUtils
+class LRCLIBProvider(BaseLyricsProvider):
+    """Handles fetching lyrics from LRCLIB."""
+    # Root URL of the LRCLIB service; the fetch methods append "/api/get" and "/api/search" to it.
+    BASE_URL = "https://lrclib.net"
+    # Sent as the "User-Agent" header on the LRCLIB requests issued by the fetch methods.
+    USER_AGENT = "lyrics-transcriber (https://github.com/nomadkaraoke/python-lyrics-transcriber)"
+    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
+        """Initialise the provider via the base class and start with no known duration.
+
+        Args:
+            config: Provider configuration, forwarded unchanged to the base class.
+            logger: Optional logger, forwarded unchanged to the base class.
+        """
+        super().__init__(config, logger)
+        # NOTE(review): none of the methods visible in this part of the file assigns this
+        # attribute (the duration is only held in a local variable) - confirm it is set elsewhere.
+        self.duration = None  # Will be set when fetching lyrics
+    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """Look up a song on LRCLIB, preferring the exact-match endpoint over search.
+
+        Args:
+            artist: Artist name to look up.
+            title: Track title to look up.
+
+        Returns:
+            The raw LRCLIB record, or None when neither lookup yields a result.
+        """
+        self.logger.info(f"Searching LRCLIB for {artist} - {title}")
+        # The exact-match endpoint needs a duration, so it is only tried when one is known
+        track_seconds = self._get_track_duration()
+        exact_hit = self._fetch_with_duration(artist, title, "", track_seconds) if track_seconds else None
+        if exact_hit:
+            return exact_hit
+        # Exact match failed or no duration available: fall back to the search API
+        search_hit = self._fetch_from_search(artist, title)
+        if not search_hit:
+            self.logger.warning(f"No lyrics found on LRCLIB for {artist} - {title}")
+            return None
+        return search_hit
+    def _get_track_duration(self) -> Optional[int]:
+        """Get track duration in seconds from the audio file.
+
+        Returns:
+            The stream length truncated to whole seconds, or None when no audio
+            file path is set, the file yields no stream info, or reading it fails
+            (failures are logged as a warning rather than raised).
+        """
+        if not self.audio_filepath:
+            return None
+        try:
+            import mutagen
+            audio = mutagen.File(self.audio_filepath)
+            # Test for None explicitly instead of relying on truthiness: mutagen file
+            # objects are dict-like over their tags, so an untagged file evaluates as
+            # falsy even though its stream info (and therefore its length) is available.
+            info = getattr(audio, "info", None)
+            if info is not None:
+                duration = int(info.length)
+                self.logger.debug(f"Track duration: {duration} seconds")
+                return duration
+        except Exception as e:
+            # Best effort only: a missing duration just disables the exact-match lookup
+            self.logger.warning(f"Could not determine track duration: {str(e)}")
+        return None
+    def _fetch_with_duration(self, artist: str, title: str, album: str, duration: int) -> Optional[Dict[str, Any]]:
+        """Request a single track from the LRCLIB "/api/get" endpoint.
+
+        Args:
+            artist: Artist name sent as "artist_name".
+            title: Track title sent as "track_name".
+            album: Album name sent as "album_name".
+            duration: Track duration sent as "duration".
+
+        Returns:
+            The decoded JSON response, or None on a 404 or on any request or
+            decoding failure (failures are logged, never raised).
+        """
+        endpoint = f"{self.BASE_URL}/api/get"
+        query = {"artist_name": artist, "track_name": title, "album_name": album, "duration": duration}
+        try:
+            self.logger.debug(f"Making LRCLIB request with duration {duration}s")
+            reply = requests.get(endpoint, headers={"User-Agent": self.USER_AGENT}, params=query, timeout=15)
+            if reply.status_code != 404:
+                # Any other error status is raised here and reported by the handlers below
+                reply.raise_for_status()
+                payload = reply.json()
+                self.logger.info("Successfully fetched lyrics from LRCLIB")
+                return payload
+            # A 404 is an ordinary miss, not an error
+            self.logger.debug("Track not found with exact duration")
+            return None
+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"LRCLIB request failed: {str(e)}")
+            return None
+        except Exception as e:
+            self.logger.error(f"Error fetching from LRCLIB: {str(e)}")
+            return None
+    def _fetch_from_search(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """Query the LRCLIB "/api/search" endpoint and return its first result.
+
+        Args:
+            artist: Artist name sent as "artist_name".
+            title: Track title sent as "track_name".
+
+        Returns:
+            The first entry of the decoded response, or None when the response is
+            empty or the request fails (failures are logged, never raised).
+        """
+        endpoint = f"{self.BASE_URL}/api/search"
+        query = {"track_name": title, "artist_name": artist}
+        try:
+            self.logger.debug("Making LRCLIB search request")
+            reply = requests.get(endpoint, headers={"User-Agent": self.USER_AGENT}, params=query, timeout=15)
+            reply.raise_for_status()
+            candidates = reply.json()
+            has_hits = bool(candidates) and len(candidates) != 0
+            if has_hits:
+                # The first entry is taken as the best match
+                top_hit = candidates[0]
+                self.logger.info(f"Found lyrics via LRCLIB search: {top_hit.get('trackName')} by {top_hit.get('artistName')}")
+                return top_hit
+            self.logger.debug("No search results from LRCLIB")
+            return None
+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"LRCLIB search request failed: {str(e)}")
+            return None
+        except Exception as e:
+            self.logger.error(f"Error searching LRCLIB: {str(e)}")
+            return None
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """Turn a raw LRCLIB record into a LyricsData object.
+
+        Args:
+            raw_data: One LRCLIB record; the keys read are "instrumental",
+                "syncedLyrics", "plainLyrics", "trackName", "artistName",
+                "albumName", "duration" and "id".
+
+        Returns:
+            LyricsData whose segments come from the synced lyrics when present,
+            otherwise from the plain lyrics, otherwise an empty list.
+        """
+        instrumental = raw_data.get("instrumental", False)
+        synced_text = raw_data.get("syncedLyrics", "")
+        plain_text = raw_data.get("plainLyrics", "")
+        # Whitespace-only lyric fields count as absent
+        synced_available = bool(synced_text and synced_text.strip())
+        plain_available = bool(plain_text and plain_text.strip())
+        track_seconds = raw_data.get("duration")
+        record_id = raw_data.get("id")
+        provider_details = {
+            "lrclib_id": record_id,
+            "duration": track_seconds,
+            "instrumental": instrumental,
+            "has_synced_lyrics": synced_available,
+            "has_plain_lyrics": plain_available,
+        }
+        metadata = LyricsMetadata(
+            source="lrclib",
+            track_name=raw_data.get("trackName", ""),
+            artist_names=raw_data.get("artistName", ""),
+            album_name=raw_data.get("albumName"),
+            duration_ms=track_seconds * 1000 if track_seconds else None,
+            is_synced=synced_available,
+            lyrics_provider="lrclib",
+            lyrics_provider_id=str(record_id) if record_id else None,
+            provider_metadata=provider_details,
+        )
+        if synced_available:
+            segments = self._parse_synced_lyrics(synced_text)
+        elif plain_text:
+            segments = self._create_segments_with_words(plain_text, is_synced=False)
+        else:
+            # Nothing to segment, e.g. instrumental tracks
+            segments = []
+        return LyricsData(source="lrclib", segments=segments, metadata=metadata)
+    def _parse_synced_lyrics(self, synced_lyrics: str) -> List[LyricsSegment]:
+        """Parse LRC format synced lyrics into segments with timing.
+
+        Every "[mm:ss.xx] text" line with non-empty text becomes one segment. A
+        segment ends where the next timestamped line starts (including text-less
+        timestamps, which only act as boundaries); the last line, or a line whose
+        successor does not start later, gets a default length of 3 seconds. Words
+        share the segment's time span equally.
+
+        Args:
+            synced_lyrics: Lyrics text in LRC format, one timestamped line per row.
+
+        Returns:
+            Segments in input order; lines without a leading timestamp are ignored.
+        """
+        # LRC format: [mm:ss.xx] lyrics text
+        # The text group may be empty so that bare timestamps such as "[01:05.00]"
+        # are still recognised and can close the preceding line.
+        lrc_pattern = re.compile(r'\[(\d+):(\d+)\.(\d+)\]\s*(.*)')
+        default_duration = 3.0
+        # First pass: collect (start_time, text) for every timestamped line so the end
+        # of a line can be taken from the next timestamp even when unrelated lines
+        # (blank rows, metadata tags) sit in between.
+        timed_lines = []
+        for line in synced_lyrics.strip().split('\n'):
+            match = lrc_pattern.match(line.strip())
+            if not match:
+                continue
+            minutes, seconds, fraction, text = match.groups()
+            # Scale by the number of digits: ".12" is hundredths, ".123" is thousandths
+            start_time = int(minutes) * 60 + int(seconds) + int(fraction) / (10 ** len(fraction))
+            timed_lines.append((start_time, text.strip()))
+        # Second pass: build a segment for every line that actually carries words
+        segments = []
+        for index, (start_time, text) in enumerate(timed_lines):
+            word_texts = text.split()
+            if not word_texts:
+                continue
+            end_time = start_time + default_duration
+            if index + 1 < len(timed_lines):
+                next_start = timed_lines[index + 1][0]
+                # Ignore a successor that does not start later (duplicate or
+                # out-of-order stamps) to avoid zero or negative durations
+                if next_start > start_time:
+                    end_time = next_start
+            word_duration = (end_time - start_time) / len(word_texts)
+            words = [
+                Word(
+                    id=WordUtils.generate_id(),
+                    text=word_text,
+                    start_time=start_time + (j * word_duration),
+                    end_time=start_time + ((j + 1) * word_duration),
+                    confidence=1.0,
+                    created_during_correction=False,
+                )
+                for j, word_text in enumerate(word_texts)
+            ]
+            segments.append(
+                LyricsSegment(
+                    id=WordUtils.generate_id(),
+                    text=text,
+                    words=words,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+            )
+        return segments