lyrics-transcriber 0.59.0__py3-none-any.whl → 0.61.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +36 -0
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/web_assets/assets/{index-HT4AHF8U.js → index-Cy8N8ZKR.js} +2 -2
- lyrics_transcriber/frontend/web_assets/assets/{index-HT4AHF8U.js.map → index-Cy8N8ZKR.js.map} +1 -1
- lyrics_transcriber/frontend/web_assets/index.html +1 -1
- lyrics_transcriber/lyrics/genius.py +2 -1
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +163 -3
- lyrics_transcriber/output/video.py +269 -60
- {lyrics_transcriber-0.59.0.dist-info → lyrics_transcriber-0.61.0.dist-info}/METADATA +1 -1
- {lyrics_transcriber-0.59.0.dist-info → lyrics_transcriber-0.61.0.dist-info}/RECORD +14 -13
- {lyrics_transcriber-0.59.0.dist-info → lyrics_transcriber-0.61.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.59.0.dist-info → lyrics_transcriber-0.61.0.dist-info}/WHEEL +0 -0
- {lyrics_transcriber-0.59.0.dist-info → lyrics_transcriber-0.61.0.dist-info}/entry_points.txt +0 -0
@@ -5,7 +5,7 @@
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
7
7
|
<title>Lyrics Transcriber Analyzer</title>
|
8
|
-
<script type="module" crossorigin src="/assets/index-
|
8
|
+
<script type="module" crossorigin src="/assets/index-Cy8N8ZKR.js"></script>
|
9
9
|
</head>
|
10
10
|
<body>
|
11
11
|
<div id="root"></div>
|
@@ -15,7 +15,8 @@ class GeniusProvider(BaseLyricsProvider):
|
|
15
15
|
self.api_token = config.genius_api_token
|
16
16
|
self.rapidapi_key = config.rapidapi_key
|
17
17
|
self.client = None
|
18
|
-
if
|
18
|
+
# Only initialize lyricsgenius client if rapidapi_key is not set
|
19
|
+
if self.api_token and not self.rapidapi_key:
|
19
20
|
self.client = lyricsgenius.Genius(
|
20
21
|
self.api_token,
|
21
22
|
verbose=(logger.getEffectiveLevel() == logging.DEBUG if logger else False),
|
@@ -0,0 +1,156 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Optional, Dict, Any
|
3
|
+
import requests
|
4
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata
|
5
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
6
|
+
|
7
|
+
|
8
|
+
class MusixmatchProvider(BaseLyricsProvider):
    """Lyrics provider that fetches plain (unsynced) lyrics from Musixmatch via RapidAPI.

    The provider needs ``config.rapidapi_key``; without it every fetch returns
    ``None``. Responses are expected to follow the nested
    ``message -> body -> macro_calls`` layout read by the methods below.
    """

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Store the RapidAPI key from *config* after the base class initialisation."""
        super().__init__(config, logger)
        self.rapidapi_key = config.rapidapi_key

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw song data from Musixmatch via RapidAPI.

        Args:
            artist: Artist name; percent-encoded before being placed in the URL path.
            title: Track title; percent-encoded before being placed in the URL path.

        Returns:
            The decoded JSON response when it contains lyrics, otherwise ``None``
            (missing API key, request failure, or unexpected response layout).
        """
        # Function-scope import, matching how this module already imports `re`.
        from urllib.parse import quote

        if not self.rapidapi_key:
            self.logger.warning("No RapidAPI key provided for Musixmatch")
            return None

        self.logger.info(f"Fetching lyrics from Musixmatch for {artist} - {title}")

        try:
            # Artist and title are path segments: encode reserved characters
            # ("/", "?", "#", "%") so a name such as "AC/DC" cannot change the
            # route or truncate the request.
            url = (
                "https://musixmatch-song-lyrics-api.p.rapidapi.com/lyrics/"
                f"{quote(artist, safe='')}/{quote(title, safe='')}/"
            )

            headers = {
                "x-rapidapi-key": self.rapidapi_key,
                "x-rapidapi-host": "musixmatch-song-lyrics-api.p.rapidapi.com",
            }

            self.logger.debug(f"Making Musixmatch API request to: {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            data = response.json()

            # Check if we got a valid response
            macro_calls = data.get("message", {}).get("body", {}).get("macro_calls")
            if not macro_calls:
                self.logger.warning("Invalid response structure from Musixmatch API")
                return None

            # Check if lyrics are available
            lyrics_data = macro_calls.get("track.lyrics.get", {})
            if not lyrics_data.get("message", {}).get("body", {}).get("lyrics"):
                self.logger.warning("No lyrics found in Musixmatch response")
                return None

            self.logger.info("Successfully fetched lyrics from Musixmatch")
            return data

        except requests.exceptions.RequestException as e:
            self.logger.error(f"Musixmatch API request failed: {str(e)}")
            return None
        except Exception as e:
            # Best-effort provider: an unexpected payload shape (e.g. a non-dict
            # "body") is logged and treated as "no result" instead of propagating.
            self.logger.error(f"Error fetching from Musixmatch: {str(e)}")
            return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Musixmatch's raw API response to standardized format.

        Args:
            raw_data: The JSON payload returned by ``_fetch_data_from_source``.

        Returns:
            A ``LyricsData`` whose segments come from the cleaned lyrics body. If
            anything fails during conversion, an empty ``LyricsData`` is returned
            with the error text stored under ``provider_metadata["conversion_error"]``.
        """
        try:
            # Extract macro calls from the nested response
            macro_calls = raw_data.get("message", {}).get("body", {}).get("macro_calls", {})

            # Extract track information
            track_data = macro_calls.get("matcher.track.get", {}).get("message", {}).get("body", {}).get("track", {})

            # Extract lyrics information
            lyrics_data = macro_calls.get("track.lyrics.get", {}).get("message", {}).get("body", {}).get("lyrics", {})

            # Get the actual lyrics text and clean it
            lyrics_text = self._clean_lyrics(lyrics_data.get("lyrics_body", ""))

            # The value is multiplied by 1000 to fill duration_ms; a missing or
            # zero length is reported as None.
            track_length = track_data.get("track_length")
            duration_ms = track_length * 1000 if track_length else None

            # Create metadata object
            metadata = LyricsMetadata(
                source="musixmatch",
                track_name=track_data.get("track_name", ""),
                artist_names=track_data.get("artist_name", ""),
                album_name=track_data.get("album_name", ""),
                duration_ms=duration_ms,
                explicit=bool(track_data.get("explicit", 0)),
                language=lyrics_data.get("lyrics_language", ""),
                is_synced=False,  # Musixmatch API doesn't provide sync data in this format
                lyrics_provider="musixmatch",
                lyrics_provider_id=str(lyrics_data.get("lyrics_id", "")),
                provider_metadata={
                    "musixmatch_track_id": track_data.get("track_id"),
                    "musixmatch_lyrics_id": lyrics_data.get("lyrics_id"),
                    "album_id": track_data.get("album_id"),
                    "artist_id": track_data.get("artist_id"),
                    "track_share_url": track_data.get("track_share_url"),
                    "track_edit_url": track_data.get("track_edit_url"),
                    "lyrics_language": lyrics_data.get("lyrics_language"),
                    "lyrics_language_description": lyrics_data.get("lyrics_language_description"),
                    "lyrics_copyright": lyrics_data.get("lyrics_copyright"),
                    "track_rating": track_data.get("track_rating"),
                    "num_favourite": track_data.get("num_favourite"),
                    "first_release_date": track_data.get("first_release_date"),
                    "spotify_id": track_data.get("track_spotify_id"),
                    "isrc": track_data.get("track_isrc"),
                    "api_source": "rapidapi_musixmatch",
                },
            )

            # Create segments with words from lyrics
            segments = self._create_segments_with_words(lyrics_text, is_synced=False)

            # Create result object with segments
            return LyricsData(source="musixmatch", segments=segments, metadata=metadata)

        except Exception as e:
            self.logger.error(f"Error converting Musixmatch response format: {str(e)}")
            # Return empty lyrics data if conversion fails
            return LyricsData(
                source="musixmatch",
                segments=[],
                metadata=LyricsMetadata(
                    source="musixmatch",
                    track_name="",
                    artist_names="",
                    lyrics_provider="musixmatch",
                    is_synced=False,
                    provider_metadata={"api_source": "rapidapi_musixmatch", "conversion_error": str(e)},
                ),
            )

    def _clean_lyrics(self, lyrics: str) -> str:
        """Clean and process lyrics from Musixmatch to remove unwanted content.

        Non-string input is logged and coerced with ``str()``; ``None`` becomes
        an empty string.

        Returns:
            The lyrics with literal ``\\n`` escape sequences turned into real
            newlines, HTML-like tags removed, runs of three or more line breaks
            collapsed to one blank line, and surrounding whitespace stripped.
        """
        if not isinstance(lyrics, str):
            self.logger.warning(f"Expected string for lyrics, got {type(lyrics)}: {repr(lyrics)}")
            if lyrics is None:
                return ""
            try:
                lyrics = str(lyrics)
            except Exception as e:
                self.logger.error(f"Failed to convert lyrics to string: {e}")
                return ""

        import re

        # Replace escaped newlines with actual newlines, handling whitespace
        lyrics = re.sub(r'\s*\\n\s*', '\n', lyrics)

        # Remove any HTML tags that might be present
        lyrics = re.sub(r'<[^>]+>', '', lyrics)

        # Clean up multiple consecutive newlines
        lyrics = re.sub(r'\n\s*\n\s*\n+', '\n\n', lyrics)

        # Clean up leading/trailing whitespace
        lyrics = lyrics.strip()

        self.logger.debug("Completed Musixmatch lyrics cleaning process")
        return lyrics
|
@@ -2,6 +2,7 @@ import logging
|
|
2
2
|
from typing import Optional, Dict, Any
|
3
3
|
import syrics.api
|
4
4
|
import time
|
5
|
+
import requests
|
5
6
|
|
6
7
|
from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
|
7
8
|
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
@@ -14,9 +15,11 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
14
15
|
def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
|
15
16
|
super().__init__(config, logger)
|
16
17
|
self.cookie = config.spotify_cookie
|
18
|
+
self.rapidapi_key = config.rapidapi_key
|
17
19
|
self.client = None
|
18
20
|
|
19
|
-
if
|
21
|
+
# Only initialize syrics client if rapidapi_key is not set
|
22
|
+
if self.cookie and not self.rapidapi_key:
|
20
23
|
max_retries = 5
|
21
24
|
retry_delay = 5 # seconds
|
22
25
|
|
@@ -32,9 +35,17 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
32
35
|
time.sleep(retry_delay)
|
33
36
|
|
34
37
|
def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
|
35
|
-
"""Fetch raw data from Spotify APIs using syrics library."""
|
38
|
+
"""Fetch raw data from Spotify APIs using RapidAPI or syrics library."""
|
39
|
+
# Try RapidAPI first if available
|
40
|
+
if self.rapidapi_key:
|
41
|
+
self.logger.info(f"Trying RapidAPI for {artist} - {title}")
|
42
|
+
result = self._fetch_from_rapidapi(artist, title)
|
43
|
+
if result:
|
44
|
+
return result
|
45
|
+
|
46
|
+
# Fall back to syrics library
|
36
47
|
if not self.client:
|
37
|
-
self.logger.warning("No Spotify cookie provided")
|
48
|
+
self.logger.warning("No Spotify cookie provided and RapidAPI failed")
|
38
49
|
return None
|
39
50
|
|
40
51
|
try:
|
@@ -57,8 +68,82 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
57
68
|
self.logger.error(f"Error fetching from Spotify: {str(e)}")
|
58
69
|
return None
|
59
70
|
|
71
|
+
def _fetch_from_rapidapi(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
    """Fetch song data using RapidAPI.

    Performs two requests against the spotify-scraper RapidAPI host: a track
    search by "<title> <artist>", then a lyrics lookup for the returned track ID.

    Args:
        artist: Artist name used in the search query.
        title: Track title used in the search query.

    Returns:
        A dict with ``track_data`` (the search response), ``lyrics_data`` (the
        list of lyric lines) and the ``_rapidapi_source`` marker, or ``None``
        when the search fails, no track ID is found, the lyrics payload is not
        a non-empty list, or a request/parsing error occurs.
    """
    try:
        # Step 1: Search for the track
        search_url = "https://spotify-scraper.p.rapidapi.com/v1/track/search"
        search_params = {
            "name": f"{title} {artist}"
        }

        headers = {
            "x-rapidapi-key": self.rapidapi_key,
            "x-rapidapi-host": "spotify-scraper.p.rapidapi.com"
        }

        self.logger.debug(f"Making RapidAPI search request for '{artist} {title}'")
        search_response = requests.get(search_url, headers=headers, params=search_params, timeout=10)
        search_response.raise_for_status()

        search_data = search_response.json()

        # Check if search was successful
        if not search_data.get("status") or search_data.get("errorId") != "Success":
            self.logger.warning("RapidAPI search failed")
            return None

        track_id = search_data.get("id")
        if not track_id:
            self.logger.warning("No track ID found in RapidAPI search results")
            return None

        self.logger.debug(f"Found track ID: {track_id}")

        # Step 2: Fetch lyrics using the track ID
        lyrics_url = "https://spotify-scraper.p.rapidapi.com/v1/track/lyrics"
        lyrics_params = {
            "trackId": track_id,
            "format": "json",
            "removeNote": "true"
        }

        self.logger.debug(f"Making RapidAPI lyrics request for track ID {track_id}")
        lyrics_response = requests.get(lyrics_url, headers=headers, params=lyrics_params, timeout=10)
        lyrics_response.raise_for_status()

        lyrics_data = lyrics_response.json()

        # The search step above shows this API can report failures in the body of
        # a successful HTTP response. The converter iterates the lyrics payload as
        # a list of line objects, so anything else (error object, empty payload)
        # is treated as "no lyrics" rather than being reported as success.
        if not isinstance(lyrics_data, list) or not lyrics_data:
            self.logger.warning("No lyrics found in RapidAPI response")
            return None

        # Create a clean RapidAPI response structure
        rapidapi_response = {
            "track_data": search_data,
            "lyrics_data": lyrics_data,
            # Mark this as RapidAPI source
            "_rapidapi_source": True
        }

        self.logger.info("Successfully fetched lyrics from RapidAPI")
        return rapidapi_response

    except requests.exceptions.RequestException as e:
        self.logger.error(f"RapidAPI request failed: {str(e)}")
        return None
    except Exception as e:
        # Best-effort: unexpected payload shapes are logged and reported as no result.
        self.logger.error(f"Error fetching from RapidAPI: {str(e)}")
        return None
|
134
|
+
|
60
135
|
def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
    """Convert Spotify's raw API response to standardized format.

    Payloads carrying a truthy ``_rapidapi_source`` marker go to the RapidAPI
    converter; everything else is handled as syrics output.
    """
    # Pick the converter from the explicit source marker, then delegate.
    converter = (
        self._convert_rapidapi_format
        if raw_data.get("_rapidapi_source", False)
        else self._convert_syrics_format
    )
    return converter(raw_data)
|
144
|
+
|
145
|
+
def _convert_syrics_format(self, raw_data: Dict[str, Any]) -> LyricsData:
|
146
|
+
"""Convert syrics format to standardized format."""
|
62
147
|
track_data = raw_data["track_data"]
|
63
148
|
lyrics_data = raw_data["lyrics_data"]["lyrics"]
|
64
149
|
|
@@ -79,6 +164,7 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
79
164
|
"preview_url": track_data.get("preview_url"),
|
80
165
|
"external_urls": track_data.get("external_urls"),
|
81
166
|
"sync_type": lyrics_data.get("syncType"),
|
167
|
+
"api_source": "syrics",
|
82
168
|
},
|
83
169
|
)
|
84
170
|
|
@@ -122,6 +208,80 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
122
208
|
|
123
209
|
return LyricsData(source="spotify", segments=segments, metadata=metadata)
|
124
210
|
|
211
|
+
def _convert_rapidapi_format(self, raw_data: Dict[str, Any]) -> LyricsData:
    """Convert RapidAPI format to standardized format.

    Args:
        raw_data: Dict with ``track_data`` (track search response) and
            ``lyrics_data`` (iterated as a list of line dicts read via the
            ``text``, ``startMs`` and ``durMs`` keys).

    Returns:
        A ``LyricsData`` with one segment per non-empty lyric line. Each line's
        duration is split evenly across its whitespace-separated words.

    Raises:
        KeyError: If ``track_data`` or ``lyrics_data`` is missing from *raw_data*.
    """
    track_data = raw_data["track_data"]
    lyrics_data = raw_data["lyrics_data"]

    # An error object (dict) or other non-list payload would otherwise be
    # iterated key by key and crash on `.get`; treat it as having no lines.
    if not isinstance(lyrics_data, list):
        self.logger.warning(f"Unexpected RapidAPI lyrics payload type: {type(lyrics_data)}")
        lyrics_data = []

    # Extract artist names from RapidAPI format (tolerates a missing or null list)
    artist_names = [artist.get("name", "") for artist in track_data.get("artists") or []]

    # A null "album" value must not break metadata extraction
    album = track_data.get("album") or {}

    # Create metadata object
    metadata = LyricsMetadata(
        source="spotify",
        track_name=track_data.get("name"),
        artist_names=", ".join(artist_names),
        album_name=album.get("name"),
        duration_ms=track_data.get("durationMs"),
        explicit=track_data.get("explicit"),
        is_synced=True,  # RapidAPI format includes timing information
        lyrics_provider="spotify",
        lyrics_provider_id=track_data.get("id"),
        provider_metadata={
            "spotify_id": track_data.get("id"),
            "share_url": track_data.get("shareUrl"),
            "duration_text": track_data.get("durationText"),
            "album_cover": album.get("cover"),
            "api_source": "rapidapi",
        },
    )

    # Create segments with timing information from RapidAPI format
    segments = []
    for line in lyrics_data:
        # Ignore malformed entries instead of failing the whole conversion
        if not isinstance(line, dict):
            continue

        raw_text = line.get("text")
        if not raw_text:
            continue

        # Skip lines that are just musical notes
        if not self._clean_lyrics(raw_text):
            continue

        # Split line into words
        line_text = raw_text.strip()
        word_texts = line_text.split()
        if not word_texts:
            continue

        # Calculate timing for each word: milliseconds -> seconds, spread evenly
        start_time = float(line["startMs"]) / 1000 if line.get("startMs") else 0.0
        duration = float(line["durMs"]) / 1000 if line.get("durMs") else 0.0
        end_time = start_time + duration
        word_duration = duration / len(word_texts)

        words = [
            Word(
                id=WordUtils.generate_id(),
                text=word_text,
                start_time=start_time + (i * word_duration),
                end_time=start_time + ((i + 1) * word_duration),
                confidence=1.0,
                created_during_correction=False,
            )
            for i, word_text in enumerate(word_texts)
        ]

        segments.append(
            LyricsSegment(
                id=WordUtils.generate_id(),
                text=line_text,
                words=words,
                start_time=start_time,
                end_time=end_time,
            )
        )

    return LyricsData(source="spotify", segments=segments, metadata=metadata)
|
284
|
+
|
125
285
|
def _clean_lyrics(self, lyrics: str) -> str:
|
126
286
|
"""Clean and process lyrics from Spotify to remove unwanted content."""
|
127
287
|
# Remove lines that contain only musical note symbols
|