SpotDown 0.0.1__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- SpotDown/__init__.py +0 -0
- SpotDown/downloader/__init__.py +0 -0
- SpotDown/downloader/youtube_downloader.py +131 -0
- SpotDown/extractor/__init__.py +6 -0
- SpotDown/extractor/spotify_extractor.py +331 -0
- SpotDown/extractor/youtube_extractor.py +271 -0
- SpotDown/main.py +139 -0
- SpotDown/utils/__init__.py +6 -0
- SpotDown/utils/config_json.py +223 -0
- SpotDown/utils/console_utils.py +188 -0
- SpotDown/utils/file_utils.py +129 -0
- SpotDown/utils/headers.py +18 -0
- {spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/METADATA +182 -182
- spotdown-0.0.7.dist-info/RECORD +18 -0
- {spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/licenses/LICENSE +674 -674
- spotdown-0.0.7.dist-info/top_level.txt +1 -0
- spotdown-0.0.1.dist-info/RECORD +0 -6
- spotdown-0.0.1.dist-info/top_level.txt +0 -1
- {spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/WHEEL +0 -0
- {spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/entry_points.txt +0 -0
SpotDown/__init__.py
ADDED
File without changes
|
File without changes
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# 05.04.2024
|
2
|
+
|
3
|
+
import io
|
4
|
+
import subprocess
|
5
|
+
from typing import Dict
|
6
|
+
|
7
|
+
|
8
|
+
# External imports
|
9
|
+
import httpx
|
10
|
+
from PIL import Image
|
11
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
12
|
+
from rich.console import Console
|
13
|
+
|
14
|
+
|
15
|
+
# Internal utils
|
16
|
+
from SpotDown.utils.config_json import config_manager
|
17
|
+
from SpotDown.utils.file_utils import FileUtils
|
18
|
+
|
19
|
+
|
20
|
+
# Variable
|
21
|
+
# Audio quality setting, read once at import time from the "DOWNLOAD" section of the
# configuration; YouTubeDownloader.download passes it to yt-dlp as --audio-quality.
quality = config_manager.get("DOWNLOAD", "quality")
|
22
|
+
|
23
|
+
|
24
|
+
class YouTubeDownloader:
    """Downloads the audio of a YouTube video as an mp3 file through the ``yt-dlp`` CLI.

    The destination folder and the file name are provided by ``FileUtils``;
    the audio quality handed to ``yt-dlp`` is the module-level ``quality``
    value read from the configuration.
    """

    def __init__(self):
        self.console = Console()
        self.file_utils = FileUtils()

    def download(self, video_info: Dict, spotify_info: Dict) -> bool:
        """
        Download YouTube video as mp3 at the configured audio quality

        Args:
            video_info (Dict): YouTube video info; 'url' is required, 'title' is the fallback file title
            spotify_info (Dict): Spotify track info; 'artist', 'title' and 'cover_url' are read when present

        Returns:
            bool: True if yt-dlp exited with code 0 and an output file was found, False otherwise
        """
        # Temporary cover image; tracked separately so it is removed on every exit path
        cover_file = None

        try:
            music_folder = self.file_utils.get_music_folder()
            filename = self.file_utils.create_filename(
                spotify_info.get('artist', 'Unknown Artist'),
                spotify_info.get('title', video_info.get('title', 'Unknown Title'))
            )
            output_path = music_folder / f"{filename}.%(ext)s"

            # Download cover image if available
            cover_path = None
            cover_url = spotify_info.get('cover_url')
            if cover_url:
                cover_file = music_folder / f"{filename}_cover.jpg"
                cover_path = self._download_cover(cover_url, cover_file)

            ytdlp_options = [
                'yt-dlp',
                '--extract-audio',
                '--audio-format', 'mp3',
                # subprocess only accepts strings; the config value may be stored as a number
                '--audio-quality', str(quality),
                '--output', str(output_path),
                '--no-playlist',
                '--embed-metadata',
                '--add-metadata',
            ]

            # NOTE(review): --embed-thumbnail makes yt-dlp embed a thumbnail it handles itself;
            # confirm that the cover saved above is really the image that ends up in the mp3.
            if cover_path is not None and cover_path.exists():
                ytdlp_options += ['--embed-thumbnail']
            ytdlp_options.append(video_info['url'])

            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=self.console
            ) as progress:
                task = progress.add_task("Downloading...", total=None)
                process = subprocess.run(
                    ytdlp_options,
                    capture_output=True,
                    text=True
                )
                progress.remove_task(task)

            if process.returncode != 0:
                self.console.print("[red]Download error:[/red]")
                self.console.print(f"[red]{process.stderr}[/red]")
                return False

            # Find the downloaded file. A plain prefix comparison is used instead of glob()
            # so that characters such as '[' or '*' in the file name are taken literally.
            prefix = f"{filename}."
            if not any(entry.name.startswith(prefix) for entry in music_folder.iterdir()):
                self.console.print("[yellow]Download apparently succeeded but file not found[/yellow]")
                return False

            self.console.print("[red]Download completed![/red]")
            return True

        except Exception as e:
            self.console.print(f"[red]Error during download: {e}[/red]")
            return False

        finally:
            # Remove the cover file whether the download succeeded, failed or raised
            if cover_file is not None:
                self._remove_file(cover_file)

    def _download_cover(self, cover_url, cover_path):
        """Fetches the cover image and stores it as JPEG at cover_path; returns cover_path or None on failure"""
        try:
            with httpx.Client(timeout=10) as client:
                resp = client.get(cover_url)

                if resp.status_code != 200:
                    return None

                # Always save only as jpg: Pillow decodes whatever format was served
                # (webp included), so a single conversion path covers every case
                img = Image.open(io.BytesIO(resp.content)).convert("RGB")
                img.save(cover_path, "JPEG")

            self.console.print(f"[blue]Downloaded thumbnail: {cover_path}[/blue]")
            return cover_path

        except Exception as e:
            self.console.print(f"[yellow]Unable to download cover: {e}[/yellow]")
            return None

    def _remove_file(self, path) -> None:
        """Best-effort deletion of a temporary file; a failure is reported but never raised"""
        try:
            if path.exists():
                path.unlink()
        except OSError as e:
            self.console.print(f"[yellow]Unable to remove temporary file {path}: {e}[/yellow]")
|
@@ -0,0 +1,331 @@
|
|
1
|
+
# 05.04.2024
|
2
|
+
|
3
|
+
import os
|
4
|
+
import json
|
5
|
+
import logging
|
6
|
+
from typing import Dict, List, Optional
|
7
|
+
|
8
|
+
|
9
|
+
# External imports
|
10
|
+
from rich.console import Console
|
11
|
+
from playwright.sync_api import sync_playwright
|
12
|
+
|
13
|
+
|
14
|
+
# Internal utils
|
15
|
+
from SpotDown.utils.headers import get_userAgent
|
16
|
+
from SpotDown.utils.config_json import config_manager
|
17
|
+
|
18
|
+
|
19
|
+
# Variable
|
20
|
+
# Module-wide rich console used by SpotifyExtractor to print status and error messages.
console = Console()
|
21
|
+
# Read once at import time from the "BROWSER" config section; passed as the `headless`
# argument when SpotifyExtractor.__enter__ launches Chromium.
headless = config_manager.get("BROWSER", "headless")
|
22
|
+
# Read once at import time from the "BROWSER" config section; bounds how long
# SpotifyExtractor._extract_spotify_data polls for a captured API response.
# Presumably expressed in seconds - TODO confirm against the config file.
timeout = config_manager.get("BROWSER", "timeout")
|
23
|
+
|
24
|
+
|
25
|
+
class SpotifyExtractor:
    """Extracts track and playlist metadata from Spotify web pages with Playwright.

    Intended to be used as a context manager: entering launches Chromium and
    opens a page, leaving closes the browser and stops Playwright. Metadata is
    obtained by listening to the page's POST calls to ``/pathfinder/v2/query``
    and reading their JSON bodies.
    """

    # Delay between two checks for a captured API response, in milliseconds
    _POLL_INTERVAL_MS = 100

    # Delay after each scroll of the playlist table, in milliseconds
    _SCROLL_PAUSE_MS = 300

    # Consecutive scroll attempts without any new playlist item before giving up
    _MAX_STALLED_SCROLLS = 50

    def __init__(self):
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None
        self.user_agent = get_userAgent()
        self.total_songs = None
        self.playlist_items = []

    def __enter__(self):
        """Context manager to automatically handle the browser"""
        self.playwright = sync_playwright().start()
        try:
            self.browser = self.playwright.chromium.launch(headless=headless)
            self.context = self.browser.new_context(
                user_agent=self.user_agent, viewport={'width': 1280, 'height': 800}, ignore_https_errors=True
            )
            self.page = self.context.new_page()
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so release what was already started
            self.__exit__(None, None, None)
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Automatically closes the browser"""
        try:
            if self.browser:
                self.browser.close()
        finally:
            # Stop the Playwright driver even when closing the browser failed
            if self.playwright:
                self.playwright.stop()

    def extract_track_info(self, spotify_url: str, save_json: bool = False) -> Optional[Dict]:
        """
        Extracts track information from a Spotify URL

        Args:
            spotify_url (str): Spotify URL of the track
            save_json (bool): If True, saves the raw Spotify API JSON response in the 'log' folder

        Returns:
            Dict: Track information or None if an error occurs
        """
        try:
            console.print("[cyan]Analyzing Spotify URL ...")

            # Extract Spotify data by intercepting API calls
            spotify_data, raw_json = self._extract_spotify_data(spotify_url, return_raw=True)

            if not spotify_data:
                console.print("[cyan]Unable to extract data from Spotify")
                return None

            # Save the JSON response if requested
            if save_json and raw_json:
                self._save_raw_response(spotify_data, raw_json)

            console.print(f"[cyan]Found: [red]{spotify_data['artist']} - {spotify_data['title']}[/red]")
            return spotify_data

        except Exception as e:
            console.print(f"[cyan]Spotify extraction error: {e}")
            return None

    def _save_raw_response(self, spotify_data: Dict, raw_json: Dict) -> None:
        """Writes the raw API response into ./log; a failure only produces a warning"""
        try:
            log_dir = os.path.join(os.getcwd(), "log")
            os.makedirs(log_dir, exist_ok=True)

            # Use title and artist for the filename if available
            filename = "spotify_response.json"

            if spotify_data.get("artist") and spotify_data.get("title"):
                safe_artist = "".join(c for c in spotify_data["artist"] if c.isalnum() or c in " _-")
                safe_title = "".join(c for c in spotify_data["title"] if c.isalnum() or c in " _-")
                filename = f"{safe_artist} - {safe_title}.json"

            filepath = os.path.join(log_dir, filename)
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(raw_json, f, ensure_ascii=False, indent=2)

            console.print(f"[green]Spotify API response saved to {filepath}")

        except Exception as e:
            console.print(f"[yellow]Warning: Could not save JSON file: {e}")

    def _extract_spotify_data(self, spotify_url: str, return_raw: bool = False) -> Optional[Dict]:
        """
        Extracts Spotify data by intercepting API calls

        Args:
            spotify_url (str): Spotify URL of the track
            return_raw (bool): If True, a (parsed, raw_response) tuple is returned instead of the parsed dict alone

        Returns:
            The parsed track dict, or None when nothing usable was captured.
            With return_raw=True: (parsed, raw_response), or (None, None) on failure.
        """
        try:
            api_responses = []

            def handle_request(request):
                if (request.method == "POST" and "/pathfinder/v2/query" in request.url):
                    try:
                        response = request.response()
                        if response and response.status == 200:
                            try:
                                response_data = response.json()

                                if self._is_valid_track_data(response_data):
                                    api_responses.append(response_data)
                                    console.print("[green]Valid API response found")

                            except Exception as e:
                                logging.warning(f"Error parsing API response: {e}")

                    except Exception as e:
                        logging.warning(f"Error accessing response: {e}")

            self.page.on("requestfinished", handle_request)
            try:
                self.page.goto(spotify_url)

                # Poll every 100ms and stop as soon as a valid response is found.
                # `timeout` is treated as a number of seconds: timeout * 10 polls of 100ms each.
                # The interval is fixed so the total wait grows linearly with `timeout`.
                for _ in range(max(1, int(timeout * 10))):
                    if api_responses:
                        break

                    self.page.wait_for_timeout(self._POLL_INTERVAL_MS)

            finally:
                # Detach the handler so repeated calls on the same page do not accumulate listeners
                self.page.remove_listener("requestfinished", handle_request)

            if not api_responses:
                console.print("[cyan]No valid API responses found")
                return (None, None) if return_raw else None

            # Selects the most complete response
            best_response = max(api_responses, key=lambda x: len(json.dumps(x)))
            parsed = self._parse_spotify_response(best_response)
            return (parsed, best_response) if return_raw else parsed

        except Exception as e:
            console.print(f"[cyan]❌ Spotify data extraction error: {e}")
            return (None, None) if return_raw else None

    def _is_valid_track_data(self, data: Dict) -> bool:
        """Checks if the data contains valid track information (a track name and at least one first artist)"""
        try:
            track_union = data.get("data", {}).get("trackUnion", {})
            return bool(track_union.get("name") and track_union.get("firstArtist", {}).get("items"))

        except Exception:
            # Any unexpected shape (None, list, ...) simply means "not a track response"
            return False

    def _parse_spotify_response(self, response: Dict) -> Dict:
        """
        Parses the Spotify API response

        Missing keys and JSON nulls are both treated as "no value", so a partially
        filled response still yields a result instead of an empty dict.

        Returns:
            Dict: keys 'title', 'artist', 'album', 'year', 'duration_seconds',
            'duration_formatted', 'cover_url'; an empty dict if parsing fails
        """
        try:
            # Extract title
            track_data = (response.get("data") or {}).get("trackUnion") or {}
            title = (track_data.get("name") or "").strip()

            # Extract artist
            artist_items = (track_data.get("firstArtist") or {}).get("items") or []
            artist = (artist_items[0].get("profile") or {}).get("name", "") if artist_items else ""

            # Extract album
            album_data = track_data.get("albumOfTrack") or {}
            album = album_data.get("name", "")

            # Extract year
            year = (album_data.get("date") or {}).get("year")

            # Extract duration
            duration_ms = (track_data.get("duration") or {}).get("totalMilliseconds")
            duration_seconds = duration_ms // 1000 if duration_ms else None

            # "is not None" so that a track shorter than one second is still formatted ("0:00")
            duration_formatted = self._format_seconds(duration_seconds) if duration_seconds is not None else None

            # Extract cover art: keep the source with the largest side
            cover_url = ""
            cover_sources = (album_data.get("coverArt") or {}).get("sources") or []

            if cover_sources:
                largest = max(
                    cover_sources,
                    key=lambda x: max(x.get("width") or 0, x.get("height") or 0)
                )
                cover_url = largest.get("url", "")

            return {
                'title': title,
                'artist': artist,
                'album': album,
                'year': year,
                'duration_seconds': duration_seconds,
                'duration_formatted': duration_formatted,
                'cover_url': cover_url
            }

        except Exception as e:
            console.print(f"[cyan]Error parsing Spotify response: {e}")
            return {}

    def _format_seconds(self, seconds: int) -> str:
        """Formats seconds into m:ss or h:mm:ss"""
        minutes, secs = divmod(seconds, 60)
        if seconds < 3600:
            return f"{minutes}:{secs:02d}"

        hours, minutes = divmod(minutes, 60)
        return f"{hours}:{minutes:02d}:{secs:02d}"

    def extract_playlist_tracks(self, playlist_url: str) -> List[Dict]:
        """
        Extracts all tracks from a Spotify playlist URL

        The playlist table is scrolled so the page requests further items. Scrolling
        stops when the announced total is reached or when no new item has arrived for
        a while; in the latter case the tracks loaded so far are returned.

        Args:
            playlist_url (str): Spotify URL of the playlist

        Returns:
            List[Dict]: Parsed tracks without (title, artist) duplicates, or an empty list on error
        """
        self.total_songs = None
        self.playlist_items = []
        console.print("[cyan]Extracting playlist tracks...")

        def handle_response(response):
            try:
                if "pathfinder/v2/query" in response.url and response.request.method == "POST":
                    json_data = response.json()
                    playlist = (json_data.get("data") or {}).get("playlistV2") or {}
                    content = playlist.get("content")
                    if content is None:
                        return

                    if self.total_songs is None:
                        self.total_songs = content.get("totalCount", 0)

                    for item in content.get("items", []):
                        parsed_item = self._parse_spotify_playlist_item(item)
                        if parsed_item:
                            self.playlist_items.append(parsed_item)

            except Exception as e:
                console.print(f"Error processing request: {e}")

        try:
            self.page.on("response", handle_response)
            try:
                self.page.goto(playlist_url)
                self.page.wait_for_timeout(5000)

                if self.total_songs is None:
                    console.print("Error: Could not extract the total number of songs.")
                    return []

                console.print(f"[cyan]The playlist has [green]{self.total_songs}[/green] tracks")

                try:
                    self.page.wait_for_selector('div[data-testid="playlist-tracklist"]', timeout=15000)
                except Exception:
                    console.print("Error: Playlist table did not load")
                    return []

                self._scroll_until_loaded()

            finally:
                # Detach the handler so a later call on the same page starts clean
                self.page.remove_listener("response", handle_response)

            # Remove duplicates based on title and artist (first occurrence wins)
            unique = {}
            for item in self.playlist_items:
                unique.setdefault((item.get("title", ""), item.get("artist", "")), item)

            return list(unique.values())

        except Exception as e:
            console.print(f"Error extracting playlist: {e}")
            return []

    def _scroll_until_loaded(self) -> None:
        """Scrolls to the last table row until all announced tracks arrived or loading stalls"""
        last_item_count = len(self.playlist_items)
        stalled_scrolls = 0

        with console.status("[cyan]Loading tracks...") as status:
            while len(self.playlist_items) < self.total_songs:
                status.update(f"[cyan]Progress: {len(self.playlist_items)}/{self.total_songs} tracks loaded")

                rows = self.page.locator('div[role="row"]')
                row_count = rows.count()
                if row_count:
                    rows.nth(row_count - 1).scroll_into_view_if_needed()

                self.page.wait_for_timeout(self._SCROLL_PAUSE_MS)

                current_items = len(self.playlist_items)
                if current_items > last_item_count:
                    last_item_count = current_items
                    stalled_scrolls = 0
                    continue

                # No progress: the total may never be reached (e.g. items that could not be
                # parsed), so stop after a bounded number of attempts instead of looping forever
                stalled_scrolls += 1
                if stalled_scrolls >= self._MAX_STALLED_SCROLLS:
                    console.print(
                        f"[yellow]Warning: only {current_items}/{self.total_songs} tracks could be loaded"
                    )
                    break

    def _parse_spotify_playlist_item(self, item: Dict) -> Dict:
        """
        Parses a single playlist item from Spotify API response

        Missing keys, JSON nulls and an empty cover source list all fall back to
        default values, so one incomplete field does not discard the whole item.

        Returns:
            Dict: keys 'title', 'artist', 'album', 'added_at', 'cover_art',
            'duration_ms', 'play_count'; an empty dict if parsing fails
        """
        try:
            # Extract added date
            added_at = (item.get("addedAt") or {}).get("isoString", "")

            # Extract track data
            track_data = (item.get("itemV2") or {}).get("data") or {}

            # Extract album name
            album_data = track_data.get("albumOfTrack") or {}
            album_name = album_data.get("name", "")

            # Extract cover art URL (first listed source)
            cover_sources = (album_data.get("coverArt") or {}).get("sources") or []
            cover_art = cover_sources[0].get("url", "") if cover_sources else ""

            # Extract artist name (first artist listed on the album entry)
            artist_items = (album_data.get("artists") or {}).get("items") or []
            artist_name = (artist_items[0].get("profile") or {}).get("name", "") if artist_items else ""

            # Extract track title
            track_title = track_data.get("name", "")

            # Extract duration in ms
            duration_ms = (track_data.get("trackDuration") or {}).get("totalMilliseconds", 0)

            # Extract play count
            play_count = track_data.get("playcount", 0)

            return {
                "title": track_title,
                "artist": artist_name,
                "album": album_name,
                "added_at": added_at,
                "cover_art": cover_art,
                "duration_ms": duration_ms,
                "play_count": play_count
            }

        except Exception as e:
            console.print(f"Error parsing playlist item: {e}")
            return {}
|