SpotDown 0.0.1__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
SpotDown/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,131 @@
1
+ # 05.04.2024
2
+
3
+ import io
4
+ import subprocess
5
+ from typing import Dict
6
+
7
+
8
+ # External imports
9
+ import httpx
10
+ from PIL import Image
11
+ from rich.progress import Progress, SpinnerColumn, TextColumn
12
+ from rich.console import Console
13
+
14
+
15
+ # Internal utils
16
+ from SpotDown.utils.config_json import config_manager
17
+ from SpotDown.utils.file_utils import FileUtils
18
+
19
+
20
# Variable
# Value looked up under ("DOWNLOAD", "quality") in the configuration;
# YouTubeDownloader.download hands it to yt-dlp as the --audio-quality argument.
quality = config_manager.get("DOWNLOAD", "quality")
22
+
23
+
24
class YouTubeDownloader:
    """Downloads the audio track of a YouTube video as mp3 by running the yt-dlp CLI."""

    def __init__(self):
        self.console = Console()
        self.file_utils = FileUtils()

    def download(self, video_info: Dict, spotify_info: Dict) -> bool:
        """
        Download YouTube video as mp3 using the configured audio quality

        Args:
            video_info (Dict): YouTube video info ('url' is required, 'title' is used as fallback title)
            spotify_info (Dict): Spotify track info ('artist', 'title' and 'cover_url' are read if present)

        Returns:
            bool: True if download succeeded
        """
        # Tracked outside the try block so the temporary cover is removed on every exit path
        cover_file = None

        try:
            music_folder = self.file_utils.get_music_folder()
            filename = self.file_utils.create_filename(
                spotify_info.get('artist', 'Unknown Artist'),
                spotify_info.get('title', video_info.get('title', 'Unknown Title'))
            )

            # yt-dlp treats '%' as the start of a template field, so a literal
            # one coming from the track name has to be doubled
            template_name = filename.replace('%', '%%')
            output_path = music_folder / f"{template_name}.%(ext)s"

            # Download cover image if available
            cover_path = None
            cover_url = spotify_info.get('cover_url')
            if cover_url:
                cover_file = music_folder / f"{filename}_cover.jpg"
                if self._download_cover(cover_url, cover_file):
                    cover_path = cover_file

            ytdlp_options = [
                'yt-dlp',
                '--extract-audio',
                '--audio-format', 'mp3',
                # Config values may be numeric, subprocess only accepts strings
                '--audio-quality', str(quality),
                '--output', str(output_path),
                '--no-playlist',
                '--embed-metadata',
                '--add-metadata',
            ]

            # NOTE(review): the cover file only acts as a switch for --embed-thumbnail,
            # its path is never handed to yt-dlp - confirm which image ends up embedded
            if cover_path and cover_path.exists():
                ytdlp_options += ['--embed-thumbnail']
            ytdlp_options.append(video_info['url'])

            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=self.console
            ) as progress:
                task = progress.add_task("Downloading...", total=None)
                process = subprocess.run(
                    ytdlp_options,
                    capture_output=True,
                    text=True
                )
                progress.remove_task(task)

            if process.returncode != 0:
                self.console.print("[red]Download error:[/red]")
                self.console.print(f"[red]{process.stderr}[/red]")
                return False

            # Find the downloaded file. Names are compared directly instead of
            # globbing so that characters like '[' or '*' in a title cannot hide it
            prefix = f"{filename}."
            if any(entry.name.startswith(prefix) for entry in music_folder.iterdir()):
                self.console.print("[red]Download completed![/red]")
                return True

            self.console.print("[yellow]Download apparently succeeded but file not found[/yellow]")
            return False

        except Exception as e:
            self.console.print(f"[red]Error during download: {e}[/red]")
            return False

        finally:
            # Remove cover file whether or not the download worked
            self._remove_cover(cover_file)

    def _download_cover(self, cover_url, cover_path) -> bool:
        """Fetch cover_url and store it as JPEG at cover_path; True when the file was written."""
        try:
            with httpx.Client(timeout=10) as client:
                resp = client.get(cover_url)

            if resp.status_code != 200:
                return False

            # Always save only as jpg, whatever format the server delivered
            img = Image.open(io.BytesIO(resp.content)).convert("RGB")
            img.save(cover_path, "JPEG")

            self.console.print(f"[blue]Downloaded thumbnail: {cover_path}[/blue]")
            return True

        except Exception as e:
            self.console.print(f"[yellow]Unable to download cover: {e}[/yellow]")
            return False

    def _remove_cover(self, cover_path) -> None:
        """Delete the temporary cover file; a file that was never written is not an error."""
        if cover_path is None:
            return

        try:
            cover_path.unlink()
        except FileNotFoundError:
            pass
        except OSError as e:
            self.console.print(f"[yellow]Unable to remove cover file: {e}[/yellow]")
@@ -0,0 +1,6 @@
1
+ # 05.04.2024
2
+
3
+ from .spotify_extractor import SpotifyExtractor
4
+ from .youtube_extractor import YouTubeExtractor
5
+
6
# Names exported by a star-import of this package
__all__ = ['SpotifyExtractor', 'YouTubeExtractor']
@@ -0,0 +1,331 @@
1
+ # 05.04.2024
2
+
3
+ import os
4
+ import json
5
+ import logging
6
+ from typing import Dict, List, Optional
7
+
8
+
9
+ # External imports
10
+ from rich.console import Console
11
+ from playwright.sync_api import sync_playwright
12
+
13
+
14
+ # Internal utils
15
+ from SpotDown.utils.headers import get_userAgent
16
+ from SpotDown.utils.config_json import config_manager
17
+
18
+
19
# Variable
# Module-wide rich console used for every status and error message below
console = Console()
# Forwarded to chromium.launch(headless=...) in SpotifyExtractor.__enter__
headless = config_manager.get("BROWSER", "headless")
# Sizes the polling loop in SpotifyExtractor._extract_spotify_data;
# presumably expressed in seconds - TODO confirm against the config file
timeout = config_manager.get("BROWSER", "timeout")
23
+
24
+
25
class SpotifyExtractor:
    """
    Reads track and playlist metadata from Spotify web pages.

    A Chromium page driven by Playwright opens the Spotify URL and the JSON
    answers of the "pathfinder/v2/query" requests made by the page are
    captured and parsed. Use it as a context manager: entering starts the
    browser, leaving closes the browser and stops the Playwright driver.
    """

    # Interval between two checks for a captured track response
    _POLL_INTERVAL_MS = 100

    # Playlist loading gives up after this many consecutive polls (300 ms each)
    # during which no new track arrived
    _MAX_STALLED_POLLS = 50

    def __init__(self):
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None
        self.user_agent = get_userAgent()
        self.total_songs = None
        self.playlist_items = []

    def __enter__(self):
        """Context manager to automatically handle the browser"""
        try:
            self.playwright = sync_playwright().start()
            self.browser = self.playwright.chromium.launch(headless=headless)
            self.context = self.browser.new_context(
                user_agent=self.user_agent, viewport={'width': 1280, 'height': 800}, ignore_https_errors=True
            )
            self.page = self.context.new_page()

        except Exception:
            # __exit__ is not called when __enter__ raises, so whatever was
            # already started has to be released here
            self.__exit__(None, None, None)
            raise

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Automatically closes the browser"""
        try:
            if self.browser:
                self.browser.close()

        finally:
            # The driver must be stopped even if closing the browser failed
            try:
                if self.playwright:
                    self.playwright.stop()
            finally:
                self.playwright = None
                self.browser = None
                self.context = None
                self.page = None

    def extract_track_info(self, spotify_url: str, save_json: bool = False) -> Optional[Dict]:
        """
        Extracts track information from a Spotify URL

        Args:
            spotify_url (str): Spotify URL of the track
            save_json (bool): If True, saves the raw Spotify API JSON response in the 'log' folder

        Returns:
            Dict: Track information or None if an error occurs
        """
        try:
            console.print("[cyan]Analyzing Spotify URL ...")

            # Extract Spotify data by intercepting API calls
            spotify_data, raw_json = self._extract_spotify_data(spotify_url, return_raw=True)

            if not spotify_data:
                console.print("[cyan]Unable to extract data from Spotify")
                return None

            # Save the JSON response if requested
            if save_json and raw_json:
                try:
                    log_dir = os.path.join(os.getcwd(), "log")
                    os.makedirs(log_dir, exist_ok=True)

                    # Use title and artist for the filename if available
                    filename = "spotify_response.json"

                    if spotify_data.get("artist") and spotify_data.get("title"):
                        safe_artist = "".join(c for c in spotify_data["artist"] if c.isalnum() or c in " _-")
                        safe_title = "".join(c for c in spotify_data["title"] if c.isalnum() or c in " _-")
                        filename = f"{safe_artist} - {safe_title}.json"

                    filepath = os.path.join(log_dir, filename)
                    with open(filepath, "w", encoding="utf-8") as f:
                        json.dump(raw_json, f, ensure_ascii=False, indent=2)

                    console.print(f"[green]Spotify API response saved to {filepath}")

                except Exception as e:
                    # Saving the log is best effort and must not fail the extraction
                    console.print(f"[yellow]Warning: Could not save JSON file: {e}")

            console.print(f"[cyan]Found: [red]{spotify_data['artist']} - {spotify_data['title']}[/red]")
            return spotify_data

        except Exception as e:
            console.print(f"[cyan]Spotify extraction error: {e}")
            return None

    def _extract_spotify_data(self, spotify_url: str, return_raw: bool = False) -> Optional[Dict]:
        """
        Extracts Spotify data by intercepting API calls

        Args:
            spotify_url (str): Spotify URL of the track
            return_raw (bool): If True, a (parsed, raw_response) tuple is returned instead of the parsed dict

        Returns:
            Parsed track dict, or None on failure. With return_raw=True a
            (parsed, raw) tuple, (None, None) on failure.
        """
        api_responses = []

        def handle_request(request):
            if (request.method == "POST" and "/pathfinder/v2/query" in request.url):
                try:
                    response = request.response()
                    if response and response.status == 200:
                        try:
                            response_data = response.json()

                            if self._is_valid_track_data(response_data):
                                api_responses.append(response_data)
                                console.print("[green]Valid API response found")

                        except Exception as e:
                            logging.warning(f"Error parsing API response: {e}")

                except Exception as e:
                    logging.warning(f"Error accessing response: {e}")

        listener_attached = False
        try:
            self.page.on("requestfinished", handle_request)
            listener_attached = True
            self.page.goto(spotify_url)

            # Poll at a fixed interval and stop as soon as a valid response is
            # found, so the total wait is `timeout` seconds whatever its value
            max_polls = max(1, int(timeout * 1000) // self._POLL_INTERVAL_MS)
            for _ in range(max_polls):
                if api_responses:
                    break

                self.page.wait_for_timeout(self._POLL_INTERVAL_MS)

            if not api_responses:
                console.print("[cyan]No valid API responses found")
                return (None, None) if return_raw else None

            # Selects the most complete response
            best_response = max(api_responses, key=lambda x: len(json.dumps(x)))
            parsed = self._parse_spotify_response(best_response)
            return (parsed, best_response) if return_raw else parsed

        except Exception as e:
            console.print(f"[cyan]❌ Spotify data extraction error: {e}")
            return (None, None) if return_raw else None

        finally:
            # Without this every call would leave one more handler on the page
            if listener_attached:
                try:
                    self.page.remove_listener("requestfinished", handle_request)
                except Exception as e:
                    logging.warning(f"Error removing request listener: {e}")

    def _is_valid_track_data(self, data: Dict) -> bool:
        """Checks if the data contains valid track information"""
        try:
            track_union = data.get("data", {}).get("trackUnion", {})
            return bool(track_union.get("name") and track_union.get("firstArtist", {}).get("items"))

        except Exception:
            return False

    def _parse_spotify_response(self, response: Dict) -> Dict:
        """
        Parses the Spotify API response

        Keys that are missing or explicitly null in the response fall back to
        an empty value instead of aborting the whole parse.

        Returns:
            Dict: title, artist, album, year, duration_seconds,
            duration_formatted and cover_url; {} if parsing fails
        """
        try:
            # Extract title
            track_data = (response.get("data") or {}).get("trackUnion") or {}
            title = (track_data.get("name") or "").strip()

            # Extract artist
            artist_items = (track_data.get("firstArtist") or {}).get("items") or []
            artist = (artist_items[0].get("profile") or {}).get("name", "") if artist_items else ""

            # Extract album
            album_data = track_data.get("albumOfTrack") or {}
            album = album_data.get("name", "")

            # Extract year
            release_date = album_data.get("date") or {}
            year = release_date.get("year")

            # Extract duration
            duration_ms = (track_data.get("duration") or {}).get("totalMilliseconds")
            duration_seconds = duration_ms // 1000 if duration_ms else None

            # A track shorter than one second has 0 seconds, which is still a valid duration
            duration_formatted = self._format_seconds(duration_seconds) if duration_seconds is not None else None

            # Extract cover art
            cover_url = ""
            cover_sources = (album_data.get("coverArt") or {}).get("sources") or []

            if cover_sources:
                # Dimensions may be null in the response, count those as 0
                largest = max(
                    cover_sources,
                    key=lambda x: max(x.get("width") or 0, x.get("height") or 0)
                )
                cover_url = largest.get("url", "")

            return {
                'title': title,
                'artist': artist,
                'album': album,
                'year': year,
                'duration_seconds': duration_seconds,
                'duration_formatted': duration_formatted,
                'cover_url': cover_url
            }

        except Exception as e:
            console.print(f"[cyan]Error parsing Spotify response: {e}")
            return {}

    def _format_seconds(self, seconds: int) -> str:
        """Formats seconds into mm:ss or hh:mm:ss"""
        if seconds < 3600:
            minutes = seconds // 60
            secs = seconds % 60
            return f"{minutes}:{secs:02d}"

        else:
            hours = seconds // 3600
            minutes = (seconds % 3600) // 60
            secs = seconds % 60
            return f"{hours}:{minutes:02d}:{secs:02d}"

    def extract_playlist_tracks(self, playlist_url: str) -> List[Dict]:
        """
        Extracts all tracks from a Spotify playlist URL

        The playlist page is scrolled row by row so that it requests further
        pages of tracks. Loading stops when the announced number of tracks has
        been received, or when no new track arrived for a while.

        Args:
            playlist_url (str): Spotify URL of the playlist

        Returns:
            List[Dict]: Parsed tracks without (title, artist) duplicates, [] on error
        """
        self.total_songs = None
        self.playlist_items = []
        console.print("[cyan]Extracting playlist tracks...")

        def handle_response(response):
            try:
                if "pathfinder/v2/query" in response.url and response.request.method == "POST":
                    json_data = response.json()
                    if (
                        "data" in json_data and
                        "playlistV2" in json_data["data"] and
                        "content" in json_data["data"]["playlistV2"]
                    ):
                        content = json_data["data"]["playlistV2"]["content"]
                        if self.total_songs is None:
                            self.total_songs = content.get("totalCount", 0)

                        for item in content.get("items", []):
                            parsed_item = self._parse_spotify_playlist_item(item)
                            if parsed_item:
                                self.playlist_items.append(parsed_item)

            except Exception as e:
                console.print(f"Error processing request: {e}")

        listener_attached = False
        try:
            self.page.on("response", handle_response)
            listener_attached = True
            self.page.goto(playlist_url)
            self.page.wait_for_timeout(5000)

            if self.total_songs is None:
                console.print("Error: Could not extract the total number of songs.")
                return []

            console.print(f"[cyan]The playlist has [green]{self.total_songs}[/green] tracks")

            try:
                self.page.wait_for_selector('div[data-testid="playlist-tracklist"]', timeout=15000)
            except Exception:
                console.print("Error: Playlist table did not load")
                return []

            last_item_count = len(self.playlist_items)
            stalled_polls = 0

            with console.status("[cyan]Loading tracks...") as status:
                while len(self.playlist_items) < self.total_songs:
                    status.update(f"[cyan]Progress: {len(self.playlist_items)}/{self.total_songs} tracks loaded")

                    # Scrolling to the last rendered row makes the page request more tracks
                    rows = self.page.locator('div[role="row"]')
                    row_count = rows.count()
                    if row_count:
                        rows.nth(row_count - 1).scroll_into_view_if_needed()

                    current_items = len(self.playlist_items)
                    if current_items > last_item_count:
                        last_item_count = current_items
                        stalled_polls = 0

                    else:
                        # Items that could not be parsed never arrive, so the announced
                        # total may be unreachable; stop instead of looping forever
                        stalled_polls += 1
                        if stalled_polls >= self._MAX_STALLED_POLLS:
                            console.print(
                                f"[yellow]Warning: only {current_items}/{self.total_songs} tracks could be loaded"
                            )
                            break

                    self.page.wait_for_timeout(300)

            # Remove duplicates based on title and artist
            unique = {}
            for item in self.playlist_items:
                key = (item.get("title", ""), item.get("artist", ""))
                if key not in unique:
                    unique[key] = item

            return list(unique.values())

        except Exception as e:
            console.print(f"Error extracting playlist: {e}")
            return []

        finally:
            # Without this every call would leave one more handler on the page
            if listener_attached:
                try:
                    self.page.remove_listener("response", handle_response)
                except Exception as e:
                    logging.warning(f"Error removing response listener: {e}")

    def _parse_spotify_playlist_item(self, item: Dict) -> Dict:
        """
        Parses a single playlist item from Spotify API response

        Keys that are missing, null or empty fall back to an empty value so
        that one incomplete field does not discard the whole track.

        Returns:
            Dict: title, artist, album, added_at, cover_art, duration_ms
            and play_count; {} if parsing fails
        """
        try:
            # Extract added date
            added_at = (item.get("addedAt") or {}).get("isoString", "")

            # Extract track data
            track_data = (item.get("itemV2") or {}).get("data") or {}

            # Extract album name
            album_data = track_data.get("albumOfTrack") or {}
            album_name = album_data.get("name", "")

            # Extract cover art URL (first listed source, "" when there is none)
            cover_sources = (album_data.get("coverArt") or {}).get("sources") or []
            cover_art = cover_sources[0].get("url", "") if cover_sources else ""

            # Extract artist name
            artist_items = (album_data.get("artists") or {}).get("items") or []
            artist_name = (artist_items[0].get("profile") or {}).get("name", "") if artist_items else ""

            # Extract track title
            track_title = track_data.get("name", "")

            # Extract duration in ms
            duration_ms = (track_data.get("trackDuration") or {}).get("totalMilliseconds", 0)

            # Extract play count
            play_count = track_data.get("playcount", 0)

            return {
                "title": track_title,
                "artist": artist_name,
                "album": album_name,
                "added_at": added_at,
                "cover_art": cover_art,
                "duration_ms": duration_ms,
                "play_count": play_count
            }

        except Exception as e:
            console.print(f"Error parsing playlist item: {e}")
            return {}