SpotDown 0.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  # 05.04.2024
2
2
 
3
3
  import io
4
+ import logging
4
5
  import subprocess
5
6
  from typing import Dict
6
7
 
@@ -14,7 +15,7 @@ from rich.console import Console
14
15
 
15
16
  # Internal utils
16
17
  from SpotDown.utils.config_json import config_manager
17
- from SpotDown.utils.file_utils import FileUtils
18
+ from SpotDown.utils.file_utils import file_utils
18
19
 
19
20
 
20
21
  # Variable
@@ -24,7 +25,7 @@ quality = config_manager.get("DOWNLOAD", "quality")
24
25
  class YouTubeDownloader:
25
26
  def __init__(self):
26
27
  self.console = Console()
27
- self.file_utils = FileUtils()
28
+ self.file_utils = file_utils
28
29
 
29
30
  def download(self, video_info: Dict, spotify_info: Dict) -> bool:
30
31
  """
@@ -44,6 +45,7 @@ class YouTubeDownloader:
44
45
  spotify_info.get('title', video_info.get('title', 'Unknown Title'))
45
46
  )
46
47
  output_path = music_folder / f"{filename}.%(ext)s"
48
+ logging.info(f"Start download: {video_info.get('url')} as {output_path}")
47
49
 
48
50
  # Download cover image if available
49
51
  cover_path = None
@@ -65,12 +67,15 @@ class YouTubeDownloader:
65
67
  img.save(cover_path, "JPEG")
66
68
 
67
69
  self.console.print(f"[blue]Downloaded thumbnail: {cover_path}[/blue]")
70
+ logging.info(f"Downloaded thumbnail: {cover_path}")
68
71
 
69
72
  else:
70
73
  cover_path = None
74
+ logging.warning(f"Failed to download cover image, status code: {resp.status_code}")
71
75
 
72
76
  except Exception as e:
73
77
  self.console.print(f"[yellow]Unable to download cover: {e}[/yellow]")
78
+ logging.error(f"Unable to download cover: {e}")
74
79
  cover_path = None
75
80
 
76
81
  ytdlp_options = [
@@ -82,6 +87,7 @@ class YouTubeDownloader:
82
87
  '--no-playlist',
83
88
  '--embed-metadata',
84
89
  '--add-metadata',
90
+ '--ffmpeg-location', self.file_utils.ffmpeg_path
85
91
  ]
86
92
 
87
93
  if cover_path and cover_path.exists():
@@ -94,6 +100,7 @@ class YouTubeDownloader:
94
100
  console=self.console
95
101
  ) as progress:
96
102
  task = progress.add_task("Downloading...", total=None)
103
+ logging.info(f"Running yt-dlp with options: {ytdlp_options}")
97
104
  process = subprocess.run(
98
105
  ytdlp_options,
99
106
  capture_output=True,
@@ -102,30 +109,37 @@ class YouTubeDownloader:
102
109
  progress.remove_task(task)
103
110
 
104
111
  if process.returncode == 0:
112
+ logging.info("yt-dlp finished successfully")
105
113
 
106
114
  # Find the downloaded file
107
115
  downloaded_files = list(music_folder.glob(f"{filename}.*"))
108
116
  if downloaded_files:
109
117
  self.console.print("[red]Download completed![/red]")
118
+ logging.info(f"Download completed: {downloaded_files[0]}")
110
119
 
111
120
  # Remove cover file after embedding
112
121
  if cover_path and cover_path.exists():
113
122
  try:
114
123
  cover_path.unlink()
115
- except Exception:
116
- pass
117
-
124
+ logging.info(f"Removed temporary cover file: {cover_path}")
125
+
126
+ except Exception as ex:
127
+ logging.warning(f"Failed to remove cover file: {ex}")
128
+
118
129
  return True
119
130
 
120
131
  else:
121
132
  self.console.print("[yellow]Download apparently succeeded but file not found[/yellow]")
133
+ logging.error("Download apparently succeeded but file not found")
122
134
  return False
123
-
135
+
124
136
  else:
125
137
  self.console.print("[red]Download error:[/red]")
126
138
  self.console.print(f"[red]{process.stderr}[/red]")
139
+ logging.error(f"yt-dlp error: {process.stderr}")
127
140
  return False
128
141
 
129
142
  except Exception as e:
130
143
  self.console.print(f"[red]Error during download: {e}[/red]")
144
+ logging.error(f"Error during download: {e}")
131
145
  return False
@@ -1,331 +1,218 @@
1
1
  # 05.04.2024
2
2
 
3
3
  import os
4
+ import re
5
+ import sys
4
6
  import json
5
7
  import logging
6
8
  from typing import Dict, List, Optional
9
+ from dotenv import load_dotenv
7
10
 
8
11
 
9
- # External imports
12
+ # External library
13
+ import spotipy
14
+ from spotipy.oauth2 import SpotifyClientCredentials
10
15
  from rich.console import Console
11
- from playwright.sync_api import sync_playwright
12
-
13
-
14
- # Internal utils
15
- from SpotDown.utils.headers import get_userAgent
16
- from SpotDown.utils.config_json import config_manager
16
+ from rich.progress import Progress
17
17
 
18
18
 
19
19
  # Variable
20
20
  console = Console()
21
- headless = config_manager.get("BROWSER", "headless")
22
- timeout = config_manager.get("BROWSER", "timeout")
21
+ load_dotenv()
22
+
23
+
24
+ def extract_track_id(spotify_url):
25
+ patterns = [
26
+ r'track/([a-zA-Z0-9]{22})',
27
+ r'spotify:track:([a-zA-Z0-9]{22})'
28
+ ]
29
+ for pattern in patterns:
30
+ match = re.search(pattern, spotify_url)
31
+ if match:
32
+ return match.group(1)
33
+ return None
34
+
35
+
36
+ def extract_playlist_id(spotify_url):
37
+ patterns = [
38
+ r'playlist/([a-zA-Z0-9]{22})',
39
+ r'spotify:playlist:([a-zA-Z0-9]{22})'
40
+ ]
41
+ for pattern in patterns:
42
+ match = re.search(pattern, spotify_url)
43
+ if match:
44
+ return match.group(1)
45
+ return None
23
46
 
24
47
 
25
48
  class SpotifyExtractor:
26
49
  def __init__(self):
27
- self.playwright = None
28
- self.browser = None
29
- self.context = None
30
- self.page = None
31
- self.user_agent = get_userAgent()
32
- self.total_songs = None
33
- self.playlist_items = []
50
+ client_id = os.getenv("SPOTIPY_CLIENT_ID")
51
+ client_secret = os.getenv("SPOTIPY_CLIENT_SECRET")
52
+
53
+ if not client_id or not client_secret:
54
+ console.print("[red]Missing Spotify credentials. Please create a .env file with SPOTIFY_CLIENT_ID and SPOTIPY_CLIENT_SECRET from https://developer.spotify.com/dashboard/")
55
+ sys.exit(1)
56
+
57
+ self.sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
58
+ client_id=client_id,
59
+ client_secret=client_secret
60
+ ))
61
+ logging.info("SpotifyExtractor initialized")
34
62
 
35
63
  def __enter__(self):
36
- """Context manager to automatically handle the browser"""
37
- self.playwright = sync_playwright().start()
38
- self.browser = self.playwright.chromium.launch(headless=headless)
39
- self.context = self.browser.new_context(
40
- user_agent=self.user_agent, viewport={'width': 1280, 'height': 800}, ignore_https_errors=True
41
- )
42
- self.page = self.context.new_page()
43
64
  return self
44
65
 
45
66
  def __exit__(self, exc_type, exc_val, exc_tb):
46
- """Automatically closes the browser"""
47
- if self.browser:
48
- self.browser.close()
49
- if self.playwright:
50
- self.playwright.stop()
67
+ pass
51
68
 
52
69
  def extract_track_info(self, spotify_url: str, save_json: bool = False) -> Optional[Dict]:
53
- """
54
- Extracts track information from a Spotify URL
70
+ track_id = extract_track_id(spotify_url)
71
+ if not track_id:
72
+ logging.error("Invalid Spotify track URL")
73
+ return None
55
74
 
56
- Args:
57
- spotify_url (str): Spotify URL of the track
58
- save_json (bool): If True, saves the raw Spotify API JSON response in the 'log' folder
59
-
60
- Returns:
61
- Dict: Track information or None if an error occurs
62
- """
63
75
  try:
64
- console.print("[cyan]Analyzing Spotify URL ...")
65
-
66
- # Extract Spotify data by intercepting API calls
67
- spotify_data, raw_json = self._extract_spotify_data(spotify_url, return_raw=True)
68
-
69
- if not spotify_data:
70
- console.print("[cyan]Unable to extract data from Spotify")
71
- return None
72
-
73
- # Save the JSON response if requested
74
- if save_json and raw_json:
75
- try:
76
- log_dir = os.path.join(os.getcwd(), "log")
77
- os.makedirs(log_dir, exist_ok=True)
76
+ # Extract track info
77
+ track = self.sp.track(track_id)
78
78
 
79
- # Use title and artist for the filename if available
80
- filename = "spotify_response.json"
79
+ # Extract album info
80
+ album = track['album']
81
81
 
82
- if spotify_data.get("artist") and spotify_data.get("title"):
83
- safe_artist = "".join(c for c in spotify_data["artist"] if c.isalnum() or c in " _-")
84
- safe_title = "".join(c for c in spotify_data["title"] if c.isalnum() or c in " _-")
85
- filename = f"{safe_artist} - {safe_title}.json"
82
+ # Process extracted data
83
+ release_date = album['release_date']
84
+ year = release_date.split('-')[0] if release_date else None
86
85
 
87
- filepath = os.path.join(log_dir, filename)
88
- with open(filepath, "w", encoding="utf-8") as f:
89
- json.dump(raw_json, f, ensure_ascii=False, indent=2)
90
-
91
- console.print(f"[green]Spotify API response saved to {filepath}")
92
-
93
- except Exception as e:
94
- console.print(f"[yellow]Warning: Could not save JSON file: {e}")
95
-
96
- console.print(f"[cyan]Found: [red]{spotify_data['artist']} - {spotify_data['title']}[/red]")
97
- return spotify_data
98
-
99
- except Exception as e:
100
- console.print(f"[cyan]Spotify extraction error: {e}")
101
- return None
86
+ # Extract duration in seconds and formatted
87
+ duration_ms = track['duration_ms']
88
+ duration_seconds = duration_ms // 1000 if duration_ms else None
89
+ duration_formatted = f"{duration_seconds // 60}:{duration_seconds % 60:02d}" if duration_seconds else None
102
90
 
103
- def _extract_spotify_data(self, spotify_url: str, return_raw: bool = False) -> Optional[Dict]:
104
- """Extracts Spotify data by intercepting API calls"""
105
- try:
106
- api_responses = []
107
-
108
- def handle_request(request):
109
- if (request.method == "POST" and "/pathfinder/v2/query" in request.url):
110
- try:
111
- response = request.response()
112
- if response and response.status == 200:
113
- try:
114
- response_data = response.json()
115
-
116
- if self._is_valid_track_data(response_data):
117
- api_responses.append(response_data)
118
- console.print("[green]Valid API response found")
119
-
120
- except Exception as e:
121
- logging.warning(f"Error parsing API response: {e}")
122
-
123
- except Exception as e:
124
- logging.warning(f"Error accessing response: {e}")
125
-
126
- self.page.on("requestfinished", handle_request)
127
- self.page.goto(spotify_url)
128
-
129
- # Poll every 100ms, stop waiting as soon as a valid response is found or after 10 seconds
130
- # This avoids unnecessary waiting after a valid API response is received
131
- for _ in range(timeout * 10): # 100 * 100ms = 10000ms (10 seconds max)
132
- if api_responses:
133
- break
134
-
135
- self.page.wait_for_timeout(timeout * 10)
136
-
137
- if not api_responses:
138
- console.print("[cyan]No valid API responses found")
139
- return (None, None) if return_raw else None
140
-
141
- # Selects the most complete response
142
- best_response = max(api_responses, key=lambda x: len(json.dumps(x)))
143
- parsed = self._parse_spotify_response(best_response)
144
- return (parsed, best_response) if return_raw else parsed
91
+ # Extract cover URL
92
+ cover_url = album['images'][0]['url'] if album['images'] else None
145
93
 
146
- except Exception as e:
147
- console.print(f"[cyan] Spotify data extraction error: {e}")
148
- return (None, None) if return_raw else None
94
+ # Extract artists
95
+ artists = [artist['name'] for artist in track['artists']]
149
96
 
150
- def _is_valid_track_data(self, data: Dict) -> bool:
151
- """Checks if the data contains valid track information"""
152
- try:
153
- track_union = data.get("data", {}).get("trackUnion", {})
154
- return bool(track_union.get("name") and track_union.get("firstArtist", {}).get("items"))
155
-
156
- except Exception:
157
- return False
158
-
159
- def _parse_spotify_response(self, response: Dict) -> Dict:
160
- """Parses the Spotify API response"""
161
- try:
162
- # Extract title
163
- track_data = response.get("data", {}).get("trackUnion", {})
164
- title = track_data.get("name", "").strip()
165
-
166
- # Extract artist
167
- artist_items = track_data.get("firstArtist", {}).get("items", [])
168
- artist = artist_items[0].get("profile", {}).get("name", "") if artist_items else ""
169
-
170
- # Extract album
171
- album_data = track_data.get("albumOfTrack", {})
172
- album = album_data.get("name", "")
173
-
174
- # Extract year
175
- release_date = album_data.get("date", {})
176
- year = release_date.get("year") if release_date else None
177
-
178
- # Extract duration
179
- duration_ms = track_data.get("duration", {}).get("totalMilliseconds")
180
- duration_seconds = duration_ms // 1000 if duration_ms else None
181
- duration_formatted = self._format_seconds(duration_seconds) if duration_seconds else None
182
-
183
- # Extract cover art
184
- cover_url = ""
185
- cover_sources = album_data.get("coverArt", {}).get("sources", [])
186
-
187
- if cover_sources:
188
- largest = max(
189
- cover_sources,
190
- key=lambda x: max(x.get("width", 0), x.get("height", 0))
191
- )
192
- cover_url = largest.get("url", "")
193
-
194
- return {
195
- 'title': title,
196
- 'artist': artist,
197
- 'album': album,
97
+ # Compile track info
98
+ track_info = {
99
+ 'artist': ', '.join(artists),
100
+ 'title': track['name'],
101
+ 'album': album['name'],
198
102
  'year': year,
199
103
  'duration_seconds': duration_seconds,
200
104
  'duration_formatted': duration_formatted,
201
105
  'cover_url': cover_url
202
106
  }
203
107
 
108
+ if save_json:
109
+ log_dir = os.path.join(os.getcwd(), "log")
110
+ os.makedirs(log_dir, exist_ok=True)
111
+
112
+ # Create JSON file for track info
113
+ filename = f"{track_info['artist']} - {track_info['title']}.json"
114
+ filepath = os.path.join(log_dir, filename)
115
+
116
+ # Save track info to JSON
117
+ with open(filepath, "w", encoding="utf-8") as f:
118
+ json.dump(track_info, f, ensure_ascii=False, indent=2)
119
+
120
+ return track_info
204
121
  except Exception as e:
205
- console.print(f"[cyan]Error parsing Spotify response: {e}")
206
- return {}
207
-
208
- def _format_seconds(self, seconds: int) -> str:
209
- """Formats seconds into mm:ss or hh:mm:ss"""
210
- if seconds < 3600:
211
- minutes = seconds // 60
212
- secs = seconds % 60
213
- return f"{minutes}:{secs:02d}"
214
-
215
- else:
216
- hours = seconds // 3600
217
- minutes = (seconds % 3600) // 60
218
- secs = seconds % 60
219
- return f"{hours}:{minutes:02d}:{secs:02d}"
122
+ error_msg = str(e)
123
+ logging.error(f"Spotify extraction error: {error_msg}")
124
+
125
+ if "invalid_client" in error_msg:
126
+ console.print("[red]Spotify credentials are invalid. Please check your .env file and obtain valid credentials from https://developer.spotify.com/dashboard/. Exiting.")
127
+ sys.exit(0)
128
+
129
+ return None
220
130
 
221
131
  def extract_playlist_tracks(self, playlist_url: str) -> List[Dict]:
222
- """Extracts all tracks from a Spotify playlist URL"""
223
- self.total_songs = None
224
- self.playlist_items = []
225
- console.print("[cyan]Extracting playlist tracks...")
132
+ playlist_id = extract_playlist_id(playlist_url)
226
133
 
134
+ if not playlist_id:
135
+ logging.error("Invalid Spotify playlist URL")
136
+ return []
137
+
227
138
  try:
228
- def handle_request(response):
229
- try:
230
- if "pathfinder/v2/query" in response.url and response.request.method == "POST":
231
- json_data = response.json()
232
- if (
233
- "data" in json_data and
234
- "playlistV2" in json_data["data"] and
235
- "content" in json_data["data"]["playlistV2"]
236
- ):
237
- if self.total_songs is None:
238
- self.total_songs = json_data["data"]["playlistV2"]["content"].get("totalCount", 0)
239
- items = json_data["data"]["playlistV2"]["content"].get("items", [])
240
- for item in items:
241
- parsed_item = self._parse_spotify_playlist_item(item)
242
- if parsed_item:
243
- self.playlist_items.append(parsed_item)
244
- except Exception as e:
245
- console.print(f"Error processing request: {e}")
246
-
247
- self.page.on("response", handle_request)
248
- self.page.goto(playlist_url)
249
- self.page.wait_for_timeout(5000)
250
-
251
- if self.total_songs is None:
252
- console.print("Error: Could not extract the total number of songs.")
253
- return []
254
-
255
- console.print(f"[cyan]The playlist has [green]{self.total_songs}[/green] tracks")
256
-
257
- try:
258
- self.page.wait_for_selector('div[data-testid="playlist-tracklist"]', timeout=15000)
259
- except Exception:
260
- console.print("Error: Playlist table did not load")
261
- return []
262
-
263
- last_item_count = len(self.playlist_items)
264
- with console.status("[cyan]Loading tracks...") as status:
265
- while len(self.playlist_items) < self.total_songs:
266
- status.update(f"[cyan]Progress: {len(self.playlist_items)}/{self.total_songs} tracks loaded")
267
- rows = self.page.locator('div[role="row"]')
268
- row_count = rows.count()
269
- last_row = rows.nth(row_count - 1)
270
- last_row.scroll_into_view_if_needed()
271
- current_items = len(self.playlist_items)
272
- if current_items > last_item_count:
273
- last_item_count = current_items
274
- self.page.wait_for_timeout(300)
139
+
140
+ # Extract playlist info
141
+ playlist = self.sp.playlist(playlist_id)
142
+ total_tracks = playlist['tracks']['total']
143
+ tracks_info = []
144
+ offset = 0
145
+ limit = 100
146
+ console.print(f"[green]Playlist has [red]{total_tracks}[/red] tracks.")
147
+
148
+ with Progress() as progress:
149
+ task = progress.add_task("[cyan]Extracting tracks...", total=total_tracks)
150
+
151
+ while offset < total_tracks:
152
+ progress.update(task, advance=0, description=f"[cyan]Loading tracks {offset + 1}-{min(offset + limit, total_tracks)} of {total_tracks}...")
153
+ results = self.sp.playlist_items(
154
+ playlist_id,
155
+ offset=offset,
156
+ limit=limit,
157
+ fields='items(track(name,artists(name),album(name,release_date,images),duration_ms))'
158
+ )
159
+
160
+ if not results['items']:
161
+ break
162
+
163
+ for idx, item in enumerate(results['items']):
164
+ if item['track'] is None:
165
+ continue
166
+
167
+ # Extract track details
168
+ track = item['track']
169
+
170
+ # Extract album info
171
+ album = track['album']
172
+
173
+ # Process extracted data
174
+ #release_date = album['release_date']
175
+ #year = release_date.split('-')[0] if release_date else None
176
+
177
+ # Extract duration in seconds
178
+ duration_ms = track['duration_ms']
179
+ duration_seconds = duration_ms // 1000 if duration_ms else None
180
+
181
+ # Extract cover URL
182
+ cover_url = album['images'][0]['url'] if album['images'] else None
183
+
184
+ # Extract artists
185
+ artists = [artist['name'] for artist in track['artists']]
186
+
187
+ # Compile track info
188
+ track_info = {
189
+ "title": track['name'],
190
+ "artist": ', '.join(artists),
191
+ "album": album['name'],
192
+ "added_at": None,
193
+ "cover_art": cover_url,
194
+ "duration_ms": duration_ms,
195
+ "duration_seconds": duration_seconds,
196
+ "play_count": None
197
+ }
198
+
199
+ # Append to list
200
+ tracks_info.append(track_info)
201
+ progress.update(task, advance=1)
202
+ offset += limit
275
203
 
276
204
  # Remove duplicates based on title and artist
277
205
  unique = {}
278
- for item in self.playlist_items:
206
+ for item in tracks_info:
279
207
  key = (item.get("title", ""), item.get("artist", ""))
280
208
  if key not in unique:
281
209
  unique[key] = item
282
-
210
+
211
+ # Convert back to list
283
212
  unique_tracks = list(unique.values())
213
+ console.print(f"[green]Extracted [red]{len(unique_tracks)}[/red] unique tracks from playlist")
284
214
  return unique_tracks
285
-
286
- except Exception as e:
287
- console.print(f"Error extracting playlist: {e}")
288
- return []
289
-
290
- def _parse_spotify_playlist_item(self, item: Dict) -> Dict:
291
- """Parses a single playlist item from Spotify API response"""
292
- try:
293
- # Extract added date
294
- added_at = item.get("addedAt", {}).get("isoString", "")
295
-
296
- # Extract track data
297
- track_data = item.get("itemV2", {}).get("data", {})
298
-
299
- # Extract album name
300
- album_data = track_data.get("albumOfTrack", {})
301
- album_name = album_data.get("name", "")
302
-
303
- # Extract cover art URL
304
- cover_art = album_data.get("coverArt", {}).get("sources", [{}])[0].get("url", "")
305
-
306
- # Extract artist name
307
- artist_items = album_data.get("artists", {}).get("items", [])
308
- artist_name = artist_items[0].get("profile", {}).get("name", "") if artist_items else ""
309
-
310
- # Extract track title
311
- track_title = track_data.get("name", "")
312
-
313
- # Extract duration in ms
314
- duration_ms = track_data.get("trackDuration", {}).get("totalMilliseconds", 0)
315
-
316
- # Extract play count
317
- play_count = track_data.get("playcount", 0)
318
-
319
- return {
320
- "title": track_title,
321
- "artist": artist_name,
322
- "album": album_name,
323
- "added_at": added_at,
324
- "cover_art": cover_art,
325
- "duration_ms": duration_ms,
326
- "play_count": play_count
327
- }
328
215
 
329
216
  except Exception as e:
330
- console.print(f"Error parsing playlist item: {e}")
331
- return {}
217
+ logging.error(f"Error extracting playlist: {e}")
218
+ return []