PyPI - SpotDown - Versions diffs - 0.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

SpotDown 0.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

SpotDown/downloader/youtube_downloader.py +20 -6
SpotDown/extractor/spotify_extractor.py +165 -278
SpotDown/extractor/youtube_extractor.py +15 -1
SpotDown/main.py +6 -8
SpotDown/upload/version.py +2 -2
SpotDown/utils/config_json.py +2 -2
SpotDown/utils/console_utils.py +1 -1
SpotDown/utils/ffmpeg_installer.py +374 -0
SpotDown/utils/file_utils.py +105 -1
SpotDown/utils/logger.py +90 -0
{spotdown-0.1.1.dist-info → spotdown-1.3.0.dist-info}/METADATA +63 -30
spotdown-1.3.0.dist-info/RECORD +21 -0
spotdown-0.1.1.dist-info/RECORD +0 -19
{spotdown-0.1.1.dist-info → spotdown-1.3.0.dist-info}/WHEEL +0 -0
{spotdown-0.1.1.dist-info → spotdown-1.3.0.dist-info}/entry_points.txt +0 -0
{spotdown-0.1.1.dist-info → spotdown-1.3.0.dist-info}/licenses/LICENSE +0 -0
{spotdown-0.1.1.dist-info → spotdown-1.3.0.dist-info}/top_level.txt +0 -0

SpotDown/downloader/youtube_downloader.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # 05.04.2024
 import io
+import logging
 import subprocess
 from typing import Dict
@@ -14,7 +15,7 @@ from rich.console import Console
 # Internal utils
 from SpotDown.utils.config_json import config_manager
-from SpotDown.utils.file_utils import FileUtils
+from SpotDown.utils.file_utils import file_utils
 # Variable
@@ -24,7 +25,7 @@ quality = config_manager.get("DOWNLOAD", "quality")
 class YouTubeDownloader:
     def __init__(self):
         self.console = Console()
-        self.file_utils = FileUtils()
+        self.file_utils = file_utils
     def download(self, video_info: Dict, spotify_info: Dict) -> bool:
         """
@@ -44,6 +45,7 @@ class YouTubeDownloader:
                 spotify_info.get('title', video_info.get('title', 'Unknown Title'))
             )
             output_path = music_folder / f"{filename}.%(ext)s"
+            logging.info(f"Start download: {video_info.get('url')} as {output_path}")
             # Download cover image if available
             cover_path = None
@@ -65,12 +67,15 @@ class YouTubeDownloader:
                                 img.save(cover_path, "JPEG")
                             self.console.print(f"[blue]Downloaded thumbnail: {cover_path}[/blue]")
+                            logging.info(f"Downloaded thumbnail: {cover_path}")
                         else:
                             cover_path = None
+                            logging.warning(f"Failed to download cover image, status code: {resp.status_code}")
                 except Exception as e:
                     self.console.print(f"[yellow]Unable to download cover: {e}[/yellow]")
+                    logging.error(f"Unable to download cover: {e}")
                     cover_path = None
             ytdlp_options = [
@@ -82,6 +87,7 @@ class YouTubeDownloader:
                 '--no-playlist',
                 '--embed-metadata',
                 '--add-metadata',
+                '--ffmpeg-location', self.file_utils.ffmpeg_path
             ]
             if cover_path and cover_path.exists():
@@ -94,6 +100,7 @@ class YouTubeDownloader:
                 console=self.console
             ) as progress:
                 task = progress.add_task("Downloading...", total=None)
+                logging.info(f"Running yt-dlp with options: {ytdlp_options}")
                 process = subprocess.run(
                     ytdlp_options,
                     capture_output=True,
@@ -102,30 +109,37 @@ class YouTubeDownloader:
                 progress.remove_task(task)
             if process.returncode == 0:
+                logging.info("yt-dlp finished successfully")
                 # Find the downloaded file
                 downloaded_files = list(music_folder.glob(f"{filename}.*"))
                 if downloaded_files:
                     self.console.print("[red]Download completed![/red]")
+                    logging.info(f"Download completed: {downloaded_files[0]}")
                     # Remove cover file after embedding
                     if cover_path and cover_path.exists():
                         try:
                             cover_path.unlink()
-                        except Exception:
-                            pass
+                            logging.info(f"Removed temporary cover file: {cover_path}")
+                        except Exception as ex:
+                            logging.warning(f"Failed to remove cover file: {ex}")
                     return True
                 else:
                     self.console.print("[yellow]Download apparently succeeded but file not found[/yellow]")
+                    logging.error("Download apparently succeeded but file not found")
                     return False
             else:
                 self.console.print("[red]Download error:[/red]")
                 self.console.print(f"[red]{process.stderr}[/red]")
+                logging.error(f"yt-dlp error: {process.stderr}")
                 return False
         except Exception as e:
             self.console.print(f"[red]Error during download: {e}[/red]")
+            logging.error(f"Error during download: {e}")
             return False

SpotDown/extractor/spotify_extractor.py CHANGED Viewed

@@ -1,331 +1,218 @@
 # 05.04.2024
 import os
+import re
+import sys
 import json
 import logging
 from typing import Dict, List, Optional
+from dotenv import load_dotenv
-# External imports
+# External library
+import spotipy
+from spotipy.oauth2 import SpotifyClientCredentials
 from rich.console import Console
-from playwright.sync_api import sync_playwright
-# Internal utils
-from SpotDown.utils.headers import get_userAgent
-from SpotDown.utils.config_json import config_manager
+from rich.progress import Progress
 # Variable
 console = Console()
-headless = config_manager.get("BROWSER", "headless")
-timeout = config_manager.get("BROWSER", "timeout")
+load_dotenv()
+def extract_track_id(spotify_url):
+    patterns = [
+        r'track/([a-zA-Z0-9]{22})',
+        r'spotify:track:([a-zA-Z0-9]{22})'
+    ]
+    for pattern in patterns:
+        match = re.search(pattern, spotify_url)
+        if match:
+            return match.group(1)
+    return None
+def extract_playlist_id(spotify_url):
+    patterns = [
+        r'playlist/([a-zA-Z0-9]{22})',
+        r'spotify:playlist:([a-zA-Z0-9]{22})'
+    ]
+    for pattern in patterns:
+        match = re.search(pattern, spotify_url)
+        if match:
+            return match.group(1)
+    return None
 class SpotifyExtractor:
     def __init__(self):
-        self.playwright = None
-        self.browser = None
-        self.context = None
-        self.page = None
-        self.user_agent = get_userAgent()
-        self.total_songs = None
-        self.playlist_items = []
+        client_id = os.getenv("SPOTIPY_CLIENT_ID")
+        client_secret = os.getenv("SPOTIPY_CLIENT_SECRET")
+        if not client_id or not client_secret:
+            console.print("[red]Missing Spotify credentials. Please create a .env file with SPOTIFY_CLIENT_ID and SPOTIPY_CLIENT_SECRET from https://developer.spotify.com/dashboard/")
+            sys.exit(1)
+        self.sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
+            client_id=client_id,
+            client_secret=client_secret
+        ))
+        logging.info("SpotifyExtractor initialized")
     def __enter__(self):
-        """Context manager to automatically handle the browser"""
-        self.playwright = sync_playwright().start()
-        self.browser = self.playwright.chromium.launch(headless=headless)
-        self.context = self.browser.new_context(
-            user_agent=self.user_agent, viewport={'width': 1280, 'height': 800}, ignore_https_errors=True
-        )
-        self.page = self.context.new_page()
         return self
     def __exit__(self, exc_type, exc_val, exc_tb):
-        """Automatically closes the browser"""
-        if self.browser:
-            self.browser.close()
-        if self.playwright:
-            self.playwright.stop()
+        pass
     def extract_track_info(self, spotify_url: str, save_json: bool = False) -> Optional[Dict]:
-        """
-        Extracts track information from a Spotify URL
+        track_id = extract_track_id(spotify_url)
+        if not track_id:
+            logging.error("Invalid Spotify track URL")
+            return None
-        Args:
-            spotify_url (str): Spotify URL of the track
-            save_json (bool): If True, saves the raw Spotify API JSON response in the 'log' folder
-        Returns:
-            Dict: Track information or None if an error occurs
-        """
         try:
-            console.print("[cyan]Analyzing Spotify URL ...")
-            # Extract Spotify data by intercepting API calls
-            spotify_data, raw_json = self._extract_spotify_data(spotify_url, return_raw=True)
-            if not spotify_data:
-                console.print("[cyan]Unable to extract data from Spotify")
-                return None
-            # Save the JSON response if requested
-            if save_json and raw_json:
-                try:
-                    log_dir = os.path.join(os.getcwd(), "log")
-                    os.makedirs(log_dir, exist_ok=True)
+            # Extract track info
+            track = self.sp.track(track_id)
-                    # Use title and artist for the filename if available
-                    filename = "spotify_response.json"
+            # Extract album info
+            album = track['album']
-                    if spotify_data.get("artist") and spotify_data.get("title"):
-                        safe_artist = "".join(c for c in spotify_data["artist"] if c.isalnum() or c in " _-")
-                        safe_title = "".join(c for c in spotify_data["title"] if c.isalnum() or c in " _-")
-                        filename = f"{safe_artist} - {safe_title}.json"
+            # Process extracted data
+            release_date = album['release_date']
+            year = release_date.split('-')[0] if release_date else None
-                    filepath = os.path.join(log_dir, filename)
-                    with open(filepath, "w", encoding="utf-8") as f:
-                        json.dump(raw_json, f, ensure_ascii=False, indent=2)
-                    console.print(f"[green]Spotify API response saved to {filepath}")
-                except Exception as e:
-                    console.print(f"[yellow]Warning: Could not save JSON file: {e}")
-            console.print(f"[cyan]Found: [red]{spotify_data['artist']} - {spotify_data['title']}[/red]")
-            return spotify_data
-        except Exception as e:
-            console.print(f"[cyan]Spotify extraction error: {e}")
-            return None
+            # Extract duration in seconds and formatted
+            duration_ms = track['duration_ms']
+            duration_seconds = duration_ms // 1000 if duration_ms else None
+            duration_formatted = f"{duration_seconds // 60}:{duration_seconds % 60:02d}" if duration_seconds else None
-    def _extract_spotify_data(self, spotify_url: str, return_raw: bool = False) -> Optional[Dict]:
-        """Extracts Spotify data by intercepting API calls"""
-        try:
-            api_responses = []
-            def handle_request(request):
-                if (request.method == "POST" and "/pathfinder/v2/query" in request.url):
-                    try:
-                        response = request.response()
-                        if response and response.status == 200:
-                            try:
-                                response_data = response.json()
-                                if self._is_valid_track_data(response_data):
-                                    api_responses.append(response_data)
-                                    console.print("[green]Valid API response found")
-                            except Exception as e:
-                                logging.warning(f"Error parsing API response: {e}")
-                    except Exception as e:
-                        logging.warning(f"Error accessing response: {e}")
-            self.page.on("requestfinished", handle_request)
-            self.page.goto(spotify_url)
-            # Poll every 100ms, stop waiting as soon as a valid response is found or after 10 seconds
-            # This avoids unnecessary waiting after a valid API response is received
-            for _ in range(timeout * 10):  # 100 * 100ms = 10000ms (10 seconds max)
-                if api_responses:
-                    break
-                self.page.wait_for_timeout(timeout * 10)
-            if not api_responses:
-                console.print("[cyan]No valid API responses found")
-                return (None, None) if return_raw else None
-            # Selects the most complete response
-            best_response = max(api_responses, key=lambda x: len(json.dumps(x)))
-            parsed = self._parse_spotify_response(best_response)
-            return (parsed, best_response) if return_raw else parsed
+            # Extract cover URL
+            cover_url = album['images'][0]['url'] if album['images'] else None
-        except Exception as e:
-            console.print(f"[cyan]❌ Spotify data extraction error: {e}")
-            return (None, None) if return_raw else None
+            # Extract artists
+            artists = [artist['name'] for artist in track['artists']]
-    def _is_valid_track_data(self, data: Dict) -> bool:
-        """Checks if the data contains valid track information"""
-        try:
-            track_union = data.get("data", {}).get("trackUnion", {})
-            return bool(track_union.get("name") and track_union.get("firstArtist", {}).get("items"))
-        except Exception:
-            return False
-    def _parse_spotify_response(self, response: Dict) -> Dict:
-        """Parses the Spotify API response"""
-        try:
-            # Extract title
-            track_data = response.get("data", {}).get("trackUnion", {})
-            title = track_data.get("name", "").strip()
-            # Extract artist
-            artist_items = track_data.get("firstArtist", {}).get("items", [])
-            artist = artist_items[0].get("profile", {}).get("name", "") if artist_items else ""
-            # Extract album
-            album_data = track_data.get("albumOfTrack", {})
-            album = album_data.get("name", "")
-            # Extract year
-            release_date = album_data.get("date", {})
-            year = release_date.get("year") if release_date else None
-            # Extract duration
-            duration_ms = track_data.get("duration", {}).get("totalMilliseconds")
-            duration_seconds = duration_ms // 1000 if duration_ms else None
-            duration_formatted = self._format_seconds(duration_seconds) if duration_seconds else None
-            # Extract cover art
-            cover_url = ""
-            cover_sources = album_data.get("coverArt", {}).get("sources", [])
-            if cover_sources:
-                largest = max(
-                    cover_sources,
-                    key=lambda x: max(x.get("width", 0), x.get("height", 0))
-                )
-                cover_url = largest.get("url", "")
-            return {
-                'title': title,
-                'artist': artist,
-                'album': album,
+            # Compile track info
+            track_info = {
+                'artist': ', '.join(artists),
+                'title': track['name'],
+                'album': album['name'],
                 'year': year,
                 'duration_seconds': duration_seconds,
                 'duration_formatted': duration_formatted,
                 'cover_url': cover_url
             }
+            if save_json:
+                log_dir = os.path.join(os.getcwd(), "log")
+                os.makedirs(log_dir, exist_ok=True)
+                # Create JSON file for track info
+                filename = f"{track_info['artist']} - {track_info['title']}.json"
+                filepath = os.path.join(log_dir, filename)
+                # Save track info to JSON
+                with open(filepath, "w", encoding="utf-8") as f:
+                    json.dump(track_info, f, ensure_ascii=False, indent=2)
+            return track_info
         except Exception as e:
-            console.print(f"[cyan]Error parsing Spotify response: {e}")
-            return {}
-    def _format_seconds(self, seconds: int) -> str:
-        """Formats seconds into mm:ss or hh:mm:ss"""
-        if seconds < 3600:
-            minutes = seconds // 60
-            secs = seconds % 60
-            return f"{minutes}:{secs:02d}"
-        else:
-            hours = seconds // 3600
-            minutes = (seconds % 3600) // 60
-            secs = seconds % 60
-            return f"{hours}:{minutes:02d}:{secs:02d}"
+            error_msg = str(e)
+            logging.error(f"Spotify extraction error: {error_msg}")
+            if "invalid_client" in error_msg:
+                console.print("[red]Spotify credentials are invalid. Please check your .env file and obtain valid credentials from https://developer.spotify.com/dashboard/. Exiting.")
+                sys.exit(0)
+            return None
     def extract_playlist_tracks(self, playlist_url: str) -> List[Dict]:
-        """Extracts all tracks from a Spotify playlist URL"""
-        self.total_songs = None
-        self.playlist_items = []
-        console.print("[cyan]Extracting playlist tracks...")
+        playlist_id = extract_playlist_id(playlist_url)
+        if not playlist_id:
+            logging.error("Invalid Spotify playlist URL")
+            return []
         try:
-            def handle_request(response):
-                try:
-                    if "pathfinder/v2/query" in response.url and response.request.method == "POST":
-                        json_data = response.json()
-                        if (
-                            "data" in json_data and
-                            "playlistV2" in json_data["data"] and
-                            "content" in json_data["data"]["playlistV2"]
-                        ):
-                            if self.total_songs is None:
-                                self.total_songs = json_data["data"]["playlistV2"]["content"].get("totalCount", 0)
-                            items = json_data["data"]["playlistV2"]["content"].get("items", [])
-                            for item in items:
-                                parsed_item = self._parse_spotify_playlist_item(item)
-                                if parsed_item:
-                                    self.playlist_items.append(parsed_item)
-                except Exception as e:
-                    console.print(f"Error processing request: {e}")
-            self.page.on("response", handle_request)
-            self.page.goto(playlist_url)
-            self.page.wait_for_timeout(5000)
-            if self.total_songs is None:
-                console.print("Error: Could not extract the total number of songs.")
-                return []
-            console.print(f"[cyan]The playlist has [green]{self.total_songs}[/green] tracks")
-            try:
-                self.page.wait_for_selector('div[data-testid="playlist-tracklist"]', timeout=15000)
-            except Exception:
-                console.print("Error: Playlist table did not load")
-                return []
-            last_item_count = len(self.playlist_items)
-            with console.status("[cyan]Loading tracks...") as status:
-                while len(self.playlist_items) < self.total_songs:
-                    status.update(f"[cyan]Progress: {len(self.playlist_items)}/{self.total_songs} tracks loaded")
-                    rows = self.page.locator('div[role="row"]')
-                    row_count = rows.count()
-                    last_row = rows.nth(row_count - 1)
-                    last_row.scroll_into_view_if_needed()
-                    current_items = len(self.playlist_items)
-                    if current_items > last_item_count:
-                        last_item_count = current_items
-                    self.page.wait_for_timeout(300)
+            # Extract playlist info
+            playlist = self.sp.playlist(playlist_id)
+            total_tracks = playlist['tracks']['total']
+            tracks_info = []
+            offset = 0
+            limit = 100
+            console.print(f"[green]Playlist has [red]{total_tracks}[/red] tracks.")
+            with Progress() as progress:
+                task = progress.add_task("[cyan]Extracting tracks...", total=total_tracks)
+                while offset < total_tracks:
+                    progress.update(task, advance=0, description=f"[cyan]Loading tracks {offset + 1}-{min(offset + limit, total_tracks)} of {total_tracks}...")
+                    results = self.sp.playlist_items(
+                        playlist_id,
+                        offset=offset,
+                        limit=limit,
+                        fields='items(track(name,artists(name),album(name,release_date,images),duration_ms))'
+                    )
+                    if not results['items']:
+                        break
+                    for idx, item in enumerate(results['items']):
+                        if item['track'] is None:
+                            continue
+                        # Extract track details
+                        track = item['track']
+                        # Extract album info
+                        album = track['album']
+                        # Process extracted data
+                        #release_date = album['release_date']
+                        #year = release_date.split('-')[0] if release_date else None
+                        # Extract duration in seconds
+                        duration_ms = track['duration_ms']
+                        duration_seconds = duration_ms // 1000 if duration_ms else None
+                        # Extract cover URL
+                        cover_url = album['images'][0]['url'] if album['images'] else None
+                        # Extract artists
+                        artists = [artist['name'] for artist in track['artists']]
+                        # Compile track info
+                        track_info = {
+                            "title": track['name'],
+                            "artist": ', '.join(artists),
+                            "album": album['name'],
+                            "added_at": None,
+                            "cover_art": cover_url,
+                            "duration_ms": duration_ms,
+                            "duration_seconds": duration_seconds,
+                            "play_count": None
+                        }
+                        # Append to list
+                        tracks_info.append(track_info)
+                        progress.update(task, advance=1)
+                    offset += limit
             # Remove duplicates based on title and artist
             unique = {}
-            for item in self.playlist_items:
+            for item in tracks_info:
                 key = (item.get("title", ""), item.get("artist", ""))
                 if key not in unique:
                     unique[key] = item
+            # Convert back to list
             unique_tracks = list(unique.values())
+            console.print(f"[green]Extracted [red]{len(unique_tracks)}[/red] unique tracks from playlist")
             return unique_tracks
-        except Exception as e:
-            console.print(f"Error extracting playlist: {e}")
-            return []
-    def _parse_spotify_playlist_item(self, item: Dict) -> Dict:
-        """Parses a single playlist item from Spotify API response"""
-        try:
-            # Extract added date
-            added_at = item.get("addedAt", {}).get("isoString", "")
-            # Extract track data
-            track_data = item.get("itemV2", {}).get("data", {})
-            # Extract album name
-            album_data = track_data.get("albumOfTrack", {})
-            album_name = album_data.get("name", "")
-            # Extract cover art URL
-            cover_art = album_data.get("coverArt", {}).get("sources", [{}])[0].get("url", "")
-            # Extract artist name
-            artist_items = album_data.get("artists", {}).get("items", [])
-            artist_name = artist_items[0].get("profile", {}).get("name", "") if artist_items else ""
-            # Extract track title
-            track_title = track_data.get("name", "")
-            # Extract duration in ms
-            duration_ms = track_data.get("trackDuration", {}).get("totalMilliseconds", 0)
-            # Extract play count
-            play_count = track_data.get("playcount", 0)
-            return {
-                "title": track_title,
-                "artist": artist_name,
-                "album": album_name,
-                "added_at": added_at,
-                "cover_art": cover_art,
-                "duration_ms": duration_ms,
-                "play_count": play_count
-            }
         except Exception as e:
-            console.print(f"Error parsing playlist item: {e}")
-            return {}
+            logging.error(f"Error extracting playlist: {e}")
+            return []

SpotDown 0.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

SpotDown 0.1.1__py3-none-any.whl → 1.3.0__py3-none-any.whl