PyPI - SpotDown - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.7__py3-none-any.whl - Mend

SpotDown 0.0.1__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

SpotDown/__init__.py +0 -0
SpotDown/downloader/__init__.py +0 -0
SpotDown/downloader/youtube_downloader.py +131 -0
SpotDown/extractor/__init__.py +6 -0
SpotDown/extractor/spotify_extractor.py +331 -0
SpotDown/extractor/youtube_extractor.py +271 -0
SpotDown/main.py +139 -0
SpotDown/utils/__init__.py +6 -0
SpotDown/utils/config_json.py +223 -0
SpotDown/utils/console_utils.py +188 -0
SpotDown/utils/file_utils.py +129 -0
SpotDown/utils/headers.py +18 -0
{spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/METADATA +182 -182
spotdown-0.0.7.dist-info/RECORD +18 -0
{spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/licenses/LICENSE +674 -674
spotdown-0.0.7.dist-info/top_level.txt +1 -0
spotdown-0.0.1.dist-info/RECORD +0 -6
spotdown-0.0.1.dist-info/top_level.txt +0 -1
{spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/WHEEL +0 -0
{spotdown-0.0.1.dist-info → spotdown-0.0.7.dist-info}/entry_points.txt +0 -0

SpotDown/extractor/youtube_extractor.py ADDED Viewed

@@ -0,0 +1,271 @@
+# 05.04.2024
+import re
+import json
+import difflib
+from urllib.parse import quote_plus
+from typing import Dict, List, Optional
+# External imports
+import httpx
+from rich.console import Console
+# Internal utils
+from SpotDown.utils.headers import get_userAgent
+# Module-level rich Console; search_videos() prints its "Searching on YouTube" status line through it
+console = Console()
+class YouTubeExtractor:
+    def __init__(self):
+        pass
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+    def search_videos(self, query: str, max_results: int = 5) -> List[Dict]:
+        """
+        Search for videos on YouTube
+        Args:
+            query (str): Search query
+            max_results (int): Maximum number of results
+        Returns:
+            List[Dict]: List of found videos
+        """
+        try:
+            search_url = f"https://www.youtube.com/results?search_query={quote_plus(query)}"
+            console.print(f"\n[bold blue]Searching on YouTube:[/bold blue] {query}")
+            with httpx.Client(timeout=10) as client:
+                response = client.get(search_url, headers={"User-Agent": get_userAgent()})
+                html = response.text
+            return self._extract_youtube_videos(html, max_results)
+        except Exception as e:
+            print(f"YouTube search error: {e}")
+            return []
+    def sort_by_duration_similarity(self, youtube_results: List[Dict], target_duration: int):
+        """
+        Sort results by duration closest to the target
+        Args:
+            youtube_results (List[Dict]): List of YouTube videos
+            target_duration (int): Target duration in seconds
+        """
+        for result in youtube_results:
+            if result.get('duration_seconds') is not None:
+                result['duration_difference'] = abs(result['duration_seconds'] - target_duration)
+            else:
+                result['duration_difference'] = float('inf')
+        youtube_results.sort(key=lambda x: x['duration_difference'])
+    def sort_by_affinity_and_duration(self, youtube_results: List[Dict], spotify_info: Dict):
+        """
+        Sort results by duration difference, title match/affinity, and channel match/affinity.
+        Args:
+            youtube_results (List[Dict]): List of YouTube videos
+            spotify_info (Dict): Spotify track info
+        """
+        target_duration = spotify_info.get('duration_seconds')
+        target_title = spotify_info.get('title', '').lower()
+        target_artist = spotify_info.get('artist', '').lower()
+        for result in youtube_results:
+            # Duration difference
+            if result.get('duration_seconds') is not None and target_duration is not None:
+                result['duration_difference'] = abs(result['duration_seconds'] - target_duration)
+            else:
+                result['duration_difference'] = float('inf')
+            yt_title = result.get('title', '').lower()
+            yt_channel = result.get('channel', '').lower()
+            # Exact title match
+            result['exact_title_match'] = yt_title == target_title
+            # Title affinity
+            result['title_affinity'] = difflib.SequenceMatcher(None, yt_title, target_title).ratio()
+            # Exact channel match
+            result['exact_channel_match'] = yt_channel == target_artist
+            # Channel affinity
+            result['channel_affinity'] = difflib.SequenceMatcher(None, yt_channel, target_artist).ratio()
+        # Sort: lowest duration difference, exact title match, highest title affinity,
+        # exact channel match, highest channel affinity
+        youtube_results.sort(
+            key=lambda x: (
+                x['duration_difference'],
+                not x['exact_title_match'],  # False (exact match) comes before True
+                -x['title_affinity'],
+                not x['exact_channel_match'],  # False (exact match) comes before True
+                -x['channel_affinity']
+            )
+        )
+    def _extract_youtube_videos(self, html: str, max_results: int) -> List[Dict]:
+        """Extract videos from YouTube HTML"""
+        try:
+            yt_match = re.search(r'var ytInitialData = ({.+?});', html, re.DOTALL)
+            if not yt_match:
+                return []
+            yt_data = json.loads(yt_match.group(1))
+            results = []
+            # Navigate the data structure
+            contents = (yt_data.get('contents', {})
+                       .get('twoColumnSearchResultsRenderer', {})
+                       .get('primaryContents', {})
+                       .get('sectionListRenderer', {})
+                       .get('contents', []))
+            for section in contents:
+                items = section.get('itemSectionRenderer', {}).get('contents', [])
+                for item in items:
+                    if 'videoRenderer' in item:
+                        video_info = self._parse_video_renderer(item['videoRenderer'])
+                        if video_info:
+                            results.append(video_info)
+                        if len(results) >= max_results:
+                            break
+                if len(results) >= max_results:
+                    break
+            return results
+        except Exception as e:
+            print(f"Video extraction error: {e}")
+            return []
+    def _parse_video_renderer(self, video_data: Dict) -> Optional[Dict]:
+        """Complete parsing of a video renderer"""
+        try:
+            video_id = video_data.get('videoId')
+            if not video_id:
+                return None
+            # Title
+            title = self._extract_text(video_data.get('title', {}))
+            if not title:
+                return None
+            # Channel
+            channel = self._extract_text(video_data.get('ownerText', {}))
+            # Duration
+            duration_seconds = self._extract_video_duration(video_data)
+            duration_formatted = self._format_seconds(duration_seconds) if duration_seconds else None
+            # Views
+            views = self._extract_text(video_data.get('viewCountText', {}))
+            # Thumbnail
+            thumbnails = video_data.get('thumbnail', {}).get('thumbnails', [])
+            thumbnail = thumbnails[-1].get('url') if thumbnails else None
+            # Published date
+            published = self._extract_text(video_data.get('publishedTimeText', {}))
+            return {
+                'video_id': video_id,
+                'url': f'https://www.youtube.com/watch?v={video_id}',
+                'title': title,
+                'channel': channel or 'Unknown channel',
+                'duration_seconds': duration_seconds,
+                'duration_formatted': duration_formatted or 'N/A',
+                'views': views or 'N/A',
+                'published': published or 'N/A',
+                'thumbnail': thumbnail
+            }
+        except Exception as e:
+            print(f"Video parsing error: {e}")
+            return None
+    def _extract_text(self, text_obj: Dict) -> str:
+        """Extract text from YouTube objects"""
+        if isinstance(text_obj, str):
+            return text_obj
+        if isinstance(text_obj, dict):
+            if 'runs' in text_obj and text_obj['runs']:
+                return ''.join(run.get('text', '') for run in text_obj['runs'])
+            return text_obj.get('simpleText', '')
+        return ''
+    def _extract_video_duration(self, video_data: Dict) -> Optional[int]:
+        """Extract video duration in seconds"""
+        # First attempt: direct lengthText
+        length_text = video_data.get('lengthText', {})
+        duration_str = self._extract_text(length_text)
+        if duration_str:
+            return self._parse_duration_string(duration_str)
+        # Second attempt: search in thumbnailOverlays
+        overlays = video_data.get('thumbnailOverlays', [])
+        for overlay in overlays:
+            if 'thumbnailOverlayTimeStatusRenderer' in overlay:
+                time_status = overlay['thumbnailOverlayTimeStatusRenderer']
+                duration_text = self._extract_text(time_status.get('text', {}))
+                if duration_text:
+                    return self._parse_duration_string(duration_text)
+        return None
+    def _parse_duration_string(self, duration_str: str) -> Optional[int]:
+        """Convert duration string (e.g., '3:45') to seconds"""
+        try:
+            duration_str = re.sub(r'[^\d:]', '', duration_str)
+            parts = duration_str.split(':')
+            if len(parts) == 2:
+                minutes, seconds = int(parts[0]), int(parts[1])
+                return minutes * 60 + seconds
+            elif len(parts) == 3:
+                hours, minutes, seconds = int(parts[0]), int(parts[1]), int(parts[2])
+                return hours * 3600 + minutes * 60 + seconds
+        except (ValueError, IndexError):
+            pass
+        return None
+    def _format_seconds(self, seconds: int) -> str:
+        """Format seconds into mm:ss or hh:mm:ss"""
+        if seconds < 3600:
+            minutes = seconds // 60
+            secs = seconds % 60
+            return f"{minutes}:{secs:02d}"
+        else:
+            hours = seconds // 3600
+            minutes = (seconds % 3600) // 60
+            secs = seconds % 60
+            return f"{hours}:{minutes:02d}:{secs:02d}"

SpotDown/main.py ADDED Viewed

@@ -0,0 +1,139 @@
+# 05.04.2024
+import time
+import logging
+from typing import Dict, List, Optional
+# Internal utils
+from SpotDown.utils.console_utils import ConsoleUtils
+from SpotDown.extractor.spotify_extractor import SpotifyExtractor
+from SpotDown.extractor.youtube_extractor import YouTubeExtractor
+from SpotDown.downloader.youtube_downloader import YouTubeDownloader
+# Module-level ConsoleUtils instance used by the helper functions in this module for user-facing messages
+console = ConsoleUtils()
+def setup_logging():
+    """Initialize basic logging configuration"""
+    logging.basicConfig(level=logging.ERROR)
+def extract_spotify_data(spotify_url: str, max_retry: int = 3) -> Optional[Dict]:
+    """Extract data from Spotify URL with retry mechanism"""
+    for attempt in range(1, max_retry + 1):
+        with SpotifyExtractor() as spotify_extractor:
+            spotify_info = spotify_extractor.extract_track_info(spotify_url)
+        if spotify_info:
+            return spotify_info
+        elif attempt < max_retry:
+            console.show_warning(f"Can't extract data from Spotify. Retrying ({attempt}/{max_retry})...")
+            time.sleep(1)
+    return None
+def search_on_youtube(query: str, max_results: int, duration_seconds: Optional[int] = None) -> List[Dict]:
+    """Search for videos on YouTube and sort them by relevance"""
+    with YouTubeExtractor() as youtube_extractor:
+        results = youtube_extractor.search_videos(query, max_results)
+        if results and duration_seconds:
+            youtube_extractor.sort_by_affinity_and_duration(results, {'duration_seconds': duration_seconds})
+        return results
+def download_track(video_info: Dict, spotify_info: Dict) -> bool:
+    """Download a single track and add metadata"""
+    downloader = YouTubeDownloader()
+    music_folder = downloader.file_utils.get_music_folder()
+    filename = downloader.file_utils.create_filename(
+        spotify_info['artist'],
+        spotify_info['title']
+    )
+    console.show_download_info(music_folder, filename)
+    console.show_download_start(video_info['title'], video_info['url'])
+    return downloader.download(video_info, spotify_info)
+def handle_playlist_download(tracks: List[Dict], max_results: int):
+    """Handle downloading all tracks from a playlist"""
+    for idx, track in enumerate(tracks, 1):
+        console.start_message()
+        console.show_info(f"[purple]Downloading track [red]{idx}/{len(tracks)}[/red]: [yellow]{track['artist']} - {track['title']}[/yellow]")
+        spotify_info = {
+            'artist': track.get('artist', ''),
+            'title': track.get('title', ''),
+            'album': track.get('album', ''),
+            'duration_seconds': int(track.get('duration_ms', 0)) // 1000 if track.get('duration_ms') else None,
+            'cover_url': track.get('cover_art', '')
+        }
+        query = f"{spotify_info['artist']} {spotify_info['title']}"
+        youtube_results = search_on_youtube(query, max_results, spotify_info.get('duration_seconds'))
+        if not youtube_results:
+            console.show_error(f"No YouTube results for {spotify_info['artist']} - {spotify_info['title']}")
+            continue
+        success = download_track(youtube_results[0], spotify_info)
+        if not success:
+            console.show_error(f"Error downloading {spotify_info['artist']} - {spotify_info['title']}")
+def handle_single_track_download(spotify_info: Dict, max_results: int):
+    """Handle downloading a single track"""
+    query = f"{spotify_info['artist']} {spotify_info['title']}"
+    youtube_results = search_on_youtube(query, max_results, spotify_info.get('duration_seconds'))
+    if not youtube_results:
+        console.show_error("No YouTube results found.")
+        return
+    console.display_youtube_results(youtube_results)
+    console.show_download_menu(len(youtube_results))
+    choice = console.get_download_choice(len(youtube_results))
+    if choice == 0:
+        console.show_warning("Exit without downloading.")
+        return
+    selected_video = youtube_results[choice - 1]
+    success = download_track(selected_video, spotify_info)
+    if not success:
+        console.show_error("Error during download.")
+def run():
+    """Main execution function"""
+    setup_logging()
+    console = ConsoleUtils()
+    console.start_message()
+    #git_update()
+    spotify_url = console.get_spotify_url()
+    max_results = 5
+    if "/playlist/" in spotify_url:
+        with SpotifyExtractor() as spotify_extractor:
+            tracks = spotify_extractor.extract_playlist_tracks(spotify_url)
+        if not tracks:
+            console.show_error("No tracks found in playlist.")
+            return
+        console.show_info(f"Found [green]{len(tracks)}[/green] tracks in playlist.")
+        handle_playlist_download(tracks, max_results)
+        return
+    spotify_info = extract_spotify_data(spotify_url)
+    if not spotify_info:
+        console.show_error("Can't extract data from Spotify.")
+        return
+    time.sleep(1)
+    console.start_message()
+    console.display_spotify_info(spotify_info)
+    handle_single_track_download(spotify_info, max_results)

SpotDown/utils/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+# 05.04.2024
+from .file_utils import FileUtils
+from .console_utils import ConsoleUtils
+__all__ = ['FileUtils', 'ConsoleUtils']

SpotDown/utils/config_json.py ADDED Viewed

@@ -0,0 +1,223 @@
+# 05.04.2024
+import os
+import sys
+import json
+import logging
+from typing import Any, List
+# External imports
+import httpx
+# Internal utils
+from SpotDown.utils.headers import get_headers
+class ConfigManager:
+    def __init__(self, file_name: str = 'config.json') -> None:
+        """
+        Initialize the ConfigManager.
+        Args:
+            file_name (str, optional): Configuration file name. Default: 'config.json'.
+        """
+        # Determine the base path - use the current working directory
+        if getattr(sys, 'frozen', False):
+            # If the application is frozen (e.g., PyInstaller)
+            base_path = os.path.dirname(sys.executable)
+        else:
+            # Use the current working directory where the script is executed
+            base_path = os.getcwd()
+        # Initialize file paths
+        self.file_path = os.path.join(base_path, file_name)
+        # Initialize data structures
+        self.config = {}
+        self.cache = {}
+        # Load the configuration
+        self.load_config()
+    def download_config(self) -> None:
+        """Download config.json from the Arrowar/SpotDown GitHub repository."""
+        url = "https://raw.githubusercontent.com/Arrowar/SpotDown/main/config.json"
+        try:
+            with httpx.Client(timeout=10, headers=get_headers()) as client:
+                response = client.get(url)
+                response.raise_for_status()
+                with open(self.file_path, "w", encoding="utf-8") as f:
+                    f.write(response.text)
+            logging.info("Downloaded config.json from Arrowar/SpotDown repository.")
+        except Exception as e:
+            logging.error(f"Failed to download config.json: {e}")
+            sys.exit(1)
+    def load_config(self) -> None:
+        """Load the configuration and initialize all settings."""
+        if not os.path.exists(self.file_path):
+            self.download_config()
+        try:
+            with open(self.file_path, 'r', encoding="utf-8") as f:
+                self.config = json.load(f)
+        except json.JSONDecodeError as e:
+            logging.error(f"Error decoding config.json: {e}")
+            sys.exit(1)
+        except Exception as e:
+            logging.error(f"Error loading config.json: {e}")
+            sys.exit(1)
+    def get(self, section: str, key: str, data_type: type = str) -> Any:
+        """
+        Read a value from the configuration.
+        Args:
+            section (str): Section in the configuration
+            key (str): Key to read
+            data_type (type, optional): Expected data type. Default: str
+        Returns:
+            Any: The key value converted to the specified data type
+        """
+        cache_key = f"config.{section}.{key}"
+        logging.info(f"Reading key: {cache_key}")
+        # Check if the value is in the cache
+        if cache_key in self.cache:
+            return self.cache[cache_key]
+        config_source = self.config
+        # Check if the section and key exist
+        if section not in config_source:
+            raise ValueError(f"Section '{section}' not found in main configuration")
+        if key not in config_source[section]:
+            raise ValueError(f"Key '{key}' not found in section '{section}' of main configuration")
+        # Get and convert the value
+        value = config_source[section][key]
+        converted_value = self._convert_to_data_type(value, data_type)
+        # Save in cache
+        self.cache[cache_key] = converted_value
+        return converted_value
+    def _convert_to_data_type(self, value: Any, data_type: type) -> Any:
+        """
+        Convert the value to the specified data type.
+        Args:
+            value (Any): Value to convert
+            data_type (type): Target data type
+        Returns:
+            Any: Converted value
+        """
+        try:
+            if data_type is int:
+                return int(value)
+            elif data_type is float:
+                return float(value)
+            elif data_type is bool:
+                if isinstance(value, str):
+                    return value.lower() in ("yes", "true", "t", "1")
+                return bool(value)
+            elif data_type is list:
+                if isinstance(value, list):
+                    return value
+                if isinstance(value, str):
+                    return [item.strip() for item in value.split(',')]
+                return [value]
+            elif data_type is dict:
+                if isinstance(value, dict):
+                    return value
+                raise ValueError(f"Cannot convert {type(value).__name__} to dict")
+            else:
+                return value
+        except Exception as e:
+            logging.error(f"Error converting to {data_type.__name__}: {e}")
+            raise ValueError(f"Cannot convert '{value}' to {data_type.__name__}: {str(e)}")
+    def get_string(self, section: str, key: str) -> str:
+        """Read a string from the main configuration."""
+        return self.get(section, key, str)
+    def get_int(self, section: str, key: str) -> int:
+        """Read an integer from the main configuration."""
+        return self.get(section, key, int)
+    def get_float(self, section: str, key: str) -> float:
+        """Read a float from the main configuration."""
+        return self.get(section, key, float)
+    def get_bool(self, section: str, key: str) -> bool:
+        """Read a boolean from the main configuration."""
+        return self.get(section, key, bool)
+    def get_list(self, section: str, key: str) -> List[str]:
+        """Read a list from the main configuration."""
+        return self.get(section, key, list)
+    def get_dict(self, section: str, key: str) -> dict:
+        """Read a dictionary from the main configuration."""
+        return self.get(section, key, dict)
+    def set_key(self, section: str, key: str, value: Any) -> None:
+        """
+        Set a key in the configuration.
+        Args:
+            section (str): Section in the configuration
+            key (str): Key to set
+            value (Any): Value to associate with the key
+        """
+        try:
+            config_target = self.config
+            if section not in config_target:
+                config_target[section] = {}
+            config_target[section][key] = value
+            # Update the cache
+            cache_key = f"config.{section}.{key}"
+            self.cache[cache_key] = value
+            logging.info(f"Key '{key}' set in section '{section}' of main configuration")
+        except Exception as e:
+            error_msg = f"Error setting key '{key}' in section '{section}' of main configuration: {e}"
+            logging.error(error_msg)
+    def has_section(self, section: str) -> bool:
+        """
+        Check if a section exists in the configuration.
+        Args:
+            section (str): Section name
+        Returns:
+            bool: True if the section exists, False otherwise
+        """
+        config_source = self.config
+        return section in config_source
+# Shared instance created at import time; constructing it runs load_config(), which downloads config.json when the file is missing and exits the process if loading fails
+config_manager = ConfigManager()

SpotDown 0.0.1__py3-none-any.whl → 0.0.7__py3-none-any.whl

SpotDown 0.0.1__py3-none-any.whl → 0.0.7__py3-none-any.whl