SpotDown 0.0.1__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,271 @@
1
+ # 05.04.2024
2
+
3
+ import re
4
+ import json
5
+ import difflib
6
+ from urllib.parse import quote_plus
7
+ from typing import Dict, List, Optional
8
+
9
+
10
+ # External imports
11
+ import httpx
12
+ from rich.console import Console
13
+
14
+
15
+ # Internal utils
16
+ from SpotDown.utils.headers import get_userAgent
17
+
18
+
19
+ # Variable
20
+ console = Console()
21
+
22
+
23
class YouTubeExtractor:
    """Scrape YouTube search results and rank them against a reference track.

    The class keeps no state; the context-manager methods exist only so it
    can be used in a ``with`` statement.
    """

    def __init__(self):
        pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release; returning None lets exceptions propagate.
        pass

    def search_videos(self, query: str, max_results: int = 5) -> List[Dict]:
        """
        Search for videos on YouTube

        Args:
            query (str): Search query
            max_results (int): Maximum number of results

        Returns:
            List[Dict]: List of found videos (empty on any error)
        """
        try:
            search_url = f"https://www.youtube.com/results?search_query={quote_plus(query)}"
            console.print(f"\n[bold blue]Searching on YouTube:[/bold blue] {query}")

            with httpx.Client(timeout=10) as client:
                response = client.get(search_url, headers={"User-Agent": get_userAgent()})
                html = response.text

            return self._extract_youtube_videos(html, max_results)

        except Exception as e:
            # Best-effort search: report the problem and hand back no results.
            print(f"YouTube search error: {e}")
            return []

    def sort_by_duration_similarity(self, youtube_results: List[Dict], target_duration: int):
        """
        Sort results in place by duration closest to the target

        Each result gets a 'duration_difference' key. Results without a known
        duration (or every result, when no target is given) get infinity.

        Args:
            youtube_results (List[Dict]): List of YouTube videos
            target_duration (int): Target duration in seconds
        """
        for result in youtube_results:
            duration = result.get('duration_seconds')
            if duration is not None and target_duration is not None:
                result['duration_difference'] = abs(duration - target_duration)
            else:
                result['duration_difference'] = float('inf')

        youtube_results.sort(key=lambda x: x['duration_difference'])

    def sort_by_affinity_and_duration(self, youtube_results: List[Dict], spotify_info: Dict):
        """
        Sort results in place by duration difference, title match/affinity,
        and channel match/affinity.

        Missing or None title/artist/channel values are treated as empty
        strings instead of raising.

        Args:
            youtube_results (List[Dict]): List of YouTube videos
            spotify_info (Dict): Spotify track info ('duration_seconds', 'title', 'artist')
        """
        target_duration = spotify_info.get('duration_seconds')
        # `or ''` also covers keys that are present but explicitly None.
        target_title = (spotify_info.get('title') or '').lower()
        target_artist = (spotify_info.get('artist') or '').lower()

        for result in youtube_results:

            # Duration difference
            duration = result.get('duration_seconds')
            if duration is not None and target_duration is not None:
                result['duration_difference'] = abs(duration - target_duration)
            else:
                result['duration_difference'] = float('inf')

            yt_title = (result.get('title') or '').lower()
            yt_channel = (result.get('channel') or '').lower()

            # Exact title match
            result['exact_title_match'] = yt_title == target_title

            # Title affinity
            result['title_affinity'] = difflib.SequenceMatcher(None, yt_title, target_title).ratio()

            # Exact channel match
            result['exact_channel_match'] = yt_channel == target_artist

            # Channel affinity
            result['channel_affinity'] = difflib.SequenceMatcher(None, yt_channel, target_artist).ratio()

        # Sort: lowest duration difference, exact title match, highest title affinity,
        # exact channel match, highest channel affinity
        youtube_results.sort(
            key=lambda x: (
                x['duration_difference'],
                not x['exact_title_match'],    # False (exact match) comes before True
                -x['title_affinity'],
                not x['exact_channel_match'],  # False (exact match) comes before True
                -x['channel_affinity']
            )
        )

    def _extract_youtube_videos(self, html: str, max_results: int) -> List[Dict]:
        """Extract up to `max_results` videos from a YouTube search page.

        Returns an empty list when the page has no ytInitialData assignment
        or when the embedded data cannot be processed.
        """
        try:
            marker = re.search(r'var ytInitialData\s*=\s*', html)
            if not marker:
                return []

            # Let the JSON decoder find the end of the object. A non-greedy
            # regex ending at "};" would cut the payload short whenever that
            # sequence appears inside a string value (e.g. a video title).
            yt_data, _ = json.JSONDecoder().raw_decode(html, marker.end())
            results = []

            # Navigate the data structure
            contents = (yt_data.get('contents', {})
                        .get('twoColumnSearchResultsRenderer', {})
                        .get('primaryContents', {})
                        .get('sectionListRenderer', {})
                        .get('contents', []))

            for section in contents:
                items = section.get('itemSectionRenderer', {}).get('contents', [])

                for item in items:
                    # Checked before appending so a limit of 0 yields nothing.
                    if len(results) >= max_results:
                        return results

                    if 'videoRenderer' not in item:
                        continue

                    video_info = self._parse_video_renderer(item['videoRenderer'])
                    if video_info:
                        results.append(video_info)

            return results

        except Exception as e:
            print(f"Video extraction error: {e}")
            return []

    def _parse_video_renderer(self, video_data: Dict) -> Optional[Dict]:
        """Build a flat video description from a 'videoRenderer' object.

        Returns None when the video id or title is missing, or on any
        parsing error.
        """
        try:
            video_id = video_data.get('videoId')
            if not video_id:
                return None

            # Title
            title = self._extract_text(video_data.get('title', {}))
            if not title:
                return None

            # Channel
            channel = self._extract_text(video_data.get('ownerText', {}))

            # Duration (compare with None so a 0-second value is still formatted)
            duration_seconds = self._extract_video_duration(video_data)
            duration_formatted = (
                self._format_seconds(duration_seconds) if duration_seconds is not None else None
            )

            # Views
            views = self._extract_text(video_data.get('viewCountText', {}))

            # Thumbnail (last entry of the list is used)
            thumbnails = video_data.get('thumbnail', {}).get('thumbnails', [])
            thumbnail = thumbnails[-1].get('url') if thumbnails else None

            # Published date
            published = self._extract_text(video_data.get('publishedTimeText', {}))

            return {
                'video_id': video_id,
                'url': f'https://www.youtube.com/watch?v={video_id}',
                'title': title,
                'channel': channel or 'Unknown channel',
                'duration_seconds': duration_seconds,
                'duration_formatted': duration_formatted or 'N/A',
                'views': views or 'N/A',
                'published': published or 'N/A',
                'thumbnail': thumbnail
            }

        except Exception as e:
            print(f"Video parsing error: {e}")
            return None

    def _extract_text(self, text_obj: Dict) -> str:
        """Extract text from YouTube objects.

        Accepts a plain string, or a dict carrying either a non-empty 'runs'
        list (whose 'text' pieces are concatenated) or a 'simpleText' value.
        Anything else yields an empty string.
        """
        if isinstance(text_obj, str):
            return text_obj

        if isinstance(text_obj, dict):
            if 'runs' in text_obj and text_obj['runs']:
                return ''.join(run.get('text', '') for run in text_obj['runs'])

            return text_obj.get('simpleText', '')

        return ''

    def _extract_video_duration(self, video_data: Dict) -> Optional[int]:
        """Extract video duration in seconds, or None if none can be parsed.

        'lengthText' is tried first; if it is absent or not a parseable
        duration, each 'thumbnailOverlayTimeStatusRenderer' overlay is tried
        in order.
        """
        # First attempt: direct lengthText
        length_text = self._extract_text(video_data.get('lengthText', {}))
        if length_text:
            seconds = self._parse_duration_string(length_text)
            if seconds is not None:
                return seconds

        # Second attempt: search in thumbnailOverlays
        for overlay in video_data.get('thumbnailOverlays', []):
            if 'thumbnailOverlayTimeStatusRenderer' not in overlay:
                continue

            time_status = overlay['thumbnailOverlayTimeStatusRenderer']
            overlay_text = self._extract_text(time_status.get('text', {}))
            if overlay_text:
                seconds = self._parse_duration_string(overlay_text)
                if seconds is not None:
                    return seconds

        return None

    def _parse_duration_string(self, duration_str: str) -> Optional[int]:
        """Convert duration string (e.g., '3:45' or '1:02:03') to seconds.

        Every character other than digits and ':' is dropped first. Returns
        None unless the remainder is exactly mm:ss or hh:mm:ss.
        """
        try:
            cleaned = re.sub(r'[^\d:]', '', duration_str)
            parts = cleaned.split(':')

            if len(parts) == 2:
                minutes, seconds = int(parts[0]), int(parts[1])
                return minutes * 60 + seconds

            if len(parts) == 3:
                hours, minutes, seconds = int(parts[0]), int(parts[1]), int(parts[2])
                return hours * 3600 + minutes * 60 + seconds

        except (ValueError, IndexError, TypeError):
            # Empty fields (e.g. a "LIVE" badge) or a non-string input.
            pass

        return None

    def _format_seconds(self, seconds: int) -> str:
        """Format seconds into m:ss (under one hour) or h:mm:ss."""
        if seconds < 3600:
            minutes, secs = divmod(seconds, 60)
            return f"{minutes}:{secs:02d}"

        hours, remainder = divmod(seconds, 3600)
        minutes, secs = divmod(remainder, 60)
        return f"{hours}:{minutes:02d}:{secs:02d}"
SpotDown/main.py ADDED
@@ -0,0 +1,139 @@
1
+ # 05.04.2024
2
+
3
+ import time
4
+ import logging
5
+ from typing import Dict, List, Optional
6
+
7
+
8
+ # Internal utils
9
+ from SpotDown.utils.console_utils import ConsoleUtils
10
+ from SpotDown.extractor.spotify_extractor import SpotifyExtractor
11
+ from SpotDown.extractor.youtube_extractor import YouTubeExtractor
12
+ from SpotDown.downloader.youtube_downloader import YouTubeDownloader
13
+
14
+
15
+
16
+ # Variable
17
+ console = ConsoleUtils()
18
+
19
+
20
def setup_logging():
    """Request ERROR-level root logging through logging.basicConfig."""
    minimum_level = logging.ERROR
    logging.basicConfig(level=minimum_level)
23
+
24
+
25
def extract_spotify_data(spotify_url: str, max_retry: int = 3) -> Optional[Dict]:
    """Fetch track info for a Spotify URL, trying up to `max_retry` times.

    A fresh SpotifyExtractor is opened for every attempt. Between failed
    attempts (but not after the last one) a warning is shown and the
    function waits one second. Returns the first truthy result, or None
    when every attempt fails.
    """
    attempt = 1
    while attempt <= max_retry:
        with SpotifyExtractor() as extractor:
            track_data = extractor.extract_track_info(spotify_url)

        if track_data:
            return track_data

        if attempt < max_retry:
            console.show_warning(f"Can't extract data from Spotify. Retrying ({attempt}/{max_retry})...")
            time.sleep(1)

        attempt += 1

    return None
36
+
37
+
38
def search_on_youtube(query: str, max_results: int, duration_seconds: Optional[int] = None) -> List[Dict]:
    """Run a YouTube search and, when a duration is known, rank the hits.

    Ranking only happens when the search produced results and
    `duration_seconds` is truthy; otherwise the results are returned in
    the order the search produced them.
    """
    with YouTubeExtractor() as extractor:
        found = extractor.search_videos(query, max_results)

        if not found or not duration_seconds:
            return found

        extractor.sort_by_affinity_and_duration(found, {'duration_seconds': duration_seconds})
        return found
45
+
46
+
47
def download_track(video_info: Dict, spotify_info: Dict) -> bool:
    """Announce and perform the download of one track.

    Shows the destination folder and file name (built from the Spotify
    artist and title), shows which YouTube video is being fetched, then
    returns whatever the downloader's download() call returns.
    """
    yt_downloader = YouTubeDownloader()

    target_dir = yt_downloader.file_utils.get_music_folder()
    target_name = yt_downloader.file_utils.create_filename(spotify_info['artist'], spotify_info['title'])

    console.show_download_info(target_dir, target_name)
    console.show_download_start(video_info['title'], video_info['url'])

    succeeded = yt_downloader.download(video_info, spotify_info)
    return succeeded
58
+
59
+
60
def handle_playlist_download(tracks: List[Dict], max_results: int):
    """Download every track of a playlist, picking the top YouTube match.

    For each track a normalized info dict is built (missing fields default
    to empty strings, a missing duration to None), YouTube is searched for
    "<artist> <title>", and the first result is downloaded without asking.
    Tracks with no results or a failed download are reported and skipped.

    Args:
        tracks (List[Dict]): Playlist entries; 'artist', 'title', 'album',
            'duration_ms' and 'cover_art' are read when present
        max_results (int): Maximum number of YouTube results per search
    """
    total = len(tracks)

    for idx, track in enumerate(tracks, 1):
        duration_ms = track.get('duration_ms')
        spotify_info = {
            'artist': track.get('artist', ''),
            'title': track.get('title', ''),
            'album': track.get('album', ''),
            'duration_seconds': int(duration_ms) // 1000 if duration_ms else None,
            'cover_url': track.get('cover_art', '')
        }

        console.start_message()
        # Use the normalized values so a track lacking 'artist'/'title'
        # does not abort the whole playlist with a KeyError.
        console.show_info(f"[purple]Downloading track [red]{idx}/{total}[/red]: [yellow]{spotify_info['artist']} - {spotify_info['title']}[/yellow]")

        query = f"{spotify_info['artist']} {spotify_info['title']}"
        youtube_results = search_on_youtube(query, max_results, spotify_info.get('duration_seconds'))

        if not youtube_results:
            console.show_error(f"No YouTube results for {spotify_info['artist']} - {spotify_info['title']}")
            continue

        success = download_track(youtube_results[0], spotify_info)
        if not success:
            console.show_error(f"Error downloading {spotify_info['artist']} - {spotify_info['title']}")
84
+
85
+
86
def handle_single_track_download(spotify_info: Dict, max_results: int):
    """Search YouTube for one track and download the result the user picks.

    The candidates are listed, a menu is shown, and the user's choice is
    read from the console. A choice of 0 leaves without downloading;
    any other choice is used as a 1-based index into the results.
    """
    search_query = f"{spotify_info['artist']} {spotify_info['title']}"
    candidates = search_on_youtube(search_query, max_results, spotify_info.get('duration_seconds'))

    if not candidates:
        console.show_error("No YouTube results found.")
        return

    candidate_count = len(candidates)
    console.display_youtube_results(candidates)
    console.show_download_menu(candidate_count)

    picked = console.get_download_choice(candidate_count)
    if picked == 0:
        console.show_warning("Exit without downloading.")
        return

    if not download_track(candidates[picked - 1], spotify_info):
        console.show_error("Error during download.")
107
+
108
+
109
def run():
    """Entry point: ask for a Spotify URL and download what it points to.

    URLs containing "/playlist/" are handled as playlists (every track is
    downloaded automatically); any other URL is handled as a single track
    with an interactive choice among the YouTube results.
    """
    setup_logging()

    console = ConsoleUtils()
    console.start_message()

    spotify_url = console.get_spotify_url()
    max_results = 5

    if "/playlist/" not in spotify_url:
        spotify_info = extract_spotify_data(spotify_url)
        if not spotify_info:
            console.show_error("Can't extract data from Spotify.")
            return

        time.sleep(1)
        console.start_message()
        console.display_spotify_info(spotify_info)

        handle_single_track_download(spotify_info, max_results)
        return

    # Playlist: the extractor is only needed to fetch the track list.
    with SpotifyExtractor() as playlist_extractor:
        tracks = playlist_extractor.extract_playlist_tracks(spotify_url)

    if not tracks:
        console.show_error("No tracks found in playlist.")
        return

    console.show_info(f"Found [green]{len(tracks)}[/green] tracks in playlist.")
    handle_playlist_download(tracks, max_results)
@@ -0,0 +1,6 @@
1
+ # 05.04.2024
2
+
3
+ from .file_utils import FileUtils
4
+ from .console_utils import ConsoleUtils
5
+
6
+ __all__ = ['FileUtils', 'ConsoleUtils']
@@ -0,0 +1,223 @@
1
+ # 05.04.2024
2
+
3
+ import os
4
+ import sys
5
+ import json
6
+ import logging
7
+ from typing import Any, List
8
+
9
+
10
+ # External imports
11
+ import httpx
12
+
13
+
14
+ # Internal utils
15
+ from SpotDown.utils.headers import get_headers
16
+
17
+
18
class ConfigManager:
    """Load config.json and serve typed, cached lookups of its values.

    The file is read from the executable's directory when running frozen,
    otherwise from the current working directory. If it does not exist it
    is downloaded first. Any failure to download or load the file ends the
    process with exit status 1.
    """

    def __init__(self, file_name: str = 'config.json') -> None:
        """
        Initialize the ConfigManager and load the configuration.

        Args:
            file_name (str, optional): Configuration file name. Default: 'config.json'.
        """
        # Determine the base path
        if getattr(sys, 'frozen', False):
            # If the application is frozen (e.g., PyInstaller)
            base_path = os.path.dirname(sys.executable)

        else:
            # Use the current working directory where the script is executed
            base_path = os.getcwd()

        # Initialize file paths
        self.file_path = os.path.join(base_path, file_name)

        # Parsed configuration: section -> key -> raw value
        self.config = {}
        # Converted values: "config.<section>.<key>" -> {data_type: value}
        self.cache = {}

        # Load the configuration
        self.load_config()

    def download_config(self) -> None:
        """Download config.json from the Arrowar/SpotDown GitHub repository.

        The response body is written to `self.file_path`. On any failure
        the error is logged and the process exits with status 1.
        """
        url = "https://raw.githubusercontent.com/Arrowar/SpotDown/main/config.json"
        try:
            with httpx.Client(timeout=10, headers=get_headers()) as client:
                response = client.get(url)
                response.raise_for_status()

                with open(self.file_path, "w", encoding="utf-8") as f:
                    f.write(response.text)

                logging.info("Downloaded config.json from Arrowar/SpotDown repository.")

        except Exception as e:
            logging.error(f"Failed to download config.json: {e}")
            sys.exit(1)

    def load_config(self) -> None:
        """Load the configuration file into `self.config`.

        Downloads the file first when it does not exist. Exits the process
        with status 1 if the file cannot be read or is not valid JSON.
        """
        if not os.path.exists(self.file_path):
            self.download_config()

        try:
            with open(self.file_path, 'r', encoding="utf-8") as f:
                self.config = json.load(f)

        except json.JSONDecodeError as e:
            logging.error(f"Error decoding config.json: {e}")
            sys.exit(1)

        except Exception as e:
            logging.error(f"Error loading config.json: {e}")
            sys.exit(1)

    def get(self, section: str, key: str, data_type: type = str) -> Any:
        """
        Read a value from the configuration.

        Converted values are cached per (section, key, data_type), so
        asking for the same key as a different type converts it again
        instead of returning the previously cached type.

        Args:
            section (str): Section in the configuration
            key (str): Key to read
            data_type (type, optional): Expected data type. Default: str

        Returns:
            Any: The key value converted to the specified data type

        Raises:
            ValueError: If the section or key is missing, or the value
                cannot be converted to `data_type`
        """
        cache_key = f"config.{section}.{key}"
        logging.info(f"Reading key: {cache_key}")

        # Check if the value is in the cache for this exact type
        cached_by_type = self.cache.get(cache_key)
        if cached_by_type is not None and data_type in cached_by_type:
            return cached_by_type[data_type]

        config_source = self.config

        # Check if the section and key exist
        if section not in config_source:
            raise ValueError(f"Section '{section}' not found in main configuration")

        if key not in config_source[section]:
            raise ValueError(f"Key '{key}' not found in section '{section}' of main configuration")

        # Get and convert the value
        value = config_source[section][key]
        converted_value = self._convert_to_data_type(value, data_type)

        # Save in cache
        self.cache.setdefault(cache_key, {})[data_type] = converted_value

        return converted_value

    def _convert_to_data_type(self, value: Any, data_type: type) -> Any:
        """
        Convert the value to the specified data type.

        int and float use the built-in constructors. bool treats the
        strings "yes", "true", "t" and "1" (any case) as True. list splits
        strings on commas and wraps other scalars. dict accepts only dicts.
        Any other data_type (including str) returns the value unchanged.

        Args:
            value (Any): Value to convert
            data_type (type): Target data type

        Returns:
            Any: Converted value

        Raises:
            ValueError: If the conversion fails
        """
        try:
            if data_type is int:
                return int(value)

            elif data_type is float:
                return float(value)

            elif data_type is bool:
                if isinstance(value, str):
                    return value.lower() in ("yes", "true", "t", "1")
                return bool(value)

            elif data_type is list:
                if isinstance(value, list):
                    return value
                if isinstance(value, str):
                    return [item.strip() for item in value.split(',')]
                return [value]

            elif data_type is dict:
                if isinstance(value, dict):
                    return value

                raise ValueError(f"Cannot convert {type(value).__name__} to dict")
            else:
                return value

        except Exception as e:
            logging.error(f"Error converting to {data_type.__name__}: {e}")
            # Chain the cause so the original traceback is not lost.
            raise ValueError(f"Cannot convert '{value}' to {data_type.__name__}: {str(e)}") from e

    def get_string(self, section: str, key: str) -> str:
        """Read a string from the main configuration."""
        return self.get(section, key, str)

    def get_int(self, section: str, key: str) -> int:
        """Read an integer from the main configuration."""
        return self.get(section, key, int)

    def get_float(self, section: str, key: str) -> float:
        """Read a float from the main configuration."""
        return self.get(section, key, float)

    def get_bool(self, section: str, key: str) -> bool:
        """Read a boolean from the main configuration."""
        return self.get(section, key, bool)

    def get_list(self, section: str, key: str) -> List[str]:
        """Read a list from the main configuration."""
        return self.get(section, key, list)

    def get_dict(self, section: str, key: str) -> dict:
        """Read a dictionary from the main configuration."""
        return self.get(section, key, dict)

    def set_key(self, section: str, key: str, value: Any) -> None:
        """
        Set a key in the in-memory configuration.

        The section is created when missing. Cached conversions of the key
        are discarded so later reads convert the new value. The change is
        not written to the configuration file. Errors are logged, not raised.

        Args:
            section (str): Section in the configuration
            key (str): Key to set
            value (Any): Value to associate with the key
        """
        try:
            config_target = self.config

            if section not in config_target:
                config_target[section] = {}

            config_target[section][key] = value

            # Drop stale conversions; the raw value must not be cached
            # as-is, or typed getters would return it unconverted.
            cache_key = f"config.{section}.{key}"
            self.cache.pop(cache_key, None)

            logging.info(f"Key '{key}' set in section '{section}' of main configuration")

        except Exception as e:
            error_msg = f"Error setting key '{key}' in section '{section}' of main configuration: {e}"
            logging.error(error_msg)

    def has_section(self, section: str) -> bool:
        """
        Check if a section exists in the configuration.

        Args:
            section (str): Section name

        Returns:
            bool: True if the section exists, False otherwise
        """
        return section in self.config
221
+
222
+
223
+ # Shared instance created at import time: the constructor calls load_config(), which downloads config.json if it is missing and exits the process if the file cannot be loaded.
+ config_manager = ConfigManager()