musicdl 2.1.11__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- musicdl/__init__.py +5 -5
- musicdl/modules/__init__.py +10 -3
- musicdl/modules/common/__init__.py +2 -0
- musicdl/modules/common/gdstudio.py +204 -0
- musicdl/modules/js/__init__.py +1 -0
- musicdl/modules/js/youtube/__init__.py +2 -0
- musicdl/modules/js/youtube/botguard.js +1 -0
- musicdl/modules/js/youtube/jsinterp.py +902 -0
- musicdl/modules/js/youtube/runner.js +2 -0
- musicdl/modules/sources/__init__.py +41 -10
- musicdl/modules/sources/apple.py +207 -0
- musicdl/modules/sources/base.py +256 -28
- musicdl/modules/sources/bilibili.py +118 -0
- musicdl/modules/sources/buguyy.py +148 -0
- musicdl/modules/sources/fangpi.py +153 -0
- musicdl/modules/sources/fivesing.py +108 -0
- musicdl/modules/sources/gequbao.py +148 -0
- musicdl/modules/sources/jamendo.py +108 -0
- musicdl/modules/sources/joox.py +104 -68
- musicdl/modules/sources/kugou.py +129 -76
- musicdl/modules/sources/kuwo.py +188 -68
- musicdl/modules/sources/lizhi.py +107 -0
- musicdl/modules/sources/migu.py +172 -66
- musicdl/modules/sources/mitu.py +140 -0
- musicdl/modules/sources/mp3juice.py +264 -0
- musicdl/modules/sources/netease.py +163 -115
- musicdl/modules/sources/qianqian.py +125 -77
- musicdl/modules/sources/qq.py +232 -94
- musicdl/modules/sources/tidal.py +342 -0
- musicdl/modules/sources/ximalaya.py +256 -0
- musicdl/modules/sources/yinyuedao.py +144 -0
- musicdl/modules/sources/youtube.py +238 -0
- musicdl/modules/utils/__init__.py +12 -4
- musicdl/modules/utils/appleutils.py +563 -0
- musicdl/modules/utils/data.py +107 -0
- musicdl/modules/utils/logger.py +211 -58
- musicdl/modules/utils/lyric.py +73 -0
- musicdl/modules/utils/misc.py +335 -23
- musicdl/modules/utils/modulebuilder.py +75 -0
- musicdl/modules/utils/neteaseutils.py +81 -0
- musicdl/modules/utils/qqutils.py +184 -0
- musicdl/modules/utils/quarkparser.py +105 -0
- musicdl/modules/utils/songinfoutils.py +54 -0
- musicdl/modules/utils/tidalutils.py +738 -0
- musicdl/modules/utils/youtubeutils.py +3606 -0
- musicdl/musicdl.py +184 -86
- musicdl-2.7.3.dist-info/LICENSE +203 -0
- musicdl-2.7.3.dist-info/METADATA +704 -0
- musicdl-2.7.3.dist-info/RECORD +53 -0
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/WHEEL +5 -5
- musicdl-2.7.3.dist-info/entry_points.txt +2 -0
- musicdl/modules/sources/baiduFlac.py +0 -69
- musicdl/modules/sources/xiami.py +0 -104
- musicdl/modules/utils/downloader.py +0 -80
- musicdl-2.1.11.dist-info/LICENSE +0 -22
- musicdl-2.1.11.dist-info/METADATA +0 -82
- musicdl-2.1.11.dist-info/RECORD +0 -24
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/top_level.txt +0 -0
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/zip-safe +0 -0

musicdl/modules/sources/__init__.py

@@ -1,10 +1,41 @@
-'''
-from .qq import
-from .
-from .
-from .
-from .
-from .
-from .
-from .
-from .
+'''initialize'''
+from .qq import QQMusicClient
+from .mitu import MituMusicClient
+from .joox import JooxMusicClient
+from .base import BaseMusicClient
+from .kuwo import KuwoMusicClient
+from .migu import MiguMusicClient
+from .tidal import TIDALMusicClient
+from .lizhi import LizhiMusicClient
+from .apple import AppleMusicClient
+from .kugou import KugouMusicClient
+from .fangpi import FangpiMusicClient
+from .buguyy import BuguyyMusicClient
+from ..utils import BaseModuleBuilder
+from .netease import NeteaseMusicClient
+from .youtube import YouTubeMusicClient
+from .gequbao import GequbaoMusicClient
+from .jamendo import JamendoMusicClient
+from ..common import GDStudioMusicClient
+from .mp3juice import MP3JuiceMusicClient
+from .fivesing import FiveSingMusicClient
+from .qianqian import QianqianMusicClient
+from .ximalaya import XimalayaMusicClient
+from .bilibili import BilibiliMusicClient
+from .yinyuedao import YinyuedaoMusicClient
+
+
+'''MusicClientBuilder'''
+class MusicClientBuilder(BaseModuleBuilder):
+    REGISTERED_MODULES = {
+        'QQMusicClient': QQMusicClient, 'MituMusicClient': MituMusicClient, 'BuguyyMusicClient': BuguyyMusicClient, 'GequbaoMusicClient': GequbaoMusicClient,
+        'MP3JuiceMusicClient': MP3JuiceMusicClient, 'YinyuedaoMusicClient': YinyuedaoMusicClient, 'LizhiMusicClient': LizhiMusicClient, 'XimalayaMusicClient': XimalayaMusicClient,
+        'JooxMusicClient': JooxMusicClient, 'KuwoMusicClient': KuwoMusicClient, 'KugouMusicClient': KugouMusicClient, 'FiveSingMusicClient': FiveSingMusicClient,
+        'QianqianMusicClient': QianqianMusicClient, 'MiguMusicClient': MiguMusicClient, 'NeteaseMusicClient': NeteaseMusicClient, 'YouTubeMusicClient': YouTubeMusicClient,
+        'TIDALMusicClient': TIDALMusicClient, 'AppleMusicClient': AppleMusicClient, 'FangpiMusicClient': FangpiMusicClient, 'GDStudioMusicClient': GDStudioMusicClient,
+        'JamendoMusicClient': JamendoMusicClient, 'BilibiliMusicClient': BilibiliMusicClient,
+    }
+
+
+'''BuildMusicClient'''
+BuildMusicClient = MusicClientBuilder().build
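
The rewritten `__init__.py` above registers every client class in `MusicClientBuilder.REGISTERED_MODULES` and exposes `BuildMusicClient = MusicClientBuilder().build` as the factory entry point. The sketch below is a minimal usage illustration, not taken from the package's documentation: it constructs a client directly and drives the `search`/`download` pipeline inherited from `BaseMusicClient` (shown further down in base.py), assuming `QQMusicClient` forwards its keyword arguments to `BaseMusicClient.__init__` the way `AppleMusicClient` does.

# Minimal usage sketch (assumption: QQMusicClient passes **kwargs through to BaseMusicClient).
from musicdl.modules.sources import QQMusicClient

client = QQMusicClient(search_size_per_source=5, work_dir='musicdl_outputs')
song_infos = client.search('some keyword')   # list of SongInfo objects, also saved to search_results.pkl
client.download(song_infos)                  # audio files and download_results.pkl land under work_dir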
musicdl/modules/sources/apple.py

@@ -0,0 +1,207 @@
+'''
+Function:
+    Implementation of AppleMusicClient: https://music.apple.com/{geo}/new
+Author:
+    Zhenchao Jin
+WeChat Official Account (微信公众号):
+    Charles的皮卡丘
+'''
+import re
+import os
+import copy
+import shutil
+from .base import BaseMusicClient
+from urllib.parse import urlencode
+from rich.progress import Progress
+from ..utils.appleutils import AppleMusicClientUtils, DownloadItem
+from ..utils import touchdir, legalizestring, resp2json, seconds2hms, usesearchheaderscookies, safeextractfromdict, usedownloadheaderscookies, SongInfo, SongInfoUtils
+
+
+'''AppleMusicClient'''
+class AppleMusicClient(BaseMusicClient):
+    source = 'AppleMusicClient'
+    def __init__(self, **kwargs):
+        super(AppleMusicClient, self).__init__(**kwargs)
+        # headers setting
+        self.default_search_headers = {
+            "authorization": f"Bearer {self._fetchtoken()}",
+            "accept": "*/*",
+            "accept-language": "en-US",
+            "origin": "https://music.apple.com",
+            "priority": "u=1, i",
+            "referer": "https://music.apple.com",
+            "sec-ch-ua": '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"Windows"',
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "same-site",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
+        }
+        self.default_download_headers = copy.deepcopy(self.default_search_headers)
+        self.default_headers = self.default_search_headers
+        # account info (whether a VIP user)
+        self.account_info = {}
+        if not self.default_cookies or 'media-user-token' not in self.default_cookies:
+            self.logger_handle.warning(f'{self.source}.__init__ >>> "media-user-token" is not configured, so song downloads are restricted and only the preview portion of the track can be downloaded.')
+        else:
+            self.account_info = self._fetchaccountinfo()
+        # init session
+        self._initsession()
+    '''_download'''
+    @usedownloadheaderscookies
+    def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = [], progress: Progress = None, song_progress_id: int = 0):
+        if isinstance(song_info.download_url, str): return super()._download(song_info=song_info, request_overrides=request_overrides, downloaded_song_infos=downloaded_song_infos, progress=progress, song_progress_id=song_progress_id)
+        request_overrides = request_overrides or {}
+        try:
+            touchdir(song_info.work_dir)
+            tmp_dir = os.path.join(self.work_dir.replace(' ', ''), self.source.replace(' ', ''), song_info.identifier.replace(' ', '')) # replace space to avoid bugs
+            touchdir(tmp_dir)
+            download_item: DownloadItem = song_info.download_url
+            download_item.final_path = os.path.join(tmp_dir, f'{song_info.identifier}.{song_info.ext}')
+            progress.update(song_progress_id, total=1)
+            progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name} (Downloading)")
+            AppleMusicClientUtils.download(download_item, work_dir=tmp_dir)
+            shutil.move(download_item.final_path, song_info.save_path)
+            progress.update(song_progress_id, total=os.path.getsize(song_info.save_path))
+            progress.advance(song_progress_id, os.path.getsize(song_info.save_path))
+            progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name} (Success)")
+            downloaded_song_infos.append(SongInfoUtils.fillsongtechinfo(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print))
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+        except Exception as err:
+            progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name} (Error: {err})")
+        return downloaded_song_infos
+    '''_fetchtoken'''
+    def _fetchtoken(self, request_overrides: dict = None):
+        request_overrides = request_overrides or {}
+        resp = self.get('https://music.apple.com', **request_overrides)
+        resp.raise_for_status()
+        home_page = resp.text
+        index_js_uri_match = re.search(r"/(assets/index-legacy[~-][^/\"]+\.js)", home_page)
+        index_js_uri = index_js_uri_match.group(1)
+        resp = self.get(f"https://music.apple.com/{index_js_uri}", **request_overrides)
+        resp.raise_for_status()
+        index_js_page = resp.text
+        token_match = re.search('(?=eyJh)(.*?)(?=")', index_js_page)
+        token = token_match.group(1)
+        return token
+    '''_fetchaccountinfo'''
+    def _fetchaccountinfo(self, request_overrides: dict = None):
+        if self.account_info or (not self.default_cookies or 'media-user-token' not in self.default_cookies): return self.account_info
+        request_overrides = request_overrides or {}
+        resp = self.get('https://amp-api.music.apple.com/v1/me/account?meta=subscription', **request_overrides)
+        resp.raise_for_status()
+        account_info = resp2json(resp=resp)
+        self.account_info = account_info
+        return self.account_info
+    '''_constructsearchurls'''
+    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
+        # init
+        rule, request_overrides = rule or {}, request_overrides or {}
+        account_info = self._fetchaccountinfo(request_overrides=request_overrides)
+        geo = safeextractfromdict(account_info, ['meta', 'subscription', 'storefront'], 'us')
+        # search rules
+        default_rule = {
+            "groups": "song", "l": "en-US", "offset": "0", "term": keyword, "types": "activities,albums,apple-curators,artists,curators,editorial-items,music-movies,music-videos,playlists,record-labels,songs,stations,tv-episodes,uploaded-videos",
+            "art[url]": "f", "extend": "artistUrl", "fields[albums]": "artistName,artistUrl,artwork,contentRating,editorialArtwork,editorialNotes,name,playParams,releaseDate,url,trackCount", "fields[artists]": "url,name,artwork",
+            "format[resources]": "map", "include[editorial-items]": "contents", "include[songs]": "artists", "limit": "10", "omit[resource]": "autos", "platform": "web", "relate[albums]": "artists", "relate[editorial-items]": "contents",
+            "relate[songs]": "albums", "types": "activities,albums,apple-curators,artists,curators,music-movies,music-videos,playlists,songs,stations,tv-episodes,uploaded-videos", "with": "lyrics,serverBubbles",
+        }
+        default_rule.update(rule)
+        geo = default_rule.pop('geo', geo)
+        # construct search urls based on search rules
+        base_url = f'https://amp-api-edge.music.apple.com/v1/catalog/{geo}/search?'
+        search_urls, page_size, count = [], self.search_size_per_page, 0
+        while self.search_size_per_source > count:
+            page_rule = copy.deepcopy(default_rule)
+            page_rule['limit'] = page_size
+            page_rule['offset'] = str(int(count // page_size) * page_size)
+            search_urls.append(base_url + urlencode(page_rule))
+            count += page_size
+        # return
+        return search_urls
+    '''_fetchlicenseexchange'''
+    def _fetchlicenseexchange(self, track_id: str, track_uri: str, challenge: str, key_system: str = "com.widevine.alpha", request_overrides: dict = None):
+        request_overrides = request_overrides or {}
+        json_data = {"challenge": challenge, "key-system": key_system, "uri": track_uri, "adamId": track_id, "isLibrary": False, "user-initiated": True}
+        resp = self.post("https://play.itunes.apple.com/WebObjects/MZPlay.woa/wa/acquireWebPlaybackLicense", json=json_data, **request_overrides)
+        resp.raise_for_status()
+        license_exchange = resp2json(resp)
+        return license_exchange
+    '''_search'''
+    @usesearchheaderscookies
+    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
+        # init
+        request_overrides = request_overrides or {}
+        # successful
+        try:
+            # --search results
+            resp = self.get(search_url, **request_overrides)
+            resp.raise_for_status()
+            search_results: dict = resp2json(resp)['resources']['songs']
+            for song_key, search_result in search_results.items():
+                # --download results
+                if not isinstance(search_result, dict) or ('id' not in search_result):
+                    continue
+                search_result['song_key'] = song_key
+                song_info = SongInfo(source=self.source)
+                # ----non-vip users
+                if not self.default_cookies or 'media-user-token' not in self.default_cookies:
+                    download_result = safeextractfromdict(search_result, ['attributes', 'previews', 0], {})
+                    download_url: str = download_result.get('url')
+                    if not download_url: continue
+                    song_info = SongInfo(
+                        source=self.source, download_url=download_url, download_url_status=self.audio_link_tester.test(download_url, request_overrides),
+                        ext=download_url.split('.')[-1].split('?')[0], raw_data={'search': search_result, 'download': download_result, 'lyric': {}}, lyric='NULL',
+                    )
+                    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(song_info.download_url, request_overrides)
+                    ext, file_size = song_info.download_url_status['probe_status']['ext'], song_info.download_url_status['probe_status']['file_size']
+                    if file_size and file_size != 'NULL': song_info.file_size = file_size
+                    if not song_info.file_size: song_info.file_size = 'NULL'
+                    if ext and ext != 'NULL': song_info.ext = ext
+                # ----vip users
+                else:
+                    account_info = self._fetchaccountinfo(request_overrides=request_overrides)
+                    geo = safeextractfromdict(account_info, ['meta', 'subscription', 'storefront'], 'us')
+                    params = {"extend": "extendedAssetUrls", "include": "lyrics,albums"}
+                    try:
+                        resp = self.get(f'https://amp-api.music.apple.com/v1/catalog/{geo}/songs/{search_result["id"]}', params=params, **request_overrides)
+                        resp.raise_for_status()
+                        download_result = resp2json(resp=resp)
+                        song_metadata = download_result['data'][0]
+                        resp = self.post("https://play.itunes.apple.com/WebObjects/MZPlay.woa/wa/webPlayback", json={"salableAdamId": search_result["id"], "language": "en-US"}, **request_overrides)
+                        resp.raise_for_status()
+                        webplayback = resp2json(resp=resp)
+                        download_result['webplayback'] = webplayback
+                    except:
+                        continue
+                    download_item: DownloadItem = AppleMusicClientUtils.getsongdownloaditem(song_metadata=song_metadata, webplayback=webplayback, get_license_exchange_func=self._fetchlicenseexchange, request_overrides=request_overrides)
+                    lyric_result, lyric = download_item.lyrics_results if download_item.lyrics_results else {}, download_item.lyrics.synced if download_item.lyrics.synced else 'NULL'
+                    download_url, ext = download_item, download_item.stream_info.file_format.value
+                    song_info = SongInfo(
+                        source=self.source, download_url=download_url, download_url_status=self.audio_link_tester.test(download_item.stream_info.audio_track.stream_url, request_overrides),
+                        ext=ext, raw_data={'search': search_result, 'download': download_result, 'lyric': lyric_result}, lyric=lyric,
+                    )
+                    song_info.download_url_status['probe_status'] = self.audio_link_tester.probe(download_item.stream_info.audio_track.stream_url, request_overrides)
+                    file_size = song_info.download_url_status['probe_status']['file_size']
+                    if file_size and file_size != 'NULL': song_info.file_size = file_size
+                    if not song_info.file_size: song_info.file_size = 'NULL'
+                if not song_info.with_valid_download_url: continue
+                song_info.update(
+                    duration=seconds2hms(float(safeextractfromdict(search_result, ['attributes', 'durationInMillis'], '0')) / 1000),
+                    song_name=legalizestring(safeextractfromdict(search_result, ['attributes', 'name'], 'NULL'), replace_null_string='NULL'),
+                    singers=legalizestring(safeextractfromdict(search_result, ['attributes', 'artistName'], 'NULL'), replace_null_string='NULL'),
+                    album=legalizestring(safeextractfromdict(search_result, ['attributes', 'albumName'], 'NULL'), replace_null_string='NULL'),
+                    identifier=search_result['id'],
+                )
+                # --append to song_infos
+                song_infos.append(song_info)
+                # --judgement for search_size
+                if self.strict_limit_search_size_per_page and len(song_infos) >= self.search_size_per_page: break
+            # --update progress
+            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Success)")
+        # failure
+        except Exception as err:
+            progress.update(progress_id, description=f"{self.source}.search >>> {search_url} (Error: {err})")
+        # return
+        return song_infos
musicdl/modules/sources/base.py
CHANGED

@@ -1,38 +1,266 @@
 '''
 Function:
-
+    Implementation of BaseMusicClient
 Author:
-
-
+    Zhenchao Jin
+WeChat Official Account (微信公众号):
     Charles的皮卡丘
 '''
+import os
+import copy
+import pickle
 import requests
-from
+from rich.text import Text
+from itertools import chain
+from datetime import datetime
+from rich.progress import Task
+from freeproxy import freeproxy
+from fake_useragent import UserAgent
+from pathvalidate import sanitize_filepath
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from ..utils import (
+    LoggerHandle, AudioLinkTester, SongInfo, SongInfoUtils, touchdir, usedownloadheaderscookies, usesearchheaderscookies, cookies2dict, cookies2string
+)
+from rich.progress import (
+    Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, DownloadColumn, TransferSpeedColumn,
+    TimeRemainingColumn, MofNCompleteColumn, ProgressColumn,
+)


-'''
-class
-def __init__(self
-self.
+'''AudioAwareColumn'''
+class AudioAwareColumn(ProgressColumn):
+    def __init__(self):
+        super(AudioAwareColumn, self).__init__()
+        self._download_col = DownloadColumn()
+    '''render'''
+    def render(self, task: Task):
+        kind = task.fields.get("kind", "download")
+        if kind == "overall":
+            completed = int(task.completed)
+            total = int(task.total) if task.total is not None else 0
+            return Text(f"{completed}/{total} audios")
+        else:
+            return self._download_col.render(task)
+
+
+'''BaseMusicClient'''
+class BaseMusicClient():
+    source = 'BaseMusicClient'
+    def __init__(self, search_size_per_source: int = 5, auto_set_proxies: bool = False, random_update_ua: bool = False, max_retries: int = 5, maintain_session: bool = False,
+                 logger_handle: LoggerHandle = None, disable_print: bool = False, work_dir: str = 'musicdl_outputs', proxy_sources: list = None, default_search_cookies: dict | str = None,
+                 default_download_cookies: dict | str = None, search_size_per_page: int = 10, strict_limit_search_size_per_page: bool = True, quark_parser_config: dict = None):
+        # set up work dir
+        touchdir(work_dir)
+        # set attributes
+        self.search_size_per_source = search_size_per_source
+        self.auto_set_proxies = auto_set_proxies
+        self.random_update_ua = random_update_ua
+        self.max_retries = max_retries
+        self.maintain_session = maintain_session
+        self.logger_handle = logger_handle if logger_handle else LoggerHandle()
+        self.disable_print = disable_print
+        self.work_dir = work_dir
+        self.proxy_sources = proxy_sources
+        self.default_search_cookies = cookies2dict(default_search_cookies)
+        self.default_download_cookies = cookies2dict(default_download_cookies)
+        self.default_cookies = self.default_search_cookies
+        self.search_size_per_page = min(search_size_per_source, search_size_per_page)
+        self.strict_limit_search_size_per_page = strict_limit_search_size_per_page
+        self.quark_parser_config = quark_parser_config or {}
+        # init requests.Session
+        self.default_search_headers = {'User-Agent': UserAgent().random}
+        self.default_download_headers = {'User-Agent': UserAgent().random}
+        self.quark_default_download_headers = {
+            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.225.400 QQBrowser/12.2.5544.400',
+            'origin': 'https://pan.quark.cn', 'referer': 'https://pan.quark.cn/', 'accept-language': 'zh-CN,zh;q=0.9', 'cookie': cookies2string(self.quark_parser_config.get('cookies', '')),
+        }
+        self.quark_default_download_cookies = {} # placeholder, useless now
+        self.default_headers = self.default_search_headers
+        self._initsession()
+        # proxied_session_client
+        self.proxied_session_client = freeproxy.ProxiedSessionClient(
+            proxy_sources=['ProxiflyProxiedSession'] if proxy_sources is None else proxy_sources,
+            disable_print=True
+        ) if auto_set_proxies else None
+    '''_initsession'''
+    def _initsession(self):
         self.session = requests.Session()
-self.session.
-self.
-self.
-'''
-def
-raise NotImplementedError('not be implemented
-'''
-def
-
-
-
-
-
+        self.session.headers = self.default_headers
+        self.audio_link_tester = AudioLinkTester(headers=copy.deepcopy(self.default_download_headers), cookies=copy.deepcopy(self.default_download_cookies))
+        self.quark_audio_link_tester = AudioLinkTester(headers=copy.deepcopy(self.quark_default_download_headers), cookies=copy.deepcopy(self.quark_default_download_cookies))
+    '''_constructsearchurls'''
+    def _constructsearchurls(self, keyword: str, rule: dict = None, request_overrides: dict = None):
+        raise NotImplementedError('not to be implemented')
+    '''_constructuniqueworkdir'''
+    def _constructuniqueworkdir(self, keyword: str):
+        time_stamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        work_dir = os.path.join(self.work_dir, self.source, f'{time_stamp} {keyword.replace(" ", "")}')
+        touchdir(work_dir)
+        return work_dir
+    '''_removeduplicates'''
+    def _removeduplicates(self, song_infos: list[SongInfo] = None) -> list[SongInfo]:
+        unique_song_infos, identifiers = [], set()
+        for song_info in song_infos:
+            if song_info.identifier in identifiers: continue
+            identifiers.add(song_info.identifier)
+            unique_song_infos.append(song_info)
+        return unique_song_infos
+    '''_search'''
+    @usesearchheaderscookies
+    def _search(self, keyword: str = '', search_url: str = '', request_overrides: dict = None, song_infos: list = [], progress: Progress = None, progress_id: int = 0):
+        raise NotImplementedError('not be implemented')
+    '''search'''
+    @usesearchheaderscookies
+    def search(self, keyword: str, num_threadings=5, request_overrides: dict = None, rule: dict = None):
+        # init
+        rule, request_overrides = rule or {}, request_overrides or {}
+        # logging
+        self.logger_handle.info(f'Start to search music files using {self.source}.', disable_print=self.disable_print)
+        # construct search urls
+        search_urls = self._constructsearchurls(keyword=keyword, rule=rule, request_overrides=request_overrides)
+        # multi threadings for searching music files
+        with Progress(TextColumn("{task.description}"), BarColumn(bar_width=None), MofNCompleteColumn(), TimeRemainingColumn()) as progress:
+            progress_id = progress.add_task(f"{self.source}.search >>> completed (0/{len(search_urls)})", total=len(search_urls))
+            song_infos, submitted_tasks = {}, []
+            with ThreadPoolExecutor(max_workers=num_threadings) as pool:
+                for search_url_idx, search_url in enumerate(search_urls):
+                    song_infos[str(search_url_idx)] = []
+                    submitted_tasks.append(pool.submit(
+                        self._search, keyword, search_url, request_overrides, song_infos[str(search_url_idx)], progress, progress_id
+                    ))
+                for _ in as_completed(submitted_tasks):
+                    progress.advance(progress_id, 1)
+                    num_searched_urls = int(progress.tasks[progress_id].completed)
+                    progress.update(progress_id, description=f"{self.source}.search >>> completed ({num_searched_urls}/{len(search_urls)})")
+        song_infos = list(chain.from_iterable(song_infos.values()))
+        song_infos = self._removeduplicates(song_infos=song_infos)
+        work_dir = self._constructuniqueworkdir(keyword=keyword)
+        for song_info in song_infos: song_info.work_dir = work_dir
+        # logging
+        if len(song_infos) > 0:
+            work_dir = song_infos[0].work_dir
+            touchdir(work_dir)
+            self._savetopkl([s.todict() for s in song_infos], os.path.join(work_dir, 'search_results.pkl'))
+        else:
+            work_dir = self.work_dir
+        self.logger_handle.info(f'Finished searching music files using {self.source}. Search results have been saved to {work_dir}, valid items: {len(song_infos)}.', disable_print=self.disable_print)
+        # return
+        return song_infos
+    '''_download'''
+    @usedownloadheaderscookies
+    def _download(self, song_info: SongInfo, request_overrides: dict = None, downloaded_song_infos: list = [], progress: Progress = None, song_progress_id: int = 0):
+        request_overrides = request_overrides or {}
+        try:
+            touchdir(song_info.work_dir)
+            if song_info.default_download_headers: request_overrides['headers'] = song_info.default_download_headers
+            with self.get(song_info.download_url, stream=True, **request_overrides) as resp:
+                resp.raise_for_status()
+                total_size, chunk_size, downloaded_size = int(resp.headers.get('content-length', 0)), song_info.get('chunk_size', 1024), 0
+                progress.update(song_progress_id, total=total_size)
+                with open(song_info.save_path, "wb") as fp:
+                    for chunk in resp.iter_content(chunk_size=chunk_size):
+                        if not chunk: continue
+                        fp.write(chunk)
+                        downloaded_size = downloaded_size + len(chunk)
+                        if total_size > 0:
+                            downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, total_size / 1024 / 1024)
+                        else:
+                            progress.update(song_progress_id, total=downloaded_size)
+                            downloading_text = "%0.2fMB/%0.2fMB" % (downloaded_size / 1024 / 1024, downloaded_size / 1024 / 1024)
+                        progress.advance(song_progress_id, len(chunk))
+                        progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name} (Downloading: {downloading_text})")
+            progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name} (Success)")
+            downloaded_song_infos.append(SongInfoUtils.fillsongtechinfo(copy.deepcopy(song_info), logger_handle=self.logger_handle, disable_print=self.disable_print))
+        except Exception as err:
+            progress.update(song_progress_id, description=f"{self.source}.download >>> {song_info.song_name} (Error: {err})")
+        return downloaded_song_infos
+    '''download'''
+    @usedownloadheaderscookies
+    def download(self, song_infos: list[SongInfo], num_threadings=5, request_overrides: dict = None):
+        # init
+        request_overrides = request_overrides or {}
+        # logging
+        self.logger_handle.info(f'Start to download music files using {self.source}.', disable_print=self.disable_print)
+        # multi threadings for downloading music files
+        columns = [
+            SpinnerColumn(), TextColumn("{task.description}"), BarColumn(bar_width=None), TaskProgressColumn(),
+            AudioAwareColumn(), TransferSpeedColumn(), TimeRemainingColumn(),
+        ]
+        with Progress(*columns, refresh_per_second=20, expand=True) as progress:
+            songs_progress_id = progress.add_task(f"{self.source}.download >>> completed (0/{len(song_infos)})", total=len(song_infos), kind='overall')
+            song_progress_ids, downloaded_song_infos, submitted_tasks = [], [], []
+            for _, song_info in enumerate(song_infos):
+                desc = f"{self.source}.download >>> {song_info.song_name} (Preparing)"
+                song_progress_ids.append(progress.add_task(desc, total=None, kind='download'))
+            with ThreadPoolExecutor(max_workers=num_threadings) as pool:
+                for song_progress_id, song_info in zip(song_progress_ids, song_infos):
+                    submitted_tasks.append(pool.submit(
+                        self._download, song_info, request_overrides, downloaded_song_infos, progress, song_progress_id
+                    ))
+                for _ in as_completed(submitted_tasks):
+                    progress.advance(songs_progress_id, 1)
+                    num_downloaded_songs = int(progress.tasks[songs_progress_id].completed)
+                    progress.update(songs_progress_id, description=f"{self.source}.download >>> completed ({num_downloaded_songs}/{len(song_infos)})")
+        # logging
+        if len(downloaded_song_infos) > 0:
+            work_dir = downloaded_song_infos[0]['work_dir']
+            touchdir(work_dir)
+            self._savetopkl([s.todict() for s in downloaded_song_infos], os.path.join(work_dir, 'download_results.pkl'))
+        else:
+            work_dir = self.work_dir
+        self.logger_handle.info(f'Finished downloading music files using {self.source}. Download results have been saved to {work_dir}, valid downloads: {len(downloaded_song_infos)}.', disable_print=self.disable_print)
+        # return
+        return downloaded_song_infos
+    '''get'''
+    def get(self, url, **kwargs):
+        if 'cookies' not in kwargs: kwargs['cookies'] = self.default_cookies
+        resp = None
+        for _ in range(self.max_retries):
+            if not self.maintain_session:
+                self._initsession()
+            if self.random_update_ua: self.session.headers.update({'User-Agent': UserAgent().random})
+            if self.auto_set_proxies:
+                try:
+                    self.session.proxies = self.proxied_session_client.getrandomproxy()
+                except Exception as err:
+                    self.logger_handle.error(f'{self.source}.get >>> {url} (Error: {err})', disable_print=self.disable_print)
+                    self.session.proxies = {}
+            else:
+                self.session.proxies = {}
+            try:
+                resp = self.session.get(url, **kwargs)
+            except Exception as err:
+                self.logger_handle.error(f'{self.source}.get >>> {url} (Error: {err})', disable_print=self.disable_print)
+                continue
+            if resp.status_code != 200: continue
+            return resp
+        return resp
+    '''post'''
+    def post(self, url, **kwargs):
+        if 'cookies' not in kwargs: kwargs['cookies'] = self.default_cookies
+        resp = None
+        for _ in range(self.max_retries):
+            if not self.maintain_session:
+                self._initsession()
+            if self.random_update_ua: self.session.headers.update({'User-Agent': UserAgent().random})
+            if self.auto_set_proxies:
+                try:
+                    self.session.proxies = self.proxied_session_client.getrandomproxy()
+                except Exception as err:
+                    self.logger_handle.error(f'{self.source}.post >>> {url} (Error: {err})', disable_print=self.disable_print)
+                    self.session.proxies = {}
             else:
-self.
-
-
-
-
-
+                self.session.proxies = {}
+            try:
+                resp = self.session.post(url, **kwargs)
+            except Exception as err:
+                self.logger_handle.error(f'{self.source}.post >>> {url} (Error: {err})', disable_print=self.disable_print)
+                continue
+            if resp.status_code != 200: continue
+            return resp
+        return resp
+    '''_savetopkl'''
+    def _savetopkl(self, data, file_path, auto_sanitize=True):
+        if auto_sanitize: file_path = sanitize_filepath(file_path)
+        with open(file_path, 'wb') as fp:
+            pickle.dump(data, fp)