parsehub 2.0.21__tar.gz → 2.0.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.21/src/parsehub.egg-info → parsehub-2.0.22}/PKG-INFO +1 -1
- {parsehub-2.0.21 → parsehub-2.0.22}/pyproject.toml +1 -1
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/base/ytdlp.py +85 -49
- {parsehub-2.0.21 → parsehub-2.0.22/src/parsehub.egg-info}/PKG-INFO +1 -1
- {parsehub-2.0.21 → parsehub-2.0.22}/LICENSE +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/README.md +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/setup.cfg +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/cli.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/cli_config.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/config/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/config/config.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/base/base.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/bilibili.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/coolapk.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/douyin.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/instagram.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/kuaishou.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/pipix.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/threads.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/tieba.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/tiktok.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/twitter.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/weibo.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/weixin.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/xhs.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/youtube.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/zuiyou.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/bilibili.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/coolapk.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/douyin.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/instagram.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/kuaishou.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/pipix.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/threads.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/tieba.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/tiktok.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/twitter.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/weibo.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/weixin.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/xhs.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/xiaoheihe.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/zuiyou.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/__init__.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/callback.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/media_file.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/media_ref.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/platform.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/post.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/result.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/utils/downloader.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/utils/utils.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/SOURCES.txt +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/entry_points.txt +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/requires.txt +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/top_level.txt +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/test/test_cli.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/test/test_cli_config.py +0 -0
- {parsehub-2.0.21 → parsehub-2.0.22}/test/test_core_offline.py +0 -0
|
@@ -17,23 +17,75 @@ from ...types import (
|
|
|
17
17
|
from .base import BaseParser
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
def
|
|
21
|
-
"""
|
|
20
|
+
def switch_ytdlp_proxy(ydl: YoutubeDL, proxy: str | None) -> None:
|
|
21
|
+
"""切换同一个 YoutubeDL 实例后续请求使用的代理。"""
|
|
22
|
+
ydl.params["proxy"] = proxy or ""
|
|
23
|
+
|
|
24
|
+
# proxies 是 cached_property,必须清掉,否则仍会使用解析阶段的 proxy map
|
|
25
|
+
ydl.__dict__.pop("proxies", None)
|
|
26
|
+
|
|
27
|
+
# _request_director 也是 cached_property,内部 handler 初始化时已经绑定旧 proxies
|
|
28
|
+
director = ydl.__dict__.pop("_request_director", None)
|
|
29
|
+
if director is not None:
|
|
30
|
+
director.close()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None:
|
|
34
|
+
"""在独立线程中下载视频"""
|
|
22
35
|
try:
|
|
23
36
|
with YoutubeDL(yto_params) as ydl:
|
|
24
|
-
|
|
37
|
+
info = ydl.extract_info(url, download=False)
|
|
38
|
+
switch_ytdlp_proxy(ydl, proxy)
|
|
39
|
+
ydl.process_ie_result(info, download=True)
|
|
25
40
|
except Exception as e:
|
|
26
41
|
error_msg = f"{type(e).__name__}: {str(e)}"
|
|
27
42
|
raise RuntimeError(error_msg) from None
|
|
28
43
|
|
|
29
44
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
45
|
+
class MonotonicDownloadProgress:
|
|
46
|
+
def __init__(self, emit, *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1):
|
|
47
|
+
self.emit = emit
|
|
48
|
+
self.start = start
|
|
49
|
+
self.end = end
|
|
50
|
+
self.min_step = min_step
|
|
51
|
+
self.current = start
|
|
52
|
+
|
|
53
|
+
def __call__(self, d: dict):
|
|
54
|
+
status = d.get("status")
|
|
55
|
+
|
|
56
|
+
if status == "downloading":
|
|
57
|
+
percent = self._download_percent(d)
|
|
58
|
+
if percent is None:
|
|
59
|
+
return
|
|
60
|
+
|
|
61
|
+
mapped = self.start + percent * (self.end - self.start) / 100
|
|
62
|
+
|
|
63
|
+
if mapped >= self.current + self.min_step:
|
|
64
|
+
self.current = mapped
|
|
65
|
+
self.emit(round(self.current, 1))
|
|
66
|
+
|
|
67
|
+
elif status == "finished":
|
|
68
|
+
if self.current < self.end:
|
|
69
|
+
self.current = self.end
|
|
70
|
+
self.emit(round(self.current, 1))
|
|
71
|
+
|
|
72
|
+
@staticmethod
|
|
73
|
+
def _download_percent(d: dict) -> float | None:
|
|
74
|
+
downloaded = d.get("downloaded_bytes") or 0
|
|
33
75
|
total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
76
|
+
if downloaded == total == 1024:
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
if total > 0:
|
|
80
|
+
return min(downloaded / total * 100, 100)
|
|
81
|
+
|
|
82
|
+
# 分片下载有时没有稳定总大小,但有 frag 进度;作为兜底
|
|
83
|
+
frag_index = d.get("fragment_index")
|
|
84
|
+
frag_count = d.get("fragment_count")
|
|
85
|
+
if frag_index is not None and frag_count:
|
|
86
|
+
return min(frag_index / frag_count * 100, 100)
|
|
87
|
+
|
|
88
|
+
return None
|
|
37
89
|
|
|
38
90
|
|
|
39
91
|
class YtParser(BaseParser, register=False):
|
|
@@ -81,6 +133,7 @@ class YtParser(BaseParser, register=False):
|
|
|
81
133
|
width=width,
|
|
82
134
|
height=height,
|
|
83
135
|
paramss=self.params,
|
|
136
|
+
proxy=self.proxy,
|
|
84
137
|
)
|
|
85
138
|
|
|
86
139
|
def _extract_info(self, url):
|
|
@@ -109,7 +162,6 @@ class YtParser(BaseParser, register=False):
|
|
|
109
162
|
# }
|
|
110
163
|
# ],
|
|
111
164
|
"playlist_items": "1", # 分p列表默认解析第一个
|
|
112
|
-
# "progress_hooks": [progress_hook],
|
|
113
165
|
}
|
|
114
166
|
return params
|
|
115
167
|
|
|
@@ -140,52 +192,34 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
140
192
|
callback_kwargs = {}
|
|
141
193
|
|
|
142
194
|
paramss = self.dl.paramss.copy()
|
|
143
|
-
if proxy:
|
|
144
|
-
paramss["proxy"] = proxy
|
|
195
|
+
if self.dl.proxy:
|
|
196
|
+
paramss["proxy"] = self.dl.proxy
|
|
145
197
|
|
|
146
198
|
paramss["outtmpl"] = f"{output_dir.joinpath('ytdlp_%(id)s')}.%(ext)s"
|
|
147
199
|
|
|
148
|
-
# if GlobalConfig.duration_limit and self.dl.duration > GlobalConfig.duration_limit:
|
|
149
|
-
# # 视频超过限制时长,获取最低画质
|
|
150
|
-
# paramss["format"] = "worstvideo* + worstaudio / worst"
|
|
151
|
-
|
|
152
200
|
if callback:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
# progress_mode = "count"
|
|
169
|
-
# asyncio.run_coroutine_threadsafe(
|
|
170
|
-
# callback(0, 1, "count", *callback_args, **callback_kwargs),
|
|
171
|
-
# loop,
|
|
172
|
-
# )
|
|
173
|
-
# elif d["status"] == "finished" and progress_mode == "count":
|
|
174
|
-
# asyncio.run_coroutine_threadsafe(
|
|
175
|
-
# callback(1, 1, "count", *callback_args, **callback_kwargs),
|
|
176
|
-
# loop,
|
|
177
|
-
# )
|
|
178
|
-
#
|
|
179
|
-
# paramss["progress_hooks"] = [_progress_hook]
|
|
180
|
-
|
|
181
|
-
await self._run_download(paramss)
|
|
201
|
+
loop = asyncio.get_running_loop()
|
|
202
|
+
|
|
203
|
+
def _callback(count: float):
|
|
204
|
+
asyncio.run_coroutine_threadsafe(
|
|
205
|
+
callback(int(count), 100, "bytes", *callback_args, **callback_kwargs), loop
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
progress = MonotonicDownloadProgress(
|
|
209
|
+
_callback,
|
|
210
|
+
start=0,
|
|
211
|
+
end=99,
|
|
212
|
+
)
|
|
213
|
+
paramss["progress_hooks"] = [progress]
|
|
214
|
+
|
|
215
|
+
await self._run_download(paramss, proxy=proxy)
|
|
182
216
|
|
|
183
217
|
v = list(output_dir.glob("*.mp4")) or list(output_dir.glob("*.mkv")) or list(output_dir.glob("*.webm"))
|
|
184
218
|
if not v:
|
|
185
219
|
raise DownloadError("下载失败 -1")
|
|
186
220
|
|
|
187
221
|
if callback:
|
|
188
|
-
await callback(
|
|
222
|
+
await callback(100, 100, "bytes", *callback_args, **callback_kwargs)
|
|
189
223
|
|
|
190
224
|
video_path = v[0]
|
|
191
225
|
return DownloadResult(
|
|
@@ -198,12 +232,12 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
198
232
|
output_dir,
|
|
199
233
|
)
|
|
200
234
|
|
|
201
|
-
async def _run_download(self, paramss: dict, count: int = 0) -> None:
|
|
235
|
+
async def _run_download(self, paramss: dict, count: int = 0, *, proxy: str | None = None) -> None:
|
|
202
236
|
if count > 2:
|
|
203
237
|
raise DownloadError("下载失败 -2")
|
|
204
238
|
|
|
205
239
|
try:
|
|
206
|
-
await asyncio.to_thread(download_video, paramss,
|
|
240
|
+
await asyncio.to_thread(download_video, paramss, self.dl.url, proxy=proxy)
|
|
207
241
|
except RuntimeError as e:
|
|
208
242
|
error = str(e)
|
|
209
243
|
if any(
|
|
@@ -214,7 +248,7 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
214
248
|
)
|
|
215
249
|
):
|
|
216
250
|
paramss.pop("writeautomaticsub", None)
|
|
217
|
-
await self._run_download(paramss, count + 1)
|
|
251
|
+
await self._run_download(paramss, count + 1, proxy=proxy)
|
|
218
252
|
|
|
219
253
|
except Exception as e:
|
|
220
254
|
raise DownloadError(f"下载失败: {str(e)}") from e
|
|
@@ -234,3 +268,5 @@ class YtVideoInfo:
|
|
|
234
268
|
duration: int = 0
|
|
235
269
|
width: int = 0
|
|
236
270
|
height: int = 0
|
|
271
|
+
proxy: str | None = None
|
|
272
|
+
"""解析时用的代理"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|