parsehub 2.0.21__tar.gz → 2.0.22__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (69)
  1. {parsehub-2.0.21/src/parsehub.egg-info → parsehub-2.0.22}/PKG-INFO +1 -1
  2. {parsehub-2.0.21 → parsehub-2.0.22}/pyproject.toml +1 -1
  3. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/base/ytdlp.py +85 -49
  4. {parsehub-2.0.21 → parsehub-2.0.22/src/parsehub.egg-info}/PKG-INFO +1 -1
  5. {parsehub-2.0.21 → parsehub-2.0.22}/LICENSE +0 -0
  6. {parsehub-2.0.21 → parsehub-2.0.22}/README.md +0 -0
  7. {parsehub-2.0.21 → parsehub-2.0.22}/setup.cfg +0 -0
  8. {parsehub-2.0.21 → parsehub-2.0.22}/src/__init__.py +0 -0
  9. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/__init__.py +0 -0
  10. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/cli.py +0 -0
  11. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/cli_config.py +0 -0
  12. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/config/__init__.py +0 -0
  13. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/config/config.py +0 -0
  14. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/errors.py +0 -0
  15. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/__init__.py +0 -0
  16. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/base/__init__.py +0 -0
  17. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/base/base.py +0 -0
  18. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/__init__.py +0 -0
  19. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/bilibili.py +0 -0
  20. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/coolapk.py +0 -0
  21. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/douyin.py +0 -0
  22. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/facebook.py +0 -0
  23. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/instagram.py +0 -0
  24. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/kuaishou.py +0 -0
  25. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/pipix.py +0 -0
  26. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/threads.py +0 -0
  27. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/tieba.py +0 -0
  28. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/tiktok.py +0 -0
  29. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/twitter.py +0 -0
  30. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/weibo.py +0 -0
  31. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/weixin.py +0 -0
  32. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/xhs.py +0 -0
  33. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
  34. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/youtube.py +0 -0
  35. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  36. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/__init__.py +0 -0
  37. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/bilibili.py +0 -0
  38. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/coolapk.py +0 -0
  39. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/douyin.py +0 -0
  40. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/instagram.py +0 -0
  41. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/kuaishou.py +0 -0
  42. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/pipix.py +0 -0
  43. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/threads.py +0 -0
  44. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/tieba.py +0 -0
  45. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/tiktok.py +0 -0
  46. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/twitter.py +0 -0
  47. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/weibo.py +0 -0
  48. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/weixin.py +0 -0
  49. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/xhs.py +0 -0
  50. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/xiaoheihe.py +0 -0
  51. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/provider_api/zuiyou.py +0 -0
  52. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/__init__.py +0 -0
  53. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/callback.py +0 -0
  54. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/media_file.py +0 -0
  55. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/media_ref.py +0 -0
  56. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/platform.py +0 -0
  57. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/post.py +0 -0
  58. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/types/result.py +0 -0
  59. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/utils/downloader.py +0 -0
  60. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/utils/media_info.py +0 -0
  61. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub/utils/utils.py +0 -0
  62. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/SOURCES.txt +0 -0
  63. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/dependency_links.txt +0 -0
  64. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/entry_points.txt +0 -0
  65. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/requires.txt +0 -0
  66. {parsehub-2.0.21 → parsehub-2.0.22}/src/parsehub.egg-info/top_level.txt +0 -0
  67. {parsehub-2.0.21 → parsehub-2.0.22}/test/test_cli.py +0 -0
  68. {parsehub-2.0.21 → parsehub-2.0.22}/test/test_cli_config.py +0 -0
  69. {parsehub-2.0.21 → parsehub-2.0.22}/test/test_core_offline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.21
3
+ Version: 2.0.22
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.21"
3
+ version = "2.0.22"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -17,23 +17,75 @@ from ...types import (
17
17
  from .base import BaseParser
18
18
 
19
19
 
20
- def download_video(yto_params: dict, urls: list[str]) -> None:
21
- """在独立进程中下载视频"""
20
+ def switch_ytdlp_proxy(ydl: YoutubeDL, proxy: str | None) -> None:
21
+ """切换同一个 YoutubeDL 实例后续请求使用的代理。"""
22
+ ydl.params["proxy"] = proxy or ""
23
+
24
+ # proxies 是 cached_property,必须清掉,否则仍会使用解析阶段的 proxy map
25
+ ydl.__dict__.pop("proxies", None)
26
+
27
+ # _request_director 也是 cached_property,内部 handler 初始化时已经绑定旧 proxies
28
+ director = ydl.__dict__.pop("_request_director", None)
29
+ if director is not None:
30
+ director.close()
31
+
32
+
33
+ def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None:
34
+ """在独立线程中下载视频"""
22
35
  try:
23
36
  with YoutubeDL(yto_params) as ydl:
24
- return ydl.download(urls)
37
+ info = ydl.extract_info(url, download=False)
38
+ switch_ytdlp_proxy(ydl, proxy)
39
+ ydl.process_ie_result(info, download=True)
25
40
  except Exception as e:
26
41
  error_msg = f"{type(e).__name__}: {str(e)}"
27
42
  raise RuntimeError(error_msg) from None
28
43
 
29
44
 
30
- def progress_hook(d: dict):
31
- if d["status"] == "downloading":
32
- downloaded = d.get("downloaded_bytes", 0)
45
+ class MonotonicDownloadProgress:
46
+ def __init__(self, emit, *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1):
47
+ self.emit = emit
48
+ self.start = start
49
+ self.end = end
50
+ self.min_step = min_step
51
+ self.current = start
52
+
53
+ def __call__(self, d: dict):
54
+ status = d.get("status")
55
+
56
+ if status == "downloading":
57
+ percent = self._download_percent(d)
58
+ if percent is None:
59
+ return
60
+
61
+ mapped = self.start + percent * (self.end - self.start) / 100
62
+
63
+ if mapped >= self.current + self.min_step:
64
+ self.current = mapped
65
+ self.emit(round(self.current, 1))
66
+
67
+ elif status == "finished":
68
+ if self.current < self.end:
69
+ self.current = self.end
70
+ self.emit(round(self.current, 1))
71
+
72
+ @staticmethod
73
+ def _download_percent(d: dict) -> float | None:
74
+ downloaded = d.get("downloaded_bytes") or 0
33
75
  total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0
34
- print(f"{downloaded}/{total}")
35
- elif d["status"] == "finished":
36
- print("下载完成,准备处理文件")
76
+ if downloaded == total == 1024:
77
+ return None
78
+
79
+ if total > 0:
80
+ return min(downloaded / total * 100, 100)
81
+
82
+ # 分片下载有时没有稳定总大小,但有 frag 进度;作为兜底
83
+ frag_index = d.get("fragment_index")
84
+ frag_count = d.get("fragment_count")
85
+ if frag_index is not None and frag_count:
86
+ return min(frag_index / frag_count * 100, 100)
87
+
88
+ return None
37
89
 
38
90
 
39
91
  class YtParser(BaseParser, register=False):
@@ -81,6 +133,7 @@ class YtParser(BaseParser, register=False):
81
133
  width=width,
82
134
  height=height,
83
135
  paramss=self.params,
136
+ proxy=self.proxy,
84
137
  )
85
138
 
86
139
  def _extract_info(self, url):
@@ -109,7 +162,6 @@ class YtParser(BaseParser, register=False):
109
162
  # }
110
163
  # ],
111
164
  "playlist_items": "1", # 分p列表默认解析第一个
112
- # "progress_hooks": [progress_hook],
113
165
  }
114
166
  return params
115
167
 
@@ -140,52 +192,34 @@ class YtVideoParseResult(VideoParseResult):
140
192
  callback_kwargs = {}
141
193
 
142
194
  paramss = self.dl.paramss.copy()
143
- if proxy:
144
- paramss["proxy"] = proxy
195
+ if self.dl.proxy:
196
+ paramss["proxy"] = self.dl.proxy
145
197
 
146
198
  paramss["outtmpl"] = f"{output_dir.joinpath('ytdlp_%(id)s')}.%(ext)s"
147
199
 
148
- # if GlobalConfig.duration_limit and self.dl.duration > GlobalConfig.duration_limit:
149
- # # 视频超过限制时长,获取最低画质
150
- # paramss["format"] = "worstvideo* + worstaudio / worst"
151
-
152
200
  if callback:
153
- # 已知问题: yt-dlp 返回的总进度不统一, 暂使用 count 进度
154
- await callback(0, 1, "count", *callback_args, **callback_kwargs)
155
- # loop = asyncio.get_running_loop()
156
- # progress_mode = "bytes"
157
- # def _progress_hook(d):
158
- # nonlocal progress_mode
159
- # if d["status"] == "downloading":
160
- # downloaded = int(d.get("downloaded_bytes", 0))
161
- # total = int(d.get("total_bytes") or d.get("total_bytes_estimate") or 0)
162
- # if total and progress_mode == "bytes":
163
- # asyncio.run_coroutine_threadsafe(
164
- # callback(downloaded, total, "bytes", *callback_args, **callback_kwargs),
165
- # loop,
166
- # )
167
- # else:
168
- # progress_mode = "count"
169
- # asyncio.run_coroutine_threadsafe(
170
- # callback(0, 1, "count", *callback_args, **callback_kwargs),
171
- # loop,
172
- # )
173
- # elif d["status"] == "finished" and progress_mode == "count":
174
- # asyncio.run_coroutine_threadsafe(
175
- # callback(1, 1, "count", *callback_args, **callback_kwargs),
176
- # loop,
177
- # )
178
- #
179
- # paramss["progress_hooks"] = [_progress_hook]
180
-
181
- await self._run_download(paramss)
201
+ loop = asyncio.get_running_loop()
202
+
203
+ def _callback(count: float):
204
+ asyncio.run_coroutine_threadsafe(
205
+ callback(int(count), 100, "bytes", *callback_args, **callback_kwargs), loop
206
+ )
207
+
208
+ progress = MonotonicDownloadProgress(
209
+ _callback,
210
+ start=0,
211
+ end=99,
212
+ )
213
+ paramss["progress_hooks"] = [progress]
214
+
215
+ await self._run_download(paramss, proxy=proxy)
182
216
 
183
217
  v = list(output_dir.glob("*.mp4")) or list(output_dir.glob("*.mkv")) or list(output_dir.glob("*.webm"))
184
218
  if not v:
185
219
  raise DownloadError("下载失败 -1")
186
220
 
187
221
  if callback:
188
- await callback(1, 1, "count", *callback_args, **callback_kwargs)
222
+ await callback(100, 100, "bytes", *callback_args, **callback_kwargs)
189
223
 
190
224
  video_path = v[0]
191
225
  return DownloadResult(
@@ -198,12 +232,12 @@ class YtVideoParseResult(VideoParseResult):
198
232
  output_dir,
199
233
  )
200
234
 
201
- async def _run_download(self, paramss: dict, count: int = 0) -> None:
235
+ async def _run_download(self, paramss: dict, count: int = 0, *, proxy: str | None = None) -> None:
202
236
  if count > 2:
203
237
  raise DownloadError("下载失败 -2")
204
238
 
205
239
  try:
206
- await asyncio.to_thread(download_video, paramss, [self.dl.url])
240
+ await asyncio.to_thread(download_video, paramss, self.dl.url, proxy=proxy)
207
241
  except RuntimeError as e:
208
242
  error = str(e)
209
243
  if any(
@@ -214,7 +248,7 @@ class YtVideoParseResult(VideoParseResult):
214
248
  )
215
249
  ):
216
250
  paramss.pop("writeautomaticsub", None)
217
- await self._run_download(paramss, count + 1)
251
+ await self._run_download(paramss, count + 1, proxy=proxy)
218
252
 
219
253
  except Exception as e:
220
254
  raise DownloadError(f"下载失败: {str(e)}") from e
@@ -234,3 +268,5 @@ class YtVideoInfo:
234
268
  duration: int = 0
235
269
  width: int = 0
236
270
  height: int = 0
271
+ proxy: str | None = None
272
+ """解析时用的代理"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.21
3
+ Version: 2.0.22
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes