parsehub 2.0.8__tar.gz → 2.0.10__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (60)
  1. {parsehub-2.0.8/src/parsehub.egg-info → parsehub-2.0.10}/PKG-INFO +1 -1
  2. {parsehub-2.0.8 → parsehub-2.0.10}/pyproject.toml +1 -1
  3. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/base/ytdlp.py +42 -13
  4. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/coolapk.py +11 -2
  5. {parsehub-2.0.8 → parsehub-2.0.10/src/parsehub.egg-info}/PKG-INFO +1 -1
  6. {parsehub-2.0.8 → parsehub-2.0.10}/LICENSE +0 -0
  7. {parsehub-2.0.8 → parsehub-2.0.10}/README.md +0 -0
  8. {parsehub-2.0.8 → parsehub-2.0.10}/setup.cfg +0 -0
  9. {parsehub-2.0.8 → parsehub-2.0.10}/src/__init__.py +0 -0
  10. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/__init__.py +0 -0
  11. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/config/__init__.py +0 -0
  12. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/config/config.py +0 -0
  13. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/errors.py +0 -0
  14. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/__init__.py +0 -0
  15. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/base/__init__.py +0 -0
  16. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/base/base.py +0 -0
  17. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/__init__.py +0 -0
  18. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/bilibili.py +0 -0
  19. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/douyin.py +0 -0
  20. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/facebook.py +0 -0
  21. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/instagram.py +0 -0
  22. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/kuaishou.py +0 -0
  23. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/pipix.py +0 -0
  24. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/threads.py +0 -0
  25. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/tieba.py +0 -0
  26. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/twitter.py +0 -0
  27. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/weibo.py +0 -0
  28. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/weixin.py +0 -0
  29. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/xhs.py +0 -0
  30. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
  31. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/youtube.py +0 -0
  32. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  33. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/__init__.py +0 -0
  34. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/bilibili.py +0 -0
  35. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/coolapk.py +0 -0
  36. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/instagram.py +0 -0
  37. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/kuaishou.py +0 -0
  38. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/pipix.py +0 -0
  39. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/threads.py +0 -0
  40. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/tieba.py +0 -0
  41. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/twitter.py +0 -0
  42. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/weibo.py +0 -0
  43. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/weixin.py +0 -0
  44. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/xhs.py +0 -0
  45. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/xiaoheihe.py +0 -0
  46. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/zuiyou.py +0 -0
  47. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/__init__.py +0 -0
  48. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/callback.py +0 -0
  49. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/media_file.py +0 -0
  50. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/media_ref.py +0 -0
  51. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/platform.py +0 -0
  52. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/post.py +0 -0
  53. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/result.py +0 -0
  54. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/utils/downloader.py +0 -0
  55. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/utils/media_info.py +0 -0
  56. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/utils/utils.py +0 -0
  57. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/SOURCES.txt +0 -0
  58. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/dependency_links.txt +0 -0
  59. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/requires.txt +0 -0
  60. {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.8
3
+ Version: 2.0.10
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.8"
3
+ version = "2.0.10"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -1,5 +1,4 @@
1
1
  import asyncio
2
- from concurrent.futures import ProcessPoolExecutor
3
2
  from dataclasses import dataclass
4
3
  from pathlib import Path
5
4
  from typing import Union
@@ -17,8 +16,6 @@ from ...types import (
17
16
  )
18
17
  from .base import BaseParser
19
18
 
20
- EXC = ProcessPoolExecutor()
21
-
22
19
 
23
20
  def download_video(yto_params: dict, urls: list[str]) -> None:
24
21
  """在独立进程中下载视频"""
@@ -30,6 +27,15 @@ def download_video(yto_params: dict, urls: list[str]) -> None:
30
27
  raise RuntimeError(error_msg) from None
31
28
 
32
29
 
30
+ def progress_hook(d: dict):
31
+ if d["status"] == "downloading":
32
+ downloaded = d.get("downloaded_bytes", 0)
33
+ total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0
34
+ print(f"{downloaded}/{total}")
35
+ elif d["status"] == "finished":
36
+ print("下载完成,准备处理文件")
37
+
38
+
33
39
  class YtParser(BaseParser, register=False):
34
40
  """yt-dlp解析器"""
35
41
 
@@ -103,6 +109,7 @@ class YtParser(BaseParser, register=False):
103
109
  # }
104
110
  # ],
105
111
  "playlist_items": "1", # 分p列表默认解析第一个
112
+ # "progress_hooks": [progress_hook],
106
113
  }
107
114
  return params
108
115
 
@@ -143,17 +150,40 @@ class YtVideoParseResult(VideoParseResult):
143
150
  # paramss["format"] = "worstvideo* + worstaudio / worst"
144
151
 
145
152
  if callback:
146
- await callback(0, 1, "count", *callback_args, **callback_kwargs)
147
-
148
- await self.__download(paramss)
153
+ loop = asyncio.get_running_loop()
154
+ progress_mode = "bytes"
155
+
156
+ def _progress_hook(d):
157
+ nonlocal progress_mode
158
+ if d["status"] == "downloading":
159
+ # 已知问题: yt-dlp 返回的总进度不统一
160
+ downloaded = int(d.get("downloaded_bytes", 0))
161
+ total = int(d.get("total_bytes") or d.get("total_bytes_estimate") or 0)
162
+ if total and progress_mode == "bytes":
163
+ asyncio.run_coroutine_threadsafe(
164
+ callback(downloaded, total, "bytes", *callback_args, **callback_kwargs),
165
+ loop,
166
+ )
167
+ else:
168
+ progress_mode = "count"
169
+ asyncio.run_coroutine_threadsafe(
170
+ callback(0, 1, "count", *callback_args, **callback_kwargs),
171
+ loop,
172
+ )
173
+ elif d["status"] == "finished" and progress_mode == "count":
174
+ asyncio.run_coroutine_threadsafe(
175
+ callback(1, 1, "count", *callback_args, **callback_kwargs),
176
+ loop,
177
+ )
178
+
179
+ paramss["progress_hooks"] = [_progress_hook]
180
+
181
+ await self._run_download(paramss)
149
182
 
150
183
  v = list(output_dir.glob("*.mp4")) or list(output_dir.glob("*.mkv")) or list(output_dir.glob("*.webm"))
151
184
  if not v:
152
185
  raise DownloadError("下载失败 -1")
153
186
 
154
- if callback:
155
- await callback(1, 1, "count", *callback_args, **callback_kwargs)
156
-
157
187
  video_path = v[0]
158
188
  return DownloadResult(
159
189
  VideoFile(
@@ -165,14 +195,13 @@ class YtVideoParseResult(VideoParseResult):
165
195
  output_dir,
166
196
  )
167
197
 
168
- async def __download(self, paramss: dict, count: int = 0) -> None:
198
+ async def _run_download(self, paramss: dict, count: int = 0) -> None:
169
199
  if count > 2:
170
200
  raise DownloadError("下载失败 -2")
171
201
 
172
- loop = asyncio.get_running_loop()
173
202
  try:
174
203
  await asyncio.wait_for(
175
- loop.run_in_executor(EXC, download_video, paramss, [self.dl.url]),
204
+ asyncio.to_thread(download_video, paramss, [self.dl.url]),
176
205
  timeout=300,
177
206
  )
178
207
  except TimeoutError as e:
@@ -187,7 +216,7 @@ class YtVideoParseResult(VideoParseResult):
187
216
  )
188
217
  ):
189
218
  paramss.pop("writeautomaticsub", None)
190
- await self.__download(paramss, count + 1)
219
+ await self._run_download(paramss, count + 1)
191
220
 
192
221
  except Exception as e:
193
222
  raise DownloadError(f"下载失败: {str(e)}") from e
@@ -1,3 +1,4 @@
1
+ import re
1
2
  from pathlib import Path
2
3
  from typing import Union
3
4
 
@@ -37,18 +38,26 @@ class CoolapkParser(BaseParser):
37
38
  media=media,
38
39
  markdown_content=coolapk.markdown_content,
39
40
  )
41
+ content = self.hashtag_handler(coolapk.text_content)
40
42
  if any(isinstance(m, AniRef) for m in media):
41
43
  return CoolapkMultimediaParseResult(
42
44
  title=coolapk.title,
43
45
  media=media,
44
- content=coolapk.text_content,
46
+ content=content,
45
47
  )
46
48
  return CoolapkImageParseResult(
47
49
  title=coolapk.title,
48
50
  photo=media,
49
- content=coolapk.text_content,
51
+ content=content,
50
52
  )
51
53
 
54
+ @staticmethod
55
+ def hashtag_handler(desc: str):
56
+ hashtags = re.findall(r" ?#[^#]+# ?", desc)
57
+ for hashtag in hashtags:
58
+ desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
59
+ return desc
60
+
52
61
 
53
62
  class CoolapkParseResult(ParseResult):
54
63
  async def _do_download(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.8
3
+ Version: 2.0.10
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
File without changes
File without changes
File without changes
File without changes