parsehub 2.0.23__tar.gz → 2.0.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {parsehub-2.0.23/src/parsehub.egg-info → parsehub-2.0.25}/PKG-INFO +1 -1
  2. {parsehub-2.0.23 → parsehub-2.0.25}/pyproject.toml +7 -6
  3. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/__init__.py +3 -3
  4. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/cli.py +5 -4
  5. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/base/base.py +2 -1
  6. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/base/ytdlp.py +18 -14
  7. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/bilibili.py +7 -7
  8. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/coolapk.py +3 -3
  9. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/instagram.py +4 -3
  10. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/tiktok.py +2 -2
  11. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/twitter.py +3 -3
  12. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/weibo.py +2 -2
  13. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/xhs.py +10 -5
  14. parsehub-2.0.25/src/parsehub/parsers/parser/xiaoheihe.py +60 -0
  15. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/youtube.py +1 -1
  16. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/bilibili.py +11 -10
  17. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/coolapk.py +1 -1
  18. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/douyin.py +13 -7
  19. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/instagram.py +11 -10
  20. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/kuaishou.py +6 -5
  21. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/pipix.py +1 -1
  22. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/threads.py +11 -9
  23. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/tieba.py +4 -3
  24. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/tiktok.py +2 -2
  25. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/twitter.py +15 -10
  26. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/weibo.py +26 -2
  27. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/weixin.py +5 -3
  28. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/xhs.py +16 -13
  29. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/xiaoheihe.py +12 -8
  30. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/zuiyou.py +6 -6
  31. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/callback.py +2 -2
  32. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/media_file.py +5 -5
  33. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/platform.py +2 -2
  34. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/result.py +14 -10
  35. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/utils/utils.py +2 -2
  36. {parsehub-2.0.23 → parsehub-2.0.25/src/parsehub.egg-info}/PKG-INFO +1 -1
  37. {parsehub-2.0.23 → parsehub-2.0.25}/test/test_core_offline.py +8 -3
  38. parsehub-2.0.23/src/parsehub/parsers/parser/xiaoheihe.py +0 -50
  39. {parsehub-2.0.23 → parsehub-2.0.25}/LICENSE +0 -0
  40. {parsehub-2.0.23 → parsehub-2.0.25}/README.md +0 -0
  41. {parsehub-2.0.23 → parsehub-2.0.25}/setup.cfg +0 -0
  42. {parsehub-2.0.23 → parsehub-2.0.25}/src/__init__.py +0 -0
  43. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/cli_config.py +0 -0
  44. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/config/__init__.py +0 -0
  45. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/config/config.py +0 -0
  46. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/errors.py +0 -0
  47. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/__init__.py +0 -0
  48. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/base/__init__.py +0 -0
  49. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/__init__.py +0 -0
  50. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/douyin.py +0 -0
  51. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/facebook.py +0 -0
  52. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/kuaishou.py +0 -0
  53. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/pipix.py +0 -0
  54. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/threads.py +0 -0
  55. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/tieba.py +0 -0
  56. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/weixin.py +0 -0
  57. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  58. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/provider_api/__init__.py +0 -0
  59. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/__init__.py +0 -0
  60. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/media_ref.py +0 -0
  61. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/types/post.py +0 -0
  62. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/utils/downloader.py +0 -0
  63. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub/utils/media_info.py +0 -0
  64. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub.egg-info/SOURCES.txt +0 -0
  65. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub.egg-info/dependency_links.txt +0 -0
  66. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub.egg-info/entry_points.txt +0 -0
  67. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub.egg-info/requires.txt +0 -0
  68. {parsehub-2.0.23 → parsehub-2.0.25}/src/parsehub.egg-info/top_level.txt +0 -0
  69. {parsehub-2.0.23 → parsehub-2.0.25}/test/test_cli.py +0 -0
  70. {parsehub-2.0.23 → parsehub-2.0.25}/test/test_cli_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.23
3
+ Version: 2.0.25
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.23"
3
+ version = "2.0.25"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -85,14 +85,15 @@ ignore = [
85
85
  [tool.mypy]
86
86
  python_version = "3.12"
87
87
  files = ["./"]
88
+ exclude = ["test"]
88
89
  ignore_missing_imports = true
89
- warn_return_any = false
90
+ warn_return_any = true
90
91
  warn_unused_ignores = true
91
- check_untyped_defs = false
92
- disallow_untyped_defs = false
93
- no_implicit_optional = false
92
+ check_untyped_defs = true
93
+ disallow_untyped_defs = true
94
+ no_implicit_optional = true
94
95
 
95
96
  [tool.pytest.ini_options]
96
97
  testpaths = ["test"]
97
98
  pythonpath = ["src"]
98
- python_files = ["test_*.py"]
99
+ python_files = ["test_*.py"]
@@ -13,7 +13,7 @@ logger.disable(__name__)
13
13
 
14
14
 
15
15
  class ParseHub:
16
- def __init__(self):
16
+ def __init__(self) -> None:
17
17
  self.parsers: list[type[BaseParser]] = BaseParser.get_registry()
18
18
 
19
19
  async def parse(self, url: str, *, proxy: str | None = None, cookie: str | dict | None = None) -> AnyParseResult:
@@ -185,7 +185,7 @@ class ParseHub:
185
185
  return parser
186
186
  return None
187
187
 
188
- def get_parser(self, url) -> type[BaseParser] | None:
188
+ def get_parser(self, url: str) -> type[BaseParser] | None:
189
189
  """获取解析器
190
190
  :param url: 分享文案 / 分享链接
191
191
  """
@@ -193,7 +193,7 @@ class ParseHub:
193
193
  return parser
194
194
  return None
195
195
 
196
- def get_platform(self, url) -> Platform | None:
196
+ def get_platform(self, url: str) -> Platform | None:
197
197
  """获取平台
198
198
  :param url: 分享文案 / 分享链接
199
199
  """
@@ -46,7 +46,7 @@ def main(argv: list[str] | None = None) -> int:
46
46
  try:
47
47
  args = parser.parse_args(_normalize_argv(raw_argv))
48
48
  _finalize_output_args(args)
49
- return args.func(args)
49
+ return int(args.func(args))
50
50
  except SystemExit as e:
51
51
  return _normalize_exit_code(e.code)
52
52
  except ValueError as e:
@@ -365,14 +365,15 @@ def _cookie_prompt() -> Any:
365
365
 
366
366
  def _load_platform_config(platform_id: str | None) -> PlatformConfig:
367
367
  if not platform_id:
368
- return _platform_config_type()()
369
- return _config_store().get_platform(platform_id)
368
+ return cast("PlatformConfig", _platform_config_type()())
369
+ return cast("PlatformConfig", _config_store().get_platform(platform_id))
370
370
 
371
371
 
372
372
  def _load_cookie(platform_id: str | None) -> str | None:
373
373
  if not platform_id:
374
374
  return None
375
- return _cookie_store().get(platform_id)
375
+ value = _cookie_store().get(platform_id)
376
+ return value if isinstance(value, str) or value is None else str(value)
376
377
 
377
378
 
378
379
  def _detect_platform_id(hub: Any, url_or_text: str) -> str | None:
@@ -2,6 +2,7 @@ import importlib
2
2
  import pkgutil
3
3
  import re
4
4
  from abc import ABC, abstractmethod
5
+ from typing import Any
5
6
  from urllib.parse import parse_qs, urlencode, urlparse
6
7
 
7
8
  import httpx
@@ -34,7 +35,7 @@ class BaseParser(ABC):
34
35
  self.proxy = proxy
35
36
  self.cookie = normalize_cookie(cookie)
36
37
 
37
- def __init_subclass__(cls, /, register: bool = True, **kwargs):
38
+ def __init_subclass__(cls, /, register: bool = True, **kwargs: Any) -> None:
38
39
  super().__init_subclass__(**kwargs)
39
40
  if register:
40
41
  if not cls.__platform__:
@@ -1,6 +1,8 @@
1
1
  import asyncio
2
+ from collections.abc import Callable
2
3
  from dataclasses import dataclass
3
4
  from pathlib import Path
5
+ from typing import Any, cast
4
6
 
5
7
  from yt_dlp import YoutubeDL
6
8
 
@@ -30,7 +32,7 @@ def switch_ytdlp_proxy(ydl: YoutubeDL, proxy: str | None) -> None:
30
32
  director.close()
31
33
 
32
34
 
33
- def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None:
35
+ def download_video(yto_params: dict[str, Any], url: str, proxy: str | None = None) -> None:
34
36
  """在独立线程中下载视频"""
35
37
  try:
36
38
  with YoutubeDL(yto_params) as ydl:
@@ -43,14 +45,16 @@ def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None
43
45
 
44
46
 
45
47
  class MonotonicDownloadProgress:
46
- def __init__(self, emit, *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1):
48
+ def __init__(
49
+ self, emit: Callable[[float], None], *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1
50
+ ) -> None:
47
51
  self.emit = emit
48
52
  self.start = start
49
53
  self.end = end
50
54
  self.min_step = min_step
51
55
  self.current = start
52
56
 
53
- def __call__(self, d: dict):
57
+ def __call__(self, d: dict[str, Any]) -> None:
54
58
  status = d.get("status")
55
59
 
56
60
  if status == "downloading":
@@ -82,8 +86,8 @@ class MonotonicDownloadProgress:
82
86
  # 分片下载有时没有稳定总大小,但有 frag 进度;作为兜底
83
87
  frag_index = d.get("fragment_index")
84
88
  frag_count = d.get("fragment_count")
85
- if frag_index is not None and frag_count:
86
- return min(frag_index / frag_count * 100, 100)
89
+ if isinstance(frag_index, int | float) and isinstance(frag_count, int | float) and frag_count:
90
+ return min(float(frag_index) / float(frag_count) * 100, 100.0)
87
91
 
88
92
  return None
89
93
 
@@ -106,7 +110,7 @@ class YtParser(BaseParser, register=False):
106
110
  ),
107
111
  )
108
112
 
109
- async def _parse(self, url) -> "YtVideoInfo":
113
+ async def _parse(self, url: str) -> "YtVideoInfo":
110
114
  try:
111
115
  dl = await asyncio.wait_for(asyncio.to_thread(self._extract_info, url), timeout=30)
112
116
  except TimeoutError as e:
@@ -136,20 +140,20 @@ class YtParser(BaseParser, register=False):
136
140
  proxy=self.proxy,
137
141
  )
138
142
 
139
- def _extract_info(self, url):
143
+ def _extract_info(self, url: str) -> dict[str, Any]:
140
144
  params = self.params.copy()
141
145
  if self.proxy:
142
146
  params["proxy"] = self.proxy
143
147
 
144
148
  try:
145
149
  with YoutubeDL(params) as ydl:
146
- return ydl.extract_info(url, download=False)
150
+ return cast(dict[str, Any], ydl.extract_info(url, download=False))
147
151
  except Exception as e:
148
152
  error_msg = f"{type(e).__name__}: {str(e)}"
149
153
  raise RuntimeError(error_msg) from None
150
154
 
151
155
  @property
152
- def params(self) -> dict:
156
+ def params(self) -> dict[str, Any]:
153
157
  params = {
154
158
  "format": "mp4+bestvideo[height<=1080]+bestaudio",
155
159
  "quiet": True, # 不输出日志
@@ -170,9 +174,9 @@ class YtVideoParseResult(VideoParseResult):
170
174
  def __init__(
171
175
  self,
172
176
  dl: "YtVideoInfo",
173
- title,
174
- video=None,
175
- content=None,
177
+ title: str | None,
178
+ video: VideoRef | None = None,
179
+ content: str | None = None,
176
180
  ):
177
181
  """dl: yt-dlp解析结果"""
178
182
  self.dl = dl
@@ -201,7 +205,7 @@ class YtVideoParseResult(VideoParseResult):
201
205
  if callback:
202
206
  loop = asyncio.get_running_loop()
203
207
 
204
- def _callback(count: float):
208
+ def _callback(count: float) -> None:
205
209
  asyncio.run_coroutine_threadsafe(
206
210
  callback(int(count), 100, "bytes", *callback_args, **callback_kwargs), loop
207
211
  )
@@ -237,7 +241,7 @@ class YtVideoParseResult(VideoParseResult):
237
241
  output_dir,
238
242
  )
239
243
 
240
- async def _run_download(self, paramss: dict, count: int = 0, *, proxy: str | None = None) -> None:
244
+ async def _run_download(self, paramss: dict[str, Any], count: int = 0, *, proxy: str | None = None) -> None:
241
245
  if count > 2:
242
246
  raise DownloadError("下载失败 -2")
243
247
 
@@ -58,7 +58,7 @@ class BiliParse(YtParser):
58
58
  raise ParseError("Bilibili 解析失败") from e
59
59
 
60
60
  @staticmethod
61
- def _is_bvid(url: str):
61
+ def _is_bvid(url: str) -> bool:
62
62
  if url.lower().startswith("bv"):
63
63
  return True
64
64
  else:
@@ -79,7 +79,7 @@ class BiliParse(YtParser):
79
79
  return await super().get_raw_url(url, clean_all=clean_all)
80
80
 
81
81
  @staticmethod
82
- async def is_dynamic(url) -> str | None:
82
+ async def is_dynamic(url: str) -> str | None:
83
83
  """是动态"""
84
84
  if re.search(r"\b\d{18,19}\b", url):
85
85
  return url
@@ -93,9 +93,9 @@ class BiliParse(YtParser):
93
93
  if "风控" in str(e):
94
94
  raise ParseError(f"账号风控\n使用的cookie: {cookie_ellipsis(self.cookie)}") from e
95
95
  raise ParseError(str(e)) from e
96
- return dynamic_info
96
+ return cast(BiliDynamic, dynamic_info)
97
97
 
98
- async def bili_api_parse(self, url) -> BiliVideoParseResult | ImageParseResult:
98
+ async def bili_api_parse(self, url: str) -> BiliVideoParseResult | ImageParseResult:
99
99
  async with BiliAPI(proxy=self.proxy) as bili:
100
100
  video_info = await bili.get_video_info(url)
101
101
 
@@ -138,16 +138,16 @@ class BiliParse(YtParser):
138
138
  ),
139
139
  )
140
140
 
141
- async def ytp_parse(self, url) -> YtVideoParseResult:
141
+ async def ytp_parse(self, url: str) -> YtVideoParseResult:
142
142
  result = cast(YtVideoParseResult, await super()._do_parse(url))
143
143
  return YtVideoParseResult(
144
144
  title=result.title,
145
145
  dl=result.dl,
146
- video=result.media,
146
+ video=cast(VideoRef | None, result.media),
147
147
  )
148
148
 
149
149
  @staticmethod
150
- def change_source(url: str):
150
+ def change_source(url: str) -> str:
151
151
  return re.sub(
152
152
  r"upos-.*.(bilivideo.com|mirrorakam.akamaized.net)",
153
153
  "upos-sz-upcdnbda2.bilivideo.com",
@@ -52,7 +52,7 @@ class CoolapkParser(BaseParser):
52
52
  )
53
53
 
54
54
  @staticmethod
55
- def hashtag_handler(desc: str):
55
+ def hashtag_handler(desc: str) -> str:
56
56
  hashtags = re.findall(r" ?#[^#]+# ?", desc)
57
57
  for hashtag in hashtags:
58
58
  desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
@@ -64,11 +64,11 @@ class CoolapkParseResult(ParseResult):
64
64
  self,
65
65
  *,
66
66
  output_dir: str | Path,
67
- callback: ProgressCallback = None,
67
+ callback: ProgressCallback | None = None,
68
68
  callback_args: tuple = (),
69
69
  callback_kwargs: dict | None = None,
70
70
  proxy: str | None = None,
71
- headers: dict = None,
71
+ headers: dict | None = None,
72
72
  ) -> "DownloadResult":
73
73
  headers = {
74
74
  "Accept": (
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import re
3
+ from typing import Any, cast
3
4
 
4
5
  from instaloader import BadResponseException
5
6
 
@@ -56,7 +57,7 @@ class InstagramParser(BaseParser):
56
57
  case _:
57
58
  raise ParseError("不支持的类型")
58
59
 
59
- async def _parse(self, url, shortcode, cookie=None) -> MyPost:
60
+ async def _parse(self, url: str, shortcode: str, cookie: dict[str, Any] | None = None) -> MyPost:
60
61
  try:
61
62
  post = await asyncio.wait_for(
62
63
  asyncio.to_thread(
@@ -84,10 +85,10 @@ class InstagramParser(BaseParser):
84
85
  text = str(e)
85
86
  raise ParseError(f"无法获取帖子内容: {text}") from e
86
87
  else:
87
- return post
88
+ return cast(MyPost, post)
88
89
 
89
90
  @staticmethod
90
- def get_short_code(url: str):
91
+ def get_short_code(url: str) -> str | None:
91
92
  url = url.removesuffix("/")
92
93
  shortcode = re.search(r"/(share|p|reel|.*/p|.*/reel)/(.*)", url)
93
94
  return shortcode.group(2).split("/")[0] if shortcode else None
@@ -1,7 +1,7 @@
1
1
  from dataclasses import dataclass, field
2
2
  from enum import Enum
3
3
  from pathlib import Path
4
- from typing import Self, Union
4
+ from typing import Any, Self, Union
5
5
 
6
6
  from ... import ProgressCallback
7
7
  from ...provider_api.tiktok import TikTokWebCrawler
@@ -110,7 +110,7 @@ def preferred_video_url(data: dict | str | list | None) -> str | None:
110
110
  return next((url for url in urls if "aweme" in url), None) or (urls[0] if urls else None)
111
111
 
112
112
 
113
- def as_int(value) -> int:
113
+ def as_int(value: Any) -> int:
114
114
  try:
115
115
  return int(value or 0)
116
116
  except (TypeError, ValueError):
@@ -26,7 +26,7 @@ class TwitterParser(BaseParser):
26
26
  __supported_type__ = ["视频", "图文"]
27
27
  __match__ = r"^(http(s)?://)?.+(twitter|fixupx|x).com/.*/status/\d+"
28
28
 
29
- async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
29
+ async def _do_parse(self, raw_url: str) -> MultimediaParseResult | RichTextParseResult:
30
30
  tweet = await self._parse(raw_url)
31
31
  return await self.media_parse(tweet)
32
32
 
@@ -34,7 +34,7 @@ class TwitterParser(BaseParser):
34
34
  url = await super().get_raw_url(url, clean_all=clean_all)
35
35
  return str(urlunparse(urlparse(url)._replace(netloc="x.com")))
36
36
 
37
- async def _parse(self, url: str):
37
+ async def _parse(self, url: str) -> TwitterTweet:
38
38
  x = Twitter(self.proxy, cookie=None)
39
39
  try:
40
40
  tweet = await x.fetch_tweet(url)
@@ -55,7 +55,7 @@ class TwitterParser(BaseParser):
55
55
  return tweet
56
56
 
57
57
  @staticmethod
58
- async def media_parse(tweet: TwitterTweet):
58
+ async def media_parse(tweet: TwitterTweet) -> MultimediaParseResult | RichTextParseResult:
59
59
  media: list[AnyMediaRef] = []
60
60
  if tweet.media:
61
61
  for m in tweet.media:
@@ -17,7 +17,7 @@ from ..base.base import BaseParser
17
17
  class WeiboParser(BaseParser):
18
18
  __platform__ = Platform.WEIBO
19
19
  __supported_type__ = ["视频", "图文"]
20
- __match__ = r"^(http(s)?://)(m\.|)weibo.(com|cn)/(?!(u/)).+"
20
+ __match__ = r"^(http(s)?://)((m\.|)weibo\.(com|cn)/(?!(u/)).+|mapp\.api\.weibo\.cn/fx/.+)"
21
21
 
22
22
  async def _do_parse(self, raw_url: str) -> MultimediaParseResult | VideoParseResult | ImageParseResult:
23
23
  weibo = await WeiboAPI(self.proxy).parse(raw_url)
@@ -91,7 +91,7 @@ class WeiboParser(BaseParser):
91
91
  return text.strip()
92
92
 
93
93
  @staticmethod
94
- def hashtag_handler(desc: str):
94
+ def hashtag_handler(desc: str) -> str:
95
95
  hashtags = re.findall(r" ?#[^#]+# ?", desc)
96
96
  for hashtag in hashtags:
97
97
  desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
@@ -31,6 +31,8 @@ class XHSParser(BaseParser):
31
31
  desc = self.hashtag_handler(result.desc)
32
32
  match result.type:
33
33
  case XHSPostType.VIDEO:
34
+ if not result.media:
35
+ raise ParseError("未获取到视频")
34
36
  v: XHSMedia = result.media[0]
35
37
  return VideoParseResult(
36
38
  video=VideoRef(
@@ -40,10 +42,13 @@ class XHSParser(BaseParser):
40
42
  content=desc,
41
43
  )
42
44
  case XHSPostType.IMAGE:
45
+ media_list = result.media or []
43
46
  photos: list[ImageRef | LivePhotoRef] = []
44
- for i in result.media:
47
+ for i in media_list:
45
48
  if i.type == XHSMediaType.LIVE_PHOTO:
46
- photos.append(LivePhotoRef(url=i.thumb_url, video_url=i.url, width=i.width, height=i.height))
49
+ photos.append(
50
+ LivePhotoRef(url=i.thumb_url or "", video_url=i.url, width=i.width, height=i.height)
51
+ )
47
52
  else:
48
53
  # 小红书图片格式: "png" | "webp" | "jpeg" | "heic" | "avif"
49
54
  ext = await self.get_ext_by_url(i.url)
@@ -61,7 +66,7 @@ class XHSParser(BaseParser):
61
66
  case _:
62
67
  raise ParseError("不支持的类型")
63
68
 
64
- async def get_ext_by_url(self, url: str):
69
+ async def get_ext_by_url(self, url: str) -> str:
65
70
  async with httpx.AsyncClient(proxy=self.proxy) as client:
66
71
  try:
67
72
  response = await client.head(url, follow_redirects=True)
@@ -72,12 +77,12 @@ class XHSParser(BaseParser):
72
77
  media_type = content_type.split(";")[0].strip()
73
78
  if "/" in media_type:
74
79
  extension = media_type.split("/")[-1]
75
- return extension
80
+ return str(extension)
76
81
 
77
82
  return ""
78
83
 
79
84
  @staticmethod
80
- def hashtag_handler(desc: str | None):
85
+ def hashtag_handler(desc: str | None) -> str:
81
86
  if not desc:
82
87
  return ""
83
88
  hashtags = re.findall(r" ?#[^#\[\]]+\[话题]# ?", desc)
@@ -0,0 +1,60 @@
1
+ from ...parsers.base import BaseParser
2
+ from ...provider_api.xiaoheihe import XiaoHeiHeAPI, XiaoHeiHeMediaType, XiaoHeiHePost, XiaoHeiHePostType
3
+ from ...types import (
4
+ AniRef,
5
+ AnyParseResult,
6
+ ImageParseResult,
7
+ ImageRef,
8
+ MultimediaParseResult,
9
+ ParseError,
10
+ Platform,
11
+ RichTextParseResult,
12
+ VideoParseResult,
13
+ VideoRef,
14
+ )
15
+
16
+
17
+ class XiaoHeiHeParser(BaseParser):
18
+ __platform__ = Platform.XIAOHEIHE
19
+ __supported_type__ = ["视频", "图文"]
20
+ __match__ = r"^(http(s)?://)?.+xiaoheihe.cn/(v3|app)/bbs/(app|link).+"
21
+ __redirect_keywords__ = ["api.xiaoheihe"]
22
+
23
+ async def _do_parse(self, raw_url: str) -> AnyParseResult:
24
+ xhh = await XiaoHeiHeAPI(proxy=self.proxy).parse(raw_url)
25
+ match xhh.type:
26
+ case XiaoHeiHePostType.VIDEO:
27
+ return VideoParseResult(
28
+ video=self.__parse_video(xhh),
29
+ title=xhh.title,
30
+ content=xhh.content,
31
+ )
32
+ case XiaoHeiHePostType.IMAGE:
33
+ media = self.__parse_images(xhh)
34
+ if not media or all(isinstance(m, ImageRef) for m in media):
35
+ return ImageParseResult(photo=media, title=xhh.title, content=xhh.content)
36
+ return MultimediaParseResult(media=media, title=xhh.title, content=xhh.content)
37
+ case XiaoHeiHePostType.ARTICLE:
38
+ return RichTextParseResult(
39
+ title=xhh.title,
40
+ media=self.__parse_images(xhh),
41
+ markdown_content=xhh.content,
42
+ )
43
+ raise ParseError("不支持的类型")
44
+
45
+ @staticmethod
46
+ def __parse_video(xhh: XiaoHeiHePost) -> VideoRef:
47
+ if not xhh.media:
48
+ raise ParseError("未获取到视频")
49
+ media = xhh.media[0]
50
+ return VideoRef(url=media.url, thumb_url=media.thumb_url)
51
+
52
+ @staticmethod
53
+ def __parse_images(xhh: XiaoHeiHePost) -> list[ImageRef | AniRef]:
54
+ images: list[ImageRef | AniRef] = []
55
+ for media in xhh.media or []:
56
+ if media.type == XiaoHeiHeMediaType.IMAGE:
57
+ images.append(ImageRef(url=media.url, width=media.width or 0, height=media.height or 0))
58
+ else:
59
+ images.append(AniRef(url=media.url, width=media.width or 0, height=media.height or 0))
60
+ return images
@@ -13,7 +13,7 @@ class YtbParse(YtParser):
13
13
  __reserved_parameters__ = ["v", "list", "index"]
14
14
 
15
15
  @property
16
- def params(self):
16
+ def params(self) -> dict[str, Any]:
17
17
  sub: dict[str, Any] = {
18
18
  # "writesubtitles": True, # 下载字幕
19
19
  # "writeautomaticsub": True, # 下载自动生成的字幕
@@ -1,3 +1,4 @@
1
+ # mypy: disable-error-code=no-untyped-def
1
2
  import asyncio
2
3
  import re
3
4
  import time
@@ -7,7 +8,7 @@ from dataclasses import dataclass
7
8
  from enum import Enum
8
9
  from functools import reduce
9
10
  from hashlib import md5
10
- from typing import Any
11
+ from typing import Any, cast
11
12
 
12
13
  import httpx
13
14
 
@@ -70,7 +71,7 @@ class BiliAPI:
70
71
  raise Exception("动态不可见")
71
72
  case _:
72
73
  raise Exception(f"获取动态信息失败: {mj}")
73
- return BiliDynamic.parse(data)
74
+ return BiliDynamic.parse(cast(dict[str, Any], data))
74
75
 
75
76
  async def get_video_info(self, url: str):
76
77
  """获取视频详细信息"""
@@ -117,7 +118,7 @@ class BiliAPI:
117
118
  params=params,
118
119
  cookies=cookies,
119
120
  )
120
- return response.json()
121
+ return cast(dict[str, Any], response.json())
121
122
 
122
123
  async def get_buvid(self):
123
124
  """获取 buvid"""
@@ -135,7 +136,7 @@ class BiliAPI:
135
136
  wbi = await BiliWbiSigner().wbi(bvid=bvid, cid=cid, up_mid=up_mid)
136
137
  return await self.get_ai_summary(bvid, cid, up_mid, wbi["w_rid"], wbi["wts"])
137
138
 
138
- async def get_ai_summary(self, bvid: str, cid: int, up_mid: int, w_rid: str, wts: int):
139
+ async def get_ai_summary(self, bvid: str, cid: int, up_mid: int, w_rid: str, wts: int) -> "AISummaryResult":
139
140
  url = "https://api.bilibili.com/x/web-interface/view/conclusion/get"
140
141
  result = await self._get_client().get(
141
142
  url,
@@ -249,7 +250,7 @@ class BiliDynamic:
249
250
  images: list[BiliImage] | None = None
250
251
 
251
252
  @classmethod
252
- def parse(cls, data: dict):
253
+ def parse(cls, data: dict) -> "BiliDynamic":
253
254
  module_dynamic: dict = data["item"]["modules"]["module_dynamic"]
254
255
  major: dict | None = module_dynamic.get("major", None)
255
256
  if not major:
@@ -258,7 +259,7 @@ class BiliDynamic:
258
259
  return cls._parse_major(module_dynamic, major)
259
260
 
260
261
  @classmethod
261
- def _parse_major(cls, module_dynamic: dict, major: dict):
262
+ def _parse_major(cls, module_dynamic: dict, major: dict) -> "BiliDynamic":
262
263
  major_type = major["type"]
263
264
  major_parsers: dict[MajorType, Callable[[dict, dict], BiliDynamic]] = {
264
265
  MajorType.MAJOR_TYPE_MEDIALIST: cls._parse_medialist,
@@ -278,12 +279,12 @@ class BiliDynamic:
278
279
  return major_parser(module_dynamic, major)
279
280
 
280
281
  @classmethod
281
- def _parse_pgc_union(cls, _, major: dict):
282
+ def _parse_pgc_union(cls, _: dict, major: dict) -> "BiliDynamic":
282
283
  pgc = major["pgc"]
283
284
  return cls(title=pgc["title"], images=[BiliImage(url=pgc["cover"])])
284
285
 
285
286
  @classmethod
286
- def _parse_forward(cls, module_dynamic: dict):
287
+ def _parse_forward(cls, module_dynamic: dict) -> "BiliDynamic":
287
288
  return cls(content=cls._get_desc_text(module_dynamic))
288
289
 
289
290
  @classmethod
@@ -301,7 +302,7 @@ class BiliDynamic:
301
302
  return cls(title=music["title"], images=cls._get_major_cover(music))
302
303
 
303
304
  @classmethod
304
- def _parse_opus(cls, _, major: dict):
305
+ def _parse_opus(cls, _: dict, major: dict) -> "BiliDynamic":
305
306
  opus = major["opus"]
306
307
  images = None
307
308
  if pics := opus["pics"]:
@@ -362,7 +363,7 @@ class BiliDynamic:
362
363
  @staticmethod
363
364
  def _get_desc_text(module_dynamic: dict) -> str | None:
364
365
  if desc := module_dynamic["desc"]:
365
- return desc["text"].strip()
366
+ return str(desc["text"]).strip()
366
367
  return None
367
368
 
368
369
  @staticmethod
@@ -16,7 +16,7 @@ class Coolapk:
16
16
  imgs: list[str] | None = None
17
17
 
18
18
  @classmethod
19
- async def parse(cls, url: str, proxy: str = None) -> "Coolapk":
19
+ async def parse(cls, url: str, proxy: str | None = None) -> "Coolapk":
20
20
  async with httpx.AsyncClient(headers={"User-Agent": GlobalConfig.ua}, proxy=proxy) as client:
21
21
  result = await client.get(url)
22
22
  soup = BeautifulSoup(result.text, "lxml")