parsehub 2.0.22__tar.gz → 2.0.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {parsehub-2.0.22/src/parsehub.egg-info → parsehub-2.0.23}/PKG-INFO +1 -1
  2. {parsehub-2.0.22 → parsehub-2.0.23}/pyproject.toml +18 -1
  3. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/__init__.py +7 -4
  4. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/cli.py +9 -8
  5. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/base/base.py +6 -4
  6. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/base/ytdlp.py +11 -6
  7. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/bilibili.py +9 -7
  8. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/coolapk.py +2 -2
  9. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/douyin.py +7 -5
  10. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/instagram.py +5 -5
  11. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/pipix.py +3 -3
  12. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/threads.py +2 -2
  13. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/tieba.py +5 -3
  14. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/tiktok.py +6 -4
  15. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/twitter.py +12 -3
  16. parsehub-2.0.23/src/parsehub/parsers/parser/weibo.py +101 -0
  17. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/xiaoheihe.py +5 -3
  18. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/bilibili.py +9 -7
  19. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/coolapk.py +4 -2
  20. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/douyin.py +2 -1
  21. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/instagram.py +4 -4
  22. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/pipix.py +6 -3
  23. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/threads.py +2 -2
  24. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/tieba.py +7 -4
  25. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/twitter.py +10 -6
  26. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/weibo.py +68 -64
  27. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/weixin.py +11 -7
  28. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/xhs.py +10 -4
  29. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/xiaoheihe.py +4 -3
  30. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/zuiyou.py +4 -1
  31. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/result.py +34 -29
  32. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/utils/downloader.py +8 -8
  33. {parsehub-2.0.22 → parsehub-2.0.23/src/parsehub.egg-info}/PKG-INFO +1 -1
  34. {parsehub-2.0.22 → parsehub-2.0.23}/test/test_cli.py +4 -4
  35. {parsehub-2.0.22 → parsehub-2.0.23}/test/test_cli_config.py +1 -1
  36. {parsehub-2.0.22 → parsehub-2.0.23}/test/test_core_offline.py +5 -5
  37. parsehub-2.0.22/src/parsehub/parsers/parser/weibo.py +0 -89
  38. {parsehub-2.0.22 → parsehub-2.0.23}/LICENSE +0 -0
  39. {parsehub-2.0.22 → parsehub-2.0.23}/README.md +0 -0
  40. {parsehub-2.0.22 → parsehub-2.0.23}/setup.cfg +0 -0
  41. {parsehub-2.0.22 → parsehub-2.0.23}/src/__init__.py +0 -0
  42. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/cli_config.py +0 -0
  43. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/config/__init__.py +0 -0
  44. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/config/config.py +0 -0
  45. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/errors.py +0 -0
  46. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/__init__.py +0 -0
  47. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/base/__init__.py +0 -0
  48. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/__init__.py +0 -0
  49. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/facebook.py +0 -0
  50. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/kuaishou.py +0 -0
  51. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/weixin.py +0 -0
  52. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/xhs.py +0 -0
  53. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/youtube.py +0 -0
  54. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  55. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/__init__.py +0 -0
  56. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/kuaishou.py +0 -0
  57. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/provider_api/tiktok.py +0 -0
  58. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/__init__.py +0 -0
  59. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/callback.py +0 -0
  60. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/media_file.py +0 -0
  61. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/media_ref.py +0 -0
  62. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/platform.py +0 -0
  63. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/types/post.py +0 -0
  64. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/utils/media_info.py +0 -0
  65. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub/utils/utils.py +0 -0
  66. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub.egg-info/SOURCES.txt +0 -0
  67. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub.egg-info/dependency_links.txt +0 -0
  68. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub.egg-info/entry_points.txt +0 -0
  69. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub.egg-info/requires.txt +0 -0
  70. {parsehub-2.0.22 → parsehub-2.0.23}/src/parsehub.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.22
3
+ Version: 2.0.23
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.22"
3
+ version = "2.0.23"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -52,6 +52,8 @@ cli = [
52
52
 
53
53
  [dependency-groups]
54
54
  dev = [
55
+ "mypy>=2.1.0",
56
+ "pytest>=9.0.3",
55
57
  "ruff>=0.14.14",
56
58
  ]
57
59
 
@@ -79,3 +81,18 @@ ignore = [
79
81
  "B008", # 不在参数默认值中执行函数调用
80
82
  "C901", # 函数复杂度过高
81
83
  ]
84
+
85
+ [tool.mypy]
86
+ python_version = "3.12"
87
+ files = ["./"]
88
+ ignore_missing_imports = true
89
+ warn_return_any = false
90
+ warn_unused_ignores = true
91
+ check_untyped_defs = false
92
+ disallow_untyped_defs = false
93
+ no_implicit_optional = false
94
+
95
+ [tool.pytest.ini_options]
96
+ testpaths = ["test"]
97
+ pythonpath = ["src"]
98
+ python_files = ["test_*.py"]
@@ -47,9 +47,9 @@ class ParseHub:
47
47
  async def download(
48
48
  self,
49
49
  url: str,
50
- path: str | Path = None,
50
+ path: str | Path | None = None,
51
51
  *,
52
- callback: ProgressCallback = None,
52
+ callback: ProgressCallback | None = None,
53
53
  callback_args: tuple = (),
54
54
  callback_kwargs: dict | None = None,
55
55
  proxy: str | None = None,
@@ -169,6 +169,8 @@ class ParseHub:
169
169
  :return: 原始链接
170
170
  """
171
171
  parser = self.get_parser(url)
172
+ if not parser:
173
+ raise UnknownPlatform(url)
172
174
  try:
173
175
  return await parser(proxy=proxy).get_raw_url(url, clean_all=clean_all)
174
176
  except Exception as e:
@@ -210,9 +212,10 @@ class ParseHub:
210
212
  """
211
213
  return [
212
214
  {
213
- "id": parser.__platform__.id,
214
- "name": parser.__platform__.display_name,
215
+ "id": platform.id,
216
+ "name": platform.display_name,
215
217
  "supported_types": parser.__supported_type__,
216
218
  }
217
219
  for parser in self.parsers
220
+ if (platform := parser.__platform__) is not None
218
221
  ]
@@ -8,7 +8,7 @@ import unicodedata
8
8
  from dataclasses import asdict, is_dataclass
9
9
  from importlib.metadata import PackageNotFoundError, version
10
10
  from pathlib import Path
11
- from typing import TYPE_CHECKING, Any
11
+ from typing import TYPE_CHECKING, Any, NoReturn, cast
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  from .cli_config import AutoCookieStore, PlatformConfig
@@ -20,12 +20,13 @@ _CLI_EXTRA_MODULES = ("argcomplete", "platformdirs")
20
20
  class _ChineseArgumentParser(argparse.ArgumentParser):
21
21
  def __init__(self, *args: Any, **kwargs: Any):
22
22
  kwargs.setdefault("formatter_class", argparse.RawDescriptionHelpFormatter)
23
- add_help = kwargs.pop("add_help", True)
24
- super().__init__(*args, add_help=False, **kwargs)
23
+ add_help = bool(kwargs.pop("add_help", True))
24
+ kwargs["add_help"] = False
25
+ super().__init__(*args, **kwargs)
25
26
  if add_help:
26
27
  self.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="显示帮助信息")
27
28
 
28
- def error(self, message: str) -> None:
29
+ def error(self, message: str) -> NoReturn:
29
30
  self.print_usage(sys.stderr)
30
31
  translated = _translate_argparse_error(message)
31
32
  hint = _usage_hint(self.prog)
@@ -212,7 +213,7 @@ def _add_set_commands(subparsers: argparse._SubParsersAction) -> None:
212
213
 
213
214
  def _add_platform_argument(parser: argparse.ArgumentParser) -> None:
214
215
  action = parser.add_argument("platform", help="平台 ID,如 xhs")
215
- action.completer = _complete_platforms
216
+ action.completer = _complete_platforms # type: ignore[attr-defined]
216
217
 
217
218
 
218
219
  def _add_json_options(parser: argparse.ArgumentParser) -> None:
@@ -436,7 +437,7 @@ def _platform_config_row(
436
437
 
437
438
 
438
439
  def _print_json(data: Any, *, pretty: bool) -> None:
439
- kwargs = {"ensure_ascii": False}
440
+ kwargs: dict[str, Any] = {"ensure_ascii": False}
440
441
  if pretty:
441
442
  kwargs["indent"] = 2
442
443
  else:
@@ -547,8 +548,8 @@ def _download_result_to_dict(result: Any) -> dict[str, Any]:
547
548
  def _jsonable(value: Any) -> Any:
548
549
  if isinstance(value, Path):
549
550
  return str(value)
550
- if is_dataclass(value):
551
- return _jsonable(asdict(value))
551
+ if is_dataclass(value) and not isinstance(value, type):
552
+ return _jsonable(asdict(cast(Any, value)))
552
553
  if isinstance(value, dict):
553
554
  return {str(k): _jsonable(v) for k, v in value.items()}
554
555
  if isinstance(value, (list, tuple)):
@@ -34,7 +34,7 @@ class BaseParser(ABC):
34
34
  self.proxy = proxy
35
35
  self.cookie = normalize_cookie(cookie)
36
36
 
37
- def __init_subclass__(cls, /, register=True, **kwargs):
37
+ def __init_subclass__(cls, /, register: bool = True, **kwargs):
38
38
  super().__init_subclass__(**kwargs)
39
39
  if register:
40
40
  if not cls.__platform__:
@@ -56,7 +56,7 @@ class BaseParser(ABC):
56
56
  def match(cls, text: str) -> bool:
57
57
  """判断是否匹配该解析器"""
58
58
  url = match_url(text)
59
- return bool(re.match(cls.__match__, url))
59
+ return bool(cls.__match__ and re.match(cls.__match__, url))
60
60
 
61
61
  async def parse(self, url: str) -> AnyParseResult:
62
62
  """解析
@@ -66,7 +66,8 @@ class BaseParser(ABC):
66
66
  raw_url = await self.get_raw_url(url, clean_all=False)
67
67
  result = await self._do_parse(raw_url)
68
68
  result.platform = self.__platform__
69
- result.raw_url = self._clean_params(raw_url, self.__after_clean_parameters__)
69
+ raw_url_clean = self._clean_params(raw_url, self.__after_clean_parameters__)
70
+ result.raw_url = raw_url_clean
70
71
  return result
71
72
 
72
73
  @abstractmethod
@@ -104,7 +105,8 @@ class BaseParser(ABC):
104
105
 
105
106
  :return:
106
107
  """
107
- url = match_url(url)
108
+ matched_url = match_url(url)
109
+ url = matched_url or url
108
110
  if not url.startswith("http"):
109
111
  url = f"https://{url}"
110
112
  if any(x in url for x in self.__redirect_keywords__):
@@ -1,11 +1,11 @@
1
1
  import asyncio
2
2
  from dataclasses import dataclass
3
3
  from pathlib import Path
4
- from typing import Union
5
4
 
6
5
  from yt_dlp import YoutubeDL
7
6
 
8
7
  from ...types import (
8
+ AnyParseResult,
9
9
  DownloadError,
10
10
  DownloadResult,
11
11
  ParseError,
@@ -91,7 +91,7 @@ class MonotonicDownloadProgress:
91
91
  class YtParser(BaseParser, register=False):
92
92
  """yt-dlp解析器"""
93
93
 
94
- async def _do_parse(self, raw_url: str) -> Union["YtVideoParseResult"]:
94
+ async def _do_parse(self, raw_url: str) -> AnyParseResult:
95
95
  video_info = await self._parse(raw_url)
96
96
  return YtVideoParseResult(
97
97
  dl=video_info,
@@ -114,8 +114,8 @@ class YtParser(BaseParser, register=False):
114
114
  except Exception as e:
115
115
  raise ParseError(f"解析视频信息失败: {str(e)}") from e
116
116
 
117
- if dl.get("_type") and dl["_type"] == "playlist": # type: ignore
118
- dl = dl["entries"][0] # type: ignore
117
+ if dl.get("_type") and dl["_type"] == "playlist":
118
+ dl = dl["entries"][0]
119
119
  url = dl["webpage_url"]
120
120
  title = dl["title"]
121
121
  duration = dl.get("duration", 0)
@@ -190,12 +190,13 @@ class YtVideoParseResult(VideoParseResult):
190
190
  ) -> "DownloadResult":
191
191
  if callback_kwargs is None:
192
192
  callback_kwargs = {}
193
+ output_dir_path = Path(output_dir)
193
194
 
194
195
  paramss = self.dl.paramss.copy()
195
196
  if self.dl.proxy:
196
197
  paramss["proxy"] = self.dl.proxy
197
198
 
198
- paramss["outtmpl"] = f"{output_dir.joinpath('ytdlp_%(id)s')}.%(ext)s"
199
+ paramss["outtmpl"] = f"{output_dir_path.joinpath('ytdlp_%(id)s')}.%(ext)s"
199
200
 
200
201
  if callback:
201
202
  loop = asyncio.get_running_loop()
@@ -214,7 +215,11 @@ class YtVideoParseResult(VideoParseResult):
214
215
 
215
216
  await self._run_download(paramss, proxy=proxy)
216
217
 
217
- v = list(output_dir.glob("*.mp4")) or list(output_dir.glob("*.mkv")) or list(output_dir.glob("*.webm"))
218
+ v = (
219
+ list(output_dir_path.glob("*.mp4"))
220
+ or list(output_dir_path.glob("*.mkv"))
221
+ or list(output_dir_path.glob("*.webm"))
222
+ )
218
223
  if not v:
219
224
  raise DownloadError("下载失败 -1")
220
225
 
@@ -1,6 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import re
2
4
  from pathlib import Path
3
- from typing import Union
5
+ from typing import cast
4
6
  from urllib.parse import parse_qs, urlparse
5
7
 
6
8
  from loguru import logger
@@ -29,11 +31,11 @@ class BiliParse(YtParser):
29
31
  __reserved_parameters__ = ["p"]
30
32
  __redirect_keywords__ = ["b23.tv", "bili2233.cn"]
31
33
 
32
- async def _do_parse(self, raw_url: str) -> Union["YtVideoParseResult", "BiliVideoParseResult", ImageParseResult]:
34
+ async def _do_parse(self, raw_url: str) -> YtVideoParseResult | BiliVideoParseResult | ImageParseResult:
33
35
  if await self.is_dynamic(raw_url):
34
36
  dynamic = await self.get_dynamic_info(raw_url)
35
37
  content = self.hashtag_handler(dynamic.content)
36
- photos = []
38
+ photos: list[LivePhotoRef | ImageRef] = []
37
39
  if dynamic.images:
38
40
  for i in dynamic.images:
39
41
  if i.live_url:
@@ -93,7 +95,7 @@ class BiliParse(YtParser):
93
95
  raise ParseError(str(e)) from e
94
96
  return dynamic_info
95
97
 
96
- async def bili_api_parse(self, url) -> Union["BiliVideoParseResult", "ImageParseResult"]:
98
+ async def bili_api_parse(self, url) -> BiliVideoParseResult | ImageParseResult:
97
99
  async with BiliAPI(proxy=self.proxy) as bili:
98
100
  video_info = await bili.get_video_info(url)
99
101
 
@@ -136,8 +138,8 @@ class BiliParse(YtParser):
136
138
  ),
137
139
  )
138
140
 
139
- async def ytp_parse(self, url) -> Union["YtVideoParseResult"]:
140
- result = await super()._do_parse(url)
141
+ async def ytp_parse(self, url) -> YtVideoParseResult:
142
+ result = cast(YtVideoParseResult, await super()._do_parse(url))
141
143
  return YtVideoParseResult(
142
144
  title=result.title,
143
145
  dl=result.dl,
@@ -172,7 +174,7 @@ class BiliVideoParseResult(VideoParseResult):
172
174
  callback_kwargs: dict | None = None,
173
175
  proxy: str | None = None,
174
176
  headers: dict | None = None,
175
- ) -> "DownloadResult":
177
+ ) -> DownloadResult:
176
178
  headers = {"referer": "https://www.bilibili.com", "User-Agent": GlobalConfig.ua}
177
179
  return await super()._do_download(
178
180
  output_dir=output_dir,
@@ -31,14 +31,14 @@ class CoolapkParser(BaseParser):
31
31
  coolapk = await Coolapk.parse(raw_url, proxy=self.proxy)
32
32
  except Exception as e:
33
33
  raise ParseError(str(e)) from e
34
- media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs]
34
+ media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs or []]
35
35
  if coolapk.markdown_content:
36
36
  return CoolapkRichTextParseResult(
37
37
  title=coolapk.title,
38
38
  media=media,
39
39
  markdown_content=coolapk.markdown_content,
40
40
  )
41
- content = self.hashtag_handler(coolapk.text_content)
41
+ content = self.hashtag_handler(coolapk.text_content or "")
42
42
  if any(isinstance(m, AniRef) for m in media):
43
43
  return CoolapkMultimediaParseResult(
44
44
  title=coolapk.title,
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from enum import Enum
3
3
  from pathlib import Path
4
4
  from typing import Self, Union
@@ -47,6 +47,8 @@ class DouyinParser(BaseParser):
47
47
  @staticmethod
48
48
  def _build_video_result(result: "DouyinApiResult") -> VideoParseResult:
49
49
  """构建视频解析结果"""
50
+ if result.video is None:
51
+ raise ParseError("抖音解析失败: 未获取到视频")
50
52
  return DouyinVideoParseResult(
51
53
  title=result.desc,
52
54
  video=result.video,
@@ -134,9 +136,9 @@ class DouyinApiResult:
134
136
  """抖音 API 解析结果"""
135
137
 
136
138
  type: DouyinMediaType
137
- video: VideoRef = None
139
+ video: VideoRef | None = None
138
140
  desc: str = ""
139
- image_list: list[ImageRef | LivePhotoRef] = None
141
+ image_list: list[ImageRef | LivePhotoRef] = field(default_factory=list)
140
142
 
141
143
  @classmethod
142
144
  def parse(cls, json_dict: dict) -> Self:
@@ -162,7 +164,7 @@ class DouyinApiResult:
162
164
  has_live_photos = any(img.get("video") for img in images)
163
165
 
164
166
  if has_live_photos:
165
- image_list = []
167
+ image_list: list[ImageRef | LivePhotoRef] = []
166
168
  for image in images:
167
169
  if video := image.get("video"):
168
170
  video_info = parse_video_info(video)
@@ -206,7 +208,7 @@ class DouyinApiResult:
206
208
  def _parse_image_post_info(cls, image_post_info: dict, desc: str) -> Self:
207
209
  """解析新版图片格式 (image_post_info 字段)"""
208
210
  images = image_post_info.get("images", [])
209
- image_list = []
211
+ image_list: list[ImageRef | LivePhotoRef] = []
210
212
 
211
213
  for image in images:
212
214
  display_image = image.get("display_image", {})
@@ -15,7 +15,7 @@ class InstagramParser(BaseParser):
15
15
  __match__ = r"^(http(s)?://)(www\.|)instagram\.com/(p|reel|share|.*/p|.*/reel)/.*"
16
16
  __redirect_keywords__ = ["share"]
17
17
 
18
- async def _do_parse(self, raw_url: str) -> VideoParseResult | ImageParseResult | MultimediaParseResult | None:
18
+ async def _do_parse(self, raw_url: str) -> VideoParseResult | ImageParseResult | MultimediaParseResult:
19
19
  shortcode = self.get_short_code(raw_url)
20
20
  if not shortcode:
21
21
  raise ValueError("Instagram帖子链接无效")
@@ -32,7 +32,7 @@ class InstagramParser(BaseParser):
32
32
  case "GraphSidecar":
33
33
  media = [
34
34
  VideoRef(url=i.video_url, thumb_url=i.display_url, width=i.width, height=i.height)
35
- if i.is_video
35
+ if i.is_video and i.video_url
36
36
  else ImageRef(url=i.display_url, width=i.width, height=i.height)
37
37
  for i in post.get_sidecar_nodes()
38
38
  ]
@@ -44,9 +44,9 @@ class InstagramParser(BaseParser):
44
44
  case "GraphVideo":
45
45
  return VideoParseResult(
46
46
  video=VideoRef(
47
- url=post.video_url,
47
+ url=post.video_url or post.url,
48
48
  thumb_url=post.url,
49
- duration=int(post.video_duration),
49
+ duration=int(post.video_duration or 0),
50
50
  width=width,
51
51
  height=height,
52
52
  ),
@@ -81,7 +81,7 @@ class InstagramParser(BaseParser):
81
81
  if cookie:
82
82
  text = f"Instagram 账号可能已被封禁\n\n使用的Cookie: {cookie_ellipsis(cookie)}"
83
83
  else:
84
- text = e
84
+ text = str(e)
85
85
  raise ParseError(f"无法获取帖子内容: {text}") from e
86
86
  else:
87
87
  return post
@@ -23,9 +23,9 @@ class PipixParser(BaseParser):
23
23
  video=VideoRef(
24
24
  url=ppx.video_url,
25
25
  thumb_url=ppx.video_thumb,
26
- duration=ppx.video_duration,
27
- height=ppx.video_height,
28
- width=ppx.video_width,
26
+ duration=ppx.video_duration or 0,
27
+ height=ppx.video_height or 0,
28
+ width=ppx.video_width or 0,
29
29
  ),
30
30
  )
31
31
  else:
@@ -1,5 +1,5 @@
1
1
  from ...provider_api.threads import ThreadsAPI, ThreadsMedia, ThreadsMediaType
2
- from ...types import ImageRef, MultimediaParseResult, Platform, VideoRef
2
+ from ...types import AnyMediaRef, ImageRef, MultimediaParseResult, Platform, VideoRef
3
3
  from ..base.base import BaseParser
4
4
 
5
5
 
@@ -10,7 +10,7 @@ class ThreadsParser(BaseParser):
10
10
 
11
11
  async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
12
12
  post = await ThreadsAPI(proxy=self.proxy).parse(raw_url)
13
- media = []
13
+ media: list[AnyMediaRef] = []
14
14
  if post.media:
15
15
  pm: list[ThreadsMedia] = post.media if isinstance(post.media, list) else [post.media]
16
16
  for m in pm:
@@ -2,7 +2,7 @@ from typing import Union
2
2
 
3
3
  import httpx
4
4
 
5
- from ...provider_api.tieba import TieBa, TieBaError, TieBaPostType
5
+ from ...provider_api.tieba import TieBa, TieBaError, TieBaPostType, TieBaVideo
6
6
  from ...types import AniRef, ImageParseResult, ImageRef, ParseError, Platform, VideoParseResult, VideoRef
7
7
  from ..base.base import BaseParser
8
8
 
@@ -22,6 +22,8 @@ class TieBaParser(BaseParser):
22
22
 
23
23
  match tb.type:
24
24
  case TieBaPostType.VIDEO:
25
+ if not isinstance(tb.media, TieBaVideo):
26
+ raise ParseError("贴吧解析失败: 未获取到视频")
25
27
  return VideoParseResult(
26
28
  title=tb.title,
27
29
  video=VideoRef(
@@ -35,8 +37,8 @@ class TieBaParser(BaseParser):
35
37
  )
36
38
 
37
39
  case TieBaPostType.PHOTO:
38
- images = []
39
- if tb.media:
40
+ images: list[ImageRef | AniRef] = []
41
+ if isinstance(tb.media, list):
40
42
  for i in tb.media:
41
43
  async with httpx.AsyncClient(proxy=self.proxy) as cli:
42
44
  try:
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from enum import Enum
3
3
  from pathlib import Path
4
4
  from typing import Self, Union
@@ -44,6 +44,8 @@ class TikTokParser(BaseParser):
44
44
 
45
45
  @staticmethod
46
46
  def _build_video_result(result: "TikTokApiResult") -> VideoParseResult:
47
+ if result.video is None:
48
+ raise ParseError("TikTok 解析失败: 未获取到视频")
47
49
  return TikTokVideoParseResult(
48
50
  title=result.desc,
49
51
  video=result.video,
@@ -199,9 +201,9 @@ class TikTokMediaType(Enum):
199
201
  @dataclass
200
202
  class TikTokApiResult:
201
203
  type: TikTokMediaType
202
- video: VideoRef = None
204
+ video: VideoRef | None = None
203
205
  desc: str = ""
204
- image_list: list[ImageRef] = None
206
+ image_list: list[ImageRef] = field(default_factory=list)
205
207
 
206
208
  @classmethod
207
209
  def parse(cls, json_dict: dict) -> Self:
@@ -216,7 +218,7 @@ class TikTokApiResult:
216
218
 
217
219
  @classmethod
218
220
  def _parse_image_post(cls, image_post_info: dict, desc: str) -> Self:
219
- image_list = []
221
+ image_list: list[ImageRef] = []
220
222
 
221
223
  for image in image_post_info.get("images", []):
222
224
  display_image = (
@@ -7,7 +7,16 @@ from ...provider_api.twitter import (
7
7
  TwitterTweet,
8
8
  TwitterVideo,
9
9
  )
10
- from ...types import AniRef, ImageRef, MultimediaParseResult, ParseError, Platform, RichTextParseResult, VideoRef
10
+ from ...types import (
11
+ AniRef,
12
+ AnyMediaRef,
13
+ ImageRef,
14
+ MultimediaParseResult,
15
+ ParseError,
16
+ Platform,
17
+ RichTextParseResult,
18
+ VideoRef,
19
+ )
11
20
  from ...utils.utils import cookie_ellipsis
12
21
  from ..base.base import BaseParser
13
22
 
@@ -47,12 +56,12 @@ class TwitterParser(BaseParser):
47
56
 
48
57
  @staticmethod
49
58
  async def media_parse(tweet: TwitterTweet):
50
- media = []
59
+ media: list[AnyMediaRef] = []
51
60
  if tweet.media:
52
61
  for m in tweet.media:
53
62
  match m:
54
63
  case TwitterPhoto():
55
- path = ImageRef(url=m.url, height=m.height, width=m.width, thumb_url=m.thumb_url)
64
+ path: AnyMediaRef = ImageRef(url=m.url, height=m.height, width=m.width, thumb_url=m.thumb_url)
56
65
  case TwitterVideo():
57
66
  path = VideoRef(
58
67
  url=m.url,
@@ -0,0 +1,101 @@
1
+ import re
2
+
3
+ from ...provider_api.weibo import MediaType, MixMediaInfoItem, PicInfo, WeiboAPI
4
+ from ...types import (
5
+ AniRef,
6
+ ImageParseResult,
7
+ ImageRef,
8
+ LivePhotoRef,
9
+ MultimediaParseResult,
10
+ Platform,
11
+ VideoParseResult,
12
+ VideoRef,
13
+ )
14
+ from ..base.base import BaseParser
15
+
16
+
17
+ class WeiboParser(BaseParser):
18
+ __platform__ = Platform.WEIBO
19
+ __supported_type__ = ["视频", "图文"]
20
+ __match__ = r"^(http(s)?://)(m\.|)weibo.(com|cn)/(?!(u/)).+"
21
+
22
+ async def _do_parse(self, raw_url: str) -> MultimediaParseResult | VideoParseResult | ImageParseResult:
23
+ weibo = await WeiboAPI(self.proxy).parse(raw_url)
24
+ data = weibo.data
25
+ text = self.f_text(data.content)
26
+ media: list[VideoRef | ImageRef | LivePhotoRef | AniRef] = []
27
+
28
+ if not data.pic_infos and data.page_info and data.page_info.object_type == MediaType.VIDEO:
29
+ playback = data.page_info.media_info and data.page_info.media_info.playback
30
+ if playback:
31
+ return VideoParseResult(
32
+ content=text,
33
+ video=VideoRef(
34
+ url=playback.url,
35
+ thumb_url=data.page_info.page_pic,
36
+ width=playback.width,
37
+ height=playback.height,
38
+ duration=int(playback.duration),
39
+ ),
40
+ )
41
+
42
+ media_info: list[PicInfo | MixMediaInfoItem] | None = None
43
+ if data.retweeted_status and data.retweeted_status.pic_infos:
44
+ media_info = list(data.retweeted_status.pic_infos)
45
+ elif data.pic_infos:
46
+ media_info = list(data.pic_infos)
47
+ elif data.mix_media_info and data.mix_media_info.items:
48
+ media_info = list(data.mix_media_info.items)
49
+ if not media_info:
50
+ return MultimediaParseResult(content=text, media=[])
51
+
52
+ for i in media_info:
53
+ match i.type:
54
+ case MediaType.VIDEO:
55
+ if i.media_url:
56
+ media.append(
57
+ VideoRef(
58
+ url=i.media_url,
59
+ thumb_url=i.thumb_url,
60
+ width=i.width,
61
+ height=i.height,
62
+ duration=i.duration,
63
+ )
64
+ )
65
+ case MediaType.LIVE_PHOTO:
66
+ if i.thumb_url:
67
+ media.append(
68
+ LivePhotoRef(
69
+ url=i.thumb_url,
70
+ ext="mov",
71
+ video_url=i.media_url,
72
+ width=i.width,
73
+ height=i.height,
74
+ )
75
+ )
76
+ case MediaType.GIF:
77
+ if i.media_url:
78
+ media.append(AniRef(url=i.media_url, thumb_url=i.thumb_url))
79
+ case _:
80
+ if i.media_url:
81
+ media.append(ImageRef(url=i.media_url, thumb_url=i.thumb_url, width=i.width, height=i.height))
82
+ if all((isinstance(m, ImageRef) or isinstance(m, LivePhotoRef)) for m in media):
83
+ photos = [m for m in media if isinstance(m, ImageRef | LivePhotoRef)]
84
+ return ImageParseResult(content=text, photo=photos)
85
+ return MultimediaParseResult(content=text, media=media)
86
+
87
+ def f_text(self, text: str | None) -> str:
88
+ # text = re.sub(r'<a href="https://video.weibo.com.*?>.*的微博视频.*</a>', "", text)
89
+ # text = re.sub(r"<[^>]+>", " ", text)
90
+ text = self.hashtag_handler(text or "")
91
+ return text.strip()
92
+
93
+ @staticmethod
94
+ def hashtag_handler(desc: str):
95
+ hashtags = re.findall(r" ?#[^#]+# ?", desc)
96
+ for hashtag in hashtags:
97
+ desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
98
+ return desc
99
+
100
+
101
+ __all__ = ["WeiboParser"]
@@ -36,13 +36,15 @@ class XiaoHeiHeParser(BaseParser):
36
36
  def __parse_media(xhh: XiaoHeiHePost):
37
37
  match xhh.type:
38
38
  case XiaoHeiHePostType.VIDEO:
39
+ if not xhh.media:
40
+ return None
39
41
  return VideoRef(url=xhh.media[0].url, thumb_url=xhh.media[0].thumb_url)
40
42
  case XiaoHeiHePostType.IMAGE | XiaoHeiHePostType.ARTICLE:
41
43
  images: list[ImageRef | AniRef] = []
42
- for i in xhh.media:
44
+ for i in xhh.media or []:
43
45
  if i.type == XiaoHeiHeMediaType.IMAGE:
44
- images.append(ImageRef(url=i.url, width=i.width, height=i.height))
46
+ images.append(ImageRef(url=i.url, width=i.width or 0, height=i.height or 0))
45
47
  else:
46
- images.append(AniRef(url=i.url, width=i.width, height=i.height))
48
+ images.append(AniRef(url=i.url, width=i.width or 0, height=i.height or 0))
47
49
 
48
50
  return images