parsehub 2.0.22__tar.gz → 2.0.24__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (71)
  1. {parsehub-2.0.22/src/parsehub.egg-info → parsehub-2.0.24}/PKG-INFO +1 -1
  2. {parsehub-2.0.22 → parsehub-2.0.24}/pyproject.toml +19 -1
  3. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/__init__.py +10 -7
  4. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/cli.py +14 -12
  5. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/base/base.py +7 -4
  6. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/base/ytdlp.py +29 -20
  7. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/bilibili.py +14 -12
  8. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/coolapk.py +5 -5
  9. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/douyin.py +7 -5
  10. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/instagram.py +9 -8
  11. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/pipix.py +3 -3
  12. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/threads.py +2 -2
  13. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/tieba.py +5 -3
  14. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/tiktok.py +8 -6
  15. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/twitter.py +15 -6
  16. parsehub-2.0.24/src/parsehub/parsers/parser/weibo.py +101 -0
  17. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/xhs.py +10 -5
  18. parsehub-2.0.24/src/parsehub/parsers/parser/xiaoheihe.py +60 -0
  19. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/youtube.py +1 -1
  20. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/bilibili.py +20 -17
  21. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/coolapk.py +5 -3
  22. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/douyin.py +15 -8
  23. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/instagram.py +13 -12
  24. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/kuaishou.py +6 -5
  25. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/pipix.py +7 -4
  26. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/threads.py +13 -11
  27. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/tieba.py +11 -7
  28. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/tiktok.py +2 -2
  29. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/twitter.py +25 -16
  30. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/weibo.py +94 -66
  31. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/weixin.py +16 -10
  32. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/xhs.py +26 -17
  33. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/xiaoheihe.py +14 -9
  34. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/zuiyou.py +10 -7
  35. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/callback.py +2 -2
  36. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/media_file.py +5 -5
  37. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/platform.py +2 -2
  38. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/result.py +40 -35
  39. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/utils/downloader.py +8 -8
  40. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/utils/utils.py +2 -2
  41. {parsehub-2.0.22 → parsehub-2.0.24/src/parsehub.egg-info}/PKG-INFO +1 -1
  42. {parsehub-2.0.22 → parsehub-2.0.24}/test/test_cli.py +4 -4
  43. {parsehub-2.0.22 → parsehub-2.0.24}/test/test_cli_config.py +1 -1
  44. {parsehub-2.0.22 → parsehub-2.0.24}/test/test_core_offline.py +13 -8
  45. parsehub-2.0.22/src/parsehub/parsers/parser/weibo.py +0 -89
  46. parsehub-2.0.22/src/parsehub/parsers/parser/xiaoheihe.py +0 -48
  47. {parsehub-2.0.22 → parsehub-2.0.24}/LICENSE +0 -0
  48. {parsehub-2.0.22 → parsehub-2.0.24}/README.md +0 -0
  49. {parsehub-2.0.22 → parsehub-2.0.24}/setup.cfg +0 -0
  50. {parsehub-2.0.22 → parsehub-2.0.24}/src/__init__.py +0 -0
  51. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/cli_config.py +0 -0
  52. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/config/__init__.py +0 -0
  53. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/config/config.py +0 -0
  54. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/errors.py +0 -0
  55. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/__init__.py +0 -0
  56. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/base/__init__.py +0 -0
  57. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/__init__.py +0 -0
  58. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/facebook.py +0 -0
  59. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/kuaishou.py +0 -0
  60. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/weixin.py +0 -0
  61. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  62. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/__init__.py +0 -0
  63. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/__init__.py +0 -0
  64. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/media_ref.py +0 -0
  65. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/post.py +0 -0
  66. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/utils/media_info.py +0 -0
  67. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/SOURCES.txt +0 -0
  68. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/dependency_links.txt +0 -0
  69. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/entry_points.txt +0 -0
  70. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/requires.txt +0 -0
  71. {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.22
3
+ Version: 2.0.24
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.22"
3
+ version = "2.0.24"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -52,6 +52,8 @@ cli = [
52
52
 
53
53
  [dependency-groups]
54
54
  dev = [
55
+ "mypy>=2.1.0",
56
+ "pytest>=9.0.3",
55
57
  "ruff>=0.14.14",
56
58
  ]
57
59
 
@@ -79,3 +81,19 @@ ignore = [
79
81
  "B008", # 不在参数默认值中执行函数调用
80
82
  "C901", # 函数复杂度过高
81
83
  ]
84
+
85
+ [tool.mypy]
86
+ python_version = "3.12"
87
+ files = ["./"]
88
+ exclude = ["test"]
89
+ ignore_missing_imports = true
90
+ warn_return_any = true
91
+ warn_unused_ignores = true
92
+ check_untyped_defs = true
93
+ disallow_untyped_defs = true
94
+ no_implicit_optional = true
95
+
96
+ [tool.pytest.ini_options]
97
+ testpaths = ["test"]
98
+ pythonpath = ["src"]
99
+ python_files = ["test_*.py"]
@@ -13,7 +13,7 @@ logger.disable(__name__)
13
13
 
14
14
 
15
15
  class ParseHub:
16
- def __init__(self):
16
+ def __init__(self) -> None:
17
17
  self.parsers: list[type[BaseParser]] = BaseParser.get_registry()
18
18
 
19
19
  async def parse(self, url: str, *, proxy: str | None = None, cookie: str | dict | None = None) -> AnyParseResult:
@@ -47,9 +47,9 @@ class ParseHub:
47
47
  async def download(
48
48
  self,
49
49
  url: str,
50
- path: str | Path = None,
50
+ path: str | Path | None = None,
51
51
  *,
52
- callback: ProgressCallback = None,
52
+ callback: ProgressCallback | None = None,
53
53
  callback_args: tuple = (),
54
54
  callback_kwargs: dict | None = None,
55
55
  proxy: str | None = None,
@@ -169,6 +169,8 @@ class ParseHub:
169
169
  :return: 原始链接
170
170
  """
171
171
  parser = self.get_parser(url)
172
+ if not parser:
173
+ raise UnknownPlatform(url)
172
174
  try:
173
175
  return await parser(proxy=proxy).get_raw_url(url, clean_all=clean_all)
174
176
  except Exception as e:
@@ -183,7 +185,7 @@ class ParseHub:
183
185
  return parser
184
186
  return None
185
187
 
186
- def get_parser(self, url) -> type[BaseParser] | None:
188
+ def get_parser(self, url: str) -> type[BaseParser] | None:
187
189
  """获取解析器
188
190
  :param url: 分享文案 / 分享链接
189
191
  """
@@ -191,7 +193,7 @@ class ParseHub:
191
193
  return parser
192
194
  return None
193
195
 
194
- def get_platform(self, url) -> Platform | None:
196
+ def get_platform(self, url: str) -> Platform | None:
195
197
  """获取平台
196
198
  :param url: 分享文案 / 分享链接
197
199
  """
@@ -210,9 +212,10 @@ class ParseHub:
210
212
  """
211
213
  return [
212
214
  {
213
- "id": parser.__platform__.id,
214
- "name": parser.__platform__.display_name,
215
+ "id": platform.id,
216
+ "name": platform.display_name,
215
217
  "supported_types": parser.__supported_type__,
216
218
  }
217
219
  for parser in self.parsers
220
+ if (platform := parser.__platform__) is not None
218
221
  ]
@@ -8,7 +8,7 @@ import unicodedata
8
8
  from dataclasses import asdict, is_dataclass
9
9
  from importlib.metadata import PackageNotFoundError, version
10
10
  from pathlib import Path
11
- from typing import TYPE_CHECKING, Any
11
+ from typing import TYPE_CHECKING, Any, NoReturn, cast
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  from .cli_config import AutoCookieStore, PlatformConfig
@@ -20,12 +20,13 @@ _CLI_EXTRA_MODULES = ("argcomplete", "platformdirs")
20
20
  class _ChineseArgumentParser(argparse.ArgumentParser):
21
21
  def __init__(self, *args: Any, **kwargs: Any):
22
22
  kwargs.setdefault("formatter_class", argparse.RawDescriptionHelpFormatter)
23
- add_help = kwargs.pop("add_help", True)
24
- super().__init__(*args, add_help=False, **kwargs)
23
+ add_help = bool(kwargs.pop("add_help", True))
24
+ kwargs["add_help"] = False
25
+ super().__init__(*args, **kwargs)
25
26
  if add_help:
26
27
  self.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="显示帮助信息")
27
28
 
28
- def error(self, message: str) -> None:
29
+ def error(self, message: str) -> NoReturn:
29
30
  self.print_usage(sys.stderr)
30
31
  translated = _translate_argparse_error(message)
31
32
  hint = _usage_hint(self.prog)
@@ -45,7 +46,7 @@ def main(argv: list[str] | None = None) -> int:
45
46
  try:
46
47
  args = parser.parse_args(_normalize_argv(raw_argv))
47
48
  _finalize_output_args(args)
48
- return args.func(args)
49
+ return int(args.func(args))
49
50
  except SystemExit as e:
50
51
  return _normalize_exit_code(e.code)
51
52
  except ValueError as e:
@@ -212,7 +213,7 @@ def _add_set_commands(subparsers: argparse._SubParsersAction) -> None:
212
213
 
213
214
  def _add_platform_argument(parser: argparse.ArgumentParser) -> None:
214
215
  action = parser.add_argument("platform", help="平台 ID,如 xhs")
215
- action.completer = _complete_platforms
216
+ action.completer = _complete_platforms # type: ignore[attr-defined]
216
217
 
217
218
 
218
219
  def _add_json_options(parser: argparse.ArgumentParser) -> None:
@@ -364,14 +365,15 @@ def _cookie_prompt() -> Any:
364
365
 
365
366
  def _load_platform_config(platform_id: str | None) -> PlatformConfig:
366
367
  if not platform_id:
367
- return _platform_config_type()()
368
- return _config_store().get_platform(platform_id)
368
+ return cast("PlatformConfig", _platform_config_type()())
369
+ return cast("PlatformConfig", _config_store().get_platform(platform_id))
369
370
 
370
371
 
371
372
  def _load_cookie(platform_id: str | None) -> str | None:
372
373
  if not platform_id:
373
374
  return None
374
- return _cookie_store().get(platform_id)
375
+ value = _cookie_store().get(platform_id)
376
+ return value if isinstance(value, str) or value is None else str(value)
375
377
 
376
378
 
377
379
  def _detect_platform_id(hub: Any, url_or_text: str) -> str | None:
@@ -436,7 +438,7 @@ def _platform_config_row(
436
438
 
437
439
 
438
440
  def _print_json(data: Any, *, pretty: bool) -> None:
439
- kwargs = {"ensure_ascii": False}
441
+ kwargs: dict[str, Any] = {"ensure_ascii": False}
440
442
  if pretty:
441
443
  kwargs["indent"] = 2
442
444
  else:
@@ -547,8 +549,8 @@ def _download_result_to_dict(result: Any) -> dict[str, Any]:
547
549
  def _jsonable(value: Any) -> Any:
548
550
  if isinstance(value, Path):
549
551
  return str(value)
550
- if is_dataclass(value):
551
- return _jsonable(asdict(value))
552
+ if is_dataclass(value) and not isinstance(value, type):
553
+ return _jsonable(asdict(cast(Any, value)))
552
554
  if isinstance(value, dict):
553
555
  return {str(k): _jsonable(v) for k, v in value.items()}
554
556
  if isinstance(value, (list, tuple)):
@@ -2,6 +2,7 @@ import importlib
2
2
  import pkgutil
3
3
  import re
4
4
  from abc import ABC, abstractmethod
5
+ from typing import Any
5
6
  from urllib.parse import parse_qs, urlencode, urlparse
6
7
 
7
8
  import httpx
@@ -34,7 +35,7 @@ class BaseParser(ABC):
34
35
  self.proxy = proxy
35
36
  self.cookie = normalize_cookie(cookie)
36
37
 
37
- def __init_subclass__(cls, /, register=True, **kwargs):
38
+ def __init_subclass__(cls, /, register: bool = True, **kwargs: Any) -> None:
38
39
  super().__init_subclass__(**kwargs)
39
40
  if register:
40
41
  if not cls.__platform__:
@@ -56,7 +57,7 @@ class BaseParser(ABC):
56
57
  def match(cls, text: str) -> bool:
57
58
  """判断是否匹配该解析器"""
58
59
  url = match_url(text)
59
- return bool(re.match(cls.__match__, url))
60
+ return bool(cls.__match__ and re.match(cls.__match__, url))
60
61
 
61
62
  async def parse(self, url: str) -> AnyParseResult:
62
63
  """解析
@@ -66,7 +67,8 @@ class BaseParser(ABC):
66
67
  raw_url = await self.get_raw_url(url, clean_all=False)
67
68
  result = await self._do_parse(raw_url)
68
69
  result.platform = self.__platform__
69
- result.raw_url = self._clean_params(raw_url, self.__after_clean_parameters__)
70
+ raw_url_clean = self._clean_params(raw_url, self.__after_clean_parameters__)
71
+ result.raw_url = raw_url_clean
70
72
  return result
71
73
 
72
74
  @abstractmethod
@@ -104,7 +106,8 @@ class BaseParser(ABC):
104
106
 
105
107
  :return:
106
108
  """
107
- url = match_url(url)
109
+ matched_url = match_url(url)
110
+ url = matched_url or url
108
111
  if not url.startswith("http"):
109
112
  url = f"https://{url}"
110
113
  if any(x in url for x in self.__redirect_keywords__):
@@ -1,11 +1,13 @@
1
1
  import asyncio
2
+ from collections.abc import Callable
2
3
  from dataclasses import dataclass
3
4
  from pathlib import Path
4
- from typing import Union
5
+ from typing import Any, cast
5
6
 
6
7
  from yt_dlp import YoutubeDL
7
8
 
8
9
  from ...types import (
10
+ AnyParseResult,
9
11
  DownloadError,
10
12
  DownloadResult,
11
13
  ParseError,
@@ -30,7 +32,7 @@ def switch_ytdlp_proxy(ydl: YoutubeDL, proxy: str | None) -> None:
30
32
  director.close()
31
33
 
32
34
 
33
- def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None:
35
+ def download_video(yto_params: dict[str, Any], url: str, proxy: str | None = None) -> None:
34
36
  """在独立线程中下载视频"""
35
37
  try:
36
38
  with YoutubeDL(yto_params) as ydl:
@@ -43,14 +45,16 @@ def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None
43
45
 
44
46
 
45
47
  class MonotonicDownloadProgress:
46
- def __init__(self, emit, *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1):
48
+ def __init__(
49
+ self, emit: Callable[[float], None], *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1
50
+ ) -> None:
47
51
  self.emit = emit
48
52
  self.start = start
49
53
  self.end = end
50
54
  self.min_step = min_step
51
55
  self.current = start
52
56
 
53
- def __call__(self, d: dict):
57
+ def __call__(self, d: dict[str, Any]) -> None:
54
58
  status = d.get("status")
55
59
 
56
60
  if status == "downloading":
@@ -82,8 +86,8 @@ class MonotonicDownloadProgress:
82
86
  # 分片下载有时没有稳定总大小,但有 frag 进度;作为兜底
83
87
  frag_index = d.get("fragment_index")
84
88
  frag_count = d.get("fragment_count")
85
- if frag_index is not None and frag_count:
86
- return min(frag_index / frag_count * 100, 100)
89
+ if isinstance(frag_index, int | float) and isinstance(frag_count, int | float) and frag_count:
90
+ return min(float(frag_index) / float(frag_count) * 100, 100.0)
87
91
 
88
92
  return None
89
93
 
@@ -91,7 +95,7 @@ class MonotonicDownloadProgress:
91
95
  class YtParser(BaseParser, register=False):
92
96
  """yt-dlp解析器"""
93
97
 
94
- async def _do_parse(self, raw_url: str) -> Union["YtVideoParseResult"]:
98
+ async def _do_parse(self, raw_url: str) -> AnyParseResult:
95
99
  video_info = await self._parse(raw_url)
96
100
  return YtVideoParseResult(
97
101
  dl=video_info,
@@ -106,7 +110,7 @@ class YtParser(BaseParser, register=False):
106
110
  ),
107
111
  )
108
112
 
109
- async def _parse(self, url) -> "YtVideoInfo":
113
+ async def _parse(self, url: str) -> "YtVideoInfo":
110
114
  try:
111
115
  dl = await asyncio.wait_for(asyncio.to_thread(self._extract_info, url), timeout=30)
112
116
  except TimeoutError as e:
@@ -114,8 +118,8 @@ class YtParser(BaseParser, register=False):
114
118
  except Exception as e:
115
119
  raise ParseError(f"解析视频信息失败: {str(e)}") from e
116
120
 
117
- if dl.get("_type") and dl["_type"] == "playlist": # type: ignore
118
- dl = dl["entries"][0] # type: ignore
121
+ if dl.get("_type") and dl["_type"] == "playlist":
122
+ dl = dl["entries"][0]
119
123
  url = dl["webpage_url"]
120
124
  title = dl["title"]
121
125
  duration = dl.get("duration", 0)
@@ -136,20 +140,20 @@ class YtParser(BaseParser, register=False):
136
140
  proxy=self.proxy,
137
141
  )
138
142
 
139
- def _extract_info(self, url):
143
+ def _extract_info(self, url: str) -> dict[str, Any]:
140
144
  params = self.params.copy()
141
145
  if self.proxy:
142
146
  params["proxy"] = self.proxy
143
147
 
144
148
  try:
145
149
  with YoutubeDL(params) as ydl:
146
- return ydl.extract_info(url, download=False)
150
+ return cast(dict[str, Any], ydl.extract_info(url, download=False))
147
151
  except Exception as e:
148
152
  error_msg = f"{type(e).__name__}: {str(e)}"
149
153
  raise RuntimeError(error_msg) from None
150
154
 
151
155
  @property
152
- def params(self) -> dict:
156
+ def params(self) -> dict[str, Any]:
153
157
  params = {
154
158
  "format": "mp4+bestvideo[height<=1080]+bestaudio",
155
159
  "quiet": True, # 不输出日志
@@ -170,9 +174,9 @@ class YtVideoParseResult(VideoParseResult):
170
174
  def __init__(
171
175
  self,
172
176
  dl: "YtVideoInfo",
173
- title,
174
- video=None,
175
- content=None,
177
+ title: str | None,
178
+ video: VideoRef | None = None,
179
+ content: str | None = None,
176
180
  ):
177
181
  """dl: yt-dlp解析结果"""
178
182
  self.dl = dl
@@ -190,17 +194,18 @@ class YtVideoParseResult(VideoParseResult):
190
194
  ) -> "DownloadResult":
191
195
  if callback_kwargs is None:
192
196
  callback_kwargs = {}
197
+ output_dir_path = Path(output_dir)
193
198
 
194
199
  paramss = self.dl.paramss.copy()
195
200
  if self.dl.proxy:
196
201
  paramss["proxy"] = self.dl.proxy
197
202
 
198
- paramss["outtmpl"] = f"{output_dir.joinpath('ytdlp_%(id)s')}.%(ext)s"
203
+ paramss["outtmpl"] = f"{output_dir_path.joinpath('ytdlp_%(id)s')}.%(ext)s"
199
204
 
200
205
  if callback:
201
206
  loop = asyncio.get_running_loop()
202
207
 
203
- def _callback(count: float):
208
+ def _callback(count: float) -> None:
204
209
  asyncio.run_coroutine_threadsafe(
205
210
  callback(int(count), 100, "bytes", *callback_args, **callback_kwargs), loop
206
211
  )
@@ -214,7 +219,11 @@ class YtVideoParseResult(VideoParseResult):
214
219
 
215
220
  await self._run_download(paramss, proxy=proxy)
216
221
 
217
- v = list(output_dir.glob("*.mp4")) or list(output_dir.glob("*.mkv")) or list(output_dir.glob("*.webm"))
222
+ v = (
223
+ list(output_dir_path.glob("*.mp4"))
224
+ or list(output_dir_path.glob("*.mkv"))
225
+ or list(output_dir_path.glob("*.webm"))
226
+ )
218
227
  if not v:
219
228
  raise DownloadError("下载失败 -1")
220
229
 
@@ -232,7 +241,7 @@ class YtVideoParseResult(VideoParseResult):
232
241
  output_dir,
233
242
  )
234
243
 
235
- async def _run_download(self, paramss: dict, count: int = 0, *, proxy: str | None = None) -> None:
244
+ async def _run_download(self, paramss: dict[str, Any], count: int = 0, *, proxy: str | None = None) -> None:
236
245
  if count > 2:
237
246
  raise DownloadError("下载失败 -2")
238
247
 
@@ -1,6 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import re
2
4
  from pathlib import Path
3
- from typing import Union
5
+ from typing import cast
4
6
  from urllib.parse import parse_qs, urlparse
5
7
 
6
8
  from loguru import logger
@@ -29,11 +31,11 @@ class BiliParse(YtParser):
29
31
  __reserved_parameters__ = ["p"]
30
32
  __redirect_keywords__ = ["b23.tv", "bili2233.cn"]
31
33
 
32
- async def _do_parse(self, raw_url: str) -> Union["YtVideoParseResult", "BiliVideoParseResult", ImageParseResult]:
34
+ async def _do_parse(self, raw_url: str) -> YtVideoParseResult | BiliVideoParseResult | ImageParseResult:
33
35
  if await self.is_dynamic(raw_url):
34
36
  dynamic = await self.get_dynamic_info(raw_url)
35
37
  content = self.hashtag_handler(dynamic.content)
36
- photos = []
38
+ photos: list[LivePhotoRef | ImageRef] = []
37
39
  if dynamic.images:
38
40
  for i in dynamic.images:
39
41
  if i.live_url:
@@ -56,7 +58,7 @@ class BiliParse(YtParser):
56
58
  raise ParseError("Bilibili 解析失败") from e
57
59
 
58
60
  @staticmethod
59
- def _is_bvid(url: str):
61
+ def _is_bvid(url: str) -> bool:
60
62
  if url.lower().startswith("bv"):
61
63
  return True
62
64
  else:
@@ -77,7 +79,7 @@ class BiliParse(YtParser):
77
79
  return await super().get_raw_url(url, clean_all=clean_all)
78
80
 
79
81
  @staticmethod
80
- async def is_dynamic(url) -> str | None:
82
+ async def is_dynamic(url: str) -> str | None:
81
83
  """是动态"""
82
84
  if re.search(r"\b\d{18,19}\b", url):
83
85
  return url
@@ -91,9 +93,9 @@ class BiliParse(YtParser):
91
93
  if "风控" in str(e):
92
94
  raise ParseError(f"账号风控\n使用的cookie: {cookie_ellipsis(self.cookie)}") from e
93
95
  raise ParseError(str(e)) from e
94
- return dynamic_info
96
+ return cast(BiliDynamic, dynamic_info)
95
97
 
96
- async def bili_api_parse(self, url) -> Union["BiliVideoParseResult", "ImageParseResult"]:
98
+ async def bili_api_parse(self, url: str) -> BiliVideoParseResult | ImageParseResult:
97
99
  async with BiliAPI(proxy=self.proxy) as bili:
98
100
  video_info = await bili.get_video_info(url)
99
101
 
@@ -136,16 +138,16 @@ class BiliParse(YtParser):
136
138
  ),
137
139
  )
138
140
 
139
- async def ytp_parse(self, url) -> Union["YtVideoParseResult"]:
140
- result = await super()._do_parse(url)
141
+ async def ytp_parse(self, url: str) -> YtVideoParseResult:
142
+ result = cast(YtVideoParseResult, await super()._do_parse(url))
141
143
  return YtVideoParseResult(
142
144
  title=result.title,
143
145
  dl=result.dl,
144
- video=result.media,
146
+ video=cast(VideoRef | None, result.media),
145
147
  )
146
148
 
147
149
  @staticmethod
148
- def change_source(url: str):
150
+ def change_source(url: str) -> str:
149
151
  return re.sub(
150
152
  r"upos-.*.(bilivideo.com|mirrorakam.akamaized.net)",
151
153
  "upos-sz-upcdnbda2.bilivideo.com",
@@ -172,7 +174,7 @@ class BiliVideoParseResult(VideoParseResult):
172
174
  callback_kwargs: dict | None = None,
173
175
  proxy: str | None = None,
174
176
  headers: dict | None = None,
175
- ) -> "DownloadResult":
177
+ ) -> DownloadResult:
176
178
  headers = {"referer": "https://www.bilibili.com", "User-Agent": GlobalConfig.ua}
177
179
  return await super()._do_download(
178
180
  output_dir=output_dir,
@@ -31,14 +31,14 @@ class CoolapkParser(BaseParser):
31
31
  coolapk = await Coolapk.parse(raw_url, proxy=self.proxy)
32
32
  except Exception as e:
33
33
  raise ParseError(str(e)) from e
34
- media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs]
34
+ media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs or []]
35
35
  if coolapk.markdown_content:
36
36
  return CoolapkRichTextParseResult(
37
37
  title=coolapk.title,
38
38
  media=media,
39
39
  markdown_content=coolapk.markdown_content,
40
40
  )
41
- content = self.hashtag_handler(coolapk.text_content)
41
+ content = self.hashtag_handler(coolapk.text_content or "")
42
42
  if any(isinstance(m, AniRef) for m in media):
43
43
  return CoolapkMultimediaParseResult(
44
44
  title=coolapk.title,
@@ -52,7 +52,7 @@ class CoolapkParser(BaseParser):
52
52
  )
53
53
 
54
54
  @staticmethod
55
- def hashtag_handler(desc: str):
55
+ def hashtag_handler(desc: str) -> str:
56
56
  hashtags = re.findall(r" ?#[^#]+# ?", desc)
57
57
  for hashtag in hashtags:
58
58
  desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
@@ -64,11 +64,11 @@ class CoolapkParseResult(ParseResult):
64
64
  self,
65
65
  *,
66
66
  output_dir: str | Path,
67
- callback: ProgressCallback = None,
67
+ callback: ProgressCallback | None = None,
68
68
  callback_args: tuple = (),
69
69
  callback_kwargs: dict | None = None,
70
70
  proxy: str | None = None,
71
- headers: dict = None,
71
+ headers: dict | None = None,
72
72
  ) -> "DownloadResult":
73
73
  headers = {
74
74
  "Accept": (
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from enum import Enum
3
3
  from pathlib import Path
4
4
  from typing import Self, Union
@@ -47,6 +47,8 @@ class DouyinParser(BaseParser):
47
47
  @staticmethod
48
48
  def _build_video_result(result: "DouyinApiResult") -> VideoParseResult:
49
49
  """构建视频解析结果"""
50
+ if result.video is None:
51
+ raise ParseError("抖音解析失败: 未获取到视频")
50
52
  return DouyinVideoParseResult(
51
53
  title=result.desc,
52
54
  video=result.video,
@@ -134,9 +136,9 @@ class DouyinApiResult:
134
136
  """抖音 API 解析结果"""
135
137
 
136
138
  type: DouyinMediaType
137
- video: VideoRef = None
139
+ video: VideoRef | None = None
138
140
  desc: str = ""
139
- image_list: list[ImageRef | LivePhotoRef] = None
141
+ image_list: list[ImageRef | LivePhotoRef] = field(default_factory=list)
140
142
 
141
143
  @classmethod
142
144
  def parse(cls, json_dict: dict) -> Self:
@@ -162,7 +164,7 @@ class DouyinApiResult:
162
164
  has_live_photos = any(img.get("video") for img in images)
163
165
 
164
166
  if has_live_photos:
165
- image_list = []
167
+ image_list: list[ImageRef | LivePhotoRef] = []
166
168
  for image in images:
167
169
  if video := image.get("video"):
168
170
  video_info = parse_video_info(video)
@@ -206,7 +208,7 @@ class DouyinApiResult:
206
208
  def _parse_image_post_info(cls, image_post_info: dict, desc: str) -> Self:
207
209
  """解析新版图片格式 (image_post_info 字段)"""
208
210
  images = image_post_info.get("images", [])
209
- image_list = []
211
+ image_list: list[ImageRef | LivePhotoRef] = []
210
212
 
211
213
  for image in images:
212
214
  display_image = image.get("display_image", {})
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import re
3
+ from typing import Any, cast
3
4
 
4
5
  from instaloader import BadResponseException
5
6
 
@@ -15,7 +16,7 @@ class InstagramParser(BaseParser):
15
16
  __match__ = r"^(http(s)?://)(www\.|)instagram\.com/(p|reel|share|.*/p|.*/reel)/.*"
16
17
  __redirect_keywords__ = ["share"]
17
18
 
18
- async def _do_parse(self, raw_url: str) -> VideoParseResult | ImageParseResult | MultimediaParseResult | None:
19
+ async def _do_parse(self, raw_url: str) -> VideoParseResult | ImageParseResult | MultimediaParseResult:
19
20
  shortcode = self.get_short_code(raw_url)
20
21
  if not shortcode:
21
22
  raise ValueError("Instagram帖子链接无效")
@@ -32,7 +33,7 @@ class InstagramParser(BaseParser):
32
33
  case "GraphSidecar":
33
34
  media = [
34
35
  VideoRef(url=i.video_url, thumb_url=i.display_url, width=i.width, height=i.height)
35
- if i.is_video
36
+ if i.is_video and i.video_url
36
37
  else ImageRef(url=i.display_url, width=i.width, height=i.height)
37
38
  for i in post.get_sidecar_nodes()
38
39
  ]
@@ -44,9 +45,9 @@ class InstagramParser(BaseParser):
44
45
  case "GraphVideo":
45
46
  return VideoParseResult(
46
47
  video=VideoRef(
47
- url=post.video_url,
48
+ url=post.video_url or post.url,
48
49
  thumb_url=post.url,
49
- duration=int(post.video_duration),
50
+ duration=int(post.video_duration or 0),
50
51
  width=width,
51
52
  height=height,
52
53
  ),
@@ -56,7 +57,7 @@ class InstagramParser(BaseParser):
56
57
  case _:
57
58
  raise ParseError("不支持的类型")
58
59
 
59
- async def _parse(self, url, shortcode, cookie=None) -> MyPost:
60
+ async def _parse(self, url: str, shortcode: str, cookie: dict[str, Any] | None = None) -> MyPost:
60
61
  try:
61
62
  post = await asyncio.wait_for(
62
63
  asyncio.to_thread(
@@ -81,13 +82,13 @@ class InstagramParser(BaseParser):
81
82
  if cookie:
82
83
  text = f"Instagram 账号可能已被封禁\n\n使用的Cookie: {cookie_ellipsis(cookie)}"
83
84
  else:
84
- text = e
85
+ text = str(e)
85
86
  raise ParseError(f"无法获取帖子内容: {text}") from e
86
87
  else:
87
- return post
88
+ return cast(MyPost, post)
88
89
 
89
90
  @staticmethod
90
- def get_short_code(url: str):
91
+ def get_short_code(url: str) -> str | None:
91
92
  url = url.removesuffix("/")
92
93
  shortcode = re.search(r"/(share|p|reel|.*/p|.*/reel)/(.*)", url)
93
94
  return shortcode.group(2).split("/")[0] if shortcode else None
@@ -23,9 +23,9 @@ class PipixParser(BaseParser):
23
23
  video=VideoRef(
24
24
  url=ppx.video_url,
25
25
  thumb_url=ppx.video_thumb,
26
- duration=ppx.video_duration,
27
- height=ppx.video_height,
28
- width=ppx.video_width,
26
+ duration=ppx.video_duration or 0,
27
+ height=ppx.video_height or 0,
28
+ width=ppx.video_width or 0,
29
29
  ),
30
30
  )
31
31
  else:
@@ -1,5 +1,5 @@
1
1
  from ...provider_api.threads import ThreadsAPI, ThreadsMedia, ThreadsMediaType
2
- from ...types import ImageRef, MultimediaParseResult, Platform, VideoRef
2
+ from ...types import AnyMediaRef, ImageRef, MultimediaParseResult, Platform, VideoRef
3
3
  from ..base.base import BaseParser
4
4
 
5
5
 
@@ -10,7 +10,7 @@ class ThreadsParser(BaseParser):
10
10
 
11
11
  async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
12
12
  post = await ThreadsAPI(proxy=self.proxy).parse(raw_url)
13
- media = []
13
+ media: list[AnyMediaRef] = []
14
14
  if post.media:
15
15
  pm: list[ThreadsMedia] = post.media if isinstance(post.media, list) else [post.media]
16
16
  for m in pm: