parsehub 2.0.22__tar.gz → 2.0.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.22/src/parsehub.egg-info → parsehub-2.0.24}/PKG-INFO +1 -1
- {parsehub-2.0.22 → parsehub-2.0.24}/pyproject.toml +19 -1
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/__init__.py +10 -7
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/cli.py +14 -12
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/base/base.py +7 -4
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/base/ytdlp.py +29 -20
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/bilibili.py +14 -12
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/coolapk.py +5 -5
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/douyin.py +7 -5
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/instagram.py +9 -8
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/pipix.py +3 -3
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/threads.py +2 -2
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/tieba.py +5 -3
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/tiktok.py +8 -6
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/twitter.py +15 -6
- parsehub-2.0.24/src/parsehub/parsers/parser/weibo.py +101 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/xhs.py +10 -5
- parsehub-2.0.24/src/parsehub/parsers/parser/xiaoheihe.py +60 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/youtube.py +1 -1
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/bilibili.py +20 -17
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/coolapk.py +5 -3
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/douyin.py +15 -8
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/instagram.py +13 -12
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/kuaishou.py +6 -5
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/pipix.py +7 -4
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/threads.py +13 -11
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/tieba.py +11 -7
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/tiktok.py +2 -2
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/twitter.py +25 -16
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/weibo.py +94 -66
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/weixin.py +16 -10
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/xhs.py +26 -17
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/xiaoheihe.py +14 -9
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/zuiyou.py +10 -7
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/callback.py +2 -2
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/media_file.py +5 -5
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/platform.py +2 -2
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/result.py +40 -35
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/utils/downloader.py +8 -8
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/utils/utils.py +2 -2
- {parsehub-2.0.22 → parsehub-2.0.24/src/parsehub.egg-info}/PKG-INFO +1 -1
- {parsehub-2.0.22 → parsehub-2.0.24}/test/test_cli.py +4 -4
- {parsehub-2.0.22 → parsehub-2.0.24}/test/test_cli_config.py +1 -1
- {parsehub-2.0.22 → parsehub-2.0.24}/test/test_core_offline.py +13 -8
- parsehub-2.0.22/src/parsehub/parsers/parser/weibo.py +0 -89
- parsehub-2.0.22/src/parsehub/parsers/parser/xiaoheihe.py +0 -48
- {parsehub-2.0.22 → parsehub-2.0.24}/LICENSE +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/README.md +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/setup.cfg +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/cli_config.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/config/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/config/config.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/kuaishou.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/weixin.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/parsers/parser/zuiyou.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/__init__.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/media_ref.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/types/post.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/SOURCES.txt +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/entry_points.txt +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/requires.txt +0 -0
- {parsehub-2.0.22 → parsehub-2.0.24}/src/parsehub.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "parsehub"
|
|
3
|
-
version = "2.0.22"
|
|
3
|
+
version = "2.0.24"
|
|
4
4
|
description = "轻量、异步、开箱即用的社交媒体聚合解析库"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12.0"
|
|
@@ -52,6 +52,8 @@ cli = [
|
|
|
52
52
|
|
|
53
53
|
[dependency-groups]
|
|
54
54
|
dev = [
|
|
55
|
+
"mypy>=2.1.0",
|
|
56
|
+
"pytest>=9.0.3",
|
|
55
57
|
"ruff>=0.14.14",
|
|
56
58
|
]
|
|
57
59
|
|
|
@@ -79,3 +81,19 @@ ignore = [
|
|
|
79
81
|
"B008", # 不在参数默认值中执行函数调用
|
|
80
82
|
"C901", # 函数复杂度过高
|
|
81
83
|
]
|
|
84
|
+
|
|
85
|
+
[tool.mypy]
|
|
86
|
+
python_version = "3.12"
|
|
87
|
+
files = ["./"]
|
|
88
|
+
exclude = ["test"]
|
|
89
|
+
ignore_missing_imports = true
|
|
90
|
+
warn_return_any = true
|
|
91
|
+
warn_unused_ignores = true
|
|
92
|
+
check_untyped_defs = true
|
|
93
|
+
disallow_untyped_defs = true
|
|
94
|
+
no_implicit_optional = true
|
|
95
|
+
|
|
96
|
+
[tool.pytest.ini_options]
|
|
97
|
+
testpaths = ["test"]
|
|
98
|
+
pythonpath = ["src"]
|
|
99
|
+
python_files = ["test_*.py"]
|
|
@@ -13,7 +13,7 @@ logger.disable(__name__)
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class ParseHub:
|
|
16
|
-
def __init__(self):
|
|
16
|
+
def __init__(self) -> None:
|
|
17
17
|
self.parsers: list[type[BaseParser]] = BaseParser.get_registry()
|
|
18
18
|
|
|
19
19
|
async def parse(self, url: str, *, proxy: str | None = None, cookie: str | dict | None = None) -> AnyParseResult:
|
|
@@ -47,9 +47,9 @@ class ParseHub:
|
|
|
47
47
|
async def download(
|
|
48
48
|
self,
|
|
49
49
|
url: str,
|
|
50
|
-
path: str | Path = None,
|
|
50
|
+
path: str | Path | None = None,
|
|
51
51
|
*,
|
|
52
|
-
callback: ProgressCallback = None,
|
|
52
|
+
callback: ProgressCallback | None = None,
|
|
53
53
|
callback_args: tuple = (),
|
|
54
54
|
callback_kwargs: dict | None = None,
|
|
55
55
|
proxy: str | None = None,
|
|
@@ -169,6 +169,8 @@ class ParseHub:
|
|
|
169
169
|
:return: 原始链接
|
|
170
170
|
"""
|
|
171
171
|
parser = self.get_parser(url)
|
|
172
|
+
if not parser:
|
|
173
|
+
raise UnknownPlatform(url)
|
|
172
174
|
try:
|
|
173
175
|
return await parser(proxy=proxy).get_raw_url(url, clean_all=clean_all)
|
|
174
176
|
except Exception as e:
|
|
@@ -183,7 +185,7 @@ class ParseHub:
|
|
|
183
185
|
return parser
|
|
184
186
|
return None
|
|
185
187
|
|
|
186
|
-
def get_parser(self, url) -> type[BaseParser] | None:
|
|
188
|
+
def get_parser(self, url: str) -> type[BaseParser] | None:
|
|
187
189
|
"""获取解析器
|
|
188
190
|
:param url: 分享文案 / 分享链接
|
|
189
191
|
"""
|
|
@@ -191,7 +193,7 @@ class ParseHub:
|
|
|
191
193
|
return parser
|
|
192
194
|
return None
|
|
193
195
|
|
|
194
|
-
def get_platform(self, url) -> Platform | None:
|
|
196
|
+
def get_platform(self, url: str) -> Platform | None:
|
|
195
197
|
"""获取平台
|
|
196
198
|
:param url: 分享文案 / 分享链接
|
|
197
199
|
"""
|
|
@@ -210,9 +212,10 @@ class ParseHub:
|
|
|
210
212
|
"""
|
|
211
213
|
return [
|
|
212
214
|
{
|
|
213
|
-
"id":
|
|
214
|
-
"name":
|
|
215
|
+
"id": platform.id,
|
|
216
|
+
"name": platform.display_name,
|
|
215
217
|
"supported_types": parser.__supported_type__,
|
|
216
218
|
}
|
|
217
219
|
for parser in self.parsers
|
|
220
|
+
if (platform := parser.__platform__) is not None
|
|
218
221
|
]
|
|
@@ -8,7 +8,7 @@ import unicodedata
|
|
|
8
8
|
from dataclasses import asdict, is_dataclass
|
|
9
9
|
from importlib.metadata import PackageNotFoundError, version
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import TYPE_CHECKING, Any
|
|
11
|
+
from typing import TYPE_CHECKING, Any, NoReturn, cast
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
14
|
from .cli_config import AutoCookieStore, PlatformConfig
|
|
@@ -20,12 +20,13 @@ _CLI_EXTRA_MODULES = ("argcomplete", "platformdirs")
|
|
|
20
20
|
class _ChineseArgumentParser(argparse.ArgumentParser):
|
|
21
21
|
def __init__(self, *args: Any, **kwargs: Any):
|
|
22
22
|
kwargs.setdefault("formatter_class", argparse.RawDescriptionHelpFormatter)
|
|
23
|
-
add_help = kwargs.pop("add_help", True)
|
|
24
|
-
|
|
23
|
+
add_help = bool(kwargs.pop("add_help", True))
|
|
24
|
+
kwargs["add_help"] = False
|
|
25
|
+
super().__init__(*args, **kwargs)
|
|
25
26
|
if add_help:
|
|
26
27
|
self.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="显示帮助信息")
|
|
27
28
|
|
|
28
|
-
def error(self, message: str) ->
|
|
29
|
+
def error(self, message: str) -> NoReturn:
|
|
29
30
|
self.print_usage(sys.stderr)
|
|
30
31
|
translated = _translate_argparse_error(message)
|
|
31
32
|
hint = _usage_hint(self.prog)
|
|
@@ -45,7 +46,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
45
46
|
try:
|
|
46
47
|
args = parser.parse_args(_normalize_argv(raw_argv))
|
|
47
48
|
_finalize_output_args(args)
|
|
48
|
-
return args.func(args)
|
|
49
|
+
return int(args.func(args))
|
|
49
50
|
except SystemExit as e:
|
|
50
51
|
return _normalize_exit_code(e.code)
|
|
51
52
|
except ValueError as e:
|
|
@@ -212,7 +213,7 @@ def _add_set_commands(subparsers: argparse._SubParsersAction) -> None:
|
|
|
212
213
|
|
|
213
214
|
def _add_platform_argument(parser: argparse.ArgumentParser) -> None:
|
|
214
215
|
action = parser.add_argument("platform", help="平台 ID,如 xhs")
|
|
215
|
-
action.completer = _complete_platforms
|
|
216
|
+
action.completer = _complete_platforms # type: ignore[attr-defined]
|
|
216
217
|
|
|
217
218
|
|
|
218
219
|
def _add_json_options(parser: argparse.ArgumentParser) -> None:
|
|
@@ -364,14 +365,15 @@ def _cookie_prompt() -> Any:
|
|
|
364
365
|
|
|
365
366
|
def _load_platform_config(platform_id: str | None) -> PlatformConfig:
|
|
366
367
|
if not platform_id:
|
|
367
|
-
return _platform_config_type()()
|
|
368
|
-
return _config_store().get_platform(platform_id)
|
|
368
|
+
return cast("PlatformConfig", _platform_config_type()())
|
|
369
|
+
return cast("PlatformConfig", _config_store().get_platform(platform_id))
|
|
369
370
|
|
|
370
371
|
|
|
371
372
|
def _load_cookie(platform_id: str | None) -> str | None:
|
|
372
373
|
if not platform_id:
|
|
373
374
|
return None
|
|
374
|
-
|
|
375
|
+
value = _cookie_store().get(platform_id)
|
|
376
|
+
return value if isinstance(value, str) or value is None else str(value)
|
|
375
377
|
|
|
376
378
|
|
|
377
379
|
def _detect_platform_id(hub: Any, url_or_text: str) -> str | None:
|
|
@@ -436,7 +438,7 @@ def _platform_config_row(
|
|
|
436
438
|
|
|
437
439
|
|
|
438
440
|
def _print_json(data: Any, *, pretty: bool) -> None:
|
|
439
|
-
kwargs = {"ensure_ascii": False}
|
|
441
|
+
kwargs: dict[str, Any] = {"ensure_ascii": False}
|
|
440
442
|
if pretty:
|
|
441
443
|
kwargs["indent"] = 2
|
|
442
444
|
else:
|
|
@@ -547,8 +549,8 @@ def _download_result_to_dict(result: Any) -> dict[str, Any]:
|
|
|
547
549
|
def _jsonable(value: Any) -> Any:
|
|
548
550
|
if isinstance(value, Path):
|
|
549
551
|
return str(value)
|
|
550
|
-
if is_dataclass(value):
|
|
551
|
-
return _jsonable(asdict(value))
|
|
552
|
+
if is_dataclass(value) and not isinstance(value, type):
|
|
553
|
+
return _jsonable(asdict(cast(Any, value)))
|
|
552
554
|
if isinstance(value, dict):
|
|
553
555
|
return {str(k): _jsonable(v) for k, v in value.items()}
|
|
554
556
|
if isinstance(value, (list, tuple)):
|
|
@@ -2,6 +2,7 @@ import importlib
|
|
|
2
2
|
import pkgutil
|
|
3
3
|
import re
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Any
|
|
5
6
|
from urllib.parse import parse_qs, urlencode, urlparse
|
|
6
7
|
|
|
7
8
|
import httpx
|
|
@@ -34,7 +35,7 @@ class BaseParser(ABC):
|
|
|
34
35
|
self.proxy = proxy
|
|
35
36
|
self.cookie = normalize_cookie(cookie)
|
|
36
37
|
|
|
37
|
-
def __init_subclass__(cls, /, register=True, **kwargs):
|
|
38
|
+
def __init_subclass__(cls, /, register: bool = True, **kwargs: Any) -> None:
|
|
38
39
|
super().__init_subclass__(**kwargs)
|
|
39
40
|
if register:
|
|
40
41
|
if not cls.__platform__:
|
|
@@ -56,7 +57,7 @@ class BaseParser(ABC):
|
|
|
56
57
|
def match(cls, text: str) -> bool:
|
|
57
58
|
"""判断是否匹配该解析器"""
|
|
58
59
|
url = match_url(text)
|
|
59
|
-
return bool(re.match(cls.__match__, url))
|
|
60
|
+
return bool(cls.__match__ and re.match(cls.__match__, url))
|
|
60
61
|
|
|
61
62
|
async def parse(self, url: str) -> AnyParseResult:
|
|
62
63
|
"""解析
|
|
@@ -66,7 +67,8 @@ class BaseParser(ABC):
|
|
|
66
67
|
raw_url = await self.get_raw_url(url, clean_all=False)
|
|
67
68
|
result = await self._do_parse(raw_url)
|
|
68
69
|
result.platform = self.__platform__
|
|
69
|
-
|
|
70
|
+
raw_url_clean = self._clean_params(raw_url, self.__after_clean_parameters__)
|
|
71
|
+
result.raw_url = raw_url_clean
|
|
70
72
|
return result
|
|
71
73
|
|
|
72
74
|
@abstractmethod
|
|
@@ -104,7 +106,8 @@ class BaseParser(ABC):
|
|
|
104
106
|
|
|
105
107
|
:return:
|
|
106
108
|
"""
|
|
107
|
-
|
|
109
|
+
matched_url = match_url(url)
|
|
110
|
+
url = matched_url or url
|
|
108
111
|
if not url.startswith("http"):
|
|
109
112
|
url = f"https://{url}"
|
|
110
113
|
if any(x in url for x in self.__redirect_keywords__):
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
from collections.abc import Callable
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import
|
|
5
|
+
from typing import Any, cast
|
|
5
6
|
|
|
6
7
|
from yt_dlp import YoutubeDL
|
|
7
8
|
|
|
8
9
|
from ...types import (
|
|
10
|
+
AnyParseResult,
|
|
9
11
|
DownloadError,
|
|
10
12
|
DownloadResult,
|
|
11
13
|
ParseError,
|
|
@@ -30,7 +32,7 @@ def switch_ytdlp_proxy(ydl: YoutubeDL, proxy: str | None) -> None:
|
|
|
30
32
|
director.close()
|
|
31
33
|
|
|
32
34
|
|
|
33
|
-
def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None:
|
|
35
|
+
def download_video(yto_params: dict[str, Any], url: str, proxy: str | None = None) -> None:
|
|
34
36
|
"""在独立线程中下载视频"""
|
|
35
37
|
try:
|
|
36
38
|
with YoutubeDL(yto_params) as ydl:
|
|
@@ -43,14 +45,16 @@ def download_video(yto_params: dict, url: str, proxy: str | None = None) -> None
|
|
|
43
45
|
|
|
44
46
|
|
|
45
47
|
class MonotonicDownloadProgress:
|
|
46
|
-
def __init__(
|
|
48
|
+
def __init__(
|
|
49
|
+
self, emit: Callable[[float], None], *, start: float = 0.0, end: float = 100.0, min_step: float = 0.1
|
|
50
|
+
) -> None:
|
|
47
51
|
self.emit = emit
|
|
48
52
|
self.start = start
|
|
49
53
|
self.end = end
|
|
50
54
|
self.min_step = min_step
|
|
51
55
|
self.current = start
|
|
52
56
|
|
|
53
|
-
def __call__(self, d: dict):
|
|
57
|
+
def __call__(self, d: dict[str, Any]) -> None:
|
|
54
58
|
status = d.get("status")
|
|
55
59
|
|
|
56
60
|
if status == "downloading":
|
|
@@ -82,8 +86,8 @@ class MonotonicDownloadProgress:
|
|
|
82
86
|
# 分片下载有时没有稳定总大小,但有 frag 进度;作为兜底
|
|
83
87
|
frag_index = d.get("fragment_index")
|
|
84
88
|
frag_count = d.get("fragment_count")
|
|
85
|
-
if frag_index
|
|
86
|
-
return min(frag_index / frag_count * 100, 100)
|
|
89
|
+
if isinstance(frag_index, int | float) and isinstance(frag_count, int | float) and frag_count:
|
|
90
|
+
return min(float(frag_index) / float(frag_count) * 100, 100.0)
|
|
87
91
|
|
|
88
92
|
return None
|
|
89
93
|
|
|
@@ -91,7 +95,7 @@ class MonotonicDownloadProgress:
|
|
|
91
95
|
class YtParser(BaseParser, register=False):
|
|
92
96
|
"""yt-dlp解析器"""
|
|
93
97
|
|
|
94
|
-
async def _do_parse(self, raw_url: str) ->
|
|
98
|
+
async def _do_parse(self, raw_url: str) -> AnyParseResult:
|
|
95
99
|
video_info = await self._parse(raw_url)
|
|
96
100
|
return YtVideoParseResult(
|
|
97
101
|
dl=video_info,
|
|
@@ -106,7 +110,7 @@ class YtParser(BaseParser, register=False):
|
|
|
106
110
|
),
|
|
107
111
|
)
|
|
108
112
|
|
|
109
|
-
async def _parse(self, url) -> "YtVideoInfo":
|
|
113
|
+
async def _parse(self, url: str) -> "YtVideoInfo":
|
|
110
114
|
try:
|
|
111
115
|
dl = await asyncio.wait_for(asyncio.to_thread(self._extract_info, url), timeout=30)
|
|
112
116
|
except TimeoutError as e:
|
|
@@ -114,8 +118,8 @@ class YtParser(BaseParser, register=False):
|
|
|
114
118
|
except Exception as e:
|
|
115
119
|
raise ParseError(f"解析视频信息失败: {str(e)}") from e
|
|
116
120
|
|
|
117
|
-
if dl.get("_type") and dl["_type"] == "playlist":
|
|
118
|
-
dl = dl["entries"][0]
|
|
121
|
+
if dl.get("_type") and dl["_type"] == "playlist":
|
|
122
|
+
dl = dl["entries"][0]
|
|
119
123
|
url = dl["webpage_url"]
|
|
120
124
|
title = dl["title"]
|
|
121
125
|
duration = dl.get("duration", 0)
|
|
@@ -136,20 +140,20 @@ class YtParser(BaseParser, register=False):
|
|
|
136
140
|
proxy=self.proxy,
|
|
137
141
|
)
|
|
138
142
|
|
|
139
|
-
def _extract_info(self, url):
|
|
143
|
+
def _extract_info(self, url: str) -> dict[str, Any]:
|
|
140
144
|
params = self.params.copy()
|
|
141
145
|
if self.proxy:
|
|
142
146
|
params["proxy"] = self.proxy
|
|
143
147
|
|
|
144
148
|
try:
|
|
145
149
|
with YoutubeDL(params) as ydl:
|
|
146
|
-
return ydl.extract_info(url, download=False)
|
|
150
|
+
return cast(dict[str, Any], ydl.extract_info(url, download=False))
|
|
147
151
|
except Exception as e:
|
|
148
152
|
error_msg = f"{type(e).__name__}: {str(e)}"
|
|
149
153
|
raise RuntimeError(error_msg) from None
|
|
150
154
|
|
|
151
155
|
@property
|
|
152
|
-
def params(self) -> dict:
|
|
156
|
+
def params(self) -> dict[str, Any]:
|
|
153
157
|
params = {
|
|
154
158
|
"format": "mp4+bestvideo[height<=1080]+bestaudio",
|
|
155
159
|
"quiet": True, # 不输出日志
|
|
@@ -170,9 +174,9 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
170
174
|
def __init__(
|
|
171
175
|
self,
|
|
172
176
|
dl: "YtVideoInfo",
|
|
173
|
-
title,
|
|
174
|
-
video=None,
|
|
175
|
-
content=None,
|
|
177
|
+
title: str | None,
|
|
178
|
+
video: VideoRef | None = None,
|
|
179
|
+
content: str | None = None,
|
|
176
180
|
):
|
|
177
181
|
"""dl: yt-dlp解析结果"""
|
|
178
182
|
self.dl = dl
|
|
@@ -190,17 +194,18 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
190
194
|
) -> "DownloadResult":
|
|
191
195
|
if callback_kwargs is None:
|
|
192
196
|
callback_kwargs = {}
|
|
197
|
+
output_dir_path = Path(output_dir)
|
|
193
198
|
|
|
194
199
|
paramss = self.dl.paramss.copy()
|
|
195
200
|
if self.dl.proxy:
|
|
196
201
|
paramss["proxy"] = self.dl.proxy
|
|
197
202
|
|
|
198
|
-
paramss["outtmpl"] = f"{
|
|
203
|
+
paramss["outtmpl"] = f"{output_dir_path.joinpath('ytdlp_%(id)s')}.%(ext)s"
|
|
199
204
|
|
|
200
205
|
if callback:
|
|
201
206
|
loop = asyncio.get_running_loop()
|
|
202
207
|
|
|
203
|
-
def _callback(count: float):
|
|
208
|
+
def _callback(count: float) -> None:
|
|
204
209
|
asyncio.run_coroutine_threadsafe(
|
|
205
210
|
callback(int(count), 100, "bytes", *callback_args, **callback_kwargs), loop
|
|
206
211
|
)
|
|
@@ -214,7 +219,11 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
214
219
|
|
|
215
220
|
await self._run_download(paramss, proxy=proxy)
|
|
216
221
|
|
|
217
|
-
v =
|
|
222
|
+
v = (
|
|
223
|
+
list(output_dir_path.glob("*.mp4"))
|
|
224
|
+
or list(output_dir_path.glob("*.mkv"))
|
|
225
|
+
or list(output_dir_path.glob("*.webm"))
|
|
226
|
+
)
|
|
218
227
|
if not v:
|
|
219
228
|
raise DownloadError("下载失败 -1")
|
|
220
229
|
|
|
@@ -232,7 +241,7 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
232
241
|
output_dir,
|
|
233
242
|
)
|
|
234
243
|
|
|
235
|
-
async def _run_download(self, paramss: dict, count: int = 0, *, proxy: str | None = None) -> None:
|
|
244
|
+
async def _run_download(self, paramss: dict[str, Any], count: int = 0, *, proxy: str | None = None) -> None:
|
|
236
245
|
if count > 2:
|
|
237
246
|
raise DownloadError("下载失败 -2")
|
|
238
247
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import re
|
|
2
4
|
from pathlib import Path
|
|
3
|
-
from typing import
|
|
5
|
+
from typing import cast
|
|
4
6
|
from urllib.parse import parse_qs, urlparse
|
|
5
7
|
|
|
6
8
|
from loguru import logger
|
|
@@ -29,11 +31,11 @@ class BiliParse(YtParser):
|
|
|
29
31
|
__reserved_parameters__ = ["p"]
|
|
30
32
|
__redirect_keywords__ = ["b23.tv", "bili2233.cn"]
|
|
31
33
|
|
|
32
|
-
async def _do_parse(self, raw_url: str) ->
|
|
34
|
+
async def _do_parse(self, raw_url: str) -> YtVideoParseResult | BiliVideoParseResult | ImageParseResult:
|
|
33
35
|
if await self.is_dynamic(raw_url):
|
|
34
36
|
dynamic = await self.get_dynamic_info(raw_url)
|
|
35
37
|
content = self.hashtag_handler(dynamic.content)
|
|
36
|
-
photos = []
|
|
38
|
+
photos: list[LivePhotoRef | ImageRef] = []
|
|
37
39
|
if dynamic.images:
|
|
38
40
|
for i in dynamic.images:
|
|
39
41
|
if i.live_url:
|
|
@@ -56,7 +58,7 @@ class BiliParse(YtParser):
|
|
|
56
58
|
raise ParseError("Bilibili 解析失败") from e
|
|
57
59
|
|
|
58
60
|
@staticmethod
|
|
59
|
-
def _is_bvid(url: str):
|
|
61
|
+
def _is_bvid(url: str) -> bool:
|
|
60
62
|
if url.lower().startswith("bv"):
|
|
61
63
|
return True
|
|
62
64
|
else:
|
|
@@ -77,7 +79,7 @@ class BiliParse(YtParser):
|
|
|
77
79
|
return await super().get_raw_url(url, clean_all=clean_all)
|
|
78
80
|
|
|
79
81
|
@staticmethod
|
|
80
|
-
async def is_dynamic(url) -> str | None:
|
|
82
|
+
async def is_dynamic(url: str) -> str | None:
|
|
81
83
|
"""是动态"""
|
|
82
84
|
if re.search(r"\b\d{18,19}\b", url):
|
|
83
85
|
return url
|
|
@@ -91,9 +93,9 @@ class BiliParse(YtParser):
|
|
|
91
93
|
if "风控" in str(e):
|
|
92
94
|
raise ParseError(f"账号风控\n使用的cookie: {cookie_ellipsis(self.cookie)}") from e
|
|
93
95
|
raise ParseError(str(e)) from e
|
|
94
|
-
return dynamic_info
|
|
96
|
+
return cast(BiliDynamic, dynamic_info)
|
|
95
97
|
|
|
96
|
-
async def bili_api_parse(self, url) ->
|
|
98
|
+
async def bili_api_parse(self, url: str) -> BiliVideoParseResult | ImageParseResult:
|
|
97
99
|
async with BiliAPI(proxy=self.proxy) as bili:
|
|
98
100
|
video_info = await bili.get_video_info(url)
|
|
99
101
|
|
|
@@ -136,16 +138,16 @@ class BiliParse(YtParser):
|
|
|
136
138
|
),
|
|
137
139
|
)
|
|
138
140
|
|
|
139
|
-
async def ytp_parse(self, url) ->
|
|
140
|
-
result = await super()._do_parse(url)
|
|
141
|
+
async def ytp_parse(self, url: str) -> YtVideoParseResult:
|
|
142
|
+
result = cast(YtVideoParseResult, await super()._do_parse(url))
|
|
141
143
|
return YtVideoParseResult(
|
|
142
144
|
title=result.title,
|
|
143
145
|
dl=result.dl,
|
|
144
|
-
video=result.media,
|
|
146
|
+
video=cast(VideoRef | None, result.media),
|
|
145
147
|
)
|
|
146
148
|
|
|
147
149
|
@staticmethod
|
|
148
|
-
def change_source(url: str):
|
|
150
|
+
def change_source(url: str) -> str:
|
|
149
151
|
return re.sub(
|
|
150
152
|
r"upos-.*.(bilivideo.com|mirrorakam.akamaized.net)",
|
|
151
153
|
"upos-sz-upcdnbda2.bilivideo.com",
|
|
@@ -172,7 +174,7 @@ class BiliVideoParseResult(VideoParseResult):
|
|
|
172
174
|
callback_kwargs: dict | None = None,
|
|
173
175
|
proxy: str | None = None,
|
|
174
176
|
headers: dict | None = None,
|
|
175
|
-
) ->
|
|
177
|
+
) -> DownloadResult:
|
|
176
178
|
headers = {"referer": "https://www.bilibili.com", "User-Agent": GlobalConfig.ua}
|
|
177
179
|
return await super()._do_download(
|
|
178
180
|
output_dir=output_dir,
|
|
@@ -31,14 +31,14 @@ class CoolapkParser(BaseParser):
|
|
|
31
31
|
coolapk = await Coolapk.parse(raw_url, proxy=self.proxy)
|
|
32
32
|
except Exception as e:
|
|
33
33
|
raise ParseError(str(e)) from e
|
|
34
|
-
media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs]
|
|
34
|
+
media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs or []]
|
|
35
35
|
if coolapk.markdown_content:
|
|
36
36
|
return CoolapkRichTextParseResult(
|
|
37
37
|
title=coolapk.title,
|
|
38
38
|
media=media,
|
|
39
39
|
markdown_content=coolapk.markdown_content,
|
|
40
40
|
)
|
|
41
|
-
content = self.hashtag_handler(coolapk.text_content)
|
|
41
|
+
content = self.hashtag_handler(coolapk.text_content or "")
|
|
42
42
|
if any(isinstance(m, AniRef) for m in media):
|
|
43
43
|
return CoolapkMultimediaParseResult(
|
|
44
44
|
title=coolapk.title,
|
|
@@ -52,7 +52,7 @@ class CoolapkParser(BaseParser):
|
|
|
52
52
|
)
|
|
53
53
|
|
|
54
54
|
@staticmethod
|
|
55
|
-
def hashtag_handler(desc: str):
|
|
55
|
+
def hashtag_handler(desc: str) -> str:
|
|
56
56
|
hashtags = re.findall(r" ?#[^#]+# ?", desc)
|
|
57
57
|
for hashtag in hashtags:
|
|
58
58
|
desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
|
|
@@ -64,11 +64,11 @@ class CoolapkParseResult(ParseResult):
|
|
|
64
64
|
self,
|
|
65
65
|
*,
|
|
66
66
|
output_dir: str | Path,
|
|
67
|
-
callback: ProgressCallback = None,
|
|
67
|
+
callback: ProgressCallback | None = None,
|
|
68
68
|
callback_args: tuple = (),
|
|
69
69
|
callback_kwargs: dict | None = None,
|
|
70
70
|
proxy: str | None = None,
|
|
71
|
-
headers: dict = None,
|
|
71
|
+
headers: dict | None = None,
|
|
72
72
|
) -> "DownloadResult":
|
|
73
73
|
headers = {
|
|
74
74
|
"Accept": (
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Self, Union
|
|
@@ -47,6 +47,8 @@ class DouyinParser(BaseParser):
|
|
|
47
47
|
@staticmethod
|
|
48
48
|
def _build_video_result(result: "DouyinApiResult") -> VideoParseResult:
|
|
49
49
|
"""构建视频解析结果"""
|
|
50
|
+
if result.video is None:
|
|
51
|
+
raise ParseError("抖音解析失败: 未获取到视频")
|
|
50
52
|
return DouyinVideoParseResult(
|
|
51
53
|
title=result.desc,
|
|
52
54
|
video=result.video,
|
|
@@ -134,9 +136,9 @@ class DouyinApiResult:
|
|
|
134
136
|
"""抖音 API 解析结果"""
|
|
135
137
|
|
|
136
138
|
type: DouyinMediaType
|
|
137
|
-
video: VideoRef = None
|
|
139
|
+
video: VideoRef | None = None
|
|
138
140
|
desc: str = ""
|
|
139
|
-
image_list: list[ImageRef | LivePhotoRef] =
|
|
141
|
+
image_list: list[ImageRef | LivePhotoRef] = field(default_factory=list)
|
|
140
142
|
|
|
141
143
|
@classmethod
|
|
142
144
|
def parse(cls, json_dict: dict) -> Self:
|
|
@@ -162,7 +164,7 @@ class DouyinApiResult:
|
|
|
162
164
|
has_live_photos = any(img.get("video") for img in images)
|
|
163
165
|
|
|
164
166
|
if has_live_photos:
|
|
165
|
-
image_list = []
|
|
167
|
+
image_list: list[ImageRef | LivePhotoRef] = []
|
|
166
168
|
for image in images:
|
|
167
169
|
if video := image.get("video"):
|
|
168
170
|
video_info = parse_video_info(video)
|
|
@@ -206,7 +208,7 @@ class DouyinApiResult:
|
|
|
206
208
|
def _parse_image_post_info(cls, image_post_info: dict, desc: str) -> Self:
|
|
207
209
|
"""解析新版图片格式 (image_post_info 字段)"""
|
|
208
210
|
images = image_post_info.get("images", [])
|
|
209
|
-
image_list = []
|
|
211
|
+
image_list: list[ImageRef | LivePhotoRef] = []
|
|
210
212
|
|
|
211
213
|
for image in images:
|
|
212
214
|
display_image = image.get("display_image", {})
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import re
|
|
3
|
+
from typing import Any, cast
|
|
3
4
|
|
|
4
5
|
from instaloader import BadResponseException
|
|
5
6
|
|
|
@@ -15,7 +16,7 @@ class InstagramParser(BaseParser):
|
|
|
15
16
|
__match__ = r"^(http(s)?://)(www\.|)instagram\.com/(p|reel|share|.*/p|.*/reel)/.*"
|
|
16
17
|
__redirect_keywords__ = ["share"]
|
|
17
18
|
|
|
18
|
-
async def _do_parse(self, raw_url: str) -> VideoParseResult | ImageParseResult | MultimediaParseResult
|
|
19
|
+
async def _do_parse(self, raw_url: str) -> VideoParseResult | ImageParseResult | MultimediaParseResult:
|
|
19
20
|
shortcode = self.get_short_code(raw_url)
|
|
20
21
|
if not shortcode:
|
|
21
22
|
raise ValueError("Instagram帖子链接无效")
|
|
@@ -32,7 +33,7 @@ class InstagramParser(BaseParser):
|
|
|
32
33
|
case "GraphSidecar":
|
|
33
34
|
media = [
|
|
34
35
|
VideoRef(url=i.video_url, thumb_url=i.display_url, width=i.width, height=i.height)
|
|
35
|
-
if i.is_video
|
|
36
|
+
if i.is_video and i.video_url
|
|
36
37
|
else ImageRef(url=i.display_url, width=i.width, height=i.height)
|
|
37
38
|
for i in post.get_sidecar_nodes()
|
|
38
39
|
]
|
|
@@ -44,9 +45,9 @@ class InstagramParser(BaseParser):
|
|
|
44
45
|
case "GraphVideo":
|
|
45
46
|
return VideoParseResult(
|
|
46
47
|
video=VideoRef(
|
|
47
|
-
url=post.video_url,
|
|
48
|
+
url=post.video_url or post.url,
|
|
48
49
|
thumb_url=post.url,
|
|
49
|
-
duration=int(post.video_duration),
|
|
50
|
+
duration=int(post.video_duration or 0),
|
|
50
51
|
width=width,
|
|
51
52
|
height=height,
|
|
52
53
|
),
|
|
@@ -56,7 +57,7 @@ class InstagramParser(BaseParser):
|
|
|
56
57
|
case _:
|
|
57
58
|
raise ParseError("不支持的类型")
|
|
58
59
|
|
|
59
|
-
async def _parse(self, url, shortcode, cookie=None) -> MyPost:
|
|
60
|
+
async def _parse(self, url: str, shortcode: str, cookie: dict[str, Any] | None = None) -> MyPost:
|
|
60
61
|
try:
|
|
61
62
|
post = await asyncio.wait_for(
|
|
62
63
|
asyncio.to_thread(
|
|
@@ -81,13 +82,13 @@ class InstagramParser(BaseParser):
|
|
|
81
82
|
if cookie:
|
|
82
83
|
text = f"Instagram 账号可能已被封禁\n\n使用的Cookie: {cookie_ellipsis(cookie)}"
|
|
83
84
|
else:
|
|
84
|
-
text = e
|
|
85
|
+
text = str(e)
|
|
85
86
|
raise ParseError(f"无法获取帖子内容: {text}") from e
|
|
86
87
|
else:
|
|
87
|
-
return post
|
|
88
|
+
return cast(MyPost, post)
|
|
88
89
|
|
|
89
90
|
@staticmethod
|
|
90
|
-
def get_short_code(url: str):
|
|
91
|
+
def get_short_code(url: str) -> str | None:
|
|
91
92
|
url = url.removesuffix("/")
|
|
92
93
|
shortcode = re.search(r"/(share|p|reel|.*/p|.*/reel)/(.*)", url)
|
|
93
94
|
return shortcode.group(2).split("/")[0] if shortcode else None
|
|
@@ -23,9 +23,9 @@ class PipixParser(BaseParser):
|
|
|
23
23
|
video=VideoRef(
|
|
24
24
|
url=ppx.video_url,
|
|
25
25
|
thumb_url=ppx.video_thumb,
|
|
26
|
-
duration=ppx.video_duration,
|
|
27
|
-
height=ppx.video_height,
|
|
28
|
-
width=ppx.video_width,
|
|
26
|
+
duration=ppx.video_duration or 0,
|
|
27
|
+
height=ppx.video_height or 0,
|
|
28
|
+
width=ppx.video_width or 0,
|
|
29
29
|
),
|
|
30
30
|
)
|
|
31
31
|
else:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from ...provider_api.threads import ThreadsAPI, ThreadsMedia, ThreadsMediaType
|
|
2
|
-
from ...types import ImageRef, MultimediaParseResult, Platform, VideoRef
|
|
2
|
+
from ...types import AnyMediaRef, ImageRef, MultimediaParseResult, Platform, VideoRef
|
|
3
3
|
from ..base.base import BaseParser
|
|
4
4
|
|
|
5
5
|
|
|
@@ -10,7 +10,7 @@ class ThreadsParser(BaseParser):
|
|
|
10
10
|
|
|
11
11
|
async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
|
|
12
12
|
post = await ThreadsAPI(proxy=self.proxy).parse(raw_url)
|
|
13
|
-
media = []
|
|
13
|
+
media: list[AnyMediaRef] = []
|
|
14
14
|
if post.media:
|
|
15
15
|
pm: list[ThreadsMedia] = post.media if isinstance(post.media, list) else [post.media]
|
|
16
16
|
for m in pm:
|