parsehub 2.0.1__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.1/src/parsehub.egg-info → parsehub-2.0.3}/PKG-INFO +5 -13
- {parsehub-2.0.1 → parsehub-2.0.3}/README.md +4 -12
- {parsehub-2.0.1 → parsehub-2.0.3}/pyproject.toml +2 -4
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/__init__.py +39 -22
- parsehub-2.0.3/src/parsehub/config/__init__.py +3 -0
- parsehub-2.0.3/src/parsehub/config/config.py +20 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/base/base.py +8 -9
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/base/ytdlp.py +13 -17
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/bilibili.py +15 -17
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/coolapk.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/instagram.py +3 -3
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/kuaishou.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/pipix.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/threads.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/tieba.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/twitter.py +5 -5
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/weibo.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/weixin.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/xhs.py +2 -2
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/xiaoheihe.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/youtube.py +2 -2
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/zuiyou.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/bilibili.py +2 -2
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/twitter.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/xiaoheihe.py +2 -2
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/types/__init__.py +2 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/types/media_file.py +1 -1
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/types/media_ref.py +3 -3
- parsehub-2.0.3/src/parsehub/types/post.py +11 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/types/result.py +96 -36
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/utils/downloader.py +4 -4
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/utils/utils.py +36 -0
- {parsehub-2.0.1 → parsehub-2.0.3/src/parsehub.egg-info}/PKG-INFO +5 -13
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub.egg-info/SOURCES.txt +1 -0
- parsehub-2.0.1/src/parsehub/config/__init__.py +0 -3
- parsehub-2.0.1/src/parsehub/config/config.py +0 -63
- {parsehub-2.0.1 → parsehub-2.0.3}/LICENSE +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/setup.cfg +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/__init__.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/douyin.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/coolapk.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/instagram.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/kuaishou.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/pipix.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/threads.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/tieba.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/weibo.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/weixin.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/xhs.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/provider_api/zuiyou.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/types/callback.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/types/platform.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub.egg-info/requires.txt +0 -0
- {parsehub-2.0.1 → parsehub-2.0.3}/src/parsehub.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parsehub
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
|
|
5
5
|
Author-email: 梓澪 <zilingmio@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -125,26 +125,21 @@ print(result)
|
|
|
125
125
|
|
|
126
126
|
```python
|
|
127
127
|
from parsehub import ParseHub
|
|
128
|
-
from parsehub.config import ParseConfig
|
|
129
128
|
|
|
130
|
-
|
|
131
|
-
cookie="key1=value1; key2=value2", # 从浏览器中获取
|
|
132
|
-
proxy="http://127.0.0.1:7890", # 可选
|
|
133
|
-
)
|
|
134
|
-
ph = ParseHub(config=config)
|
|
129
|
+
ph = ParseHub(cookie="key1=value1; key2=value2", proxy="http://127.0.0.1:7890",)
|
|
135
130
|
```
|
|
136
131
|
|
|
137
132
|
Cookie 支持多种格式传入:
|
|
138
133
|
|
|
139
134
|
```python
|
|
140
135
|
# 字符串
|
|
141
|
-
|
|
136
|
+
ParseHub(cookie="key1=value1; key2=value2")
|
|
142
137
|
|
|
143
138
|
# JSON 字符串
|
|
144
|
-
|
|
139
|
+
ParseHub(cookie='{"key1": "value1", "key2": "value2"}')
|
|
145
140
|
|
|
146
141
|
# 字典
|
|
147
|
-
|
|
142
|
+
ParseHub(cookie={"key1": "value1", "key2": "value2"})
|
|
148
143
|
```
|
|
149
144
|
|
|
150
145
|
目前支持 Cookie 登录的平台:
|
|
@@ -158,9 +153,6 @@ from parsehub.config import GlobalConfig
|
|
|
158
153
|
|
|
159
154
|
# 自定义默认下载目录
|
|
160
155
|
GlobalConfig.default_save_dir = "./my_downloads"
|
|
161
|
-
|
|
162
|
-
# 视频时长限制 (超过此时长将下载最低画质,0 为不限制)
|
|
163
|
-
GlobalConfig.duration_limit = 600 # 秒
|
|
164
156
|
```
|
|
165
157
|
|
|
166
158
|
## 🤝 参考项目
|
|
@@ -88,26 +88,21 @@ print(result)
|
|
|
88
88
|
|
|
89
89
|
```python
|
|
90
90
|
from parsehub import ParseHub
|
|
91
|
-
from parsehub.config import ParseConfig
|
|
92
91
|
|
|
93
|
-
|
|
94
|
-
cookie="key1=value1; key2=value2", # 从浏览器中获取
|
|
95
|
-
proxy="http://127.0.0.1:7890", # 可选
|
|
96
|
-
)
|
|
97
|
-
ph = ParseHub(config=config)
|
|
92
|
+
ph = ParseHub(cookie="key1=value1; key2=value2", proxy="http://127.0.0.1:7890",)
|
|
98
93
|
```
|
|
99
94
|
|
|
100
95
|
Cookie 支持多种格式传入:
|
|
101
96
|
|
|
102
97
|
```python
|
|
103
98
|
# 字符串
|
|
104
|
-
|
|
99
|
+
ParseHub(cookie="key1=value1; key2=value2")
|
|
105
100
|
|
|
106
101
|
# JSON 字符串
|
|
107
|
-
|
|
102
|
+
ParseHub(cookie='{"key1": "value1", "key2": "value2"}')
|
|
108
103
|
|
|
109
104
|
# 字典
|
|
110
|
-
|
|
105
|
+
ParseHub(cookie={"key1": "value1", "key2": "value2"})
|
|
111
106
|
```
|
|
112
107
|
|
|
113
108
|
目前支持 Cookie 登录的平台:
|
|
@@ -121,9 +116,6 @@ from parsehub.config import GlobalConfig
|
|
|
121
116
|
|
|
122
117
|
# 自定义默认下载目录
|
|
123
118
|
GlobalConfig.default_save_dir = "./my_downloads"
|
|
124
|
-
|
|
125
|
-
# 视频时长限制 (超过此时长将下载最低画质,0 为不限制)
|
|
126
|
-
GlobalConfig.duration_limit = 600 # 秒
|
|
127
119
|
```
|
|
128
120
|
|
|
129
121
|
## 🤝 参考项目
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "parsehub"
|
|
3
|
-
version = "2.0.1"
|
|
3
|
+
version = "2.0.3"
|
|
4
4
|
description = "轻量、异步、开箱即用的社交媒体聚合解析库"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12.0"
|
|
@@ -52,9 +52,7 @@ Issues = "https://github.com/z-mio/parsehub/issues"
|
|
|
52
52
|
|
|
53
53
|
[tool.ruff]
|
|
54
54
|
line-length = 120
|
|
55
|
-
|
|
56
|
-
"test",
|
|
57
|
-
]
|
|
55
|
+
|
|
58
56
|
[tool.ruff.lint]
|
|
59
57
|
select = [
|
|
60
58
|
"E", # pycodestyle 错误检查
|
|
@@ -2,7 +2,6 @@ from pathlib import Path
|
|
|
2
2
|
|
|
3
3
|
from loguru import logger
|
|
4
4
|
|
|
5
|
-
from .config.config import ParseConfig
|
|
6
5
|
from .errors import ParseError, UnknownPlatform
|
|
7
6
|
from .parsers.base import BaseParser
|
|
8
7
|
from .types import Platform
|
|
@@ -14,31 +13,37 @@ logger.disable(__name__)
|
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class ParseHub:
|
|
17
|
-
def __init__(self
|
|
18
|
-
"""初始化解析器"""
|
|
19
|
-
self.config = config
|
|
16
|
+
def __init__(self):
|
|
20
17
|
self.parsers: list[type[BaseParser]] = BaseParser.get_registry()
|
|
21
18
|
|
|
22
|
-
async def parse(self, url: str) -> AnyParseResult:
|
|
19
|
+
async def parse(self, url: str, *, proxy: str | None = None, cookie: str | dict | None = None) -> AnyParseResult:
|
|
23
20
|
"""解析
|
|
24
21
|
:param url: 分享文案 / 分享链接
|
|
22
|
+
:param proxy: 代理
|
|
23
|
+
:param cookie: cookie
|
|
24
|
+
:return: AnyParseResult
|
|
25
25
|
"""
|
|
26
26
|
parser = self.get_parser(url)
|
|
27
|
-
|
|
27
|
+
if not parser:
|
|
28
|
+
raise UnknownPlatform(url)
|
|
29
|
+
p = parser(proxy=proxy, cookie=cookie)
|
|
28
30
|
return await p.parse(url)
|
|
29
31
|
|
|
30
|
-
def parse_sync(self, url: str) -> AnyParseResult:
|
|
32
|
+
def parse_sync(self, url: str, *, proxy: str | None = None, cookie: str | dict | None = None) -> AnyParseResult:
|
|
31
33
|
"""
|
|
32
34
|
同步解析
|
|
33
35
|
:param url: 分享文案 / 分享链接
|
|
36
|
+
:param proxy: 代理
|
|
37
|
+
:param cookie: cookie
|
|
34
38
|
:return: AnyParseResult
|
|
35
39
|
"""
|
|
36
|
-
return get_event_loop().run_until_complete(self.parse(url))
|
|
40
|
+
return get_event_loop().run_until_complete(self.parse(url, proxy=proxy, cookie=cookie))
|
|
37
41
|
|
|
38
42
|
async def download(
|
|
39
43
|
self,
|
|
40
44
|
url: str,
|
|
41
45
|
path: str | Path = None,
|
|
46
|
+
*,
|
|
42
47
|
callback: ProgressCallback = None,
|
|
43
48
|
callback_args: tuple = (),
|
|
44
49
|
proxy: str | None = None,
|
|
@@ -63,13 +68,13 @@ class ParseHub:
|
|
|
63
68
|
- ``count``: 计数进度,用于多文件下载时报告已完成/总文件数
|
|
64
69
|
"""
|
|
65
70
|
result = await self.parse(url)
|
|
66
|
-
return await result.download(path, callback, callback_args, proxy)
|
|
71
|
+
return await result.download(path, callback=callback, callback_args=callback_args, proxy=proxy)
|
|
67
72
|
|
|
68
73
|
def download_sync(
|
|
69
74
|
self,
|
|
70
75
|
url: str,
|
|
71
|
-
path: str | Path = None,
|
|
72
|
-
callback: ProgressCallback = None,
|
|
76
|
+
path: str | Path | None = None,
|
|
77
|
+
callback: ProgressCallback | None = None,
|
|
73
78
|
callback_args: tuple = (),
|
|
74
79
|
proxy: str | None = None,
|
|
75
80
|
) -> DownloadResult:
|
|
@@ -93,34 +98,46 @@ class ParseHub:
|
|
|
93
98
|
- ``bytes``: 字节进度,用于单文件下载时报告已下载/总字节数
|
|
94
99
|
- ``count``: 计数进度,用于多文件下载时报告已完成/总文件数
|
|
95
100
|
"""
|
|
96
|
-
return get_event_loop().run_until_complete(
|
|
101
|
+
return get_event_loop().run_until_complete(
|
|
102
|
+
self.download(url, path, callback=callback, callback_args=callback_args, proxy=proxy)
|
|
103
|
+
)
|
|
97
104
|
|
|
98
|
-
async def get_raw_url(self, url: str) -> str:
|
|
99
|
-
"""获取原始链接
|
|
105
|
+
async def get_raw_url(self, url: str, proxy: str | None = None) -> str:
|
|
106
|
+
"""获取原始链接
|
|
107
|
+
:param url: 分享文案 / 分享链接
|
|
108
|
+
:param proxy: 代理
|
|
109
|
+
:return: 原始链接
|
|
110
|
+
"""
|
|
100
111
|
parser = self.get_parser(url)
|
|
101
112
|
try:
|
|
102
|
-
return await parser(
|
|
113
|
+
return await parser(proxy=proxy).get_raw_url(url)
|
|
103
114
|
except Exception as e:
|
|
104
115
|
raise ParseError from e
|
|
105
116
|
|
|
106
117
|
def _select_parser(self, url: str) -> type[BaseParser] | None:
|
|
107
|
-
"""选择解析器
|
|
118
|
+
"""选择解析器
|
|
119
|
+
:param url: 分享文案 / 分享链接
|
|
120
|
+
"""
|
|
108
121
|
for parser in self.parsers:
|
|
109
122
|
if parser.match(url):
|
|
110
123
|
return parser
|
|
111
124
|
return None
|
|
112
125
|
|
|
113
|
-
def get_parser(self, url) -> type[BaseParser]:
|
|
114
|
-
"""获取解析器
|
|
126
|
+
def get_parser(self, url) -> type[BaseParser] | None:
|
|
127
|
+
"""获取解析器
|
|
128
|
+
:param url: 分享文案 / 分享链接
|
|
129
|
+
"""
|
|
115
130
|
if parser := self._select_parser(url):
|
|
116
131
|
return parser
|
|
117
|
-
|
|
132
|
+
return None
|
|
118
133
|
|
|
119
|
-
def get_platform(self, url) -> Platform:
|
|
120
|
-
"""获取平台
|
|
134
|
+
def get_platform(self, url) -> Platform | None:
|
|
135
|
+
"""获取平台
|
|
136
|
+
:param url: 分享文案 / 分享链接
|
|
137
|
+
"""
|
|
121
138
|
if parser := self._select_parser(url):
|
|
122
139
|
return parser.__platform__
|
|
123
|
-
|
|
140
|
+
return None
|
|
124
141
|
|
|
125
142
|
def get_platforms(self) -> list[dict]:
|
|
126
143
|
"""获取所有解析器的信息
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, ConfigDict, HttpUrl
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _GlobalConfig(BaseModel):
|
|
8
|
+
model_config = ConfigDict(validate_assignment=True)
|
|
9
|
+
|
|
10
|
+
ua: str = (
|
|
11
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
12
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"
|
|
13
|
+
)
|
|
14
|
+
douyin_api: HttpUrl = "https://douyin.wtf/"
|
|
15
|
+
"""抖音解析API, 建议自行部署: https://github.com/Evil0ctal/Douyin_TikTok_Download_API"""
|
|
16
|
+
default_save_dir: Path = Path(sys.argv[0]).parent / "downloads"
|
|
17
|
+
"""默认下载目录"""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
GlobalConfig = _GlobalConfig()
|
|
@@ -7,31 +7,30 @@ from urllib.parse import parse_qs, urlencode, urlparse
|
|
|
7
7
|
import httpx
|
|
8
8
|
|
|
9
9
|
from ... import parsers
|
|
10
|
-
from ...config.config import GlobalConfig
|
|
10
|
+
from ...config.config import GlobalConfig
|
|
11
11
|
from ...types import AnyParseResult, ParseError
|
|
12
12
|
from ...types.platform import Platform
|
|
13
|
-
from ...utils.utils import match_url
|
|
13
|
+
from ...utils.utils import match_url, normalize_cookie
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class BaseParser(ABC):
|
|
17
17
|
_registry: list[type["BaseParser"]] = []
|
|
18
18
|
_registry_initialized: bool = False
|
|
19
19
|
|
|
20
|
-
__platform__: Platform = None
|
|
20
|
+
__platform__: Platform | None = None
|
|
21
21
|
"""平台"""
|
|
22
22
|
__supported_type__: list[str] = []
|
|
23
23
|
"""支持的类型, 例如: 图文, 视频, 动态"""
|
|
24
|
-
__match__: str = None
|
|
24
|
+
__match__: str | None = None
|
|
25
25
|
"""匹配规则"""
|
|
26
26
|
__reserved_parameters__: list[str] = []
|
|
27
27
|
"""要保留的参数, 例如翻页. 默认清除全部参数"""
|
|
28
28
|
__redirect_keywords__: list[str] = []
|
|
29
29
|
"""如果链接包含其中之一, 则遵循重定向规则"""
|
|
30
30
|
|
|
31
|
-
def __init__(self,
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
self.cfg = config
|
|
31
|
+
def __init__(self, *, proxy: str | None = None, cookie: str | dict | None = None):
|
|
32
|
+
self.proxy = proxy
|
|
33
|
+
self.cookie = normalize_cookie(cookie)
|
|
35
34
|
|
|
36
35
|
def __init_subclass__(cls, /, register=True, **kwargs):
|
|
37
36
|
super().__init_subclass__(**kwargs)
|
|
@@ -84,7 +83,7 @@ class BaseParser(ABC):
|
|
|
84
83
|
if not url.startswith("http"):
|
|
85
84
|
url = f"https://{url}"
|
|
86
85
|
if any(x in url for x in self.__redirect_keywords__):
|
|
87
|
-
async with httpx.AsyncClient(proxy=self.
|
|
86
|
+
async with httpx.AsyncClient(proxy=self.proxy, timeout=30) as client:
|
|
88
87
|
try:
|
|
89
88
|
r = await client.get(
|
|
90
89
|
url,
|
|
@@ -6,7 +6,6 @@ from typing import Union
|
|
|
6
6
|
|
|
7
7
|
from yt_dlp import YoutubeDL
|
|
8
8
|
|
|
9
|
-
from ...config.config import GlobalConfig
|
|
10
9
|
from ...types import (
|
|
11
10
|
DownloadError,
|
|
12
11
|
DownloadResult,
|
|
@@ -36,13 +35,11 @@ class YtParser(BaseParser, register=False):
|
|
|
36
35
|
|
|
37
36
|
async def _do_parse(self, raw_url: str) -> Union["YtVideoParseResult"]:
|
|
38
37
|
video_info = await self._parse(raw_url)
|
|
39
|
-
_d = {
|
|
40
|
-
"title": video_info.title,
|
|
41
|
-
"content": video_info.description,
|
|
42
|
-
"raw_url": raw_url,
|
|
43
|
-
"dl": video_info,
|
|
44
|
-
}
|
|
45
38
|
return YtVideoParseResult(
|
|
39
|
+
dl=video_info,
|
|
40
|
+
title=video_info.title,
|
|
41
|
+
content=video_info.description,
|
|
42
|
+
raw_url=raw_url,
|
|
46
43
|
video=VideoRef(
|
|
47
44
|
url=raw_url,
|
|
48
45
|
thumb_url=video_info.thumbnail,
|
|
@@ -50,7 +47,6 @@ class YtParser(BaseParser, register=False):
|
|
|
50
47
|
height=video_info.height,
|
|
51
48
|
duration=video_info.duration,
|
|
52
49
|
),
|
|
53
|
-
**_d,
|
|
54
50
|
)
|
|
55
51
|
|
|
56
52
|
async def _parse(self, url) -> "YtVideoInfo":
|
|
@@ -84,8 +80,8 @@ class YtParser(BaseParser, register=False):
|
|
|
84
80
|
|
|
85
81
|
def _extract_info(self, url):
|
|
86
82
|
params = self.params.copy()
|
|
87
|
-
if self.
|
|
88
|
-
params["proxy"] = self.
|
|
83
|
+
if self.proxy:
|
|
84
|
+
params["proxy"] = self.proxy
|
|
89
85
|
|
|
90
86
|
try:
|
|
91
87
|
with YoutubeDL(params) as ydl:
|
|
@@ -115,11 +111,11 @@ class YtParser(BaseParser, register=False):
|
|
|
115
111
|
class YtVideoParseResult(VideoParseResult):
|
|
116
112
|
def __init__(
|
|
117
113
|
self,
|
|
114
|
+
dl: "YtVideoInfo",
|
|
118
115
|
title,
|
|
119
116
|
video=None,
|
|
120
117
|
content=None,
|
|
121
118
|
raw_url=None,
|
|
122
|
-
dl: "YtVideoInfo" = None,
|
|
123
119
|
):
|
|
124
120
|
"""dl: yt-dlp解析结果"""
|
|
125
121
|
self.dl = dl
|
|
@@ -129,10 +125,10 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
129
125
|
self,
|
|
130
126
|
*,
|
|
131
127
|
output_dir: str | Path,
|
|
132
|
-
callback: ProgressCallback = None,
|
|
128
|
+
callback: ProgressCallback | None = None,
|
|
133
129
|
callback_args: tuple = (),
|
|
134
130
|
proxy: str | None = None,
|
|
135
|
-
headers: dict = None,
|
|
131
|
+
headers: dict | None = None,
|
|
136
132
|
) -> "DownloadResult":
|
|
137
133
|
paramss = self.dl.paramss.copy()
|
|
138
134
|
if proxy:
|
|
@@ -140,9 +136,9 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
140
136
|
|
|
141
137
|
paramss["outtmpl"] = f"{output_dir.joinpath('ytdlp_%(id)s')}.%(ext)s"
|
|
142
138
|
|
|
143
|
-
if GlobalConfig.duration_limit and self.dl.duration > GlobalConfig.duration_limit:
|
|
144
|
-
|
|
145
|
-
|
|
139
|
+
# if GlobalConfig.duration_limit and self.dl.duration > GlobalConfig.duration_limit:
|
|
140
|
+
# # 视频超过限制时长,获取最低画质
|
|
141
|
+
# paramss["format"] = "worstvideo* + worstaudio / worst"
|
|
146
142
|
|
|
147
143
|
if callback:
|
|
148
144
|
await callback(0, 1, "count", *callback_args)
|
|
@@ -204,8 +200,8 @@ class YtVideoInfo:
|
|
|
204
200
|
description: str
|
|
205
201
|
thumbnail: str
|
|
206
202
|
url: str
|
|
203
|
+
paramss: dict
|
|
207
204
|
"""Youtube 链接, 非视频下载链接"""
|
|
208
205
|
duration: int = 0
|
|
209
206
|
width: int = 0
|
|
210
207
|
height: int = 0
|
|
211
|
-
paramss: dict = None
|
|
@@ -78,7 +78,7 @@ class BiliParse(YtParser):
|
|
|
78
78
|
|
|
79
79
|
async def is_dynamic(self, url) -> str | None:
|
|
80
80
|
"""是动态"""
|
|
81
|
-
async with httpx.AsyncClient(proxy=self.
|
|
81
|
+
async with httpx.AsyncClient(proxy=self.proxy) as cli:
|
|
82
82
|
url = str((await cli.get(url, follow_redirects=True, timeout=30)).url)
|
|
83
83
|
|
|
84
84
|
if re.search(r"\b\d{18,19}\b", url):
|
|
@@ -86,17 +86,17 @@ class BiliParse(YtParser):
|
|
|
86
86
|
return None
|
|
87
87
|
|
|
88
88
|
async def get_dynamic_info(self, url: str) -> BiliDynamic:
|
|
89
|
-
async with BiliAPI(proxy=self.
|
|
89
|
+
async with BiliAPI(proxy=self.proxy) as bili:
|
|
90
90
|
try:
|
|
91
|
-
dynamic_info = await bili.get_dynamic_info(url, cookie=self.
|
|
91
|
+
dynamic_info = await bili.get_dynamic_info(url, cookie=self.cookie)
|
|
92
92
|
except Exception as e:
|
|
93
93
|
if "风控" in str(e):
|
|
94
|
-
raise ParseError(f"账号风控\n使用的cookie: {cookie_ellipsis(self.
|
|
94
|
+
raise ParseError(f"账号风控\n使用的cookie: {cookie_ellipsis(self.cookie)}") from e
|
|
95
95
|
raise ParseError(str(e)) from e
|
|
96
96
|
return dynamic_info
|
|
97
97
|
|
|
98
98
|
async def bili_api_parse(self, url) -> Union["BiliVideoParseResult", "ImageParseResult"]:
|
|
99
|
-
async with BiliAPI(proxy=self.
|
|
99
|
+
async with BiliAPI(proxy=self.proxy) as bili:
|
|
100
100
|
video_info = await bili.get_video_info(url)
|
|
101
101
|
|
|
102
102
|
if not (data := video_info.get("data")):
|
|
@@ -118,10 +118,11 @@ class BiliParse(YtParser):
|
|
|
118
118
|
dimension = page_info["dimension"]
|
|
119
119
|
|
|
120
120
|
b3, b4 = await bili.get_buvid()
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
video_playurl = await bili.get_video_playurl(url, cid, b3, b4)
|
|
122
|
+
# if GlobalConfig.duration_limit and duration > GlobalConfig.duration_limit:
|
|
123
|
+
# video_playurl = await bili.get_video_playurl(url, cid, b3, b4, False)
|
|
124
|
+
# else:
|
|
125
|
+
# video_playurl = await bili.get_video_playurl(url, cid, b3, b4)
|
|
125
126
|
|
|
126
127
|
durl = video_playurl["data"]["durl"][0]
|
|
127
128
|
video_url = self.change_source(durl["backup_url"][0]) if durl.get("backup_url") else durl["url"]
|
|
@@ -140,13 +141,10 @@ class BiliParse(YtParser):
|
|
|
140
141
|
|
|
141
142
|
async def ytp_parse(self, url) -> Union["YtVideoParseResult"]:
|
|
142
143
|
result = await super()._do_parse(url)
|
|
143
|
-
_d = {
|
|
144
|
-
"title": result.title,
|
|
145
|
-
"raw_url": result.raw_url,
|
|
146
|
-
"dl": result.dl,
|
|
147
|
-
}
|
|
148
144
|
return YtVideoParseResult(
|
|
149
|
-
|
|
145
|
+
title=result.title,
|
|
146
|
+
raw_url=result.raw_url,
|
|
147
|
+
dl=result.dl,
|
|
150
148
|
video=result.media,
|
|
151
149
|
)
|
|
152
150
|
|
|
@@ -173,10 +171,10 @@ class BiliVideoParseResult(VideoParseResult):
|
|
|
173
171
|
self,
|
|
174
172
|
*,
|
|
175
173
|
output_dir: str | Path,
|
|
176
|
-
callback: ProgressCallback = None,
|
|
174
|
+
callback: ProgressCallback | None = None,
|
|
177
175
|
callback_args: tuple = (),
|
|
178
176
|
proxy: str | None = None,
|
|
179
|
-
headers: dict = None,
|
|
177
|
+
headers: dict | None = None,
|
|
180
178
|
) -> "DownloadResult":
|
|
181
179
|
headers = {"referer": "https://www.bilibili.com", "User-Agent": GlobalConfig.ua}
|
|
182
180
|
return await super()._do_download(
|
|
@@ -29,7 +29,7 @@ class CoolapkParser(BaseParser):
|
|
|
29
29
|
) -> Union["CoolapkImageParseResult", "CoolapkRichTextParseResult", "CoolapkMultimediaParseResult"]:
|
|
30
30
|
raw_url_ = clear_params(raw_url, ["s", "shareKey"])
|
|
31
31
|
try:
|
|
32
|
-
coolapk = await Coolapk.parse(raw_url, proxy=self.
|
|
32
|
+
coolapk = await Coolapk.parse(raw_url, proxy=self.proxy)
|
|
33
33
|
except Exception as e:
|
|
34
34
|
raise ParseError(str(e)) from e
|
|
35
35
|
media = [AniRef(url=i) if ".gif" in i else ImageRef(url=i) for i in coolapk.imgs]
|
|
@@ -59,7 +59,7 @@ class InstagramParser(BaseParser):
|
|
|
59
59
|
post = await asyncio.wait_for(
|
|
60
60
|
asyncio.to_thread(
|
|
61
61
|
MyPost.from_shortcode,
|
|
62
|
-
MyInstaloaderContext(self.
|
|
62
|
+
MyInstaloaderContext(self.proxy, cookie),
|
|
63
63
|
shortcode,
|
|
64
64
|
),
|
|
65
65
|
30,
|
|
@@ -69,8 +69,8 @@ class InstagramParser(BaseParser):
|
|
|
69
69
|
except BadResponseException as e:
|
|
70
70
|
match str(e):
|
|
71
71
|
case "Fetching Post metadata failed.":
|
|
72
|
-
if self.
|
|
73
|
-
return await self._parse(url, shortcode, self.
|
|
72
|
+
if self.cookie and cookie is None:
|
|
73
|
+
return await self._parse(url, shortcode, self.cookie)
|
|
74
74
|
else:
|
|
75
75
|
raise ParseError("受限视频无法解析: 你必须年满 18 周岁才能观看这个视频") from e
|
|
76
76
|
case _:
|
|
@@ -10,7 +10,7 @@ class KuaiShouParser(BaseParser):
|
|
|
10
10
|
__redirect_keywords__ = ["v.kuaishou", "/f/"]
|
|
11
11
|
|
|
12
12
|
async def _do_parse(self, raw_url: str) -> VideoParseResult:
|
|
13
|
-
ks = KuaiShouAPI(self.
|
|
13
|
+
ks = KuaiShouAPI(self.cookie)
|
|
14
14
|
try:
|
|
15
15
|
result = await ks.get_video_info(raw_url)
|
|
16
16
|
except Exception as e:
|
|
@@ -13,7 +13,7 @@ class PipixParser(BaseParser):
|
|
|
13
13
|
|
|
14
14
|
async def _do_parse(self, raw_url: str) -> Union["ImageParseResult", "VideoParseResult"]:
|
|
15
15
|
try:
|
|
16
|
-
ppx = await Pipix(self.
|
|
16
|
+
ppx = await Pipix(self.proxy).parse(raw_url)
|
|
17
17
|
except Exception as e:
|
|
18
18
|
raise ParseError("皮皮虾解析失败") from e
|
|
19
19
|
|
|
@@ -9,7 +9,7 @@ class ThreadsParser(BaseParser):
|
|
|
9
9
|
__match__ = r"^(http(s)?://)?.+threads.com/@[\w.]+/post/.*"
|
|
10
10
|
|
|
11
11
|
async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
|
|
12
|
-
post = await ThreadsAPI(proxy=self.
|
|
12
|
+
post = await ThreadsAPI(proxy=self.proxy).parse(raw_url)
|
|
13
13
|
media = []
|
|
14
14
|
if post.media:
|
|
15
15
|
pm: list[ThreadsMedia] = post.media if isinstance(post.media, list) else [post.media]
|
|
@@ -12,7 +12,7 @@ class TieBaParser(BaseParser):
|
|
|
12
12
|
|
|
13
13
|
async def _do_parse(self, raw_url: str) -> Union["ImageParseResult", "VideoParseResult"]:
|
|
14
14
|
try:
|
|
15
|
-
tb = await TieBa(self.
|
|
15
|
+
tb = await TieBa(self.proxy).parse(raw_url)
|
|
16
16
|
except Exception as e:
|
|
17
17
|
raise ParseError("贴吧解析失败") from e
|
|
18
18
|
|
|
@@ -15,7 +15,7 @@ from ..base.base import BaseParser
|
|
|
15
15
|
class TwitterParser(BaseParser):
|
|
16
16
|
__platform__ = Platform.TWITTER
|
|
17
17
|
__supported_type__ = ["视频", "图文"]
|
|
18
|
-
__match__ = r"^(http(s)?://)?.+(twitter|x).com/.*/status/\d+"
|
|
18
|
+
__match__ = r"^(http(s)?://)?.+(twitter|fixupx|x).com/.*/status/\d+"
|
|
19
19
|
|
|
20
20
|
async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
|
|
21
21
|
tweet = await self._parse(raw_url)
|
|
@@ -26,18 +26,18 @@ class TwitterParser(BaseParser):
|
|
|
26
26
|
return str(urlunparse(urlparse(url)._replace(netloc="x.com")))
|
|
27
27
|
|
|
28
28
|
async def _parse(self, url: str):
|
|
29
|
-
x = Twitter(self.
|
|
29
|
+
x = Twitter(self.proxy, cookie=None)
|
|
30
30
|
try:
|
|
31
31
|
tweet = await x.fetch_tweet(url)
|
|
32
32
|
except Exception as e:
|
|
33
33
|
if any(s in str(e) for s in ("error -2",)):
|
|
34
|
-
if self.
|
|
35
|
-
x2 = Twitter(self.
|
|
34
|
+
if self.cookie:
|
|
35
|
+
x2 = Twitter(self.proxy, cookie=self.cookie)
|
|
36
36
|
try:
|
|
37
37
|
tweet = await x2.fetch_tweet(url)
|
|
38
38
|
except Exception as e2:
|
|
39
39
|
raise ParseError(
|
|
40
|
-
f"Twitter 账号可能已被封禁\n\n使用的Cookie: {cookie_ellipsis(self.
|
|
40
|
+
f"Twitter 账号可能已被封禁\n\n使用的Cookie: {cookie_ellipsis(self.cookie)}"
|
|
41
41
|
) from e2
|
|
42
42
|
else:
|
|
43
43
|
raise ParseError(str(e)) from e
|
|
@@ -20,7 +20,7 @@ class WeiboParser(BaseParser):
|
|
|
20
20
|
__match__ = r"^(http(s)?://)(m\.|)weibo.(com|cn)/(?!(u/)).+"
|
|
21
21
|
|
|
22
22
|
async def _do_parse(self, raw_url: str) -> MultimediaParseResult | VideoParseResult | ImageParseResult:
|
|
23
|
-
weibo = await WeiboAPI(self.
|
|
23
|
+
weibo = await WeiboAPI(self.proxy).parse(raw_url)
|
|
24
24
|
data = weibo.data
|
|
25
25
|
text = self.f_text(data.content)
|
|
26
26
|
media = []
|
|
@@ -9,7 +9,7 @@ class WXParser(BaseParser):
|
|
|
9
9
|
__match__ = r"^(http(s)?://)mp.weixin.qq.com/s/.*"
|
|
10
10
|
|
|
11
11
|
async def _do_parse(self, raw_url: str) -> "RichTextParseResult":
|
|
12
|
-
wx = await WX.parse(raw_url, self.
|
|
12
|
+
wx = await WX.parse(raw_url, self.proxy)
|
|
13
13
|
return RichTextParseResult(
|
|
14
14
|
title=wx.title,
|
|
15
15
|
media=[ImageRef(url=i) for i in wx.imgs],
|
|
@@ -27,7 +27,7 @@ class XHSParser(BaseParser):
|
|
|
27
27
|
|
|
28
28
|
async def _do_parse(self, raw_url: str) -> Union["VideoParseResult", "ImageParseResult", "MultimediaParseResult"]:
|
|
29
29
|
raw_url_ = clear_params(raw_url, "xsec_token")
|
|
30
|
-
xhs = XHSAPI(proxy=self.
|
|
30
|
+
xhs = XHSAPI(proxy=self.proxy)
|
|
31
31
|
result = await xhs.extract(raw_url)
|
|
32
32
|
|
|
33
33
|
desc = self.hashtag_handler(result.desc)
|
|
@@ -63,7 +63,7 @@ class XHSParser(BaseParser):
|
|
|
63
63
|
raise ParseError("不支持的类型")
|
|
64
64
|
|
|
65
65
|
async def get_ext_by_url(self, url: str):
|
|
66
|
-
async with httpx.AsyncClient(proxy=self.
|
|
66
|
+
async with httpx.AsyncClient(proxy=self.proxy) as client:
|
|
67
67
|
try:
|
|
68
68
|
response = await client.head(url, follow_redirects=True)
|
|
69
69
|
except Exception:
|
|
@@ -20,7 +20,7 @@ class XiaoHeiHeParser(BaseParser):
|
|
|
20
20
|
__redirect_keywords__ = ["api.xiaoheihe"]
|
|
21
21
|
|
|
22
22
|
async def _do_parse(self, raw_url: str) -> AnyParseResult:
|
|
23
|
-
xhh: XiaoHeiHePost = await XiaoHeiHeAPI(proxy=self.
|
|
23
|
+
xhh: XiaoHeiHePost = await XiaoHeiHeAPI(proxy=self.proxy).parse(raw_url)
|
|
24
24
|
media = self.__parse_media(xhh)
|
|
25
25
|
v = {"title": xhh.title, "content": xhh.content, "raw_url": raw_url}
|
|
26
26
|
match xhh.type:
|
|
@@ -20,8 +20,8 @@ class YtbParse(YtParser):
|
|
|
20
20
|
# "subtitlesformat": "ttml", # 字幕格式
|
|
21
21
|
# "subtitleslangs": ["en", "ja", "zh-CN"], # 字幕语言
|
|
22
22
|
}
|
|
23
|
-
if self.
|
|
24
|
-
sub["cookiefile"] = io.StringIO(self.to_netscape_cookie(self.
|
|
23
|
+
if self.cookie:
|
|
24
|
+
sub["cookiefile"] = io.StringIO(self.to_netscape_cookie(self.cookie, "youtube.com"))
|
|
25
25
|
p = sub | super().params
|
|
26
26
|
return p
|
|
27
27
|
|
|
@@ -10,7 +10,7 @@ class ZuiYouParser(BaseParser):
|
|
|
10
10
|
__reserved_parameters__ = ["pid"]
|
|
11
11
|
|
|
12
12
|
async def _do_parse(self, raw_url: str) -> MultimediaParseResult:
|
|
13
|
-
zy = await ZuiYou(self.
|
|
13
|
+
zy = await ZuiYou(self.proxy).parse(raw_url)
|
|
14
14
|
return MultimediaParseResult(
|
|
15
15
|
content=zy.content,
|
|
16
16
|
media=[
|