parsehub 2.0.31__tar.gz → 2.0.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.31/src/parsehub.egg-info → parsehub-2.0.32}/PKG-INFO +1 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/pyproject.toml +1 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/__init__.py +8 -2
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/cli.py +2 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/base/base.py +3 -3
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/base/ytdlp.py +2 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/bilibili.py +13 -4
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/coolapk.py +2 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/douyin.py +4 -2
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/instagram.py +5 -5
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/kuaishou.py +1 -1
- parsehub-2.0.32/src/parsehub/parsers/parser/snapchat.py +11 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/tiktok.py +3 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/twitter.py +3 -6
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/xhs.py +1 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/youtube.py +4 -3
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/platform.py +1 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/result.py +11 -1
- parsehub-2.0.32/src/parsehub/utils/downloader.py +546 -0
- parsehub-2.0.32/src/parsehub/utils/helpers.py +83 -0
- {parsehub-2.0.31 → parsehub-2.0.32/src/parsehub.egg-info}/PKG-INFO +1 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub.egg-info/SOURCES.txt +2 -1
- {parsehub-2.0.31 → parsehub-2.0.32}/test/test_cli.py +5 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/test/test_core_offline.py +5 -26
- parsehub-2.0.31/src/parsehub/utils/downloader.py +0 -207
- parsehub-2.0.31/src/parsehub/utils/utils.py +0 -72
- {parsehub-2.0.31 → parsehub-2.0.32}/LICENSE +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/README.md +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/setup.cfg +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/cli_config.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/config/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/config/config.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/pipix.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/threads.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/tieba.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/weibo.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/weixin.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/parsers/parser/zuiyou.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/bilibili.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/coolapk.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/douyin.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/instagram.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/kuaishou.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/pipix.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/threads.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/tieba.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/tiktok.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/twitter.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/weibo.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/weixin.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/xhs.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/xiaoheihe.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/provider_api/zuiyou.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/__init__.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/callback.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/media_file.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/media_ref.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/types/post.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub.egg-info/entry_points.txt +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub.egg-info/requires.txt +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/src/parsehub.egg-info/top_level.txt +0 -0
- {parsehub-2.0.31 → parsehub-2.0.32}/test/test_cli_config.py +0 -0
|
@@ -7,7 +7,7 @@ from .parsers.base import BaseParser
|
|
|
7
7
|
from .types import Platform
|
|
8
8
|
from .types.callback import ProgressCallback
|
|
9
9
|
from .types.result import AnyParseResult, DownloadResult
|
|
10
|
-
from .utils.
|
|
10
|
+
from .utils.helpers import SecretCookie, run_sync
|
|
11
11
|
|
|
12
12
|
logger.disable(__name__)
|
|
13
13
|
|
|
@@ -27,7 +27,7 @@ class ParseHub:
|
|
|
27
27
|
if not parser:
|
|
28
28
|
raise UnknownPlatform(url)
|
|
29
29
|
try:
|
|
30
|
-
p = parser(proxy=proxy, cookie=cookie)
|
|
30
|
+
p = parser(proxy=proxy, cookie=SecretCookie(cookie))
|
|
31
31
|
return await p.parse(url)
|
|
32
32
|
except ParseError:
|
|
33
33
|
raise
|
|
@@ -56,6 +56,7 @@ class ParseHub:
|
|
|
56
56
|
parse_proxy: str | None = None,
|
|
57
57
|
parse_cookie: str | dict | None = None,
|
|
58
58
|
save_metadata: bool = False,
|
|
59
|
+
connections: int = 4,
|
|
59
60
|
) -> DownloadResult:
|
|
60
61
|
"""下载
|
|
61
62
|
:param url: 分享文案 / 分享链接
|
|
@@ -67,6 +68,7 @@ class ParseHub:
|
|
|
67
68
|
:param parse_proxy: 解析代理
|
|
68
69
|
:param parse_cookie: 解析 cookie
|
|
69
70
|
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
71
|
+
:param connections: 多线程下载连接数, 默认为 4
|
|
70
72
|
:return: DownloadResult
|
|
71
73
|
|
|
72
74
|
Note:
|
|
@@ -88,6 +90,7 @@ class ParseHub:
|
|
|
88
90
|
callback_kwargs=callback_kwargs,
|
|
89
91
|
proxy=proxy,
|
|
90
92
|
save_metadata=save_metadata,
|
|
93
|
+
connections=connections,
|
|
91
94
|
)
|
|
92
95
|
|
|
93
96
|
def download_sync(
|
|
@@ -101,6 +104,7 @@ class ParseHub:
|
|
|
101
104
|
parse_proxy: str | None = None,
|
|
102
105
|
parse_cookie: str | dict | None = None,
|
|
103
106
|
save_metadata: bool = False,
|
|
107
|
+
connections: int = 4,
|
|
104
108
|
) -> DownloadResult:
|
|
105
109
|
"""
|
|
106
110
|
同步下载
|
|
@@ -113,6 +117,7 @@ class ParseHub:
|
|
|
113
117
|
:param parse_proxy: 解析代理
|
|
114
118
|
:param parse_cookie: 解析 cookie
|
|
115
119
|
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
120
|
+
:param connections: 多线程下载连接数, 默认为 4
|
|
116
121
|
:return: DownloadResult
|
|
117
122
|
|
|
118
123
|
Note:
|
|
@@ -137,6 +142,7 @@ class ParseHub:
|
|
|
137
142
|
parse_proxy=parse_proxy,
|
|
138
143
|
parse_cookie=parse_cookie,
|
|
139
144
|
save_metadata=save_metadata,
|
|
145
|
+
connections=connections,
|
|
140
146
|
)
|
|
141
147
|
)
|
|
142
148
|
|
|
@@ -116,6 +116,7 @@ def _build_parser(prog: str) -> argparse.ArgumentParser:
|
|
|
116
116
|
)
|
|
117
117
|
download_parser.add_argument("-q", "--quiet", action="store_true", help="不输出状态和进度信息")
|
|
118
118
|
download_parser.add_argument("--no-progress", action="store_true", help="不显示下载进度")
|
|
119
|
+
download_parser.add_argument("--connections", type=int, default=4, help="单文件分片下载连接数,设为 1 可禁用分片")
|
|
119
120
|
_add_json_options(download_parser)
|
|
120
121
|
download_parser.set_defaults(func=_cmd_download)
|
|
121
122
|
|
|
@@ -256,6 +257,7 @@ def _cmd_download(args: argparse.Namespace) -> int:
|
|
|
256
257
|
parse_proxy=parse_proxy,
|
|
257
258
|
parse_cookie=parse_cookie,
|
|
258
259
|
save_metadata=args.save_metadata,
|
|
260
|
+
connections=args.connections,
|
|
259
261
|
)
|
|
260
262
|
reporter.finish()
|
|
261
263
|
|
|
@@ -11,7 +11,7 @@ from ... import parsers
|
|
|
11
11
|
from ...config.config import GlobalConfig
|
|
12
12
|
from ...types import AnyParseResult, ParseError
|
|
13
13
|
from ...types.platform import Platform
|
|
14
|
-
from ...utils.
|
|
14
|
+
from ...utils.helpers import SecretCookie, match_url
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class BaseParser(ABC):
|
|
@@ -31,9 +31,9 @@ class BaseParser(ABC):
|
|
|
31
31
|
__redirect_keywords__: list[str] = []
|
|
32
32
|
"""如果链接包含其中之一, 则遵循重定向规则"""
|
|
33
33
|
|
|
34
|
-
def __init__(self, *, proxy: str | None = None, cookie:
|
|
34
|
+
def __init__(self, *, proxy: str | None = None, cookie: SecretCookie = SecretCookie()):
|
|
35
35
|
self.proxy = proxy
|
|
36
|
-
self.cookie =
|
|
36
|
+
self.cookie = cookie
|
|
37
37
|
|
|
38
38
|
def __init_subclass__(cls, /, register: bool = True, **kwargs: Any) -> None:
|
|
39
39
|
super().__init_subclass__(**kwargs)
|
|
@@ -155,7 +155,6 @@ class YtParser(BaseParser, register=False):
|
|
|
155
155
|
@property
|
|
156
156
|
def params(self) -> dict[str, Any]:
|
|
157
157
|
params = {
|
|
158
|
-
"format": "mp4+bestvideo[height<=1080]+bestaudio",
|
|
159
158
|
"quiet": True, # 不输出日志
|
|
160
159
|
"noprogress": True, # 不输出下载进度
|
|
161
160
|
# "writethumbnail": True, # 下载缩略图
|
|
@@ -191,6 +190,7 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
191
190
|
callback_kwargs: dict | None = None,
|
|
192
191
|
proxy: str | None = None,
|
|
193
192
|
headers: dict | None = None,
|
|
193
|
+
connections: int = 4,
|
|
194
194
|
) -> "DownloadResult":
|
|
195
195
|
if callback_kwargs is None:
|
|
196
196
|
callback_kwargs = {}
|
|
@@ -201,6 +201,7 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
201
201
|
paramss["proxy"] = self.dl.proxy
|
|
202
202
|
|
|
203
203
|
paramss["outtmpl"] = f"{output_dir_path.joinpath(self.name)}.%(ext)s"
|
|
204
|
+
paramss["concurrent_fragment_downloads"] = connections # 多线程下载
|
|
204
205
|
|
|
205
206
|
if callback:
|
|
206
207
|
loop = asyncio.get_running_loop()
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import cast
|
|
5
|
+
from typing import Any, cast
|
|
6
6
|
from urllib.parse import parse_qs, urlparse
|
|
7
7
|
|
|
8
8
|
from loguru import logger
|
|
@@ -20,7 +20,6 @@ from ...types import (
|
|
|
20
20
|
VideoParseResult,
|
|
21
21
|
VideoRef,
|
|
22
22
|
)
|
|
23
|
-
from ...utils.utils import cookie_ellipsis
|
|
24
23
|
from ..base.ytdlp import YtParser, YtVideoParseResult
|
|
25
24
|
|
|
26
25
|
|
|
@@ -88,10 +87,10 @@ class BiliParse(YtParser):
|
|
|
88
87
|
async def get_dynamic_info(self, url: str) -> BiliDynamic:
|
|
89
88
|
async with BiliAPI(proxy=self.proxy) as bili:
|
|
90
89
|
try:
|
|
91
|
-
dynamic_info = await bili.get_dynamic_info(url, cookie=self.cookie)
|
|
90
|
+
dynamic_info = await bili.get_dynamic_info(url, cookie=self.cookie.get_value())
|
|
92
91
|
except Exception as e:
|
|
93
92
|
if "风控" in str(e):
|
|
94
|
-
raise ParseError(f"账号风控\n使用的cookie: {
|
|
93
|
+
raise ParseError(f"账号风控\n使用的cookie: {self.cookie}") from e
|
|
95
94
|
raise ParseError(str(e)) from e
|
|
96
95
|
return cast(BiliDynamic, dynamic_info)
|
|
97
96
|
|
|
@@ -163,6 +162,14 @@ class BiliParse(YtParser):
|
|
|
163
162
|
desc = desc.replace(hashtag, f" {hashtag.strip().removesuffix('#')} ")
|
|
164
163
|
return desc.strip()
|
|
165
164
|
|
|
165
|
+
@property
|
|
166
|
+
def params(self) -> dict[str, Any]:
|
|
167
|
+
sub: dict[str, Any] = {
|
|
168
|
+
"format": "mp4+bestvideo[height<=1080]+bestaudio",
|
|
169
|
+
}
|
|
170
|
+
p = sub | super().params
|
|
171
|
+
return p
|
|
172
|
+
|
|
166
173
|
|
|
167
174
|
class BiliVideoParseResult(VideoParseResult):
|
|
168
175
|
async def _do_download(
|
|
@@ -174,6 +181,7 @@ class BiliVideoParseResult(VideoParseResult):
|
|
|
174
181
|
callback_kwargs: dict | None = None,
|
|
175
182
|
proxy: str | None = None,
|
|
176
183
|
headers: dict | None = None,
|
|
184
|
+
connections: int = 4,
|
|
177
185
|
) -> DownloadResult:
|
|
178
186
|
headers = {"referer": "https://www.bilibili.com", "User-Agent": GlobalConfig.ua}
|
|
179
187
|
return await super()._do_download(
|
|
@@ -183,6 +191,7 @@ class BiliVideoParseResult(VideoParseResult):
|
|
|
183
191
|
callback_kwargs=callback_kwargs,
|
|
184
192
|
proxy=proxy,
|
|
185
193
|
headers=headers,
|
|
194
|
+
connections=connections,
|
|
186
195
|
)
|
|
187
196
|
|
|
188
197
|
|
|
@@ -69,6 +69,7 @@ class CoolapkParseResult(ParseResult):
|
|
|
69
69
|
callback_kwargs: dict | None = None,
|
|
70
70
|
proxy: str | None = None,
|
|
71
71
|
headers: dict | None = None,
|
|
72
|
+
connections: int = 4,
|
|
72
73
|
) -> "DownloadResult":
|
|
73
74
|
headers = {
|
|
74
75
|
"Accept": (
|
|
@@ -83,6 +84,7 @@ class CoolapkParseResult(ParseResult):
|
|
|
83
84
|
callback_kwargs=callback_kwargs,
|
|
84
85
|
proxy=proxy,
|
|
85
86
|
headers=headers,
|
|
87
|
+
connections=connections,
|
|
86
88
|
)
|
|
87
89
|
|
|
88
90
|
|
|
@@ -38,10 +38,10 @@ class DouyinParser(BaseParser):
|
|
|
38
38
|
|
|
39
39
|
async def _fetch_api_result(self, url: str) -> "DouyinApiResult":
|
|
40
40
|
"""获取并解析抖音 API 结果"""
|
|
41
|
-
if not self.cookie:
|
|
41
|
+
if not (cookie := self.cookie.get_value()):
|
|
42
42
|
raise ParseError("抖音 Cookie 未配置")
|
|
43
43
|
|
|
44
|
-
crawler = DouyinWebCrawler(proxy=self.proxy, cookie=
|
|
44
|
+
crawler = DouyinWebCrawler(proxy=self.proxy, cookie=cookie)
|
|
45
45
|
response = await crawler.parse(url)
|
|
46
46
|
return DouyinApiResult.parse(response)
|
|
47
47
|
|
|
@@ -74,6 +74,7 @@ class DouyinParseResult(ParseResult):
|
|
|
74
74
|
callback_kwargs: dict | None = None,
|
|
75
75
|
proxy: str | None = None,
|
|
76
76
|
headers: dict | None = None,
|
|
77
|
+
connections: int = 4,
|
|
77
78
|
) -> "DownloadResult":
|
|
78
79
|
headers = {
|
|
79
80
|
"Referer": "https://www.douyin.com/",
|
|
@@ -85,6 +86,7 @@ class DouyinParseResult(ParseResult):
|
|
|
85
86
|
callback_kwargs=callback_kwargs,
|
|
86
87
|
proxy=proxy,
|
|
87
88
|
headers=headers,
|
|
89
|
+
connections=connections,
|
|
88
90
|
)
|
|
89
91
|
|
|
90
92
|
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import re
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import cast
|
|
4
4
|
|
|
5
5
|
from instaloader import BadResponseException
|
|
6
6
|
|
|
7
7
|
from ...provider_api.instagram import MyInstaloaderContext, MyPost
|
|
8
8
|
from ...types import ImageParseResult, ImageRef, MultimediaParseResult, ParseError, Platform, VideoParseResult, VideoRef
|
|
9
|
-
from ...utils.
|
|
9
|
+
from ...utils.helpers import SecretCookie
|
|
10
10
|
from ..base.base import BaseParser
|
|
11
11
|
|
|
12
12
|
|
|
@@ -57,12 +57,12 @@ class InstagramParser(BaseParser):
|
|
|
57
57
|
case _:
|
|
58
58
|
raise ParseError("不支持的类型")
|
|
59
59
|
|
|
60
|
-
async def _parse(self, url: str, shortcode: str, cookie:
|
|
60
|
+
async def _parse(self, url: str, shortcode: str, cookie: SecretCookie | None = None) -> MyPost:
|
|
61
61
|
try:
|
|
62
62
|
post = await asyncio.wait_for(
|
|
63
63
|
asyncio.to_thread(
|
|
64
64
|
MyPost.from_shortcode,
|
|
65
|
-
MyInstaloaderContext(self.proxy, cookie),
|
|
65
|
+
MyInstaloaderContext(self.proxy, cookie.get_value() if cookie else None),
|
|
66
66
|
shortcode,
|
|
67
67
|
),
|
|
68
68
|
30,
|
|
@@ -80,7 +80,7 @@ class InstagramParser(BaseParser):
|
|
|
80
80
|
raise ParseError("无法获取帖子内容") from e
|
|
81
81
|
except Exception as e:
|
|
82
82
|
if cookie:
|
|
83
|
-
text = f"Instagram 账号可能已被封禁\n\n使用的Cookie: {
|
|
83
|
+
text = f"Instagram 账号可能已被封禁\n\n使用的Cookie: {cookie}"
|
|
84
84
|
else:
|
|
85
85
|
text = str(e)
|
|
86
86
|
raise ParseError(f"无法获取帖子内容: {text}") from e
|
|
@@ -10,7 +10,7 @@ class KuaiShouParser(BaseParser):
|
|
|
10
10
|
__redirect_keywords__ = ["v.kuaishou", "/f/"]
|
|
11
11
|
|
|
12
12
|
async def _do_parse(self, raw_url: str) -> VideoParseResult:
|
|
13
|
-
ks = KuaiShouAPI(self.cookie, self.proxy)
|
|
13
|
+
ks = KuaiShouAPI(self.cookie.get_value(), self.proxy)
|
|
14
14
|
try:
|
|
15
15
|
result = await ks.get_video_info(raw_url)
|
|
16
16
|
except Exception as e:
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from ...types.platform import Platform
|
|
2
|
+
from ..base.ytdlp import YtParser
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Snapchatarse(YtParser):
|
|
6
|
+
__platform__ = Platform.SNAPCHAT
|
|
7
|
+
__supported_type__ = ["视频"]
|
|
8
|
+
__match__ = r"^(http(s)?://)?(?:www\.)?snapchat\.com/@([a-zA-Z0-9._-]+)(?:/spotlight)?/([a-zA-Z0-9_-]+)"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
__all__ = ["Snapchatarse"]
|
|
@@ -33,7 +33,7 @@ class TikTokParser(BaseParser):
|
|
|
33
33
|
return self._build_image_result(result)
|
|
34
34
|
|
|
35
35
|
async def _fetch_api_result(self, url: str) -> "TikTokApiResult":
|
|
36
|
-
crawler = TikTokWebCrawler(proxy=self.proxy, cookie=self.cookie)
|
|
36
|
+
crawler = TikTokWebCrawler(proxy=self.proxy, cookie=self.cookie.get_value())
|
|
37
37
|
try:
|
|
38
38
|
response = await crawler.parse(url)
|
|
39
39
|
return TikTokApiResult.parse(response)
|
|
@@ -69,6 +69,7 @@ class TikTokVideoParseResult(VideoParseResult):
|
|
|
69
69
|
callback_kwargs: dict | None = None,
|
|
70
70
|
proxy: str | None = None,
|
|
71
71
|
headers: dict | None = None,
|
|
72
|
+
connections: int = 4,
|
|
72
73
|
) -> "DownloadResult":
|
|
73
74
|
headers = {
|
|
74
75
|
"Referer": "https://www.tiktok.com/",
|
|
@@ -80,6 +81,7 @@ class TikTokVideoParseResult(VideoParseResult):
|
|
|
80
81
|
callback_kwargs=callback_kwargs,
|
|
81
82
|
proxy=proxy,
|
|
82
83
|
headers=headers,
|
|
84
|
+
connections=connections,
|
|
83
85
|
)
|
|
84
86
|
|
|
85
87
|
|
|
@@ -17,7 +17,6 @@ from ...types import (
|
|
|
17
17
|
RichTextParseResult,
|
|
18
18
|
VideoRef,
|
|
19
19
|
)
|
|
20
|
-
from ...utils.utils import cookie_ellipsis
|
|
21
20
|
from ..base.base import BaseParser
|
|
22
21
|
|
|
23
22
|
|
|
@@ -40,14 +39,12 @@ class TwitterParser(BaseParser):
|
|
|
40
39
|
tweet = await x.fetch_tweet(url)
|
|
41
40
|
except Exception as e:
|
|
42
41
|
if any(s in str(e) for s in ("error -2",)):
|
|
43
|
-
if self.cookie:
|
|
44
|
-
x2 = Twitter(self.proxy, cookie=
|
|
42
|
+
if cookie := self.cookie.get_value():
|
|
43
|
+
x2 = Twitter(self.proxy, cookie=cookie)
|
|
45
44
|
try:
|
|
46
45
|
tweet = await x2.fetch_tweet(url)
|
|
47
46
|
except Exception as e2:
|
|
48
|
-
raise ParseError(
|
|
49
|
-
f"Twitter 账号可能已被封禁\n\n使用的Cookie: {cookie_ellipsis(self.cookie)}"
|
|
50
|
-
) from e2
|
|
47
|
+
raise ParseError(f"Twitter 账号可能已被封禁\n\n使用的Cookie: {self.cookie}") from e2
|
|
51
48
|
else:
|
|
52
49
|
raise ParseError(str(e)) from e
|
|
53
50
|
else:
|
|
@@ -25,7 +25,7 @@ class XHSParser(BaseParser):
|
|
|
25
25
|
__after_clean_parameters__ = ["xsec_token"]
|
|
26
26
|
|
|
27
27
|
async def _do_parse(self, raw_url: str) -> Union["VideoParseResult", "ImageParseResult", "MultimediaParseResult"]:
|
|
28
|
-
xhs = XHSAPI(proxy=self.proxy, cookie=self.cookie)
|
|
28
|
+
xhs = XHSAPI(proxy=self.proxy, cookie=self.cookie.get_value())
|
|
29
29
|
result = await xhs.extract(raw_url)
|
|
30
30
|
|
|
31
31
|
desc = self.hashtag_handler(result.desc)
|
|
@@ -15,18 +15,19 @@ class YtbParse(YtParser):
|
|
|
15
15
|
@property
|
|
16
16
|
def params(self) -> dict[str, Any]:
|
|
17
17
|
sub: dict[str, Any] = {
|
|
18
|
+
"format": "mp4+bestvideo[height<=1080]+bestaudio",
|
|
18
19
|
# "writesubtitles": True, # 下载字幕
|
|
19
20
|
# "writeautomaticsub": True, # 下载自动生成的字幕
|
|
20
21
|
# "subtitlesformat": "ttml", # 字幕格式
|
|
21
22
|
# "subtitleslangs": ["en", "ja", "zh-CN"], # 字幕语言
|
|
22
23
|
}
|
|
23
|
-
if self.cookie:
|
|
24
|
-
sub["cookiefile"] = io.StringIO(self.to_netscape_cookie(
|
|
24
|
+
if cookie := self.cookie.get_value():
|
|
25
|
+
sub["cookiefile"] = io.StringIO(self.to_netscape_cookie(cookie, "youtube.com"))
|
|
25
26
|
p = sub | super().params
|
|
26
27
|
return p
|
|
27
28
|
|
|
28
29
|
@staticmethod
|
|
29
|
-
def to_netscape_cookie(cookie: dict, domain: str) -> str | None:
|
|
30
|
+
def to_netscape_cookie(cookie: dict | None, domain: str) -> str | None:
|
|
30
31
|
"""将字典格式 cookie 转为 Netscape 格式字符串
|
|
31
32
|
:param cookie: 字典格式 cookie
|
|
32
33
|
:param domain: cookie 所属域名, 例如 "youtube.com"
|
|
@@ -15,7 +15,7 @@ from slugify import slugify
|
|
|
15
15
|
from ..config import GlobalConfig
|
|
16
16
|
from ..errors import DeleteError, DownloadError
|
|
17
17
|
from ..utils.downloader import download
|
|
18
|
-
from ..utils.
|
|
18
|
+
from ..utils.helpers import run_sync
|
|
19
19
|
from .callback import ProgressCallback
|
|
20
20
|
from .media_file import AniFile, AnyMediaFile, ImageFile, LivePhotoFile, VideoFile
|
|
21
21
|
from .media_ref import AniRef, AnyMediaRef, ImageRef, LivePhotoRef, VideoRef
|
|
@@ -87,6 +87,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
87
87
|
callback_kwargs: dict | None = None,
|
|
88
88
|
proxy: str | None = None,
|
|
89
89
|
headers: dict | None = None,
|
|
90
|
+
connections: int = 4,
|
|
90
91
|
) -> "DownloadResult":
|
|
91
92
|
"""
|
|
92
93
|
执行下载
|
|
@@ -96,6 +97,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
96
97
|
:param callback_kwargs: 回调函数的关键字参数
|
|
97
98
|
:param proxy: 代理
|
|
98
99
|
:param headers: 请求头
|
|
100
|
+
:param connections: 多线程下载连接数, 默认为 4
|
|
99
101
|
:return: DownloadResult
|
|
100
102
|
"""
|
|
101
103
|
if self.media is None:
|
|
@@ -134,6 +136,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
134
136
|
progress=dl_progress,
|
|
135
137
|
progress_args=dl_progress_args,
|
|
136
138
|
progress_kwargs=dl_progress_kwargs,
|
|
139
|
+
connections=connections,
|
|
137
140
|
)
|
|
138
141
|
except Exception as e:
|
|
139
142
|
shutil.rmtree(output_dir, ignore_errors=True)
|
|
@@ -161,6 +164,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
161
164
|
save_path,
|
|
162
165
|
headers=headers,
|
|
163
166
|
proxy=proxy,
|
|
167
|
+
connections=connections,
|
|
164
168
|
)
|
|
165
169
|
except Exception as e:
|
|
166
170
|
shutil.rmtree(output_dir, ignore_errors=True)
|
|
@@ -190,6 +194,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
190
194
|
callback_kwargs: dict | None = None,
|
|
191
195
|
proxy: str | None = None,
|
|
192
196
|
save_metadata: bool = False,
|
|
197
|
+
connections: int = 4,
|
|
193
198
|
) -> "DownloadResult":
|
|
194
199
|
"""
|
|
195
200
|
:param path: 保存路径
|
|
@@ -198,6 +203,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
198
203
|
:param callback_kwargs: 回调函数的关键字参数
|
|
199
204
|
:param proxy: 代理
|
|
200
205
|
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
206
|
+
:param connections: 多线程下载连接数, 默认为 4
|
|
201
207
|
:return: DownloadResult
|
|
202
208
|
|
|
203
209
|
Note:
|
|
@@ -230,6 +236,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
230
236
|
callback_args=callback_args,
|
|
231
237
|
callback_kwargs=callback_kwargs,
|
|
232
238
|
proxy=proxy,
|
|
239
|
+
connections=connections,
|
|
233
240
|
)
|
|
234
241
|
except Exception as e:
|
|
235
242
|
shutil.rmtree(output_dir, ignore_errors=True)
|
|
@@ -244,6 +251,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
244
251
|
callback_kwargs: dict | None = None,
|
|
245
252
|
proxy: str | None = None,
|
|
246
253
|
save_metadata: bool = False,
|
|
254
|
+
connections: int = 4,
|
|
247
255
|
) -> "DownloadResult":
|
|
248
256
|
"""
|
|
249
257
|
:param path: 保存路径
|
|
@@ -252,6 +260,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
252
260
|
:param callback_kwargs: 回调函数的关键字参数
|
|
253
261
|
:param proxy: 代理
|
|
254
262
|
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
263
|
+
:param connections: 多线程下载连接数, 默认为 4
|
|
255
264
|
:return: DownloadResult
|
|
256
265
|
|
|
257
266
|
Note:
|
|
@@ -273,6 +282,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
273
282
|
callback_kwargs=callback_kwargs,
|
|
274
283
|
proxy=proxy,
|
|
275
284
|
save_metadata=save_metadata,
|
|
285
|
+
connections=connections,
|
|
276
286
|
)
|
|
277
287
|
)
|
|
278
288
|
|