nonebot-plugin-parser 2.0.1__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/PKG-INFO +2 -3
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/README.md +1 -2
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/pyproject.toml +2 -2
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/download/__init__.py +9 -6
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/download/task.py +2 -1
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/download/ytdlp.py +13 -8
- nonebot_plugin_parser-2.0.3/src/nonebot_plugin_parser/exception.py +40 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/helper.py +6 -23
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/matchers/__init__.py +20 -17
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/matchers/preprocess.py +0 -41
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/acfun.py +18 -17
- nonebot_plugin_parser-2.0.3/src/nonebot_plugin_parser/parsers/base.py +144 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +11 -17
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/data.py +23 -97
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +46 -8
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/douyin/slides.py +1 -18
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/douyin/video.py +1 -16
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/kuaishou.py +21 -15
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/twitter.py +51 -34
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/weibo.py +54 -50
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/xiaohongshu.py +33 -18
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/youtube.py +23 -21
- nonebot_plugin_parser-2.0.3/src/nonebot_plugin_parser/renders/base.py +76 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/common.py +3 -3
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/default.py +7 -1
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/utils.py +1 -0
- nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/exception.py +0 -20
- nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/parsers/base.py +0 -128
- nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/renders/base.py +0 -54
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/config.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/constants.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/tiktok.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/fonts/HYSongYunLangHeiW-1.ttf +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
- {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: nonebot-plugin-parser
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun
|
|
5
5
|
Keywords: nonebot,nonebot2,video,bilibili,youtube,tiktok,twitter,kuaishou,acfun,weibo,xiaohongshu,nga,douyin
|
|
6
6
|
Author: fllesser
|
|
@@ -42,8 +42,7 @@ Description-Content-Type: text/markdown
|
|
|
42
42
|
<br/>
|
|
43
43
|
[](https://results.pre-commit.ci/latest/github/fllesser/nonebot-plugin-parser/master)
|
|
44
44
|
[](https://codecov.io/gh/fllesser/nonebot-plugin-parser)
|
|
45
|
-
[](https://qm.qq.com/q/y4T4CjHimc)
|
|
47
46
|
</div>
|
|
48
47
|
|
|
49
48
|
> [!IMPORTANT]
|
|
@@ -12,8 +12,7 @@
|
|
|
12
12
|
<br/>
|
|
13
13
|
[](https://results.pre-commit.ci/latest/github/fllesser/nonebot-plugin-parser/master)
|
|
14
14
|
[](https://codecov.io/gh/fllesser/nonebot-plugin-parser)
|
|
15
|
-
[](https://qm.qq.com/q/y4T4CjHimc)
|
|
17
16
|
</div>
|
|
18
17
|
|
|
19
18
|
> [!IMPORTANT]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nonebot-plugin-parser"
|
|
3
|
-
version = "2.0.1"
|
|
3
|
+
version = "2.0.3"
|
|
4
4
|
description = "NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun"
|
|
5
5
|
authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
|
|
6
6
|
readme = "README.md"
|
|
@@ -185,7 +185,7 @@ build-backend = "uv_build"
|
|
|
185
185
|
|
|
186
186
|
|
|
187
187
|
[tool.bumpversion]
|
|
188
|
-
current_version = "2.0.1"
|
|
188
|
+
current_version = "2.0.3"
|
|
189
189
|
commit = true
|
|
190
190
|
message = "🔖 release: bump vesion from {current_version} to {new_version}"
|
|
191
191
|
tag = true
|
|
@@ -8,7 +8,7 @@ from tqdm.asyncio import tqdm
|
|
|
8
8
|
|
|
9
9
|
from ..config import pconfig
|
|
10
10
|
from ..constants import COMMON_HEADER, DOWNLOAD_TIMEOUT
|
|
11
|
-
from ..exception import DownloadException,
|
|
11
|
+
from ..exception import DownloadException, SizeLimitException, ZeroSizeException
|
|
12
12
|
from ..utils import generate_file_name, merge_av, safe_unlink
|
|
13
13
|
from .task import auto_task
|
|
14
14
|
from .ytdlp import YtdlpDownloader
|
|
@@ -50,7 +50,6 @@ class StreamDownloader:
|
|
|
50
50
|
if not file_name:
|
|
51
51
|
file_name = generate_file_name(url)
|
|
52
52
|
file_path = self.cache_dir / file_name
|
|
53
|
-
|
|
54
53
|
# 如果文件存在,则直接返回
|
|
55
54
|
if file_path.exists():
|
|
56
55
|
return file_path
|
|
@@ -61,11 +60,15 @@ class StreamDownloader:
|
|
|
61
60
|
async with self.client.stream("GET", url, headers=headers, follow_redirects=True) as response:
|
|
62
61
|
response.raise_for_status()
|
|
63
62
|
content_length = response.headers.get("Content-Length")
|
|
64
|
-
content_length = int(content_length) if content_length else
|
|
63
|
+
content_length = int(content_length) if content_length else 0
|
|
64
|
+
|
|
65
|
+
if content_length == 0:
|
|
66
|
+
logger.warning(f"媒体 url: {url}, 大小为 0, 取消下载")
|
|
67
|
+
raise ZeroSizeException
|
|
65
68
|
|
|
66
|
-
if
|
|
67
|
-
logger.warning(f"{
|
|
68
|
-
raise
|
|
69
|
+
if (file_size := content_length / 1024 / 1024) > pconfig.max_size:
|
|
70
|
+
logger.warning(f"媒体 url: {url} 大小 {file_size:.2f} MB 超过 {pconfig.max_size} MB, 取消下载")
|
|
71
|
+
raise SizeLimitException
|
|
69
72
|
|
|
70
73
|
with self.get_progress_bar(file_name, content_length) as bar:
|
|
71
74
|
async with aiofiles.open(file_path, "wb") as file:
|
|
@@ -13,6 +13,7 @@ def auto_task(func: Callable[P, Coroutine[Any, Any, T]]) -> Callable[P, Task[T]]
|
|
|
13
13
|
@wraps(func)
|
|
14
14
|
def wrapper(*args: P.args, **kwargs: P.kwargs) -> Task[T]:
|
|
15
15
|
coro = func(*args, **kwargs)
|
|
16
|
-
|
|
16
|
+
name = " | ".join(str(arg) for arg in args if isinstance(arg, str))
|
|
17
|
+
return create_task(coro, name=func.__name__ + " | " + name)
|
|
17
18
|
|
|
18
19
|
return wrapper
|
|
@@ -7,7 +7,7 @@ from msgspec import Struct
|
|
|
7
7
|
import yt_dlp
|
|
8
8
|
|
|
9
9
|
from ..config import pconfig
|
|
10
|
-
from ..exception import ParseException
|
|
10
|
+
from ..exception import DurationLimitException, ParseException
|
|
11
11
|
from ..utils import LimitedSizeDict, generate_file_name
|
|
12
12
|
from .task import auto_task
|
|
13
13
|
|
|
@@ -39,7 +39,7 @@ class YtdlpDownloader:
|
|
|
39
39
|
"""YtdlpDownloader class"""
|
|
40
40
|
|
|
41
41
|
def __init__(self):
|
|
42
|
-
self.
|
|
42
|
+
self._video_info_mapping = LimitedSizeDict[str, VideoInfo]()
|
|
43
43
|
self._ydl_extract_base_opts: dict[str, Any] = {
|
|
44
44
|
"quiet": True,
|
|
45
45
|
"skip_download": True,
|
|
@@ -60,9 +60,9 @@ class YtdlpDownloader:
|
|
|
60
60
|
Returns:
|
|
61
61
|
dict[str, str]: video info
|
|
62
62
|
"""
|
|
63
|
-
|
|
64
|
-
if
|
|
65
|
-
return
|
|
63
|
+
video_info = self._video_info_mapping.get(url, None)
|
|
64
|
+
if video_info:
|
|
65
|
+
return video_info
|
|
66
66
|
ydl_opts = {} | self._ydl_extract_base_opts
|
|
67
67
|
|
|
68
68
|
if cookiefile:
|
|
@@ -72,8 +72,9 @@ class YtdlpDownloader:
|
|
|
72
72
|
info_dict = await asyncio.to_thread(ydl.extract_info, url, download=False)
|
|
73
73
|
if not info_dict:
|
|
74
74
|
raise ParseException("获取视频信息失败")
|
|
75
|
+
|
|
75
76
|
video_info = msgspec.convert(info_dict, VideoInfo)
|
|
76
|
-
self.
|
|
77
|
+
self._video_info_mapping[url] = video_info
|
|
77
78
|
return video_info
|
|
78
79
|
|
|
79
80
|
@auto_task
|
|
@@ -87,8 +88,11 @@ class YtdlpDownloader:
|
|
|
87
88
|
Returns:
|
|
88
89
|
Path: video file path
|
|
89
90
|
"""
|
|
90
|
-
|
|
91
|
-
duration =
|
|
91
|
+
video_info = await self.extract_video_info(url, cookiefile)
|
|
92
|
+
duration = video_info.duration
|
|
93
|
+
if duration > pconfig.duration_maximum:
|
|
94
|
+
raise DurationLimitException
|
|
95
|
+
|
|
92
96
|
video_path = pconfig.cache_dir / generate_file_name(url, ".mp4")
|
|
93
97
|
if video_path.exists():
|
|
94
98
|
return video_path
|
|
@@ -121,6 +125,7 @@ class YtdlpDownloader:
|
|
|
121
125
|
audio_path = pconfig.cache_dir / f"{file_name}.flac"
|
|
122
126
|
if audio_path.exists():
|
|
123
127
|
return audio_path
|
|
128
|
+
|
|
124
129
|
ydl_opts = {
|
|
125
130
|
"outtmpl": f"{pconfig.cache_dir / file_name}.%(ext)s",
|
|
126
131
|
"format": "bestaudio/best",
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
class ParseException(Exception):
|
|
2
|
+
"""异常基类"""
|
|
3
|
+
|
|
4
|
+
def __init__(self, message: str):
|
|
5
|
+
self.message = message
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DownloadException(ParseException):
|
|
9
|
+
"""下载异常"""
|
|
10
|
+
|
|
11
|
+
def __init__(self, message: str | None = None):
|
|
12
|
+
self.message = message or "媒体下载失败"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DownloadLimitException(DownloadException):
|
|
16
|
+
"""下载超过限制异常"""
|
|
17
|
+
|
|
18
|
+
def __init__(self):
|
|
19
|
+
raise NotImplementedError
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SizeLimitException(DownloadLimitException):
|
|
23
|
+
"""下载大小超过限制异常"""
|
|
24
|
+
|
|
25
|
+
def __init__(self):
|
|
26
|
+
self.message = "媒体大小超过配置限制,取消下载"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DurationLimitException(DownloadLimitException):
|
|
30
|
+
"""下载时长超过限制异常"""
|
|
31
|
+
|
|
32
|
+
def __init__(self):
|
|
33
|
+
self.message = "媒体时长超过配置限制,取消下载"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ZeroSizeException(DownloadException):
|
|
37
|
+
"""下载大小为 0 异常"""
|
|
38
|
+
|
|
39
|
+
def __init__(self):
|
|
40
|
+
self.message = "媒体大小为 0, 取消下载"
|
{nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/helper.py
RENAMED
|
@@ -7,19 +7,19 @@ from nonebot_plugin_alconna.uniseg import Segment, UniMessage, Voice
|
|
|
7
7
|
from nonebot_plugin_alconna.uniseg.segment import CustomNode, Reference
|
|
8
8
|
|
|
9
9
|
from .config import pconfig
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
ForwardNodeInner = str | Segment | UniMessage
|
|
12
|
+
"""转发消息节点内部允许的类型"""
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class UniHelper:
|
|
14
16
|
@staticmethod
|
|
15
|
-
def construct_forward_message(
|
|
16
|
-
segments: Sequence[str | Segment | UniMessage], user_id: str | None = None
|
|
17
|
-
) -> Reference:
|
|
17
|
+
def construct_forward_message(segments: Sequence[ForwardNodeInner], user_id: str | None = None) -> Reference:
|
|
18
18
|
"""构造转发消息
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
21
|
user_id (str): 用户ID
|
|
22
|
-
segments (Sequence[
|
|
22
|
+
segments (Sequence[ForwardNode]): 消息段
|
|
23
23
|
|
|
24
24
|
Returns:
|
|
25
25
|
Reference: 转发消息
|
|
@@ -39,23 +39,6 @@ class UniHelper:
|
|
|
39
39
|
|
|
40
40
|
return Reference(nodes=nodes)
|
|
41
41
|
|
|
42
|
-
@classmethod
|
|
43
|
-
async def send_segments(cls, segments: Sequence[Segment | str]) -> None:
|
|
44
|
-
"""发送消息段
|
|
45
|
-
|
|
46
|
-
Args:
|
|
47
|
-
segments (Sequence[Segment | str]): 消息段
|
|
48
|
-
"""
|
|
49
|
-
|
|
50
|
-
if len(segments) > 2:
|
|
51
|
-
forward_msg = cls.construct_forward_message(segments)
|
|
52
|
-
await UniMessage([forward_msg]).send()
|
|
53
|
-
|
|
54
|
-
else:
|
|
55
|
-
segments = list(segments)
|
|
56
|
-
segments[:-1] = [Text(seg + "\n") if isinstance(seg, str) else seg for seg in segments[:-1]]
|
|
57
|
-
await UniMessage(segments).send()
|
|
58
|
-
|
|
59
42
|
@staticmethod
|
|
60
43
|
def img_seg(img_path: Path | None = None, raw: bytes | None = None) -> Image:
|
|
61
44
|
"""获取图片 Seg
|
|
@@ -103,7 +86,7 @@ class UniHelper:
|
|
|
103
86
|
# 检测文件大小
|
|
104
87
|
file_size_byte_count = int(video_path.stat().st_size)
|
|
105
88
|
if file_size_byte_count == 0:
|
|
106
|
-
|
|
89
|
+
return Text("视频文件大小为 0")
|
|
107
90
|
elif file_size_byte_count > 100 * 1024 * 1024:
|
|
108
91
|
# 转为文件 Seg
|
|
109
92
|
return cls.file_seg(video_path, display_name=video_path.name)
|
|
@@ -62,15 +62,11 @@ async def _(
|
|
|
62
62
|
await _message_reaction(event, "resolving")
|
|
63
63
|
|
|
64
64
|
cache_key = matched.group(0)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# 获取对应平台 parser
|
|
69
|
-
parser = KEYWORD_PARSER_MAP
|
|
70
|
-
|
|
71
|
-
if parser is None:
|
|
72
|
-
logger.warning("没有找到对应平台的 Parser")
|
|
73
|
-
return
|
|
65
|
+
# 1. 获取缓存结果
|
|
66
|
+
result = RESULT_CACHE.get(cache_key)
|
|
67
|
+
if result is None:
|
|
68
|
+
# 2. 获取对应平台 parser
|
|
69
|
+
parser = KEYWORD_PARSER_MAP[keyword]
|
|
74
70
|
|
|
75
71
|
try:
|
|
76
72
|
result = await parser.parse(matched)
|
|
@@ -78,16 +74,23 @@ async def _(
|
|
|
78
74
|
# await UniMessage(str(e)).send()
|
|
79
75
|
await _message_reaction(event, "fail")
|
|
80
76
|
raise
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
77
|
+
logger.debug(f"解析结果: {result}")
|
|
78
|
+
else:
|
|
79
|
+
logger.debug(f"命中缓存: {cache_key}, 结果: {result}")
|
|
84
80
|
|
|
85
81
|
# 3. 渲染内容消息并发送
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
82
|
+
try:
|
|
83
|
+
renderer = get_renderer(result.platform.name)
|
|
84
|
+
async for message in renderer.render_messages(result):
|
|
85
|
+
await message.send()
|
|
86
|
+
except Exception:
|
|
87
|
+
await _message_reaction(event, "fail")
|
|
88
|
+
raise
|
|
89
|
+
|
|
90
|
+
# 4. 无 raise 再缓存解析结果
|
|
91
|
+
RESULT_CACHE[cache_key] = result
|
|
92
|
+
|
|
93
|
+
# 5. 添加成功的消息响应
|
|
91
94
|
await _message_reaction(event, "done")
|
|
92
95
|
|
|
93
96
|
|
|
@@ -112,47 +112,6 @@ def extract_msg_text(message: UniMsg, state: T_State) -> None:
|
|
|
112
112
|
state[R_EXTRACT_KEY] = text
|
|
113
113
|
|
|
114
114
|
|
|
115
|
-
class UrlKeywordsRule:
|
|
116
|
-
"""检查消息是否含有关键词 增强版"""
|
|
117
|
-
|
|
118
|
-
__slots__ = ("keywords",)
|
|
119
|
-
|
|
120
|
-
def __init__(self, *keywords: str):
|
|
121
|
-
self.keywords = keywords
|
|
122
|
-
|
|
123
|
-
def __repr__(self) -> str:
|
|
124
|
-
return f"UrlKeywords(keywords={self.keywords})"
|
|
125
|
-
|
|
126
|
-
def __eq__(self, other: object) -> bool:
|
|
127
|
-
return isinstance(other, UrlKeywordsRule) and frozenset(self.keywords) == frozenset(other.keywords)
|
|
128
|
-
|
|
129
|
-
def __hash__(self) -> int:
|
|
130
|
-
return hash(frozenset(self.keywords))
|
|
131
|
-
|
|
132
|
-
async def __call__(self, state: T_State, text: str = ExtractText()) -> bool:
|
|
133
|
-
if not text:
|
|
134
|
-
return False
|
|
135
|
-
if key := next((k for k in self.keywords if k in text), None):
|
|
136
|
-
state[R_KEYWORD_KEY] = key
|
|
137
|
-
return True
|
|
138
|
-
return False
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def url_keywords(*keywords: str) -> Rule:
|
|
142
|
-
return Rule(UrlKeywordsRule(*keywords))
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def on_url_keyword(*keywords: str, priority: int = 5) -> type[Matcher]:
|
|
146
|
-
matcher = Matcher.new(
|
|
147
|
-
"message",
|
|
148
|
-
is_not_in_disabled_groups & url_keywords(*keywords),
|
|
149
|
-
priority=priority,
|
|
150
|
-
block=True,
|
|
151
|
-
source=get_matcher_source(1),
|
|
152
|
-
)
|
|
153
|
-
return matcher
|
|
154
|
-
|
|
155
|
-
|
|
156
115
|
class KeyPatternList(list[tuple[str, re.Pattern[str]]]):
|
|
157
116
|
def __init__(self, *args: tuple[str, str | re.Pattern[str]]):
|
|
158
117
|
super().__init__()
|
|
@@ -2,11 +2,12 @@ import asyncio
|
|
|
2
2
|
import json
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
import re
|
|
5
|
+
import time
|
|
5
6
|
from typing import ClassVar
|
|
6
7
|
from typing_extensions import override
|
|
7
8
|
|
|
8
9
|
import aiofiles
|
|
9
|
-
import
|
|
10
|
+
from httpx import AsyncClient, HTTPError
|
|
10
11
|
from nonebot import logger
|
|
11
12
|
|
|
12
13
|
from ..config import pconfig
|
|
@@ -14,8 +15,7 @@ from ..constants import COMMON_TIMEOUT, DOWNLOAD_TIMEOUT
|
|
|
14
15
|
from ..download import DOWNLOADER
|
|
15
16
|
from ..exception import DownloadException, ParseException
|
|
16
17
|
from ..utils import safe_unlink
|
|
17
|
-
from .base import BaseParser
|
|
18
|
-
from .data import Author, ParseResult, Platform, VideoContent
|
|
18
|
+
from .base import BaseParser, Platform
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class AcfunParser(BaseParser):
|
|
@@ -40,10 +40,11 @@ class AcfunParser(BaseParser):
|
|
|
40
40
|
Returns:
|
|
41
41
|
tuple: (m3u8_url, title, description, author, upload_time)
|
|
42
42
|
"""
|
|
43
|
+
|
|
43
44
|
# 拼接查询参数
|
|
44
45
|
url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1"
|
|
45
46
|
|
|
46
|
-
async with
|
|
47
|
+
async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
|
|
47
48
|
response = await client.get(url)
|
|
48
49
|
response.raise_for_status()
|
|
49
50
|
raw = response.text
|
|
@@ -88,7 +89,7 @@ class AcfunParser(BaseParser):
|
|
|
88
89
|
max_size_in_bytes = pconfig.max_size * 1024 * 1024
|
|
89
90
|
async with (
|
|
90
91
|
aiofiles.open(video_file, "wb") as f,
|
|
91
|
-
|
|
92
|
+
AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client,
|
|
92
93
|
):
|
|
93
94
|
total_size = 0
|
|
94
95
|
with DOWNLOADER.get_progress_bar(video_file.name) as bar:
|
|
@@ -101,7 +102,7 @@ class AcfunParser(BaseParser):
|
|
|
101
102
|
if total_size > max_size_in_bytes:
|
|
102
103
|
# 直接截断
|
|
103
104
|
break
|
|
104
|
-
except
|
|
105
|
+
except HTTPError:
|
|
105
106
|
await safe_unlink(video_file)
|
|
106
107
|
logger.exception("acfun 视频下载失败")
|
|
107
108
|
raise DownloadException("acfun 视频下载失败")
|
|
@@ -116,7 +117,7 @@ class AcfunParser(BaseParser):
|
|
|
116
117
|
Returns:
|
|
117
118
|
list[str]: 视频链接
|
|
118
119
|
"""
|
|
119
|
-
async with
|
|
120
|
+
async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
|
|
120
121
|
response = await client.get(m3u8_url)
|
|
121
122
|
m3u8_file = response.text
|
|
122
123
|
# 分离ts文件链接
|
|
@@ -135,14 +136,14 @@ class AcfunParser(BaseParser):
|
|
|
135
136
|
return m3u8_full_urls
|
|
136
137
|
|
|
137
138
|
@override
|
|
138
|
-
async def parse(self, matched: re.Match[str])
|
|
139
|
+
async def parse(self, matched: re.Match[str]):
|
|
139
140
|
"""解析 URL 获取内容信息并下载资源
|
|
140
141
|
|
|
141
142
|
Args:
|
|
142
143
|
matched: 正则表达式匹配对象,由平台对应的模式匹配得到
|
|
143
144
|
|
|
144
145
|
Returns:
|
|
145
|
-
ParseResult:
|
|
146
|
+
ParseResult: 解析结果
|
|
146
147
|
|
|
147
148
|
Raises:
|
|
148
149
|
ParseException: 解析失败时抛出
|
|
@@ -152,19 +153,19 @@ class AcfunParser(BaseParser):
|
|
|
152
153
|
url = f"https://www.acfun.cn/v/ac{acid}"
|
|
153
154
|
|
|
154
155
|
m3u8_url, title, description, author, upload_time = await self.parse_video_info(url)
|
|
156
|
+
author = self.create_author(author) if author else None
|
|
155
157
|
|
|
156
|
-
|
|
158
|
+
# 2024-12-1 -> timestamp
|
|
159
|
+
timestamp = int(time.mktime(time.strptime(upload_time, "%Y-%m-%d")))
|
|
160
|
+
text = f"简介: {description}"
|
|
157
161
|
|
|
158
162
|
# 下载视频
|
|
159
163
|
video_task = asyncio.create_task(self.download_video(m3u8_url, acid))
|
|
160
164
|
|
|
161
|
-
extra = {}
|
|
162
|
-
if extra_info:
|
|
163
|
-
extra["info"] = extra_info
|
|
164
|
-
|
|
165
165
|
return self.result(
|
|
166
166
|
title=title,
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
167
|
+
text=text,
|
|
168
|
+
author=author,
|
|
169
|
+
timestamp=timestamp,
|
|
170
|
+
contents=[self.create_video_content(video_task)],
|
|
170
171
|
)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Parser 基类定义"""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from asyncio import Task
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import re
|
|
8
|
+
from typing import ClassVar
|
|
9
|
+
from typing_extensions import Unpack
|
|
10
|
+
|
|
11
|
+
from ..constants import ANDROID_HEADER, COMMON_HEADER, COMMON_TIMEOUT, IOS_HEADER
|
|
12
|
+
from .data import ParseResult, ParseResultKwargs, Platform
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseParser(ABC):
|
|
16
|
+
"""所有平台 Parser 的抽象基类
|
|
17
|
+
|
|
18
|
+
子类必须实现:
|
|
19
|
+
- platform: 平台信息(包含名称和显示名称)
|
|
20
|
+
- patterns: URL 正则表达式模式列表
|
|
21
|
+
- parse: 解析 URL 的方法(接收正则表达式对象)
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
# 类变量:存储所有已注册的 Parser 类
|
|
25
|
+
_registry: ClassVar[list[type["BaseParser"]]] = []
|
|
26
|
+
|
|
27
|
+
platform: ClassVar[Platform]
|
|
28
|
+
""" 平台信息(包含名称和显示名称) """
|
|
29
|
+
|
|
30
|
+
patterns: ClassVar[list[tuple[str, str]]]
|
|
31
|
+
""" URL 正则表达式模式列表 [(keyword, pattern), ...] """
|
|
32
|
+
|
|
33
|
+
def __init__(self):
|
|
34
|
+
self.headers = COMMON_HEADER.copy()
|
|
35
|
+
self.ios_headers = IOS_HEADER.copy()
|
|
36
|
+
self.android_headers = ANDROID_HEADER.copy()
|
|
37
|
+
self.timeout = COMMON_TIMEOUT
|
|
38
|
+
|
|
39
|
+
def __init_subclass__(cls, **kwargs):
|
|
40
|
+
"""自动注册子类到 _registry"""
|
|
41
|
+
super().__init_subclass__(**kwargs)
|
|
42
|
+
if ABC not in cls.__bases__: # 跳过抽象类
|
|
43
|
+
BaseParser._registry.append(cls)
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def get_all_subclass(cls) -> list[type["BaseParser"]]:
|
|
47
|
+
"""获取所有已注册的 Parser 类"""
|
|
48
|
+
return cls._registry
|
|
49
|
+
|
|
50
|
+
@abstractmethod
|
|
51
|
+
async def parse(self, matched: re.Match[str]) -> ParseResult:
|
|
52
|
+
"""解析 URL 获取内容信息并下载资源
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
matched: 正则表达式匹配对象,由平台对应的模式匹配得到
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
ParseResult: 解析结果(已下载资源,包含 Path)
|
|
59
|
+
|
|
60
|
+
Raises:
|
|
61
|
+
ParseException: 解析失败时抛出
|
|
62
|
+
"""
|
|
63
|
+
raise NotImplementedError
|
|
64
|
+
|
|
65
|
+
@classmethod
|
|
66
|
+
def search_url(cls, url: str) -> re.Match[str]:
|
|
67
|
+
from nonebot import logger
|
|
68
|
+
|
|
69
|
+
"""搜索 URL 匹配模式"""
|
|
70
|
+
for keyword, pattern in cls.patterns:
|
|
71
|
+
if keyword not in url:
|
|
72
|
+
continue
|
|
73
|
+
if searched := re.search(pattern, url):
|
|
74
|
+
return searched
|
|
75
|
+
logger.debug(f"keyword '{keyword}' is in '{url}', but not matched")
|
|
76
|
+
raise ValueError(f"无法匹配 {url}")
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def result(cls, **kwargs: Unpack[ParseResultKwargs]) -> ParseResult:
|
|
80
|
+
"""构建解析结果"""
|
|
81
|
+
return ParseResult(platform=cls.platform, **kwargs)
|
|
82
|
+
|
|
83
|
+
@staticmethod
|
|
84
|
+
async def get_redirect_url(url: str, headers: dict[str, str] | None = None) -> str:
|
|
85
|
+
"""获取重定向后的URL"""
|
|
86
|
+
from httpx import AsyncClient
|
|
87
|
+
|
|
88
|
+
headers = headers or COMMON_HEADER.copy()
|
|
89
|
+
async with AsyncClient(headers=headers, verify=False, follow_redirects=False, timeout=COMMON_TIMEOUT) as client:
|
|
90
|
+
response = await client.get(url)
|
|
91
|
+
if response.status_code >= 400:
|
|
92
|
+
response.raise_for_status()
|
|
93
|
+
return response.headers.get("Location", url)
|
|
94
|
+
|
|
95
|
+
def create_author(self, name: str, avatar_url: str | None = None, description: str | None = None):
|
|
96
|
+
"""创建作者对象"""
|
|
97
|
+
from ..download import DOWNLOADER
|
|
98
|
+
from .data import Author
|
|
99
|
+
|
|
100
|
+
avatar_task = None
|
|
101
|
+
if avatar_url:
|
|
102
|
+
avatar_task = DOWNLOADER.download_img(avatar_url, ext_headers=self.headers)
|
|
103
|
+
return Author(name=name, avatar=avatar_task, description=description)
|
|
104
|
+
|
|
105
|
+
def create_video_content(self, url_or_task: str | Task[Path], cover_url: str | None = None, duration: float = 0.0):
|
|
106
|
+
"""创建视频内容"""
|
|
107
|
+
from ..download import DOWNLOADER
|
|
108
|
+
from .data import VideoContent
|
|
109
|
+
|
|
110
|
+
cover_task = None
|
|
111
|
+
if cover_url:
|
|
112
|
+
cover_task = DOWNLOADER.download_img(cover_url, ext_headers=self.headers)
|
|
113
|
+
if isinstance(url_or_task, str):
|
|
114
|
+
video_task = DOWNLOADER.download_video(url_or_task, ext_headers=self.headers)
|
|
115
|
+
else:
|
|
116
|
+
video_task = url_or_task
|
|
117
|
+
return VideoContent(video_task, cover_task, duration)
|
|
118
|
+
|
|
119
|
+
def create_image_contents(self, image_urls: Sequence[str]):
|
|
120
|
+
"""创建图片内容列表"""
|
|
121
|
+
from ..download import DOWNLOADER
|
|
122
|
+
from .data import ImageContent
|
|
123
|
+
|
|
124
|
+
img_tasks = [DOWNLOADER.download_img(url, ext_headers=self.headers) for url in image_urls]
|
|
125
|
+
return [ImageContent(task) for task in img_tasks]
|
|
126
|
+
|
|
127
|
+
def create_dynamic_contents(self, dynamic_urls: Sequence[str]):
|
|
128
|
+
"""创建动态内容列表"""
|
|
129
|
+
from ..download import DOWNLOADER
|
|
130
|
+
from .data import DynamicContent
|
|
131
|
+
|
|
132
|
+
dynamic_tasks = [DOWNLOADER.download_video(url, ext_headers=self.headers) for url in dynamic_urls]
|
|
133
|
+
return [DynamicContent(task) for task in dynamic_tasks]
|
|
134
|
+
|
|
135
|
+
def create_audio_content(self, url_or_task: str | Task[Path], duration: float = 0.0):
|
|
136
|
+
"""创建音频内容"""
|
|
137
|
+
from ..download import DOWNLOADER
|
|
138
|
+
from .data import AudioContent
|
|
139
|
+
|
|
140
|
+
if isinstance(url_or_task, str):
|
|
141
|
+
audio_task = DOWNLOADER.download_audio(url_or_task, ext_headers=self.headers)
|
|
142
|
+
else:
|
|
143
|
+
audio_task = url_or_task
|
|
144
|
+
return AudioContent(audio_task, duration)
|