nonebot-plugin-parser 2.0.0__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/PKG-INFO +4 -3
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/README.md +3 -2
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/pyproject.toml +2 -2
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/config.py +7 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/data.py +2 -4
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/douyin/video.py +5 -5
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/kuaishou.py +1 -1
- nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/parsers/twitter.py +121 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/weibo.py +11 -3
- nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/renders/common.py +973 -0
- nonebot_plugin_parser-2.0.0/src/nonebot_plugin_parser/parsers/twitter.py +0 -109
- nonebot_plugin_parser-2.0.0/src/nonebot_plugin_parser/renders/common.py +0 -404
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/constants.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/download/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/download/task.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/download/ytdlp.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/exception.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/helper.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/matchers/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/matchers/preprocess.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/acfun.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/base.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/douyin/slides.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/tiktok.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/xiaohongshu.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/youtube.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/base.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/default.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/fonts/HYSongYunLangHeiW-1.ttf +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
- {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: nonebot-plugin-parser
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun
|
|
5
5
|
Keywords: nonebot,nonebot2,video,bilibili,youtube,tiktok,twitter,kuaishou,acfun,weibo,xiaohongshu,nga,douyin
|
|
6
6
|
Author: fllesser
|
|
@@ -150,9 +150,10 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
|
|
|
150
150
|
| parser_need_upload | 否 | False | 音频解析,是否需要上传群文件 |
|
|
151
151
|
| parser_use_base64 | 否 | False | 视频,图片,音频是否使用 base64 发送,注意:编解码和传输 base64 会占用更多的内存,性能和带宽, 甚至可能会使 websocket 连接崩溃,因此该配置项仅推荐 nonebot 和 协议端不在同一机器的用户配置 |
|
|
152
152
|
| parser_duration_maximum | 否 | 480 | 视频最大解析时长,单位:_秒_ |
|
|
153
|
-
| parser_max_size | 否 |
|
|
153
|
+
| parser_max_size | 否 | 90 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 |
|
|
154
154
|
| parser_disabled_platforms | 否 | [] | 全局禁止的解析,示例 parser_disabled_platforms=["bilibili", "douyin"] 表示禁止了哔哩哔哩和抖音, 请根据自己需求填写["bilibili", "douyin", "kuaishou", "twitter", "youtube", "acfun", "tiktok", "weibo", "xiaohongshu"] |
|
|
155
|
-
|
|
|
155
|
+
| parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit) |
|
|
156
|
+
| parser_append_url | 否 | False | 是否在解析结果中附加原始URL |
|
|
156
157
|
|
|
157
158
|
## 🎉 使用
|
|
158
159
|
### 指令表
|
|
@@ -120,9 +120,10 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
|
|
|
120
120
|
| parser_need_upload | 否 | False | 音频解析,是否需要上传群文件 |
|
|
121
121
|
| parser_use_base64 | 否 | False | 视频,图片,音频是否使用 base64 发送,注意:编解码和传输 base64 会占用更多的内存,性能和带宽, 甚至可能会使 websocket 连接崩溃,因此该配置项仅推荐 nonebot 和 协议端不在同一机器的用户配置 |
|
|
122
122
|
| parser_duration_maximum | 否 | 480 | 视频最大解析时长,单位:_秒_ |
|
|
123
|
-
| parser_max_size | 否 |
|
|
123
|
+
| parser_max_size | 否 | 90 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 |
|
|
124
124
|
| parser_disabled_platforms | 否 | [] | 全局禁止的解析,示例 parser_disabled_platforms=["bilibili", "douyin"] 表示禁止了哔哩哔哩和抖音, 请根据自己需求填写["bilibili", "douyin", "kuaishou", "twitter", "youtube", "acfun", "tiktok", "weibo", "xiaohongshu"] |
|
|
125
|
-
|
|
|
125
|
+
| parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit) |
|
|
126
|
+
| parser_append_url | 否 | False | 是否在解析结果中附加原始URL |
|
|
126
127
|
|
|
127
128
|
## 🎉 使用
|
|
128
129
|
### 指令表
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nonebot-plugin-parser"
|
|
3
|
-
version = "2.0.0"
|
|
3
|
+
version = "2.0.1"
|
|
4
4
|
description = "NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun"
|
|
5
5
|
authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
|
|
6
6
|
readme = "README.md"
|
|
@@ -185,7 +185,7 @@ build-backend = "uv_build"
|
|
|
185
185
|
|
|
186
186
|
|
|
187
187
|
[tool.bumpversion]
|
|
188
|
-
current_version = "2.0.0"
|
|
188
|
+
current_version = "2.0.1"
|
|
189
189
|
commit = true
|
|
190
190
|
message = "🔖 release: bump vesion from {current_version} to {new_version}"
|
|
191
191
|
tag = true
|
{nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/config.py
RENAMED
|
@@ -43,6 +43,8 @@ class Config(BaseModel):
|
|
|
43
43
|
"""资源最大大小 默认 100 单位 MB"""
|
|
44
44
|
parser_duration_maximum: int = 480
|
|
45
45
|
"""视频/音频最大时长"""
|
|
46
|
+
parser_append_url: bool = False
|
|
47
|
+
"""是否在解析结果中附加原始URL"""
|
|
46
48
|
parser_disabled_platforms: list[PlatformNames] = []
|
|
47
49
|
"""禁止的解析器"""
|
|
48
50
|
parser_bili_video_codes: list[VideoCodecs] = [VideoCodecs.AVC, VideoCodecs.AV1, VideoCodecs.HEV]
|
|
@@ -120,6 +122,11 @@ class Config(BaseModel):
|
|
|
120
122
|
"""是否使用 base64 编码发送图片,音频,视频"""
|
|
121
123
|
return self.parser_use_base64
|
|
122
124
|
|
|
125
|
+
@property
|
|
126
|
+
def append_url(self) -> bool:
|
|
127
|
+
"""是否在解析结果中附加原始URL"""
|
|
128
|
+
return self.parser_append_url
|
|
129
|
+
|
|
123
130
|
|
|
124
131
|
pconfig: Config = get_plugin_config(Config)
|
|
125
132
|
"""配置"""
|
|
@@ -195,8 +195,6 @@ class ParseResult:
|
|
|
195
195
|
for cont in self.contents:
|
|
196
196
|
if isinstance(cont, VideoContent):
|
|
197
197
|
return await cont.get_cover_path()
|
|
198
|
-
if isinstance(cont, ImageContent):
|
|
199
|
-
return await cont.get_path()
|
|
200
198
|
return None
|
|
201
199
|
|
|
202
200
|
async def contents_to_segs(self):
|
|
@@ -261,7 +259,7 @@ class ParseData:
|
|
|
261
259
|
url: str | None = None
|
|
262
260
|
video_url: str | None = None
|
|
263
261
|
cover_url: str | None = None
|
|
264
|
-
images_urls: list[str]
|
|
265
|
-
dynamic_urls: list[str]
|
|
262
|
+
images_urls: list[str] = field(default_factory=list)
|
|
263
|
+
dynamic_urls: list[str] = field(default_factory=list)
|
|
266
264
|
extra: dict[str, Any] = field(default_factory=dict)
|
|
267
265
|
repost: "ParseData | None" = None
|
|
@@ -43,8 +43,8 @@ class VideoData(Struct):
|
|
|
43
43
|
video: Video | None = None
|
|
44
44
|
|
|
45
45
|
@property
|
|
46
|
-
def images_urls(self) -> list[str]
|
|
47
|
-
return [image.url_list[0] for image in self.images] if self.images else
|
|
46
|
+
def images_urls(self) -> list[str]:
|
|
47
|
+
return [image.url_list[0] for image in self.images] if self.images else []
|
|
48
48
|
|
|
49
49
|
@property
|
|
50
50
|
def video_url(self) -> str | None:
|
|
@@ -65,14 +65,14 @@ class VideoData(Struct):
|
|
|
65
65
|
@property
|
|
66
66
|
def parse_data(self) -> ParseData:
|
|
67
67
|
"""转换为ParseData对象"""
|
|
68
|
-
|
|
68
|
+
images_urls = self.images_urls
|
|
69
69
|
return ParseData(
|
|
70
70
|
title=self.desc,
|
|
71
71
|
name=self.author.nickname,
|
|
72
72
|
avatar_url=self.avatar_url,
|
|
73
73
|
timestamp=self.create_time,
|
|
74
|
-
images_urls=
|
|
75
|
-
video_url=self.video_url if
|
|
74
|
+
images_urls=images_urls,
|
|
75
|
+
video_url=self.video_url if len(images_urls) == 0 else None,
|
|
76
76
|
cover_url=self.cover_url,
|
|
77
77
|
)
|
|
78
78
|
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any, ClassVar
|
|
3
|
+
|
|
4
|
+
import httpx
|
|
5
|
+
|
|
6
|
+
from ..exception import ParseException
|
|
7
|
+
from .base import BaseParser
|
|
8
|
+
from .data import ParseResult, Platform
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TwitterParser(BaseParser):
|
|
12
|
+
# 平台信息
|
|
13
|
+
platform: ClassVar[Platform] = Platform(name="twitter", display_name="小蓝鸟")
|
|
14
|
+
|
|
15
|
+
# URL 正则表达式模式(keyword, pattern)
|
|
16
|
+
patterns: ClassVar[list[tuple[str, str]]] = [
|
|
17
|
+
("x.com", r"https?://x.com/[0-9-a-zA-Z_]{1,20}/status/([0-9]+)"),
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
async def _req_xdown_api(self, url: str) -> dict[str, Any]:
|
|
21
|
+
headers = {
|
|
22
|
+
"Accept": "application/json, text/plain, */*",
|
|
23
|
+
"Content-Type": "application/x-www-form-urlencoded",
|
|
24
|
+
"Origin": "https://xdown.app",
|
|
25
|
+
"Referer": "https://xdown.app/",
|
|
26
|
+
**self.headers,
|
|
27
|
+
}
|
|
28
|
+
data = {"q": url, "lang": "zh-cn"}
|
|
29
|
+
async with httpx.AsyncClient(headers=headers, timeout=self.timeout) as client:
|
|
30
|
+
url = "https://xdown.app/api/ajaxSearch"
|
|
31
|
+
response = await client.post(url, data=data)
|
|
32
|
+
return response.json()
|
|
33
|
+
|
|
34
|
+
async def parse(self, matched: re.Match[str]) -> ParseResult:
|
|
35
|
+
"""解析 URL 获取内容信息并下载资源
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
matched: 正则表达式匹配对象,由平台对应的模式匹配得到
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
ParseResult: 解析结果(已下载资源,包含 Path)
|
|
42
|
+
|
|
43
|
+
Raises:
|
|
44
|
+
ParseException: 解析失败时抛出
|
|
45
|
+
"""
|
|
46
|
+
# 从匹配对象中获取原始URL
|
|
47
|
+
url = matched.group(0)
|
|
48
|
+
resp = await self._req_xdown_api(url)
|
|
49
|
+
if resp.get("status") != "ok":
|
|
50
|
+
raise ParseException("解析失败")
|
|
51
|
+
|
|
52
|
+
html_content = resp.get("data")
|
|
53
|
+
|
|
54
|
+
if html_content is None:
|
|
55
|
+
raise ParseException("解析失败, 数据为空")
|
|
56
|
+
|
|
57
|
+
data = self.parse_twitter_html(html_content)
|
|
58
|
+
|
|
59
|
+
return self.build_result(data)
|
|
60
|
+
|
|
61
|
+
@classmethod
|
|
62
|
+
def parse_twitter_html(cls, html_content: str):
|
|
63
|
+
"""解析 Twitter HTML 内容
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
html_content (str): Twitter HTML 内容
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
ParseData: 解析数据
|
|
70
|
+
"""
|
|
71
|
+
from bs4 import BeautifulSoup, Tag
|
|
72
|
+
|
|
73
|
+
from .data import ParseData
|
|
74
|
+
|
|
75
|
+
soup = BeautifulSoup(html_content, "html.parser")
|
|
76
|
+
data = ParseData()
|
|
77
|
+
|
|
78
|
+
# 1. 提取缩略图链接
|
|
79
|
+
img_tag = soup.find("img")
|
|
80
|
+
if img_tag and isinstance(img_tag, Tag):
|
|
81
|
+
src = img_tag.get("src")
|
|
82
|
+
if src and isinstance(src, str):
|
|
83
|
+
data.cover_url = src
|
|
84
|
+
|
|
85
|
+
# 2. 提取下载链接
|
|
86
|
+
download_links = soup.find_all("a", class_="tw-button-dl")
|
|
87
|
+
# class="abutton is-success is-fullwidth btn-premium mt-3"
|
|
88
|
+
download_items = soup.find_all("a", class_="abutton")
|
|
89
|
+
for link in download_links + download_items:
|
|
90
|
+
if isinstance(link, Tag) and (href := link.get("href")) and isinstance(href, str):
|
|
91
|
+
href = href
|
|
92
|
+
else:
|
|
93
|
+
continue
|
|
94
|
+
text = link.get_text(strip=True)
|
|
95
|
+
|
|
96
|
+
if "下载图片" in text:
|
|
97
|
+
# 从图片下载链接中提取原始图片URL
|
|
98
|
+
data.images_urls.append(href)
|
|
99
|
+
elif "下载 gif" in text:
|
|
100
|
+
data.dynamic_urls.append(href) # GIF和MP4是同一个文件
|
|
101
|
+
elif "下载 MP4" in text:
|
|
102
|
+
# 从GIF/MP4下载链接中提取原始视频URL
|
|
103
|
+
data.video_url = href
|
|
104
|
+
break
|
|
105
|
+
|
|
106
|
+
# 3. 提取标题
|
|
107
|
+
title_tag = soup.find("h3")
|
|
108
|
+
if title_tag:
|
|
109
|
+
data.title = title_tag.get_text(strip=True)
|
|
110
|
+
|
|
111
|
+
# # 4. 提取Twitter ID
|
|
112
|
+
# twitter_id_input = soup.find("input", {"id": "TwitterId"})
|
|
113
|
+
# if (
|
|
114
|
+
# twitter_id_input
|
|
115
|
+
# and isinstance(twitter_id_input, Tag)
|
|
116
|
+
# and (value := twitter_id_input.get("value"))
|
|
117
|
+
# and isinstance(value, str)
|
|
118
|
+
# ):
|
|
119
|
+
data.name = "暂时无法获取用户名"
|
|
120
|
+
|
|
121
|
+
return data
|
|
@@ -175,7 +175,6 @@ class WeiBoParser(BaseParser):
|
|
|
175
175
|
|
|
176
176
|
# 用 bytes 更稳,避免编码歧义
|
|
177
177
|
weibo_data = msgspec.json.decode(response.content, type=WeiboResponse).data
|
|
178
|
-
url = f"https://weibo.com/{weibo_data.user.id}/{weibo_data.bid}"
|
|
179
178
|
return self.build_result(weibo_data.parse_data)
|
|
180
179
|
|
|
181
180
|
def _base62_encode(self, number: int) -> str:
|
|
@@ -269,7 +268,7 @@ class WeiboData(Struct):
|
|
|
269
268
|
|
|
270
269
|
@property
|
|
271
270
|
def title(self) -> str:
|
|
272
|
-
return self.
|
|
271
|
+
return self.page_info.title if self.page_info else ""
|
|
273
272
|
|
|
274
273
|
@property
|
|
275
274
|
def display_name(self) -> str:
|
|
@@ -303,6 +302,14 @@ class WeiboData(Struct):
|
|
|
303
302
|
return [x.large.url for x in self.pics]
|
|
304
303
|
return []
|
|
305
304
|
|
|
305
|
+
@property
|
|
306
|
+
def url(self) -> str:
|
|
307
|
+
return f"https://weibo.com/{self.user.id}/{self.bid}"
|
|
308
|
+
|
|
309
|
+
@property
|
|
310
|
+
def timestamp(self) -> int:
|
|
311
|
+
return int(time.mktime(time.strptime(self.created_at, "%a %b %d %H:%M:%S %z %Y")))
|
|
312
|
+
|
|
306
313
|
@property
|
|
307
314
|
def parse_data(self) -> ParseData:
|
|
308
315
|
return ParseData(
|
|
@@ -310,10 +317,11 @@ class WeiboData(Struct):
|
|
|
310
317
|
name=self.display_name,
|
|
311
318
|
avatar_url=self.user.profile_image_url,
|
|
312
319
|
text=self.text_content,
|
|
313
|
-
timestamp=
|
|
320
|
+
timestamp=self.timestamp,
|
|
314
321
|
video_url=self.video_url,
|
|
315
322
|
cover_url=self.cover_url,
|
|
316
323
|
images_urls=self.pic_urls,
|
|
324
|
+
url=self.url,
|
|
317
325
|
repost=self.retweeted_status.parse_data if self.retweeted_status else None,
|
|
318
326
|
)
|
|
319
327
|
|