nonebot-plugin-parser 2.0.0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/PKG-INFO +4 -3
  2. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/README.md +3 -2
  3. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/pyproject.toml +2 -2
  4. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/config.py +7 -0
  5. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/data.py +2 -4
  6. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/douyin/video.py +5 -5
  7. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/kuaishou.py +1 -1
  8. nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/parsers/twitter.py +121 -0
  9. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/weibo.py +11 -3
  10. nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/renders/common.py +973 -0
  11. nonebot_plugin_parser-2.0.0/src/nonebot_plugin_parser/parsers/twitter.py +0 -109
  12. nonebot_plugin_parser-2.0.0/src/nonebot_plugin_parser/renders/common.py +0 -404
  13. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/__init__.py +0 -0
  14. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/constants.py +0 -0
  15. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/download/__init__.py +0 -0
  16. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/download/task.py +0 -0
  17. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/download/ytdlp.py +0 -0
  18. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/exception.py +0 -0
  19. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/helper.py +0 -0
  20. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/matchers/__init__.py +0 -0
  21. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
  22. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/matchers/preprocess.py +0 -0
  23. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
  24. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/acfun.py +0 -0
  25. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/base.py +0 -0
  26. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +0 -0
  27. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
  28. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
  29. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
  30. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +0 -0
  31. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/douyin/slides.py +0 -0
  32. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
  33. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/tiktok.py +0 -0
  34. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/xiaohongshu.py +0 -0
  35. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/parsers/youtube.py +0 -0
  36. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
  37. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/base.py +0 -0
  38. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/default.py +0 -0
  39. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/fonts/HYSongYunLangHeiW-1.ttf +0 -0
  40. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
  41. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
  42. {nonebot_plugin_parser-2.0.0 → nonebot_plugin_parser-2.0.1}/src/nonebot_plugin_parser/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nonebot-plugin-parser
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun
5
5
  Keywords: nonebot,nonebot2,video,bilibili,youtube,tiktok,twitter,kuaishou,acfun,weibo,xiaohongshu,nga,douyin
6
6
  Author: fllesser
@@ -150,9 +150,10 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
150
150
  | parser_need_upload | 否 | False | 音频解析,是否需要上传群文件 |
151
151
  | parser_use_base64 | 否 | False | 视频,图片,音频是否使用 base64 发送,注意:编解码和传输 base64 会占用更多的内存,性能和带宽, 甚至可能会使 websocket 连接崩溃,因此该配置项仅推荐 nonebot 和 协议端不在同一机器的用户配置 |
152
152
  | parser_duration_maximum | 否 | 480 | 视频最大解析时长,单位:_秒_ |
153
- | parser_max_size | 否 | 90 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 |
153
+ | parser_max_size | 否 | 90 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 |
154
154
  | parser_disabled_platforms | 否 | [] | 全局禁止的解析,示例 parser_disabled_platforms=["bilibili", "douyin"] 表示禁止了哔哩哔哩和抖, 请根据自己需求填写["bilibili", "douyin", "kuaishou", "twitter", "youtube", "acfun", "tiktok", "weibo", "xiaohongshu"] |
155
- | parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit) |
155
+ | parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit) |
156
+ | parser_append_url | 否 | False | 是否在解析结果中附加原始URL |
156
157
 
157
158
  ## 🎉 使用
158
159
  ### 指令表
@@ -120,9 +120,10 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
120
120
  | parser_need_upload | 否 | False | 音频解析,是否需要上传群文件 |
121
121
  | parser_use_base64 | 否 | False | 视频,图片,音频是否使用 base64 发送,注意:编解码和传输 base64 会占用更多的内存,性能和带宽, 甚至可能会使 websocket 连接崩溃,因此该配置项仅推荐 nonebot 和 协议端不在同一机器的用户配置 |
122
122
  | parser_duration_maximum | 否 | 480 | 视频最大解析时长,单位:_秒_ |
123
- | parser_max_size | 否 | 90 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 |
123
+ | parser_max_size | 否 | 90 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 |
124
124
  | parser_disabled_platforms | 否 | [] | 全局禁止的解析,示例 parser_disabled_platforms=["bilibili", "douyin"] 表示禁止了哔哩哔哩和抖, 请根据自己需求填写["bilibili", "douyin", "kuaishou", "twitter", "youtube", "acfun", "tiktok", "weibo", "xiaohongshu"] |
125
- | parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit) |
125
+ | parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit) |
126
+ | parser_append_url | 否 | False | 是否在解析结果中附加原始URL |
126
127
 
127
128
  ## 🎉 使用
128
129
  ### 指令表
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nonebot-plugin-parser"
3
- version = "2.0.0"
3
+ version = "2.0.1"
4
4
  description = "NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun"
5
5
  authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
6
6
  readme = "README.md"
@@ -185,7 +185,7 @@ build-backend = "uv_build"
185
185
 
186
186
 
187
187
  [tool.bumpversion]
188
- current_version = "2.0.0"
188
+ current_version = "2.0.1"
189
189
  commit = true
190
190
  message = "🔖 release: bump vesion from {current_version} to {new_version}"
191
191
  tag = true
@@ -43,6 +43,8 @@ class Config(BaseModel):
43
43
  """资源最大大小 默认 100 单位 MB"""
44
44
  parser_duration_maximum: int = 480
45
45
  """视频/音频最大时长"""
46
+ parser_append_url: bool = False
47
+ """是否在解析结果中附加原始URL"""
46
48
  parser_disabled_platforms: list[PlatformNames] = []
47
49
  """禁止的解析器"""
48
50
  parser_bili_video_codes: list[VideoCodecs] = [VideoCodecs.AVC, VideoCodecs.AV1, VideoCodecs.HEV]
@@ -120,6 +122,11 @@ class Config(BaseModel):
120
122
  """是否使用 base64 编码发送图片,音频,视频"""
121
123
  return self.parser_use_base64
122
124
 
125
+ @property
126
+ def append_url(self) -> bool:
127
+ """是否在解析结果中附加原始URL"""
128
+ return self.parser_append_url
129
+
123
130
 
124
131
  pconfig: Config = get_plugin_config(Config)
125
132
  """配置"""
@@ -195,8 +195,6 @@ class ParseResult:
195
195
  for cont in self.contents:
196
196
  if isinstance(cont, VideoContent):
197
197
  return await cont.get_cover_path()
198
- if isinstance(cont, ImageContent):
199
- return await cont.get_path()
200
198
  return None
201
199
 
202
200
  async def contents_to_segs(self):
@@ -261,7 +259,7 @@ class ParseData:
261
259
  url: str | None = None
262
260
  video_url: str | None = None
263
261
  cover_url: str | None = None
264
- images_urls: list[str] | None = None
265
- dynamic_urls: list[str] | None = None
262
+ images_urls: list[str] = field(default_factory=list)
263
+ dynamic_urls: list[str] = field(default_factory=list)
266
264
  extra: dict[str, Any] = field(default_factory=dict)
267
265
  repost: "ParseData | None" = None
@@ -43,8 +43,8 @@ class VideoData(Struct):
43
43
  video: Video | None = None
44
44
 
45
45
  @property
46
- def images_urls(self) -> list[str] | None:
47
- return [image.url_list[0] for image in self.images] if self.images else None
46
+ def images_urls(self) -> list[str]:
47
+ return [image.url_list[0] for image in self.images] if self.images else []
48
48
 
49
49
  @property
50
50
  def video_url(self) -> str | None:
@@ -65,14 +65,14 @@ class VideoData(Struct):
65
65
  @property
66
66
  def parse_data(self) -> ParseData:
67
67
  """转换为ParseData对象"""
68
-
68
+ images_urls = self.images_urls
69
69
  return ParseData(
70
70
  title=self.desc,
71
71
  name=self.author.nickname,
72
72
  avatar_url=self.avatar_url,
73
73
  timestamp=self.create_time,
74
- images_urls=self.images_urls,
75
- video_url=self.video_url if self.images_urls is None else None,
74
+ images_urls=images_urls,
75
+ video_url=self.video_url if len(images_urls) == 0 else None,
76
76
  cover_url=self.cover_url,
77
77
  )
78
78
 
@@ -86,7 +86,7 @@ class Atlas(Struct):
86
86
  @property
87
87
  def img_urls(self):
88
88
  if len(self.cdn_list) == 0 or len(self.img_route_list) == 0:
89
- return None
89
+ return []
90
90
  cdn = random.choice(self.cdn_list).cdn
91
91
  return [f"https://{cdn}/{url}" for url in self.img_route_list]
92
92
 
@@ -0,0 +1,121 @@
1
+ import re
2
+ from typing import Any, ClassVar
3
+
4
+ import httpx
5
+
6
+ from ..exception import ParseException
7
+ from .base import BaseParser
8
+ from .data import ParseResult, Platform
9
+
10
+
11
class TwitterParser(BaseParser):
    """Parser for X (Twitter) status links, resolved through the xdown.app web API."""

    # Platform metadata
    platform: ClassVar[Platform] = Platform(name="twitter", display_name="小蓝鸟")

    # URL patterns as (keyword, regex) pairs.
    # The dot in the host is escaped so it only matches the literal "x.com".
    patterns: ClassVar[list[tuple[str, str]]] = [
        ("x.com", r"https?://x\.com/[0-9-a-zA-Z_]{1,20}/status/([0-9]+)"),
    ]

    async def _req_xdown_api(self, url: str) -> dict[str, Any]:
        """POST a status URL to the xdown.app search endpoint and decode the reply.

        Args:
            url: The status URL to look up.

        Returns:
            The decoded JSON body of the response.

        Raises:
            ParseException: The response body is not valid JSON.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "https://xdown.app",
            "Referer": "https://xdown.app/",
            # Parser-level headers are merged last, so they win on key clashes.
            **self.headers,
        }
        form = {"q": url, "lang": "zh-cn"}
        # A dedicated name for the endpoint keeps the `url` argument intact.
        endpoint = "https://xdown.app/api/ajaxSearch"
        async with httpx.AsyncClient(headers=headers, timeout=self.timeout) as client:
            response = await client.post(endpoint, data=form)
            try:
                return response.json()
            except ValueError as err:
                # An HTML error page or empty body is a parse failure, not a
                # JSON decoding problem the caller should have to know about.
                raise ParseException("解析失败") from err

    async def parse(self, matched: re.Match[str]) -> ParseResult:
        """Resolve a matched status URL into a parse result.

        Args:
            matched: Regex match object produced by one of ``patterns``.

        Returns:
            ParseResult: Whatever ``build_result`` produces for the extracted data.

        Raises:
            ParseException: The API reported a non-"ok" status or returned no data.
        """
        # The full match is the original status URL.
        url = matched.group(0)
        resp = await self._req_xdown_api(url)
        if resp.get("status") != "ok":
            raise ParseException("解析失败")

        html_content = resp.get("data")
        # Treat an empty payload like a missing one: there is nothing to parse.
        if not html_content:
            raise ParseException("解析失败, 数据为空")

        data = self.parse_twitter_html(html_content)
        return self.build_result(data)

    @classmethod
    def parse_twitter_html(cls, html_content: str):
        """Extract media links and the title from the HTML fragment.

        Args:
            html_content: HTML fragment taken from the API response's ``data`` field.

        Returns:
            ParseData: Populated with the cover URL, image URLs, GIF URLs,
            video URL and title found in the fragment. ``name`` is always a
            fixed placeholder.
        """
        from bs4 import BeautifulSoup, Tag

        from .data import ParseData

        soup = BeautifulSoup(html_content, "html.parser")
        data = ParseData()

        # 1. Thumbnail: the first <img> in the fragment.
        img_tag = soup.find("img")
        if isinstance(img_tag, Tag):
            src = img_tag.get("src")
            if src and isinstance(src, str):
                data.cover_url = src

        # 2. Download links: "tw-button-dl" anchors first, then "abutton"
        #    anchors (e.g. class="abutton is-success is-fullwidth btn-premium mt-3").
        download_links = soup.find_all("a", class_="tw-button-dl")
        download_items = soup.find_all("a", class_="abutton")
        # An anchor carrying both classes is returned by both queries; track
        # identity so it is only processed once.
        seen: set[int] = set()
        for link in [*download_links, *download_items]:
            if id(link) in seen:
                continue
            seen.add(id(link))

            if not isinstance(link, Tag):
                continue
            href = link.get("href")
            if not (href and isinstance(href, str)):
                continue

            # The button label decides what kind of media the link points to.
            text = link.get_text(strip=True)
            if "下载图片" in text:
                data.images_urls.append(href)
            elif "下载 gif" in text:
                # The GIF and MP4 links point at the same file.
                data.dynamic_urls.append(href)
            elif "下载 MP4" in text:
                # Only the first MP4 link is used; stop scanning after it.
                data.video_url = href
                break

        # 3. Title: text of the first <h3>.
        title_tag = soup.find("h3")
        if title_tag:
            data.title = title_tag.get_text(strip=True)

        # 4. The author name is not extracted here, so a fixed placeholder is used.
        data.name = "暂时无法获取用户名"

        return data
@@ -175,7 +175,6 @@ class WeiBoParser(BaseParser):
175
175
 
176
176
  # 用 bytes 更稳,避免编码歧义
177
177
  weibo_data = msgspec.json.decode(response.content, type=WeiboResponse).data
178
- url = f"https://weibo.com/{weibo_data.user.id}/{weibo_data.bid}"
179
178
  return self.build_result(weibo_data.parse_data)
180
179
 
181
180
  def _base62_encode(self, number: int) -> str:
@@ -269,7 +268,7 @@ class WeiboData(Struct):
269
268
 
270
269
  @property
271
270
  def title(self) -> str:
272
- return self.status_title or self.page_info.title if self.page_info else ""
271
+ return self.page_info.title if self.page_info else ""
273
272
 
274
273
  @property
275
274
  def display_name(self) -> str:
@@ -303,6 +302,14 @@ class WeiboData(Struct):
303
302
  return [x.large.url for x in self.pics]
304
303
  return []
305
304
 
305
@property
def url(self) -> str:
    """Link to this post on weibo.com, built from the author id and the post's bid."""
    return f"https://weibo.com/{self.user.id}/{self.bid}"

@property
def timestamp(self) -> int:
    """Creation time of the post as a Unix timestamp in whole seconds.

    ``created_at`` is parsed with the format ``"%a %b %d %H:%M:%S %z %Y"``.
    The UTC offset in the string is honoured, so the result does not depend
    on the time zone of the machine running the code.
    """
    from datetime import datetime

    # time.mktime() would treat the parsed fields as host-local time and
    # ignore the %z offset; an aware datetime converts to the epoch correctly.
    created = datetime.strptime(self.created_at, "%a %b %d %H:%M:%S %z %Y")
    return int(created.timestamp())
312
+
306
313
  @property
307
314
  def parse_data(self) -> ParseData:
308
315
  return ParseData(
@@ -310,10 +317,11 @@ class WeiboData(Struct):
310
317
  name=self.display_name,
311
318
  avatar_url=self.user.profile_image_url,
312
319
  text=self.text_content,
313
- timestamp=int(time.mktime(time.strptime(self.created_at, "%a %b %d %H:%M:%S %z %Y"))),
320
+ timestamp=self.timestamp,
314
321
  video_url=self.video_url,
315
322
  cover_url=self.cover_url,
316
323
  images_urls=self.pic_urls,
324
+ url=self.url,
317
325
  repost=self.retweeted_status.parse_data if self.retweeted_status else None,
318
326
  )
319
327