nonebot-plugin-parser 2.3.4__tar.gz → 2.3.6__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (65)
  1. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/PKG-INFO +22 -11
  2. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/README.md +19 -8
  3. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/pyproject.toml +4 -4
  4. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/matchers/rule.py +40 -31
  5. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/acfun/__init__.py +151 -0
  6. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/acfun/video.py +77 -0
  7. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/base.py +1 -0
  8. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +5 -47
  9. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/dynamic.py +1 -1
  10. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/data.py +2 -2
  11. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +6 -7
  12. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/douyin/slides.py +4 -1
  13. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/douyin/video.py +4 -1
  14. nonebot_plugin_parser-2.3.4/src/nonebot_plugin_parser/parsers/kuaishou.py → nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/kuaishou/__init__.py +8 -71
  15. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/kuaishou/states.py +62 -0
  16. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/tiktok.py +2 -2
  17. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/twitter.py +3 -2
  18. nonebot_plugin_parser-2.3.4/src/nonebot_plugin_parser/parsers/weibo.py → nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/weibo/__init__.py +9 -131
  19. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/weibo/article.py +23 -0
  20. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/weibo/common.py +110 -0
  21. nonebot_plugin_parser-2.3.4/src/nonebot_plugin_parser/parsers/xiaohongshu.py → nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py +33 -133
  22. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/xiaohongshu/common.py +33 -0
  23. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/xiaohongshu/discovery.py +61 -0
  24. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/xiaohongshu/explore.py +61 -0
  25. nonebot_plugin_parser-2.3.4/src/nonebot_plugin_parser/parsers/youtube.py → nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/youtube/__init__.py +12 -57
  26. nonebot_plugin_parser-2.3.6/src/nonebot_plugin_parser/parsers/youtube/meta.py +43 -0
  27. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/common.py +4 -2
  28. nonebot_plugin_parser-2.3.4/src/nonebot_plugin_parser/parsers/acfun.py +0 -151
  29. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/__init__.py +0 -0
  30. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/config.py +0 -0
  31. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/constants.py +0 -0
  32. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/download/__init__.py +0 -0
  33. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/download/task.py +0 -0
  34. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/download/ytdlp.py +0 -0
  35. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/exception.py +0 -0
  36. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/helper.py +0 -0
  37. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/matchers/__init__.py +0 -0
  38. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
  39. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
  40. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/article.py +0 -0
  41. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/common.py +0 -0
  42. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/favlist.py +0 -0
  43. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/live.py +0 -0
  44. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
  45. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
  46. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
  47. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
  48. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
  49. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/base.py +0 -0
  50. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/default.py +0 -0
  51. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/htmlrender.py +0 -0
  52. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW-1.ttf +0 -0
  53. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/bilibili.png +0 -0
  54. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/douyin.png +0 -0
  55. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/kuaishou.png +0 -0
  56. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/media_button.png +0 -0
  57. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/tiktok.png +0 -0
  58. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/twitter.png +0 -0
  59. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/weibo.png +0 -0
  60. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/xiaohongshu.png +0 -0
  61. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/resources/youtube.png +0 -0
  62. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/templates/card.html.jinja +0 -0
  63. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
  64. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
  65. {nonebot_plugin_parser-2.3.4 → nonebot_plugin_parser-2.3.6}/src/nonebot_plugin_parser/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nonebot-plugin-parser
3
- Version: 2.3.4
3
+ Version: 2.3.6
4
4
  Summary: NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga
5
5
  Keywords: acfun,bilibili,douyin,kuaishou,nga,nonebot,nonebot2,tiktok,twitter,video,weibo,xiaohongshu,youtube
6
6
  Author: fllesser
@@ -23,12 +23,12 @@ Requires-Dist: nonebot2>=2.4.3,<3.0.0
23
23
  Requires-Dist: pillow>=11.0.0
24
24
  Requires-Dist: tqdm>=4.67.1,<5.0.0
25
25
  Requires-Dist: aiofiles>=25.1.0
26
- Requires-Dist: curl-cffi>=0.13.0,<1.0.0
27
26
  Requires-Dist: httpx>=0.27.2,<1.0.0
28
27
  Requires-Dist: msgspec>=0.20.0,<1.0.0
29
28
  Requires-Dist: apilmoji[tqdm]>=0.2.4,<1.0.0
30
29
  Requires-Dist: beautifulsoup4>=4.12.0,<5.0.0
31
- Requires-Dist: bilibili-api-python>=17.4.0,<18.0.0
30
+ Requires-Dist: curl-cffi>=0.13.0,!=0.14.0,<1.0.0
31
+ Requires-Dist: bilibili-api-python>=17.4.1,<18.0.0
32
32
  Requires-Dist: nonebot-plugin-alconna>=0.59.4,<1.0.0
33
33
  Requires-Dist: nonebot-plugin-apscheduler>=0.5.0,<1.0.0
34
34
  Requires-Dist: nonebot-plugin-localstore>=0.7.4,<1.0.0
@@ -71,8 +71,10 @@ Description-Content-Type: text/markdown
71
71
 
72
72
  </div>
73
73
 
74
- > [!IMPORTANT] > **收藏项目**,你将从 GitHub 上无延迟地接收所有发布通知~ ⭐️
75
- > <img width="100%" src="https://starify.komoridevs.icu/api/starify?owner=fllesser&repo=nonebot-plugin-parser" alt="starify" />
74
+ > [!IMPORTANT]
75
+ > **收藏项目**,你将从 GitHub 上无延迟地接收所有发布通知~ ⭐️
76
+
77
+ <img width="100%" src="https://starify.komoridevs.icu/api/starify?owner=fllesser&repo=nonebot-plugin-parser" alt="starify" />
76
78
 
77
79
  ## 📖 介绍
78
80
 
@@ -106,7 +108,8 @@ Description-Content-Type: text/markdown
106
108
 
107
109
  ## 💿 安装
108
110
 
109
- > [!Warning] > **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**
111
+ > [!Warning]
112
+ > **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**
110
113
 
111
114
  <details>
112
115
  <summary>使用 nb-cli 安装/更新</summary>
@@ -186,19 +189,27 @@ Description-Content-Type: text/markdown
186
189
 
187
190
  `ytdlp`, 用于解析 `youtube` 和 `tiktok` 视频
188
191
 
189
- uv add nonebot-plugin-parser[ytdlp]
192
+ uv add "nonebot-plugin-parser[ytdlp]"
190
193
 
191
194
  [emosvg](https://github.com/fllesser/emosvg) 用于渲染 `emoji` 表情, 基于 `cairo` 和 `svg` 实现,`win/mac` 用户,请确保自己会配置 `cairo`, 插件默认使用的依赖于网络的 `apilmoji`,已缓存的 `emoji` 渲染速度略快于 `emosvg`
192
195
 
193
- uv add nonebot-plugin-parser[emosvg]
196
+ uv add "nonebot-plugin-parser[emosvg]"
197
+
198
+ `htmlkit`, 无 js 渲染 `html`, 插件目前还没有供 `htmlkit` 使用的模版, 因此可忽略此依赖
199
+
200
+ uv add "nonebot-plugin-parser[htmlkit]"
201
+
202
+ `htmlrender`, 使用 `playwright` 渲染 `html`, 插件现有模版有点问题,并且极其丑陋,不建议使用
203
+
204
+ uv add "nonebot-plugin-parser[htmlrender]"
194
205
 
195
- `htmlkit`, 用于渲染 html 内容, 插件目前还没有 html 卡片渲染, 因此可忽略此依赖, 插件目前仅使用 `PIL` 渲染媒体卡片
206
+ 现版本推荐组合
196
207
 
197
- uv add nonebot-plugin-parser[htmlkit]
208
+ uv add "nonebot-plugin-parser[ytdlp,emosvg]"
198
209
 
199
210
  `all` 顾名思义,安装所有可选依赖
200
211
 
201
- uv add nonebot-plugin-parser[all]
212
+ uv add "nonebot-plugin-parser[all]"
202
213
 
203
214
  </details>
204
215
 
@@ -17,8 +17,10 @@
17
17
 
18
18
  </div>
19
19
 
20
- > [!IMPORTANT] > **收藏项目**,你将从 GitHub 上无延迟地接收所有发布通知~ ⭐️
21
- > <img width="100%" src="https://starify.komoridevs.icu/api/starify?owner=fllesser&repo=nonebot-plugin-parser" alt="starify" />
20
+ > [!IMPORTANT]
21
+ > **收藏项目**,你将从 GitHub 上无延迟地接收所有发布通知~ ⭐️
22
+
23
+ <img width="100%" src="https://starify.komoridevs.icu/api/starify?owner=fllesser&repo=nonebot-plugin-parser" alt="starify" />
22
24
 
23
25
  ## 📖 介绍
24
26
 
@@ -52,7 +54,8 @@
52
54
 
53
55
  ## 💿 安装
54
56
 
55
- > [!Warning] > **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**
57
+ > [!Warning]
58
+ > **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**
56
59
 
57
60
  <details>
58
61
  <summary>使用 nb-cli 安装/更新</summary>
@@ -132,19 +135,27 @@
132
135
 
133
136
  `ytdlp`, 用于解析 `youtube` 和 `tiktok` 视频
134
137
 
135
- uv add nonebot-plugin-parser[ytdlp]
138
+ uv add "nonebot-plugin-parser[ytdlp]"
136
139
 
137
140
  [emosvg](https://github.com/fllesser/emosvg) 用于渲染 `emoji` 表情, 基于 `cairo` 和 `svg` 实现,`win/mac` 用户,请确保自己会配置 `cairo`, 插件默认使用的依赖于网络的 `apilmoji`,已缓存的 `emoji` 渲染速度略快于 `emosvg`
138
141
 
139
- uv add nonebot-plugin-parser[emosvg]
142
+ uv add "nonebot-plugin-parser[emosvg]"
143
+
144
+ `htmlkit`, 无 js 渲染 `html`, 插件目前还没有供 `htmlkit` 使用的模版, 因此可忽略此依赖
145
+
146
+ uv add "nonebot-plugin-parser[htmlkit]"
147
+
148
+ `htmlrender`, 使用 `playwright` 渲染 `html`, 插件现有模版有点问题,并且极其丑陋,不建议使用
149
+
150
+ uv add "nonebot-plugin-parser[htmlrender]"
140
151
 
141
- `htmlkit`, 用于渲染 html 内容, 插件目前还没有 html 卡片渲染, 因此可忽略此依赖, 插件目前仅使用 `PIL` 渲染媒体卡片
152
+ 现版本推荐组合
142
153
 
143
- uv add nonebot-plugin-parser[htmlkit]
154
+ uv add "nonebot-plugin-parser[ytdlp,emosvg]"
144
155
 
145
156
  `all` 顾名思义,安装所有可选依赖
146
157
 
147
- uv add nonebot-plugin-parser[all]
158
+ uv add "nonebot-plugin-parser[all]"
148
159
 
149
160
  </details>
150
161
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nonebot-plugin-parser"
3
- version = "2.3.4"
3
+ version = "2.3.6"
4
4
  description = "NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -43,12 +43,12 @@ dependencies = [
43
43
  "pillow>=11.0.0",
44
44
  "tqdm>=4.67.1,<5.0.0",
45
45
  "aiofiles>=25.1.0",
46
- "curl_cffi>=0.13.0,<1.0.0",
47
46
  "httpx>=0.27.2,<1.0.0",
48
47
  "msgspec>=0.20.0,<1.0.0",
49
48
  "apilmoji[tqdm]>=0.2.4,<1.0.0",
50
49
  "beautifulsoup4>=4.12.0,<5.0.0",
51
- "bilibili-api-python>=17.4.0,<18.0.0",
50
+ "curl_cffi>=0.13.0,<1.0.0,!=0.14.0",
51
+ "bilibili-api-python>=17.4.1,<18.0.0",
52
52
  "nonebot-plugin-alconna>=0.59.4,<1.0.0",
53
53
  "nonebot-plugin-apscheduler>=0.5.0,<1.0.0",
54
54
  "nonebot-plugin-localstore>=0.7.4,<1.0.0",
@@ -118,7 +118,7 @@ nonebug = { git = "https://github.com/nonebot/nonebug" }
118
118
  [tool.bumpversion]
119
119
  tag = true
120
120
  commit = true
121
- current_version = "2.3.4"
121
+ current_version = "2.3.6"
122
122
  message = "release: bump vesion from {current_version} to {new_version}"
123
123
 
124
124
  [[tool.bumpversion.files]]
@@ -1,5 +1,5 @@
1
1
  import re
2
- from typing import Any, Literal
2
+ from typing import Literal
3
3
 
4
4
  import msgspec
5
5
  from nonebot import logger
@@ -19,6 +19,32 @@ from ..config import gconfig
19
19
  PSR_SEARCHED_KEY: Literal["psr-searched"] = "psr-searched"
20
20
 
21
21
 
22
+ # 定义 JSON 卡片的数据结构
23
+ class MetaDetail(msgspec.Struct):
24
+ qqdocurl: str | None = None
25
+
26
+
27
+ class MetaNews(msgspec.Struct):
28
+ jumpUrl: str | None = None
29
+
30
+
31
+ class MetaMusic(msgspec.Struct):
32
+ jumpUrl: str | None = None
33
+
34
+
35
+ class Meta(msgspec.Struct):
36
+ detail_1: MetaDetail | None = None
37
+ news: MetaNews | None = None
38
+ music: MetaMusic | None = None
39
+
40
+
41
+ class RawData(msgspec.Struct):
42
+ meta: Meta | None = None
43
+
44
+
45
+ raw_decoder = msgspec.json.Decoder(RawData)
46
+
47
+
22
48
  class SearchResult:
23
49
  """匹配结果"""
24
50
 
@@ -45,24 +71,6 @@ def _searched(state: T_State) -> SearchResult | None:
45
71
  return state.get(PSR_SEARCHED_KEY)
46
72
 
47
73
 
48
- def _escape_raw(raw: str) -> str:
49
- """
50
- 转义原始字符串中的特殊字符
51
- Args:
52
- raw: 原始字符串
53
-
54
- Returns:
55
- str: 转义后的字符串
56
- """
57
- replacements = [
58
- ("\\", ""),
59
- ("&amp;", "&"),
60
- ]
61
- for old, new in replacements:
62
- raw = raw.replace(old, new)
63
- return raw
64
-
65
-
66
74
  def _extract_url(hyper: Hyper) -> str | None:
67
75
  """处理 JSON 类型的消息段,提取 URL
68
76
 
@@ -79,24 +87,25 @@ def _extract_url(hyper: Hyper) -> str | None:
79
87
  return None
80
88
 
81
89
  try:
82
- raw: dict[str, Any] = msgspec.json.decode(raw_str)
90
+ raw = raw_decoder.decode(raw_str)
83
91
  except msgspec.DecodeError:
84
92
  logger.exception(f"json 卡片解析失败: {raw_str}")
85
93
  return None
86
94
 
87
- meta: dict[str, Any] | None = raw.get("meta")
88
- if not meta:
95
+ if not raw.meta:
89
96
  return None
90
97
 
91
- for key1, key2 in (
92
- ("detail_1", "qqdocurl"),
93
- ("news", "jumpUrl"),
94
- ("music", "jumpUrl"),
95
- ):
96
- if url := meta.get(key1, {}).get(key2):
97
- logger.debug(f"extract url from raw:meta:{key1}:{key2}: {url}")
98
- return url
99
- return None
98
+ meta, url = raw.meta, None
99
+
100
+ if meta.detail_1:
101
+ url = meta.detail_1.qqdocurl
102
+ elif meta.news:
103
+ url = meta.news.jumpUrl
104
+ elif meta.music:
105
+ url = meta.music.jumpUrl
106
+
107
+ logger.debug(f"extract url[{url}] from raw#meta[{meta}]")
108
+ return url
100
109
 
101
110
 
102
111
  def _extract_text(message: UniMsg) -> str | None:
@@ -0,0 +1,151 @@
1
+ import re
2
+ import asyncio
3
+ from typing import ClassVar
4
+ from pathlib import Path
5
+ from urllib.parse import urljoin
6
+
7
+ import aiofiles
8
+ from httpx import HTTPError, AsyncClient
9
+ from nonebot import logger
10
+
11
+ from ..base import (
12
+ DOWNLOADER,
13
+ COMMON_TIMEOUT,
14
+ DOWNLOAD_TIMEOUT,
15
+ Platform,
16
+ BaseParser,
17
+ PlatformEnum,
18
+ ParseException,
19
+ DownloadException,
20
+ DurationLimitException,
21
+ handle,
22
+ pconfig,
23
+ )
24
+
25
+
26
+ class AcfunParser(BaseParser):
27
+ # 平台信息
28
+ platform: ClassVar[Platform] = Platform(name=PlatformEnum.ACFUN, display_name="猴山")
29
+
30
+ def __init__(self):
31
+ super().__init__()
32
+ self.headers["referer"] = "https://www.acfun.cn/"
33
+
34
+ @handle("acfun.cn", r"(?:ac=|/ac)(?P<acid>\d+)")
35
+ async def _parse(self, searched: re.Match[str]):
36
+ acid = int(searched.group("acid"))
37
+ url = f"https://www.acfun.cn/v/ac{acid}"
38
+
39
+ video_info = await self.parse_video_info(url)
40
+ author = self.create_author(video_info.name, video_info.avatar_url)
41
+
42
+ video_task = asyncio.create_task(
43
+ self.download_video(
44
+ video_info.m3u8_url,
45
+ f"acfun_{acid}.mp4",
46
+ video_info.duration,
47
+ )
48
+ )
49
+
50
+ video_content = self.create_video_content(video_task, cover_url=video_info.coverUrl)
51
+
52
+ return self.result(
53
+ title=video_info.title,
54
+ text=video_info.text,
55
+ author=author,
56
+ timestamp=video_info.timestamp,
57
+ contents=[video_content],
58
+ )
59
+
60
+ async def parse_video_info(self, url: str):
61
+ """解析acfun链接获取详细信息
62
+
63
+ Args:
64
+ url (str): 链接
65
+
66
+ Returns:
67
+ video.VideoInfo
68
+ """
69
+ from . import video
70
+
71
+ # 拼接查询参数
72
+ url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1"
73
+
74
+ async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
75
+ response = await client.get(url)
76
+ response.raise_for_status()
77
+ raw = response.text
78
+
79
+ matched = re.search(r"window\.videoInfo =(.*?)</script>", raw)
80
+ if not matched:
81
+ raise ParseException("解析 acfun 视频信息失败")
82
+
83
+ raw = str(matched.group(1))
84
+ raw = re.sub(r'\\{1,4}"', '"', raw)
85
+ raw = raw.replace('"{', "{").replace('}"', "}")
86
+ return video.decoder.decode(raw)
87
+
88
+ async def download_video(self, m3u8_url: str, file_name: str, duration: int) -> Path:
89
+ """下载acfun视频
90
+
91
+ Args:
92
+ m3u8_url (str): m3u8链接
93
+ file_name (str): 文件名
94
+ duration (int): 视频时长(秒)
95
+
96
+ Returns:
97
+ Path: 下载的mp4文件
98
+ """
99
+
100
+ if duration >= pconfig.duration_maximum:
101
+ raise DurationLimitException
102
+
103
+ video_file = pconfig.cache_dir / file_name
104
+ if video_file.exists():
105
+ return video_file
106
+
107
+ m3u8_slices = await self._get_m3u8_slices(m3u8_url)
108
+
109
+ try:
110
+ async with (
111
+ aiofiles.open(video_file, "wb") as f,
112
+ AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client,
113
+ ):
114
+ total_size = 0
115
+ with DOWNLOADER.get_progress_bar(file_name) as bar:
116
+ for url in m3u8_slices:
117
+ async with client.stream("GET", url) as response:
118
+ async for chunk in response.aiter_bytes(chunk_size=1024 * 1024):
119
+ await f.write(chunk)
120
+ total_size += len(chunk)
121
+ bar.update(len(chunk))
122
+ except HTTPError:
123
+ video_file.unlink(missing_ok=True)
124
+ logger.exception("视频下载失败")
125
+ raise DownloadException("视频下载失败")
126
+ return video_file
127
+
128
+ async def _get_m3u8_slices(self, m3u8_url: str):
129
+ """拼接m3u8链接
130
+
131
+ Args:
132
+ m3u8_url (str): m3u8链接
133
+ m3u8_slice (str): m3u8切片
134
+
135
+ Returns:
136
+ list[str]: 视频链接
137
+ """
138
+ async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
139
+ response = await client.get(m3u8_url)
140
+ response.raise_for_status()
141
+
142
+ slices_text = response.text
143
+
144
+ slices: list[str] = []
145
+ for line in slices_text.splitlines():
146
+ line = line.strip()
147
+ if not line or line.startswith("#"):
148
+ continue
149
+ slices.append(urljoin(m3u8_url, line))
150
+
151
+ return slices
@@ -0,0 +1,77 @@
1
+ from msgspec import Struct
2
+ from msgspec.json import Decoder
3
+
4
+
5
+ class User(Struct):
6
+ name: str
7
+ headUrl: str
8
+
9
+
10
+ class Representation(Struct):
11
+ url: str
12
+ m3u8Slice: str
13
+ qualityType: str
14
+
15
+ @property
16
+ def m3u8_slice(self) -> str:
17
+ return self.m3u8Slice.replace("\\\\n", "\n")
18
+
19
+
20
+ class AdaptationSet(Struct):
21
+ representation: list[Representation]
22
+
23
+
24
+ class KsPlay(Struct):
25
+ adaptationSet: list[AdaptationSet]
26
+
27
+
28
+ class CurrentVideoInfo(Struct):
29
+ ksPlayJson: KsPlay
30
+ durationMillis: int
31
+
32
+ @property
33
+ def representations(self) -> list[Representation]:
34
+ return self.ksPlayJson.adaptationSet[0].representation
35
+
36
+
37
+ class VideoInfo(Struct, kw_only=True):
38
+ title: str
39
+ description: str | None
40
+ createTimeMillis: int
41
+ user: User
42
+ currentVideoInfo: CurrentVideoInfo
43
+ coverUrl: str
44
+
45
+ @property
46
+ def name(self) -> str:
47
+ return self.user.name
48
+
49
+ @property
50
+ def avatar_url(self) -> str:
51
+ return self.user.headUrl
52
+
53
+ @property
54
+ def text(self) -> str | None:
55
+ return f"简介: {self.description}" if self.description else None
56
+
57
+ @property
58
+ def timestamp(self) -> int:
59
+ return self.createTimeMillis // 1000
60
+
61
+ @property
62
+ def duration(self) -> int:
63
+ return self.currentVideoInfo.durationMillis // 1000
64
+
65
+ @property
66
+ def m3u8_url(self) -> str:
67
+ representations = self.currentVideoInfo.representations
68
+
69
+ quality_types = ("1080p", "720p", "480p", "360p")
70
+ for r in representations:
71
+ if r.qualityType in quality_types:
72
+ return r.url
73
+
74
+ return representations[0].url
75
+
76
+
77
+ decoder = Decoder(VideoInfo)
@@ -12,6 +12,7 @@ from .data import Platform, ParseResult, ParseResultKwargs
12
12
  from ..config import pconfig as pconfig
13
13
  from ..download import DOWNLOADER as DOWNLOADER
14
14
  from ..constants import IOS_HEADER, COMMON_HEADER, ANDROID_HEADER, COMMON_TIMEOUT
15
+ from ..constants import DOWNLOAD_TIMEOUT as DOWNLOAD_TIMEOUT
15
16
  from ..constants import PlatformEnum as PlatformEnum
16
17
  from ..exception import TipException as TipException
17
18
  from ..exception import ParseException as ParseException
@@ -88,7 +88,7 @@ class BilibiliParser(BaseParser):
88
88
  async def _parse_read(self, searched: Match[str]):
89
89
  """解析专栏信息"""
90
90
  read_id = int(searched.group("read_id"))
91
- return await self.parse_read(read_id)
91
+ return await self.parse_read_with_opus(read_id)
92
92
 
93
93
  @handle("/opus/", r"bilibili\.com/opus/(?P<opus_id>\d+)")
94
94
  async def _parse_opus(self, searched: Match[str]):
@@ -175,14 +175,11 @@ class BilibiliParser(BaseParser):
175
175
  """
176
176
  from bilibili_api.dynamic import Dynamic
177
177
 
178
- from .dynamic import DynamicItem
178
+ from .dynamic import DynamicData
179
179
 
180
180
  dynamic = Dynamic(dynamic_id, await self.credential)
181
+ dynamic_info = convert(await dynamic.get_info(), DynamicData).item
181
182
 
182
- # 转换为结构体
183
- dynamic_data = convert(await dynamic.get_info(), DynamicItem)
184
- dynamic_info = dynamic_data.item
185
- # 使用结构体属性提取信息
186
183
  author = self.create_author(dynamic_info.name, dynamic_info.avatar)
187
184
 
188
185
  # 下载图片
@@ -208,8 +205,8 @@ class BilibiliParser(BaseParser):
208
205
  opus = Opus(opus_id, await self.credential)
209
206
  return await self._parse_opus_obj(opus)
210
207
 
211
- async def parse_read_old(self, read_id: int):
212
- """解析专栏信息, 已废弃
208
+ async def parse_read_with_opus(self, read_id: int):
209
+ """解析专栏信息, 使用 Opus 接口
213
210
 
214
211
  Args:
215
212
  read_id (int): 专栏 id
@@ -297,45 +294,6 @@ class BilibiliParser(BaseParser):
297
294
  author=author,
298
295
  )
299
296
 
300
- async def parse_read(self, read_id: int):
301
- """专栏解析
302
-
303
- Args:
304
- read_id (int): 专栏 id
305
-
306
- Returns:
307
- texts: list[str], urls: list[str]
308
- """
309
- from bilibili_api.article import Article
310
-
311
- from .article import TextNode, ImageNode, ArticleInfo
312
-
313
- ar = Article(read_id)
314
- # 加载内容
315
- await ar.fetch_content()
316
- data = ar.json()
317
- article_info = convert(data, ArticleInfo)
318
- logger.debug(f"article_info: {article_info}")
319
-
320
- contents: list[MediaContent] = []
321
- current_text = ""
322
- for child in article_info.gen_text_img():
323
- if isinstance(child, ImageNode):
324
- contents.append(self.create_graphics_content(child.url, current_text.strip(), child.alt))
325
- current_text = ""
326
- elif isinstance(child, TextNode):
327
- current_text += child.text
328
-
329
- author = self.create_author(*article_info.author_info)
330
-
331
- return self.result(
332
- title=article_info.title,
333
- timestamp=article_info.timestamp,
334
- text=current_text.strip(),
335
- author=author,
336
- contents=contents,
337
- )
338
-
339
297
  async def parse_favlist(self, fav_id: int):
340
298
  """解析收藏夹信息
341
299
 
@@ -191,7 +191,7 @@ class DynamicInfo(Struct):
191
191
  return None
192
192
 
193
193
 
194
- class DynamicItem(Struct):
194
+ class DynamicData(Struct):
195
195
  """动态项目"""
196
196
 
197
197
  item: DynamicInfo
@@ -58,7 +58,7 @@ class VideoContent(MediaContent):
58
58
  return f"时长: {minutes}:{seconds:02d}"
59
59
 
60
60
  def __repr__(self) -> str:
61
- repr = f"VideoContent(path={repr_path_task(self.path_task)}"
61
+ repr = f"VideoContent({repr_path_task(self.path_task)}"
62
62
  if self.cover is not None:
63
63
  repr += f", cover={repr_path_task(self.cover)}"
64
64
  return repr + ")"
@@ -88,7 +88,7 @@ class GraphicsContent(MediaContent):
88
88
  """图片描述 渲染时居中显示"""
89
89
 
90
90
  def __repr__(self) -> str:
91
- repr = f"GraphicsContent(path={repr_path_task(self.path_task)}"
91
+ repr = f"GraphicsContent({repr_path_task(self.path_task)}"
92
92
  if self.text:
93
93
  repr += f", text={self.text}"
94
94
  if self.alt:
@@ -1,7 +1,6 @@
1
1
  import re
2
2
  from typing import ClassVar
3
3
 
4
- import msgspec
5
4
  from httpx import AsyncClient
6
5
  from nonebot import logger
7
6
 
@@ -61,6 +60,8 @@ class DouyinParser(BaseParser):
61
60
  return f"https://m.douyin.com/share/{ty}/{vid}"
62
61
 
63
62
  async def parse_video(self, url: str):
63
+ from . import video
64
+
64
65
  async with AsyncClient(
65
66
  headers=self.ios_headers,
66
67
  timeout=COMMON_TIMEOUT,
@@ -81,9 +82,7 @@ class DouyinParser(BaseParser):
81
82
  if not matched or not matched.group(1):
82
83
  raise ParseException("can't find _ROUTER_DATA in html")
83
84
 
84
- from .video import RouterData
85
-
86
- video_data = msgspec.json.decode(matched.group(1).strip(), type=RouterData).video_data
85
+ video_data = video.decoder.decode(matched.group(1).strip()).video_data
87
86
  # 使用新的简洁构建方式
88
87
  contents = []
89
88
 
@@ -108,6 +107,8 @@ class DouyinParser(BaseParser):
108
107
  )
109
108
 
110
109
  async def parse_slides(self, video_id: str):
110
+ from . import slides
111
+
111
112
  url = "https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/"
112
113
  params = {
113
114
  "aweme_ids": f"[{video_id}]",
@@ -117,9 +118,7 @@ class DouyinParser(BaseParser):
117
118
  response = await client.get(url, params=params)
118
119
  response.raise_for_status()
119
120
 
120
- from .slides import SlidesInfo
121
-
122
- slides_data = msgspec.json.decode(response.content, type=SlidesInfo).aweme_details[0]
121
+ slides_data = slides.decoder.decode(response.content).aweme_details[0]
123
122
  contents = []
124
123
 
125
124
  # 添加图片内容
@@ -1,6 +1,6 @@
1
1
  from random import choice
2
2
 
3
- from msgspec import Struct, field
3
+ from msgspec import Struct, json, field
4
4
 
5
5
 
6
6
  class PlayAddr(Struct):
@@ -57,3 +57,6 @@ class SlidesData(Struct):
57
57
 
58
58
  class SlidesInfo(Struct):
59
59
  aweme_details: list[SlidesData] = field(default_factory=list)
60
+
61
+
62
+ decoder = json.Decoder(SlidesInfo)