nonebot-plugin-parser 2.0.7__tar.gz → 2.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/PKG-INFO +20 -2
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/README.md +19 -1
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/pyproject.toml +2 -2
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +77 -68
- nonebot_plugin_parser-2.0.8/src/nonebot_plugin_parser/parsers/bilibili/article.py +118 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +33 -27
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/data.py +9 -9
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/base.py +2 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/common.py +230 -58
- nonebot_plugin_parser-2.0.7/src/nonebot_plugin_parser/parsers/bilibili/article.py +0 -119
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/config.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/constants.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/download/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/download/task.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/download/ytdlp.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/exception.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/helper.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/matchers/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/matchers/preprocess.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/acfun.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/base.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/common.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/dynamic.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/favlist.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/live.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/douyin/slides.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/douyin/video.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/kuaishou.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/tiktok.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/twitter.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/weibo.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/xiaohongshu.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/parsers/youtube.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/default.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW-1.ttf +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/bilibili.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/douyin.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/kuaishou.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/media_button.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/tiktok.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/twitter.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/weibo.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/xiaohongshu.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/resources/youtube.png +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
- {nonebot_plugin_parser-2.0.7 → nonebot_plugin_parser-2.0.8}/src/nonebot_plugin_parser/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: nonebot-plugin-parser
|
|
3
|
-
Version: 2.0.7
|
|
3
|
+
Version: 2.0.8
|
|
4
4
|
Summary: NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun
|
|
5
5
|
Keywords: nonebot,nonebot2,video,bilibili,youtube,tiktok,twitter,kuaishou,acfun,weibo,xiaohongshu,nga,douyin
|
|
6
6
|
Author: fllesser
|
|
@@ -68,6 +68,18 @@ Description-Content-Type: text/markdown
|
|
|
68
68
|
|
|
69
69
|
支持的链接,可参考 [测试链接](https://github.com/fllesser/nonebot-plugin-parser/blob/master/test_url.md)
|
|
70
70
|
|
|
71
|
+
## 🎨 效果图
|
|
72
|
+
|
|
73
|
+
<div align="center">
|
|
74
|
+
|
|
75
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/video.png" width="160" />
|
|
76
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/9_pic.png" width="160" />
|
|
77
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/4_pic.png" width="160" />
|
|
78
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/repost_video.png" width="160" />
|
|
79
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/repost_2_pic.png" width="160" />
|
|
80
|
+
|
|
81
|
+
</div>
|
|
82
|
+
|
|
71
83
|
## 💿 安装
|
|
72
84
|
> [!Warning]
|
|
73
85
|
> **如果你已经在使用 nonebot-plugin-resolver,请在安装此插件前卸载**
|
|
@@ -169,7 +181,7 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
|
|
|
169
181
|
| parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit, 暂不可用) |
|
|
170
182
|
| parser_append_url | 否 | False | 是否在解析结果中附加原始URL |
|
|
171
183
|
| parser_custom_font | 否 | None | 自定义渲染字体,配置字体文件名,并将字体文件放置于 localstore 生成的插件 data 目录下(如 ./data/nonebot_plugin_parser/) |
|
|
172
|
-
| parser_need_forward_contents | 否 | True |
|
|
184
|
+
| parser_need_forward_contents | 否 | True | 是否需要转发媒体内容(超过 4 项时始终使用合并转发) |
|
|
173
185
|
## 🎉 使用
|
|
174
186
|
### 指令表
|
|
175
187
|
| 指令 | 权限 | 需要@ | 范围 | 说明 |
|
|
@@ -177,6 +189,12 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
|
|
|
177
189
|
| 开启解析 | SUPERUSER/OWNER/ADMIN | 是 | 群聊 | 开启解析 |
|
|
178
190
|
| 关闭解析 | SUPERUSER/OWNER/ADMIN | 是 | 群聊 | 关闭解析 |
|
|
179
191
|
|
|
192
|
+
### 推荐的字体
|
|
193
|
+
- [LXGW ZhenKai / 霞鹜臻楷](https://github.com/lxgw/LxgwZhenKai) 效果图使用字体
|
|
194
|
+
- [LXGW Neo XiHei / 霞鹜新晰黑](https://github.com/lxgw/LxgwNeoXiHei)
|
|
195
|
+
- [LXGW Neo ZhiSong / 霞鹜新致宋 / 霞鶩新緻宋](https://github.com/lxgw/LxgwNeoZhiSong)
|
|
196
|
+
|
|
197
|
+
|
|
180
198
|
## 致谢
|
|
181
199
|
[nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver)
|
|
182
200
|
[parse-video-py](https://github.com/wujunwei928/parse-video-py)
|
|
@@ -38,6 +38,18 @@
|
|
|
38
38
|
|
|
39
39
|
支持的链接,可参考 [测试链接](https://github.com/fllesser/nonebot-plugin-parser/blob/master/test_url.md)
|
|
40
40
|
|
|
41
|
+
## 🎨 效果图
|
|
42
|
+
|
|
43
|
+
<div align="center">
|
|
44
|
+
|
|
45
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/video.png" width="160" />
|
|
46
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/9_pic.png" width="160" />
|
|
47
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/4_pic.png" width="160" />
|
|
48
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/repost_video.png" width="160" />
|
|
49
|
+
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/repost_2_pic.png" width="160" />
|
|
50
|
+
|
|
51
|
+
</div>
|
|
52
|
+
|
|
41
53
|
## 💿 安装
|
|
42
54
|
> [!Warning]
|
|
43
55
|
> **如果你已经在使用 nonebot-plugin-resolver,请在安装此插件前卸载**
|
|
@@ -139,7 +151,7 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
|
|
|
139
151
|
| parser_render_type | 否 | "common" | 渲染器类型,可选 "default"(无图片渲染), "common"(PIL 通用图片渲染), "htmlkit"(htmlkit, 暂不可用) |
|
|
140
152
|
| parser_append_url | 否 | False | 是否在解析结果中附加原始URL |
|
|
141
153
|
| parser_custom_font | 否 | None | 自定义渲染字体,配置字体文件名,并将字体文件放置于 localstore 生成的插件 data 目录下(如 ./data/nonebot_plugin_parser/) |
|
|
142
|
-
| parser_need_forward_contents | 否 | True |
|
|
154
|
+
| parser_need_forward_contents | 否 | True | 是否需要转发媒体内容(超过 4 项时始终使用合并转发) |
|
|
143
155
|
## 🎉 使用
|
|
144
156
|
### 指令表
|
|
145
157
|
| 指令 | 权限 | 需要@ | 范围 | 说明 |
|
|
@@ -147,6 +159,12 @@ Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c
|
|
|
147
159
|
| 开启解析 | SUPERUSER/OWNER/ADMIN | 是 | 群聊 | 开启解析 |
|
|
148
160
|
| 关闭解析 | SUPERUSER/OWNER/ADMIN | 是 | 群聊 | 关闭解析 |
|
|
149
161
|
|
|
162
|
+
### 推荐的字体
|
|
163
|
+
- [LXGW ZhenKai / 霞鹜臻楷](https://github.com/lxgw/LxgwZhenKai) 效果图使用字体
|
|
164
|
+
- [LXGW Neo XiHei / 霞鹜新晰黑](https://github.com/lxgw/LxgwNeoXiHei)
|
|
165
|
+
- [LXGW Neo ZhiSong / 霞鹜新致宋 / 霞鶩新緻宋](https://github.com/lxgw/LxgwNeoZhiSong)
|
|
166
|
+
|
|
167
|
+
|
|
150
168
|
## 致谢
|
|
151
169
|
[nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver)
|
|
152
170
|
[parse-video-py](https://github.com/wujunwei928/parse-video-py)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nonebot-plugin-parser"
|
|
3
|
-
version = "2.0.7"
|
|
3
|
+
version = "2.0.8"
|
|
4
4
|
description = "NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun"
|
|
5
5
|
authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
|
|
6
6
|
readme = "README.md"
|
|
@@ -186,7 +186,7 @@ build-backend = "uv_build"
|
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
[tool.bumpversion]
|
|
189
|
-
current_version = "2.0.7"
|
|
189
|
+
current_version = "2.0.8"
|
|
190
190
|
commit = true
|
|
191
191
|
message = "🔖 release: bump vesion from {current_version} to {new_version}"
|
|
192
192
|
tag = true
|
|
@@ -161,6 +161,7 @@ class BilibiliParser(BaseParser):
|
|
|
161
161
|
async def parse_others(self, url: str):
|
|
162
162
|
"""解析其他类型链接"""
|
|
163
163
|
# 判断链接类型并解析
|
|
164
|
+
logger.debug(f"解析其他类型链接: {url}")
|
|
164
165
|
# 1. 动态
|
|
165
166
|
if "t.bilibili.com" in url:
|
|
166
167
|
return await self.parse_dynamic(url)
|
|
@@ -199,47 +200,6 @@ class BilibiliParser(BaseParser):
|
|
|
199
200
|
|
|
200
201
|
raise ParseException("不支持的 Bilibili 链接")
|
|
201
202
|
|
|
202
|
-
async def _init_credential(self) -> Credential | None:
|
|
203
|
-
"""初始化 bilibili api"""
|
|
204
|
-
|
|
205
|
-
if not pconfig.bili_ck:
|
|
206
|
-
logger.warning("未配置 r_bili_ck, 无法使用哔哩哔哩 AI 总结, 可能无法解析 720p 以上画质视频")
|
|
207
|
-
return None
|
|
208
|
-
|
|
209
|
-
credential = Credential.from_cookies(ck2dict(pconfig.bili_ck))
|
|
210
|
-
if not await credential.check_valid() and self._cookies_file.exists():
|
|
211
|
-
logger.info(f"r_bili_ck 已过期, 尝试从 {self._cookies_file} 加载")
|
|
212
|
-
credential = Credential.from_cookies(json.loads(self._cookies_file.read_text()))
|
|
213
|
-
else:
|
|
214
|
-
logger.info(f"r_bili_ck 有效, 保存到 {self._cookies_file}")
|
|
215
|
-
self._cookies_file.write_text(json.dumps(credential.get_cookies()))
|
|
216
|
-
|
|
217
|
-
return credential
|
|
218
|
-
|
|
219
|
-
@property
|
|
220
|
-
async def credential(self) -> Credential | None:
|
|
221
|
-
"""获取哔哩哔哩登录凭证"""
|
|
222
|
-
|
|
223
|
-
if self._credential is None:
|
|
224
|
-
self._credential = await self._init_credential()
|
|
225
|
-
if self._credential is None:
|
|
226
|
-
return None
|
|
227
|
-
|
|
228
|
-
if not await self._credential.check_valid():
|
|
229
|
-
logger.warning("哔哩哔哩 cookies 已过期, 请重新配置 r_bili_ck")
|
|
230
|
-
return self._credential
|
|
231
|
-
|
|
232
|
-
if await self._credential.check_refresh():
|
|
233
|
-
logger.info("哔哩哔哩 cookies 需要刷新")
|
|
234
|
-
if self._credential.has_ac_time_value() and self._credential.has_bili_jct():
|
|
235
|
-
await self._credential.refresh()
|
|
236
|
-
logger.info(f"哔哩哔哩 cookies 刷新成功, 保存到 {self._cookies_file}")
|
|
237
|
-
self._cookies_file.write_text(json.dumps(self._credential.get_cookies()))
|
|
238
|
-
else:
|
|
239
|
-
logger.warning("哔哩哔哩 cookies 刷新需要包含 SESSDATA, ac_time_value, bili_jct")
|
|
240
|
-
|
|
241
|
-
return self._credential
|
|
242
|
-
|
|
243
203
|
async def parse_dynamic(self, url: str):
|
|
244
204
|
"""解析动态信息
|
|
245
205
|
|
|
@@ -286,8 +246,8 @@ class BilibiliParser(BaseParser):
|
|
|
286
246
|
opus = Opus(opus_id, await self.credential)
|
|
287
247
|
return await self._parse_opus(opus)
|
|
288
248
|
|
|
289
|
-
async def
|
|
290
|
-
"""
|
|
249
|
+
async def parse_read_old(self, read_id: int):
|
|
250
|
+
"""解析专栏信息, 已废弃
|
|
291
251
|
|
|
292
252
|
Args:
|
|
293
253
|
read_id (int): 专栏 id
|
|
@@ -307,39 +267,40 @@ class BilibiliParser(BaseParser):
|
|
|
307
267
|
ParseResult: 解析结果
|
|
308
268
|
"""
|
|
309
269
|
|
|
310
|
-
from .opus import
|
|
270
|
+
from .opus import ImageNode, OpusItem, TextNode
|
|
311
271
|
|
|
312
272
|
opus_info = await bili_opus.get_info()
|
|
313
273
|
if not isinstance(opus_info, dict):
|
|
314
274
|
raise ParseException("获取图文动态信息失败")
|
|
315
275
|
# 转换为结构体
|
|
316
276
|
opus_data = msgspec.convert(opus_info, OpusItem)
|
|
317
|
-
|
|
277
|
+
logger.debug(f"opus_data: {opus_data}")
|
|
318
278
|
author = self.create_author(*opus_data.name_avatar)
|
|
319
279
|
|
|
320
280
|
# 按顺序处理图文内容(参考 parse_read 的逻辑)
|
|
321
281
|
contents: list[MediaContent] = []
|
|
322
|
-
|
|
282
|
+
current_text = ""
|
|
323
283
|
|
|
324
284
|
for node in opus_data.gen_text_img():
|
|
325
285
|
match node:
|
|
326
|
-
case
|
|
327
|
-
contents.append(
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
286
|
+
case ImageNode():
|
|
287
|
+
contents.append(
|
|
288
|
+
self.create_graphics_content(
|
|
289
|
+
node.url,
|
|
290
|
+
current_text.strip(),
|
|
291
|
+
node.alt,
|
|
292
|
+
)
|
|
293
|
+
)
|
|
294
|
+
current_text = ""
|
|
295
|
+
case TextNode():
|
|
296
|
+
current_text += node.text
|
|
336
297
|
|
|
337
298
|
return self.result(
|
|
338
299
|
title=opus_data.title,
|
|
339
300
|
author=author,
|
|
340
301
|
timestamp=opus_data.timestamp,
|
|
341
302
|
contents=contents,
|
|
342
|
-
text=
|
|
303
|
+
text=current_text.strip(),
|
|
343
304
|
)
|
|
344
305
|
|
|
345
306
|
async def parse_live(self, room_id: int):
|
|
@@ -374,7 +335,7 @@ class BilibiliParser(BaseParser):
|
|
|
374
335
|
|
|
375
336
|
return self.result(title=room_data.title, text=room_data.detail, contents=contents, author=author)
|
|
376
337
|
|
|
377
|
-
async def
|
|
338
|
+
async def parse_read(self, read_id: int):
|
|
378
339
|
"""专栏解析
|
|
379
340
|
|
|
380
341
|
Args:
|
|
@@ -393,23 +354,30 @@ class BilibiliParser(BaseParser):
|
|
|
393
354
|
data = ar.json()
|
|
394
355
|
article_info = msgspec.convert(data, ArticleInfo)
|
|
395
356
|
logger.debug(f"article_info: {article_info}")
|
|
357
|
+
|
|
396
358
|
contents: list[MediaContent] = []
|
|
397
|
-
|
|
359
|
+
current_text = ""
|
|
398
360
|
for child in article_info.gen_text_img():
|
|
399
361
|
match child:
|
|
400
362
|
case ImageNode():
|
|
401
|
-
contents.append(
|
|
402
|
-
|
|
363
|
+
contents.append(
|
|
364
|
+
self.create_graphics_content(
|
|
365
|
+
child.url,
|
|
366
|
+
current_text.strip(),
|
|
367
|
+
child.alt,
|
|
368
|
+
)
|
|
369
|
+
)
|
|
370
|
+
current_text = ""
|
|
403
371
|
case TextNode():
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
372
|
+
current_text += child.text
|
|
373
|
+
|
|
374
|
+
author = self.create_author(*article_info.author_info)
|
|
407
375
|
|
|
408
376
|
return self.result(
|
|
409
|
-
title=article_info.
|
|
410
|
-
timestamp=article_info.
|
|
411
|
-
text=
|
|
412
|
-
author=
|
|
377
|
+
title=article_info.title,
|
|
378
|
+
timestamp=article_info.timestamp,
|
|
379
|
+
text=current_text.strip(),
|
|
380
|
+
author=author,
|
|
413
381
|
contents=contents,
|
|
414
382
|
)
|
|
415
383
|
|
|
@@ -499,3 +467,44 @@ class BilibiliParser(BaseParser):
|
|
|
499
467
|
return video_stream.url, None
|
|
500
468
|
logger.debug(f"音频流质量: {audio_stream.audio_quality.name}")
|
|
501
469
|
return video_stream.url, audio_stream.url
|
|
470
|
+
|
|
471
|
+
async def _init_credential(self) -> Credential | None:
|
|
472
|
+
"""初始化 bilibili api"""
|
|
473
|
+
|
|
474
|
+
if not pconfig.bili_ck:
|
|
475
|
+
logger.warning("未配置 r_bili_ck, 无法使用哔哩哔哩 AI 总结, 可能无法解析 720p 以上画质视频")
|
|
476
|
+
return None
|
|
477
|
+
|
|
478
|
+
credential = Credential.from_cookies(ck2dict(pconfig.bili_ck))
|
|
479
|
+
if not await credential.check_valid() and self._cookies_file.exists():
|
|
480
|
+
logger.info(f"r_bili_ck 已过期, 尝试从 {self._cookies_file} 加载")
|
|
481
|
+
credential = Credential.from_cookies(json.loads(self._cookies_file.read_text()))
|
|
482
|
+
else:
|
|
483
|
+
logger.info(f"r_bili_ck 有效, 保存到 {self._cookies_file}")
|
|
484
|
+
self._cookies_file.write_text(json.dumps(credential.get_cookies()))
|
|
485
|
+
|
|
486
|
+
return credential
|
|
487
|
+
|
|
488
|
+
@property
|
|
489
|
+
async def credential(self) -> Credential | None:
|
|
490
|
+
"""获取哔哩哔哩登录凭证"""
|
|
491
|
+
|
|
492
|
+
if self._credential is None:
|
|
493
|
+
self._credential = await self._init_credential()
|
|
494
|
+
if self._credential is None:
|
|
495
|
+
return None
|
|
496
|
+
|
|
497
|
+
if not await self._credential.check_valid():
|
|
498
|
+
logger.warning("哔哩哔哩 cookies 已过期, 请重新配置 r_bili_ck")
|
|
499
|
+
return self._credential
|
|
500
|
+
|
|
501
|
+
if await self._credential.check_refresh():
|
|
502
|
+
logger.info("哔哩哔哩 cookies 需要刷新")
|
|
503
|
+
if self._credential.has_ac_time_value() and self._credential.has_bili_jct():
|
|
504
|
+
await self._credential.refresh()
|
|
505
|
+
logger.info(f"哔哩哔哩 cookies 刷新成功, 保存到 {self._cookies_file}")
|
|
506
|
+
self._cookies_file.write_text(json.dumps(self._credential.get_cookies()))
|
|
507
|
+
else:
|
|
508
|
+
logger.warning("哔哩哔哩 cookies 刷新需要包含 SESSDATA, ac_time_value, bili_jct")
|
|
509
|
+
|
|
510
|
+
return self._credential
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Bilibili 专栏文章解析器"""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Generator
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from msgspec import Struct
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TextNode(Struct):
|
|
10
|
+
"""文本节点"""
|
|
11
|
+
|
|
12
|
+
text: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ImageNode(Struct):
|
|
16
|
+
"""图片节点"""
|
|
17
|
+
|
|
18
|
+
url: str
|
|
19
|
+
alt: str | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Author(Struct):
|
|
23
|
+
"""作者信息"""
|
|
24
|
+
|
|
25
|
+
mid: int
|
|
26
|
+
name: str
|
|
27
|
+
face: str
|
|
28
|
+
fans: int
|
|
29
|
+
level: int
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Stats(Struct):
|
|
33
|
+
"""统计信息"""
|
|
34
|
+
|
|
35
|
+
view: int
|
|
36
|
+
favorite: int
|
|
37
|
+
like: int
|
|
38
|
+
reply: int
|
|
39
|
+
share: int
|
|
40
|
+
coin: int
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Meta(Struct):
|
|
44
|
+
"""文章元信息"""
|
|
45
|
+
|
|
46
|
+
id: int
|
|
47
|
+
title: str
|
|
48
|
+
summary: str
|
|
49
|
+
publish_time: int
|
|
50
|
+
author: Author
|
|
51
|
+
stats: Stats
|
|
52
|
+
tags: list[dict[str, Any]]
|
|
53
|
+
words: int
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ArticleInfo(Struct):
|
|
57
|
+
"""文章信息"""
|
|
58
|
+
|
|
59
|
+
type: str
|
|
60
|
+
meta: Meta
|
|
61
|
+
children: list[dict[str, Any]]
|
|
62
|
+
|
|
63
|
+
def gen_text_img(self) -> Generator[TextNode | ImageNode, None, None]:
|
|
64
|
+
"""生成文本和图片节点(保持顺序)"""
|
|
65
|
+
for child in self.children:
|
|
66
|
+
if child.get("type") == "ParagraphNode":
|
|
67
|
+
# 处理段落节点,提取所有文本内容
|
|
68
|
+
text_content = self._extract_text_from_children(child.get("children", []))
|
|
69
|
+
text_content = text_content.strip()
|
|
70
|
+
if text_content:
|
|
71
|
+
yield TextNode(text="\n\n" + text_content)
|
|
72
|
+
elif child.get("type") == "ImageNode":
|
|
73
|
+
# 处理图片节点
|
|
74
|
+
yield ImageNode(url=child.get("url", ""), alt=child.get("alt"))
|
|
75
|
+
elif child.get("type") == "VideoCardNode":
|
|
76
|
+
# 处理视频卡片节点(转换为文本描述)
|
|
77
|
+
yield TextNode(text=f"\n [视频卡片: {child.get('aid', 0)}]")
|
|
78
|
+
|
|
79
|
+
def _extract_text_from_children(self, children: list[dict[str, Any]]) -> str:
|
|
80
|
+
"""从子节点列表中提取文本内容"""
|
|
81
|
+
text_content = ""
|
|
82
|
+
for child in children:
|
|
83
|
+
if child.get("type") == "TextNode":
|
|
84
|
+
text_content += child.get("text", "")
|
|
85
|
+
elif child.get("type") in ["BoldNode", "FontSizeNode", "ColorNode"]:
|
|
86
|
+
# 递归处理嵌套节点
|
|
87
|
+
text_content += self._extract_text_from_children(child.get("children", []))
|
|
88
|
+
return text_content
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
def author_info(self) -> tuple[str, str]:
|
|
92
|
+
"""获取作者信息"""
|
|
93
|
+
return self.meta.author.name, self.meta.author.face
|
|
94
|
+
|
|
95
|
+
@property
|
|
96
|
+
def title(self) -> str:
|
|
97
|
+
"""获取标题"""
|
|
98
|
+
return self.meta.title
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def timestamp(self) -> int:
|
|
102
|
+
"""获取发布时间戳"""
|
|
103
|
+
return self.meta.publish_time
|
|
104
|
+
|
|
105
|
+
@property
|
|
106
|
+
def summary(self) -> str:
|
|
107
|
+
"""获取摘要"""
|
|
108
|
+
return self.meta.summary
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
def stats(self) -> Stats:
|
|
112
|
+
"""获取统计信息"""
|
|
113
|
+
return self.meta.stats
|
|
114
|
+
|
|
115
|
+
@property
|
|
116
|
+
def tags(self) -> list[str]:
|
|
117
|
+
"""获取标签列表"""
|
|
118
|
+
return [tag.get("name", "") for tag in self.meta.tags]
|
|
@@ -4,14 +4,14 @@ from typing import Any
|
|
|
4
4
|
from msgspec import Struct
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
class OpusTextNode(Struct, tag="OpusTextNode"):
|
|
7
|
+
class TextNode(Struct, tag="TextNode"):
|
|
8
8
|
"""图文动态文本节点"""
|
|
9
9
|
|
|
10
10
|
text: str
|
|
11
11
|
"""文本内容"""
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class OpusImageNode(Struct, tag="OpusImageNode"):
|
|
14
|
+
class ImageNode(Struct, tag="ImageNode"):
|
|
15
15
|
"""图文动态图片节点"""
|
|
16
16
|
|
|
17
17
|
url: str
|
|
@@ -20,7 +20,7 @@ class OpusImageNode(Struct, tag="OpusImageNode"):
|
|
|
20
20
|
"""图片描述"""
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class OpusAuthor(Struct):
|
|
23
|
+
class Author(Struct):
|
|
24
24
|
"""图文动态作者信息"""
|
|
25
25
|
|
|
26
26
|
name: str
|
|
@@ -30,7 +30,7 @@ class OpusAuthor(Struct):
|
|
|
30
30
|
pub_ts: int
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class OpusImage(Struct):
|
|
33
|
+
class Image(Struct):
|
|
34
34
|
"""图文动态图片信息"""
|
|
35
35
|
|
|
36
36
|
url: str
|
|
@@ -39,36 +39,36 @@ class OpusImage(Struct):
|
|
|
39
39
|
# size: float
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class
|
|
42
|
+
class Pic(Struct):
|
|
43
43
|
"""图文动态图片组"""
|
|
44
44
|
|
|
45
|
-
pics: list[OpusImage]
|
|
45
|
+
pics: list[Image]
|
|
46
46
|
style: int
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
class
|
|
49
|
+
class Text(Struct):
|
|
50
50
|
"""图文动态文本"""
|
|
51
51
|
|
|
52
52
|
nodes: list[dict[str, Any]]
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
class
|
|
55
|
+
class Paragraph(Struct):
|
|
56
56
|
"""图文动态段落"""
|
|
57
57
|
|
|
58
58
|
para_type: int
|
|
59
|
-
text:
|
|
60
|
-
pic:
|
|
59
|
+
text: Text | None = None
|
|
60
|
+
pic: Pic | None = None
|
|
61
61
|
# align: int = 0
|
|
62
62
|
# format: dict[str, Any] | None = None
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
class
|
|
65
|
+
class Content(Struct):
|
|
66
66
|
"""图文动态内容"""
|
|
67
67
|
|
|
68
|
-
paragraphs: list[
|
|
68
|
+
paragraphs: list[Paragraph]
|
|
69
69
|
|
|
70
70
|
|
|
71
|
-
class OpusStat(Struct):
|
|
71
|
+
class Stat(Struct):
|
|
72
72
|
"""图文动态统计"""
|
|
73
73
|
|
|
74
74
|
like: dict[str, Any] | None = None
|
|
@@ -78,12 +78,12 @@ class OpusStat(Struct):
|
|
|
78
78
|
coin: dict[str, Any] | None = None
|
|
79
79
|
|
|
80
80
|
|
|
81
|
-
class
|
|
81
|
+
class Module(Struct):
|
|
82
82
|
"""图文动态模块"""
|
|
83
83
|
|
|
84
84
|
module_type: str
|
|
85
|
-
module_author:
|
|
86
|
-
module_content:
|
|
85
|
+
module_author: Author | None = None
|
|
86
|
+
module_content: Content | None = None
|
|
87
87
|
# module_stat: OpusStat | None = None
|
|
88
88
|
|
|
89
89
|
|
|
@@ -93,19 +93,19 @@ class Basic(Struct):
|
|
|
93
93
|
title: str
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
class
|
|
96
|
+
class Info(Struct):
|
|
97
97
|
"""图文动态信息"""
|
|
98
98
|
|
|
99
99
|
id_str: str
|
|
100
100
|
type: int
|
|
101
|
-
modules: list[
|
|
101
|
+
modules: list[Module]
|
|
102
102
|
basic: Basic | None = None
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
class OpusItem(Struct):
|
|
106
106
|
"""图文动态项目"""
|
|
107
107
|
|
|
108
|
-
item:
|
|
108
|
+
item: Info
|
|
109
109
|
|
|
110
110
|
@property
|
|
111
111
|
def title(self) -> str | None:
|
|
@@ -124,21 +124,27 @@ class OpusItem(Struct):
|
|
|
124
124
|
return module.module_author.pub_ts
|
|
125
125
|
return None
|
|
126
126
|
|
|
127
|
-
def gen_text_img(self) -> Generator[
|
|
127
|
+
def gen_text_img(self) -> Generator[TextNode | ImageNode, None, None]:
|
|
128
128
|
"""生成图文节点(保持顺序)"""
|
|
129
129
|
for module in self.item.modules:
|
|
130
130
|
if module.module_type == "MODULE_TYPE_CONTENT" and module.module_content:
|
|
131
131
|
for paragraph in module.module_content.paragraphs:
|
|
132
132
|
# 处理文本段落
|
|
133
133
|
if paragraph.text and paragraph.text.nodes:
|
|
134
|
-
text_content =
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if text_content.strip():
|
|
139
|
-
yield OpusTextNode(text=text_content.strip())
|
|
134
|
+
text_content = self._extract_text_from_nodes(paragraph.text.nodes)
|
|
135
|
+
text_content = text_content.strip()
|
|
136
|
+
if text_content:
|
|
137
|
+
yield TextNode(text="\n\n" + text_content)
|
|
140
138
|
|
|
141
139
|
# 处理图片段落
|
|
142
140
|
if paragraph.pic and paragraph.pic.pics:
|
|
143
141
|
for pic in paragraph.pic.pics:
|
|
144
|
-
yield
|
|
142
|
+
yield ImageNode(url=pic.url)
|
|
143
|
+
|
|
144
|
+
def _extract_text_from_nodes(self, nodes: list[dict[str, Any]]) -> str:
|
|
145
|
+
"""从节点列表中提取文本内容"""
|
|
146
|
+
text_content = ""
|
|
147
|
+
for node in nodes:
|
|
148
|
+
if node.get("type") in ["TEXT_NODE_TYPE_WORD", "TEXT_NODE_TYPE_RICH"] and node.get("word"):
|
|
149
|
+
text_content += node["word"].get("words", "")
|
|
150
|
+
return text_content
|
|
@@ -12,7 +12,7 @@ def repr_path_task(path_task: Path | Task[Path]) -> str:
|
|
|
12
12
|
return f"task={path_task.get_name()}, done={path_task.done()}"
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
@dataclass(repr=False)
|
|
15
|
+
@dataclass(repr=False, slots=True)
|
|
16
16
|
class MediaContent:
|
|
17
17
|
path_task: Path | Task[Path]
|
|
18
18
|
|
|
@@ -27,14 +27,14 @@ class MediaContent:
|
|
|
27
27
|
return f"{prefix}({repr_path_task(self.path_task)})"
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
@dataclass(repr=False)
|
|
30
|
+
@dataclass(repr=False, slots=True)
|
|
31
31
|
class AudioContent(MediaContent):
|
|
32
32
|
"""音频内容"""
|
|
33
33
|
|
|
34
34
|
duration: float = 0.0
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
@dataclass(repr=False)
|
|
37
|
+
@dataclass(repr=False, slots=True)
|
|
38
38
|
class VideoContent(MediaContent):
|
|
39
39
|
"""视频内容"""
|
|
40
40
|
|
|
@@ -64,21 +64,21 @@ class VideoContent(MediaContent):
|
|
|
64
64
|
return repr + ")"
|
|
65
65
|
|
|
66
66
|
|
|
67
|
-
@dataclass(repr=False)
|
|
67
|
+
@dataclass(repr=False, slots=True)
|
|
68
68
|
class ImageContent(MediaContent):
|
|
69
69
|
"""图片内容"""
|
|
70
70
|
|
|
71
71
|
pass
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
@dataclass(repr=False)
|
|
74
|
+
@dataclass(repr=False, slots=True)
|
|
75
75
|
class DynamicContent(MediaContent):
|
|
76
76
|
"""动态内容 视频格式 后续转 gif"""
|
|
77
77
|
|
|
78
78
|
gif_path: Path | None = None
|
|
79
79
|
|
|
80
80
|
|
|
81
|
-
@dataclass(repr=False)
|
|
81
|
+
@dataclass(repr=False, slots=True)
|
|
82
82
|
class GraphicsContent(MediaContent):
|
|
83
83
|
"""图文内容 渲染时文字在前 图片在后"""
|
|
84
84
|
|
|
@@ -96,7 +96,7 @@ class GraphicsContent(MediaContent):
|
|
|
96
96
|
return repr + ")"
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
@dataclass
|
|
99
|
+
@dataclass(slots=True)
|
|
100
100
|
class Platform:
|
|
101
101
|
"""平台信息"""
|
|
102
102
|
|
|
@@ -106,7 +106,7 @@ class Platform:
|
|
|
106
106
|
""" 平台显示名称 """
|
|
107
107
|
|
|
108
108
|
|
|
109
|
-
@dataclass(repr=False)
|
|
109
|
+
@dataclass(repr=False, slots=True)
|
|
110
110
|
class Author:
|
|
111
111
|
"""作者信息"""
|
|
112
112
|
|
|
@@ -134,7 +134,7 @@ class Author:
|
|
|
134
134
|
return repr + ")"
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
@dataclass(repr=False)
|
|
137
|
+
@dataclass(repr=False, slots=True)
|
|
138
138
|
class ParseResult:
|
|
139
139
|
"""完整的解析结果"""
|
|
140
140
|
|