nonebot-plugin-parser 2.0.9__tar.gz → 2.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/PKG-INFO +5 -7
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/README.md +2 -4
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/pyproject.toml +7 -7
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/__init__.py +2 -2
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/config.py +6 -5
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/__init__.py +2 -2
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +4 -4
- nonebot_plugin_parser-2.0.11/src/nonebot_plugin_parser/parsers/xiaohongshu.py +259 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/common.py +86 -118
- nonebot_plugin_parser-2.0.9/src/nonebot_plugin_parser/parsers/xiaohongshu.py +0 -170
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/constants.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/download/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/download/task.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/download/ytdlp.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/exception.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/helper.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/matchers/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/matchers/preprocess.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/acfun.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/base.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/article.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/common.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/dynamic.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/favlist.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/live.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/data.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/douyin/slides.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/douyin/video.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/kuaishou.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/tiktok.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/twitter.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/weibo.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/parsers/youtube.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/base.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/default.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW-1.ttf +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/bilibili.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/douyin.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/kuaishou.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/media_button.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/tiktok.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/twitter.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/weibo.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/xiaohongshu.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/resources/youtube.png +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
- {nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/utils.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: nonebot-plugin-parser
|
|
3
|
-
Version: 2.0.9
|
|
4
|
-
Summary: NoneBot2
|
|
3
|
+
Version: 2.0.11
|
|
4
|
+
Summary: NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga
|
|
5
5
|
Keywords: nonebot,nonebot2,video,bilibili,youtube,tiktok,twitter,kuaishou,acfun,weibo,xiaohongshu,nga,douyin
|
|
6
6
|
Author: fllesser
|
|
7
7
|
Author-email: fllesser <fllessive@gmail.com>
|
|
@@ -19,7 +19,7 @@ Requires-Dist: nonebot-plugin-localstore>=0.7.4,<1.0.0
|
|
|
19
19
|
Requires-Dist: nonebot-plugin-apscheduler>=0.5.0,<1.0.0
|
|
20
20
|
Requires-Dist: nonebot-plugin-alconna>=0.59.4
|
|
21
21
|
Requires-Dist: nonebot-plugin-uninfo>=0.9.0
|
|
22
|
-
Requires-Dist: nonebot-plugin-htmlkit>=0.1.
|
|
22
|
+
Requires-Dist: nonebot-plugin-htmlkit>=0.1.0rc3 ; extra == 'htmlkit'
|
|
23
23
|
Requires-Dist: jinja2>=3.1.6 ; extra == 'htmlkit'
|
|
24
24
|
Requires-Python: >=3.10
|
|
25
25
|
Project-URL: IssueTracker, https://github.com/fllesser/nonebot-plugin-parser/issues
|
|
@@ -52,8 +52,6 @@ Description-Content-Type: text/markdown
|
|
|
52
52
|
|
|
53
53
|
## 📖 介绍
|
|
54
54
|
|
|
55
|
-
[nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver) 重制版
|
|
56
|
-
|
|
57
55
|
| 平台 | 触发的消息形态 | 视频 | 图集 | 音频 |
|
|
58
56
|
| ------- | ------------------------------------- | ---- | ---- | ---- |
|
|
59
57
|
| B站 | BV号/链接(包含短链,BV,av)/卡片/小程序 | ✅ | ✅ | ✅ |
|
|
@@ -69,7 +67,7 @@ Description-Content-Type: text/markdown
|
|
|
69
67
|
支持的链接,可参考 [测试链接](https://github.com/fllesser/nonebot-plugin-parser/blob/master/test_url.md)
|
|
70
68
|
|
|
71
69
|
## 🎨 效果图
|
|
72
|
-
|
|
70
|
+
插件默认启用 PIL 实现的通用媒体卡片渲染,效果图如下
|
|
73
71
|
<div align="center">
|
|
74
72
|
|
|
75
73
|
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/video.png" width="160" />
|
|
@@ -82,7 +80,7 @@ Description-Content-Type: text/markdown
|
|
|
82
80
|
|
|
83
81
|
## 💿 安装
|
|
84
82
|
> [!Warning]
|
|
85
|
-
> **如果你已经在使用 nonebot-plugin-resolver,请在安装此插件前卸载**
|
|
83
|
+
> **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**
|
|
86
84
|
|
|
87
85
|
<details open>
|
|
88
86
|
<summary>使用 nb-cli 安装/更新</summary>
|
|
@@ -22,8 +22,6 @@
|
|
|
22
22
|
|
|
23
23
|
## 📖 介绍
|
|
24
24
|
|
|
25
|
-
[nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver) 重制版
|
|
26
|
-
|
|
27
25
|
| 平台 | 触发的消息形态 | 视频 | 图集 | 音频 |
|
|
28
26
|
| ------- | ------------------------------------- | ---- | ---- | ---- |
|
|
29
27
|
| B站 | BV号/链接(包含短链,BV,av)/卡片/小程序 | ✅ | ✅ | ✅ |
|
|
@@ -39,7 +37,7 @@
|
|
|
39
37
|
支持的链接,可参考 [测试链接](https://github.com/fllesser/nonebot-plugin-parser/blob/master/test_url.md)
|
|
40
38
|
|
|
41
39
|
## 🎨 效果图
|
|
42
|
-
|
|
40
|
+
插件默认启用 PIL 实现的通用媒体卡片渲染,效果图如下
|
|
43
41
|
<div align="center">
|
|
44
42
|
|
|
45
43
|
<img src="https://raw.githubusercontent.com/fllesser/nonebot-plugin-parser/refs/heads/resources/resources/renderdamine/video.png" width="160" />
|
|
@@ -52,7 +50,7 @@
|
|
|
52
50
|
|
|
53
51
|
## 💿 安装
|
|
54
52
|
> [!Warning]
|
|
55
|
-
> **如果你已经在使用 nonebot-plugin-resolver,请在安装此插件前卸载**
|
|
53
|
+
> **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**
|
|
56
54
|
|
|
57
55
|
<details open>
|
|
58
56
|
<summary>使用 nb-cli 安装/更新</summary>
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nonebot-plugin-parser"
|
|
3
|
-
version = "2.0.9"
|
|
4
|
-
description = "NoneBot2
|
|
3
|
+
version = "2.0.11"
|
|
4
|
+
description = "NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga"
|
|
5
5
|
authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
requires-python = ">=3.10"
|
|
@@ -41,7 +41,7 @@ dependencies = [
|
|
|
41
41
|
]
|
|
42
42
|
|
|
43
43
|
[project.optional-dependencies]
|
|
44
|
-
htmlkit = ["nonebot-plugin-htmlkit>=0.1.
|
|
44
|
+
htmlkit = ["nonebot-plugin-htmlkit>=0.1.0rc3", "jinja2>=3.1.6"]
|
|
45
45
|
|
|
46
46
|
[project.urls]
|
|
47
47
|
Repository = "https://github.com/fllesser/nonebot-plugin-parser"
|
|
@@ -54,7 +54,7 @@ dev = [
|
|
|
54
54
|
"nonebot2[fastapi]>=2.4.3,<3.0.0",
|
|
55
55
|
"nonebot-adapter-telegram>=0.1.0b20",
|
|
56
56
|
"pre-commit>=4.3.0",
|
|
57
|
-
"ruff>=0.
|
|
57
|
+
"ruff>=0.14.0,<1.0.0",
|
|
58
58
|
"bump-my-version>=1.2.4",
|
|
59
59
|
]
|
|
60
60
|
|
|
@@ -72,7 +72,7 @@ test = [
|
|
|
72
72
|
all_extras = ["nonebot-plugin-htmlkit>=0.1.0rc1", "jinja2>=3.1.6"]
|
|
73
73
|
|
|
74
74
|
[tool.uv]
|
|
75
|
-
required-version = ">=0.
|
|
75
|
+
required-version = ">=0.9.2"
|
|
76
76
|
default-groups = ["test", "dev", "all_extras"]
|
|
77
77
|
|
|
78
78
|
[tool.nonebot]
|
|
@@ -99,7 +99,7 @@ addopts = [
|
|
|
99
99
|
[tool.poe.tasks]
|
|
100
100
|
test_others = "pytest tests/others --cov=src --cov-report=xml:coverage1.xml --junitxml=junit1.xml -n auto"
|
|
101
101
|
test_parsers = "pytest tests/parsers --cov=src --cov-report=xml:coverage2.xml --junitxml=junit2.xml -n auto"
|
|
102
|
-
test_render = "pytest tests/render --cov=src --cov-report=xml:coverage3.xml --junitxml=junit3.xml
|
|
102
|
+
test_render = "pytest tests/render --cov=src --cov-report=xml:coverage3.xml --junitxml=junit3.xml"
|
|
103
103
|
bump = "bump-my-version bump"
|
|
104
104
|
show-bump = "bump-my-version show-bump"
|
|
105
105
|
|
|
@@ -186,7 +186,7 @@ build-backend = "uv_build"
|
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
[tool.bumpversion]
|
|
189
|
-
current_version = "2.0.9"
|
|
189
|
+
current_version = "2.0.11"
|
|
190
190
|
commit = true
|
|
191
191
|
message = "🔖 release: bump vesion from {current_version} to {new_version}"
|
|
192
192
|
tag = true
|
{nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/__init__.py
RENAMED
|
@@ -11,8 +11,8 @@ from .matchers import clear_result_cache
|
|
|
11
11
|
from .utils import safe_unlink
|
|
12
12
|
|
|
13
13
|
__plugin_meta__ = PluginMetadata(
|
|
14
|
-
name="
|
|
15
|
-
description="
|
|
14
|
+
name="链接分享解析 Alconna 版",
|
|
15
|
+
description="全新通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga",
|
|
16
16
|
usage="发送支持平台的(BV号/链接/小程序/卡片)即可",
|
|
17
17
|
type="application",
|
|
18
18
|
homepage="https://github.com/fllesser/nonebot-plugin-parser",
|
{nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/config.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
+
from functools import cached_property
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Literal
|
|
4
5
|
|
|
@@ -56,22 +57,22 @@ class Config(BaseModel):
|
|
|
56
57
|
parser_need_forward_contents: bool = True
|
|
57
58
|
"""是否需要转发媒体内容"""
|
|
58
59
|
|
|
59
|
-
@
|
|
60
|
+
@cached_property
|
|
60
61
|
def nickname(self) -> str:
|
|
61
62
|
"""全局名称"""
|
|
62
63
|
return _nickname
|
|
63
64
|
|
|
64
|
-
@
|
|
65
|
+
@cached_property
|
|
65
66
|
def cache_dir(self) -> Path:
|
|
66
67
|
"""插件缓存目录"""
|
|
67
68
|
return _cache_dir
|
|
68
69
|
|
|
69
|
-
@
|
|
70
|
+
@cached_property
|
|
70
71
|
def config_dir(self) -> Path:
|
|
71
72
|
"""插件配置目录"""
|
|
72
73
|
return _config_dir
|
|
73
74
|
|
|
74
|
-
@
|
|
75
|
+
@cached_property
|
|
75
76
|
def data_dir(self) -> Path:
|
|
76
77
|
"""插件数据目录"""
|
|
77
78
|
return _data_dir
|
|
@@ -131,7 +132,7 @@ class Config(BaseModel):
|
|
|
131
132
|
"""是否在解析结果中附加原始URL"""
|
|
132
133
|
return self.parser_append_url
|
|
133
134
|
|
|
134
|
-
@
|
|
135
|
+
@cached_property
|
|
135
136
|
def custom_font(self) -> Path | None:
|
|
136
137
|
"""自定义字体"""
|
|
137
138
|
return (self.data_dir / self.parser_custom_font) if self.parser_custom_font else None
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# 导出所有 Parser 类
|
|
2
|
-
from .acfun import AcfunParser as AcfunParser
|
|
2
|
+
from .acfun import AcfunParser as AcfunParser # noqa: I001
|
|
3
3
|
from .base import BaseParser as BaseParser
|
|
4
|
-
from .bilibili import BilibiliParser as BilibiliParser
|
|
5
4
|
from .data import ParseResult as ParseResult
|
|
6
5
|
from .douyin import DouyinParser as DouyinParser
|
|
7
6
|
from .kuaishou import KuaiShouParser as KuaiShouParser
|
|
@@ -11,6 +10,7 @@ from .twitter import TwitterParser as TwitterParser
|
|
|
11
10
|
from .weibo import WeiBoParser as WeiBoParser
|
|
12
11
|
from .xiaohongshu import XiaoHongShuParser as XiaoHongShuParser
|
|
13
12
|
from .youtube import YouTubeParser as YouTubeParser
|
|
13
|
+
from .bilibili import BilibiliParser as BilibiliParser
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
16
|
"ParseResult",
|
|
@@ -460,15 +460,15 @@ class BilibiliParser(BaseParser):
|
|
|
460
460
|
"""初始化 bilibili api"""
|
|
461
461
|
|
|
462
462
|
if not pconfig.bili_ck:
|
|
463
|
-
logger.warning("未配置
|
|
463
|
+
logger.warning("未配置 parser_bili_ck, 无法使用哔哩哔哩 AI 总结, 可能无法解析 720p 以上画质视频")
|
|
464
464
|
return None
|
|
465
465
|
|
|
466
466
|
credential = Credential.from_cookies(ck2dict(pconfig.bili_ck))
|
|
467
467
|
if not await credential.check_valid() and self._cookies_file.exists():
|
|
468
|
-
logger.info(f"
|
|
468
|
+
logger.info(f"parser_bili_ck 已过期, 尝试从 {self._cookies_file} 加载")
|
|
469
469
|
credential = Credential.from_cookies(json.loads(self._cookies_file.read_text()))
|
|
470
470
|
else:
|
|
471
|
-
logger.info(f"
|
|
471
|
+
logger.info(f"parser_bili_ck 有效, 保存到 {self._cookies_file}")
|
|
472
472
|
self._cookies_file.write_text(json.dumps(credential.get_cookies()))
|
|
473
473
|
|
|
474
474
|
return credential
|
|
@@ -483,7 +483,7 @@ class BilibiliParser(BaseParser):
|
|
|
483
483
|
return None
|
|
484
484
|
|
|
485
485
|
if not await self._credential.check_valid():
|
|
486
|
-
logger.warning("哔哩哔哩 cookies 已过期, 请重新配置
|
|
486
|
+
logger.warning("哔哩哔哩 cookies 已过期, 请重新配置 parser_bili_ck")
|
|
487
487
|
return self._credential
|
|
488
488
|
|
|
489
489
|
if await self._credential.check_refresh():
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, ClassVar
|
|
4
|
+
from typing_extensions import override
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
import msgspec
|
|
9
|
+
from msgspec import Struct, field
|
|
10
|
+
from nonebot import logger
|
|
11
|
+
|
|
12
|
+
from ..exception import ParseException
|
|
13
|
+
from .base import BaseParser, Platform
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class XiaoHongShuParser(BaseParser):
|
|
17
|
+
# 平台信息
|
|
18
|
+
platform: ClassVar[Platform] = Platform(name="xiaohongshu", display_name="小红书")
|
|
19
|
+
|
|
20
|
+
# URL 正则表达式模式(keyword, pattern)
|
|
21
|
+
patterns: ClassVar[list[tuple[str, str]]] = [
|
|
22
|
+
("xiaohongshu.com", r"https?://(?:www\.)?xiaohongshu\.com/[A-Za-z0-9._?%&+=/#@-]*"),
|
|
23
|
+
("xhslink.com", r"https?://xhslink\.com/[A-Za-z0-9._?%&+=/#@-]*"),
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
def __init__(self):
|
|
27
|
+
super().__init__()
|
|
28
|
+
explore_headers = {
|
|
29
|
+
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,"
|
|
30
|
+
"image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
31
|
+
}
|
|
32
|
+
self.headers.update(explore_headers)
|
|
33
|
+
discovery_headers = {
|
|
34
|
+
"origin": "https://www.xiaohongshu.com",
|
|
35
|
+
"x-requested-with": "XMLHttpRequest",
|
|
36
|
+
"sec-fetch-site": "same-origin",
|
|
37
|
+
"sec-fetch-mode": "cors",
|
|
38
|
+
"sec-fetch-dest": "empty",
|
|
39
|
+
}
|
|
40
|
+
self.ios_headers.update(discovery_headers)
|
|
41
|
+
|
|
42
|
+
@override
|
|
43
|
+
async def parse(self, matched: re.Match[str]):
|
|
44
|
+
"""解析 URL 获取内容信息并下载资源
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
matched: 正则表达式匹配对象,由平台对应的模式匹配得到
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
ParseResult: 解析结果
|
|
51
|
+
|
|
52
|
+
Raises:
|
|
53
|
+
ParseException: 解析失败时抛出
|
|
54
|
+
"""
|
|
55
|
+
# 从匹配对象中获取原始URL
|
|
56
|
+
url = matched.group(0)
|
|
57
|
+
# 处理 xhslink 短链
|
|
58
|
+
if "xhslink" in url:
|
|
59
|
+
url = await self.get_redirect_url(url, self.ios_headers)
|
|
60
|
+
logger.debug(f"xhslink redirect url: {url}")
|
|
61
|
+
|
|
62
|
+
urlpath = urlparse(url).path
|
|
63
|
+
|
|
64
|
+
if urlpath.startswith("/explore/"):
|
|
65
|
+
xhs_id = urlpath.split("/")[-1]
|
|
66
|
+
return await self._parse_explore(url, xhs_id)
|
|
67
|
+
elif urlpath.startswith("/discovery/item/"):
|
|
68
|
+
return await self._parse_discovery(url)
|
|
69
|
+
else:
|
|
70
|
+
raise ParseException(f"不支持的小红书链接: {url}, urlpath: {urlpath}")
|
|
71
|
+
|
|
72
|
+
async def _parse_explore(self, url: str, xhs_id: str):
|
|
73
|
+
async with httpx.AsyncClient(
|
|
74
|
+
headers=self.headers,
|
|
75
|
+
timeout=self.timeout,
|
|
76
|
+
) as client:
|
|
77
|
+
response = await client.get(url)
|
|
78
|
+
html = response.text
|
|
79
|
+
logger.info(f"url: {response.url} | status_code: {response.status_code}")
|
|
80
|
+
|
|
81
|
+
json_obj = self._extract_initial_state_json(html)
|
|
82
|
+
|
|
83
|
+
# ["note"]["noteDetailMap"][xhs_id]["note"]
|
|
84
|
+
note_data = json_obj.get("note", {}).get("noteDetailMap", {}).get(xhs_id, {}).get("note", {})
|
|
85
|
+
if not note_data:
|
|
86
|
+
raise ParseException("can't find note detail in json_obj")
|
|
87
|
+
|
|
88
|
+
class Image(Struct):
|
|
89
|
+
urlDefault: str
|
|
90
|
+
|
|
91
|
+
class User(Struct):
|
|
92
|
+
nickname: str
|
|
93
|
+
avatar: str
|
|
94
|
+
|
|
95
|
+
class NoteDetail(Struct):
|
|
96
|
+
type: str
|
|
97
|
+
title: str
|
|
98
|
+
desc: str
|
|
99
|
+
user: User
|
|
100
|
+
imageList: list[Image] = field(default_factory=list)
|
|
101
|
+
video: Video | None = None
|
|
102
|
+
|
|
103
|
+
@property
|
|
104
|
+
def nickname(self) -> str:
|
|
105
|
+
return self.user.nickname
|
|
106
|
+
|
|
107
|
+
@property
|
|
108
|
+
def avatar_url(self) -> str:
|
|
109
|
+
return self.user.avatar
|
|
110
|
+
|
|
111
|
+
@property
|
|
112
|
+
def image_urls(self) -> list[str]:
|
|
113
|
+
return [item.urlDefault for item in self.imageList]
|
|
114
|
+
|
|
115
|
+
@property
|
|
116
|
+
def video_url(self) -> str | None:
|
|
117
|
+
if self.type != "video" or not self.video:
|
|
118
|
+
return None
|
|
119
|
+
return self.video.video_url
|
|
120
|
+
|
|
121
|
+
note_detail = msgspec.convert(note_data, type=NoteDetail)
|
|
122
|
+
|
|
123
|
+
contents = []
|
|
124
|
+
# 添加视频内容
|
|
125
|
+
if video_url := note_detail.video_url:
|
|
126
|
+
# 使用第一张图片作为封面
|
|
127
|
+
cover_url = note_detail.image_urls[0] if note_detail.image_urls else None
|
|
128
|
+
contents.append(self.create_video_content(video_url, cover_url))
|
|
129
|
+
|
|
130
|
+
# 添加图片内容
|
|
131
|
+
elif image_urls := note_detail.image_urls:
|
|
132
|
+
contents.extend(self.create_image_contents(image_urls))
|
|
133
|
+
|
|
134
|
+
# 构建作者
|
|
135
|
+
author = self.create_author(note_detail.nickname, note_detail.avatar_url)
|
|
136
|
+
|
|
137
|
+
return self.result(
|
|
138
|
+
title=note_detail.title,
|
|
139
|
+
text=note_detail.desc,
|
|
140
|
+
author=author,
|
|
141
|
+
contents=contents,
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
async def _parse_discovery(self, url: str):
|
|
145
|
+
async with httpx.AsyncClient(
|
|
146
|
+
headers=self.ios_headers,
|
|
147
|
+
timeout=self.timeout,
|
|
148
|
+
follow_redirects=True,
|
|
149
|
+
cookies=httpx.Cookies(),
|
|
150
|
+
trust_env=False,
|
|
151
|
+
) as client:
|
|
152
|
+
response = await client.get(url)
|
|
153
|
+
html = response.text
|
|
154
|
+
|
|
155
|
+
json_obj = self._extract_initial_state_json(html)
|
|
156
|
+
note_data = json_obj.get("noteData")
|
|
157
|
+
if not note_data:
|
|
158
|
+
raise ParseException("can't find noteData in json_obj")
|
|
159
|
+
preload_data = note_data.get("normalNotePreloadData", {})
|
|
160
|
+
note_data = note_data.get("data", {}).get("noteData", {})
|
|
161
|
+
if not note_data:
|
|
162
|
+
raise ParseException("can't find noteData in noteData.data")
|
|
163
|
+
|
|
164
|
+
class Image(Struct):
|
|
165
|
+
url: str
|
|
166
|
+
urlSizeLarge: str | None = None
|
|
167
|
+
|
|
168
|
+
class User(Struct):
|
|
169
|
+
nickName: str
|
|
170
|
+
avatar: str
|
|
171
|
+
|
|
172
|
+
class NoteData(Struct):
|
|
173
|
+
type: str
|
|
174
|
+
title: str
|
|
175
|
+
desc: str
|
|
176
|
+
user: User
|
|
177
|
+
time: int
|
|
178
|
+
lastUpdateTime: int
|
|
179
|
+
imageList: list[Image] = [] # 有水印
|
|
180
|
+
video: Video | None = None
|
|
181
|
+
|
|
182
|
+
@property
|
|
183
|
+
def image_urls(self) -> list[str]:
|
|
184
|
+
return [item.url for item in self.imageList]
|
|
185
|
+
|
|
186
|
+
@property
|
|
187
|
+
def video_url(self) -> str | None:
|
|
188
|
+
if self.type != "video" or not self.video:
|
|
189
|
+
return None
|
|
190
|
+
return self.video.video_url
|
|
191
|
+
|
|
192
|
+
class NormalNotePreloadData(Struct):
|
|
193
|
+
title: str
|
|
194
|
+
desc: str
|
|
195
|
+
imagesList: list[Image] = [] # 无水印, 但只有一张, 用于视频封面
|
|
196
|
+
|
|
197
|
+
@property
|
|
198
|
+
def image_urls(self) -> list[str]:
|
|
199
|
+
return [item.urlSizeLarge or item.url for item in self.imagesList]
|
|
200
|
+
|
|
201
|
+
note_data = msgspec.convert(note_data, type=NoteData)
|
|
202
|
+
|
|
203
|
+
contents = []
|
|
204
|
+
if video_url := note_data.video_url:
|
|
205
|
+
if preload_data:
|
|
206
|
+
preload_data = msgspec.convert(preload_data, type=NormalNotePreloadData)
|
|
207
|
+
img_urls = preload_data.image_urls
|
|
208
|
+
else:
|
|
209
|
+
img_urls = note_data.image_urls
|
|
210
|
+
contents.append(self.create_video_content(video_url, img_urls[0]))
|
|
211
|
+
elif img_urls := note_data.image_urls:
|
|
212
|
+
contents.extend(self.create_image_contents(img_urls))
|
|
213
|
+
|
|
214
|
+
return self.result(
|
|
215
|
+
title=note_data.title,
|
|
216
|
+
author=self.create_author(note_data.user.nickName, note_data.user.avatar),
|
|
217
|
+
contents=contents,
|
|
218
|
+
text=note_data.desc,
|
|
219
|
+
timestamp=note_data.time // 1000,
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
def _extract_initial_state_json(self, html: str) -> dict[str, Any]:
|
|
223
|
+
pattern = r"window\.__INITIAL_STATE__=(.*?)</script>"
|
|
224
|
+
matched = re.search(pattern, html)
|
|
225
|
+
if not matched:
|
|
226
|
+
raise ParseException("小红书分享链接失效或内容已删除")
|
|
227
|
+
|
|
228
|
+
json_str = matched.group(1).replace("undefined", "null")
|
|
229
|
+
return json.loads(json_str)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class Stream(Struct):
|
|
233
|
+
h264: list[dict[str, Any]] | None = None
|
|
234
|
+
h265: list[dict[str, Any]] | None = None
|
|
235
|
+
av1: list[dict[str, Any]] | None = None
|
|
236
|
+
h266: list[dict[str, Any]] | None = None
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class Media(Struct):
|
|
240
|
+
stream: Stream
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class Video(Struct):
|
|
244
|
+
media: Media
|
|
245
|
+
|
|
246
|
+
@property
|
|
247
|
+
def video_url(self) -> str | None:
|
|
248
|
+
stream = self.media.stream
|
|
249
|
+
|
|
250
|
+
# h264 有水印,h265 无水印
|
|
251
|
+
if stream.h265:
|
|
252
|
+
return stream.h265[0]["masterUrl"]
|
|
253
|
+
elif stream.h264:
|
|
254
|
+
return stream.h264[0]["masterUrl"]
|
|
255
|
+
elif stream.av1:
|
|
256
|
+
return stream.av1[0]["masterUrl"]
|
|
257
|
+
elif stream.h266:
|
|
258
|
+
return stream.h266[0]["masterUrl"]
|
|
259
|
+
return None
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
+
from functools import lru_cache
|
|
2
3
|
from io import BytesIO
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import ClassVar
|
|
@@ -19,6 +20,53 @@ class FontInfo:
|
|
|
19
20
|
cjk_width: int
|
|
20
21
|
ascii_width: int
|
|
21
22
|
|
|
23
|
+
def __hash__(self) -> int:
|
|
24
|
+
"""实现哈希方法以支持 @lru_cache"""
|
|
25
|
+
return hash((self.line_height, self.cjk_width, self.ascii_width))
|
|
26
|
+
|
|
27
|
+
@lru_cache(maxsize=100)
|
|
28
|
+
def get_char_width(self, char: str) -> int:
|
|
29
|
+
"""获取字符宽度,使用缓存优化"""
|
|
30
|
+
bbox = self.font.getbbox(char)
|
|
31
|
+
width = int(bbox[2] - bbox[0])
|
|
32
|
+
return width
|
|
33
|
+
|
|
34
|
+
def get_char_width_fast(self, char: str) -> int:
|
|
35
|
+
"""快速获取单个字符宽度"""
|
|
36
|
+
if self._is_cjk_char(char):
|
|
37
|
+
return self.cjk_width
|
|
38
|
+
elif self._is_ascii_char(char):
|
|
39
|
+
return self.ascii_width
|
|
40
|
+
else:
|
|
41
|
+
return self.get_char_width(char)
|
|
42
|
+
|
|
43
|
+
def get_text_width(self, text: str) -> int:
|
|
44
|
+
"""计算文本宽度,使用预计算的字符宽度优化性能
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
text: 要计算宽度的文本
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
文本宽度(像素)
|
|
51
|
+
"""
|
|
52
|
+
if not text:
|
|
53
|
+
return 0
|
|
54
|
+
|
|
55
|
+
total_width = 0
|
|
56
|
+
for char in text:
|
|
57
|
+
total_width += self.get_char_width_fast(char)
|
|
58
|
+
return total_width
|
|
59
|
+
|
|
60
|
+
@staticmethod
|
|
61
|
+
def _is_cjk_char(char: str) -> bool:
|
|
62
|
+
"""判断是否为中日韩字符"""
|
|
63
|
+
return "\u4e00" <= char <= "\u9fff"
|
|
64
|
+
|
|
65
|
+
@staticmethod
|
|
66
|
+
def _is_ascii_char(char: str) -> bool:
|
|
67
|
+
"""判断是否为ASCII字符"""
|
|
68
|
+
return ord(char) < 128
|
|
69
|
+
|
|
22
70
|
|
|
23
71
|
@dataclass(eq=False, frozen=True, slots=True)
|
|
24
72
|
class FontSet:
|
|
@@ -28,6 +76,7 @@ class FontSet:
|
|
|
28
76
|
title_font: FontInfo
|
|
29
77
|
text_font: FontInfo
|
|
30
78
|
extra_font: FontInfo
|
|
79
|
+
indicator_font: FontInfo
|
|
31
80
|
|
|
32
81
|
|
|
33
82
|
@dataclass(eq=False, frozen=True, slots=True)
|
|
@@ -169,9 +218,9 @@ class CommonRenderer(ImageRenderer):
|
|
|
169
218
|
"""转发缩放比例"""
|
|
170
219
|
|
|
171
220
|
# 字体大小和行高
|
|
172
|
-
FONT_SIZES: ClassVar[dict[str, int]] = {"name": 28, "title": 30, "text": 24, "extra": 24}
|
|
221
|
+
FONT_SIZES: ClassVar[dict[str, int]] = {"name": 28, "title": 30, "text": 24, "extra": 24, "indicator": 60}
|
|
173
222
|
"""字体大小"""
|
|
174
|
-
LINE_HEIGHTS: ClassVar[dict[str, int]] = {"name": 32, "title": 36, "text": 28, "extra": 28}
|
|
223
|
+
LINE_HEIGHTS: ClassVar[dict[str, int]] = {"name": 32, "title": 36, "text": 28, "extra": 28, "indicator": 68}
|
|
175
224
|
"""行高"""
|
|
176
225
|
|
|
177
226
|
RESOURCES_DIR: ClassVar[Path] = Path(__file__).parent / "resources"
|
|
@@ -228,6 +277,7 @@ class CommonRenderer(ImageRenderer):
|
|
|
228
277
|
title_font=font_infos["title"],
|
|
229
278
|
text_font=font_infos["text"],
|
|
230
279
|
extra_font=font_infos["extra"],
|
|
280
|
+
indicator_font=font_infos["indicator"],
|
|
231
281
|
)
|
|
232
282
|
|
|
233
283
|
logger.success(f"加载字体「{self.font_path.name}」成功")
|
|
@@ -859,11 +909,11 @@ class CommonRenderer(ImageRenderer):
|
|
|
859
909
|
if section.alt_text:
|
|
860
910
|
y_pos += self.SECTION_SPACING # 图片和alt文本之间的间距
|
|
861
911
|
# 计算文本居中位置
|
|
862
|
-
|
|
863
|
-
text_width =
|
|
912
|
+
extra_font_info = self.fontset.extra_font
|
|
913
|
+
text_width = extra_font_info.get_text_width(section.alt_text)
|
|
864
914
|
text_x = self.PADDING + (content_width - text_width) // 2
|
|
865
|
-
draw.text((text_x, y_pos), section.alt_text, fill=self.EXTRA_COLOR, font=
|
|
866
|
-
y_pos +=
|
|
915
|
+
draw.text((text_x, y_pos), section.alt_text, fill=self.EXTRA_COLOR, font=extra_font_info.font)
|
|
916
|
+
y_pos += extra_font_info.line_height
|
|
867
917
|
|
|
868
918
|
return y_pos + self.SECTION_SPACING
|
|
869
919
|
|
|
@@ -985,19 +1035,14 @@ class CommonRenderer(ImageRenderer):
|
|
|
985
1035
|
|
|
986
1036
|
# 绘制+N文字
|
|
987
1037
|
text = f"+{count}"
|
|
988
|
-
|
|
989
|
-
font_size = min(img_width, img_height) // 4
|
|
990
|
-
font = ImageFont.truetype(self.font_path, font_size)
|
|
991
|
-
|
|
1038
|
+
font_info = self.fontset.indicator_font
|
|
992
1039
|
# 计算文字位置(居中)
|
|
993
|
-
|
|
994
|
-
text_width = bbox[2] - bbox[0]
|
|
995
|
-
text_height = bbox[3] - bbox[1]
|
|
1040
|
+
text_width = font_info.get_text_width(text)
|
|
996
1041
|
text_x = img_x + (img_width - text_width) // 2
|
|
997
|
-
text_y = img_y + (img_height -
|
|
1042
|
+
text_y = img_y + (img_height - font_info.line_height) // 2
|
|
998
1043
|
|
|
999
|
-
#
|
|
1000
|
-
draw.text((text_x, text_y), text, fill=(255, 255, 255
|
|
1044
|
+
# 绘制白色文字(不透明)
|
|
1045
|
+
draw.text((text_x, text_y), text, fill=(255, 255, 255), font=font_info.font)
|
|
1001
1046
|
|
|
1002
1047
|
def _draw_rounded_rectangle(
|
|
1003
1048
|
self, image: Image.Image, bbox: tuple[int, int, int, int], fill_color: tuple[int, int, int], radius: int = 8
|
|
@@ -1056,81 +1101,14 @@ class CommonRenderer(ImageRenderer):
|
|
|
1056
1101
|
lines = []
|
|
1057
1102
|
paragraphs = text.split("\n")
|
|
1058
1103
|
|
|
1059
|
-
# 字符宽度缓存
|
|
1060
|
-
char_width_cache = {}
|
|
1061
|
-
|
|
1062
|
-
def get_char_width(char: str) -> int:
|
|
1063
|
-
"""获取字符宽度,使用缓存优化"""
|
|
1064
|
-
if char in char_width_cache:
|
|
1065
|
-
return char_width_cache[char]
|
|
1066
|
-
|
|
1067
|
-
bbox = font_info.font.getbbox(char)
|
|
1068
|
-
width = int(bbox[2] - bbox[0])
|
|
1069
|
-
char_width_cache[char] = width
|
|
1070
|
-
return width
|
|
1071
|
-
|
|
1072
|
-
def is_cjk_char(char: str) -> bool:
|
|
1073
|
-
"""判断是否为中日韩字符"""
|
|
1074
|
-
return "\u4e00" <= char <= "\u9fff"
|
|
1075
|
-
|
|
1076
|
-
def is_ascii_char(char: str) -> bool:
|
|
1077
|
-
"""判断是否为ASCII字符"""
|
|
1078
|
-
return ord(char) < 128
|
|
1079
|
-
|
|
1080
1104
|
def is_punctuation(char: str) -> bool:
|
|
1081
|
-
"""
|
|
1105
|
+
"""判断是否为不能为行首的标点符号"""
|
|
1082
1106
|
# 中文标点符号
|
|
1083
|
-
chinese_punctuation = "
|
|
1107
|
+
chinese_punctuation = ",。!?;:、)】》〉」』〕〗〙〛…—·"
|
|
1084
1108
|
# 英文标点符号
|
|
1085
|
-
english_punctuation = ",.;:!?
|
|
1086
|
-
# Unicode 标点符号类别
|
|
1087
|
-
import unicodedata
|
|
1088
|
-
|
|
1089
|
-
return (
|
|
1090
|
-
char in chinese_punctuation or char in english_punctuation or unicodedata.category(char).startswith("P")
|
|
1091
|
-
)
|
|
1092
|
-
|
|
1093
|
-
def get_text_width_fast(text: str) -> int:
|
|
1094
|
-
"""快速计算文本宽度"""
|
|
1095
|
-
if not text:
|
|
1096
|
-
return 0
|
|
1097
|
-
|
|
1098
|
-
total_width = 0
|
|
1099
|
-
for char in text:
|
|
1100
|
-
if is_cjk_char(char):
|
|
1101
|
-
total_width += font_info.cjk_width
|
|
1102
|
-
elif is_ascii_char(char):
|
|
1103
|
-
total_width += font_info.ascii_width
|
|
1104
|
-
else:
|
|
1105
|
-
total_width += get_char_width(char)
|
|
1106
|
-
return total_width
|
|
1107
|
-
|
|
1108
|
-
def find_break_point(text: str) -> int:
|
|
1109
|
-
"""找到合适的断点位置,避免标点符号在行首"""
|
|
1110
|
-
if not text:
|
|
1111
|
-
return 0
|
|
1112
|
-
|
|
1113
|
-
# 从后往前找断点
|
|
1114
|
-
for i in range(len(text) - 1, 0, -1):
|
|
1115
|
-
char = text[i]
|
|
1116
|
-
|
|
1117
|
-
# 优先在空格处断行
|
|
1118
|
-
if char == " ":
|
|
1119
|
-
return i
|
|
1120
|
-
|
|
1121
|
-
# 对于中文,可以在任意字符处断行
|
|
1122
|
-
if is_cjk_char(char):
|
|
1123
|
-
return i
|
|
1109
|
+
english_punctuation = ",.;:!?)]}"
|
|
1124
1110
|
|
|
1125
|
-
|
|
1126
|
-
if is_punctuation(char):
|
|
1127
|
-
continue
|
|
1128
|
-
|
|
1129
|
-
# 其他字符可以作为断点
|
|
1130
|
-
return i
|
|
1131
|
-
|
|
1132
|
-
# 如果找不到合适的断点,在中间位置断行
|
|
1133
|
-
return max(1, len(text) // 2)
|
|
1111
|
+
return char in chinese_punctuation or char in english_punctuation
|
|
1134
1112
|
|
|
1135
1113
|
for paragraph in paragraphs:
|
|
1136
1114
|
if not paragraph:
|
|
@@ -1138,51 +1116,41 @@ class CommonRenderer(ImageRenderer):
|
|
|
1138
1116
|
continue
|
|
1139
1117
|
|
|
1140
1118
|
current_line = ""
|
|
1119
|
+
current_line_width = 0
|
|
1141
1120
|
remaining_text = paragraph
|
|
1142
1121
|
|
|
1143
1122
|
while remaining_text:
|
|
1123
|
+
next_char = remaining_text[0]
|
|
1124
|
+
char_width = font_info.get_char_width_fast(next_char)
|
|
1125
|
+
|
|
1144
1126
|
# 如果当前行为空,直接添加字符
|
|
1145
1127
|
if not current_line:
|
|
1146
|
-
current_line =
|
|
1128
|
+
current_line = next_char
|
|
1129
|
+
current_line_width = char_width
|
|
1147
1130
|
remaining_text = remaining_text[1:]
|
|
1148
1131
|
continue
|
|
1149
1132
|
|
|
1150
|
-
#
|
|
1151
|
-
|
|
1152
|
-
|
|
1133
|
+
# 如果是标点符号,直接添加到当前行(标点符号不应该单独成行)
|
|
1134
|
+
if is_punctuation(next_char):
|
|
1135
|
+
current_line += next_char
|
|
1136
|
+
current_line_width += char_width
|
|
1137
|
+
remaining_text = remaining_text[1:]
|
|
1138
|
+
continue
|
|
1139
|
+
|
|
1140
|
+
# 测试添加下一个字符后的宽度
|
|
1141
|
+
test_width = current_line_width + char_width
|
|
1153
1142
|
|
|
1154
1143
|
if test_width <= max_width:
|
|
1155
1144
|
# 宽度合适,继续添加
|
|
1156
|
-
current_line
|
|
1145
|
+
current_line += next_char
|
|
1146
|
+
current_line_width = test_width
|
|
1157
1147
|
remaining_text = remaining_text[1:]
|
|
1158
1148
|
else:
|
|
1159
1149
|
# 宽度超限,需要断行
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
remaining_text = remaining_text[1:]
|
|
1165
|
-
else:
|
|
1166
|
-
# 尝试找到合适的断点
|
|
1167
|
-
break_point = find_break_point(current_line)
|
|
1168
|
-
|
|
1169
|
-
# 保存当前行
|
|
1170
|
-
lines.append(current_line[:break_point].rstrip())
|
|
1171
|
-
|
|
1172
|
-
# 开始新行,跳过行首的标点符号
|
|
1173
|
-
current_line = current_line[break_point:].lstrip()
|
|
1174
|
-
|
|
1175
|
-
# 如果新行以标点符号开头,将其移到上一行
|
|
1176
|
-
while current_line and is_punctuation(current_line[0]):
|
|
1177
|
-
if lines:
|
|
1178
|
-
lines[-1] += current_line[0]
|
|
1179
|
-
current_line = current_line[1:]
|
|
1180
|
-
else:
|
|
1181
|
-
break
|
|
1182
|
-
|
|
1183
|
-
if not current_line:
|
|
1184
|
-
current_line = remaining_text[0]
|
|
1185
|
-
remaining_text = remaining_text[1:]
|
|
1150
|
+
lines.append(current_line)
|
|
1151
|
+
current_line = next_char
|
|
1152
|
+
current_line_width = char_width
|
|
1153
|
+
remaining_text = remaining_text[1:]
|
|
1186
1154
|
|
|
1187
1155
|
# 保存最后一行
|
|
1188
1156
|
if current_line:
|
|
@@ -1,170 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import re
|
|
3
|
-
from typing import ClassVar
|
|
4
|
-
from typing_extensions import override
|
|
5
|
-
from urllib.parse import parse_qs, urlparse
|
|
6
|
-
|
|
7
|
-
import httpx
|
|
8
|
-
import msgspec
|
|
9
|
-
|
|
10
|
-
from ..exception import ParseException
|
|
11
|
-
from .base import BaseParser, Platform
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class XiaoHongShuParser(BaseParser):
|
|
15
|
-
# 平台信息
|
|
16
|
-
platform: ClassVar[Platform] = Platform(name="xiaohongshu", display_name="小红书")
|
|
17
|
-
|
|
18
|
-
# URL 正则表达式模式(keyword, pattern)
|
|
19
|
-
patterns: ClassVar[list[tuple[str, str]]] = [
|
|
20
|
-
("xiaohongshu.com", r"https?://(?:www\.)?xiaohongshu\.com/[A-Za-z0-9._?%&+=/#@-]*"),
|
|
21
|
-
("xhslink.com", r"https?://xhslink\.com/[A-Za-z0-9._?%&+=/#@-]*"),
|
|
22
|
-
]
|
|
23
|
-
|
|
24
|
-
def __init__(self):
|
|
25
|
-
super().__init__()
|
|
26
|
-
extra_headers = {
|
|
27
|
-
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,"
|
|
28
|
-
"application/signed-exchange;v=b3;q=0.9",
|
|
29
|
-
}
|
|
30
|
-
self.headers.update(extra_headers)
|
|
31
|
-
|
|
32
|
-
@override
|
|
33
|
-
async def parse(self, matched: re.Match[str]):
|
|
34
|
-
"""解析 URL 获取内容信息并下载资源
|
|
35
|
-
|
|
36
|
-
Args:
|
|
37
|
-
matched: 正则表达式匹配对象,由平台对应的模式匹配得到
|
|
38
|
-
|
|
39
|
-
Returns:
|
|
40
|
-
ParseResult: 解析结果
|
|
41
|
-
|
|
42
|
-
Raises:
|
|
43
|
-
ParseException: 解析失败时抛出
|
|
44
|
-
"""
|
|
45
|
-
# 从匹配对象中获取原始URL
|
|
46
|
-
url = matched.group(0)
|
|
47
|
-
# 处理 xhslink 短链
|
|
48
|
-
if "xhslink" in url:
|
|
49
|
-
url = await self.get_redirect_url(url, self.headers)
|
|
50
|
-
# ?: 非捕获组
|
|
51
|
-
pattern = r"(?:/explore/|/discovery/item/|source=note&noteId=)(\w+)"
|
|
52
|
-
match_result = re.search(pattern, url)
|
|
53
|
-
if not match_result:
|
|
54
|
-
raise ParseException("小红书分享链接不完整")
|
|
55
|
-
xhs_id = match_result.group(1)
|
|
56
|
-
# 解析 URL 参数
|
|
57
|
-
parsed_url = urlparse(url)
|
|
58
|
-
params = parse_qs(parsed_url.query)
|
|
59
|
-
# 提取 xsec_source 和 xsec_token
|
|
60
|
-
xsec_source = params.get("xsec_source", [None])[0] or "pc_feed"
|
|
61
|
-
xsec_token = params.get("xsec_token", [None])[0]
|
|
62
|
-
|
|
63
|
-
# 构造完整 URL
|
|
64
|
-
url = f"https://www.xiaohongshu.com/explore/{xhs_id}?xsec_source={xsec_source}&xsec_token={xsec_token}"
|
|
65
|
-
async with httpx.AsyncClient(headers=self.headers, timeout=self.timeout) as client:
|
|
66
|
-
response = await client.get(url)
|
|
67
|
-
html = response.text
|
|
68
|
-
|
|
69
|
-
pattern = r"window.__INITIAL_STATE__=(.*?)</script>"
|
|
70
|
-
match_result = re.search(pattern, html)
|
|
71
|
-
if not match_result:
|
|
72
|
-
raise ParseException("小红书分享链接失效或内容已删除")
|
|
73
|
-
|
|
74
|
-
json_str = match_result.group(1)
|
|
75
|
-
json_str = json_str.replace("undefined", "null")
|
|
76
|
-
|
|
77
|
-
json_obj = json.loads(json_str)
|
|
78
|
-
|
|
79
|
-
note_data = json_obj["note"]["noteDetailMap"][xhs_id]["note"]
|
|
80
|
-
note_detail = msgspec.convert(note_data, type=NoteDetail)
|
|
81
|
-
|
|
82
|
-
# 使用新的简洁构建方式
|
|
83
|
-
contents = []
|
|
84
|
-
|
|
85
|
-
# 添加视频内容
|
|
86
|
-
if video_url := note_detail.video_url:
|
|
87
|
-
# 使用第一张图片作为封面
|
|
88
|
-
cover_url = note_detail.image_urls[0] if note_detail.image_urls else None
|
|
89
|
-
contents.append(self.create_video_content(video_url, cover_url))
|
|
90
|
-
|
|
91
|
-
# 添加图片内容
|
|
92
|
-
elif image_urls := note_detail.image_urls:
|
|
93
|
-
contents.extend(self.create_image_contents(image_urls))
|
|
94
|
-
|
|
95
|
-
# 构建作者
|
|
96
|
-
author = self.create_author(note_detail.nickname, note_detail.avatar_url)
|
|
97
|
-
|
|
98
|
-
return self.result(
|
|
99
|
-
title=note_detail.title_desc,
|
|
100
|
-
author=author,
|
|
101
|
-
contents=contents,
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
from msgspec import Struct, field
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
class Image(Struct):
|
|
109
|
-
urlDefault: str
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
class Stream(Struct):
|
|
113
|
-
h264: list[dict] | None = None
|
|
114
|
-
h265: list[dict] | None = None
|
|
115
|
-
av1: list[dict] | None = None
|
|
116
|
-
h266: list[dict] | None = None
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
class Media(Struct):
|
|
120
|
-
stream: Stream
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
class Video(Struct):
|
|
124
|
-
media: Media
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
class User(Struct):
|
|
128
|
-
nickname: str
|
|
129
|
-
avatar: str
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
class NoteDetail(Struct):
|
|
133
|
-
type: str
|
|
134
|
-
title: str
|
|
135
|
-
desc: str
|
|
136
|
-
user: User
|
|
137
|
-
imageList: list[Image] = field(default_factory=list)
|
|
138
|
-
video: Video | None = None
|
|
139
|
-
|
|
140
|
-
@property
|
|
141
|
-
def nickname(self) -> str:
|
|
142
|
-
return self.user.nickname
|
|
143
|
-
|
|
144
|
-
@property
|
|
145
|
-
def avatar_url(self) -> str:
|
|
146
|
-
return self.user.avatar
|
|
147
|
-
|
|
148
|
-
@property
|
|
149
|
-
def title_desc(self) -> str:
|
|
150
|
-
return f"{self.title}\n{self.desc}".strip()
|
|
151
|
-
|
|
152
|
-
@property
|
|
153
|
-
def image_urls(self) -> list[str]:
|
|
154
|
-
return [item.urlDefault for item in self.imageList]
|
|
155
|
-
|
|
156
|
-
@property
|
|
157
|
-
def video_url(self) -> str | None:
|
|
158
|
-
if self.type != "video" or not self.video:
|
|
159
|
-
return None
|
|
160
|
-
stream = self.video.media.stream
|
|
161
|
-
|
|
162
|
-
if stream.h264:
|
|
163
|
-
return stream.h264[0]["masterUrl"]
|
|
164
|
-
elif stream.h265:
|
|
165
|
-
return stream.h265[0]["masterUrl"]
|
|
166
|
-
elif stream.av1:
|
|
167
|
-
return stream.av1[0]["masterUrl"]
|
|
168
|
-
elif stream.h266:
|
|
169
|
-
return stream.h266[0]["masterUrl"]
|
|
170
|
-
return None
|
{nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/constants.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/exception.py
RENAMED
|
File without changes
|
{nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nonebot_plugin_parser-2.0.9 → nonebot_plugin_parser-2.0.11}/src/nonebot_plugin_parser/utils.py
RENAMED
|
File without changes
|