nonebot-plugin-parser 2.3.5__tar.gz → 2.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/PKG-INFO +6 -3
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/README.md +3 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/pyproject.toml +4 -4
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/config.py +7 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/matchers/rule.py +40 -31
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/acfun/__init__.py +151 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/acfun/video.py +77 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/base.py +1 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +5 -47
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/dynamic.py +1 -1
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/data.py +2 -2
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +6 -7
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/douyin/slides.py +4 -1
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/douyin/video.py +4 -1
- nonebot_plugin_parser-2.3.5/src/nonebot_plugin_parser/parsers/kuaishou.py → nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/kuaishou/__init__.py +8 -71
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/kuaishou/states.py +62 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/tiktok.py +2 -2
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/twitter.py +3 -2
- nonebot_plugin_parser-2.3.5/src/nonebot_plugin_parser/parsers/weibo.py → nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/weibo/__init__.py +30 -181
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/weibo/article.py +23 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/weibo/common.py +110 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/weibo/show.py +61 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py +144 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/xiaohongshu/common.py +33 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/xiaohongshu/discovery.py +61 -0
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/xiaohongshu/explore.py +61 -0
- nonebot_plugin_parser-2.3.5/src/nonebot_plugin_parser/parsers/youtube.py → nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/youtube/__init__.py +12 -57
- nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/youtube/meta.py +43 -0
- nonebot_plugin_parser-2.3.5/src/nonebot_plugin_parser/parsers/acfun.py +0 -151
- nonebot_plugin_parser-2.3.5/src/nonebot_plugin_parser/parsers/xiaohongshu.py +0 -252
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/__init__.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/constants.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/download/__init__.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/download/task.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/download/ytdlp.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/exception.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/helper.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/matchers/__init__.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/article.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/common.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/favlist.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/live.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/base.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/common.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/default.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/htmlrender.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/HYSongYunLangHeiW-1.ttf +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/bilibili.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/douyin.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/kuaishou.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/media_button.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/tiktok.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/twitter.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/weibo.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/xiaohongshu.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/resources/youtube.png +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/templates/card.html.jinja +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
- {nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: nonebot-plugin-parser
|
|
3
|
-
Version: 2.3.5
|
|
3
|
+
Version: 2.3.7
|
|
4
4
|
Summary: NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga
|
|
5
5
|
Keywords: acfun,bilibili,douyin,kuaishou,nga,nonebot,nonebot2,tiktok,twitter,video,weibo,xiaohongshu,youtube
|
|
6
6
|
Author: fllesser
|
|
@@ -23,12 +23,12 @@ Requires-Dist: nonebot2>=2.4.3,<3.0.0
|
|
|
23
23
|
Requires-Dist: pillow>=11.0.0
|
|
24
24
|
Requires-Dist: tqdm>=4.67.1,<5.0.0
|
|
25
25
|
Requires-Dist: aiofiles>=25.1.0
|
|
26
|
-
Requires-Dist: curl-cffi>=0.13.0,<1.0.0
|
|
27
26
|
Requires-Dist: httpx>=0.27.2,<1.0.0
|
|
28
27
|
Requires-Dist: msgspec>=0.20.0,<1.0.0
|
|
29
28
|
Requires-Dist: apilmoji[tqdm]>=0.2.4,<1.0.0
|
|
30
29
|
Requires-Dist: beautifulsoup4>=4.12.0,<5.0.0
|
|
31
|
-
Requires-Dist:
|
|
30
|
+
Requires-Dist: curl-cffi>=0.13.0,!=0.14.0,<1.0.0
|
|
31
|
+
Requires-Dist: bilibili-api-python>=17.4.1,<18.0.0
|
|
32
32
|
Requires-Dist: nonebot-plugin-alconna>=0.59.4,<1.0.0
|
|
33
33
|
Requires-Dist: nonebot-plugin-apscheduler>=0.5.0,<1.0.0
|
|
34
34
|
Requires-Dist: nonebot-plugin-localstore>=0.7.4,<1.0.0
|
|
@@ -263,6 +263,9 @@ parser_bili_video_codes='["avc", "av01", "hev"]'
|
|
|
263
263
|
# 360p(16), 480p(32), 720p(64), 1080p(80), 1080p+(112), 1080p_60(116), 4k(120)
|
|
264
264
|
parser_bili_video_quality=80
|
|
265
265
|
|
|
266
|
+
# [可选] 小红书 Cookie, 部分链接解析有水印,可填
|
|
267
|
+
parser_xhs_ck=""
|
|
268
|
+
|
|
266
269
|
# [可选] Youtube Cookie, Youtube 视频因人机检测下载失败,需填
|
|
267
270
|
parser_ytb_ck=""
|
|
268
271
|
|
|
@@ -209,6 +209,9 @@ parser_bili_video_codes='["avc", "av01", "hev"]'
|
|
|
209
209
|
# 360p(16), 480p(32), 720p(64), 1080p(80), 1080p+(112), 1080p_60(116), 4k(120)
|
|
210
210
|
parser_bili_video_quality=80
|
|
211
211
|
|
|
212
|
+
# [可选] 小红书 Cookie, 部分链接解析有水印,可填
|
|
213
|
+
parser_xhs_ck=""
|
|
214
|
+
|
|
212
215
|
# [可选] Youtube Cookie, Youtube 视频因人机检测下载失败,需填
|
|
213
216
|
parser_ytb_ck=""
|
|
214
217
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nonebot-plugin-parser"
|
|
3
|
-
version = "2.3.5"
|
|
3
|
+
version = "2.3.7"
|
|
4
4
|
description = "NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -43,12 +43,12 @@ dependencies = [
|
|
|
43
43
|
"pillow>=11.0.0",
|
|
44
44
|
"tqdm>=4.67.1,<5.0.0",
|
|
45
45
|
"aiofiles>=25.1.0",
|
|
46
|
-
"curl_cffi>=0.13.0,<1.0.0",
|
|
47
46
|
"httpx>=0.27.2,<1.0.0",
|
|
48
47
|
"msgspec>=0.20.0,<1.0.0",
|
|
49
48
|
"apilmoji[tqdm]>=0.2.4,<1.0.0",
|
|
50
49
|
"beautifulsoup4>=4.12.0,<5.0.0",
|
|
51
|
-
"
|
|
50
|
+
"curl_cffi>=0.13.0,<1.0.0,!=0.14.0",
|
|
51
|
+
"bilibili-api-python>=17.4.1,<18.0.0",
|
|
52
52
|
"nonebot-plugin-alconna>=0.59.4,<1.0.0",
|
|
53
53
|
"nonebot-plugin-apscheduler>=0.5.0,<1.0.0",
|
|
54
54
|
"nonebot-plugin-localstore>=0.7.4,<1.0.0",
|
|
@@ -118,7 +118,7 @@ nonebug = { git = "https://github.com/nonebot/nonebug" }
|
|
|
118
118
|
[tool.bumpversion]
|
|
119
119
|
tag = true
|
|
120
120
|
commit = true
|
|
121
|
-
current_version = "2.3.5"
|
|
121
|
+
current_version = "2.3.7"
|
|
122
122
|
message = "release: bump vesion from {current_version} to {new_version}"
|
|
123
123
|
|
|
124
124
|
[[tool.bumpversion.files]]
|
{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/config.py
RENAMED
|
@@ -20,6 +20,8 @@ class Config(BaseModel):
|
|
|
20
20
|
"""bilibili cookies"""
|
|
21
21
|
parser_ytb_ck: str | None = None
|
|
22
22
|
"""youtube cookies"""
|
|
23
|
+
parser_xhs_ck: str | None = None
|
|
24
|
+
"""小红书 cookies"""
|
|
23
25
|
parser_proxy: str | None = None
|
|
24
26
|
"""代理"""
|
|
25
27
|
parser_need_upload: bool = False
|
|
@@ -113,6 +115,11 @@ class Config(BaseModel):
|
|
|
113
115
|
"""youtube cookies"""
|
|
114
116
|
return self.parser_ytb_ck
|
|
115
117
|
|
|
118
|
+
@property
|
|
119
|
+
def xhs_ck(self) -> str | None:
|
|
120
|
+
"""小红书 cookies"""
|
|
121
|
+
return self.parser_xhs_ck
|
|
122
|
+
|
|
116
123
|
@property
|
|
117
124
|
def proxy(self) -> str | None:
|
|
118
125
|
"""代理"""
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Literal
|
|
3
3
|
|
|
4
4
|
import msgspec
|
|
5
5
|
from nonebot import logger
|
|
@@ -19,6 +19,32 @@ from ..config import gconfig
|
|
|
19
19
|
PSR_SEARCHED_KEY: Literal["psr-searched"] = "psr-searched"
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
# 定义 JSON 卡片的数据结构
|
|
23
|
+
class MetaDetail(msgspec.Struct):
|
|
24
|
+
qqdocurl: str | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MetaNews(msgspec.Struct):
|
|
28
|
+
jumpUrl: str | None = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MetaMusic(msgspec.Struct):
|
|
32
|
+
jumpUrl: str | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Meta(msgspec.Struct):
|
|
36
|
+
detail_1: MetaDetail | None = None
|
|
37
|
+
news: MetaNews | None = None
|
|
38
|
+
music: MetaMusic | None = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RawData(msgspec.Struct):
|
|
42
|
+
meta: Meta | None = None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
raw_decoder = msgspec.json.Decoder(RawData)
|
|
46
|
+
|
|
47
|
+
|
|
22
48
|
class SearchResult:
|
|
23
49
|
"""匹配结果"""
|
|
24
50
|
|
|
@@ -45,24 +71,6 @@ def _searched(state: T_State) -> SearchResult | None:
|
|
|
45
71
|
return state.get(PSR_SEARCHED_KEY)
|
|
46
72
|
|
|
47
73
|
|
|
48
|
-
def _escape_raw(raw: str) -> str:
|
|
49
|
-
"""
|
|
50
|
-
转义原始字符串中的特殊字符
|
|
51
|
-
Args:
|
|
52
|
-
raw: 原始字符串
|
|
53
|
-
|
|
54
|
-
Returns:
|
|
55
|
-
str: 转义后的字符串
|
|
56
|
-
"""
|
|
57
|
-
replacements = [
|
|
58
|
-
("\\", ""),
|
|
59
|
-
("&amp;", "&"),
|
|
60
|
-
]
|
|
61
|
-
for old, new in replacements:
|
|
62
|
-
raw = raw.replace(old, new)
|
|
63
|
-
return raw
|
|
64
|
-
|
|
65
|
-
|
|
66
74
|
def _extract_url(hyper: Hyper) -> str | None:
|
|
67
75
|
"""处理 JSON 类型的消息段,提取 URL
|
|
68
76
|
|
|
@@ -79,24 +87,25 @@ def _extract_url(hyper: Hyper) -> str | None:
|
|
|
79
87
|
return None
|
|
80
88
|
|
|
81
89
|
try:
|
|
82
|
-
raw
|
|
90
|
+
raw = raw_decoder.decode(raw_str)
|
|
83
91
|
except msgspec.DecodeError:
|
|
84
92
|
logger.exception(f"json 卡片解析失败: {raw_str}")
|
|
85
93
|
return None
|
|
86
94
|
|
|
87
|
-
|
|
88
|
-
if not meta:
|
|
95
|
+
if not raw.meta:
|
|
89
96
|
return None
|
|
90
97
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
98
|
+
meta, url = raw.meta, None
|
|
99
|
+
|
|
100
|
+
if meta.detail_1:
|
|
101
|
+
url = meta.detail_1.qqdocurl
|
|
102
|
+
elif meta.news:
|
|
103
|
+
url = meta.news.jumpUrl
|
|
104
|
+
elif meta.music:
|
|
105
|
+
url = meta.music.jumpUrl
|
|
106
|
+
|
|
107
|
+
logger.debug(f"extract url[{url}] from raw#meta[{meta}]")
|
|
108
|
+
return url
|
|
100
109
|
|
|
101
110
|
|
|
102
111
|
def _extract_text(message: UniMsg) -> str | None:
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import asyncio
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from urllib.parse import urljoin
|
|
6
|
+
|
|
7
|
+
import aiofiles
|
|
8
|
+
from httpx import HTTPError, AsyncClient
|
|
9
|
+
from nonebot import logger
|
|
10
|
+
|
|
11
|
+
from ..base import (
|
|
12
|
+
DOWNLOADER,
|
|
13
|
+
COMMON_TIMEOUT,
|
|
14
|
+
DOWNLOAD_TIMEOUT,
|
|
15
|
+
Platform,
|
|
16
|
+
BaseParser,
|
|
17
|
+
PlatformEnum,
|
|
18
|
+
ParseException,
|
|
19
|
+
DownloadException,
|
|
20
|
+
DurationLimitException,
|
|
21
|
+
handle,
|
|
22
|
+
pconfig,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AcfunParser(BaseParser):
|
|
27
|
+
# 平台信息
|
|
28
|
+
platform: ClassVar[Platform] = Platform(name=PlatformEnum.ACFUN, display_name="猴山")
|
|
29
|
+
|
|
30
|
+
def __init__(self):
|
|
31
|
+
super().__init__()
|
|
32
|
+
self.headers["referer"] = "https://www.acfun.cn/"
|
|
33
|
+
|
|
34
|
+
@handle("acfun.cn", r"(?:ac=|/ac)(?P<acid>\d+)")
|
|
35
|
+
async def _parse(self, searched: re.Match[str]):
|
|
36
|
+
acid = int(searched.group("acid"))
|
|
37
|
+
url = f"https://www.acfun.cn/v/ac{acid}"
|
|
38
|
+
|
|
39
|
+
video_info = await self.parse_video_info(url)
|
|
40
|
+
author = self.create_author(video_info.name, video_info.avatar_url)
|
|
41
|
+
|
|
42
|
+
video_task = asyncio.create_task(
|
|
43
|
+
self.download_video(
|
|
44
|
+
video_info.m3u8_url,
|
|
45
|
+
f"acfun_{acid}.mp4",
|
|
46
|
+
video_info.duration,
|
|
47
|
+
)
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
video_content = self.create_video_content(video_task, cover_url=video_info.coverUrl)
|
|
51
|
+
|
|
52
|
+
return self.result(
|
|
53
|
+
title=video_info.title,
|
|
54
|
+
text=video_info.text,
|
|
55
|
+
author=author,
|
|
56
|
+
timestamp=video_info.timestamp,
|
|
57
|
+
contents=[video_content],
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
async def parse_video_info(self, url: str):
|
|
61
|
+
"""解析acfun链接获取详细信息
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
url (str): 链接
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
video.VideoInfo
|
|
68
|
+
"""
|
|
69
|
+
from . import video
|
|
70
|
+
|
|
71
|
+
# 拼接查询参数
|
|
72
|
+
url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1"
|
|
73
|
+
|
|
74
|
+
async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
|
|
75
|
+
response = await client.get(url)
|
|
76
|
+
response.raise_for_status()
|
|
77
|
+
raw = response.text
|
|
78
|
+
|
|
79
|
+
matched = re.search(r"window\.videoInfo =(.*?)</script>", raw)
|
|
80
|
+
if not matched:
|
|
81
|
+
raise ParseException("解析 acfun 视频信息失败")
|
|
82
|
+
|
|
83
|
+
raw = str(matched.group(1))
|
|
84
|
+
raw = re.sub(r'\\{1,4}"', '"', raw)
|
|
85
|
+
raw = raw.replace('"{', "{").replace('}"', "}")
|
|
86
|
+
return video.decoder.decode(raw)
|
|
87
|
+
|
|
88
|
+
async def download_video(self, m3u8_url: str, file_name: str, duration: int) -> Path:
|
|
89
|
+
"""下载acfun视频
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
m3u8_url (str): m3u8链接
|
|
93
|
+
file_name (str): 文件名
|
|
94
|
+
duration (int): 视频时长(秒)
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
Path: 下载的mp4文件
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
if duration >= pconfig.duration_maximum:
|
|
101
|
+
raise DurationLimitException
|
|
102
|
+
|
|
103
|
+
video_file = pconfig.cache_dir / file_name
|
|
104
|
+
if video_file.exists():
|
|
105
|
+
return video_file
|
|
106
|
+
|
|
107
|
+
m3u8_slices = await self._get_m3u8_slices(m3u8_url)
|
|
108
|
+
|
|
109
|
+
try:
|
|
110
|
+
async with (
|
|
111
|
+
aiofiles.open(video_file, "wb") as f,
|
|
112
|
+
AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client,
|
|
113
|
+
):
|
|
114
|
+
total_size = 0
|
|
115
|
+
with DOWNLOADER.get_progress_bar(file_name) as bar:
|
|
116
|
+
for url in m3u8_slices:
|
|
117
|
+
async with client.stream("GET", url) as response:
|
|
118
|
+
async for chunk in response.aiter_bytes(chunk_size=1024 * 1024):
|
|
119
|
+
await f.write(chunk)
|
|
120
|
+
total_size += len(chunk)
|
|
121
|
+
bar.update(len(chunk))
|
|
122
|
+
except HTTPError:
|
|
123
|
+
video_file.unlink(missing_ok=True)
|
|
124
|
+
logger.exception("视频下载失败")
|
|
125
|
+
raise DownloadException("视频下载失败")
|
|
126
|
+
return video_file
|
|
127
|
+
|
|
128
|
+
async def _get_m3u8_slices(self, m3u8_url: str):
|
|
129
|
+
"""拼接m3u8链接
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
m3u8_url (str): m3u8链接
|
|
133
|
+
m3u8_slice (str): m3u8切片
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
list[str]: 视频链接
|
|
137
|
+
"""
|
|
138
|
+
async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
|
|
139
|
+
response = await client.get(m3u8_url)
|
|
140
|
+
response.raise_for_status()
|
|
141
|
+
|
|
142
|
+
slices_text = response.text
|
|
143
|
+
|
|
144
|
+
slices: list[str] = []
|
|
145
|
+
for line in slices_text.splitlines():
|
|
146
|
+
line = line.strip()
|
|
147
|
+
if not line or line.startswith("#"):
|
|
148
|
+
continue
|
|
149
|
+
slices.append(urljoin(m3u8_url, line))
|
|
150
|
+
|
|
151
|
+
return slices
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from msgspec import Struct
|
|
2
|
+
from msgspec.json import Decoder
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class User(Struct):
|
|
6
|
+
name: str
|
|
7
|
+
headUrl: str
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Representation(Struct):
|
|
11
|
+
url: str
|
|
12
|
+
m3u8Slice: str
|
|
13
|
+
qualityType: str
|
|
14
|
+
|
|
15
|
+
@property
|
|
16
|
+
def m3u8_slice(self) -> str:
|
|
17
|
+
return self.m3u8Slice.replace("\\\\n", "\n")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AdaptationSet(Struct):
|
|
21
|
+
representation: list[Representation]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class KsPlay(Struct):
|
|
25
|
+
adaptationSet: list[AdaptationSet]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CurrentVideoInfo(Struct):
|
|
29
|
+
ksPlayJson: KsPlay
|
|
30
|
+
durationMillis: int
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def representations(self) -> list[Representation]:
|
|
34
|
+
return self.ksPlayJson.adaptationSet[0].representation
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class VideoInfo(Struct, kw_only=True):
|
|
38
|
+
title: str
|
|
39
|
+
description: str | None
|
|
40
|
+
createTimeMillis: int
|
|
41
|
+
user: User
|
|
42
|
+
currentVideoInfo: CurrentVideoInfo
|
|
43
|
+
coverUrl: str
|
|
44
|
+
|
|
45
|
+
@property
|
|
46
|
+
def name(self) -> str:
|
|
47
|
+
return self.user.name
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def avatar_url(self) -> str:
|
|
51
|
+
return self.user.headUrl
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def text(self) -> str | None:
|
|
55
|
+
return f"简介: {self.description}" if self.description else None
|
|
56
|
+
|
|
57
|
+
@property
|
|
58
|
+
def timestamp(self) -> int:
|
|
59
|
+
return self.createTimeMillis // 1000
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def duration(self) -> int:
|
|
63
|
+
return self.currentVideoInfo.durationMillis // 1000
|
|
64
|
+
|
|
65
|
+
@property
|
|
66
|
+
def m3u8_url(self) -> str:
|
|
67
|
+
representations = self.currentVideoInfo.representations
|
|
68
|
+
|
|
69
|
+
quality_types = ("1080p", "720p", "480p", "360p")
|
|
70
|
+
for r in representations:
|
|
71
|
+
if r.qualityType in quality_types:
|
|
72
|
+
return r.url
|
|
73
|
+
|
|
74
|
+
return representations[0].url
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
decoder = Decoder(VideoInfo)
|
|
@@ -12,6 +12,7 @@ from .data import Platform, ParseResult, ParseResultKwargs
|
|
|
12
12
|
from ..config import pconfig as pconfig
|
|
13
13
|
from ..download import DOWNLOADER as DOWNLOADER
|
|
14
14
|
from ..constants import IOS_HEADER, COMMON_HEADER, ANDROID_HEADER, COMMON_TIMEOUT
|
|
15
|
+
from ..constants import DOWNLOAD_TIMEOUT as DOWNLOAD_TIMEOUT
|
|
15
16
|
from ..constants import PlatformEnum as PlatformEnum
|
|
16
17
|
from ..exception import TipException as TipException
|
|
17
18
|
from ..exception import ParseException as ParseException
|
|
@@ -88,7 +88,7 @@ class BilibiliParser(BaseParser):
|
|
|
88
88
|
async def _parse_read(self, searched: Match[str]):
|
|
89
89
|
"""解析专栏信息"""
|
|
90
90
|
read_id = int(searched.group("read_id"))
|
|
91
|
-
return await self.
|
|
91
|
+
return await self.parse_read_with_opus(read_id)
|
|
92
92
|
|
|
93
93
|
@handle("/opus/", r"bilibili\.com/opus/(?P<opus_id>\d+)")
|
|
94
94
|
async def _parse_opus(self, searched: Match[str]):
|
|
@@ -175,14 +175,11 @@ class BilibiliParser(BaseParser):
|
|
|
175
175
|
"""
|
|
176
176
|
from bilibili_api.dynamic import Dynamic
|
|
177
177
|
|
|
178
|
-
from .dynamic import
|
|
178
|
+
from .dynamic import DynamicData
|
|
179
179
|
|
|
180
180
|
dynamic = Dynamic(dynamic_id, await self.credential)
|
|
181
|
+
dynamic_info = convert(await dynamic.get_info(), DynamicData).item
|
|
181
182
|
|
|
182
|
-
# 转换为结构体
|
|
183
|
-
dynamic_data = convert(await dynamic.get_info(), DynamicItem)
|
|
184
|
-
dynamic_info = dynamic_data.item
|
|
185
|
-
# 使用结构体属性提取信息
|
|
186
183
|
author = self.create_author(dynamic_info.name, dynamic_info.avatar)
|
|
187
184
|
|
|
188
185
|
# 下载图片
|
|
@@ -208,8 +205,8 @@ class BilibiliParser(BaseParser):
|
|
|
208
205
|
opus = Opus(opus_id, await self.credential)
|
|
209
206
|
return await self._parse_opus_obj(opus)
|
|
210
207
|
|
|
211
|
-
async def
|
|
212
|
-
"""解析专栏信息,
|
|
208
|
+
async def parse_read_with_opus(self, read_id: int):
|
|
209
|
+
"""解析专栏信息, 使用 Opus 接口
|
|
213
210
|
|
|
214
211
|
Args:
|
|
215
212
|
read_id (int): 专栏 id
|
|
@@ -297,45 +294,6 @@ class BilibiliParser(BaseParser):
|
|
|
297
294
|
author=author,
|
|
298
295
|
)
|
|
299
296
|
|
|
300
|
-
async def parse_read(self, read_id: int):
|
|
301
|
-
"""专栏解析
|
|
302
|
-
|
|
303
|
-
Args:
|
|
304
|
-
read_id (int): 专栏 id
|
|
305
|
-
|
|
306
|
-
Returns:
|
|
307
|
-
texts: list[str], urls: list[str]
|
|
308
|
-
"""
|
|
309
|
-
from bilibili_api.article import Article
|
|
310
|
-
|
|
311
|
-
from .article import TextNode, ImageNode, ArticleInfo
|
|
312
|
-
|
|
313
|
-
ar = Article(read_id)
|
|
314
|
-
# 加载内容
|
|
315
|
-
await ar.fetch_content()
|
|
316
|
-
data = ar.json()
|
|
317
|
-
article_info = convert(data, ArticleInfo)
|
|
318
|
-
logger.debug(f"article_info: {article_info}")
|
|
319
|
-
|
|
320
|
-
contents: list[MediaContent] = []
|
|
321
|
-
current_text = ""
|
|
322
|
-
for child in article_info.gen_text_img():
|
|
323
|
-
if isinstance(child, ImageNode):
|
|
324
|
-
contents.append(self.create_graphics_content(child.url, current_text.strip(), child.alt))
|
|
325
|
-
current_text = ""
|
|
326
|
-
elif isinstance(child, TextNode):
|
|
327
|
-
current_text += child.text
|
|
328
|
-
|
|
329
|
-
author = self.create_author(*article_info.author_info)
|
|
330
|
-
|
|
331
|
-
return self.result(
|
|
332
|
-
title=article_info.title,
|
|
333
|
-
timestamp=article_info.timestamp,
|
|
334
|
-
text=current_text.strip(),
|
|
335
|
-
author=author,
|
|
336
|
-
contents=contents,
|
|
337
|
-
)
|
|
338
|
-
|
|
339
297
|
async def parse_favlist(self, fav_id: int):
|
|
340
298
|
"""解析收藏夹信息
|
|
341
299
|
|
|
@@ -58,7 +58,7 @@ class VideoContent(MediaContent):
|
|
|
58
58
|
return f"时长: {minutes}:{seconds:02d}"
|
|
59
59
|
|
|
60
60
|
def __repr__(self) -> str:
|
|
61
|
-
repr = f"VideoContent(
|
|
61
|
+
repr = f"VideoContent({repr_path_task(self.path_task)}"
|
|
62
62
|
if self.cover is not None:
|
|
63
63
|
repr += f", cover={repr_path_task(self.cover)}"
|
|
64
64
|
return repr + ")"
|
|
@@ -88,7 +88,7 @@ class GraphicsContent(MediaContent):
|
|
|
88
88
|
"""图片描述 渲染时居中显示"""
|
|
89
89
|
|
|
90
90
|
def __repr__(self) -> str:
|
|
91
|
-
repr = f"GraphicsContent(
|
|
91
|
+
repr = f"GraphicsContent({repr_path_task(self.path_task)}"
|
|
92
92
|
if self.text:
|
|
93
93
|
repr += f", text={self.text}"
|
|
94
94
|
if self.alt:
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from typing import ClassVar
|
|
3
3
|
|
|
4
|
-
import msgspec
|
|
5
4
|
from httpx import AsyncClient
|
|
6
5
|
from nonebot import logger
|
|
7
6
|
|
|
@@ -61,6 +60,8 @@ class DouyinParser(BaseParser):
|
|
|
61
60
|
return f"https://m.douyin.com/share/{ty}/{vid}"
|
|
62
61
|
|
|
63
62
|
async def parse_video(self, url: str):
|
|
63
|
+
from . import video
|
|
64
|
+
|
|
64
65
|
async with AsyncClient(
|
|
65
66
|
headers=self.ios_headers,
|
|
66
67
|
timeout=COMMON_TIMEOUT,
|
|
@@ -81,9 +82,7 @@ class DouyinParser(BaseParser):
|
|
|
81
82
|
if not matched or not matched.group(1):
|
|
82
83
|
raise ParseException("can't find _ROUTER_DATA in html")
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
video_data = msgspec.json.decode(matched.group(1).strip(), type=RouterData).video_data
|
|
85
|
+
video_data = video.decoder.decode(matched.group(1).strip()).video_data
|
|
87
86
|
# 使用新的简洁构建方式
|
|
88
87
|
contents = []
|
|
89
88
|
|
|
@@ -108,6 +107,8 @@ class DouyinParser(BaseParser):
|
|
|
108
107
|
)
|
|
109
108
|
|
|
110
109
|
async def parse_slides(self, video_id: str):
|
|
110
|
+
from . import slides
|
|
111
|
+
|
|
111
112
|
url = "https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/"
|
|
112
113
|
params = {
|
|
113
114
|
"aweme_ids": f"[{video_id}]",
|
|
@@ -117,9 +118,7 @@ class DouyinParser(BaseParser):
|
|
|
117
118
|
response = await client.get(url, params=params)
|
|
118
119
|
response.raise_for_status()
|
|
119
120
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
slides_data = msgspec.json.decode(response.content, type=SlidesInfo).aweme_details[0]
|
|
121
|
+
slides_data = slides.decoder.decode(response.content).aweme_details[0]
|
|
123
122
|
contents = []
|
|
124
123
|
|
|
125
124
|
# 添加图片内容
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from random import choice
|
|
2
2
|
|
|
3
|
-
from msgspec import Struct, field
|
|
3
|
+
from msgspec import Struct, json, field
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class PlayAddr(Struct):
|
|
@@ -57,3 +57,6 @@ class SlidesData(Struct):
|
|
|
57
57
|
|
|
58
58
|
class SlidesInfo(Struct):
|
|
59
59
|
aweme_details: list[SlidesData] = field(default_factory=list)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
decoder = json.Decoder(SlidesInfo)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from random import choice
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
-
from msgspec import Struct, field
|
|
4
|
+
from msgspec import Struct, json, field
|
|
5
5
|
|
|
6
6
|
from ..base import ParseException
|
|
7
7
|
|
|
@@ -93,3 +93,6 @@ class RouterData(Struct):
|
|
|
93
93
|
elif page := self.loader_data.note_page:
|
|
94
94
|
return page.video_info_res.video_data
|
|
95
95
|
raise ParseException("can't find video_(id)/page or note_(id)/page in router data")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
decoder = json.Decoder(RouterData)
|