nonebot-plugin-parser 2.0.1__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/PKG-INFO +2 -3
  2. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/README.md +1 -2
  3. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/pyproject.toml +2 -2
  4. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/download/__init__.py +9 -6
  5. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/download/task.py +2 -1
  6. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/download/ytdlp.py +13 -8
  7. nonebot_plugin_parser-2.0.3/src/nonebot_plugin_parser/exception.py +40 -0
  8. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/helper.py +6 -23
  9. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/matchers/__init__.py +20 -17
  10. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/matchers/preprocess.py +0 -41
  11. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/acfun.py +18 -17
  12. nonebot_plugin_parser-2.0.3/src/nonebot_plugin_parser/parsers/base.py +144 -0
  13. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py +11 -17
  14. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/data.py +23 -97
  15. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/douyin/__init__.py +46 -8
  16. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/douyin/slides.py +1 -18
  17. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/douyin/video.py +1 -16
  18. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/kuaishou.py +21 -15
  19. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/twitter.py +51 -34
  20. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/weibo.py +54 -50
  21. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/xiaohongshu.py +33 -18
  22. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/youtube.py +23 -21
  23. nonebot_plugin_parser-2.0.3/src/nonebot_plugin_parser/renders/base.py +76 -0
  24. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/common.py +3 -3
  25. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/default.py +7 -1
  26. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/utils.py +1 -0
  27. nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/exception.py +0 -20
  28. nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/parsers/base.py +0 -128
  29. nonebot_plugin_parser-2.0.1/src/nonebot_plugin_parser/renders/base.py +0 -54
  30. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/__init__.py +0 -0
  31. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/config.py +0 -0
  32. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/constants.py +0 -0
  33. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/matchers/filter.py +0 -0
  34. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/__init__.py +0 -0
  35. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/bilibili/opus.py +0 -0
  36. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/bilibili/video.py +0 -0
  37. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/cookie.py +0 -0
  38. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/nga.py +0 -0
  39. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/parsers/tiktok.py +0 -0
  40. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/__init__.py +0 -0
  41. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/fonts/HYSongYunLangHeiW-1.ttf +0 -0
  42. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/templates/weibo.html.jinja +0 -0
  43. {nonebot_plugin_parser-2.0.1 → nonebot_plugin_parser-2.0.3}/src/nonebot_plugin_parser/renders/weibo.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nonebot-plugin-parser
3
- Version: 2.0.1
3
+ Version: 2.0.3
4
4
  Summary: NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun
5
5
  Keywords: nonebot,nonebot2,video,bilibili,youtube,tiktok,twitter,kuaishou,acfun,weibo,xiaohongshu,nga,douyin
6
6
  Author: fllesser
@@ -42,8 +42,7 @@ Description-Content-Type: text/markdown
42
42
  <br/>
43
43
  [![pre-commit](https://results.pre-commit.ci/badge/github/fllesser/nonebot-plugin-parser/master.svg)](https://results.pre-commit.ci/latest/github/fllesser/nonebot-plugin-parser/master)
44
44
  [![codecov](https://codecov.io/gh/fllesser/nonebot-plugin-parser/graph/badge.svg?token=VCS8IHSO7U)](https://codecov.io/gh/fllesser/nonebot-plugin-parser)
45
- [![pepy](https://static.pepy.tech/badge/nonebot-plugin-parser)](https://pepy.tech/projects/nonebot-plugin-parser)
46
-
45
+ [![qqgroup](https://img.shields.io/badge/QQ%E7%BE%A4-820082006-orange?style=flat-square)](https://qm.qq.com/q/y4T4CjHimc)
47
46
  </div>
48
47
 
49
48
  > [!IMPORTANT]
@@ -12,8 +12,7 @@
12
12
  <br/>
13
13
  [![pre-commit](https://results.pre-commit.ci/badge/github/fllesser/nonebot-plugin-parser/master.svg)](https://results.pre-commit.ci/latest/github/fllesser/nonebot-plugin-parser/master)
14
14
  [![codecov](https://codecov.io/gh/fllesser/nonebot-plugin-parser/graph/badge.svg?token=VCS8IHSO7U)](https://codecov.io/gh/fllesser/nonebot-plugin-parser)
15
- [![pepy](https://static.pepy.tech/badge/nonebot-plugin-parser)](https://pepy.tech/projects/nonebot-plugin-parser)
16
-
15
+ [![qqgroup](https://img.shields.io/badge/QQ%E7%BE%A4-820082006-orange?style=flat-square)](https://qm.qq.com/q/y4T4CjHimc)
17
16
  </div>
18
17
 
19
18
  > [!IMPORTANT]
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nonebot-plugin-parser"
3
- version = "2.0.1"
3
+ version = "2.0.3"
4
4
  description = "NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun"
5
5
  authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
6
6
  readme = "README.md"
@@ -185,7 +185,7 @@ build-backend = "uv_build"
185
185
 
186
186
 
187
187
  [tool.bumpversion]
188
- current_version = "2.0.1"
188
+ current_version = "2.0.3"
189
189
  commit = true
190
190
  message = "🔖 release: bump vesion from {current_version} to {new_version}"
191
191
  tag = true
@@ -8,7 +8,7 @@ from tqdm.asyncio import tqdm
8
8
 
9
9
  from ..config import pconfig
10
10
  from ..constants import COMMON_HEADER, DOWNLOAD_TIMEOUT
11
- from ..exception import DownloadException, DownloadSizeLimitException
11
+ from ..exception import DownloadException, SizeLimitException, ZeroSizeException
12
12
  from ..utils import generate_file_name, merge_av, safe_unlink
13
13
  from .task import auto_task
14
14
  from .ytdlp import YtdlpDownloader
@@ -50,7 +50,6 @@ class StreamDownloader:
50
50
  if not file_name:
51
51
  file_name = generate_file_name(url)
52
52
  file_path = self.cache_dir / file_name
53
-
54
53
  # 如果文件存在,则直接返回
55
54
  if file_path.exists():
56
55
  return file_path
@@ -61,11 +60,15 @@ class StreamDownloader:
61
60
  async with self.client.stream("GET", url, headers=headers, follow_redirects=True) as response:
62
61
  response.raise_for_status()
63
62
  content_length = response.headers.get("Content-Length")
64
- content_length = int(content_length) if content_length else None
63
+ content_length = int(content_length) if content_length else 0
64
+
65
+ if content_length == 0:
66
+ logger.warning(f"媒体 url: {url}, 大小为 0, 取消下载")
67
+ raise ZeroSizeException
65
68
 
66
- if content_length and (file_size := content_length / 1024 / 1024) > pconfig.max_size:
67
- logger.warning(f"{file_name} 大小 {file_size:.2f} MB 超过 {pconfig.max_size} MB, 取消下载")
68
- raise DownloadSizeLimitException
69
+ if (file_size := content_length / 1024 / 1024) > pconfig.max_size:
70
+ logger.warning(f"媒体 url: {url} 大小 {file_size:.2f} MB 超过 {pconfig.max_size} MB, 取消下载")
71
+ raise SizeLimitException
69
72
 
70
73
  with self.get_progress_bar(file_name, content_length) as bar:
71
74
  async with aiofiles.open(file_path, "wb") as file:
@@ -13,6 +13,7 @@ def auto_task(func: Callable[P, Coroutine[Any, Any, T]]) -> Callable[P, Task[T]]
13
13
  @wraps(func)
14
14
  def wrapper(*args: P.args, **kwargs: P.kwargs) -> Task[T]:
15
15
  coro = func(*args, **kwargs)
16
- return create_task(coro)
16
+ name = " | ".join(str(arg) for arg in args if isinstance(arg, str))
17
+ return create_task(coro, name=func.__name__ + " | " + name)
17
18
 
18
19
  return wrapper
@@ -7,7 +7,7 @@ from msgspec import Struct
7
7
  import yt_dlp
8
8
 
9
9
  from ..config import pconfig
10
- from ..exception import ParseException
10
+ from ..exception import DurationLimitException, ParseException
11
11
  from ..utils import LimitedSizeDict, generate_file_name
12
12
  from .task import auto_task
13
13
 
@@ -39,7 +39,7 @@ class YtdlpDownloader:
39
39
  """YtdlpDownloader class"""
40
40
 
41
41
  def __init__(self):
42
- self._url_info_mapping = LimitedSizeDict[str, VideoInfo]()
42
+ self._video_info_mapping = LimitedSizeDict[str, VideoInfo]()
43
43
  self._ydl_extract_base_opts: dict[str, Any] = {
44
44
  "quiet": True,
45
45
  "skip_download": True,
@@ -60,9 +60,9 @@ class YtdlpDownloader:
60
60
  Returns:
61
61
  dict[str, str]: video info
62
62
  """
63
- info_dict = self._url_info_mapping.get(url, None)
64
- if info_dict:
65
- return info_dict
63
+ video_info = self._video_info_mapping.get(url, None)
64
+ if video_info:
65
+ return video_info
66
66
  ydl_opts = {} | self._ydl_extract_base_opts
67
67
 
68
68
  if cookiefile:
@@ -72,8 +72,9 @@ class YtdlpDownloader:
72
72
  info_dict = await asyncio.to_thread(ydl.extract_info, url, download=False)
73
73
  if not info_dict:
74
74
  raise ParseException("获取视频信息失败")
75
+
75
76
  video_info = msgspec.convert(info_dict, VideoInfo)
76
- self._url_info_mapping[url] = video_info
77
+ self._video_info_mapping[url] = video_info
77
78
  return video_info
78
79
 
79
80
  @auto_task
@@ -87,8 +88,11 @@ class YtdlpDownloader:
87
88
  Returns:
88
89
  Path: video file path
89
90
  """
90
- info_dict = await self.extract_video_info(url, cookiefile)
91
- duration = info_dict.duration
91
+ video_info = await self.extract_video_info(url, cookiefile)
92
+ duration = video_info.duration
93
+ if duration > pconfig.duration_maximum:
94
+ raise DurationLimitException
95
+
92
96
  video_path = pconfig.cache_dir / generate_file_name(url, ".mp4")
93
97
  if video_path.exists():
94
98
  return video_path
@@ -121,6 +125,7 @@ class YtdlpDownloader:
121
125
  audio_path = pconfig.cache_dir / f"{file_name}.flac"
122
126
  if audio_path.exists():
123
127
  return audio_path
128
+
124
129
  ydl_opts = {
125
130
  "outtmpl": f"{pconfig.cache_dir / file_name}.%(ext)s",
126
131
  "format": "bestaudio/best",
@@ -0,0 +1,40 @@
1
+ class ParseException(Exception):
2
+ """异常基类"""
3
+
4
+ def __init__(self, message: str):
5
+ self.message = message
6
+
7
+
8
+ class DownloadException(ParseException):
9
+ """下载异常"""
10
+
11
+ def __init__(self, message: str | None = None):
12
+ self.message = message or "媒体下载失败"
13
+
14
+
15
+ class DownloadLimitException(DownloadException):
16
+ """下载超过限制异常"""
17
+
18
+ def __init__(self):
19
+ raise NotImplementedError
20
+
21
+
22
+ class SizeLimitException(DownloadLimitException):
23
+ """下载大小超过限制异常"""
24
+
25
+ def __init__(self):
26
+ self.message = "媒体大小超过配置限制,取消下载"
27
+
28
+
29
+ class DurationLimitException(DownloadLimitException):
30
+ """下载时长超过限制异常"""
31
+
32
+ def __init__(self):
33
+ self.message = "媒体时长超过配置限制,取消下载"
34
+
35
+
36
+ class ZeroSizeException(DownloadException):
37
+ """下载大小为 0 异常"""
38
+
39
+ def __init__(self):
40
+ self.message = "媒体大小为 0, 取消下载"
@@ -7,19 +7,19 @@ from nonebot_plugin_alconna.uniseg import Segment, UniMessage, Voice
7
7
  from nonebot_plugin_alconna.uniseg.segment import CustomNode, Reference
8
8
 
9
9
  from .config import pconfig
10
- from .exception import ParseException
10
+
11
+ ForwardNodeInner = str | Segment | UniMessage
12
+ """转发消息节点内部允许的类型"""
11
13
 
12
14
 
13
15
  class UniHelper:
14
16
  @staticmethod
15
- def construct_forward_message(
16
- segments: Sequence[str | Segment | UniMessage], user_id: str | None = None
17
- ) -> Reference:
17
+ def construct_forward_message(segments: Sequence[ForwardNodeInner], user_id: str | None = None) -> Reference:
18
18
  """构造转发消息
19
19
 
20
20
  Args:
21
21
  user_id (str): 用户ID
22
- segments (Sequence[Segment | str]): 消息段
22
+ segments (Sequence[ForwardNode]): 消息段
23
23
 
24
24
  Returns:
25
25
  Reference: 转发消息
@@ -39,23 +39,6 @@ class UniHelper:
39
39
 
40
40
  return Reference(nodes=nodes)
41
41
 
42
- @classmethod
43
- async def send_segments(cls, segments: Sequence[Segment | str]) -> None:
44
- """发送消息段
45
-
46
- Args:
47
- segments (Sequence[Segment | str]): 消息段
48
- """
49
-
50
- if len(segments) > 2:
51
- forward_msg = cls.construct_forward_message(segments)
52
- await UniMessage([forward_msg]).send()
53
-
54
- else:
55
- segments = list(segments)
56
- segments[:-1] = [Text(seg + "\n") if isinstance(seg, str) else seg for seg in segments[:-1]]
57
- await UniMessage(segments).send()
58
-
59
42
  @staticmethod
60
43
  def img_seg(img_path: Path | None = None, raw: bytes | None = None) -> Image:
61
44
  """获取图片 Seg
@@ -103,7 +86,7 @@ class UniHelper:
103
86
  # 检测文件大小
104
87
  file_size_byte_count = int(video_path.stat().st_size)
105
88
  if file_size_byte_count == 0:
106
- raise ParseException("视频文件大小为 0")
89
+ return Text("视频文件大小为 0")
107
90
  elif file_size_byte_count > 100 * 1024 * 1024:
108
91
  # 转为文件 Seg
109
92
  return cls.file_seg(video_path, display_name=video_path.name)
@@ -62,15 +62,11 @@ async def _(
62
62
  await _message_reaction(event, "resolving")
63
63
 
64
64
  cache_key = matched.group(0)
65
- if result := RESULT_CACHE.get(cache_key):
66
- logger.debug(f"命中缓存: {cache_key}")
67
- else:
68
- # 获取对应平台 parser
69
- parser = KEYWORD_PARSER_MAP.get(keyword)
70
-
71
- if parser is None:
72
- logger.warning("没有找到对应平台的 Parser")
73
- return
65
+ # 1. 获取缓存结果
66
+ result = RESULT_CACHE.get(cache_key)
67
+ if result is None:
68
+ # 2. 获取对应平台 parser
69
+ parser = KEYWORD_PARSER_MAP[keyword]
74
70
 
75
71
  try:
76
72
  result = await parser.parse(matched)
@@ -78,16 +74,23 @@ async def _(
78
74
  # await UniMessage(str(e)).send()
79
75
  await _message_reaction(event, "fail")
80
76
  raise
81
-
82
- # 缓存解析结果
83
- RESULT_CACHE[cache_key] = result
77
+ logger.debug(f"解析结果: {result}")
78
+ else:
79
+ logger.debug(f"命中缓存: {cache_key}, 结果: {result}")
84
80
 
85
81
  # 3. 渲染内容消息并发送
86
- renderer = get_renderer(result.platform.name)
87
- async for message in renderer.render_messages(result):
88
- await message.send()
89
-
90
- # 4. 添加成功的消息响应
82
+ try:
83
+ renderer = get_renderer(result.platform.name)
84
+ async for message in renderer.render_messages(result):
85
+ await message.send()
86
+ except Exception:
87
+ await _message_reaction(event, "fail")
88
+ raise
89
+
90
+ # 4. 无 raise 再缓存解析结果
91
+ RESULT_CACHE[cache_key] = result
92
+
93
+ # 5. 添加成功的消息响应
91
94
  await _message_reaction(event, "done")
92
95
 
93
96
 
@@ -112,47 +112,6 @@ def extract_msg_text(message: UniMsg, state: T_State) -> None:
112
112
  state[R_EXTRACT_KEY] = text
113
113
 
114
114
 
115
- class UrlKeywordsRule:
116
- """检查消息是否含有关键词 增强版"""
117
-
118
- __slots__ = ("keywords",)
119
-
120
- def __init__(self, *keywords: str):
121
- self.keywords = keywords
122
-
123
- def __repr__(self) -> str:
124
- return f"UrlKeywords(keywords={self.keywords})"
125
-
126
- def __eq__(self, other: object) -> bool:
127
- return isinstance(other, UrlKeywordsRule) and frozenset(self.keywords) == frozenset(other.keywords)
128
-
129
- def __hash__(self) -> int:
130
- return hash(frozenset(self.keywords))
131
-
132
- async def __call__(self, state: T_State, text: str = ExtractText()) -> bool:
133
- if not text:
134
- return False
135
- if key := next((k for k in self.keywords if k in text), None):
136
- state[R_KEYWORD_KEY] = key
137
- return True
138
- return False
139
-
140
-
141
- def url_keywords(*keywords: str) -> Rule:
142
- return Rule(UrlKeywordsRule(*keywords))
143
-
144
-
145
- def on_url_keyword(*keywords: str, priority: int = 5) -> type[Matcher]:
146
- matcher = Matcher.new(
147
- "message",
148
- is_not_in_disabled_groups & url_keywords(*keywords),
149
- priority=priority,
150
- block=True,
151
- source=get_matcher_source(1),
152
- )
153
- return matcher
154
-
155
-
156
115
  class KeyPatternList(list[tuple[str, re.Pattern[str]]]):
157
116
  def __init__(self, *args: tuple[str, str | re.Pattern[str]]):
158
117
  super().__init__()
@@ -2,11 +2,12 @@ import asyncio
2
2
  import json
3
3
  from pathlib import Path
4
4
  import re
5
+ import time
5
6
  from typing import ClassVar
6
7
  from typing_extensions import override
7
8
 
8
9
  import aiofiles
9
- import httpx
10
+ from httpx import AsyncClient, HTTPError
10
11
  from nonebot import logger
11
12
 
12
13
  from ..config import pconfig
@@ -14,8 +15,7 @@ from ..constants import COMMON_TIMEOUT, DOWNLOAD_TIMEOUT
14
15
  from ..download import DOWNLOADER
15
16
  from ..exception import DownloadException, ParseException
16
17
  from ..utils import safe_unlink
17
- from .base import BaseParser
18
- from .data import Author, ParseResult, Platform, VideoContent
18
+ from .base import BaseParser, Platform
19
19
 
20
20
 
21
21
  class AcfunParser(BaseParser):
@@ -40,10 +40,11 @@ class AcfunParser(BaseParser):
40
40
  Returns:
41
41
  tuple: (m3u8_url, title, description, author, upload_time)
42
42
  """
43
+
43
44
  # 拼接查询参数
44
45
  url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1"
45
46
 
46
- async with httpx.AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
47
+ async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
47
48
  response = await client.get(url)
48
49
  response.raise_for_status()
49
50
  raw = response.text
@@ -88,7 +89,7 @@ class AcfunParser(BaseParser):
88
89
  max_size_in_bytes = pconfig.max_size * 1024 * 1024
89
90
  async with (
90
91
  aiofiles.open(video_file, "wb") as f,
91
- httpx.AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client,
92
+ AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client,
92
93
  ):
93
94
  total_size = 0
94
95
  with DOWNLOADER.get_progress_bar(video_file.name) as bar:
@@ -101,7 +102,7 @@ class AcfunParser(BaseParser):
101
102
  if total_size > max_size_in_bytes:
102
103
  # 直接截断
103
104
  break
104
- except httpx.HTTPError:
105
+ except HTTPError:
105
106
  await safe_unlink(video_file)
106
107
  logger.exception("acfun 视频下载失败")
107
108
  raise DownloadException("acfun 视频下载失败")
@@ -116,7 +117,7 @@ class AcfunParser(BaseParser):
116
117
  Returns:
117
118
  list[str]: 视频链接
118
119
  """
119
- async with httpx.AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
120
+ async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
120
121
  response = await client.get(m3u8_url)
121
122
  m3u8_file = response.text
122
123
  # 分离ts文件链接
@@ -135,14 +136,14 @@ class AcfunParser(BaseParser):
135
136
  return m3u8_full_urls
136
137
 
137
138
  @override
138
- async def parse(self, matched: re.Match[str]) -> ParseResult:
139
+ async def parse(self, matched: re.Match[str]):
139
140
  """解析 URL 获取内容信息并下载资源
140
141
 
141
142
  Args:
142
143
  matched: 正则表达式匹配对象,由平台对应的模式匹配得到
143
144
 
144
145
  Returns:
145
- ParseResult: 解析结果(已下载资源,包含 Path)
146
+ ParseResult: 解析结果
146
147
 
147
148
  Raises:
148
149
  ParseException: 解析失败时抛出
@@ -152,19 +153,19 @@ class AcfunParser(BaseParser):
152
153
  url = f"https://www.acfun.cn/v/ac{acid}"
153
154
 
154
155
  m3u8_url, title, description, author, upload_time = await self.parse_video_info(url)
156
+ author = self.create_author(author) if author else None
155
157
 
156
- extra_info = f"简介: {description}\n上传于 {upload_time}" if description or upload_time else None
158
+ # 2024-12-1 -> timestamp
159
+ timestamp = int(time.mktime(time.strptime(upload_time, "%Y-%m-%d")))
160
+ text = f"简介: {description}"
157
161
 
158
162
  # 下载视频
159
163
  video_task = asyncio.create_task(self.download_video(m3u8_url, acid))
160
164
 
161
- extra = {}
162
- if extra_info:
163
- extra["info"] = extra_info
164
-
165
165
  return self.result(
166
166
  title=title,
167
- author=Author(name=author) if author else None,
168
- contents=[VideoContent(video_task)],
169
- extra=extra,
167
+ text=text,
168
+ author=author,
169
+ timestamp=timestamp,
170
+ contents=[self.create_video_content(video_task)],
170
171
  )
@@ -0,0 +1,144 @@
1
+ """Parser 基类定义"""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from asyncio import Task
5
+ from collections.abc import Sequence
6
+ from pathlib import Path
7
+ import re
8
+ from typing import ClassVar
9
+ from typing_extensions import Unpack
10
+
11
+ from ..constants import ANDROID_HEADER, COMMON_HEADER, COMMON_TIMEOUT, IOS_HEADER
12
+ from .data import ParseResult, ParseResultKwargs, Platform
13
+
14
+
15
class BaseParser(ABC):
    """Abstract base class for all platform parsers.

    Subclasses must provide:
    - platform: platform information (name and display name)
    - patterns: list of URL regular-expression patterns
    - parse: coroutine that parses a URL (receives the regex match object)
    """

    # Class variable: every registered (non-abstract) Parser class
    _registry: ClassVar[list[type["BaseParser"]]] = []

    platform: ClassVar[Platform]
    """ Platform information (name and display name) """

    patterns: ClassVar[list[tuple[str, str]]]
    """ URL regular-expression patterns [(keyword, pattern), ...] """

    def __init__(self):
        """Give the instance its own copies of the shared header dicts and the common timeout."""
        self.headers = COMMON_HEADER.copy()
        self.ios_headers = IOS_HEADER.copy()
        self.android_headers = ANDROID_HEADER.copy()
        self.timeout = COMMON_TIMEOUT

    def __init_subclass__(cls, **kwargs):
        """Automatically register subclasses in ``_registry``."""
        super().__init_subclass__(**kwargs)
        if ABC not in cls.__bases__:  # skip classes that list ABC as a direct base
            BaseParser._registry.append(cls)

    @classmethod
    def get_all_subclass(cls) -> list[type["BaseParser"]]:
        """Return the list of all registered Parser classes.

        The registry list itself is returned, not a copy.
        """
        return cls._registry

    @abstractmethod
    async def parse(self, matched: re.Match[str]) -> ParseResult:
        """Parse a URL and return the content information.

        Args:
            matched: Regex match object produced by one of the platform's patterns.

        Returns:
            ParseResult: The parse result.

        Raises:
            ParseException: When parsing fails.
        """
        raise NotImplementedError

    @classmethod
    def search_url(cls, url: str) -> re.Match[str]:
        """Search ``url`` against this parser's patterns.

        Patterns are tried in order; a pattern is only tried when its
        keyword occurs in ``url``.

        Args:
            url: Text to search.

        Returns:
            The first successful regex match.

        Raises:
            ValueError: When no pattern matches ``url``.
        """
        # Imported lazily, as in the original implementation.
        from nonebot import logger

        for keyword, pattern in cls.patterns:
            if keyword not in url:
                continue
            if searched := re.search(pattern, url):
                return searched
            logger.debug(f"keyword '{keyword}' is in '{url}', but not matched")
        raise ValueError(f"无法匹配 {url}")

    @classmethod
    def result(cls, **kwargs: Unpack[ParseResultKwargs]) -> ParseResult:
        """Build a ``ParseResult`` for this parser's platform from ``kwargs``."""
        return ParseResult(platform=cls.platform, **kwargs)

    @staticmethod
    async def get_redirect_url(url: str, headers: dict[str, str] | None = None) -> str:
        """Return the redirect target of ``url``.

        A single GET is issued without following redirects.

        Args:
            url: URL to request.
            headers: Request headers; a copy of ``COMMON_HEADER`` is used when omitted or empty.

        Returns:
            The value of the ``Location`` response header, or ``url`` itself
            when the header is absent.

        Raises:
            httpx.HTTPStatusError: When the response status code is >= 400.
        """
        from httpx import AsyncClient

        headers = headers or COMMON_HEADER.copy()
        # NOTE(review): verify=False disables TLS certificate verification — confirm this is intended.
        async with AsyncClient(headers=headers, verify=False, follow_redirects=False, timeout=COMMON_TIMEOUT) as client:
            response = await client.get(url)
            if response.status_code >= 400:
                response.raise_for_status()
            return response.headers.get("Location", url)

    def create_author(self, name: str, avatar_url: str | None = None, description: str | None = None):
        """Create an ``Author`` object.

        Args:
            name: Author name.
            avatar_url: Optional avatar URL; when given it is passed to
                ``DOWNLOADER.download_img`` together with this parser's headers.
            description: Optional author description.

        Returns:
            An ``Author`` whose ``avatar`` is the download result object, or ``None``.
        """
        from ..download import DOWNLOADER
        from .data import Author

        avatar_task = None
        if avatar_url:
            avatar_task = DOWNLOADER.download_img(avatar_url, ext_headers=self.headers)
        return Author(name=name, avatar=avatar_task, description=description)

    def create_video_content(self, url_or_task: str | Task[Path], cover_url: str | None = None, duration: float = 0.0):
        """Create a ``VideoContent``.

        Args:
            url_or_task: Either a video URL (downloaded through
                ``DOWNLOADER.download_video``) or an existing task, used as-is.
            cover_url: Optional cover image URL, downloaded through
                ``DOWNLOADER.download_img``.
            duration: Value passed through to ``VideoContent`` as the duration.

        Returns:
            The constructed ``VideoContent``.
        """
        from ..download import DOWNLOADER
        from .data import VideoContent

        cover_task = None
        if cover_url:
            cover_task = DOWNLOADER.download_img(cover_url, ext_headers=self.headers)
        if isinstance(url_or_task, str):
            video_task = DOWNLOADER.download_video(url_or_task, ext_headers=self.headers)
        else:
            video_task = url_or_task
        return VideoContent(video_task, cover_task, duration)

    def create_image_contents(self, image_urls: Sequence[str]):
        """Create one ``ImageContent`` per URL in ``image_urls``.

        Every URL is first passed to ``DOWNLOADER.download_img`` with this
        parser's headers; the results are then wrapped in order.
        """
        from ..download import DOWNLOADER
        from .data import ImageContent

        img_tasks = [DOWNLOADER.download_img(url, ext_headers=self.headers) for url in image_urls]
        return [ImageContent(task) for task in img_tasks]

    def create_dynamic_contents(self, dynamic_urls: Sequence[str]):
        """Create one ``DynamicContent`` per URL in ``dynamic_urls``.

        Every URL is first passed to ``DOWNLOADER.download_video`` with this
        parser's headers; the results are then wrapped in order.
        """
        from ..download import DOWNLOADER
        from .data import DynamicContent

        dynamic_tasks = [DOWNLOADER.download_video(url, ext_headers=self.headers) for url in dynamic_urls]
        return [DynamicContent(task) for task in dynamic_tasks]

    def create_audio_content(self, url_or_task: str | Task[Path], duration: float = 0.0):
        """Create an ``AudioContent``.

        Args:
            url_or_task: Either an audio URL (downloaded through
                ``DOWNLOADER.download_audio``) or an existing task, used as-is.
            duration: Value passed through to ``AudioContent`` as the duration.

        Returns:
            The constructed ``AudioContent``.
        """
        from ..download import DOWNLOADER
        from .data import AudioContent

        if isinstance(url_or_task, str):
            audio_task = DOWNLOADER.download_audio(url_or_task, ext_headers=self.headers)
        else:
            audio_task = url_or_task
        return AudioContent(audio_task, duration)