PyPI - nonebot-plugin-parser - Version diff - 2.3.5.tar.gz → 2.3.7.tar.gz - Mend

nonebot-plugin-parser 2.3.5.tar.gz → 2.3.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: nonebot-plugin-parser
-Version: 2.3.5
+Version: 2.3.7
 Summary: NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga
 Keywords: acfun,bilibili,douyin,kuaishou,nga,nonebot,nonebot2,tiktok,twitter,video,weibo,xiaohongshu,youtube
 Author: fllesser
@@ -23,12 +23,12 @@ Requires-Dist: nonebot2>=2.4.3,<3.0.0
 Requires-Dist: pillow>=11.0.0
 Requires-Dist: tqdm>=4.67.1,<5.0.0
 Requires-Dist: aiofiles>=25.1.0
-Requires-Dist: curl-cffi>=0.13.0,<1.0.0
 Requires-Dist: httpx>=0.27.2,<1.0.0
 Requires-Dist: msgspec>=0.20.0,<1.0.0
 Requires-Dist: apilmoji[tqdm]>=0.2.4,<1.0.0
 Requires-Dist: beautifulsoup4>=4.12.0,<5.0.0
-Requires-Dist: bilibili-api-python>=17.4.0,<18.0.0
+Requires-Dist: curl-cffi>=0.13.0,!=0.14.0,<1.0.0
+Requires-Dist: bilibili-api-python>=17.4.1,<18.0.0
 Requires-Dist: nonebot-plugin-alconna>=0.59.4,<1.0.0
 Requires-Dist: nonebot-plugin-apscheduler>=0.5.0,<1.0.0
 Requires-Dist: nonebot-plugin-localstore>=0.7.4,<1.0.0
@@ -263,6 +263,9 @@ parser_bili_video_codes='["avc", "av01", "hev"]'
 # 360p(16), 480p(32), 720p(64), 1080p(80), 1080p+(112), 1080p_60(116), 4k(120)
 parser_bili_video_quality=80
+# [可选] 小红书 Cookie, 部分链接解析有水印，可填
+parser_xhs_ck=""
 # [可选] Youtube Cookie, Youtube 视频因人机检测下载失败，需填
 parser_ytb_ck=""

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/README.md RENAMED Viewed

@@ -209,6 +209,9 @@ parser_bili_video_codes='["avc", "av01", "hev"]'
 # 360p(16), 480p(32), 720p(64), 1080p(80), 1080p+(112), 1080p_60(116), 4k(120)
 parser_bili_video_quality=80
+# [可选] 小红书 Cookie, 部分链接解析有水印，可填
+parser_xhs_ck=""
 # [可选] Youtube Cookie, Youtube 视频因人机检测下载失败，需填
 parser_ytb_ck=""

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "nonebot-plugin-parser"
-version = "2.3.5"
+version = "2.3.7"
 description = "NoneBot2 链接分享解析 Alconna 版, 通用媒体卡片渲染(PIL 实现), 支持 B站/抖音/快手/微博/小红书/youtube/tiktok/twitter/acfun/nga"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -43,12 +43,12 @@ dependencies = [
   "pillow>=11.0.0",
   "tqdm>=4.67.1,<5.0.0",
   "aiofiles>=25.1.0",
-  "curl_cffi>=0.13.0,<1.0.0",
   "httpx>=0.27.2,<1.0.0",
   "msgspec>=0.20.0,<1.0.0",
   "apilmoji[tqdm]>=0.2.4,<1.0.0",
   "beautifulsoup4>=4.12.0,<5.0.0",
-  "bilibili-api-python>=17.4.0,<18.0.0",
+  "curl_cffi>=0.13.0,<1.0.0,!=0.14.0",
+  "bilibili-api-python>=17.4.1,<18.0.0",
   "nonebot-plugin-alconna>=0.59.4,<1.0.0",
   "nonebot-plugin-apscheduler>=0.5.0,<1.0.0",
   "nonebot-plugin-localstore>=0.7.4,<1.0.0",
@@ -118,7 +118,7 @@ nonebug = { git = "https://github.com/nonebot/nonebug" }
 [tool.bumpversion]
 tag = true
 commit = true
-current_version = "2.3.5"
+current_version = "2.3.7"
 message = "release: bump vesion from {current_version} to {new_version}"
 [[tool.bumpversion.files]]

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/config.py RENAMED Viewed

@@ -20,6 +20,8 @@ class Config(BaseModel):
     """bilibili cookies"""
     parser_ytb_ck: str | None = None
     """youtube cookies"""
+    parser_xhs_ck: str | None = None
+    """小红书 cookies"""
     parser_proxy: str | None = None
     """代理"""
     parser_need_upload: bool = False
@@ -113,6 +115,11 @@ class Config(BaseModel):
         """youtube cookies"""
         return self.parser_ytb_ck
+    @property
+    def xhs_ck(self) -> str | None:
+        """小红书 cookies"""
+        return self.parser_xhs_ck
     @property
     def proxy(self) -> str | None:
         """代理"""

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/matchers/rule.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import re
-from typing import Any, Literal
+from typing import Literal
 import msgspec
 from nonebot import logger
@@ -19,6 +19,32 @@ from ..config import gconfig
 PSR_SEARCHED_KEY: Literal["psr-searched"] = "psr-searched"
+# 定义 JSON 卡片的数据结构
+class MetaDetail(msgspec.Struct):
+    qqdocurl: str | None = None
+class MetaNews(msgspec.Struct):
+    jumpUrl: str | None = None
+class MetaMusic(msgspec.Struct):
+    jumpUrl: str | None = None
+class Meta(msgspec.Struct):
+    detail_1: MetaDetail | None = None
+    news: MetaNews | None = None
+    music: MetaMusic | None = None
+class RawData(msgspec.Struct):
+    meta: Meta | None = None
+raw_decoder = msgspec.json.Decoder(RawData)
 class SearchResult:
     """匹配结果"""
@@ -45,24 +71,6 @@ def _searched(state: T_State) -> SearchResult | None:
     return state.get(PSR_SEARCHED_KEY)
-def _escape_raw(raw: str) -> str:
-    """
-    转义原始字符串中的特殊字符
-    Args:
-        raw: 原始字符串
-    Returns:
-        str: 转义后的字符串
-    """
-    replacements = [
-        ("\\", ""),
-        ("&amp;", "&"),
-    ]
-    for old, new in replacements:
-        raw = raw.replace(old, new)
-    return raw
 def _extract_url(hyper: Hyper) -> str | None:
     """处理 JSON 类型的消息段，提取 URL
@@ -79,24 +87,25 @@ def _extract_url(hyper: Hyper) -> str | None:
         return None
     try:
-        raw: dict[str, Any] = msgspec.json.decode(raw_str)
+        raw = raw_decoder.decode(raw_str)
     except msgspec.DecodeError:
         logger.exception(f"json 卡片解析失败: {raw_str}")
         return None
-    meta: dict[str, Any] | None = raw.get("meta")
-    if not meta:
+    if not raw.meta:
         return None
-    for key1, key2 in (
-        ("detail_1", "qqdocurl"),
-        ("news", "jumpUrl"),
-        ("music", "jumpUrl"),
-    ):
-        if url := meta.get(key1, {}).get(key2):
-            logger.debug(f"extract url from raw:meta:{key1}:{key2}: {url}")
-            return url
-    return None
+    meta, url = raw.meta, None
+    if meta.detail_1:
+        url = meta.detail_1.qqdocurl
+    elif meta.news:
+        url = meta.news.jumpUrl
+    elif meta.music:
+        url = meta.music.jumpUrl
+    logger.debug(f"extract url[{url}] from raw#meta[{meta}]")
+    return url
 def _extract_text(message: UniMsg) -> str | None:

nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/acfun/__init__.py ADDED Viewed

@@ -0,0 +1,151 @@
+import re
+import asyncio
+from typing import ClassVar
+from pathlib import Path
+from urllib.parse import urljoin
+import aiofiles
+from httpx import HTTPError, AsyncClient
+from nonebot import logger
+from ..base import (
+    DOWNLOADER,
+    COMMON_TIMEOUT,
+    DOWNLOAD_TIMEOUT,
+    Platform,
+    BaseParser,
+    PlatformEnum,
+    ParseException,
+    DownloadException,
+    DurationLimitException,
+    handle,
+    pconfig,
+)
+class AcfunParser(BaseParser):
+    # 平台信息
+    platform: ClassVar[Platform] = Platform(name=PlatformEnum.ACFUN, display_name="猴山")
+    def __init__(self):
+        super().__init__()
+        self.headers["referer"] = "https://www.acfun.cn/"
+    @handle("acfun.cn", r"(?:ac=|/ac)(?P<acid>\d+)")
+    async def _parse(self, searched: re.Match[str]):
+        acid = int(searched.group("acid"))
+        url = f"https://www.acfun.cn/v/ac{acid}"
+        video_info = await self.parse_video_info(url)
+        author = self.create_author(video_info.name, video_info.avatar_url)
+        video_task = asyncio.create_task(
+            self.download_video(
+                video_info.m3u8_url,
+                f"acfun_{acid}.mp4",
+                video_info.duration,
+            )
+        )
+        video_content = self.create_video_content(video_task, cover_url=video_info.coverUrl)
+        return self.result(
+            title=video_info.title,
+            text=video_info.text,
+            author=author,
+            timestamp=video_info.timestamp,
+            contents=[video_content],
+        )
+    async def parse_video_info(self, url: str):
+        """解析acfun链接获取详细信息
+        Args:
+            url (str): 链接
+        Returns:
+            video.VideoInfo
+        """
+        from . import video
+        # 拼接查询参数
+        url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1"
+        async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
+            response = await client.get(url)
+            response.raise_for_status()
+            raw = response.text
+        matched = re.search(r"window\.videoInfo =(.*?)</script>", raw)
+        if not matched:
+            raise ParseException("解析 acfun 视频信息失败")
+        raw = str(matched.group(1))
+        raw = re.sub(r'\\{1,4}"', '"', raw)
+        raw = raw.replace('"{', "{").replace('}"', "}")
+        return video.decoder.decode(raw)
+    async def download_video(self, m3u8_url: str, file_name: str, duration: int) -> Path:
+        """下载acfun视频
+        Args:
+            m3u8_url (str): m3u8链接
+            file_name (str): 文件名
+            duration (int): 视频时长(秒)
+        Returns:
+            Path: 下载的mp4文件
+        """
+        if duration >= pconfig.duration_maximum:
+            raise DurationLimitException
+        video_file = pconfig.cache_dir / file_name
+        if video_file.exists():
+            return video_file
+        m3u8_slices = await self._get_m3u8_slices(m3u8_url)
+        try:
+            async with (
+                aiofiles.open(video_file, "wb") as f,
+                AsyncClient(headers=self.headers, timeout=DOWNLOAD_TIMEOUT) as client,
+            ):
+                total_size = 0
+                with DOWNLOADER.get_progress_bar(file_name) as bar:
+                    for url in m3u8_slices:
+                        async with client.stream("GET", url) as response:
+                            async for chunk in response.aiter_bytes(chunk_size=1024 * 1024):
+                                await f.write(chunk)
+                                total_size += len(chunk)
+                                bar.update(len(chunk))
+        except HTTPError:
+            video_file.unlink(missing_ok=True)
+            logger.exception("视频下载失败")
+            raise DownloadException("视频下载失败")
+        return video_file
+    async def _get_m3u8_slices(self, m3u8_url: str):
+        """拼接m3u8链接
+        Args:
+            m3u8_url (str): m3u8链接
+            m3u8_slice (str): m3u8切片
+        Returns:
+            list[str]: 视频链接
+        """
+        async with AsyncClient(headers=self.headers, timeout=COMMON_TIMEOUT) as client:
+            response = await client.get(m3u8_url)
+            response.raise_for_status()
+        slices_text = response.text
+        slices: list[str] = []
+        for line in slices_text.splitlines():
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            slices.append(urljoin(m3u8_url, line))
+        return slices

nonebot_plugin_parser-2.3.7/src/nonebot_plugin_parser/parsers/acfun/video.py ADDED Viewed

@@ -0,0 +1,77 @@
+from msgspec import Struct
+from msgspec.json import Decoder
+class User(Struct):
+    name: str
+    headUrl: str
+class Representation(Struct):
+    url: str
+    m3u8Slice: str
+    qualityType: str
+    @property
+    def m3u8_slice(self) -> str:
+        return self.m3u8Slice.replace("\\\\n", "\n")
+class AdaptationSet(Struct):
+    representation: list[Representation]
+class KsPlay(Struct):
+    adaptationSet: list[AdaptationSet]
+class CurrentVideoInfo(Struct):
+    ksPlayJson: KsPlay
+    durationMillis: int
+    @property
+    def representations(self) -> list[Representation]:
+        return self.ksPlayJson.adaptationSet[0].representation
+class VideoInfo(Struct, kw_only=True):
+    title: str
+    description: str | None
+    createTimeMillis: int
+    user: User
+    currentVideoInfo: CurrentVideoInfo
+    coverUrl: str
+    @property
+    def name(self) -> str:
+        return self.user.name
+    @property
+    def avatar_url(self) -> str:
+        return self.user.headUrl
+    @property
+    def text(self) -> str | None:
+        return f"简介: {self.description}" if self.description else None
+    @property
+    def timestamp(self) -> int:
+        return self.createTimeMillis // 1000
+    @property
+    def duration(self) -> int:
+        return self.currentVideoInfo.durationMillis // 1000
+    @property
+    def m3u8_url(self) -> str:
+        representations = self.currentVideoInfo.representations
+        quality_types = ("1080p", "720p", "480p", "360p")
+        for r in representations:
+            if r.qualityType in quality_types:
+                return r.url
+        return representations[0].url
+decoder = Decoder(VideoInfo)

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/base.py RENAMED Viewed

@@ -12,6 +12,7 @@ from .data import Platform, ParseResult, ParseResultKwargs
 from ..config import pconfig as pconfig
 from ..download import DOWNLOADER as DOWNLOADER
 from ..constants import IOS_HEADER, COMMON_HEADER, ANDROID_HEADER, COMMON_TIMEOUT
+from ..constants import DOWNLOAD_TIMEOUT as DOWNLOAD_TIMEOUT
 from ..constants import PlatformEnum as PlatformEnum
 from ..exception import TipException as TipException
 from ..exception import ParseException as ParseException

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/__init__.py RENAMED Viewed

@@ -88,7 +88,7 @@ class BilibiliParser(BaseParser):
     async def _parse_read(self, searched: Match[str]):
         """解析专栏信息"""
         read_id = int(searched.group("read_id"))
-        return await self.parse_read(read_id)
+        return await self.parse_read_with_opus(read_id)
     @handle("/opus/", r"bilibili\.com/opus/(?P<opus_id>\d+)")
     async def _parse_opus(self, searched: Match[str]):
@@ -175,14 +175,11 @@ class BilibiliParser(BaseParser):
         """
         from bilibili_api.dynamic import Dynamic
-        from .dynamic import DynamicItem
+        from .dynamic import DynamicData
         dynamic = Dynamic(dynamic_id, await self.credential)
+        dynamic_info = convert(await dynamic.get_info(), DynamicData).item
-        # 转换为结构体
-        dynamic_data = convert(await dynamic.get_info(), DynamicItem)
-        dynamic_info = dynamic_data.item
-        # 使用结构体属性提取信息
         author = self.create_author(dynamic_info.name, dynamic_info.avatar)
         # 下载图片
@@ -208,8 +205,8 @@ class BilibiliParser(BaseParser):
         opus = Opus(opus_id, await self.credential)
         return await self._parse_opus_obj(opus)
-    async def parse_read_old(self, read_id: int):
-        """解析专栏信息, 已废弃
+    async def parse_read_with_opus(self, read_id: int):
+        """解析专栏信息, 使用 Opus 接口
         Args:
             read_id (int): 专栏 id
@@ -297,45 +294,6 @@ class BilibiliParser(BaseParser):
             author=author,
         )
-    async def parse_read(self, read_id: int):
-        """专栏解析
-        Args:
-            read_id (int): 专栏 id
-        Returns:
-            texts: list[str], urls: list[str]
-        """
-        from bilibili_api.article import Article
-        from .article import TextNode, ImageNode, ArticleInfo
-        ar = Article(read_id)
-        # 加载内容
-        await ar.fetch_content()
-        data = ar.json()
-        article_info = convert(data, ArticleInfo)
-        logger.debug(f"article_info: {article_info}")
-        contents: list[MediaContent] = []
-        current_text = ""
-        for child in article_info.gen_text_img():
-            if isinstance(child, ImageNode):
-                contents.append(self.create_graphics_content(child.url, current_text.strip(), child.alt))
-                current_text = ""
-            elif isinstance(child, TextNode):
-                current_text += child.text
-        author = self.create_author(*article_info.author_info)
-        return self.result(
-            title=article_info.title,
-            timestamp=article_info.timestamp,
-            text=current_text.strip(),
-            author=author,
-            contents=contents,
-        )
     async def parse_favlist(self, fav_id: int):
         """解析收藏夹信息

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/bilibili/dynamic.py RENAMED Viewed

@@ -191,7 +191,7 @@ class DynamicInfo(Struct):
         return None
-class DynamicItem(Struct):
+class DynamicData(Struct):
     """动态项目"""
     item: DynamicInfo

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/data.py RENAMED Viewed

@@ -58,7 +58,7 @@ class VideoContent(MediaContent):
         return f"时长: {minutes}:{seconds:02d}"
     def __repr__(self) -> str:
-        repr = f"VideoContent(path={repr_path_task(self.path_task)}"
+        repr = f"VideoContent({repr_path_task(self.path_task)}"
         if self.cover is not None:
             repr += f", cover={repr_path_task(self.cover)}"
         return repr + ")"
@@ -88,7 +88,7 @@ class GraphicsContent(MediaContent):
     """图片描述 渲染时居中显示"""
     def __repr__(self) -> str:
-        repr = f"GraphicsContent(path={repr_path_task(self.path_task)}"
+        repr = f"GraphicsContent({repr_path_task(self.path_task)}"
         if self.text:
             repr += f", text={self.text}"
         if self.alt:

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/douyin/__init__.py RENAMED Viewed

@@ -1,7 +1,6 @@
 import re
 from typing import ClassVar
-import msgspec
 from httpx import AsyncClient
 from nonebot import logger
@@ -61,6 +60,8 @@ class DouyinParser(BaseParser):
         return f"https://m.douyin.com/share/{ty}/{vid}"
     async def parse_video(self, url: str):
+        from . import video
         async with AsyncClient(
             headers=self.ios_headers,
             timeout=COMMON_TIMEOUT,
@@ -81,9 +82,7 @@ class DouyinParser(BaseParser):
         if not matched or not matched.group(1):
             raise ParseException("can't find _ROUTER_DATA in html")
-        from .video import RouterData
-        video_data = msgspec.json.decode(matched.group(1).strip(), type=RouterData).video_data
+        video_data = video.decoder.decode(matched.group(1).strip()).video_data
         # 使用新的简洁构建方式
         contents = []
@@ -108,6 +107,8 @@ class DouyinParser(BaseParser):
         )
     async def parse_slides(self, video_id: str):
+        from . import slides
         url = "https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/"
         params = {
             "aweme_ids": f"[{video_id}]",
@@ -117,9 +118,7 @@ class DouyinParser(BaseParser):
             response = await client.get(url, params=params)
             response.raise_for_status()
-        from .slides import SlidesInfo
-        slides_data = msgspec.json.decode(response.content, type=SlidesInfo).aweme_details[0]
+        slides_data = slides.decoder.decode(response.content).aweme_details[0]
         contents = []
         # 添加图片内容

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/douyin/slides.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from random import choice
-from msgspec import Struct, field
+from msgspec import Struct, json, field
 class PlayAddr(Struct):
@@ -57,3 +57,6 @@ class SlidesData(Struct):
 class SlidesInfo(Struct):
     aweme_details: list[SlidesData] = field(default_factory=list)
+decoder = json.Decoder(SlidesInfo)

{nonebot_plugin_parser-2.3.5 → nonebot_plugin_parser-2.3.7}/src/nonebot_plugin_parser/parsers/douyin/video.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from random import choice
 from typing import Any
-from msgspec import Struct, field
+from msgspec import Struct, json, field
 from ..base import ParseException
@@ -93,3 +93,6 @@ class RouterData(Struct):
         elif page := self.loader_data.note_page:
             return page.video_info_res.video_data
         raise ParseException("can't find video_(id)/page or note_(id)/page in router data")
+decoder = json.Decoder(RouterData)

nonebot-plugin-parser 2.3.5.tar.gz → 2.3.7.tar.gz

nonebot-plugin-parser 2.3.5.tar.gz → 2.3.7.tar.gz