PyPI - parsehub - Versions diffs - 2.0.5__tar.gz → 2.0.7__tar.gz - Mend

parsehub 2.0.5__tar.gz → 2.0.7__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (60) hide show

{parsehub-2.0.5/src/parsehub.egg-info → parsehub-2.0.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: parsehub
-Version: 2.0.5
+Version: 2.0.7
 Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
 Author-email: 梓澪 <zilingmio@gmail.com>
 License: MIT

{parsehub-2.0.5 → parsehub-2.0.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "parsehub"
-version = "2.0.5"
+version = "2.0.7"
 description = "轻量、异步、开箱即用的社交媒体聚合解析库"
 readme = "README.md"
 requires-python = ">=3.12.0"

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/__init__.py RENAMED Viewed

@@ -125,15 +125,37 @@ class ParseHub:
             )
         )
-    async def get_raw_url(self, url: str, proxy: str | None = None) -> str:
+    async def get_raw_url(self, url: str, proxy: str | None = None, clean_all: bool = True) -> str:
         """获取原始链接
         :param url: 分享文案 / 分享链接
         :param proxy: 代理
+        :param clean_all: 是否清除全部可清除的参数 (包括解析后才需清除的参数)
+        Example:
+            以小红书为例，其解析器配置如下::
+                __reserved_parameters__ = []
+                __after_clean_parameters__ = ["xsec_token"]
+            原始链接::
+                https://www.xiaohongshu.com/explore/abc123?xsec_token=xxx&tracking=yyy
+            ``clean_all=False`` (解析阶段，保留解析所需的参数)::
+                https://www.xiaohongshu.com/explore/abc123?xsec_token=xxx
+                # tracking 被清除，xsec_token 保留（解析时需要它）
+            ``clean_all=True`` (最终输出，清除所有非必要参数)::
+                https://www.xiaohongshu.com/explore/abc123
+                # xsec_token 也被清除，返回干净的链接
         :return: 原始链接
         """
         parser = self.get_parser(url)
         try:
-            return await parser(proxy=proxy).get_raw_url(url, after_clean_parameters=True)
+            return await parser(proxy=proxy).get_raw_url(url, clean_all=clean_all)
         except Exception as e:
             raise ParseError from e

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/base/base.py RENAMED Viewed

@@ -63,7 +63,7 @@ class BaseParser(ABC):
         :param url: 分享文案 / 分享链接
         :return: 解析结果
         """
-        raw_url = await self.get_raw_url(url, after_clean_parameters=False)
+        raw_url = await self.get_raw_url(url, clean_all=False)
         result = await self._do_parse(raw_url)
         result.platform = self.__platform__
         result.raw_url = self._clean_params(raw_url, self.__after_clean_parameters__)
@@ -76,11 +76,32 @@ class BaseParser(ABC):
         """
         raise NotImplementedError
-    async def get_raw_url(self, url: str, after_clean_parameters: bool = False) -> str:
+    async def get_raw_url(self, url: str, clean_all: bool = False) -> str:
         """
         清除链接中的参数
         :param url: 链接
-        :param after_clean_parameters: 是否执行后清理参数
+        :param clean_all: 是否清除全部可清除的参数 (包括解析后才需清除的参数)
+        Example:
+            以小红书为例，其解析器配置如下::
+                __reserved_parameters__ = []
+                __after_clean_parameters__ = ["xsec_token"]
+            原始链接::
+                https://www.xiaohongshu.com/explore/abc123?xsec_token=xxx&tracking=yyy
+            ``clean_all=False`` (解析阶段，保留解析所需的参数)::
+                https://www.xiaohongshu.com/explore/abc123?xsec_token=xxx
+                # tracking 被清除，xsec_token 保留（解析时需要它）
+            ``clean_all=True`` (最终输出，清除所有非必要参数)::
+                https://www.xiaohongshu.com/explore/abc123
+                # xsec_token 也被清除，返回干净的链接
         :return:
         """
         url = match_url(url)
@@ -107,9 +128,7 @@ class BaseParser(ABC):
         for i in query_params.copy().keys():
             is_reserved = i in self.__reserved_parameters__
             is_after_clean = i in self.__after_clean_parameters__
-            keep = (is_reserved and not (after_clean_parameters and is_after_clean)) or (
-                is_after_clean and not after_clean_parameters
-            )
+            keep = (is_reserved and not (clean_all and is_after_clean)) or (is_after_clean and not clean_all)
             if not keep:
                 query_params.pop(i, None)

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/bilibili.py RENAMED Viewed

@@ -66,12 +66,12 @@ class BiliParse(YtParser):
         else:
             return super().match(url)
-    async def get_raw_url(self, url: str, after_clean_parameters: bool = False) -> str:
+    async def get_raw_url(self, url: str, clean_all: bool = False) -> str:
         """获取原始链接"""
         if self._is_bvid(url):
             return f"https://www.bilibili.com/video/{url}"
         else:
-            return await super().get_raw_url(url, after_clean_parameters=after_clean_parameters)
+            return await super().get_raw_url(url, clean_all=clean_all)
     @staticmethod
     async def is_dynamic(url) -> str | None:

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/instagram.py RENAMED Viewed

@@ -28,7 +28,6 @@ class InstagramParser(BaseParser):
             dimensions = {}
         width, height = dimensions.get("width", 0) or 0, dimensions.get("height", 0) or 0
-        k = {"title": post.title, "content": post.caption, "raw_url": raw_url}
         match post.typename:
             case "GraphSidecar":
                 media = [
@@ -37,9 +36,11 @@ class InstagramParser(BaseParser):
                     else ImageRef(url=i.display_url, width=i.width, height=i.height)
                     for i in post.get_sidecar_nodes()
                 ]
-                return MultimediaParseResult(media=media, **k)
+                return MultimediaParseResult(media=media, title=post.title, content=post.caption)
             case "GraphImage":
-                return ImageParseResult(photo=[ImageRef(url=post.url, width=width, height=height)], **k)
+                return ImageParseResult(
+                    photo=[ImageRef(url=post.url, width=width, height=height)], title=post.title, content=post.caption
+                )
             case "GraphVideo":
                 return VideoParseResult(
                     video=VideoRef(
@@ -49,7 +50,8 @@ class InstagramParser(BaseParser):
                         width=width,
                         height=height,
                     ),
-                    **k,
+                    title=post.title,
+                    content=post.caption,
                 )
             case _:
                 raise ParseError("不支持的类型")

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/twitter.py RENAMED Viewed

@@ -19,10 +19,10 @@ class TwitterParser(BaseParser):
     async def _do_parse(self, raw_url: str) -> "MultimediaParseResult":
         tweet = await self._parse(raw_url)
-        return await self.media_parse(raw_url, tweet)
+        return await self.media_parse(tweet)
-    async def get_raw_url(self, url: str, after_clean_parameters: bool = False) -> str:
-        url = await super().get_raw_url(url, after_clean_parameters=after_clean_parameters)
+    async def get_raw_url(self, url: str, clean_all: bool = False) -> str:
+        url = await super().get_raw_url(url, clean_all=clean_all)
         return str(urlunparse(urlparse(url)._replace(netloc="x.com")))
     async def _parse(self, url: str):
@@ -46,7 +46,7 @@ class TwitterParser(BaseParser):
         return tweet
     @staticmethod
-    async def media_parse(url, tweet: TwitterTweet):
+    async def media_parse(tweet: TwitterTweet):
         media = []
         for m in tweet.media:
             match m:

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/xhs.py RENAMED Viewed

@@ -29,7 +29,6 @@ class XHSParser(BaseParser):
         result = await xhs.extract(raw_url)
         desc = self.hashtag_handler(result.desc)
-        k = {"title": result.title, "content": desc, "raw_url": raw_url}
         match result.type:
             case XHSPostType.VIDEO:
                 v: XHSMedia = result.media[0]
@@ -37,7 +36,8 @@ class XHSParser(BaseParser):
                     video=VideoRef(
                         url=v.url, thumb_url=v.thumb_url, duration=v.duration, height=v.height, width=v.width
                     ),
-                    **k,
+                    title=result.title,
+                    content=desc,
                 )
             case XHSPostType.IMAGE:
                 photos: list[ImageRef | LivePhotoRef] = []
@@ -55,7 +55,8 @@ class XHSParser(BaseParser):
                 return ImageParseResult(
                     photo=photos,
-                    **k,
+                    title=result.title,
+                    content=desc,
                 )
             case _:
                 raise ParseError("不支持的类型")

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/xiaoheihe.py RENAMED Viewed

@@ -22,14 +22,13 @@ class XiaoHeiHeParser(BaseParser):
     async def _do_parse(self, raw_url: str) -> AnyParseResult:
         xhh: XiaoHeiHePost = await XiaoHeiHeAPI(proxy=self.proxy).parse(raw_url)
         media = self.__parse_media(xhh)
-        v = {"title": xhh.title, "content": xhh.content, "raw_url": raw_url}
         match xhh.type:
             case XiaoHeiHePostType.VIDEO:
-                return VideoParseResult(video=media, **v)
+                return VideoParseResult(video=media, title=xhh.title, content=xhh.content)
             case XiaoHeiHePostType.IMAGE:
                 if not media or all(isinstance(m, ImageRef) for m in media):
-                    return ImageParseResult(photo=media, **v)
-                return MultimediaParseResult(media=media, **v)
+                    return ImageParseResult(photo=media, title=xhh.title, content=xhh.content)
+                return MultimediaParseResult(media=media, title=xhh.title, content=xhh.content)
             case XiaoHeiHePostType.ARTICLE:
                 return RichTextParseResult(title=xhh.title, media=media, markdown_content=xhh.content)

{parsehub-2.0.5 → parsehub-2.0.7/src/parsehub.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: parsehub
-Version: 2.0.5
+Version: 2.0.7
 Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
 Author-email: 梓澪 <zilingmio@gmail.com>
 License: MIT

{parsehub-2.0.5 → parsehub-2.0.7}/LICENSE RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/README.md RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/setup.cfg RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/config/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/config/config.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/errors.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/base/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/base/ytdlp.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/coolapk.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/douyin.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/facebook.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/kuaishou.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/pipix.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/threads.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/tieba.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/weibo.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/weixin.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/youtube.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/parsers/parser/zuiyou.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/bilibili.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/coolapk.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/instagram.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/kuaishou.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/pipix.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/threads.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/tieba.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/twitter.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/weibo.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/weixin.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/xhs.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/xiaoheihe.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/provider_api/zuiyou.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/callback.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/media_file.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/media_ref.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/platform.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/post.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/types/result.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/utils/downloader.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/utils/media_info.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub/utils/utils.py RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub.egg-info/requires.txt RENAMED Viewed

File without changes

{parsehub-2.0.5 → parsehub-2.0.7}/src/parsehub.egg-info/top_level.txt RENAMED Viewed

File without changes

parsehub 2.0.5__tar.gz → 2.0.7__tar.gz

parsehub 2.0.5__tar.gz → 2.0.7__tar.gz