PyPI - parsehub - Versions diffs - 2.0.29__tar.gz → 2.0.30__tar.gz - Mend

parsehub 2.0.29.tar.gz → 2.0.30.tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

{parsehub-2.0.29/src/parsehub.egg-info → parsehub-2.0.30}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: parsehub
-Version: 2.0.29
+Version: 2.0.30
 Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
 Author-email: 梓澪 <zilingmio@gmail.com>
 License: MIT

{parsehub-2.0.29 → parsehub-2.0.30}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "parsehub"
-version = "2.0.29"
+version = "2.0.30"
 description = "轻量、异步、开箱即用的社交媒体聚合解析库"
 readme = "README.md"
 requires-python = ">=3.12.0"

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/base/ytdlp.py RENAMED Viewed

@@ -185,7 +185,7 @@ class YtVideoParseResult(VideoParseResult):
     async def _do_download(
         self,
         *,
-        output_dir: str | Path,
+        output_dir: Path,
         callback: ProgressCallback | None = None,
         callback_args: tuple = (),
         callback_kwargs: dict | None = None,
@@ -200,7 +200,7 @@ class YtVideoParseResult(VideoParseResult):
         if self.dl.proxy:
             paramss["proxy"] = self.dl.proxy
-        paramss["outtmpl"] = f"{output_dir_path.joinpath('ytdlp_%(id)s')}.%(ext)s"
+        paramss["outtmpl"] = f"{output_dir_path.joinpath(self.name)}.%(ext)s"
         if callback:
             loop = asyncio.get_running_loop()

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/bilibili.py RENAMED Viewed

@@ -168,7 +168,7 @@ class BiliVideoParseResult(VideoParseResult):
     async def _do_download(
         self,
         *,
-        output_dir: str | Path,
+        output_dir: Path,
         callback: ProgressCallback | None = None,
         callback_args: tuple = (),
         callback_kwargs: dict | None = None,

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/coolapk.py RENAMED Viewed

@@ -63,7 +63,7 @@ class CoolapkParseResult(ParseResult):
     async def _do_download(
         self,
         *,
-        output_dir: str | Path,
+        output_dir: Path,
         callback: ProgressCallback | None = None,
         callback_args: tuple = (),
         callback_kwargs: dict | None = None,

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/douyin.py RENAMED Viewed

@@ -68,7 +68,7 @@ class DouyinParseResult(ParseResult):
     async def _do_download(
         self,
         *,
-        output_dir: str | Path,
+        output_dir: Path,
         callback: ProgressCallback | None = None,
         callback_args: tuple = (),
         callback_kwargs: dict | None = None,

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/kuaishou.py RENAMED Viewed

@@ -6,7 +6,7 @@ from ..base.base import BaseParser
 class KuaiShouParser(BaseParser):
     __platform__ = Platform.KUAISHOU
     __supported_type__ = ["视频"]
-    __match__ = r"^(http(s)?://)?(www|v)\.kuaishou.com/.+"
+    __match__ = r"^(http(s)?://)?(www|v|live)\.kuaishou.com/.+"
     __redirect_keywords__ = ["v.kuaishou", "/f/"]
     async def _do_parse(self, raw_url: str) -> VideoParseResult:

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/tiktok.py RENAMED Viewed

@@ -63,7 +63,7 @@ class TikTokVideoParseResult(VideoParseResult):
     async def _do_download(
         self,
         *,
-        output_dir: str | Path,
+        output_dir: Path,
         callback: ProgressCallback | None = None,
         callback_args: tuple = (),
         callback_kwargs: dict | None = None,

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/weibo.py RENAMED Viewed

@@ -99,6 +99,7 @@ class MediaType(Enum):
     PHOTO = "pic"
     LIVE_PHOTO = "livephoto"
     GIF = "gif"
+    ARTICLE = "article"
 class Info(abc.ABC):
@@ -123,7 +124,7 @@ class Playback:
     size: int = 0
     @classmethod
-    def parse(cls, playback: dict) -> "Playback":
+    def parse(cls, playback: dict) -> Self:
         pi = playback["play_info"]
         url = pi["url"]
         width = pi["width"]
@@ -143,8 +144,8 @@ class MediaInfo:
     prefetch_size: int | None = None
     playback: Playback | None = None
-    @staticmethod
-    def parse(media_dict: dict) -> "MediaInfo":
+    @classmethod
+    def parse(cls, media_dict: dict) -> Self:
         format_ = media_dict["format"]
         mp4_hd_url = media_dict.get("mp4_hd_url")
         mp4_sd_url = media_dict.get("mp4_sd_url")
@@ -152,7 +153,7 @@ class MediaInfo:
         prefetch_size = media_dict["prefetch_size"]
         playback_list = media_dict.get("playback_list", [])
         playback = Playback.parse(playback_list[0]) if playback_list else None
-        return MediaInfo(format_, mp4_hd_url, mp4_sd_url, duration, prefetch_size, playback)
+        return cls(format_, mp4_hd_url, mp4_sd_url, duration, prefetch_size, playback)
 @dataclass
@@ -162,13 +163,16 @@ class PageInfo(Info):
     page_pic: str | None = None
     short_url: str | None = None
-    @staticmethod
-    def parse(page_info_dict: dict) -> "PageInfo":
+    @classmethod
+    def parse(cls, page_info_dict: dict) -> Self:
         object_type = MediaType(page_info_dict["object_type"])
-        media_info = MediaInfo.parse(page_info_dict["media_info"])
+        if object_type != MediaType.ARTICLE:
+            media_info = MediaInfo.parse(page_info_dict["media_info"])
+        else:
+            media_info = None
         page_pic = page_info_dict.get("page_pic")
         short_url = page_info_dict.get("short_url")
-        return PageInfo(object_type, media_info, page_pic, short_url)
+        return cls(object_type, media_info, page_pic, short_url)
     @property
     def media_url(self) -> str | None:
@@ -220,9 +224,9 @@ class PicInfo(Info):
     largest: Pic | None = None
     video: str | None = None
-    @staticmethod
-    def parse(pic_dict: dict) -> "PicInfo":
-        return PicInfo(
+    @classmethod
+    def parse(cls, pic_dict: dict) -> Self:
+        return cls(
             pic_id=pic_dict["pic_id"],
             type=MediaType(pic_dict["type"]),
             thumbnail=Pic(**pic_dict["thumbnail"]),
@@ -281,8 +285,8 @@ class MixMediaInfoItem(Info):
 class MixMediaInfo:
     items: list[MixMediaInfoItem] | None = None
-    @staticmethod
-    def parse(mix_media_info_dict: dict) -> "MixMediaInfo":
+    @classmethod
+    def parse(cls, mix_media_info_dict: dict) -> Self:
         items: list[MixMediaInfoItem] = []
         for item_dict in mix_media_info_dict["items"]:
             type_ = MediaType(item_dict["type"])
@@ -294,7 +298,7 @@ class MixMediaInfo:
             else:
                 data = None
             items.append(MixMediaInfoItem(type_, data))
-        return MixMediaInfo(items)
+        return cls(items)
 @dataclass
@@ -308,8 +312,8 @@ class Data:
     mix_media_info: MixMediaInfo | None = None
     retweeted_status: "Data | None" = None
-    @staticmethod
-    def parse(data_dict: dict) -> "Data":
+    @classmethod
+    def parse(cls, data_dict: dict) -> Self:
         if page_info := data_dict.get("page_info"):
             data_dict["page_info"] = PageInfo.parse(page_info)
         if pic_infos := data_dict.get("pic_infos"):
@@ -318,10 +322,10 @@ class Data:
             data_dict["mix_media_info"] = MixMediaInfo.parse(mix_media_info)
         if retweeted_status := data_dict.get("retweeted_status"):
             data_dict["retweeted_status"] = Data.parse(retweeted_status)
-        return Data.from_kwargs(**data_dict)
+        return cls.from_kwargs(**data_dict)
     @classmethod
-    def from_kwargs(cls, **kwargs: Any) -> "Data":
+    def from_kwargs(cls, **kwargs: Any) -> Self:
         cls_fields = set(signature(cls).parameters)
         native_args, new_args = {}, {}
@@ -377,4 +381,4 @@ class WeiboTVContent:
 if __name__ == "__main__":
-    print(asyncio.run(WeiboAPI().parse("https://weibo.com/tv/show/1034:5306598453608528")))
+    print(asyncio.run(WeiboAPI().parse("https://weibo.com/ttarticle/p/show?id=2309405312350592041114")))

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/result.py RENAMED Viewed

@@ -41,11 +41,15 @@ class ParseResult(ABC):  # noqa: B024
         :param content: 正文 (纯文本)
         :param platform: 平台
         """
-        self.raw_url: str | None = None
+        self.raw_url: str = ""
         self.title = (title or "").strip()
         self.content = (content or "").strip()
         self.media = media
         self.platform = platform
+        self.name = slugify(
+            self.title or self.content or str(time.time_ns()), allow_unicode=True, max_length=50, lowercase=False
+        )
+        """符合路径命名规范的名称, 可用于目录和文件名"""
     def __repr__(self) -> str:
         media_count = (
@@ -77,7 +81,7 @@ class ParseResult(ABC):  # noqa: B024
     async def _do_download(
         self,
         *,
-        output_dir: str | Path,
+        output_dir: Path,
         callback: ProgressCallback | None = None,
         callback_args: tuple = (),
         callback_kwargs: dict | None = None,
@@ -114,10 +118,17 @@ class ParseResult(ABC):  # noqa: B024
                 dl_progress_args = callback_args
                 dl_progress_kwargs = callback_kwargs or {}
+            index = i + 1
             try:
+                save_path = (
+                    output_dir.joinpath(f"{self.name}.{media.ext}")
+                    if is_single
+                    else output_dir.joinpath(f"{index:03d}_{self.name}.{media.ext}")
+                )
                 f = await download(
                     media.url,
-                    f"{output_dir}/{i}.{media.ext}",
+                    save_path,
                     headers=headers,
                     proxy=proxy,
                     progress=dl_progress,
@@ -140,9 +151,14 @@ class ParseResult(ABC):  # noqa: B024
                     mf = LivePhotoFile(path=f, width=media.width, height=media.height, duration=media.duration)
                     if media.video_url:
                         try:
+                            save_path = (
+                                output_dir.joinpath(f"{self.name}_video.{media.video_ext}")
+                                if is_single
+                                else output_dir.joinpath(f"{index:03d}_{self.name}_video.{media.video_ext}")
+                            )
                             vf = await download(
                                 media.video_url,
-                                f"{output_dir}/{i}_video.{media.video_ext}",
+                                save_path,
                                 headers=headers,
                                 proxy=proxy,
                             )
@@ -196,13 +212,10 @@ class ParseResult(ABC):  # noqa: B024
                 - ``count``: 计数进度，用于多文件下载时报告已完成/总文件数
         """
         save_dir = Path(path) if path else GlobalConfig.default_save_dir
-        r = slugify(
-            self.title or self.content or str(time.time_ns()), allow_unicode=True, max_length=20, lowercase=False
-        )
-        output_dir = save_dir.joinpath(r)
+        output_dir = save_dir.joinpath(self.name)
         counter = 2
         while output_dir.exists():
-            output_dir = save_dir.joinpath(f"{r}_{counter}")
+            output_dir = save_dir.joinpath(f"{self.name}_{counter}")
             counter += 1
         output_dir.mkdir(parents=True, exist_ok=True)

{parsehub-2.0.29 → parsehub-2.0.30/src/parsehub.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: parsehub
-Version: 2.0.29
+Version: 2.0.30
 Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
 Author-email: 梓澪 <zilingmio@gmail.com>
 License: MIT

{parsehub-2.0.29 → parsehub-2.0.30}/test/test_core_offline.py RENAMED Viewed

@@ -268,6 +268,7 @@ class TestPlatformUrlMatching(unittest.TestCase):
                 "https://www.kuaishou.com/short-video/3xexample",
                 "https://v.kuaishou.com/example",
                 "https://www.kuaishou.com/f/example",
+                "https://live.kuaishou.com/u/3xmdumq6gmzrr64/3xjsfb8u3d7gzyu",
             ],
             Platform.PIPIX: [
                 "https://h5.pipix.com/s/example/",

{parsehub-2.0.29 → parsehub-2.0.30}/LICENSE RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/README.md RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/setup.cfg RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/cli.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/cli_config.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/config/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/config/config.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/errors.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/base/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/base/base.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/facebook.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/instagram.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/pipix.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/threads.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/tieba.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/twitter.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/weibo.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/weixin.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/xhs.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/xiaoheihe.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/youtube.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/zuiyou.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/bilibili.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/coolapk.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/douyin.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/instagram.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/kuaishou.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/pipix.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/threads.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/tieba.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/tiktok.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/twitter.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/weixin.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/xhs.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/xiaoheihe.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/zuiyou.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/__init__.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/callback.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/media_file.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/media_ref.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/platform.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/post.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/utils/downloader.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/utils/media_info.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/utils/utils.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/entry_points.txt RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/requires.txt RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/top_level.txt RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/test/test_cli.py RENAMED Viewed

File without changes

{parsehub-2.0.29 → parsehub-2.0.30}/test/test_cli_config.py RENAMED Viewed

File without changes

parsehub 2.0.29__tar.gz → 2.0.30__tar.gz

parsehub 2.0.29.tar.gz → 2.0.30.tar.gz