parsehub 2.0.29__tar.gz → 2.0.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. {parsehub-2.0.29/src/parsehub.egg-info → parsehub-2.0.30}/PKG-INFO +1 -1
  2. {parsehub-2.0.29 → parsehub-2.0.30}/pyproject.toml +1 -1
  3. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/base/ytdlp.py +2 -2
  4. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/bilibili.py +1 -1
  5. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/coolapk.py +1 -1
  6. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/douyin.py +1 -1
  7. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/kuaishou.py +1 -1
  8. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/tiktok.py +1 -1
  9. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/weibo.py +23 -19
  10. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/result.py +22 -9
  11. {parsehub-2.0.29 → parsehub-2.0.30/src/parsehub.egg-info}/PKG-INFO +1 -1
  12. {parsehub-2.0.29 → parsehub-2.0.30}/test/test_core_offline.py +1 -0
  13. {parsehub-2.0.29 → parsehub-2.0.30}/LICENSE +0 -0
  14. {parsehub-2.0.29 → parsehub-2.0.30}/README.md +0 -0
  15. {parsehub-2.0.29 → parsehub-2.0.30}/setup.cfg +0 -0
  16. {parsehub-2.0.29 → parsehub-2.0.30}/src/__init__.py +0 -0
  17. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/__init__.py +0 -0
  18. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/cli.py +0 -0
  19. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/cli_config.py +0 -0
  20. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/config/__init__.py +0 -0
  21. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/config/config.py +0 -0
  22. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/errors.py +0 -0
  23. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/__init__.py +0 -0
  24. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/base/__init__.py +0 -0
  25. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/base/base.py +0 -0
  26. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/__init__.py +0 -0
  27. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/facebook.py +0 -0
  28. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/instagram.py +0 -0
  29. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/pipix.py +0 -0
  30. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/threads.py +0 -0
  31. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/tieba.py +0 -0
  32. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/twitter.py +0 -0
  33. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/weibo.py +0 -0
  34. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/weixin.py +0 -0
  35. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/xhs.py +0 -0
  36. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
  37. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/youtube.py +0 -0
  38. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  39. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/__init__.py +0 -0
  40. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/bilibili.py +0 -0
  41. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/coolapk.py +0 -0
  42. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/douyin.py +0 -0
  43. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/instagram.py +0 -0
  44. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/kuaishou.py +0 -0
  45. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/pipix.py +0 -0
  46. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/threads.py +0 -0
  47. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/tieba.py +0 -0
  48. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/tiktok.py +0 -0
  49. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/twitter.py +0 -0
  50. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/weixin.py +0 -0
  51. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/xhs.py +0 -0
  52. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/xiaoheihe.py +0 -0
  53. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/provider_api/zuiyou.py +0 -0
  54. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/__init__.py +0 -0
  55. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/callback.py +0 -0
  56. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/media_file.py +0 -0
  57. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/media_ref.py +0 -0
  58. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/platform.py +0 -0
  59. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/types/post.py +0 -0
  60. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/utils/downloader.py +0 -0
  61. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/utils/media_info.py +0 -0
  62. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub/utils/utils.py +0 -0
  63. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/SOURCES.txt +0 -0
  64. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/dependency_links.txt +0 -0
  65. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/entry_points.txt +0 -0
  66. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/requires.txt +0 -0
  67. {parsehub-2.0.29 → parsehub-2.0.30}/src/parsehub.egg-info/top_level.txt +0 -0
  68. {parsehub-2.0.29 → parsehub-2.0.30}/test/test_cli.py +0 -0
  69. {parsehub-2.0.29 → parsehub-2.0.30}/test/test_cli_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.29
3
+ Version: 2.0.30
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.29"
3
+ version = "2.0.30"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -185,7 +185,7 @@ class YtVideoParseResult(VideoParseResult):
185
185
  async def _do_download(
186
186
  self,
187
187
  *,
188
- output_dir: str | Path,
188
+ output_dir: Path,
189
189
  callback: ProgressCallback | None = None,
190
190
  callback_args: tuple = (),
191
191
  callback_kwargs: dict | None = None,
@@ -200,7 +200,7 @@ class YtVideoParseResult(VideoParseResult):
200
200
  if self.dl.proxy:
201
201
  paramss["proxy"] = self.dl.proxy
202
202
 
203
- paramss["outtmpl"] = f"{output_dir_path.joinpath('ytdlp_%(id)s')}.%(ext)s"
203
+ paramss["outtmpl"] = f"{output_dir_path.joinpath(self.name)}.%(ext)s"
204
204
 
205
205
  if callback:
206
206
  loop = asyncio.get_running_loop()
@@ -168,7 +168,7 @@ class BiliVideoParseResult(VideoParseResult):
168
168
  async def _do_download(
169
169
  self,
170
170
  *,
171
- output_dir: str | Path,
171
+ output_dir: Path,
172
172
  callback: ProgressCallback | None = None,
173
173
  callback_args: tuple = (),
174
174
  callback_kwargs: dict | None = None,
@@ -63,7 +63,7 @@ class CoolapkParseResult(ParseResult):
63
63
  async def _do_download(
64
64
  self,
65
65
  *,
66
- output_dir: str | Path,
66
+ output_dir: Path,
67
67
  callback: ProgressCallback | None = None,
68
68
  callback_args: tuple = (),
69
69
  callback_kwargs: dict | None = None,
@@ -68,7 +68,7 @@ class DouyinParseResult(ParseResult):
68
68
  async def _do_download(
69
69
  self,
70
70
  *,
71
- output_dir: str | Path,
71
+ output_dir: Path,
72
72
  callback: ProgressCallback | None = None,
73
73
  callback_args: tuple = (),
74
74
  callback_kwargs: dict | None = None,
@@ -6,7 +6,7 @@ from ..base.base import BaseParser
6
6
  class KuaiShouParser(BaseParser):
7
7
  __platform__ = Platform.KUAISHOU
8
8
  __supported_type__ = ["视频"]
9
- __match__ = r"^(http(s)?://)?(www|v)\.kuaishou.com/.+"
9
+ __match__ = r"^(http(s)?://)?(www|v|live)\.kuaishou.com/.+"
10
10
  __redirect_keywords__ = ["v.kuaishou", "/f/"]
11
11
 
12
12
  async def _do_parse(self, raw_url: str) -> VideoParseResult:
@@ -63,7 +63,7 @@ class TikTokVideoParseResult(VideoParseResult):
63
63
  async def _do_download(
64
64
  self,
65
65
  *,
66
- output_dir: str | Path,
66
+ output_dir: Path,
67
67
  callback: ProgressCallback | None = None,
68
68
  callback_args: tuple = (),
69
69
  callback_kwargs: dict | None = None,
@@ -99,6 +99,7 @@ class MediaType(Enum):
99
99
  PHOTO = "pic"
100
100
  LIVE_PHOTO = "livephoto"
101
101
  GIF = "gif"
102
+ ARTICLE = "article"
102
103
 
103
104
 
104
105
  class Info(abc.ABC):
@@ -123,7 +124,7 @@ class Playback:
123
124
  size: int = 0
124
125
 
125
126
  @classmethod
126
- def parse(cls, playback: dict) -> "Playback":
127
+ def parse(cls, playback: dict) -> Self:
127
128
  pi = playback["play_info"]
128
129
  url = pi["url"]
129
130
  width = pi["width"]
@@ -143,8 +144,8 @@ class MediaInfo:
143
144
  prefetch_size: int | None = None
144
145
  playback: Playback | None = None
145
146
 
146
- @staticmethod
147
- def parse(media_dict: dict) -> "MediaInfo":
147
+ @classmethod
148
+ def parse(cls, media_dict: dict) -> Self:
148
149
  format_ = media_dict["format"]
149
150
  mp4_hd_url = media_dict.get("mp4_hd_url")
150
151
  mp4_sd_url = media_dict.get("mp4_sd_url")
@@ -152,7 +153,7 @@ class MediaInfo:
152
153
  prefetch_size = media_dict["prefetch_size"]
153
154
  playback_list = media_dict.get("playback_list", [])
154
155
  playback = Playback.parse(playback_list[0]) if playback_list else None
155
- return MediaInfo(format_, mp4_hd_url, mp4_sd_url, duration, prefetch_size, playback)
156
+ return cls(format_, mp4_hd_url, mp4_sd_url, duration, prefetch_size, playback)
156
157
 
157
158
 
158
159
  @dataclass
@@ -162,13 +163,16 @@ class PageInfo(Info):
162
163
  page_pic: str | None = None
163
164
  short_url: str | None = None
164
165
 
165
- @staticmethod
166
- def parse(page_info_dict: dict) -> "PageInfo":
166
+ @classmethod
167
+ def parse(cls, page_info_dict: dict) -> Self:
167
168
  object_type = MediaType(page_info_dict["object_type"])
168
- media_info = MediaInfo.parse(page_info_dict["media_info"])
169
+ if object_type != MediaType.ARTICLE:
170
+ media_info = MediaInfo.parse(page_info_dict["media_info"])
171
+ else:
172
+ media_info = None
169
173
  page_pic = page_info_dict.get("page_pic")
170
174
  short_url = page_info_dict.get("short_url")
171
- return PageInfo(object_type, media_info, page_pic, short_url)
175
+ return cls(object_type, media_info, page_pic, short_url)
172
176
 
173
177
  @property
174
178
  def media_url(self) -> str | None:
@@ -220,9 +224,9 @@ class PicInfo(Info):
220
224
  largest: Pic | None = None
221
225
  video: str | None = None
222
226
 
223
- @staticmethod
224
- def parse(pic_dict: dict) -> "PicInfo":
225
- return PicInfo(
227
+ @classmethod
228
+ def parse(cls, pic_dict: dict) -> Self:
229
+ return cls(
226
230
  pic_id=pic_dict["pic_id"],
227
231
  type=MediaType(pic_dict["type"]),
228
232
  thumbnail=Pic(**pic_dict["thumbnail"]),
@@ -281,8 +285,8 @@ class MixMediaInfoItem(Info):
281
285
  class MixMediaInfo:
282
286
  items: list[MixMediaInfoItem] | None = None
283
287
 
284
- @staticmethod
285
- def parse(mix_media_info_dict: dict) -> "MixMediaInfo":
288
+ @classmethod
289
+ def parse(cls, mix_media_info_dict: dict) -> Self:
286
290
  items: list[MixMediaInfoItem] = []
287
291
  for item_dict in mix_media_info_dict["items"]:
288
292
  type_ = MediaType(item_dict["type"])
@@ -294,7 +298,7 @@ class MixMediaInfo:
294
298
  else:
295
299
  data = None
296
300
  items.append(MixMediaInfoItem(type_, data))
297
- return MixMediaInfo(items)
301
+ return cls(items)
298
302
 
299
303
 
300
304
  @dataclass
@@ -308,8 +312,8 @@ class Data:
308
312
  mix_media_info: MixMediaInfo | None = None
309
313
  retweeted_status: "Data | None" = None
310
314
 
311
- @staticmethod
312
- def parse(data_dict: dict) -> "Data":
315
+ @classmethod
316
+ def parse(cls, data_dict: dict) -> Self:
313
317
  if page_info := data_dict.get("page_info"):
314
318
  data_dict["page_info"] = PageInfo.parse(page_info)
315
319
  if pic_infos := data_dict.get("pic_infos"):
@@ -318,10 +322,10 @@ class Data:
318
322
  data_dict["mix_media_info"] = MixMediaInfo.parse(mix_media_info)
319
323
  if retweeted_status := data_dict.get("retweeted_status"):
320
324
  data_dict["retweeted_status"] = Data.parse(retweeted_status)
321
- return Data.from_kwargs(**data_dict)
325
+ return cls.from_kwargs(**data_dict)
322
326
 
323
327
  @classmethod
324
- def from_kwargs(cls, **kwargs: Any) -> "Data":
328
+ def from_kwargs(cls, **kwargs: Any) -> Self:
325
329
  cls_fields = set(signature(cls).parameters)
326
330
 
327
331
  native_args, new_args = {}, {}
@@ -377,4 +381,4 @@ class WeiboTVContent:
377
381
 
378
382
 
379
383
  if __name__ == "__main__":
380
- print(asyncio.run(WeiboAPI().parse("https://weibo.com/tv/show/1034:5306598453608528")))
384
+ print(asyncio.run(WeiboAPI().parse("https://weibo.com/ttarticle/p/show?id=2309405312350592041114")))
@@ -41,11 +41,15 @@ class ParseResult(ABC): # noqa: B024
41
41
  :param content: 正文 (纯文本)
42
42
  :param platform: 平台
43
43
  """
44
- self.raw_url: str | None = None
44
+ self.raw_url: str = ""
45
45
  self.title = (title or "").strip()
46
46
  self.content = (content or "").strip()
47
47
  self.media = media
48
48
  self.platform = platform
49
+ self.name = slugify(
50
+ self.title or self.content or str(time.time_ns()), allow_unicode=True, max_length=50, lowercase=False
51
+ )
52
+ """符合路径命名规范的名称, 可用于目录和文件名"""
49
53
 
50
54
  def __repr__(self) -> str:
51
55
  media_count = (
@@ -77,7 +81,7 @@ class ParseResult(ABC): # noqa: B024
77
81
  async def _do_download(
78
82
  self,
79
83
  *,
80
- output_dir: str | Path,
84
+ output_dir: Path,
81
85
  callback: ProgressCallback | None = None,
82
86
  callback_args: tuple = (),
83
87
  callback_kwargs: dict | None = None,
@@ -114,10 +118,17 @@ class ParseResult(ABC): # noqa: B024
114
118
  dl_progress_args = callback_args
115
119
  dl_progress_kwargs = callback_kwargs or {}
116
120
 
121
+ index = i + 1
122
+
117
123
  try:
124
+ save_path = (
125
+ output_dir.joinpath(f"{self.name}.{media.ext}")
126
+ if is_single
127
+ else output_dir.joinpath(f"{index:03d}_{self.name}.{media.ext}")
128
+ )
118
129
  f = await download(
119
130
  media.url,
120
- f"{output_dir}/{i}.{media.ext}",
131
+ save_path,
121
132
  headers=headers,
122
133
  proxy=proxy,
123
134
  progress=dl_progress,
@@ -140,9 +151,14 @@ class ParseResult(ABC): # noqa: B024
140
151
  mf = LivePhotoFile(path=f, width=media.width, height=media.height, duration=media.duration)
141
152
  if media.video_url:
142
153
  try:
154
+ save_path = (
155
+ output_dir.joinpath(f"{self.name}_video.{media.video_ext}")
156
+ if is_single
157
+ else output_dir.joinpath(f"{index:03d}_{self.name}_video.{media.video_ext}")
158
+ )
143
159
  vf = await download(
144
160
  media.video_url,
145
- f"{output_dir}/{i}_video.{media.video_ext}",
161
+ save_path,
146
162
  headers=headers,
147
163
  proxy=proxy,
148
164
  )
@@ -196,13 +212,10 @@ class ParseResult(ABC): # noqa: B024
196
212
  - ``count``: 计数进度,用于多文件下载时报告已完成/总文件数
197
213
  """
198
214
  save_dir = Path(path) if path else GlobalConfig.default_save_dir
199
- r = slugify(
200
- self.title or self.content or str(time.time_ns()), allow_unicode=True, max_length=20, lowercase=False
201
- )
202
- output_dir = save_dir.joinpath(r)
215
+ output_dir = save_dir.joinpath(self.name)
203
216
  counter = 2
204
217
  while output_dir.exists():
205
- output_dir = save_dir.joinpath(f"{r}_{counter}")
218
+ output_dir = save_dir.joinpath(f"{self.name}_{counter}")
206
219
  counter += 1
207
220
  output_dir.mkdir(parents=True, exist_ok=True)
208
221
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.29
3
+ Version: 2.0.30
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -268,6 +268,7 @@ class TestPlatformUrlMatching(unittest.TestCase):
268
268
  "https://www.kuaishou.com/short-video/3xexample",
269
269
  "https://v.kuaishou.com/example",
270
270
  "https://www.kuaishou.com/f/example",
271
+ "https://live.kuaishou.com/u/3xmdumq6gmzrr64/3xjsfb8u3d7gzyu",
271
272
  ],
272
273
  Platform.PIPIX: [
273
274
  "https://h5.pipix.com/s/example/",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes