parsehub 2.0.15__tar.gz → 2.0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. {parsehub-2.0.15/src/parsehub.egg-info → parsehub-2.0.16}/PKG-INFO +11 -3
  2. {parsehub-2.0.15 → parsehub-2.0.16}/README.md +9 -2
  3. {parsehub-2.0.15 → parsehub-2.0.16}/pyproject.toml +2 -1
  4. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/config/config.py +1 -3
  5. parsehub-2.0.16/src/parsehub/parsers/parser/douyin.py +251 -0
  6. parsehub-2.0.16/src/parsehub/parsers/parser/tiktok.py +228 -0
  7. parsehub-2.0.16/src/parsehub/provider_api/douyin.py +766 -0
  8. parsehub-2.0.16/src/parsehub/provider_api/tiktok.py +124 -0
  9. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/xhs.py +2 -0
  10. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/platform.py +2 -1
  11. {parsehub-2.0.15 → parsehub-2.0.16/src/parsehub.egg-info}/PKG-INFO +11 -3
  12. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub.egg-info/SOURCES.txt +3 -0
  13. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub.egg-info/requires.txt +1 -0
  14. parsehub-2.0.15/src/parsehub/parsers/parser/douyin.py +0 -168
  15. {parsehub-2.0.15 → parsehub-2.0.16}/LICENSE +0 -0
  16. {parsehub-2.0.15 → parsehub-2.0.16}/setup.cfg +0 -0
  17. {parsehub-2.0.15 → parsehub-2.0.16}/src/__init__.py +0 -0
  18. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/__init__.py +0 -0
  19. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/config/__init__.py +0 -0
  20. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/errors.py +0 -0
  21. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/__init__.py +0 -0
  22. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/base/__init__.py +0 -0
  23. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/base/base.py +0 -0
  24. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/base/ytdlp.py +0 -0
  25. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/__init__.py +0 -0
  26. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/bilibili.py +0 -0
  27. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/coolapk.py +0 -0
  28. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/facebook.py +0 -0
  29. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/instagram.py +0 -0
  30. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/kuaishou.py +0 -0
  31. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/pipix.py +0 -0
  32. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/threads.py +0 -0
  33. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/tieba.py +0 -0
  34. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/twitter.py +0 -0
  35. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/weibo.py +0 -0
  36. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/weixin.py +0 -0
  37. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/xhs.py +0 -0
  38. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
  39. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/youtube.py +0 -0
  40. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/parsers/parser/zuiyou.py +0 -0
  41. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/__init__.py +0 -0
  42. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/bilibili.py +0 -0
  43. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/coolapk.py +0 -0
  44. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/instagram.py +0 -0
  45. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/kuaishou.py +0 -0
  46. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/pipix.py +0 -0
  47. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/threads.py +0 -0
  48. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/tieba.py +0 -0
  49. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/twitter.py +0 -0
  50. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/weibo.py +0 -0
  51. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/weixin.py +0 -0
  52. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/xiaoheihe.py +0 -0
  53. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/provider_api/zuiyou.py +0 -0
  54. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/__init__.py +0 -0
  55. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/callback.py +0 -0
  56. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/media_file.py +0 -0
  57. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/media_ref.py +0 -0
  58. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/post.py +0 -0
  59. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/types/result.py +0 -0
  60. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/utils/downloader.py +0 -0
  61. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/utils/media_info.py +0 -0
  62. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub/utils/utils.py +0 -0
  63. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub.egg-info/dependency_links.txt +0 -0
  64. {parsehub-2.0.15 → parsehub-2.0.16}/src/parsehub.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsehub
3
- Version: 2.0.15
3
+ Version: 2.0.16
4
4
  Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
5
5
  Author-email: 梓澪 <zilingmio@gmail.com>
6
6
  License: MIT
@@ -34,6 +34,7 @@ Requires-Dist: pillow>=12.1.0
34
34
  Requires-Dist: python-slugify[unidecode]>=8.0.4
35
35
  Requires-Dist: opencv-python-headless>=4.13.0.92
36
36
  Requires-Dist: cryptography>=46.0.6
37
+ Requires-Dist: gmssl>=3.2.2
37
38
  Dynamic: license-file
38
39
 
39
40
  <div align="center">
@@ -105,7 +106,8 @@ print(result)
105
106
  | **Facebook** | ✅ | | |
106
107
  | **Threads** | ✅ | ✅ | |
107
108
  | **Bilibili** | ✅ | | 📝 动态 |
108
- | **抖音 / TikTok** | ✅ | ✅ | |
109
+ | **抖音** | ✅ | ✅ | |
110
+ | **TikTok** | ✅ | ✅ | |
109
111
  | **微博** | ✅ | ✅ | |
110
112
  | **小红书** | ✅ | ✅ | |
111
113
  | **贴吧** | ✅ | ✅ | |
@@ -164,7 +166,13 @@ ParseHub(cookie={"key1": "value1", "key2": "value2"})
164
166
 
165
167
  目前支持 Cookie 登录的平台:
166
168
 
167
- `Twitter` · `Instagram` · `Kuaishou` · `Bilibili` · `YouTube`
169
+ - `Twitter`
170
+ - `Instagram`
171
+ - `Kuaishou`
172
+ - `Bilibili`
173
+ - `YouTube`
174
+ - `抖音`
175
+ - `TikTok`
168
176
 
169
177
  ### 全局配置
170
178
 
@@ -67,7 +67,8 @@ print(result)
67
67
  | **Facebook** | ✅ | | |
68
68
  | **Threads** | ✅ | ✅ | |
69
69
  | **Bilibili** | ✅ | | 📝 动态 |
70
- | **抖音 / TikTok** | ✅ | ✅ | |
70
+ | **抖音** | ✅ | ✅ | |
71
+ | **TikTok** | ✅ | ✅ | |
71
72
  | **微博** | ✅ | ✅ | |
72
73
  | **小红书** | ✅ | ✅ | |
73
74
  | **贴吧** | ✅ | ✅ | |
@@ -126,7 +127,13 @@ ParseHub(cookie={"key1": "value1", "key2": "value2"})
126
127
 
127
128
  目前支持 Cookie 登录的平台:
128
129
 
129
- `Twitter` · `Instagram` · `Kuaishou` · `Bilibili` · `YouTube`
130
+ - `Twitter`
131
+ - `Instagram`
132
+ - `Kuaishou`
133
+ - `Bilibili`
134
+ - `YouTube`
135
+ - `抖音`
136
+ - `TikTok`
130
137
 
131
138
  ### 全局配置
132
139
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "parsehub"
3
- version = "2.0.15"
3
+ version = "2.0.16"
4
4
  description = "轻量、异步、开箱即用的社交媒体聚合解析库"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12.0"
@@ -37,6 +37,7 @@ dependencies = [
37
37
  "python-slugify[unidecode]>=8.0.4",
38
38
  "opencv-python-headless>=4.13.0.92",
39
39
  "cryptography>=46.0.6",
40
+ "gmssl>=3.2.2",
40
41
  ]
41
42
 
42
43
  [dependency-groups]
@@ -1,7 +1,7 @@
1
1
  import sys
2
2
  from pathlib import Path
3
3
 
4
- from pydantic import BaseModel, ConfigDict, HttpUrl
4
+ from pydantic import BaseModel, ConfigDict
5
5
 
6
6
 
7
7
  class _GlobalConfig(BaseModel):
@@ -11,8 +11,6 @@ class _GlobalConfig(BaseModel):
11
11
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
12
12
  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"
13
13
  )
14
- douyin_api: HttpUrl = "https://douyin.wtf/"
15
- """抖音解析API, 建议自行部署: https://github.com/Evil0ctal/Douyin_TikTok_Download_API"""
16
14
  default_save_dir: Path = Path(sys.argv[0]).parent / "downloads"
17
15
  """默认下载目录"""
18
16
 
@@ -0,0 +1,251 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+ from pathlib import Path
4
+ from typing import Self, Union
5
+
6
+ from ... import ProgressCallback
7
+ from ...provider_api.douyin import DouyinWebCrawler
8
+ from ...types import (
9
+ DownloadResult,
10
+ ImageParseResult,
11
+ ImageRef,
12
+ LivePhotoRef,
13
+ MultimediaParseResult,
14
+ ParseError,
15
+ Platform,
16
+ VideoParseResult,
17
+ VideoRef,
18
+ )
19
+ from ..base.base import BaseParser
20
+
21
+
22
+ class DouyinParser(BaseParser):
23
+ __platform__ = Platform.DOUYIN
24
+ __supported_type__ = ["视频", "图文"]
25
+ __match__ = r"^(http(s)?://)?.+douyin.com/(?!share/user|qishui).+"
26
+ __redirect_keywords__ = ["v.douyin", "iesdouyin"]
27
+ __reserved_parameters__ = ["modal_id"]
28
+
29
+ async def _do_parse(self, raw_url: str) -> Union["VideoParseResult", "ImageParseResult", "MultimediaParseResult"]:
30
+ result = await self._fetch_api_result(raw_url)
31
+
32
+ match result.type:
33
+ case DouyinMediaType.VIDEO:
34
+ return self._build_video_result(result)
35
+ case DouyinMediaType.IMAGE:
36
+ return self._build_image_result(result)
37
+
38
+ async def _fetch_api_result(self, url: str) -> "DouyinApiResult":
39
+ """获取并解析抖音 API 结果"""
40
+ if not self.cookie:
41
+ raise ParseError("抖音 Cookie 未配置")
42
+
43
+ crawler = DouyinWebCrawler(proxy=self.proxy, cookie=self.cookie)
44
+ response = await crawler.parse(url)
45
+ return DouyinApiResult.parse(response)
46
+
47
+ @staticmethod
48
+ def _build_video_result(result: "DouyinApiResult") -> VideoParseResult:
49
+ """构建视频解析结果"""
50
+ return DouyinVideoParseResult(
51
+ title=result.desc,
52
+ video=result.video,
53
+ )
54
+
55
+ @staticmethod
56
+ def _build_image_result(result: "DouyinApiResult") -> ImageParseResult:
57
+ """构建图片解析结果"""
58
+ return ImageParseResult(
59
+ title=result.desc,
60
+ photo=result.image_list,
61
+ )
62
+
63
+
64
class DouyinVideoParseResult(VideoParseResult):
    """Video parse result whose download always carries a Douyin ``Referer``."""

    async def _do_download(
        self,
        *,
        output_dir: str | Path,
        callback: ProgressCallback | None = None,
        callback_args: tuple = (),
        callback_kwargs: dict | None = None,
        proxy: str | None = None,
        headers: dict | None = None,
    ) -> "DownloadResult":
        """Download the video through the base implementation.

        Every argument is forwarded unchanged except ``headers``: headers
        supplied by the caller are kept, and the Douyin ``Referer`` is then
        set on top so it is always the value that is sent.
        """
        # Previously the caller's headers were dropped entirely; keep them and
        # only force the Referer this platform needs.
        merged_headers = {
            **(headers or {}),
            "Referer": "https://www.douyin.com/",
        }
        return await super()._do_download(
            output_dir=output_dir,
            callback=callback,
            callback_args=callback_args,
            callback_kwargs=callback_kwargs,
            proxy=proxy,
            headers=merged_headers,
        )
86
+
87
+
88
def remove_video_watermark(url: str) -> str:
    """Return ``url`` with every ``playwm`` occurrence rewritten to ``play``."""
    # Splitting on the marker and re-joining with the replacement is the same
    # as a global, non-overlapping substitution.
    pieces = url.split("playwm")
    return "play".join(pieces)
91
+
92
+
93
def parse_video_info(video_data: dict) -> dict:
    """Select the highest-resolution stream from a Douyin ``video`` object.

    Args:
        video_data: Mapping that holds a ``bit_rate`` list (each entry with a
            ``play_addr`` mapping) and optionally a ``cover`` mapping.

    Returns:
        A dict with the keys ``video_url``, ``thumb_url`` (``None`` when no
        cover URL exists), ``duration``, ``width`` and ``height``.

    Raises:
        ParseError: If ``bit_rate`` is missing or empty, or the chosen stream
            has no URL.
    """
    bit_rates = video_data.get("bit_rate")
    if not bit_rates:
        raise ParseError("抖音解析失败: 未获取到视频下载地址")

    def _pixel_count(entry: dict) -> int:
        # `or` also covers keys that are present but set to None.
        addr = entry.get("play_addr") or {}
        return (addr.get("width") or 0) * (addr.get("height") or 0)

    # max() returns the first entry among ties, which is what a stable
    # descending sort followed by [0] yields, but it leaves the caller's
    # list in its original order.
    best_quality = max(bit_rates, key=_pixel_count)

    play_addr = best_quality.get("play_addr") or {}
    video_url_list = play_addr.get("url_list") or []
    if not video_url_list:
        raise ParseError("抖音解析失败: 视频下载地址为空")

    video_url = remove_video_watermark(video_url_list[0])

    cover = video_data.get("cover") or {}
    thumb_url_list = cover.get("url_list") or []
    # The last cover URL is the one the original implementation preferred.
    thumb_url = thumb_url_list[-1] if thumb_url_list else None

    return {
        "video_url": video_url,
        "thumb_url": thumb_url,
        "duration": best_quality.get("duration") or 0,
        "width": play_addr.get("width") or 0,
        "height": play_addr.get("height") or 0,
    }
123
+
124
+
125
class DouyinMediaType(Enum):
    """Kinds of media a Douyin post is classified as."""

    VIDEO = "video"
    # Used for plain images as well as live photos.
    IMAGE = "image"
130
+
131
+
132
@dataclass
class DouyinApiResult:
    """Normalized view of the ``aweme_detail`` object in a Douyin response.

    ``video`` is populated for ``DouyinMediaType.VIDEO`` results and
    ``image_list`` for ``DouyinMediaType.IMAGE`` results; the other field
    stays ``None``.
    """

    type: DouyinMediaType
    video: VideoRef | None = None
    desc: str = ""
    image_list: list[ImageRef | LivePhotoRef] | None = None

    @classmethod
    def parse(cls, json_dict: dict) -> Self:
        """Build a result from the raw response.

        ``images`` is checked first, then ``image_post_info``; anything else
        is treated as a video post.

        Raises:
            ParseError: If ``aweme_detail`` is missing or empty, or the
                selected sub-parser finds no usable media.
        """
        data = json_dict.get("aweme_detail")
        if not data:
            raise ParseError("抖音解析失败: 未获取到作品详情")

        # `or ""` also covers an explicit null description.
        desc = data.get("desc") or ""

        if images := data.get("images"):
            return cls._parse_images(images, desc)
        if image_post_info := data.get("image_post_info"):
            return cls._parse_image_post_info(image_post_info, desc)
        return cls._parse_video(data, desc)

    @classmethod
    def _parse_images(cls, images: list[dict], desc: str) -> Self:
        """Parse the ``images`` field (plain images and live photos).

        An entry carrying a ``video`` object becomes a ``LivePhotoRef``; any
        other entry with a non-empty ``url_list`` becomes an ``ImageRef``.

        Raises:
            ParseError: If no entry yields a usable image, or a live photo's
                video has no stream.
        """
        image_list: list[ImageRef | LivePhotoRef] = []

        for image in images:
            url_list = image.get("url_list") or []
            still_url = url_list[-1] if url_list else None

            if video := image.get("video"):
                video_info = parse_video_info(video)
                image_list.append(
                    LivePhotoRef(
                        # Prefer the video cover; fall back to the still image
                        # so the reference never ends up without a URL when
                        # one is available.
                        url=video_info["thumb_url"] or still_url,
                        video_url=video_info["video_url"],
                        width=int(video_info["width"] or 0),
                        height=int(video_info["height"] or 0),
                        # A reported duration of 0 is replaced by 3.
                        duration=int(video_info["duration"] or 0) or 3,
                    )
                )
            elif still_url:
                image_list.append(
                    ImageRef(
                        url=still_url,
                        height=image.get("height", 0),
                        width=image.get("width", 0),
                    )
                )

        if not image_list:
            raise ParseError("抖音解析失败: 未获取到图片下载地址")

        return cls(
            type=DouyinMediaType.IMAGE,
            desc=desc,
            image_list=image_list,
        )

    @classmethod
    def _parse_image_post_info(cls, image_post_info: dict, desc: str) -> Self:
        """Parse the ``image_post_info`` field.

        Each entry's ``display_image`` supplies the URL list and dimensions.

        Raises:
            ParseError: If no entry yields a usable image.
        """
        image_list: list[ImageRef | LivePhotoRef] = []

        for image in image_post_info.get("images") or []:
            display_image = image.get("display_image") or {}
            url_list = display_image.get("url_list") or []
            if url_list:
                image_list.append(
                    ImageRef(
                        url=url_list[-1],
                        height=display_image.get("height", 0),
                        width=display_image.get("width", 0),
                    )
                )

        if not image_list:
            raise ParseError("抖音解析失败: 未获取到图片下载地址")

        return cls(
            type=DouyinMediaType.IMAGE,
            image_list=image_list,
            desc=desc,
        )

    @classmethod
    def _parse_video(cls, data: dict, desc: str) -> Self:
        """Parse a video post from the ``video`` field of ``data``.

        Raises:
            ParseError: If ``video`` is missing or has no usable stream.
        """
        video_data = data.get("video")
        if not video_data:
            raise ParseError("抖音解析失败: 未获取到视频数据")

        video_info = parse_video_info(video_data)

        return cls(
            type=DouyinMediaType.VIDEO,
            video=VideoRef(
                url=video_info["video_url"],
                thumb_url=video_info["thumb_url"],
                width=video_info["width"],
                height=video_info["height"],
                duration=video_info["duration"],
            ),
            desc=desc,
        )
249
+
250
+
251
+ __all__ = ["DouyinParser"]
@@ -0,0 +1,228 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+ from pathlib import Path
4
+ from typing import Self, Union
5
+
6
+ from ... import ProgressCallback
7
+ from ...config import GlobalConfig
8
+ from ...provider_api.tiktok import TikTokWebCrawler
9
+ from ...types import (
10
+ DownloadResult,
11
+ ImageParseResult,
12
+ ImageRef,
13
+ LivePhotoRef,
14
+ MultimediaParseResult,
15
+ ParseError,
16
+ Platform,
17
+ VideoParseResult,
18
+ VideoRef,
19
+ )
20
+ from ..base.base import BaseParser
21
+
22
+
23
+ class TikTokParser(BaseParser):
24
+ __platform__ = Platform.TIKTOK
25
+ __supported_type__ = ["视频", "图文"]
26
+ __match__ = r"^(http(s)?://)?.+tiktok.com/(?!share/user|qishui).+"
27
+ __redirect_keywords__ = ["vt.tiktok"]
28
+
29
+ async def _do_parse(self, raw_url: str) -> Union["VideoParseResult", "ImageParseResult", "MultimediaParseResult"]:
30
+ result = await self._fetch_api_result(raw_url)
31
+
32
+ match result.type:
33
+ case TikTokMediaType.VIDEO:
34
+ return self._build_video_result(result)
35
+ case TikTokMediaType.IMAGE:
36
+ return self._build_image_result(result)
37
+
38
+ async def _fetch_api_result(self, url: str) -> "TikTokApiResult":
39
+ crawler = TikTokWebCrawler(proxy=self.proxy, cookie=self.cookie)
40
+ try:
41
+ response = await crawler.parse(url)
42
+ return TikTokApiResult.parse(response)
43
+ except ParseError:
44
+ raise
45
+ except Exception as e:
46
+ raise ParseError(f"TikTok 解析失败: {e}") from e
47
+
48
+ @staticmethod
49
+ def _build_video_result(result: "TikTokApiResult") -> VideoParseResult:
50
+ return TikTokVideoParseResult(
51
+ title=result.desc,
52
+ video=result.video,
53
+ )
54
+
55
+ @staticmethod
56
+ def _build_image_result(result: "TikTokApiResult") -> ImageParseResult:
57
+ return ImageParseResult(
58
+ title=result.desc,
59
+ photo=result.image_list,
60
+ )
61
+
62
+
63
class TikTokVideoParseResult(VideoParseResult):
    """Video parse result whose download always carries TikTok request headers."""

    async def _do_download(
        self,
        *,
        output_dir: str | Path,
        callback: ProgressCallback | None = None,
        callback_args: tuple = (),
        callback_kwargs: dict | None = None,
        proxy: str | None = None,
        headers: dict | None = None,
    ) -> "DownloadResult":
        """Download the video through the base implementation.

        Every argument is forwarded unchanged except ``headers``: headers
        supplied by the caller are kept, and ``User-Agent`` / ``Referer`` are
        then set on top so they are always the values that are sent.
        """
        # Previously the caller's headers were dropped entirely; keep them and
        # only force the two headers this platform needs.
        merged_headers = {
            **(headers or {}),
            "User-Agent": GlobalConfig.ua,
            "Referer": "https://www.tiktok.com/",
        }
        return await super()._do_download(
            output_dir=output_dir,
            callback=callback,
            callback_args=callback_args,
            callback_kwargs=callback_kwargs,
            proxy=proxy,
            headers=merged_headers,
        )
86
+
87
+
88
+ def first_url(data: dict | None) -> str | None:
89
+ url_list = (data or {}).get("url_list") or (data or {}).get("UrlList") or []
90
+ return next((url for url in url_list if url), None)
91
+
92
+
93
def as_int(value) -> int:
    """Coerce ``value`` to ``int``, returning 0 when that is not possible.

    Falsy values (``None``, ``""``, ``0``) map to 0. Values ``int()`` cannot
    convert — non-numeric strings, unsupported types, NaN and infinities —
    also map to 0 instead of raising.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int() raises for float infinities.
        return 0
98
+
99
+
100
+ def pick_cover(video_data: dict) -> str | None:
101
+ for key in ("origin_cover", "cover", "dynamic_cover", "originCover", "dynamicCover"):
102
+ cover_url = first_url(video_data.get(key))
103
+ if cover_url:
104
+ return cover_url
105
+ cover = video_data.get("cover")
106
+ return cover if isinstance(cover, str) else None
107
+
108
+
109
def parse_video_info(video_data: dict) -> dict:
    """Select the best stream from a TikTok ``video`` object.

    Candidates come from ``bit_rate`` / ``bitrateInfo``. When none of them
    yields a URL, the top-level ``play_addr`` / ``playAddr`` is used, which
    may be a url-list mapping or a bare URL string.

    Returns:
        The winning candidate: a dict with the keys ``video_url``,
        ``thumb_url``, ``duration``, ``width``, ``height`` and ``quality``
        (the ``(pixels, bitrate, data_size)`` tuple used for ranking).

    Raises:
        ParseError: If no stream URL can be found.
    """
    # The cover does not depend on the chosen stream, so resolve it once.
    thumb_url = pick_cover(video_data)

    def _candidate(video_url: str, width: int, height: int, duration: int, bitrate: int, data_size: int) -> dict:
        # Single place that defines the shape of a candidate record.
        return {
            "video_url": video_url,
            "thumb_url": thumb_url,
            "duration": duration,
            "width": width,
            "height": height,
            "quality": (width * height, bitrate, data_size),
        }

    candidates = []
    for bit_rate in video_data.get("bit_rate") or video_data.get("bitrateInfo") or []:
        play_addr = bit_rate.get("play_addr") or bit_rate.get("PlayAddr") or {}
        video_url = first_url(play_addr)
        if not video_url:
            continue

        candidates.append(
            _candidate(
                video_url,
                width=as_int(play_addr.get("width") or play_addr.get("Width") or video_data.get("width")),
                height=as_int(play_addr.get("height") or play_addr.get("Height") or video_data.get("height")),
                duration=as_int(play_addr.get("duration") or play_addr.get("Duration") or video_data.get("duration")),
                bitrate=as_int(bit_rate.get("bit_rate") or bit_rate.get("Bitrate") or bit_rate.get("bitrate")),
                data_size=as_int(play_addr.get("data_size") or play_addr.get("DataSize") or bit_rate.get("data_size")),
            )
        )

    if not candidates:
        play_addr = video_data.get("play_addr") or video_data.get("playAddr") or {}
        if isinstance(play_addr, str):
            # A bare URL string carries no dimensions of its own; read them
            # from the enclosing video object instead of calling .get() on it.
            video_url, addr_meta = play_addr, {}
        else:
            video_url, addr_meta = first_url(play_addr), play_addr

        if video_url:
            candidates.append(
                _candidate(
                    video_url,
                    width=as_int(addr_meta.get("width") or video_data.get("width")),
                    height=as_int(addr_meta.get("height") or video_data.get("height")),
                    duration=as_int(addr_meta.get("duration") or video_data.get("duration")),
                    bitrate=0,
                    data_size=0,
                )
            )

    if not candidates:
        raise ParseError("TikTok 解析失败: 未获取到无水印视频下载地址")

    # Rank by resolution, then bitrate, then payload size.
    return max(candidates, key=lambda x: x["quality"])
157
+
158
+
159
class TikTokMediaType(Enum):
    """Kinds of media a TikTok post is classified as."""

    # Post whose payload is parsed as a single video.
    VIDEO = "video"
    # Post whose payload is parsed as a list of images.
    IMAGE = "image"
162
+
163
+
164
@dataclass
class TikTokApiResult:
    """Normalized view of a TikTok post payload.

    ``video`` is populated for ``TikTokMediaType.VIDEO`` results and
    ``image_list`` for ``TikTokMediaType.IMAGE`` results; the other field
    stays ``None``.
    """

    type: TikTokMediaType
    video: VideoRef | None = None
    desc: str = ""
    image_list: list[ImageRef | LivePhotoRef] | None = None

    @classmethod
    def parse(cls, json_dict: dict) -> Self:
        """Build a result from the post payload.

        A non-empty ``image_post_info`` / ``imagePost`` marks an image post;
        anything else is treated as a video post.

        Raises:
            ParseError: If the payload is empty or contains no usable media.
        """
        if not json_dict:
            raise ParseError("TikTok 解析失败: 未获取到作品详情")

        # `or ""` also covers an explicit null description.
        desc = json_dict.get("desc") or ""
        image_post_info = json_dict.get("image_post_info") or json_dict.get("imagePost")
        if image_post_info:
            return cls._parse_image_post(image_post_info, desc)
        return cls._parse_video(json_dict, desc)

    @classmethod
    def _parse_image_post(cls, image_post_info: dict, desc: str) -> Self:
        """Collect image references from an image-post object.

        For each entry the first of ``display_image`` / ``displayImage`` /
        ``image`` that is present supplies the URL list and dimensions.

        Raises:
            ParseError: If no entry yields an image URL.
        """
        image_list: list[ImageRef | LivePhotoRef] = []

        # `or []` tolerates an explicit null instead of failing on iteration.
        for image in image_post_info.get("images") or []:
            display_image = image.get("display_image") or image.get("displayImage") or image.get("image") or {}
            url = first_url(display_image)
            if not url:
                continue
            image_list.append(
                ImageRef(
                    url=url,
                    height=as_int(display_image.get("height") or display_image.get("Height")),
                    width=as_int(display_image.get("width") or display_image.get("Width")),
                )
            )

        if not image_list:
            raise ParseError("TikTok 解析失败: 未获取到无水印图文下载地址")

        return cls(
            type=TikTokMediaType.IMAGE,
            desc=desc,
            image_list=image_list,
        )

    @classmethod
    def _parse_video(cls, data: dict, desc: str) -> Self:
        """Parse a video post from the ``video`` field of ``data``.

        Raises:
            ParseError: If ``video`` is missing or has no usable stream.
        """
        video_data = data.get("video")
        if not video_data:
            raise ParseError("TikTok 解析失败: 未获取到视频数据")

        video_info = parse_video_info(video_data)

        return cls(
            type=TikTokMediaType.VIDEO,
            video=VideoRef(
                url=video_info["video_url"],
                thumb_url=video_info["thumb_url"],
                width=video_info["width"],
                height=video_info["height"],
                duration=video_info["duration"],
            ),
            desc=desc,
        )
226
+
227
+
228
+ __all__ = ["TikTokParser"]