parsehub 2.0.2__tar.gz → 2.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.2/src/parsehub.egg-info → parsehub-2.0.4}/PKG-INFO +5 -13
- {parsehub-2.0.2 → parsehub-2.0.4}/README.md +4 -12
- {parsehub-2.0.2 → parsehub-2.0.4}/pyproject.toml +1 -1
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/__init__.py +13 -2
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/config/config.py +2 -3
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/base/ytdlp.py +3 -4
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/bilibili.py +5 -4
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/xiaoheihe.py +2 -2
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/types/__init__.py +2 -0
- parsehub-2.0.4/src/parsehub/types/post.py +11 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/types/result.py +66 -5
- {parsehub-2.0.2 → parsehub-2.0.4/src/parsehub.egg-info}/PKG-INFO +5 -13
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub.egg-info/SOURCES.txt +1 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/LICENSE +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/setup.cfg +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/__init__.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/config/__init__.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/base/base.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/coolapk.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/douyin.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/instagram.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/kuaishou.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/pipix.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/threads.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/tieba.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/twitter.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/weibo.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/weixin.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/xhs.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/youtube.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/parsers/parser/zuiyou.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/bilibili.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/coolapk.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/instagram.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/kuaishou.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/pipix.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/threads.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/tieba.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/twitter.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/weibo.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/weixin.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/xhs.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/provider_api/zuiyou.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/types/callback.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/types/media_file.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/types/media_ref.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/types/platform.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/utils/downloader.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub/utils/utils.py +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub.egg-info/requires.txt +0 -0
- {parsehub-2.0.2 → parsehub-2.0.4}/src/parsehub.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parsehub
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.4
|
|
4
4
|
Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
|
|
5
5
|
Author-email: 梓澪 <zilingmio@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -125,26 +125,21 @@ print(result)
|
|
|
125
125
|
|
|
126
126
|
```python
|
|
127
127
|
from parsehub import ParseHub
|
|
128
|
-
from parsehub.config import ParseConfig
|
|
129
128
|
|
|
130
|
-
|
|
131
|
-
cookie="key1=value1; key2=value2", # 从浏览器中获取
|
|
132
|
-
proxy="http://127.0.0.1:7890", # 可选
|
|
133
|
-
)
|
|
134
|
-
ph = ParseHub(config=config)
|
|
129
|
+
ph = ParseHub(cookie="key1=value1; key2=value2", proxy="http://127.0.0.1:7890",)
|
|
135
130
|
```
|
|
136
131
|
|
|
137
132
|
Cookie 支持多种格式传入:
|
|
138
133
|
|
|
139
134
|
```python
|
|
140
135
|
# 字符串
|
|
141
|
-
|
|
136
|
+
ParseHub(cookie="key1=value1; key2=value2")
|
|
142
137
|
|
|
143
138
|
# JSON 字符串
|
|
144
|
-
|
|
139
|
+
ParseHub(cookie='{"key1": "value1", "key2": "value2"}')
|
|
145
140
|
|
|
146
141
|
# 字典
|
|
147
|
-
|
|
142
|
+
ParseHub(cookie={"key1": "value1", "key2": "value2"})
|
|
148
143
|
```
|
|
149
144
|
|
|
150
145
|
目前支持 Cookie 登录的平台:
|
|
@@ -158,9 +153,6 @@ from parsehub.config import GlobalConfig
|
|
|
158
153
|
|
|
159
154
|
# 自定义默认下载目录
|
|
160
155
|
GlobalConfig.default_save_dir = "./my_downloads"
|
|
161
|
-
|
|
162
|
-
# 视频时长限制 (超过此时长将下载最低画质,0 为不限制)
|
|
163
|
-
GlobalConfig.duration_limit = 600 # 秒
|
|
164
156
|
```
|
|
165
157
|
|
|
166
158
|
## 🤝 参考项目
|
|
@@ -88,26 +88,21 @@ print(result)
|
|
|
88
88
|
|
|
89
89
|
```python
|
|
90
90
|
from parsehub import ParseHub
|
|
91
|
-
from parsehub.config import ParseConfig
|
|
92
91
|
|
|
93
|
-
|
|
94
|
-
cookie="key1=value1; key2=value2", # 从浏览器中获取
|
|
95
|
-
proxy="http://127.0.0.1:7890", # 可选
|
|
96
|
-
)
|
|
97
|
-
ph = ParseHub(config=config)
|
|
92
|
+
ph = ParseHub(cookie="key1=value1; key2=value2", proxy="http://127.0.0.1:7890",)
|
|
98
93
|
```
|
|
99
94
|
|
|
100
95
|
Cookie 支持多种格式传入:
|
|
101
96
|
|
|
102
97
|
```python
|
|
103
98
|
# 字符串
|
|
104
|
-
|
|
99
|
+
ParseHub(cookie="key1=value1; key2=value2")
|
|
105
100
|
|
|
106
101
|
# JSON 字符串
|
|
107
|
-
|
|
102
|
+
ParseHub(cookie='{"key1": "value1", "key2": "value2"}')
|
|
108
103
|
|
|
109
104
|
# 字典
|
|
110
|
-
|
|
105
|
+
ParseHub(cookie={"key1": "value1", "key2": "value2"})
|
|
111
106
|
```
|
|
112
107
|
|
|
113
108
|
目前支持 Cookie 登录的平台:
|
|
@@ -121,9 +116,6 @@ from parsehub.config import GlobalConfig
|
|
|
121
116
|
|
|
122
117
|
# 自定义默认下载目录
|
|
123
118
|
GlobalConfig.default_save_dir = "./my_downloads"
|
|
124
|
-
|
|
125
|
-
# 视频时长限制 (超过此时长将下载最低画质,0 为不限制)
|
|
126
|
-
GlobalConfig.duration_limit = 600 # 秒
|
|
127
119
|
```
|
|
128
120
|
|
|
129
121
|
## 🤝 参考项目
|
|
@@ -43,9 +43,11 @@ class ParseHub:
|
|
|
43
43
|
self,
|
|
44
44
|
url: str,
|
|
45
45
|
path: str | Path = None,
|
|
46
|
+
*,
|
|
46
47
|
callback: ProgressCallback = None,
|
|
47
48
|
callback_args: tuple = (),
|
|
48
49
|
proxy: str | None = None,
|
|
50
|
+
save_metadata: bool = False,
|
|
49
51
|
) -> DownloadResult:
|
|
50
52
|
"""下载
|
|
51
53
|
:param url: 分享文案 / 分享链接
|
|
@@ -53,6 +55,7 @@ class ParseHub:
|
|
|
53
55
|
:param callback: 下载进度回调函数
|
|
54
56
|
:param callback_args: 下载进度回调函数参数
|
|
55
57
|
:param proxy: 代理
|
|
58
|
+
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
56
59
|
:return: DownloadResult
|
|
57
60
|
|
|
58
61
|
Note:
|
|
@@ -67,7 +70,9 @@ class ParseHub:
|
|
|
67
70
|
- ``count``: 计数进度,用于多文件下载时报告已完成/总文件数
|
|
68
71
|
"""
|
|
69
72
|
result = await self.parse(url)
|
|
70
|
-
return await result.download(
|
|
73
|
+
return await result.download(
|
|
74
|
+
path, callback=callback, callback_args=callback_args, proxy=proxy, save_metadata=save_metadata
|
|
75
|
+
)
|
|
71
76
|
|
|
72
77
|
def download_sync(
|
|
73
78
|
self,
|
|
@@ -76,6 +81,7 @@ class ParseHub:
|
|
|
76
81
|
callback: ProgressCallback | None = None,
|
|
77
82
|
callback_args: tuple = (),
|
|
78
83
|
proxy: str | None = None,
|
|
84
|
+
save_metadata: bool = False,
|
|
79
85
|
) -> DownloadResult:
|
|
80
86
|
"""
|
|
81
87
|
同步下载
|
|
@@ -84,6 +90,7 @@ class ParseHub:
|
|
|
84
90
|
:param callback: 进度回调函数
|
|
85
91
|
:param callback_args: 进度回调函数参数
|
|
86
92
|
:param proxy: 代理
|
|
93
|
+
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
87
94
|
:return: DownloadResult
|
|
88
95
|
|
|
89
96
|
Note:
|
|
@@ -97,7 +104,11 @@ class ParseHub:
|
|
|
97
104
|
- ``bytes``: 字节进度,用于单文件下载时报告已下载/总字节数
|
|
98
105
|
- ``count``: 计数进度,用于多文件下载时报告已完成/总文件数
|
|
99
106
|
"""
|
|
100
|
-
return get_event_loop().run_until_complete(
|
|
107
|
+
return get_event_loop().run_until_complete(
|
|
108
|
+
self.download(
|
|
109
|
+
url, path, callback=callback, callback_args=callback_args, proxy=proxy, save_metadata=save_metadata
|
|
110
|
+
)
|
|
111
|
+
)
|
|
101
112
|
|
|
102
113
|
async def get_raw_url(self, url: str, proxy: str | None = None) -> str:
|
|
103
114
|
"""获取原始链接
|
|
@@ -12,10 +12,9 @@ class _GlobalConfig(BaseModel):
|
|
|
12
12
|
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"
|
|
13
13
|
)
|
|
14
14
|
douyin_api: HttpUrl = "https://douyin.wtf/"
|
|
15
|
-
"""建议自行部署: https://github.com/Evil0ctal/Douyin_TikTok_Download_API"""
|
|
16
|
-
duration_limit: int = 0
|
|
17
|
-
"""部分平台下载超过指定时长的视频时, 下载最低画质, 单位秒, 0为不限制"""
|
|
15
|
+
"""抖音解析API, 建议自行部署: https://github.com/Evil0ctal/Douyin_TikTok_Download_API"""
|
|
18
16
|
default_save_dir: Path = Path(sys.argv[0]).parent / "downloads"
|
|
17
|
+
"""默认下载目录"""
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
GlobalConfig = _GlobalConfig()
|
|
@@ -6,7 +6,6 @@ from typing import Union
|
|
|
6
6
|
|
|
7
7
|
from yt_dlp import YoutubeDL
|
|
8
8
|
|
|
9
|
-
from ...config.config import GlobalConfig
|
|
10
9
|
from ...types import (
|
|
11
10
|
DownloadError,
|
|
12
11
|
DownloadResult,
|
|
@@ -137,9 +136,9 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
137
136
|
|
|
138
137
|
paramss["outtmpl"] = f"{output_dir.joinpath('ytdlp_%(id)s')}.%(ext)s"
|
|
139
138
|
|
|
140
|
-
if GlobalConfig.duration_limit and self.dl.duration > GlobalConfig.duration_limit:
|
|
141
|
-
|
|
142
|
-
|
|
139
|
+
# if GlobalConfig.duration_limit and self.dl.duration > GlobalConfig.duration_limit:
|
|
140
|
+
# # 视频超过限制时长,获取最低画质
|
|
141
|
+
# paramss["format"] = "worstvideo* + worstaudio / worst"
|
|
143
142
|
|
|
144
143
|
if callback:
|
|
145
144
|
await callback(0, 1, "count", *callback_args)
|
|
@@ -118,10 +118,11 @@ class BiliParse(YtParser):
|
|
|
118
118
|
dimension = page_info["dimension"]
|
|
119
119
|
|
|
120
120
|
b3, b4 = await bili.get_buvid()
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
video_playurl = await bili.get_video_playurl(url, cid, b3, b4)
|
|
122
|
+
# if GlobalConfig.duration_limit and duration > GlobalConfig.duration_limit:
|
|
123
|
+
# video_playurl = await bili.get_video_playurl(url, cid, b3, b4, False)
|
|
124
|
+
# else:
|
|
125
|
+
# video_playurl = await bili.get_video_playurl(url, cid, b3, b4)
|
|
125
126
|
|
|
126
127
|
durl = video_playurl["data"]["durl"][0]
|
|
127
128
|
video_url = self.change_source(durl["backup_url"][0]) if durl.get("backup_url") else durl["url"]
|
|
@@ -182,8 +182,8 @@ class XiaoHeiHeAPI:
|
|
|
182
182
|
type=media_type,
|
|
183
183
|
url=image["url"],
|
|
184
184
|
thumb_url=image["url"],
|
|
185
|
-
height=int(image.get("height", 0)),
|
|
186
|
-
width=int(image.get("width", 0)),
|
|
185
|
+
height=int(float(image.get("height", 0))),
|
|
186
|
+
width=int(float(image.get("width", 0))),
|
|
187
187
|
)
|
|
188
188
|
)
|
|
189
189
|
return XiaoHeiHePost(type=post_type, title=title, content=content, media=images)
|
|
@@ -3,6 +3,7 @@ from .callback import ProgressCallback, ProgressUnit
|
|
|
3
3
|
from .media_file import AniFile, AnyMediaFile, ImageFile, LivePhotoFile, MediaFile, VideoFile
|
|
4
4
|
from .media_ref import AniRef, AnyMediaRef, ImageRef, LivePhotoRef, MediaRef, VideoRef
|
|
5
5
|
from .platform import Platform
|
|
6
|
+
from .post import PostType
|
|
6
7
|
from .result import (
|
|
7
8
|
AnyParseResult,
|
|
8
9
|
DownloadResult,
|
|
@@ -38,4 +39,5 @@ __all__ = [
|
|
|
38
39
|
"AnyMediaFile",
|
|
39
40
|
"ProgressCallback",
|
|
40
41
|
"ProgressUnit",
|
|
42
|
+
"PostType",
|
|
41
43
|
]
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import shutil
|
|
2
3
|
import time
|
|
3
4
|
from abc import ABC
|
|
5
|
+
from dataclasses import asdict
|
|
4
6
|
from pathlib import Path
|
|
7
|
+
from typing import ClassVar
|
|
5
8
|
|
|
9
|
+
import aiofiles
|
|
6
10
|
from bs4 import BeautifulSoup
|
|
7
11
|
from markdown import markdown as md_to_html
|
|
8
12
|
from slugify import slugify
|
|
@@ -15,11 +19,14 @@ from .callback import ProgressCallback
|
|
|
15
19
|
from .media_file import AniFile, AnyMediaFile, ImageFile, LivePhotoFile, VideoFile
|
|
16
20
|
from .media_ref import AniRef, AnyMediaRef, ImageRef, LivePhotoRef, VideoRef
|
|
17
21
|
from .platform import Platform
|
|
22
|
+
from .post import PostType
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
class ParseResult(ABC): # noqa: B024
|
|
21
26
|
"""解析结果基类"""
|
|
22
27
|
|
|
28
|
+
type: ClassVar[PostType] = PostType.UNKNOWN
|
|
29
|
+
|
|
23
30
|
def __init__(
|
|
24
31
|
self,
|
|
25
32
|
raw_url: str,
|
|
@@ -31,7 +38,7 @@ class ParseResult(ABC): # noqa: B024
|
|
|
31
38
|
"""
|
|
32
39
|
:param title: 标题
|
|
33
40
|
:param media: 媒体下载链接
|
|
34
|
-
:param content: 正文
|
|
41
|
+
:param content: 正文 (纯文本)
|
|
35
42
|
:param raw_url: 原始帖子链接
|
|
36
43
|
:param platform: 平台
|
|
37
44
|
"""
|
|
@@ -49,6 +56,23 @@ class ParseResult(ABC): # noqa: B024
|
|
|
49
56
|
f"raw_url={self.raw_url})"
|
|
50
57
|
)
|
|
51
58
|
|
|
59
|
+
def to_dict(self) -> dict:
|
|
60
|
+
"""转换为字典"""
|
|
61
|
+
media = None
|
|
62
|
+
if isinstance(self.media, list):
|
|
63
|
+
media = [asdict(m) for m in self.media]
|
|
64
|
+
elif self.media:
|
|
65
|
+
media = asdict(self.media)
|
|
66
|
+
|
|
67
|
+
return {
|
|
68
|
+
"platform": self.platform.id if self.platform else None,
|
|
69
|
+
"type": self.type.value,
|
|
70
|
+
"title": self.title,
|
|
71
|
+
"content": self.content,
|
|
72
|
+
"raw_url": self.raw_url,
|
|
73
|
+
"media": media,
|
|
74
|
+
}
|
|
75
|
+
|
|
52
76
|
async def _do_download(
|
|
53
77
|
self,
|
|
54
78
|
*,
|
|
@@ -135,15 +159,18 @@ class ParseResult(ABC): # noqa: B024
|
|
|
135
159
|
async def download(
|
|
136
160
|
self,
|
|
137
161
|
path: str | Path | None = None,
|
|
162
|
+
*,
|
|
138
163
|
callback: ProgressCallback | None = None,
|
|
139
164
|
callback_args: tuple = (),
|
|
140
165
|
proxy: str | None = None,
|
|
166
|
+
save_metadata: bool = False,
|
|
141
167
|
) -> "DownloadResult":
|
|
142
168
|
"""
|
|
143
169
|
:param path: 保存路径
|
|
144
170
|
:param callback: 下载进度回调函数
|
|
145
171
|
:param callback_args: 下载进度回调函数参数
|
|
146
172
|
:param proxy: 代理
|
|
173
|
+
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
147
174
|
:return: DownloadResult
|
|
148
175
|
|
|
149
176
|
Note:
|
|
@@ -168,22 +195,33 @@ class ParseResult(ABC): # noqa: B024
|
|
|
168
195
|
counter += 1
|
|
169
196
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
170
197
|
|
|
171
|
-
|
|
172
|
-
output_dir
|
|
173
|
-
|
|
198
|
+
if save_metadata:
|
|
199
|
+
async with aiofiles.open(output_dir.joinpath("metadata.json"), "w", encoding="utf-8") as f:
|
|
200
|
+
await f.write(json.dumps(self.to_dict(), ensure_ascii=False, indent=4))
|
|
201
|
+
|
|
202
|
+
try:
|
|
203
|
+
return await self._do_download(
|
|
204
|
+
output_dir=output_dir, callback=callback, callback_args=callback_args, proxy=proxy
|
|
205
|
+
)
|
|
206
|
+
except Exception as e:
|
|
207
|
+
shutil.rmtree(output_dir, ignore_errors=True)
|
|
208
|
+
raise e
|
|
174
209
|
|
|
175
210
|
def download_sync(
|
|
176
211
|
self,
|
|
177
212
|
path: str | Path | None = None,
|
|
213
|
+
*,
|
|
178
214
|
callback: ProgressCallback | None = None,
|
|
179
215
|
callback_args: tuple = (),
|
|
180
216
|
proxy: str | None = None,
|
|
217
|
+
save_metadata: bool = False,
|
|
181
218
|
) -> "DownloadResult":
|
|
182
219
|
"""
|
|
183
220
|
:param path: 保存路径
|
|
184
221
|
:param callback: 下载进度回调函数
|
|
185
222
|
:param callback_args: 下载进度回调函数参数
|
|
186
223
|
:param proxy: 代理
|
|
224
|
+
:param save_metadata: 保存解析结果为 metadata.json, 默认为 False
|
|
187
225
|
:return: DownloadResult
|
|
188
226
|
|
|
189
227
|
Note:
|
|
@@ -197,12 +235,18 @@ class ParseResult(ABC): # noqa: B024
|
|
|
197
235
|
- ``bytes``: 字节进度,用于单文件下载时报告已下载/总字节数
|
|
198
236
|
- ``count``: 计数进度,用于多文件下载时报告已完成/总文件数
|
|
199
237
|
"""
|
|
200
|
-
return get_event_loop().run_until_complete(
|
|
238
|
+
return get_event_loop().run_until_complete(
|
|
239
|
+
self.download(
|
|
240
|
+
path, callback=callback, callback_args=callback_args, proxy=proxy, save_metadata=save_metadata
|
|
241
|
+
)
|
|
242
|
+
)
|
|
201
243
|
|
|
202
244
|
|
|
203
245
|
class VideoParseResult(ParseResult):
|
|
204
246
|
"""单个视频"""
|
|
205
247
|
|
|
248
|
+
type = PostType.VIDEO
|
|
249
|
+
|
|
206
250
|
def __init__(
|
|
207
251
|
self,
|
|
208
252
|
raw_url: str,
|
|
@@ -222,6 +266,8 @@ class VideoParseResult(ParseResult):
|
|
|
222
266
|
class ImageParseResult(ParseResult):
|
|
223
267
|
"""单图 / 多图 / 图集 / 实况照片"""
|
|
224
268
|
|
|
269
|
+
type = PostType.IMAGE
|
|
270
|
+
|
|
225
271
|
def __init__(
|
|
226
272
|
self,
|
|
227
273
|
raw_url: str,
|
|
@@ -237,6 +283,8 @@ class ImageParseResult(ParseResult):
|
|
|
237
283
|
class MultimediaParseResult(ParseResult):
|
|
238
284
|
"""多视频 / 视频 + 图片 / GIF / 实况照片"""
|
|
239
285
|
|
|
286
|
+
type = PostType.MULTIMEDIA
|
|
287
|
+
|
|
240
288
|
def __init__(
|
|
241
289
|
self,
|
|
242
290
|
raw_url: str,
|
|
@@ -250,6 +298,8 @@ class MultimediaParseResult(ParseResult):
|
|
|
250
298
|
class RichTextParseResult(ParseResult):
|
|
251
299
|
"""图文混排的文章"""
|
|
252
300
|
|
|
301
|
+
type = PostType.RICHTEXT
|
|
302
|
+
|
|
253
303
|
def __init__(
|
|
254
304
|
self,
|
|
255
305
|
raw_url: str,
|
|
@@ -274,6 +324,17 @@ class RichTextParseResult(ParseResult):
|
|
|
274
324
|
f" markdown_content={self.markdown_content or ''}, media={media_count} raw_url={self.raw_url})"
|
|
275
325
|
)
|
|
276
326
|
|
|
327
|
+
def to_dict(self) -> dict:
|
|
328
|
+
"""转换为字典"""
|
|
329
|
+
data = super().to_dict()
|
|
330
|
+
# 在 "content" 后面插入 "markdown_content"
|
|
331
|
+
result = {}
|
|
332
|
+
for key, value in data.items():
|
|
333
|
+
result[key] = value
|
|
334
|
+
if key == "content":
|
|
335
|
+
result["markdown_content"] = self.markdown_content
|
|
336
|
+
return result
|
|
337
|
+
|
|
277
338
|
@property
|
|
278
339
|
def plaintext_content(self) -> str:
|
|
279
340
|
"""从 markdown 转换为纯文本"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parsehub
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.4
|
|
4
4
|
Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
|
|
5
5
|
Author-email: 梓澪 <zilingmio@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -125,26 +125,21 @@ print(result)
|
|
|
125
125
|
|
|
126
126
|
```python
|
|
127
127
|
from parsehub import ParseHub
|
|
128
|
-
from parsehub.config import ParseConfig
|
|
129
128
|
|
|
130
|
-
|
|
131
|
-
cookie="key1=value1; key2=value2", # 从浏览器中获取
|
|
132
|
-
proxy="http://127.0.0.1:7890", # 可选
|
|
133
|
-
)
|
|
134
|
-
ph = ParseHub(config=config)
|
|
129
|
+
ph = ParseHub(cookie="key1=value1; key2=value2", proxy="http://127.0.0.1:7890",)
|
|
135
130
|
```
|
|
136
131
|
|
|
137
132
|
Cookie 支持多种格式传入:
|
|
138
133
|
|
|
139
134
|
```python
|
|
140
135
|
# 字符串
|
|
141
|
-
|
|
136
|
+
ParseHub(cookie="key1=value1; key2=value2")
|
|
142
137
|
|
|
143
138
|
# JSON 字符串
|
|
144
|
-
|
|
139
|
+
ParseHub(cookie='{"key1": "value1", "key2": "value2"}')
|
|
145
140
|
|
|
146
141
|
# 字典
|
|
147
|
-
|
|
142
|
+
ParseHub(cookie={"key1": "value1", "key2": "value2"})
|
|
148
143
|
```
|
|
149
144
|
|
|
150
145
|
目前支持 Cookie 登录的平台:
|
|
@@ -158,9 +153,6 @@ from parsehub.config import GlobalConfig
|
|
|
158
153
|
|
|
159
154
|
# 自定义默认下载目录
|
|
160
155
|
GlobalConfig.default_save_dir = "./my_downloads"
|
|
161
|
-
|
|
162
|
-
# 视频时长限制 (超过此时长将下载最低画质,0 为不限制)
|
|
163
|
-
GlobalConfig.duration_limit = 600 # 秒
|
|
164
156
|
```
|
|
165
157
|
|
|
166
158
|
## 🤝 参考项目
|
|
@@ -51,6 +51,7 @@ src/parsehub/types/callback.py
|
|
|
51
51
|
src/parsehub/types/media_file.py
|
|
52
52
|
src/parsehub/types/media_ref.py
|
|
53
53
|
src/parsehub/types/platform.py
|
|
54
|
+
src/parsehub/types/post.py
|
|
54
55
|
src/parsehub/types/result.py
|
|
55
56
|
src/parsehub/utils/downloader.py
|
|
56
57
|
src/parsehub/utils/media_info.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|