parsehub 2.0.8__tar.gz → 2.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.8/src/parsehub.egg-info → parsehub-2.0.10}/PKG-INFO +1 -1
- {parsehub-2.0.8 → parsehub-2.0.10}/pyproject.toml +1 -1
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/base/ytdlp.py +42 -13
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/coolapk.py +11 -2
- {parsehub-2.0.8 → parsehub-2.0.10/src/parsehub.egg-info}/PKG-INFO +1 -1
- {parsehub-2.0.8 → parsehub-2.0.10}/LICENSE +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/README.md +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/setup.cfg +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/config/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/config/config.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/base/base.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/bilibili.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/douyin.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/instagram.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/kuaishou.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/pipix.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/threads.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/tieba.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/twitter.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/weibo.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/weixin.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/xhs.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/youtube.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/parsers/parser/zuiyou.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/bilibili.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/coolapk.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/instagram.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/kuaishou.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/pipix.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/threads.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/tieba.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/twitter.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/weibo.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/weixin.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/xhs.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/xiaoheihe.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/provider_api/zuiyou.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/__init__.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/callback.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/media_file.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/media_ref.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/platform.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/post.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/types/result.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/utils/downloader.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub/utils/utils.py +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/SOURCES.txt +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/requires.txt +0 -0
- {parsehub-2.0.8 → parsehub-2.0.10}/src/parsehub.egg-info/top_level.txt +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from concurrent.futures import ProcessPoolExecutor
|
|
3
2
|
from dataclasses import dataclass
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
from typing import Union
|
|
@@ -17,8 +16,6 @@ from ...types import (
|
|
|
17
16
|
)
|
|
18
17
|
from .base import BaseParser
|
|
19
18
|
|
|
20
|
-
EXC = ProcessPoolExecutor()
|
|
21
|
-
|
|
22
19
|
|
|
23
20
|
def download_video(yto_params: dict, urls: list[str]) -> None:
|
|
24
21
|
"""在独立进程中下载视频"""
|
|
@@ -30,6 +27,15 @@ def download_video(yto_params: dict, urls: list[str]) -> None:
|
|
|
30
27
|
raise RuntimeError(error_msg) from None
|
|
31
28
|
|
|
32
29
|
|
|
30
|
+
def progress_hook(d: dict):
    """Print a minimal progress report for one download status event.

    Args:
        d: Status dictionary. ``d["status"]`` is required; the byte counters
            (``downloaded_bytes``, ``total_bytes``, ``total_bytes_estimate``)
            are optional.

    While the status is ``"downloading"`` this prints
    ``<downloaded>/<total>``; the total falls back from ``total_bytes`` to
    ``total_bytes_estimate`` and finally to ``0``. When the status is
    ``"finished"`` it prints a completion notice. Any other status is
    ignored.

    Raises:
        KeyError: If ``d`` has no ``"status"`` key.
    """
    status = d["status"]

    if status == "finished":
        print("下载完成,准备处理文件")
        return

    if status != "downloading":
        # Other statuses produce no output.
        return

    # Prefer the exact size, then the estimate; 0 means "unknown".
    total = d.get("total_bytes") or d.get("total_bytes_estimate") or 0
    downloaded = d.get("downloaded_bytes", 0)
    print(f"{downloaded}/{total}")
|
|
37
|
+
|
|
38
|
+
|
|
33
39
|
class YtParser(BaseParser, register=False):
|
|
34
40
|
"""yt-dlp解析器"""
|
|
35
41
|
|
|
@@ -103,6 +109,7 @@ class YtParser(BaseParser, register=False):
|
|
|
103
109
|
# }
|
|
104
110
|
# ],
|
|
105
111
|
"playlist_items": "1", # 分p列表默认解析第一个
|
|
112
|
+
# "progress_hooks": [progress_hook],
|
|
106
113
|
}
|
|
107
114
|
return params
|
|
108
115
|
|
|
@@ -143,17 +150,40 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
143
150
|
# paramss["format"] = "worstvideo* + worstaudio / worst"
|
|
144
151
|
|
|
145
152
|
if callback:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
153
|
+
loop = asyncio.get_running_loop()
|
|
154
|
+
progress_mode = "bytes"
|
|
155
|
+
|
|
156
|
+
def _progress_hook(d):
|
|
157
|
+
nonlocal progress_mode
|
|
158
|
+
if d["status"] == "downloading":
|
|
159
|
+
# 已知问题: yt-dlp 返回的总进度不统一
|
|
160
|
+
downloaded = int(d.get("downloaded_bytes", 0))
|
|
161
|
+
total = int(d.get("total_bytes") or d.get("total_bytes_estimate") or 0)
|
|
162
|
+
if total and progress_mode == "bytes":
|
|
163
|
+
asyncio.run_coroutine_threadsafe(
|
|
164
|
+
callback(downloaded, total, "bytes", *callback_args, **callback_kwargs),
|
|
165
|
+
loop,
|
|
166
|
+
)
|
|
167
|
+
else:
|
|
168
|
+
progress_mode = "count"
|
|
169
|
+
asyncio.run_coroutine_threadsafe(
|
|
170
|
+
callback(0, 1, "count", *callback_args, **callback_kwargs),
|
|
171
|
+
loop,
|
|
172
|
+
)
|
|
173
|
+
elif d["status"] == "finished" and progress_mode == "count":
|
|
174
|
+
asyncio.run_coroutine_threadsafe(
|
|
175
|
+
callback(1, 1, "count", *callback_args, **callback_kwargs),
|
|
176
|
+
loop,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
paramss["progress_hooks"] = [_progress_hook]
|
|
180
|
+
|
|
181
|
+
await self._run_download(paramss)
|
|
149
182
|
|
|
150
183
|
v = list(output_dir.glob("*.mp4")) or list(output_dir.glob("*.mkv")) or list(output_dir.glob("*.webm"))
|
|
151
184
|
if not v:
|
|
152
185
|
raise DownloadError("下载失败 -1")
|
|
153
186
|
|
|
154
|
-
if callback:
|
|
155
|
-
await callback(1, 1, "count", *callback_args, **callback_kwargs)
|
|
156
|
-
|
|
157
187
|
video_path = v[0]
|
|
158
188
|
return DownloadResult(
|
|
159
189
|
VideoFile(
|
|
@@ -165,14 +195,13 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
165
195
|
output_dir,
|
|
166
196
|
)
|
|
167
197
|
|
|
168
|
-
async def
|
|
198
|
+
async def _run_download(self, paramss: dict, count: int = 0) -> None:
|
|
169
199
|
if count > 2:
|
|
170
200
|
raise DownloadError("下载失败 -2")
|
|
171
201
|
|
|
172
|
-
loop = asyncio.get_running_loop()
|
|
173
202
|
try:
|
|
174
203
|
await asyncio.wait_for(
|
|
175
|
-
|
|
204
|
+
asyncio.to_thread(download_video, paramss, [self.dl.url]),
|
|
176
205
|
timeout=300,
|
|
177
206
|
)
|
|
178
207
|
except TimeoutError as e:
|
|
@@ -187,7 +216,7 @@ class YtVideoParseResult(VideoParseResult):
|
|
|
187
216
|
)
|
|
188
217
|
):
|
|
189
218
|
paramss.pop("writeautomaticsub", None)
|
|
190
|
-
await self.
|
|
219
|
+
await self._run_download(paramss, count + 1)
|
|
191
220
|
|
|
192
221
|
except Exception as e:
|
|
193
222
|
raise DownloadError(f"下载失败: {str(e)}") from e
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import Union
|
|
3
4
|
|
|
@@ -37,18 +38,26 @@ class CoolapkParser(BaseParser):
|
|
|
37
38
|
media=media,
|
|
38
39
|
markdown_content=coolapk.markdown_content,
|
|
39
40
|
)
|
|
41
|
+
content = self.hashtag_handler(coolapk.text_content)
|
|
40
42
|
if any(isinstance(m, AniRef) for m in media):
|
|
41
43
|
return CoolapkMultimediaParseResult(
|
|
42
44
|
title=coolapk.title,
|
|
43
45
|
media=media,
|
|
44
|
-
content=
|
|
46
|
+
content=content,
|
|
45
47
|
)
|
|
46
48
|
return CoolapkImageParseResult(
|
|
47
49
|
title=coolapk.title,
|
|
48
50
|
photo=media,
|
|
49
|
-
content=
|
|
51
|
+
content=content,
|
|
50
52
|
)
|
|
51
53
|
|
|
54
|
+
@staticmethod
def hashtag_handler(desc: str):
    """Rewrite ``#topic#`` style hashtags in ``desc`` as `` #topic ``.

    A hashtag is a ``#``, one or more non-``#`` characters, and a closing
    ``#``. One optional space on either side of it is absorbed into the
    match. Each match is replaced by the tag with its closing ``#`` dropped
    and exactly one space of padding on both sides.

    The substitution is done in a single pass, so every match is rewritten
    exactly once at its own position. Replacing match by match with
    ``str.replace`` would also rewrite any later match that contains an
    earlier one as a substring (``"#a#"`` inside ``" #a# "``), leaving that
    later match with the wrong padding.

    Args:
        desc: Text that may contain ``#topic#`` hashtags.

    Returns:
        ``desc`` with every hashtag rewritten; text without hashtags is
        returned unchanged.
    """
    # group(1) is the tag text between the two '#' delimiters.
    return re.sub(r" ?#([^#]+)# ?", lambda m: f" #{m.group(1)} ", desc)
|
|
60
|
+
|
|
52
61
|
|
|
53
62
|
class CoolapkParseResult(ParseResult):
|
|
54
63
|
async def _do_download(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|