parsehub 2.0.13__tar.gz → 2.0.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsehub-2.0.13/src/parsehub.egg-info → parsehub-2.0.15}/PKG-INFO +2 -1
- {parsehub-2.0.13 → parsehub-2.0.15}/pyproject.toml +2 -1
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/douyin.py +1 -1
- parsehub-2.0.15/src/parsehub/parsers/parser/tieba.py +78 -0
- parsehub-2.0.15/src/parsehub/provider_api/tieba.py +153 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/xiaoheihe.py +198 -110
- {parsehub-2.0.13 → parsehub-2.0.15/src/parsehub.egg-info}/PKG-INFO +2 -1
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub.egg-info/requires.txt +1 -0
- parsehub-2.0.13/src/parsehub/parsers/parser/tieba.py +0 -25
- parsehub-2.0.13/src/parsehub/provider_api/tieba.py +0 -74
- {parsehub-2.0.13 → parsehub-2.0.15}/LICENSE +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/README.md +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/setup.cfg +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/config/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/config/config.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/errors.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/base/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/base/base.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/base/ytdlp.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/bilibili.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/coolapk.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/facebook.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/instagram.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/kuaishou.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/pipix.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/threads.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/twitter.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/weibo.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/weixin.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/xhs.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/xiaoheihe.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/youtube.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/parsers/parser/zuiyou.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/bilibili.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/coolapk.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/instagram.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/kuaishou.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/pipix.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/threads.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/twitter.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/weibo.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/weixin.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/xhs.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/provider_api/zuiyou.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/__init__.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/callback.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/media_file.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/media_ref.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/platform.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/post.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/types/result.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/utils/downloader.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/utils/media_info.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub/utils/utils.py +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub.egg-info/SOURCES.txt +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub.egg-info/dependency_links.txt +0 -0
- {parsehub-2.0.13 → parsehub-2.0.15}/src/parsehub.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parsehub
|
|
3
|
-
Version: 2.0.13
|
|
3
|
+
Version: 2.0.15
|
|
4
4
|
Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
|
|
5
5
|
Author-email: 梓澪 <zilingmio@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -33,6 +33,7 @@ Requires-Dist: httpx>=0.24.1
|
|
|
33
33
|
Requires-Dist: pillow>=12.1.0
|
|
34
34
|
Requires-Dist: python-slugify[unidecode]>=8.0.4
|
|
35
35
|
Requires-Dist: opencv-python-headless>=4.13.0.92
|
|
36
|
+
Requires-Dist: cryptography>=46.0.6
|
|
36
37
|
Dynamic: license-file
|
|
37
38
|
|
|
38
39
|
<div align="center">
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "parsehub"
|
|
3
|
-
version = "2.0.13"
|
|
3
|
+
version = "2.0.15"
|
|
4
4
|
description = "轻量、异步、开箱即用的社交媒体聚合解析库"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12.0"
|
|
@@ -36,6 +36,7 @@ dependencies = [
|
|
|
36
36
|
"pillow>=12.1.0",
|
|
37
37
|
"python-slugify[unidecode]>=8.0.4",
|
|
38
38
|
"opencv-python-headless>=4.13.0.92",
|
|
39
|
+
"cryptography>=46.0.6",
|
|
39
40
|
]
|
|
40
41
|
|
|
41
42
|
[dependency-groups]
|
|
@@ -21,7 +21,7 @@ from ..base.base import BaseParser
|
|
|
21
21
|
class DouyinParser(BaseParser):
|
|
22
22
|
__platform__ = Platform.DOUYIN
|
|
23
23
|
__supported_type__ = ["视频", "图文"]
|
|
24
|
-
__match__ = r"^(http(s)?://)?.+douyin.com/(?!share/user).+|^(http(s)?://)?.+tiktok.com/.+"
|
|
24
|
+
__match__ = r"^(http(s)?://)?.+douyin.com/(?!share/user|qishui).+|^(http(s)?://)?.+tiktok.com/.+"
|
|
25
25
|
__redirect_keywords__ = ["v.douyin", "vt.tiktok", "iesdouyin"]
|
|
26
26
|
__reserved_parameters__ = ["modal_id"]
|
|
27
27
|
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
from ...provider_api.tieba import TieBa, TieBaError, TieBaPostType
|
|
6
|
+
from ...types import AniRef, ImageParseResult, ImageRef, ParseError, Platform, VideoParseResult, VideoRef
|
|
7
|
+
from ..base.base import BaseParser
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TieBaParser(BaseParser):
|
|
11
|
+
__platform__ = Platform.TIEBA
|
|
12
|
+
__supported_type__ = ["视频", "图文"]
|
|
13
|
+
__match__ = r"^(http(s)?://)?.+tieba.baidu.com/p/\d+"
|
|
14
|
+
|
|
15
|
+
async def _do_parse(self, raw_url: str) -> Union["ImageParseResult", "VideoParseResult"]:
|
|
16
|
+
try:
|
|
17
|
+
tb = await TieBa(self.proxy).parse(raw_url)
|
|
18
|
+
except TieBaError as e:
|
|
19
|
+
raise ParseError(e.msg if e.msg else "贴吧解析失败: 未知错误") from e
|
|
20
|
+
except Exception as e:
|
|
21
|
+
raise ParseError("贴吧解析失败: 未知错误") from e
|
|
22
|
+
|
|
23
|
+
match tb.type:
|
|
24
|
+
case TieBaPostType.VIDEO:
|
|
25
|
+
return VideoParseResult(
|
|
26
|
+
title=tb.title,
|
|
27
|
+
video=VideoRef(
|
|
28
|
+
url=tb.media.url,
|
|
29
|
+
thumb_url=tb.media.thumb_url,
|
|
30
|
+
width=tb.media.width,
|
|
31
|
+
height=tb.media.height,
|
|
32
|
+
duration=tb.media.duration,
|
|
33
|
+
),
|
|
34
|
+
content=tb.content,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
case TieBaPostType.PHOTO:
|
|
38
|
+
images = []
|
|
39
|
+
if tb.media:
|
|
40
|
+
for i in tb.media:
|
|
41
|
+
async with httpx.AsyncClient(proxy=self.proxy) as cli:
|
|
42
|
+
try:
|
|
43
|
+
r = await cli.head(i.url)
|
|
44
|
+
r.raise_for_status()
|
|
45
|
+
except Exception:
|
|
46
|
+
images.append(
|
|
47
|
+
ImageRef(
|
|
48
|
+
url=i.url,
|
|
49
|
+
thumb_url=i.thumb_url,
|
|
50
|
+
width=i.width,
|
|
51
|
+
height=i.height,
|
|
52
|
+
)
|
|
53
|
+
)
|
|
54
|
+
else:
|
|
55
|
+
headers = r.headers
|
|
56
|
+
if (t := headers.get("content-type")) and "gif" in t:
|
|
57
|
+
images.append(
|
|
58
|
+
AniRef(
|
|
59
|
+
url=i.url,
|
|
60
|
+
thumb_url=i.thumb_url,
|
|
61
|
+
width=i.width,
|
|
62
|
+
height=i.height,
|
|
63
|
+
)
|
|
64
|
+
)
|
|
65
|
+
else:
|
|
66
|
+
images.append(
|
|
67
|
+
ImageRef(
|
|
68
|
+
url=i.url,
|
|
69
|
+
thumb_url=i.thumb_url,
|
|
70
|
+
width=i.width,
|
|
71
|
+
height=i.height,
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
return ImageParseResult(title=tb.title, content=tb.content, photo=images)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
__all__ = ["TieBaParser"]
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Self
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TieBa:
|
|
11
|
+
def __init__(self, proxy: str | None = None):
|
|
12
|
+
self.proxy = proxy
|
|
13
|
+
|
|
14
|
+
async def parse(self, url: str) -> "TieBaPost":
|
|
15
|
+
data = await self.fetch_post_data(url)
|
|
16
|
+
return TieBaPost.parse(data)
|
|
17
|
+
|
|
18
|
+
@staticmethod
|
|
19
|
+
def gen_sign(params: dict):
|
|
20
|
+
items = sorted(params.items())
|
|
21
|
+
base_str = "".join([f"{k}={v}" for k, v in items])
|
|
22
|
+
salt = "36770b1f34c9bbf2e7d1a99d2b82fa9e"
|
|
23
|
+
return hashlib.md5((base_str + salt).encode("utf-8")).hexdigest()
|
|
24
|
+
|
|
25
|
+
async def fetch_tbs(self) -> str:
|
|
26
|
+
async with httpx.AsyncClient(proxy=self.proxy) as cli:
|
|
27
|
+
result = await cli.get("http://tieba.baidu.com/dc/common/tbs")
|
|
28
|
+
result.raise_for_status()
|
|
29
|
+
result = result.json()
|
|
30
|
+
if tbs := result.get("tbs"):
|
|
31
|
+
return tbs
|
|
32
|
+
raise TieBaError("获取 tbs 失败")
|
|
33
|
+
|
|
34
|
+
@staticmethod
|
|
35
|
+
def get_kz(url: str) -> str:
|
|
36
|
+
if match := re.search(r"/p/(\d+)", url):
|
|
37
|
+
return match.group(1)
|
|
38
|
+
raise ValueError("无法从 URL 中提取帖子 ID")
|
|
39
|
+
|
|
40
|
+
async def fetch_post_data(self, url: str) -> dict:
|
|
41
|
+
kz = self.get_kz(url)
|
|
42
|
+
tbs = await self.fetch_tbs()
|
|
43
|
+
data = {
|
|
44
|
+
"pn": "1",
|
|
45
|
+
"lz": "0",
|
|
46
|
+
"r": "2",
|
|
47
|
+
"mark_type": "0",
|
|
48
|
+
"back": "0",
|
|
49
|
+
"fr": "personalize_page",
|
|
50
|
+
"kz": kz,
|
|
51
|
+
"session_request_times": "1",
|
|
52
|
+
"tbs": tbs,
|
|
53
|
+
"subapp_type": "pc",
|
|
54
|
+
"_client_type": "20",
|
|
55
|
+
}
|
|
56
|
+
data["sign"] = self.gen_sign(data)
|
|
57
|
+
async with httpx.AsyncClient(proxy=self.proxy, timeout=30) as cli:
|
|
58
|
+
result = await cli.post("https://tieba.baidu.com/c/f/pb/page_pc", data=data)
|
|
59
|
+
result.raise_for_status()
|
|
60
|
+
result = result.json()
|
|
61
|
+
if result["error_code"]:
|
|
62
|
+
raise TieBaError(em if (em := result["error_msg"]) else "获取帖子内容失败")
|
|
63
|
+
return result
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TieBaPostType(Enum):
|
|
67
|
+
PHOTO = "PHOTO"
|
|
68
|
+
VIDEO = "VIDEO"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
|
|
72
|
+
class TieBaVideo:
|
|
73
|
+
url: str
|
|
74
|
+
thumb_url: str | None = None
|
|
75
|
+
width: int = 0
|
|
76
|
+
height: int = 0
|
|
77
|
+
duration: int = 0
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
|
|
81
|
+
class TieBaPhoto:
|
|
82
|
+
url: str
|
|
83
|
+
thumb_url: str | None = None
|
|
84
|
+
width: int = 0
|
|
85
|
+
height: int = 0
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
|
|
89
|
+
class TieBaPost:
|
|
90
|
+
type: TieBaPostType
|
|
91
|
+
title: str
|
|
92
|
+
content: str
|
|
93
|
+
media: list[TieBaPhoto] | TieBaVideo | None = None
|
|
94
|
+
|
|
95
|
+
@classmethod
|
|
96
|
+
def parse(cls, data: dict) -> Self:
|
|
97
|
+
thread = data["thread"]
|
|
98
|
+
origin_thread_info = thread["origin_thread_info"]
|
|
99
|
+
|
|
100
|
+
# title
|
|
101
|
+
title = origin_thread_info["title"]
|
|
102
|
+
|
|
103
|
+
# content
|
|
104
|
+
origin_content = origin_thread_info["content"]
|
|
105
|
+
content_list: list[str] = []
|
|
106
|
+
for oc in origin_content:
|
|
107
|
+
oc_type = oc["type"]
|
|
108
|
+
match oc_type:
|
|
109
|
+
case 0:
|
|
110
|
+
content_list.append(oc["text"])
|
|
111
|
+
content = "\n".join(content_list)
|
|
112
|
+
|
|
113
|
+
# media
|
|
114
|
+
media = []
|
|
115
|
+
if origin_media := origin_thread_info.get("media"):
|
|
116
|
+
post_type = TieBaPostType.PHOTO
|
|
117
|
+
for om in origin_media:
|
|
118
|
+
media.append(
|
|
119
|
+
TieBaPhoto(
|
|
120
|
+
url=om["big_pic"],
|
|
121
|
+
thumb_url=om["small_pic"],
|
|
122
|
+
width=om["width"],
|
|
123
|
+
height=om["height"],
|
|
124
|
+
)
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
elif video_info := thread.get("video_info"):
|
|
128
|
+
post_type = TieBaPostType.VIDEO
|
|
129
|
+
media.append(
|
|
130
|
+
TieBaVideo(
|
|
131
|
+
url=video_info["video_url"],
|
|
132
|
+
thumb_url=video_info["thumbnail_url"],
|
|
133
|
+
width=video_info["video_width"],
|
|
134
|
+
height=video_info["video_height"],
|
|
135
|
+
duration=video_info["video_duration"],
|
|
136
|
+
)
|
|
137
|
+
)
|
|
138
|
+
else:
|
|
139
|
+
post_type = TieBaPostType.PHOTO
|
|
140
|
+
|
|
141
|
+
m = media[0] if post_type == TieBaPostType.VIDEO else media if media else None
|
|
142
|
+
return TieBaPost(
|
|
143
|
+
type=post_type,
|
|
144
|
+
title=title,
|
|
145
|
+
content=content,
|
|
146
|
+
media=m,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class TieBaError(Exception):
|
|
151
|
+
def __init__(self, msg: str):
|
|
152
|
+
self.msg = msg
|
|
153
|
+
super().__init__(msg)
|
|
@@ -1,101 +1,24 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import gzip
|
|
1
3
|
import hashlib
|
|
2
4
|
import json
|
|
3
5
|
import random
|
|
4
6
|
import re
|
|
5
7
|
import time
|
|
8
|
+
import uuid
|
|
6
9
|
from dataclasses import dataclass
|
|
7
10
|
from enum import Enum
|
|
8
11
|
from urllib.parse import parse_qs, urlparse
|
|
9
12
|
|
|
10
13
|
import httpx
|
|
14
|
+
from cryptography.hazmat.decrepit.ciphers.algorithms import TripleDES
|
|
15
|
+
from cryptography.hazmat.primitives import serialization
|
|
16
|
+
from cryptography.hazmat.primitives.asymmetric import padding
|
|
17
|
+
from cryptography.hazmat.primitives.ciphers.algorithms import AES
|
|
18
|
+
from cryptography.hazmat.primitives.ciphers.base import Cipher
|
|
19
|
+
from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB
|
|
11
20
|
from markdownify import MarkdownConverter
|
|
12
21
|
|
|
13
|
-
# TODO: 逆向 EP 和 DATA
|
|
14
|
-
V4_EP = (
|
|
15
|
-
"CFcLOAE8E7Ew0J7yxtc9hPtklLIOym8yh1eU5jpB6D0M86gJERnbWbE7wPEWM95v8cWsxACqGq7iU"
|
|
16
|
-
"OEnrD2ODeFIj5VZdvbD3zhhOgT4FB6QfskCkuCN+JP/+aLz0rg/B+c/9fd5513ESuZxFVqUmrwe/v"
|
|
17
|
-
"jqZh5nS6Bsyt50VN8="
|
|
18
|
-
)
|
|
19
|
-
V4_DATA = (
|
|
20
|
-
"7ccf4483919143daa17cca371b849651ab10c58aa97415e3fcc9b2f4c0bc776844997f4059"
|
|
21
|
-
"512c213b3cc965e84693188b08f1ddb8924922598173e0cfa0bab40f242bcd20e11c728da7"
|
|
22
|
-
"5a75d64b75d4070affa0d64831d0b32efde8c74ac4e6adeef18bbcbd1d21131746d131e30c"
|
|
23
|
-
"8ba5939ea8247e79534f6688fed7545d5060b069e85c19d11c0277ee8015d2a989d84ce1bd"
|
|
24
|
-
"01ed2754a365959496343de0152044cef7db82d0353a091f566253f2f8ca14a192c64b610f"
|
|
25
|
-
"643309079d235d355438c84f566943df3df71c2cc979a68c6f36ce62861d6ddb64874d03f8"
|
|
26
|
-
"b596b1380de9f84a60aff650ec59e4b2427ba7492f541354ee4dfe09b02c7296539978d281"
|
|
27
|
-
"2269a7d37121ba96133b7e2b5fdba4922efc6f4bacd31855ca2604b86096ed5abfa6b87656"
|
|
28
|
-
"8298f4bd75c1de979608714b5f0ec2bd852ec6974f929891cfff70392b0c42c7efd9f53e6b"
|
|
29
|
-
"52541d08d654f85d92b29b553b3ea4de3c0ddf88ea77815871e476d5ba8b61dfbb427e3147"
|
|
30
|
-
"62b58a306479eeeb7831864bb593c91af9c85004e891efe5d495b3d1cb4885996ffeda2d50"
|
|
31
|
-
"7f747be1022544cf6ca1e4663bba30d7e7be129b23c5dd4ee1b56d2c48969eeee5b7b0e062"
|
|
32
|
-
"8cfc0527c5e2880c43a61dd753c72b76a0ec1556cac7682f54f0582b50419dbfaa504a9363"
|
|
33
|
-
"54dcb289d282dfa94ded53d926a4385cf437e35afed207c8ccf9eea2e2d493b645034a79ea"
|
|
34
|
-
"115b5df365cc3c6b160d0de25d5d94efd576538386521cc617058831a39bd9009555fe8bc3"
|
|
35
|
-
"419e1f4c9c51271d3996dd5616d0071d850a36799296abb9084a8a6b406f62341ebe581d50"
|
|
36
|
-
"11029c18e88074a2cd7e9fb6be16b948da4d696c624412a8adb4651af89e43db779ed90114"
|
|
37
|
-
"001c7ad552a6baf80447c751c39ce85ce713a661dd7b67be37aa749b46d8827b2187401e8c"
|
|
38
|
-
"3e26a5993b654d3b7e6a6323a512a00f925f887d7ce231f20788d999c527b63160b6b1893a"
|
|
39
|
-
"5891ab183760ca28c95232c164563857a98b963838d385b9638295ded7b69eeb7a43185463"
|
|
40
|
-
"d2278bd59409f5badc24abffcf5cab137f93d89657992b72c340d1a87ddec55a828d33857d"
|
|
41
|
-
"ae8b27fc0aad082e14cd8ef294938dddd095f11dd842f94aa055f3b0ba880cc87771f0d61d"
|
|
42
|
-
"cdc419027c010afb23d668b337cf63ce8359f51623326a81e7513beebfd98d3531b8c701b4"
|
|
43
|
-
"cc58b42937245244228fcefe0c74b491e765e98ec0f71814788c347b5340163aaa8aae7c97"
|
|
44
|
-
"332acb3270583f0d77c15c3216696ad4951e24a19107fd5fe150fc275198fe4c9794f2785d"
|
|
45
|
-
"a3b0b840ebfe75e823b997f0d2eda75f5debbeced24462f1b976e5fc9d643858143d1b0ee4"
|
|
46
|
-
"6dc3936991f50b5d9d7040a5d9f1cb202fbbc06420cdee16fafc0a6929789088ce8e695332"
|
|
47
|
-
"b0178a64761a352b15d87aa3a40529febc881d46a3ae80933e407fc2b28c5e0771dd426b02"
|
|
48
|
-
"1cf177e2ef53c94a0cc5fcc83212843955af3e5f3bb8b24e9ed121669dadd689d54644b507"
|
|
49
|
-
"1581b0e882d4513220cdf1fd5345b76d1fe1d824357bf3acd8a1c58d4bfb4fe3f39922f72e"
|
|
50
|
-
"2eb9a74ee4b5f248bf7e279569597f45ef0e7fcbefa2619dcf367fe3638cc93fe90583a72e"
|
|
51
|
-
"4190729c8c5ab6dd6fb6a37b43eaa90c2e25530ac9d9e923492037f1f14c0da73e4968391f"
|
|
52
|
-
"c96fe10e2bfbefd620bbd6ea4e948cf04d6219e2c32ee6875cb0c2515b3a9ff993438412d3"
|
|
53
|
-
"b1b71ba4c50ea98216b50778a1c909cbb7802acc8348aad6a9118a91a9be87f8610a1ab363"
|
|
54
|
-
"ba06beb726e0a5ce56820e6baf9de2d87a10ce1d5cdd2d94c9e0bab0a3b7b8809d52dd3926"
|
|
55
|
-
"873caf244ab322a0f2f4c4d9c119153d0b3105c8321dd30378b5345418c5a509fe731aef31"
|
|
56
|
-
"7b156cdd606d71b291954181fc3efd71467d809b90d2b02a876ddbe7c758c3189ff6ecca21"
|
|
57
|
-
"44b2a63ef949d7b8b643e3ca7a20c2e5c843e6e34f0260d3963982510a8c077dd7f47158de"
|
|
58
|
-
"ee71befbab650ef1fad54a622bf4d1c297d9a39995fb1420bdba52d20a939b2da9ec3d8a13"
|
|
59
|
-
"b156a597f9de8a683ad68a5725a3d2afbdfdbf9c024793558ba6bbd1f6d5f520988358f6d8"
|
|
60
|
-
"02c0ea8580d4f93218d729cf3bbec52e6224175a0f37dd5bb4901ec5efaa6625c6b6c3b452"
|
|
61
|
-
"752584d2e634fdee181ef7772857de3831725a6bbb6a22c29a4ddee5e8d1bf5c9aebc1b863"
|
|
62
|
-
"5ee14584163dae9d4fb2c28be4220a23bb889d1965b870c32273b0166f3195b22cb85fc570"
|
|
63
|
-
"fb3b13335c49792aaef7b675135a5ced82efe0c36713d7b40123254a7cb0099139bc6634c3"
|
|
64
|
-
"c1af20595392a6436b192b8e6bb43038a33dff4d22f6f11497cbcb5662e11f2d1510a77b61"
|
|
65
|
-
"0d1150b15a76b6c916767f1f7f0883db4a0f7b96e9d9b0884249f965212ec1cb54056ee26d"
|
|
66
|
-
"a2a883f29acdfc7040d4e2e99c4ffd42a8bb1c7852cb5b4c758cdc295baaf973eebd6e720c"
|
|
67
|
-
"bf0bd6b30ad4a7133929e4b1223c4a579dc1dde1f4fdc1fec5a83c0e3d5335f2dc79e57efc"
|
|
68
|
-
"74f64b4d69d0151d4025ee5392fd844f783e2c614903e0b3685362f142fa091dce36382c1d"
|
|
69
|
-
"dc3a6a63815fe062c59e86cad9d26bb54dbf93297ad4ae75039719eddf659c22f0922f08fe"
|
|
70
|
-
"9a2241200f87bfe60f92d9983062d868d5eaced8df5b2851f86b9ee00055d386bf1276ad9b"
|
|
71
|
-
"b27f2fa4b04ca6e773ff7348eb078e7b3b20ac5f878552133a652793f630304d28f1dc8ecb"
|
|
72
|
-
"eedf571f743ffb494c9b34a47df86df8530af4243f0fedfea466c374ff920571a998ebb799"
|
|
73
|
-
"6c9b0ec4ef5780bd519f19106ad1a5b16183bf62cbc0d7d7e4c297df6c0870fd07825d29c9"
|
|
74
|
-
"b51ecdc227efeda8848eaca34a4c65ef35c0d5d3fa6e02f416cf25c84ef054206906e0950e"
|
|
75
|
-
"24250b6e8cbea114c42de785f2ac69204ff675c7bd8f89bb1f683b9adb1c08d73cea3b5cfe"
|
|
76
|
-
"420fa46a893b9b4ba5674c502bebc59d492942af6eef30a09eb9ff94ead00ebc2007702868"
|
|
77
|
-
"63ec52c88a45ec7cbe5414485d28c64112aca5015f1976c2bd772cacb7baa5ae267035c7a1"
|
|
78
|
-
"d9703289821b84ef386f6998777f72f44392f28daa1dc23d26445ed5ca382405ae8b2b47a0"
|
|
79
|
-
"06d56a040b55c6796328ace7d8faa040d3009e5b627e12c30ec6c02bff8de7173b9f393320"
|
|
80
|
-
"3e0fb8e06f812ee8ba5a673f3fa31c27e5309a3f7e0a8a55829c0f5c8c7433bbc4db4cfce9"
|
|
81
|
-
"aea6f37058dd0bcaef20b54546466bdef7b5f69745d4d4ba59c61bc64fd4202f9ce95cc8e1"
|
|
82
|
-
"a56273db05551b6de959c5e2d5f2ccc6d9893d99e48a1ff043889c5bdcb96512ccff7237bd"
|
|
83
|
-
"95fd344d3dd46e8d19743a65cde0aeace9ec6563f4c5d2a1dd6e72a32b48dc9444246d6d9e"
|
|
84
|
-
"a5a9a8d4216b9e0b41f1e54179c52c9f456dbe6c4e8872627b54d7ca6957a270bac31a98cb"
|
|
85
|
-
"2bacf895f30ed6a508b9bdeb288ccfbf5166cec8535ab73c5fa90b41f4ba5d8a55a7cfb8d9"
|
|
86
|
-
"783e00356ee534676215463f0aa1333b3388c13c8c0f176af6d7d2a01e2dd01cac2eb73574"
|
|
87
|
-
"bd6c0930c412cf12bcb80708706cc94b2b9546621f64547b8543179a203d9d871dfc4d5cd4"
|
|
88
|
-
"8334f42598f62e7c8199782bd605c75dd719c0db51ed801a47938746caf258966fc3132f6c"
|
|
89
|
-
"77b0a97ba78ece0e150fee450a90433d2b8534d276b07e8d4586043de0ffe1af106f026d45"
|
|
90
|
-
"41ad961aea6f69fa92344ed9a93f76f2a9f0f29110a4f0a7bda6a84a46d815c68784ab6685"
|
|
91
|
-
"466059376f0f8866107623c49d59acf60a010c923a73177ea9f58e187bcec2d6feb94a5220"
|
|
92
|
-
"56325e1651b5499fd28c17456a756e171840b7f8f1d6785e3e63d0bb5a690cc148f45ba0b0"
|
|
93
|
-
"6b5e0c8da2c6711a6b5011fdfc57221767bce9925d149f357cfa8f108965f9f6037f9b3bc9"
|
|
94
|
-
"46d90499ec8c40108216ed10eea155cb8d8e7bf76cc17efc1fda962101dc22114ca7b3b39c"
|
|
95
|
-
"44c3345d0e1c525e4cbdc1f49dbb66ad1f5874bb91a577cf66428fa861624febfb03c369d1"
|
|
96
|
-
"9d794544"
|
|
97
|
-
)
|
|
98
|
-
|
|
99
22
|
|
|
100
23
|
class XiaoHeiHePostType(Enum):
|
|
101
24
|
VIDEO = "video"
|
|
@@ -208,7 +131,7 @@ class XiaoHeiHeAPI:
|
|
|
208
131
|
"owner_only": "1",
|
|
209
132
|
**sig_params,
|
|
210
133
|
}
|
|
211
|
-
cookies = {"x_xhh_tokenid": await self.fetch_x_xhh_tokenid()}
|
|
134
|
+
cookies = {"x_xhh_tokenid": await SecuritySm.get_d_id()}
|
|
212
135
|
async with httpx.AsyncClient(proxy=self.proxy) as cli:
|
|
213
136
|
result = await cli.get(self.api_url + "/bbs/app/link/tree", params=params, cookies=cookies)
|
|
214
137
|
result.raise_for_status()
|
|
@@ -231,28 +154,6 @@ class XiaoHeiHeAPI:
|
|
|
231
154
|
case _:
|
|
232
155
|
raise Exception(status)
|
|
233
156
|
|
|
234
|
-
async def v4(self):
|
|
235
|
-
json_data = {
|
|
236
|
-
"appId": "heybox_website",
|
|
237
|
-
"organization": "0yD85BjYvGFAvHaSQ1mc",
|
|
238
|
-
"ep": V4_EP,
|
|
239
|
-
"data": V4_DATA,
|
|
240
|
-
"os": "web",
|
|
241
|
-
"encode": 5,
|
|
242
|
-
"compress": 2,
|
|
243
|
-
}
|
|
244
|
-
async with httpx.AsyncClient(proxy=self.proxy) as cli:
|
|
245
|
-
result = await cli.post("https://fp-it.portal101.cn/deviceprofile/v4", json=json_data)
|
|
246
|
-
result.raise_for_status()
|
|
247
|
-
return result.json()
|
|
248
|
-
|
|
249
|
-
async def fetch_x_xhh_tokenid(self) -> str:
|
|
250
|
-
data = await self.v4()
|
|
251
|
-
device_id = data.get("detail", {}).get("deviceId")
|
|
252
|
-
if not device_id:
|
|
253
|
-
raise Exception("获取 x_xhh_tokenid 失败")
|
|
254
|
-
return f"B{device_id}"
|
|
255
|
-
|
|
256
157
|
|
|
257
158
|
class XiaoHeiHeSign:
|
|
258
159
|
"""
|
|
@@ -448,7 +349,7 @@ class XiaoHeiHeSign:
|
|
|
448
349
|
|
|
449
350
|
|
|
450
351
|
class XHHConverter(MarkdownConverter):
|
|
451
|
-
def convert_img(self, el,
|
|
352
|
+
def convert_img(self, el, _, parent_tags):
|
|
452
353
|
alt = el.attrs.get("alt", None) or ""
|
|
453
354
|
src = el.attrs.get("data-original", None) or ""
|
|
454
355
|
title = el.attrs.get("title", None) or ""
|
|
@@ -459,6 +360,193 @@ class XHHConverter(MarkdownConverter):
|
|
|
459
360
|
return f""
|
|
460
361
|
|
|
461
362
|
|
|
363
|
+
class SecuritySm:
|
|
364
|
+
# FROM https://github.com/YueHen14/skyland-auto-sign/blob/6e7115b5580377c842f50f05b0fa39ab079c17b1/SecuritySm.py
|
|
365
|
+
|
|
366
|
+
# 查询dId请求头
|
|
367
|
+
DEVICES_INFO_URL = "https://fp-it.portal101.cn/deviceprofile/v4"
|
|
368
|
+
|
|
369
|
+
# 数美配置
|
|
370
|
+
SM_CONFIG = {
|
|
371
|
+
"organization": "0yD85BjYvGFAvHaSQ1mc",
|
|
372
|
+
"appId": "heybox_website",
|
|
373
|
+
"publicKey": (
|
|
374
|
+
"MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCXj9exmI4nQjmT52iwr+yf7hAQ06bfSZHTAH"
|
|
375
|
+
"UfRBYiagCf/whhd8es0R79wBigpiHLd28TKA8b8mGR8OiiI1hV+qfynCWihvp3mdj8MiiH6SU3"
|
|
376
|
+
"lhro2hkfYzImZB0RmWr2zE4Xt1+A6Oyp6bf+W7JSxYUXHw3nNv7Td4jw4jEFKQIDAQAB"
|
|
377
|
+
), # 小黑盒公钥
|
|
378
|
+
"protocol": "https",
|
|
379
|
+
"apiHost": "fp-it.portal101.cn",
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
PK = serialization.load_der_public_key(base64.b64decode(SM_CONFIG["publicKey"]))
|
|
383
|
+
|
|
384
|
+
DES_RULE = {
|
|
385
|
+
"appId": {"cipher": "DES", "is_encrypt": 1, "key": "uy7mzc4h", "obfuscated_name": "xx"},
|
|
386
|
+
"box": {"is_encrypt": 0, "obfuscated_name": "jf"},
|
|
387
|
+
"canvas": {"cipher": "DES", "is_encrypt": 1, "key": "snrn887t", "obfuscated_name": "yk"},
|
|
388
|
+
"clientSize": {"cipher": "DES", "is_encrypt": 1, "key": "cpmjjgsu", "obfuscated_name": "zx"},
|
|
389
|
+
"organization": {"cipher": "DES", "is_encrypt": 1, "key": "78moqjfc", "obfuscated_name": "dp"},
|
|
390
|
+
"os": {"cipher": "DES", "is_encrypt": 1, "key": "je6vk6t4", "obfuscated_name": "pj"},
|
|
391
|
+
"platform": {"cipher": "DES", "is_encrypt": 1, "key": "pakxhcd2", "obfuscated_name": "gm"},
|
|
392
|
+
"plugins": {"cipher": "DES", "is_encrypt": 1, "key": "v51m3pzl", "obfuscated_name": "kq"},
|
|
393
|
+
"pmf": {"cipher": "DES", "is_encrypt": 1, "key": "2mdeslu3", "obfuscated_name": "vw"},
|
|
394
|
+
"protocol": {"is_encrypt": 0, "obfuscated_name": "protocol"},
|
|
395
|
+
"referer": {"cipher": "DES", "is_encrypt": 1, "key": "y7bmrjlc", "obfuscated_name": "ab"},
|
|
396
|
+
"res": {"cipher": "DES", "is_encrypt": 1, "key": "whxqm2a7", "obfuscated_name": "hf"},
|
|
397
|
+
"rtype": {"cipher": "DES", "is_encrypt": 1, "key": "x8o2h2bl", "obfuscated_name": "lo"},
|
|
398
|
+
"sdkver": {"cipher": "DES", "is_encrypt": 1, "key": "9q3dcxp2", "obfuscated_name": "sc"},
|
|
399
|
+
"status": {"cipher": "DES", "is_encrypt": 1, "key": "2jbrxxw4", "obfuscated_name": "an"},
|
|
400
|
+
"subVersion": {"cipher": "DES", "is_encrypt": 1, "key": "eo3i2puh", "obfuscated_name": "ns"},
|
|
401
|
+
"svm": {"cipher": "DES", "is_encrypt": 1, "key": "fzj3kaeh", "obfuscated_name": "qr"},
|
|
402
|
+
"time": {"cipher": "DES", "is_encrypt": 1, "key": "q2t3odsk", "obfuscated_name": "nb"},
|
|
403
|
+
"timezone": {"cipher": "DES", "is_encrypt": 1, "key": "1uv05lj5", "obfuscated_name": "as"},
|
|
404
|
+
"tn": {"cipher": "DES", "is_encrypt": 1, "key": "x9nzj1bp", "obfuscated_name": "py"},
|
|
405
|
+
"trees": {"cipher": "DES", "is_encrypt": 1, "key": "acfs0xo4", "obfuscated_name": "pi"},
|
|
406
|
+
"ua": {"cipher": "DES", "is_encrypt": 1, "key": "k92crp1t", "obfuscated_name": "bj"},
|
|
407
|
+
"url": {"cipher": "DES", "is_encrypt": 1, "key": "y95hjkoo", "obfuscated_name": "cf"},
|
|
408
|
+
"version": {"is_encrypt": 0, "obfuscated_name": "version"},
|
|
409
|
+
"vpw": {"cipher": "DES", "is_encrypt": 1, "key": "r9924ab5", "obfuscated_name": "ca"},
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
BROWSER_ENV = {
|
|
413
|
+
"plugins": (
|
|
414
|
+
"MicrosoftEdgePDFPluginPortableDocumentFormatinternal-pdf-viewer1,Micros"
|
|
415
|
+
"oftEdgePDFViewermhjfbmdgcfjbbpaeojofohoefgiehjai1"
|
|
416
|
+
),
|
|
417
|
+
"ua": (
|
|
418
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
419
|
+
"Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0"
|
|
420
|
+
),
|
|
421
|
+
"canvas": "259ffe69", # 基于浏览器的canvas获得的值,不知道复用行不行
|
|
422
|
+
"timezone": -480, # 时区,应该是固定值吧
|
|
423
|
+
"platform": "Win32",
|
|
424
|
+
"url": "https://www.skland.com/", # 固定值
|
|
425
|
+
"referer": "",
|
|
426
|
+
"res": "1920_1080_24_1.25", # 屏幕宽度_高度_色深_window.devicePixelRatio
|
|
427
|
+
"clientSize": "0_0_1080_1920_1920_1080_1920_1080",
|
|
428
|
+
"status": "0011", # 不知道在干啥
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
# 将浏览器环境对象的key全部排序,然后对其所有的值及其子对象的值加入数字并字符串相加。若值为数字,
|
|
432
|
+
# 则乘以10000(0x2710)再将其转成字符串存入数组,最后再做md5,存入tn变量(tn变量要做加密)
|
|
433
|
+
|
|
434
|
+
# 把这个对象用加密规则进行加密,然后对结果做GZIP压缩(结果是对象,应该有序列化),最后做AES加密(加密细节目前不
|
|
435
|
+
# 清除),密钥为变量priId
|
|
436
|
+
|
|
437
|
+
# 加密规则:新对象的key使用相对应加解密规则的obfuscated_name值,value为字符串化后进行进行DES加密,再进行btoa加密
|
|
438
|
+
|
|
439
|
+
@classmethod
|
|
440
|
+
def _DES(cls, o: dict):
|
|
441
|
+
result = {}
|
|
442
|
+
for i in o.keys():
|
|
443
|
+
if i in cls.DES_RULE.keys():
|
|
444
|
+
rule = cls.DES_RULE[i]
|
|
445
|
+
res = o[i]
|
|
446
|
+
if rule["is_encrypt"] == 1:
|
|
447
|
+
c = Cipher(TripleDES(rule["key"].encode("utf-8")), ECB())
|
|
448
|
+
data = str(res).encode("utf-8")
|
|
449
|
+
# 补足字节
|
|
450
|
+
data += b"\x00" * 8
|
|
451
|
+
res = base64.b64encode(c.encryptor().update(data)).decode("utf-8")
|
|
452
|
+
result[rule["obfuscated_name"]] = res
|
|
453
|
+
else:
|
|
454
|
+
result[i] = o[i]
|
|
455
|
+
return result
|
|
456
|
+
|
|
457
|
+
@staticmethod
|
|
458
|
+
def _AES(v: bytes, k: bytes):
|
|
459
|
+
iv = "0102030405060708"
|
|
460
|
+
key = AES(k)
|
|
461
|
+
c = Cipher(key, CBC(iv.encode("utf-8")))
|
|
462
|
+
c.encryptor()
|
|
463
|
+
# 填充明文
|
|
464
|
+
v += b"\x00"
|
|
465
|
+
while len(v) % 16 != 0:
|
|
466
|
+
v += b"\x00"
|
|
467
|
+
return c.encryptor().update(v).hex()
|
|
468
|
+
|
|
469
|
+
@staticmethod
|
|
470
|
+
def GZIP(o: dict):
|
|
471
|
+
# 这个压缩结果似乎和前台不太一样,不清楚是否会影响
|
|
472
|
+
json_str = json.dumps(o, ensure_ascii=False)
|
|
473
|
+
stream = gzip.compress(json_str.encode("utf-8"), 2, mtime=0)
|
|
474
|
+
return base64.b64encode(stream)
|
|
475
|
+
|
|
476
|
+
# 获得tn的值,后续做DES加密用
|
|
477
|
+
@staticmethod
|
|
478
|
+
def get_tn(o: dict):
|
|
479
|
+
sorted_keys = sorted(o.keys())
|
|
480
|
+
|
|
481
|
+
result_list = []
|
|
482
|
+
|
|
483
|
+
for i in sorted_keys:
|
|
484
|
+
v = o[i]
|
|
485
|
+
if isinstance(v, (int, float)):
|
|
486
|
+
v = str(v * 10000)
|
|
487
|
+
elif isinstance(v, dict):
|
|
488
|
+
v = SecuritySm.get_tn(v)
|
|
489
|
+
result_list.append(v)
|
|
490
|
+
return "".join(result_list)
|
|
491
|
+
|
|
492
|
+
@staticmethod
|
|
493
|
+
def get_smid():
|
|
494
|
+
t = time.localtime()
|
|
495
|
+
_time = f"{t.tm_year}{t.tm_mon:0>2d}{t.tm_mday:0>2d}{t.tm_hour:0>2d}{t.tm_min:0>2d}{t.tm_sec:0>2d}"
|
|
496
|
+
uid = str(uuid.uuid4())
|
|
497
|
+
v = _time + hashlib.md5(uid.encode("utf-8")).hexdigest() + "00"
|
|
498
|
+
smsk_web = hashlib.md5(("smsk_web_" + v).encode("utf-8")).hexdigest()[0:14]
|
|
499
|
+
return v + smsk_web + "0"
|
|
500
|
+
|
|
501
|
+
@classmethod
async def get_d_id(cls):
    """Request a device id from the device-info endpoint.

    Builds a device profile from ``cls.BROWSER_ENV`` plus per-call random
    values, signs it with an MD5 of ``cls.get_tn(...)``, encodes it through
    ``cls._DES`` -> ``cls.GZIP`` -> ``cls._AES`` and POSTs it to
    ``cls.DEVICES_INFO_URL``.

    Returns:
        str: ``"B"`` followed by the ``deviceId`` reported by the server.

    Raises:
        httpx.HTTPStatusError: the endpoint answered with a 4xx/5xx status.
        Exception: the response ``code`` is not ``1100``.
    """
    seed = str(uuid.uuid4()).encode("utf-8")
    # The first 16 hex chars of the seed's MD5 are used as the key passed to _AES.
    pri_id = hashlib.md5(seed).hexdigest()[0:16]
    # The seed itself travels encrypted in the "ep" field.
    # NOTE(review): cls.PK is presumably an RSA public key - confirm.
    ep = base64.b64encode(cls.PK.encrypt(seed, padding.PKCS1v15())).decode("utf-8")

    browser = cls.BROWSER_ENV.copy()
    current_time = int(time.time() * 1000)
    browser.update({"vpw": str(uuid.uuid4()), "svm": current_time, "trees": str(uuid.uuid4()), "pmf": current_time})

    des_target = {
        **browser,
        "protocol": 102,
        "organization": cls.SM_CONFIG["organization"],
        "appId": cls.SM_CONFIG["appId"],
        "os": "web",
        "version": "3.0.0",
        "sdkver": "3.0.0",
        "box": "",  # Appears to be an SMID; it is empty on the first request, which does not affect the result
        "rtype": "all",
        "smid": cls.get_smid(),
        "subVersion": "1.0.0",
        "time": 0,
    }
    # The signature covers every field above, so it must be computed last.
    des_target["tn"] = hashlib.md5(cls.get_tn(des_target).encode()).hexdigest()

    des_result = cls._AES(cls.GZIP(cls._DES(des_target)), pri_id.encode("utf-8"))
    async with httpx.AsyncClient() as client:
        response = await client.post(
            cls.DEVICES_INFO_URL,
            json={
                "appId": "heybox_website",
                "compress": 2,
                "data": des_result,
                "encode": 5,
                "ep": ep,
                "organization": cls.SM_CONFIG["organization"],
                "os": "web",  # fixed value
            },
        )

    # Fail on HTTP errors explicitly instead of trying to JSON-decode an error page.
    response.raise_for_status()
    resp = response.json()
    # .get() so that a payload without "code" reports the same failure as a wrong code.
    if resp.get("code") != 1100:
        raise Exception("did计算失败")
    # The device id must start with "B"
    return "B" + resp["detail"]["deviceId"]
|
|
548
|
+
|
|
549
|
+
|
|
462
550
|
if __name__ == "__main__":
|
|
463
551
|
signer = XiaoHeiHeSign(method_key="g")
|
|
464
552
|
result = signer.sign("/bbs/app/link/tree")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: parsehub
|
|
3
|
-
Version: 2.0.13
|
|
3
|
+
Version: 2.0.15
|
|
4
4
|
Summary: 轻量、异步、开箱即用的社交媒体聚合解析库
|
|
5
5
|
Author-email: 梓澪 <zilingmio@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -33,6 +33,7 @@ Requires-Dist: httpx>=0.24.1
|
|
|
33
33
|
Requires-Dist: pillow>=12.1.0
|
|
34
34
|
Requires-Dist: python-slugify[unidecode]>=8.0.4
|
|
35
35
|
Requires-Dist: opencv-python-headless>=4.13.0.92
|
|
36
|
+
Requires-Dist: cryptography>=46.0.6
|
|
36
37
|
Dynamic: license-file
|
|
37
38
|
|
|
38
39
|
<div align="center">
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
from typing import Union
|
|
2
|
-
|
|
3
|
-
from ...provider_api.tieba import TieBa
|
|
4
|
-
from ...types import ImageParseResult, ParseError, Platform, VideoParseResult
|
|
5
|
-
from ..base.base import BaseParser
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class TieBaParser(BaseParser):
    """Parser for Baidu Tieba thread URLs of the form ``tieba.baidu.com/p/<id>``."""

    __platform__ = Platform.TIEBA
    __supported_type__ = ["视频", "图文"]
    # Dots are escaped so they only match a literal "."; ".*" (instead of ".+")
    # lets a bare "tieba.baidu.com/p/123" match, as the optional scheme implies.
    __match__ = r"^(http(s)?://)?.*tieba\.baidu\.com/p/\d+"

    async def _do_parse(self, raw_url: str) -> Union["ImageParseResult", "VideoParseResult"]:
        """Fetch the thread at *raw_url* and wrap it in a parse result.

        Returns:
            A ``VideoParseResult`` when the post exposes a video URL,
            otherwise an ``ImageParseResult`` carrying the post's image URLs.

        Raises:
            ParseError: any failure while fetching or scraping the page.
        """
        try:
            tb = await TieBa(self.proxy).parse(raw_url)
        except Exception as e:
            raise ParseError("贴吧解析失败") from e

        if tb.video_url:
            return VideoParseResult(title=tb.title, video=tb.video_url, content=tb.content)
        return ImageParseResult(title=tb.title, photo=tb.img_url, content=tb.content)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
__all__ = ["TieBaParser"]
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
|
|
3
|
-
import httpx
|
|
4
|
-
from bs4 import BeautifulSoup
|
|
5
|
-
from httpx import Response
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class TieBa:
    """Scraper that extracts title, text, images and video from a Tieba thread page."""

    def __init__(self, proxy: str | None = None):
        """Store the optional proxy URL used for the page request."""
        self.proxy = proxy

    @staticmethod
    def _parse_out_the_body(text):
        """Return the plain text of an HTML fragment with images removed.

        *text* may be ``None`` (content node not found); an empty string is
        returned in that case instead of the literal text ``"None"``.
        """
        if text is None:
            return ""
        soup = BeautifulSoup(str(text), "lxml")
        div_tags = soup.find_all("div")
        # Drop images entirely, then flatten the <div> wrappers so only text remains.
        for img in soup.find_all("img"):
            img.extract()
        for div in div_tags:
            div.unwrap()
        return soup.text.strip()

    @staticmethod
    async def get_tieba_img_url(html: Response):
        """Return the ``src`` of every ``BDE_Image`` image in the first-floor post.

        Returns an empty list when the first-floor container is absent.
        """
        soup = BeautifulSoup(html.text, "lxml")
        first_floor = soup.find("div", {"class": "d_post_content_firstfloor"})
        if first_floor is None:
            return []
        img_tags = first_floor.find_all("img", {"class": "BDE_Image"})
        return [img["src"] for img in img_tags if "src" in img.attrs]

    @staticmethod
    async def get_tieba_video_url(html: Response):
        """Return the ``data-video`` URL of the first-floor ``BDE_Flash`` embed.

        Returns ``None`` when the container, the embed or the attribute is absent.
        """
        soup = BeautifulSoup(html.text, "lxml")
        first_floor = soup.find("div", {"class": "d_post_content_firstfloor"})
        if first_floor is None:
            return None

        embed = first_floor.find("embed", {"class": "BDE_Flash"})
        if embed is None:
            return None
        # .get() so an embed without the attribute yields None rather than KeyError.
        return embed.get("data-video")

    async def get_the_content(self, html: Response):
        """Return ``(title, content)`` of the post.

        Raises:
            Exception: no title element was found in the page.
        """
        soup = BeautifulSoup(html.text, "lxml")
        title = soup.find("h3", {"class": ["core_title_txt", "pull-left", "text-overflow"]}) or soup.find(
            "h1", {"class": "core_title_txt"}
        )
        if not title:
            raise Exception("未获取到标题内容")
        title = title.text.strip()
        content = soup.find("div", {"class": ["d_post_content", "j_d_post_content"]})
        content = self._parse_out_the_body(content)
        return title, content

    async def get_html(self, t_url) -> Response:
        """GET *t_url* through the configured proxy with a 15 second timeout."""
        async with httpx.AsyncClient(proxy=self.proxy) as c:
            return await c.get(t_url, headers={"User-Agent": "Mozilla5.0/"}, timeout=15)

    async def parse(self, t_url) -> "TieBaPost":
        """Download *t_url* and assemble a ``TieBaPost`` from the page."""
        res = await self.get_html(t_url)

        title, content = await self.get_the_content(res)
        img_url = await self.get_tieba_img_url(res)
        video_url = await self.get_tieba_video_url(res)
        return TieBaPost(title, content, img_url, video_url)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
@dataclass
|
|
70
|
-
class TieBaPost:
|
|
71
|
-
title: str
|
|
72
|
-
content: str
|
|
73
|
-
img_url: list
|
|
74
|
-
video_url: str = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|