ChatNet 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatnet/__init__.py +5 -0
- chatnet/cli.py +58 -0
- chatnet/config.py +21 -0
- chatnet/ecnu/__init__.py +5 -0
- chatnet/ecnu/captcha.py +183 -0
- chatnet/ecnu/cli.py +410 -0
- chatnet/ecnu/portal.py +685 -0
- chatnet-0.1.0.dist-info/METADATA +115 -0
- chatnet-0.1.0.dist-info/RECORD +13 -0
- chatnet-0.1.0.dist-info/WHEEL +5 -0
- chatnet-0.1.0.dist-info/entry_points.txt +5 -0
- chatnet-0.1.0.dist-info/licenses/LICENSE +21 -0
- chatnet-0.1.0.dist-info/top_level.txt +1 -0
chatnet/ecnu/portal.py
ADDED
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
"""HTTP client and parsers for the ECNU self-service portal."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
import shlex
|
|
9
|
+
from dataclasses import asdict, dataclass
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from html import unescape
|
|
12
|
+
from html.parser import HTMLParser
|
|
13
|
+
from http.cookies import SimpleCookie
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
from urllib.parse import urljoin, urlparse
|
|
17
|
+
|
|
18
|
+
import requests
|
|
19
|
+
|
|
20
|
+
# Portal origin. Not referenced elsewhere in this module — presumably the
# default ``base_url`` that callers hand to PortalClient (confirm in the CLI).
BASE_URL = "https://login.ecnu.edu.cn:8800"
# Login page: fetched for the bootstrap values and POSTed to submit the form.
LOGIN_PATH = "/login"
# Endpoint POSTed with username, encrypted password and captcha before the real login.
VALIDATE_USER_PATH = "/site/validate-user"
# Endpoint POSTed with the username and SMS code when SMS verification is requested.
VALIDATE_SMS_PATH = "/site/validate-smscode"
# Endpoint POSTed (with a CSRF token) to end the session.
LOGOUT_PATH = "/site/logout"
# Landing page; parsed for user info and the online/product tables.
HOME_PATH = "/home"
# Page whose first table is read as label/value user details.
USER_INFO_PATH = "/users"
# Log pages queried with an optional start/end time filter.
AUTH_LOG_PATH = "/log/auth"
DETAIL_LOG_PATH = "/log/detail"
# Visitor-account management: list page plus the action endpoints.
VISITOR_LIST_PATH = "/visitors/manual/index"
VISITOR_CREATE_PATH = "/visitors/manual/create"
VISITOR_UPDATE_PATH = "/visitors/manual/update"
VISITOR_DELETE_PATH = "/visitors/manual/delete"
VISITOR_LOCK_PATH = "/visitors/manual/lock"

# Default headers installed on the requests session by PortalClient.__init__.
BROWSER_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
|
|
43
|
+
|
|
44
|
+
# Offline fixture for run_selftest(): a login page carrying the CSRF meta
# tags, the hidden RSA public-key input and the captcha image.
SAMPLE_LOGIN_HTML = """
<html><head>
<meta name="csrf-param" content="_csrf-8800">
<meta name="csrf-token" content="sample-token">
</head><body>
<form id="login-form" action="/login" method="post">
<input type="hidden" name="_csrf-8800" value="sample-token">
<input type="hidden" id="public" value="-----BEGIN PUBLIC KEY-----\nSAMPLE\n-----END PUBLIC KEY-----\n">
<img id="loginform-verifycode-image" src="/site/captcha?v=sample">
</form></body></html>
"""

# Offline fixture for run_selftest(): a home page with the labelled user-info
# list followed by the online-session table and the product table.
SAMPLE_HOME_HTML = """
<div class="wrap home-patch">
<div class="panel panel-default">
<ul class="list-group">
<li class="list-group-item"><label class="list-group-label">用户名</label>20260000000</li>
<li class="list-group-item"><label class="list-group-label">姓名</label>Test User</li>
<li class="list-group-item"><label class="list-group-label">状态</label><a class="btn btn-xs btn-success">正常</a></li>
</ul>
</div>
<table><thead><tr><th>用户名</th><th>IP地址</th></tr></thead><tbody><tr><td>u</td><td>1.1.1.1</td></tr></tbody></table>
<table><thead><tr><th>产品ID</th><th>产品名称</th></tr></thead><tbody><tr><td>2</td><td>统一身份认证-全日制学生</td></tr></tbody></table>
</div>
"""

# Offline fixture for run_selftest(): a visitor list page with two rows, each
# with eight cells and update/lock/delete links in the last cell.
SAMPLE_VISITOR_HTML = """
<html><head>
<meta name="csrf-param" content="_csrf-8800">
<meta name="csrf-token" content="sample-token">
</head><body>
<div class="summary">第<b>1-2</b>条,共<b>2</b>条数据.</div>
<table>
<thead><tr><th>#</th><th>账号</th><th>状态</th><th>已用流量</th><th>已用时长</th><th>备注信息</th><th>密码</th><th>操作</th></tr></thead>
<tbody>
<tr data-key="10256701"><td>1</td><td>20260000000m1</td><td>正常</td><td>0byte</td><td>0秒</td><td>temp</td><td>******</td><td><a href="/visitors/manual/update?id=10256701" title="更新"></a> <a href="/visitors/manual/lock?id=10256701" title="锁定"></a> <a href="/visitors/manual/delete?id=10256701" title="销户"></a></td></tr>
<tr data-key="10256703"><td>2</td><td>20260000000m2</td><td>正常</td><td>0byte</td><td>0秒</td><td>GuestB</td><td>******</td><td><a href="/visitors/manual/update?id=10256703" title="更新"></a> <a href="/visitors/manual/lock?id=10256703" title="锁定"></a> <a href="/visitors/manual/delete?id=10256703" title="销户"></a></td></tr>
</tbody></table>
</body></html>
"""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class LoginBootstrap:
    """Values scraped from the login page that the login requests reuse."""

    # Content of the ``csrf-param`` meta tag: the form field name for the token.
    csrf_param: str
    # Content of the ``csrf-token`` meta tag.
    csrf_token: str
    # PEM text taken from the hidden ``id="public"`` input; used to encrypt the password.
    public_key: str
    # ``src`` of the captcha image, as written in the page (may be relative).
    captcha_url: str
    # Local ISO-8601 timestamp (seconds precision) of when the page was fetched.
    fetched_at: str
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class VisitorRow:
    """One row of the visitor list table, with every cell kept as text."""

    # Value of the row's ``data-key`` attribute.
    visitor_id: str
    # Text of cells 1-7, in table order.
    index: str
    account: str
    status: str
    used_flow: str
    used_time: str
    remark: str
    masked_password: str
    # Action links found in the eighth cell; None when the link is absent.
    update_url: str | None
    lock_url: str | None
    delete_url: str | None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class SimpleTableParser(HTMLParser):
    """Collect every ``<table>`` in a document as rows of cell records.

    Finished tables accumulate in ``self.tables``. Each table is a list of
    rows and each row a list of ``{"text": ..., "is_header": ...}`` dicts.
    """

    def __init__(self) -> None:
        super().__init__()
        self.tables: list[list[list[dict[str, Any]]]] = []
        # Table, row and cell under construction; None while outside one.
        self._current_table: list[list[dict[str, Any]]] | None = None
        self._current_row: list[dict[str, Any]] | None = None
        self._current_cell: list[str] | None = None
        self._current_cell_is_header = False
        self._cell_depth = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Open a table, row or cell, or note a nested tag inside a cell."""
        if tag == "table":
            self._current_table = []
            return
        if tag == "tr" and self._current_table is not None:
            self._current_row = []
            return
        if tag in ("td", "th") and self._current_row is not None:
            self._current_cell = []
            self._current_cell_is_header = tag == "th"
            self._cell_depth = 1
            return
        if self._current_cell is None:
            return
        # Any other tag inside an open cell: track nesting, keep line breaks.
        self._cell_depth += 1
        if tag == "br":
            self._current_cell.append("\n")

    def handle_endtag(self, tag: str) -> None:
        """Close the matching table, row or cell and store what was gathered."""
        table = self._current_table
        row = self._current_row
        cell = self._current_cell
        if tag == "table" and table is not None:
            self.tables.append(table)
            self._current_table = None
            return
        if tag == "tr" and table is not None and row is not None:
            if row:  # rows that collected no cells are dropped
                table.append(row)
            self._current_row = None
            return
        if tag in ("td", "th") and row is not None and cell is not None:
            row.append({"text": clean_text("".join(cell)), "is_header": self._current_cell_is_header})
            self._current_cell = None
            self._current_cell_is_header = False
            self._cell_depth = 0
            return
        if cell is not None and self._cell_depth > 0:
            self._cell_depth -= 1

    def handle_data(self, data: str) -> None:
        """Append text to the open cell; text outside cells is ignored."""
        if self._current_cell is None:
            return
        self._current_cell.append(data)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class PortalClient:
    """Session-based client for the portal, persisted through a JSON state file.

    A ``requests.Session`` holds the cookies. After every request issued via
    :meth:`get` or :meth:`post` the cookie jar is written back to
    ``state_file`` so a later process can resume the same session.
    """

    def __init__(
        self,
        base_url: str,
        state_file: Path,
        cookie_header: str | None = None,
        timeout: int = 20,
    ) -> None:
        """Create the session and restore cookies.

        Args:
            base_url: Portal origin; a trailing slash is removed.
            state_file: JSON file used to load and persist state.
            cookie_header: Optional raw ``Cookie`` header; its cookies are
                added after (and so override) the ones from the state file.
            timeout: Default per-request timeout in seconds.
        """
        self.base_url = base_url.rstrip("/")
        self.state_file = state_file
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update(BROWSER_HEADERS)
        self.state = load_json(self.state_file)
        state_cookies = self.state.get("cookies") or {}
        if state_cookies:
            requests.utils.add_dict_to_cookiejar(self.session.cookies, state_cookies)
        if cookie_header:
            requests.utils.add_dict_to_cookiejar(self.session.cookies, parse_cookie_header(cookie_header))

    def _url(self, path: str) -> str:
        """Resolve ``path`` against ``base_url``."""
        return urljoin(self.base_url + "/", path.lstrip("/"))

    @staticmethod
    def _with_id(path: str, visitor_id: str) -> str:
        """Return ``path?id=<visitor_id>`` with the id percent-encoded.

        Encoding keeps characters such as ``&`` or ``#`` in an id from
        altering the query string of the action URL.
        """
        return f"{path}?id={requests.utils.quote(str(visitor_id), safe='')}"

    def _save_state(self, extra: dict[str, Any] | None = None) -> None:
        """Write the state, base URL and current cookies (plus ``extra``) to disk."""
        payload = dict(self.state)
        payload["base_url"] = self.base_url
        payload["cookies"] = requests.utils.dict_from_cookiejar(self.session.cookies)
        if extra:
            payload.update(extra)
        save_json(self.state_file, payload)
        self.state = payload

    def cookie_header(self) -> str:
        """Return the session cookies formatted as a ``Cookie`` header value."""
        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        return "; ".join(f"{k}={v}" for k, v in cookies.items())

    def get(self, path: str, **kwargs: Any) -> requests.Response:
        """GET ``path`` and persist the session state afterwards.

        ``timeout`` and ``allow_redirects`` default to ``self.timeout`` and
        ``True``. They are applied with ``setdefault`` so a caller may
        override them instead of triggering a duplicate-keyword ``TypeError``.
        """
        kwargs.setdefault("timeout", self.timeout)
        kwargs.setdefault("allow_redirects", True)
        resp = self.session.get(self._url(path), **kwargs)
        self._save_state()
        return resp

    def post(self, path: str, **kwargs: Any) -> requests.Response:
        """POST to ``path`` and persist the session state afterwards.

        Keyword defaults are handled as in :meth:`get`.
        """
        kwargs.setdefault("timeout", self.timeout)
        kwargs.setdefault("allow_redirects", True)
        resp = self.session.post(self._url(path), **kwargs)
        self._save_state()
        return resp

    def _authenticated_response(self, resp: requests.Response) -> requests.Response:
        """Return ``resp`` unless it failed or ended up on the login page.

        Raises:
            requests.HTTPError: On an HTTP error status.
            RuntimeError: If the final URL path is the login page.
        """
        resp.raise_for_status()
        if urlparse(resp.url).path == LOGIN_PATH:
            raise RuntimeError("Not authenticated: request was redirected to the login page.")
        return resp

    def reset_login_session(self) -> None:
        """Drop all cookies and login-related state keys, then persist."""
        self.session.cookies.clear()
        for key in ["authenticated_at", "login_bootstrap", "captcha_path", "username"]:
            self.state.pop(key, None)
        self._save_state()

    def fetch_login_bootstrap(self) -> tuple[LoginBootstrap, str]:
        """Fetch the login page and extract the values needed to log in.

        Returns:
            The parsed bootstrap and the raw page HTML.

        Raises:
            RuntimeError: If any required value is missing from the page.
        """
        resp = self.get(LOGIN_PATH)
        resp.raise_for_status()
        html = resp.text
        bootstrap = LoginBootstrap(
            csrf_param=require_value(extract_meta_content(html, "csrf-param"), "missing login csrf-param"),
            csrf_token=require_value(extract_meta_content(html, "csrf-token"), "missing login csrf-token"),
            public_key=require_value(extract_login_public_key(html), "missing login RSA public key"),
            captcha_url=require_value(extract_captcha_url(html), "missing login captcha url"),
            fetched_at=datetime.now().isoformat(timespec="seconds"),
        )
        self._save_state({"login_bootstrap": asdict(bootstrap)})
        return bootstrap, html

    def login_init(self, captcha_path: Path) -> dict[str, Any]:
        """Start a fresh login: reset the session and download the captcha.

        The captcha image bytes are written to ``captcha_path`` (parent
        directories are created). Returns a summary dict with the captcha
        location, CSRF parameter name and fetch time.
        """
        self.reset_login_session()
        bootstrap, _ = self.fetch_login_bootstrap()
        captcha_path.parent.mkdir(parents=True, exist_ok=True)
        resp = self.session.get(self._url(bootstrap.captcha_url), timeout=self.timeout)
        resp.raise_for_status()
        captcha_path.write_bytes(resp.content)
        self._save_state({"login_bootstrap": asdict(bootstrap), "captcha_path": str(captcha_path)})
        return {
            "captcha_path": str(captcha_path),
            "captcha_url": self._url(bootstrap.captcha_url),
            "csrf_param": bootstrap.csrf_param,
            "fetched_at": bootstrap.fetched_at,
        }

    def login(self, username: str, password: str, verify_code: str, sms_code: str | None = None) -> dict[str, Any]:
        """Run the login sequence: validate user, optionally validate SMS, submit.

        The sequence stops early, returning what has been collected so far,
        when user validation fails, when SMS is required but ``sms_code`` is
        missing, or when SMS validation fails.
        """
        bootstrap = self._ensure_login_bootstrap()
        encrypted_password = self._encrypt_password(password, bootstrap.public_key)
        validated = self._validate_user(username, encrypted_password, verify_code, bootstrap)
        result: dict[str, Any] = {
            "validate_user": validated,
            "captcha_url": self._url(bootstrap.captcha_url),
            "state_file": str(self.state_file),
        }
        if not validated.get("success"):
            return result
        if validated.get("inputSms"):
            if not sms_code:
                result["message"] = "SMS verification is required; rerun login with --sms-code."
                return result
            sms_result = self._validate_sms(username, sms_code, bootstrap)
            result["validate_sms"] = sms_result
            if not sms_result.get("success"):
                return result
        submit_result = self._submit_login(username, encrypted_password, verify_code, sms_code, bootstrap)
        result["submit_login"] = submit_result
        return result

    def login_auto(
        self,
        username: str,
        password: str,
        sms_code: str | None,
        rounds: int,
        topk: int,
        captcha_path: Path,
    ) -> dict[str, Any]:
        """Log in with automatically recognised captcha candidates.

        Each round downloads a new captcha and tries up to ``topk`` recognised
        candidates. The method returns as soon as user validation succeeds,
        or as soon as a failure does not look captcha-related (``aborted``).
        Otherwise it reports failure after ``rounds`` rounds.
        """
        from .captcha import recognize_captcha_topk

        attempts: list[dict[str, Any]] = []
        for round_index in range(1, rounds + 1):
            init_result = self.login_init(captcha_path)
            candidates = recognize_captcha_topk(captcha_path.read_bytes(), topk=topk)
            round_info: dict[str, Any] = {
                "round": round_index,
                "captcha_path": str(captcha_path),
                "captcha_url": init_result["captcha_url"],
                "candidates": candidates,
                "attempts": [],
            }
            for candidate in candidates:
                login_result = self.login(username, password, candidate, sms_code=sms_code)
                candidate_info = {
                    "candidate": candidate,
                    "validate_user": login_result.get("validate_user"),
                    "validate_sms": login_result.get("validate_sms"),
                    "submit_login": login_result.get("submit_login"),
                    "message": login_result.get("message"),
                }
                round_info["attempts"].append(candidate_info)
                validated = login_result.get("validate_user") or {}
                if validated.get("success"):
                    return {
                        "success": bool(login_result.get("submit_login", {}).get("success")),
                        "requires_sms": bool(login_result.get("message")),
                        "login_result": login_result,
                        "attempts": attempts + [round_info],
                    }
                if not is_retryable_captcha_error(validated):
                    return {"success": False, "login_result": login_result, "attempts": attempts + [round_info], "aborted": True}
            attempts.append(round_info)
        return {"success": False, "attempts": attempts, "message": f"Captcha auto-login failed after {rounds} rounds x {topk} candidates."}

    def logout(self) -> dict[str, Any]:
        """Log out; success means the response ended on the login page."""
        csrf_param, csrf_token, _ = self.fetch_csrf(HOME_PATH)
        resp = self.post(LOGOUT_PATH, data={csrf_param: csrf_token}, headers={"Referer": self._url(HOME_PATH)})
        resp.raise_for_status()
        self.state.pop("authenticated_at", None)
        self._save_state()
        return {"success": urlparse(resp.url).path == LOGIN_PATH, "final_url": resp.url}

    def home_summary(self) -> dict[str, Any]:
        """Parse the home page into user info, online sessions and products."""
        resp = self.fetch_page(HOME_PATH)
        html = resp.text
        tables = parse_tables(html)
        return {
            "user_info": parse_home_user_info(html),
            "online_info": table_to_dicts(find_table_with_headers(tables, ["用户名", "IP地址"]) or {"headers": [], "rows": []}),
            "product_info": table_to_dicts(find_table_with_headers(tables, ["产品ID", "产品名称"]) or {"headers": [], "rows": []}),
        }

    def user_info(self) -> dict[str, str]:
        """Return the first table of the user page as a label -> value mapping."""
        resp = self.fetch_page(USER_INFO_PATH)
        tables = parse_tables(resp.text)
        return parse_detail_view_table(tables[0] if tables else {"headers": [], "rows": []})

    def auth_logs(self, start_time: str | None, end_time: str | None, limit: int | None) -> dict[str, Any]:
        """Query the authentication log, optionally filtered and truncated."""
        return self._query_log_page(AUTH_LOG_PATH, "AuthLogSearch[start_time]", "AuthLogSearch[end_time]", start_time, end_time, limit)

    def detail_logs(self, start_time: str | None, end_time: str | None, limit: int | None) -> dict[str, Any]:
        """Query the detail log, optionally filtered and truncated."""
        return self._query_log_page(DETAIL_LOG_PATH, "DetailLogSearch[start_time]", "DetailLogSearch[end_time]", start_time, end_time, limit)

    def list_visitors(self) -> dict[str, Any]:
        """Return all visitor rows on the list page plus its summary text."""
        _, _, html = self.fetch_csrf(VISITOR_LIST_PATH)
        rows = [asdict(row) for row in parse_visitor_rows(html)]
        return {"count": len(rows), "summary": extract_summary_text(html), "rows": rows}

    def get_visitor(self, visitor_id: str | None = None, account: str | None = None) -> dict[str, Any]:
        """Find one visitor by id or account name.

        Raises:
            ValueError: If neither key is given or no row matches.
        """
        if not visitor_id and not account:
            raise ValueError("Provide either id or account.")
        rows = parse_visitor_rows(self.fetch_csrf(VISITOR_LIST_PATH)[2])
        for row in rows:
            if visitor_id and row.visitor_id == visitor_id:
                return asdict(row)
            if account and row.account == account:
                return asdict(row)
        raise ValueError("Visitor not found.")

    def create_visitor(self, remark: str, dry_run: bool = False) -> dict[str, Any]:
        """Create a visitor account; with ``dry_run`` only describe the request."""
        validate_remark(remark)
        csrf_param, csrf_token, _ = self.fetch_csrf(VISITOR_LIST_PATH)
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Origin": self.base_url,
            "Referer": self._url(VISITOR_LIST_PATH),
            "X-CSRF-Token": csrf_token,
            "X-Requested-With": "XMLHttpRequest",
        }
        data = {"remark": remark, "agreement": "true"}
        return self._maybe_post(VISITOR_CREATE_PATH, headers, data, dry_run, json_response=True, csrf_param=csrf_param)

    def update_visitor(self, visitor_id: str, remark: str, password: str, dry_run: bool = False) -> dict[str, Any]:
        """Set a visitor's remark and password; with ``dry_run`` only describe it."""
        validate_remark(remark)
        validate_password(password)
        path = self._with_id(VISITOR_UPDATE_PATH, visitor_id)
        csrf_param, csrf_token, _ = self.fetch_csrf(path)
        headers = {"Content-Type": "application/x-www-form-urlencoded", "Origin": self.base_url, "Referer": self._url(path)}
        data = {
            csrf_param: csrf_token,
            "VisitorsPasswordForm[remark]": remark,
            "VisitorsPasswordForm[password]": password,
            "VisitorsPasswordForm[password1]": password,
        }
        return self._maybe_post(path, headers, data, dry_run)

    def delete_visitor(self, visitor_id: str, dry_run: bool = False) -> dict[str, Any]:
        """POST the delete action for ``visitor_id`` (or describe it when dry)."""
        return self._post_csrf_action(VISITOR_LIST_PATH, self._with_id(VISITOR_DELETE_PATH, visitor_id), dry_run)

    def lock_visitor(self, visitor_id: str, dry_run: bool = False) -> dict[str, Any]:
        """POST the lock action for ``visitor_id`` (or describe it when dry)."""
        return self._post_csrf_action(VISITOR_LIST_PATH, self._with_id(VISITOR_LOCK_PATH, visitor_id), dry_run)

    def fetch_page(self, path: str) -> requests.Response:
        """GET ``path`` and require that the session is authenticated."""
        return self._authenticated_response(self.get(path))

    def fetch_csrf(self, path: str) -> tuple[str, str, str]:
        """Fetch ``path`` and return ``(csrf_param, csrf_token, html)``.

        Raises:
            RuntimeError: If either CSRF meta tag is missing.
        """
        resp = self.fetch_page(path)
        html = resp.text
        param = extract_meta_content(html, "csrf-param")
        token = extract_meta_content(html, "csrf-token")
        if not param or not token:
            raise RuntimeError(f"Failed to locate CSRF meta tags on {path}.")
        return param, token, html

    def _ensure_login_bootstrap(self) -> LoginBootstrap:
        """Reuse the bootstrap saved in the state, fetching one if absent."""
        raw = self.state.get("login_bootstrap")
        if raw:
            return LoginBootstrap(**raw)
        bootstrap, _ = self.fetch_login_bootstrap()
        return bootstrap

    def _encrypt_password(self, password: str, public_key: str) -> str:
        """RSA-encrypt ``password`` (PKCS#1 v1.5) and return it base64-encoded."""
        # Imported lazily so the module loads without the ``Crypto`` package.
        from Crypto.Cipher import PKCS1_v1_5
        from Crypto.PublicKey import RSA

        rsa_key = RSA.import_key(public_key)
        cipher = PKCS1_v1_5.new(rsa_key)
        return base64.b64encode(cipher.encrypt(password.encode("utf-8"))).decode("ascii")

    def _validate_user(self, username: str, encrypted_password: str, verify_code: str, bootstrap: LoginBootstrap) -> dict[str, Any]:
        """POST the credentials and captcha for validation; return the JSON reply."""
        headers = {"Accept": "*/*", "Origin": self.base_url, "Referer": self._url(LOGIN_PATH), "X-CSRF-Token": bootstrap.csrf_token, "X-Requested-With": "XMLHttpRequest"}
        data = {"LoginForm[username]": username, "LoginForm[password]": encrypted_password, "LoginForm[verifyCode]": verify_code}
        resp = self.post(VALIDATE_USER_PATH, headers=headers, data=data)
        resp.raise_for_status()
        return json.loads(resp.text)

    def _validate_sms(self, username: str, sms_code: str, bootstrap: LoginBootstrap) -> dict[str, Any]:
        """POST the SMS code for validation; return the JSON reply."""
        headers = {"Accept": "*/*", "Origin": self.base_url, "Referer": self._url(LOGIN_PATH), "X-CSRF-Token": bootstrap.csrf_token, "X-Requested-With": "XMLHttpRequest"}
        resp = self.post(VALIDATE_SMS_PATH, headers=headers, data={"uname": username, "code": sms_code})
        resp.raise_for_status()
        return json.loads(resp.text)

    def _submit_login(
        self,
        username: str,
        encrypted_password: str,
        verify_code: str,
        sms_code: str | None,
        bootstrap: LoginBootstrap,
    ) -> dict[str, Any]:
        """Submit the login form; success means not ending on the login page.

        On success the login time, bootstrap and username are persisted. On
        failure the page's error summary, if any, is returned under ``error``.
        """
        data = {
            bootstrap.csrf_param: bootstrap.csrf_token,
            "LoginForm[username]": username,
            "LoginForm[password]": encrypted_password,
            "LoginForm[verifyCode]": verify_code,
            "LoginForm[smsCode]": sms_code or "",
        }
        resp = self.post(LOGIN_PATH, headers={"Origin": self.base_url, "Referer": self._url(LOGIN_PATH)}, data=data)
        resp.raise_for_status()
        success = urlparse(resp.url).path != LOGIN_PATH
        if success:
            self._save_state({"authenticated_at": datetime.now().isoformat(timespec="seconds"), "login_bootstrap": asdict(bootstrap), "username": username})
        return {"success": success, "final_url": resp.url, "error": None if success else extract_error_summary(resp.text)}

    def _query_log_page(
        self,
        path: str,
        start_field: str,
        end_field: str,
        start_time: str | None,
        end_time: str | None,
        limit: int | None,
    ) -> dict[str, Any]:
        """Fetch a log page, apply the time filter if given, and parse its table.

        The filter form is POSTed only when a start or end time is supplied.
        ``limit`` truncates the parsed rows; ``None`` keeps them all.
        """
        csrf_param, csrf_token, html = self.fetch_csrf(path)
        if start_time or end_time:
            data = {csrf_param: csrf_token, start_field: start_time or "", end_field: end_time or ""}
            html = self._authenticated_response(self.post(path, data=data, headers={"Referer": self._url(path)})).text
        table = first_multirow_table(parse_tables(html))
        rows = table_to_dicts(table) if table else []
        if limit is not None:
            rows = rows[:limit]  # slice once; count and rows stay in agreement
        return {"count": len(rows), "rows": rows, "summary": extract_summary_text(html)}

    def _post_csrf_action(self, referer_path: str, action_path: str, dry_run: bool) -> dict[str, Any]:
        """POST ``action_path`` with only a CSRF token taken from ``referer_path``."""
        csrf_param, csrf_token, _ = self.fetch_csrf(referer_path)
        headers = {"Content-Type": "application/x-www-form-urlencoded", "Origin": self.base_url, "Referer": self._url(referer_path)}
        return self._maybe_post(action_path, headers, {csrf_param: csrf_token}, dry_run)

    def _maybe_post(
        self,
        path: str,
        headers: dict[str, str],
        data: dict[str, str],
        dry_run: bool,
        json_response: bool = False,
        csrf_param: str | None = None,
    ) -> dict[str, Any]:
        """Send a POST, or with ``dry_run`` return its description and a curl line.

        Returns:
            Dry run: the request spec with ``dry_run`` and ``curl`` added.
            Otherwise ``{"request": spec, "response": ...}``, where the
            response is the decoded JSON when ``json_response`` is true, or
            a status/ok/url summary otherwise.
        """
        spec = request_spec("POST", self._url(path), headers, data)
        if csrf_param:
            spec["csrf_param"] = csrf_param
        if dry_run:
            spec["dry_run"] = True
            spec["curl"] = curl_string("POST", self._url(path), headers, data)
            return spec
        resp = self.post(path, headers=headers, data=data)
        resp.raise_for_status()
        if json_response:
            return {"request": spec, "response": resp.json()}
        return {"request": spec, "response": {"status_code": resp.status_code, "ok": "操作成功" in resp.text, "url": resp.url}}
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def clean_text(text: str) -> str:
    """Decode HTML entities, collapse whitespace runs to one space, and trim."""
    decoded = unescape(text or "")
    collapsed = re.sub(r"\s+", " ", decoded)
    return collapsed.strip()
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def strip_tags(fragment: str) -> str:
    """Replace each HTML tag in ``fragment`` with a space, then normalise the text."""
    without_markup = re.sub(r"<[^>]+>", " ", fragment, flags=re.S)
    return clean_text(without_markup)
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def require_value(value: str | None, message: str) -> str:
    """Return ``value`` when it is truthy; otherwise raise ``RuntimeError(message)``."""
    if value:
        return value
    raise RuntimeError(message)
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def extract_meta_content(html: str, meta_name: str) -> str | None:
    """Return the ``content`` of the first ``<meta>`` tag named ``meta_name``.

    Attributes are read per tag, so ``content`` may come before or after
    ``name``. A value ends at its own closing quote, and HTML entities are
    decoded. Name matching is case-insensitive.

    Returns:
        The decoded content, or ``None`` when no such tag has non-empty content.
    """
    wanted = meta_name.lower()
    for tag in re.finditer(r"<meta\b[^>]*>", html, re.I):
        # key = "value" or key = 'value'; the backreference pairs the quotes.
        attrs = {
            key.lower(): value
            for key, _quote, value in re.findall(r'([^\s"\'=<>/]+)\s*=\s*(["\'])(.*?)\2', tag.group(0), re.S)
        }
        if attrs.get("name", "").lower() == wanted and attrs.get("content"):
            return unescape(attrs["content"])
    return None
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def extract_login_public_key(html: str) -> str | None:
    """Return the PEM public key held in the ``id="public"`` input, if present.

    The value is entity-decoded and stripped of surrounding whitespace.
    """
    key_pattern = r'<input[^>]+id=["\']public["\'][^>]+value=["\'](.*?-----END PUBLIC KEY-----\s*)["\']'
    found = re.search(key_pattern, html, re.S | re.I)
    if found is None:
        return None
    return unescape(found.group(1)).strip()
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def extract_captcha_url(html: str) -> str | None:
    """Return the entity-decoded ``src`` of the login captcha image, if present."""
    found = re.search(r'<img[^>]+id=["\']loginform-verifycode-image["\'][^>]+src=["\']([^"\']+)["\']', html, re.I)
    if found is None:
        return None
    return unescape(found.group(1))
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def extract_error_summary(html: str) -> str | None:
    """Collect the messages listed in the page's error-summary box(es).

    Every ``<li>`` inside each summary ``<div>`` is gathered, and the search
    stops at that div's first ``</div>``. This avoids two faults of a single
    combined pattern: it captured only the first item per summary, and an
    empty summary let the match run on to an unrelated ``<li>`` later in the
    page.

    Returns:
        The items joined with ``"; "``, or ``None`` when there are none.
    """
    items: list[str] = []
    summaries = re.finditer(r'<div class="alert alert-danger error-summary".*?(?:</div>|\Z)', html, re.S | re.I)
    for summary in summaries:
        items.extend(re.findall(r"<li>(.*?)</li>", summary.group(0), re.S | re.I))
    return clean_text("; ".join(strip_tags(x) for x in items)) if items else None
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def parse_cookie_header(cookie_header: str) -> dict[str, str]:
    """Parse a raw ``Cookie`` header string into a name -> value mapping."""
    jar = SimpleCookie()
    jar.load(cookie_header)
    parsed: dict[str, str] = {}
    for name in jar:
        parsed[name] = jar[name].value
    return parsed
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def load_json(path: Path) -> dict[str, Any]:
    """Load a JSON document from ``path``.

    A missing file and an empty or whitespace-only file (for example one
    left by an interrupted write) both yield ``{}``. The file is read
    directly rather than checked with ``exists()`` first, so it cannot
    vanish between the check and the read.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}
    if not text.strip():
        return {}
    return json.loads(text)
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def save_json(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as indented UTF-8 JSON, atomically.

    The text goes to a sibling temporary file that is then renamed over
    ``path``, so an interrupted write cannot leave a truncated file. The
    client stores session cookies through this function, so the file is
    restricted to the owner where the platform supports it.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(data, ensure_ascii=False, indent=2)
    tmp = path.with_name(path.name + ".tmp")
    try:
        tmp.write_text(text, encoding="utf-8")
        try:
            tmp.chmod(0o600)
        except OSError:
            # Deliberate best effort: some filesystems cannot set modes.
            pass
        tmp.replace(path)
    finally:
        # Only present here if the write or rename failed.
        try:
            tmp.unlink()
        except FileNotFoundError:
            pass
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def parse_tables(html: str) -> list[dict[str, Any]]:
    """Parse every table in ``html`` into ``{"headers": [...], "rows": [...]}``.

    The first row becomes ``headers`` only when all of its cells are ``<th>``;
    otherwise ``headers`` is empty and every row is kept as a data row.
    """
    table_parser = SimpleTableParser()
    table_parser.feed(html)

    def convert(raw_table: list[list[dict[str, Any]]]) -> dict[str, Any]:
        # Split one raw table into its header texts and body rows.
        leading = raw_table[0] if raw_table else []
        has_header = bool(leading) and all(cell["is_header"] for cell in leading)
        body = raw_table[1:] if has_header else raw_table
        return {
            "headers": [cell["text"] for cell in leading] if has_header else [],
            "rows": [[cell["text"] for cell in row] for row in body],
        }

    return [convert(raw_table) for raw_table in table_parser.tables]
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def table_to_dicts(table: dict[str, Any]) -> list[dict[str, str]]:
    """Convert a parsed table into one dict per row.

    With headers, each row is keyed by header: short rows are padded with
    ``""`` and surplus cells are dropped. Without headers, cells are keyed
    by their position as a string.
    """
    columns = table.get("headers") or []
    records = table.get("rows") or []
    if not columns:
        return [{str(position): cell for position, cell in enumerate(record)} for record in records]
    width = len(columns)
    converted = []
    for record in records:
        padded = record + [""] * (width - len(record))
        converted.append(dict(zip(columns, padded)))
    return converted
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def parse_detail_view_table(table: dict[str, Any]) -> dict[str, str]:
    """Read a two-column detail table as a first-cell -> second-cell mapping.

    Rows with fewer than two cells are skipped; cells beyond the second are ignored.
    """
    details: dict[str, str] = {}
    for cells in table.get("rows") or []:
        if len(cells) < 2:
            continue
        details[cells[0]] = cells[1]
    return details
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def first_multirow_table(tables: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the first table that has both headers and at least one row."""
    populated = (candidate for candidate in tables if candidate.get("headers") and candidate.get("rows"))
    return next(populated, None)
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def find_table_with_headers(tables: list[dict[str, Any]], expected: list[str]) -> dict[str, Any] | None:
    """Return the first table whose headers begin with ``expected``, else ``None``."""
    width = len(expected)
    for candidate in tables:
        columns = candidate.get("headers") or []
        if len(columns) < width:
            continue
        if columns[:width] == expected:
            return candidate
    return None
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def extract_summary_text(html: str) -> str | None:
    """Return the plain text of the first ``<div class="summary">``, if any."""
    found = re.search(r'<div class="summary">(.*?)</div>', html, re.S | re.I)
    if found is None:
        return None
    return strip_tags(found.group(1))
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def parse_home_user_info(html: str) -> dict[str, str]:
    """Map each labelled list item on the home page to its plain-text value.

    When a label occurs more than once, the last occurrence wins.
    """
    item_pattern = r'<li class="list-group-item">.*?<label class="list-group-label">(.*?)</label>(.*?)</li>'
    pairs = re.findall(item_pattern, html, re.S | re.I)
    return {clean_text(raw_label): strip_tags(raw_value) for raw_label, raw_value in pairs}
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def parse_action_url(action_cell: str, action: str) -> str | None:
    """Return the first ``href`` in ``action_cell`` that contains ``action``.

    The URL is entity-decoded; ``None`` is returned when no link matches.
    """
    href_pattern = rf'href=["\']([^"\']*{re.escape(action)}[^"\']*)["\']'
    found = re.search(href_pattern, action_cell, re.I)
    if found is None:
        return None
    return unescape(found.group(1))
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def parse_visitor_rows(html: str) -> list[VisitorRow]:
    """Extract visitor records from the ``<tr data-key="...">`` rows of a page.

    Rows with fewer than eight ``<td>`` cells are skipped. The first seven
    cells become text fields; the eighth is searched for the action links.
    """
    row_pattern = re.compile(r'<tr\s+data-key=["\'](\d+)["\']>(.*?)</tr>', re.S | re.I)
    cell_pattern = re.compile(r"<td.*?>(.*?)</td>", re.S | re.I)
    parsed: list[VisitorRow] = []
    for key, inner_html in row_pattern.findall(html):
        cells = cell_pattern.findall(inner_html)
        if len(cells) >= 8:
            index, account, status, used_flow, used_time, remark, masked = (strip_tags(cell) for cell in cells[:7])
            actions = cells[7]
            parsed.append(
                VisitorRow(
                    visitor_id=key,
                    index=index,
                    account=account,
                    status=status,
                    used_flow=used_flow,
                    used_time=used_time,
                    remark=remark,
                    masked_password=masked,
                    update_url=parse_action_url(actions, "/update"),
                    lock_url=parse_action_url(actions, "/lock"),
                    delete_url=parse_action_url(actions, "/delete"),
                )
            )
    return parsed
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def request_spec(method: str, url: str, headers: dict[str, str], data: dict[str, str] | None = None) -> dict[str, Any]:
    """Describe a request as a plain dict; missing or empty ``data`` becomes ``{}``."""
    body = data if data else {}
    return dict(method=method, url=url, headers=headers, data=body)
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def curl_string(method: str, url: str, headers: dict[str, str], data: dict[str, str] | None = None) -> str:
    """Render a request as a shell-quoted ``curl`` command line.

    Form fields are percent-encoded (``[`` and ``]`` are kept in keys) and
    joined into a single ``--data`` argument.
    """
    command = ["curl", "-X", shlex.quote(method)]
    for name, value in headers.items():
        command += ["-H", shlex.quote(f"{name}: {value}")]
    if data:
        pairs = [f"{requests.utils.quote(str(k), safe='[]')}={requests.utils.quote(str(v))}" for k, v in data.items()]
        command += ["--data", shlex.quote("&".join(pairs))]
    command.append(shlex.quote(url))
    return " ".join(command)
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def validate_remark(remark: str) -> None:
    """Raise ``ValueError`` unless ``remark`` is 2-14 ASCII letters or CJK characters."""
    accepted = re.fullmatch(r"[A-Za-z\u4e00-\u9fa5]{2,14}", remark)
    if accepted is None:
        raise ValueError("Remark must be 2-14 Chinese or English letters.")
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def validate_password(password: str) -> None:
    """Check a visitor password against the rule encoded in the pattern below.

    The pattern accepts 8-20 characters drawn from letters, digits and the
    listed punctuation, and rejects strings made of only one of those
    classes. A password therefore needs at least two classes, not all
    three, and the error message now states that.

    Raises:
        ValueError: If ``password`` does not satisfy the rule.
    """
    pattern = re.compile(r"^(?![a-zA-Z]+$)(?!\d+$)(?![!@#$%^&*()_\-+=\{\}\[\]|\\:;\"',.?`~/<>]+$)[a-zA-Z\d!@#$%^&*()_\-+=\{\}\[\]|\\:;\"',.?`~/<>]{8,20}$")
    if not pattern.fullmatch(password):
        raise ValueError("Password must be 8-20 chars and combine at least two of: letters, digits, special characters.")
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def is_retryable_captcha_error(payload: dict[str, Any]) -> bool:
    """Report whether a failure payload's ``message`` looks captcha-related.

    True when the message contains "验证码", or contains "captcha" or "verify"
    in any letter case.
    """
    text = str(payload.get("message", ""))
    if "验证码" in text:
        return True
    folded = text.lower()
    return any(marker in folded for marker in ("captcha", "verify"))
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def run_selftest() -> dict[str, Any]:
    """Run the parsers and validators against the bundled sample pages.

    Checks are explicit raises rather than ``assert`` statements, so they
    still run under ``python -O``, which strips asserts.

    Returns:
        ``{"ok": True, "visitors": <number of parsed visitor rows>}``.

    Raises:
        AssertionError: If a parsed value differs from the expected one.
        RuntimeError: If a required login value is missing from the sample.
        ValueError: If a sample remark or password fails validation.
    """

    def check(condition: bool, message: str) -> None:
        # Same exception type a failing ``assert`` would raise.
        if not condition:
            raise AssertionError(message)

    bootstrap = LoginBootstrap(
        csrf_param=require_value(extract_meta_content(SAMPLE_LOGIN_HTML, "csrf-param"), "csrf-param"),
        csrf_token=require_value(extract_meta_content(SAMPLE_LOGIN_HTML, "csrf-token"), "csrf-token"),
        public_key=require_value(extract_login_public_key(SAMPLE_LOGIN_HTML), "public"),
        captcha_url=require_value(extract_captcha_url(SAMPLE_LOGIN_HTML), "captcha"),
        fetched_at="2026-06-15T03:00:00",
    )
    home = parse_home_user_info(SAMPLE_HOME_HTML)
    tables = parse_tables(SAMPLE_HOME_HTML)
    visitors = parse_visitor_rows(SAMPLE_VISITOR_HTML)
    validate_remark("GuestB")
    validate_password("Temp!234")
    check(bootstrap.csrf_param == "_csrf-8800", "unexpected csrf_param in sample login page")
    check(home["用户名"] == "20260000000", "unexpected username in sample home page")
    check(find_table_with_headers(tables, ["产品ID", "产品名称"]) is not None, "product table not found in sample home page")
    check(len(visitors) == 2, "expected two visitor rows in sample visitor page")
    check(visitors[1].remark == "GuestB", "unexpected remark in second sample visitor row")
    return {"ok": True, "visitors": len(visitors)}
|