ChatNet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chatnet/ecnu/portal.py ADDED
@@ -0,0 +1,685 @@
1
+ """HTTP client and parsers for the ECNU self-service portal."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+ import re
8
+ import shlex
9
+ from dataclasses import asdict, dataclass
10
+ from datetime import datetime
11
+ from html import unescape
12
+ from html.parser import HTMLParser
13
+ from http.cookies import SimpleCookie
14
+ from pathlib import Path
15
+ from typing import Any
16
+ from urllib.parse import urljoin, urlparse
17
+
18
+ import requests
19
+
20
# Default origin of the portal; callers pass a base URL to PortalClient.
BASE_URL = "https://login.ecnu.edu.cn:8800"

# Login flow endpoints: the login page itself (GET for bootstrap data, POST to
# submit the form), the two XHR pre-validation endpoints, and logout.
LOGIN_PATH = "/login"
VALIDATE_USER_PATH = "/site/validate-user"
VALIDATE_SMS_PATH = "/site/validate-smscode"
LOGOUT_PATH = "/site/logout"

# Read-only pages scraped after authentication.
HOME_PATH = "/home"
USER_INFO_PATH = "/users"
AUTH_LOG_PATH = "/log/auth"
DETAIL_LOG_PATH = "/log/detail"

# Visitor (guest account) management endpoints; update/delete/lock take the
# visitor id as an ``id`` query parameter.
VISITOR_LIST_PATH = "/visitors/manual/index"
VISITOR_CREATE_PATH = "/visitors/manual/create"
VISITOR_UPDATE_PATH = "/visitors/manual/update"
VISITOR_DELETE_PATH = "/visitors/manual/delete"
VISITOR_LOCK_PATH = "/visitors/manual/lock"
34
+
35
# Headers installed on the requests session by PortalClient.__init__ so every
# request carries a desktop-Chrome User-Agent and a Chinese-first
# Accept-Language.
BROWSER_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
43
+
44
# Offline fixture for run_selftest(): a minimal login page carrying the CSRF
# meta tags, the hidden RSA public-key input and the captcha image that the
# extract_* helpers look for.
SAMPLE_LOGIN_HTML = """
<html><head>
<meta name="csrf-param" content="_csrf-8800">
<meta name="csrf-token" content="sample-token">
</head><body>
<form id="login-form" action="/login" method="post">
<input type="hidden" name="_csrf-8800" value="sample-token">
<input type="hidden" id="public" value="-----BEGIN PUBLIC KEY-----\nSAMPLE\n-----END PUBLIC KEY-----\n">
<img id="loginform-verifycode-image" src="/site/captcha?v=sample">
</form></body></html>
"""
55
+
56
# Offline fixture for run_selftest(): a home page fragment with the labelled
# user-info list plus the "online" table (用户名 / IP地址) and the "product"
# table (产品ID / 产品名称) that home_summary() looks up by header.
SAMPLE_HOME_HTML = """
<div class="wrap home-patch">
<div class="panel panel-default">
<ul class="list-group">
<li class="list-group-item"><label class="list-group-label">用户名</label>20260000000</li>
<li class="list-group-item"><label class="list-group-label">姓名</label>Test User</li>
<li class="list-group-item"><label class="list-group-label">状态</label><a class="btn btn-xs btn-success">正常</a></li>
</ul>
</div>
<table><thead><tr><th>用户名</th><th>IP地址</th></tr></thead><tbody><tr><td>u</td><td>1.1.1.1</td></tr></tbody></table>
<table><thead><tr><th>产品ID</th><th>产品名称</th></tr></thead><tbody><tr><td>2</td><td>统一身份认证-全日制学生</td></tr></tbody></table>
</div>
"""
69
+
70
# Offline fixture for run_selftest(): a visitor list page with two
# ``<tr data-key=...>`` rows of eight cells each, the shape that
# parse_visitor_rows() expects (the last cell holds the action links).
SAMPLE_VISITOR_HTML = """
<html><head>
<meta name="csrf-param" content="_csrf-8800">
<meta name="csrf-token" content="sample-token">
</head><body>
<div class="summary">第<b>1-2</b>条,共<b>2</b>条数据.</div>
<table>
<thead><tr><th>#</th><th>账号</th><th>状态</th><th>已用流量</th><th>已用时长</th><th>备注信息</th><th>密码</th><th>操作</th></tr></thead>
<tbody>
<tr data-key="10256701"><td>1</td><td>20260000000m1</td><td>正常</td><td>0byte</td><td>0秒</td><td>temp</td><td>******</td><td><a href="/visitors/manual/update?id=10256701" title="更新"></a> <a href="/visitors/manual/lock?id=10256701" title="锁定"></a> <a href="/visitors/manual/delete?id=10256701" title="销户"></a></td></tr>
<tr data-key="10256703"><td>2</td><td>20260000000m2</td><td>正常</td><td>0byte</td><td>0秒</td><td>GuestB</td><td>******</td><td><a href="/visitors/manual/update?id=10256703" title="更新"></a> <a href="/visitors/manual/lock?id=10256703" title="锁定"></a> <a href="/visitors/manual/delete?id=10256703" title="销户"></a></td></tr>
</tbody></table>
</body></html>
"""
84
+
85
+
86
@dataclass
class LoginBootstrap:
    """Values scraped from the login page that a login attempt needs.

    Instances are serialised with ``asdict`` into the state file under the
    ``login_bootstrap`` key and rebuilt from it with ``LoginBootstrap(**raw)``.
    """

    # Name of the CSRF form field (content of the "csrf-param" meta tag).
    csrf_param: str
    # CSRF token value (content of the "csrf-token" meta tag).
    csrf_token: str
    # PEM text of the RSA public key used to encrypt the password.
    public_key: str
    # ``src`` of the captcha image as found in the page.
    captcha_url: str
    # Local ISO-8601 timestamp (seconds precision) of when the page was fetched.
    fetched_at: str
93
+
94
+
95
@dataclass
class VisitorRow:
    """One row of the visitor list table, as produced by parse_visitor_rows."""

    # Value of the row's ``data-key`` attribute.
    visitor_id: str
    # Cells 0-6 of the row, with markup stripped, in table order.
    index: str
    account: str
    status: str
    used_flow: str
    used_time: str
    remark: str
    masked_password: str
    # Action links taken from the last cell; None when the link is absent.
    update_url: str | None
    lock_url: str | None
    delete_url: str | None
108
+
109
+
110
class SimpleTableParser(HTMLParser):
    """Collect the cell text of every ``<table>`` in an HTML document.

    After ``feed()``, ``tables`` holds one entry per closed table. Each table
    is a list of rows, each row a list of ``{"text": str, "is_header": bool}``
    dicts (``is_header`` is True for ``<th>`` cells). Rows without any cell
    are skipped. Nested tables are not supported: an inner ``<table>`` tag
    restarts collection.
    """

    def __init__(self) -> None:
        # convert_charrefs=True (the default) means handle_data() already
        # receives entity-decoded text, so cells must not be unescaped again.
        super().__init__(convert_charrefs=True)
        self.tables: list[list[list[dict[str, Any]]]] = []
        self._current_table: list[list[dict[str, Any]]] | None = None
        self._current_row: list[dict[str, Any]] | None = None
        self._current_cell: list[str] | None = None
        self._current_cell_is_header = False

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Open a table, row or cell; turn ``<br>`` inside a cell into a newline."""
        if tag == "table":
            self._current_table = []
        elif tag == "tr" and self._current_table is not None:
            self._current_row = []
        elif tag in {"td", "th"} and self._current_row is not None:
            self._current_cell = []
            self._current_cell_is_header = tag == "th"
        elif tag == "br" and self._current_cell is not None:
            # The newline is collapsed to a single space when the cell closes,
            # which keeps text on either side of the break separated.
            self._current_cell.append("\n")

    def handle_endtag(self, tag: str) -> None:
        """Close the innermost open table, row or cell and store its content."""
        if tag == "table" and self._current_table is not None:
            self.tables.append(self._current_table)
            self._current_table = None
        elif tag == "tr" and self._current_table is not None and self._current_row is not None:
            if self._current_row:
                self._current_table.append(self._current_row)
            self._current_row = None
        elif tag in {"td", "th"} and self._current_row is not None and self._current_cell is not None:
            # Collapse whitespace only; the text is already entity-decoded, so
            # a second unescape would corrupt literal "&lt;" style content.
            text = re.sub(r"\s+", " ", "".join(self._current_cell)).strip()
            self._current_row.append({"text": text, "is_header": self._current_cell_is_header})
            self._current_cell = None
            self._current_cell_is_header = False

    def handle_data(self, data: str) -> None:
        """Accumulate text that appears inside an open cell."""
        if self._current_cell is not None:
            self._current_cell.append(data)
154
+
155
+
156
class PortalClient:
    """Session-based client for the ECNU self-service portal.

    Wraps a ``requests.Session`` and persists its cookies plus login metadata
    to ``state_file`` after every request made through :meth:`get` or
    :meth:`post`, so a later process can resume the same portal session.
    """

    def __init__(
        self,
        base_url: str,
        state_file: Path,
        cookie_header: str | None = None,
        timeout: int = 20,
    ) -> None:
        """Create a client, restoring cookies from disk and/or a raw header.

        Args:
            base_url: Portal origin; a trailing slash is stripped.
            state_file: JSON file used to persist cookies and login state.
            cookie_header: Optional ``Cookie`` header value whose cookies are
                added on top of the persisted ones.
            timeout: Default per-request timeout in seconds.
        """
        self.base_url = base_url.rstrip("/")
        self.state_file = state_file
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update(BROWSER_HEADERS)
        self.state = load_json(self.state_file)
        state_cookies = self.state.get("cookies") or {}
        if state_cookies:
            requests.utils.add_dict_to_cookiejar(self.session.cookies, state_cookies)
        if cookie_header:
            requests.utils.add_dict_to_cookiejar(self.session.cookies, parse_cookie_header(cookie_header))

    # ------------------------------------------------------------------ #
    # Low-level plumbing
    # ------------------------------------------------------------------ #

    def _url(self, path: str) -> str:
        """Resolve a portal path (with or without leading slash) to a full URL."""
        return urljoin(self.base_url + "/", path.lstrip("/"))

    @staticmethod
    def _visitor_path(action_path: str, visitor_id: str) -> str:
        """Build ``<action_path>?id=<visitor_id>`` with the id percent-encoded.

        Encoding keeps characters such as ``&`` or ``#`` in an id from adding
        query parameters or truncating the URL; numeric ids are unchanged.
        """
        return f"{action_path}?id={requests.utils.quote(str(visitor_id), safe='')}"

    def _save_state(self, extra: dict[str, Any] | None = None) -> None:
        """Persist the current cookies, base URL and any ``extra`` keys."""
        payload = dict(self.state)
        payload["base_url"] = self.base_url
        payload["cookies"] = requests.utils.dict_from_cookiejar(self.session.cookies)
        if extra:
            payload.update(extra)
        save_json(self.state_file, payload)
        self.state = payload

    def cookie_header(self) -> str:
        """Return the session cookies formatted as a ``Cookie`` header value."""
        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        return "; ".join(f"{k}={v}" for k, v in cookies.items())

    def _request(self, method: str, path: str, **kwargs: Any) -> requests.Response:
        """Send a GET or POST through the session, then persist the state.

        ``timeout`` and ``allow_redirects`` are only defaults: a caller that
        passes them explicitly overrides them instead of triggering a
        duplicate-keyword ``TypeError``.
        """
        kwargs.setdefault("timeout", self.timeout)
        kwargs.setdefault("allow_redirects", True)
        sender = self.session.get if method == "GET" else self.session.post
        resp = sender(self._url(path), **kwargs)
        self._save_state()
        return resp

    def get(self, path: str, **kwargs: Any) -> requests.Response:
        """GET ``path`` relative to the portal and persist updated cookies."""
        return self._request("GET", path, **kwargs)

    def post(self, path: str, **kwargs: Any) -> requests.Response:
        """POST to ``path`` relative to the portal and persist updated cookies."""
        return self._request("POST", path, **kwargs)

    def _authenticated_response(self, resp: requests.Response) -> requests.Response:
        """Return ``resp`` unless it failed or ended up on the login page.

        Raises:
            requests.HTTPError: On a 4xx/5xx status.
            RuntimeError: If the final URL is the login page.
        """
        resp.raise_for_status()
        if urlparse(resp.url).path == LOGIN_PATH:
            raise RuntimeError("Not authenticated: request was redirected to the login page.")
        return resp

    def fetch_page(self, path: str) -> requests.Response:
        """GET a page that requires an authenticated session."""
        return self._authenticated_response(self.get(path))

    def fetch_csrf(self, path: str) -> tuple[str, str, str]:
        """Fetch an authenticated page and return ``(csrf_param, csrf_token, html)``.

        Raises:
            RuntimeError: If either CSRF meta tag is missing from the page.
        """
        resp = self.fetch_page(path)
        html = resp.text
        param = extract_meta_content(html, "csrf-param")
        token = extract_meta_content(html, "csrf-token")
        if not param or not token:
            raise RuntimeError(f"Failed to locate CSRF meta tags on {path}.")
        return param, token, html

    # ------------------------------------------------------------------ #
    # Login / logout
    # ------------------------------------------------------------------ #

    def reset_login_session(self) -> None:
        """Drop all cookies and login-related state, then persist the result."""
        self.session.cookies.clear()
        for key in ["authenticated_at", "login_bootstrap", "captcha_path", "username"]:
            self.state.pop(key, None)
        self._save_state()

    def fetch_login_bootstrap(self) -> tuple[LoginBootstrap, str]:
        """Load the login page and extract CSRF data, RSA key and captcha URL.

        Returns:
            The parsed bootstrap (also persisted to the state file) and the
            raw page HTML.

        Raises:
            RuntimeError: If any required element is missing from the page.
        """
        resp = self.get(LOGIN_PATH)
        resp.raise_for_status()
        html = resp.text
        bootstrap = LoginBootstrap(
            csrf_param=require_value(extract_meta_content(html, "csrf-param"), "missing login csrf-param"),
            csrf_token=require_value(extract_meta_content(html, "csrf-token"), "missing login csrf-token"),
            public_key=require_value(extract_login_public_key(html), "missing login RSA public key"),
            captcha_url=require_value(extract_captcha_url(html), "missing login captcha url"),
            fetched_at=datetime.now().isoformat(timespec="seconds"),
        )
        self._save_state({"login_bootstrap": asdict(bootstrap)})
        return bootstrap, html

    def login_init(self, captcha_path: Path) -> dict[str, Any]:
        """Start a fresh login: reset the session and download the captcha.

        Args:
            captcha_path: Where the captcha image bytes are written; parent
                directories are created as needed.
        """
        self.reset_login_session()
        bootstrap, _ = self.fetch_login_bootstrap()
        captcha_path.parent.mkdir(parents=True, exist_ok=True)
        resp = self.session.get(self._url(bootstrap.captcha_url), timeout=self.timeout)
        resp.raise_for_status()
        captcha_path.write_bytes(resp.content)
        self._save_state({"login_bootstrap": asdict(bootstrap), "captcha_path": str(captcha_path)})
        return {
            "captcha_path": str(captcha_path),
            "captcha_url": self._url(bootstrap.captcha_url),
            "csrf_param": bootstrap.csrf_param,
            "fetched_at": bootstrap.fetched_at,
        }

    def login(self, username: str, password: str, verify_code: str, sms_code: str | None = None) -> dict[str, Any]:
        """Run the login flow: validate user, optionally SMS, then submit.

        The returned dict always contains ``validate_user``; ``validate_sms``
        and ``submit_login`` are present only when those steps ran. When SMS
        is required but ``sms_code`` is missing, a ``message`` key explains so.
        """
        bootstrap = self._ensure_login_bootstrap()
        encrypted_password = self._encrypt_password(password, bootstrap.public_key)
        validated = self._validate_user(username, encrypted_password, verify_code, bootstrap)
        result: dict[str, Any] = {
            "validate_user": validated,
            "captcha_url": self._url(bootstrap.captcha_url),
            "state_file": str(self.state_file),
        }
        if not validated.get("success"):
            return result
        if validated.get("inputSms"):
            if not sms_code:
                result["message"] = "SMS verification is required; rerun login with --sms-code."
                return result
            sms_result = self._validate_sms(username, sms_code, bootstrap)
            result["validate_sms"] = sms_result
            if not sms_result.get("success"):
                return result
        submit_result = self._submit_login(username, encrypted_password, verify_code, sms_code, bootstrap)
        result["submit_login"] = submit_result
        return result

    def login_auto(
        self,
        username: str,
        password: str,
        sms_code: str | None,
        rounds: int,
        topk: int,
        captcha_path: Path,
    ) -> dict[str, Any]:
        """Log in by recognising the captcha automatically.

        Each round downloads a new captcha, asks the recogniser for up to
        ``topk`` candidates and tries them in order. The loop stops at the
        first candidate the server accepts, or aborts when the server reports
        an error that is not captcha-related.
        """
        from .captcha import recognize_captcha_topk

        attempts: list[dict[str, Any]] = []
        for round_index in range(1, rounds + 1):
            init_result = self.login_init(captcha_path)
            candidates = recognize_captcha_topk(captcha_path.read_bytes(), topk=topk)
            round_info: dict[str, Any] = {
                "round": round_index,
                "captcha_path": str(captcha_path),
                "captcha_url": init_result["captcha_url"],
                "candidates": candidates,
                "attempts": [],
            }
            for candidate in candidates:
                login_result = self.login(username, password, candidate, sms_code=sms_code)
                candidate_info = {
                    "candidate": candidate,
                    "validate_user": login_result.get("validate_user"),
                    "validate_sms": login_result.get("validate_sms"),
                    "submit_login": login_result.get("submit_login"),
                    "message": login_result.get("message"),
                }
                round_info["attempts"].append(candidate_info)
                validated = login_result.get("validate_user") or {}
                if validated.get("success"):
                    return {
                        "success": bool(login_result.get("submit_login", {}).get("success")),
                        "requires_sms": bool(login_result.get("message")),
                        "login_result": login_result,
                        "attempts": attempts + [round_info],
                    }
                if not is_retryable_captcha_error(validated):
                    return {"success": False, "login_result": login_result, "attempts": attempts + [round_info], "aborted": True}
            attempts.append(round_info)
        return {"success": False, "attempts": attempts, "message": f"Captcha auto-login failed after {rounds} rounds x {topk} candidates."}

    def logout(self) -> dict[str, Any]:
        """Log out; success means the portal redirected back to the login page."""
        csrf_param, csrf_token, _ = self.fetch_csrf(HOME_PATH)
        resp = self.post(LOGOUT_PATH, data={csrf_param: csrf_token}, headers={"Referer": self._url(HOME_PATH)})
        resp.raise_for_status()
        self.state.pop("authenticated_at", None)
        self._save_state()
        return {"success": urlparse(resp.url).path == LOGIN_PATH, "final_url": resp.url}

    def _ensure_login_bootstrap(self) -> LoginBootstrap:
        """Return the persisted login bootstrap, fetching a new one if needed.

        A persisted record whose shape no longer matches ``LoginBootstrap``
        (hand-edited or written by another version) is ignored and replaced
        by a fresh fetch instead of raising ``TypeError``.
        """
        raw = self.state.get("login_bootstrap")
        if isinstance(raw, dict):
            try:
                return LoginBootstrap(**raw)
            except TypeError:
                pass  # missing or unexpected keys: fall through and refetch
        bootstrap, _ = self.fetch_login_bootstrap()
        return bootstrap

    def _encrypt_password(self, password: str, public_key: str) -> str:
        """RSA-encrypt ``password`` (PKCS#1 v1.5) and return it base64-encoded."""
        from Crypto.Cipher import PKCS1_v1_5
        from Crypto.PublicKey import RSA

        rsa_key = RSA.import_key(public_key)
        cipher = PKCS1_v1_5.new(rsa_key)
        return base64.b64encode(cipher.encrypt(password.encode("utf-8"))).decode("ascii")

    def _xhr_login_headers(self, bootstrap: LoginBootstrap) -> dict[str, str]:
        """Headers shared by the XHR validation calls of the login form."""
        return {
            "Accept": "*/*",
            "Origin": self.base_url,
            "Referer": self._url(LOGIN_PATH),
            "X-CSRF-Token": bootstrap.csrf_token,
            "X-Requested-With": "XMLHttpRequest",
        }

    def _validate_user(self, username: str, encrypted_password: str, verify_code: str, bootstrap: LoginBootstrap) -> dict[str, Any]:
        """POST credentials and captcha to the validate-user endpoint; return its JSON."""
        data = {"LoginForm[username]": username, "LoginForm[password]": encrypted_password, "LoginForm[verifyCode]": verify_code}
        resp = self.post(VALIDATE_USER_PATH, headers=self._xhr_login_headers(bootstrap), data=data)
        resp.raise_for_status()
        return json.loads(resp.text)

    def _validate_sms(self, username: str, sms_code: str, bootstrap: LoginBootstrap) -> dict[str, Any]:
        """POST the SMS code to the validate-smscode endpoint; return its JSON."""
        resp = self.post(VALIDATE_SMS_PATH, headers=self._xhr_login_headers(bootstrap), data={"uname": username, "code": sms_code})
        resp.raise_for_status()
        return json.loads(resp.text)

    def _submit_login(
        self,
        username: str,
        encrypted_password: str,
        verify_code: str,
        sms_code: str | None,
        bootstrap: LoginBootstrap,
    ) -> dict[str, Any]:
        """Submit the login form; success means we did not land on the login page."""
        data = {
            bootstrap.csrf_param: bootstrap.csrf_token,
            "LoginForm[username]": username,
            "LoginForm[password]": encrypted_password,
            "LoginForm[verifyCode]": verify_code,
            "LoginForm[smsCode]": sms_code or "",
        }
        resp = self.post(LOGIN_PATH, headers={"Origin": self.base_url, "Referer": self._url(LOGIN_PATH)}, data=data)
        resp.raise_for_status()
        success = urlparse(resp.url).path != LOGIN_PATH
        if success:
            self._save_state({"authenticated_at": datetime.now().isoformat(timespec="seconds"), "login_bootstrap": asdict(bootstrap), "username": username})
        return {"success": success, "final_url": resp.url, "error": None if success else extract_error_summary(resp.text)}

    # ------------------------------------------------------------------ #
    # Read-only pages
    # ------------------------------------------------------------------ #

    def home_summary(self) -> dict[str, Any]:
        """Scrape the home page into user info, online sessions and products."""
        resp = self.fetch_page(HOME_PATH)
        html = resp.text
        tables = parse_tables(html)
        return {
            "user_info": parse_home_user_info(html),
            "online_info": table_to_dicts(find_table_with_headers(tables, ["用户名", "IP地址"]) or {"headers": [], "rows": []}),
            "product_info": table_to_dicts(find_table_with_headers(tables, ["产品ID", "产品名称"]) or {"headers": [], "rows": []}),
        }

    def user_info(self) -> dict[str, str]:
        """Return the first table of the user page as a label -> value mapping."""
        resp = self.fetch_page(USER_INFO_PATH)
        tables = parse_tables(resp.text)
        return parse_detail_view_table(tables[0] if tables else {"headers": [], "rows": []})

    def auth_logs(self, start_time: str | None, end_time: str | None, limit: int | None) -> dict[str, Any]:
        """Query the authentication log, optionally filtered by time range."""
        return self._query_log_page(AUTH_LOG_PATH, "AuthLogSearch[start_time]", "AuthLogSearch[end_time]", start_time, end_time, limit)

    def detail_logs(self, start_time: str | None, end_time: str | None, limit: int | None) -> dict[str, Any]:
        """Query the detail log, optionally filtered by time range."""
        return self._query_log_page(DETAIL_LOG_PATH, "DetailLogSearch[start_time]", "DetailLogSearch[end_time]", start_time, end_time, limit)

    def _query_log_page(
        self,
        path: str,
        start_field: str,
        end_field: str,
        start_time: str | None,
        end_time: str | None,
        limit: int | None,
    ) -> dict[str, Any]:
        """Fetch a log page, apply the optional time filter, return its rows.

        The filter form is only posted when a bound is given; ``limit``
        truncates the row list (``None`` keeps every row on the page).
        """
        csrf_param, csrf_token, html = self.fetch_csrf(path)
        if start_time or end_time:
            data = {csrf_param: csrf_token, start_field: start_time or "", end_field: end_time or ""}
            html = self._authenticated_response(self.post(path, data=data, headers={"Referer": self._url(path)})).text
        table = first_multirow_table(parse_tables(html))
        rows = table_to_dicts(table) if table else []
        if limit is not None:
            rows = rows[:limit]
        return {"count": len(rows), "rows": rows, "summary": extract_summary_text(html)}

    # ------------------------------------------------------------------ #
    # Visitor management
    # ------------------------------------------------------------------ #

    def list_visitors(self) -> dict[str, Any]:
        """Return every visitor row on the list page plus the summary text."""
        _, _, html = self.fetch_csrf(VISITOR_LIST_PATH)
        rows = [asdict(row) for row in parse_visitor_rows(html)]
        return {"count": len(rows), "summary": extract_summary_text(html), "rows": rows}

    def get_visitor(self, visitor_id: str | None = None, account: str | None = None) -> dict[str, Any]:
        """Return the first visitor matching ``visitor_id`` or ``account``.

        Raises:
            ValueError: If neither selector is given or no row matches.
        """
        if not visitor_id and not account:
            raise ValueError("Provide either id or account.")
        rows = parse_visitor_rows(self.fetch_csrf(VISITOR_LIST_PATH)[2])
        for row in rows:
            if visitor_id and row.visitor_id == visitor_id:
                return asdict(row)
            if account and row.account == account:
                return asdict(row)
        raise ValueError("Visitor not found.")

    def create_visitor(self, remark: str, dry_run: bool = False) -> dict[str, Any]:
        """Create a visitor account with the given remark (XHR, JSON response)."""
        validate_remark(remark)
        csrf_param, csrf_token, _ = self.fetch_csrf(VISITOR_LIST_PATH)
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Origin": self.base_url,
            "Referer": self._url(VISITOR_LIST_PATH),
            "X-CSRF-Token": csrf_token,
            "X-Requested-With": "XMLHttpRequest",
        }
        data = {"remark": remark, "agreement": "true"}
        return self._maybe_post(VISITOR_CREATE_PATH, headers, data, dry_run, json_response=True, csrf_param=csrf_param)

    def update_visitor(self, visitor_id: str, remark: str, password: str, dry_run: bool = False) -> dict[str, Any]:
        """Change a visitor's remark and password via the update form."""
        validate_remark(remark)
        validate_password(password)
        path = self._visitor_path(VISITOR_UPDATE_PATH, visitor_id)
        csrf_param, csrf_token, _ = self.fetch_csrf(path)
        headers = {"Content-Type": "application/x-www-form-urlencoded", "Origin": self.base_url, "Referer": self._url(path)}
        data = {
            csrf_param: csrf_token,
            "VisitorsPasswordForm[remark]": remark,
            "VisitorsPasswordForm[password]": password,
            "VisitorsPasswordForm[password1]": password,
        }
        return self._maybe_post(path, headers, data, dry_run)

    def delete_visitor(self, visitor_id: str, dry_run: bool = False) -> dict[str, Any]:
        """Delete (close) the visitor account with the given id."""
        return self._post_csrf_action(VISITOR_LIST_PATH, self._visitor_path(VISITOR_DELETE_PATH, visitor_id), dry_run)

    def lock_visitor(self, visitor_id: str, dry_run: bool = False) -> dict[str, Any]:
        """Lock the visitor account with the given id."""
        return self._post_csrf_action(VISITOR_LIST_PATH, self._visitor_path(VISITOR_LOCK_PATH, visitor_id), dry_run)

    def _post_csrf_action(self, referer_path: str, action_path: str, dry_run: bool) -> dict[str, Any]:
        """POST only a CSRF token to ``action_path``, using ``referer_path`` as source page."""
        csrf_param, csrf_token, _ = self.fetch_csrf(referer_path)
        headers = {"Content-Type": "application/x-www-form-urlencoded", "Origin": self.base_url, "Referer": self._url(referer_path)}
        return self._maybe_post(action_path, headers, {csrf_param: csrf_token}, dry_run)

    def _maybe_post(
        self,
        path: str,
        headers: dict[str, str],
        data: dict[str, str],
        dry_run: bool,
        json_response: bool = False,
        csrf_param: str | None = None,
    ) -> dict[str, Any]:
        """Send a POST, or with ``dry_run`` only describe it.

        In dry-run mode the request spec is returned together with an
        equivalent ``curl`` command and nothing is sent. Otherwise the result
        holds the spec and either the decoded JSON body (``json_response``)
        or a status summary whose ``ok`` flag checks for the portal's
        "操作成功" marker in the response text.
        """
        spec = request_spec("POST", self._url(path), headers, data)
        if csrf_param:
            spec["csrf_param"] = csrf_param
        if dry_run:
            spec["dry_run"] = True
            spec["curl"] = curl_string("POST", self._url(path), headers, data)
            return spec
        resp = self.post(path, headers=headers, data=data)
        resp.raise_for_status()
        if json_response:
            return {"request": spec, "response": resp.json()}
        return {"request": spec, "response": {"status_code": resp.status_code, "ok": "操作成功" in resp.text, "url": resp.url}}
494
+
495
+
496
def clean_text(text: str) -> str:
    """Decode HTML entities in ``text`` and normalise its whitespace.

    Runs of whitespace become a single space and the ends are trimmed; a
    falsy ``text`` yields the empty string.
    """
    decoded = unescape(text or "")
    collapsed = re.sub(r"\s+", " ", decoded)
    return collapsed.strip()
498
+
499
+
500
def strip_tags(fragment: str) -> str:
    """Return the visible text of an HTML fragment.

    Every ``<...>`` tag is replaced by a space before the text is passed to
    ``clean_text``, so adjacent elements do not run together.
    """
    without_markup = re.sub(r"<[^>]+>", " ", fragment, flags=re.S)
    return clean_text(without_markup)
502
+
503
+
504
def require_value(value: str | None, message: str) -> str:
    """Return ``value`` when it is non-empty, otherwise fail loudly.

    Raises:
        RuntimeError: With ``message`` when ``value`` is ``None`` or empty.
    """
    if value:
        return value
    raise RuntimeError(message)
508
+
509
+
510
def extract_meta_content(html: str, meta_name: str) -> str | None:
    """Return the ``content`` of the first ``<meta name=meta_name>`` tag.

    Attributes may appear in any order, and a value may contain the other
    quote character (``content="it's"``). The name comparison is
    case-insensitive and HTML entities in the value are decoded, which is
    what a browser would submit.

    Returns:
        The decoded content, or ``None`` when no matching tag with a
        non-empty ``content`` attribute exists.
    """
    wanted = meta_name.lower()
    for tag in re.finditer(r"<meta\b([^>]*)>", html, re.I):
        # The back-reference makes a value end at the quote that opened it.
        attrs = {
            name.lower(): value
            for name, _quote, value in re.findall(r'([\w:-]+)\s*=\s*(["\'])(.*?)\2', tag.group(1), re.S)
        }
        if attrs.get("name", "").lower() == wanted and attrs.get("content"):
            return unescape(attrs["content"])
    return None
513
+
514
+
515
def extract_login_public_key(html: str) -> str | None:
    """Pull the PEM public key out of the hidden ``id="public"`` input.

    Returns the entity-decoded, trimmed key text, or ``None`` when the input
    is not present in ``html``.
    """
    found = re.search(
        r'<input[^>]+id=["\']public["\'][^>]+value=["\'](.*?-----END PUBLIC KEY-----\s*)["\']',
        html,
        re.S | re.I,
    )
    if found is None:
        return None
    return unescape(found.group(1)).strip()
518
+
519
+
520
def extract_captcha_url(html: str) -> str | None:
    """Return the ``src`` of the login captcha image, entity-decoded.

    ``None`` is returned when the ``loginform-verifycode-image`` element is
    not found.
    """
    found = re.search(
        r'<img[^>]+id=["\']loginform-verifycode-image["\'][^>]+src=["\']([^"\']+)["\']',
        html,
        re.I,
    )
    if found is None:
        return None
    return unescape(found.group(1))
523
+
524
+
525
def extract_error_summary(html: str) -> str | None:
    """Collect the messages listed in the page's error-summary alert(s).

    Every ``<li>`` of each ``alert alert-danger error-summary`` block is
    reported, not just the first, so a form that fails several validations
    yields all of its messages joined by ``"; "``.

    Returns:
        The joined, tag-free messages, or ``None`` when no error item exists.
    """
    items: list[str] = []
    # A block runs from the alert's opening tag to the end of its list (or to
    # the end of the document when the list is not closed).
    for block in re.finditer(r'<div class="alert alert-danger error-summary".*?(?:</ul>|\Z)', html, re.S | re.I):
        items.extend(re.findall(r"<li>(.*?)</li>", block.group(0), re.S | re.I))
    return clean_text("; ".join(strip_tags(x) for x in items)) if items else None
528
+
529
+
530
def parse_cookie_header(cookie_header: str) -> dict[str, str]:
    """Parse a ``Cookie`` header value into a name -> value mapping.

    Each ``;``-separated pair is parsed on its own. ``SimpleCookie`` is tried
    first so quoted values are unquoted as before; a pair it rejects (for
    example a name containing ``[``) is taken verbatim instead of silently
    ending the parse and dropping every cookie after it.

    Pairs without ``=``, ``$``-prefixed names and cookie attribute names such
    as ``Path`` are ignored, so a pasted ``Set-Cookie`` style string still
    yields only real cookies.
    """
    attribute_names = frozenset(
        {"expires", "path", "comment", "domain", "max-age", "secure", "httponly", "version", "samesite"}
    )
    cookies: dict[str, str] = {}
    for piece in cookie_header.split(";"):
        piece = piece.strip()
        if not piece:
            continue
        strict = SimpleCookie()
        strict.load(piece)
        if strict:
            for name, morsel in strict.items():
                cookies[name] = morsel.value
            continue
        # Lenient fallback for pairs the strict parser refuses.
        name, sep, value = piece.partition("=")
        name = name.strip()
        if not sep or not name or name.startswith("$") or name.lower() in attribute_names:
            continue
        cookies[name] = value.strip()
    return cookies
534
+
535
+
536
def load_json(path: Path) -> dict[str, Any]:
    """Load a JSON object from ``path``.

    Returns:
        The decoded object, or an empty dict when the file does not exist or
        is empty (for instance after an interrupted write).

    Raises:
        ValueError: If the file holds invalid JSON (``json.JSONDecodeError``)
            or valid JSON that is not an object.
    """
    try:
        # Read directly instead of checking exists() first, so a file removed
        # in between cannot cause a spurious error.
        raw = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}
    if not raw.strip():
        return {}
    data = json.loads(raw)
    if not isinstance(data, dict):
        raise ValueError(f"Expected a JSON object in {path}, got {type(data).__name__}.")
    return data
540
+
541
+
542
def save_json(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as indented UTF-8 JSON.

    Missing parent directories are created; non-ASCII characters are written
    as-is rather than escaped.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding="utf-8")
545
+
546
+
547
def parse_tables(html: str) -> list[dict[str, Any]]:
    """Parse every table in ``html`` into ``{"headers": [...], "rows": [...]}``.

    The first row of a table is treated as its header only when all of its
    cells are ``<th>``; otherwise ``headers`` is empty and every row counts
    as data. Cells are reduced to their text.
    """
    collector = SimpleTableParser()
    collector.feed(html)

    parsed: list[dict[str, Any]] = []
    for grid in collector.tables:
        leading = grid[0] if grid else []
        has_header = bool(leading) and all(cell["is_header"] for cell in leading)
        body = grid[1:] if has_header else grid
        parsed.append(
            {
                "headers": [cell["text"] for cell in leading] if has_header else [],
                "rows": [[cell["text"] for cell in row] for row in body],
            }
        )
    return parsed
564
+
565
+
566
def table_to_dicts(table: dict[str, Any]) -> list[dict[str, str]]:
    """Convert a parsed table into one dict per row.

    With headers, each row is keyed by header name: short rows are padded
    with ``""`` and surplus cells are dropped. Without headers, cells are
    keyed by their position as a string (``"0"``, ``"1"``, ...).
    """
    headers = table.get("headers") or []
    rows = table.get("rows") or []

    if headers:
        width = len(headers)
        records = []
        for row in rows:
            padded = row + [""] * max(0, width - len(row))
            # zip stops at the last header, which discards any extra cells.
            records.append(dict(zip(headers, padded)))
        return records

    return [{str(position): cell for position, cell in enumerate(row)} for row in rows]
572
+
573
+
574
def parse_detail_view_table(table: dict[str, Any]) -> dict[str, str]:
    """Read a two-column "label / value" table into a mapping.

    Rows with fewer than two cells are skipped; only the first two cells of
    longer rows are used.
    """
    details: dict[str, str] = {}
    for row in table.get("rows") or []:
        if len(row) < 2:
            continue
        details[row[0]] = row[1]
    return details
576
+
577
+
578
def first_multirow_table(tables: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the first table that has both headers and data rows, if any."""
    candidates = (t for t in tables if t.get("headers") and t.get("rows"))
    return next(candidates, None)
583
+
584
+
585
def find_table_with_headers(tables: list[dict[str, Any]], expected: list[str]) -> dict[str, Any] | None:
    """Return the first table whose headers start with ``expected``.

    Extra trailing headers are allowed; ``None`` is returned when no table
    matches.
    """
    prefix_len = len(expected)
    for candidate in tables:
        header_row = candidate.get("headers") or []
        if len(header_row) < prefix_len:
            continue
        if header_row[:prefix_len] == expected:
            return candidate
    return None
591
+
592
+
593
def extract_summary_text(html: str) -> str | None:
    """Return the text of the first ``<div class="summary">``, or ``None``."""
    found = re.search(r'<div class="summary">(.*?)</div>', html, re.S | re.I)
    if found is None:
        return None
    return strip_tags(found.group(1))
596
+
597
+
598
def parse_home_user_info(html: str) -> dict[str, str]:
    """Map each label of the home page's list-group to its displayed value.

    Every ``list-group-item`` contributes one entry: the cleaned label text
    as key and the tag-free remainder of the item as value. A repeated label
    keeps its last value.
    """
    pairs = re.findall(
        r'<li class="list-group-item">.*?<label class="list-group-label">(.*?)</label>(.*?)</li>',
        html,
        re.S | re.I,
    )
    return {clean_text(raw_label): strip_tags(raw_value) for raw_label, raw_value in pairs}
603
+
604
+
605
def parse_action_url(action_cell: str, action: str) -> str | None:
    """Find the first link in ``action_cell`` whose href contains ``action``.

    Returns the entity-decoded href, or ``None`` when no such link exists.
    """
    href_pattern = rf'href=["\']([^"\']*{re.escape(action)}[^"\']*)["\']'
    found = re.search(href_pattern, action_cell, re.I)
    if found is None:
        return None
    return unescape(found.group(1))
608
+
609
+
610
def parse_visitor_rows(html: str) -> list[VisitorRow]:
    """Extract the visitor accounts listed on the visitor index page.

    Each ``<tr data-key="<digits>">`` with at least eight cells becomes one
    ``VisitorRow``: the first seven cells are reduced to text and the eighth
    is searched for the update/lock/delete links. Rows with fewer cells are
    ignored.
    """
    row_pattern = re.compile(r'<tr\s+data-key=["\'](\d+)["\']>(.*?)</tr>', re.S | re.I)
    cell_pattern = re.compile(r"<td.*?>(.*?)</td>", re.S | re.I)

    parsed: list[VisitorRow] = []
    for found in row_pattern.finditer(html):
        cells = cell_pattern.findall(found.group(2))
        if len(cells) < 8:
            continue
        index, account, status, used_flow, used_time, remark, masked = (strip_tags(cell) for cell in cells[:7])
        actions = cells[7]
        parsed.append(
            VisitorRow(
                visitor_id=found.group(1),
                index=index,
                account=account,
                status=status,
                used_flow=used_flow,
                used_time=used_time,
                remark=remark,
                masked_password=masked,
                update_url=parse_action_url(actions, "/update"),
                lock_url=parse_action_url(actions, "/lock"),
                delete_url=parse_action_url(actions, "/delete"),
            )
        )
    return parsed
633
+
634
+
635
def request_spec(method: str, url: str, headers: dict[str, str], data: dict[str, str] | None = None) -> dict[str, Any]:
    """Describe an HTTP request as a plain dict (an empty ``data`` becomes ``{}``)."""
    payload = data if data else {}
    return {
        "method": method,
        "url": url,
        "headers": headers,
        "data": payload,
    }
637
+
638
+
639
def curl_string(method: str, url: str, headers: dict[str, str], data: dict[str, str] | None = None) -> str:
    """Render a request as a shell-safe ``curl`` command line.

    Headers become ``-H`` options and ``data`` (when non-empty) a single
    URL-encoded ``--data`` argument; square brackets in field names are left
    unescaped. Every argument is quoted with ``shlex.quote``.
    """
    command = ["curl", "-X", shlex.quote(method)]
    for name, value in headers.items():
        command += ["-H", shlex.quote(f"{name}: {value}")]
    if data:
        pairs = []
        for field, content in data.items():
            encoded_field = requests.utils.quote(str(field), safe='[]')
            encoded_content = requests.utils.quote(str(content))
            pairs.append(f"{encoded_field}={encoded_content}")
        command += ["--data", shlex.quote("&".join(pairs))]
    command.append(shlex.quote(url))
    return " ".join(command)
648
+
649
+
650
def validate_remark(remark: str) -> None:
    """Check that a visitor remark is 2-14 Chinese or English letters.

    Raises:
        ValueError: If ``remark`` contains any other character or has the
            wrong length.
    """
    allowed = re.fullmatch(r"[A-Za-z\u4e00-\u9fa5]{2,14}", remark)
    if allowed is None:
        raise ValueError("Remark must be 2-14 Chinese or English letters.")
653
+
654
+
655
def validate_password(password: str) -> None:
    """Check a visitor password against the rule encoded in the pattern.

    The pattern accepts 8-20 characters drawn from letters, digits and the
    listed special characters, and rejects passwords made of a single class
    only. A password therefore needs at least two of the three classes; the
    error message says so (it previously claimed all three were required,
    which the pattern never enforced).

    Raises:
        ValueError: If ``password`` does not satisfy the rule.
    """
    pattern = re.compile(r"^(?![a-zA-Z]+$)(?!\d+$)(?![!@#$%^&*()_\-+=\{\}\[\]|\\:;\"',.?`~/<>]+$)[a-zA-Z\d!@#$%^&*()_\-+=\{\}\[\]|\\:;\"',.?`~/<>]{8,20}$")
    if not pattern.fullmatch(password):
        raise ValueError("Password must be 8-20 chars and combine at least two of: letters, digits, special characters.")
659
+
660
+
661
def is_retryable_captcha_error(payload: dict[str, Any]) -> bool:
    """Tell whether a validation failure looks like a wrong-captcha error.

    The payload's ``message`` is checked for "验证码", or case-insensitively
    for "captcha" or "verify".
    """
    message = str(payload.get("message", ""))
    if "验证码" in message:
        return True
    lowered = message.lower()
    return any(keyword in lowered for keyword in ("captcha", "verify"))
665
+
666
+
667
def run_selftest() -> dict[str, Any]:
    """Exercise the parsers and validators against the bundled sample HTML.

    No network access is needed. The checks are explicit ``if`` tests rather
    than ``assert`` statements so they still run under ``python -O``.

    Returns:
        ``{"ok": True, "visitors": <number of parsed sample visitors>}``.

    Raises:
        AssertionError: If a parsed value differs from what the samples hold.
        RuntimeError: If a required login-page element cannot be extracted.
        ValueError: If the sample remark or password fails validation.
    """

    def _expect(condition: bool, description: str) -> None:
        if not condition:
            raise AssertionError(f"selftest failed: {description}")

    bootstrap = LoginBootstrap(
        csrf_param=require_value(extract_meta_content(SAMPLE_LOGIN_HTML, "csrf-param"), "csrf-param"),
        csrf_token=require_value(extract_meta_content(SAMPLE_LOGIN_HTML, "csrf-token"), "csrf-token"),
        public_key=require_value(extract_login_public_key(SAMPLE_LOGIN_HTML), "public"),
        captcha_url=require_value(extract_captcha_url(SAMPLE_LOGIN_HTML), "captcha"),
        fetched_at="2026-06-15T03:00:00",
    )
    home = parse_home_user_info(SAMPLE_HOME_HTML)
    tables = parse_tables(SAMPLE_HOME_HTML)
    visitors = parse_visitor_rows(SAMPLE_VISITOR_HTML)
    validate_remark("GuestB")
    validate_password("Temp!234")

    # Checked in order: the visitor count is verified before indexing into it.
    _expect(bootstrap.csrf_param == "_csrf-8800", "csrf-param from sample login page")
    _expect(home["用户名"] == "20260000000", "username from sample home page")
    _expect(find_table_with_headers(tables, ["产品ID", "产品名称"]) is not None, "product table in sample home page")
    _expect(len(visitors) == 2, "two visitor rows in sample visitor page")
    _expect(visitors[1].remark == "GuestB", "remark of second sample visitor")
    return {"ok": True, "visitors": len(visitors)}