mooncat-browser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/README.md +213 -0
  2. package/browser-op/backend/browserd.cjs +1004 -0
  3. package/browser-op/backend/rpc-client.cjs +64 -0
  4. package/browser-op/backend/state.cjs +51 -0
  5. package/browser-op/cdp/capture-inject.js +426 -0
  6. package/browser-op/cdp/capture-inject.ts +426 -0
  7. package/browser-op/cdp/capture-service.cjs +172 -0
  8. package/browser-op/cdp/chrome-launcher.cjs +370 -0
  9. package/browser-op/cdp/chrome-path.cjs +57 -0
  10. package/browser-op/cdp/state.cjs +89 -0
  11. package/browser-op/extension/extension-detect.cjs +228 -0
  12. package/browser-op/extension/server.cjs +197 -0
  13. package/browser-op/extension/service.cjs +228 -0
  14. package/browser-op/extension/state.cjs +78 -0
  15. package/browser-op/index.cjs +389 -0
  16. package/browser-op/package.json +17 -0
  17. package/browser-op/py/behavior.py +138 -0
  18. package/browser-op/py/browser.py +340 -0
  19. package/browser-op/py/captcha.py +115 -0
  20. package/browser-op/py/crawler.py +125 -0
  21. package/browser-op/py/examples/01_open_and_probe.py +48 -0
  22. package/browser-op/py/examples/02_reuse_and_probe.py +66 -0
  23. package/browser-op/py/examples/03_interact.py +66 -0
  24. package/browser-op/py/find.py +150 -0
  25. package/browser-op/py/honeypot.py +73 -0
  26. package/browser-op/py/humanize.py +392 -0
  27. package/browser-op/py/image.py +186 -0
  28. package/browser-op/py/interact.py +193 -0
  29. package/browser-op/py/markdown.py +38 -0
  30. package/browser-op/py/pyproject.toml +32 -0
  31. package/browser-op/py/ready.py +208 -0
  32. package/browser-op/py/scroll.py +180 -0
  33. package/browser-op/py/upload.py +103 -0
  34. package/browser-op/py/visual_target.py +47 -0
  35. package/browser-op/py/visualize.py +91 -0
  36. package/browser-op/state.cjs +63 -0
  37. package/browser-op/web/behavior.js +153 -0
  38. package/browser-op/web/browser.js +231 -0
  39. package/browser-op/web/captcha.js +85 -0
  40. package/browser-op/web/crawler.js +109 -0
  41. package/browser-op/web/find.js +147 -0
  42. package/browser-op/web/honeypot.js +68 -0
  43. package/browser-op/web/humanize.js +522 -0
  44. package/browser-op/web/image.js +177 -0
  45. package/browser-op/web/interact.js +169 -0
  46. package/browser-op/web/markdown.js +80 -0
  47. package/browser-op/web/ready.js +295 -0
  48. package/browser-op/web/scroll.js +167 -0
  49. package/browser-op/web/upload.js +116 -0
  50. package/browser-op/web/visual-runtime.inject.cjs +6 -0
  51. package/browser-op/webplater/.env.example +7 -0
  52. package/browser-op/webplater/ARCHITECTURE.md +102 -0
  53. package/browser-op/webplater/dist/chrome-mv3/assets/popup-BUZEUmsx.css +1 -0
  54. package/browser-op/webplater/dist/chrome-mv3/background.js +2 -0
  55. package/browser-op/webplater/dist/chrome-mv3/capture.js +310 -0
  56. package/browser-op/webplater/dist/chrome-mv3/chunks/_virtual_wxt-html-plugins-DPbbfBKe.js +1 -0
  57. package/browser-op/webplater/dist/chrome-mv3/chunks/offscreen-CFXYw9Mo.js +1 -0
  58. package/browser-op/webplater/dist/chrome-mv3/chunks/popup-C-lpxZZO.js +1 -0
  59. package/browser-op/webplater/dist/chrome-mv3/content-scripts/content.js +7 -0
  60. package/browser-op/webplater/dist/chrome-mv3/manifest.json +1 -0
  61. package/browser-op/webplater/dist/chrome-mv3/offscreen.html +16 -0
  62. package/browser-op/webplater/dist/chrome-mv3/popup.html +31 -0
  63. package/browser-op/webplater/entrypoints/background.ts +938 -0
  64. package/browser-op/webplater/entrypoints/content.ts +1150 -0
  65. package/browser-op/webplater/entrypoints/offscreen/index.html +15 -0
  66. package/browser-op/webplater/entrypoints/offscreen/main.ts +161 -0
  67. package/browser-op/webplater/entrypoints/popup/index.html +29 -0
  68. package/browser-op/webplater/entrypoints/popup/main.ts +61 -0
  69. package/browser-op/webplater/entrypoints/popup/style.css +100 -0
  70. package/browser-op/webplater/lib/snapshot.ts +352 -0
  71. package/browser-op/webplater/package.json +29 -0
  72. package/browser-op/webplater/pnpm-lock.yaml +3411 -0
  73. package/browser-op/webplater/public/capture.js +310 -0
  74. package/browser-op/webplater/scripts/publish-extension.mjs +176 -0
  75. package/browser-op/webplater/tsconfig.json +19 -0
  76. package/browser-op/webplater/wxt.config.ts +34 -0
  77. package/dist/actions.md +102 -0
  78. package/dist/cli.d.ts +2 -0
  79. package/dist/cli.d.ts.map +1 -0
  80. package/dist/cli.js +278 -0
  81. package/dist/cli.js.map +1 -0
  82. package/dist/client.d.ts +94 -0
  83. package/dist/client.d.ts.map +1 -0
  84. package/dist/client.js +277 -0
  85. package/dist/client.js.map +1 -0
  86. package/dist/config.d.ts +61 -0
  87. package/dist/config.d.ts.map +1 -0
  88. package/dist/config.js +119 -0
  89. package/dist/config.js.map +1 -0
  90. package/dist/protocol.d.ts +195 -0
  91. package/dist/protocol.d.ts.map +1 -0
  92. package/dist/protocol.js +11 -0
  93. package/dist/protocol.js.map +1 -0
  94. package/dist/server.d.ts +66 -0
  95. package/dist/server.d.ts.map +1 -0
  96. package/dist/server.js +259 -0
  97. package/dist/server.js.map +1 -0
  98. package/package.json +78 -0
  99. package/schemas/browser.clearCookies.schema.json +13 -0
  100. package/schemas/browser.close.schema.json +9 -0
  101. package/schemas/browser.getCookies.schema.json +13 -0
  102. package/schemas/browser.getDownload.schema.json +15 -0
  103. package/schemas/browser.health.schema.json +9 -0
  104. package/schemas/browser.listDownloads.schema.json +16 -0
  105. package/schemas/browser.listTabs.schema.json +9 -0
  106. package/schemas/browser.newTab.schema.json +15 -0
  107. package/schemas/browser.open.schema.json +15 -0
  108. package/schemas/browser.operate.schema.json +15 -0
  109. package/schemas/browser.reuseTab.schema.json +15 -0
  110. package/schemas/browser.setCookies.schema.json +15 -0
  111. package/schemas/browser.waitFor.schema.json +15 -0
  112. package/schemas/browser.waitForDownload.schema.json +15 -0
  113. package/skills/browser/SKILL.md +110 -0
  114. package/skills/browser/references/collect.md +163 -0
  115. package/skills/browser/references/high-risk.md +161 -0
  116. package/skills/browser/references/operate-actions.md +92 -0
  117. package/skills/browser/references/probing.md +302 -0
@@ -0,0 +1,66 @@
1
+ """
2
+ 样例 03: 交互操作 (click/fill/press + waitForSelector)
3
+
4
+ 演示 Python 客户端的自动化控制能力:
5
+ - wait_for_selector (等元素出现, 比 sleep 靠谱)
6
+ - fill / press / click (填表单 + 提交)
7
+ - inner_text (取结果)
8
+ - 截图存盘
9
+
10
+ 场景: 在 httpbin.org/forms/post 上填一个表单并提交。
11
+ httpbin 无风控, 适合演练交互。
12
+
13
+ 跑: python lib/py/examples/03_interact.py
14
+ """
15
+ import sys
16
+ import os
17
+ import base64
18
+ from pathlib import Path
19
+
20
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
21
+
22
+ from browser import Browser # noqa: E402
23
+
24
+
25
def main() -> None:
    """Fill in and submit the httpbin demo form, saving a screenshot on the way.

    Steps: open the browser, load the form in a new tab, wait for the submit
    button, fill three text fields, store a screenshot under ``_artifacts/``
    next to this script, click submit, then print the first 300 characters of
    the ``<pre>`` element on the result page. The browser is not closed here.
    """
    browser = Browser()
    print("[03] open...")
    browser.open(route_mode="auto", headless=False)

    page = browser.new_tab("https://httpbin.org/forms/post")
    print("[03] new tab → httpbin form")

    # 1. Wait for the form to show up (more reliable than a fixed sleep).
    browser.wait_for_selector(page, selector="input[type=submit]", timeout=15000)
    print("[03] 表单已加载")

    # 2. Fill the text fields (the httpbin form exposes custname/custtel/custemail).
    form_values = (
        ("input[name=custname]", "Test User"),
        ("input[name=custtel]", "1234567890"),
        ("input[name=custemail]", "test@example.com"),
    )
    for field_selector, field_value in form_values:
        browser.fill(page, selector=field_selector, value=field_value)
    print("[03] 已填表单")

    # 3. Save a screenshot of the current state.
    snapshot = browser.screenshot(page)
    if isinstance(snapshot, dict) and snapshot.get("dataUrl"):
        # The payload is a data URL; everything after the first comma is base64.
        png_bytes = base64.b64decode(snapshot["dataUrl"].split(",")[1])
        target = Path(__file__).parent / "_artifacts" / "03_form_filled.png"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(png_bytes)
        print(f"[03] 截图: {target} ({len(png_bytes)} bytes)")

    # 4. Submit the form.
    browser.click(page, selector="input[type=submit]")
    print("[03] 点击 submit")

    # 5. Wait for the result page and print what it shows.
    browser.wait_for_selector(page, selector="pre", timeout=10000)
    echoed = browser.inner_text(page, selector="pre")
    print("[03] 提交结果 (httpbin 回显):")
    if echoed:
        print(echoed[:300])
    else:
        print("(空)")

    print("[03] DONE (浏览器保持打开)")
63
+
64
+
65
+ if __name__ == "__main__":
66
+ main()
@@ -0,0 +1,150 @@
1
+ """web/find — 元素定位 (路由无关).
2
+
3
+ 忠实复刻 lib/web/find.js。
4
+ 通用能力: 按文本/selector 找元素, 返回坐标 + CSS path。
5
+ 解决 SPA 两大定位问题:
6
+ ① 标题被拆成多 span → 叶子精确匹配失败 → 兜底取包含匹配最深层
7
+ ② aria snapshot 抓不到 (CSS Modules 非语义 DOM) → 不依赖 aria-ref
8
+ clickByText: 基于此的点击封装 (sycm 这类页面文本是唯一稳定位)
9
+
10
+ 依赖: browser (operate)
11
+ """
12
+ from __future__ import annotations
13
+ import json
14
+ import asyncio
15
+ from typing import Any
16
+
17
+ import sys
18
+ sys.path.insert(0, __file__.rsplit("\\", 1)[0] if "\\" in __file__ else __file__.rsplit("/", 1)[0])
19
+ from browser import operate # noqa: E402
20
+
21
+
22
async def find_element(page_handle: dict, criteria: dict | None = None) -> dict:
    """Locate one element on the page and report its box and a short CSS path.

    Text matching strategy (copes with headings split across several spans):
      1. a leaf element (no child elements) whose trimmed innerText equals
         the text;
      2. fallback: any element whose innerText contains the text (and, when
         ``exact`` is set, whose trimmed innerText equals it), preferring
         the candidate with the fewest descendants.

    criteria:
        text: text to look for.
        exact: bool; only affects the fallback step (step 1 is always an
            exact leaf match).
        selector: CSS selector; takes precedence over ``text`` when both
            are given.
        visibleOnly: report ``found: False`` when the hit is not visible.

    Returns ``{found, x, y, width, height, path, text}`` or
    ``{"found": False}``. Any failure while evaluating the page script is
    deliberately reported as ``{"found": False}`` rather than raised.

    Raises:
        ValueError: neither ``text`` nor ``selector`` was supplied.
    """
    import inspect  # local import: only needed to detect awaitable results

    criteria = criteria or {}
    text = criteria.get("text")
    exact = bool(criteria.get("exact"))
    selector = criteria.get("selector")
    visible_only = bool(criteria.get("visibleOnly"))
    if not text and not selector:
        raise ValueError("findElement: text 或 selector 必须传一个")
    # Python values are embedded as JSON literals so they are valid JS and
    # cannot break out of the script.
    source = """(() => {
      const text = """ + json.dumps(text or "") + """
      const exact = """ + json.dumps(exact) + """
      const selector = """ + json.dumps(selector or "") + """
      const visibleOnly = """ + json.dumps(visible_only) + """
      const all = [...document.querySelectorAll('*')]
      const isVisible = (el) => {
        if (!el) return false
        const r = el.getBoundingClientRect()
        const s = getComputedStyle(el)
        return r.width > 0 && r.height > 0 && s.visibility !== 'hidden' && s.display !== 'none' && s.opacity !== '0'
      }
      let hit = null
      if (selector) {
        hit = document.querySelector(selector)
      } else if (text) {
        hit = all.find(el => el.children.length === 0 && (el.innerText||'').trim() === text)
        if (!hit) {
          const cands = all.filter(el => (el.innerText||'').includes(text) && (exact ? (el.innerText||'').trim()===text : true))
          cands.sort((a,b) => a.querySelectorAll('*').length - b.querySelectorAll('*').length)
          hit = cands[0]
        }
      }
      if (!hit) return { found: false }
      if (visibleOnly && !isVisible(hit)) return { found: false }
      const path = []
      let cur = hit
      let depth = 0
      while (cur && cur !== document.body && depth < 4) {
        let seg = cur.tagName.toLowerCase()
        if (cur.id) seg += '#' + cur.id
        else if (cur.className) {
          const c = (cur.className||'').toString().trim().split(/\\s+/).filter(x=>x).slice(0,1).join('.')
          if (c) seg += '.' + c
        }
        path.unshift(seg)
        cur = cur.parentElement
        depth++
      }
      const r = hit.getBoundingClientRect()
      return {
        found: true,
        x: Math.round(r.x), y: Math.round(r.y),
        width: Math.round(r.width), height: Math.round(r.height),
        path: path.join(' > '),
        text: (hit.innerText||'').trim().slice(0, 60)
      }
    })()"""
    try:
        r = operate(page_handle, {"action": "evaluate", "source": source, "_skipVisualize": True})
        # Sibling modules call operate() synchronously. Awaiting a plain
        # result would raise TypeError, which the handler below would turn
        # into a permanent "not found". Await only when there is something
        # to await.
        if inspect.isawaitable(r):
            r = await r
        return r or {"found": False}
    except Exception:
        return {"found": False}
96
+
97
+
98
async def click_by_text(page_handle: dict, text: str, opts: dict | None = None) -> dict:
    """Click the element identified by its visible text.

    The element is first located with ``find_element``. If it is not found
    and ``scrollIntoView`` is enabled, the page is scrolled down in small
    random steps (at most 40) with a re-check after each step, stopping
    early once the bottom of the page is reached.

    The click script looks at leaf elements only. It prefers one whose
    trimmed innerText equals ``text``; unless ``exact`` is set, it then
    accepts a leaf whose text merely starts with ``text`` (tolerates
    suffixes such as "访客分析 >"). The click is dispatched on the closest
    ``button``/``a``/``[role=button]``/``[onclick]`` ancestor, or on the
    hit itself when there is none.

    opts:
        exact: bool; True disables the starts-with fallback.
        scrollIntoView: bool, default True; scroll down to search when the
            element is not found at first.

    Returns ``{clicked, path, text}``, or ``{"clicked": False, "error"}``
    when the element was never found.
    """
    import inspect  # local imports: keep the block self-contained
    import random

    opts = opts or {}
    exact = bool(opts.get("exact"))
    scroll_into_view = opts.get("scrollIntoView", True)

    async def _evaluate(source: str) -> Any:
        # Run a page script through operate(); tolerate both a synchronous
        # operate() (as used by sibling modules) and an awaitable one.
        result = operate(page_handle, {
            "action": "evaluate",
            "source": source,
            "_skipVisualize": True,
        })
        if inspect.isawaitable(result):
            result = await result
        return result

    el = await find_element(page_handle, {"text": text, "exact": exact})
    if not el.get("found") and scroll_into_view:
        for _ in range(40):
            await _evaluate('() => { window.scrollBy(0, 40 + Math.floor(Math.random()*30)); return null }')
            await asyncio.sleep(0.06 + random.random() * 0.06)
            el = await find_element(page_handle, {"text": text, "exact": exact})
            if el.get("found"):
                break
            try:
                at_bottom = await _evaluate(
                    '()=>(window.innerHeight + (window.scrollY||0) + 5) >= document.body.scrollHeight'
                )
            except Exception:
                # The bottom check is best-effort; keep scrolling if it fails.
                at_bottom = False
            if at_bottom:
                break
    if not el.get("found"):
        return {"clicked": False, "error": "not found: " + text}

    # Values are embedded as JSON literals so they are valid JS.
    click_source = """(() => {
      const text = """ + json.dumps(text) + """
      const exact = """ + json.dumps(exact) + """
      const leaves = [...document.querySelectorAll('*')].filter(e => e.children.length === 0)
      const label = (e) => (e.innerText||'').trim()
      const hit = leaves.find(e => label(e) === text)
        || (exact ? null : leaves.find(e => label(e).startsWith(text)))
      if (!hit) return false
      const clickable = hit.closest('button, a, [role=button], [onclick]') || hit
      clickable.click()
      return true
    })()"""
    clicked = await _evaluate(click_source)
    return {"clicked": clicked is not False, "path": el.get("path"), "text": el.get("text")}
@@ -0,0 +1,73 @@
1
+ """web/honeypot — 蜜罐陷阱检测(路由无关)
2
+
3
+ 忠实复刻 lib/web/honeypot.js。
4
+ 扫描页面隐藏的陷阱元素(隐藏 link/input/button),避免触发反爬。
5
+ 基于 operate({action:'evaluate'}) 跑页面 JS。
6
+
7
+ 依赖:browser(operate)
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import sys
12
+ from typing import Any
13
+
14
+ sys.path.insert(0, __file__.rsplit("\\", 1)[0] if "\\" in __file__ else __file__.rsplit("/", 1)[0])
15
+ from browser import operate # noqa: E402
16
+
17
# Probe script that scans the page for hidden trap elements (it is handed to
# operate() as the source of an 'evaluate' action).
# Checks: display:none / opacity:0 / visibility:hidden / zero size /
# positioned off the page / hidden ancestor (walks up to 3 parent levels).
# The off-page checks add the scroll offsets: getBoundingClientRect() is
# viewport-relative, so without them every control that has merely been
# scrolled out of view above/left of the viewport would be reported as a trap.
SCAN_SOURCE = '''() => {
  const traps = []
  const isHidden = (el) => {
    const s = getComputedStyle(el)
    if (s.display === "none") return "display:none"
    if (parseFloat(s.opacity) === 0) return "opacity:0"
    if (s.visibility === "hidden") return "visibility:hidden"
    const r = el.getBoundingClientRect()
    if (r.width === 0 || r.height === 0) return "zero-size"
    if (r.bottom + window.scrollY < 0) return "offscreen-top"
    if (r.right + window.scrollX < 0) return "offscreen-left"
    return null
  }
  const walk = (el, depth) => {
    if (!el || depth > 3) return null
    const reason = isHidden(el)
    if (reason) return reason + (depth > 0 ? "(parent)" : "")
    return walk(el.parentElement, depth + 1)
  }
  const sel = "a[href], input, button, [role=button], [onclick]"
  document.querySelectorAll(sel).forEach((el) => {
    const reason = walk(el, 0)
    if (reason) {
      traps.push({
        tag: el.tagName.toLowerCase(),
        text: (el.innerText || el.value || "").slice(0, 80),
        href: el.getAttribute("href") || null,
        name: el.getAttribute("name") || null,
        type: el.getAttribute("type") || null,
        reason,
        rect: (() => { const r = el.getBoundingClientRect(); return { x: r.x, y: r.y, w: r.width, h: r.height } })(),
      })
    }
  })
  return traps
}'''
55
+
56
+
57
def scan(page_handle: dict) -> list:
    """Run the trap-detection script on a page and return its findings.

    page_handle: handle of a single page.
    Returns a list of ``{tag, text, href, name, type, reason, rect}`` dicts;
    an empty list when the result has none of the recognised shapes.

    Raises:
        ValueError: ``page_handle`` is missing/empty.
    """
    if not page_handle:
        raise ValueError('scan: pageHandle required')
    outcome = operate(page_handle, {'action': 'evaluate', 'source': SCAN_SOURCE})
    # The result may arrive as a bare list or wrapped in a dict under
    # 'value' or 'result'; accept all three shapes, in that order.
    if isinstance(outcome, list):
        return outcome
    if isinstance(outcome, dict):
        for wrapper_key in ('value', 'result'):
            payload = outcome.get(wrapper_key)
            if isinstance(payload, list):
                return payload
    return []
@@ -0,0 +1,392 @@
1
+ """web/humanize — 拟人化行为注入 (路由无关, 全走 operate).
2
+
3
+ 忠实复刻 lib/web/humanize.js。
4
+ 设计: 随机触发, 不是每次 action 都注入。每个 operate 有概率走 humanize 序列,
5
+ 概率按 riskLevel 调。避免"每次必注入"本身变成新的固定模式。
6
+
7
+ 嵌入方式: operate 顶部作为隐式中间件, 调用方无感。
8
+
9
+ 依赖: browser (operate), scroll, interact
10
+ """
11
+ from __future__ import annotations
12
+ import math
13
+ import random
14
+ import time
15
+ from typing import Any
16
+
17
+ import sys
18
+ sys.path.insert(0, __file__.rsplit("\\", 1)[0] if "\\" in __file__ else __file__.rsplit("/", 1)[0])
19
+ from browser import operate # noqa: E402
20
+ from scroll import scroll_by, scroll_to_y, scroll_to_element # noqa: E402
21
+
22
+ try:
23
+ from interact import highlight, toast
24
+ except Exception:
25
+ highlight = None
26
+ toast = None
27
+
28
+
29
# Per-risk-level tuning. Within this module: delayScale multiplies the sampled
# pauses, sequenceLen is the (min, max) target length of a behaviour sequence,
# triggerProb is the chance that a sequence runs at all, and wanderProb is the
# chance of adding a mouse action after navigation. scrollProb is not read by
# the code in this module.
RISK_LEVELS = {
    "low": {
        "delayScale": 0.6,
        "sequenceLen": (1, 3),
        "triggerProb": 0.35,
        "scrollProb": 0.3,
        "wanderProb": 0.25,
    },
    "medium": {
        "delayScale": 1.0,
        "sequenceLen": (2, 4),
        "triggerProb": 0.55,
        "scrollProb": 0.5,
        "wanderProb": 0.4,
    },
    "high": {
        "delayScale": 1.8,
        "sequenceLen": (3, 6),
        "triggerProb": 0.75,
        "scrollProb": 0.7,
        "wanderProb": 0.6,
    },
}

# Substrings that mark a URL as high risk (matched with a plain `in` test
# against the lower-cased URL).
HIGH_RISK_DOMAINS = [
    "taobao.com",
    "tmall.com",
    "jd.com",
    "1688.com",
    "pinduoduo.com",
    "yangkeduo.com",
    "xiaohongshu.com",
    "xhslink.com",
    "weidian.com",
    "douyin.com",
    "kuaishou.com",
    "bilibili.com",
    "weibo.com",
    "alipay.com",
    "bank",
    "icbc",
    "cmbchina",
    "12306.cn",
]

# Last mouse position this module moved to; start point of the next trajectory.
_last_mouse_pos: dict = {"x": 300, "y": 300}
44
+
45
+
46
+ def _gaussian(mean: float, std: float) -> float:
47
+ u1 = random.random()
48
+ while u1 == 0:
49
+ u1 = random.random()
50
+ u2 = random.random()
51
+ while u2 == 0:
52
+ u2 = random.random()
53
+ return mean + std * math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2)
54
+
55
+
56
+ def _bezier(p0: float, p1: float, p2: float, p3: float, t: float) -> float:
57
+ u = 1 - t
58
+ return u * u * u * p0 + 3 * u * u * t * p1 + 3 * u * t * t * p2 + t * t * t * p3
59
+
60
+
61
def _bezier_trajectory(from_pt: dict, to_pt: dict, steps: int) -> list[dict]:
    """Build ``steps + 1`` points along a randomised cubic Bezier curve.

    The curve runs from ``from_pt`` to ``to_pt`` (dicts with "x"/"y"). A
    random pivot near the midpoint bends the two control points, and every
    sample gets up to +/-1 of extra noise per axis.
    """
    x0, y0 = from_pt["x"], from_pt["y"]
    x1, y1 = to_pt["x"], to_pt["y"]
    # Pivot: the midpoint displaced by up to 30% of the span on each axis.
    pivot_x = (x0 + x1) / 2 + (random.random() - 0.5) * abs(x1 - x0) * 0.6
    pivot_y = (y0 + y1) / 2 + (random.random() - 0.5) * abs(y1 - y0) * 0.6
    # Each control point sits 30% of the way from its endpoint to the pivot.
    ctrl_a = {"x": x0 + (pivot_x - x0) * 0.3, "y": y0 + (pivot_y - y0) * 0.3}
    ctrl_b = {"x": x1 + (pivot_x - x1) * 0.3, "y": y1 + (pivot_y - y1) * 0.3}
    samples = []
    for index in range(steps + 1):
        t = index / steps
        noisy_x = _bezier(x0, ctrl_a["x"], ctrl_b["x"], x1, t) + (random.random() - 0.5) * 2
        noisy_y = _bezier(y0, ctrl_a["y"], ctrl_b["y"], y1, t) + (random.random() - 0.5) * 2
        samples.append({"x": noisy_x, "y": noisy_y})
    return samples
74
+
75
+
76
+ # 原子行为
77
+
78
def _read(page_handle: dict, scale: float) -> None:
    """Pause as if reading: sleep for a sample around 2.5 s times ``scale``."""
    pause = _gaussian(2.5, 1.2) * scale
    time.sleep(max(0, pause))  # clamp: the sample can be negative
80
+
81
+
82
def _idle(page_handle: dict, scale: float) -> None:
    """Short idle pause: sleep for a sample around 1.2 s times ``scale``."""
    pause = _gaussian(1.2, 0.6) * scale
    time.sleep(max(0, pause))  # clamp: the sample can be negative
84
+
85
+
86
def _scroll_smooth(page_handle: dict, scale: float, direction: str | None = None) -> None:
    """Scroll by a random 120-480 px, then pause briefly.

    ``direction`` may be "down" or "up"; when omitted, "down" is chosen
    with probability 0.8. A failing scroll is ignored.
    """
    if direction:
        heading = direction
    else:
        heading = "down" if random.random() < 0.8 else "up"
    distance = random.randint(120, 480)
    offset = distance if heading == "down" else -distance
    try:
        scroll_by(page_handle, offset)
    except Exception:
        pass
    pause = _gaussian(0.5, 0.2) * scale
    time.sleep(max(0, pause))
95
+
96
+
97
def _scroll_to_ratio(page_handle: dict, scale: float) -> None:
    """Scroll to a randomly chosen fraction of the scrollable height, then pause.

    The target offset is computed in the page; the answer may come back as a
    bare number or wrapped in a dict under "value". A falsy answer skips the
    scroll, and errors while converting or scrolling are ignored.
    """
    fraction = random.choice([0.25, 0.4, 0.55, 0.7, 0.85])
    request = {
        "_skipHumanize": True,
        "action": "evaluate",
        "source": "(r) => Math.max(0, (document.body.scrollHeight - window.innerHeight) * r)",
        "args": fraction,
    }
    answer = operate(page_handle, request)
    try:
        if answer:
            if isinstance(answer, dict):
                pixel_y = int(answer.get("value", 0))
            else:
                pixel_y = int(answer)
            scroll_to_y(page_handle, pixel_y)
    except Exception:
        pass
    pause = _gaussian(0.8, 0.3) * scale
    time.sleep(max(0, pause))
107
+
108
+
109
def _mouse_wander(page_handle: dict, scale: float) -> None:
    """Move the mouse along a curved path towards a random spot on the page.

    A random point is probed with ``document.elementFromPoint``; when an
    element is there, its centre becomes the destination (and is highlighted
    if the optional ``highlight`` helper is available), otherwise the random
    point itself is used. Updates the module-level last mouse position.
    """
    global _last_mouse_pos
    fallback_x = random.uniform(100, 1100)
    fallback_y = random.uniform(100, 700)
    probe_source = "(x, y) => { const el = document.elementFromPoint(x, y); if (!el) return null; const r = el.getBoundingClientRect(); return { x: Math.round(r.x + r.width/2), y: Math.round(r.y + r.height/2), tag: el.tagName, text: (el.innerText||'').trim().slice(0,20) } }"
    probe = operate(page_handle, {
        "_skipHumanize": True,
        "action": "evaluate",
        "source": probe_source,
        "args": [fallback_x, fallback_y],
    })
    # The answer may be wrapped in a dict under "value".
    if isinstance(probe, dict) and "value" in probe:
        probe = probe["value"]

    has_x = bool(probe) and probe.get("x") is not None
    if has_x and highlight:
        try:
            highlight(page_handle, {"x": probe["x"] - 30, "y": probe["y"] - 10, "w": 60, "h": 20, "kind": "rect"})
        except Exception:
            pass
    dest_x = probe["x"] if has_x else fallback_x
    has_y = bool(probe) and probe.get("y") is not None
    dest_y = probe["y"] if has_y else fallback_y

    step_count = random.randint(12, 22)
    for point in _bezier_trajectory(_last_mouse_pos, {"x": dest_x, "y": dest_y}, step_count):
        operate(page_handle, {"_skipHumanize": True, "action": "mouseMove", "x": round(point["x"]), "y": round(point["y"])})
        time.sleep(random.uniform(0.008, 0.022))
    _last_mouse_pos = {"x": dest_x, "y": dest_y}
    time.sleep(max(0, _gaussian(0.3, 0.15) * scale))
132
+
133
+
134
def _hover_random(page_handle: dict, scale: float) -> None:
    """Move the mouse onto a randomly chosen visible link/button/input.

    The page script picks the element and returns its centre; nothing
    happens when no candidate is returned. Updates the module-level last
    mouse position.
    """
    global _last_mouse_pos
    pick_source = "() => { const els = document.querySelectorAll('a, button, input, [role=button]'); const arr = [...els].filter(e => { const r = e.getBoundingClientRect(); return r.width > 0 && r.height > 0 && r.top > 0 && r.top < window.innerHeight - 50 }); if (!arr.length) return null; const el = arr[Math.floor(Math.random()*arr.length)]; const r = el.getBoundingClientRect(); return { x: Math.round(r.x + r.width/2), y: Math.round(r.y + r.height/2) } }"
    centre = operate(page_handle, {"_skipHumanize": True, "action": "evaluate", "source": pick_source})
    # The answer may be wrapped in a dict under "value".
    if isinstance(centre, dict) and "value" in centre:
        centre = centre["value"]
    if not centre or centre.get("x") is None:
        return

    if highlight:
        try:
            highlight(page_handle, {"x": centre["x"] - 40, "y": centre["y"] - 10, "w": 80, "h": 20, "kind": "rect"})
        except Exception:
            pass

    step_count = random.randint(10, 18)
    destination = {"x": centre["x"], "y": centre["y"]}
    for point in _bezier_trajectory(_last_mouse_pos, destination, step_count):
        operate(page_handle, {"_skipHumanize": True, "action": "mouseMove", "x": round(point["x"]), "y": round(point["y"])})
        time.sleep(random.uniform(0.01, 0.025))
    _last_mouse_pos = {"x": centre["x"], "y": centre["y"]}
    time.sleep(max(0, _gaussian(0.4, 0.2) * scale))
154
+
155
+
156
def _scroll_key(page_handle: dict, scale: float) -> None:
    """Scroll by a larger random step (300-600 px), mostly downwards, then pause.

    The step is downwards with probability 0.85; a failing scroll is ignored.
    """
    magnitude = random.randint(300, 600)
    sign = 1 if random.random() < 0.85 else -1
    try:
        scroll_by(page_handle, magnitude * sign)
    except Exception:
        pass
    pause = _gaussian(0.6, 0.25) * scale
    time.sleep(max(0, pause))
163
+
164
+
165
def _mouse_jitter(page_handle: dict, scale: float) -> None:
    """Send 2-5 tiny mouse moves (within +/-10 px) around the last position.

    The stored last position is read but not changed.
    """
    repeats = random.randint(2, 5)
    for _ in range(repeats):
        jitter_x = round(_last_mouse_pos["x"] + (random.random() - 0.5) * 20)
        jitter_y = round(_last_mouse_pos["y"] + (random.random() - 0.5) * 20)
        operate(page_handle, {"_skipHumanize": True, "action": "mouseMove", "x": jitter_x, "y": jitter_y})
        time.sleep(random.uniform(0.03, 0.08))
    time.sleep(max(0, _gaussian(0.2, 0.1) * scale))
171
+
172
+
173
def _tab_nav(page_handle: dict, scale: float) -> None:
    """Press Tab on ``body`` one to three times, pausing after each press."""
    for _ in range(random.randint(1, 3)):
        operate(page_handle, {"_skipHumanize": True, "action": "press", "selector": "body", "key": "Tab"})
        pause = _gaussian(0.25, 0.1) * scale
        time.sleep(max(0, pause))
178
+
179
+
180
def _noop(page_handle: dict, scale: float) -> None:
    """Do nothing for 0.1-0.4 s (``scale`` is not applied)."""
    pause = random.uniform(0.1, 0.4)
    time.sleep(pause)
182
+
183
+
184
+ def _weighted_pick(pool: list[dict]) -> Any:
185
+ total = sum(a.get("weight", 1) for a in pool)
186
+ r = random.random() * total
187
+ for item in pool:
188
+ r -= item.get("weight", 1)
189
+ if r <= 0:
190
+ return item["fn"]
191
+ return pool[0]["fn"]
192
+
193
+
194
def _build_sequence(phase: str, risk_cfg: dict) -> list:
    """Assemble a random list of atomic behaviours for the given phase.

    phase:
        "navigate": a read, 1-3 scrolls, maybe a mouse action (wanderProb),
            then optional extras while shorter than the target length.
        "interact": 0-2 atoms, favouring mouse behaviours.
        "after-interact": 0-2 atoms without mouse behaviours.
        anything else: exactly the target length, drawn from every pool.
    risk_cfg: supplies "sequenceLen" (min, max) and "wanderProb".

    With probability 0.1 an extra idle is inserted at a random position of a
    non-empty sequence. Returns a list of callables ``fn(page_handle, scale)``.
    """
    shortest, longest = risk_cfg["sequenceLen"]
    wanted = random.randint(shortest, longest)

    attention = [{"fn": _read, "weight": 3}, {"fn": _idle, "weight": 1}]
    scrolling = [
        {"fn": _scroll_smooth, "weight": 4},
        {"fn": _scroll_to_ratio, "weight": 2},
        {"fn": _scroll_key, "weight": 1},
    ]
    mouse = [
        {"fn": _mouse_wander, "weight": 3},
        {"fn": _hover_random, "weight": 2},
        {"fn": _mouse_jitter, "weight": 1},
    ]
    meta = [{"fn": _noop, "weight": 2}]
    everything = attention + scrolling + mouse + meta

    steps = []
    if phase == "navigate":
        steps.append(_read)
        steps.extend(_weighted_pick(scrolling) for _ in range(random.randint(1, 3)))
        if random.random() < risk_cfg["wanderProb"]:
            steps.append(_weighted_pick(mouse))
        while len(steps) < wanted and random.random() < 0.5:
            steps.append(_weighted_pick(everything))
    elif phase == "interact":
        for _ in range(random.randint(0, min(2, wanted))):
            if random.random() < 0.6:
                steps.append(_weighted_pick(mouse))
            else:
                steps.append(_weighted_pick(attention + meta))
    elif phase == "after-interact":
        calm = attention + scrolling + meta
        for _ in range(random.randint(0, min(2, wanted))):
            steps.append(_weighted_pick(calm))
    else:
        steps.extend(_weighted_pick(everything) for _ in range(wanted))

    if steps and random.random() < 0.1:
        steps.insert(random.randint(0, len(steps)), _idle)
    return steps
229
+
230
+
231
+ def _should_trigger(risk_cfg: dict) -> bool:
232
+ return random.random() < risk_cfg["triggerProb"]
233
+
234
+
235
def _reposition(page_handle: dict, selector: str, scale: float) -> None:
    """Bring the target element into view, move the mouse onto it and focus it.

    Does nothing when ``selector`` is empty. Failures of the scroll and of
    the final focus are ignored; the bounding-box lookup and the mouse moves
    are not guarded. Updates the module-level last mouse position when a box
    is available.
    """
    global _last_mouse_pos
    if not selector:
        return

    try:
        scroll_to_element(page_handle, selector, {"block": "center"})
    except Exception:
        pass
    time.sleep(max(0, _gaussian(0.5, 0.2) * scale))

    rect = operate(page_handle, {"_skipHumanize": True, "action": "boundingBox", "selector": selector})
    # The answer may be wrapped in a dict under "value".
    if isinstance(rect, dict) and "value" in rect:
        rect = rect["value"]

    if rect and rect.get("width") is not None:
        # Aim at a random point in the central 40%-60% band of the box.
        aim_x = rect["x"] + rect["width"] * (0.4 + random.random() * 0.2)
        aim_y = rect["y"] + rect["height"] * (0.4 + random.random() * 0.2)
        step_count = random.randint(8, 16)
        for point in _bezier_trajectory(_last_mouse_pos, {"x": aim_x, "y": aim_y}, step_count):
            operate(page_handle, {"_skipHumanize": True, "action": "mouseMove", "x": round(point["x"]), "y": round(point["y"])})
            time.sleep(random.uniform(0.01, 0.025))
        _last_mouse_pos = {"x": aim_x, "y": aim_y}

    try:
        operate(page_handle, {"_skipHumanize": True, "action": "focus", "selector": selector})
    except Exception:
        pass
261
+
262
+
263
def before_action(page_handle: dict, action: str, params: dict, risk_cfg: dict) -> None:
    """Hook run before an action: maybe play a behaviour sequence, then aim.

    Navigation actions are skipped entirely. Otherwise a random sequence is
    played with probability ``triggerProb`` (errors of single atoms are
    ignored), and - whether or not it was played - interaction actions that
    carry a selector get the mouse repositioned onto their target.
    """
    scale = risk_cfg["delayScale"]
    interact_actions = (
        "click", "fill", "type", "press", "hover", "dblclick",
        "check", "uncheck", "selectOption", "dragTo", "setInputFiles",
    )
    navigate_actions = ("goto", "goBack", "goForward", "reload")
    interacting = action in interact_actions

    if action in navigate_actions:
        return

    if _should_trigger(risk_cfg):
        phase = "interact" if interacting else "default"
        for step in _build_sequence(phase, risk_cfg):
            try:
                step(page_handle, scale)
            except Exception:
                pass

    # Aiming happens in both the triggered and the untriggered case.
    if interacting and params.get("selector"):
        _reposition(page_handle, params["selector"], scale)
286
+
287
+
288
def after_action(page_handle: dict, action: str, risk_cfg: dict) -> None:
    """Hook run after an action: play a follow-up behaviour sequence.

    After a navigation action the "navigate" sequence always runs. After any
    other action an "after-interact" sequence runs with probability
    ``triggerProb``. Errors raised by single atoms are ignored.
    """
    scale = risk_cfg["delayScale"]

    if action in ("goto", "goBack", "goForward", "reload"):
        phase = "navigate"
    elif _should_trigger(risk_cfg):
        phase = "after-interact"
    else:
        return

    for step in _build_sequence(phase, risk_cfg):
        try:
            step(page_handle, scale)
        except Exception:
            pass
310
+
311
+
312
def create_humanizer(options: dict | None = None) -> dict:
    """Create a humanizer bound to one risk level.

    options:
        riskLevel: 'low' | 'medium' | 'high' (default 'medium'); unknown
            values fall back to 'medium'.
    Returns ``{riskLevel, riskCfg, beforeAction, afterAction}`` where the two
    callables forward to ``before_action`` / ``after_action`` with the
    selected configuration.
    """
    requested = (options or {}).get("riskLevel", "medium")
    level = requested if requested in RISK_LEVELS else "medium"
    risk_cfg = RISK_LEVELS[level]

    def _before(ph, a, p):
        return before_action(ph, a, p, risk_cfg)

    def _after(ph, a):
        return after_action(ph, a, risk_cfg)

    return {
        "riskLevel": level,
        "riskCfg": risk_cfg,
        "beforeAction": _before,
        "afterAction": _after,
    }
330
+
331
+
332
def human_act(page_handle: dict, opts: dict | None = None) -> None:
    """Free-standing humanize call for collection/evaluate scenarios.

    One call plays an optional behaviour sequence and then moves the mouse
    back onto the target.

    opts:
        target: CSS selector to return to.
        targetXY: ``{"x", "y"}`` coordinates to return to; used when
            ``target`` is not given.
        phase: 'default' | 'navigate' | 'interact' | 'after'. Unknown values
            behave like 'default'.
        riskLevel: 'low' | 'medium' | 'high' (unknown values -> 'medium').
        reposition: move back to the target afterwards (default True);
            never done for phase 'after'.

    The sequence runs with probability ``triggerProb`` of the risk level.
    Errors from highlighting and from single atoms are ignored.
    """
    global _last_mouse_pos
    opts = opts or {}
    risk_cfg = RISK_LEVELS.get(opts.get("riskLevel", "medium"), RISK_LEVELS["medium"])
    scale = risk_cfg["delayScale"]
    phase = opts.get("phase", "default")
    target = opts.get("target")
    target_xy = opts.get("targetXY")

    if target and highlight:
        try:
            highlight(page_handle, {"kind": "selector", "selector": target})
        except Exception:
            pass
    elif target_xy and highlight:
        try:
            highlight(page_handle, {"kind": "point", "x": target_xy["x"], "y": target_xy["y"]})
        except Exception:
            pass

    if _should_trigger(risk_cfg):
        # Map the public phase names onto the ones _build_sequence knows.
        # 'interact' is a documented phase and used to fall through to
        # 'default'; it now selects the interact sequence.
        if phase == "navigate":
            seq_phase = "navigate"
        elif phase == "interact":
            seq_phase = "interact"
        elif phase == "after":
            seq_phase = "after-interact"
        else:
            seq_phase = "default"
        for atom in _build_sequence(seq_phase, risk_cfg):
            try:
                atom(page_handle, scale)
            except Exception:
                pass

    if opts.get("reposition", True) and phase != "after":
        if target:
            _reposition(page_handle, target, scale)
        elif target_xy:
            steps = random.randint(8, 16)
            pts = _bezier_trajectory(_last_mouse_pos, {"x": target_xy["x"], "y": target_xy["y"]}, steps)
            for pt in pts:
                operate(page_handle, {"_skipHumanize": True, "action": "mouseMove", "x": round(pt["x"]), "y": round(pt["y"])})
                time.sleep(random.uniform(0.01, 0.025))
            _last_mouse_pos = {"x": target_xy["x"], "y": target_xy["y"]}
382
+
383
+
384
def detect_risk_level(url: str | None = None) -> str:
    """Classify a URL as "high" or "medium" risk.

    Returns "high" when the lower-cased URL contains any entry of
    ``HIGH_RISK_DOMAINS`` as a substring; "medium" otherwise, including when
    no URL is given.
    """
    if not url:
        return "medium"
    haystack = url.lower()
    matched = any(marker in haystack for marker in HIGH_RISK_DOMAINS)
    return "high" if matched else "medium"