mooncat-browser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/README.md +213 -0
  2. package/browser-op/backend/browserd.cjs +1004 -0
  3. package/browser-op/backend/rpc-client.cjs +64 -0
  4. package/browser-op/backend/state.cjs +51 -0
  5. package/browser-op/cdp/capture-inject.js +426 -0
  6. package/browser-op/cdp/capture-inject.ts +426 -0
  7. package/browser-op/cdp/capture-service.cjs +172 -0
  8. package/browser-op/cdp/chrome-launcher.cjs +370 -0
  9. package/browser-op/cdp/chrome-path.cjs +57 -0
  10. package/browser-op/cdp/state.cjs +89 -0
  11. package/browser-op/extension/extension-detect.cjs +228 -0
  12. package/browser-op/extension/server.cjs +197 -0
  13. package/browser-op/extension/service.cjs +228 -0
  14. package/browser-op/extension/state.cjs +78 -0
  15. package/browser-op/index.cjs +389 -0
  16. package/browser-op/package.json +17 -0
  17. package/browser-op/py/behavior.py +138 -0
  18. package/browser-op/py/browser.py +340 -0
  19. package/browser-op/py/captcha.py +115 -0
  20. package/browser-op/py/crawler.py +125 -0
  21. package/browser-op/py/examples/01_open_and_probe.py +48 -0
  22. package/browser-op/py/examples/02_reuse_and_probe.py +66 -0
  23. package/browser-op/py/examples/03_interact.py +66 -0
  24. package/browser-op/py/find.py +150 -0
  25. package/browser-op/py/honeypot.py +73 -0
  26. package/browser-op/py/humanize.py +392 -0
  27. package/browser-op/py/image.py +186 -0
  28. package/browser-op/py/interact.py +193 -0
  29. package/browser-op/py/markdown.py +38 -0
  30. package/browser-op/py/pyproject.toml +32 -0
  31. package/browser-op/py/ready.py +208 -0
  32. package/browser-op/py/scroll.py +180 -0
  33. package/browser-op/py/upload.py +103 -0
  34. package/browser-op/py/visual_target.py +47 -0
  35. package/browser-op/py/visualize.py +91 -0
  36. package/browser-op/state.cjs +63 -0
  37. package/browser-op/web/behavior.js +153 -0
  38. package/browser-op/web/browser.js +231 -0
  39. package/browser-op/web/captcha.js +85 -0
  40. package/browser-op/web/crawler.js +109 -0
  41. package/browser-op/web/find.js +147 -0
  42. package/browser-op/web/honeypot.js +68 -0
  43. package/browser-op/web/humanize.js +522 -0
  44. package/browser-op/web/image.js +177 -0
  45. package/browser-op/web/interact.js +169 -0
  46. package/browser-op/web/markdown.js +80 -0
  47. package/browser-op/web/ready.js +295 -0
  48. package/browser-op/web/scroll.js +167 -0
  49. package/browser-op/web/upload.js +116 -0
  50. package/browser-op/web/visual-runtime.inject.cjs +6 -0
  51. package/browser-op/webplater/.env.example +7 -0
  52. package/browser-op/webplater/ARCHITECTURE.md +102 -0
  53. package/browser-op/webplater/dist/chrome-mv3/assets/popup-BUZEUmsx.css +1 -0
  54. package/browser-op/webplater/dist/chrome-mv3/background.js +2 -0
  55. package/browser-op/webplater/dist/chrome-mv3/capture.js +310 -0
  56. package/browser-op/webplater/dist/chrome-mv3/chunks/_virtual_wxt-html-plugins-DPbbfBKe.js +1 -0
  57. package/browser-op/webplater/dist/chrome-mv3/chunks/offscreen-CFXYw9Mo.js +1 -0
  58. package/browser-op/webplater/dist/chrome-mv3/chunks/popup-C-lpxZZO.js +1 -0
  59. package/browser-op/webplater/dist/chrome-mv3/content-scripts/content.js +7 -0
  60. package/browser-op/webplater/dist/chrome-mv3/manifest.json +1 -0
  61. package/browser-op/webplater/dist/chrome-mv3/offscreen.html +16 -0
  62. package/browser-op/webplater/dist/chrome-mv3/popup.html +31 -0
  63. package/browser-op/webplater/entrypoints/background.ts +938 -0
  64. package/browser-op/webplater/entrypoints/content.ts +1150 -0
  65. package/browser-op/webplater/entrypoints/offscreen/index.html +15 -0
  66. package/browser-op/webplater/entrypoints/offscreen/main.ts +161 -0
  67. package/browser-op/webplater/entrypoints/popup/index.html +29 -0
  68. package/browser-op/webplater/entrypoints/popup/main.ts +61 -0
  69. package/browser-op/webplater/entrypoints/popup/style.css +100 -0
  70. package/browser-op/webplater/lib/snapshot.ts +352 -0
  71. package/browser-op/webplater/package.json +29 -0
  72. package/browser-op/webplater/pnpm-lock.yaml +3411 -0
  73. package/browser-op/webplater/public/capture.js +310 -0
  74. package/browser-op/webplater/scripts/publish-extension.mjs +176 -0
  75. package/browser-op/webplater/tsconfig.json +19 -0
  76. package/browser-op/webplater/wxt.config.ts +34 -0
  77. package/dist/actions.md +102 -0
  78. package/dist/cli.d.ts +2 -0
  79. package/dist/cli.d.ts.map +1 -0
  80. package/dist/cli.js +278 -0
  81. package/dist/cli.js.map +1 -0
  82. package/dist/client.d.ts +94 -0
  83. package/dist/client.d.ts.map +1 -0
  84. package/dist/client.js +277 -0
  85. package/dist/client.js.map +1 -0
  86. package/dist/config.d.ts +61 -0
  87. package/dist/config.d.ts.map +1 -0
  88. package/dist/config.js +119 -0
  89. package/dist/config.js.map +1 -0
  90. package/dist/protocol.d.ts +195 -0
  91. package/dist/protocol.d.ts.map +1 -0
  92. package/dist/protocol.js +11 -0
  93. package/dist/protocol.js.map +1 -0
  94. package/dist/server.d.ts +66 -0
  95. package/dist/server.d.ts.map +1 -0
  96. package/dist/server.js +259 -0
  97. package/dist/server.js.map +1 -0
  98. package/package.json +78 -0
  99. package/schemas/browser.clearCookies.schema.json +13 -0
  100. package/schemas/browser.close.schema.json +9 -0
  101. package/schemas/browser.getCookies.schema.json +13 -0
  102. package/schemas/browser.getDownload.schema.json +15 -0
  103. package/schemas/browser.health.schema.json +9 -0
  104. package/schemas/browser.listDownloads.schema.json +16 -0
  105. package/schemas/browser.listTabs.schema.json +9 -0
  106. package/schemas/browser.newTab.schema.json +15 -0
  107. package/schemas/browser.open.schema.json +15 -0
  108. package/schemas/browser.operate.schema.json +15 -0
  109. package/schemas/browser.reuseTab.schema.json +15 -0
  110. package/schemas/browser.setCookies.schema.json +15 -0
  111. package/schemas/browser.waitFor.schema.json +15 -0
  112. package/schemas/browser.waitForDownload.schema.json +15 -0
  113. package/skills/browser/SKILL.md +110 -0
  114. package/skills/browser/references/collect.md +163 -0
  115. package/skills/browser/references/high-risk.md +161 -0
  116. package/skills/browser/references/operate-actions.md +92 -0
  117. package/skills/browser/references/probing.md +302 -0
@@ -0,0 +1,186 @@
1
+ """web/image — 图片捕获 (多策略, 路由无关, 全走 operate).
2
+
3
+ 忠实复刻 lib/web/image.js。
4
+ 四个策略 + 保存:
5
+ - img_from_screenshot: 用 operate screenshot (**仅视觉确认, 不取资源**)
6
+ - img_from_element: evaluate 提取 src, 再 fetch_as_base64
7
+ - fetch_as_base64: evaluate 内 fetch (共享 cookie jar)
8
+ - capture_images: 扫描页面所有 img + background-image, 逐个 fetch
9
+
10
+ 依赖: browser (operate), os (file save)
11
+ """
12
+ from __future__ import annotations
13
+ import base64
14
+ import json
15
+ import os
16
+ from typing import Any
17
+
18
+ import sys
19
+ sys.path.insert(0, __file__.rsplit("\\", 1)[0] if "\\" in __file__ else __file__.rsplit("/", 1)[0])
20
+ from browser import operate # noqa: E402
21
+
22
+
23
def img_from_screenshot(page_handle: dict, options: dict | None = None) -> dict:
    """Take a screenshot through ``operate`` and split its data URL.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        options: Accepted for API compatibility.
            NOTE(review): the original docstring describes a ``selector``
            option for element screenshots, but this implementation never
            reads ``options`` -- confirm whether it should be forwarded.

    Returns:
        ``{"data": <base64 payload>, "mimeType": <mime type>}``.

    Raises:
        ValueError: If ``page_handle`` is missing, no ``dataUrl`` came back,
            or the data URL has no ``;base64,`` marker.
    """
    if not page_handle:
        raise ValueError("img_from_screenshot: page_handle required")

    shot = operate(page_handle, {"action": "screenshot"})
    if isinstance(shot, dict):
        # The data URL is read either from the top level or from "result".
        data_url = shot.get("dataUrl", "") or (shot.get("result") or {}).get("dataUrl", "")
    else:
        data_url = ""
    if not data_url:
        raise ValueError("img_from_screenshot: no dataUrl")

    pieces = data_url.split(";base64,")
    if len(pieces) < 2:
        raise ValueError("img_from_screenshot: invalid dataUrl")
    header, payload = pieces[0], pieces[1]
    return {"data": payload, "mimeType": header.replace("data:", "")}
43
+
44
+
45
def fetch_as_base64(page_handle: dict, url: str) -> dict:
    """Re-request ``url`` from inside the page and return it as base64.

    The request is made with an in-page ``fetch`` using
    ``credentials: "include"``. If that reports an error, the image is
    reloaded with ``crossOrigin = "anonymous"`` and exported through a canvas.

    A non-2xx HTTP status now counts as an error, so an error page is no
    longer returned as image data and the fallback is tried.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        url: URL of the resource to download; must be truthy.

    Returns:
        ``{"data": <base64 str>, "mimeType": <str>}``.

    Raises:
        ValueError: If ``page_handle`` or ``url`` is missing.
        RuntimeError: If both strategies fail or the response is empty.
    """
    if not page_handle:
        raise ValueError("fetch_as_base64: page_handle required")
    if not url:
        raise ValueError("fetch_as_base64: url required")

    FETCH_SOURCE = """async (u) => {
  try {
    const res = await fetch(u, { credentials: "include" });
    if (!res.ok) return { error: "HTTP " + res.status };
    const ct = res.headers.get("content-type") || "application/octet-stream";
    const buf = await res.arrayBuffer();
    const bytes = new Uint8Array(buf);
    let bin = "";
    for (let i = 0; i < bytes.length; i++) bin += String.fromCharCode(bytes[i]);
    return { data: btoa(bin), mimeType: ct };
  } catch (e) {
    return { error: String((e && e.message) || e) };
  }
}"""

    CROSS_ORIGIN_EXPORT = """async (u) => {
  return await new Promise(resolve => {
    const im = new Image();
    im.crossOrigin = "anonymous";
    im.onload = () => {
      try {
        const c = document.createElement("canvas");
        c.width = im.naturalWidth; c.height = im.naturalHeight;
        c.getContext("2d").drawImage(im, 0, 0);
        const url = c.toDataURL("image/png");
        if (url.length < 100) return resolve({ error: "toDataURL empty (tainted)" });
        resolve({ data: url.split(",")[1], mimeType: "image/png" });
      } catch (e) { resolve({ error: "canvas: " + String((e && e.message) || e) }); }
    };
    im.onerror = () => resolve({ error: "reload failed (CDN denied CORS)" });
    im.src = u;
  });
}"""

    def _evaluate(source: str) -> dict:
        # Run one in-page script; always hand back a dict so callers can .get().
        outcome = operate(page_handle, {"action": "evaluate", "source": source, "args": url})
        if isinstance(outcome, dict) and "value" in outcome:
            outcome = outcome["value"]
        return outcome if isinstance(outcome, dict) else {}

    def _as_image(payload: dict) -> dict | None:
        # A payload is usable only if it has data and no "error" key.
        if payload.get("data") and "error" not in payload:
            return {
                "data": payload["data"],
                "mimeType": payload.get("mimeType") or "application/octet-stream",
            }
        return None

    first = _evaluate(FETCH_SOURCE)
    image = _as_image(first)
    if image:
        return image

    first_error = first.get("error")
    if first_error:
        second = _evaluate(CROSS_ORIGIN_EXPORT)
        image = _as_image(second)
        if image:
            return image
        second_error = second.get("error", "")
        suffix = f" (crossOrigin fallback also failed: {second_error})" if second_error else ""
        raise RuntimeError(f"fetch_as_base64: {first_error}{suffix}")

    raise RuntimeError(f"fetch_as_base64: empty response from {url}")
109
+
110
+
111
def img_from_element(page_handle: dict, selector: str, captured: list | None = None) -> dict:
    """Resolve an element's image URL and return the image as base64.

    The URL is read in the page from ``src``, then the ``src``, ``data-src``
    and ``href`` attributes of the first element matching ``selector``.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        selector: CSS selector of the element; must be truthy.
        captured: Optional list in the shape returned by ``capture_images``
            (``[{url, data, mimeType}]``). An entry whose ``url`` equals the
            resolved URL is returned without fetching again. This cache was
            documented before but never consulted.

    Returns:
        ``{"data": <base64 str>, "mimeType": <str>}``.

    Raises:
        ValueError: If an argument is missing or no URL could be resolved.
        RuntimeError: Propagated from ``fetch_as_base64``.
    """
    if not page_handle:
        raise ValueError("img_from_element: page_handle required")
    if not selector:
        raise ValueError("img_from_element: selector required")

    SRC_SOURCE = """(s) => {
  const el = document.querySelector(s);
  if (!el) return null;
  return el.src || el.getAttribute("src") || el.getAttribute("data-src") || el.getAttribute("href") || null;
}"""

    src_r = operate(page_handle, {"action": "evaluate", "source": SRC_SOURCE, "args": selector})
    src = None
    if isinstance(src_r, str):
        src = src_r
    elif isinstance(src_r, dict):
        src = src_r.get("value") or src_r.get("result") or None
    if not src:
        raise ValueError(f"img_from_element: no src for {selector}")

    # Prefer an already-downloaded copy over a second network round trip.
    for entry in captured or []:
        if isinstance(entry, dict) and entry.get("url") == src and entry.get("data"):
            return {
                "data": entry["data"],
                "mimeType": entry.get("mimeType") or "application/octet-stream",
            }

    return fetch_as_base64(page_handle, src)
137
+
138
+
139
def capture_images(page_handle: dict, match_fn: Any = None) -> list[dict]:
    """Collect image URLs from the page and download each one.

    URLs come from ``img[src]`` elements, ``data-src`` attributes and
    ``url(...)`` entries in every element's computed ``background-image``.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        match_fn: Optional predicate ``url -> bool``; URLs it rejects are
            skipped.

    Returns:
        A list of ``{"url", "data", "mimeType"}`` dicts, one per image that
        was downloaded successfully. Failed downloads are dropped silently.

    Raises:
        ValueError: If ``page_handle`` is missing.
    """
    if not page_handle:
        raise ValueError("capture_images: page_handle required")

    scan_script = """(() => {
const urls = new Set();
document.querySelectorAll("img[src]").forEach((el) => { try { if (el.src) urls.add(el.src) } catch {} });
document.querySelectorAll("[data-src]").forEach((el) => { try { const s = el.getAttribute("data-src"); if (s) urls.add(new URL(s, location.href).href) } catch {} });
document.querySelectorAll("*").forEach((el) => {
const bg = getComputedStyle(el).backgroundImage;
if (bg && bg !== "none") {
const m = bg.matchAll(/url\\(["']?([^"')]+)["']?\\)/g);
for (const x of m) { try { urls.add(new URL(x[1], location.href).href) } catch {} }
}
});
return [...urls];
})()"""

    scanned = operate(page_handle, {"action": "evaluate", "source": scan_script})
    if isinstance(scanned, list):
        candidates = scanned
    elif isinstance(scanned, dict):
        candidates = scanned.get("value") or scanned.get("result") or []
    else:
        candidates = []

    images = []
    for link in candidates:
        if match_fn and not match_fn(link):
            continue
        try:
            fetched = fetch_as_base64(page_handle, link)
            if not fetched or not fetched.get("data"):
                continue
            images.append({"url": link, "data": fetched["data"], "mimeType": fetched["mimeType"]})
        except Exception:
            # Best effort: one image that cannot be fetched must not abort the scan.
            continue
    return images
179
+
180
+
181
def save_image(data: str, mime_type: str, save_path: str) -> dict:
    """Decode a base64 payload and write it to ``save_path``.

    Missing parent directories are created first, so a path into a new
    folder no longer fails with ``FileNotFoundError``.

    Args:
        data: Base64-encoded file content.
        mime_type: MIME type, echoed back in the result unchanged.
        save_path: Destination file path.

    Returns:
        ``{"savePath": save_path, "bytes": <bytes written>, "mimeType": mime_type}``.

    Raises:
        binascii.Error: If ``data`` is not valid base64.
        OSError: If the directory or file cannot be written.
    """
    # Decode first so invalid input does not leave empty directories behind.
    buf = base64.b64decode(data)
    parent = os.path.dirname(save_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(buf)
    return {"savePath": save_path, "bytes": len(buf), "mimeType": mime_type}
@@ -0,0 +1,193 @@
1
+ """web/interact — 页面可视化 overlay (路由无关, 纯 fixed div, 不碰业务 DOM).
2
+
3
+ 忠实复刻 lib/web/interact.js。
4
+ highlight/toast/status —— 注入页面 overlay, 仅 headed 可见。
5
+ visualMark —— eval 内部调用的全局标记函数 (数据源统一: eval 内的元素直接标记)。
6
+
7
+ 依赖: browser (operate)
8
+ """
9
+ from __future__ import annotations
10
+ import json
11
+ from typing import Any
12
+
13
+ import sys
14
+ sys.path.insert(0, __file__.rsplit("\\", 1)[0] if "\\" in __file__ else __file__.rsplit("/", 1)[0])
15
+ from browser import operate # noqa: E402
16
+
17
# DOM ids owned by this module: OVERLAY_ID is the id of the status element
# created by status(); STYLE_ID is the id of the injected <style> tag.
OVERLAY_ID = "bee-interact-overlay"
STYLE_ID = "bee-interact-style"

# Stylesheet for the overlay elements: .bee-tk is the shared box style with
# success/warn/error colour variants, .bee-status and .bee-toast position the
# two text overlays, and .bee-hl-box is the highlight rectangle.
STYLE_CSS = """
.bee-tk{position:fixed;z-index:2147483647;font-family:system-ui,Arial,sans-serif;font-size:13px;color:#fff;background:rgba(30,30,30,.92);padding:8px 12px;border-radius:6px;box-shadow:0 2px 12px rgba(0,0,0,.3);max-width:60vw}
.bee-tk.b-success{background:rgba(22,101,52,.95)} .bee-tk.b-warn{background:rgba(180,83,9,.95)} .bee-tk.b-error{background:rgba(153,27,27,.95)}
.bee-status{top:12px;right:12px}
.bee-toast{bottom:16px;right:16px;transition:opacity .3s}
.bee-hl-box{position:fixed;outline:4px solid #ff0000;outline-offset:3px;box-shadow:0 0 0 7px rgba(255,0,0,.45);z-index:2147483647;pointer-events:none;border-radius:2px}
"""
27
+
28
# --- visualMark: page-side marker function, callable from evaluated scripts ---
# Must be defined before ENSURE_STYLE_SOURCE, which concatenates this source
# into the script that installs it as window.visualMark.
# Accepted targets: an Element, {x, y}, {x, y, w, h}, or an array of those.
# Draws fixed-position .bee-hl-box divs appended to document.body; each box
# fades out after 3 seconds and is then removed. Existing boxes are removed
# first. The `label` parameter is accepted but not used by the function body.
VISUAL_MARK_FN = """function (target, label) {
document.querySelectorAll('.bee-hl-box').forEach(function (e) { e.remove() })
function drawRect (x, y, w, h) {
if (w <= 0 || h <= 0) return
var box = document.createElement('div')
box.className = 'bee-hl-box'
box.style.cssText = 'left:' + x + 'px;top:' + y + 'px;width:' + w + 'px;height:' + h + 'px'
document.body.appendChild(box)
setTimeout(function () { box.style.transition = 'opacity 0.3s'; box.style.opacity = '0'; setTimeout(function () { box.remove() }, 300) }, 3000)
}
function markOne (t) {
if (!t) return
if (t instanceof Element) {
var r = t.getBoundingClientRect()
drawRect(r.left, r.top, r.width, r.height)
} else if (typeof t === 'object' && t.x != null) {
if (t.w != null) drawRect(t.x, t.y, t.w, t.h)
else drawRect(t.x - 15, t.y - 8, 30, 16)
}
}
if (Array.isArray(target)) target.forEach(markOne)
else markOne(target)
}"""
55
+
56
# Script that injects the stylesheet and installs window.visualMark.
# It returns immediately when window.visualMark already exists, and only adds
# the <style> tag when no element with STYLE_ID is present, so running it
# repeatedly on the same document is harmless.
ENSURE_STYLE_SOURCE = """(() => {
if (window.visualMark) return null
if (!document.getElementById('""" + STYLE_ID + """')) {
var s = document.createElement('style')
s.id = '""" + STYLE_ID + """'
s.textContent = """ + json.dumps(STYLE_CSS) + """
document.head.appendChild(s)
}
window.visualMark = """ + VISUAL_MARK_FN + """
return null
})()"""
68
+
69
+
70
async def ensure_style(page_handle: dict) -> None:
    """Run ``ENSURE_STYLE_SOURCE`` in the page.

    ``operate`` is called synchronously by the sibling modules, so awaiting
    its result unconditionally fails when it returns a plain value. The
    result is awaited only when it is awaitable, which works either way.

    Args:
        page_handle: Page handle forwarded to ``operate``.
    """
    import inspect

    outcome = operate(
        page_handle,
        {
            "action": "evaluate",
            "source": ENSURE_STYLE_SOURCE,
            "_skipVisualize": True,
            "_skipHumanize": True,
        },
    )
    if inspect.isawaitable(outcome):
        await outcome
72
+
73
+
74
# --- highlight: target is resolved by the caller and passed in as a dict ---

# Page-side overlay logic. It only appends fixed-position .bee-hl-box divs to
# document.body; matched elements themselves are not modified.
# `target.kind` selects the strategy:
#   'selector' -> first element matching target.selector
#   'text'     -> first text node containing target.text, else an element
#                 whose innerText equals or contains it
#   'point'    -> a 30x16 box centred on (target.x, target.y)
#   'rect'     -> the given x/y/w/h
#   'pair'     -> both target.source and target.target selectors
# Returns {ok, rects} on success or {ok: false, reason} otherwise.
HIGHLIGHT_OVERLAY = """function (target) {
document.querySelectorAll('.bee-hl-box').forEach(function (e) { e.remove() })
var rects = []
function drawRect (x, y, w, h) {
if (w <= 0 || h <= 0) return false
var box = document.createElement('div')
box.className = 'bee-hl-box'
box.style.cssText = 'left:' + x + 'px;top:' + y + 'px;width:' + w + 'px;height:' + h + 'px'
document.body.appendChild(box)
setTimeout(function () { box.style.transition = 'opacity 0.3s'; box.style.opacity = '0'; setTimeout(function () { box.remove() }, 300) }, 3000)
rects.push({ x: x, y: y, w: w, h: h })
return true
}
function resolveSelector (sel) {
try { var el = document.querySelector(sel); if (!el) return null; var r = el.getBoundingClientRect(); return { x: r.left, y: r.top, w: r.width, h: r.height } } catch (e) { return null }
}
function resolveText (text) {
var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null)
var node
while ((node = walker.nextNode())) {
if (node.textContent && node.textContent.indexOf(text) >= 0) {
var range = document.createRange(); range.selectNodeContents(node)
var cr = range.getClientRects()
for (var i = 0; i < cr.length; i++) { if (cr[i].width > 0 && cr[i].height > 0) return { x: cr[i].left, y: cr[i].top, w: cr[i].width, h: cr[i].height } }
}
}
var all = [].slice.call(document.querySelectorAll('*'))
var hit = all.find(function (e) { return e.children.length === 0 && (e.innerText || '').trim() === text })
if (!hit) hit = all.find(function (e) { return (e.innerText || '').indexOf(text) >= 0 })
if (!hit) return null
var r = hit.getBoundingClientRect()
return { x: r.left, y: r.top, w: r.width, h: r.height }
}
try {
if (target.kind === 'selector') { var rect = resolveSelector(target.selector); if (rect && drawRect(rect.x, rect.y, rect.w, rect.h)) return { ok: true, rects: rects }; return { ok: false, reason: 'selector not found: ' + target.selector } }
if (target.kind === 'text') { var rect2 = resolveText(target.text); if (rect2 && drawRect(rect2.x, rect2.y, rect2.w, rect2.h)) return { ok: true, rects: rects }; return { ok: false, reason: 'text not found: ' + target.text } }
if (target.kind === 'point') { drawRect(target.x - 15, target.y - 8, 30, 16); return { ok: true, rects: rects } }
if (target.kind === 'rect') { drawRect(target.x, target.y, target.w, target.h); return { ok: true, rects: rects } }
if (target.kind === 'pair') { var ok1 = false, ok2 = false; var r1 = resolveSelector(target.source); if (r1) { drawRect(r1.x, r1.y, r1.w, r1.h); ok1 = true } var r2 = resolveSelector(target.target); if (r2) { drawRect(r2.x, r2.y, r2.w, r2.h); ok2 = true } return { ok: ok1 && ok2, rects: rects, reason: (!ok1 ? 'source ' : '') + (!ok2 ? 'target' : '') } }
return { ok: false, reason: 'unknown kind: ' + target.kind }
} catch (e) { return { ok: false, reason: e.message } }
}"""
119
+
120
+
121
async def highlight(page_handle: dict, visual_target: dict | None, options: dict | None = None) -> dict:
    """Highlight a target with an overlay box, then optionally pause.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        visual_target: Target descriptor passed to ``HIGHLIGHT_OVERLAY`` as
            its argument. When falsy, nothing is sent to the page.
        options: Optional dict; ``ms`` (default 1000) is how long to wait
            after a successful highlight.

    Returns:
        The page-side result dict, ``{"ok": False, "reason": "no target"}``
        when there is no target, or ``{"ok": False, "reason": "no result"}``
        when the page returned nothing usable.

    Raises:
        ValueError: If ``page_handle`` is missing.
    """
    if not page_handle:
        raise ValueError("highlight: page_handle required")
    if not visual_target:
        return {"ok": False, "reason": "no target"}

    import asyncio
    import inspect

    await ensure_style(page_handle)
    result = operate(page_handle, {
        "action": "evaluate",
        "source": HIGHLIGHT_OVERLAY,
        "args": visual_target,
        "_skipVisualize": True,
        "_skipHumanize": True,
    })
    # Sibling modules call operate() synchronously; await only if needed.
    if inspect.isawaitable(result):
        result = await result
    # Other callers in this package unwrap a {"value": ...} envelope; do the same.
    if isinstance(result, dict) and "ok" not in result and isinstance(result.get("value"), dict):
        result = result["value"]

    r = result if isinstance(result, dict) else {"ok": False, "reason": "no result"}
    ms = (options or {}).get("ms", 1000)
    if r.get("ok") and ms and ms > 0:
        await asyncio.sleep(ms / 1000)
    return r
141
+
142
+
143
async def clear(page_handle: dict) -> None:
    """Remove every ``.bee-tk`` and ``.bee-hl-box`` overlay element.

    The ``operate`` result is awaited only when it is awaitable, because the
    sibling modules call ``operate`` synchronously.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.

    Raises:
        ValueError: If ``page_handle`` is missing.
    """
    if not page_handle:
        raise ValueError("clear: page_handle required")

    import inspect

    outcome = operate(page_handle, {
        "action": "evaluate",
        "source": '(() => { document.querySelectorAll(".bee-tk,.bee-hl-box").forEach(function(e){e.remove()}); return null })()',
        "_skipVisualize": True, "_skipHumanize": True,
    })
    if inspect.isawaitable(outcome):
        await outcome
    return None
153
+
154
+
155
async def toast(page_handle: dict, message: str, level_or_opts: str | dict = "info") -> None:
    """Show a toast using the ``bee-toast`` style.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        message: Text shown in the toast.
        level_or_opts: Either a level name (``"success"``, ``"warn"``,
            ``"error"``; anything else gets the default style) or a dict with
            optional ``level`` and ``ms`` (display time, default 2500).

    Raises:
        ValueError: If ``page_handle`` is missing.
    """
    if not page_handle:
        raise ValueError("toast: page_handle required")

    import inspect

    if isinstance(level_or_opts, dict):
        level = level_or_opts.get("level", "info")
        ms = level_or_opts.get("ms", 2500)
    else:
        level, ms = level_or_opts, 2500

    # ms used to be pasted into the script verbatim; a non-numeric value broke
    # the setTimeout call and left the toast on screen. Fall back instead.
    try:
        delay = max(0, int(ms))
    except (TypeError, ValueError):
        delay = 2500

    variants = {"success": "b-success", "warn": "b-warn", "error": "b-error"}
    cls = variants.get(level, "") if isinstance(level, str) else ""

    await ensure_style(page_handle)
    src = (
        "(() => {\n"
        '  const e = document.createElement("div")\n'
        "  e.className = " + json.dumps("bee-tk bee-toast " + cls) + "\n"
        "  e.textContent = " + json.dumps(message) + "\n"
        "  document.body.appendChild(e)\n"
        '  setTimeout(() => { e.style.opacity = "0"; setTimeout(() => e.remove(), 300) }, '
        + str(delay) + ")\n"
        "  return null\n"
        "})()"
    )
    outcome = operate(page_handle, {"action": "evaluate", "source": src, "_skipVisualize": True, "_skipHumanize": True})
    # Sibling modules call operate() synchronously; await only if needed.
    if inspect.isawaitable(outcome):
        await outcome
    return None
173
+
174
+
175
async def status(page_handle: dict, text: str, step: str | None = None) -> None:
    """Create or update the status element using the ``bee-status`` style.

    The element is looked up by ``OVERLAY_ID`` and created on first use, so
    repeated calls replace the text instead of stacking elements.

    Args:
        page_handle: Page handle forwarded to ``operate``; must be truthy.
        text: Status text.
        step: Optional prefix; when given the content is ``"<step>: <text>"``.

    Raises:
        ValueError: If ``page_handle`` is missing.
    """
    if not page_handle:
        raise ValueError("status: page_handle required")

    import inspect

    await ensure_style(page_handle)
    content = f"{step}: {text}" if step else str(text)
    src = """(() => {
  let e = document.getElementById('""" + OVERLAY_ID + """')
  if (!e) { e = document.createElement("div"); e.id = '""" + OVERLAY_ID + """'; e.className = "bee-tk bee-status"; document.body.appendChild(e) }
  e.textContent = """ + json.dumps(content) + """
  return null
})()"""
    outcome = operate(page_handle, {"action": "evaluate", "source": src, "_skipVisualize": True, "_skipHumanize": True})
    # Sibling modules call operate() synchronously; await only if needed.
    if inspect.isawaitable(outcome):
        await outcome
    return None
189
+
190
+
191
def visual_mark_fn_source() -> str:
    """Expose the JavaScript source of ``visualMark``.

    Returns:
        The ``VISUAL_MARK_FN`` string, for modules that inject the function
        into their own evaluated scripts.
    """
    js_source = VISUAL_MARK_FN
    return js_source
@@ -0,0 +1,38 @@
1
+ """web/markdown — HTML 转 Markdown (纯函数, 路由无关).
2
+
3
+ 忠实复刻 lib/web/markdown.js。
4
+ 不依赖任何浏览器通道。HTML 字符串进来, Markdown 出去。
5
+ 在 Python 中用 html2text 或 markdownify 替代 turndown + cheerio。
6
+
7
+ 依赖: markdownify (pip install markdownify)
8
+ """
9
+ from __future__ import annotations
10
+ import re
11
+ from typing import Any
12
+
13
+
14
def html_to_markdown(html: str) -> dict:
    """Convert an HTML string to Markdown.

    Steps: read the ``<title>`` text, drop ``<script>``/``<style>`` elements,
    convert with ``markdownify`` using ATX headings, then replace inline
    base64 image payloads with a short placeholder.

    Args:
        html: HTML source. ``None`` is treated as an empty document.

    Returns:
        ``{"markdown": str, "title": str | None}``.

    Raises:
        RuntimeError: If ``markdownify`` is not installed.
    """
    # Import on its own so an ImportError raised inside the conversion call is
    # not misreported as "package missing".
    try:
        import markdownify
    except ImportError as exc:
        raise RuntimeError("web/markdown 需要 markdownify, 请先安装: pip install markdownify") from exc

    html = html or ""

    title = None
    m = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
    if m:
        title = m.group(1).strip()

    # Remove script/style elements together with their content.
    cleaned = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.IGNORECASE | re.DOTALL)

    md = markdownify.markdownify(cleaned, heading_style="ATX", strip=["script", "style"])

    # Truncate data URIs. The placeholder used to read "data:data:image/...".
    md = re.sub(r'!\[([^\]]*)\]\(data:image/[^;]+;base64,[^\)]+\)', r'![\1](data:image/...)', md)

    return {"markdown": md.strip(), "title": title}
@@ -0,0 +1,32 @@
1
+ [project]
2
+ name = "mooncat-browser-client"
3
+ version = "0.1.0"
4
+ description = "Python client for the local browser daemon"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "requests>=2.32.0",
8
+ ]
9
+
10
+ [build-system]
11
+ requires = ["hatchling"]
12
+ build-backend = "hatchling.build"
13
+
14
+ [tool.hatch.build.targets.wheel]
15
+ py-modules = [
16
+ "behavior",
17
+ "browser",
18
+ "captcha",
19
+ "crawler",
20
+ "find",
21
+ "honeypot",
22
+ "humanize",
23
+ "image",
24
+ "interact",
25
+ "markdown",
26
+ "ready",
27
+ "scroll",
28
+ "upload",
29
+ "visualize",
30
+ "visual_target",
31
+ ]
32
+
@@ -0,0 +1,208 @@
1
+ """web/ready — 页面就绪等待 + 网页内弹窗清理 (路由无关).
2
+
3
+ 忠实复刻 lib/web/ready.js。
4
+ wait_for_ready() 等待采集前提
5
+ dismiss_popups() 关闭网页内 DOM 弹窗
6
+
7
+ 依赖: browser (operate), find
8
+ """
9
+ from __future__ import annotations
10
+ import json
11
+ import time
12
+ import random
13
+
14
+ import sys
15
+ sys.path.insert(0, __file__.rsplit("\\", 1)[0] if "\\" in __file__ else __file__.rsplit("/", 1)[0])
16
+ from browser import operate
17
+ from find import find_element
18
+
19
# CSS selectors that dismiss_popups() hands to the page script as candidates
# for a popup's close control; they are tried in list order.
POPUP_CLOSE_SELECTORS = [
    '[class*="close"][aria-label]', '[class*="Close"]',
    'button[class*="close"]', 'a[class*="close"]',
    '.modal-close', '.dialog-close', '.popup-close',
    '[class*="modal"] [class*="close"]', '[class*="dialog"] [class*="close"]',
    '[class*="icon-close"]', '[class*="iconClose"]', '.icon-close',
]

# Button captions that dismiss_popups() hands to the page script as the list
# of dismiss texts (Chinese UI labels such as "got it", "OK", "close", "cancel").
CONFIRM_TEXTS = ["我知道了", "知道了", "确定", "关闭", "不再提示", "下次再说", "残忍拒绝", "暂不", "取消"]
28
+
29
+
30
def wait_for_ready(page_handle, opts=None):
    """Poll the page until a readiness condition holds, reloading if needed.

    Options (all optional keys of ``opts``):
        readyText: Text or list of texts; the page is ready when any of them
            appears in the snapshot ``yaml`` or in ``document.body.innerText``.
        readySelector: Selector; ready when the ``count`` action reports > 0.
        timeoutMs: Polling budget per attempt (default 30000).
        pollMs: Pause between polls (default 2000).
        reloadMax: Reloads to try after a timed-out attempt (default 1).
        settleMs: Extra pause after readiness is detected (default 2000).

    With neither ``readyText`` nor ``readySelector`` the check can never
    succeed, so the call runs into the timeout.

    Returns:
        ``{"ready": True, "via": <what matched>, "attempts": <poll count>}``.

    Raises:
        RuntimeError: If no condition was met within all attempts.
    """
    opts = opts or {}
    ready_text = opts.get("readyText") or []
    if isinstance(ready_text, str):
        # A bare string would otherwise be matched one character at a time.
        ready_text = [ready_text]
    ready_selector = opts.get("readySelector")
    timeout_ms = opts.get("timeoutMs", 30000)
    poll_ms = opts.get("pollMs", 2000)
    reload_max = opts.get("reloadMax", 1)
    settle_ms = opts.get("settleMs", 2000)

    def check():
        if ready_text:
            snap = operate(page_handle, {"action": "snapshot", "_skipVisualize": True})
            yaml = snap.get("yaml") if isinstance(snap, dict) else None
            if not isinstance(yaml, str):
                yaml = ""  # a None/absent value must not break the `in` test
            for t in ready_text:
                if t in yaml:
                    return {"ready": True, "via": "snapshot:" + t}
            body = operate(page_handle, {"action": "evaluate", "source": "() => (document.body && document.body.innerText) || \"\"", "_skipVisualize": True})
            if isinstance(body, dict):
                body = body.get("value")
            body_text = body if isinstance(body, str) else ""
            for t in ready_text:
                if t in body_text:
                    return {"ready": True, "via": "text:" + t}
        if ready_selector:
            c = operate(page_handle, {"action": "count", "selector": ready_selector, "_skipVisualize": True})
            if isinstance(c, dict):
                c = c.get("count", 0)
            count = int(c) if isinstance(c, (int, float)) else 0
            if count > 0:
                return {"ready": True, "via": "selector:" + ready_selector}
        return {"ready": False}

    attempts = 0
    for reload_cnt in range(reload_max + 1):
        # Monotonic clock: the deadline is unaffected by wall-clock changes.
        deadline = time.monotonic() + timeout_ms / 1000
        while time.monotonic() < deadline:
            attempts += 1
            r = check()
            if r.get("ready"):
                if settle_ms > 0:
                    time.sleep(settle_ms / 1000)
                return {"ready": True, "via": r["via"], "attempts": attempts}
            time.sleep(poll_ms / 1000)
        if reload_cnt < reload_max:
            operate(page_handle, {"action": "reload", "_skipVisualize": True})
            time.sleep(3)

    raise RuntimeError(
        "waitForReady \u8d85\u65f6: \u672a\u7b49\u5230\u5c31\u7eea\u6761\u4ef6 text="
        + json.dumps(ready_text) + " sel=" + (ready_selector or "\u65e0")
        + " (\u5c1d\u8bd5 " + str(attempts) + " \u6b21, \u5237\u65b0 " + str(reload_max) + " \u6b21)"
    )
89
+
90
+
91
# Page script template for dismiss_popups(). The two %s placeholders receive
# JSON arrays: dismiss texts first, close-control selectors second.
# It returns {d: <number of dismissals>, m: <list of method tags>}.
#
# Fixes over the previous version:
#   * the visibility test read r.w / r.h / c.v / c.d / c.o, none of which
#     exist on DOMRect or CSSStyleDeclaration, so no element ever counted as
#     visible and nothing was dismissed;
#   * the text test was `t.indexOf(t)` with the label shadowing the text
#     list, so it was always true; it now looks the label up in the list.
_JS_DISMISS = (
    "(() => {"
    "const texts=%s;"
    "const sels=%s;"
    "const m=[];let d=0;"
    "const vis=e=>{if(!e)return false;const r=e.getBoundingClientRect();const cs=getComputedStyle(e);"
    "return r.width>0&&r.height>0&&cs.visibility!='hidden'&&cs.display!='none'&&cs.opacity!='0'};"
    # A visible element counts as handled even when click() throws.
    "const hit=(e,how)=>{if(!vis(e))return false;try{e.click();d++;m.push(how)}catch(err){}return true};"
    "for(const sel of sels){let els;try{els=Array.from(document.querySelectorAll(sel))}catch(err){continue}"
    "for(const e of els){if(hit(e,'s:'+sel))return{d,m}}}"
    "const cands=Array.from(document.querySelectorAll('button,a,span,div'));"
    "for(const e of cands){const label=(e.innerText||e.textContent||'').trim();"
    "if(label&&label.length<=6&&texts.indexOf(label)>=0){if(hit(e,'t:'+label))return{d,m}}}"
    "const masks=Array.from(document.querySelectorAll('[class*=\"modal\"],[class*=\"popup\"],[class*=\"dialog\"],[class*=\"Mask\"],[class*=\"overlay\"]'));"
    # Last resort: hide visible overlay containers that offer no button or link.
    "for(const e of masks){if(vis(e)&&!e.querySelector('button,a')){try{e.style.display='none';d++;m.push('h')}catch(err){}}}"
    "return{d,m}"
    "})()"
)
106
+
107
+
108
def dismiss_popups(page_handle, opts=None):
    """Run the popup-dismissal script in the page.

    Args:
        page_handle: Page handle forwarded to ``operate``.
        opts: Optional dict. ``extraCloseTexts`` and ``extraSelectors`` are
            appended to ``CONFIRM_TEXTS`` and ``POPUP_CLOSE_SELECTORS``. Any
            iterable is accepted, and ``None`` is treated as empty.

    Returns:
        ``{"dismissed": <count>, "methods": <list of method tags>}``; zero and
        an empty list when the page returned nothing usable.
    """
    opts = opts or {}
    # list(... or []) keeps None or a tuple from breaking the concatenation.
    texts = CONFIRM_TEXTS + list(opts.get("extraCloseTexts") or [])
    sels = POPUP_CLOSE_SELECTORS + list(opts.get("extraSelectors") or [])
    source = _JS_DISMISS % (json.dumps(texts), json.dumps(sels))
    result = operate(page_handle, {"action": "evaluate", "source": source, "_skipVisualize": True})
    # Other callers in this package unwrap a {"value": ...} envelope; do the same.
    if isinstance(result, dict) and isinstance(result.get("value"), dict):
        result = result["value"]
    if isinstance(result, dict):
        return {"dismissed": result.get("d", 0), "methods": result.get("m", [])}
    return {"dismissed": 0, "methods": []}
117
+
118
+
119
+ # ─── scroll: 滚动封装 (SPA 页面数据常需滚动才渲染) ───
120
+ # 模拟人类: 分步 scrollBy + 随机停顿 (不是原生 smooth, 原生平滑无停顿不触发懒加载)
121
+
122
+
123
def human_scroll_to_y(page_handle, target_y, max_steps=200):
    """Scroll to ``target_y`` in small randomised steps.

    The page script scrolls 30-59 px per step with a 30-60 ms pause between
    steps, and stops once within 5 px of the target or after ``max_steps``.

    Args:
        page_handle: Page handle forwarded to ``operate``.
        target_y: Target ``scrollY``, passed to the script as its argument.
        max_steps: Step cap written into the script text.
    """
    script_head = """(targetY) => new Promise((resolve) => {
const start = window.scrollY || window.pageYOffset || 0
const dir = targetY > start ? 1 : -1
let steps = 0
const tick = () => {
const cur = window.scrollY || window.pageYOffset || 0
steps++
if (steps > """
    script_tail = """ || (dir > 0 ? cur >= targetY - 5 : cur <= targetY + 5)) { resolve(); return }
const step = 30 + Math.floor(Math.random() * 30)
window.scrollBy(0, dir * step)
setTimeout(tick, 30 + Math.random() * 30)
}
tick()
})"""
    request = {
        "action": "evaluate",
        "source": script_head + str(max_steps) + script_tail,
        "args": target_y,
        "_skipVisualize": True,
    }
    operate(page_handle, request)
140
+
141
+
142
def scroll_to_text(page_handle, text, opts=None):
    """Scroll to the element containing ``text``.

    The element is located with ``find_element``. If it is not found, the
    page is scrolled down in small random steps (at most 60), searching again
    after each step, until the text appears or the bottom is reached. Then
    the page is scrolled step-wise to the computed position.

    Args:
        page_handle: Page handle forwarded to ``operate``/``find_element``.
        text: Text to look for.
        opts: Optional dict; ``exact`` (bool) is forwarded to ``find_element``
            and ``block`` (``"start"``, ``"center"`` (default) or ``"end"``)
            selects where in the viewport the element should land.

    Returns:
        ``{"found": bool, "scrolled": bool}``.
    """
    opts = opts or {}
    exact = bool(opts.get("exact"))
    block = opts.get("block", "center")

    def _evaluate(source):
        # Other callers in this package expect evaluate results either bare or
        # wrapped as {"value": ...}; normalise so both work here as well.
        outcome = operate(page_handle, {"action": "evaluate", "source": source, "_skipVisualize": True})
        if isinstance(outcome, dict) and "value" in outcome:
            outcome = outcome["value"]
        return outcome

    def find_pos():
        el = find_element(page_handle, {"text": text, "exact": exact})
        if not isinstance(el, dict) or not el.get("found"):
            return {"found": False}
        vh = _evaluate("()=>window.innerHeight")
        vh = vh if isinstance(vh, (int, float)) else 800
        offset = vh / 2
        if block == "start":
            offset = 50
        elif block == "end":
            offset = vh - 50
        return {"found": True, "targetY": max(0, el.get("y", 0) + el.get("height", 0) / 2 - offset)}

    pos = find_pos()
    if not pos.get("found"):
        for _ in range(60):
            operate(page_handle, {"action": "evaluate", "source": "() => { window.scrollBy(0, 40 + Math.floor(Math.random()*30)); return null }", "_skipVisualize": True})
            time.sleep(0.06 + random.random() * 0.06)
            pos = find_pos()
            if pos.get("found"):
                break
            try:
                # Compare with True: a wrapped result dict used to be truthy
                # and ended the search after the first step.
                at_bottom = _evaluate("()=>(window.innerHeight + (window.scrollY||0) + 5) >= document.body.scrollHeight") is True
            except Exception:
                at_bottom = False
            if at_bottom:
                break
    if not pos.get("found"):
        return {"found": False, "scrolled": False}
    human_scroll_to_y(page_handle, pos["targetY"])
    time.sleep(0.6)
    return {"found": True, "scrolled": True}
181
+
182
+
183
def scroll_to(page_handle, selector, opts=None):
    """Scroll step-wise to the element matching ``selector``.

    Args:
        page_handle: Page handle forwarded to ``operate``/``find_element``.
        selector: Selector passed to ``find_element``.
        opts: Optional dict; ``block`` (``"start"``, ``"center"`` (default)
            or ``"end"``) selects where in the viewport the element lands.

    Returns:
        ``{"found": bool, "scrolled": bool}``.
    """
    opts = opts or {}
    block = opts.get("block", "center")
    el = find_element(page_handle, {"selector": selector})
    if not isinstance(el, dict) or not el.get("found"):
        return {"found": False, "scrolled": False}

    vh = operate(page_handle, {"action": "evaluate", "source": "()=>window.innerHeight", "_skipVisualize": True})
    # Accept a {"value": ...} envelope, as other callers in this package do,
    # instead of silently falling back to 800.
    if isinstance(vh, dict) and "value" in vh:
        vh = vh["value"]
    vh = vh if isinstance(vh, (int, float)) else 800

    offset = vh / 2
    if block == "start":
        offset = 50
    elif block == "end":
        offset = vh - 50
    target_y = max(0, el.get("y", 0) + el.get("height", 0) / 2 - offset)
    human_scroll_to_y(page_handle, target_y)
    return {"found": True, "scrolled": True}
200
+
201
+
202
def scroll_to_end(page_handle, to="top", max_steps=200):
    """Scroll step-wise to the top or the bottom of the page.

    The bottom loop used to stop as soon as ``scrollHeight`` repeated between
    two ticks. That happens on the second tick of any page that is not
    growing, so it scrolled only about two steps. It now stops once the
    viewport is at the bottom and the height has been stable for two ticks,
    or after ``max_steps`` ticks.

    Args:
        page_handle: Page handle forwarded to ``operate``.
        to: ``"bottom"`` scrolls down; any other value scrolls to the top.
        max_steps: Upper bound on scroll ticks for the bottom case.

    Returns:
        ``True``.
    """
    if to == "bottom":
        source = (
            "() => new Promise((resolve) => {"
            " let last = -1, stable = 0, ticks = 0;"
            " const t = setInterval(() => {"
            " window.scrollBy(0, 80 + Math.random() * 60);"
            " const h = document.body.scrollHeight;"
            " const atBottom = (window.innerHeight + (window.scrollY || 0) + 5) >= h;"
            " stable = (atBottom && h === last) ? stable + 1 : 0;"
            " last = h; ticks++;"
            " if (stable >= 2 || ticks >= " + str(int(max_steps)) + ") { clearInterval(t); resolve() }"
            " }, 80 + Math.random() * 60)"
            " })"
        )
        operate(page_handle, {"action": "evaluate", "source": source, "_skipVisualize": True})
    else:
        human_scroll_to_y(page_handle, 0)
    return True