mooncat-browser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/README.md +213 -0
  2. package/browser-op/backend/browserd.cjs +1004 -0
  3. package/browser-op/backend/rpc-client.cjs +64 -0
  4. package/browser-op/backend/state.cjs +51 -0
  5. package/browser-op/cdp/capture-inject.js +426 -0
  6. package/browser-op/cdp/capture-inject.ts +426 -0
  7. package/browser-op/cdp/capture-service.cjs +172 -0
  8. package/browser-op/cdp/chrome-launcher.cjs +370 -0
  9. package/browser-op/cdp/chrome-path.cjs +57 -0
  10. package/browser-op/cdp/state.cjs +89 -0
  11. package/browser-op/extension/extension-detect.cjs +228 -0
  12. package/browser-op/extension/server.cjs +197 -0
  13. package/browser-op/extension/service.cjs +228 -0
  14. package/browser-op/extension/state.cjs +78 -0
  15. package/browser-op/index.cjs +389 -0
  16. package/browser-op/package.json +17 -0
  17. package/browser-op/py/behavior.py +138 -0
  18. package/browser-op/py/browser.py +340 -0
  19. package/browser-op/py/captcha.py +115 -0
  20. package/browser-op/py/crawler.py +125 -0
  21. package/browser-op/py/examples/01_open_and_probe.py +48 -0
  22. package/browser-op/py/examples/02_reuse_and_probe.py +66 -0
  23. package/browser-op/py/examples/03_interact.py +66 -0
  24. package/browser-op/py/find.py +150 -0
  25. package/browser-op/py/honeypot.py +73 -0
  26. package/browser-op/py/humanize.py +392 -0
  27. package/browser-op/py/image.py +186 -0
  28. package/browser-op/py/interact.py +193 -0
  29. package/browser-op/py/markdown.py +38 -0
  30. package/browser-op/py/pyproject.toml +32 -0
  31. package/browser-op/py/ready.py +208 -0
  32. package/browser-op/py/scroll.py +180 -0
  33. package/browser-op/py/upload.py +103 -0
  34. package/browser-op/py/visual_target.py +47 -0
  35. package/browser-op/py/visualize.py +91 -0
  36. package/browser-op/state.cjs +63 -0
  37. package/browser-op/web/behavior.js +153 -0
  38. package/browser-op/web/browser.js +231 -0
  39. package/browser-op/web/captcha.js +85 -0
  40. package/browser-op/web/crawler.js +109 -0
  41. package/browser-op/web/find.js +147 -0
  42. package/browser-op/web/honeypot.js +68 -0
  43. package/browser-op/web/humanize.js +522 -0
  44. package/browser-op/web/image.js +177 -0
  45. package/browser-op/web/interact.js +169 -0
  46. package/browser-op/web/markdown.js +80 -0
  47. package/browser-op/web/ready.js +295 -0
  48. package/browser-op/web/scroll.js +167 -0
  49. package/browser-op/web/upload.js +116 -0
  50. package/browser-op/web/visual-runtime.inject.cjs +6 -0
  51. package/browser-op/webplater/.env.example +7 -0
  52. package/browser-op/webplater/ARCHITECTURE.md +102 -0
  53. package/browser-op/webplater/dist/chrome-mv3/assets/popup-BUZEUmsx.css +1 -0
  54. package/browser-op/webplater/dist/chrome-mv3/background.js +2 -0
  55. package/browser-op/webplater/dist/chrome-mv3/capture.js +310 -0
  56. package/browser-op/webplater/dist/chrome-mv3/chunks/_virtual_wxt-html-plugins-DPbbfBKe.js +1 -0
  57. package/browser-op/webplater/dist/chrome-mv3/chunks/offscreen-CFXYw9Mo.js +1 -0
  58. package/browser-op/webplater/dist/chrome-mv3/chunks/popup-C-lpxZZO.js +1 -0
  59. package/browser-op/webplater/dist/chrome-mv3/content-scripts/content.js +7 -0
  60. package/browser-op/webplater/dist/chrome-mv3/manifest.json +1 -0
  61. package/browser-op/webplater/dist/chrome-mv3/offscreen.html +16 -0
  62. package/browser-op/webplater/dist/chrome-mv3/popup.html +31 -0
  63. package/browser-op/webplater/entrypoints/background.ts +938 -0
  64. package/browser-op/webplater/entrypoints/content.ts +1150 -0
  65. package/browser-op/webplater/entrypoints/offscreen/index.html +15 -0
  66. package/browser-op/webplater/entrypoints/offscreen/main.ts +161 -0
  67. package/browser-op/webplater/entrypoints/popup/index.html +29 -0
  68. package/browser-op/webplater/entrypoints/popup/main.ts +61 -0
  69. package/browser-op/webplater/entrypoints/popup/style.css +100 -0
  70. package/browser-op/webplater/lib/snapshot.ts +352 -0
  71. package/browser-op/webplater/package.json +29 -0
  72. package/browser-op/webplater/pnpm-lock.yaml +3411 -0
  73. package/browser-op/webplater/public/capture.js +310 -0
  74. package/browser-op/webplater/scripts/publish-extension.mjs +176 -0
  75. package/browser-op/webplater/tsconfig.json +19 -0
  76. package/browser-op/webplater/wxt.config.ts +34 -0
  77. package/dist/actions.md +102 -0
  78. package/dist/cli.d.ts +2 -0
  79. package/dist/cli.d.ts.map +1 -0
  80. package/dist/cli.js +278 -0
  81. package/dist/cli.js.map +1 -0
  82. package/dist/client.d.ts +94 -0
  83. package/dist/client.d.ts.map +1 -0
  84. package/dist/client.js +277 -0
  85. package/dist/client.js.map +1 -0
  86. package/dist/config.d.ts +61 -0
  87. package/dist/config.d.ts.map +1 -0
  88. package/dist/config.js +119 -0
  89. package/dist/config.js.map +1 -0
  90. package/dist/protocol.d.ts +195 -0
  91. package/dist/protocol.d.ts.map +1 -0
  92. package/dist/protocol.js +11 -0
  93. package/dist/protocol.js.map +1 -0
  94. package/dist/server.d.ts +66 -0
  95. package/dist/server.d.ts.map +1 -0
  96. package/dist/server.js +259 -0
  97. package/dist/server.js.map +1 -0
  98. package/package.json +78 -0
  99. package/schemas/browser.clearCookies.schema.json +13 -0
  100. package/schemas/browser.close.schema.json +9 -0
  101. package/schemas/browser.getCookies.schema.json +13 -0
  102. package/schemas/browser.getDownload.schema.json +15 -0
  103. package/schemas/browser.health.schema.json +9 -0
  104. package/schemas/browser.listDownloads.schema.json +16 -0
  105. package/schemas/browser.listTabs.schema.json +9 -0
  106. package/schemas/browser.newTab.schema.json +15 -0
  107. package/schemas/browser.open.schema.json +15 -0
  108. package/schemas/browser.operate.schema.json +15 -0
  109. package/schemas/browser.reuseTab.schema.json +15 -0
  110. package/schemas/browser.setCookies.schema.json +15 -0
  111. package/schemas/browser.waitFor.schema.json +15 -0
  112. package/schemas/browser.waitForDownload.schema.json +15 -0
  113. package/skills/browser/SKILL.md +110 -0
  114. package/skills/browser/references/collect.md +163 -0
  115. package/skills/browser/references/high-risk.md +161 -0
  116. package/skills/browser/references/operate-actions.md +92 -0
  117. package/skills/browser/references/probing.md +302 -0
@@ -0,0 +1,177 @@
1
+ // -*- coding: utf-8 -*-
2
+ //
3
+ // web/image — 图片捕获(多策略,路由无关,全走 operate)
4
+ //
5
+ // 四个策略 + 保存:
6
+ // - imgFromScreenshot:用 operate screenshot(**仅视觉确认,不取资源**)
7
+ // - imgFromElement:evaluate 提取 src,再 fetchAsBase64
8
+ // - fetchAsBase64:evaluate 内 fetch(共享 cookie jar)
9
+ // - captureImages:扫描页面所有 img + background-image,逐个 fetch
10
+ //
11
+ // 防盗链/CORS 适配:fetch 失败时自动 fallback 到 crossOrigin 重载 + canvas 导出
12
+ // (见 crossOriginExport)。这是通用能力,覆盖大多数 CDN(alicdn/OSS 等)。
13
+ // 机理:<img> 加载不受 CORS 限,但 fetch 跨域被拦;crossOrigin='anonymous'
14
+ // 重载后若 CDN 返回 Access-Control-Allow-Origin,canvas 不 tainted 可导出原图。
15
+ //
16
+ // 依赖:./browser(operate), node:fs(saveImage)
17
+
18
+ 'use strict'
19
+
20
+ const fs = require('node:fs')
21
+ const { operate } = require('./browser')
22
+
23
/**
 * Build the source text of the page-side function that downloads a URL with
 * `fetch` and returns the body base64-encoded.
 *
 * The returned text is an async arrow function `(u) => ...` meant to be run
 * inside the page. It requests with `credentials: "include"` so the page's
 * cookies are sent, and it resolves to `{ data, mimeType }` on success or
 * `{ error }` on failure; it does not throw.
 *
 * A non-2xx response is reported as `{ error: "HTTP <status>" }` instead of
 * encoding the error body as if it were the image. Without that check a
 * 403/404 page would be returned as image data and callers that branch on
 * `error` would never try another strategy.
 *
 * @param {string} [url] - Unused; accepted only to keep the existing call shape.
 * @returns {string} JavaScript source of the page-side function.
 */
const FETCH_SOURCE = (url) => [
  'async (u) => {',
  '  try {',
  '    const res = await fetch(u, { credentials: "include" })',
  '    if (!res.ok) return { error: "HTTP " + res.status }',
  '    const ct = res.headers.get("content-type") || "application/octet-stream"',
  '    const buf = await res.arrayBuffer()',
  '    const bytes = new Uint8Array(buf)',
  // btoa needs a binary string, so each byte becomes one char code.
  '    let bin = ""',
  '    for (let i = 0; i < bytes.length; i++) bin += String.fromCharCode(bytes[i])',
  '    return { data: btoa(bin), mimeType: ct }',
  '  } catch (e) {',
  '    return { error: String((e && e.message) || e) }',
  '  }',
  '}'
].join('\n')
37
+
38
/**
 * Build the source text of the page-side function that reads an element's
 * resource URL.
 *
 * The returned text is an arrow function `(s) => ...` that looks up the first
 * element matching selector `s` and returns, in order of preference, its
 * `src` property, then its `src`, `data-src` or `href` attribute. It returns
 * `null` when no element matches or none of those values is set.
 *
 * @param {string} [selector] - Unused; accepted only to keep the existing call shape.
 * @returns {string} JavaScript source of the page-side function.
 */
const SRC_SOURCE = (selector) => [
  '(s) => {',
  ' const el = document.querySelector(s)',
  ' if (!el) return null',
  ' const src = el.src || el.getAttribute("src") || el.getAttribute("data-src") || el.getAttribute("href") || null',
  ' return src',
  '}'
].join('\n')
45
+
46
+
47
/**
 * Source text of the page-side fallback that re-loads an image with
 * `crossOrigin = "anonymous"` and exports it through a canvas.
 *
 * The function `(u) => ...` creates a new `Image`, loads `u`, draws it onto a
 * canvas sized to the image's natural dimensions and reads it back with
 * `toDataURL("image/png")`. It always resolves (never rejects) to either
 * `{ data, mimeType: "image/png" }` or `{ error }`:
 *   - "reload failed (CDN denied CORS)" when the image fires `onerror`;
 *   - "toDataURL empty (tainted)" when the data URL is shorter than 100 chars;
 *   - "canvas: <message>" when drawing or exporting throws.
 * The export only works when the server allows the cross-origin read.
 */
const CROSS_ORIGIN_EXPORT = [
  'async (u) => {',
  ' return await new Promise(resolve => {',
  ' const im = new Image()',
  ' im.crossOrigin = "anonymous"',
  ' im.onload = () => {',
  ' try {',
  ' const c = document.createElement("canvas")',
  ' c.width = im.naturalWidth; c.height = im.naturalHeight',
  ' c.getContext("2d").drawImage(im, 0, 0)',
  ' const url = c.toDataURL("image/png")',
  ' if (url.length < 100) return resolve({ error: "toDataURL empty (tainted)" })',
  ' resolve({ data: url.split(",")[1], mimeType: "image/png" })',
  ' } catch (e) { resolve({ error: "canvas: " + String((e && e.message) || e) }) }',
  ' }',
  ' im.onerror = () => resolve({ error: "reload failed (CDN denied CORS)" })',
  ' im.src = u',
  ' })',
  '}'
].join('\n')
68
+
69
+
70
+ // 先 fetch;fetch 报错(CORS/防盗链)才 fallback 到 crossOrigin 重载。返回 {data, mimeType}。
71
+ // 第一步 fetch 成功就直接返回(普通站点零开销);失败才走重载,一次兼容到底。
72
+
73
/**
 * Take a screenshot through `operate` and return it as raw base64.
 *
 * Sends `{ action: 'screenshot' }` and reads the data URL from either
 * `result.dataUrl` or `result.result.dataUrl` of the reply, then splits it
 * into MIME type and base64 payload.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {object} [options]
 * @param {string} [options.selector] - Documented as an element selector, but
 *   this function does not read it: the request sent is always the plain
 *   `screenshot` action. NOTE(review): forward it once the `operate`
 *   screenshot contract for element captures is confirmed.
 * @returns {Promise<{data: string, mimeType: string}>} `data` is base64 without the `data:` prefix.
 * @throws {Error} When `pageHandle` is missing, or when the reply is empty or
 *   carries no base64 data URL.
 */
async function imgFromScreenshot (pageHandle, options = {}) {
  if (!pageHandle) throw new Error('imgFromScreenshot: pageHandle required')
  const r = await operate(pageHandle, { action: 'screenshot' })
  // Guard the reply itself so an empty result surfaces as the descriptive
  // "no dataUrl" error rather than a TypeError on property access.
  const dataUrl = (r && (r.dataUrl || (r.result && r.result.dataUrl))) || ''
  const m = typeof dataUrl === 'string' ? dataUrl.match(/^data:([^;]+);base64,(.*)$/) : null
  if (!m) throw new Error('imgFromScreenshot: no dataUrl')
  return { data: m[2], mimeType: m[1] }
}
88
+
89
/**
 * Download an image from inside the page and return it as base64.
 *
 * First runs the `FETCH_SOURCE` function in the page through `operate`. If
 * that reports an `error`, runs `CROSS_ORIGIN_EXPORT` as a second attempt.
 * Replies are accepted either bare or wrapped in a `value` property.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {string} url - Image URL to download.
 * @returns {Promise<{data: string, mimeType: string}>}
 * @throws {Error} When an argument is missing, when the first attempt yields
 *   neither data nor an error, or when both attempts fail (the message then
 *   carries the first error and, if present, the fallback's error).
 */
async function fetchAsBase64 (pageHandle, url) {
  if (!pageHandle) throw new Error('fetchAsBase64: pageHandle required')
  if (!url) throw new Error('fetchAsBase64: url required')

  // Replies may arrive as `{ value: payload }` or as the payload itself.
  const unwrap = (reply) => (reply && reply.value) || reply || {}
  const usable = (payload) => Boolean(payload && payload.data && payload.data.length > 0 && !payload.error)
  const pick = (payload) => ({ data: payload.data, mimeType: payload.mimeType })

  const direct = unwrap(await operate(pageHandle, { action: 'evaluate', source: FETCH_SOURCE(url), args: url }))
  if (usable(direct)) return pick(direct)
  if (!direct.error) throw new Error('fetchAsBase64: empty response from ' + url)

  // The in-page fetch failed: retry by re-loading the image and exporting it via canvas.
  const reloaded = unwrap(await operate(pageHandle, { action: 'evaluate', source: CROSS_ORIGIN_EXPORT, args: url }))
  if (usable(reloaded)) return pick(reloaded)

  const fallbackNote = reloaded.error ? ' (crossOrigin fallback also failed: ' + reloaded.error + ')' : ''
  throw new Error('fetchAsBase64: ' + direct.error + fallbackNote)
}
111
+
112
/**
 * Get the image behind an element: read its URL in the page, then return the
 * image as base64.
 *
 * The URL is read by running `SRC_SOURCE` in the page through `operate`; the
 * reply is accepted as a bare string or wrapped in `value` / `result`.
 *
 * When `captured` holds an entry `{ url, data, mimeType }` whose `url` equals
 * the element's URL and whose `data` is non-empty, that entry is returned
 * without downloading again. Anything else in `captured` (including a plain
 * list of URL strings) is ignored and the image is downloaded with
 * `fetchAsBase64`.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {string} selector - Selector of the element to read.
 * @param {Array<{url: string, data: string, mimeType: string}>} [captured] -
 *   Optional cache, in the shape `captureImages` returns.
 * @returns {Promise<{data: string, mimeType: string}>}
 * @throws {Error} When an argument is missing or the element yields no URL;
 *   errors from `fetchAsBase64` propagate.
 */
async function imgFromElement (pageHandle, selector, captured) {
  if (!pageHandle) throw new Error('imgFromElement: pageHandle required')
  if (!selector) throw new Error('imgFromElement: selector required')

  const srcR = await operate(pageHandle, { action: 'evaluate', source: SRC_SOURCE(selector), args: selector })
  const src = typeof srcR === 'string' ? srcR : (srcR && (srcR.value || srcR.result))
  if (!src) throw new Error('imgFromElement: no src for ' + selector)

  if (Array.isArray(captured)) {
    // Cache entries are objects keyed by `url`; a bare-string match cannot
    // carry data, so it falls through to a fresh download.
    const hit = captured.find((entry) => entry && typeof entry === 'object' && entry.url === src && entry.data)
    if (hit) return { data: hit.data, mimeType: hit.mimeType }
  }
  return fetchAsBase64(pageHandle, src)
}
130
+
131
/**
 * Collect every image URL found on the page and download each one.
 *
 * The page-side scan gathers, without duplicates:
 *   - the `src` property of every `img[src]`;
 *   - the `data-src` attribute of every element that has one, resolved
 *     against `location.href`;
 *   - every `url(...)` in the computed `background-image` of every element,
 *     resolved against `location.href`.
 * URLs are then downloaded one after another with `fetchAsBase64`. A URL whose
 * download throws or returns no data is skipped silently (best effort).
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {(url: string) => boolean} [match] - Optional filter; a URL is
 *   downloaded only when it returns a truthy value. Ignored unless it is a function.
 * @returns {Promise<Array<{url: string, data: string, mimeType: string}>>}
 * @throws {Error} When `pageHandle` is missing.
 */
async function captureImages (pageHandle, match) {
  if (!pageHandle) throw new Error('captureImages: pageHandle required')

  const scanSrc = [
    '(() => {',
    ' const urls = new Set()',
    ' document.querySelectorAll("img[src]").forEach((el) => { try { if (el.src) urls.add(el.src) } catch {} })',
    ' document.querySelectorAll("[data-src]").forEach((el) => { try { const s = el.getAttribute("data-src"); if (s) urls.add(new URL(s, location.href).href) } catch {} })',
    ' document.querySelectorAll("*").forEach((el) => {',
    ' const bg = getComputedStyle(el).backgroundImage',
    ' if (bg && bg !== "none") {',
    ' const m = bg.matchAll(/url\\(["\']?([^"\')]+)["\']?\\)/g)',
    ' for (const x of m) { try { urls.add(new URL(x[1], location.href).href) } catch {} }',
    ' }',
    ' })',
    ' return [...urls]',
    '})()'
  ].join('\n')

  const scanned = await operate(pageHandle, { action: 'evaluate', source: scanSrc })
  // The URL list may arrive bare or wrapped in `value` / `result`.
  let urls = scanned
  if (!Array.isArray(scanned)) {
    urls = (scanned && scanned.value) || (scanned && scanned.result) || []
  }

  const accepts = (match && typeof match === 'function') ? match : null
  const collected = []
  for (const candidate of urls) {
    if (accepts && !accepts(candidate)) continue
    let image = null
    try {
      image = await fetchAsBase64(pageHandle, candidate)
    } catch {
      // Best effort: one failing image must not abort the whole scan.
      continue
    }
    if (image && image.data && image.data.length > 0) {
      collected.push({ url: candidate, data: image.data, mimeType: image.mimeType })
    }
  }
  return collected
}
164
+
165
/**
 * Decode a base64 payload and write the bytes to a file.
 *
 * @param {string} data - Base64 text without a `data:` prefix.
 * @param {string} mimeType - Not used for writing; echoed back in the result.
 * @param {string} savePath - Destination path handed to `fs.promises.writeFile`.
 * @returns {Promise<{savePath: string, bytes: number, mimeType: string}>}
 *   The path written, the number of decoded bytes, and the MIME type passed in.
 */
async function saveImage (data, mimeType, savePath) {
  const decoded = Buffer.from(data, 'base64')
  await fs.promises.writeFile(savePath, decoded)
  const summary = { savePath, bytes: decoded.length, mimeType }
  return summary
}
176
+
177
+ module.exports = { captureImages, fetchAsBase64, imgFromElement, imgFromScreenshot, saveImage }
@@ -0,0 +1,169 @@
1
+ // -*- coding: utf-8 -*-
2
+ //
3
+ // web/interact — 页面可视化 overlay(路由无关, 纯 fixed div, 不碰业务 DOM)
4
+ //
5
+ // 三类用途:
6
+ // highlight/visualMark — 高亮真实操作对象 (单源 visual-runtime: resolve → mark 同一元素)
7
+ // toast/status — 提示/状态栏 overlay
8
+ // ensureVisual — 正式注入 visual-runtime (open 后页面有 window.visualMark)
9
+ //
10
+ // 铁律: 高亮对象 = 执行对象。这里只接受"已拿到的真实对象" (元素引用 / 坐标),
11
+ // 不再做 selector→元素的二次解析 (解析走单源 visual-runtime.resolveSelector,
12
+ // 由 operate 在后端一次完成 resolve+mark+execute)。
13
+ //
14
+ // 依赖:./browser(operate)、./visual-runtime.inject(注入串)
15
+
16
+ 'use strict'
17
+
18
+ const { operate } = require('./browser')
19
+ const VISUAL_RUNTIME_SRC = require('./visual-runtime.inject.cjs')
20
+
21
// DOM ids: the status-bar element and the injected <style> element.
const OVERLAY_ID = 'bee-interact-overlay'
const STYLE_ID = 'bee-interact-style'

// Stylesheet for the toast and status overlays: a shared `.bee-tk` base,
// colour variants per level, and the two anchor positions.
const TOAST_CSS = [
  '',
  ' .bee-tk{position:fixed;z-index:2147483647;font-family:system-ui,Arial,sans-serif;font-size:13px;color:#fff;background:rgba(30,30,30,.92);padding:8px 12px;border-radius:6px;box-shadow:0 2px 12px rgba(0,0,0,.3);max-width:60vw}',
  ' .bee-tk.b-success{background:rgba(22,101,52,.95)} .bee-tk.b-warn{background:rgba(180,83,9,.95)} .bee-tk.b-error{background:rgba(153,27,27,.95)}',
  ' .bee-status{top:12px;right:12px}',
  ' .bee-toast{bottom:16px;right:16px;transition:opacity .3s}',
  ' '
].join('\n')

// Page-side expression that appends the stylesheet to <head> unless an
// element with STYLE_ID already exists, so running it repeatedly is harmless.
const ENSURE_TOAST_STYLE = [
  '(() => {',
  ' if (!document.getElementById("' + STYLE_ID + '")) {',
  ' var s = document.createElement("style")',
  ' s.id = "' + STYLE_ID + '"',
  ' s.textContent = ' + JSON.stringify(TOAST_CSS),
  ' document.head.appendChild(s)',
  ' }',
  ' return null',
  '})()'
].join('\n')
42
+
43
/**
 * Make sure the visual runtime is present in the page.
 *
 * Evaluates an expression that runs `VISUAL_RUNTIME_SRC` only when
 * `window.__beeVR` is not set yet, so repeated calls do not re-run the
 * runtime. The request is sent with `_skipHumanize: true`.
 *
 * NOTE(review): the original comment states that the runtime exposes
 * `window.visualMark` / `window.resolveSelector` / `window.__beeVR` and that
 * new pages are injected automatically elsewhere; neither is visible from
 * this file, which only relies on `window.__beeVR`.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @returns {Promise<null>} Always `null`; the evaluate result is discarded.
 * @throws {Error} When `pageHandle` is missing; errors from `operate` propagate.
 */
async function ensureVisual (pageHandle) {
  if (!pageHandle) throw new Error('ensureVisual: pageHandle required')

  const injectOnce = '(() => { if (!window.__beeVR) { ' + VISUAL_RUNTIME_SRC + ' } return !!window.__beeVR })()'
  const request = { action: 'evaluate', source: injectOnce, _skipHumanize: true }
  await operate(pageHandle, request)
  return null
}
62
+
63
/**
 * Run `ENSURE_TOAST_STYLE` in the page so the toast/status stylesheet exists.
 * The request is sent with `_skipHumanize: true`; the result is discarded.
 *
 * @param {object} pageHandle - Handle of a single page (not validated here).
 * @returns {Promise<void>}
 */
async function ensureToastStyle (pageHandle) {
  const request = {
    action: 'evaluate',
    source: ENSURE_TOAST_STYLE,
    _skipHumanize: true
  }
  await operate(pageHandle, request)
}
66
+
67
+ // ─── highlight / visualMark: 高亮真实对象 ───
68
+ //
69
+ // target 直接是真实对象 (元素引用 / {x,y[,w,h]} / 数组), 不接受 selector/文本。
70
+ // 要按 selector 高亮, 用 highlightSelector (走单源 resolver)。
71
+
72
/**
 * Draw a highlight on a concrete target by calling the page's
 * `window.__beeVR.visualMark`.
 *
 * `target` is passed to the page as the evaluate argument, so it has to
 * survive that hand-off; per the original notes this mainly serves
 * coordinate objects such as `{x, y[, w, h]}`. Selectors are not resolved
 * here.
 *
 * After a successful mark the function waits `options.ms` milliseconds
 * (default 1000) before returning; a value of 0 or less skips the wait.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {object|Array} target - The object to mark.
 * @param {object} [options]
 * @param {number} [options.ms=1000] - How long to wait after marking.
 * @returns {Promise<{ok: boolean, reason?: string}>} The page's reply, or
 *   `{ ok: false, reason }` when there is no target, the runtime is missing,
 *   `operate` rejects, or the reply is empty.
 * @throws {Error} When `pageHandle` is missing.
 */
async function highlight (pageHandle, target, options = {}) {
  if (!pageHandle) throw new Error('highlight: pageHandle required')
  if (!target) return { ok: false, reason: 'no target' }

  await ensureVisual(pageHandle)

  const request = {
    action: 'evaluate',
    source: '(t) => { if (!window.__beeVR) return { ok:false, reason:"runtime not injected" }; window.__beeVR.visualMark(t); return { ok:true } }',
    args: target,
    _skipHumanize: true
  }
  // A rejected evaluate is reported as a result object, not re-thrown.
  const outcome = await operate(pageHandle, request).catch((err) => ({ ok: false, reason: err.message }))

  let holdMs = 1000
  if (options.ms != null) holdMs = options.ms
  if (outcome && outcome.ok && holdMs > 0) {
    await new Promise((resolve) => setTimeout(resolve, holdMs))
  }
  return outcome || { ok: false, reason: 'no result' }
}
98
+
99
/**
 * Highlight the element a selector resolves to, by calling the page's
 * `window.__beeVR.highlightResolved(selector, undefined)`.
 *
 * After a successful highlight the function waits `options.ms` milliseconds
 * (default 1000) before returning; a value of 0 or less skips the wait.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {string} selector - Selector handed to the page-side resolver.
 * @param {object} [options]
 * @param {number} [options.ms=1000] - How long to wait after highlighting.
 * @returns {Promise<{ok: *, reason?: string}>} `{ ok }` carries whatever
 *   `highlightResolved` returned; `{ ok: false, reason }` is used when there
 *   is no selector, the runtime is missing, `operate` rejects, or the reply
 *   is empty.
 * @throws {Error} When `pageHandle` is missing.
 */
async function highlightSelector (pageHandle, selector, options = {}) {
  if (!pageHandle) throw new Error('highlightSelector: pageHandle required')
  if (!selector) return { ok: false, reason: 'no selector' }

  await ensureVisual(pageHandle)

  const request = {
    action: 'evaluate',
    source: '(sel) => { if (!window.__beeVR) return { ok:false, reason:"runtime not injected" }; const ok = window.__beeVR.highlightResolved(sel, undefined); return { ok } }',
    args: selector,
    _skipHumanize: true
  }
  // A rejected evaluate is reported as a result object, not re-thrown.
  const outcome = await operate(pageHandle, request).catch((err) => ({ ok: false, reason: err.message }))

  let holdMs = 1000
  if (options.ms != null) holdMs = options.ms
  if (outcome && outcome.ok && holdMs > 0) {
    await new Promise((resolve) => setTimeout(resolve, holdMs))
  }
  return outcome || { ok: false, reason: 'no result' }
}
117
+
118
/**
 * Remove every overlay element from the page: all elements with class
 * `bee-tk` (toasts and the status bar) or `bee-hl-box`.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @returns {Promise<null>} Always `null`.
 * @throws {Error} When `pageHandle` is missing; errors from `operate` propagate.
 */
async function clear (pageHandle) {
  if (!pageHandle) throw new Error('clear: pageHandle required')

  const removeOverlays = '(() => { document.querySelectorAll(".bee-tk,.bee-hl-box").forEach(function(e){e.remove()}); return null })()'
  const request = { action: 'evaluate', source: removeOverlays, _skipHumanize: true }
  await operate(pageHandle, request)
  return null
}
130
+
131
/**
 * Show a toast in the bottom-right corner of the page.
 *
 * The toast is a `div.bee-tk.bee-toast` appended to `document.body`; after
 * `ms` milliseconds it fades out and is removed 300 ms later. The message is
 * embedded with `JSON.stringify` and assigned through `textContent`, so it is
 * shown as text, not parsed as HTML.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {string} message - Text to show.
 * @param {string|{level?: string, ms?: number}|null} [levelOrOpts='info'] -
 *   Either a level name or an options object. Levels `success`, `warn` and
 *   `error` pick a colour variant; anything else uses the default colour.
 *   `ms` defaults to 2500 when omitted, zero, or not a number. `null` is
 *   treated like the default.
 * @returns {Promise<null>} Always `null`.
 * @throws {Error} When `pageHandle` is missing; errors from `operate` propagate.
 */
async function toast (pageHandle, message, levelOrOpts = 'info') {
  if (!pageHandle) throw new Error('toast: pageHandle required')

  // `typeof null` is 'object', so null must be excluded before reading fields.
  const opts = (levelOrOpts !== null && typeof levelOrOpts === 'object') ? levelOrOpts : null
  const level = opts ? (opts.level || 'info') : levelOrOpts
  const ms = opts ? (Number(opts.ms) || 2500) : 2500

  await ensureToastStyle(pageHandle)

  // Only the three known levels map to a class, so `level` never reaches the page source.
  const cls = level === 'success' ? 'b-success' : level === 'warn' ? 'b-warn' : level === 'error' ? 'b-error' : ''
  const src = '(() => {\n' +
    ' const e = document.createElement("div")\n' +
    ' e.className = "bee-tk bee-toast ' + cls + '"\n' +
    ' e.textContent = ' + JSON.stringify(message) + '\n' +
    ' document.body.appendChild(e)\n' +
    ' setTimeout(() => { e.style.opacity = "0"; setTimeout(() => e.remove(), 300) }, ' + ms + ')\n' +
    ' return null\n' +
    ' })()'
  await operate(pageHandle, { action: 'evaluate', source: src, _skipHumanize: true })
  return null
}
151
+
152
/**
 * Show or update the status bar in the top-right corner of the page.
 *
 * The bar is a single `div.bee-tk.bee-status` with id `OVERLAY_ID`; it is
 * created on first use and reused afterwards, and each call replaces its
 * text. The text is embedded with `JSON.stringify` and assigned through
 * `textContent`.
 *
 * @param {object} pageHandle - Handle of a single page.
 * @param {*} text - Status text; converted with `String`.
 * @param {*} [step] - Optional prefix, rendered as "<step>: <text>". Any
 *   falsy value (including 0) means no prefix.
 * @returns {Promise<null>} Always `null`.
 * @throws {Error} When `pageHandle` is missing; errors from `operate` propagate.
 */
async function status (pageHandle, text, step) {
  if (!pageHandle) throw new Error('status: pageHandle required')
  await ensureToastStyle(pageHandle)

  const prefix = step ? String(step) + ': ' : ''
  const content = prefix + String(text)

  const src = [
    '(() => {',
    ' let e = document.getElementById("' + OVERLAY_ID + '")',
    ' if (!e) { e = document.createElement("div"); e.id = "' + OVERLAY_ID + '"; e.className = "bee-tk bee-status"; document.body.appendChild(e) }',
    ' e.textContent = ' + JSON.stringify(content),
    ' return null',
    ' })()'
  ].join('\n')

  await operate(pageHandle, { action: 'evaluate', source: src, _skipHumanize: true })
  return null
}
168
+
169
+ module.exports = { ensureVisual, highlight, highlightSelector, clear, toast, status }
@@ -0,0 +1,80 @@
1
+ // -*- coding: utf-8 -*-
2
+ //
3
+ // web/markdown — HTML 转 Markdown(纯函数,路由无关)
4
+ //
5
+ // 完整复刻自 bee/resources/sandbox/builtin-libs/web/markdown.js。
6
+ // 不依赖任何浏览器通道。HTML 字符串进来,Markdown 出去。
7
+ // 用 turndown + cheerio。
8
+ //
9
+ // 依赖:turndown, cheerio(调用方自装:npm i turndown cheerio)
10
+
11
+ 'use strict'
12
+
13
// Loaded dependencies, filled in on the first successful getDeps() call.
let _deps = null

/**
 * Load `turndown` and `cheerio` on first use and cache them.
 *
 * `turndown` is accepted either as a module with a `default` export or as
 * the constructor itself.
 *
 * @returns {Promise<{TurndownService: Function, cheerio: object}>}
 * @throws {Error} When either package cannot be loaded; the message tells the
 *   user how to install them and appends the original error message.
 */
async function getDeps () {
  if (!_deps) {
    try {
      const turndownExports = require('turndown')
      const TurndownService = turndownExports.default || turndownExports
      const cheerio = require('cheerio')
      _deps = { TurndownService, cheerio }
    } catch (loadError) {
      const hint = 'web/markdown 需要 turndown + cheerio,请先安装:npm i turndown cheerio\n'
      throw new Error(hint + '原始错误: ' + (loadError && loadError.message))
    }
  }
  return _deps
}
29
+
30
/**
 * Convert an HTML string to Markdown.
 *
 * Steps: parse with cheerio, read the document title, drop `<script>` and
 * `<style>` elements, then convert the `<body>` (or the whole parsed root
 * when there is no body) with turndown using ATX headings.
 *
 * Two custom rules are installed:
 *   - links: an `href` that parses as an absolute URL with a scheme other
 *     than http/https/file (for example `javascript:`) is dropped and only
 *     the link text is kept; relative and unparsable hrefs are kept as-is;
 *   - images: `data:` sources are cut down to their header followed by "..."
 *     so inline payloads do not flood the output.
 *
 * The title is taken from the document's own `<title>` (first one under
 * `<head>`, else the first `<title>` anywhere) and trimmed. Reading every
 * `<title>` at once would also pick up the `<title>` children of inline
 * SVGs and glue their text onto the page title.
 *
 * @param {string} html - HTML source; `null`/`undefined` is treated as empty.
 * @returns {Promise<{markdown: string, title: string|null}>} `title` is
 *   `null` when the document has no non-empty title.
 * @throws {Error} When turndown/cheerio cannot be loaded (from `getDeps`).
 */
async function htmlToMarkdown (html) {
  const { TurndownService, cheerio } = await getDeps()
  const $ = cheerio.load(html == null ? '' : html)

  const headTitle = $('head > title').first().text().trim()
  const title = headTitle || $('title').first().text().trim() || null

  // Scripts and styles carry no readable content.
  $('script, style').remove()

  const body = $('body').length > 0 ? $('body') : $.root()
  const converter = new TurndownService({ headingStyle: 'atx' })

  // Keep only links whose scheme is safe to emit.
  converter.addRule('filterLinks', {
    filter: 'a',
    replacement: (content, node) => {
      const href = node.getAttribute('href')
      if (!href) return content
      try {
        const protocol = new URL(href).protocol
        if (protocol && !['http:', 'https:', 'file:'].includes(protocol.toLowerCase())) {
          return content
        }
      } catch {
        // Not an absolute URL (e.g. a relative path): keep the link unchanged.
      }
      const t = node.getAttribute('title')
      const titlePart = t ? ' "' + t.replace(/"/g, '\\"') + '"' : ''
      return '[' + content + '](' + href + titlePart + ')'
    }
  })

  // Truncate data URIs so embedded images do not bloat the Markdown.
  converter.addRule('filterImages', {
    filter: 'img',
    replacement: (content, node) => {
      const alt = node.getAttribute('alt') || ''
      let src = node.getAttribute('src') || node.getAttribute('data-src') || ''
      if (src.startsWith('data:')) src = src.split(',')[0] + '...'
      return '![' + alt + '](' + src + ')'
    }
  })

  const md = converter.turndown(body.html() || '')
  return { markdown: md.trim(), title }
}
79
+
80
+ module.exports = { htmlToMarkdown }