@pyrokine/mcp-chrome 1.6.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -43
- package/dist/anti-detection/behavior.d.ts.map +1 -1
- package/dist/anti-detection/behavior.js.map +1 -1
- package/dist/anti-detection/index.d.ts +1 -1
- package/dist/anti-detection/index.d.ts.map +1 -1
- package/dist/anti-detection/index.js +1 -1
- package/dist/anti-detection/index.js.map +1 -1
- package/dist/anti-detection/injection.d.ts +6 -2
- package/dist/anti-detection/injection.d.ts.map +1 -1
- package/dist/anti-detection/injection.js +32 -79
- package/dist/anti-detection/injection.js.map +1 -1
- package/dist/cdp/client.d.ts +2 -2
- package/dist/cdp/client.d.ts.map +1 -1
- package/dist/cdp/client.js +8 -10
- package/dist/cdp/client.js.map +1 -1
- package/dist/cdp/index.d.ts.map +1 -1
- package/dist/cdp/index.js.map +1 -1
- package/dist/cdp/launcher.d.ts.map +1 -1
- package/dist/cdp/launcher.js +40 -13
- package/dist/cdp/launcher.js.map +1 -1
- package/dist/core/auto-wait.d.ts +2 -2
- package/dist/core/auto-wait.d.ts.map +1 -1
- package/dist/core/auto-wait.js +2 -2
- package/dist/core/auto-wait.js.map +1 -1
- package/dist/core/browser-driver.d.ts +307 -0
- package/dist/core/browser-driver.d.ts.map +1 -0
- package/dist/core/browser-driver.js +21 -0
- package/dist/core/browser-driver.js.map +1 -0
- package/dist/core/error-sanitizer.d.ts +25 -0
- package/dist/core/error-sanitizer.d.ts.map +1 -0
- package/dist/core/error-sanitizer.js +66 -0
- package/dist/core/error-sanitizer.js.map +1 -0
- package/dist/core/errors.d.ts +10 -1
- package/dist/core/errors.d.ts.map +1 -1
- package/dist/core/errors.js +17 -4
- package/dist/core/errors.js.map +1 -1
- package/dist/core/extension-errors.d.ts +20 -0
- package/dist/core/extension-errors.d.ts.map +1 -0
- package/dist/core/extension-errors.js +40 -0
- package/dist/core/extension-errors.js.map +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js.map +1 -1
- package/dist/core/locator.d.ts +2 -2
- package/dist/core/locator.d.ts.map +1 -1
- package/dist/core/locator.js +25 -65
- package/dist/core/locator.js.map +1 -1
- package/dist/core/retry.d.ts +2 -2
- package/dist/core/retry.d.ts.map +1 -1
- package/dist/core/retry.js +2 -2
- package/dist/core/retry.js.map +1 -1
- package/dist/core/session.d.ts +153 -46
- package/dist/core/session.d.ts.map +1 -1
- package/dist/core/session.js +672 -177
- package/dist/core/session.js.map +1 -1
- package/dist/core/types.d.ts +11 -3
- package/dist/core/types.d.ts.map +1 -1
- package/dist/core/types.js +13 -6
- package/dist/core/types.js.map +1 -1
- package/dist/core/unified-session.d.ts +69 -68
- package/dist/core/unified-session.d.ts.map +1 -1
- package/dist/core/unified-session.js +356 -615
- package/dist/core/unified-session.js.map +1 -1
- package/dist/core/utils.d.ts +7 -0
- package/dist/core/utils.d.ts.map +1 -0
- package/dist/core/utils.js +33 -0
- package/dist/core/utils.js.map +1 -0
- package/dist/extension/bridge.d.ts +80 -39
- package/dist/extension/bridge.d.ts.map +1 -1
- package/dist/extension/bridge.js +195 -65
- package/dist/extension/bridge.js.map +1 -1
- package/dist/extension/http-server.d.ts +6 -4
- package/dist/extension/http-server.d.ts.map +1 -1
- package/dist/extension/http-server.js +45 -31
- package/dist/extension/http-server.js.map +1 -1
- package/dist/extension/index.d.ts.map +1 -1
- package/dist/extension/index.js.map +1 -1
- package/dist/index.js +27 -3
- package/dist/index.js.map +1 -1
- package/dist/tools/browse.d.ts.map +1 -1
- package/dist/tools/browse.js +33 -35
- package/dist/tools/browse.js.map +1 -1
- package/dist/tools/cookies.d.ts.map +1 -1
- package/dist/tools/cookies.js +38 -16
- package/dist/tools/cookies.js.map +1 -1
- package/dist/tools/evaluate.d.ts.map +1 -1
- package/dist/tools/evaluate.js +59 -13
- package/dist/tools/evaluate.js.map +1 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +263 -155
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/input.d.ts.map +1 -1
- package/dist/tools/input.js +311 -75
- package/dist/tools/input.js.map +1 -1
- package/dist/tools/logs.d.ts.map +1 -1
- package/dist/tools/logs.js +31 -17
- package/dist/tools/logs.js.map +1 -1
- package/dist/tools/manage.d.ts.map +1 -1
- package/dist/tools/manage.js +25 -28
- package/dist/tools/manage.js.map +1 -1
- package/dist/tools/schema.d.ts +1 -1
- package/dist/tools/schema.d.ts.map +1 -1
- package/dist/tools/schema.js +31 -55
- package/dist/tools/schema.js.map +1 -1
- package/dist/tools/wait.d.ts.map +1 -1
- package/dist/tools/wait.js +73 -22
- package/dist/tools/wait.js.map +1 -1
- package/package.json +48 -40
package/dist/tools/extract.js
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* - metadata: 页面元信息(title/og/jsonLd 等)
|
|
11
11
|
*/
|
|
12
12
|
import { mkdir, writeFile } from 'fs/promises';
|
|
13
|
-
import { basename, dirname, extname, join } from 'path';
|
|
13
|
+
import { basename, dirname, extname, join, resolve, sep } from 'path';
|
|
14
14
|
import { z } from 'zod';
|
|
15
15
|
import { formatErrorResponse, formatResponse, getSession, getUnifiedSession } from '../core/index.js';
|
|
16
16
|
import { targetToFindParams, targetZodSchema } from './schema.js';
|
|
@@ -20,21 +20,48 @@ const MAX_APPENDIX_IMAGES = 20;
|
|
|
20
20
|
* extract 参数 schema
|
|
21
21
|
*/
|
|
22
22
|
const extractSchema = z.object({
|
|
23
|
-
type: z.enum(['text', 'html', 'attribute', 'screenshot', 'state', 'metadata'])
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
type: z.enum(['text', 'html', 'attribute', 'screenshot', 'state', 'metadata']).describe('提取类型'),
|
|
24
|
+
target: targetZodSchema
|
|
25
|
+
.optional()
|
|
26
|
+
.describe('目标元素(attribute 必填;text/html 可选,省略则提取整个页面;screenshot 可选用于元素截图;state 可选(仅 Extension)用于返回目标子树;metadata 不需要)'),
|
|
26
27
|
attribute: z.string().optional().describe('属性名(attribute)'),
|
|
27
|
-
images: z
|
|
28
|
+
images: z
|
|
29
|
+
.enum(['info', 'data'])
|
|
30
|
+
.optional()
|
|
31
|
+
.describe('图片提取模式(仅 html 类型有效),info: 元信息(src/alt/尺寸);data: 含图片数据'),
|
|
28
32
|
fullPage: z.boolean().optional().describe('是否全页面截图(screenshot)'),
|
|
29
|
-
scale: z
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
scale: z
|
|
34
|
+
.number()
|
|
35
|
+
.optional()
|
|
36
|
+
.describe('截图缩放比例(screenshot fullPage),默认 1,设为 0.5 可降低分辨率加速大页面截图'),
|
|
37
|
+
format: z
|
|
38
|
+
.enum(['png', 'jpeg', 'webp'])
|
|
39
|
+
.optional()
|
|
40
|
+
.describe('截图格式(screenshot),默认 png,jpeg/webp 体积更小,复杂页面推荐 jpeg 减少超时'),
|
|
41
|
+
quality: z
|
|
42
|
+
.number()
|
|
43
|
+
.min(0)
|
|
44
|
+
.max(100)
|
|
45
|
+
.optional()
|
|
46
|
+
.describe('截图质量(screenshot,仅 jpeg/webp 有效),0-100,推荐 80'),
|
|
47
|
+
output: z
|
|
48
|
+
.string()
|
|
49
|
+
.optional()
|
|
50
|
+
.describe('输出文件路径(可选),若指定结果写入文件,否则返回内容,images=data 时作为输出目录路径'),
|
|
51
|
+
tabId: z
|
|
52
|
+
.string()
|
|
33
53
|
.optional()
|
|
34
|
-
.describe('
|
|
35
|
-
|
|
54
|
+
.describe('目标 Tab ID(可选,仅 Extension 模式),不指定则使用当前 attach 的 tab,可操作非当前 attach 的 tab,CDP 模式下不支持此参数'),
|
|
55
|
+
depth: z.number().optional().describe('DOM 遍历深度限制(state),默认 15,减小可降低返回数据量'),
|
|
56
|
+
mode: z
|
|
57
|
+
.enum(['accessibility', 'domsnapshot'])
|
|
58
|
+
.optional()
|
|
59
|
+
.describe('页面状态提取模式(state 类型有效),accessibility=可访问性树(默认,与原 read_page 一致),domsnapshot=CDP DOMSnapshot 全量快照(仅 CDP 模式)'),
|
|
36
60
|
timeout: z.number().optional().describe('等待目标元素超时'),
|
|
37
|
-
frame: z
|
|
61
|
+
frame: z
|
|
62
|
+
.union([z.string(), z.number()])
|
|
63
|
+
.optional()
|
|
64
|
+
.describe('iframe 定位(可选,仅 Extension 模式),CSS 选择器(如 "iframe#main")或索引(如 0),不指定则在主框架操作'),
|
|
38
65
|
});
|
|
39
66
|
/**
|
|
40
67
|
* extract 工具处理器
|
|
@@ -97,36 +124,10 @@ async function handleExtract(args) {
|
|
|
97
124
|
}
|
|
98
125
|
case 'attribute': {
|
|
99
126
|
if (!args.target) {
|
|
100
|
-
return
|
|
101
|
-
content: [
|
|
102
|
-
{
|
|
103
|
-
type: 'text',
|
|
104
|
-
text: JSON.stringify({
|
|
105
|
-
error: {
|
|
106
|
-
code: 'INVALID_ARGUMENT',
|
|
107
|
-
message: 'attribute 提取需要 target 参数',
|
|
108
|
-
},
|
|
109
|
-
}),
|
|
110
|
-
},
|
|
111
|
-
],
|
|
112
|
-
isError: true,
|
|
113
|
-
};
|
|
127
|
+
return formatErrorResponse(new Error('attribute 提取需要 target 参数'));
|
|
114
128
|
}
|
|
115
129
|
if (!args.attribute) {
|
|
116
|
-
return
|
|
117
|
-
content: [
|
|
118
|
-
{
|
|
119
|
-
type: 'text',
|
|
120
|
-
text: JSON.stringify({
|
|
121
|
-
error: {
|
|
122
|
-
code: 'INVALID_ARGUMENT',
|
|
123
|
-
message: 'attribute 提取需要 attribute 参数',
|
|
124
|
-
},
|
|
125
|
-
}),
|
|
126
|
-
},
|
|
127
|
-
],
|
|
128
|
-
isError: true,
|
|
129
|
-
};
|
|
130
|
+
return formatErrorResponse(new Error('attribute 提取需要 attribute 参数'));
|
|
130
131
|
}
|
|
131
132
|
let value;
|
|
132
133
|
if (useExtension) {
|
|
@@ -146,84 +147,20 @@ async function handleExtract(args) {
|
|
|
146
147
|
// 有 target 时获取元素区域用于裁剪(支持所有 target 类型)
|
|
147
148
|
let clip;
|
|
148
149
|
if (args.target) {
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
clip = rect;
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
else {
|
|
162
|
-
const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
|
|
163
|
-
const nth = nthParam ?? 0;
|
|
164
|
-
const rect = await session.evaluate(`function(selector, text, xpath, nth) {
|
|
165
|
-
function toRect(el) {
|
|
166
|
-
var r = el.getBoundingClientRect();
|
|
167
|
-
return {x: r.x, y: r.y, width: r.width, height: r.height};
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
function findByXPath(xp, n) {
|
|
171
|
-
var r = document.evaluate(xp, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
|
172
|
-
return r.snapshotLength > n ? r.snapshotItem(n) : null;
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
function findBySelector(sel, txt, n) {
|
|
176
|
-
var els = document.querySelectorAll(sel);
|
|
177
|
-
var matchCount = 0;
|
|
178
|
-
for (var i = 0; i < els.length; ++i) {
|
|
179
|
-
var el = els[i];
|
|
180
|
-
if (txt) {
|
|
181
|
-
var content = (el.textContent || '').trim();
|
|
182
|
-
if (!content.includes(txt)) continue;
|
|
183
|
-
}
|
|
184
|
-
if (matchCount < n) { ++matchCount; continue; }
|
|
185
|
-
return el;
|
|
186
|
-
}
|
|
187
|
-
return null;
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
function findByText(txt, n) {
|
|
191
|
-
var root = document.body || document.documentElement;
|
|
192
|
-
if (!root) return null;
|
|
193
|
-
var walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
|
|
194
|
-
var matchCount = 0;
|
|
195
|
-
var el = walker.currentNode;
|
|
196
|
-
while (el) {
|
|
197
|
-
var content = (el.textContent || '').trim();
|
|
198
|
-
if (content && content.includes(txt)) {
|
|
199
|
-
if (matchCount < n) { ++matchCount; }
|
|
200
|
-
else { return el; }
|
|
201
|
-
}
|
|
202
|
-
el = walker.nextNode();
|
|
203
|
-
}
|
|
204
|
-
return null;
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
var el = null;
|
|
208
|
-
if (xpath) {
|
|
209
|
-
el = findByXPath(xpath, nth);
|
|
210
|
-
} else if (selector) {
|
|
211
|
-
el = findBySelector(selector, text, nth);
|
|
212
|
-
} else if (text) {
|
|
213
|
-
el = findByText(text, nth);
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
return el ? toRect(el) : null;
|
|
217
|
-
}`, [selector ?? null, text ?? null, xpath ?? null, nth]);
|
|
218
|
-
if (rect && rect.width > 0 && rect.height > 0) {
|
|
150
|
+
const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
|
|
151
|
+
const nth = nthParam ?? 0;
|
|
152
|
+
// unified.find 内部根据 Extension 连接状态自动路由到 Extension/CDP 路径,
|
|
153
|
+
// 返回视口绝对坐标(含 iframe 坐标修正)
|
|
154
|
+
const found = await unifiedSession.find(selector, text, xpath);
|
|
155
|
+
if (found.length > nth) {
|
|
156
|
+
const rect = found[nth].rect;
|
|
157
|
+
if (rect.width > 0 && rect.height > 0) {
|
|
219
158
|
clip = rect;
|
|
220
159
|
}
|
|
221
160
|
}
|
|
222
161
|
}
|
|
223
162
|
const base64 = await unifiedSession.screenshot({
|
|
224
|
-
fullPage: clip ?
|
|
225
|
-
false :
|
|
226
|
-
(args.fullPage ?? false),
|
|
163
|
+
fullPage: clip ? false : (args.fullPage ?? false),
|
|
227
164
|
scale: args.scale,
|
|
228
165
|
format: args.format,
|
|
229
166
|
quality: args.quality,
|
|
@@ -244,12 +181,52 @@ async function handleExtract(args) {
|
|
|
244
181
|
{
|
|
245
182
|
type: 'image',
|
|
246
183
|
data: base64,
|
|
247
|
-
mimeType: `image/${args.format === 'jpeg' ? 'jpeg' : args.format ?? 'png'}`,
|
|
184
|
+
mimeType: `image/${args.format === 'jpeg' ? 'jpeg' : (args.format ?? 'png')}`,
|
|
248
185
|
},
|
|
249
186
|
],
|
|
250
187
|
};
|
|
251
188
|
}
|
|
252
189
|
case 'state': {
|
|
190
|
+
// mode=domsnapshot:用 CDP DOMSnapshot.captureSnapshot 取全量快照(仅 CDP 模式)
|
|
191
|
+
if (args.mode === 'domsnapshot') {
|
|
192
|
+
if (useExtension) {
|
|
193
|
+
return {
|
|
194
|
+
content: [
|
|
195
|
+
{
|
|
196
|
+
type: 'text',
|
|
197
|
+
text: JSON.stringify({
|
|
198
|
+
error: {
|
|
199
|
+
code: 'INVALID_ARGUMENT',
|
|
200
|
+
message: 'mode=domsnapshot 仅 CDP 模式支持,Extension 模式请用默认 accessibility',
|
|
201
|
+
},
|
|
202
|
+
}),
|
|
203
|
+
},
|
|
204
|
+
],
|
|
205
|
+
isError: true,
|
|
206
|
+
};
|
|
207
|
+
}
|
|
208
|
+
const snapshot = await unifiedSession.sendCdpCommand('DOMSnapshot.captureSnapshot', {
|
|
209
|
+
computedStyles: ['display', 'visibility', 'opacity'],
|
|
210
|
+
includePaintOrder: false,
|
|
211
|
+
includeDOMRects: true,
|
|
212
|
+
});
|
|
213
|
+
if (args.output) {
|
|
214
|
+
await writeOutputFile(args.output, JSON.stringify(snapshot, null, 2), 'utf-8');
|
|
215
|
+
return formatResponse({
|
|
216
|
+
success: true,
|
|
217
|
+
type: 'state',
|
|
218
|
+
mode: 'domsnapshot',
|
|
219
|
+
output: args.output,
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
return formatResponse({
|
|
223
|
+
success: true,
|
|
224
|
+
type: 'state',
|
|
225
|
+
mode: 'domsnapshot',
|
|
226
|
+
snapshot,
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
// 默认:accessibility 树(原行为)
|
|
253
230
|
// 有 target 时获取子树的无障碍状态
|
|
254
231
|
let refId;
|
|
255
232
|
if (args.target && useExtension) {
|
|
@@ -260,7 +237,14 @@ async function handleExtract(args) {
|
|
|
260
237
|
refId = elements[nth].refId;
|
|
261
238
|
}
|
|
262
239
|
}
|
|
263
|
-
const
|
|
240
|
+
const readPageOptions = {};
|
|
241
|
+
if (refId) {
|
|
242
|
+
readPageOptions.refId = refId;
|
|
243
|
+
}
|
|
244
|
+
if (args.depth !== undefined) {
|
|
245
|
+
readPageOptions.depth = args.depth;
|
|
246
|
+
}
|
|
247
|
+
const state = await unifiedSession.readPage(Object.keys(readPageOptions).length > 0 ? readPageOptions : undefined);
|
|
264
248
|
if (args.output) {
|
|
265
249
|
await writeOutputFile(args.output, JSON.stringify(state, null, 2), 'utf-8');
|
|
266
250
|
return formatResponse({
|
|
@@ -315,10 +299,15 @@ async function handleExtract(args) {
|
|
|
315
299
|
}
|
|
316
300
|
}
|
|
317
301
|
// ==================== HTML + 图片提取 ====================
|
|
318
|
-
/**
|
|
302
|
+
/** 写入文件前自动创建父目录(验证路径在 cwd 范围内)*/
|
|
319
303
|
async function writeOutputFile(path, data, encoding) {
|
|
320
|
-
|
|
321
|
-
|
|
304
|
+
const cwd = process.cwd();
|
|
305
|
+
const safePath = resolve(cwd, path);
|
|
306
|
+
if (!safePath.startsWith(cwd + sep) && safePath !== cwd) {
|
|
307
|
+
throw new Error(`output 路径超出工作目录范围: ${path}`);
|
|
308
|
+
}
|
|
309
|
+
await mkdir(dirname(safePath), { recursive: true });
|
|
310
|
+
await writeFile(safePath, data, encoding);
|
|
322
311
|
}
|
|
323
312
|
/**
|
|
324
313
|
* 处理 html + images 提取
|
|
@@ -342,7 +331,11 @@ async function handleHtmlWithImages(unifiedSession, session, useExtension, args)
|
|
|
342
331
|
var images = [];
|
|
343
332
|
for (var i = 0; i < imgList.length; i++) {
|
|
344
333
|
var img = imgList[i];
|
|
345
|
-
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
334
|
+
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
335
|
+
var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
|
|
336
|
+
if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
|
|
337
|
+
})(), alt: img.alt, width: img.width, height: img.height,
|
|
338
|
+
naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
|
|
346
339
|
}
|
|
347
340
|
return {html: html, images: images};
|
|
348
341
|
})`, undefined, undefined, [selector, nth]);
|
|
@@ -373,14 +366,19 @@ async function handleHtmlWithImages(unifiedSession, session, useExtension, args)
|
|
|
373
366
|
const appendixMode = !args.output;
|
|
374
367
|
const imageDataList = await fetchImageData(unifiedSession, result.images, appendixMode ? MAX_APPENDIX_IMAGES : undefined);
|
|
375
368
|
if (args.output) {
|
|
369
|
+
const cwd2 = process.cwd();
|
|
370
|
+
const safeOutputDir = resolve(cwd2, args.output);
|
|
371
|
+
if (!safeOutputDir.startsWith(cwd2 + sep) && safeOutputDir !== cwd2) {
|
|
372
|
+
return formatErrorResponse(new Error(`output 路径超出工作目录范围: ${args.output}`));
|
|
373
|
+
}
|
|
376
374
|
// 写入目录
|
|
377
|
-
await writeImageDirectory(
|
|
375
|
+
await writeImageDirectory(safeOutputDir, result.html, result.images, imageDataList);
|
|
378
376
|
return formatResponse({
|
|
379
377
|
success: true,
|
|
380
378
|
type: 'html',
|
|
381
|
-
output:
|
|
379
|
+
output: safeOutputDir,
|
|
382
380
|
imageCount: result.images.length,
|
|
383
|
-
index: join(
|
|
381
|
+
index: join(safeOutputDir, 'index.json'),
|
|
384
382
|
});
|
|
385
383
|
}
|
|
386
384
|
// 无 output:MCP 附录方式返回
|
|
@@ -400,7 +398,11 @@ async function extractHtmlWithImagesCdp(session, selector, timeout) {
|
|
|
400
398
|
var images = [];
|
|
401
399
|
for (var i = 0; i < imgList.length; i++) {
|
|
402
400
|
var img = imgList[i];
|
|
403
|
-
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
401
|
+
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
402
|
+
var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
|
|
403
|
+
if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
|
|
404
|
+
})(), alt: img.alt, width: img.width, height: img.height,
|
|
405
|
+
naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
|
|
404
406
|
}
|
|
405
407
|
return {html: html, images: images};
|
|
406
408
|
}`);
|
|
@@ -411,7 +413,11 @@ async function extractHtmlWithImagesCdp(session, selector, timeout) {
|
|
|
411
413
|
var images = [];
|
|
412
414
|
for (var i = 0; i < imgs.length; i++) {
|
|
413
415
|
var img = imgs[i];
|
|
414
|
-
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
416
|
+
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
417
|
+
var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
|
|
418
|
+
if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
|
|
419
|
+
})(), alt: img.alt, width: img.width, height: img.height,
|
|
420
|
+
naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
|
|
415
421
|
}
|
|
416
422
|
return {html: html, images: images};
|
|
417
423
|
})()`);
|
|
@@ -440,10 +446,10 @@ async function fetchImageData(unifiedSession, images, limit) {
|
|
|
440
446
|
continue;
|
|
441
447
|
}
|
|
442
448
|
if (effectiveSrc.startsWith('data:')) {
|
|
443
|
-
const match = effectiveSrc.match(/^data:([^;]+);base64,(
|
|
444
|
-
preResolved.push(match
|
|
445
|
-
{ base64: match
|
|
446
|
-
{ base64: null, mimeType: 'image/png' });
|
|
449
|
+
const match = effectiveSrc.match(/^data:(?<mime>[^;]+);base64,(?<data>.+)$/);
|
|
450
|
+
preResolved.push(match
|
|
451
|
+
? { base64: match.groups.data, mimeType: match.groups.mime }
|
|
452
|
+
: { base64: null, mimeType: 'image/png' });
|
|
447
453
|
continue;
|
|
448
454
|
}
|
|
449
455
|
if (!effectiveSrc.startsWith('http')) {
|
|
@@ -583,7 +589,7 @@ function buildImageAppendixResponse(html, images, imageDataList) {
|
|
|
583
589
|
if (images.length > MAX_APPENDIX_IMAGES) {
|
|
584
590
|
content.push({
|
|
585
591
|
type: 'text',
|
|
586
|
-
text: `\n(共 ${images.length} 张图片,仅前 ${MAX_APPENDIX_IMAGES}
|
|
592
|
+
text: `\n(共 ${images.length} 张图片,仅前 ${MAX_APPENDIX_IMAGES} 张附带数据,使用 output 参数导出全部图片)`,
|
|
587
593
|
});
|
|
588
594
|
}
|
|
589
595
|
return { content };
|
|
@@ -599,10 +605,14 @@ function guessMimeType(url) {
|
|
|
599
605
|
return 'image/png';
|
|
600
606
|
}
|
|
601
607
|
const map = {
|
|
602
|
-
'.jpg': 'image/jpeg',
|
|
603
|
-
'.
|
|
604
|
-
'.
|
|
605
|
-
'.
|
|
608
|
+
'.jpg': 'image/jpeg',
|
|
609
|
+
'.jpeg': 'image/jpeg',
|
|
610
|
+
'.png': 'image/png',
|
|
611
|
+
'.gif': 'image/gif',
|
|
612
|
+
'.webp': 'image/webp',
|
|
613
|
+
'.svg': 'image/svg+xml',
|
|
614
|
+
'.ico': 'image/x-icon',
|
|
615
|
+
'.bmp': 'image/bmp',
|
|
606
616
|
'.avif': 'image/avif',
|
|
607
617
|
};
|
|
608
618
|
return map[ext] ?? 'image/png';
|
|
@@ -610,10 +620,14 @@ function guessMimeType(url) {
|
|
|
610
620
|
/** MIME 类型转文件扩展名 */
|
|
611
621
|
function mimeToExt(mimeType) {
|
|
612
622
|
const map = {
|
|
613
|
-
'image/jpeg': '.jpg',
|
|
614
|
-
'image/
|
|
615
|
-
'image/
|
|
616
|
-
'image/
|
|
623
|
+
'image/jpeg': '.jpg',
|
|
624
|
+
'image/png': '.png',
|
|
625
|
+
'image/gif': '.gif',
|
|
626
|
+
'image/webp': '.webp',
|
|
627
|
+
'image/svg+xml': '.svg',
|
|
628
|
+
'image/x-icon': '.ico',
|
|
629
|
+
'image/bmp': '.bmp',
|
|
630
|
+
'image/avif': '.avif',
|
|
617
631
|
};
|
|
618
632
|
return map[mimeType] ?? '.png';
|
|
619
633
|
}
|
|
@@ -660,6 +674,21 @@ async function extractHTML(session, target, timeout) {
|
|
|
660
674
|
*/
|
|
661
675
|
async function extractAttribute(session, target, attribute, timeout) {
|
|
662
676
|
const locator = session.createLocator(target, timeout !== undefined ? { timeout } : undefined);
|
|
677
|
+
// computed style: computed:color → getComputedStyle(el).color
|
|
678
|
+
if (attribute.startsWith('computed:')) {
|
|
679
|
+
const prop = attribute.slice('computed:'.length);
|
|
680
|
+
if (prop === '*') {
|
|
681
|
+
return locator.evaluateOn(`function() {
|
|
682
|
+
var cs = window.getComputedStyle(this);
|
|
683
|
+
var obj = {};
|
|
684
|
+
for (var i = 0; i < cs.length; i++) { obj[cs[i]] = cs.getPropertyValue(cs[i]); }
|
|
685
|
+
return JSON.stringify(obj);
|
|
686
|
+
}`);
|
|
687
|
+
}
|
|
688
|
+
return locator.evaluateOn(`function() {
|
|
689
|
+
return window.getComputedStyle(this).getPropertyValue(${JSON.stringify(prop)});
|
|
690
|
+
}`);
|
|
691
|
+
}
|
|
663
692
|
// 使用 JSON.stringify 安全转义属性名,防止 JS 注入
|
|
664
693
|
return locator.evaluateOn(`function() {
|
|
665
694
|
return this.getAttribute(${JSON.stringify(attribute)});
|
|
@@ -673,20 +702,42 @@ async function extractTextExtension(unifiedSession, target) {
|
|
|
673
702
|
if (!target) {
|
|
674
703
|
return unifiedSession.getText();
|
|
675
704
|
}
|
|
705
|
+
if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
|
|
706
|
+
const expr = '(function(x, y) { var el = document.elementFromPoint(x, y); ' +
|
|
707
|
+
"return el ? (el.textContent || '') : '' })";
|
|
708
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y]);
|
|
709
|
+
}
|
|
676
710
|
const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
|
|
677
711
|
const nth = nthParam ?? 0;
|
|
678
712
|
if (selector) {
|
|
713
|
+
if (text) {
|
|
714
|
+
const expr = '(function(s, t, n) { var els = Array.from(document.querySelectorAll(s))' +
|
|
715
|
+
'.filter(function(e) { return (e.textContent || "").includes(t); }); ' +
|
|
716
|
+
"return n < els.length ? (els[n].textContent || '') : '' })";
|
|
717
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, text, nth]);
|
|
718
|
+
}
|
|
679
719
|
if (nth > 0) {
|
|
680
|
-
|
|
720
|
+
const expr = '(function(s, n) { var els = document.querySelectorAll(s); ' +
|
|
721
|
+
"return n < els.length ? (els[n].textContent || '') : '' })";
|
|
722
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth]);
|
|
681
723
|
}
|
|
682
724
|
return unifiedSession.getText(selector);
|
|
683
725
|
}
|
|
684
726
|
// xpath/text 定位:通过 evaluate 在页面上下文中查找
|
|
685
727
|
if (xpath) {
|
|
686
|
-
|
|
728
|
+
const expr = '(function(xp, n) { var r = document.evaluate(xp, document, null, ' +
|
|
729
|
+
'XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); ' +
|
|
730
|
+
"return n < r.snapshotLength ? (r.snapshotItem(n).textContent || '') : '' })";
|
|
731
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [xpath, nth]);
|
|
687
732
|
}
|
|
688
733
|
if (text) {
|
|
689
|
-
|
|
734
|
+
const expr = '(function(t, n) { var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); ' +
|
|
735
|
+
'var found = []; var seen = new WeakSet(); var node; ' +
|
|
736
|
+
'while ((node = walker.nextNode())) { if (node.textContent && node.textContent.includes(t) ' +
|
|
737
|
+
'&& node.parentElement && !seen.has(node.parentElement)) { ' +
|
|
738
|
+
'seen.add(node.parentElement); found.push(node.parentElement); } } ' +
|
|
739
|
+
"return n < found.length ? (found[n].textContent || '') : '' })";
|
|
740
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [text, nth]);
|
|
690
741
|
}
|
|
691
742
|
return unifiedSession.getText();
|
|
692
743
|
}
|
|
@@ -698,20 +749,41 @@ async function extractHtmlExtension(unifiedSession, target, outer = true) {
|
|
|
698
749
|
if (!target) {
|
|
699
750
|
return unifiedSession.getHtml(undefined, outer);
|
|
700
751
|
}
|
|
752
|
+
const prop = outer ? 'outerHTML' : 'innerHTML';
|
|
753
|
+
if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
|
|
754
|
+
const expr = '(function(x, y, p) { var el = document.elementFromPoint(x, y); ' + "return el ? (el[p] || '') : '' })";
|
|
755
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, prop]);
|
|
756
|
+
}
|
|
701
757
|
const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
|
|
702
758
|
const nth = nthParam ?? 0;
|
|
703
|
-
const prop = outer ? 'outerHTML' : 'innerHTML';
|
|
704
759
|
if (selector) {
|
|
760
|
+
if (text) {
|
|
761
|
+
const expr = '(function(s, t, n, p) { var els = Array.from(document.querySelectorAll(s))' +
|
|
762
|
+
'.filter(function(e) { return (e.textContent || "").includes(t); }); ' +
|
|
763
|
+
"return n < els.length ? (els[n][p] || '') : '' })";
|
|
764
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, text, nth, prop]);
|
|
765
|
+
}
|
|
705
766
|
if (nth > 0) {
|
|
706
|
-
|
|
767
|
+
const expr = '(function(s, n, p) { var els = document.querySelectorAll(s); ' +
|
|
768
|
+
"return n < els.length ? (els[n][p] || '') : '' })";
|
|
769
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth, prop]);
|
|
707
770
|
}
|
|
708
771
|
return unifiedSession.getHtml(selector, outer);
|
|
709
772
|
}
|
|
710
773
|
if (xpath) {
|
|
711
|
-
|
|
774
|
+
const expr = '(function(xp, n, p) { var r = document.evaluate(xp, document, null, ' +
|
|
775
|
+
'XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); ' +
|
|
776
|
+
"return n < r.snapshotLength ? (r.snapshotItem(n)[p] || '') : '' })";
|
|
777
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [xpath, nth, prop]);
|
|
712
778
|
}
|
|
713
779
|
if (text) {
|
|
714
|
-
|
|
780
|
+
const expr = '(function(t, n, p) { var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); ' +
|
|
781
|
+
'var found = []; var seen = new WeakSet(); var node; ' +
|
|
782
|
+
'while ((node = walker.nextNode())) { if (node.textContent && node.textContent.includes(t) ' +
|
|
783
|
+
'&& node.parentElement && !seen.has(node.parentElement)) { ' +
|
|
784
|
+
'seen.add(node.parentElement); found.push(node.parentElement); } } ' +
|
|
785
|
+
"return n < found.length ? (found[n][p] || '') : '' })";
|
|
786
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [text, nth, prop]);
|
|
715
787
|
}
|
|
716
788
|
return unifiedSession.getHtml(undefined, outer);
|
|
717
789
|
}
|
|
@@ -719,9 +791,25 @@ async function extractHtmlExtension(unifiedSession, target, outer = true) {
|
|
|
719
791
|
* Extension 模式:提取属性
|
|
720
792
|
*/
|
|
721
793
|
async function extractAttributeExtension(unifiedSession, target, attribute) {
|
|
794
|
+
if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
|
|
795
|
+
if (attribute.startsWith('computed:')) {
|
|
796
|
+
const prop = attribute.slice('computed:'.length);
|
|
797
|
+
const expr = '(function(x, y, p) { var el = document.elementFromPoint(x, y); ' +
|
|
798
|
+
'return el ? window.getComputedStyle(el).getPropertyValue(p) : null })';
|
|
799
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, prop]);
|
|
800
|
+
}
|
|
801
|
+
const expr = '(function(x, y, a) { var el = document.elementFromPoint(x, y); ' +
|
|
802
|
+
'return el ? el.getAttribute(a) : null })';
|
|
803
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, attribute]);
|
|
804
|
+
}
|
|
722
805
|
const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
|
|
723
|
-
//
|
|
724
|
-
if (
|
|
806
|
+
// computed style: computed:color → getComputedStyle(el)
|
|
807
|
+
if (attribute.startsWith('computed:')) {
|
|
808
|
+
const prop = attribute.slice('computed:'.length);
|
|
809
|
+
return extractComputedStyleExtension(unifiedSession, selector, text, xpath, nthParam ?? 0, prop);
|
|
810
|
+
}
|
|
811
|
+
// xpath 定位(含 text+xpath)或 text 且无 selector 时:先 find 得到 refId,再获取属性
|
|
812
|
+
if (xpath || (text && !selector)) {
|
|
725
813
|
const elements = await unifiedSession.find(selector, text, xpath);
|
|
726
814
|
if (elements.length > 0) {
|
|
727
815
|
const nth = nthParam ?? 0;
|
|
@@ -734,18 +822,39 @@ async function extractAttributeExtension(unifiedSession, target, attribute) {
|
|
|
734
822
|
}
|
|
735
823
|
if (selector) {
|
|
736
824
|
const nth = nthParam ?? 0;
|
|
825
|
+
if (text) {
|
|
826
|
+
// selector + text 组合:find 已实现 AND 过滤
|
|
827
|
+
const elements = await unifiedSession.find(selector, text, undefined);
|
|
828
|
+
if (nth >= elements.length)
|
|
829
|
+
return null;
|
|
830
|
+
return unifiedSession.getAttribute(undefined, elements[nth].refId, attribute);
|
|
831
|
+
}
|
|
737
832
|
if (nth > 0) {
|
|
738
|
-
|
|
833
|
+
const expr = '(function(s, n, a) { var els = document.querySelectorAll(s); ' +
|
|
834
|
+
'return n < els.length ? els[n].getAttribute(a) : null })';
|
|
835
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth, attribute]);
|
|
739
836
|
}
|
|
740
837
|
return unifiedSession.getAttribute(selector, undefined, attribute);
|
|
741
838
|
}
|
|
742
839
|
return null;
|
|
743
840
|
}
|
|
841
|
+
/**
|
|
842
|
+
* Extension 模式:提取 computed style
|
|
843
|
+
*/
|
|
844
|
+
async function extractComputedStyleExtension(unifiedSession, selector, text, xpath, nth, prop) {
|
|
845
|
+
const elements = await unifiedSession.find(selector, text, xpath);
|
|
846
|
+
if (elements.length === 0 || nth >= elements.length) {
|
|
847
|
+
return null;
|
|
848
|
+
}
|
|
849
|
+
const refId = elements[nth].refId;
|
|
850
|
+
// 通过 Extension ISOLATED 世界执行(访问 __mcpElementMap),避免 MAIN 世界找不到 refId
|
|
851
|
+
return unifiedSession.getComputedStyle(refId, prop);
|
|
852
|
+
}
|
|
744
853
|
/**
|
|
745
854
|
* Extension 模式:等待目标元素出现
|
|
746
855
|
*
|
|
747
|
-
* 在 extract 操作前轮询 find()
|
|
748
|
-
* 用于实现 extract 的 timeout
|
|
856
|
+
* 在 extract 操作前轮询 find(),直到找到匹配元素或超时,
|
|
857
|
+
* 用于实现 extract 的 timeout 参数语义
|
|
749
858
|
*/
|
|
750
859
|
async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
751
860
|
const startTime = Date.now();
|
|
@@ -761,7 +870,7 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
|
761
870
|
}
|
|
762
871
|
if (!unifiedSession.isExtensionConnected()) {
|
|
763
872
|
lastError = new Error('Extension 未连接');
|
|
764
|
-
await new Promise(r => setTimeout(r, retryDelay));
|
|
873
|
+
await new Promise((r) => setTimeout(r, retryDelay));
|
|
765
874
|
continue;
|
|
766
875
|
}
|
|
767
876
|
try {
|
|
@@ -773,16 +882,15 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
|
773
882
|
}
|
|
774
883
|
catch (err) {
|
|
775
884
|
// 暂时性错误(RPC 超时、发送失败、连接断开)可重试,其他确定性错误立即抛出
|
|
776
|
-
if (err instanceof
|
|
777
|
-
Error &&
|
|
885
|
+
if (err instanceof Error &&
|
|
778
886
|
/Request timeout|Failed to send|disconnect|未连接|stopped|replaced/i.test(err.message)) {
|
|
779
887
|
lastError = err;
|
|
780
|
-
await new Promise(r => setTimeout(r, retryDelay));
|
|
888
|
+
await new Promise((r) => setTimeout(r, retryDelay));
|
|
781
889
|
continue;
|
|
782
890
|
}
|
|
783
891
|
throw err;
|
|
784
892
|
}
|
|
785
|
-
await new Promise(r => setTimeout(r, retryDelay));
|
|
893
|
+
await new Promise((r) => setTimeout(r, retryDelay));
|
|
786
894
|
}
|
|
787
895
|
}
|
|
788
896
|
/**
|
|
@@ -790,7 +898,7 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
|
790
898
|
*/
|
|
791
899
|
export function registerExtractTool(server) {
|
|
792
900
|
server.registerTool('extract', {
|
|
793
|
-
description:
|
|
901
|
+
description: `提取页面内容:文本、HTML(可附带图片)、属性、截图、状态、页面元信息`,
|
|
794
902
|
inputSchema: extractSchema,
|
|
795
903
|
}, (args) => handleExtract(args));
|
|
796
904
|
}
|