@pyrokine/mcp-chrome 1.7.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -31
- package/dist/anti-detection/behavior.d.ts.map +1 -1
- package/dist/anti-detection/behavior.js.map +1 -1
- package/dist/anti-detection/index.d.ts +1 -1
- package/dist/anti-detection/index.d.ts.map +1 -1
- package/dist/anti-detection/index.js +1 -1
- package/dist/anti-detection/index.js.map +1 -1
- package/dist/anti-detection/injection.d.ts +6 -2
- package/dist/anti-detection/injection.d.ts.map +1 -1
- package/dist/anti-detection/injection.js +34 -80
- package/dist/anti-detection/injection.js.map +1 -1
- package/dist/cdp/client.d.ts +2 -2
- package/dist/cdp/client.d.ts.map +1 -1
- package/dist/cdp/client.js +8 -10
- package/dist/cdp/client.js.map +1 -1
- package/dist/cdp/index.d.ts.map +1 -1
- package/dist/cdp/index.js.map +1 -1
- package/dist/cdp/launcher.d.ts.map +1 -1
- package/dist/cdp/launcher.js +40 -13
- package/dist/cdp/launcher.js.map +1 -1
- package/dist/core/auto-wait.d.ts +2 -2
- package/dist/core/auto-wait.d.ts.map +1 -1
- package/dist/core/auto-wait.js +2 -2
- package/dist/core/auto-wait.js.map +1 -1
- package/dist/core/browser-driver.d.ts +307 -0
- package/dist/core/browser-driver.d.ts.map +1 -0
- package/dist/core/browser-driver.js +21 -0
- package/dist/core/browser-driver.js.map +1 -0
- package/dist/core/error-sanitizer.d.ts +25 -0
- package/dist/core/error-sanitizer.d.ts.map +1 -0
- package/dist/core/error-sanitizer.js +66 -0
- package/dist/core/error-sanitizer.js.map +1 -0
- package/dist/core/errors.d.ts +10 -1
- package/dist/core/errors.d.ts.map +1 -1
- package/dist/core/errors.js +17 -4
- package/dist/core/errors.js.map +1 -1
- package/dist/core/extension-errors.d.ts +20 -0
- package/dist/core/extension-errors.d.ts.map +1 -0
- package/dist/core/extension-errors.js +40 -0
- package/dist/core/extension-errors.js.map +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js.map +1 -1
- package/dist/core/locator.d.ts +2 -2
- package/dist/core/locator.d.ts.map +1 -1
- package/dist/core/locator.js +25 -65
- package/dist/core/locator.js.map +1 -1
- package/dist/core/retry.d.ts +2 -2
- package/dist/core/retry.d.ts.map +1 -1
- package/dist/core/retry.js +2 -2
- package/dist/core/retry.js.map +1 -1
- package/dist/core/session.d.ts +149 -46
- package/dist/core/session.d.ts.map +1 -1
- package/dist/core/session.js +673 -181
- package/dist/core/session.js.map +1 -1
- package/dist/core/types.d.ts +9 -3
- package/dist/core/types.d.ts.map +1 -1
- package/dist/core/types.js +13 -6
- package/dist/core/types.js.map +1 -1
- package/dist/core/unified-session.d.ts +46 -85
- package/dist/core/unified-session.d.ts.map +1 -1
- package/dist/core/unified-session.js +341 -650
- package/dist/core/unified-session.js.map +1 -1
- package/dist/core/utils.d.ts +7 -0
- package/dist/core/utils.d.ts.map +1 -0
- package/dist/core/utils.js +33 -0
- package/dist/core/utils.js.map +1 -0
- package/dist/extension/bridge.d.ts +69 -52
- package/dist/extension/bridge.d.ts.map +1 -1
- package/dist/extension/bridge.js +242 -111
- package/dist/extension/bridge.js.map +1 -1
- package/dist/extension/http-server.d.ts +6 -4
- package/dist/extension/http-server.d.ts.map +1 -1
- package/dist/extension/http-server.js +45 -31
- package/dist/extension/http-server.js.map +1 -1
- package/dist/extension/index.d.ts.map +1 -1
- package/dist/extension/index.js.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/tools/browse.d.ts.map +1 -1
- package/dist/tools/browse.js +32 -34
- package/dist/tools/browse.js.map +1 -1
- package/dist/tools/cookies.d.ts.map +1 -1
- package/dist/tools/cookies.js +38 -16
- package/dist/tools/cookies.js.map +1 -1
- package/dist/tools/evaluate.d.ts.map +1 -1
- package/dist/tools/evaluate.js +54 -23
- package/dist/tools/evaluate.js.map +1 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +221 -153
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/input.d.ts.map +1 -1
- package/dist/tools/input.js +281 -89
- package/dist/tools/input.js.map +1 -1
- package/dist/tools/logs.d.ts.map +1 -1
- package/dist/tools/logs.js +31 -17
- package/dist/tools/logs.js.map +1 -1
- package/dist/tools/manage.d.ts.map +1 -1
- package/dist/tools/manage.js +25 -28
- package/dist/tools/manage.js.map +1 -1
- package/dist/tools/schema.d.ts +1 -1
- package/dist/tools/schema.d.ts.map +1 -1
- package/dist/tools/schema.js +31 -55
- package/dist/tools/schema.js.map +1 -1
- package/dist/tools/wait.d.ts.map +1 -1
- package/dist/tools/wait.js +19 -16
- package/dist/tools/wait.js.map +1 -1
- package/package.json +48 -40
package/dist/tools/extract.js
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* - metadata: 页面元信息(title/og/jsonLd 等)
|
|
11
11
|
*/
|
|
12
12
|
import { mkdir, writeFile } from 'fs/promises';
|
|
13
|
-
import { basename, dirname, extname, join } from 'path';
|
|
13
|
+
import { basename, dirname, extname, join, resolve, sep } from 'path';
|
|
14
14
|
import { z } from 'zod';
|
|
15
15
|
import { formatErrorResponse, formatResponse, getSession, getUnifiedSession } from '../core/index.js';
|
|
16
16
|
import { targetToFindParams, targetZodSchema } from './schema.js';
|
|
@@ -20,22 +20,48 @@ const MAX_APPENDIX_IMAGES = 20;
|
|
|
20
20
|
* extract 参数 schema
|
|
21
21
|
*/
|
|
22
22
|
const extractSchema = z.object({
|
|
23
|
-
type: z.enum(['text', 'html', 'attribute', 'screenshot', 'state', 'metadata'])
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
type: z.enum(['text', 'html', 'attribute', 'screenshot', 'state', 'metadata']).describe('提取类型'),
|
|
24
|
+
target: targetZodSchema
|
|
25
|
+
.optional()
|
|
26
|
+
.describe('目标元素(attribute 必填;text/html 可选,省略则提取整个页面;screenshot 可选用于元素截图;state 可选(仅 Extension)用于返回目标子树;metadata 不需要)'),
|
|
26
27
|
attribute: z.string().optional().describe('属性名(attribute)'),
|
|
27
|
-
images: z
|
|
28
|
+
images: z
|
|
29
|
+
.enum(['info', 'data'])
|
|
30
|
+
.optional()
|
|
31
|
+
.describe('图片提取模式(仅 html 类型有效),info: 元信息(src/alt/尺寸);data: 含图片数据'),
|
|
28
32
|
fullPage: z.boolean().optional().describe('是否全页面截图(screenshot)'),
|
|
29
|
-
scale: z
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
scale: z
|
|
34
|
+
.number()
|
|
35
|
+
.optional()
|
|
36
|
+
.describe('截图缩放比例(screenshot fullPage),默认 1,设为 0.5 可降低分辨率加速大页面截图'),
|
|
37
|
+
format: z
|
|
38
|
+
.enum(['png', 'jpeg', 'webp'])
|
|
39
|
+
.optional()
|
|
40
|
+
.describe('截图格式(screenshot),默认 png,jpeg/webp 体积更小,复杂页面推荐 jpeg 减少超时'),
|
|
41
|
+
quality: z
|
|
42
|
+
.number()
|
|
43
|
+
.min(0)
|
|
44
|
+
.max(100)
|
|
45
|
+
.optional()
|
|
46
|
+
.describe('截图质量(screenshot,仅 jpeg/webp 有效),0-100,推荐 80'),
|
|
47
|
+
output: z
|
|
48
|
+
.string()
|
|
33
49
|
.optional()
|
|
34
|
-
.describe('
|
|
35
|
-
tabId: z
|
|
50
|
+
.describe('输出文件路径(可选),若指定结果写入文件,否则返回内容,images=data 时作为输出目录路径'),
|
|
51
|
+
tabId: z
|
|
52
|
+
.string()
|
|
53
|
+
.optional()
|
|
54
|
+
.describe('目标 Tab ID(可选,仅 Extension 模式),不指定则使用当前 attach 的 tab,可操作非当前 attach 的 tab,CDP 模式下不支持此参数'),
|
|
36
55
|
depth: z.number().optional().describe('DOM 遍历深度限制(state),默认 15,减小可降低返回数据量'),
|
|
56
|
+
mode: z
|
|
57
|
+
.enum(['accessibility', 'domsnapshot'])
|
|
58
|
+
.optional()
|
|
59
|
+
.describe('页面状态提取模式(state 类型有效),accessibility=可访问性树(默认,与原 read_page 一致),domsnapshot=CDP DOMSnapshot 全量快照(仅 CDP 模式)'),
|
|
37
60
|
timeout: z.number().optional().describe('等待目标元素超时'),
|
|
38
|
-
frame: z
|
|
61
|
+
frame: z
|
|
62
|
+
.union([z.string(), z.number()])
|
|
63
|
+
.optional()
|
|
64
|
+
.describe('iframe 定位(可选,仅 Extension 模式),CSS 选择器(如 "iframe#main")或索引(如 0),不指定则在主框架操作'),
|
|
39
65
|
});
|
|
40
66
|
/**
|
|
41
67
|
* extract 工具处理器
|
|
@@ -98,36 +124,10 @@ async function handleExtract(args) {
|
|
|
98
124
|
}
|
|
99
125
|
case 'attribute': {
|
|
100
126
|
if (!args.target) {
|
|
101
|
-
return
|
|
102
|
-
content: [
|
|
103
|
-
{
|
|
104
|
-
type: 'text',
|
|
105
|
-
text: JSON.stringify({
|
|
106
|
-
error: {
|
|
107
|
-
code: 'INVALID_ARGUMENT',
|
|
108
|
-
message: 'attribute 提取需要 target 参数',
|
|
109
|
-
},
|
|
110
|
-
}),
|
|
111
|
-
},
|
|
112
|
-
],
|
|
113
|
-
isError: true,
|
|
114
|
-
};
|
|
127
|
+
return formatErrorResponse(new Error('attribute 提取需要 target 参数'));
|
|
115
128
|
}
|
|
116
129
|
if (!args.attribute) {
|
|
117
|
-
return
|
|
118
|
-
content: [
|
|
119
|
-
{
|
|
120
|
-
type: 'text',
|
|
121
|
-
text: JSON.stringify({
|
|
122
|
-
error: {
|
|
123
|
-
code: 'INVALID_ARGUMENT',
|
|
124
|
-
message: 'attribute 提取需要 attribute 参数',
|
|
125
|
-
},
|
|
126
|
-
}),
|
|
127
|
-
},
|
|
128
|
-
],
|
|
129
|
-
isError: true,
|
|
130
|
-
};
|
|
130
|
+
return formatErrorResponse(new Error('attribute 提取需要 attribute 参数'));
|
|
131
131
|
}
|
|
132
132
|
let value;
|
|
133
133
|
if (useExtension) {
|
|
@@ -147,84 +147,20 @@ async function handleExtract(args) {
|
|
|
147
147
|
// 有 target 时获取元素区域用于裁剪(支持所有 target 类型)
|
|
148
148
|
let clip;
|
|
149
149
|
if (args.target) {
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
clip = rect;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
else {
|
|
163
|
-
const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
|
|
164
|
-
const nth = nthParam ?? 0;
|
|
165
|
-
const rect = await session.evaluate(`function(selector, text, xpath, nth) {
|
|
166
|
-
function toRect(el) {
|
|
167
|
-
var r = el.getBoundingClientRect();
|
|
168
|
-
return {x: r.x, y: r.y, width: r.width, height: r.height};
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
function findByXPath(xp, n) {
|
|
172
|
-
var r = document.evaluate(xp, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
|
173
|
-
return r.snapshotLength > n ? r.snapshotItem(n) : null;
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
function findBySelector(sel, txt, n) {
|
|
177
|
-
var els = document.querySelectorAll(sel);
|
|
178
|
-
var matchCount = 0;
|
|
179
|
-
for (var i = 0; i < els.length; ++i) {
|
|
180
|
-
var el = els[i];
|
|
181
|
-
if (txt) {
|
|
182
|
-
var content = (el.textContent || '').trim();
|
|
183
|
-
if (!content.includes(txt)) continue;
|
|
184
|
-
}
|
|
185
|
-
if (matchCount < n) { ++matchCount; continue; }
|
|
186
|
-
return el;
|
|
187
|
-
}
|
|
188
|
-
return null;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
function findByText(txt, n) {
|
|
192
|
-
var root = document.body || document.documentElement;
|
|
193
|
-
if (!root) return null;
|
|
194
|
-
var walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
|
|
195
|
-
var matchCount = 0;
|
|
196
|
-
var el = walker.currentNode;
|
|
197
|
-
while (el) {
|
|
198
|
-
var content = (el.textContent || '').trim();
|
|
199
|
-
if (content && content.includes(txt)) {
|
|
200
|
-
if (matchCount < n) { ++matchCount; }
|
|
201
|
-
else { return el; }
|
|
202
|
-
}
|
|
203
|
-
el = walker.nextNode();
|
|
204
|
-
}
|
|
205
|
-
return null;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
var el = null;
|
|
209
|
-
if (xpath) {
|
|
210
|
-
el = findByXPath(xpath, nth);
|
|
211
|
-
} else if (selector) {
|
|
212
|
-
el = findBySelector(selector, text, nth);
|
|
213
|
-
} else if (text) {
|
|
214
|
-
el = findByText(text, nth);
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
return el ? toRect(el) : null;
|
|
218
|
-
}`, [selector ?? null, text ?? null, xpath ?? null, nth]);
|
|
219
|
-
if (rect && rect.width > 0 && rect.height > 0) {
|
|
150
|
+
const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
|
|
151
|
+
const nth = nthParam ?? 0;
|
|
152
|
+
// unified.find 内部根据 Extension 连接状态自动路由到 Extension/CDP 路径,
|
|
153
|
+
// 返回视口绝对坐标(含 iframe 坐标修正)
|
|
154
|
+
const found = await unifiedSession.find(selector, text, xpath);
|
|
155
|
+
if (found.length > nth) {
|
|
156
|
+
const rect = found[nth].rect;
|
|
157
|
+
if (rect.width > 0 && rect.height > 0) {
|
|
220
158
|
clip = rect;
|
|
221
159
|
}
|
|
222
160
|
}
|
|
223
161
|
}
|
|
224
162
|
const base64 = await unifiedSession.screenshot({
|
|
225
|
-
fullPage: clip ?
|
|
226
|
-
false :
|
|
227
|
-
(args.fullPage ?? false),
|
|
163
|
+
fullPage: clip ? false : (args.fullPage ?? false),
|
|
228
164
|
scale: args.scale,
|
|
229
165
|
format: args.format,
|
|
230
166
|
quality: args.quality,
|
|
@@ -245,12 +181,52 @@ async function handleExtract(args) {
|
|
|
245
181
|
{
|
|
246
182
|
type: 'image',
|
|
247
183
|
data: base64,
|
|
248
|
-
mimeType: `image/${args.format === 'jpeg' ? 'jpeg' : args.format ?? 'png'}`,
|
|
184
|
+
mimeType: `image/${args.format === 'jpeg' ? 'jpeg' : (args.format ?? 'png')}`,
|
|
249
185
|
},
|
|
250
186
|
],
|
|
251
187
|
};
|
|
252
188
|
}
|
|
253
189
|
case 'state': {
|
|
190
|
+
// mode=domsnapshot:用 CDP DOMSnapshot.captureSnapshot 取全量快照(仅 CDP 模式)
|
|
191
|
+
if (args.mode === 'domsnapshot') {
|
|
192
|
+
if (useExtension) {
|
|
193
|
+
return {
|
|
194
|
+
content: [
|
|
195
|
+
{
|
|
196
|
+
type: 'text',
|
|
197
|
+
text: JSON.stringify({
|
|
198
|
+
error: {
|
|
199
|
+
code: 'INVALID_ARGUMENT',
|
|
200
|
+
message: 'mode=domsnapshot 仅 CDP 模式支持,Extension 模式请用默认 accessibility',
|
|
201
|
+
},
|
|
202
|
+
}),
|
|
203
|
+
},
|
|
204
|
+
],
|
|
205
|
+
isError: true,
|
|
206
|
+
};
|
|
207
|
+
}
|
|
208
|
+
const snapshot = await unifiedSession.sendCdpCommand('DOMSnapshot.captureSnapshot', {
|
|
209
|
+
computedStyles: ['display', 'visibility', 'opacity'],
|
|
210
|
+
includePaintOrder: false,
|
|
211
|
+
includeDOMRects: true,
|
|
212
|
+
});
|
|
213
|
+
if (args.output) {
|
|
214
|
+
await writeOutputFile(args.output, JSON.stringify(snapshot, null, 2), 'utf-8');
|
|
215
|
+
return formatResponse({
|
|
216
|
+
success: true,
|
|
217
|
+
type: 'state',
|
|
218
|
+
mode: 'domsnapshot',
|
|
219
|
+
output: args.output,
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
return formatResponse({
|
|
223
|
+
success: true,
|
|
224
|
+
type: 'state',
|
|
225
|
+
mode: 'domsnapshot',
|
|
226
|
+
snapshot,
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
// 默认:accessibility 树(原行为)
|
|
254
230
|
// 有 target 时获取子树的无障碍状态
|
|
255
231
|
let refId;
|
|
256
232
|
if (args.target && useExtension) {
|
|
@@ -323,10 +299,15 @@ async function handleExtract(args) {
|
|
|
323
299
|
}
|
|
324
300
|
}
|
|
325
301
|
// ==================== HTML + 图片提取 ====================
|
|
326
|
-
/**
|
|
302
|
+
/** 写入文件前自动创建父目录(验证路径在 cwd 范围内)*/
|
|
327
303
|
async function writeOutputFile(path, data, encoding) {
|
|
328
|
-
|
|
329
|
-
|
|
304
|
+
const cwd = process.cwd();
|
|
305
|
+
const safePath = resolve(cwd, path);
|
|
306
|
+
if (!safePath.startsWith(cwd + sep) && safePath !== cwd) {
|
|
307
|
+
throw new Error(`output 路径超出工作目录范围: ${path}`);
|
|
308
|
+
}
|
|
309
|
+
await mkdir(dirname(safePath), { recursive: true });
|
|
310
|
+
await writeFile(safePath, data, encoding);
|
|
330
311
|
}
|
|
331
312
|
/**
|
|
332
313
|
* 处理 html + images 提取
|
|
@@ -350,7 +331,11 @@ async function handleHtmlWithImages(unifiedSession, session, useExtension, args)
|
|
|
350
331
|
var images = [];
|
|
351
332
|
for (var i = 0; i < imgList.length; i++) {
|
|
352
333
|
var img = imgList[i];
|
|
353
|
-
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
334
|
+
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
335
|
+
var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
|
|
336
|
+
if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
|
|
337
|
+
})(), alt: img.alt, width: img.width, height: img.height,
|
|
338
|
+
naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
|
|
354
339
|
}
|
|
355
340
|
return {html: html, images: images};
|
|
356
341
|
})`, undefined, undefined, [selector, nth]);
|
|
@@ -381,14 +366,19 @@ async function handleHtmlWithImages(unifiedSession, session, useExtension, args)
|
|
|
381
366
|
const appendixMode = !args.output;
|
|
382
367
|
const imageDataList = await fetchImageData(unifiedSession, result.images, appendixMode ? MAX_APPENDIX_IMAGES : undefined);
|
|
383
368
|
if (args.output) {
|
|
369
|
+
const cwd2 = process.cwd();
|
|
370
|
+
const safeOutputDir = resolve(cwd2, args.output);
|
|
371
|
+
if (!safeOutputDir.startsWith(cwd2 + sep) && safeOutputDir !== cwd2) {
|
|
372
|
+
return formatErrorResponse(new Error(`output 路径超出工作目录范围: ${args.output}`));
|
|
373
|
+
}
|
|
384
374
|
// 写入目录
|
|
385
|
-
await writeImageDirectory(
|
|
375
|
+
await writeImageDirectory(safeOutputDir, result.html, result.images, imageDataList);
|
|
386
376
|
return formatResponse({
|
|
387
377
|
success: true,
|
|
388
378
|
type: 'html',
|
|
389
|
-
output:
|
|
379
|
+
output: safeOutputDir,
|
|
390
380
|
imageCount: result.images.length,
|
|
391
|
-
index: join(
|
|
381
|
+
index: join(safeOutputDir, 'index.json'),
|
|
392
382
|
});
|
|
393
383
|
}
|
|
394
384
|
// 无 output:MCP 附录方式返回
|
|
@@ -408,7 +398,11 @@ async function extractHtmlWithImagesCdp(session, selector, timeout) {
|
|
|
408
398
|
var images = [];
|
|
409
399
|
for (var i = 0; i < imgList.length; i++) {
|
|
410
400
|
var img = imgList[i];
|
|
411
|
-
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
401
|
+
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
402
|
+
var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
|
|
403
|
+
if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
|
|
404
|
+
})(), alt: img.alt, width: img.width, height: img.height,
|
|
405
|
+
naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
|
|
412
406
|
}
|
|
413
407
|
return {html: html, images: images};
|
|
414
408
|
}`);
|
|
@@ -419,7 +413,11 @@ async function extractHtmlWithImagesCdp(session, selector, timeout) {
|
|
|
419
413
|
var images = [];
|
|
420
414
|
for (var i = 0; i < imgs.length; i++) {
|
|
421
415
|
var img = imgs[i];
|
|
422
|
-
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
416
|
+
images.push({index: i, src: img.src, dataSrc: (function() {
|
|
417
|
+
var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
|
|
418
|
+
if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
|
|
419
|
+
})(), alt: img.alt, width: img.width, height: img.height,
|
|
420
|
+
naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
|
|
423
421
|
}
|
|
424
422
|
return {html: html, images: images};
|
|
425
423
|
})()`);
|
|
@@ -448,10 +446,10 @@ async function fetchImageData(unifiedSession, images, limit) {
|
|
|
448
446
|
continue;
|
|
449
447
|
}
|
|
450
448
|
if (effectiveSrc.startsWith('data:')) {
|
|
451
|
-
const match = effectiveSrc.match(/^data:([^;]+);base64,(
|
|
452
|
-
preResolved.push(match
|
|
453
|
-
{ base64: match
|
|
454
|
-
{ base64: null, mimeType: 'image/png' });
|
|
449
|
+
const match = effectiveSrc.match(/^data:(?<mime>[^;]+);base64,(?<data>.+)$/);
|
|
450
|
+
preResolved.push(match
|
|
451
|
+
? { base64: match.groups.data, mimeType: match.groups.mime }
|
|
452
|
+
: { base64: null, mimeType: 'image/png' });
|
|
455
453
|
continue;
|
|
456
454
|
}
|
|
457
455
|
if (!effectiveSrc.startsWith('http')) {
|
|
@@ -591,7 +589,7 @@ function buildImageAppendixResponse(html, images, imageDataList) {
|
|
|
591
589
|
if (images.length > MAX_APPENDIX_IMAGES) {
|
|
592
590
|
content.push({
|
|
593
591
|
type: 'text',
|
|
594
|
-
text: `\n(共 ${images.length} 张图片,仅前 ${MAX_APPENDIX_IMAGES}
|
|
592
|
+
text: `\n(共 ${images.length} 张图片,仅前 ${MAX_APPENDIX_IMAGES} 张附带数据,使用 output 参数导出全部图片)`,
|
|
595
593
|
});
|
|
596
594
|
}
|
|
597
595
|
return { content };
|
|
@@ -607,10 +605,14 @@ function guessMimeType(url) {
|
|
|
607
605
|
return 'image/png';
|
|
608
606
|
}
|
|
609
607
|
const map = {
|
|
610
|
-
'.jpg': 'image/jpeg',
|
|
611
|
-
'.
|
|
612
|
-
'.
|
|
613
|
-
'.
|
|
608
|
+
'.jpg': 'image/jpeg',
|
|
609
|
+
'.jpeg': 'image/jpeg',
|
|
610
|
+
'.png': 'image/png',
|
|
611
|
+
'.gif': 'image/gif',
|
|
612
|
+
'.webp': 'image/webp',
|
|
613
|
+
'.svg': 'image/svg+xml',
|
|
614
|
+
'.ico': 'image/x-icon',
|
|
615
|
+
'.bmp': 'image/bmp',
|
|
614
616
|
'.avif': 'image/avif',
|
|
615
617
|
};
|
|
616
618
|
return map[ext] ?? 'image/png';
|
|
@@ -618,10 +620,14 @@ function guessMimeType(url) {
|
|
|
618
620
|
/** MIME 类型转文件扩展名 */
|
|
619
621
|
function mimeToExt(mimeType) {
|
|
620
622
|
const map = {
|
|
621
|
-
'image/jpeg': '.jpg',
|
|
622
|
-
'image/
|
|
623
|
-
'image/
|
|
624
|
-
'image/
|
|
623
|
+
'image/jpeg': '.jpg',
|
|
624
|
+
'image/png': '.png',
|
|
625
|
+
'image/gif': '.gif',
|
|
626
|
+
'image/webp': '.webp',
|
|
627
|
+
'image/svg+xml': '.svg',
|
|
628
|
+
'image/x-icon': '.ico',
|
|
629
|
+
'image/bmp': '.bmp',
|
|
630
|
+
'image/avif': '.avif',
|
|
625
631
|
};
|
|
626
632
|
return map[mimeType] ?? '.png';
|
|
627
633
|
}
|
|
@@ -696,20 +702,42 @@ async function extractTextExtension(unifiedSession, target) {
|
|
|
696
702
|
if (!target) {
|
|
697
703
|
return unifiedSession.getText();
|
|
698
704
|
}
|
|
705
|
+
if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
|
|
706
|
+
const expr = '(function(x, y) { var el = document.elementFromPoint(x, y); ' +
|
|
707
|
+
"return el ? (el.textContent || '') : '' })";
|
|
708
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y]);
|
|
709
|
+
}
|
|
699
710
|
const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
|
|
700
711
|
const nth = nthParam ?? 0;
|
|
701
712
|
if (selector) {
|
|
713
|
+
if (text) {
|
|
714
|
+
const expr = '(function(s, t, n) { var els = Array.from(document.querySelectorAll(s))' +
|
|
715
|
+
'.filter(function(e) { return (e.textContent || "").includes(t); }); ' +
|
|
716
|
+
"return n < els.length ? (els[n].textContent || '') : '' })";
|
|
717
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, text, nth]);
|
|
718
|
+
}
|
|
702
719
|
if (nth > 0) {
|
|
703
|
-
|
|
720
|
+
const expr = '(function(s, n) { var els = document.querySelectorAll(s); ' +
|
|
721
|
+
"return n < els.length ? (els[n].textContent || '') : '' })";
|
|
722
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth]);
|
|
704
723
|
}
|
|
705
724
|
return unifiedSession.getText(selector);
|
|
706
725
|
}
|
|
707
726
|
// xpath/text 定位:通过 evaluate 在页面上下文中查找
|
|
708
727
|
if (xpath) {
|
|
709
|
-
|
|
728
|
+
const expr = '(function(xp, n) { var r = document.evaluate(xp, document, null, ' +
|
|
729
|
+
'XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); ' +
|
|
730
|
+
"return n < r.snapshotLength ? (r.snapshotItem(n).textContent || '') : '' })";
|
|
731
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [xpath, nth]);
|
|
710
732
|
}
|
|
711
733
|
if (text) {
|
|
712
|
-
|
|
734
|
+
const expr = '(function(t, n) { var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); ' +
|
|
735
|
+
'var found = []; var seen = new WeakSet(); var node; ' +
|
|
736
|
+
'while ((node = walker.nextNode())) { if (node.textContent && node.textContent.includes(t) ' +
|
|
737
|
+
'&& node.parentElement && !seen.has(node.parentElement)) { ' +
|
|
738
|
+
'seen.add(node.parentElement); found.push(node.parentElement); } } ' +
|
|
739
|
+
"return n < found.length ? (found[n].textContent || '') : '' })";
|
|
740
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [text, nth]);
|
|
713
741
|
}
|
|
714
742
|
return unifiedSession.getText();
|
|
715
743
|
}
|
|
@@ -721,20 +749,41 @@ async function extractHtmlExtension(unifiedSession, target, outer = true) {
|
|
|
721
749
|
if (!target) {
|
|
722
750
|
return unifiedSession.getHtml(undefined, outer);
|
|
723
751
|
}
|
|
752
|
+
const prop = outer ? 'outerHTML' : 'innerHTML';
|
|
753
|
+
if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
|
|
754
|
+
const expr = '(function(x, y, p) { var el = document.elementFromPoint(x, y); ' + "return el ? (el[p] || '') : '' })";
|
|
755
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, prop]);
|
|
756
|
+
}
|
|
724
757
|
const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
|
|
725
758
|
const nth = nthParam ?? 0;
|
|
726
|
-
const prop = outer ? 'outerHTML' : 'innerHTML';
|
|
727
759
|
if (selector) {
|
|
760
|
+
if (text) {
|
|
761
|
+
const expr = '(function(s, t, n, p) { var els = Array.from(document.querySelectorAll(s))' +
|
|
762
|
+
'.filter(function(e) { return (e.textContent || "").includes(t); }); ' +
|
|
763
|
+
"return n < els.length ? (els[n][p] || '') : '' })";
|
|
764
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, text, nth, prop]);
|
|
765
|
+
}
|
|
728
766
|
if (nth > 0) {
|
|
729
|
-
|
|
767
|
+
const expr = '(function(s, n, p) { var els = document.querySelectorAll(s); ' +
|
|
768
|
+
"return n < els.length ? (els[n][p] || '') : '' })";
|
|
769
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth, prop]);
|
|
730
770
|
}
|
|
731
771
|
return unifiedSession.getHtml(selector, outer);
|
|
732
772
|
}
|
|
733
773
|
if (xpath) {
|
|
734
|
-
|
|
774
|
+
const expr = '(function(xp, n, p) { var r = document.evaluate(xp, document, null, ' +
|
|
775
|
+
'XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); ' +
|
|
776
|
+
"return n < r.snapshotLength ? (r.snapshotItem(n)[p] || '') : '' })";
|
|
777
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [xpath, nth, prop]);
|
|
735
778
|
}
|
|
736
779
|
if (text) {
|
|
737
|
-
|
|
780
|
+
const expr = '(function(t, n, p) { var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); ' +
|
|
781
|
+
'var found = []; var seen = new WeakSet(); var node; ' +
|
|
782
|
+
'while ((node = walker.nextNode())) { if (node.textContent && node.textContent.includes(t) ' +
|
|
783
|
+
'&& node.parentElement && !seen.has(node.parentElement)) { ' +
|
|
784
|
+
'seen.add(node.parentElement); found.push(node.parentElement); } } ' +
|
|
785
|
+
"return n < found.length ? (found[n][p] || '') : '' })";
|
|
786
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [text, nth, prop]);
|
|
738
787
|
}
|
|
739
788
|
return unifiedSession.getHtml(undefined, outer);
|
|
740
789
|
}
|
|
@@ -742,14 +791,25 @@ async function extractHtmlExtension(unifiedSession, target, outer = true) {
|
|
|
742
791
|
* Extension 模式:提取属性
|
|
743
792
|
*/
|
|
744
793
|
async function extractAttributeExtension(unifiedSession, target, attribute) {
|
|
794
|
+
if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
|
|
795
|
+
if (attribute.startsWith('computed:')) {
|
|
796
|
+
const prop = attribute.slice('computed:'.length);
|
|
797
|
+
const expr = '(function(x, y, p) { var el = document.elementFromPoint(x, y); ' +
|
|
798
|
+
'return el ? window.getComputedStyle(el).getPropertyValue(p) : null })';
|
|
799
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, prop]);
|
|
800
|
+
}
|
|
801
|
+
const expr = '(function(x, y, a) { var el = document.elementFromPoint(x, y); ' +
|
|
802
|
+
'return el ? el.getAttribute(a) : null })';
|
|
803
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, attribute]);
|
|
804
|
+
}
|
|
745
805
|
const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
|
|
746
806
|
// computed style: computed:color → getComputedStyle(el)
|
|
747
807
|
if (attribute.startsWith('computed:')) {
|
|
748
808
|
const prop = attribute.slice('computed:'.length);
|
|
749
809
|
return extractComputedStyleExtension(unifiedSession, selector, text, xpath, nthParam ?? 0, prop);
|
|
750
810
|
}
|
|
751
|
-
// xpath
|
|
752
|
-
if (xpath || text) {
|
|
811
|
+
// xpath 定位(含 text+xpath)或 text 且无 selector 时:先 find 得到 refId,再获取属性
|
|
812
|
+
if (xpath || (text && !selector)) {
|
|
753
813
|
const elements = await unifiedSession.find(selector, text, xpath);
|
|
754
814
|
if (elements.length > 0) {
|
|
755
815
|
const nth = nthParam ?? 0;
|
|
@@ -762,8 +822,17 @@ async function extractAttributeExtension(unifiedSession, target, attribute) {
|
|
|
762
822
|
}
|
|
763
823
|
if (selector) {
|
|
764
824
|
const nth = nthParam ?? 0;
|
|
825
|
+
if (text) {
|
|
826
|
+
// selector + text 组合:find 已实现 AND 过滤
|
|
827
|
+
const elements = await unifiedSession.find(selector, text, undefined);
|
|
828
|
+
if (nth >= elements.length)
|
|
829
|
+
return null;
|
|
830
|
+
return unifiedSession.getAttribute(undefined, elements[nth].refId, attribute);
|
|
831
|
+
}
|
|
765
832
|
if (nth > 0) {
|
|
766
|
-
|
|
833
|
+
const expr = '(function(s, n, a) { var els = document.querySelectorAll(s); ' +
|
|
834
|
+
'return n < els.length ? els[n].getAttribute(a) : null })';
|
|
835
|
+
return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth, attribute]);
|
|
767
836
|
}
|
|
768
837
|
return unifiedSession.getAttribute(selector, undefined, attribute);
|
|
769
838
|
}
|
|
@@ -784,8 +853,8 @@ async function extractComputedStyleExtension(unifiedSession, selector, text, xpa
|
|
|
784
853
|
/**
|
|
785
854
|
* Extension 模式:等待目标元素出现
|
|
786
855
|
*
|
|
787
|
-
* 在 extract 操作前轮询 find()
|
|
788
|
-
* 用于实现 extract 的 timeout
|
|
856
|
+
* 在 extract 操作前轮询 find(),直到找到匹配元素或超时,
|
|
857
|
+
* 用于实现 extract 的 timeout 参数语义
|
|
789
858
|
*/
|
|
790
859
|
async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
791
860
|
const startTime = Date.now();
|
|
@@ -801,7 +870,7 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
|
801
870
|
}
|
|
802
871
|
if (!unifiedSession.isExtensionConnected()) {
|
|
803
872
|
lastError = new Error('Extension 未连接');
|
|
804
|
-
await new Promise(r => setTimeout(r, retryDelay));
|
|
873
|
+
await new Promise((r) => setTimeout(r, retryDelay));
|
|
805
874
|
continue;
|
|
806
875
|
}
|
|
807
876
|
try {
|
|
@@ -813,16 +882,15 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
|
|
|
813
882
|
}
|
|
814
883
|
catch (err) {
|
|
815
884
|
// 暂时性错误(RPC 超时、发送失败、连接断开)可重试,其他确定性错误立即抛出
|
|
816
|
-
if (err instanceof
|
|
817
|
-
Error &&
|
|
885
|
+
if (err instanceof Error &&
|
|
818
886
|
/Request timeout|Failed to send|disconnect|未连接|stopped|replaced/i.test(err.message)) {
|
|
819
887
|
lastError = err;
|
|
820
|
-
await new Promise(r => setTimeout(r, retryDelay));
|
|
888
|
+
await new Promise((r) => setTimeout(r, retryDelay));
|
|
821
889
|
continue;
|
|
822
890
|
}
|
|
823
891
|
throw err;
|
|
824
892
|
}
|
|
825
|
-
await new Promise(r => setTimeout(r, retryDelay));
|
|
893
|
+
await new Promise((r) => setTimeout(r, retryDelay));
|
|
826
894
|
}
|
|
827
895
|
}
|
|
828
896
|
/**
|