@pyrokine/mcp-chrome 1.7.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +71 -31
  2. package/dist/anti-detection/behavior.d.ts.map +1 -1
  3. package/dist/anti-detection/behavior.js.map +1 -1
  4. package/dist/anti-detection/index.d.ts +1 -1
  5. package/dist/anti-detection/index.d.ts.map +1 -1
  6. package/dist/anti-detection/index.js +1 -1
  7. package/dist/anti-detection/index.js.map +1 -1
  8. package/dist/anti-detection/injection.d.ts +6 -2
  9. package/dist/anti-detection/injection.d.ts.map +1 -1
  10. package/dist/anti-detection/injection.js +34 -80
  11. package/dist/anti-detection/injection.js.map +1 -1
  12. package/dist/cdp/client.d.ts +2 -2
  13. package/dist/cdp/client.d.ts.map +1 -1
  14. package/dist/cdp/client.js +8 -10
  15. package/dist/cdp/client.js.map +1 -1
  16. package/dist/cdp/index.d.ts.map +1 -1
  17. package/dist/cdp/index.js.map +1 -1
  18. package/dist/cdp/launcher.d.ts.map +1 -1
  19. package/dist/cdp/launcher.js +40 -13
  20. package/dist/cdp/launcher.js.map +1 -1
  21. package/dist/core/auto-wait.d.ts +2 -2
  22. package/dist/core/auto-wait.d.ts.map +1 -1
  23. package/dist/core/auto-wait.js +2 -2
  24. package/dist/core/auto-wait.js.map +1 -1
  25. package/dist/core/browser-driver.d.ts +307 -0
  26. package/dist/core/browser-driver.d.ts.map +1 -0
  27. package/dist/core/browser-driver.js +21 -0
  28. package/dist/core/browser-driver.js.map +1 -0
  29. package/dist/core/error-sanitizer.d.ts +25 -0
  30. package/dist/core/error-sanitizer.d.ts.map +1 -0
  31. package/dist/core/error-sanitizer.js +66 -0
  32. package/dist/core/error-sanitizer.js.map +1 -0
  33. package/dist/core/errors.d.ts +10 -1
  34. package/dist/core/errors.d.ts.map +1 -1
  35. package/dist/core/errors.js +17 -4
  36. package/dist/core/errors.js.map +1 -1
  37. package/dist/core/extension-errors.d.ts +20 -0
  38. package/dist/core/extension-errors.d.ts.map +1 -0
  39. package/dist/core/extension-errors.js +40 -0
  40. package/dist/core/extension-errors.js.map +1 -0
  41. package/dist/core/index.d.ts.map +1 -1
  42. package/dist/core/index.js.map +1 -1
  43. package/dist/core/locator.d.ts +2 -2
  44. package/dist/core/locator.d.ts.map +1 -1
  45. package/dist/core/locator.js +25 -65
  46. package/dist/core/locator.js.map +1 -1
  47. package/dist/core/retry.d.ts +2 -2
  48. package/dist/core/retry.d.ts.map +1 -1
  49. package/dist/core/retry.js +2 -2
  50. package/dist/core/retry.js.map +1 -1
  51. package/dist/core/session.d.ts +149 -46
  52. package/dist/core/session.d.ts.map +1 -1
  53. package/dist/core/session.js +673 -181
  54. package/dist/core/session.js.map +1 -1
  55. package/dist/core/types.d.ts +9 -3
  56. package/dist/core/types.d.ts.map +1 -1
  57. package/dist/core/types.js +13 -6
  58. package/dist/core/types.js.map +1 -1
  59. package/dist/core/unified-session.d.ts +46 -85
  60. package/dist/core/unified-session.d.ts.map +1 -1
  61. package/dist/core/unified-session.js +341 -650
  62. package/dist/core/unified-session.js.map +1 -1
  63. package/dist/core/utils.d.ts +7 -0
  64. package/dist/core/utils.d.ts.map +1 -0
  65. package/dist/core/utils.js +33 -0
  66. package/dist/core/utils.js.map +1 -0
  67. package/dist/extension/bridge.d.ts +69 -52
  68. package/dist/extension/bridge.d.ts.map +1 -1
  69. package/dist/extension/bridge.js +242 -111
  70. package/dist/extension/bridge.js.map +1 -1
  71. package/dist/extension/http-server.d.ts +6 -4
  72. package/dist/extension/http-server.d.ts.map +1 -1
  73. package/dist/extension/http-server.js +45 -31
  74. package/dist/extension/http-server.js.map +1 -1
  75. package/dist/extension/index.d.ts.map +1 -1
  76. package/dist/extension/index.js.map +1 -1
  77. package/dist/index.js +3 -1
  78. package/dist/index.js.map +1 -1
  79. package/dist/tools/browse.d.ts.map +1 -1
  80. package/dist/tools/browse.js +32 -34
  81. package/dist/tools/browse.js.map +1 -1
  82. package/dist/tools/cookies.d.ts.map +1 -1
  83. package/dist/tools/cookies.js +38 -16
  84. package/dist/tools/cookies.js.map +1 -1
  85. package/dist/tools/evaluate.d.ts.map +1 -1
  86. package/dist/tools/evaluate.js +54 -23
  87. package/dist/tools/evaluate.js.map +1 -1
  88. package/dist/tools/extract.d.ts.map +1 -1
  89. package/dist/tools/extract.js +221 -153
  90. package/dist/tools/extract.js.map +1 -1
  91. package/dist/tools/index.d.ts.map +1 -1
  92. package/dist/tools/index.js.map +1 -1
  93. package/dist/tools/input.d.ts.map +1 -1
  94. package/dist/tools/input.js +281 -89
  95. package/dist/tools/input.js.map +1 -1
  96. package/dist/tools/logs.d.ts.map +1 -1
  97. package/dist/tools/logs.js +31 -17
  98. package/dist/tools/logs.js.map +1 -1
  99. package/dist/tools/manage.d.ts.map +1 -1
  100. package/dist/tools/manage.js +25 -28
  101. package/dist/tools/manage.js.map +1 -1
  102. package/dist/tools/schema.d.ts +1 -1
  103. package/dist/tools/schema.d.ts.map +1 -1
  104. package/dist/tools/schema.js +31 -55
  105. package/dist/tools/schema.js.map +1 -1
  106. package/dist/tools/wait.d.ts.map +1 -1
  107. package/dist/tools/wait.js +19 -16
  108. package/dist/tools/wait.js.map +1 -1
  109. package/package.json +48 -40
@@ -10,7 +10,7 @@
10
10
  * - metadata: 页面元信息(title/og/jsonLd 等)
11
11
  */
12
12
  import { mkdir, writeFile } from 'fs/promises';
13
- import { basename, dirname, extname, join } from 'path';
13
+ import { basename, dirname, extname, join, resolve, sep } from 'path';
14
14
  import { z } from 'zod';
15
15
  import { formatErrorResponse, formatResponse, getSession, getUnifiedSession } from '../core/index.js';
16
16
  import { targetToFindParams, targetZodSchema } from './schema.js';
@@ -20,22 +20,48 @@ const MAX_APPENDIX_IMAGES = 20;
20
20
  * extract 参数 schema
21
21
  */
22
22
  const extractSchema = z.object({
23
- type: z.enum(['text', 'html', 'attribute', 'screenshot', 'state', 'metadata'])
24
- .describe('提取类型'),
25
- target: targetZodSchema.optional().describe('目标元素(attribute 必填;text/html 可选,省略则提取整个页面;screenshot 可选用于元素截图;state 可选(仅 Extension)用于返回目标子树;metadata 不需要)'),
23
+ type: z.enum(['text', 'html', 'attribute', 'screenshot', 'state', 'metadata']).describe('提取类型'),
24
+ target: targetZodSchema
25
+ .optional()
26
+ .describe('目标元素(attribute 必填;text/html 可选,省略则提取整个页面;screenshot 可选用于元素截图;state 可选(仅 Extension)用于返回目标子树;metadata 不需要)'),
26
27
  attribute: z.string().optional().describe('属性名(attribute)'),
27
- images: z.enum(['info', 'data']).optional().describe('图片提取模式(仅 html 类型有效)。info: 元信息(src/alt/尺寸);data: 含图片数据'),
28
+ images: z
29
+ .enum(['info', 'data'])
30
+ .optional()
31
+ .describe('图片提取模式(仅 html 类型有效),info: 元信息(src/alt/尺寸);data: 含图片数据'),
28
32
  fullPage: z.boolean().optional().describe('是否全页面截图(screenshot)'),
29
- scale: z.number().optional().describe('截图缩放比例(screenshot fullPage)。默认 1,设为 0.5 可降低分辨率加速大页面截图'),
30
- format: z.enum(['png', 'jpeg', 'webp']).optional().describe('截图格式(screenshot)。默认 png,jpeg/webp 体积更小,复杂页面推荐 jpeg 减少超时'),
31
- quality: z.number().min(0).max(100).optional().describe('截图质量(screenshot,仅 jpeg/webp 有效)。0-100,推荐 80'),
32
- output: z.string()
33
+ scale: z
34
+ .number()
35
+ .optional()
36
+ .describe('截图缩放比例(screenshot fullPage),默认 1,设为 0.5 可降低分辨率加速大页面截图'),
37
+ format: z
38
+ .enum(['png', 'jpeg', 'webp'])
39
+ .optional()
40
+ .describe('截图格式(screenshot),默认 png,jpeg/webp 体积更小,复杂页面推荐 jpeg 减少超时'),
41
+ quality: z
42
+ .number()
43
+ .min(0)
44
+ .max(100)
45
+ .optional()
46
+ .describe('截图质量(screenshot,仅 jpeg/webp 有效),0-100,推荐 80'),
47
+ output: z
48
+ .string()
33
49
  .optional()
34
- .describe('输出文件路径(可选)。若指定,结果写入文件;否则返回内容。images=data 时作为输出目录路径'),
35
- tabId: z.string().optional().describe('目标 Tab ID(可选,仅 Extension 模式)。不指定则使用当前 attach 的 tab。可操作非当前 attach 的 tab。CDP 模式下不支持此参数'),
50
+ .describe('输出文件路径(可选),若指定结果写入文件,否则返回内容,images=data 时作为输出目录路径'),
51
+ tabId: z
52
+ .string()
53
+ .optional()
54
+ .describe('目标 Tab ID(可选,仅 Extension 模式),不指定则使用当前 attach 的 tab,可操作非当前 attach 的 tab,CDP 模式下不支持此参数'),
36
55
  depth: z.number().optional().describe('DOM 遍历深度限制(state),默认 15,减小可降低返回数据量'),
56
+ mode: z
57
+ .enum(['accessibility', 'domsnapshot'])
58
+ .optional()
59
+ .describe('页面状态提取模式(state 类型有效),accessibility=可访问性树(默认,与原 read_page 一致),domsnapshot=CDP DOMSnapshot 全量快照(仅 CDP 模式)'),
37
60
  timeout: z.number().optional().describe('等待目标元素超时'),
38
- frame: z.union([z.string(), z.number()]).optional().describe('iframe 定位(可选,仅 Extension 模式)。CSS 选择器(如 "iframe#main")或索引(如 0)。不指定则在主框架操作'),
61
+ frame: z
62
+ .union([z.string(), z.number()])
63
+ .optional()
64
+ .describe('iframe 定位(可选,仅 Extension 模式),CSS 选择器(如 "iframe#main")或索引(如 0),不指定则在主框架操作'),
39
65
  });
40
66
  /**
41
67
  * extract 工具处理器
@@ -98,36 +124,10 @@ async function handleExtract(args) {
98
124
  }
99
125
  case 'attribute': {
100
126
  if (!args.target) {
101
- return {
102
- content: [
103
- {
104
- type: 'text',
105
- text: JSON.stringify({
106
- error: {
107
- code: 'INVALID_ARGUMENT',
108
- message: 'attribute 提取需要 target 参数',
109
- },
110
- }),
111
- },
112
- ],
113
- isError: true,
114
- };
127
+ return formatErrorResponse(new Error('attribute 提取需要 target 参数'));
115
128
  }
116
129
  if (!args.attribute) {
117
- return {
118
- content: [
119
- {
120
- type: 'text',
121
- text: JSON.stringify({
122
- error: {
123
- code: 'INVALID_ARGUMENT',
124
- message: 'attribute 提取需要 attribute 参数',
125
- },
126
- }),
127
- },
128
- ],
129
- isError: true,
130
- };
130
+ return formatErrorResponse(new Error('attribute 提取需要 attribute 参数'));
131
131
  }
132
132
  let value;
133
133
  if (useExtension) {
@@ -147,84 +147,20 @@ async function handleExtract(args) {
147
147
  // 有 target 时获取元素区域用于裁剪(支持所有 target 类型)
148
148
  let clip;
149
149
  if (args.target) {
150
- if (useExtension) {
151
- const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
152
- const nth = nthParam ?? 0;
153
- const found = await unifiedSession.find(selector, text, xpath);
154
- if (found.length > nth) {
155
- const rect = found[nth].rect;
156
- if (rect.width > 0 && rect.height > 0) {
157
- // find() 返回视口绝对坐标(已包含 iframe 坐标修正)
158
- clip = rect;
159
- }
160
- }
161
- }
162
- else {
163
- const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
164
- const nth = nthParam ?? 0;
165
- const rect = await session.evaluate(`function(selector, text, xpath, nth) {
166
- function toRect(el) {
167
- var r = el.getBoundingClientRect();
168
- return {x: r.x, y: r.y, width: r.width, height: r.height};
169
- }
170
-
171
- function findByXPath(xp, n) {
172
- var r = document.evaluate(xp, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
173
- return r.snapshotLength > n ? r.snapshotItem(n) : null;
174
- }
175
-
176
- function findBySelector(sel, txt, n) {
177
- var els = document.querySelectorAll(sel);
178
- var matchCount = 0;
179
- for (var i = 0; i < els.length; ++i) {
180
- var el = els[i];
181
- if (txt) {
182
- var content = (el.textContent || '').trim();
183
- if (!content.includes(txt)) continue;
184
- }
185
- if (matchCount < n) { ++matchCount; continue; }
186
- return el;
187
- }
188
- return null;
189
- }
190
-
191
- function findByText(txt, n) {
192
- var root = document.body || document.documentElement;
193
- if (!root) return null;
194
- var walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
195
- var matchCount = 0;
196
- var el = walker.currentNode;
197
- while (el) {
198
- var content = (el.textContent || '').trim();
199
- if (content && content.includes(txt)) {
200
- if (matchCount < n) { ++matchCount; }
201
- else { return el; }
202
- }
203
- el = walker.nextNode();
204
- }
205
- return null;
206
- }
207
-
208
- var el = null;
209
- if (xpath) {
210
- el = findByXPath(xpath, nth);
211
- } else if (selector) {
212
- el = findBySelector(selector, text, nth);
213
- } else if (text) {
214
- el = findByText(text, nth);
215
- }
216
-
217
- return el ? toRect(el) : null;
218
- }`, [selector ?? null, text ?? null, xpath ?? null, nth]);
219
- if (rect && rect.width > 0 && rect.height > 0) {
150
+ const { selector, text, xpath, nth: nthParam, } = targetToFindParams(args.target);
151
+ const nth = nthParam ?? 0;
152
+ // unified.find 内部根据 Extension 连接状态自动路由到 Extension/CDP 路径,
153
+ // 返回视口绝对坐标(含 iframe 坐标修正)
154
+ const found = await unifiedSession.find(selector, text, xpath);
155
+ if (found.length > nth) {
156
+ const rect = found[nth].rect;
157
+ if (rect.width > 0 && rect.height > 0) {
220
158
  clip = rect;
221
159
  }
222
160
  }
223
161
  }
224
162
  const base64 = await unifiedSession.screenshot({
225
- fullPage: clip ?
226
- false :
227
- (args.fullPage ?? false),
163
+ fullPage: clip ? false : (args.fullPage ?? false),
228
164
  scale: args.scale,
229
165
  format: args.format,
230
166
  quality: args.quality,
@@ -245,12 +181,52 @@ async function handleExtract(args) {
245
181
  {
246
182
  type: 'image',
247
183
  data: base64,
248
- mimeType: `image/${args.format === 'jpeg' ? 'jpeg' : args.format ?? 'png'}`,
184
+ mimeType: `image/${args.format === 'jpeg' ? 'jpeg' : (args.format ?? 'png')}`,
249
185
  },
250
186
  ],
251
187
  };
252
188
  }
253
189
  case 'state': {
190
+ // mode=domsnapshot:用 CDP DOMSnapshot.captureSnapshot 取全量快照(仅 CDP 模式)
191
+ if (args.mode === 'domsnapshot') {
192
+ if (useExtension) {
193
+ return {
194
+ content: [
195
+ {
196
+ type: 'text',
197
+ text: JSON.stringify({
198
+ error: {
199
+ code: 'INVALID_ARGUMENT',
200
+ message: 'mode=domsnapshot 仅 CDP 模式支持,Extension 模式请用默认 accessibility',
201
+ },
202
+ }),
203
+ },
204
+ ],
205
+ isError: true,
206
+ };
207
+ }
208
+ const snapshot = await unifiedSession.sendCdpCommand('DOMSnapshot.captureSnapshot', {
209
+ computedStyles: ['display', 'visibility', 'opacity'],
210
+ includePaintOrder: false,
211
+ includeDOMRects: true,
212
+ });
213
+ if (args.output) {
214
+ await writeOutputFile(args.output, JSON.stringify(snapshot, null, 2), 'utf-8');
215
+ return formatResponse({
216
+ success: true,
217
+ type: 'state',
218
+ mode: 'domsnapshot',
219
+ output: args.output,
220
+ });
221
+ }
222
+ return formatResponse({
223
+ success: true,
224
+ type: 'state',
225
+ mode: 'domsnapshot',
226
+ snapshot,
227
+ });
228
+ }
229
+ // 默认:accessibility 树(原行为)
254
230
  // 有 target 时获取子树的无障碍状态
255
231
  let refId;
256
232
  if (args.target && useExtension) {
@@ -323,10 +299,15 @@ async function handleExtract(args) {
323
299
  }
324
300
  }
325
301
  // ==================== HTML + 图片提取 ====================
326
- /** 写入文件前自动创建父目录 */
302
+ /** 写入文件前自动创建父目录(验证路径在 cwd 范围内)*/
327
303
  async function writeOutputFile(path, data, encoding) {
328
- await mkdir(dirname(path), { recursive: true });
329
- await writeFile(path, data, encoding);
304
+ const cwd = process.cwd();
305
+ const safePath = resolve(cwd, path);
306
+ if (!safePath.startsWith(cwd + sep) && safePath !== cwd) {
307
+ throw new Error(`output 路径超出工作目录范围: ${path}`);
308
+ }
309
+ await mkdir(dirname(safePath), { recursive: true });
310
+ await writeFile(safePath, data, encoding);
330
311
  }
331
312
  /**
332
313
  * 处理 html + images 提取
@@ -350,7 +331,11 @@ async function handleHtmlWithImages(unifiedSession, session, useExtension, args)
350
331
  var images = [];
351
332
  for (var i = 0; i < imgList.length; i++) {
352
333
  var img = imgList[i];
353
- images.push({index: i, src: img.src, dataSrc: (function() { var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || ''; if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw } })(), alt: img.alt, width: img.width, height: img.height, naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
334
+ images.push({index: i, src: img.src, dataSrc: (function() {
335
+ var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
336
+ if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
337
+ })(), alt: img.alt, width: img.width, height: img.height,
338
+ naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
354
339
  }
355
340
  return {html: html, images: images};
356
341
  })`, undefined, undefined, [selector, nth]);
@@ -381,14 +366,19 @@ async function handleHtmlWithImages(unifiedSession, session, useExtension, args)
381
366
  const appendixMode = !args.output;
382
367
  const imageDataList = await fetchImageData(unifiedSession, result.images, appendixMode ? MAX_APPENDIX_IMAGES : undefined);
383
368
  if (args.output) {
369
+ const cwd2 = process.cwd();
370
+ const safeOutputDir = resolve(cwd2, args.output);
371
+ if (!safeOutputDir.startsWith(cwd2 + sep) && safeOutputDir !== cwd2) {
372
+ return formatErrorResponse(new Error(`output 路径超出工作目录范围: ${args.output}`));
373
+ }
384
374
  // 写入目录
385
- await writeImageDirectory(args.output, result.html, result.images, imageDataList);
375
+ await writeImageDirectory(safeOutputDir, result.html, result.images, imageDataList);
386
376
  return formatResponse({
387
377
  success: true,
388
378
  type: 'html',
389
- output: args.output,
379
+ output: safeOutputDir,
390
380
  imageCount: result.images.length,
391
- index: join(args.output, 'index.json'),
381
+ index: join(safeOutputDir, 'index.json'),
392
382
  });
393
383
  }
394
384
  // 无 output:MCP 附录方式返回
@@ -408,7 +398,11 @@ async function extractHtmlWithImagesCdp(session, selector, timeout) {
408
398
  var images = [];
409
399
  for (var i = 0; i < imgList.length; i++) {
410
400
  var img = imgList[i];
411
- images.push({index: i, src: img.src, dataSrc: (function() { var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || ''; if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw } })(), alt: img.alt, width: img.width, height: img.height, naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
401
+ images.push({index: i, src: img.src, dataSrc: (function() {
402
+ var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
403
+ if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
404
+ })(), alt: img.alt, width: img.width, height: img.height,
405
+ naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
412
406
  }
413
407
  return {html: html, images: images};
414
408
  }`);
@@ -419,7 +413,11 @@ async function extractHtmlWithImagesCdp(session, selector, timeout) {
419
413
  var images = [];
420
414
  for (var i = 0; i < imgs.length; i++) {
421
415
  var img = imgs[i];
422
- images.push({index: i, src: img.src, dataSrc: (function() { var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || ''; if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw } })(), alt: img.alt, width: img.width, height: img.height, naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
416
+ images.push({index: i, src: img.src, dataSrc: (function() {
417
+ var raw = img.dataset.src || img.dataset.lazySrc || img.dataset.original || '';
418
+ if (!raw) return ''; try { return new URL(raw, location.href).href } catch(e) { return raw }
419
+ })(), alt: img.alt, width: img.width, height: img.height,
420
+ naturalWidth: img.naturalWidth, naturalHeight: img.naturalHeight});
423
421
  }
424
422
  return {html: html, images: images};
425
423
  })()`);
@@ -448,10 +446,10 @@ async function fetchImageData(unifiedSession, images, limit) {
448
446
  continue;
449
447
  }
450
448
  if (effectiveSrc.startsWith('data:')) {
451
- const match = effectiveSrc.match(/^data:([^;]+);base64,(.+)$/);
452
- preResolved.push(match ?
453
- { base64: match[2], mimeType: match[1] } :
454
- { base64: null, mimeType: 'image/png' });
449
+ const match = effectiveSrc.match(/^data:(?<mime>[^;]+);base64,(?<data>.+)$/);
450
+ preResolved.push(match
451
+ ? { base64: match.groups.data, mimeType: match.groups.mime }
452
+ : { base64: null, mimeType: 'image/png' });
455
453
  continue;
456
454
  }
457
455
  if (!effectiveSrc.startsWith('http')) {
@@ -591,7 +589,7 @@ function buildImageAppendixResponse(html, images, imageDataList) {
591
589
  if (images.length > MAX_APPENDIX_IMAGES) {
592
590
  content.push({
593
591
  type: 'text',
594
- text: `\n(共 ${images.length} 张图片,仅前 ${MAX_APPENDIX_IMAGES} 张附带数据。使用 output 参数导出全部图片)`,
592
+ text: `\n(共 ${images.length} 张图片,仅前 ${MAX_APPENDIX_IMAGES} 张附带数据,使用 output 参数导出全部图片)`,
595
593
  });
596
594
  }
597
595
  return { content };
@@ -607,10 +605,14 @@ function guessMimeType(url) {
607
605
  return 'image/png';
608
606
  }
609
607
  const map = {
610
- '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
611
- '.png': 'image/png', '.gif': 'image/gif',
612
- '.webp': 'image/webp', '.svg': 'image/svg+xml',
613
- '.ico': 'image/x-icon', '.bmp': 'image/bmp',
608
+ '.jpg': 'image/jpeg',
609
+ '.jpeg': 'image/jpeg',
610
+ '.png': 'image/png',
611
+ '.gif': 'image/gif',
612
+ '.webp': 'image/webp',
613
+ '.svg': 'image/svg+xml',
614
+ '.ico': 'image/x-icon',
615
+ '.bmp': 'image/bmp',
614
616
  '.avif': 'image/avif',
615
617
  };
616
618
  return map[ext] ?? 'image/png';
@@ -618,10 +620,14 @@ function guessMimeType(url) {
618
620
  /** MIME 类型转文件扩展名 */
619
621
  function mimeToExt(mimeType) {
620
622
  const map = {
621
- 'image/jpeg': '.jpg', 'image/png': '.png',
622
- 'image/gif': '.gif', 'image/webp': '.webp',
623
- 'image/svg+xml': '.svg', 'image/x-icon': '.ico',
624
- 'image/bmp': '.bmp', 'image/avif': '.avif',
623
+ 'image/jpeg': '.jpg',
624
+ 'image/png': '.png',
625
+ 'image/gif': '.gif',
626
+ 'image/webp': '.webp',
627
+ 'image/svg+xml': '.svg',
628
+ 'image/x-icon': '.ico',
629
+ 'image/bmp': '.bmp',
630
+ 'image/avif': '.avif',
625
631
  };
626
632
  return map[mimeType] ?? '.png';
627
633
  }
@@ -696,20 +702,42 @@ async function extractTextExtension(unifiedSession, target) {
696
702
  if (!target) {
697
703
  return unifiedSession.getText();
698
704
  }
705
+ if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
706
+ const expr = '(function(x, y) { var el = document.elementFromPoint(x, y); ' +
707
+ "return el ? (el.textContent || '') : '' })";
708
+ return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y]);
709
+ }
699
710
  const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
700
711
  const nth = nthParam ?? 0;
701
712
  if (selector) {
713
+ if (text) {
714
+ const expr = '(function(s, t, n) { var els = Array.from(document.querySelectorAll(s))' +
715
+ '.filter(function(e) { return (e.textContent || "").includes(t); }); ' +
716
+ "return n < els.length ? (els[n].textContent || '') : '' })";
717
+ return unifiedSession.evaluate(expr, undefined, undefined, [selector, text, nth]);
718
+ }
702
719
  if (nth > 0) {
703
- return unifiedSession.evaluate(`(function(s, n) { var els = document.querySelectorAll(s); return n < els.length ? (els[n].textContent || '') : '' })`, undefined, undefined, [selector, nth]);
720
+ const expr = '(function(s, n) { var els = document.querySelectorAll(s); ' +
721
+ "return n < els.length ? (els[n].textContent || '') : '' })";
722
+ return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth]);
704
723
  }
705
724
  return unifiedSession.getText(selector);
706
725
  }
707
726
  // xpath/text 定位:通过 evaluate 在页面上下文中查找
708
727
  if (xpath) {
709
- return unifiedSession.evaluate(`(function(xp, n) { var r = document.evaluate(xp, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); return n < r.snapshotLength ? (r.snapshotItem(n).textContent || '') : '' })`, undefined, undefined, [xpath, nth]);
728
+ const expr = '(function(xp, n) { var r = document.evaluate(xp, document, null, ' +
729
+ 'XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); ' +
730
+ "return n < r.snapshotLength ? (r.snapshotItem(n).textContent || '') : '' })";
731
+ return unifiedSession.evaluate(expr, undefined, undefined, [xpath, nth]);
710
732
  }
711
733
  if (text) {
712
- return unifiedSession.evaluate(`(function(t, n) { var els = document.querySelectorAll('*'); var found = []; for (var i = 0; i < els.length; i++) { var cn = els[i].childNodes; for (var j = 0; j < cn.length; j++) { if (cn[j].nodeType === 3 && cn[j].textContent && cn[j].textContent.includes(t)) { found.push(els[i]); break; } } } return n < found.length ? (found[n].textContent || '') : '' })`, undefined, undefined, [text, nth]);
734
+ const expr = '(function(t, n) { var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); ' +
735
+ 'var found = []; var seen = new WeakSet(); var node; ' +
736
+ 'while ((node = walker.nextNode())) { if (node.textContent && node.textContent.includes(t) ' +
737
+ '&& node.parentElement && !seen.has(node.parentElement)) { ' +
738
+ 'seen.add(node.parentElement); found.push(node.parentElement); } } ' +
739
+ "return n < found.length ? (found[n].textContent || '') : '' })";
740
+ return unifiedSession.evaluate(expr, undefined, undefined, [text, nth]);
713
741
  }
714
742
  return unifiedSession.getText();
715
743
  }
@@ -721,20 +749,41 @@ async function extractHtmlExtension(unifiedSession, target, outer = true) {
721
749
  if (!target) {
722
750
  return unifiedSession.getHtml(undefined, outer);
723
751
  }
752
+ const prop = outer ? 'outerHTML' : 'innerHTML';
753
+ if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
754
+ const expr = '(function(x, y, p) { var el = document.elementFromPoint(x, y); ' + "return el ? (el[p] || '') : '' })";
755
+ return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, prop]);
756
+ }
724
757
  const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
725
758
  const nth = nthParam ?? 0;
726
- const prop = outer ? 'outerHTML' : 'innerHTML';
727
759
  if (selector) {
760
+ if (text) {
761
+ const expr = '(function(s, t, n, p) { var els = Array.from(document.querySelectorAll(s))' +
762
+ '.filter(function(e) { return (e.textContent || "").includes(t); }); ' +
763
+ "return n < els.length ? (els[n][p] || '') : '' })";
764
+ return unifiedSession.evaluate(expr, undefined, undefined, [selector, text, nth, prop]);
765
+ }
728
766
  if (nth > 0) {
729
- return unifiedSession.evaluate(`(function(s, n, p) { var els = document.querySelectorAll(s); return n < els.length ? (els[n][p] || '') : '' })`, undefined, undefined, [selector, nth, prop]);
767
+ const expr = '(function(s, n, p) { var els = document.querySelectorAll(s); ' +
768
+ "return n < els.length ? (els[n][p] || '') : '' })";
769
+ return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth, prop]);
730
770
  }
731
771
  return unifiedSession.getHtml(selector, outer);
732
772
  }
733
773
  if (xpath) {
734
- return unifiedSession.evaluate(`(function(xp, n, p) { var r = document.evaluate(xp, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); return n < r.snapshotLength ? (r.snapshotItem(n)[p] || '') : '' })`, undefined, undefined, [xpath, nth, prop]);
774
+ const expr = '(function(xp, n, p) { var r = document.evaluate(xp, document, null, ' +
775
+ 'XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); ' +
776
+ "return n < r.snapshotLength ? (r.snapshotItem(n)[p] || '') : '' })";
777
+ return unifiedSession.evaluate(expr, undefined, undefined, [xpath, nth, prop]);
735
778
  }
736
779
  if (text) {
737
- return unifiedSession.evaluate(`(function(t, n, p) { var els = document.querySelectorAll('*'); var found = []; for (var i = 0; i < els.length; i++) { var cn = els[i].childNodes; for (var j = 0; j < cn.length; j++) { if (cn[j].nodeType === 3 && cn[j].textContent && cn[j].textContent.includes(t)) { found.push(els[i]); break; } } } return n < found.length ? (found[n][p] || '') : '' })`, undefined, undefined, [text, nth, prop]);
780
+ const expr = '(function(t, n, p) { var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); ' +
781
+ 'var found = []; var seen = new WeakSet(); var node; ' +
782
+ 'while ((node = walker.nextNode())) { if (node.textContent && node.textContent.includes(t) ' +
783
+ '&& node.parentElement && !seen.has(node.parentElement)) { ' +
784
+ 'seen.add(node.parentElement); found.push(node.parentElement); } } ' +
785
+ "return n < found.length ? (found[n][p] || '') : '' })";
786
+ return unifiedSession.evaluate(expr, undefined, undefined, [text, nth, prop]);
738
787
  }
739
788
  return unifiedSession.getHtml(undefined, outer);
740
789
  }
@@ -742,14 +791,25 @@ async function extractHtmlExtension(unifiedSession, target, outer = true) {
742
791
  * Extension 模式:提取属性
743
792
  */
744
793
  async function extractAttributeExtension(unifiedSession, target, attribute) {
794
+ if ('x' in target && 'y' in target && typeof target.x === 'number' && typeof target.y === 'number') {
795
+ if (attribute.startsWith('computed:')) {
796
+ const prop = attribute.slice('computed:'.length);
797
+ const expr = '(function(x, y, p) { var el = document.elementFromPoint(x, y); ' +
798
+ 'return el ? window.getComputedStyle(el).getPropertyValue(p) : null })';
799
+ return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, prop]);
800
+ }
801
+ const expr = '(function(x, y, a) { var el = document.elementFromPoint(x, y); ' +
802
+ 'return el ? el.getAttribute(a) : null })';
803
+ return unifiedSession.evaluate(expr, undefined, undefined, [target.x, target.y, attribute]);
804
+ }
745
805
  const { selector, text, xpath, nth: nthParam } = targetToFindParams(target);
746
806
  // computed style: computed:color → getComputedStyle(el)
747
807
  if (attribute.startsWith('computed:')) {
748
808
  const prop = attribute.slice('computed:'.length);
749
809
  return extractComputedStyleExtension(unifiedSession, selector, text, xpath, nthParam ?? 0, prop);
750
810
  }
751
- // xpath/text 定位需要先 find 得到 refId,再获取属性
752
- if (xpath || text) {
811
+ // xpath 定位(含 text+xpath)或 text 且无 selector 时:先 find 得到 refId,再获取属性
812
+ if (xpath || (text && !selector)) {
753
813
  const elements = await unifiedSession.find(selector, text, xpath);
754
814
  if (elements.length > 0) {
755
815
  const nth = nthParam ?? 0;
@@ -762,8 +822,17 @@ async function extractAttributeExtension(unifiedSession, target, attribute) {
762
822
  }
763
823
  if (selector) {
764
824
  const nth = nthParam ?? 0;
825
+ if (text) {
826
+ // selector + text 组合:find 已实现 AND 过滤
827
+ const elements = await unifiedSession.find(selector, text, undefined);
828
+ if (nth >= elements.length)
829
+ return null;
830
+ return unifiedSession.getAttribute(undefined, elements[nth].refId, attribute);
831
+ }
765
832
  if (nth > 0) {
766
- return unifiedSession.evaluate(`(function(s, n, a) { var els = document.querySelectorAll(s); return n < els.length ? els[n].getAttribute(a) : null })`, undefined, undefined, [selector, nth, attribute]);
833
+ const expr = '(function(s, n, a) { var els = document.querySelectorAll(s); ' +
834
+ 'return n < els.length ? els[n].getAttribute(a) : null })';
835
+ return unifiedSession.evaluate(expr, undefined, undefined, [selector, nth, attribute]);
767
836
  }
768
837
  return unifiedSession.getAttribute(selector, undefined, attribute);
769
838
  }
@@ -784,8 +853,8 @@ async function extractComputedStyleExtension(unifiedSession, selector, text, xpa
784
853
  /**
785
854
  * Extension 模式:等待目标元素出现
786
855
  *
787
- * 在 extract 操作前轮询 find(),直到找到匹配元素或超时。
788
- * 用于实现 extract 的 timeout 参数语义。
856
+ * 在 extract 操作前轮询 find(),直到找到匹配元素或超时,
857
+ * 用于实现 extract 的 timeout 参数语义
789
858
  */
790
859
  async function waitForTargetExtension(unifiedSession, target, timeout) {
791
860
  const startTime = Date.now();
@@ -801,7 +870,7 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
801
870
  }
802
871
  if (!unifiedSession.isExtensionConnected()) {
803
872
  lastError = new Error('Extension 未连接');
804
- await new Promise(r => setTimeout(r, retryDelay));
873
+ await new Promise((r) => setTimeout(r, retryDelay));
805
874
  continue;
806
875
  }
807
876
  try {
@@ -813,16 +882,15 @@ async function waitForTargetExtension(unifiedSession, target, timeout) {
813
882
  }
814
883
  catch (err) {
815
884
  // 暂时性错误(RPC 超时、发送失败、连接断开)可重试,其他确定性错误立即抛出
816
- if (err instanceof
817
- Error &&
885
+ if (err instanceof Error &&
818
886
  /Request timeout|Failed to send|disconnect|未连接|stopped|replaced/i.test(err.message)) {
819
887
  lastError = err;
820
- await new Promise(r => setTimeout(r, retryDelay));
888
+ await new Promise((r) => setTimeout(r, retryDelay));
821
889
  continue;
822
890
  }
823
891
  throw err;
824
892
  }
825
- await new Promise(r => setTimeout(r, retryDelay));
893
+ await new Promise((r) => setTimeout(r, retryDelay));
826
894
  }
827
895
  }
828
896
  /**