@pyrokine/mcp-chrome 1.7.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/README.md +71 -31
  2. package/dist/anti-detection/behavior.d.ts.map +1 -1
  3. package/dist/anti-detection/behavior.js.map +1 -1
  4. package/dist/anti-detection/index.d.ts +1 -1
  5. package/dist/anti-detection/index.d.ts.map +1 -1
  6. package/dist/anti-detection/index.js +1 -1
  7. package/dist/anti-detection/index.js.map +1 -1
  8. package/dist/anti-detection/injection.d.ts +6 -2
  9. package/dist/anti-detection/injection.d.ts.map +1 -1
  10. package/dist/anti-detection/injection.js +32 -79
  11. package/dist/anti-detection/injection.js.map +1 -1
  12. package/dist/cdp/client.d.ts +2 -2
  13. package/dist/cdp/client.d.ts.map +1 -1
  14. package/dist/cdp/client.js +8 -10
  15. package/dist/cdp/client.js.map +1 -1
  16. package/dist/cdp/index.d.ts.map +1 -1
  17. package/dist/cdp/index.js.map +1 -1
  18. package/dist/cdp/launcher.d.ts.map +1 -1
  19. package/dist/cdp/launcher.js +40 -13
  20. package/dist/cdp/launcher.js.map +1 -1
  21. package/dist/core/auto-wait.d.ts +2 -2
  22. package/dist/core/auto-wait.d.ts.map +1 -1
  23. package/dist/core/auto-wait.js +2 -2
  24. package/dist/core/auto-wait.js.map +1 -1
  25. package/dist/core/browser-driver.d.ts +307 -0
  26. package/dist/core/browser-driver.d.ts.map +1 -0
  27. package/dist/core/browser-driver.js +21 -0
  28. package/dist/core/browser-driver.js.map +1 -0
  29. package/dist/core/error-sanitizer.d.ts +25 -0
  30. package/dist/core/error-sanitizer.d.ts.map +1 -0
  31. package/dist/core/error-sanitizer.js +66 -0
  32. package/dist/core/error-sanitizer.js.map +1 -0
  33. package/dist/core/errors.d.ts +10 -1
  34. package/dist/core/errors.d.ts.map +1 -1
  35. package/dist/core/errors.js +17 -4
  36. package/dist/core/errors.js.map +1 -1
  37. package/dist/core/extension-errors.d.ts +20 -0
  38. package/dist/core/extension-errors.d.ts.map +1 -0
  39. package/dist/core/extension-errors.js +40 -0
  40. package/dist/core/extension-errors.js.map +1 -0
  41. package/dist/core/index.d.ts.map +1 -1
  42. package/dist/core/index.js.map +1 -1
  43. package/dist/core/locator.d.ts +2 -2
  44. package/dist/core/locator.d.ts.map +1 -1
  45. package/dist/core/locator.js +25 -65
  46. package/dist/core/locator.js.map +1 -1
  47. package/dist/core/retry.d.ts +2 -2
  48. package/dist/core/retry.d.ts.map +1 -1
  49. package/dist/core/retry.js +2 -2
  50. package/dist/core/retry.js.map +1 -1
  51. package/dist/core/session.d.ts +153 -46
  52. package/dist/core/session.d.ts.map +1 -1
  53. package/dist/core/session.js +672 -177
  54. package/dist/core/session.js.map +1 -1
  55. package/dist/core/types.d.ts +9 -3
  56. package/dist/core/types.d.ts.map +1 -1
  57. package/dist/core/types.js +13 -6
  58. package/dist/core/types.js.map +1 -1
  59. package/dist/core/unified-session.d.ts +46 -81
  60. package/dist/core/unified-session.d.ts.map +1 -1
  61. package/dist/core/unified-session.js +338 -635
  62. package/dist/core/unified-session.js.map +1 -1
  63. package/dist/core/utils.d.ts +7 -0
  64. package/dist/core/utils.d.ts.map +1 -0
  65. package/dist/core/utils.js +33 -0
  66. package/dist/core/utils.js.map +1 -0
  67. package/dist/extension/bridge.d.ts +69 -50
  68. package/dist/extension/bridge.d.ts.map +1 -1
  69. package/dist/extension/bridge.js +176 -77
  70. package/dist/extension/bridge.js.map +1 -1
  71. package/dist/extension/http-server.d.ts +6 -4
  72. package/dist/extension/http-server.d.ts.map +1 -1
  73. package/dist/extension/http-server.js +45 -31
  74. package/dist/extension/http-server.js.map +1 -1
  75. package/dist/extension/index.d.ts.map +1 -1
  76. package/dist/extension/index.js.map +1 -1
  77. package/dist/index.js +3 -1
  78. package/dist/index.js.map +1 -1
  79. package/dist/tools/browse.d.ts.map +1 -1
  80. package/dist/tools/browse.js +32 -34
  81. package/dist/tools/browse.js.map +1 -1
  82. package/dist/tools/cookies.d.ts.map +1 -1
  83. package/dist/tools/cookies.js +38 -16
  84. package/dist/tools/cookies.js.map +1 -1
  85. package/dist/tools/evaluate.d.ts.map +1 -1
  86. package/dist/tools/evaluate.js +54 -23
  87. package/dist/tools/evaluate.js.map +1 -1
  88. package/dist/tools/extract.d.ts.map +1 -1
  89. package/dist/tools/extract.js +221 -153
  90. package/dist/tools/extract.js.map +1 -1
  91. package/dist/tools/index.d.ts.map +1 -1
  92. package/dist/tools/index.js.map +1 -1
  93. package/dist/tools/input.d.ts.map +1 -1
  94. package/dist/tools/input.js +271 -90
  95. package/dist/tools/input.js.map +1 -1
  96. package/dist/tools/logs.d.ts.map +1 -1
  97. package/dist/tools/logs.js +31 -17
  98. package/dist/tools/logs.js.map +1 -1
  99. package/dist/tools/manage.d.ts.map +1 -1
  100. package/dist/tools/manage.js +25 -28
  101. package/dist/tools/manage.js.map +1 -1
  102. package/dist/tools/schema.d.ts +1 -1
  103. package/dist/tools/schema.d.ts.map +1 -1
  104. package/dist/tools/schema.js +31 -55
  105. package/dist/tools/schema.js.map +1 -1
  106. package/dist/tools/wait.d.ts.map +1 -1
  107. package/dist/tools/wait.js +19 -16
  108. package/dist/tools/wait.js.map +1 -1
  109. package/package.json +48 -40
@@ -16,26 +16,57 @@ import { targetToFindParams, targetZodSchema } from './schema.js';
16
16
  * InputEvent schema
17
17
  */
18
18
  const inputEventSchema = z.object({
19
- type: z.enum([
20
- 'keydown', 'keyup', 'click', 'mousedown', 'mouseup', 'mousemove',
21
- 'wheel', 'touchstart', 'touchmove', 'touchend', 'type', 'wait',
22
- 'select', 'replace',
23
- ]).describe('事件类型'),
19
+ type: z
20
+ .enum([
21
+ 'keydown',
22
+ 'keyup',
23
+ 'click',
24
+ 'mousedown',
25
+ 'mouseup',
26
+ 'mousemove',
27
+ 'wheel',
28
+ 'touchstart',
29
+ 'touchmove',
30
+ 'touchend',
31
+ 'type',
32
+ 'wait',
33
+ 'select',
34
+ 'replace',
35
+ 'drag',
36
+ ])
37
+ .describe('事件类型'),
24
38
  key: z.string().optional().describe('按键(keydown/keyup)'),
25
- button: z.enum(['left', 'middle', 'right', 'back', 'forward'])
39
+ commands: z
40
+ .array(z.string())
26
41
  .optional()
27
- .describe('鼠标按钮'),
28
- target: targetZodSchema.optional().describe('目标元素(mousemove/touchstart/touchmove 必填;click/mousedown/wheel/type 可选,用于先定位再操作;select/replace 可选,用于限定搜索范围)'),
42
+ .describe('浏览器编辑命令(keydown 专用),如 ["selectAll"]、["copy"]、["paste"]、["cut"]、["undo"]、["redo"],触发原生编辑命令,优先于纯键盘事件,用于跨平台快捷键场景,需要 inputMode=precise(stealth 模式无法触发 Chrome 原生编辑命令,会报错)'),
43
+ button: z.enum(['left', 'middle', 'right', 'back', 'forward']).optional().describe('鼠标按钮'),
44
+ clickCount: z
45
+ .number()
46
+ .int()
47
+ .min(1)
48
+ .optional()
49
+ .describe('鼠标点击次数(click,默认 1,设为 2 触发双击事件,设为 3 触发三击事件)'),
50
+ target: targetZodSchema
51
+ .optional()
52
+ .describe('目标元素(mousemove/touchstart/touchmove 必填;click/mousedown/wheel/type/drag 可选,用于先定位再操作;select/replace 可选,用于限定搜索范围;drag 时为拖拽源)'),
53
+ to: targetZodSchema.optional().describe('拖拽目标元素(drag 事件必填)'),
29
54
  steps: z.number().optional().describe('移动步数(mousemove/touchmove)'),
30
55
  deltaX: z.number().optional().describe('水平滚动量'),
31
56
  deltaY: z.number().optional().describe('垂直滚动量'),
32
- text: z.string().optional().describe('输入文本(type)或替换文本(replace)'),
33
- delay: z.number().optional().describe('按键间隔毫秒'),
57
+ text: z.string().max(10000).optional().describe('输入文本(type,最大 10000 字符)或替换文本(replace)'),
58
+ delay: z.number().min(0).max(100).optional().describe('按键间隔毫秒(type 事件最大 100ms,避免长时延 DoS)'),
34
59
  ms: z.number().optional().describe('等待毫秒'),
35
60
  find: z.string().optional().describe('要查找并选中的文本(select/replace)'),
36
61
  nth: z.number().optional().describe('第 N 个匹配(select/replace,从 0 开始,默认 0 即第一个)'),
37
- dispatch: z.boolean().optional().describe('使用 dispatch 模式输入(type),直接设置 value 并触发 input/change 事件,兼容 React/Vue 等框架的受控组件,默认 false 使用键盘事件'),
38
- force: z.boolean().optional().describe('强制执行(click),跳过可操作性检查(可见性、遮挡检测等),直接在目标元素上触发事件,用于已知需要绕过检查的场景'),
62
+ dispatch: z
63
+ .boolean()
64
+ .optional()
65
+ .describe('使用 dispatch 模式输入(type),直接设置 value 并触发 input/change 事件,兼容 React/Vue 等框架的受控组件,默认 false 使用键盘事件'),
66
+ force: z
67
+ .boolean()
68
+ .optional()
69
+ .describe('强制执行(click),跳过可操作性检查(可见性、遮挡检测等),直接在目标元素上触发事件,用于已知需要绕过检查的场景'),
39
70
  });
40
71
  /**
41
72
  * input 参数 schema
@@ -43,9 +74,15 @@ const inputEventSchema = z.object({
43
74
  const inputSchema = z.object({
44
75
  events: z.array(inputEventSchema).describe('事件序列'),
45
76
  humanize: z.boolean().optional().describe('启用人类行为模拟(贝塞尔曲线移动、随机延迟)'),
46
- tabId: z.string().optional().describe('目标 Tab ID(可选,仅 Extension 模式)。不指定则使用当前 attach 的 tab。可操作非当前 attach 的 tab。CDP 模式下不支持此参数'),
77
+ tabId: z
78
+ .string()
79
+ .optional()
80
+ .describe('目标 Tab ID(可选,仅 Extension 模式),不指定则使用当前 attach 的 tab,可操作非当前 attach 的 tab,CDP 模式下不支持此参数'),
47
81
  timeout: z.number().optional().describe('超时毫秒'),
48
- frame: z.union([z.string(), z.number()]).optional().describe('iframe 定位(可选,仅 Extension 模式)。CSS 选择器(如 "iframe#main")或索引(如 0)。不指定则在主框架操作'),
82
+ frame: z
83
+ .union([z.string(), z.number()])
84
+ .optional()
85
+ .describe('iframe 定位(可选,仅 Extension 模式),CSS 选择器(如 "iframe#main")或索引(如 0),不指定则在主框架操作'),
49
86
  });
50
87
  /**
51
88
  * input 工具处理器
@@ -69,7 +106,7 @@ async function handleInput(args) {
69
106
  }
70
107
  }
71
108
  else {
72
- // CDP 模式:使用原有逻辑
109
+ // CDP 模式:逐事件分发(无 Extension bridge)
73
110
  const session = getSession();
74
111
  for (const event of args.events) {
75
112
  if (event.type === 'select' || event.type === 'replace') {
@@ -109,6 +146,27 @@ async function handleInput(args) {
109
146
  *
110
147
  * @returns 格式化标签名(如 "code"),若被替换文本在格式化节点内
111
148
  */
149
+ /**
150
+ * 如果 target 是选择器类型,先通过 actionableClick 聚焦
151
+ * select/replace 事件用,保证选区建立前 activeElement 就是目标
152
+ */
153
+ async function focusTargetIfNeeded(unifiedSession, target, nth, timeout) {
154
+ if (!target || 'x' in target || 'y' in target) {
155
+ return;
156
+ }
157
+ const params = targetToFindParams(target);
158
+ const els = await unifiedSession.find(params.selector, params.text, params.xpath, timeout);
159
+ const nth0 = params.nth ?? nth ?? 0;
160
+ if (els.length > nth0) {
161
+ try {
162
+ await unifiedSession.actionableClick(els[nth0].refId);
163
+ }
164
+ catch (err) {
165
+ // 失败时不中断(可能是 contenteditable 不接受 click focus),但记录 warning
166
+ console.warn('[MCP] focusTargetIfNeeded 聚焦失败,select/replace 将回退到 mouseClick 聚焦:', err);
167
+ }
168
+ }
169
+ }
112
170
  async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeout) {
113
171
  // 将 target 转为查询参数,传入注入脚本进行 DOM 查询
114
172
  let scopeSelector = null;
@@ -153,6 +211,11 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
153
211
  pos = val.indexOf(findText, pos + (n > 0 ? 1 : 0));
154
212
  if (pos === -1) return {type: 'notfound'};
155
213
  }
214
+ // 原子化:定位到 input 同时完成 focus + setSelectionRange,避免外层 mouseClick 聚焦不可靠
215
+ root.focus();
216
+ if (typeof root.setSelectionRange === 'function') {
217
+ root.setSelectionRange(pos, pos + findText.length);
218
+ }
156
219
  return {type: 'input', selectionStart: pos, selectionEnd: pos + findText.length};
157
220
  }
158
221
 
@@ -167,9 +230,11 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
167
230
  if (ip === -1) break;
168
231
  }
169
232
  if (ip !== -1) {
170
- var r = inp.getBoundingClientRect();
171
- return {type: 'input', selectionStart: ip, selectionEnd: ip + findText.length,
172
- focusX: r.x + r.width / 2, focusY: r.y + r.height / 2};
233
+ inp.focus();
234
+ if (typeof inp.setSelectionRange === 'function') {
235
+ inp.setSelectionRange(ip, ip + findText.length);
236
+ }
237
+ return {type: 'input', selectionStart: ip, selectionEnd: ip + findText.length};
173
238
  }
174
239
  }
175
240
 
@@ -243,28 +308,7 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
243
308
  : `未找到文本: "${findText}"${nth > 0 ? `(第 ${nth} 个匹配)` : ''}`);
244
309
  }
245
310
  if (result.type === 'input') {
246
- // input/textarea:聚焦 + setSelectionRange
247
- const r = result;
248
- if (r.focusX !== undefined && r.focusY !== undefined) {
249
- let x = r.focusX;
250
- let y = r.focusY;
251
- const frameOffset = unifiedSession.getFrameOffset();
252
- if (frameOffset && unifiedSession.getInputMode() !== 'stealth') {
253
- x += frameOffset.x;
254
- y += frameOffset.y;
255
- }
256
- await unifiedSession.mouseMove(x, y);
257
- await unifiedSession.mouseClick('left');
258
- }
259
- else if (scopeTarget) {
260
- const point = await getTargetPointExtension(unifiedSession, scopeTarget, timeout);
261
- await unifiedSession.mouseMove(point.x, point.y);
262
- await unifiedSession.mouseClick('left');
263
- }
264
- await unifiedSession.evaluate(`function(start, end) {
265
- var el = document.activeElement;
266
- if (el && el.setSelectionRange) el.setSelectionRange(start, end);
267
- }`, undefined, timeout, [result.selectionStart, result.selectionEnd]);
311
+ // 注入脚本已完成 focus + setSelectionRange(原子化,避免外层 mouseClick 聚焦不可靠)
268
312
  return undefined;
269
313
  }
270
314
  // DOM 文本节点:鼠标选择
@@ -286,53 +330,77 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
286
330
  await unifiedSession.keyUp('Shift');
287
331
  return coords.formatted;
288
332
  }
333
+ /**
334
+ * 验证事件参数(两种执行模式共享),避免在 Extension/CDP 两个 switch 中重复校验
335
+ */
336
+ function validateEvent(event) {
337
+ if (event.commands && event.commands.length > 0 && event.type !== 'keydown') {
338
+ throw new Error(`commands 参数只能用于 keydown 事件,当前事件类型为 ${event.type},如需触发编辑命令,请把 commands 放在 keydown 事件上`);
339
+ }
340
+ switch (event.type) {
341
+ case 'keydown':
342
+ case 'keyup':
343
+ if (!event.key) {
344
+ throw new Error(`${event.type} 事件需要 key 参数`);
345
+ }
346
+ break;
347
+ case 'wait':
348
+ if (event.ms === undefined) {
349
+ throw new Error('wait 事件需要 ms 参数');
350
+ }
351
+ break;
352
+ }
353
+ }
289
354
  /**
290
355
  * Extension 模式:执行单个事件
291
356
  *
292
357
  * @returns 可选警告信息(如格式丢失提示)
293
358
  */
294
359
  async function executeEventExtension(unifiedSession, event, humanize, timeout) {
360
+ validateEvent(event);
295
361
  switch (event.type) {
296
362
  case 'keydown': {
297
- if (!event.key) {
298
- throw new Error('keydown 事件需要 key 参数');
299
- }
300
- await unifiedSession.keyDown(event.key);
363
+ await unifiedSession.keyDown(event.key, event.commands);
301
364
  break;
302
365
  }
303
366
  case 'keyup': {
304
- if (!event.key) {
305
- throw new Error('keyup 事件需要 key 参数');
306
- }
307
367
  await unifiedSession.keyUp(event.key);
308
368
  break;
309
369
  }
310
370
  case 'click': {
371
+ const button = event.button ?? 'left';
372
+ const clickCount = event.clickCount ?? 1;
311
373
  if (event.target) {
312
374
  // 坐标型 target:不过 actionableClick,但仍需 getTargetPointExtension 做 iframe offset 修正
313
375
  if ('x' in event.target && 'y' in event.target) {
314
376
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
315
377
  await unifiedSession.mouseMove(point.x, point.y);
316
- await unifiedSession.mouseClick(event.button ?? 'left');
378
+ await unifiedSession.mouseClick(button, clickCount);
317
379
  break;
318
380
  }
319
- // 优先使用 actionable click(带可操作性检查、自动滚动、遮挡检测)
320
- const { selector, text: searchText, xpath, nth: nthParam } = targetToFindParams(event.target);
321
- const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
322
- const nth = nthParam ?? 0;
323
- if (elements.length > 0 && nth < elements.length) {
324
- const refId = elements[nth].refId;
325
- const result = await unifiedSession.actionableClick(refId, event.force);
326
- if (!result.success) {
327
- throw new Error(result.error || 'Click failed');
381
+ // 左键单击:优先用 actionable click(带可操作性检查、自动滚动、遮挡检测)
382
+ // 非左键 / 多击:actionableClick 依赖 HTMLElement.click() 只能触发单次左键,必须走坐标路径
383
+ if (button === 'left' && clickCount === 1) {
384
+ const { selector, text: searchText, xpath, nth: nthParam, } = targetToFindParams(event.target);
385
+ const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
386
+ const nth = nthParam ?? 0;
387
+ if (elements.length > 0 && nth < elements.length) {
388
+ const refId = elements[nth].refId;
389
+ const result = await unifiedSession.actionableClick(refId, event.force);
390
+ if (!result.success) {
391
+ throw new Error(result.error || 'Click failed');
392
+ }
393
+ break;
328
394
  }
329
- break;
330
395
  }
331
- // fallback: 找不到 refId 时用坐标方式
396
+ // fallback: 找不到 refId 或需走坐标路径时
397
+ // refId 透传:stealth 模式下嵌套 iframe overlay 场景绕过 elementFromPoint
332
398
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
333
399
  await unifiedSession.mouseMove(point.x, point.y);
400
+ await unifiedSession.mouseClick(button, clickCount, point.refId);
401
+ break;
334
402
  }
335
- await unifiedSession.mouseClick(event.button ?? 'left');
403
+ await unifiedSession.mouseClick(button, clickCount);
336
404
  break;
337
405
  }
338
406
  case 'mousedown': {
@@ -366,6 +434,15 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
366
434
  }
367
435
  case 'wheel': {
368
436
  if (event.target) {
437
+ const { selector, text, xpath, nth: nthParam, } = targetToFindParams(event.target);
438
+ const elements = await unifiedSession.find(selector, text, xpath, timeout);
439
+ const nth = nthParam ?? 0;
440
+ if (elements.length > nth) {
441
+ // 用 refId 直接滚动目标元素(支持视口外元素)
442
+ await unifiedSession.scroll(event.deltaX ?? 0, event.deltaY ?? 0, elements[nth].refId);
443
+ break;
444
+ }
445
+ // 找不到元素时 fallback 到坐标方式
369
446
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
370
447
  await unifiedSession.mouseMove(point.x, point.y);
371
448
  }
@@ -405,7 +482,7 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
405
482
  if ('x' in event.target && 'y' in event.target) {
406
483
  throw new Error('dispatch 模式不支持坐标型 target,请使用 CSS 选择器、role 或文本定位');
407
484
  }
408
- const { selector, text: searchText, xpath, nth: nthParam } = targetToFindParams(event.target);
485
+ const { selector, text: searchText, xpath, nth: nthParam, } = targetToFindParams(event.target);
409
486
  const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
410
487
  const nth = nthParam ?? 0;
411
488
  if (elements.length === 0 || nth >= elements.length) {
@@ -427,6 +504,12 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
427
504
  await unifiedSession.mouseMove(point.x, point.y);
428
505
  await unifiedSession.mouseClick('left');
429
506
  }
507
+ else {
508
+ const hasActiveFocus = await unifiedSession.evaluate('!!document.activeElement && document.activeElement !== document.body && document.activeElement !== document.documentElement');
509
+ if (!hasActiveFocus) {
510
+ throw new Error('type 事件在无 target 时需要页面已有焦点元素,请提供 target 或先 click 目标元素');
511
+ }
512
+ }
430
513
  const delay = event.delay ?? 0;
431
514
  if (humanize) {
432
515
  for (const char of event.text) {
@@ -440,16 +523,15 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
440
523
  break;
441
524
  }
442
525
  case 'wait': {
443
- if (!event.ms) {
444
- throw new Error('wait 事件需要 ms 参数');
445
- }
446
- await new Promise(resolve => setTimeout(resolve, event.ms));
526
+ await new Promise((resolve) => setTimeout(resolve, event.ms));
447
527
  break;
448
528
  }
449
529
  case 'select': {
450
530
  if (!event.find) {
451
531
  throw new Error('select 事件需要 find 参数');
452
532
  }
533
+ // 自动聚焦目标元素(selectText 内 mouseClick focus 对 React 等场景不可靠)
534
+ await focusTargetIfNeeded(unifiedSession, event.target, event.nth, timeout);
453
535
  await selectText(unifiedSession, event.find, event.target, event.nth, timeout);
454
536
  break;
455
537
  }
@@ -460,6 +542,8 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
460
542
  if (event.text === undefined) {
461
543
  throw new Error('replace 事件需要 text 参数');
462
544
  }
545
+ // 自动聚焦目标元素
546
+ await focusTargetIfNeeded(unifiedSession, event.target, event.nth, timeout);
463
547
  // Step 1: 选中文本
464
548
  const formatted = await selectText(unifiedSession, event.find, event.target, event.nth, timeout);
465
549
  // 轮询等待选区同步(最多 500ms)
@@ -477,7 +561,7 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
477
561
  selectionConfirmed = true;
478
562
  break;
479
563
  }
480
- await new Promise(resolve => setTimeout(resolve, 20));
564
+ await new Promise((resolve) => setTimeout(resolve, 20));
481
565
  }
482
566
  if (!selectionConfirmed) {
483
567
  throw new Error(`选区同步失败:文本 "${event.find}" 已定位但未能建立选区,无法执行替换`);
@@ -517,6 +601,54 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
517
601
  }
518
602
  break;
519
603
  }
604
+ case 'drag': {
605
+ if (!event.target) {
606
+ throw new Error('drag 事件需要 target 参数(拖拽源)');
607
+ }
608
+ if (!event.to) {
609
+ throw new Error('drag 事件需要 to 参数(拖拽目标)');
610
+ }
611
+ // drag 仅支持选择器类 target(CSS/text/xpath/ARIA 等),不支持坐标
612
+ // 理由:drag 依赖 refId 在 Extension ISOLATED 世界 dispatchEvent,坐标无法生成 refId
613
+ if ('x' in event.target || 'y' in event.target) {
614
+ throw new Error('drag 的 target 不支持坐标类型,请使用选择器(css/text/xpath/role 等)');
615
+ }
616
+ if ('x' in event.to || 'y' in event.to) {
617
+ throw new Error('drag 的 to 不支持坐标类型,请使用选择器(css/text/xpath/role 等)');
618
+ }
619
+ // 用 find 定位确认元素存在(支持 ARIA/testId 等高级定位),拿 refId 传入 extension 侧 dispatchEvent
620
+ const srcParams = targetToFindParams(event.target);
621
+ const dstParams = targetToFindParams(event.to);
622
+ const srcNth = srcParams.nth ?? 0;
623
+ const dstNth = dstParams.nth ?? 0;
624
+ // 执行 drag,失败时重试一次(React 重渲染可能导致 refId 失效)
625
+ const attemptDrag = async () => {
626
+ const srcEls = await unifiedSession.find(srcParams.selector, srcParams.text, srcParams.xpath, timeout);
627
+ const dstEls = await unifiedSession.find(dstParams.selector, dstParams.text, dstParams.xpath, timeout);
628
+ if (srcEls.length <= srcNth) {
629
+ throw new Error(`drag 源元素未找到: ${JSON.stringify(event.target)}`);
630
+ }
631
+ if (dstEls.length <= dstNth) {
632
+ throw new Error(`drag 目标元素未找到: ${JSON.stringify(event.to)}`);
633
+ }
634
+ return unifiedSession.dragAndDrop(srcEls[srcNth].refId, dstEls[dstNth].refId);
635
+ };
636
+ let dragResult = await attemptDrag();
637
+ let retried = false;
638
+ // 仅对 refId 失效(REF_STALE)重试:源/目标元素从 DOM 移除,典型是 React 重渲染
639
+ if (!dragResult.success && dragResult.code === 'REF_STALE') {
640
+ console.warn('[MCP] drag refId 失效,自动重试一次:', dragResult.error);
641
+ dragResult = await attemptDrag();
642
+ retried = true;
643
+ }
644
+ if (!dragResult.success) {
645
+ throw new Error(dragResult.error || 'drag 执行失败');
646
+ }
647
+ if (retried) {
648
+ return 'drag 因 refId 失效已自动重试一次(可能是 React 等框架重渲染导致)';
649
+ }
650
+ break;
651
+ }
520
652
  default:
521
653
  throw new Error(`未知事件类型: ${event.type}`);
522
654
  }
@@ -552,34 +684,63 @@ async function getTargetPointExtension(unifiedSession, target, timeout) {
552
684
  if (nth >= elements.length) {
553
685
  throw new Error(`第 ${nth} 个匹配元素不存在(共 ${elements.length} 个)`);
554
686
  }
555
- const rect = elements[nth].rect;
556
- const point = {
557
- x: rect.x + rect.width / 2,
558
- y: rect.y + rect.height / 2,
559
- };
560
- // 元素定位:find() 返回视口绝对坐标
561
- if (frameOffset && isStealth) {
562
- // stealth: 转回 iframe 相对坐标
563
- return { x: point.x - frameOffset.x, y: point.y - frameOffset.y };
687
+ // 视口外时滚动后重新取 rect:与 actionableClick (left+single) 行为对齐,
688
+ // 否则非左键 / 多击的坐标路径在视口外坐标 dispatch,浏览器找不到元素,事件丢失
689
+ const refId = elements[nth].refId;
690
+ const iframeRelRect = await unifiedSession.evaluate(`(() => {
691
+ const ref = window.__mcpElementMap?.[${JSON.stringify(refId)}];
692
+ const el = ref?.deref();
693
+ if (!el) return null;
694
+ const r = el.getBoundingClientRect();
695
+ if (r.top < 0 || r.bottom > window.innerHeight || r.left < 0 || r.right > window.innerWidth) {
696
+ el.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
697
+ }
698
+ const r2 = el.getBoundingClientRect();
699
+ return { x: r2.x, y: r2.y, width: r2.width, height: r2.height };
700
+ })()`);
701
+ // refId 失效等异常:fallback 到原始 find rect(父视口绝对)
702
+ if (!iframeRelRect) {
703
+ const rect = elements[nth].rect;
704
+ const point = { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
705
+ if (frameOffset && isStealth) {
706
+ return { x: point.x - frameOffset.x, y: point.y - frameOffset.y, refId };
707
+ }
708
+ return { ...point, refId };
709
+ }
710
+ // 主 frame:iframeRelRect 就是父视口绝对
711
+ if (!frameOffset) {
712
+ return {
713
+ x: iframeRelRect.x + iframeRelRect.width / 2,
714
+ y: iframeRelRect.y + iframeRelRect.height / 2,
715
+ refId,
716
+ };
717
+ }
718
+ // iframe + stealth:消费者(chrome.scripting in iframe)需要 iframe 相对
719
+ if (isStealth) {
720
+ return {
721
+ x: iframeRelRect.x + iframeRelRect.width / 2,
722
+ y: iframeRelRect.y + iframeRelRect.height / 2,
723
+ refId,
724
+ };
564
725
  }
565
- return point;
726
+ // iframe + precise:消费者(chrome.debugger)需要父视口绝对。
727
+ // scrollIntoView({block:'center'}) 会 cascade 到父框架,导致 frameOffset 与父绝对 rect 都过期,
728
+ // refetch find() 让 content-handler 重新计算 frameOffset 并返回最新父绝对 rect
729
+ const refreshed = await unifiedSession.find(selector, text, xpath, timeout);
730
+ const rect = refreshed[nth]?.rect ?? elements[nth].rect;
731
+ return { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2, refId };
566
732
  }
567
733
  /**
568
734
  * CDP 模式:执行单个事件
569
735
  */
570
736
  async function executeEvent(session, event, humanize, timeout) {
737
+ validateEvent(event);
571
738
  switch (event.type) {
572
739
  case 'keydown': {
573
- if (!event.key) {
574
- throw new Error('keydown 事件需要 key 参数');
575
- }
576
- await session.keyDown(event.key);
740
+ await session.keyDown(event.key, event.commands);
577
741
  break;
578
742
  }
579
743
  case 'keyup': {
580
- if (!event.key) {
581
- throw new Error('keyup 事件需要 key 参数');
582
- }
583
744
  await session.keyUp(event.key);
584
745
  break;
585
746
  }
@@ -587,8 +748,12 @@ async function executeEvent(session, event, humanize, timeout) {
587
748
  if (event.target) {
588
749
  await moveToTarget(session, event.target, humanize, timeout);
589
750
  }
590
- await session.mouseDown(event.button ?? 'left');
591
- await session.mouseUp(event.button ?? 'left');
751
+ const cdpButton = event.button ?? 'left';
752
+ const cdpClickCount = event.clickCount ?? 1;
753
+ for (let i = 1; i <= cdpClickCount; i++) {
754
+ await session.mouseDown(cdpButton, i);
755
+ await session.mouseUp(cdpButton, i);
756
+ }
592
757
  break;
593
758
  }
594
759
  case 'mousedown': {
@@ -662,6 +827,12 @@ async function executeEvent(session, event, humanize, timeout) {
662
827
  await session.mouseDown('left');
663
828
  await session.mouseUp('left');
664
829
  }
830
+ else {
831
+ const hasActiveFocus = await session.evaluate('!!document.activeElement && document.activeElement !== document.body && document.activeElement !== document.documentElement');
832
+ if (!hasActiveFocus) {
833
+ throw new Error('type 事件在无 target 时需要页面已有焦点元素,请提供 target 或先 click 目标元素');
834
+ }
835
+ }
665
836
  const delay = event.delay ?? 0;
666
837
  if (humanize) {
667
838
  // 人类化打字
@@ -676,13 +847,14 @@ async function executeEvent(session, event, humanize, timeout) {
676
847
  break;
677
848
  }
678
849
  case 'wait': {
679
- if (!event.ms) {
680
- throw new Error('wait 事件需要 ms 参数');
681
- }
682
850
  await new Promise((resolve) => setTimeout(resolve, event.ms));
683
851
  break;
684
852
  }
685
853
  default:
854
+ // drag 仅在 Extension 模式可用,给出明确错误而非通用"未知事件类型"
855
+ if (event.type === 'drag') {
856
+ throw new Error('drag 事件仅在 Extension 模式下可用,当前为 CDP 模式');
857
+ }
686
858
  throw new Error(`未知事件类型: ${event.type}`);
687
859
  }
688
860
  }
@@ -750,13 +922,22 @@ export function registerInputTool(server) {
750
922
  2. 用 CSS 选择器 + nth 精确定位元素(避免坐标点击)
751
923
  3. 再 input click/type 操作目标元素
752
924
 
753
- 组合键需拆分为独立事件。示例(Ctrl+A 全选):
925
+ 组合键拆分为独立事件示例(修饰键 + 字母键):
754
926
  events: [
755
927
  {type: "keydown", key: "Control"},
756
928
  {type: "keydown", key: "a"},
757
929
  {type: "keyup", key: "a"},
758
930
  {type: "keyup", key: "Control"}
759
- ]`,
931
+ ]
932
+
933
+ 浏览器编辑命令(selectAll/copy/paste/cut/undo/redo 等)需用 commands 字段,跨平台可靠:
934
+ events: [
935
+ {type: "keydown", key: "a", commands: ["selectAll"]},
936
+ {type: "keyup", key: "a"}
937
+ ]
938
+ 注意:纯键盘事件(不带 commands)仅保证 JS keyboard event 可被监听,不保证触发浏览器原生编辑行为;
939
+ 全选/复制/粘贴等语义用 commands;"全选并替换文本"用 select/replace 事件更简洁;
940
+ commands 仅支持 inputMode=precise,stealth 模式下会报错`,
760
941
  inputSchema: inputSchema,
761
942
  }, (args) => handleInput(args));
762
943
  }