@pyrokine/mcp-chrome 1.7.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/README.md +71 -31
  2. package/dist/anti-detection/behavior.d.ts.map +1 -1
  3. package/dist/anti-detection/behavior.js.map +1 -1
  4. package/dist/anti-detection/index.d.ts +1 -1
  5. package/dist/anti-detection/index.d.ts.map +1 -1
  6. package/dist/anti-detection/index.js +1 -1
  7. package/dist/anti-detection/index.js.map +1 -1
  8. package/dist/anti-detection/injection.d.ts +6 -2
  9. package/dist/anti-detection/injection.d.ts.map +1 -1
  10. package/dist/anti-detection/injection.js +34 -80
  11. package/dist/anti-detection/injection.js.map +1 -1
  12. package/dist/cdp/client.d.ts +2 -2
  13. package/dist/cdp/client.d.ts.map +1 -1
  14. package/dist/cdp/client.js +8 -10
  15. package/dist/cdp/client.js.map +1 -1
  16. package/dist/cdp/index.d.ts.map +1 -1
  17. package/dist/cdp/index.js.map +1 -1
  18. package/dist/cdp/launcher.d.ts.map +1 -1
  19. package/dist/cdp/launcher.js +40 -13
  20. package/dist/cdp/launcher.js.map +1 -1
  21. package/dist/core/auto-wait.d.ts +2 -2
  22. package/dist/core/auto-wait.d.ts.map +1 -1
  23. package/dist/core/auto-wait.js +2 -2
  24. package/dist/core/auto-wait.js.map +1 -1
  25. package/dist/core/browser-driver.d.ts +307 -0
  26. package/dist/core/browser-driver.d.ts.map +1 -0
  27. package/dist/core/browser-driver.js +21 -0
  28. package/dist/core/browser-driver.js.map +1 -0
  29. package/dist/core/error-sanitizer.d.ts +25 -0
  30. package/dist/core/error-sanitizer.d.ts.map +1 -0
  31. package/dist/core/error-sanitizer.js +66 -0
  32. package/dist/core/error-sanitizer.js.map +1 -0
  33. package/dist/core/errors.d.ts +10 -1
  34. package/dist/core/errors.d.ts.map +1 -1
  35. package/dist/core/errors.js +17 -4
  36. package/dist/core/errors.js.map +1 -1
  37. package/dist/core/extension-errors.d.ts +20 -0
  38. package/dist/core/extension-errors.d.ts.map +1 -0
  39. package/dist/core/extension-errors.js +40 -0
  40. package/dist/core/extension-errors.js.map +1 -0
  41. package/dist/core/index.d.ts.map +1 -1
  42. package/dist/core/index.js.map +1 -1
  43. package/dist/core/locator.d.ts +2 -2
  44. package/dist/core/locator.d.ts.map +1 -1
  45. package/dist/core/locator.js +25 -65
  46. package/dist/core/locator.js.map +1 -1
  47. package/dist/core/retry.d.ts +2 -2
  48. package/dist/core/retry.d.ts.map +1 -1
  49. package/dist/core/retry.js +2 -2
  50. package/dist/core/retry.js.map +1 -1
  51. package/dist/core/session.d.ts +149 -46
  52. package/dist/core/session.d.ts.map +1 -1
  53. package/dist/core/session.js +673 -181
  54. package/dist/core/session.js.map +1 -1
  55. package/dist/core/types.d.ts +9 -3
  56. package/dist/core/types.d.ts.map +1 -1
  57. package/dist/core/types.js +13 -6
  58. package/dist/core/types.js.map +1 -1
  59. package/dist/core/unified-session.d.ts +46 -85
  60. package/dist/core/unified-session.d.ts.map +1 -1
  61. package/dist/core/unified-session.js +341 -650
  62. package/dist/core/unified-session.js.map +1 -1
  63. package/dist/core/utils.d.ts +7 -0
  64. package/dist/core/utils.d.ts.map +1 -0
  65. package/dist/core/utils.js +33 -0
  66. package/dist/core/utils.js.map +1 -0
  67. package/dist/extension/bridge.d.ts +69 -52
  68. package/dist/extension/bridge.d.ts.map +1 -1
  69. package/dist/extension/bridge.js +242 -111
  70. package/dist/extension/bridge.js.map +1 -1
  71. package/dist/extension/http-server.d.ts +6 -4
  72. package/dist/extension/http-server.d.ts.map +1 -1
  73. package/dist/extension/http-server.js +45 -31
  74. package/dist/extension/http-server.js.map +1 -1
  75. package/dist/extension/index.d.ts.map +1 -1
  76. package/dist/extension/index.js.map +1 -1
  77. package/dist/index.js +3 -1
  78. package/dist/index.js.map +1 -1
  79. package/dist/tools/browse.d.ts.map +1 -1
  80. package/dist/tools/browse.js +32 -34
  81. package/dist/tools/browse.js.map +1 -1
  82. package/dist/tools/cookies.d.ts.map +1 -1
  83. package/dist/tools/cookies.js +38 -16
  84. package/dist/tools/cookies.js.map +1 -1
  85. package/dist/tools/evaluate.d.ts.map +1 -1
  86. package/dist/tools/evaluate.js +54 -23
  87. package/dist/tools/evaluate.js.map +1 -1
  88. package/dist/tools/extract.d.ts.map +1 -1
  89. package/dist/tools/extract.js +221 -153
  90. package/dist/tools/extract.js.map +1 -1
  91. package/dist/tools/index.d.ts.map +1 -1
  92. package/dist/tools/index.js.map +1 -1
  93. package/dist/tools/input.d.ts.map +1 -1
  94. package/dist/tools/input.js +281 -89
  95. package/dist/tools/input.js.map +1 -1
  96. package/dist/tools/logs.d.ts.map +1 -1
  97. package/dist/tools/logs.js +31 -17
  98. package/dist/tools/logs.js.map +1 -1
  99. package/dist/tools/manage.d.ts.map +1 -1
  100. package/dist/tools/manage.js +25 -28
  101. package/dist/tools/manage.js.map +1 -1
  102. package/dist/tools/schema.d.ts +1 -1
  103. package/dist/tools/schema.d.ts.map +1 -1
  104. package/dist/tools/schema.js +31 -55
  105. package/dist/tools/schema.js.map +1 -1
  106. package/dist/tools/wait.d.ts.map +1 -1
  107. package/dist/tools/wait.js +19 -16
  108. package/dist/tools/wait.js.map +1 -1
  109. package/package.json +48 -40
@@ -16,26 +16,58 @@ import { targetToFindParams, targetZodSchema } from './schema.js';
16
16
  * InputEvent schema
17
17
  */
18
18
  const inputEventSchema = z.object({
19
- type: z.enum([
20
- 'keydown', 'keyup', 'click', 'mousedown', 'mouseup', 'mousemove',
21
- 'wheel', 'touchstart', 'touchmove', 'touchend', 'type', 'wait',
22
- 'select', 'replace',
23
- ]).describe('事件类型'),
19
+ type: z
20
+ .enum([
21
+ 'keydown',
22
+ 'keyup',
23
+ 'click',
24
+ 'mousedown',
25
+ 'mouseup',
26
+ 'mousemove',
27
+ 'wheel',
28
+ 'touchstart',
29
+ 'touchmove',
30
+ 'touchend',
31
+ 'type',
32
+ 'wait',
33
+ 'select',
34
+ 'replace',
35
+ 'drag',
36
+ ])
37
+ .describe('事件类型'),
24
38
  key: z.string().optional().describe('按键(keydown/keyup)'),
25
- button: z.enum(['left', 'middle', 'right', 'back', 'forward'])
39
+ commands: z
40
+ .array(z.string())
26
41
  .optional()
27
- .describe('鼠标按钮'),
28
- target: targetZodSchema.optional().describe('目标元素(mousemove/touchstart/touchmove 必填;click/mousedown/wheel/type 可选,用于先定位再操作;select/replace 可选,用于限定搜索范围)'),
42
+ .describe('浏览器编辑命令(keydown 专用),如 ["selectAll"]、["copy"]、["paste"]、["cut"]、["undo"]、["redo"],触发原生编辑命令,优先于纯键盘事件')
43
+ .describe('用于跨平台快捷键场景,需要 inputMode=precise'),
44
+ button: z.enum(['left', 'middle', 'right', 'back', 'forward']).optional().describe('鼠标按钮'),
45
+ clickCount: z
46
+ .number()
47
+ .int()
48
+ .min(1)
49
+ .optional()
50
+ .describe('鼠标点击次数(click,默认 1,设为 2 触发双击事件,设为 3 触发三击事件)'),
51
+ target: targetZodSchema
52
+ .optional()
53
+ .describe('目标元素(mousemove/touchstart/touchmove 必填;click/mousedown/wheel/type/drag 可选;select/replace 可选;drag 时为拖拽源)'),
54
+ to: targetZodSchema.optional().describe('拖拽目标元素(drag 事件必填)'),
29
55
  steps: z.number().optional().describe('移动步数(mousemove/touchmove)'),
30
56
  deltaX: z.number().optional().describe('水平滚动量'),
31
57
  deltaY: z.number().optional().describe('垂直滚动量'),
32
- text: z.string().optional().describe('输入文本(type)或替换文本(replace)'),
33
- delay: z.number().optional().describe('按键间隔毫秒'),
58
+ text: z.string().max(10000).optional().describe('输入文本(type,最大 10000 字符)或替换文本(replace)'),
59
+ delay: z.number().min(0).max(100).optional().describe('按键间隔毫秒(type 事件最大 100ms,避免长时延 DoS)'),
34
60
  ms: z.number().optional().describe('等待毫秒'),
35
61
  find: z.string().optional().describe('要查找并选中的文本(select/replace)'),
36
62
  nth: z.number().optional().describe('第 N 个匹配(select/replace,从 0 开始,默认 0 即第一个)'),
37
- dispatch: z.boolean().optional().describe('使用 dispatch 模式输入(type),直接设置 value 并触发 input/change 事件,兼容 React/Vue 等框架的受控组件,默认 false 使用键盘事件'),
38
- force: z.boolean().optional().describe('强制执行(click),跳过可操作性检查(可见性、遮挡检测等),直接在目标元素上触发事件,用于已知需要绕过检查的场景'),
63
+ dispatch: z
64
+ .boolean()
65
+ .optional()
66
+ .describe('使用 dispatch 模式输入(type),直接设置 value 并触发 input/change 事件,兼容 React/Vue 等框架的受控组件,默认 false 使用键盘事件'),
67
+ force: z
68
+ .boolean()
69
+ .optional()
70
+ .describe('强制执行(click),跳过可操作性检查(可见性、遮挡检测等),直接在目标元素上触发事件,用于已知需要绕过检查的场景'),
39
71
  });
40
72
  /**
41
73
  * input 参数 schema
@@ -43,9 +75,15 @@ const inputEventSchema = z.object({
43
75
  const inputSchema = z.object({
44
76
  events: z.array(inputEventSchema).describe('事件序列'),
45
77
  humanize: z.boolean().optional().describe('启用人类行为模拟(贝塞尔曲线移动、随机延迟)'),
46
- tabId: z.string().optional().describe('目标 Tab ID(可选,仅 Extension 模式)。不指定则使用当前 attach 的 tab。可操作非当前 attach 的 tab。CDP 模式下不支持此参数'),
78
+ tabId: z
79
+ .string()
80
+ .optional()
81
+ .describe('目标 Tab ID(可选,仅 Extension 模式),不指定则使用当前 attach 的 tab,可操作非当前 attach 的 tab,CDP 模式下不支持此参数'),
47
82
  timeout: z.number().optional().describe('超时毫秒'),
48
- frame: z.union([z.string(), z.number()]).optional().describe('iframe 定位(可选,仅 Extension 模式)。CSS 选择器(如 "iframe#main")或索引(如 0)。不指定则在主框架操作'),
83
+ frame: z
84
+ .union([z.string(), z.number()])
85
+ .optional()
86
+ .describe('iframe 定位(可选,仅 Extension 模式),CSS 选择器(如 "iframe#main")或索引(如 0),不指定则在主框架操作'),
49
87
  });
50
88
  /**
51
89
  * input 工具处理器
@@ -69,7 +107,7 @@ async function handleInput(args) {
69
107
  }
70
108
  }
71
109
  else {
72
- // CDP 模式:使用原有逻辑
110
+ // CDP 模式:逐事件分发(无 Extension bridge)
73
111
  const session = getSession();
74
112
  for (const event of args.events) {
75
113
  if (event.type === 'select' || event.type === 'replace') {
@@ -109,6 +147,27 @@ async function handleInput(args) {
109
147
  *
110
148
  * @returns 格式化标签名(如 "code"),若被替换文本在格式化节点内
111
149
  */
150
+ /**
151
+ * 如果 target 是选择器类型,先通过 actionableClick 聚焦
152
+ * select/replace 事件用,保证选区建立前 activeElement 就是目标
153
+ */
154
+ async function focusTargetIfNeeded(unifiedSession, target, nth, timeout) {
155
+ if (!target || 'x' in target || 'y' in target) {
156
+ return;
157
+ }
158
+ const params = targetToFindParams(target);
159
+ const els = await unifiedSession.find(params.selector, params.text, params.xpath, timeout);
160
+ const nth0 = params.nth ?? nth ?? 0;
161
+ if (els.length > nth0) {
162
+ try {
163
+ await unifiedSession.actionableClick(els[nth0].refId);
164
+ }
165
+ catch (err) {
166
+ // 失败时不中断(可能是 contenteditable 不接受 click focus),但记录 warning
167
+ console.warn('[MCP] focusTargetIfNeeded 聚焦失败,select/replace 将回退到 mouseClick 聚焦:', err);
168
+ }
169
+ }
170
+ }
112
171
  async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeout) {
113
172
  // 将 target 转为查询参数,传入注入脚本进行 DOM 查询
114
173
  let scopeSelector = null;
@@ -153,6 +212,11 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
153
212
  pos = val.indexOf(findText, pos + (n > 0 ? 1 : 0));
154
213
  if (pos === -1) return {type: 'notfound'};
155
214
  }
215
+ // 原子化:定位到 input 同时完成 focus + setSelectionRange,避免外层 mouseClick 聚焦不可靠
216
+ root.focus();
217
+ if (typeof root.setSelectionRange === 'function') {
218
+ root.setSelectionRange(pos, pos + findText.length);
219
+ }
156
220
  return {type: 'input', selectionStart: pos, selectionEnd: pos + findText.length};
157
221
  }
158
222
 
@@ -167,9 +231,11 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
167
231
  if (ip === -1) break;
168
232
  }
169
233
  if (ip !== -1) {
170
- var r = inp.getBoundingClientRect();
171
- return {type: 'input', selectionStart: ip, selectionEnd: ip + findText.length,
172
- focusX: r.x + r.width / 2, focusY: r.y + r.height / 2};
234
+ inp.focus();
235
+ if (typeof inp.setSelectionRange === 'function') {
236
+ inp.setSelectionRange(ip, ip + findText.length);
237
+ }
238
+ return {type: 'input', selectionStart: ip, selectionEnd: ip + findText.length};
173
239
  }
174
240
  }
175
241
 
@@ -243,28 +309,7 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
243
309
  : `未找到文本: "${findText}"${nth > 0 ? `(第 ${nth} 个匹配)` : ''}`);
244
310
  }
245
311
  if (result.type === 'input') {
246
- // input/textarea:聚焦 + setSelectionRange
247
- const r = result;
248
- if (r.focusX !== undefined && r.focusY !== undefined) {
249
- let x = r.focusX;
250
- let y = r.focusY;
251
- const frameOffset = unifiedSession.getFrameOffset();
252
- if (frameOffset && unifiedSession.getInputMode() !== 'stealth') {
253
- x += frameOffset.x;
254
- y += frameOffset.y;
255
- }
256
- await unifiedSession.mouseMove(x, y);
257
- await unifiedSession.mouseClick('left');
258
- }
259
- else if (scopeTarget) {
260
- const point = await getTargetPointExtension(unifiedSession, scopeTarget, timeout);
261
- await unifiedSession.mouseMove(point.x, point.y);
262
- await unifiedSession.mouseClick('left');
263
- }
264
- await unifiedSession.evaluate(`function(start, end) {
265
- var el = document.activeElement;
266
- if (el && el.setSelectionRange) el.setSelectionRange(start, end);
267
- }`, undefined, timeout, [result.selectionStart, result.selectionEnd]);
312
+ // 注入脚本已完成 focus + setSelectionRange(原子化,避免外层 mouseClick 聚焦不可靠)
268
313
  return undefined;
269
314
  }
270
315
  // DOM 文本节点:鼠标选择
@@ -286,53 +331,83 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
286
331
  await unifiedSession.keyUp('Shift');
287
332
  return coords.formatted;
288
333
  }
334
+ /**
335
+ * 验证事件参数(两种执行模式共享),避免在 Extension/CDP 两个 switch 中重复校验
336
+ */
337
+ function validateEvent(event) {
338
+ if (event.commands && event.commands.length > 0 && event.type !== 'keydown') {
339
+ throw new Error(`commands 参数只能用于 keydown 事件,当前事件类型为 ${event.type},如需触发编辑命令,请把 commands 放在 keydown 事件上`);
340
+ }
341
+ switch (event.type) {
342
+ case 'keydown':
343
+ case 'keyup':
344
+ if (!event.key) {
345
+ throw new Error(`${event.type} 事件需要 key 参数`);
346
+ }
347
+ break;
348
+ case 'wait':
349
+ if (event.ms === undefined) {
350
+ throw new Error('wait 事件需要 ms 参数');
351
+ }
352
+ break;
353
+ }
354
+ }
289
355
  /**
290
356
  * Extension 模式:执行单个事件
291
357
  *
292
358
  * @returns 可选警告信息(如格式丢失提示)
293
359
  */
294
360
  async function executeEventExtension(unifiedSession, event, humanize, timeout) {
361
+ validateEvent(event);
295
362
  switch (event.type) {
296
363
  case 'keydown': {
297
- if (!event.key) {
298
- throw new Error('keydown 事件需要 key 参数');
364
+ if (event.commands && event.commands.length > 0 && event.target) {
365
+ const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
366
+ await unifiedSession.mouseMove(point.x, point.y);
367
+ await unifiedSession.mouseDown('left');
368
+ await unifiedSession.mouseUp('left');
299
369
  }
300
- await unifiedSession.keyDown(event.key);
370
+ await unifiedSession.keyDown(event.key, event.commands);
301
371
  break;
302
372
  }
303
373
  case 'keyup': {
304
- if (!event.key) {
305
- throw new Error('keyup 事件需要 key 参数');
306
- }
307
374
  await unifiedSession.keyUp(event.key);
308
375
  break;
309
376
  }
310
377
  case 'click': {
378
+ const button = event.button ?? 'left';
379
+ const clickCount = event.clickCount ?? 1;
311
380
  if (event.target) {
312
381
  // 坐标型 target:不过 actionableClick,但仍需 getTargetPointExtension 做 iframe offset 修正
313
382
  if ('x' in event.target && 'y' in event.target) {
314
383
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
315
384
  await unifiedSession.mouseMove(point.x, point.y);
316
- await unifiedSession.mouseClick(event.button ?? 'left');
385
+ await unifiedSession.mouseClick(button, clickCount);
317
386
  break;
318
387
  }
319
- // 优先使用 actionable click(带可操作性检查、自动滚动、遮挡检测)
320
- const { selector, text: searchText, xpath, nth: nthParam } = targetToFindParams(event.target);
321
- const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
322
- const nth = nthParam ?? 0;
323
- if (elements.length > 0 && nth < elements.length) {
324
- const refId = elements[nth].refId;
325
- const result = await unifiedSession.actionableClick(refId, event.force);
326
- if (!result.success) {
327
- throw new Error(result.error || 'Click failed');
388
+ // 左键单击:优先用 actionable click(带可操作性检查、自动滚动、遮挡检测)
389
+ // 非左键 / 多击:actionableClick 依赖 HTMLElement.click() 只能触发单次左键,必须走坐标路径
390
+ if (button === 'left' && clickCount === 1) {
391
+ const { selector, text: searchText, xpath, nth: nthParam, } = targetToFindParams(event.target);
392
+ const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
393
+ const nth = nthParam ?? 0;
394
+ if (elements.length > 0 && nth < elements.length) {
395
+ const refId = elements[nth].refId;
396
+ const result = await unifiedSession.actionableClick(refId, event.force);
397
+ if (!result.success) {
398
+ throw new Error(result.error || 'Click failed');
399
+ }
400
+ break;
328
401
  }
329
- break;
330
402
  }
331
- // fallback: 找不到 refId 时用坐标方式
403
+ // fallback: 找不到 refId 或需走坐标路径时
404
+ // refId 透传:stealth 模式下嵌套 iframe overlay 场景绕过 elementFromPoint
332
405
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
333
406
  await unifiedSession.mouseMove(point.x, point.y);
407
+ await unifiedSession.mouseClick(button, clickCount, typeof point.refId === 'string' ? point.refId : undefined);
408
+ break;
334
409
  }
335
- await unifiedSession.mouseClick(event.button ?? 'left');
410
+ await unifiedSession.mouseClick(button, clickCount);
336
411
  break;
337
412
  }
338
413
  case 'mousedown': {
@@ -366,6 +441,15 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
366
441
  }
367
442
  case 'wheel': {
368
443
  if (event.target) {
444
+ const { selector, text, xpath, nth: nthParam, } = targetToFindParams(event.target);
445
+ const elements = await unifiedSession.find(selector, text, xpath, timeout);
446
+ const nth = nthParam ?? 0;
447
+ if (elements.length > nth) {
448
+ // 用 refId 直接滚动目标元素(支持视口外元素)
449
+ await unifiedSession.scroll(event.deltaX ?? 0, event.deltaY ?? 0, elements[nth].refId);
450
+ break;
451
+ }
452
+ // 找不到元素时 fallback 到坐标方式
369
453
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
370
454
  await unifiedSession.mouseMove(point.x, point.y);
371
455
  }
@@ -405,7 +489,7 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
405
489
  if ('x' in event.target && 'y' in event.target) {
406
490
  throw new Error('dispatch 模式不支持坐标型 target,请使用 CSS 选择器、role 或文本定位');
407
491
  }
408
- const { selector, text: searchText, xpath, nth: nthParam } = targetToFindParams(event.target);
492
+ const { selector, text: searchText, xpath, nth: nthParam, } = targetToFindParams(event.target);
409
493
  const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
410
494
  const nth = nthParam ?? 0;
411
495
  if (elements.length === 0 || nth >= elements.length) {
@@ -427,6 +511,14 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
427
511
  await unifiedSession.mouseMove(point.x, point.y);
428
512
  await unifiedSession.mouseClick('left');
429
513
  }
514
+ else {
515
+ const hasActiveFocus = await unifiedSession.evaluate('!!document.activeElement && ' +
516
+ 'document.activeElement !== document.body && ' +
517
+ 'document.activeElement !== document.documentElement');
518
+ if (!hasActiveFocus) {
519
+ throw new Error('type 事件在无 target 时需要页面已有焦点元素,请提供 target 或先 click 目标元素');
520
+ }
521
+ }
430
522
  const delay = event.delay ?? 0;
431
523
  if (humanize) {
432
524
  for (const char of event.text) {
@@ -440,16 +532,15 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
440
532
  break;
441
533
  }
442
534
  case 'wait': {
443
- if (!event.ms) {
444
- throw new Error('wait 事件需要 ms 参数');
445
- }
446
- await new Promise(resolve => setTimeout(resolve, event.ms));
535
+ await new Promise((resolve) => setTimeout(resolve, event.ms));
447
536
  break;
448
537
  }
449
538
  case 'select': {
450
539
  if (!event.find) {
451
540
  throw new Error('select 事件需要 find 参数');
452
541
  }
542
+ // 自动聚焦目标元素(selectText 内 mouseClick focus 对 React 等场景不可靠)
543
+ await focusTargetIfNeeded(unifiedSession, event.target, event.nth, timeout);
453
544
  await selectText(unifiedSession, event.find, event.target, event.nth, timeout);
454
545
  break;
455
546
  }
@@ -460,6 +551,8 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
460
551
  if (event.text === undefined) {
461
552
  throw new Error('replace 事件需要 text 参数');
462
553
  }
554
+ // 自动聚焦目标元素
555
+ await focusTargetIfNeeded(unifiedSession, event.target, event.nth, timeout);
463
556
  // Step 1: 选中文本
464
557
  const formatted = await selectText(unifiedSession, event.find, event.target, event.nth, timeout);
465
558
  // 轮询等待选区同步(最多 500ms)
@@ -477,7 +570,7 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
477
570
  selectionConfirmed = true;
478
571
  break;
479
572
  }
480
- await new Promise(resolve => setTimeout(resolve, 20));
573
+ await new Promise((resolve) => setTimeout(resolve, 20));
481
574
  }
482
575
  if (!selectionConfirmed) {
483
576
  throw new Error(`选区同步失败:文本 "${event.find}" 已定位但未能建立选区,无法执行替换`);
@@ -517,6 +610,54 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
517
610
  }
518
611
  break;
519
612
  }
613
+ case 'drag': {
614
+ if (!event.target) {
615
+ throw new Error('drag 事件需要 target 参数(拖拽源)');
616
+ }
617
+ if (!event.to) {
618
+ throw new Error('drag 事件需要 to 参数(拖拽目标)');
619
+ }
620
+ // drag 仅支持选择器类 target(CSS/text/xpath/ARIA 等),不支持坐标
621
+ // 理由:drag 依赖 refId 在 Extension ISOLATED 世界 dispatchEvent,坐标无法生成 refId
622
+ if ('x' in event.target || 'y' in event.target) {
623
+ throw new Error('drag 的 target 不支持坐标类型,请使用选择器(css/text/xpath/role 等)');
624
+ }
625
+ if ('x' in event.to || 'y' in event.to) {
626
+ throw new Error('drag 的 to 不支持坐标类型,请使用选择器(css/text/xpath/role 等)');
627
+ }
628
+ // 用 find 定位确认元素存在(支持 ARIA/testId 等高级定位),拿 refId 传入 extension 侧 dispatchEvent
629
+ const srcParams = targetToFindParams(event.target);
630
+ const dstParams = targetToFindParams(event.to);
631
+ const srcNth = srcParams.nth ?? 0;
632
+ const dstNth = dstParams.nth ?? 0;
633
+ // 执行 drag,失败时重试一次(React 重渲染可能导致 refId 失效)
634
+ const attemptDrag = async () => {
635
+ const srcEls = await unifiedSession.find(srcParams.selector, srcParams.text, srcParams.xpath, timeout);
636
+ const dstEls = await unifiedSession.find(dstParams.selector, dstParams.text, dstParams.xpath, timeout);
637
+ if (srcEls.length <= srcNth) {
638
+ throw new Error(`drag 源元素未找到: ${JSON.stringify(event.target)}`);
639
+ }
640
+ if (dstEls.length <= dstNth) {
641
+ throw new Error(`drag 目标元素未找到: ${JSON.stringify(event.to)}`);
642
+ }
643
+ return unifiedSession.dragAndDrop(srcEls[srcNth].refId, dstEls[dstNth].refId);
644
+ };
645
+ let dragResult = await attemptDrag();
646
+ let retried = false;
647
+ // 仅对 refId 失效(REF_STALE)重试:源/目标元素从 DOM 移除,典型是 React 重渲染
648
+ if (!dragResult.success && dragResult.code === 'REF_STALE') {
649
+ console.warn('[MCP] drag refId 失效,自动重试一次:', dragResult.error);
650
+ dragResult = await attemptDrag();
651
+ retried = true;
652
+ }
653
+ if (!dragResult.success) {
654
+ throw new Error(dragResult.error || 'drag 执行失败');
655
+ }
656
+ if (retried) {
657
+ return 'drag 因 refId 失效已自动重试一次(可能是 React 等框架重渲染导致)';
658
+ }
659
+ break;
660
+ }
520
661
  default:
521
662
  throw new Error(`未知事件类型: ${event.type}`);
522
663
  }
@@ -552,34 +693,63 @@ async function getTargetPointExtension(unifiedSession, target, timeout) {
552
693
  if (nth >= elements.length) {
553
694
  throw new Error(`第 ${nth} 个匹配元素不存在(共 ${elements.length} 个)`);
554
695
  }
555
- const rect = elements[nth].rect;
556
- const point = {
557
- x: rect.x + rect.width / 2,
558
- y: rect.y + rect.height / 2,
559
- };
560
- // 元素定位:find() 返回视口绝对坐标
561
- if (frameOffset && isStealth) {
562
- // stealth: 转回 iframe 相对坐标
563
- return { x: point.x - frameOffset.x, y: point.y - frameOffset.y };
696
+ // 视口外时滚动后重新取 rect:与 actionableClick (left+single) 行为对齐,
697
+ // 否则非左键 / 多击的坐标路径在视口外坐标 dispatch,浏览器找不到元素,事件丢失
698
+ const refId = elements[nth].refId;
699
+ const iframeRelRect = await unifiedSession.evaluate(`(() => {
700
+ const ref = window.__mcpElementMap?.[${JSON.stringify(refId)}];
701
+ const el = ref?.deref();
702
+ if (!el) return null;
703
+ const r = el.getBoundingClientRect();
704
+ if (r.top < 0 || r.bottom > window.innerHeight || r.left < 0 || r.right > window.innerWidth) {
705
+ el.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
706
+ }
707
+ const r2 = el.getBoundingClientRect();
708
+ return { x: r2.x, y: r2.y, width: r2.width, height: r2.height };
709
+ })()`);
710
+ // refId 失效等异常:fallback 到原始 find rect(父视口绝对)
711
+ if (!iframeRelRect) {
712
+ const rect = elements[nth].rect;
713
+ const point = { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
714
+ if (frameOffset && isStealth) {
715
+ return { x: point.x - frameOffset.x, y: point.y - frameOffset.y, refId };
716
+ }
717
+ return { ...point, refId };
718
+ }
719
+ // 主 frame:iframeRelRect 就是父视口绝对
720
+ if (!frameOffset) {
721
+ return {
722
+ x: iframeRelRect.x + iframeRelRect.width / 2,
723
+ y: iframeRelRect.y + iframeRelRect.height / 2,
724
+ refId,
725
+ };
726
+ }
727
+ // iframe + stealth:消费者(chrome.scripting in iframe)需要 iframe 相对
728
+ if (isStealth) {
729
+ return {
730
+ x: iframeRelRect.x + iframeRelRect.width / 2,
731
+ y: iframeRelRect.y + iframeRelRect.height / 2,
732
+ refId,
733
+ };
564
734
  }
565
- return point;
735
+ // iframe + precise:消费者(chrome.debugger)需要父视口绝对。
736
+ // scrollIntoView({block:'center'}) 会 cascade 到父框架,导致 frameOffset 与父绝对 rect 都过期,
737
+ // refetch find() 让 content-handler 重新计算 frameOffset 并返回最新父绝对 rect
738
+ const refreshed = await unifiedSession.find(selector, text, xpath, timeout);
739
+ const rect = refreshed[nth]?.rect ?? elements[nth].rect;
740
+ return { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2, refId };
566
741
  }
567
742
  /**
568
743
  * CDP 模式:执行单个事件
569
744
  */
570
745
  async function executeEvent(session, event, humanize, timeout) {
746
+ validateEvent(event);
571
747
  switch (event.type) {
572
748
  case 'keydown': {
573
- if (!event.key) {
574
- throw new Error('keydown 事件需要 key 参数');
575
- }
576
- await session.keyDown(event.key);
749
+ await session.keyDown(event.key, event.commands);
577
750
  break;
578
751
  }
579
752
  case 'keyup': {
580
- if (!event.key) {
581
- throw new Error('keyup 事件需要 key 参数');
582
- }
583
753
  await session.keyUp(event.key);
584
754
  break;
585
755
  }
@@ -587,8 +757,12 @@ async function executeEvent(session, event, humanize, timeout) {
587
757
  if (event.target) {
588
758
  await moveToTarget(session, event.target, humanize, timeout);
589
759
  }
590
- await session.mouseDown(event.button ?? 'left');
591
- await session.mouseUp(event.button ?? 'left');
760
+ const cdpButton = event.button ?? 'left';
761
+ const cdpClickCount = event.clickCount ?? 1;
762
+ for (let i = 1; i <= cdpClickCount; i++) {
763
+ await session.mouseDown(cdpButton, i);
764
+ await session.mouseUp(cdpButton, i);
765
+ }
592
766
  break;
593
767
  }
594
768
  case 'mousedown': {
@@ -662,6 +836,14 @@ async function executeEvent(session, event, humanize, timeout) {
662
836
  await session.mouseDown('left');
663
837
  await session.mouseUp('left');
664
838
  }
839
+ else {
840
+ const hasActiveFocus = await session.evaluate('!!document.activeElement && ' +
841
+ 'document.activeElement !== document.body && ' +
842
+ 'document.activeElement !== document.documentElement');
843
+ if (!hasActiveFocus) {
844
+ throw new Error('type 事件在无 target 时需要页面已有焦点元素,请提供 target 或先 click 目标元素');
845
+ }
846
+ }
665
847
  const delay = event.delay ?? 0;
666
848
  if (humanize) {
667
849
  // 人类化打字
@@ -676,13 +858,14 @@ async function executeEvent(session, event, humanize, timeout) {
676
858
  break;
677
859
  }
678
860
  case 'wait': {
679
- if (!event.ms) {
680
- throw new Error('wait 事件需要 ms 参数');
681
- }
682
861
  await new Promise((resolve) => setTimeout(resolve, event.ms));
683
862
  break;
684
863
  }
685
864
  default:
865
+ // drag 仅在 Extension 模式可用,给出明确错误而非通用"未知事件类型"
866
+ if (event.type === 'drag') {
867
+ throw new Error('drag 事件仅在 Extension 模式下可用,当前为 CDP 模式');
868
+ }
686
869
  throw new Error(`未知事件类型: ${event.type}`);
687
870
  }
688
871
  }
@@ -750,13 +933,22 @@ export function registerInputTool(server) {
750
933
  2. 用 CSS 选择器 + nth 精确定位元素(避免坐标点击)
751
934
  3. 再 input click/type 操作目标元素
752
935
 
753
- 组合键需拆分为独立事件。示例(Ctrl+A 全选):
936
+ 组合键拆分为独立事件示例(修饰键 + 字母键):
754
937
  events: [
755
938
  {type: "keydown", key: "Control"},
756
939
  {type: "keydown", key: "a"},
757
940
  {type: "keyup", key: "a"},
758
941
  {type: "keyup", key: "Control"}
759
- ]`,
942
+ ]
943
+
944
+ 浏览器编辑命令(selectAll/copy/paste/cut/undo/redo 等)需用 commands 字段,跨平台可靠:
945
+ events: [
946
+ {type: "keydown", key: "a", commands: ["selectAll"]},
947
+ {type: "keyup", key: "a"}
948
+ ]
949
+ 注意:纯键盘事件(不带 commands)仅保证 JS keyboard event 可被监听,不保证触发浏览器原生编辑行为;
950
+ 全选/复制/粘贴等语义用 commands;"全选并替换文本"用 select/replace 事件更简洁;
951
+ commands 仅支持 inputMode=precise,stealth 模式下会报错`,
760
952
  inputSchema: inputSchema,
761
953
  }, (args) => handleInput(args));
762
954
  }