@pyrokine/mcp-chrome 1.6.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/README.md +101 -43
  2. package/dist/anti-detection/behavior.d.ts.map +1 -1
  3. package/dist/anti-detection/behavior.js.map +1 -1
  4. package/dist/anti-detection/index.d.ts +1 -1
  5. package/dist/anti-detection/index.d.ts.map +1 -1
  6. package/dist/anti-detection/index.js +1 -1
  7. package/dist/anti-detection/index.js.map +1 -1
  8. package/dist/anti-detection/injection.d.ts +6 -2
  9. package/dist/anti-detection/injection.d.ts.map +1 -1
  10. package/dist/anti-detection/injection.js +32 -79
  11. package/dist/anti-detection/injection.js.map +1 -1
  12. package/dist/cdp/client.d.ts +2 -2
  13. package/dist/cdp/client.d.ts.map +1 -1
  14. package/dist/cdp/client.js +8 -10
  15. package/dist/cdp/client.js.map +1 -1
  16. package/dist/cdp/index.d.ts.map +1 -1
  17. package/dist/cdp/index.js.map +1 -1
  18. package/dist/cdp/launcher.d.ts.map +1 -1
  19. package/dist/cdp/launcher.js +40 -13
  20. package/dist/cdp/launcher.js.map +1 -1
  21. package/dist/core/auto-wait.d.ts +2 -2
  22. package/dist/core/auto-wait.d.ts.map +1 -1
  23. package/dist/core/auto-wait.js +2 -2
  24. package/dist/core/auto-wait.js.map +1 -1
  25. package/dist/core/browser-driver.d.ts +307 -0
  26. package/dist/core/browser-driver.d.ts.map +1 -0
  27. package/dist/core/browser-driver.js +21 -0
  28. package/dist/core/browser-driver.js.map +1 -0
  29. package/dist/core/error-sanitizer.d.ts +25 -0
  30. package/dist/core/error-sanitizer.d.ts.map +1 -0
  31. package/dist/core/error-sanitizer.js +66 -0
  32. package/dist/core/error-sanitizer.js.map +1 -0
  33. package/dist/core/errors.d.ts +10 -1
  34. package/dist/core/errors.d.ts.map +1 -1
  35. package/dist/core/errors.js +17 -4
  36. package/dist/core/errors.js.map +1 -1
  37. package/dist/core/extension-errors.d.ts +20 -0
  38. package/dist/core/extension-errors.d.ts.map +1 -0
  39. package/dist/core/extension-errors.js +40 -0
  40. package/dist/core/extension-errors.js.map +1 -0
  41. package/dist/core/index.d.ts.map +1 -1
  42. package/dist/core/index.js.map +1 -1
  43. package/dist/core/locator.d.ts +2 -2
  44. package/dist/core/locator.d.ts.map +1 -1
  45. package/dist/core/locator.js +25 -65
  46. package/dist/core/locator.js.map +1 -1
  47. package/dist/core/retry.d.ts +2 -2
  48. package/dist/core/retry.d.ts.map +1 -1
  49. package/dist/core/retry.js +2 -2
  50. package/dist/core/retry.js.map +1 -1
  51. package/dist/core/session.d.ts +153 -46
  52. package/dist/core/session.d.ts.map +1 -1
  53. package/dist/core/session.js +672 -177
  54. package/dist/core/session.js.map +1 -1
  55. package/dist/core/types.d.ts +11 -3
  56. package/dist/core/types.d.ts.map +1 -1
  57. package/dist/core/types.js +13 -6
  58. package/dist/core/types.js.map +1 -1
  59. package/dist/core/unified-session.d.ts +69 -68
  60. package/dist/core/unified-session.d.ts.map +1 -1
  61. package/dist/core/unified-session.js +356 -615
  62. package/dist/core/unified-session.js.map +1 -1
  63. package/dist/core/utils.d.ts +7 -0
  64. package/dist/core/utils.d.ts.map +1 -0
  65. package/dist/core/utils.js +33 -0
  66. package/dist/core/utils.js.map +1 -0
  67. package/dist/extension/bridge.d.ts +80 -39
  68. package/dist/extension/bridge.d.ts.map +1 -1
  69. package/dist/extension/bridge.js +195 -65
  70. package/dist/extension/bridge.js.map +1 -1
  71. package/dist/extension/http-server.d.ts +6 -4
  72. package/dist/extension/http-server.d.ts.map +1 -1
  73. package/dist/extension/http-server.js +45 -31
  74. package/dist/extension/http-server.js.map +1 -1
  75. package/dist/extension/index.d.ts.map +1 -1
  76. package/dist/extension/index.js.map +1 -1
  77. package/dist/index.js +27 -3
  78. package/dist/index.js.map +1 -1
  79. package/dist/tools/browse.d.ts.map +1 -1
  80. package/dist/tools/browse.js +33 -35
  81. package/dist/tools/browse.js.map +1 -1
  82. package/dist/tools/cookies.d.ts.map +1 -1
  83. package/dist/tools/cookies.js +38 -16
  84. package/dist/tools/cookies.js.map +1 -1
  85. package/dist/tools/evaluate.d.ts.map +1 -1
  86. package/dist/tools/evaluate.js +59 -13
  87. package/dist/tools/evaluate.js.map +1 -1
  88. package/dist/tools/extract.d.ts.map +1 -1
  89. package/dist/tools/extract.js +263 -155
  90. package/dist/tools/extract.js.map +1 -1
  91. package/dist/tools/index.d.ts.map +1 -1
  92. package/dist/tools/index.js.map +1 -1
  93. package/dist/tools/input.d.ts.map +1 -1
  94. package/dist/tools/input.js +311 -75
  95. package/dist/tools/input.js.map +1 -1
  96. package/dist/tools/logs.d.ts.map +1 -1
  97. package/dist/tools/logs.js +31 -17
  98. package/dist/tools/logs.js.map +1 -1
  99. package/dist/tools/manage.d.ts.map +1 -1
  100. package/dist/tools/manage.js +25 -28
  101. package/dist/tools/manage.js.map +1 -1
  102. package/dist/tools/schema.d.ts +1 -1
  103. package/dist/tools/schema.d.ts.map +1 -1
  104. package/dist/tools/schema.js +31 -55
  105. package/dist/tools/schema.js.map +1 -1
  106. package/dist/tools/wait.d.ts.map +1 -1
  107. package/dist/tools/wait.js +73 -22
  108. package/dist/tools/wait.js.map +1 -1
  109. package/package.json +48 -40
@@ -16,24 +16,57 @@ import { targetToFindParams, targetZodSchema } from './schema.js';
16
16
  * InputEvent schema
17
17
  */
18
18
  const inputEventSchema = z.object({
19
- type: z.enum([
20
- 'keydown', 'keyup', 'click', 'mousedown', 'mouseup', 'mousemove',
21
- 'wheel', 'touchstart', 'touchmove', 'touchend', 'type', 'wait',
22
- 'select', 'replace',
23
- ]).describe('事件类型'),
19
+ type: z
20
+ .enum([
21
+ 'keydown',
22
+ 'keyup',
23
+ 'click',
24
+ 'mousedown',
25
+ 'mouseup',
26
+ 'mousemove',
27
+ 'wheel',
28
+ 'touchstart',
29
+ 'touchmove',
30
+ 'touchend',
31
+ 'type',
32
+ 'wait',
33
+ 'select',
34
+ 'replace',
35
+ 'drag',
36
+ ])
37
+ .describe('事件类型'),
24
38
  key: z.string().optional().describe('按键(keydown/keyup)'),
25
- button: z.enum(['left', 'middle', 'right', 'back', 'forward'])
39
+ commands: z
40
+ .array(z.string())
26
41
  .optional()
27
- .describe('鼠标按钮'),
28
- target: targetZodSchema.optional().describe('目标元素(mousemove/touchstart/touchmove 必填;click/mousedown/wheel/type 可选,用于先定位再操作;select/replace 可选,用于限定搜索范围)'),
42
+ .describe('浏览器编辑命令(keydown 专用),如 ["selectAll"]、["copy"]、["paste"]、["cut"]、["undo"]、["redo"],触发原生编辑命令,优先于纯键盘事件,用于跨平台快捷键场景,需要 inputMode=precise(stealth 模式无法触发 Chrome 原生编辑命令,会报错)'),
43
+ button: z.enum(['left', 'middle', 'right', 'back', 'forward']).optional().describe('鼠标按钮'),
44
+ clickCount: z
45
+ .number()
46
+ .int()
47
+ .min(1)
48
+ .optional()
49
+ .describe('鼠标点击次数(click,默认 1,设为 2 触发双击事件,设为 3 触发三击事件)'),
50
+ target: targetZodSchema
51
+ .optional()
52
+ .describe('目标元素(mousemove/touchstart/touchmove 必填;click/mousedown/wheel/type/drag 可选,用于先定位再操作;select/replace 可选,用于限定搜索范围;drag 时为拖拽源)'),
53
+ to: targetZodSchema.optional().describe('拖拽目标元素(drag 事件必填)'),
29
54
  steps: z.number().optional().describe('移动步数(mousemove/touchmove)'),
30
55
  deltaX: z.number().optional().describe('水平滚动量'),
31
56
  deltaY: z.number().optional().describe('垂直滚动量'),
32
- text: z.string().optional().describe('输入文本(type)或替换文本(replace)'),
33
- delay: z.number().optional().describe('按键间隔毫秒'),
57
+ text: z.string().max(10000).optional().describe('输入文本(type,最大 10000 字符)或替换文本(replace)'),
58
+ delay: z.number().min(0).max(100).optional().describe('按键间隔毫秒(type 事件最大 100ms,避免长时延 DoS)'),
34
59
  ms: z.number().optional().describe('等待毫秒'),
35
60
  find: z.string().optional().describe('要查找并选中的文本(select/replace)'),
36
61
  nth: z.number().optional().describe('第 N 个匹配(select/replace,从 0 开始,默认 0 即第一个)'),
62
+ dispatch: z
63
+ .boolean()
64
+ .optional()
65
+ .describe('使用 dispatch 模式输入(type),直接设置 value 并触发 input/change 事件,兼容 React/Vue 等框架的受控组件,默认 false 使用键盘事件'),
66
+ force: z
67
+ .boolean()
68
+ .optional()
69
+ .describe('强制执行(click),跳过可操作性检查(可见性、遮挡检测等),直接在目标元素上触发事件,用于已知需要绕过检查的场景'),
37
70
  });
38
71
  /**
39
72
  * input 参数 schema
@@ -41,9 +74,15 @@ const inputEventSchema = z.object({
41
74
  const inputSchema = z.object({
42
75
  events: z.array(inputEventSchema).describe('事件序列'),
43
76
  humanize: z.boolean().optional().describe('启用人类行为模拟(贝塞尔曲线移动、随机延迟)'),
44
- tabId: z.string().optional().describe('目标 Tab ID(可选,仅 Extension 模式)。不指定则使用当前 attach 的 tab。可操作非当前 attach 的 tab。CDP 模式下不支持此参数'),
77
+ tabId: z
78
+ .string()
79
+ .optional()
80
+ .describe('目标 Tab ID(可选,仅 Extension 模式),不指定则使用当前 attach 的 tab,可操作非当前 attach 的 tab,CDP 模式下不支持此参数'),
45
81
  timeout: z.number().optional().describe('超时毫秒'),
46
- frame: z.union([z.string(), z.number()]).optional().describe('iframe 定位(可选,仅 Extension 模式)。CSS 选择器(如 "iframe#main")或索引(如 0)。不指定则在主框架操作'),
82
+ frame: z
83
+ .union([z.string(), z.number()])
84
+ .optional()
85
+ .describe('iframe 定位(可选,仅 Extension 模式),CSS 选择器(如 "iframe#main")或索引(如 0),不指定则在主框架操作'),
47
86
  });
48
87
  /**
49
88
  * input 工具处理器
@@ -67,7 +106,7 @@ async function handleInput(args) {
67
106
  }
68
107
  }
69
108
  else {
70
- // CDP 模式:使用原有逻辑
109
+ // CDP 模式:逐事件分发(无 Extension bridge)
71
110
  const session = getSession();
72
111
  for (const event of args.events) {
73
112
  if (event.type === 'select' || event.type === 'replace') {
@@ -107,6 +146,27 @@ async function handleInput(args) {
107
146
  *
108
147
  * @returns 格式化标签名(如 "code"),若被替换文本在格式化节点内
109
148
  */
149
+ /**
150
+ * 如果 target 是选择器类型,先通过 actionableClick 聚焦
151
+ * select/replace 事件用,保证选区建立前 activeElement 就是目标
152
+ */
153
+ async function focusTargetIfNeeded(unifiedSession, target, nth, timeout) {
154
+ if (!target || 'x' in target || 'y' in target) {
155
+ return;
156
+ }
157
+ const params = targetToFindParams(target);
158
+ const els = await unifiedSession.find(params.selector, params.text, params.xpath, timeout);
159
+ const nth0 = params.nth ?? nth ?? 0;
160
+ if (els.length > nth0) {
161
+ try {
162
+ await unifiedSession.actionableClick(els[nth0].refId);
163
+ }
164
+ catch (err) {
165
+ // 失败时不中断(可能是 contenteditable 不接受 click focus),但记录 warning
166
+ console.warn('[MCP] focusTargetIfNeeded 聚焦失败,select/replace 将回退到 mouseClick 聚焦:', err);
167
+ }
168
+ }
169
+ }
110
170
  async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeout) {
111
171
  // 将 target 转为查询参数,传入注入脚本进行 DOM 查询
112
172
  let scopeSelector = null;
@@ -151,6 +211,11 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
151
211
  pos = val.indexOf(findText, pos + (n > 0 ? 1 : 0));
152
212
  if (pos === -1) return {type: 'notfound'};
153
213
  }
214
+ // 原子化:定位到 input 同时完成 focus + setSelectionRange,避免外层 mouseClick 聚焦不可靠
215
+ root.focus();
216
+ if (typeof root.setSelectionRange === 'function') {
217
+ root.setSelectionRange(pos, pos + findText.length);
218
+ }
154
219
  return {type: 'input', selectionStart: pos, selectionEnd: pos + findText.length};
155
220
  }
156
221
 
@@ -165,9 +230,11 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
165
230
  if (ip === -1) break;
166
231
  }
167
232
  if (ip !== -1) {
168
- var r = inp.getBoundingClientRect();
169
- return {type: 'input', selectionStart: ip, selectionEnd: ip + findText.length,
170
- focusX: r.x + r.width / 2, focusY: r.y + r.height / 2};
233
+ inp.focus();
234
+ if (typeof inp.setSelectionRange === 'function') {
235
+ inp.setSelectionRange(ip, ip + findText.length);
236
+ }
237
+ return {type: 'input', selectionStart: ip, selectionEnd: ip + findText.length};
171
238
  }
172
239
  }
173
240
 
@@ -241,28 +308,7 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
241
308
  : `未找到文本: "${findText}"${nth > 0 ? `(第 ${nth} 个匹配)` : ''}`);
242
309
  }
243
310
  if (result.type === 'input') {
244
- // input/textarea:聚焦 + setSelectionRange
245
- const r = result;
246
- if (r.focusX !== undefined && r.focusY !== undefined) {
247
- let x = r.focusX;
248
- let y = r.focusY;
249
- const frameOffset = unifiedSession.getFrameOffset();
250
- if (frameOffset && unifiedSession.getInputMode() !== 'stealth') {
251
- x += frameOffset.x;
252
- y += frameOffset.y;
253
- }
254
- await unifiedSession.mouseMove(x, y);
255
- await unifiedSession.mouseClick('left');
256
- }
257
- else if (scopeTarget) {
258
- const point = await getTargetPointExtension(unifiedSession, scopeTarget, timeout);
259
- await unifiedSession.mouseMove(point.x, point.y);
260
- await unifiedSession.mouseClick('left');
261
- }
262
- await unifiedSession.evaluate(`function(start, end) {
263
- var el = document.activeElement;
264
- if (el && el.setSelectionRange) el.setSelectionRange(start, end);
265
- }`, undefined, timeout, [result.selectionStart, result.selectionEnd]);
311
+ // 注入脚本已完成 focus + setSelectionRange(原子化,避免外层 mouseClick 聚焦不可靠)
266
312
  return undefined;
267
313
  }
268
314
  // DOM 文本节点:鼠标选择
@@ -284,33 +330,77 @@ async function selectText(unifiedSession, findText, scopeTarget, nth = 0, timeou
284
330
  await unifiedSession.keyUp('Shift');
285
331
  return coords.formatted;
286
332
  }
333
+ /**
334
+ * 验证事件参数(两种执行模式共享),避免在 Extension/CDP 两个 switch 中重复校验
335
+ */
336
+ function validateEvent(event) {
337
+ if (event.commands && event.commands.length > 0 && event.type !== 'keydown') {
338
+ throw new Error(`commands 参数只能用于 keydown 事件,当前事件类型为 ${event.type},如需触发编辑命令,请把 commands 放在 keydown 事件上`);
339
+ }
340
+ switch (event.type) {
341
+ case 'keydown':
342
+ case 'keyup':
343
+ if (!event.key) {
344
+ throw new Error(`${event.type} 事件需要 key 参数`);
345
+ }
346
+ break;
347
+ case 'wait':
348
+ if (event.ms === undefined) {
349
+ throw new Error('wait 事件需要 ms 参数');
350
+ }
351
+ break;
352
+ }
353
+ }
287
354
  /**
288
355
  * Extension 模式:执行单个事件
289
356
  *
290
357
  * @returns 可选警告信息(如格式丢失提示)
291
358
  */
292
359
  async function executeEventExtension(unifiedSession, event, humanize, timeout) {
360
+ validateEvent(event);
293
361
  switch (event.type) {
294
362
  case 'keydown': {
295
- if (!event.key) {
296
- throw new Error('keydown 事件需要 key 参数');
297
- }
298
- await unifiedSession.keyDown(event.key);
363
+ await unifiedSession.keyDown(event.key, event.commands);
299
364
  break;
300
365
  }
301
366
  case 'keyup': {
302
- if (!event.key) {
303
- throw new Error('keyup 事件需要 key 参数');
304
- }
305
367
  await unifiedSession.keyUp(event.key);
306
368
  break;
307
369
  }
308
370
  case 'click': {
371
+ const button = event.button ?? 'left';
372
+ const clickCount = event.clickCount ?? 1;
309
373
  if (event.target) {
374
+ // 坐标型 target:不过 actionableClick,但仍需 getTargetPointExtension 做 iframe offset 修正
375
+ if ('x' in event.target && 'y' in event.target) {
376
+ const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
377
+ await unifiedSession.mouseMove(point.x, point.y);
378
+ await unifiedSession.mouseClick(button, clickCount);
379
+ break;
380
+ }
381
+ // 左键单击:优先用 actionable click(带可操作性检查、自动滚动、遮挡检测)
382
+ // 非左键 / 多击:actionableClick 依赖 HTMLElement.click() 只能触发单次左键,必须走坐标路径
383
+ if (button === 'left' && clickCount === 1) {
384
+ const { selector, text: searchText, xpath, nth: nthParam, } = targetToFindParams(event.target);
385
+ const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
386
+ const nth = nthParam ?? 0;
387
+ if (elements.length > 0 && nth < elements.length) {
388
+ const refId = elements[nth].refId;
389
+ const result = await unifiedSession.actionableClick(refId, event.force);
390
+ if (!result.success) {
391
+ throw new Error(result.error || 'Click failed');
392
+ }
393
+ break;
394
+ }
395
+ }
396
+ // fallback: 找不到 refId 或需走坐标路径时
397
+ // refId 透传:stealth 模式下嵌套 iframe overlay 场景绕过 elementFromPoint
310
398
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
311
399
  await unifiedSession.mouseMove(point.x, point.y);
400
+ await unifiedSession.mouseClick(button, clickCount, point.refId);
401
+ break;
312
402
  }
313
- await unifiedSession.mouseClick(event.button ?? 'left');
403
+ await unifiedSession.mouseClick(button, clickCount);
314
404
  break;
315
405
  }
316
406
  case 'mousedown': {
@@ -344,6 +434,15 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
344
434
  }
345
435
  case 'wheel': {
346
436
  if (event.target) {
437
+ const { selector, text, xpath, nth: nthParam, } = targetToFindParams(event.target);
438
+ const elements = await unifiedSession.find(selector, text, xpath, timeout);
439
+ const nth = nthParam ?? 0;
440
+ if (elements.length > nth) {
441
+ // 用 refId 直接滚动目标元素(支持视口外元素)
442
+ await unifiedSession.scroll(event.deltaX ?? 0, event.deltaY ?? 0, elements[nth].refId);
443
+ break;
444
+ }
445
+ // 找不到元素时 fallback 到坐标方式
347
446
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
348
447
  await unifiedSession.mouseMove(point.x, point.y);
349
448
  }
@@ -374,12 +473,43 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
374
473
  if (!event.text) {
375
474
  throw new Error('type 事件需要 text 参数');
376
475
  }
476
+ // dispatch 模式:直接设置 value + 触发事件(兼容 React/Vue 受控组件)
477
+ if (event.dispatch) {
478
+ // 定位目标元素
479
+ if (!event.target) {
480
+ throw new Error('dispatch 模式需要 target 参数定位输入元素');
481
+ }
482
+ if ('x' in event.target && 'y' in event.target) {
483
+ throw new Error('dispatch 模式不支持坐标型 target,请使用 CSS 选择器、role 或文本定位');
484
+ }
485
+ const { selector, text: searchText, xpath, nth: nthParam, } = targetToFindParams(event.target);
486
+ const elements = await unifiedSession.find(selector, searchText, xpath, timeout);
487
+ const nth = nthParam ?? 0;
488
+ if (elements.length === 0 || nth >= elements.length) {
489
+ throw new Error('目标元素未找到');
490
+ }
491
+ const refId = elements[nth].refId;
492
+ // 通过 Extension ISOLATED 世界执行 dispatch(访问 __mcpElementMap)
493
+ // 参考 Playwright fill():nativeInputValueSetter + dispatchEvent
494
+ const result = await unifiedSession.dispatchInput(refId, event.text);
495
+ if (!result.success) {
496
+ throw new Error(result.error || 'dispatch 输入失败');
497
+ }
498
+ break;
499
+ }
500
+ // 默认模式:键盘事件
377
501
  // 如果有 target,先点击目标(聚焦)
378
502
  if (event.target) {
379
503
  const point = await getTargetPointExtension(unifiedSession, event.target, timeout);
380
504
  await unifiedSession.mouseMove(point.x, point.y);
381
505
  await unifiedSession.mouseClick('left');
382
506
  }
507
+ else {
508
+ const hasActiveFocus = await unifiedSession.evaluate('!!document.activeElement && document.activeElement !== document.body && document.activeElement !== document.documentElement');
509
+ if (!hasActiveFocus) {
510
+ throw new Error('type 事件在无 target 时需要页面已有焦点元素,请提供 target 或先 click 目标元素');
511
+ }
512
+ }
383
513
  const delay = event.delay ?? 0;
384
514
  if (humanize) {
385
515
  for (const char of event.text) {
@@ -393,16 +523,15 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
393
523
  break;
394
524
  }
395
525
  case 'wait': {
396
- if (!event.ms) {
397
- throw new Error('wait 事件需要 ms 参数');
398
- }
399
- await new Promise(resolve => setTimeout(resolve, event.ms));
526
+ await new Promise((resolve) => setTimeout(resolve, event.ms));
400
527
  break;
401
528
  }
402
529
  case 'select': {
403
530
  if (!event.find) {
404
531
  throw new Error('select 事件需要 find 参数');
405
532
  }
533
+ // 自动聚焦目标元素(selectText 内 mouseClick focus 对 React 等场景不可靠)
534
+ await focusTargetIfNeeded(unifiedSession, event.target, event.nth, timeout);
406
535
  await selectText(unifiedSession, event.find, event.target, event.nth, timeout);
407
536
  break;
408
537
  }
@@ -413,6 +542,8 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
413
542
  if (event.text === undefined) {
414
543
  throw new Error('replace 事件需要 text 参数');
415
544
  }
545
+ // 自动聚焦目标元素
546
+ await focusTargetIfNeeded(unifiedSession, event.target, event.nth, timeout);
416
547
  // Step 1: 选中文本
417
548
  const formatted = await selectText(unifiedSession, event.find, event.target, event.nth, timeout);
418
549
  // 轮询等待选区同步(最多 500ms)
@@ -430,7 +561,7 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
430
561
  selectionConfirmed = true;
431
562
  break;
432
563
  }
433
- await new Promise(resolve => setTimeout(resolve, 20));
564
+ await new Promise((resolve) => setTimeout(resolve, 20));
434
565
  }
435
566
  if (!selectionConfirmed) {
436
567
  throw new Error(`选区同步失败:文本 "${event.find}" 已定位但未能建立选区,无法执行替换`);
@@ -470,6 +601,54 @@ async function executeEventExtension(unifiedSession, event, humanize, timeout) {
470
601
  }
471
602
  break;
472
603
  }
604
+ case 'drag': {
605
+ if (!event.target) {
606
+ throw new Error('drag 事件需要 target 参数(拖拽源)');
607
+ }
608
+ if (!event.to) {
609
+ throw new Error('drag 事件需要 to 参数(拖拽目标)');
610
+ }
611
+ // drag 仅支持选择器类 target(CSS/text/xpath/ARIA 等),不支持坐标
612
+ // 理由:drag 依赖 refId 在 Extension ISOLATED 世界 dispatchEvent,坐标无法生成 refId
613
+ if ('x' in event.target || 'y' in event.target) {
614
+ throw new Error('drag 的 target 不支持坐标类型,请使用选择器(css/text/xpath/role 等)');
615
+ }
616
+ if ('x' in event.to || 'y' in event.to) {
617
+ throw new Error('drag 的 to 不支持坐标类型,请使用选择器(css/text/xpath/role 等)');
618
+ }
619
+ // 用 find 定位确认元素存在(支持 ARIA/testId 等高级定位),拿 refId 传入 extension 侧 dispatchEvent
620
+ const srcParams = targetToFindParams(event.target);
621
+ const dstParams = targetToFindParams(event.to);
622
+ const srcNth = srcParams.nth ?? 0;
623
+ const dstNth = dstParams.nth ?? 0;
624
+ // 执行 drag,失败时重试一次(React 重渲染可能导致 refId 失效)
625
+ const attemptDrag = async () => {
626
+ const srcEls = await unifiedSession.find(srcParams.selector, srcParams.text, srcParams.xpath, timeout);
627
+ const dstEls = await unifiedSession.find(dstParams.selector, dstParams.text, dstParams.xpath, timeout);
628
+ if (srcEls.length <= srcNth) {
629
+ throw new Error(`drag 源元素未找到: ${JSON.stringify(event.target)}`);
630
+ }
631
+ if (dstEls.length <= dstNth) {
632
+ throw new Error(`drag 目标元素未找到: ${JSON.stringify(event.to)}`);
633
+ }
634
+ return unifiedSession.dragAndDrop(srcEls[srcNth].refId, dstEls[dstNth].refId);
635
+ };
636
+ let dragResult = await attemptDrag();
637
+ let retried = false;
638
+ // 仅对 refId 失效(REF_STALE)重试:源/目标元素从 DOM 移除,典型是 React 重渲染
639
+ if (!dragResult.success && dragResult.code === 'REF_STALE') {
640
+ console.warn('[MCP] drag refId 失效,自动重试一次:', dragResult.error);
641
+ dragResult = await attemptDrag();
642
+ retried = true;
643
+ }
644
+ if (!dragResult.success) {
645
+ throw new Error(dragResult.error || 'drag 执行失败');
646
+ }
647
+ if (retried) {
648
+ return 'drag 因 refId 失效已自动重试一次(可能是 React 等框架重渲染导致)';
649
+ }
650
+ break;
651
+ }
473
652
  default:
474
653
  throw new Error(`未知事件类型: ${event.type}`);
475
654
  }
@@ -505,34 +684,63 @@ async function getTargetPointExtension(unifiedSession, target, timeout) {
505
684
  if (nth >= elements.length) {
506
685
  throw new Error(`第 ${nth} 个匹配元素不存在(共 ${elements.length} 个)`);
507
686
  }
508
- const rect = elements[nth].rect;
509
- const point = {
510
- x: rect.x + rect.width / 2,
511
- y: rect.y + rect.height / 2,
512
- };
513
- // 元素定位:find() 返回视口绝对坐标
514
- if (frameOffset && isStealth) {
515
- // stealth: 转回 iframe 相对坐标
516
- return { x: point.x - frameOffset.x, y: point.y - frameOffset.y };
687
+ // 视口外时滚动后重新取 rect:与 actionableClick (left+single) 行为对齐,
688
+ // 否则非左键 / 多击的坐标路径在视口外坐标 dispatch,浏览器找不到元素,事件丢失
689
+ const refId = elements[nth].refId;
690
+ const iframeRelRect = await unifiedSession.evaluate(`(() => {
691
+ const ref = window.__mcpElementMap?.[${JSON.stringify(refId)}];
692
+ const el = ref?.deref();
693
+ if (!el) return null;
694
+ const r = el.getBoundingClientRect();
695
+ if (r.top < 0 || r.bottom > window.innerHeight || r.left < 0 || r.right > window.innerWidth) {
696
+ el.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
697
+ }
698
+ const r2 = el.getBoundingClientRect();
699
+ return { x: r2.x, y: r2.y, width: r2.width, height: r2.height };
700
+ })()`);
701
+ // refId 失效等异常:fallback 到原始 find rect(父视口绝对)
702
+ if (!iframeRelRect) {
703
+ const rect = elements[nth].rect;
704
+ const point = { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
705
+ if (frameOffset && isStealth) {
706
+ return { x: point.x - frameOffset.x, y: point.y - frameOffset.y, refId };
707
+ }
708
+ return { ...point, refId };
517
709
  }
518
- return point;
710
+ // 主 frame:iframeRelRect 就是父视口绝对
711
+ if (!frameOffset) {
712
+ return {
713
+ x: iframeRelRect.x + iframeRelRect.width / 2,
714
+ y: iframeRelRect.y + iframeRelRect.height / 2,
715
+ refId,
716
+ };
717
+ }
718
+ // iframe + stealth:消费者(chrome.scripting in iframe)需要 iframe 相对
719
+ if (isStealth) {
720
+ return {
721
+ x: iframeRelRect.x + iframeRelRect.width / 2,
722
+ y: iframeRelRect.y + iframeRelRect.height / 2,
723
+ refId,
724
+ };
725
+ }
726
+ // iframe + precise:消费者(chrome.debugger)需要父视口绝对。
727
+ // scrollIntoView({block:'center'}) 会 cascade 到父框架,导致 frameOffset 与父绝对 rect 都过期,
728
+ // refetch find() 让 content-handler 重新计算 frameOffset 并返回最新父绝对 rect
729
+ const refreshed = await unifiedSession.find(selector, text, xpath, timeout);
730
+ const rect = refreshed[nth]?.rect ?? elements[nth].rect;
731
+ return { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2, refId };
519
732
  }
520
733
  /**
521
734
  * CDP 模式:执行单个事件
522
735
  */
523
736
  async function executeEvent(session, event, humanize, timeout) {
737
+ validateEvent(event);
524
738
  switch (event.type) {
525
739
  case 'keydown': {
526
- if (!event.key) {
527
- throw new Error('keydown 事件需要 key 参数');
528
- }
529
- await session.keyDown(event.key);
740
+ await session.keyDown(event.key, event.commands);
530
741
  break;
531
742
  }
532
743
  case 'keyup': {
533
- if (!event.key) {
534
- throw new Error('keyup 事件需要 key 参数');
535
- }
536
744
  await session.keyUp(event.key);
537
745
  break;
538
746
  }
@@ -540,8 +748,12 @@ async function executeEvent(session, event, humanize, timeout) {
540
748
  if (event.target) {
541
749
  await moveToTarget(session, event.target, humanize, timeout);
542
750
  }
543
- await session.mouseDown(event.button ?? 'left');
544
- await session.mouseUp(event.button ?? 'left');
751
+ const cdpButton = event.button ?? 'left';
752
+ const cdpClickCount = event.clickCount ?? 1;
753
+ for (let i = 1; i <= cdpClickCount; i++) {
754
+ await session.mouseDown(cdpButton, i);
755
+ await session.mouseUp(cdpButton, i);
756
+ }
545
757
  break;
546
758
  }
547
759
  case 'mousedown': {
@@ -606,12 +818,21 @@ async function executeEvent(session, event, humanize, timeout) {
606
818
  if (!event.text) {
607
819
  throw new Error('type 事件需要 text 参数');
608
820
  }
821
+ if (event.dispatch) {
822
+ throw new Error('dispatch 模式需要 Extension 连接,当前为 CDP 模式');
823
+ }
609
824
  // 如果有 target,先点击目标(聚焦),使用 input 等待类型
610
825
  if (event.target) {
611
826
  await moveToTarget(session, event.target, humanize, timeout, undefined, 'input');
612
827
  await session.mouseDown('left');
613
828
  await session.mouseUp('left');
614
829
  }
830
+ else {
831
+ const hasActiveFocus = await session.evaluate('!!document.activeElement && document.activeElement !== document.body && document.activeElement !== document.documentElement');
832
+ if (!hasActiveFocus) {
833
+ throw new Error('type 事件在无 target 时需要页面已有焦点元素,请提供 target 或先 click 目标元素');
834
+ }
835
+ }
615
836
  const delay = event.delay ?? 0;
616
837
  if (humanize) {
617
838
  // 人类化打字
@@ -626,13 +847,14 @@ async function executeEvent(session, event, humanize, timeout) {
626
847
  break;
627
848
  }
628
849
  case 'wait': {
629
- if (!event.ms) {
630
- throw new Error('wait 事件需要 ms 参数');
631
- }
632
850
  await new Promise((resolve) => setTimeout(resolve, event.ms));
633
851
  break;
634
852
  }
635
853
  default:
854
+ // drag 仅在 Extension 模式可用,给出明确错误而非通用"未知事件类型"
855
+ if (event.type === 'drag') {
856
+ throw new Error('drag 事件仅在 Extension 模式下可用,当前为 CDP 模式');
857
+ }
636
858
  throw new Error(`未知事件类型: ${event.type}`);
637
859
  }
638
860
  }
@@ -695,13 +917,27 @@ export function registerInputTool(server) {
695
917
  server.registerTool('input', {
696
918
  description: `键鼠输入:键盘、鼠标及任意组合
697
919
 
698
- 组合键需拆分为独立事件。示例(Ctrl+A 全选):
920
+ 推荐操作顺序:
921
+ 1. 先用 extract type=state 或 type=html 了解页面结构
922
+ 2. 用 CSS 选择器 + nth 精确定位元素(避免坐标点击)
923
+ 3. 再 input click/type 操作目标元素
924
+
925
+ 组合键拆分为独立事件示例(修饰键 + 字母键):
699
926
  events: [
700
927
  {type: "keydown", key: "Control"},
701
928
  {type: "keydown", key: "a"},
702
929
  {type: "keyup", key: "a"},
703
930
  {type: "keyup", key: "Control"}
704
- ]`,
931
+ ]
932
+
933
+ 浏览器编辑命令(selectAll/copy/paste/cut/undo/redo 等)需用 commands 字段,跨平台可靠:
934
+ events: [
935
+ {type: "keydown", key: "a", commands: ["selectAll"]},
936
+ {type: "keyup", key: "a"}
937
+ ]
938
+ 注意:纯键盘事件(不带 commands)仅保证 JS keyboard event 可被监听,不保证触发浏览器原生编辑行为;
939
+ 全选/复制/粘贴等语义用 commands;"全选并替换文本"用 select/replace 事件更简洁;
940
+ commands 仅支持 inputMode=precise,stealth 模式下会报错`,
705
941
  inputSchema: inputSchema,
706
942
  }, (args) => handleInput(args));
707
943
  }