github-router 0.3.41 → 0.3.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-ext/background.js +824 -32
- package/dist/{lifecycle-CpnAVVQ_.js → lifecycle-DU0UI2t5.js} +2 -2
- package/dist/{lifecycle-CpnAVVQ_.js.map → lifecycle-DU0UI2t5.js.map} +1 -1
- package/dist/{lifecycle-DpnTmHCo.js → lifecycle-zr19Ot-e.js} +2 -2
- package/dist/main.js +459 -55
- package/dist/main.js.map +1 -1
- package/dist/{paths-cZle37Jp.js → paths-lwEqM5-i.js} +293 -2
- package/dist/paths-lwEqM5-i.js.map +1 -0
- package/dist/{paths-B7jmIPYq.js → paths-nd-94lLq.js} +1 -1
- package/package.json +1 -1
- package/dist/paths-cZle37Jp.js.map +0 -1
|
@@ -152,6 +152,14 @@ async function toolScreenshot(args) {
|
|
|
152
152
|
await sleep(150)
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
|
+
// Both this API and CDP Page.captureScreenshot require the browser
|
|
156
|
+
// to have a real OS-level rendering surface. On Chrome-for-Testing
|
|
157
|
+
// launched in plain headed mode without --headless=new, no such
|
|
158
|
+
// surface exists and either path hangs indefinitely — the Playwright
|
|
159
|
+
// E2E harness passes --headless=new in its args list for exactly this
|
|
160
|
+
// reason. Real Chrome with a visible window has a surface and works
|
|
161
|
+
// fine. If you're driving Chrome-for-Testing programmatically and
|
|
162
|
+
// need screenshots, launch with `--headless=new`.
|
|
155
163
|
const dataUrl = await chrome.tabs.captureVisibleTab(windowId, { format })
|
|
156
164
|
// dataUrl: "data:image/png;base64,...."
|
|
157
165
|
const m = /^data:([^;]+);base64,(.*)$/.exec(dataUrl)
|
|
@@ -194,7 +202,16 @@ async function toolReadPage(args) {
|
|
|
194
202
|
const MAX = 256 * 1024
|
|
195
203
|
let text = document.body ? document.body.innerText : ""
|
|
196
204
|
if (text.length > MAX) text = text.slice(0, MAX)
|
|
197
|
-
|
|
205
|
+
// Viewport metadata so the model can correlate CSS-px bbox to
|
|
206
|
+
// device-px pixels in browser_screenshot (device_px = css_px * dpr).
|
|
207
|
+
const viewport = {
|
|
208
|
+
width: window.innerWidth,
|
|
209
|
+
height: window.innerHeight,
|
|
210
|
+
devicePixelRatio: window.devicePixelRatio,
|
|
211
|
+
scrollX: window.scrollX,
|
|
212
|
+
scrollY: window.scrollY,
|
|
213
|
+
}
|
|
214
|
+
return { text, elements, viewport }
|
|
198
215
|
},
|
|
199
216
|
})
|
|
200
217
|
if (!result || typeof result.result !== "object") {
|
|
@@ -304,13 +321,52 @@ async function toolFill(args) {
|
|
|
304
321
|
}
|
|
305
322
|
|
|
306
323
|
async function toolScroll(args) {
|
|
307
|
-
const tabId =
|
|
324
|
+
const tabId = args.tabId
|
|
308
325
|
const target = args.target
|
|
309
|
-
|
|
326
|
+
assertTabId("browser_scroll", tabId)
|
|
327
|
+
const pixels = Number.isFinite(args.pixels) ? args.pixels : 0
|
|
310
328
|
const ref = typeof args.ref === "string" ? args.ref : null
|
|
311
|
-
if (!
|
|
312
|
-
|
|
313
|
-
|
|
329
|
+
if (!["top", "bottom", "pixels", "element", "at-pointer"].includes(target)) {
|
|
330
|
+
throw new Error(`browser_scroll: target must be top|bottom|pixels|element|at-pointer, got ${String(target)}`)
|
|
331
|
+
}
|
|
332
|
+
if (target === "at-pointer") {
|
|
333
|
+
// Wheel scroll a sub-region at a pointer location. Necessary for
|
|
334
|
+
// chat windows / infinite-scroll lists / modal bodies that have
|
|
335
|
+
// their own scroll container and ignore window.scrollTo. The wheel
|
|
336
|
+
// event bubbles through the scroll-container ancestor at the
|
|
337
|
+
// pointer location, so positioning the cursor on the right region
|
|
338
|
+
// is what makes it scroll instead of the outer window.
|
|
339
|
+
const selector = typeof args.selector === "string" ? args.selector : null
|
|
340
|
+
const x = Number.isFinite(args.x) ? args.x : undefined
|
|
341
|
+
const y = Number.isFinite(args.y) ? args.y : undefined
|
|
342
|
+
assertSingleTarget("browser_scroll(at-pointer)", ref, selector, x, y)
|
|
343
|
+
const deltaX = clampNum(Number.isFinite(args.deltaX) ? args.deltaX : 0, -10_000, 10_000)
|
|
344
|
+
const deltaY = clampNum(Number.isFinite(args.deltaY) ? args.deltaY : 0, -10_000, 10_000)
|
|
345
|
+
if (deltaX === 0 && deltaY === 0) {
|
|
346
|
+
throw new Error("browser_scroll(at-pointer): at least one of deltaX / deltaY must be non-zero")
|
|
347
|
+
}
|
|
348
|
+
const force = args.force === true
|
|
349
|
+
const pos = await resolveMouseTarget(tabId, ref, selector, x, y)
|
|
350
|
+
if (pos.hitTest && !pos.hitTest.isTarget && !force) {
|
|
351
|
+
throw new Error(`target_obscured: topmost is ${pos.hitTest.topmost || pos.hitTest.note}`)
|
|
352
|
+
}
|
|
353
|
+
return await withTabInputLock(tabId, async () => {
|
|
354
|
+
await attachDebuggerOnce(tabId)
|
|
355
|
+
// Position the cursor first so the wheel event lands on the
|
|
356
|
+
// right scroll-container ancestor.
|
|
357
|
+
await chrome.debugger.sendCommand({ tabId }, "Input.dispatchMouseEvent", {
|
|
358
|
+
type: "mouseMoved",
|
|
359
|
+
x: pos.x, y: pos.y,
|
|
360
|
+
button: "none", buttons: 0, modifiers: 0, pointerType: "mouse",
|
|
361
|
+
})
|
|
362
|
+
await chrome.debugger.sendCommand({ tabId }, "Input.dispatchMouseEvent", {
|
|
363
|
+
type: "mouseWheel",
|
|
364
|
+
x: pos.x, y: pos.y,
|
|
365
|
+
deltaX, deltaY,
|
|
366
|
+
button: "none", buttons: 0, modifiers: 0, pointerType: "mouse",
|
|
367
|
+
})
|
|
368
|
+
return { ok: true, scrolled: { x: pos.x, y: pos.y, deltaX, deltaY } }
|
|
369
|
+
})
|
|
314
370
|
}
|
|
315
371
|
const [result] = await chrome.scripting.executeScript({
|
|
316
372
|
target: { tabId },
|
|
@@ -354,22 +410,702 @@ async function toolKeyboard(args) {
|
|
|
354
410
|
// attachment. The attach stays for the tab's lifetime — chrome's
|
|
355
411
|
// "is being controlled" banner is the visible cost, accepted in
|
|
356
412
|
// exchange for cross-tool composability.
|
|
357
|
-
await
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
413
|
+
return await withTabInputLock(tabId, async () => {
|
|
414
|
+
await attachDebuggerOnce(tabId)
|
|
415
|
+
const winVK = key.length === 1 ? key.toUpperCase().charCodeAt(0) : 0
|
|
416
|
+
await chrome.debugger.sendCommand({ tabId }, "Input.dispatchKeyEvent", {
|
|
417
|
+
type: "keyDown",
|
|
418
|
+
modifiers: bits,
|
|
419
|
+
key,
|
|
420
|
+
text: key.length === 1 ? key : undefined,
|
|
421
|
+
windowsVirtualKeyCode: winVK,
|
|
422
|
+
})
|
|
423
|
+
await chrome.debugger.sendCommand({ tabId }, "Input.dispatchKeyEvent", {
|
|
424
|
+
type: "keyUp",
|
|
425
|
+
modifiers: bits,
|
|
426
|
+
key,
|
|
427
|
+
windowsVirtualKeyCode: winVK,
|
|
428
|
+
})
|
|
429
|
+
return { ok: true }
|
|
365
430
|
})
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// ---------------------------------------------------------------------
|
|
434
|
+
// Humanlike input v2: browser_mouse, browser_drag, browser_type, browser_locate
|
|
435
|
+
// ---------------------------------------------------------------------
|
|
436
|
+
// All four are CDP-driven (Input.dispatchMouseEvent / Input.dispatchKeyEvent),
|
|
437
|
+
// share the per-tab input mutex (withTabInputLock), and inherit the
|
|
438
|
+
// hardened attachDebuggerOnce. resolveMouseTarget centralises ref /
|
|
439
|
+
// selector / (x,y) → bbox-center resolution AND the elementFromPoint
|
|
440
|
+
// hit-test so all three coordinate-driven tools refuse to act on
|
|
441
|
+
// occluded targets by default (force:true bypass).
|
|
442
|
+
|
|
443
|
+
// Maps a mouse button name to the bit used in the `buttons` bitmask of
// CDP Input.dispatchMouseEvent (left = 1, right = 2, middle = 4).
// Frozen because it is a shared module-level lookup table: an accidental
// write from one tool would silently change the bitmask for every other tool.
const BUTTON_BITS = Object.freeze({ left: 1, right: 2, middle: 4 })
|
|
444
|
+
|
|
445
|
+
/**
 * Coerce a value to a number and confine it to the range [min, max].
 *
 * @param {*} v - Candidate value; non-numbers are converted with Number().
 * @param {number} min - Lower bound, also the fallback for unusable input.
 * @param {number} max - Upper bound.
 * @returns {number} The bounded number, or `min` when the coerced value is
 *   NaN or ±Infinity.
 */
function clampNum(v, min, max) {
  const numeric = typeof v === "number" ? v : Number(v)
  if (Number.isFinite(numeric)) {
    const upperBounded = Math.min(max, numeric)
    return Math.max(min, upperBounded)
  }
  // Anything that is not a finite number collapses to the lower bound.
  return min
}
|
|
450
|
+
|
|
451
|
+
// Per-tab input mutex. CDP mouse / keyboard state is global per
|
|
452
|
+
// attachment, so two parallel browser_mouse / browser_drag / browser_type
|
|
453
|
+
// calls on the same tab would interleave and corrupt each other (one
|
|
454
|
+
// call's mouseMoved would land mid-drag of another). The global
|
|
455
|
+
// MAX_INFLIGHT_TOOLS_CALL=8 cap doesn't help — it's global, not per-tab.
|
|
456
|
+
// This mutex is per-tab, layered on top.
|
|
457
|
+
// Per-tab lock queue. withTabInputLock chains every new caller onto the
// promise stored here, replaces it with its own link, and deletes the entry
// once the last queued caller has finished.
const tabInputLockTails = new Map() // tabId → Promise (tail of the lock chain)
|
|
458
|
+
|
|
459
|
+
// Wall-clock cap on how long ONE input call may hold its tab's mutex,
|
|
460
|
+
// passed per-call (each tool sizes its own cap). Acts as a deadlock
|
|
461
|
+
// release valve when an in-extension hang outlives the dispatcher's
|
|
462
|
+
// WS-side timeout — without this cap the lock would stay held forever
|
|
463
|
+
// (CDP commands don't abort when the dispatcher's WS disconnects).
|
|
464
|
+
//
|
|
465
|
+
// On wedge: we force-detach `chrome.debugger` for the tab AND bump the
|
|
466
|
+
// tab's input generation. The detach makes all in-flight `sendCommand`
|
|
467
|
+
// promises in the wedged fn() reject with "Debugger is not attached"
|
|
468
|
+
// — without this, the wedged fn could keep dispatching stale CDP
|
|
469
|
+
// events (e.g. a leftover `mouseReleased`) after the next caller has
|
|
470
|
+
// already taken the lock and started a fresh drag, corrupting it.
|
|
471
|
+
// `attachedTabs` is cleared so the next caller's `attachDebuggerOnce`
|
|
472
|
+
// re-attaches cleanly. Cost: per-tab `consoleBuffers` /
|
|
473
|
+
// `networkBuffers` are dropped (their backing CDP domain is no longer
|
|
474
|
+
// enabled); the next `browser_console_logs` / `browser_network_log`
|
|
475
|
+
// call re-`Runtime.enable` / `Network.enable` and starts capturing
|
|
476
|
+
// fresh. A loud console.warn surfaces the wedge to forensic readers.
|
|
477
|
+
//
|
|
478
|
+
// Default cap = 60s — comfortably covers mouse/drag/scroll/keyboard
|
|
479
|
+
// dispatcher maxMs (30s/30s/15s/10s) plus CDP overhead. `browser_type`
|
|
480
|
+
// passes a larger explicit cap to accommodate its legitimately-slow
|
|
481
|
+
// per-keystroke max (210s + grace).
|
|
482
|
+
// Default `holdCapMs` for withTabInputLock, in milliseconds (60 s).
const DEFAULT_TAB_INPUT_LOCK_HOLD_CAP_MS = 60_000
|
|
483
|
+
// Larger hold cap in milliseconds (240 s).
// NOTE(review): presumably passed by browser_type as its explicit holdCapMs —
// the call site is not visible in this part of the file; confirm.
const TYPE_TAB_INPUT_LOCK_HOLD_CAP_MS = 240_000
|
|
484
|
+
|
|
485
|
+
// Per-tab counter that withTabInputLock increments when a call exceeds its
// hold cap (the "wedge" path).
// NOTE(review): the trailing note also mentions a bump on each acquire; no
// such bump is visible in withTabInputLock — confirm whether another caller
// does it or the note is stale.
const tabInputGenerations = new Map() // tabId → number, bumped each acquire + on wedge
|
|
486
|
+
|
|
487
|
+
/**
 * Run `fn` while holding the input lock of one tab, so that calls for the
 * same tab execute strictly one after another.
 *
 * Callers queue by chaining onto the promise stored in `tabInputLockTails`.
 * Once it is this caller's turn, `fn` is raced against a timer of `holdCapMs`.
 * If the timer wins, the call rejects with an `input_lock_wedged` error, the
 * debugger is detached from the tab, the tab is dropped from `attachedTabs`,
 * `consoleBuffers` and `networkBuffers`, and its entry in
 * `tabInputGenerations` is incremented. The lock is released in every case.
 *
 * @param {number} tabId - Tab whose lock is taken.
 * @param {() => Promise<*>} fn - Work to run while the lock is held.
 * @param {number} [holdCapMs] - Maximum time `fn` may hold the lock.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws Whatever `fn` rejects with, or the `input_lock_wedged` error.
 */
async function withTabInputLock(tabId, fn, holdCapMs = DEFAULT_TAB_INPUT_LOCK_HOLD_CAP_MS) {
  // Join the queue: our link settles only after the predecessor is done AND
  // we have called `unlock`.
  const predecessor = tabInputLockTails.get(tabId) ?? Promise.resolve()
  let unlock
  const held = new Promise((resolve) => {
    unlock = resolve
  })
  const ownTail = predecessor.then(() => held)
  tabInputLockTails.set(tabId, ownTail)
  await predecessor

  let capTimer
  let capExceeded = false
  try {
    // Start the work before arming the timer, so a synchronous throw from
    // `fn` leaves no timer behind.
    const work = fn()
    const capExpiry = new Promise((_resolve, reject) => {
      capTimer = setTimeout(() => {
        capExceeded = true
        const message =
          `input_lock_wedged: held > ${holdCapMs}ms on tabId=${tabId}; force-detached debugger to abort the stuck CDP call.`
        reject(new Error(message))
      }, holdCapMs)
    })
    return await Promise.race([work, capExpiry])
  } finally {
    if (capTimer !== undefined) {
      clearTimeout(capTimer)
    }
    if (capExceeded) {
      console.warn(`[browser-bridge] tab ${tabId} input lock wedged past ${holdCapMs}ms — force-detaching debugger`)
      // Detaching is meant to make the stuck call's outstanding CDP commands
      // fail instead of interleaving with the next lock holder.
      try {
        await chrome.debugger.detach({ tabId })
      } catch {
        // Already detached or the tab is gone — nothing left to undo.
      }
      // Forget all per-tab debugger state so the next caller starts clean.
      attachedTabs.delete(tabId)
      consoleBuffers.delete(tabId)
      networkBuffers.delete(tabId)
      // Mark everything started before this point as stale.
      const staleGeneration = tabInputGenerations.get(tabId) || 0
      tabInputGenerations.set(tabId, staleGeneration + 1)
    }
    unlock()
    // Drop the map entry only when nobody queued behind us; otherwise the
    // stored tail belongs to a later caller.
    const nobodyQueuedBehind = tabInputLockTails.get(tabId) === ownTail
    if (nobodyQueuedBehind) {
      tabInputLockTails.delete(tabId)
    }
  }
}
|
|
538
|
+
|
|
539
|
+
/**
 * Send one CDP `Input.dispatchMouseEvent` command to a tab, with no keyboard
 * modifiers and a pointer type of "mouse".
 *
 * @param {number} tabId - Tab to send the command to.
 * @param {string} type - CDP mouse event type (e.g. "mouseMoved").
 * @param {number} x - Event x coordinate.
 * @param {number} y - Event y coordinate.
 * @param {string} button - Button that changed state, or "none".
 * @param {number} buttons - Bitmask of buttons held during the event.
 * @param {number} clickCount - Click count forwarded to CDP.
 * @returns {Promise<void>} Settles when the command has completed.
 */
async function dispatchMouseEvent(tabId, type, x, y, button, buttons, clickCount) {
  const debuggee = { tabId }
  const eventParams = {
    type,
    x,
    y,
    button,
    buttons,
    clickCount,
    modifiers: 0,
    pointerType: "mouse",
  }
  await chrome.debugger.sendCommand(debuggee, "Input.dispatchMouseEvent", eventParams)
}
|
|
544
|
+
|
|
545
|
+
// Resolve ref / selector / (x,y) → { x, y, draggable?, hitTest? }.
// hitTest carries elementFromPoint topmost-element identity so the
// caller can decide whether the target is actually clickable or is
// occluded by an overlay (default behavior: refuse with target_obscured
// unless force:true). Exclusivity (exactly ONE of ref / selector /
// (x,y)) is checked by the caller — see assertSingleTarget.
|
|
556
|
+
/**
 * Turn a target descriptor into viewport coordinates plus hit-test details.
 *
 * When both `x` and `y` are finite numbers they are rounded and returned
 * directly, with `hitTest: null` and `draggable: false`. Otherwise a function
 * is injected into the tab to look the element up (by the
 * `data-gh-router-ref` attribute when `ref` is given, else by `selector`),
 * take the centre of its bounding box, and check which element is topmost at
 * that point.
 *
 * @param {number} tabId - Tab to resolve the target in.
 * @param {?string} ref - Element ref (matched against `data-gh-router-ref`).
 * @param {?string} selector - CSS selector, used when `ref` is empty.
 * @param {number} [x] - Explicit x coordinate.
 * @param {number} [y] - Explicit y coordinate.
 * @returns {Promise<{x: number, y: number, draggable: boolean, hitTest: ?object}>}
 *   `hitTest` is `{ isTarget, topmost }`, or `{ isTarget: false, note }` when
 *   the centre lies outside the viewport.
 * @throws {Error} When no descriptor is given, the injected script returns
 *   nothing, or the element is not found.
 */
async function resolveMouseTarget(tabId, ref, selector, x, y) {
  const hasPoint = Number.isFinite(x) && Number.isFinite(y)
  if (hasPoint) {
    // Raw coordinates carry no element identity, so there is nothing to
    // hit-test against.
    return { x: Math.round(x), y: Math.round(y), draggable: false, hitTest: null }
  }
  if (!(ref || selector)) {
    throw new Error("target required: provide ref, selector, or both x and y")
  }

  const [result] = await chrome.scripting.executeScript({
    target: { tabId },
    // Runs inside the page: must not reference anything from this file.
    func: (refId, cssSelector) => {
      let query = cssSelector
      if (refId) {
        const canEscape = typeof CSS !== "undefined" && CSS.escape
        const escapedRef = canEscape ? CSS.escape(refId) : refId.replace(/["\\]/g, "\\$&")
        query = `[data-gh-router-ref="${escapedRef}"]`
      }
      const node = document.querySelector(query)
      if (!node) {
        return { error: `element not found: ${query}` }
      }

      const box = node.getBoundingClientRect()
      const centerX = Math.round(box.x + box.width / 2)
      const centerY = Math.round(box.y + box.height / 2)
      const insideHorizontally = centerX >= 0 && centerX <= window.innerWidth
      const insideVertically = centerY >= 0 && centerY <= window.innerHeight
      const draggable = node.draggable === true
      if (!(insideHorizontally && insideVertically)) {
        return {
          x: centerX,
          y: centerY,
          draggable,
          hitTest: { isTarget: false, note: "target center off-viewport" },
        }
      }

      // The hit counts only when the topmost element is the target itself or
      // one of its descendants. An ancestor being topmost is deliberately
      // NOT accepted: that is what happens when the target is covered or has
      // pointer-events disabled.
      const hit = document.elementFromPoint(centerX, centerY)
      const isTarget = !!hit && (hit === node || node.contains(hit))

      // Short "tag#id.class1.class2" label for the topmost element.
      let topmost = "(none)"
      if (hit) {
        const idPart = hit.id ? "#" + hit.id : ""
        let classPart = ""
        if (hit.className && typeof hit.className === "string") {
          const firstTwo = hit.className.split(/\s+/).filter(Boolean).slice(0, 2)
          classPart = "." + firstTwo.join(".")
        }
        topmost = `${hit.tagName.toLowerCase()}${idPart}${classPart}`
      }
      return {
        x: centerX,
        y: centerY,
        draggable,
        hitTest: { isTarget, topmost },
      }
    },
    args: [ref || null, selector || null],
  })

  if (!result || !result.result) {
    throw new Error("target resolution failed: scripting.executeScript returned nothing")
  }
  const resolved = result.result
  if (resolved.error) {
    throw new Error(resolved.error)
  }
  return resolved
}
|
|
612
|
+
|
|
613
|
+
// Validate that exactly one target descriptor is provided. The model
|
|
614
|
+
// must not silently win one over another — if both ref and (x,y) are
|
|
615
|
+
// passed, throw rather than silently picking. `prefix` is the tool /
|
|
616
|
+
// arg-group name for the error message.
|
|
617
|
+
/**
 * Validate that exactly one target descriptor was supplied: a ref, a
 * selector, or an (x, y) coordinate pair.
 *
 * A coordinate counts as supplied only when it is a finite number. This is
 * the same test resolveMouseTarget uses to enter coordinate mode, so the two
 * functions can never disagree about whether coordinates are present.
 * `null`, `NaN` and other non-numbers are treated as "not supplied".
 *
 * @param {string} prefix - Tool / argument-group name used in error messages.
 * @param {?string} ref - Element ref, or a falsy value when absent.
 * @param {?string} selector - CSS selector, or a falsy value when absent.
 * @param {number} [x] - X coordinate.
 * @param {number} [y] - Y coordinate.
 * @throws {Error} When only one of x / y is supplied, when no descriptor is
 *   supplied, or when more than one is supplied.
 */
function assertSingleTarget(prefix, ref, selector, x, y) {
  const xSet = Number.isFinite(x)
  const ySet = Number.isFinite(y)
  if (xSet !== ySet) {
    throw new Error(`${prefix}: x and y must be provided together`)
  }
  const hasCoords = xSet && ySet
  const sources = [Boolean(ref), Boolean(selector), hasCoords].filter(Boolean).length
  if (sources === 0) {
    throw new Error(`${prefix}: provide one of ref, selector, or (x, y)`)
  }
  if (sources > 1) {
    // Never silently prefer one descriptor over another.
    throw new Error(`${prefix}: pass exactly one of ref, selector, or (x, y) — not multiple`)
  }
}
|
|
634
|
+
|
|
635
|
+
/**
 * Reject anything that is not a non-negative integer tab id.
 *
 * @param {string} prefix - Tool name used in the error message.
 * @param {*} tabId - Value to validate.
 * @throws {Error} When `tabId` is not an integer or is negative.
 */
function assertTabId(prefix, tabId) {
  const isValid = Number.isInteger(tabId) && tabId >= 0
  if (isValid) return
  throw new Error(`${prefix}: tabId must be a non-negative integer`)
}
|
|
640
|
+
|
|
641
|
+
/**
 * browser_mouse tool: move the pointer to a target and optionally press,
 * release, click or double-click a button there.
 *
 * Reads from `args`: `tabId`, `action` (move|click|dblclick|down|up),
 * `button` (left|right|middle, default left), `steps` (1–100, default 1),
 * `stepDelayMs` (0–50, default 8), `force`, and exactly one target
 * descriptor out of `ref`, `selector` or (`x`, `y`).
 *
 * @param {object} args - Tool arguments as described above.
 * @returns {Promise<object>} `{ ok: true }`, plus `position` for "move".
 * @throws {Error} On invalid arguments, or `target_obscured` when the hit
 *   test says another element is topmost and `force` is not true.
 */
async function toolMouse(args) {
  const tabId = args.tabId
  const action = args.action
  assertTabId("browser_mouse", tabId)

  const knownActions = ["move", "click", "dblclick", "down", "up"]
  if (!knownActions.includes(action)) {
    throw new Error(`browser_mouse: action must be move|click|dblclick|down|up, got ${String(action)}`)
  }
  const buttonRaw = typeof args.button === "string" ? args.button : "left"
  const knownButtons = ["left", "right", "middle"]
  if (!knownButtons.includes(buttonRaw)) {
    throw new Error(`browser_mouse: button must be left|right|middle, got ${buttonRaw}`)
  }
  const button = buttonRaw
  const buttonBits = BUTTON_BITS[button]
  const steps = Math.round(clampNum(args.steps ?? 1, 1, 100))
  const stepDelayMs = Math.round(clampNum(args.stepDelayMs ?? 8, 0, 50))
  const force = args.force === true

  // Normalise the target descriptor: wrong-typed values count as absent.
  const ref = typeof args.ref === "string" ? args.ref : null
  const selector = typeof args.selector === "string" ? args.selector : null
  const x = Number.isFinite(args.x) ? args.x : undefined
  const y = Number.isFinite(args.y) ? args.y : undefined
  assertSingleTarget("browser_mouse", ref, selector, x, y)

  const target = await resolveMouseTarget(tabId, ref, selector, x, y)
  const obscured = target.hitTest && !target.hitTest.isTarget
  if (obscured && !force) {
    throw new Error(
      `target_obscured: topmost is ${target.hitTest.topmost || target.hitTest.note}. Pass force:true to bypass.`,
    )
  }

  return await withTabInputLock(tabId, async () => {
    await attachDebuggerOnce(tabId)

    // No cursor position is remembered between calls, so with more than one
    // step the approach path starts from a synthetic origin near the target.
    const waypoints = steps > 1
      ? interpolateApproach(target.x, target.y, steps)
      : [{ x: target.x, y: target.y }]
    const lastIndex = waypoints.length - 1
    for (const [index, point] of waypoints.entries()) {
      await dispatchMouseEvent(tabId, "mouseMoved", point.x, point.y, "none", 0, 1)
      if (index < lastIndex && stepDelayMs > 0) await sleep(stepDelayMs)
    }

    const press = (clickCount) =>
      dispatchMouseEvent(tabId, "mousePressed", target.x, target.y, button, buttonBits, clickCount)
    const release = (clickCount) =>
      dispatchMouseEvent(tabId, "mouseReleased", target.x, target.y, button, 0, clickCount)

    switch (action) {
      case "move":
        return { ok: true, position: { x: target.x, y: target.y } }
      case "down":
        await press(1)
        return { ok: true }
      case "up":
        await release(1)
        return { ok: true }
      case "click":
        await press(1)
        await release(1)
        return { ok: true }
      default:
        // dblclick: two full press/release cycles, the second one carrying
        // clickCount 2 — a single cycle with clickCount 2 is not treated as
        // a real double-click.
        await press(1)
        await release(1)
        await press(2)
        await release(2)
        return { ok: true }
    }
  })
}
|
|
710
|
+
|
|
711
|
+
/**
 * Build a straight-line approach path that ends exactly on the target.
 *
 * The path starts from a synthetic origin 50px left of and 20px above the
 * target (each axis clamped at 0 so no negative coordinate is produced) and
 * is split into `steps` evenly spaced, rounded waypoints. The origin itself
 * is not part of the result.
 *
 * @param {number} targetX - Final x coordinate.
 * @param {number} targetY - Final y coordinate.
 * @param {number} steps - Number of waypoints to generate.
 * @returns {Array<{x: number, y: number}>} Waypoints; the last is the target.
 */
function interpolateApproach(targetX, targetY, steps) {
  const start = {
    x: Math.max(0, targetX - 50),
    y: Math.max(0, targetY - 20),
  }
  const waypoints = []
  let step = 1
  while (step <= steps) {
    const fraction = step / steps
    waypoints.push({
      x: Math.round(start.x + (targetX - start.x) * fraction),
      y: Math.round(start.y + (targetY - start.y) * fraction),
    })
    step += 1
  }
  return waypoints
}
|
|
729
|
+
|
|
730
|
+
/**
 * browser_drag tool: drag from one target to another.
 *
 * Reads from `args`: `tabId`, `button` (left|middle, default left), `steps`
 * (1–100, default 15), `stepDelayMs` (0–50, default 12), `force`, `mode`
 * (auto|pointer|html5, default auto), and one target descriptor for each
 * end: `fromRef` / `fromSelector` / (`fromX`, `fromY`) and `toRef` /
 * `toSelector` / (`toX`, `toY`).
 *
 * In "auto" mode the drag uses the HTML5 path when the resolved source
 * reports `draggable`, and the pointer path otherwise. Only the source is
 * checked for being obscured.
 *
 * @param {object} args - Tool arguments as described above.
 * @returns {Promise<object>} `{ ok, mode_used, from: {x, y}, to: {x, y} }`.
 * @throws {Error} On invalid arguments, or `target_obscured` when the
 *   source hit test fails and `force` is not true.
 */
async function toolDrag(args) {
  const tabId = args.tabId
  assertTabId("browser_drag", tabId)

  const buttonRaw = typeof args.button === "string" ? args.button : "left"
  const buttonAllowed = ["left", "middle"].includes(buttonRaw)
  if (!buttonAllowed) {
    throw new Error(`browser_drag: button must be left|middle, got ${buttonRaw}`)
  }
  const button = buttonRaw
  const buttonBits = BUTTON_BITS[button]
  const steps = Math.round(clampNum(args.steps ?? 15, 1, 100))
  const stepDelayMs = Math.round(clampNum(args.stepDelayMs ?? 12, 0, 50))
  const force = args.force === true
  const modeRaw = typeof args.mode === "string" ? args.mode : "auto"
  const modeAllowed = ["auto", "pointer", "html5"].includes(modeRaw)
  if (!modeAllowed) {
    throw new Error(`browser_drag: mode must be auto|pointer|html5, got ${modeRaw}`)
  }

  // Read and validate one end of the drag ("from" or "to"). Wrong-typed
  // values count as absent.
  const readEndpoint = (side) => {
    const ref = typeof args[`${side}Ref`] === "string" ? args[`${side}Ref`] : null
    const selector = typeof args[`${side}Selector`] === "string" ? args[`${side}Selector`] : null
    const x = Number.isFinite(args[`${side}X`]) ? args[`${side}X`] : undefined
    const y = Number.isFinite(args[`${side}Y`]) ? args[`${side}Y`] : undefined
    assertSingleTarget(`browser_drag.${side}`, ref, selector, x, y)
    return { ref, selector, x, y }
  }
  const source = readEndpoint("from")
  const destination = readEndpoint("to")

  const from = await resolveMouseTarget(tabId, source.ref, source.selector, source.x, source.y)
  const to = await resolveMouseTarget(tabId, destination.ref, destination.selector, destination.x, destination.y)
  const sourceObscured = from.hitTest && !from.hitTest.isTarget
  if (sourceObscured && !force) {
    throw new Error(
      `target_obscured: drag source topmost is ${from.hitTest.topmost || from.hitTest.note}. Pass force:true to bypass.`,
    )
  }

  let mode = modeRaw
  if (modeRaw === "auto") {
    mode = from.draggable ? "html5" : "pointer"
  }

  return await withTabInputLock(tabId, async () => {
    await attachDebuggerOnce(tabId)
    const performDrag = mode === "html5" ? dragHtml5 : dragPointer
    await performDrag(tabId, from, to, button, buttonBits, steps, stepDelayMs)
    return {
      ok: true,
      mode_used: mode,
      from: { x: from.x, y: from.y },
      to: { x: to.x, y: to.y },
    }
  })
}
|
|
777
|
+
|
|
778
|
+
/**
 * Perform a drag using plain mouse events: move to the source, press, walk
 * to the destination in `steps` evenly spaced moves with the button held,
 * then release at the destination.
 *
 * The intermediate moves carry `buttons: buttonBits` so handlers see the
 * button as held throughout. If anything fails after the press succeeded, a
 * best-effort release is still dispatched so the button is not left pressed;
 * an error from that release is swallowed so the original error surfaces.
 *
 * @param {number} tabId - Tab to drive.
 * @param {{x: number, y: number}} from - Drag start point.
 * @param {{x: number, y: number}} to - Drag end point.
 * @param {string} button - Button name sent with press/move/release.
 * @param {number} buttonBits - `buttons` bitmask while the button is held.
 * @param {number} steps - Number of intermediate moves.
 * @param {number} stepDelayMs - Pause between moves (skipped after the last).
 * @returns {Promise<void>}
 */
async function dragPointer(tabId, from, to, button, buttonBits, steps, stepDelayMs) {
  const releaseAtDestination = () =>
    dispatchMouseEvent(tabId, "mouseReleased", to.x, to.y, button, 0, 1)

  await dispatchMouseEvent(tabId, "mouseMoved", from.x, from.y, "none", 0, 1)
  let buttonHeld = false
  try {
    await dispatchMouseEvent(tabId, "mousePressed", from.x, from.y, button, buttonBits, 1)
    buttonHeld = true

    let step = 1
    while (step <= steps) {
      const fraction = step / steps
      await chrome.debugger.sendCommand({ tabId }, "Input.dispatchMouseEvent", {
        type: "mouseMoved",
        x: Math.round(from.x + (to.x - from.x) * fraction),
        y: Math.round(from.y + (to.y - from.y) * fraction),
        button,
        buttons: buttonBits,
        modifiers: 0,
        pointerType: "mouse",
      })
      const isLastStep = step >= steps
      if (!isLastStep && stepDelayMs > 0) await sleep(stepDelayMs)
      step += 1
    }

    await releaseAtDestination()
    buttonHeld = false
  } finally {
    if (buttonHeld) {
      try {
        await releaseAtDestination()
      } catch {
        // Deliberately ignored: the error already in flight is the one the
        // caller needs to see.
      }
    }
  }
}
|
|
817
|
+
|
|
818
|
+
/**
 * Perform a drag through CDP drag interception, for sources that use the
 * HTML5 drag-and-drop path.
 *
 * Sequence: enable `Input.setInterceptDrags`, move to the source and press,
 * make up to five short moves toward the destination with the button held,
 * then poll (20ms interval, 1s budget) for an `Input.dragIntercepted` event
 * from this tab. The captured drag data is replayed at the destination as
 * dragEnter → dragOver → drop, followed by a mouse release.
 *
 * Cleanup always runs: the event listener is removed, a best-effort release
 * is dispatched if the button is still held, and interception is turned off
 * again. Errors from those last two steps are swallowed.
 *
 * @param {number} tabId - Tab to drive.
 * @param {{x: number, y: number}} from - Drag start point.
 * @param {{x: number, y: number}} to - Drop point.
 * @param {string} button - Button name sent with press/move/release.
 * @param {number} buttonBits - `buttons` bitmask while the button is held.
 * @param {number} steps - Caps the number of drag-start moves (at most 5).
 * @param {number} stepDelayMs - Pause after each drag-start move.
 * @returns {Promise<void>}
 * @throws {Error} `drag_failed` when no drag is intercepted in time; the
 *   message tells the caller to retry with mode "pointer".
 */
async function dragHtml5(tabId, from, to, button, buttonBits, steps, stepDelayMs) {
  let dragData = null
  const onDebuggerEvent = (source, method, params) => {
    const fromThisTab = source.tabId === tabId
    if (fromThisTab && method === "Input.dragIntercepted" && params && params.data) {
      dragData = params.data
    }
  }
  const setIntercept = (enabled) =>
    chrome.debugger.sendCommand({ tabId }, "Input.setInterceptDrags", { enabled })
  const releaseAtDestination = () =>
    dispatchMouseEvent(tabId, "mouseReleased", to.x, to.y, button, 0, 1)

  chrome.debugger.onEvent.addListener(onDebuggerEvent)
  let buttonHeld = false
  try {
    await setIntercept(true)
    await dispatchMouseEvent(tabId, "mouseMoved", from.x, from.y, "none", 0, 1)
    await dispatchMouseEvent(tabId, "mousePressed", from.x, from.y, button, buttonBits, 1)
    buttonHeld = true

    // Nudge the pointer up to 30% of the way toward the destination with the
    // button held, to get the browser to start a drag.
    const nudgeCount = Math.min(5, steps)
    for (let nudge = 1; nudge <= nudgeCount; nudge += 1) {
      const progress = (nudge / nudgeCount) * 0.3
      await chrome.debugger.sendCommand({ tabId }, "Input.dispatchMouseEvent", {
        type: "mouseMoved",
        x: Math.round(from.x + (to.x - from.x) * progress),
        y: Math.round(from.y + (to.y - from.y) * progress),
        button,
        buttons: buttonBits,
        modifiers: 0,
        pointerType: "mouse",
      })
      if (stepDelayMs > 0) await sleep(stepDelayMs)
    }

    // The drag payload is required for the drag events below; give the
    // interception event up to one second to arrive.
    const giveUpAt = Date.now() + 1_000
    while (!dragData && Date.now() < giveUpAt) {
      await sleep(20)
    }
    if (!dragData) {
      // Never report success for a drag that did not start.
      throw new Error("drag_failed: Input.dragIntercepted never arrived within 1s — source may not be html5-draggable or dragstart was prevented. Retry with mode:\"pointer\".")
    }

    for (const type of ["dragEnter", "dragOver", "drop"]) {
      await chrome.debugger.sendCommand({ tabId }, "Input.dispatchDragEvent", {
        type,
        x: to.x,
        y: to.y,
        data: dragData,
      })
    }
    await releaseAtDestination()
    buttonHeld = false
  } finally {
    chrome.debugger.onEvent.removeListener(onDebuggerEvent)
    if (buttonHeld) {
      try {
        await releaseAtDestination()
      } catch {
        // Ignored so the error already in flight is the one reported.
      }
    }
    try {
      await setIntercept(false)
    } catch {
      // Ignored: failing to switch interception off must not mask the result.
    }
  }
}
|
|
898
|
+
|
|
899
|
+
// browser_type: type `args.text` into whatever currently has focus in
// the tab by dispatching one CDP keyDown/keyUp pair per code point.
//
// args:
//   tabId   - target tab (checked by assertTabId).
//   text    - required string, at most 4096 UTF-16 units. CRLF and lone
//             CR are normalized to LF before validation.
//   delayMs - optional pause after each character; passed through
//             clampNum(…, 0, 50) and rounded.
//
// Resolves to { ok: true, chars } where `chars` is the number of code
// points dispatched. Throws if `text` is missing / too long, or if it
// contains a control character other than \n, \t, \b.
async function toolType(args) {
  const tabId = args.tabId
  assertTabId("browser_type", tabId)
  const textRaw = args.text
  if (typeof textRaw !== "string") throw new Error("browser_type: text (string) is required")
  if (textRaw.length > 4096) {
    throw new Error("browser_type: text exceeds 4096-character limit")
  }
  // Normalize CRLF / lone CR to LF so Windows-origin clipboard text and
  // HTTP-response text don't trip the control-char rejection below. The
  // intent of "\r\n" is unambiguously a single newline.
  const text = textRaw.replace(/\r\n?/g, "\n")
  const delayMs = Math.round(clampNum(args.delayMs ?? 0, 0, 50))
  // Validate up front, before any key event is sent: \n, \t, \b are
  // remapped to named keys (Enter / Tab / Backspace); every other
  // control char (< 0x20) has no key mapping and would produce junk
  // events, so reject it and point the caller at browser_keyboard.
  for (const ch of text) {
    const code = ch.codePointAt(0)
    if (code < 0x20 && code !== 0x0A && code !== 0x09 && code !== 0x08) {
      const hex = code.toString(16).toUpperCase().padStart(4, "0")
      throw new Error(
        `invalid_text: control char U+${hex} not supported. browser_type whitelist: \\n=Enter, \\t=Tab, \\b=Backspace, \\r normalized to \\n. Use browser_keyboard for other control sequences.`,
      )
    }
  }
  return await withTabInputLock(tabId, async () => {
    await attachDebuggerOnce(tabId)
    let count = 0
    // for…of iterates by code point, so an astral character (emoji etc.)
    // is sent as ONE key event carrying the full surrogate pair as text.
    for (const ch of text) {
      const code = ch.codePointAt(0)
      let key, codeStr, vkc, sendText
      if (code === 0x0A) {
        // Enter must carry text "\r": Chromium only emits the keypress
        // (which inserts the newline in a <textarea> and triggers
        // implicit form submission) when the keyDown has text. This is
        // the same definition Puppeteer/Playwright use for Enter.
        key = "Enter"; codeStr = "Enter"; vkc = 13; sendText = "\r"
      } else if (code === 0x09) {
        key = "Tab"; codeStr = "Tab"; vkc = 9; sendText = undefined
      } else if (code === 0x08) {
        key = "Backspace"; codeStr = "Backspace"; vkc = 8; sendText = undefined
      } else {
        key = ch
        // Letters and digits use their upper-cased charCode, which
        // coincides with VK_A..VK_Z / VK_0..VK_9. Punctuation goes
        // through PUNCT_TO_VK because the naive charCode would collide
        // with unrelated VKs (e.g. '.' = 46 = VK_DELETE). Everything
        // else gets 0 and CDP infers semantics from `key` + `text`.
        const punctVk = PUNCT_TO_VK[ch]
        if (/^[a-zA-Z0-9]$/.test(ch)) {
          vkc = ch.toUpperCase().charCodeAt(0)
        } else if (punctVk !== undefined) {
          vkc = punctVk
        } else {
          vkc = 0
        }
        codeStr = deriveKeyCode(ch)
        sendText = ch
      }
      // keyDown WITH text fires keydown + keypress + input together.
      // Do NOT also send a separate `char` event — that would
      // double-fire keypress/input on most sites.
      const downParams = {
        type: "keyDown",
        key,
        code: codeStr || undefined,
        modifiers: 0,
        windowsVirtualKeyCode: vkc,
      }
      if (sendText !== undefined) downParams.text = sendText
      await chrome.debugger.sendCommand({ tabId }, "Input.dispatchKeyEvent", downParams)
      await chrome.debugger.sendCommand({ tabId }, "Input.dispatchKeyEvent", {
        type: "keyUp",
        key,
        code: codeStr || undefined,
        modifiers: 0,
        windowsVirtualKeyCode: vkc,
      })
      count++
      if (delayMs > 0) await sleep(delayMs)
    }
    return { ok: true, chars: count }
  }, TYPE_TAB_INPUT_LOCK_HOLD_CAP_MS)
}
|
|
986
|
+
|
|
987
|
+
// Windows VK codes for the printable punctuation that browser_type
|
|
988
|
+
// needs to send. Letters and digits aren't here — their natural
|
|
989
|
+
// charCode happens to match VK_A..VK_Z / VK_0..VK_9 and the typing
|
|
990
|
+
// loop derives those inline. This table covers the unshifted AND
|
|
991
|
+
// shift-modified character on each US-layout punctuation key; both
|
|
992
|
+
// map to the same physical-key VK (the shift state is implied by the
|
|
993
|
+
// `text` field, and we don't dispatch a separate shift keydown).
|
|
994
|
+
//
|
|
995
|
+
// Source: Windows VK reference (learn.microsoft.com/...windows-keyboard-codes)
|
|
996
|
+
// — VK_OEM_* are the layout-specific punctuation codes (US-QWERTY here).
|
|
997
|
+
const PUNCT_TO_VK = Object.freeze({
  // Shifted digit row: each symbol reports the VK of the digit key it
  // lives on (VK_0..VK_9 = 48..57).
  "!": 49,
  "@": 50,
  "#": 51,
  "$": 52,
  "%": 53,
  "^": 54,
  "&": 55,
  "*": 56,
  "(": 57,
  ")": 48,
  // OEM punctuation keys (US-QWERTY): unshifted and shifted character
  // share the VK of their physical key.
  ";": 186, // VK_OEM_1
  ":": 186,
  "=": 187, // VK_OEM_PLUS
  "+": 187,
  ",": 188, // VK_OEM_COMMA
  "<": 188,
  "-": 189, // VK_OEM_MINUS
  "_": 189,
  ".": 190, // VK_OEM_PERIOD
  ">": 190,
  "/": 191, // VK_OEM_2
  "?": 191,
  "`": 192, // VK_OEM_3
  "~": 192,
  "[": 219, // VK_OEM_4
  "{": 219,
  "\\": 220, // VK_OEM_5
  "|": 220,
  "]": 221, // VK_OEM_6
  "}": 221,
  "'": 222, // VK_OEM_7
  "\"": 222,
  " ": 32, // VK_SPACE
})
|
|
1015
|
+
|
|
1016
|
+
// US-QWERTY punctuation → KeyboardEvent.code of the physical key the
// character lives on. Built once at module load rather than on every
// deriveKeyCode call (which runs once per typed character). A Map is
// used so lookups never fall through to Object.prototype.
const PUNCT_TO_KEY_CODE = new Map([
  // Number-row shift partners
  ["!", "Digit1"], ["@", "Digit2"], ["#", "Digit3"], ["$", "Digit4"], ["%", "Digit5"],
  ["^", "Digit6"], ["&", "Digit7"], ["*", "Digit8"], ["(", "Digit9"], [")", "Digit0"],
  // OEM keys (US-QWERTY)
  ["-", "Minus"], ["_", "Minus"],
  ["=", "Equal"], ["+", "Equal"],
  ["[", "BracketLeft"], ["{", "BracketLeft"],
  ["]", "BracketRight"], ["}", "BracketRight"],
  ["\\", "Backslash"], ["|", "Backslash"],
  [";", "Semicolon"], [":", "Semicolon"],
  ["'", "Quote"], ['"', "Quote"],
  [",", "Comma"], ["<", "Comma"],
  [".", "Period"], [">", "Period"],
  ["/", "Slash"], ["?", "Slash"],
  ["`", "Backquote"], ["~", "Backquote"],
])

// Best-effort `code` field for a single typed character.
//
// Covers ASCII printable characters including shift-modified
// punctuation (! → Digit1, @ → Digit2, < → Comma, …) so `event.code`
// reports the PHYSICAL key — sites that check `event.code === "Digit1"`
// behave the same whether `1` or `!` was typed.
//
// ch: the character to look up.
// Returns the code string, or "" for anything without a mapping
// (non-ASCII, multi-character strings, etc.). Always a string.
function deriveKeyCode(ch) {
  if (/^[a-zA-Z]$/.test(ch)) return "Key" + ch.toUpperCase()
  if (/^[0-9]$/.test(ch)) return "Digit" + ch
  if (ch === " ") return "Space"
  return PUNCT_TO_KEY_CODE.get(ch) ?? ""
}
|
|
1045
|
+
|
|
1046
|
+
// browser_locate: report where an element sits in the tab's viewport
// and whether a click at its center would actually land on it.
//
// args:
//   tabId    - target tab (checked by assertTabId).
//   ref      - value of the element's data-gh-router-ref attribute, OR
//   selector - a CSS selector. Exactly one of ref / selector is required.
//
// Resolves to { found: false, viewport } when nothing matches, otherwise
// { found, bbox, center, inView, visible, pointerEvents,
//   topmostAtCenter, viewport }. Throws on bad arguments or when the
// injected script yields no object result.
async function toolLocate(args) {
  const { tabId } = args
  const ref = typeof args.ref === "string" ? args.ref : null
  const selector = typeof args.selector === "string" ? args.selector : null
  assertTabId("browser_locate", tabId)
  if (!ref && !selector) throw new Error("browser_locate: ref or selector is required")
  if (ref && selector) throw new Error("browser_locate: pass exactly one of ref or selector, not both")

  // Runs inside the page. It is serialized by executeScript, so it must
  // stay self-contained and only use its own parameters and page globals.
  const probe = (refArg, selectorArg) => {
    const viewport = {
      width: window.innerWidth,
      height: window.innerHeight,
      devicePixelRatio: window.devicePixelRatio,
      scrollX: window.scrollX,
      scrollY: window.scrollY,
    }

    let query = selectorArg
    if (refArg) {
      const canEscape = typeof CSS !== "undefined" && CSS.escape
      const escaped = canEscape ? CSS.escape(refArg) : refArg.replace(/["\\]/g, "\\$&")
      query = `[data-gh-router-ref="${escaped}"]`
    }

    const el = document.querySelector(query)
    if (!el) return { found: false, viewport }

    const box = el.getBoundingClientRect()
    const cx = Math.round(box.x + box.width / 2)
    const cy = Math.round(box.y + box.height / 2)
    const style = getComputedStyle(el)

    const hasArea = box.width > 0 && box.height > 0
    const visible =
      hasArea
      && style.display !== "none"
      && style.visibility !== "hidden"
      && parseFloat(style.opacity || "1") > 0
    const inView = cx >= 0 && cy >= 0 && cx <= window.innerWidth && cy <= window.innerHeight

    // Hit-test only when the center is inside the viewport. The target
    // counts as hit when the topmost node IS the element or one of its
    // descendants; a topmost ancestor/unrelated node means it is obscured.
    const hitTest = () => {
      const top = document.elementFromPoint(cx, cy)
      const topRef = top && top.getAttribute ? top.getAttribute("data-gh-router-ref") : null
      return {
        isTarget: !!top && (top === el || el.contains(top)),
        tag: top ? top.tagName.toLowerCase() : null,
        refOrSelector: topRef || null,
      }
    }
    const topmostAtCenter = inView ? hitTest() : null

    return {
      found: true,
      bbox: [box.x, box.y, box.width, box.height].map((v) => Math.round(v)),
      center: [cx, cy],
      inView,
      visible,
      pointerEvents: style.pointerEvents,
      topmostAtCenter,
      viewport,
    }
  }

  const [frame] = await chrome.scripting.executeScript({
    target: { tabId },
    func: probe,
    args: [ref || null, selector || null],
  })
  if (!frame || typeof frame.result !== "object") {
    throw new Error("browser_locate: scripting.executeScript returned nothing")
  }
  return frame.result
}
|
|
374
1110
|
|
|
375
1111
|
async function toolWait(args) {
|
|
@@ -504,18 +1240,56 @@ const attachedTabs = new Set()
|
|
|
504
1240
|
const MAX_BUFFER_ENTRIES = 1000
|
|
505
1241
|
|
|
506
1242
|
// Ensure chrome.debugger is attached to `tabId` (at most once per tab)
// and, on request, that console / network capture is switched on.
//
// tabId: tab to attach to.
// opts:  optional { console?: boolean, network?: boolean }. A truthy
//        flag creates the matching per-tab buffer and sends
//        Runtime.enable / Network.enable the first time it is seen.
//
// Rejects if attach fails for any reason other than "already attached",
// if the session turns out to be owned by someone else, or if enabling
// a requested domain fails.
async function attachDebuggerOnce(tabId, opts) {
  // navigator.locks serializes concurrent attach attempts after MV3 SW
  // respawn (when the in-memory attachedTabs Set is wiped but Chrome may
  // have kept the underlying CDP attachment alive past the SW death).
  // Without this lock, two parallel tool calls would both call
  // chrome.debugger.attach and the loser would throw
  // "Another debugger is already attached to this target".
  //
  // The "already attached" branch is subtle: "already attached" can mean
  // (a) WE attached and the SW just lost the cache, OR (b) DevTools /
  // another extension owns the session and we DON'T. Don't blindly trust
  // (a) — that would poison the cache and every subsequent sendCommand
  // would fail with cryptic CDP errors. Prove ownership with a no-op
  // Runtime.evaluate; only cache on success.
  await navigator.locks.request(`browser-mcp:debugger-attach:${tabId}`, async () => {
    if (!attachedTabs.has(tabId)) {
      let mustVerifyOwnership = false
      try {
        await chrome.debugger.attach({ tabId }, "1.3")
      } catch (err) {
        const msg = err && err.message ? err.message : String(err)
        const alreadyAttached = /already attached/i.test(msg) || /already debugging/i.test(msg)
        if (!alreadyAttached) throw err
        // "Already attached" — could be us (Chrome kept the attachment
        // past our SW death) or another debugger (DevTools open, etc.).
        // Don't cache yet; verify below.
        mustVerifyOwnership = true
      }
      if (mustVerifyOwnership) {
        try {
          await chrome.debugger.sendCommand({ tabId }, "Runtime.evaluate", {
            expression: "1", returnByValue: true,
          })
          // sendCommand succeeded → we own the attachment. Safe to cache.
        } catch (err) {
          // Keep the underlying CDP error reachable via `cause` for
          // debugging; the message is what callers see.
          throw new Error(
            "browser-mcp: chrome.debugger reports attached but we do not own the session — likely DevTools is open on this tab (or another extension is debugging). Close DevTools and retry.",
            { cause: err },
          )
        }
      }
      attachedTabs.add(tabId)
    }
    // The buffer is created BEFORE the enable command so that events
    // delivered while the command is in flight have somewhere to land.
    // If the enable fails, roll the buffer back: otherwise its mere
    // presence would make every later call skip the enable and the
    // domain would silently stay off for this tab.
    if (opts?.console && !consoleBuffers.has(tabId)) {
      consoleBuffers.set(tabId, [])
      try {
        await chrome.debugger.sendCommand({ tabId }, "Runtime.enable")
      } catch (err) {
        consoleBuffers.delete(tabId)
        throw err
      }
    }
    if (opts?.network && !networkBuffers.has(tabId)) {
      networkBuffers.set(tabId, [])
      try {
        await chrome.debugger.sendCommand({ tabId }, "Network.enable")
      } catch (err) {
        networkBuffers.delete(tabId)
        throw err
      }
    }
  })
}
|
|
520
1294
|
|
|
521
1295
|
chrome.debugger.onEvent.addListener((source, method, params) => {
|
|
@@ -548,9 +1322,23 @@ chrome.debugger.onDetach.addListener((source) => {
|
|
|
548
1322
|
attachedTabs.delete(source.tabId)
|
|
549
1323
|
consoleBuffers.delete(source.tabId)
|
|
550
1324
|
networkBuffers.delete(source.tabId)
|
|
1325
|
+
tabInputLockTails.delete(source.tabId)
|
|
551
1326
|
}
|
|
552
1327
|
})
|
|
553
1328
|
|
|
1329
|
+
// Clean per-tab state on tab close. attachedTabs / consoleBuffers /
|
|
1330
|
+
// networkBuffers are also cleaned by debugger.onDetach above (Chrome
|
|
1331
|
+
// detaches on tab close), but doing it here too is cheap and protects
|
|
1332
|
+
// against listener ordering surprises. tabInputLockTails is NOT
|
|
1333
|
+
// cleaned by onDetach in some scenarios (the lock-chain Map can leak
|
|
1334
|
+
// if a drag was in flight when the tab closed); cover it here.
|
|
1335
|
+
chrome.tabs.onRemoved.addListener((closedTabId) => {
  // Forget everything keyed by the closed tab: the attach cache, both
  // capture buffers, and the per-tab input-lock chain.
  const perTabStores = [attachedTabs, consoleBuffers, networkBuffers, tabInputLockTails]
  for (const store of perTabStores) {
    store.delete(closedTabId)
  }
})
|
|
1341
|
+
|
|
554
1342
|
async function toolConsoleLogs(args) {
|
|
555
1343
|
const tabId = typeof args.tabId === "number" ? args.tabId : undefined
|
|
556
1344
|
const level = typeof args.level === "string" ? args.level : "all"
|
|
@@ -594,6 +1382,10 @@ const TOOL_HANDLERS = {
|
|
|
594
1382
|
browser_download: toolDownload,
|
|
595
1383
|
browser_console_logs: toolConsoleLogs,
|
|
596
1384
|
browser_network_log: toolNetworkLog,
|
|
1385
|
+
browser_mouse: toolMouse,
|
|
1386
|
+
browser_drag: toolDrag,
|
|
1387
|
+
browser_type: toolType,
|
|
1388
|
+
browser_locate: toolLocate,
|
|
597
1389
|
}
|
|
598
1390
|
|
|
599
1391
|
// ---------------------------------------------------------------------
|