pi-chrome 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "Pi Existing Chrome Profile Bridge",
4
- "version": "0.7.0",
4
+ "version": "0.8.0",
5
5
  "description": "Lets Pi control tabs in this existing Chrome profile via a local bridge at 127.0.0.1.",
6
- "permissions": ["tabs", "scripting", "storage", "activeTab", "alarms"],
6
+ "permissions": ["tabs", "scripting", "storage", "activeTab", "alarms", "webNavigation"],
7
7
  "host_permissions": ["<all_urls>", "http://127.0.0.1:17318/*"],
8
8
  "background": {
9
9
  "service_worker": "service_worker.js"
@@ -4,8 +4,6 @@ const POLL_ERROR_BACKOFF_MS = 2000;
4
4
  let polling = false;
5
5
 
// Schedules the repeating "pi-bridge-keepalive" alarm.
// MV3 service workers can be suspended; a periodic alarm is the supported way to wake the
// extension again. The period used here is 0.5 minutes (30 seconds).
function armKeepaliveAlarm() {
  const alarmName = "pi-bridge-keepalive";
  const schedule = { periodInMinutes: 0.5 };
  chrome.alarms.create(alarmName, schedule);
}
11
9
 
@@ -41,17 +39,12 @@ async function pollLoop() {
41
39
  polling = true;
42
40
  try {
43
41
  while (true) {
44
- // Long-poll /next continuously. The bridge holds the request for up to ~25s when no
45
- // command is pending and returns {type:"none"}; we immediately re-issue the fetch so
46
- // commands sent while the SW is otherwise idle still get picked up promptly. The open
47
- // fetch also keeps the MV3 service worker alive between alarm wake-ups.
48
42
  const response = await fetch(`${BRIDGE_URL}/next?name=${encodeURIComponent(CLIENT_NAME)}`, {
49
43
  cache: "no-store",
50
44
  });
51
45
  if (!response.ok) throw new Error(`bridge /next HTTP ${response.status}`);
52
46
  const payload = await response.json();
53
47
  if (payload.type === "command") await handleCommand(payload.command);
54
- // Otherwise (type:"none"), loop and re-issue the long-poll.
55
48
  }
56
49
  } catch (error) {
57
50
  await sleep(POLL_ERROR_BACKOFF_MS);
@@ -107,11 +100,22 @@ async function dispatch(action, params) {
107
100
  return { closed: tab.id };
108
101
  }
109
102
  case "page.snapshot":
110
- return executeInTab(params, snapshotPage, [params.maxElements || 80]);
103
+ return executeInTab(params, snapshotPage, [
104
+ params.maxElements || 80,
105
+ params.containingText ?? null,
106
+ params.roleFilter ?? null,
107
+ params.nearUid ?? null,
108
+ ]);
111
109
  case "page.evaluate":
112
- return executeInTab(params, evaluateExpression, [params.expression, params.awaitPromise !== false]);
110
+ return evaluateInTab(params);
113
111
  case "page.click":
114
112
  return executeActionInTab(params, clickPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
113
+ case "page.hover":
114
+ return executeActionInTab(params, hoverPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
115
+ case "page.drag":
116
+ return executeActionInTab(params, dragPage, [params.fromUid ?? null, params.fromSelector ?? null, params.fromX ?? null, params.fromY ?? null, params.toUid ?? null, params.toSelector ?? null, params.toX ?? null, params.toY ?? null, params.steps ?? 12]);
117
+ case "page.upload":
118
+ return executeActionInTab(params, uploadFiles, [params.selector ?? null, params.uid ?? null, params.files || []]);
115
119
  case "page.type":
116
120
  return executeActionInTab(params, typeIntoPage, [params.selector ?? null, params.uid ?? null, params.text || "", Boolean(params.pressEnter)]);
117
121
  case "page.fill":
@@ -126,38 +130,27 @@ async function dispatch(action, params) {
126
130
  return executeInTab(params, getNetworkRequest, [params.requestId]);
127
131
  case "page.waitFor":
128
132
  return executeInTab(params, waitForPage, [params.kind, params.value, params.timeoutMs || 10000, params.intervalMs || 250]);
133
+ case "page.probe":
134
+ // Lightweight capability probe for /chrome-doctor. Runs in MAIN world.
135
+ return executeInTab(params, probePage, []);
129
136
  case "page.navigate": {
130
137
  const tab = await getTabByParams(params);
131
138
  if (params.foreground) await bringToFront(tab);
139
+ if (params.initScript) {
140
+ // Register a one-shot document_start content script. We register, navigate, wait, then unregister.
141
+ await registerInitScript(tab.id, params.initScript);
142
+ }
132
143
  const wait = params.waitUntilLoad !== false ? waitForTabComplete(tab.id, params.timeoutMs || 15000) : Promise.resolve(undefined);
133
144
  const updated = await chrome.tabs.update(tab.id, { url: params.url });
134
- await wait;
135
- return formatTab(await chrome.tabs.get(updated.id));
136
- }
137
- case "page.screenshot": {
138
- const tab = await getTabByParams(params);
139
- if (params.foreground) await bringToFront(tab);
140
- // captureVisibleTab requires the target tab to be the active tab in its window. Activate it
141
- // without focusing the window so other apps don't get pushed behind Chrome, and restore the
142
- // previous active tab afterwards to minimize disruption.
143
- let previousActiveId;
144
- if (!tab.active) {
145
- const activeBefore = await chrome.tabs.query({ active: true, windowId: tab.windowId });
146
- previousActiveId = activeBefore[0]?.id;
147
- await chrome.tabs.update(tab.id, { active: true });
148
- }
149
145
  try {
150
- const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, {
151
- format: params.format || "png",
152
- quality: params.format === "jpeg" ? params.quality : undefined,
153
- });
154
- return { dataUrl, tab: formatTab(tab) };
146
+ await wait;
155
147
  } finally {
156
- if (previousActiveId !== undefined && previousActiveId !== tab.id) {
157
- await chrome.tabs.update(previousActiveId, { active: true }).catch(() => undefined);
158
- }
148
+ if (params.initScript) await unregisterInitScript(tab.id).catch(() => undefined);
159
149
  }
150
+ return formatTab(await chrome.tabs.get(updated.id));
160
151
  }
152
+ case "page.screenshot":
153
+ return takeScreenshot(params);
161
154
  default:
162
155
  throw new Error(`Unknown action: ${action}`);
163
156
  }
@@ -198,26 +191,40 @@ async function getTabByParams(params) {
198
191
  return tab;
199
192
  }
200
193
 
// Helper functions whose source text is concatenated into the script injected into the page's
// MAIN world. Callers reference them via `.toString()`, so every function that an injected action
// calls *by name* must be listed here; otherwise the name is undefined inside the page.
const HELPER_FUNCS = [
  getPiChromeState,
  rememberElement,
  elementBySelectorOrUid,
  installPiChromeInstrumentation,
  resolvePoint,
  dispatchInputEvents,
  setNativeValue,
  normalizeKey,
  isElementVisible,
  occluderAt,
  pageHash,
  pointerEventSequence,
  // typeIntoPage (pressEnter) and fillPage (submit) call pressKeyInPage() from inside the page.
  // Without this entry those options fail with "pressKeyInPage is not defined".
  pressKeyInPage,
];
211
+
201
212
  async function executeInTab(params, func, args) {
202
213
  const tab = await getTabByParams(params);
203
214
  if (params.foreground) await bringToFront(tab);
204
- const helperSource = [
205
- getPiChromeState,
206
- rememberElement,
207
- elementBySelectorOrUid,
208
- installPiChromeInstrumentation,
209
- resolvePoint,
210
- dispatchInputEvents,
211
- setNativeValue,
212
- normalizeKey,
213
- ].map((helper) => helper.toString()).join("\n");
215
+ const helperSource = HELPER_FUNCS.map((helper) => helper.toString()).join("\n");
214
216
  const results = await chrome.scripting.executeScript({
215
217
  target: { tabId: tab.id },
216
218
  world: "MAIN",
217
219
  func: async (helperSource, source, invocationArgs) => {
218
220
  try {
219
- (0, eval)(helperSource);
220
- const injected = (0, eval)(`(${source})`);
221
+ // Helpers are plain function declarations, compiled here with the Function constructor. Note
222
+ // that `new Function` is gated by the same CSP `'unsafe-eval'` rule as `eval`; it is not a bypass.
223
+ new Function(helperSource).call(globalThis);
224
+ // The action itself is reconstructed from its source text with `new Function`. Like `eval`,
225
+ // this is blocked on pages whose `script-src` omits `'unsafe-eval'`, so injection can still
226
+ // fail on production sites that ship a strict CSP.
227
+ const injected = new Function(helperSource + "\nreturn (" + source + ");").call(globalThis);
221
228
  return { ok: true, value: await injected(...invocationArgs) };
222
229
  } catch (error) {
223
230
  return { ok: false, error: error?.stack || error?.message || String(error) };
@@ -237,15 +244,108 @@ async function executeInTab(params, func, args) {
237
244
  return envelope?.value;
238
245
  }
239
246
 
// Dedicated executor for page.evaluate. It does not go through the helper-source injection chain:
// a single function is compiled in the page's MAIN world and invoked directly.
//
// params.expression  Source text. Tried first as an expression (so `1+1` or `document.title;`
//                    work without `return`), then as a statement body (loops, declarations,
//                    explicit `return`).
// params.foreground  When truthy the tab is brought to the front first.
// params.awaitPromise Accepted for compatibility. The code always runs inside an async wrapper,
//                    so a returned promise is always settled before the result is sent back.
//
// Returns the evaluated value. Values that cannot cross the scripting boundary are converted:
// undefined -> undefined, function -> "[Function: ...]", symbol -> "[Symbol: ...]",
// bigint -> decimal string. An Error *value* (or any thrown error) is rethrown here.
//
// NOTE: the Function constructor is subject to the page's CSP exactly like `eval`. On pages
// without `'unsafe-eval'` compilation fails; that case is reported with an explicit message.
async function evaluateInTab(params) {
  const tab = await getTabByParams(params);
  if (params.foreground) await bringToFront(tab);
  const expression = String(params.expression ?? "");
  // Private tag for the special-value envelopes. A dedicated key (instead of a generic `kind`)
  // keeps ordinary user objects such as `{ kind: "error" }` from being mistaken for markers.
  const MARKER = "__piChromeKind";
  const results = await chrome.scripting.executeScript({
    target: { tabId: tab.id },
    world: "MAIN",
    // The injected function is serialized, so it cannot close over service-worker variables;
    // everything it needs is passed through `args`.
    func: async (expression, marker) => {
      const tagged = (kind, fields) => ({ [marker]: kind, ...fields });
      const encode = (v) => {
        if (v === undefined) return tagged("undefined");
        if (typeof v === "function") return tagged("function", { source: v.toString().slice(0, 500) });
        if (typeof v === "symbol") return tagged("symbol", { description: v.description });
        if (typeof v === "bigint") return tagged("bigint", { value: v.toString() });
        if (v instanceof Error) return tagged("error", { name: v.name, message: v.message, stack: v.stack });
        return v;
      };
      const compile = (src) => {
        // Trailing semicolons/whitespace would turn a plain expression into a syntax error and
        // push it into statement mode, where it would silently evaluate to undefined.
        const asExpression = src.replace(/[;\s]+$/, "");
        try {
          // Newlines around the source keep a trailing `// comment` from swallowing the wrapper.
          return new Function(`return (async () => (\n${asExpression}\n))();`);
        } catch (firstError) {
          if (firstError && firstError.name === "EvalError") {
            throw new Error(`Page Content-Security-Policy blocks dynamic code evaluation (no 'unsafe-eval'): ${firstError.message}`);
          }
          if (!firstError || firstError.name !== "SyntaxError") throw firstError;
          return new Function(`return (async () => {\n${src}\n})();`);
        }
      };
      try {
        const fn = compile(expression);
        const value = await fn.call(globalThis);
        return { ok: true, value: encode(value) };
      } catch (error) {
        return { ok: false, error: error?.stack || error?.message || String(error) };
      }
    },
    args: [expression, MARKER],
  });
  const first = results?.[0];
  if (first?.error) {
    const message = typeof first.error === "string" ? first.error : (first.error.message || JSON.stringify(first.error));
    throw new Error(`chrome_evaluate failed: ${message}`);
  }
  const envelope = first?.result;
  if (!envelope) throw new Error("chrome_evaluate returned no envelope from MAIN world");
  if (envelope.ok === false) throw new Error(envelope.error || "chrome_evaluate failed");
  const v = envelope.value;
  // Unwrap the special markers produced by `encode` in the MAIN world.
  if (v && typeof v === "object" && !Array.isArray(v) && typeof v[MARKER] === "string") {
    switch (v[MARKER]) {
      case "undefined":
        return undefined;
      case "function":
        return `[Function: ${v.source}]`;
      case "symbol":
        return `[Symbol: ${v.description}]`;
      case "bigint":
        return v.value;
      case "error":
        throw new Error(`${v.name}: ${v.message}\n${v.stack || ""}`);
      default:
        break;
    }
  }
  return v;
}
314
+
// Runs an action function in the tab and, when `params.includeSnapshot` is set, follows it with
// an unfiltered page snapshot (limited to `params.maxElements`, default 80).
// Returns the bare action result, or `{ result, snapshot }` when a snapshot was requested.
async function executeActionInTab(params, func, args) {
  const actionResult = await executeInTab(params, func, args);
  if (!params.includeSnapshot) return actionResult;

  // The snapshot pass never re-focuses the tab, and passes null for the three filter arguments.
  const snapshotParams = { ...params, foreground: false };
  const snapshotArgs = [params.maxElements || 80, null, null, null];
  const snapshot = await executeInTab(snapshotParams, snapshotPage, snapshotArgs);
  return { result: actionResult, snapshot };
}
248
323
 
// Init script registry, scoped per tab. Despite its name, `initScriptIds` maps a tab id to the
// script *source text* to inject. While an entry exists, the source is injected into the tab's
// top frame (MAIN world) on every committed main-frame navigation. The entry is NOT cleared by
// the injection itself; callers must call unregisterInitScript() when they are done.
const initScriptIds = new Map();

// Remembers `source` as the init script for `tabId`, replacing any previous entry.
async function registerInitScript(tabId, source) {
  initScriptIds.set(tabId, source);
}

// Forgets the init script for `tabId`; a no-op when none is registered.
async function unregisterInitScript(tabId) {
  initScriptIds.delete(tabId);
}

if (chrome.webNavigation && chrome.webNavigation.onCommitted) {
  chrome.webNavigation.onCommitted.addListener((details) => {
    // Only the top-level frame (frameId 0) receives the init script.
    if (details.frameId !== 0) return;
    const source = initScriptIds.get(details.tabId);
    if (!source) return;
    chrome.scripting.executeScript({
      target: { tabId: details.tabId, frameIds: [0] },
      world: "MAIN",
      injectImmediately: true,
      func: (code) => { try { new Function(code).call(globalThis); } catch (e) { console.error("[pi-chrome init script]", e); } },
      args: [source],
    }).catch((error) => {
      // Injection stays best-effort, but the failure is now logged instead of silently dropped.
      console.warn("[pi-chrome init script] injection failed", error);
    });
  });
}
348
+
249
349
  async function bringToFront(tab) {
250
350
  await chrome.windows.update(tab.windowId, { focused: true });
251
351
  await chrome.tabs.update(tab.id, { active: true });
@@ -268,6 +368,56 @@ function waitForTabComplete(tabId, timeoutMs) {
268
368
  });
269
369
  }
270
370
 
// Captures a screenshot of the tab selected by `params`.
//
// captureVisibleTab is called with the tab's window id, so an inactive tab is first made the
// active tab of its window; the previously active tab is restored afterwards.
//
// params.format / params.quality  Image format (default "png"); quality is only passed for "jpeg".
// params.fullPage                 When truthy, the page is scrolled tile by tile and one capture
//                                 per tile is returned (stitching is left to the caller).
//
// Returns `{ dataUrl, tab }`, or for full-page mode
// `{ fullPage: true, tab, dimensions, tiles: [{ y, dataUrl }] }`.
async function takeScreenshot(params) {
  const tab = await getTabByParams(params);
  if (params.foreground) await bringToFront(tab);
  const captureOptions = {
    format: params.format || "png",
    quality: params.format === "jpeg" ? params.quality : undefined,
  };
  const pageParams = { ...params, foreground: false };
  let previousActiveId;
  if (!tab.active) {
    const activeBefore = await chrome.tabs.query({ active: true, windowId: tab.windowId });
    previousActiveId = activeBefore[0]?.id;
    await chrome.tabs.update(tab.id, { active: true });
  }
  try {
    if (!params.fullPage) {
      const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, captureOptions);
      return { dataUrl, tab: formatTab(tab) };
    }

    // captureFullPageTiles only computes scroll positions / metrics; the captures happen here
    // because chrome.tabs.captureVisibleTab can't be called from the MAIN world.
    const plan = await executeInTab(pageParams, captureFullPageTiles, []);
    if (!plan || !Array.isArray(plan.tiles)) {
      throw new Error("Full-page screenshot failed: could not measure the page");
    }
    // Chrome rate-limits captureVisibleTab (chrome.tabs.MAX_CAPTURE_VISIBLE_TAB_CALLS_PER_SECOND).
    // Space the captures out so a multi-tile page does not trip the quota.
    const callsPerSecond = chrome.tabs.MAX_CAPTURE_VISIBLE_TAB_CALLS_PER_SECOND || 2;
    const minCaptureIntervalMs = Math.ceil(1000 / callsPerSecond) + 50;
    const captured = [];
    let lastCaptureAt = 0;
    try {
      for (const tile of plan.tiles) {
        await executeInTab(pageParams, scrollToY, [tile.scrollY]);
        // At least a small settle delay (on-scroll animations / lazy-load), and never less than
        // what is needed to respect the capture quota.
        const sinceLastCapture = Date.now() - lastCaptureAt;
        await sleep(Math.max(120, minCaptureIntervalMs - sinceLastCapture));
        const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, captureOptions);
        lastCaptureAt = Date.now();
        captured.push({ y: tile.y, dataUrl });
      }
    } finally {
      // Put the page back where it was even when a capture failed half-way through.
      await executeInTab(pageParams, scrollToY, [plan.originalScrollY]).catch(() => undefined);
    }
    return {
      fullPage: true,
      tab: formatTab(tab),
      dimensions: { width: plan.width, height: plan.height, viewportHeight: plan.viewportHeight, dpr: plan.dpr },
      tiles: captured,
    };
  } finally {
    if (previousActiveId !== undefined && previousActiveId !== tab.id) {
      await chrome.tabs.update(previousActiveId, { active: true }).catch(() => undefined);
    }
  }
}
416
+
417
+ // ---------------------------------------------------------------------------
418
+ // MAIN-world helpers (function declarations injected into the page).
419
+ // ---------------------------------------------------------------------------
420
+
271
421
  function getPiChromeState() {
272
422
  const state = window.__PI_CHROME_STATE__ || {
273
423
  nextElementUid: 1,
@@ -302,6 +452,77 @@ function elementBySelectorOrUid(selector, uid) {
302
452
  return null;
303
453
  }
304
454
 
// Reports whether `element` is rendered and at least partly inside the viewport.
// False for: a missing element or one without getBoundingClientRect, computed
// `visibility: hidden` / `display: none`, a zero-width or zero-height box, or a box lying
// entirely above/left of the viewport or starting beyond its bottom/right edge.
function isElementVisible(element) {
  const measurable = Boolean(element && element.getBoundingClientRect);
  if (!measurable) return false;

  const { visibility, display } = getComputedStyle(element);
  if (visibility === "hidden" || display === "none") return false;

  const box = element.getBoundingClientRect();
  const hasArea = box.width !== 0 && box.height !== 0;
  const beforeViewport = box.bottom < 0 || box.right < 0;
  const pastViewport = box.top > innerHeight || box.left > innerWidth;
  return hasArea && !beforeViewport && !pastViewport;
}
465
+
// Describes the element covering point (x, y) when it is something other than `expected`.
// Returns null when nothing is at the point, when the hit element is `expected` itself, or when
// one of the two contains the other. Otherwise returns `{ tag, id, className }` of the hit
// element (id is undefined when empty; className is undefined when it is not a string).
function occluderAt(x, y, expected) {
  const hit = document.elementFromPoint(x, y);
  const unobstructed =
    !hit ||
    hit === expected ||
    (expected && expected.contains(hit)) ||
    hit.contains(expected);
  if (unobstructed) return null;

  const { id, className } = hit;
  return {
    tag: hit.tagName.toLowerCase(),
    id: id || undefined,
    className: typeof className === "string" ? className : undefined,
  };
}
477
+
// Cheap 32-bit rolling hash of the page, used to report `pageMutated`.
// It folds in, in this order: the first 4000 characters of <body>.innerText, the string values
// of input/textarea/select elements (each followed by a NUL, collection stops once about 4000
// characters are gathered), and the number of descendants of <body>. Without a <body> only the
// (empty) text part is hashed.
function pageHash() {
  const fold = (seed, str) => {
    let acc = seed;
    for (let k = 0; k < str.length; k += 1) acc = (acc * 31 + str.charCodeAt(k)) | 0;
    return acc;
  };

  const root = document.body;
  const textHash = fold(0, (root ? root.innerText : "").slice(0, 4000));
  if (!root) return textHash;

  // Form control values are not part of innerText, so they are hashed separately.
  const fields = root.querySelectorAll("input,textarea,select");
  let values = "";
  for (let k = 0; k < fields.length && values.length < 4000; k += 1) {
    const current = fields[k].value;
    if (typeof current === "string") values += current + "\x00";
  }
  const withValues = fold(textHash, values);
  return (withValues * 31 + root.getElementsByTagName("*").length) | 0;
}
499
+
// Dispatches a sequence of synthetic pointer/mouse events on `element` at client point (x, y).
//
// element   Target that receives every event via dispatchEvent.
// x, y      Client coordinates stamped on each event.
// sequence  Iterable of event type names; names starting with "pointer" become PointerEvents
//           (pointerType "mouse", pointerId 1, primary), everything else a MouseEvent.
//
// Returns true when at least one of the dispatched events had its default prevented.
//
// `buttons` mirrors a real primary-button gesture: 1 from pointerdown/mousedown until the
// matching up event, 0 otherwise (including the up and click events themselves). The pressed
// state is kept on the function object so that it carries across the separate calls that make
// up a drag (press, moves, release). Enter/leave events are dispatched as non-bubbling, which
// is how the browser delivers them.
function pointerEventSequence(element, x, y, sequence) {
  const pressTypes = ["pointerdown", "mousedown"];
  const releaseTypes = ["pointerup", "mouseup", "click"];
  const nonBubblingTypes = ["pointerenter", "pointerleave", "mouseenter", "mouseleave"];
  let defaultPrevented = false;
  for (const type of sequence) {
    if (pressTypes.includes(type)) pointerEventSequence.primaryDown = true;
    else if (releaseTypes.includes(type)) pointerEventSequence.primaryDown = false;

    const isPointer = type.startsWith("pointer");
    const Ctor = isPointer ? PointerEvent : MouseEvent;
    const init = {
      bubbles: !nonBubblingTypes.includes(type),
      cancelable: true,
      view: window,
      clientX: x,
      clientY: y,
      button: 0,
      buttons: pointerEventSequence.primaryDown ? 1 : 0,
      ...(isPointer ? { pointerType: "mouse", pointerId: 1, isPrimary: true } : {}),
    };
    const ev = new Ctor(type, init);
    element.dispatchEvent(ev);
    if (ev.defaultPrevented) defaultPrevented = true;
  }
  return defaultPrevented;
}
525
+
305
526
  function installPiChromeInstrumentation() {
306
527
  const state = getPiChromeState();
307
528
  if (state.instrumentationInstalled) return;
@@ -407,7 +628,7 @@ function installPiChromeInstrumentation() {
407
628
  }
408
629
  }
409
630
 
410
- function snapshotPage(maxElements) {
631
+ function snapshotPage(maxElements, containingText, roleFilter, nearUid) {
411
632
  installPiChromeInstrumentation();
412
633
  const unique = (selector) => {
413
634
  try { return document.querySelectorAll(selector).length === 1; } catch { return false; }
@@ -434,11 +655,7 @@ function snapshotPage(maxElements) {
434
655
  }
435
656
  return parts.join(" > ");
436
657
  };
437
- const visible = (element) => {
438
- const style = getComputedStyle(element);
439
- const rect = element.getBoundingClientRect();
440
- return style.visibility !== "hidden" && style.display !== "none" && rect.width > 0 && rect.height > 0;
441
- };
658
+ const visible = (element) => isElementVisible(element);
442
659
  const labelFor = (element) => (
443
660
  element.getAttribute("aria-label") ||
444
661
  element.getAttribute("title") ||
@@ -448,9 +665,41 @@ function snapshotPage(maxElements) {
448
665
  element.textContent ||
449
666
  ""
450
667
  ).trim().replace(/\s+/g, " ").slice(0, 160);
451
- const candidates = Array.from(document.querySelectorAll('a, button, input, textarea, select, summary, [role="button"], [role="link"], [contenteditable="true"], [tabindex]:not([tabindex="-1"])'));
668
+ let candidates = Array.from(document.querySelectorAll('a, button, input, textarea, select, summary, [role="button"], [role="link"], [role="menuitem"], [role="tab"], [role="checkbox"], [contenteditable="true"], [tabindex]:not([tabindex="-1"])'));
669
+ if (containingText) {
670
+ const needle = String(containingText).toLowerCase();
671
+ candidates = candidates.filter((element) => labelFor(element).toLowerCase().includes(needle));
672
+ }
673
+ if (roleFilter) {
674
+ const wanted = String(roleFilter).toLowerCase();
675
+ candidates = candidates.filter((element) => {
676
+ const role = (element.getAttribute("role") || element.tagName).toLowerCase();
677
+ return role === wanted;
678
+ });
679
+ }
680
+ let near;
681
+ if (nearUid) {
682
+ const state = getPiChromeState();
683
+ near = state.elements[nearUid];
684
+ }
685
+ if (near) {
686
+ const nearRect = near.getBoundingClientRect();
687
+ const cx = nearRect.left + nearRect.width / 2;
688
+ const cy = nearRect.top + nearRect.height / 2;
689
+ candidates.sort((a, b) => {
690
+ const ra = a.getBoundingClientRect();
691
+ const rb = b.getBoundingClientRect();
692
+ const da = Math.hypot(ra.left + ra.width / 2 - cx, ra.top + ra.height / 2 - cy);
693
+ const db = Math.hypot(rb.left + rb.width / 2 - cx, rb.top + rb.height / 2 - cy);
694
+ return da - db;
695
+ });
696
+ }
452
697
  const elements = candidates.filter(visible).slice(0, maxElements).map((element, index) => {
453
698
  const rect = element.getBoundingClientRect();
699
+ const style = getComputedStyle(element);
700
+ const cx = rect.left + rect.width / 2;
701
+ const cy = rect.top + rect.height / 2;
702
+ const occluded = occluderAt(cx, cy, element);
454
703
  return {
455
704
  index,
456
705
  uid: rememberElement(element),
@@ -461,6 +710,9 @@ function snapshotPage(maxElements) {
461
710
  type: element.getAttribute("type") || undefined,
462
711
  role: element.getAttribute("role") || undefined,
463
712
  disabled: Boolean(element.disabled || element.getAttribute("aria-disabled") === "true"),
713
+ inert: Boolean(element.closest?.("[inert]")),
714
+ pointerEvents: style.pointerEvents,
715
+ occluded: occluded || undefined,
464
716
  rect: { x: Math.round(rect.x), y: Math.round(rect.y), width: Math.round(rect.width), height: Math.round(rect.height) },
465
717
  };
466
718
  });
@@ -470,14 +722,44 @@ function snapshotPage(maxElements) {
470
722
  viewport: { width: innerWidth, height: innerHeight, scrollX, scrollY },
471
723
  text: document.body ? document.body.innerText.replace(/\s+\n/g, "\n").trim().slice(0, 30000) : "",
472
724
  elements,
725
+ filter: { containingText: containingText || undefined, roleFilter: roleFilter || undefined, nearUid: nearUid || undefined },
473
726
  };
474
727
  }
475
728
 
476
- async function evaluateExpression(expression, awaitPromise) {
477
- installPiChromeInstrumentation();
478
- const indirectEval = (0, eval);
479
- const value = indirectEval(expression);
480
- return awaitPromise && value && typeof value.then === "function" ? await value : value;
// Sanity probe used by /chrome-doctor: returns a few values read from the page as evidence
// that MAIN-world execution works (a computed sum, URL, title, readyState, the first 200
// characters of the user agent, and navigator.webdriver coerced to a boolean).
function probePage() {
  const { href } = location;
  const { title, readyState } = document;
  const agent = navigator.userAgent.slice(0, 200);
  const automated = Boolean(navigator.webdriver);
  return {
    arithmetic: 1 + 1,
    location: href,
    title,
    documentReady: readyState,
    userAgent: agent,
    webdriver: automated,
  };
}
740
+
// Returns the *plan* for a tiled full-page capture; the actual captures happen in the service
// worker. Reports page metrics plus one `{ y, scrollY }` entry per viewport-high tile.
//
// Each tile's scroll offset is clamped to the largest offset the page can actually scroll to, and
// `y` reports that same clamped offset. When the page height is not a multiple of the viewport
// height, the last tile therefore overlaps the previous one instead of claiming a position the
// browser cannot scroll to. A zero-height viewport yields a single tile rather than looping
// forever.
function captureFullPageTiles() {
  const html = document.documentElement;
  const body = document.body;
  const width = Math.max(html.scrollWidth, body ? body.scrollWidth : 0, innerWidth);
  const height = Math.max(html.scrollHeight, body ? body.scrollHeight : 0, innerHeight);
  const viewportHeight = innerHeight;
  const dpr = window.devicePixelRatio || 1;
  const originalScrollY = scrollY;
  const maxScrollY = Math.max(0, height - viewportHeight);
  const tiles = [];
  if (viewportHeight > 0) {
    for (let y = 0; y < height; y += viewportHeight) {
      const top = Math.min(y, maxScrollY);
      tiles.push({ y: top, scrollY: top });
    }
  } else {
    // Stepping by a zero-height viewport would never advance.
    tiles.push({ y: 0, scrollY: 0 });
  }
  return { width, height, viewportHeight, dpr, originalScrollY, tiles };
}
759
+
// Jumps the window to vertical offset `y` (horizontal offset 0, no smooth scrolling) and
// returns the scroll position read back afterwards as `{ scrollY }`.
function scrollToY(y) {
  window.scrollTo({ behavior: "instant", left: 0, top: y });
  const reached = scrollY;
  return { scrollY: reached };
}
482
764
 
483
765
  function resolvePoint(selector, uid, x, y) {
@@ -492,12 +774,88 @@ function resolvePoint(selector, uid, x, y) {
492
774
  }
493
775
 
// Performs a synthetic click at the point resolved from selector / uid / (x, y).
// Dispatches pointerdown, mousedown, pointerup, mouseup and click on the resolved element and
// returns diagnostics: the point, the element tag, whether any event's default was prevented,
// whether the element was visible or occluded *before* the click, whether the page hash changed,
// and an optional hint about autoplay restrictions. Throws when no element is at the point.
function clickPage(selector, uid, x, y) {
  installPiChromeInstrumentation();
  const hashBefore = pageHash();
  const point = resolvePoint(selector, uid, x, y);
  const target = point.element;
  if (!target) throw new Error("No element at click point");

  const elementVisible = isElementVisible(target);
  const occluder = occluderAt(point.x, point.y, target);
  const clickEvents = ["pointerdown", "mousedown", "pointerup", "mouseup", "click"];
  const defaultPrevented = pointerEventSequence(target, point.x, point.y, clickEvents);

  // Heuristic: a label that starts like a media control, combined with paused audio/video on
  // the page, suggests the synthetic click may not unlock playback. Surface a warning.
  const label = (target.getAttribute("aria-label") || target.textContent || "").toLowerCase();
  const looksLikeMediaControl = /^(play|start|begin|next|continue|unmute)/.test(label.trim());
  const hasPausedMedia = () => Array.from(document.querySelectorAll("audio,video")).some((m) => m.paused);
  const autoplayHint = looksLikeMediaControl && hasPausedMedia()
    ? "This element looks like a media affordance and the page has paused media. Synthetic clicks do not satisfy user-activation gates; audio/video may not start."
    : undefined;

  return {
    x: point.x,
    y: point.y,
    selector,
    uid,
    tag: target.tagName,
    isTrusted: false,
    defaultPrevented,
    elementVisible,
    occludedBy: occluder || undefined,
    pageMutated: pageHash() !== hashBefore,
    autoplayHint,
  };
}
808
+
// Simulates moving the pointer onto the element/point resolved from selector / uid / (x, y) by
// dispatching over, enter and move events (pointer and mouse variants) on it.
// Returns the point, the element tag and whether any event's default was prevented.
// Throws when no element can be resolved.
function hoverPage(selector, uid, x, y) {
  installPiChromeInstrumentation();
  const point = resolvePoint(selector, uid, x, y);
  const target = point.element;
  if (!target) throw new Error("No element to hover");

  const hoverEvents = ["pointerover", "mouseover", "pointerenter", "mouseenter", "pointermove", "mousemove"];
  const defaultPrevented = pointerEventSequence(target, point.x, point.y, hoverEvents);
  return {
    x: point.x,
    y: point.y,
    selector,
    uid,
    tag: target.tagName,
    defaultPrevented,
    isTrusted: false,
  };
}
818
+
// Simulates a pointer drag from one resolved point to another.
//
// fromUid/fromSelector/fromX/fromY  Identify the drag source (resolved via resolvePoint).
// toUid/toSelector/toX/toY          Identify the drop target.
// steps                             Number of intermediate move events. Non-numeric values fall
//                                   back to 12; the count is rounded and clamped to 1..500 so a
//                                   bad value can neither skip the movement nor loop endlessly.
//
// Sequence: over + press on the source, `steps` moves along the straight line (each dispatched
// on whatever element is under the point, falling back to the target), then over + release on
// the target. Returns both points, the step count actually used, whether the page hash changed,
// and a note about the limits of synthetic drags. Throws when either end cannot be resolved.
function dragPage(fromUid, fromSelector, fromX, fromY, toUid, toSelector, toX, toY, steps) {
  installPiChromeInstrumentation();
  const before = pageHash();
  const from = resolvePoint(fromSelector, fromUid, fromX, fromY);
  const to = resolvePoint(toSelector, toUid, toX, toY);
  if (!from.element) throw new Error("Drag source element not found");
  if (!to.element) throw new Error("Drag target element not found");

  const requestedSteps = Number(steps);
  const stepCount = Number.isFinite(requestedSteps)
    ? Math.min(Math.max(Math.round(requestedSteps), 1), 500)
    : 12;

  // Includes "mouseover" so the press mirrors the over/over pair used for the release below.
  pointerEventSequence(from.element, from.x, from.y, ["pointerover", "mouseover", "pointerdown", "mousedown"]);
  for (let i = 1; i <= stepCount; i++) {
    const t = i / stepCount;
    const x = from.x + (to.x - from.x) * t;
    const y = from.y + (to.y - from.y) * t;
    const overEl = document.elementFromPoint(x, y) || to.element;
    pointerEventSequence(overEl, x, y, ["pointermove", "mousemove"]);
  }
  pointerEventSequence(to.element, to.x, to.y, ["pointerover", "mouseover", "pointerup", "mouseup"]);
  return {
    from: { x: from.x, y: from.y },
    to: { x: to.x, y: to.y },
    steps: stepCount,
    pageMutated: pageHash() !== before,
    note: "Synthetic pointer drag. HTML5 DataTransfer is not synthesized; native drag-and-drop targets may not respond.",
  };
}
843
+
// Assigns in-memory files to an <input type=file> and fires `input` and `change` on it.
//
// selector / uid  Identify the file input (resolved via elementBySelectorOrUid).
// files           Array of `{ name, type?, base64 }`; `type` defaults to
//                 "application/octet-stream". Anything that is not an array is treated as empty.
//
// Returns `{ uploaded: [{ name, type, size }] }` where `size` is the decoded size in bytes and
// `type` is the MIME type actually given to the File.
// Throws when the target is not a file input, a file has no name, or its base64 is invalid.
function uploadFiles(selector, uid, files) {
  installPiChromeInstrumentation();
  const element = elementBySelectorOrUid(selector, uid);
  if (!element || element.tagName !== "INPUT" || element.type !== "file") {
    throw new Error("Target must be <input type=file>");
  }
  const entries = Array.isArray(files) ? files : [];
  const dt = new DataTransfer();
  const uploaded = [];
  for (const entry of entries) {
    const name = String(entry?.name ?? "");
    if (!name) throw new Error("Each uploaded file needs a name");
    let binary;
    try {
      binary = atob(entry.base64 || "");
    } catch (error) {
      throw new Error(`File "${name}" has invalid base64 content`);
    }
    // atob yields one character per byte; map each back to its byte value.
    const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
    const file = new File([bytes], name, { type: entry.type || "application/octet-stream" });
    dt.items.add(file);
    uploaded.push({ name, type: file.type, size: file.size });
  }
  element.files = dt.files;
  element.dispatchEvent(new Event("input", { bubbles: true }));
  element.dispatchEvent(new Event("change", { bubbles: true }));
  return { uploaded };
}
502
860
 
503
861
  function dispatchInputEvents(element, data, inputType = "insertText") {
@@ -515,6 +873,7 @@ function setNativeValue(element, value) {
515
873
 
516
874
  function typeIntoPage(selector, uid, text, pressEnter) {
517
875
  installPiChromeInstrumentation();
876
+ const before = pageHash();
518
877
  let element = elementBySelectorOrUid(selector, uid) || document.activeElement;
519
878
  if (!element) throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
520
879
  element.focus();
@@ -530,11 +889,17 @@ function typeIntoPage(selector, uid, text, pressEnter) {
530
889
  throw new Error("Focused element is not text-editable");
531
890
  }
532
891
  if (pressEnter) pressKeyInPage("Enter");
533
- return { selector, uid, length: text.length, pressEnter };
892
+ return {
893
+ selector, uid, length: text.length, pressEnter,
894
+ isTrusted: false,
895
+ valueMatches: "value" in element ? element.value.includes(text) : undefined,
896
+ pageMutated: pageHash() !== before,
897
+ };
534
898
  }
535
899
 
536
900
  function fillPage(selector, uid, text, submit) {
537
901
  installPiChromeInstrumentation();
902
+ const before = pageHash();
538
903
  let element = elementBySelectorOrUid(selector, uid) || document.activeElement;
539
904
  if (!element) throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
540
905
  element.focus();
@@ -550,19 +915,32 @@ function fillPage(selector, uid, text, submit) {
550
915
  throw new Error("Focused element is not text-editable");
551
916
  }
552
917
  if (submit) pressKeyInPage("Enter");
553
- return { selector, uid, length: String(text).length, submit };
918
+ return {
919
+ selector, uid, length: String(text).length, submit,
920
+ isTrusted: false,
921
+ valueMatches: "value" in element ? element.value === String(text) : undefined,
922
+ pageMutated: pageHash() !== before,
923
+ };
554
924
  }
555
925
 
556
926
  function pressKeyInPage(key) {
557
927
  const normalized = normalizeKey(key);
558
928
  const target = document.activeElement || document.body;
559
- target.dispatchEvent(new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true }));
560
- target.dispatchEvent(new KeyboardEvent("keyup", { key: normalized, bubbles: true, cancelable: true }));
929
+ const before = (typeof pageHash === "function") ? pageHash() : 0;
930
+ const down = new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true });
931
+ target.dispatchEvent(down);
932
+ const up = new KeyboardEvent("keyup", { key: normalized, bubbles: true, cancelable: true });
933
+ target.dispatchEvent(up);
561
934
  if (normalized === "Enter") {
562
935
  const form = target.closest?.("form");
563
936
  if (form) form.requestSubmit?.();
564
937
  }
565
- return { key: normalized };
938
+ return {
939
+ key: normalized,
940
+ isTrusted: false,
941
+ defaultPrevented: down.defaultPrevented || up.defaultPrevented,
942
+ pageMutated: (typeof pageHash === "function") ? pageHash() !== before : undefined,
943
+ };
566
944
  }
567
945
 
568
946
  function listConsoleMessages(clear) {
@@ -581,7 +959,7 @@ function listNetworkRequests(includePreservedRequests, clear) {
581
959
  .filter((request) => includePreservedRequests || request.pageUrl === currentUrl)
582
960
  .map(({ responseBody, ...summary }) => ({ ...summary, hasResponseBody: responseBody !== undefined }));
583
961
  if (clear) state.network = [];
584
- return { requests, count: requests.length, note: "Captures fetch/XHR after instrumentation is installed (snapshot/evaluate/network/console tools install it). Browser-initiated document/static asset requests are not captured." };
962
+ return { requests, count: requests.length, note: "Captures fetch/XHR after instrumentation is installed. Browser-initiated document/static asset requests are not captured." };
585
963
  }
586
964
 
587
965
  function getNetworkRequest(requestId) {
@@ -596,7 +974,9 @@ async function waitForPage(kind, value, timeoutMs, intervalMs) {
596
974
  while (Date.now() - started < timeoutMs) {
597
975
  let ok = false;
598
976
  if (kind === "selector") ok = Boolean(document.querySelector(value));
599
- else ok = Boolean((0, eval)(value));
977
+ else {
978
+ try { ok = Boolean(new Function("return (" + value + ");").call(globalThis)); } catch { ok = false; }
979
+ }
600
980
  if (ok) return { elapsedMs: Date.now() - started };
601
981
  await new Promise((resolve) => setTimeout(resolve, intervalMs));
602
982
  }
@@ -46,7 +46,7 @@ type BridgeResult = {
46
46
  error?: string;
47
47
  };
48
48
 
49
- const PI_CHROME_VERSION = "0.7.0";
49
+ const PI_CHROME_VERSION = "0.8.0";
50
50
  const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
51
51
  const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
52
52
  const DEFAULT_TIMEOUT_MS = 30_000;
@@ -83,7 +83,30 @@ function workspaceCwd(ctx: ExtensionContext): string {
83
83
 
84
84
  function browserExtensionPath(): string {
85
85
  return join(extensionRoot(), "browser-extension");
86
- }
86
+ }
87
+
88
/**
 * Extract the hostname from a URL string.
 *
 * @param url Absolute URL, or undefined/empty when unknown.
 * @returns The hostname, or "" when the input is missing or cannot be parsed as a URL.
 */
function hostnameOf(url: string | undefined): string {
  if (!url) {
    return "";
  }
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Unparsable input is reported as "no hostname" rather than an error.
    return "";
  }
  return parsed.hostname;
}
92
+
93
/**
 * Describe the significant fields of a click/type/fill/key result so the agent does not have
 * to guess whether the action actually changed the page.
 *
 * Only "something looks wrong" signals are reported: pageMutated=false, defaultPrevented=true,
 * elementVisible=false, a truthy occludedBy, valueMatches=false, and a truthy autoplayHint.
 *
 * @param result Raw action result from the bridge; any shape is tolerated.
 * @returns The notes joined with "; ", or undefined when the result is not an object or
 *   carries no noteworthy signal.
 */
function summarizeActionResult(result: unknown): string | undefined {
  if (!result || typeof result !== "object") return undefined;
  const r = result as Record<string, unknown>;
  const parts: string[] = [];
  if (r.pageMutated === false) parts.push("pageMutated=false");
  if (r.defaultPrevented === true) parts.push("defaultPrevented=true");
  if (r.elementVisible === false) parts.push("element NOT visible");
  if (r.occludedBy) {
    // The payload comes from the page and is untyped: only interpolate fields that really are
    // non-empty strings so an unexpected shape never renders as "[object Object]".
    const occluder = typeof r.occludedBy === "object" ? (r.occludedBy as Record<string, unknown>) : {};
    const tag = typeof occluder.tag === "string" && occluder.tag ? occluder.tag : "?";
    const id = typeof occluder.id === "string" && occluder.id ? `#${occluder.id}` : "";
    parts.push(`occluded by <${tag}${id}>`);
  }
  if (r.valueMatches === false) parts.push("input value did not stick");
  if (r.autoplayHint) parts.push("autoplay-gated affordance — synthetic click may not start media");
  return parts.length ? parts.join("; ") : undefined;
}
87
110
 
88
111
  function readRequestBody(request: IncomingMessage): Promise<string> {
89
112
  return new Promise((resolveBody, rejectBody) => {
@@ -374,21 +397,34 @@ export default function (pi: ExtensionAPI): void {
374
397
  pi.on("before_agent_start", (event) => {
375
398
  const primer = `
376
399
  <chrome-profile-bridge>
377
- Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
378
- This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
379
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing; use stable element uids from snapshots with chrome_click/chrome_type when available. For form work, use includeSnapshot=true on actions to verify in one round trip. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
400
+ Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile. Tools target the existing signed-in profile, no CDP, no throwaway profile.
401
+
402
+ Capability model (important):
403
+ - All input is **synthetic DOM events** (\`isTrusted=false\`). Synthetic events drive React/Vue/Angular state fine, but they do NOT satisfy Chrome's user-activation gates: audio/video autoplay, clipboard write, file pickers, fullscreen, and Web Push prompts will NOT open from a chrome_click.
404
+ - \`chrome_evaluate\` runs in MAIN world via the Function constructor. It works on pages with strict CSP (\`script-src 'self'\` without \`'unsafe-eval'\`), and surfaces thrown exceptions.
405
+ - Tool results include \`pageMutated\`, \`defaultPrevented\`, \`elementVisible\`, \`occludedBy\`, and (for type/fill) \`valueMatches\`. If \`pageMutated\` is false after a click that should have changed something, the click likely didn't take effect — do NOT just retry; check the action result and snapshot for the cause.
406
+
407
+ Usage rules:
408
+ 1. \`chrome_snapshot\` before clicking/typing; pass \`uid\` over \`selector\`.
409
+ 2. \`includeSnapshot=true\` on click/type/fill to verify in one round trip.
410
+ 3. If \`chrome_evaluate\` returns null when you expected a value, the expression evaluated to null/undefined in the page; surface the value via \`JSON.stringify\` to confirm.
411
+ 4. \`chrome_navigate\` supports an optional \`initScript\` that runs at document_start in MAIN world for the next navigation (good for seeding localStorage or stubbing Date.now).
412
+ 5. By default chrome_* tools focus Chrome so the user can watch; pass \`background=true\` or run /chrome-background to silence the whole session.
413
+ 6. If you hit an autoplay/clipboard/file-picker gate, tell the user; this bridge cannot satisfy it.
414
+ 7. Run /chrome-doctor when in doubt about connectivity or capabilities.
380
415
  </chrome-profile-bridge>`;
381
416
  return { systemPrompt: event.systemPrompt + primer };
382
417
  });
383
418
 
384
419
  pi.registerCommand("chrome-doctor", {
385
420
  description:
386
- "Check Chrome bridge connectivity and diagnose setup. Reports the local bridge, companion Chrome extension status (ID + version), and a one-line fix for common failures (extension not loaded, stale service worker, version drift).",
421
+ "Check Chrome bridge connectivity and capability tier. Probes the local bridge, the companion Chrome extension, MAIN-world evaluation, and CDP availability, and prints one-line fixes for common failures.",
387
422
  handler: async (_args, ctx) => {
388
423
  ctx.ui.notify("Performing Chrome bridge health check", "info");
389
424
  const lines: string[] = [`pi-chrome v${PI_CHROME_VERSION}`];
390
425
  const status = bridge.status();
391
426
  lines.push(`• Local bridge: mode=${status.mode}, url=${status.url}`);
427
+ let extensionAlive = false;
392
428
  try {
393
429
  const started = Date.now();
394
430
  const version = (await bridge.send("tab.version", {}, 35_000)) as {
@@ -397,10 +433,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
397
433
  bridgeUrl?: string;
398
434
  };
399
435
  const latencyMs = Date.now() - started;
400
- if (version.extensionId)
401
- lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"}, latency ${latencyMs}ms)`);
402
- else lines.push(`✓ Companion Chrome extension responding (no extension ID reported, latency ${latencyMs}ms)`);
403
- if (version.bridgeUrl) lines.push(`• Extension polling: ${version.bridgeUrl}`);
436
+ extensionAlive = true;
437
+ lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId ?? "?"}, ext v${version.extensionVersion ?? "?"}, latency ${latencyMs}ms)`);
404
438
  if (version.extensionVersion && version.extensionVersion !== PI_CHROME_VERSION) {
405
439
  lines.push(
406
440
  `⚠ Extension version (${version.extensionVersion}) differs from pi-chrome (${PI_CHROME_VERSION}). Reload "Pi Existing Chrome Profile Bridge" in chrome://extensions to pick up the latest service worker.`,
@@ -415,6 +449,43 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
415
449
  lines.push(" Fix: run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions and keep that Chrome window open.");
416
450
  }
417
451
  }
452
+
453
+ if (extensionAlive) {
454
+ // MAIN-world evaluate probe.
455
+ try {
456
+ const value = await bridge.send("page.evaluate", { expression: "1+1", awaitPromise: true, foreground: false }, 10_000);
457
+ if (value === 2) lines.push(`✓ chrome_evaluate("1+1") = 2`);
458
+ else lines.push(`⚠ chrome_evaluate("1+1") returned ${JSON.stringify(value)} (expected 2). The current tab may have a restrictive CSP or be a chrome:// URL.`);
459
+ } catch (error) {
460
+ lines.push(`✗ chrome_evaluate failed: ${(error as Error).message}`);
461
+ }
462
+
463
+ // Capability probe via MAIN-world helper.
464
+ try {
465
+ const probe = (await bridge.send("page.probe", { foreground: false }, 10_000)) as Record<string, unknown>;
466
+ if (probe && probe.arithmetic === 2) lines.push(`✓ MAIN-world helper injection works (location=${hostnameOf(String(probe.location))})`);
467
+ if (probe && probe.webdriver) lines.push(`⚠ navigator.webdriver=true on current tab — site fingerprinting may flag automation.`);
468
+ } catch (error) {
469
+ lines.push(`⚠ page.probe failed: ${(error as Error).message}`);
470
+ }
471
+ }
472
+
473
+ // CDP availability hint.
474
+ try {
475
+ const controller = new AbortController();
476
+ const timer = setTimeout(() => controller.abort(), 250);
477
+ const response = await fetch("http://127.0.0.1:9222/json/version", { signal: controller.signal }).catch(() => undefined);
478
+ clearTimeout(timer);
479
+ if (response && response.ok) {
480
+ const info = (await response.json().catch(() => ({}))) as { Browser?: string };
481
+ lines.push(`✓ CDP endpoint reachable at 127.0.0.1:9222 (${info.Browser ?? "unknown"}). Trusted input via CDP is not yet wired into pi-chrome — reserved for a future release.`);
482
+ } else {
483
+ lines.push(`• CDP not available (no listener on 127.0.0.1:9222). Synthetic input only; autoplay/clipboard/file-picker gates cannot be satisfied. Future pi-chrome versions will use CDP for trusted input when this port is enabled.`);
484
+ }
485
+ } catch {
486
+ lines.push(`• CDP probe inconclusive.`);
487
+ }
488
+
418
489
  ctx.ui.notify(lines.join("\n"), "info");
419
490
  },
420
491
  });
@@ -540,6 +611,9 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
540
611
  urlIncludes: Type.Optional(Type.String()),
541
612
  titleIncludes: Type.Optional(Type.String()),
542
613
  maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS })),
614
+ containingText: Type.Optional(Type.String({ description: "Only return elements whose label/text contains this string (case-insensitive). Useful when the page has many controls." })),
615
+ roleFilter: Type.Optional(Type.String({ description: "Only return elements matching this ARIA role or tag name (case-insensitive). e.g. 'button', 'link', 'textbox'." })),
616
+ nearUid: Type.Optional(Type.String({ description: "Sort elements by proximity to this snapshot uid. Useful for finding controls near a known anchor." })),
543
617
  background: Type.Optional(
544
618
  Type.Boolean({ description: "If true, run silently in the background without focusing Chrome. Default false (Chrome focuses + tab activates so the user can watch)." }),
545
619
  ),
@@ -569,6 +643,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
569
643
  titleIncludes: Type.Optional(Type.String()),
570
644
  waitUntilLoad: Type.Optional(Type.Boolean({ default: true })),
571
645
  timeoutMs: Type.Optional(Type.Number({ default: 15_000 })),
646
+ initScript: Type.Optional(Type.String({ description: "Optional JavaScript source to run in MAIN world at document_start of the next navigation. Useful for seeding localStorage, stubbing Date.now(), or defining navigator.webdriver=undefined. Requires the companion extension's webNavigation permission." })),
572
647
  background: Type.Optional(
573
648
  Type.Boolean({ description: "If true, navigate silently without focusing Chrome. Default false." }),
574
649
  ),
@@ -576,8 +651,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
576
651
  port: Type.Optional(Type.Number()),
577
652
  }),
578
653
  async execute(_id, params): Promise<ToolTextResult> {
579
- const result = await bridge.send("page.navigate", withBackground(params), params.timeoutMs ?? 15_000);
580
- return { content: [{ type: "text", text: `Navigated to ${params.url}` }], details: { result: result as Json } };
654
+ const result = await bridge.send("page.navigate", withBackground(params), (params.timeoutMs ?? 15_000) + 2_000);
655
+ return { content: [{ type: "text", text: `Navigated to ${params.url}${params.initScript ? " (with initScript)" : ""}` }], details: { result: result as Json } };
581
656
  },
582
657
  });
583
658
 
@@ -590,7 +665,6 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
590
665
  parameters: Type.Object({
591
666
  expression: Type.String(),
592
667
  awaitPromise: Type.Optional(Type.Boolean({ default: true })),
593
- returnByValue: Type.Optional(Type.Boolean({ default: true })),
594
668
  targetId: Type.Optional(Type.String()),
595
669
  urlIncludes: Type.Optional(Type.String()),
596
670
  titleIncludes: Type.Optional(Type.String()),
@@ -602,7 +676,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
602
676
  }),
603
677
  async execute(_id, params): Promise<ToolTextResult> {
604
678
  const value = await bridge.send("page.evaluate", withBackground(params), DEFAULT_TIMEOUT_MS);
605
- return { content: [{ type: "text", text: truncateText(typeof value === "string" ? value : safeJson(value)) }], details: { value: value as Json } };
679
+ const text = value === undefined
680
+ ? "undefined"
681
+ : typeof value === "string"
682
+ ? value
683
+ : safeJson(value) ?? "undefined";
684
+ return { content: [{ type: "text", text: truncateText(text) }], details: { value: value as Json } };
606
685
  },
607
686
  });
608
687
 
@@ -629,8 +708,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
629
708
  port: Type.Optional(Type.Number()),
630
709
  }),
631
710
  async execute(_id, params): Promise<ToolTextResult> {
632
- const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
633
- return { content: [{ type: "text", text: `Clicked ${params.uid ?? params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
711
+ const raw = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
712
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
713
+ const summary = summarizeActionResult(result);
714
+ const target = params.uid ?? params.selector ?? `${params.x},${params.y}`;
715
+ const text = summary ? `Clicked ${target} — ${summary}` : `Clicked ${target}`;
716
+ return { content: [{ type: "text", text }], details: { result: raw as Json } };
634
717
  },
635
718
  });
636
719
 
@@ -657,8 +740,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
657
740
  port: Type.Optional(Type.Number()),
658
741
  }),
659
742
  async execute(_id, params): Promise<ToolTextResult> {
660
- const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
661
- return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
743
+ const raw = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
744
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
745
+ const summary = summarizeActionResult(result);
746
+ const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
747
+ const base = `Typed ${params.text.length} character(s)${into}.`;
748
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
662
749
  },
663
750
  });
664
751
 
@@ -685,8 +772,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
685
772
  port: Type.Optional(Type.Number()),
686
773
  }),
687
774
  async execute(_id, params): Promise<ToolTextResult> {
688
- const result = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
689
- return { content: [{ type: "text", text: `Filled ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
775
+ const raw = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
776
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
777
+ const summary = summarizeActionResult(result);
778
+ const into = params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : "";
779
+ const base = `Filled ${params.text.length} character(s)${into}.`;
780
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
690
781
  },
691
782
  });
692
783
 
@@ -710,8 +801,11 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
710
801
  port: Type.Optional(Type.Number()),
711
802
  }),
712
803
  async execute(_id, params): Promise<ToolTextResult> {
713
- const result = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
714
- return { content: [{ type: "text", text: `Pressed ${params.key}.` }], details: { result: result as Json } };
804
+ const raw = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
805
+ const result = (params.includeSnapshot ? (raw as { result: unknown }).result : raw) as Json;
806
+ const summary = summarizeActionResult(result);
807
+ const base = `Pressed ${params.key}.`;
808
+ return { content: [{ type: "text", text: summary ? `${base} (${summary})` : base }], details: { result: raw as Json } };
715
809
  },
716
810
  });
717
811
 
@@ -825,11 +919,113 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
825
919
  const cwd = workspaceCwd(ctx);
826
920
  const defaultPath = join(cwd, ".pi", "chrome-screenshots", `${new Date().toISOString().replace(/[:.]/g, "-")}.${format}`);
827
921
  const outputPath = params.path ? resolve(cwd, params.path) : defaultPath;
828
- const result = (await bridge.send("page.screenshot", withBackground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
829
- const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
922
+ const result = (await bridge.send("page.screenshot", withBackground(params), params.fullPage ? 120_000 : DEFAULT_TIMEOUT_MS)) as {
923
+ dataUrl?: string;
924
+ tab?: unknown;
925
+ fullPage?: boolean;
926
+ dimensions?: { width: number; height: number; viewportHeight: number; dpr: number };
927
+ tiles?: Array<{ y: number; dataUrl: string }>;
928
+ };
830
929
  await mkdir(dirname(outputPath), { recursive: true });
930
+ if (result.fullPage && result.tiles && result.dimensions) {
931
+ // Stitch via PNG if format is png; otherwise we fall back to writing tile files and a
932
+ // manifest. We avoid pulling in an image library by writing each tile next to the main
933
+ // path with a -tileN suffix and a stitched.json manifest.
934
+ const { width, height, viewportHeight, dpr } = result.dimensions;
935
+ const manifest: Array<{ path: string; y: number }> = [];
936
+ for (let i = 0; i < result.tiles.length; i++) {
937
+ const tile = result.tiles[i];
938
+ const tilePath = outputPath.replace(/(\.[^.]+)$/, `-tile${i}$1`);
939
+ const base64 = tile.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
940
+ await writeFile(tilePath, Buffer.from(base64, "base64"));
941
+ manifest.push({ path: tilePath, y: tile.y });
942
+ }
943
+ await writeFile(outputPath + ".json", JSON.stringify({ width, height, viewportHeight, dpr, tiles: manifest }, null, 2));
944
+ return {
945
+ content: [{ type: "text", text: `Saved ${result.tiles.length} full-page tile(s) for ${width}×${height}px page. Manifest: ${outputPath}.json` }],
946
+ details: { manifest: outputPath + ".json", tiles: manifest, dimensions: result.dimensions, tab: result.tab } as unknown as Record<string, unknown>,
947
+ };
948
+ }
949
+ if (!result.dataUrl) throw new Error("Screenshot returned no dataUrl");
950
+ const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
831
951
  await writeFile(outputPath, Buffer.from(base64, "base64"));
832
952
  return { content: [{ type: "text", text: `Saved Chrome screenshot to ${outputPath}` }], details: { path: outputPath, format, tab: result.tab } };
833
953
  },
834
954
  });
955
+
956
// chrome_hover: forwards a hover request to the bridge's "page.hover" command.
// The description is read by the agent, so it must not over-promise: script-dispatched
// pointer/mouse events reach JS listeners but do not move the real pointer, and the CSS
// :hover pseudo-class only follows the real pointer.
pi.registerTool({
  name: "chrome_hover",
  label: "Chrome Hover",
  description: "Hover over an element (synthetic pointerover/mouseover/pointermove) by uid, selector, or x/y. Fires JS hover handlers; does NOT activate the CSS :hover pseudo-class, which only follows the real pointer. isTrusted is false.",
  promptSnippet: "Hover a Chrome element to fire its JS mouseover/pointerover handlers.",
  parameters: Type.Object({
    uid: Type.Optional(Type.String()),
    selector: Type.Optional(Type.String()),
    x: Type.Optional(Type.Number()),
    y: Type.Optional(Type.Number()),
    targetId: Type.Optional(Type.String()),
    urlIncludes: Type.Optional(Type.String()),
    titleIncludes: Type.Optional(Type.String()),
    background: Type.Optional(Type.Boolean()),
  }),
  async execute(_id, params): Promise<ToolTextResult> {
    const result = await bridge.send("page.hover", withBackground(params), DEFAULT_TIMEOUT_MS);
    // Name the target by uid, then selector, then raw coordinates.
    const target = params.uid ?? params.selector ?? `${params.x},${params.y}`;
    return { content: [{ type: "text", text: `Hovered ${target}` }], details: { result: result as Json } };
  },
});
976
+
977
// chrome_drag: forwards a synthetic pointer drag to the bridge's "page.drag" command.
// Each endpoint may be given as a uid, a selector, or x/y coordinates; the confirmation text
// names whichever form was supplied (uid, then selector, then "x,y") instead of printing
// "undefined" for coordinate-based drags.
pi.registerTool({
  name: "chrome_drag",
  label: "Chrome Drag",
  description: "Synthetic pointer drag from one uid/selector/point to another. Dispatches pointerdown → multi-step pointermove → pointerup. Note: HTML5 DataTransfer is NOT synthesized, so native HTML5 drag-and-drop targets may not respond.",
  promptSnippet: "Drag a Chrome element from one point to another.",
  parameters: Type.Object({
    fromUid: Type.Optional(Type.String()),
    fromSelector: Type.Optional(Type.String()),
    fromX: Type.Optional(Type.Number()),
    fromY: Type.Optional(Type.Number()),
    toUid: Type.Optional(Type.String()),
    toSelector: Type.Optional(Type.String()),
    toX: Type.Optional(Type.Number()),
    toY: Type.Optional(Type.Number()),
    steps: Type.Optional(Type.Number({ default: 12 })),
    targetId: Type.Optional(Type.String()),
    urlIncludes: Type.Optional(Type.String()),
    titleIncludes: Type.Optional(Type.String()),
    background: Type.Optional(Type.Boolean()),
  }),
  async execute(_id, params): Promise<ToolTextResult> {
    const result = await bridge.send("page.drag", withBackground(params), DEFAULT_TIMEOUT_MS);
    // Same fallback order chrome_click and chrome_hover use for naming their target.
    const from = params.fromUid ?? params.fromSelector ?? `${params.fromX},${params.fromY}`;
    const to = params.toUid ?? params.toSelector ?? `${params.toX},${params.toY}`;
    return { content: [{ type: "text", text: `Dragged from ${from} to ${to}` }], details: { result: result as Json } };
  },
});
1002
+
1003
// chrome_upload_file: reads local files, base64-encodes them, and sends them to the bridge's
// "page.upload" command, which populates an <input type=file> in the page.
//
// Each file carries a MIME type inferred from its extension (falling back to
// application/octet-stream). Pages commonly check File.type (e.g. image previews, accept
// filters), and a native file picker would report a real type too.
pi.registerTool({
  name: "chrome_upload_file",
  label: "Chrome Upload File",
  description: "Programmatically set the files of an <input type=file> element from local file paths. Uses DataTransfer to populate input.files and dispatches input+change events. Does NOT open the native file picker; works with React/Vue/Angular controlled inputs.",
  promptSnippet: "Attach local files to a Chrome <input type=file> without opening the native file picker.",
  parameters: Type.Object({
    uid: Type.Optional(Type.String()),
    selector: Type.Optional(Type.String()),
    paths: Type.Array(Type.String(), { description: "Local absolute file paths to upload." }),
    targetId: Type.Optional(Type.String()),
    urlIncludes: Type.Optional(Type.String()),
    titleIncludes: Type.Optional(Type.String()),
    background: Type.Optional(Type.Boolean()),
  }),
  async execute(_id, params, _signal, _onUpdate, ctx): Promise<ToolTextResult> {
    const { readFile } = await import("node:fs/promises");
    const { basename, extname } = await import("node:path");
    const cwd = workspaceCwd(ctx);
    // Small, conservative extension → MIME table; anything else stays application/octet-stream.
    const mimeByExtension: Record<string, string> = {
      ".png": "image/png",
      ".jpg": "image/jpeg",
      ".jpeg": "image/jpeg",
      ".gif": "image/gif",
      ".webp": "image/webp",
      ".svg": "image/svg+xml",
      ".pdf": "application/pdf",
      ".txt": "text/plain",
      ".csv": "text/csv",
      ".json": "application/json",
      ".html": "text/html",
      ".zip": "application/zip",
      ".mp3": "audio/mpeg",
      ".mp4": "video/mp4",
    };
    // Read all files concurrently; Promise.all keeps the caller's ordering. Relative paths
    // resolve against the workspace directory.
    const files: Array<{ name: string; type: string; base64: string }> = await Promise.all(
      params.paths.map(async (p) => {
        const abs = resolve(cwd, p);
        const buf = await readFile(abs);
        const type = mimeByExtension[extname(abs).toLowerCase()] ?? "application/octet-stream";
        return { name: basename(abs), type, base64: buf.toString("base64") };
      }),
    );
    const result = await bridge.send("page.upload", withBackground({ ...params, files }), DEFAULT_TIMEOUT_MS);
    return { content: [{ type: "text", text: `Uploaded ${files.length} file(s) to ${params.uid ?? params.selector}` }], details: { result: result as Json } };
  },
});
835
1031
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-chrome",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
5
  "keywords": [
6
6
  "pi-package",