pi-chrome 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "Pi Existing Chrome Profile Bridge",
4
- "version": "0.6.0",
4
+ "version": "0.7.0",
5
5
  "description": "Lets Pi control tabs in this existing Chrome profile via a local bridge at 127.0.0.1.",
6
6
  "permissions": ["tabs", "scripting", "storage", "activeTab", "alarms"],
7
7
  "host_permissions": ["<all_urls>", "http://127.0.0.1:17318/*"],
@@ -111,11 +111,19 @@ async function dispatch(action, params) {
111
111
  case "page.evaluate":
112
112
  return executeInTab(params, evaluateExpression, [params.expression, params.awaitPromise !== false]);
113
113
  case "page.click":
114
- return executeInTab(params, clickPage, [params.selector ?? null, params.x ?? null, params.y ?? null]);
114
+ return executeActionInTab(params, clickPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
115
115
  case "page.type":
116
- return executeInTab(params, typeIntoPage, [params.selector ?? null, params.text || "", Boolean(params.pressEnter)]);
116
+ return executeActionInTab(params, typeIntoPage, [params.selector ?? null, params.uid ?? null, params.text || "", Boolean(params.pressEnter)]);
117
+ case "page.fill":
118
+ return executeActionInTab(params, fillPage, [params.selector ?? null, params.uid ?? null, params.text || "", params.submit === true]);
117
119
  case "page.key":
118
- return executeInTab(params, pressKeyInPage, [params.key]);
120
+ return executeActionInTab(params, pressKeyInPage, [params.key]);
121
+ case "page.console.list":
122
+ return executeInTab(params, listConsoleMessages, [params.clear === true]);
123
+ case "page.network.list":
124
+ return executeInTab(params, listNetworkRequests, [params.includePreservedRequests === true, params.clear === true]);
125
+ case "page.network.get":
126
+ return executeInTab(params, getNetworkRequest, [params.requestId]);
119
127
  case "page.waitFor":
120
128
  return executeInTab(params, waitForPage, [params.kind, params.value, params.timeoutMs || 10000, params.intervalMs || 250]);
121
129
  case "page.navigate": {
@@ -193,13 +201,49 @@ async function getTabByParams(params) {
193
201
  async function executeInTab(params, func, args) {
194
202
  const tab = await getTabByParams(params);
195
203
  if (params.foreground) await bringToFront(tab);
204
+ const helperSource = [
205
+ getPiChromeState,
206
+ rememberElement,
207
+ elementBySelectorOrUid,
208
+ installPiChromeInstrumentation,
209
+ resolvePoint,
210
+ dispatchInputEvents,
211
+ setNativeValue,
212
+ normalizeKey,
213
+ ].map((helper) => helper.toString()).join("\n");
196
214
  const results = await chrome.scripting.executeScript({
197
215
  target: { tabId: tab.id },
198
216
  world: "MAIN",
199
- func,
200
- args,
217
+ func: async (helperSource, source, invocationArgs) => {
218
+ try {
219
+ (0, eval)(helperSource);
220
+ const injected = (0, eval)(`(${source})`);
221
+ return { ok: true, value: await injected(...invocationArgs) };
222
+ } catch (error) {
223
+ return { ok: false, error: error?.stack || error?.message || String(error) };
224
+ }
225
+ },
226
+ args: [helperSource, func.toString(), args],
201
227
  });
202
- return results?.[0]?.result;
228
+ const first = results?.[0];
229
+ if (first?.error) {
230
+ const message = typeof first.error === "string" ? first.error : (first.error.message || JSON.stringify(first.error));
231
+ throw new Error(message);
232
+ }
233
+ const envelope = first?.result;
234
+ if (envelope && typeof envelope === "object" && envelope.ok === false) {
235
+ throw new Error(envelope.error || "Chrome page script failed");
236
+ }
237
+ return envelope?.value;
238
+ }
239
+
240
/**
 * Runs a page action via `executeInTab` and, when `params.includeSnapshot`
 * is set, follows it with a `snapshotPage` call on the same tab.
 *
 * The follow-up snapshot is always requested with `foreground: false`.
 *
 * @param {object} params - Tab selection options plus `includeSnapshot` / `maxElements`.
 * @param {Function} func - Action to execute in the page.
 * @param {Array} args - Arguments forwarded to `func`.
 * @returns {Promise<*>} The action result, or `{ result, snapshot }` when a snapshot was requested.
 */
async function executeActionInTab(params, func, args) {
  const result = await executeInTab(params, func, args);
  if (!params.includeSnapshot) return result;

  const snapshotParams = { ...params, foreground: false };
  const snapshot = await executeInTab(snapshotParams, snapshotPage, [params.maxElements || 80]);
  return { result, snapshot };
}
204
248
 
205
249
  async function bringToFront(tab) {
@@ -224,7 +268,147 @@ function waitForTabComplete(tabId, timeoutMs) {
224
268
  });
225
269
  }
226
270
 
271
/**
 * Returns the per-page state bag stored on `window.__PI_CHROME_STATE__`,
 * creating it on first use.
 *
 * The property is defined as non-enumerable so that it does not appear when
 * the page enumerates its own globals; it stays readable, writable and
 * configurable under the same name.
 *
 * @returns {{nextElementUid: number, elements: object, console: Array, network: Array, nextRequestId: number, instrumentationInstalled: boolean}}
 */
function getPiChromeState() {
  const existing = window.__PI_CHROME_STATE__;
  if (existing) return existing;

  const state = {
    nextElementUid: 1,
    elements: {},
    console: [],
    network: [],
    nextRequestId: 1,
    instrumentationInstalled: false,
  };
  Object.defineProperty(window, "__PI_CHROME_STATE__", {
    value: state,
    writable: true,
    configurable: true,
    enumerable: false,
  });
  return state;
}
283
+
284
/**
 * Registers `element` in the page state and returns its uid.
 *
 * The uid is stored on the element itself as `__piChromeUid`, so an element
 * that was registered before keeps the same uid.
 *
 * @param {Element} element - Element to register.
 * @returns {string} The element uid, of the form `el-<n>`.
 */
function rememberElement(element) {
  const state = getPiChromeState();
  let uid = element.__piChromeUid;
  if (!uid) {
    uid = "el-" + state.nextElementUid;
    state.nextElementUid += 1;
    element.__piChromeUid = uid;
  }
  state.elements[uid] = element;
  return uid;
}
290
+
291
/**
 * Resolves an element from a snapshot uid or, failing that, a CSS selector.
 *
 * A uid takes precedence over the selector. Only uids that are own keys of
 * the registry are honoured, so names inherited from `Object.prototype`
 * (e.g. "constructor") never resolve. When a registered element is no longer
 * connected to the document its registry entry is dropped, so the registry
 * does not keep the detached node referenced.
 *
 * @param {?string} selector - CSS selector used when `uid` is not given.
 * @param {?string} uid - Element uid stored in the page state.
 * @returns {?Element} The element, or `null` when neither argument is given.
 * @throws {Error} When the uid has no live element or the selector matches nothing.
 */
function elementBySelectorOrUid(selector, uid) {
  if (uid) {
    const { elements } = getPiChromeState();
    const isRegistered = Object.prototype.hasOwnProperty.call(elements, uid);
    const element = isRegistered ? elements[uid] : undefined;
    if (!element || !element.isConnected) {
      // Prune the stale entry; a later snapshot re-registers the element if it returns.
      if (isRegistered) delete elements[uid];
      throw new Error(`No live element for uid: ${uid}. Take a fresh chrome_snapshot.`);
    }
    return element;
  }
  if (selector) {
    const element = document.querySelector(selector);
    if (!element) throw new Error(`No element matches selector: ${selector}`);
    return element;
  }
  return null;
}
304
+
305
/**
 * Installs, at most once per page state, wrappers that record console output
 * and fetch/XMLHttpRequest traffic into the state returned by
 * `getPiChromeState()`.
 *
 * - Console: `debug`/`log`/`info`/`warn`/`error` are wrapped; `error` and
 *   `unhandledrejection` window events are recorded as well. At most 500
 *   entries are kept. Entry ids come from a monotonically increasing counter,
 *   so they stay unique after the buffer is trimmed or cleared.
 * - Network: `window.fetch` and `XMLHttpRequest.prototype.open/send` are
 *   wrapped. At most 1000 entries are kept and response bodies are cut to
 *   200000 characters.
 *
 * NOTE(review): `response.clone().text()` reads the whole response body; for
 * long-lived streaming responses this presumably never settles — confirm.
 *
 * @returns {void}
 */
function installPiChromeInstrumentation() {
  const state = getPiChromeState();
  if (state.instrumentationInstalled) return;
  state.instrumentationInstalled = true;

  const MAX_CONSOLE_ENTRIES = 500;
  const MAX_NETWORK_ENTRIES = 1000;
  const MAX_BODY_CHARS = 200000;

  // Converts one console argument into something JSON-safe.
  const serializeArg = (arg) => {
    try {
      if (typeof arg === "string") return arg;
      if (arg instanceof Error) return { name: arg.name, message: arg.message, stack: arg.stack };
      return JSON.parse(JSON.stringify(arg));
    } catch {
      // Recording must never make the page's own console call throw.
      try {
        return String(arg);
      } catch {
        return "[unserializable]";
      }
    }
  };

  const pushConsole = (level, args) => {
    // A dedicated counter keeps ids unique; `length + 1` repeats once trimming starts.
    const id = state.nextConsoleId || 1;
    state.nextConsoleId = id + 1;
    state.console.push({
      id,
      level,
      timestamp: Date.now(),
      url: location.href,
      args: Array.from(args).map(serializeArg),
    });
    if (state.console.length > MAX_CONSOLE_ENTRIES) {
      state.console.splice(0, state.console.length - MAX_CONSOLE_ENTRIES);
    }
  };

  for (const level of ["debug", "log", "info", "warn", "error"]) {
    const original = console[level];
    if (typeof original !== "function" || original.__piChromeWrapped) continue;
    const wrapped = function (...args) {
      pushConsole(level, args);
      return original.apply(this, args);
    };
    wrapped.__piChromeWrapped = true;
    console[level] = wrapped;
  }
  window.addEventListener("error", (event) => pushConsole("pageerror", [event.message, event.filename + ":" + event.lineno + ":" + event.colno]));
  window.addEventListener("unhandledrejection", (event) => pushConsole("unhandledrejection", [event.reason]));

  const trimBody = (text) => (
    typeof text === "string" && text.length > MAX_BODY_CHARS
      ? text.slice(0, MAX_BODY_CHARS) + `\n[truncated ${text.length - MAX_BODY_CHARS} chars]`
      : text
  );
  const record = (entry) => {
    state.network.push(entry);
    if (state.network.length > MAX_NETWORK_ENTRIES) {
      state.network.splice(0, state.network.length - MAX_NETWORK_ENTRIES);
    }
    return entry;
  };

  if (window.fetch && !window.fetch.__piChromeWrapped) {
    const originalFetch = window.fetch.bind(window);
    const wrappedFetch = async (...args) => {
      const id = "req-" + state.nextRequestId++;
      const startedAt = Date.now();
      const input = args[0];
      const init = args[1] || {};
      // `input` may be a string, a Request (has `.url`) or a URL object (stringifies to its href).
      const url = typeof input === "string" ? input : (input?.url ?? input);
      // String() keeps a non-string `method` from throwing here before the real fetch runs.
      const method = String(init.method || input?.method || "GET").toUpperCase();
      const entry = record({ id, type: "fetch", method, url: String(url || ""), startedAt, pageUrl: location.href, status: "pending" });
      try {
        const response = await originalFetch(...args);
        entry.status = response.status;
        entry.statusText = response.statusText;
        entry.ok = response.ok;
        entry.responseUrl = response.url;
        entry.durationMs = Date.now() - startedAt;
        entry.responseHeaders = Array.from(response.headers.entries());
        // Read the body from a clone so the caller's response stays unconsumed.
        response.clone().text().then((text) => {
          entry.responseBody = trimBody(text);
          entry.responseBodyTruncated = typeof text === "string" && text.length > MAX_BODY_CHARS;
        }).catch((error) => { entry.responseBodyError = error?.message || String(error); });
        return response;
      } catch (error) {
        entry.error = error?.message || String(error);
        entry.durationMs = Date.now() - startedAt;
        throw error;
      }
    };
    wrappedFetch.__piChromeWrapped = true;
    window.fetch = wrappedFetch;
  }

  if (window.XMLHttpRequest && !XMLHttpRequest.prototype.open.__piChromeWrapped) {
    const originalOpen = XMLHttpRequest.prototype.open;
    const originalSend = XMLHttpRequest.prototype.send;
    XMLHttpRequest.prototype.open = function (method, url, ...rest) {
      // Remember method/url on the instance; send() has no access to them otherwise.
      this.__piChromeRequest = { method: String(method || "GET").toUpperCase(), url: String(url || "") };
      return originalOpen.call(this, method, url, ...rest);
    };
    XMLHttpRequest.prototype.open.__piChromeWrapped = true;
    XMLHttpRequest.prototype.send = function (body) {
      const id = "req-" + state.nextRequestId++;
      const startedAt = Date.now();
      const info = this.__piChromeRequest || {};
      const entry = record({ id, type: "xhr", method: info.method || "GET", url: info.url || "", startedAt, pageUrl: location.href, status: "pending" });
      this.addEventListener("loadend", () => {
        entry.status = this.status;
        entry.statusText = this.statusText;
        entry.responseUrl = this.responseURL;
        entry.durationMs = Date.now() - startedAt;
        try { entry.responseHeadersText = this.getAllResponseHeaders(); } catch { /* headers unavailable; leave unset */ }
        try {
          if (typeof this.responseText === "string") {
            entry.responseBody = trimBody(this.responseText);
            entry.responseBodyTruncated = this.responseText.length > MAX_BODY_CHARS;
          }
        } catch (error) { entry.responseBodyError = error?.message || String(error); }
      });
      // Record aborts and timeouts too, not only network errors.
      for (const kind of ["error", "abort", "timeout"]) {
        this.addEventListener(kind, () => {
          entry.error = "XMLHttpRequest " + kind;
          entry.durationMs = Date.now() - startedAt;
        });
      }
      return originalSend.call(this, body);
    };
  }
}
409
+
227
410
  function snapshotPage(maxElements) {
411
+ installPiChromeInstrumentation();
228
412
  const unique = (selector) => {
229
413
  try { return document.querySelectorAll(selector).length === 1; } catch { return false; }
230
414
  };
@@ -269,6 +453,7 @@ function snapshotPage(maxElements) {
269
453
  const rect = element.getBoundingClientRect();
270
454
  return {
271
455
  index,
456
+ uid: rememberElement(element),
272
457
  tag: element.tagName.toLowerCase(),
273
458
  selector: selectorFor(element),
274
459
  label: labelFor(element),
@@ -289,61 +474,123 @@ function snapshotPage(maxElements) {
289
474
  }
290
475
 
291
476
  async function evaluateExpression(expression, awaitPromise) {
477
+ installPiChromeInstrumentation();
292
478
  const indirectEval = (0, eval);
293
479
  const value = indirectEval(expression);
294
480
  return awaitPromise && value && typeof value.then === "function" ? await value : value;
295
481
  }
296
482
 
297
/**
 * Resolves a click target to an element plus viewport coordinates.
 *
 * With a uid or selector the element is scrolled to the viewport centre and
 * the centre of its bounding rect is returned. Otherwise the element under
 * the numeric `x`/`y` point is looked up.
 *
 * @param {?string} selector - CSS selector, used when `uid` is not given.
 * @param {?string} uid - Snapshot element uid.
 * @param {?number} x - Viewport x coordinate, used when no element is resolved.
 * @param {?number} y - Viewport y coordinate, used when no element is resolved.
 * @returns {{element: ?Element, x: number, y: number, rect: (DOMRect|undefined)}}
 * @throws {Error} When neither an element nor numeric coordinates are given.
 */
function resolvePoint(selector, uid, x, y) {
  const element = elementBySelectorOrUid(selector, uid);
  if (!element) {
    const hasCoordinates = typeof x === "number" && typeof y === "number";
    if (!hasCoordinates) throw new Error("Provide selector, uid, or x/y");
    return { element: document.elementFromPoint(x, y), x, y, rect: undefined };
  }

  element.scrollIntoView({ block: "center", inline: "center", behavior: "instant" });
  // Measure after scrolling so the coordinates reflect the final position.
  const rect = element.getBoundingClientRect();
  const centerX = rect.left + rect.width / 2;
  const centerY = rect.top + rect.height / 2;
  return { element, x: centerX, y: centerY, rect };
}
308
493
 
309
/**
 * Dispatches a synthetic left-button click sequence
 * (pointerdown, mousedown, pointerup, mouseup, click) on the resolved target.
 *
 * `pointerdown`/`pointerup` are created as `PointerEvent` (mouse pointer,
 * primary) when the constructor exists, so handlers that read `pointerType`
 * or `pointerId` receive real values; the other events are `MouseEvent`.
 *
 * @param {?string} selector - CSS selector of the target.
 * @param {?string} uid - Snapshot element uid of the target.
 * @param {?number} x - Viewport x coordinate when no selector/uid is given.
 * @param {?number} y - Viewport y coordinate when no selector/uid is given.
 * @returns {{x: number, y: number, selector: ?string, uid: ?string, tag: string}}
 * @throws {Error} When no element is found at the click point.
 */
function clickPage(selector, uid, x, y) {
  const point = resolvePoint(selector, uid, x, y);
  if (!point.element) throw new Error("No element at click point");

  const mouseInit = { bubbles: true, cancelable: true, view: window, clientX: point.x, clientY: point.y, button: 0 };
  const pointerInit = { ...mouseInit, pointerId: 1, pointerType: "mouse", isPrimary: true };
  const canUsePointerEvent = typeof PointerEvent === "function";

  for (const type of ["pointerdown", "mousedown", "pointerup", "mouseup", "click"]) {
    const event = canUsePointerEvent && type.startsWith("pointer")
      ? new PointerEvent(type, pointerInit)
      : new MouseEvent(type, mouseInit);
    point.element.dispatchEvent(event);
  }
  return { x: point.x, y: point.y, selector, uid, tag: point.element.tagName };
}
317
502
 
318
- function typeIntoPage(selector, text, pressEnter) {
319
- let element = selector ? document.querySelector(selector) : document.activeElement;
320
- if (!element) throw new Error(selector ? `No element matches selector: ${selector}` : "No active element");
503
/**
 * Fires the synthetic event sequence `beforeinput`, `input`, `change` on
 * `element`, in that order. All three bubble; only `beforeinput` is cancelable.
 *
 * @param {Element} element - Target that receives the events.
 * @param {?string} data - Text reported in the `data` field of the input events.
 * @param {string} [inputType="insertText"] - `inputType` reported by the input events.
 * @returns {void}
 */
function dispatchInputEvents(element, data, inputType = "insertText") {
  // Each event is created right before it is dispatched.
  const sequence = [
    () => new InputEvent("beforeinput", { bubbles: true, cancelable: true, inputType, data }),
    () => new InputEvent("input", { bubbles: true, inputType, data }),
    () => new Event("change", { bubbles: true }),
  ];
  for (const createEvent of sequence) {
    element.dispatchEvent(createEvent());
  }
}
508
+
509
/**
 * Sets `element.value` through the prototype's own `value` setter for
 * `<input>` and `<textarea>` elements, bypassing any `value` property
 * defined on the instance itself.
 *
 * Any other element that exposes `value` (e.g. `<select>`) is assigned
 * directly: calling the `HTMLInputElement` setter on a non-input element
 * throws.
 *
 * @param {Element} element - Element whose value is replaced.
 * @param {string} value - New value.
 * @returns {void}
 */
function setNativeValue(element, value) {
  let prototype = null;
  if (element instanceof HTMLTextAreaElement) {
    prototype = HTMLTextAreaElement.prototype;
  } else if (element instanceof HTMLInputElement) {
    prototype = HTMLInputElement.prototype;
  }

  const nativeSetter = prototype
    ? Object.getOwnPropertyDescriptor(prototype, "value")?.set
    : undefined;
  if (nativeSetter) {
    nativeSetter.call(element, value);
  } else {
    element.value = value;
  }
}
515
+
516
/**
 * Inserts `text` at the caret of the target element, or of the focused
 * element when neither `selector` nor `uid` is given.
 *
 * Content-editable targets use `document.execCommand("insertText")`. Targets
 * with a `value` get the text spliced into the current selection range,
 * followed by synthetic input events. Inputs whose type does not support
 * text selection report a null range and reject selection writes; for those
 * the text is appended and the caret update is skipped instead of aborting
 * before the input events are dispatched.
 *
 * @param {?string} selector - CSS selector of the target.
 * @param {?string} uid - Snapshot element uid of the target.
 * @param {string} text - Text to insert.
 * @param {boolean} pressEnter - When truthy, presses Enter afterwards.
 * @returns {{selector: ?string, uid: ?string, length: number, pressEnter: boolean}}
 * @throws {Error} When no target exists or the target is not text-editable.
 */
function typeIntoPage(selector, uid, text, pressEnter) {
  installPiChromeInstrumentation();
  const element = elementBySelectorOrUid(selector, uid) || document.activeElement;
  if (!element) throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
  element.focus();

  if (element.isContentEditable) {
    document.execCommand("insertText", false, text);
  } else if ("value" in element) {
    const current = element.value;
    const start = element.selectionStart ?? current.length;
    const end = element.selectionEnd ?? current.length;
    setNativeValue(element, current.slice(0, start) + text + current.slice(end));
    try {
      element.selectionStart = element.selectionEnd = start + text.length;
    } catch {
      // Input types without selection support reject this; the value is already set.
    }
    dispatchInputEvents(element, text, "insertText");
  } else {
    throw new Error("Focused element is not text-editable");
  }

  if (pressEnter) pressKeyInPage("Enter");
  return { selector, uid, length: text.length, pressEnter };
}
535
+
536
/**
 * Replaces the whole content of the target element (or of the focused
 * element when neither `selector` nor `uid` is given) with `text`.
 *
 * Content-editable targets are emptied and refilled through
 * `document.execCommand("insertText")`. Targets with a `value` are set via
 * `setNativeValue` and then receive synthetic input events with
 * `inputType` "insertReplacementText".
 *
 * @param {?string} selector - CSS selector of the target.
 * @param {?string} uid - Snapshot element uid of the target.
 * @param {string} text - Replacement text.
 * @param {boolean} submit - When truthy, presses Enter afterwards.
 * @returns {{selector: ?string, uid: ?string, length: number, submit: boolean}}
 * @throws {Error} When no target exists or the target is not text-editable.
 */
function fillPage(selector, uid, text, submit) {
  installPiChromeInstrumentation();
  const element = elementBySelectorOrUid(selector, uid) || document.activeElement;
  if (!element) {
    throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
  }
  element.focus();

  if (element.isContentEditable) {
    element.textContent = "";
    document.execCommand("insertText", false, text);
  } else if (!("value" in element)) {
    throw new Error("Focused element is not text-editable");
  } else {
    setNativeValue(element, text);
    const caret = String(text).length;
    // Best effort: some input types reject selection writes.
    try { element.selectionStart = element.selectionEnd = caret; } catch {}
    dispatchInputEvents(element, text, "insertReplacementText");
  }

  if (submit) pressKeyInPage("Enter");
  return { selector, uid, length: String(text).length, submit };
}
337
555
 
338
556
  function pressKeyInPage(key) {
339
- const target = document.activeElement || document.body;
340
557
  const normalized = normalizeKey(key);
558
+ const target = document.activeElement || document.body;
341
559
  target.dispatchEvent(new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true }));
342
560
  target.dispatchEvent(new KeyboardEvent("keyup", { key: normalized, bubbles: true, cancelable: true }));
343
- if (normalized === "Enter" && target instanceof HTMLFormElement) target.requestSubmit();
561
+ if (normalized === "Enter") {
562
+ const form = target.closest?.("form");
563
+ if (form) form.requestSubmit?.();
564
+ }
344
565
  return { key: normalized };
345
566
  }
346
567
 
568
/**
 * Returns a copy of the console messages recorded in the page state.
 *
 * @param {boolean} clear - When truthy, the recorded buffer is replaced by an empty one after copying.
 * @returns {{messages: Array, count: number}}
 */
function listConsoleMessages(clear) {
  installPiChromeInstrumentation();
  const state = getPiChromeState();
  const messages = [...state.console];
  if (clear) {
    state.console = [];
  }
  return { messages, count: messages.length };
}
575
+
576
/**
 * Summarizes the fetch/XHR requests recorded in the page state.
 *
 * Response bodies are left out of the summaries; `hasResponseBody` tells
 * whether one was recorded.
 *
 * @param {boolean} includePreservedRequests - When falsy, only requests whose `pageUrl` equals the current `location.href` are listed.
 * @param {boolean} clear - When truthy, the whole recorded list is emptied after reading.
 * @returns {{requests: Array, count: number, note: string}}
 */
function listNetworkRequests(includePreservedRequests, clear) {
  installPiChromeInstrumentation();
  const state = getPiChromeState();
  const currentUrl = location.href;

  const requests = [];
  for (const request of state.network) {
    if (!includePreservedRequests && request.pageUrl !== currentUrl) continue;
    const { responseBody, ...summary } = request;
    requests.push({ ...summary, hasResponseBody: responseBody !== undefined });
  }

  if (clear) state.network = [];
  return { requests, count: requests.length, note: "Captures fetch/XHR after instrumentation is installed (snapshot/evaluate/network/console tools install it). Browser-initiated document/static asset requests are not captured." };
}
586
+
587
/**
 * Returns the full recorded entry (response body included, when present)
 * of the request with the given id.
 *
 * @param {string} requestId - Id of the recorded request.
 * @returns {object} The recorded request entry.
 * @throws {Error} When no recorded request has that id.
 */
function getNetworkRequest(requestId) {
  installPiChromeInstrumentation();
  const { network } = getPiChromeState();
  for (const entry of network) {
    if (entry.id === requestId) return entry;
  }
  throw new Error(`No network request with id ${requestId}`);
}
593
+
347
594
  async function waitForPage(kind, value, timeoutMs, intervalMs) {
348
595
  const started = Date.now();
349
596
  while (Date.now() - started < timeoutMs) {
@@ -46,7 +46,7 @@ type BridgeResult = {
46
46
  error?: string;
47
47
  };
48
48
 
49
- const PI_CHROME_VERSION = "0.6.0";
49
+ const PI_CHROME_VERSION = "0.7.0";
50
50
  const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
51
51
  const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
52
52
  const DEFAULT_TIMEOUT_MS = 30_000;
@@ -376,7 +376,7 @@ export default function (pi: ExtensionAPI): void {
376
376
  <chrome-profile-bridge>
377
377
  Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
378
378
  This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
379
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
379
+ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing; use stable element uids from snapshots with chrome_click/chrome_type when available. For form work, use includeSnapshot=true on actions to verify in one round trip. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
380
380
  </chrome-profile-bridge>`;
381
381
  return { systemPrompt: event.systemPrompt + primer };
382
382
  });
@@ -390,13 +390,17 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
390
390
  const status = bridge.status();
391
391
  lines.push(`• Local bridge: mode=${status.mode}, url=${status.url}`);
392
392
  try {
393
+ const started = Date.now();
393
394
  const version = (await bridge.send("tab.version", {}, 35_000)) as {
394
395
  extensionId?: string;
395
396
  extensionVersion?: string;
397
+ bridgeUrl?: string;
396
398
  };
399
+ const latencyMs = Date.now() - started;
397
400
  if (version.extensionId)
398
- lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"})`);
399
- else lines.push("✓ Companion Chrome extension responding (no extension ID reported)");
401
+ lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"}, latency ${latencyMs}ms)`);
402
+ else lines.push(`✓ Companion Chrome extension responding (no extension ID reported, latency ${latencyMs}ms)`);
403
+ if (version.bridgeUrl) lines.push(`• Extension polling: ${version.bridgeUrl}`);
400
404
  if (version.extensionVersion && version.extensionVersion !== PI_CHROME_VERSION) {
401
405
  lines.push(
402
406
  `⚠ Extension version (${version.extensionVersion}) differs from pi-chrome (${PI_CHROME_VERSION}). Reload "Pi Existing Chrome Profile Bridge" in chrome://extensions to pick up the latest service worker.`,
@@ -529,7 +533,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
529
533
  name: "chrome_snapshot",
530
534
  label: "Chrome Snapshot",
531
535
  description:
532
- "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
536
+ "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with stable uids plus CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
533
537
  promptSnippet: "Inspect the current Chrome page and get CSS selectors for browser automation.",
534
538
  parameters: Type.Object({
535
539
  targetId: Type.Optional(Type.String()),
@@ -606,12 +610,15 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
606
610
  name: "chrome_click",
607
611
  label: "Chrome Click",
608
612
  description:
609
- "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently.",
610
- promptSnippet: "Click page elements in Chrome by selector or viewport coordinate.",
613
+ "Click a snapshot uid, CSS selector, or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently. Pass includeSnapshot=true to return a fresh snapshot after the click.",
614
+ promptSnippet: "Click page elements in Chrome by snapshot uid, selector, or viewport coordinate.",
611
615
  parameters: Type.Object({
612
- selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer selectors from chrome_snapshot." })),
613
- x: Type.Optional(Type.Number({ description: "Viewport x coordinate if selector is omitted." })),
614
- y: Type.Optional(Type.Number({ description: "Viewport y coordinate if selector is omitted." })),
616
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot. Prefer uid over selector after taking a snapshot." })),
617
+ selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer uid from chrome_snapshot when available." })),
618
+ x: Type.Optional(Type.Number({ description: "Viewport x coordinate if uid/selector is omitted." })),
619
+ y: Type.Optional(Type.Number({ description: "Viewport y coordinate if uid/selector is omitted." })),
620
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the click." })),
621
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
615
622
  targetId: Type.Optional(Type.String()),
616
623
  urlIncludes: Type.Optional(Type.String()),
617
624
  titleIncludes: Type.Optional(Type.String()),
@@ -623,7 +630,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
623
630
  }),
624
631
  async execute(_id, params): Promise<ToolTextResult> {
625
632
  const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
626
- return { content: [{ type: "text", text: `Clicked ${params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
633
+ return { content: [{ type: "text", text: `Clicked ${params.uid ?? params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
627
634
  },
628
635
  });
629
636
 
@@ -631,11 +638,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
631
638
  name: "chrome_type",
632
639
  label: "Chrome Type",
633
640
  description:
634
- "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently.",
635
- promptSnippet: "Type text into Chrome, optionally focusing a selector first.",
641
+ "Focus an optional snapshot uid or CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently. Pass includeSnapshot=true to return a fresh snapshot after typing.",
642
+ promptSnippet: "Type text into Chrome, optionally focusing a snapshot uid or selector first.",
636
643
  parameters: Type.Object({
637
644
  text: Type.String(),
645
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
638
646
  selector: Type.Optional(Type.String({ description: "CSS selector to focus before typing." })),
647
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after typing." })),
648
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
639
649
  pressEnter: Type.Optional(Type.Boolean()),
640
650
  targetId: Type.Optional(Type.String()),
641
651
  urlIncludes: Type.Optional(Type.String()),
@@ -648,7 +658,35 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
648
658
  }),
649
659
  async execute(_id, params): Promise<ToolTextResult> {
650
660
  const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
651
- return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.selector ? ` into ${params.selector}` : ""}.` }], details: { result: result as Json } };
661
+ return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
662
+ },
663
+ });
664
+
665
+ pi.registerTool({
666
+ name: "chrome_fill",
667
+ label: "Chrome Fill",
668
+ description:
669
+ "Set the full value of a text input, textarea, or contenteditable element using framework-aware native value setters and input/change events. Accepts a snapshot uid or CSS selector. Pass includeSnapshot=true to verify after filling.",
670
+ promptSnippet: "Fill a Chrome form field by snapshot uid or selector, optionally returning a fresh snapshot.",
671
+ parameters: Type.Object({
672
+ text: Type.String(),
673
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
674
+ selector: Type.Optional(Type.String({ description: "CSS selector to fill if uid is omitted." })),
675
+ submit: Type.Optional(Type.Boolean({ description: "If true, press Enter after filling." })),
676
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after filling." })),
677
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
678
+ targetId: Type.Optional(Type.String()),
679
+ urlIncludes: Type.Optional(Type.String()),
680
+ titleIncludes: Type.Optional(Type.String()),
681
+ background: Type.Optional(
682
+ Type.Boolean({ description: "If true, fill silently without focusing Chrome. Default false." }),
683
+ ),
684
+ host: Type.Optional(Type.String()),
685
+ port: Type.Optional(Type.Number()),
686
+ }),
687
+ async execute(_id, params): Promise<ToolTextResult> {
688
+ const result = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
689
+ return { content: [{ type: "text", text: `Filled ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
652
690
  },
653
691
  });
654
692
 
@@ -656,10 +694,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
656
694
  name: "chrome_key",
657
695
  label: "Chrome Key",
658
696
  description:
659
- "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently.",
697
+ "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently. Pass includeSnapshot=true to verify after the keypress.",
660
698
  promptSnippet: "Press keys in Chrome through the companion extension.",
661
699
  parameters: Type.Object({
662
700
  key: Type.String(),
701
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the keypress." })),
702
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
663
703
  targetId: Type.Optional(Type.String()),
664
704
  urlIncludes: Type.Optional(Type.String()),
665
705
  titleIncludes: Type.Optional(Type.String()),
@@ -697,6 +737,69 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
697
737
  },
698
738
  });
699
739
 
740
+ pi.registerTool({
741
+ name: "chrome_list_console_messages",
742
+ label: "Chrome Console Messages",
743
+ description:
744
+ "List console messages captured in the page by the companion extension. Capture starts after any chrome_snapshot, chrome_evaluate, chrome_list_console_messages, or chrome_list_network_requests call installs page instrumentation.",
745
+ promptSnippet: "List captured console messages from the active Chrome page.",
746
+ parameters: Type.Object({
747
+ clear: Type.Optional(Type.Boolean({ description: "Clear the captured console log after reading." })),
748
+ targetId: Type.Optional(Type.String()),
749
+ urlIncludes: Type.Optional(Type.String()),
750
+ titleIncludes: Type.Optional(Type.String()),
751
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
752
+ host: Type.Optional(Type.String()),
753
+ port: Type.Optional(Type.Number()),
754
+ }),
755
+ async execute(_id, params): Promise<ToolTextResult> {
756
+ const result = await bridge.send("page.console.list", withBackground(params), DEFAULT_TIMEOUT_MS);
757
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
758
+ },
759
+ });
760
+
761
+ pi.registerTool({
762
+ name: "chrome_list_network_requests",
763
+ label: "Chrome Network Requests",
764
+ description:
765
+ "List fetch/XMLHttpRequest activity captured in the page by the companion extension. Capture starts after instrumentation is installed by snapshot/evaluate/network/console tools; browser document/static asset requests are not captured. Use includePreservedRequests=true to keep requests from earlier same-tab navigations that were captured before navigation.",
766
+ promptSnippet: "List captured XHR/fetch requests from the active Chrome page before doing DOM-heavy debugging.",
767
+ parameters: Type.Object({
768
+ includePreservedRequests: Type.Optional(Type.Boolean({ description: "Include captured requests from earlier locations in the same tab/session." })),
769
+ clear: Type.Optional(Type.Boolean({ description: "Clear the captured request log after reading." })),
770
+ targetId: Type.Optional(Type.String()),
771
+ urlIncludes: Type.Optional(Type.String()),
772
+ titleIncludes: Type.Optional(Type.String()),
773
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
774
+ host: Type.Optional(Type.String()),
775
+ port: Type.Optional(Type.Number()),
776
+ }),
777
+ async execute(_id, params): Promise<ToolTextResult> {
778
+ const result = await bridge.send("page.network.list", withBackground(params), DEFAULT_TIMEOUT_MS);
779
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
780
+ },
781
+ });
782
+
783
+ pi.registerTool({
784
+ name: "chrome_get_network_request",
785
+ label: "Chrome Network Request",
786
+ description: "Retrieve one captured fetch/XMLHttpRequest entry, including response body when available, by requestId from chrome_list_network_requests.",
787
+ promptSnippet: "Fetch captured request details and response body by requestId.",
788
+ parameters: Type.Object({
789
+ requestId: Type.String({ description: "Request id returned by chrome_list_network_requests." }),
790
+ targetId: Type.Optional(Type.String()),
791
+ urlIncludes: Type.Optional(Type.String()),
792
+ titleIncludes: Type.Optional(Type.String()),
793
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
794
+ host: Type.Optional(Type.String()),
795
+ port: Type.Optional(Type.Number()),
796
+ }),
797
+ async execute(_id, params): Promise<ToolTextResult> {
798
+ const result = await bridge.send("page.network.get", withBackground(params), DEFAULT_TIMEOUT_MS);
799
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
800
+ },
801
+ });
802
+
700
803
  pi.registerTool({
701
804
  name: "chrome_screenshot",
702
805
  label: "Chrome Screenshot",
package/package.json CHANGED
@@ -1,31 +1,31 @@
1
1
  {
2
- "name": "pi-chrome",
3
- "version": "0.6.0",
4
- "description": "Drive your existing logged-in Chrome from Pi \u2014 no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
- "keywords": [
6
- "pi-package",
7
- "pi-extension",
8
- "chrome",
9
- "browser",
10
- "automation",
11
- "authenticated-session",
12
- "real-profile",
13
- "web-debugging"
14
- ],
15
- "license": "MIT",
16
- "type": "commonjs",
17
- "files": [
18
- "extensions",
19
- "README.md"
20
- ],
21
- "pi": {
22
- "extensions": [
23
- "./extensions/chrome-profile-bridge/index.ts"
24
- ]
25
- },
26
- "peerDependencies": {
27
- "@earendil-works/pi-ai": "*",
28
- "@earendil-works/pi-coding-agent": "*",
29
- "typebox": "*"
30
- }
2
+ "name": "pi-chrome",
3
+ "version": "0.7.0",
4
+ "description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
+ "keywords": [
6
+ "pi-package",
7
+ "pi-extension",
8
+ "chrome",
9
+ "browser",
10
+ "automation",
11
+ "authenticated-session",
12
+ "real-profile",
13
+ "web-debugging"
14
+ ],
15
+ "license": "MIT",
16
+ "type": "commonjs",
17
+ "files": [
18
+ "extensions",
19
+ "README.md"
20
+ ],
21
+ "pi": {
22
+ "extensions": [
23
+ "./extensions/chrome-profile-bridge/index.ts"
24
+ ]
25
+ },
26
+ "peerDependencies": {
27
+ "@earendil-works/pi-ai": "*",
28
+ "@earendil-works/pi-coding-agent": "*",
29
+ "typebox": "*"
30
+ }
31
31
  }