pi-chrome 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "Pi Existing Chrome Profile Bridge",
4
- "version": "0.6.1",
4
+ "version": "0.7.0",
5
5
  "description": "Lets Pi control tabs in this existing Chrome profile via a local bridge at 127.0.0.1.",
6
6
  "permissions": ["tabs", "scripting", "storage", "activeTab", "alarms"],
7
7
  "host_permissions": ["<all_urls>", "http://127.0.0.1:17318/*"],
@@ -111,11 +111,19 @@ async function dispatch(action, params) {
111
111
  case "page.evaluate":
112
112
  return executeInTab(params, evaluateExpression, [params.expression, params.awaitPromise !== false]);
113
113
  case "page.click":
114
- return executeInTab(params, clickPage, [params.selector ?? null, params.x ?? null, params.y ?? null]);
114
+ return executeActionInTab(params, clickPage, [params.selector ?? null, params.uid ?? null, params.x ?? null, params.y ?? null]);
115
115
  case "page.type":
116
- return executeInTab(params, typeIntoPage, [params.selector ?? null, params.text || "", Boolean(params.pressEnter)]);
116
+ return executeActionInTab(params, typeIntoPage, [params.selector ?? null, params.uid ?? null, params.text || "", Boolean(params.pressEnter)]);
117
+ case "page.fill":
118
+ return executeActionInTab(params, fillPage, [params.selector ?? null, params.uid ?? null, params.text || "", params.submit === true]);
117
119
  case "page.key":
118
- return executeInTab(params, pressKeyInPage, [params.key]);
120
+ return executeActionInTab(params, pressKeyInPage, [params.key]);
121
+ case "page.console.list":
122
+ return executeInTab(params, listConsoleMessages, [params.clear === true]);
123
+ case "page.network.list":
124
+ return executeInTab(params, listNetworkRequests, [params.includePreservedRequests === true, params.clear === true]);
125
+ case "page.network.get":
126
+ return executeInTab(params, getNetworkRequest, [params.requestId]);
119
127
  case "page.waitFor":
120
128
  return executeInTab(params, waitForPage, [params.kind, params.value, params.timeoutMs || 10000, params.intervalMs || 250]);
121
129
  case "page.navigate": {
@@ -193,18 +201,29 @@ async function getTabByParams(params) {
193
201
  async function executeInTab(params, func, args) {
194
202
  const tab = await getTabByParams(params);
195
203
  if (params.foreground) await bringToFront(tab);
204
+ const helperSource = [
205
+ getPiChromeState,
206
+ rememberElement,
207
+ elementBySelectorOrUid,
208
+ installPiChromeInstrumentation,
209
+ resolvePoint,
210
+ dispatchInputEvents,
211
+ setNativeValue,
212
+ normalizeKey,
213
+ ].map((helper) => helper.toString()).join("\n");
196
214
  const results = await chrome.scripting.executeScript({
197
215
  target: { tabId: tab.id },
198
216
  world: "MAIN",
199
- func: async (source, invocationArgs) => {
217
+ func: async (helperSource, source, invocationArgs) => {
200
218
  try {
219
+ (0, eval)(helperSource);
201
220
  const injected = (0, eval)(`(${source})`);
202
221
  return { ok: true, value: await injected(...invocationArgs) };
203
222
  } catch (error) {
204
223
  return { ok: false, error: error?.stack || error?.message || String(error) };
205
224
  }
206
225
  },
207
- args: [func.toString(), args],
226
+ args: [helperSource, func.toString(), args],
208
227
  });
209
228
  const first = results?.[0];
210
229
  if (first?.error) {
@@ -218,6 +237,15 @@ async function executeInTab(params, func, args) {
218
237
  return envelope?.value;
219
238
  }
220
239
 
240
/**
 * Runs a page action in the target tab and optionally follows it with a page snapshot.
 *
 * @param {object} params - Request parameters; `includeSnapshot` and `maxElements` are read here,
 *   the rest is forwarded to `executeInTab`.
 * @param {Function} func - Page function to run for the action.
 * @param {Array} args - Arguments passed to `func`.
 * @returns {Promise<*>} The action result, or `{ result, snapshot }` when `params.includeSnapshot` is truthy.
 */
async function executeActionInTab(params, func, args) {
  const actionOutcome = await executeInTab(params, func, args);
  if (!params.includeSnapshot) return actionOutcome;

  // The follow-up snapshot passes foreground: false so executeInTab does not
  // bring the tab to the front a second time.
  const snapshotParams = Object.assign({}, params, { foreground: false });
  const elementLimit = params.maxElements || 80;
  const pageSnapshot = await executeInTab(snapshotParams, snapshotPage, [elementLimit]);
  return { result: actionOutcome, snapshot: pageSnapshot };
}
248
+
221
249
  async function bringToFront(tab) {
222
250
  await chrome.windows.update(tab.windowId, { focused: true });
223
251
  await chrome.tabs.update(tab.id, { active: true });
@@ -240,7 +268,147 @@ function waitForTabComplete(tabId, timeoutMs) {
240
268
  });
241
269
  }
242
270
 
271
/**
 * Returns the state object stored on `window.__PI_CHROME_STATE__`,
 * creating it with empty defaults the first time it is requested.
 *
 * @returns {object} The shared state (element registry, console/network buffers, counters).
 */
function getPiChromeState() {
  if (!window.__PI_CHROME_STATE__) {
    window.__PI_CHROME_STATE__ = {
      nextElementUid: 1,
      elements: {},
      console: [],
      network: [],
      nextRequestId: 1,
      instrumentationInstalled: false,
    };
  }
  return window.__PI_CHROME_STATE__;
}
283
+
284
/**
 * Registers an element in the shared state and returns its uid.
 * The uid is stored on the element as `__piChromeUid`, so registering the
 * same element again reuses the uid it already carries.
 *
 * @param {Element} element - Element to register.
 * @returns {string} The element's uid (`"el-<n>"`).
 */
function rememberElement(element) {
  const registry = getPiChromeState();
  let uid = element.__piChromeUid;
  if (!uid) {
    uid = "el-" + registry.nextElementUid;
    registry.nextElementUid += 1;
    element.__piChromeUid = uid;
  }
  registry.elements[uid] = element;
  return uid;
}
290
+
291
/**
 * Looks up an element by uid (checked first) or by CSS selector.
 *
 * @param {?string} selector - CSS selector, used only when `uid` is falsy.
 * @param {?string} uid - Uid previously produced by `rememberElement`.
 * @returns {?Element} The element, or `null` when neither argument is given.
 * @throws {Error} When the uid has no registered element still connected to the
 *   document, or the selector matches nothing.
 */
function elementBySelectorOrUid(selector, uid) {
  if (uid) {
    const remembered = getPiChromeState().elements[uid];
    const isLive = Boolean(remembered && remembered.isConnected);
    if (!isLive) throw new Error(`No live element for uid: ${uid}. Take a fresh chrome_snapshot.`);
    return remembered;
  }
  if (!selector) return null;

  const match = document.querySelector(selector);
  if (!match) throw new Error(`No element matches selector: ${selector}`);
  return match;
}
304
+
305
/**
 * Installs console and fetch/XHR capture in the page, once per state object.
 *
 * Wraps `console.debug/log/info/warn/error`, listens for `error` and
 * `unhandledrejection`, and wraps `window.fetch` plus `XMLHttpRequest` open/send.
 * Captured data goes to `state.console` (last 500 entries) and `state.network`
 * (last 1000 entries); response bodies are truncated to 200000 characters.
 *
 * This function is injected into the page as source text (`toString()`), so every
 * constant and helper it needs is declared inside it rather than at module level.
 *
 * @returns {void}
 */
function installPiChromeInstrumentation() {
  const state = getPiChromeState();
  if (state.instrumentationInstalled) return;
  state.instrumentationInstalled = true;

  const MAX_CONSOLE_ENTRIES = 500;
  const MAX_NETWORK_ENTRIES = 1000;
  const MAX_BODY_CHARS = 200000;

  // Turns one console argument into something JSON-safe. Never throws: values
  // that can be neither JSON-encoded nor stringified become a placeholder.
  const serializeConsoleArg = (arg) => {
    try {
      if (typeof arg === "string") return arg;
      if (arg instanceof Error) return { name: arg.name, message: arg.message, stack: arg.stack };
      return JSON.parse(JSON.stringify(arg));
    } catch {
      try {
        return String(arg);
      } catch {
        return "[unserializable]";
      }
    }
  };

  const pushConsole = (level, args) => {
    // Monotonic counter: ids stay unique after the buffer is trimmed or cleared
    // (deriving the id from the buffer length repeated ids once the cap was hit).
    state.nextConsoleId = (state.nextConsoleId ?? state.console.length) + 1;
    state.console.push({
      id: state.nextConsoleId,
      level,
      timestamp: Date.now(),
      url: location.href,
      args: Array.from(args).map((arg) => serializeConsoleArg(arg)),
    });
    if (state.console.length > MAX_CONSOLE_ENTRIES) {
      state.console.splice(0, state.console.length - MAX_CONSOLE_ENTRIES);
    }
  };

  for (const level of ["debug", "log", "info", "warn", "error"]) {
    const original = console[level];
    if (typeof original !== "function" || original.__piChromeWrapped) continue;
    const wrapped = function (...args) {
      try {
        pushConsole(level, args);
      } catch {
        // Capture is best-effort: a recording failure must not break the page's own console call.
      }
      return original.apply(this, args);
    };
    wrapped.__piChromeWrapped = true;
    console[level] = wrapped;
  }
  window.addEventListener("error", (event) => pushConsole("pageerror", [event.message, event.filename + ":" + event.lineno + ":" + event.colno]));
  window.addEventListener("unhandledrejection", (event) => pushConsole("unhandledrejection", [event.reason]));

  const trimBody = (text) => typeof text === "string" && text.length > MAX_BODY_CHARS
    ? text.slice(0, MAX_BODY_CHARS) + `\n[truncated ${text.length - MAX_BODY_CHARS} chars]`
    : text;

  const record = (entry) => {
    state.network.push(entry);
    if (state.network.length > MAX_NETWORK_ENTRIES) {
      state.network.splice(0, state.network.length - MAX_NETWORK_ENTRIES);
    }
    return entry;
  };

  // fetch() accepts a string, a Request (has .url) or any stringifiable object
  // such as URL (has .href); cover all of them instead of only string/Request.
  const describeFetchTarget = (input) => {
    if (typeof input === "string") return input;
    if (input && typeof input.url === "string") return input.url;
    if (input && typeof input.href === "string") return input.href;
    if (input === undefined || input === null) return "";
    try {
      return String(input);
    } catch {
      return "";
    }
  };

  if (window.fetch && !window.fetch.__piChromeWrapped) {
    const originalFetch = window.fetch.bind(window);
    const wrappedFetch = async (...args) => {
      const id = "req-" + state.nextRequestId++;
      const startedAt = Date.now();
      const [input, init] = args;
      // String() guards against a non-string method value throwing before the real fetch runs.
      const method = String(init?.method || input?.method || "GET").toUpperCase();
      const entry = record({ id, type: "fetch", method, url: describeFetchTarget(input), startedAt, pageUrl: location.href, status: "pending" });
      try {
        const response = await originalFetch(...args);
        entry.status = response.status;
        entry.statusText = response.statusText;
        entry.ok = response.ok;
        entry.responseUrl = response.url;
        entry.durationMs = Date.now() - startedAt;
        entry.responseHeaders = Array.from(response.headers.entries());
        // Read the body from a clone, without awaiting, so the caller still gets
        // an unread response immediately.
        response.clone().text().then((text) => {
          entry.responseBody = trimBody(text);
          entry.responseBodyTruncated = typeof text === "string" && text.length > MAX_BODY_CHARS;
        }).catch((error) => { entry.responseBodyError = error?.message || String(error); });
        return response;
      } catch (error) {
        // Mark the entry as finished so it does not stay "pending" forever.
        entry.status = "failed";
        entry.error = error?.message || String(error);
        entry.durationMs = Date.now() - startedAt;
        throw error;
      }
    };
    wrappedFetch.__piChromeWrapped = true;
    window.fetch = wrappedFetch;
  }

  if (window.XMLHttpRequest && !XMLHttpRequest.prototype.open.__piChromeWrapped) {
    const originalOpen = XMLHttpRequest.prototype.open;
    const originalSend = XMLHttpRequest.prototype.send;
    XMLHttpRequest.prototype.open = function (method, url, ...rest) {
      // Remember method/url on the instance; send() has no access to them otherwise.
      this.__piChromeRequest = { method: String(method || "GET").toUpperCase(), url: String(url || "") };
      return originalOpen.call(this, method, url, ...rest);
    };
    XMLHttpRequest.prototype.open.__piChromeWrapped = true;
    XMLHttpRequest.prototype.send = function (body) {
      const id = "req-" + state.nextRequestId++;
      const startedAt = Date.now();
      const info = this.__piChromeRequest || {};
      const entry = record({ id, type: "xhr", method: info.method || "GET", url: info.url || "", startedAt, pageUrl: location.href, status: "pending" });
      this.addEventListener("loadend", () => {
        entry.status = this.status;
        entry.statusText = this.statusText;
        entry.responseUrl = this.responseURL;
        entry.durationMs = Date.now() - startedAt;
        try {
          entry.responseHeadersText = this.getAllResponseHeaders();
        } catch {
          // Headers are optional detail; leave the field unset when unavailable.
        }
        try {
          if (typeof this.responseText === "string") {
            entry.responseBody = trimBody(this.responseText);
            entry.responseBodyTruncated = this.responseText.length > MAX_BODY_CHARS;
          }
        } catch (error) {
          entry.responseBodyError = error?.message || String(error);
        }
      });
      this.addEventListener("error", () => { entry.error = "XMLHttpRequest error"; entry.durationMs = Date.now() - startedAt; });
      return originalSend.call(this, body);
    };
  }
}
409
+
243
410
  function snapshotPage(maxElements) {
411
+ installPiChromeInstrumentation();
244
412
  const unique = (selector) => {
245
413
  try { return document.querySelectorAll(selector).length === 1; } catch { return false; }
246
414
  };
@@ -285,6 +453,7 @@ function snapshotPage(maxElements) {
285
453
  const rect = element.getBoundingClientRect();
286
454
  return {
287
455
  index,
456
+ uid: rememberElement(element),
288
457
  tag: element.tagName.toLowerCase(),
289
458
  selector: selectorFor(element),
290
459
  label: labelFor(element),
@@ -305,108 +474,123 @@ function snapshotPage(maxElements) {
305
474
  }
306
475
 
307
476
  async function evaluateExpression(expression, awaitPromise) {
477
+ installPiChromeInstrumentation();
308
478
  const indirectEval = (0, eval);
309
479
  const value = indirectEval(expression);
310
480
  return awaitPromise && value && typeof value.then === "function" ? await value : value;
311
481
  }
312
482
 
313
- function resolvePoint(selector, x, y) {
314
- if (selector) {
315
- const element = document.querySelector(selector);
316
- if (!element) throw new Error(`No element matches selector: ${selector}`);
483
/**
 * Resolves a click target to an element plus viewport coordinates.
 *
 * With a uid or selector, the element is scrolled into view and the point is the
 * center of its bounding rect. Otherwise `x`/`y` are used as given and the element
 * is whatever `document.elementFromPoint` reports there.
 *
 * @param {?string} selector - CSS selector.
 * @param {?string} uid - Element uid.
 * @param {?number} x - Viewport x, used when no element is resolved.
 * @param {?number} y - Viewport y, used when no element is resolved.
 * @returns {{element: ?Element, x: number, y: number, rect: (DOMRect|undefined)}}
 * @throws {Error} When no element is resolved and `x`/`y` are not both numbers.
 */
function resolvePoint(selector, uid, x, y) {
  const target = elementBySelectorOrUid(selector, uid);
  if (!target) {
    const hasCoordinates = typeof x === "number" && typeof y === "number";
    if (!hasCoordinates) throw new Error("Provide selector, uid, or x/y");
    return { element: document.elementFromPoint(x, y), x, y, rect: undefined };
  }

  target.scrollIntoView({ block: "center", inline: "center", behavior: "instant" });
  // Measure after scrolling so the coordinates reflect the element's final position.
  const rect = target.getBoundingClientRect();
  const centerX = rect.left + rect.width / 2;
  const centerY = rect.top + rect.height / 2;
  return { element: target, x: centerX, y: centerY, rect };
}
324
493
 
325
- function clickPage(selector, x, y) {
326
- const resolvePoint = (selector, x, y) => {
327
- if (selector) {
328
- const element = document.querySelector(selector);
329
- if (!element) throw new Error(`No element matches selector: ${selector}`);
330
- element.scrollIntoView({ block: "center", inline: "center", behavior: "instant" });
331
- const rect = element.getBoundingClientRect();
332
- return { element, x: rect.left + rect.width / 2, y: rect.top + rect.height / 2, rect };
333
- }
334
- if (typeof x !== "number" || typeof y !== "number") throw new Error("Provide selector or x/y");
335
- return { element: document.elementFromPoint(x, y), x, y, rect: undefined };
336
- };
337
- const point = resolvePoint(selector, x, y);
494
/**
 * Dispatches a synthetic click sequence (pointerdown, mousedown, pointerup,
 * mouseup, click) at the resolved target.
 *
 * `pointerdown`/`pointerup` are created as `PointerEvent` when that constructor
 * exists, so listeners reading `pointerId`/`pointerType` get real values; the
 * remaining events are `MouseEvent`s.
 *
 * @param {?string} selector - CSS selector of the target.
 * @param {?string} uid - Element uid of the target.
 * @param {?number} x - Viewport x, used when no selector/uid is given.
 * @param {?number} y - Viewport y, used when no selector/uid is given.
 * @returns {{x: number, y: number, selector: ?string, uid: ?string, tag: string}}
 * @throws {Error} When no element is found at the click point.
 */
function clickPage(selector, uid, x, y) {
  const point = resolvePoint(selector, uid, x, y);
  if (!point.element) throw new Error("No element at click point");

  const mouseInit = { bubbles: true, cancelable: true, view: window, clientX: point.x, clientY: point.y, button: 0 };
  const pointerInit = { ...mouseInit, pointerId: 1, pointerType: "mouse", isPrimary: true };
  const hasPointerEvents = typeof PointerEvent === "function";

  for (const type of ["pointerdown", "mousedown", "pointerup", "mouseup", "click"]) {
    const event = hasPointerEvents && type.startsWith("pointer")
      ? new PointerEvent(type, pointerInit)
      : new MouseEvent(type, mouseInit);
    point.element.dispatchEvent(event);
  }
  return { x: point.x, y: point.y, selector, uid, tag: point.element.tagName };
}
344
502
 
345
- function typeIntoPage(selector, text, pressEnter) {
346
- const normalizeKey = (key) => {
347
- const table = {
348
- enter: "Enter",
349
- escape: "Escape",
350
- tab: "Tab",
351
- backspace: "Backspace",
352
- delete: "Delete",
353
- arrowup: "ArrowUp",
354
- arrowdown: "ArrowDown",
355
- arrowleft: "ArrowLeft",
356
- arrowright: "ArrowRight",
357
- };
358
- return table[String(key).toLowerCase()] || key;
359
- };
360
- const pressKey = (key) => {
361
- const target = document.activeElement || document.body;
362
- const normalized = normalizeKey(key);
363
- target.dispatchEvent(new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true }));
364
- target.dispatchEvent(new KeyboardEvent("keyup", { key: normalized, bubbles: true, cancelable: true }));
365
- if (normalized === "Enter" && target instanceof HTMLFormElement) target.requestSubmit();
366
- return { key: normalized };
367
- };
368
- let element = selector ? document.querySelector(selector) : document.activeElement;
369
- if (!element) throw new Error(selector ? `No element matches selector: ${selector}` : "No active element");
503
/**
 * Fires `beforeinput`, `input` and `change` on an element, in that order.
 *
 * @param {EventTarget} element - Element receiving the events.
 * @param {?string} data - Value for the `data` field of the two InputEvents.
 * @param {string} [inputType="insertText"] - Value for their `inputType` field.
 * @returns {void}
 */
function dispatchInputEvents(element, data, inputType = "insertText") {
  // Each event is constructed right before it is dispatched.
  const eventFactories = [
    () => new InputEvent("beforeinput", { bubbles: true, cancelable: true, inputType, data }),
    () => new InputEvent("input", { bubbles: true, inputType, data }),
    () => new Event("change", { bubbles: true }),
  ];
  for (const createEvent of eventFactories) {
    element.dispatchEvent(createEvent());
  }
}
508
+
509
/**
 * Sets an element's `value`, going through the `value` setter of the matching
 * built-in prototype when the element is a textarea or input.
 *
 * Calling the prototype setter bypasses any `value` property defined on the
 * element instance itself (presumably so frameworks that shadow `value` still
 * see the change — confirm against the targeted frameworks).
 *
 * Other elements that expose `value` (for example `<select>`) are assigned
 * directly: invoking the input setter on them would throw.
 *
 * @param {Element} element - Element whose value is set.
 * @param {string} value - New value.
 * @returns {void}
 */
function setNativeValue(element, value) {
  let prototype = null;
  if (typeof HTMLTextAreaElement === "function" && element instanceof HTMLTextAreaElement) {
    prototype = HTMLTextAreaElement.prototype;
  } else if (typeof HTMLInputElement === "function" && element instanceof HTMLInputElement) {
    prototype = HTMLInputElement.prototype;
  }

  const nativeSetter = prototype ? Object.getOwnPropertyDescriptor(prototype, "value")?.set : undefined;
  if (nativeSetter) nativeSetter.call(element, value);
  else element.value = value;
}
515
+
516
/**
 * Inserts text into the element identified by uid/selector, or into the
 * currently focused element when neither is given.
 *
 * Contenteditable targets use `document.execCommand("insertText")`. Elements
 * with a `value` get the text spliced in at the current selection, followed by
 * input events.
 *
 * @param {?string} selector - CSS selector of the target.
 * @param {?string} uid - Element uid of the target.
 * @param {string} text - Text to insert.
 * @param {boolean} pressEnter - When truthy, Enter is pressed afterwards.
 * @returns {{selector: ?string, uid: ?string, length: number, pressEnter: boolean}}
 * @throws {Error} When no target exists or the target is not text-editable.
 */
function typeIntoPage(selector, uid, text, pressEnter) {
  installPiChromeInstrumentation();
  const element = elementBySelectorOrUid(selector, uid) || document.activeElement;
  if (!element) throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
  element.focus();

  if (element.isContentEditable) {
    document.execCommand("insertText", false, text);
  } else if ("value" in element) {
    const current = String(element.value ?? "");
    // selectionStart/End are null on inputs without selection support; append in that case.
    const start = element.selectionStart ?? current.length;
    const end = element.selectionEnd ?? current.length;
    setNativeValue(element, current.slice(0, start) + text + current.slice(end));
    try {
      element.selectionStart = element.selectionEnd = start + text.length;
    } catch {
      // Inputs such as type=email/number reject selection assignment with
      // InvalidStateError. The caret is cosmetic, so carry on and still fire the
      // input events; fillPage guards the same assignment.
    }
    dispatchInputEvents(element, text, "insertText");
  } else {
    throw new Error("Focused element is not text-editable");
  }

  if (pressEnter) pressKeyInPage("Enter");
  return { selector, uid, length: text.length, pressEnter };
}
535
+
536
/**
 * Replaces the whole content of the element identified by uid/selector, or of
 * the currently focused element when neither is given.
 *
 * @param {?string} selector - CSS selector of the target.
 * @param {?string} uid - Element uid of the target.
 * @param {string} text - New content.
 * @param {boolean} submit - When truthy, Enter is pressed afterwards.
 * @returns {{selector: ?string, uid: ?string, length: number, submit: boolean}}
 * @throws {Error} When no target exists or the target is not text-editable.
 */
function fillPage(selector, uid, text, submit) {
  installPiChromeInstrumentation();
  const target = elementBySelectorOrUid(selector, uid) || document.activeElement;
  if (!target) {
    throw new Error(selector || uid ? `No element for ${selector || uid}` : "No active element");
  }
  target.focus();

  const isRichText = target.isContentEditable;
  if (!isRichText && !("value" in target)) {
    throw new Error("Focused element is not text-editable");
  }

  if (isRichText) {
    // Clear existing content, then insert through execCommand.
    target.textContent = "";
    document.execCommand("insertText", false, text);
  } else {
    setNativeValue(target, text);
    const caret = String(text).length;
    // Moving the caret is best-effort; failures are ignored.
    try { target.selectionStart = target.selectionEnd = caret; } catch {}
    dispatchInputEvents(target, text, "insertReplacementText");
  }

  if (submit) pressKeyInPage("Enter");
  return { selector, uid, length: String(text).length, submit };
}
386
555
 
387
556
  function pressKeyInPage(key) {
388
- const normalizeKey = (key) => {
389
- const table = {
390
- enter: "Enter",
391
- escape: "Escape",
392
- tab: "Tab",
393
- backspace: "Backspace",
394
- delete: "Delete",
395
- arrowup: "ArrowUp",
396
- arrowdown: "ArrowDown",
397
- arrowleft: "ArrowLeft",
398
- arrowright: "ArrowRight",
399
- };
400
- return table[String(key).toLowerCase()] || key;
401
- };
402
- const target = document.activeElement || document.body;
403
557
  const normalized = normalizeKey(key);
558
+ const target = document.activeElement || document.body;
404
559
  target.dispatchEvent(new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true }));
405
560
  target.dispatchEvent(new KeyboardEvent("keyup", { key: normalized, bubbles: true, cancelable: true }));
406
- if (normalized === "Enter" && target instanceof HTMLFormElement) target.requestSubmit();
561
+ if (normalized === "Enter") {
562
+ const form = target.closest?.("form");
563
+ if (form) form.requestSubmit?.();
564
+ }
407
565
  return { key: normalized };
408
566
  }
409
567
 
568
/**
 * Returns a copy of the captured console entries.
 *
 * @param {boolean} clear - When truthy, the stored buffer is replaced with an empty array after copying.
 * @returns {{messages: Array<object>, count: number}}
 */
function listConsoleMessages(clear) {
  installPiChromeInstrumentation();
  const pageState = getPiChromeState();
  const captured = [...pageState.console];
  if (clear) pageState.console = [];
  return { messages: captured, count: captured.length };
}
575
+
576
/**
 * Returns summaries of captured fetch/XHR requests, with `responseBody`
 * replaced by a `hasResponseBody` flag.
 *
 * @param {boolean} includePreservedRequests - When falsy, only requests whose
 *   `pageUrl` equals the current `location.href` are returned.
 * @param {boolean} clear - When truthy, the whole stored buffer is emptied
 *   afterwards (including entries filtered out of this result).
 * @returns {{requests: Array<object>, count: number, note: string}}
 */
function listNetworkRequests(includePreservedRequests, clear) {
  installPiChromeInstrumentation();
  const pageState = getPiChromeState();
  const currentUrl = location.href;

  const requests = [];
  for (const entry of pageState.network) {
    if (!includePreservedRequests && entry.pageUrl !== currentUrl) continue;
    const { responseBody, ...summary } = entry;
    requests.push({ ...summary, hasResponseBody: responseBody !== undefined });
  }

  if (clear) pageState.network = [];
  return {
    requests,
    count: requests.length,
    note: "Captures fetch/XHR after instrumentation is installed (snapshot/evaluate/network/console tools install it). Browser-initiated document/static asset requests are not captured.",
  };
}
586
+
587
/**
 * Returns the full captured record (response body included) for one request id.
 *
 * @param {string} requestId - Id as reported by `listNetworkRequests` (`"req-<n>"`).
 * @returns {object} The stored request entry.
 * @throws {Error} When no captured request has that id.
 */
function getNetworkRequest(requestId) {
  installPiChromeInstrumentation();
  const { network } = getPiChromeState();
  for (const entry of network) {
    if (entry.id === requestId) return entry;
  }
  throw new Error(`No network request with id ${requestId}`);
}
593
+
410
594
  async function waitForPage(kind, value, timeoutMs, intervalMs) {
411
595
  const started = Date.now();
412
596
  while (Date.now() - started < timeoutMs) {
@@ -46,7 +46,7 @@ type BridgeResult = {
46
46
  error?: string;
47
47
  };
48
48
 
49
- const PI_CHROME_VERSION = "0.6.1";
49
+ const PI_CHROME_VERSION = "0.7.0";
50
50
  const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
51
51
  const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
52
52
  const DEFAULT_TIMEOUT_MS = 30_000;
@@ -376,7 +376,7 @@ export default function (pi: ExtensionAPI): void {
376
376
  <chrome-profile-bridge>
377
377
  Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
378
378
  This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
379
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
379
+ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing; use stable element uids from snapshots with chrome_click/chrome_type when available. For form work, use includeSnapshot=true on actions to verify in one round trip. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
380
380
  </chrome-profile-bridge>`;
381
381
  return { systemPrompt: event.systemPrompt + primer };
382
382
  });
@@ -390,13 +390,17 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
390
390
  const status = bridge.status();
391
391
  lines.push(`• Local bridge: mode=${status.mode}, url=${status.url}`);
392
392
  try {
393
+ const started = Date.now();
393
394
  const version = (await bridge.send("tab.version", {}, 35_000)) as {
394
395
  extensionId?: string;
395
396
  extensionVersion?: string;
397
+ bridgeUrl?: string;
396
398
  };
399
+ const latencyMs = Date.now() - started;
397
400
  if (version.extensionId)
398
- lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"})`);
399
- else lines.push("✓ Companion Chrome extension responding (no extension ID reported)");
401
+ lines.push(`✓ Companion Chrome extension responding (ID: ${version.extensionId}, ext v${version.extensionVersion ?? "unknown"}, latency ${latencyMs}ms)`);
402
+ else lines.push(`✓ Companion Chrome extension responding (no extension ID reported, latency ${latencyMs}ms)`);
403
+ if (version.bridgeUrl) lines.push(`• Extension polling: ${version.bridgeUrl}`);
400
404
  if (version.extensionVersion && version.extensionVersion !== PI_CHROME_VERSION) {
401
405
  lines.push(
402
406
  `⚠ Extension version (${version.extensionVersion}) differs from pi-chrome (${PI_CHROME_VERSION}). Reload "Pi Existing Chrome Profile Bridge" in chrome://extensions to pick up the latest service worker.`,
@@ -529,7 +533,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
529
533
  name: "chrome_snapshot",
530
534
  label: "Chrome Snapshot",
531
535
  description:
532
- "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
536
+ "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with stable uids plus CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
533
537
  promptSnippet: "Inspect the current Chrome page and get CSS selectors for browser automation.",
534
538
  parameters: Type.Object({
535
539
  targetId: Type.Optional(Type.String()),
@@ -606,12 +610,15 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
606
610
  name: "chrome_click",
607
611
  label: "Chrome Click",
608
612
  description:
609
- "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently.",
610
- promptSnippet: "Click page elements in Chrome by selector or viewport coordinate.",
613
+ "Click a snapshot uid, CSS selector, or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently. Pass includeSnapshot=true to return a fresh snapshot after the click.",
614
+ promptSnippet: "Click page elements in Chrome by snapshot uid, selector, or viewport coordinate.",
611
615
  parameters: Type.Object({
612
- selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer selectors from chrome_snapshot." })),
613
- x: Type.Optional(Type.Number({ description: "Viewport x coordinate if selector is omitted." })),
614
- y: Type.Optional(Type.Number({ description: "Viewport y coordinate if selector is omitted." })),
616
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot. Prefer uid over selector after taking a snapshot." })),
617
+ selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer uid from chrome_snapshot when available." })),
618
+ x: Type.Optional(Type.Number({ description: "Viewport x coordinate if uid/selector is omitted." })),
619
+ y: Type.Optional(Type.Number({ description: "Viewport y coordinate if uid/selector is omitted." })),
620
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the click." })),
621
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
615
622
  targetId: Type.Optional(Type.String()),
616
623
  urlIncludes: Type.Optional(Type.String()),
617
624
  titleIncludes: Type.Optional(Type.String()),
@@ -623,7 +630,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
623
630
  }),
624
631
  async execute(_id, params): Promise<ToolTextResult> {
625
632
  const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
626
- return { content: [{ type: "text", text: `Clicked ${params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
633
+ return { content: [{ type: "text", text: `Clicked ${params.uid ?? params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
627
634
  },
628
635
  });
629
636
 
@@ -631,11 +638,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
631
638
  name: "chrome_type",
632
639
  label: "Chrome Type",
633
640
  description:
634
- "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently.",
635
- promptSnippet: "Type text into Chrome, optionally focusing a selector first.",
641
+ "Focus an optional snapshot uid or CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently. Pass includeSnapshot=true to return a fresh snapshot after typing.",
642
+ promptSnippet: "Type text into Chrome, optionally focusing a snapshot uid or selector first.",
636
643
  parameters: Type.Object({
637
644
  text: Type.String(),
645
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
638
646
  selector: Type.Optional(Type.String({ description: "CSS selector to focus before typing." })),
647
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after typing." })),
648
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
639
649
  pressEnter: Type.Optional(Type.Boolean()),
640
650
  targetId: Type.Optional(Type.String()),
641
651
  urlIncludes: Type.Optional(Type.String()),
@@ -648,7 +658,35 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
648
658
  }),
649
659
  async execute(_id, params): Promise<ToolTextResult> {
650
660
  const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
651
- return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.selector ? ` into ${params.selector}` : ""}.` }], details: { result: result as Json } };
661
+ return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
662
+ },
663
+ });
664
+
665
+ pi.registerTool({
666
+ name: "chrome_fill",
667
+ label: "Chrome Fill",
668
+ description:
669
+ "Set the full value of a text input, textarea, or contenteditable element using framework-aware native value setters and input/change events. Accepts a snapshot uid or CSS selector. Pass includeSnapshot=true to verify after filling.",
670
+ promptSnippet: "Fill a Chrome form field by snapshot uid or selector, optionally returning a fresh snapshot.",
671
+ parameters: Type.Object({
672
+ text: Type.String(),
673
+ uid: Type.Optional(Type.String({ description: "Stable element uid from chrome_snapshot." })),
674
+ selector: Type.Optional(Type.String({ description: "CSS selector to fill if uid is omitted." })),
675
+ submit: Type.Optional(Type.Boolean({ description: "If true, press Enter after filling." })),
676
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after filling." })),
677
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
678
+ targetId: Type.Optional(Type.String()),
679
+ urlIncludes: Type.Optional(Type.String()),
680
+ titleIncludes: Type.Optional(Type.String()),
681
+ background: Type.Optional(
682
+ Type.Boolean({ description: "If true, fill silently without focusing Chrome. Default false." }),
683
+ ),
684
+ host: Type.Optional(Type.String()),
685
+ port: Type.Optional(Type.Number()),
686
+ }),
687
+ async execute(_id, params): Promise<ToolTextResult> {
688
+ const result = await bridge.send("page.fill", withBackground(params), DEFAULT_TIMEOUT_MS);
689
+ return { content: [{ type: "text", text: `Filled ${params.text.length} character(s)${params.uid || params.selector ? ` into ${params.uid ?? params.selector}` : ""}.` }], details: { result: result as Json } };
652
690
  },
653
691
  });
654
692
 
@@ -656,10 +694,12 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
656
694
  name: "chrome_key",
657
695
  label: "Chrome Key",
658
696
  description:
659
- "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently.",
697
+ "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently. Pass includeSnapshot=true to verify after the keypress.",
660
698
  promptSnippet: "Press keys in Chrome through the companion extension.",
661
699
  parameters: Type.Object({
662
700
  key: Type.String(),
701
+ includeSnapshot: Type.Optional(Type.Boolean({ description: "If true, include a fresh chrome_snapshot result after the keypress." })),
702
+ maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS, description: "Max elements in the included snapshot." })),
663
703
  targetId: Type.Optional(Type.String()),
664
704
  urlIncludes: Type.Optional(Type.String()),
665
705
  titleIncludes: Type.Optional(Type.String()),
@@ -697,6 +737,69 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
697
737
  },
698
738
  });
699
739
 
740
+ pi.registerTool({
741
+ name: "chrome_list_console_messages",
742
+ label: "Chrome Console Messages",
743
+ description:
744
+ "List console messages captured in the page by the companion extension. Capture starts after any chrome_snapshot, chrome_evaluate, chrome_list_console_messages, or chrome_list_network_requests call installs page instrumentation.",
745
+ promptSnippet: "List captured console messages from the active Chrome page.",
746
+ parameters: Type.Object({
747
+ clear: Type.Optional(Type.Boolean({ description: "Clear the captured console log after reading." })),
748
+ targetId: Type.Optional(Type.String()),
749
+ urlIncludes: Type.Optional(Type.String()),
750
+ titleIncludes: Type.Optional(Type.String()),
751
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
752
+ host: Type.Optional(Type.String()),
753
+ port: Type.Optional(Type.Number()),
754
+ }),
755
+ async execute(_id, params): Promise<ToolTextResult> {
756
+ const result = await bridge.send("page.console.list", withBackground(params), DEFAULT_TIMEOUT_MS);
757
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
758
+ },
759
+ });
760
+
761
+ pi.registerTool({
762
+ name: "chrome_list_network_requests",
763
+ label: "Chrome Network Requests",
764
+ description:
765
+ "List fetch/XMLHttpRequest activity captured in the page by the companion extension. Capture starts after instrumentation is installed by snapshot/evaluate/network/console tools; browser document/static asset requests are not captured. Use includePreservedRequests=true to keep requests from earlier same-tab navigations that were captured before navigation.",
766
+ promptSnippet: "List captured XHR/fetch requests from the active Chrome page before doing DOM-heavy debugging.",
767
+ parameters: Type.Object({
768
+ includePreservedRequests: Type.Optional(Type.Boolean({ description: "Include captured requests from earlier locations in the same tab/session." })),
769
+ clear: Type.Optional(Type.Boolean({ description: "Clear the captured request log after reading." })),
770
+ targetId: Type.Optional(Type.String()),
771
+ urlIncludes: Type.Optional(Type.String()),
772
+ titleIncludes: Type.Optional(Type.String()),
773
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
774
+ host: Type.Optional(Type.String()),
775
+ port: Type.Optional(Type.Number()),
776
+ }),
777
+ async execute(_id, params): Promise<ToolTextResult> {
778
+ const result = await bridge.send("page.network.list", withBackground(params), DEFAULT_TIMEOUT_MS);
779
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
780
+ },
781
+ });
782
+
783
+ pi.registerTool({
784
+ name: "chrome_get_network_request",
785
+ label: "Chrome Network Request",
786
+ description: "Retrieve one captured fetch/XMLHttpRequest entry, including response body when available, by requestId from chrome_list_network_requests.",
787
+ promptSnippet: "Fetch captured request details and response body by requestId.",
788
+ parameters: Type.Object({
789
+ requestId: Type.String({ description: "Request id returned by chrome_list_network_requests." }),
790
+ targetId: Type.Optional(Type.String()),
791
+ urlIncludes: Type.Optional(Type.String()),
792
+ titleIncludes: Type.Optional(Type.String()),
793
+ background: Type.Optional(Type.Boolean({ description: "If true, run silently without focusing Chrome. Default false." })),
794
+ host: Type.Optional(Type.String()),
795
+ port: Type.Optional(Type.Number()),
796
+ }),
797
+ async execute(_id, params): Promise<ToolTextResult> {
798
+ const result = await bridge.send("page.network.get", withBackground(params), DEFAULT_TIMEOUT_MS);
799
+ return { content: [{ type: "text", text: truncateText(safeJson(result)) }], details: { result: result as Json } };
800
+ },
801
+ });
802
+
700
803
  pi.registerTool({
701
804
  name: "chrome_screenshot",
702
805
  label: "Chrome Screenshot",
package/package.json CHANGED
@@ -1,31 +1,31 @@
1
1
  {
2
- "name": "pi-chrome",
3
- "version": "0.6.1",
4
- "description": "Drive your existing logged-in Chrome from Pi \u2014 no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
- "keywords": [
6
- "pi-package",
7
- "pi-extension",
8
- "chrome",
9
- "browser",
10
- "automation",
11
- "authenticated-session",
12
- "real-profile",
13
- "web-debugging"
14
- ],
15
- "license": "MIT",
16
- "type": "commonjs",
17
- "files": [
18
- "extensions",
19
- "README.md"
20
- ],
21
- "pi": {
22
- "extensions": [
23
- "./extensions/chrome-profile-bridge/index.ts"
24
- ]
25
- },
26
- "peerDependencies": {
27
- "@earendil-works/pi-ai": "*",
28
- "@earendil-works/pi-coding-agent": "*",
29
- "typebox": "*"
30
- }
2
+ "name": "pi-chrome",
3
+ "version": "0.7.0",
4
+ "description": "Drive your existing logged-in Chrome from Pi — no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
+ "keywords": [
6
+ "pi-package",
7
+ "pi-extension",
8
+ "chrome",
9
+ "browser",
10
+ "automation",
11
+ "authenticated-session",
12
+ "real-profile",
13
+ "web-debugging"
14
+ ],
15
+ "license": "MIT",
16
+ "type": "commonjs",
17
+ "files": [
18
+ "extensions",
19
+ "README.md"
20
+ ],
21
+ "pi": {
22
+ "extensions": [
23
+ "./extensions/chrome-profile-bridge/index.ts"
24
+ ]
25
+ },
26
+ "peerDependencies": {
27
+ "@earendil-works/pi-ai": "*",
28
+ "@earendil-works/pi-coding-agent": "*",
29
+ "typebox": "*"
30
+ }
31
31
  }