pi-chrome 0.2.2 → 0.3.1

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,6 +10,7 @@ Multiple Pi sessions can use Chrome at the same time. The first Pi session start
10
10
 
11
11
  - **Uses your existing Chrome profile** — works with the Chrome windows/tabs you are already using, including logged-in GitHub, admin dashboards, local apps, and internal tools.
12
12
  - **Full browser automation toolkit for Pi** — list/create/activate/close tabs, snapshot pages with usable CSS selectors, navigate, evaluate JavaScript, click, type, press keys, wait for page state, and capture screenshots.
13
+ - **Background by default** — agents can inspect, navigate, click, type, and snapshot without bringing Chrome to the foreground or interrupting whatever you are doing. Toggle this for the whole session with `/chrome-foreground` (useful for demos, pair-driving, and debugging), or pass `foreground: true` on a single tool call.
13
14
  - **Built-in setup and agent guidance** — `/chrome-onboard` walks users through installing the companion extension, `/chrome-status` checks connectivity, screenshots save to disk, and the prompt primer tells agents to inspect with `chrome_snapshot` before acting and avoid destructive actions unless explicitly requested.
14
15
 
15
16
  ## Install
@@ -111,20 +111,35 @@ async function dispatch(action, params) {
111
111
  return executeInTab(params, waitForPage, [params.kind, params.value, params.timeoutMs || 10000, params.intervalMs || 250]);
112
112
  case "page.navigate": {
113
113
  const tab = await getTabByParams(params);
114
+ if (params.foreground) await bringToFront(tab);
114
115
  const wait = params.waitUntilLoad !== false ? waitForTabComplete(tab.id, params.timeoutMs || 15000) : Promise.resolve(undefined);
115
- const updated = await chrome.tabs.update(tab.id, { url: params.url, active: true });
116
+ const updated = await chrome.tabs.update(tab.id, { url: params.url });
116
117
  await wait;
117
118
  return formatTab(await chrome.tabs.get(updated.id));
118
119
  }
119
120
  case "page.screenshot": {
120
121
  const tab = await getTabByParams(params);
121
- await chrome.windows.update(tab.windowId, { focused: true });
122
- await chrome.tabs.update(tab.id, { active: true });
123
- const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, {
124
- format: params.format || "png",
125
- quality: params.format === "jpeg" ? params.quality : undefined,
126
- });
127
- return { dataUrl, tab: formatTab(tab) };
122
+ if (params.foreground) await bringToFront(tab);
123
+ // captureVisibleTab requires the target tab to be the active tab in its window. Activate it
124
+ // without focusing the window so other apps don't get pushed behind Chrome, and restore the
125
+ // previous active tab afterwards to minimize disruption.
126
+ let previousActiveId;
127
+ if (!tab.active) {
128
+ const activeBefore = await chrome.tabs.query({ active: true, windowId: tab.windowId });
129
+ previousActiveId = activeBefore[0]?.id;
130
+ await chrome.tabs.update(tab.id, { active: true });
131
+ }
132
+ try {
133
+ const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, {
134
+ format: params.format || "png",
135
+ quality: params.format === "jpeg" ? params.quality : undefined,
136
+ });
137
+ return { dataUrl, tab: formatTab(tab) };
138
+ } finally {
139
+ if (previousActiveId !== undefined && previousActiveId !== tab.id) {
140
+ await chrome.tabs.update(previousActiveId, { active: true }).catch(() => undefined);
141
+ }
142
+ }
128
143
  }
129
144
  default:
130
145
  throw new Error(`Unknown action: ${action}`);
@@ -168,8 +183,7 @@ async function getTabByParams(params) {
168
183
 
169
184
  async function executeInTab(params, func, args) {
170
185
  const tab = await getTabByParams(params);
171
- await chrome.windows.update(tab.windowId, { focused: true });
172
- await chrome.tabs.update(tab.id, { active: true });
186
+ if (params.foreground) await bringToFront(tab);
173
187
  const results = await chrome.scripting.executeScript({
174
188
  target: { tabId: tab.id },
175
189
  world: "MAIN",
@@ -179,6 +193,11 @@ async function executeInTab(params, func, args) {
179
193
  return results?.[0]?.result;
180
194
  }
181
195
 
196
+ async function bringToFront(tab) {
197
+ await chrome.windows.update(tab.windowId, { focused: true });
198
+ await chrome.tabs.update(tab.id, { active: true });
199
+ }
200
+
182
201
  function waitForTabComplete(tabId, timeoutMs) {
183
202
  return new Promise((resolve, reject) => {
184
203
  const timer = setTimeout(() => {
@@ -315,6 +315,10 @@ const waitForValues = ["selector", "expression"] as const;
315
315
 
316
316
  export default function (pi: ExtensionAPI): void {
317
317
  const bridge = new ChromeProfileBridge(DEFAULT_HOST, DEFAULT_PORT);
318
+ let foregroundDefault = false;
319
+
320
+ const withForeground = <T extends Record<string, unknown>>(params: T): T =>
321
+ ({ ...params, foreground: ((params as { foreground?: boolean }).foreground) ?? foregroundDefault }) as T;
318
322
 
319
323
  pi.on("session_start", async (_event, ctx) => {
320
324
  await bridge.start();
@@ -337,7 +341,7 @@ export default function (pi: ExtensionAPI): void {
337
341
  <chrome-profile-bridge>
338
342
  Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
339
343
  This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
340
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested.
344
+ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools run in the background and do not bring Chrome to the foreground. The user can flip this for the whole session via /chrome-foreground; you can also pass foreground=true on a single tool call when you specifically need Chrome focused (for example, demoing a flow or capturing a screenshot the user should see).
341
345
  </chrome-profile-bridge>`;
342
346
  return { systemPrompt: event.systemPrompt + primer };
343
347
  });
@@ -360,6 +364,32 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
360
364
  },
361
365
  });
362
366
 
367
+ pi.registerCommand("chrome-foreground", {
368
+ description:
369
+ "Toggle whether chrome_* tools bring Chrome to the foreground. Foreground ON: you can watch the agent work in your browser, useful for demos, pair-driving, and debugging — tradeoff: Chrome pops up and steals focus, interrupting whatever app you were using. Foreground OFF (default): chrome_* tools act silently in the background, so your editor/terminal keeps focus and your workflow is not interrupted. Pass `on` / `off` to set explicitly, or no argument to toggle.",
370
+ getArgumentCompletions: (prefix) => {
371
+ const items = [
372
+ { value: "on", label: "on", description: "Bring Chrome to the foreground for chrome_* actions" },
373
+ { value: "off", label: "off", description: "Run chrome_* actions silently in the background" },
374
+ ];
375
+ const lowered = prefix.toLowerCase();
376
+ const matches = items.filter((item) => item.value.startsWith(lowered));
377
+ return matches.length > 0 ? matches : null;
378
+ },
379
+ handler: async (args, ctx) => {
380
+ const arg = (args || "").trim().toLowerCase();
381
+ if (arg === "on" || arg === "true" || arg === "1") foregroundDefault = true;
382
+ else if (arg === "off" || arg === "false" || arg === "0") foregroundDefault = false;
383
+ else foregroundDefault = !foregroundDefault;
384
+ ctx.ui.notify(
385
+ foregroundDefault
386
+ ? "Chrome foreground ON. chrome_* tools will focus Chrome and activate the target tab. Useful for demos and debugging — but Chrome will pop up over whatever you're doing."
387
+ : "Chrome foreground OFF. chrome_* tools run silently in the background. Your current app keeps focus.",
388
+ "info",
389
+ );
390
+ },
391
+ });
392
+
363
393
  pi.registerCommand("chrome-onboard", {
364
394
  description: "Guide Chrome extension setup for the existing-profile bridge",
365
395
  handler: async (_args, ctx) => {
@@ -448,18 +478,25 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
448
478
  name: "chrome_snapshot",
449
479
  label: "Chrome Snapshot",
450
480
  description:
451
- "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors.",
481
+ "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Runs in the background by default; pass foreground=true to bring Chrome to the front first.",
452
482
  promptSnippet: "Inspect the current Chrome page and get CSS selectors for browser automation.",
453
483
  parameters: Type.Object({
454
484
  targetId: Type.Optional(Type.String()),
455
485
  urlIncludes: Type.Optional(Type.String()),
456
486
  titleIncludes: Type.Optional(Type.String()),
457
487
  maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS })),
488
+ foreground: Type.Optional(
489
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before inspecting. Default false to avoid interrupting the user." }),
490
+ ),
458
491
  host: Type.Optional(Type.String()),
459
492
  port: Type.Optional(Type.Number()),
460
493
  }),
461
494
  async execute(_id, params): Promise<ToolTextResult> {
462
- const snapshot = await bridge.send("page.snapshot", { ...params, maxElements: params.maxElements ?? MAX_ELEMENTS }, DEFAULT_TIMEOUT_MS);
495
+ const snapshot = await bridge.send(
496
+ "page.snapshot",
497
+ withForeground({ ...params, maxElements: params.maxElements ?? MAX_ELEMENTS }),
498
+ DEFAULT_TIMEOUT_MS,
499
+ );
463
500
  return { content: [{ type: "text", text: truncateText(safeJson(snapshot)) }], details: { snapshot } };
464
501
  },
465
502
  });
@@ -467,7 +504,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
467
504
  pi.registerTool({
468
505
  name: "chrome_navigate",
469
506
  label: "Chrome Navigate",
470
- description: "Navigate an existing Chrome tab to a URL via the companion extension. Optionally waits for load completion.",
507
+ description:
508
+ "Navigate an existing Chrome tab to a URL via the companion extension. Navigates in the background by default and does not change the user's currently active tab; pass foreground=true to also focus Chrome and activate the tab. Optionally waits for load completion.",
471
509
  promptSnippet: "Navigate a Chrome tab in the user's existing profile.",
472
510
  parameters: Type.Object({
473
511
  url: Type.String(),
@@ -476,11 +514,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
476
514
  titleIncludes: Type.Optional(Type.String()),
477
515
  waitUntilLoad: Type.Optional(Type.Boolean({ default: true })),
478
516
  timeoutMs: Type.Optional(Type.Number({ default: 15_000 })),
517
+ foreground: Type.Optional(
518
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before navigating. Default false." }),
519
+ ),
479
520
  host: Type.Optional(Type.String()),
480
521
  port: Type.Optional(Type.Number()),
481
522
  }),
482
523
  async execute(_id, params): Promise<ToolTextResult> {
483
- const result = await bridge.send("page.navigate", params, params.timeoutMs ?? 15_000);
524
+ const result = await bridge.send("page.navigate", withForeground(params), params.timeoutMs ?? 15_000);
484
525
  return { content: [{ type: "text", text: `Navigated to ${params.url}` }], details: { result: result as Json } };
485
526
  },
486
527
  });
@@ -489,7 +530,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
489
530
  name: "chrome_evaluate",
490
531
  label: "Chrome Evaluate",
491
532
  description:
492
- "Evaluate JavaScript in an existing Chrome tab through the companion extension. Runs in the page context and returns JSON-serializable values when possible.",
533
+ "Evaluate JavaScript in an existing Chrome tab through the companion extension. Runs in the page context (background) and returns JSON-serializable values when possible. Pass foreground=true to also focus Chrome and activate the tab.",
493
534
  promptSnippet: "Evaluate JavaScript in the active Chrome tab through the companion extension.",
494
535
  parameters: Type.Object({
495
536
  expression: Type.String(),
@@ -498,11 +539,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
498
539
  targetId: Type.Optional(Type.String()),
499
540
  urlIncludes: Type.Optional(Type.String()),
500
541
  titleIncludes: Type.Optional(Type.String()),
542
+ foreground: Type.Optional(
543
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before evaluating. Default false." }),
544
+ ),
501
545
  host: Type.Optional(Type.String()),
502
546
  port: Type.Optional(Type.Number()),
503
547
  }),
504
548
  async execute(_id, params): Promise<ToolTextResult> {
505
- const value = await bridge.send("page.evaluate", params, DEFAULT_TIMEOUT_MS);
549
+ const value = await bridge.send("page.evaluate", withForeground(params), DEFAULT_TIMEOUT_MS);
506
550
  return { content: [{ type: "text", text: truncateText(typeof value === "string" ? value : safeJson(value)) }], details: { value: value as Json } };
507
551
  },
508
552
  });
@@ -510,7 +554,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
510
554
  pi.registerTool({
511
555
  name: "chrome_click",
512
556
  label: "Chrome Click",
513
- description: "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension.",
557
+ description:
558
+ "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event in the background by default; pass foreground=true to focus Chrome and activate the tab first.",
514
559
  promptSnippet: "Click page elements in Chrome by selector or viewport coordinate.",
515
560
  parameters: Type.Object({
516
561
  selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer selectors from chrome_snapshot." })),
@@ -519,11 +564,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
519
564
  targetId: Type.Optional(Type.String()),
520
565
  urlIncludes: Type.Optional(Type.String()),
521
566
  titleIncludes: Type.Optional(Type.String()),
567
+ foreground: Type.Optional(
568
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before clicking. Default false." }),
569
+ ),
522
570
  host: Type.Optional(Type.String()),
523
571
  port: Type.Optional(Type.Number()),
524
572
  }),
525
573
  async execute(_id, params): Promise<ToolTextResult> {
526
- const result = await bridge.send("page.click", params, DEFAULT_TIMEOUT_MS);
574
+ const result = await bridge.send("page.click", withForeground(params), DEFAULT_TIMEOUT_MS);
527
575
  return { content: [{ type: "text", text: `Clicked ${params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
528
576
  },
529
577
  });
@@ -531,7 +579,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
531
579
  pi.registerTool({
532
580
  name: "chrome_type",
533
581
  label: "Chrome Type",
534
- description: "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension.",
582
+ description:
583
+ "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. Runs in the background by default; pass foreground=true to focus Chrome and activate the tab first.",
535
584
  promptSnippet: "Type text into Chrome, optionally focusing a selector first.",
536
585
  parameters: Type.Object({
537
586
  text: Type.String(),
@@ -540,11 +589,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
540
589
  targetId: Type.Optional(Type.String()),
541
590
  urlIncludes: Type.Optional(Type.String()),
542
591
  titleIncludes: Type.Optional(Type.String()),
592
+ foreground: Type.Optional(
593
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before typing. Default false." }),
594
+ ),
543
595
  host: Type.Optional(Type.String()),
544
596
  port: Type.Optional(Type.Number()),
545
597
  }),
546
598
  async execute(_id, params): Promise<ToolTextResult> {
547
- const result = await bridge.send("page.type", params, DEFAULT_TIMEOUT_MS);
599
+ const result = await bridge.send("page.type", withForeground(params), DEFAULT_TIMEOUT_MS);
548
600
  return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.selector ? ` into ${params.selector}` : ""}.` }], details: { result: result as Json } };
549
601
  },
550
602
  });
@@ -552,18 +604,22 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
552
604
  pi.registerTool({
553
605
  name: "chrome_key",
554
606
  label: "Chrome Key",
555
- description: "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character).",
607
+ description:
608
+ "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). Runs in the background by default; pass foreground=true to focus Chrome and activate the tab first.",
556
609
  promptSnippet: "Press keys in Chrome through the companion extension.",
557
610
  parameters: Type.Object({
558
611
  key: Type.String(),
559
612
  targetId: Type.Optional(Type.String()),
560
613
  urlIncludes: Type.Optional(Type.String()),
561
614
  titleIncludes: Type.Optional(Type.String()),
615
+ foreground: Type.Optional(
616
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before sending the key. Default false." }),
617
+ ),
562
618
  host: Type.Optional(Type.String()),
563
619
  port: Type.Optional(Type.Number()),
564
620
  }),
565
621
  async execute(_id, params): Promise<ToolTextResult> {
566
- const result = await bridge.send("page.key", params, DEFAULT_TIMEOUT_MS);
622
+ const result = await bridge.send("page.key", withForeground(params), DEFAULT_TIMEOUT_MS);
567
623
  return { content: [{ type: "text", text: `Pressed ${params.key}.` }], details: { result: result as Json } };
568
624
  },
569
625
  });
@@ -593,7 +649,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
593
649
  pi.registerTool({
594
650
  name: "chrome_screenshot",
595
651
  label: "Chrome Screenshot",
596
- description: "Capture a screenshot of an existing Chrome tab via the companion extension and save it to disk.",
652
+ description:
653
+ "Capture a screenshot of an existing Chrome tab via the companion extension and save it to disk. Chrome's extension screenshot API requires the target tab to be the active tab in its window, so this momentarily activates it (without focusing the window) and restores the previous active tab. Pass foreground=true to also bring the Chrome window to the front.",
597
654
  promptSnippet: "Capture Chrome screenshots and save them under .pi/chrome-screenshots by default.",
598
655
  parameters: Type.Object({
599
656
  path: Type.Optional(Type.String({ description: "Output path. Defaults to .pi/chrome-screenshots/<timestamp>.<format>." })),
@@ -603,6 +660,9 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
603
660
  targetId: Type.Optional(Type.String()),
604
661
  urlIncludes: Type.Optional(Type.String()),
605
662
  titleIncludes: Type.Optional(Type.String()),
663
+ foreground: Type.Optional(
664
+ Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before capturing. Default false." }),
665
+ ),
606
666
  host: Type.Optional(Type.String()),
607
667
  port: Type.Optional(Type.Number()),
608
668
  }),
@@ -611,7 +671,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
611
671
  const cwd = workspaceCwd(ctx);
612
672
  const defaultPath = join(cwd, ".pi", "chrome-screenshots", `${new Date().toISOString().replace(/[:.]/g, "-")}.${format}`);
613
673
  const outputPath = params.path ? resolve(cwd, params.path) : defaultPath;
614
- const result = (await bridge.send("page.screenshot", params, DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
674
+ const result = (await bridge.send("page.screenshot", withForeground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
615
675
  const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
616
676
  await mkdir(dirname(outputPath), { recursive: true });
617
677
  await writeFile(outputPath, Buffer.from(base64, "base64"));
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-chrome",
3
- "version": "0.2.2",
4
- "description": "Control your existing authenticated Chrome profile from one or more Pi sessions with tabs, snapshots, clicks, typing, JS evaluation, waits, and screenshots.",
3
+ "version": "0.3.1",
4
+ "description": "Control your existing authenticated Chrome profile from one or more Pi sessions with tabs, snapshots, clicks, typing, JS evaluation, waits, and screenshots. Background-by-default so Chrome stops popping up when agents act.",
5
5
  "keywords": [
6
6
  "pi-package",
7
7
  "pi-extension",