pi-chrome 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ Multiple Pi sessions can use Chrome at the same time. The first Pi session start
11
11
  ## Why try it?
12
12
 
13
13
  - **Uses your existing Chrome profile** — works with the Chrome windows/tabs you are already using, including logged-in GitHub, admin dashboards, local apps, and internal tools.
14
- - **Background by default** — agents can inspect, navigate, click, type, and snapshot without bringing Chrome to the foreground or interrupting whatever you are doing. Toggle for the whole session with `/chrome-foreground`, or pass `foreground: true` on a single tool call.
14
+ - **Watch your authenticated Chrome work** — by default, `chrome_*` tool calls focus Chrome and activate the target tab so you can see the agent inspect, navigate, click, and type in real time. Switch to silent/background mode for the whole session with `/chrome-background`, or pass `background: true` on a single tool call when you want quiet.
15
15
  - **Full browser automation toolkit for Pi** — list/create/activate/close tabs, snapshot pages with usable CSS selectors, navigate, evaluate JavaScript, click, type, press keys, wait for page state, and capture screenshots.
16
16
  - **Built-in setup and agent guidance** — `/chrome-onboard` walks users through installing the companion extension, `/chrome-doctor` checks connectivity and version drift, screenshots save to disk, and the prompt primer tells agents to inspect with `chrome_snapshot` before acting and avoid destructive actions unless explicitly requested.
17
17
 
@@ -65,28 +65,28 @@ pi-chrome v<version>
65
65
  ✓ Companion Chrome extension responding (ID: <chrome-extension-id>, ext v<version>)
66
66
  ```
67
67
 
68
- ## Foreground control
68
+ ## Background mode
69
69
 
70
- By default, `chrome_*` tools act silently in the background — your editor or terminal keeps focus and Chrome does not pop up. This lets agents work alongside you without interrupting whatever you are doing.
70
+ By default, `chrome_*` tools focus Chrome and activate the target tab so you can watch the agent work — great for demos, pair-driving, debugging, and first-time confidence that things are happening.
71
71
 
72
- When you want to watch the agent (demos, pair-driving, debugging), turn foreground on for the whole Pi session:
72
+ When you want quiet (planner / audit / worker sessions running alongside your editor), turn background mode on for the whole Pi session:
73
73
 
74
74
  ```text
75
- /chrome-foreground # toggle
76
- /chrome-foreground on # explicit
77
- /chrome-foreground off # explicit
75
+ /chrome-background # toggle
76
+ /chrome-background on # explicit
77
+ /chrome-background off # explicit
78
78
  ```
79
79
 
80
- For a single tool call, the agent can pass `foreground: true` directly. The per-call value always wins over the session toggle.
80
+ For a single tool call, the agent can pass `background: true` directly. The per-call value always wins over the session toggle.
81
81
 
82
82
  ## Quick demo prompts
83
83
 
84
84
  After setup, try one of these in Pi:
85
85
 
86
- Background inspection (no Chrome interruption):
86
+ Silent inspection (no Chrome interruption):
87
87
 
88
88
  ```text
89
- Inspect my active GitHub tab in the background with chrome_snapshot and summarize the PR state without focusing Chrome.
89
+ Inspect my active GitHub tab with chrome_snapshot using background:true and summarize the PR state without focusing Chrome.
90
90
  ```
91
91
 
92
92
  Existing authenticated tab:
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "Pi Existing Chrome Profile Bridge",
4
- "version": "0.5.0",
4
+ "version": "0.6.1",
5
5
  "description": "Lets Pi control tabs in this existing Chrome profile via a local bridge at 127.0.0.1.",
6
6
  "permissions": ["tabs", "scripting", "storage", "activeTab", "alarms"],
7
7
  "host_permissions": ["<all_urls>", "http://127.0.0.1:17318/*"],
@@ -196,10 +196,26 @@ async function executeInTab(params, func, args) {
196
196
  const results = await chrome.scripting.executeScript({
197
197
  target: { tabId: tab.id },
198
198
  world: "MAIN",
199
- func,
200
- args,
199
+ func: async (source, invocationArgs) => {
200
+ try {
201
+ const injected = (0, eval)(`(${source})`);
202
+ return { ok: true, value: await injected(...invocationArgs) };
203
+ } catch (error) {
204
+ return { ok: false, error: error?.stack || error?.message || String(error) };
205
+ }
206
+ },
207
+ args: [func.toString(), args],
201
208
  });
202
- return results?.[0]?.result;
209
+ const first = results?.[0];
210
+ if (first?.error) {
211
+ const message = typeof first.error === "string" ? first.error : (first.error.message || JSON.stringify(first.error));
212
+ throw new Error(message);
213
+ }
214
+ const envelope = first?.result;
215
+ if (envelope && typeof envelope === "object" && envelope.ok === false) {
216
+ throw new Error(envelope.error || "Chrome page script failed");
217
+ }
218
+ return envelope?.value;
203
219
  }
204
220
 
205
221
  async function bringToFront(tab) {
@@ -307,6 +323,17 @@ function resolvePoint(selector, x, y) {
307
323
  }
308
324
 
309
325
  function clickPage(selector, x, y) {
326
+ const resolvePoint = (selector, x, y) => {
327
+ if (selector) {
328
+ const element = document.querySelector(selector);
329
+ if (!element) throw new Error(`No element matches selector: ${selector}`);
330
+ element.scrollIntoView({ block: "center", inline: "center", behavior: "instant" });
331
+ const rect = element.getBoundingClientRect();
332
+ return { element, x: rect.left + rect.width / 2, y: rect.top + rect.height / 2, rect };
333
+ }
334
+ if (typeof x !== "number" || typeof y !== "number") throw new Error("Provide selector or x/y");
335
+ return { element: document.elementFromPoint(x, y), x, y, rect: undefined };
336
+ };
310
337
  const point = resolvePoint(selector, x, y);
311
338
  if (!point.element) throw new Error("No element at click point");
312
339
  for (const type of ["pointerdown", "mousedown", "pointerup", "mouseup", "click"]) {
@@ -316,6 +343,28 @@ function clickPage(selector, x, y) {
316
343
  }
317
344
 
318
345
  function typeIntoPage(selector, text, pressEnter) {
346
+ const normalizeKey = (key) => {
347
+ const table = {
348
+ enter: "Enter",
349
+ escape: "Escape",
350
+ tab: "Tab",
351
+ backspace: "Backspace",
352
+ delete: "Delete",
353
+ arrowup: "ArrowUp",
354
+ arrowdown: "ArrowDown",
355
+ arrowleft: "ArrowLeft",
356
+ arrowright: "ArrowRight",
357
+ };
358
+ return table[String(key).toLowerCase()] || key;
359
+ };
360
+ const pressKey = (key) => {
361
+ const target = document.activeElement || document.body;
362
+ const normalized = normalizeKey(key);
363
+ target.dispatchEvent(new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true }));
364
+ target.dispatchEvent(new KeyboardEvent("keyup", { key: normalized, bubbles: true, cancelable: true }));
365
+ if (normalized === "Enter" && target instanceof HTMLFormElement) target.requestSubmit();
366
+ return { key: normalized };
367
+ };
319
368
  let element = selector ? document.querySelector(selector) : document.activeElement;
320
369
  if (!element) throw new Error(selector ? `No element matches selector: ${selector}` : "No active element");
321
370
  element.focus();
@@ -331,11 +380,25 @@ function typeIntoPage(selector, text, pressEnter) {
331
380
  } else {
332
381
  throw new Error("Focused element is not text-editable");
333
382
  }
334
- if (pressEnter) pressKeyInPage("Enter");
383
+ if (pressEnter) pressKey("Enter");
335
384
  return { selector, length: text.length, pressEnter };
336
385
  }
337
386
 
338
387
  function pressKeyInPage(key) {
388
+ const normalizeKey = (key) => {
389
+ const table = {
390
+ enter: "Enter",
391
+ escape: "Escape",
392
+ tab: "Tab",
393
+ backspace: "Backspace",
394
+ delete: "Delete",
395
+ arrowup: "ArrowUp",
396
+ arrowdown: "ArrowDown",
397
+ arrowleft: "ArrowLeft",
398
+ arrowright: "ArrowRight",
399
+ };
400
+ return table[String(key).toLowerCase()] || key;
401
+ };
339
402
  const target = document.activeElement || document.body;
340
403
  const normalized = normalizeKey(key);
341
404
  target.dispatchEvent(new KeyboardEvent("keydown", { key: normalized, bubbles: true, cancelable: true }));
@@ -46,7 +46,7 @@ type BridgeResult = {
46
46
  error?: string;
47
47
  };
48
48
 
49
- const PI_CHROME_VERSION = "0.5.0";
49
+ const PI_CHROME_VERSION = "0.6.1";
50
50
  const DEFAULT_HOST = process.env.PI_CHROME_BRIDGE_HOST ?? "127.0.0.1";
51
51
  const DEFAULT_PORT = Number(process.env.PI_CHROME_BRIDGE_PORT ?? "17318");
52
52
  const DEFAULT_TIMEOUT_MS = 30_000;
@@ -339,10 +339,21 @@ const waitForValues = ["selector", "expression"] as const;
339
339
 
340
340
  export default function (pi: ExtensionAPI): void {
341
341
  const bridge = new ChromeProfileBridge(DEFAULT_HOST, DEFAULT_PORT);
342
- let foregroundDefault = false;
343
-
344
- const withForeground = <T extends Record<string, unknown>>(params: T): T =>
345
- ({ ...params, foreground: ((params as { foreground?: boolean }).foreground) ?? foregroundDefault }) as T;
342
+ let backgroundDefault = false;
343
+
344
+ // Translate the public `background` parameter (default false = visible/foreground) into the
345
+ // service worker's wire-level `foreground` flag, accepting legacy `foreground` as a fallback.
346
+ const withBackground = <T extends Record<string, unknown>>(params: T): T => {
347
+ const typed = params as { background?: boolean; foreground?: boolean };
348
+ const explicit =
349
+ typed.background !== undefined
350
+ ? typed.background
351
+ : typed.foreground !== undefined
352
+ ? !typed.foreground
353
+ : undefined;
354
+ const background = explicit ?? backgroundDefault;
355
+ return { ...params, foreground: !background } as T;
356
+ };
346
357
 
347
358
  pi.on("session_start", async (_event, ctx) => {
348
359
  await bridge.start();
@@ -365,7 +376,7 @@ export default function (pi: ExtensionAPI): void {
365
376
  <chrome-profile-bridge>
366
377
  Chrome control is available through the chrome_* tools via a companion Chrome extension installed in the user's normal Chrome profile.
367
378
  This is not CDP: it can use the user's existing Chrome windows and authenticated sessions after the user loads the companion browser extension.
368
- If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools run in the background and do not bring Chrome to the foreground. The user can flip this for the whole session via /chrome-foreground; you can also pass foreground=true on a single tool call when you specifically need Chrome focused (for example, demoing a flow or capturing a screenshot the user should see).
379
+ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the bundled browser-extension folder in chrome://extensions. Prefer chrome_snapshot before clicking/typing. Avoid destructive actions unless explicitly requested. By default chrome_* tools focus Chrome and activate the target tab so the user can watch the agent work. The user can switch to silent/background mode for the whole session via /chrome-background; you can also pass background=true on a single tool call when the user explicitly wants the action to be silent (for example, scraping while they keep working in another app).
369
380
  </chrome-profile-bridge>`;
370
381
  return { systemPrompt: event.systemPrompt + primer };
371
382
  });
@@ -404,13 +415,13 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
404
415
  },
405
416
  });
406
417
 
407
- pi.registerCommand("chrome-foreground", {
418
+ pi.registerCommand("chrome-background", {
408
419
  description:
409
- "Toggle whether chrome_* tools bring Chrome to the foreground. Foreground ON: you can watch the agent work in your browser, useful for demos, pair-driving, and debugging tradeoff: Chrome pops up and steals focus, interrupting whatever app you were using. Foreground OFF (default): chrome_* tools act silently in the background, so your editor/terminal keeps focus and your workflow is not interrupted. Pass `on` / `off` to set explicitly, or no argument to toggle.",
420
+ "Toggle silent/background mode for chrome_* tools. Background ON: chrome_* tools act silently your editor/terminal keeps focus, Chrome does not pop up, your workflow is not interrupted. Background OFF (default): Chrome focuses and activates the target tab so you can watch the agent work, useful for demos, pair-driving, and debugging tradeoff: Chrome pops up and steals focus. Pass `on` / `off` to set explicitly, or no argument to toggle.",
410
421
  getArgumentCompletions: (prefix) => {
411
422
  const items = [
412
- { value: "on", label: "on", description: "Bring Chrome to the foreground for chrome_* actions" },
413
- { value: "off", label: "off", description: "Run chrome_* actions silently in the background" },
423
+ { value: "on", label: "on", description: "Run chrome_* actions silently without focusing Chrome" },
424
+ { value: "off", label: "off", description: "Bring Chrome to the foreground for chrome_* actions (default)" },
414
425
  ];
415
426
  const lowered = prefix.toLowerCase();
416
427
  const matches = items.filter((item) => item.value.startsWith(lowered));
@@ -418,13 +429,13 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
418
429
  },
419
430
  handler: async (args, ctx) => {
420
431
  const arg = (args || "").trim().toLowerCase();
421
- if (arg === "on" || arg === "true" || arg === "1") foregroundDefault = true;
422
- else if (arg === "off" || arg === "false" || arg === "0") foregroundDefault = false;
423
- else foregroundDefault = !foregroundDefault;
432
+ if (arg === "on" || arg === "true" || arg === "1") backgroundDefault = true;
433
+ else if (arg === "off" || arg === "false" || arg === "0") backgroundDefault = false;
434
+ else backgroundDefault = !backgroundDefault;
424
435
  ctx.ui.notify(
425
- foregroundDefault
426
- ? "Chrome foreground ON. chrome_* tools will focus Chrome and activate the target tab. Useful for demos and debugging — but Chrome will pop up over whatever you're doing."
427
- : "Chrome foreground OFF. chrome_* tools run silently in the background. Your current app keeps focus.",
436
+ backgroundDefault
437
+ ? "Chrome background mode ON. chrome_* tools will run silently. Your current app keeps focus."
438
+ : "Chrome background mode OFF. chrome_* tools will focus Chrome and activate the target tab so you can watch the agent work.",
428
439
  "info",
429
440
  );
430
441
  },
@@ -518,15 +529,15 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
518
529
  name: "chrome_snapshot",
519
530
  label: "Chrome Snapshot",
520
531
  description:
521
- "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Runs in the background by default; pass foreground=true to bring Chrome to the front first.",
532
+ "Inspect a page in the user's existing Chrome profile: title, URL, visible body text, viewport, and clickable/focusable elements with CSS selectors. Brings Chrome to the foreground by default so the user can watch; pass background=true to inspect silently.",
522
533
  promptSnippet: "Inspect the current Chrome page and get CSS selectors for browser automation.",
523
534
  parameters: Type.Object({
524
535
  targetId: Type.Optional(Type.String()),
525
536
  urlIncludes: Type.Optional(Type.String()),
526
537
  titleIncludes: Type.Optional(Type.String()),
527
538
  maxElements: Type.Optional(Type.Number({ default: MAX_ELEMENTS })),
528
- foreground: Type.Optional(
529
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before inspecting. Default false to avoid interrupting the user." }),
539
+ background: Type.Optional(
540
+ Type.Boolean({ description: "If true, run silently in the background without focusing Chrome. Default false (Chrome focuses + tab activates so the user can watch)." }),
530
541
  ),
531
542
  host: Type.Optional(Type.String()),
532
543
  port: Type.Optional(Type.Number()),
@@ -534,7 +545,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
534
545
  async execute(_id, params): Promise<ToolTextResult> {
535
546
  const snapshot = await bridge.send(
536
547
  "page.snapshot",
537
- withForeground({ ...params, maxElements: params.maxElements ?? MAX_ELEMENTS }),
548
+ withBackground({ ...params, maxElements: params.maxElements ?? MAX_ELEMENTS }),
538
549
  DEFAULT_TIMEOUT_MS,
539
550
  );
540
551
  return { content: [{ type: "text", text: truncateText(safeJson(snapshot)) }], details: { snapshot } };
@@ -545,7 +556,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
545
556
  name: "chrome_navigate",
546
557
  label: "Chrome Navigate",
547
558
  description:
548
- "Navigate an existing Chrome tab to a URL via the companion extension. Navigates in the background by default and does not change the user's currently active tab; pass foreground=true to also focus Chrome and activate the tab. Optionally waits for load completion.",
559
+ "Navigate an existing Chrome tab to a URL via the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to navigate silently. Optionally waits for load completion.",
549
560
  promptSnippet: "Navigate a Chrome tab in the user's existing profile.",
550
561
  parameters: Type.Object({
551
562
  url: Type.String(),
@@ -554,14 +565,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
554
565
  titleIncludes: Type.Optional(Type.String()),
555
566
  waitUntilLoad: Type.Optional(Type.Boolean({ default: true })),
556
567
  timeoutMs: Type.Optional(Type.Number({ default: 15_000 })),
557
- foreground: Type.Optional(
558
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before navigating. Default false." }),
568
+ background: Type.Optional(
569
+ Type.Boolean({ description: "If true, navigate silently without focusing Chrome. Default false." }),
559
570
  ),
560
571
  host: Type.Optional(Type.String()),
561
572
  port: Type.Optional(Type.Number()),
562
573
  }),
563
574
  async execute(_id, params): Promise<ToolTextResult> {
564
- const result = await bridge.send("page.navigate", withForeground(params), params.timeoutMs ?? 15_000);
575
+ const result = await bridge.send("page.navigate", withBackground(params), params.timeoutMs ?? 15_000);
565
576
  return { content: [{ type: "text", text: `Navigated to ${params.url}` }], details: { result: result as Json } };
566
577
  },
567
578
  });
@@ -570,7 +581,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
570
581
  name: "chrome_evaluate",
571
582
  label: "Chrome Evaluate",
572
583
  description:
573
- "Evaluate JavaScript in an existing Chrome tab through the companion extension. Runs in the page context (background) and returns JSON-serializable values when possible. Pass foreground=true to also focus Chrome and activate the tab.",
584
+ "Evaluate JavaScript in an existing Chrome tab through the companion extension. Runs in the page context and returns JSON-serializable values when possible. By default focuses Chrome and activates the tab; pass background=true to evaluate silently.",
574
585
  promptSnippet: "Evaluate JavaScript in the active Chrome tab through the companion extension.",
575
586
  parameters: Type.Object({
576
587
  expression: Type.String(),
@@ -579,14 +590,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
579
590
  targetId: Type.Optional(Type.String()),
580
591
  urlIncludes: Type.Optional(Type.String()),
581
592
  titleIncludes: Type.Optional(Type.String()),
582
- foreground: Type.Optional(
583
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before evaluating. Default false." }),
593
+ background: Type.Optional(
594
+ Type.Boolean({ description: "If true, evaluate silently without focusing Chrome. Default false." }),
584
595
  ),
585
596
  host: Type.Optional(Type.String()),
586
597
  port: Type.Optional(Type.Number()),
587
598
  }),
588
599
  async execute(_id, params): Promise<ToolTextResult> {
589
- const value = await bridge.send("page.evaluate", withForeground(params), DEFAULT_TIMEOUT_MS);
600
+ const value = await bridge.send("page.evaluate", withBackground(params), DEFAULT_TIMEOUT_MS);
590
601
  return { content: [{ type: "text", text: truncateText(typeof value === "string" ? value : safeJson(value)) }], details: { value: value as Json } };
591
602
  },
592
603
  });
@@ -595,7 +606,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
595
606
  name: "chrome_click",
596
607
  label: "Chrome Click",
597
608
  description:
598
- "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event in the background by default; pass foreground=true to focus Chrome and activate the tab first.",
609
+ "Click a CSS selector or viewport coordinate in an existing Chrome tab through the companion extension. The click is dispatched as a synthetic DOM event; by default Chrome is focused so the user can watch, pass background=true to click silently.",
599
610
  promptSnippet: "Click page elements in Chrome by selector or viewport coordinate.",
600
611
  parameters: Type.Object({
601
612
  selector: Type.Optional(Type.String({ description: "CSS selector to click. Prefer selectors from chrome_snapshot." })),
@@ -604,14 +615,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
604
615
  targetId: Type.Optional(Type.String()),
605
616
  urlIncludes: Type.Optional(Type.String()),
606
617
  titleIncludes: Type.Optional(Type.String()),
607
- foreground: Type.Optional(
608
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before clicking. Default false." }),
618
+ background: Type.Optional(
619
+ Type.Boolean({ description: "If true, click silently without focusing Chrome. Default false." }),
609
620
  ),
610
621
  host: Type.Optional(Type.String()),
611
622
  port: Type.Optional(Type.Number()),
612
623
  }),
613
624
  async execute(_id, params): Promise<ToolTextResult> {
614
- const result = await bridge.send("page.click", withForeground(params), DEFAULT_TIMEOUT_MS);
625
+ const result = await bridge.send("page.click", withBackground(params), DEFAULT_TIMEOUT_MS);
615
626
  return { content: [{ type: "text", text: `Clicked ${params.selector ?? `${params.x},${params.y}`}` }], details: { result: result as Json } };
616
627
  },
617
628
  });
@@ -620,7 +631,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
620
631
  name: "chrome_type",
621
632
  label: "Chrome Type",
622
633
  description:
623
- "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. Runs in the background by default; pass foreground=true to focus Chrome and activate the tab first.",
634
+ "Focus an optional CSS selector, then type text into an existing Chrome tab through the companion extension. By default focuses Chrome and activates the tab so the user can watch; pass background=true to type silently.",
624
635
  promptSnippet: "Type text into Chrome, optionally focusing a selector first.",
625
636
  parameters: Type.Object({
626
637
  text: Type.String(),
@@ -629,14 +640,14 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
629
640
  targetId: Type.Optional(Type.String()),
630
641
  urlIncludes: Type.Optional(Type.String()),
631
642
  titleIncludes: Type.Optional(Type.String()),
632
- foreground: Type.Optional(
633
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before typing. Default false." }),
643
+ background: Type.Optional(
644
+ Type.Boolean({ description: "If true, type silently without focusing Chrome. Default false." }),
634
645
  ),
635
646
  host: Type.Optional(Type.String()),
636
647
  port: Type.Optional(Type.Number()),
637
648
  }),
638
649
  async execute(_id, params): Promise<ToolTextResult> {
639
- const result = await bridge.send("page.type", withForeground(params), DEFAULT_TIMEOUT_MS);
650
+ const result = await bridge.send("page.type", withBackground(params), DEFAULT_TIMEOUT_MS);
640
651
  return { content: [{ type: "text", text: `Typed ${params.text.length} character(s)${params.selector ? ` into ${params.selector}` : ""}.` }], details: { result: result as Json } };
641
652
  },
642
653
  });
@@ -645,21 +656,21 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
645
656
  name: "chrome_key",
646
657
  label: "Chrome Key",
647
658
  description:
648
- "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). Runs in the background by default; pass foreground=true to focus Chrome and activate the tab first.",
659
+ "Send a keyboard key to an existing Chrome tab (Enter, Escape, Tab, Backspace, Delete, ArrowUp/Down/Left/Right, or one character). By default focuses Chrome and activates the tab so the user can watch; pass background=true to send the key silently.",
649
660
  promptSnippet: "Press keys in Chrome through the companion extension.",
650
661
  parameters: Type.Object({
651
662
  key: Type.String(),
652
663
  targetId: Type.Optional(Type.String()),
653
664
  urlIncludes: Type.Optional(Type.String()),
654
665
  titleIncludes: Type.Optional(Type.String()),
655
- foreground: Type.Optional(
656
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before sending the key. Default false." }),
666
+ background: Type.Optional(
667
+ Type.Boolean({ description: "If true, send the key silently without focusing Chrome. Default false." }),
657
668
  ),
658
669
  host: Type.Optional(Type.String()),
659
670
  port: Type.Optional(Type.Number()),
660
671
  }),
661
672
  async execute(_id, params): Promise<ToolTextResult> {
662
- const result = await bridge.send("page.key", withForeground(params), DEFAULT_TIMEOUT_MS);
673
+ const result = await bridge.send("page.key", withBackground(params), DEFAULT_TIMEOUT_MS);
663
674
  return { content: [{ type: "text", text: `Pressed ${params.key}.` }], details: { result: result as Json } };
664
675
  },
665
676
  });
@@ -690,7 +701,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
690
701
  name: "chrome_screenshot",
691
702
  label: "Chrome Screenshot",
692
703
  description:
693
- "Capture a screenshot of an existing Chrome tab via the companion extension and save it to disk. Chrome's extension screenshot API requires the target tab to be the active tab in its window, so this momentarily activates it (without focusing the window) and restores the previous active tab. Pass foreground=true to also bring the Chrome window to the front.",
704
+ "Capture a screenshot of an existing Chrome tab via the companion extension and save it to disk. Chrome's extension screenshot API requires the target tab to be the active tab in its window. By default Chrome is focused and the tab activates so the user can watch; pass background=true to capture silently (the tab is briefly activated within its window for the capture, then the previous active tab is restored).",
694
705
  promptSnippet: "Capture Chrome screenshots and save them under .pi/chrome-screenshots by default.",
695
706
  parameters: Type.Object({
696
707
  path: Type.Optional(Type.String({ description: "Output path. Defaults to .pi/chrome-screenshots/<timestamp>.<format>." })),
@@ -700,8 +711,8 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
700
711
  targetId: Type.Optional(Type.String()),
701
712
  urlIncludes: Type.Optional(Type.String()),
702
713
  titleIncludes: Type.Optional(Type.String()),
703
- foreground: Type.Optional(
704
- Type.Boolean({ description: "If true, focus the Chrome window and activate the tab before capturing. Default false." }),
714
+ background: Type.Optional(
715
+ Type.Boolean({ description: "If true, capture silently without focusing the Chrome window (the target tab is briefly activated within its window for the capture, then restored). Default false." }),
705
716
  ),
706
717
  host: Type.Optional(Type.String()),
707
718
  port: Type.Optional(Type.Number()),
@@ -711,7 +722,7 @@ If chrome_* tools time out, ask the user to run /chrome-onboard, then load the b
711
722
  const cwd = workspaceCwd(ctx);
712
723
  const defaultPath = join(cwd, ".pi", "chrome-screenshots", `${new Date().toISOString().replace(/[:.]/g, "-")}.${format}`);
713
724
  const outputPath = params.path ? resolve(cwd, params.path) : defaultPath;
714
- const result = (await bridge.send("page.screenshot", withForeground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
725
+ const result = (await bridge.send("page.screenshot", withBackground(params), DEFAULT_TIMEOUT_MS)) as { dataUrl: string; tab?: unknown };
715
726
  const base64 = result.dataUrl.replace(/^data:image\/(?:png|jpeg);base64,/, "");
716
727
  await mkdir(dirname(outputPath), { recursive: true });
717
728
  await writeFile(outputPath, Buffer.from(base64, "base64"));
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-chrome",
3
- "version": "0.5.0",
4
- "description": "Drive your existing logged-in Chrome from Pi \u2014 no re-login, no throwaway profile, background by default.",
3
+ "version": "0.6.1",
4
+ "description": "Drive your existing logged-in Chrome from Pi \u2014 no re-login, no throwaway profile, watch the agent work in real time (or toggle quiet background mode).",
5
5
  "keywords": [
6
6
  "pi-package",
7
7
  "pi-extension",