pi-agent-browser-native 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # Changelog
2
2
 
3
+ ## Unreleased
4
+
5
+ ## 0.2.8 - 2026-04-16
6
+
7
+ ### Fixed
8
+ - updated the tab-correction and tab-pinning wrapper paths for `agent-browser` `0.26.0` tab metadata, so profiled launches and follow-up commands now re-select tabs using stable upstream tab ids instead of the retired numeric index shape
9
+ - updated tab-list rendering and tool guidance to show `agent-browser`'s stable tab ids/labels instead of suggesting `tab <n>` commands that no longer work in `0.26.0`
10
+ - extended the narrow ChatGPT/OpenAI headless user-agent compatibility fallback to cover `chat.com`, so `chat.com` redirects reuse the same authenticated headless path as `chatgpt.com`
11
+
12
+ ## 0.2.7 - 2026-04-16
13
+
14
+ ### Changed
15
+ - updated the local pi development baseline to `@mariozechner/pi-coding-agent` `0.67.4`
16
+ - aligned `packageManager` metadata to `npm@10.9.8`, the latest stable npm line compatible with the declared Node runtime floor
17
+ - removed the published `@mariozechner/pi-coding-agent` peer dependency so installs rely on pi's bundled runtime instead of npm peer-resolution churn
18
+
3
19
  ## 0.2.6 - 2026-04-15
4
20
 
5
21
  ### Changed
package/README.md CHANGED
@@ -178,7 +178,7 @@ Validated workflow examples:
178
178
  - click a link and confirm the destination title
179
179
  - use an explicit `--session` across multiple tool calls
180
180
  - use an explicit `--profile` and verify persisted browser storage across restarts
181
- - open `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
181
+ - open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
182
182
  - verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
183
183
  - run `batch` with JSON via `stdin`
184
184
  - run `eval --stdin`
@@ -193,7 +193,7 @@ Current cautions:
193
193
  - implicit `piab-*` sessions are extension-managed convenience sessions; they stay alive across `pi` shutdown/reload so later default calls can keep following the active managed browser on `/reload` or `/resume`, rely on the configured idle timeout to reduce stale background daemons, store persisted-session large snapshot spill files under a private session-scoped artifact directory with a bounded per-session budget so `details.fullOutputPath` survives reload/resume without unbounded growth, and still clean up process-private temp spill artifacts on shutdown
194
194
  - `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
195
195
  - for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
196
- - for direct headless local Chrome launches to `chatgpt.com` and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
196
+ - for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
197
197
  - after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
198
198
  - after a target tab is known, later active-tab commands like `click` and `snapshot -i` best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
199
199
  - explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
@@ -85,7 +85,7 @@ The design should comfortably support workflows such as:
85
85
  - web research
86
86
  - using browser UIs for other LLMs such as ChatGPT, Grok, Gemini, and Claude
87
87
  - isolated authenticated browser sessions
88
- - headless authenticated ChatGPT/OpenAI browsing without forcing `--headed` or `--auto-connect`
88
+ - headless authenticated `chat.com` / ChatGPT / OpenAI browsing without forcing `--headed` or `--auto-connect`
89
89
  - upstream profile/debug workflows without adding a local profile-cloning layer in this package
90
90
 
91
91
  ## Implications for the implementation
@@ -24,7 +24,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
24
24
 
25
25
  The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
26
26
 
27
- The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <n>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
27
+ The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
28
28
 
29
29
  ## Parameters
30
30
 
@@ -187,7 +187,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
187
187
  - on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
188
188
  - treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
189
189
  - if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
190
- - for direct headless local Chrome launches to `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
190
+ - for direct headless local Chrome launches to `chat.com` / `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
191
191
 
192
192
  ## Non-goals
193
193
 
@@ -74,7 +74,7 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
74
74
  "Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
75
75
  "When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
76
76
  "If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
77
- "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
77
+ "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
78
78
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
79
79
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
80
80
  "When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
@@ -414,25 +414,14 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
414
414
  }
415
415
 
416
416
  function selectSessionTargetTab(options: {
417
- tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
417
+ tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
418
418
  target: SessionTabTarget;
419
419
  }): OpenResultTabCorrection | undefined {
420
- const matchingTabs = options.tabs.filter((tab) => normalizeComparableUrl(tab.url) === options.target.url);
421
- if (matchingTabs.length === 0) {
422
- return undefined;
423
- }
424
- const titledMatch =
425
- typeof options.target.title === "string"
426
- ? matchingTabs.find((tab) => tab.title?.trim() === options.target.title)
427
- : undefined;
428
- const selectedTab = titledMatch ?? matchingTabs[0];
429
- return typeof selectedTab.index === "number"
430
- ? {
431
- selectedIndex: selectedTab.index,
432
- targetTitle: options.target.title,
433
- targetUrl: options.target.url,
434
- }
435
- : undefined;
420
+ return chooseOpenResultTabCorrection({
421
+ tabs: options.tabs,
422
+ targetTitle: options.target.title,
423
+ targetUrl: options.target.url,
424
+ });
436
425
  }
437
426
 
438
427
  function deriveSessionTabTarget(options: {
@@ -570,9 +559,11 @@ async function collectOpenResultTabCorrection(options: {
570
559
  if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
571
560
  return undefined;
572
561
  }
573
- const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
562
+ const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
574
563
  active: tab.active === true,
575
- index: typeof tab.index === "number" ? tab.index : undefined,
564
+ index: typeof tab.index === "number" ? tab.index : index,
565
+ label: typeof tab.label === "string" ? tab.label : undefined,
566
+ tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
576
567
  title: typeof tab.title === "string" ? tab.title : undefined,
577
568
  url: typeof tab.url === "string" ? tab.url : undefined,
578
569
  }));
@@ -590,9 +581,11 @@ async function collectSessionTabSelection(options: {
590
581
  if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
591
582
  return undefined;
592
583
  }
593
- const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
584
+ const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
594
585
  active: tab.active === true,
595
- index: typeof tab.index === "number" ? tab.index : undefined,
586
+ index: typeof tab.index === "number" ? tab.index : index,
587
+ label: typeof tab.label === "string" ? tab.label : undefined,
588
+ tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
596
589
  title: typeof tab.title === "string" ? tab.title : undefined,
597
590
  url: typeof tab.url === "string" ? tab.url : undefined,
598
591
  }));
@@ -607,7 +600,7 @@ async function applyOpenResultTabCorrection(options: {
607
600
  }): Promise<OpenResultTabCorrection | undefined> {
608
601
  const { correction, cwd, sessionName, signal } = options;
609
602
  const result = await runSessionCommandData({
610
- args: ["tab", String(correction.selectedIndex)],
603
+ args: ["tab", correction.selectedTab],
611
604
  cwd,
612
605
  sessionName,
613
606
  signal,
@@ -816,7 +809,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
816
809
  sessionTabCorrection = plannedSessionTabSelection;
817
810
  processArgs = ["--json", "--session", executionPlan.sessionName, "batch"];
818
811
  processStdin = JSON.stringify([
819
- ["tab", String(plannedSessionTabSelection.selectedIndex)],
812
+ ["tab", plannedSessionTabSelection.selectedTab],
820
813
  commandTokens,
821
814
  ...(includePinnedNavigationSummary ? [["get", "title"], ["get", "url"]] : []),
822
815
  ]);
@@ -73,8 +73,15 @@ function getTabSummary(data: Record<string, unknown>): string | undefined {
73
73
  const marker = tab.active === true ? "*" : "-";
74
74
  const title = typeof tab.title === "string" ? tab.title : "(untitled)";
75
75
  const url = typeof tab.url === "string" ? tab.url : "(no url)";
76
- const tabIndex = typeof tab.index === "number" ? tab.index : index;
77
- return `${marker} [${tabIndex}] ${title} ${url}`;
76
+ const tabSelector =
77
+ typeof tab.tabId === "string" && tab.tabId.trim().length > 0
78
+ ? tab.tabId.trim()
79
+ : typeof tab.label === "string" && tab.label.trim().length > 0
80
+ ? tab.label.trim()
81
+ : typeof tab.index === "number"
82
+ ? String(tab.index)
83
+ : String(index);
84
+ return `${marker} [${tabSelector}] ${title} — ${url}`;
78
85
  });
79
86
  return lines.join("\n");
80
87
  }
@@ -11,7 +11,7 @@ import { basename } from "node:path";
11
11
 
12
12
  const STARTUP_SCOPED_FLAGS = ["--cdp", "--profile", "--session-name"] as const;
13
13
  const OPEN_COMMANDS = new Set(["goto", "navigate", "open"]);
14
- const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.openai.com", "chatgpt.com"]);
14
+ const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.com", "chat.openai.com", "chatgpt.com"]);
15
15
  const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
16
16
  const AGENT_BROWSER_IDLE_TIMEOUT_ENV = "AGENT_BROWSER_IDLE_TIMEOUT_MS";
17
17
  const IMPLICIT_SESSION_IDLE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_IDLE_TIMEOUT_MS";
@@ -106,7 +106,8 @@ export interface CompatibilityWorkaround {
106
106
  }
107
107
 
108
108
  export interface OpenResultTabCorrection {
109
- selectedIndex: number;
109
+ selectedTab: string;
110
+ selectionKind: "index" | "label" | "tabId";
110
111
  targetTitle?: string;
111
112
  targetUrl: string;
112
113
  }
@@ -537,6 +538,26 @@ function normalizeComparableUrl(url: string): string | undefined {
537
538
  }
538
539
  }
539
540
 
541
+ function normalizeTabSelectionValue(value: string | undefined): string | undefined {
542
+ const normalizedValue = value?.trim();
543
+ return normalizedValue && normalizedValue.length > 0 ? normalizedValue : undefined;
544
+ }
545
+
546
+ function extractTabSelection(tab: { index?: number; label?: string; tabId?: string }): Pick<OpenResultTabCorrection, "selectedTab" | "selectionKind"> | undefined {
547
+ const tabId = normalizeTabSelectionValue(tab.tabId);
548
+ if (tabId) {
549
+ return { selectedTab: tabId, selectionKind: "tabId" };
550
+ }
551
+ const label = normalizeTabSelectionValue(tab.label);
552
+ if (label) {
553
+ return { selectedTab: label, selectionKind: "label" };
554
+ }
555
+ if (typeof tab.index === "number" && Number.isInteger(tab.index) && tab.index >= 0) {
556
+ return { selectedTab: String(tab.index), selectionKind: "index" };
557
+ }
558
+ return undefined;
559
+ }
560
+
540
561
  function parseComparableNavigationUrl(url: string): URL | undefined {
541
562
  try {
542
563
  return new URL(url);
@@ -727,7 +748,7 @@ export function buildExecutionPlan(
727
748
 
728
749
  export function chooseOpenResultTabCorrection(options: {
729
750
  activeTabIndex?: number;
730
- tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
751
+ tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
731
752
  targetTitle?: string;
732
753
  targetUrl?: string;
733
754
  }): OpenResultTabCorrection | undefined {
@@ -740,6 +761,8 @@ export function chooseOpenResultTabCorrection(options: {
740
761
  const tabsWithIndices = options.tabs.map((tab, index) => ({
741
762
  ...tab,
742
763
  index: typeof tab.index === "number" ? tab.index : index,
764
+ label: normalizeTabSelectionValue(tab.label),
765
+ tabId: normalizeTabSelectionValue(tab.tabId),
743
766
  }));
744
767
  const activeTab =
745
768
  tabsWithIndices.find((tab) => tab.active === true) ??
@@ -758,13 +781,14 @@ export function chooseOpenResultTabCorrection(options: {
758
781
  ? undefined
759
782
  : matchingTabs.find((tab) => typeof tab.title === "string" && tab.title.trim() === trimmedTargetTitle);
760
783
  const selectedTab = titledMatch ?? matchingTabs[0];
761
- return selectedTab.index === undefined
762
- ? undefined
763
- : {
764
- selectedIndex: selectedTab.index,
784
+ const tabSelection = extractTabSelection(selectedTab);
785
+ return tabSelection
786
+ ? {
787
+ ...tabSelection,
765
788
  targetTitle: trimmedTargetTitle.length > 0 ? trimmedTargetTitle : undefined,
766
789
  targetUrl: normalizedTargetUrl,
767
- };
790
+ }
791
+ : undefined;
768
792
  }
769
793
 
770
794
  export function parseCommandInfo(args: string[]): CommandInfo {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.6",
3
+ "version": "0.2.8",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -42,11 +42,10 @@
42
42
  ]
43
43
  },
44
44
  "peerDependencies": {
45
- "@mariozechner/pi-coding-agent": "*",
46
45
  "@sinclair/typebox": "*"
47
46
  },
48
47
  "devDependencies": {
49
- "@mariozechner/pi-coding-agent": "^0.67.2",
48
+ "@mariozechner/pi-coding-agent": "^0.67.4",
50
49
  "@sinclair/typebox": "^0.34.49",
51
50
  "@types/node": "^25.6.0",
52
51
  "tsx": "^4.21.0",
@@ -63,5 +62,5 @@
63
62
  "verify:package": "node ./scripts/verify-package.mjs",
64
63
  "verify:release": "npm run verify && npm run verify:package"
65
64
  },
66
- "packageManager": "npm@11.12.1"
65
+ "packageManager": "npm@10.9.8"
67
66
  }