npm - pi-agent-browser-native - Versions diffs - 0.2.7 → 0.2.8 - Mend

pi-agent-browser-native 0.2.7 → 0.2.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (8)

package/CHANGELOG.md +7 -0
package/README.md +2 -2
package/docs/REQUIREMENTS.md +1 -1
package/docs/TOOL_CONTRACT.md +2 -2
package/extensions/agent-browser/index.ts +17 -24
package/extensions/agent-browser/lib/results/presentation.ts +9 -2
package/extensions/agent-browser/lib/runtime.ts +32 -8
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,13 @@
 ## Unreleased
+## 0.2.8 - 2026-04-16
+### Fixed
+- updated the tab-correction and tab-pinning wrapper paths for `agent-browser` `0.26.0` tab metadata, so profiled launches and follow-up commands now re-select tabs using stable upstream tab ids instead of the retired numeric index shape
+- updated tab-list rendering and tool guidance to show `agent-browser`'s stable tab ids/labels instead of suggesting `tab <n>` commands that no longer work in `0.26.0`
+- extended the narrow ChatGPT/OpenAI headless user-agent compatibility fallback to cover `chat.com`, so `chat.com` redirects reuse the same authenticated headless path as `chatgpt.com`
 ## 0.2.7 - 2026-04-16
 ### Changed

package/README.md CHANGED Viewed

@@ -178,7 +178,7 @@ Validated workflow examples:
 - click a link and confirm the destination title
 - use an explicit `--session` across multiple tool calls
 - use an explicit `--profile` and verify persisted browser storage across restarts
-- open `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
+- open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
 - verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
 - run `batch` with JSON via `stdin`
 - run `eval --stdin`
@@ -193,7 +193,7 @@ Current cautions:
 - implicit `piab-*` sessions are extension-managed convenience sessions; they stay alive across `pi` shutdown/reload so later default calls can keep following the active managed browser on `/reload` or `/resume`, rely on the configured idle timeout to reduce stale background daemons, store persisted-session large snapshot spill files under a private session-scoped artifact directory with a bounded per-session budget so `details.fullOutputPath` survives reload/resume without unbounded growth, and still clean up process-private temp spill artifacts on shutdown
 - `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
 - for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
-- for direct headless local Chrome launches to `chatgpt.com` and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
+- for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
 - after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
 - after a target tab is known, later active-tab commands like `click` and `snapshot -i` best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
 - explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension

package/docs/REQUIREMENTS.md CHANGED Viewed

@@ -85,7 +85,7 @@ The design should comfortably support workflows such as:
 - web research
 - using browser UIs for other LLMs such as ChatGPT, Grok, Gemini, and Claude
 - isolated authenticated browser sessions
-- headless authenticated ChatGPT/OpenAI browsing without forcing `--headed` or `--auto-connect`
+- headless authenticated `chat.com` / ChatGPT / OpenAI browsing without forcing `--headed` or `--auto-connect`
 - upstream profile/debug workflows without adding a local profile-cloning layer in this package
 ## Implications for the implementation

package/docs/TOOL_CONTRACT.md CHANGED Viewed

@@ -24,7 +24,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
 The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
-The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <n>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
+The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
 ## Parameters
@@ -187,7 +187,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
 - on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
 - treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
 - if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
-- for direct headless local Chrome launches to `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
+- for direct headless local Chrome launches to `chat.com` / `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
 ## Non-goals

package/extensions/agent-browser/index.ts CHANGED Viewed

@@ -74,7 +74,7 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
 	"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
 	"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
-	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
+	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
 	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
 	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
 	"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
@@ -414,25 +414,14 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
 }
 function selectSessionTargetTab(options: {
-	tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
+	tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
 	target: SessionTabTarget;
 }): OpenResultTabCorrection | undefined {
-	const matchingTabs = options.tabs.filter((tab) => normalizeComparableUrl(tab.url) === options.target.url);
-	if (matchingTabs.length === 0) {
-		return undefined;
-	}
-	const titledMatch =
-		typeof options.target.title === "string"
-			? matchingTabs.find((tab) => tab.title?.trim() === options.target.title)
-			: undefined;
-	const selectedTab = titledMatch ?? matchingTabs[0];
-	return typeof selectedTab.index === "number"
-		? {
-				selectedIndex: selectedTab.index,
-				targetTitle: options.target.title,
-				targetUrl: options.target.url,
-		  }
-		: undefined;
+	return chooseOpenResultTabCorrection({
+		tabs: options.tabs,
+		targetTitle: options.target.title,
+		targetUrl: options.target.url,
+	});
 }
 function deriveSessionTabTarget(options: {
@@ -570,9 +559,11 @@ async function collectOpenResultTabCorrection(options: {
 	if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
 		return undefined;
 	}
-	const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
+	const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
 		active: tab.active === true,
-		index: typeof tab.index === "number" ? tab.index : undefined,
+		index: typeof tab.index === "number" ? tab.index : index,
+		label: typeof tab.label === "string" ? tab.label : undefined,
+		tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
 		title: typeof tab.title === "string" ? tab.title : undefined,
 		url: typeof tab.url === "string" ? tab.url : undefined,
 	}));
@@ -590,9 +581,11 @@ async function collectSessionTabSelection(options: {
 	if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
 		return undefined;
 	}
-	const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
+	const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
 		active: tab.active === true,
-		index: typeof tab.index === "number" ? tab.index : undefined,
+		index: typeof tab.index === "number" ? tab.index : index,
+		label: typeof tab.label === "string" ? tab.label : undefined,
+		tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
 		title: typeof tab.title === "string" ? tab.title : undefined,
 		url: typeof tab.url === "string" ? tab.url : undefined,
 	}));
@@ -607,7 +600,7 @@ async function applyOpenResultTabCorrection(options: {
 }): Promise<OpenResultTabCorrection | undefined> {
 	const { correction, cwd, sessionName, signal } = options;
 	const result = await runSessionCommandData({
-		args: ["tab", String(correction.selectedIndex)],
+		args: ["tab", correction.selectedTab],
 		cwd,
 		sessionName,
 		signal,
@@ -816,7 +809,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					sessionTabCorrection = plannedSessionTabSelection;
 					processArgs = ["--json", "--session", executionPlan.sessionName, "batch"];
 					processStdin = JSON.stringify([
-						["tab", String(plannedSessionTabSelection.selectedIndex)],
+						["tab", plannedSessionTabSelection.selectedTab],
 						commandTokens,
 						...(includePinnedNavigationSummary ? [["get", "title"], ["get", "url"]] : []),
 					]);

package/extensions/agent-browser/lib/results/presentation.ts CHANGED Viewed

@@ -73,8 +73,15 @@ function getTabSummary(data: Record<string, unknown>): string | undefined {
 		const marker = tab.active === true ? "*" : "-";
 		const title = typeof tab.title === "string" ? tab.title : "(untitled)";
 		const url = typeof tab.url === "string" ? tab.url : "(no url)";
-		const tabIndex = typeof tab.index === "number" ? tab.index : index;
-		return `${marker} [${tabIndex}] ${title} — ${url}`;
+		const tabSelector =
+			typeof tab.tabId === "string" && tab.tabId.trim().length > 0
+				? tab.tabId.trim()
+				: typeof tab.label === "string" && tab.label.trim().length > 0
+					? tab.label.trim()
+					: typeof tab.index === "number"
+						? String(tab.index)
+						: String(index);
+		return `${marker} [${tabSelector}] ${title} — ${url}`;
 	});
 	return lines.join("\n");
 }

package/extensions/agent-browser/lib/runtime.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import { basename } from "node:path";
 const STARTUP_SCOPED_FLAGS = ["--cdp", "--profile", "--session-name"] as const;
 const OPEN_COMMANDS = new Set(["goto", "navigate", "open"]);
-const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.openai.com", "chatgpt.com"]);
+const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.com", "chat.openai.com", "chatgpt.com"]);
 const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
 const AGENT_BROWSER_IDLE_TIMEOUT_ENV = "AGENT_BROWSER_IDLE_TIMEOUT_MS";
 const IMPLICIT_SESSION_IDLE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_IDLE_TIMEOUT_MS";
@@ -106,7 +106,8 @@ export interface CompatibilityWorkaround {
 }
 export interface OpenResultTabCorrection {
-	selectedIndex: number;
+	selectedTab: string;
+	selectionKind: "index" | "label" | "tabId";
 	targetTitle?: string;
 	targetUrl: string;
 }
@@ -537,6 +538,26 @@ function normalizeComparableUrl(url: string): string | undefined {
 	}
 }
+function normalizeTabSelectionValue(value: string | undefined): string | undefined {
+	const normalizedValue = value?.trim();
+	return normalizedValue && normalizedValue.length > 0 ? normalizedValue : undefined;
+}
+function extractTabSelection(tab: { index?: number; label?: string; tabId?: string }): Pick<OpenResultTabCorrection, "selectedTab" | "selectionKind"> | undefined {
+	const tabId = normalizeTabSelectionValue(tab.tabId);
+	if (tabId) {
+		return { selectedTab: tabId, selectionKind: "tabId" };
+	}
+	const label = normalizeTabSelectionValue(tab.label);
+	if (label) {
+		return { selectedTab: label, selectionKind: "label" };
+	}
+	if (typeof tab.index === "number" && Number.isInteger(tab.index) && tab.index >= 0) {
+		return { selectedTab: String(tab.index), selectionKind: "index" };
+	}
+	return undefined;
+}
 function parseComparableNavigationUrl(url: string): URL | undefined {
 	try {
 		return new URL(url);
@@ -727,7 +748,7 @@ export function buildExecutionPlan(
 export function chooseOpenResultTabCorrection(options: {
 	activeTabIndex?: number;
-	tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
+	tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
 	targetTitle?: string;
 	targetUrl?: string;
 }): OpenResultTabCorrection | undefined {
@@ -740,6 +761,8 @@ export function chooseOpenResultTabCorrection(options: {
 	const tabsWithIndices = options.tabs.map((tab, index) => ({
 		...tab,
 		index: typeof tab.index === "number" ? tab.index : index,
+		label: normalizeTabSelectionValue(tab.label),
+		tabId: normalizeTabSelectionValue(tab.tabId),
 	}));
 	const activeTab =
 		tabsWithIndices.find((tab) => tab.active === true) ??
@@ -758,13 +781,14 @@ export function chooseOpenResultTabCorrection(options: {
 			? undefined
 			: matchingTabs.find((tab) => typeof tab.title === "string" && tab.title.trim() === trimmedTargetTitle);
 	const selectedTab = titledMatch ?? matchingTabs[0];
-	return selectedTab.index === undefined
-		? undefined
-		: {
-			selectedIndex: selectedTab.index,
+	const tabSelection = extractTabSelection(selectedTab);
+	return tabSelection
+		? {
+			...tabSelection,
 			targetTitle: trimmedTargetTitle.length > 0 ? trimmedTargetTitle : undefined,
 			targetUrl: normalizedTargetUrl,
-		};
+		}
+		: undefined;
 }
 export function parseCommandInfo(args: string[]): CommandInfo {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-agent-browser-native",
-  "version": "0.2.7",
+  "version": "0.2.8",
   "description": "pi extension that exposes agent-browser as a native tool for browser automation",
   "type": "module",
   "author": "Mitch Fultz (https://github.com/fitchmultz)",