pi-agent-browser-native 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +2 -2
- package/docs/REQUIREMENTS.md +1 -1
- package/docs/TOOL_CONTRACT.md +2 -2
- package/extensions/agent-browser/index.ts +17 -24
- package/extensions/agent-browser/lib/results/presentation.ts +9 -2
- package/extensions/agent-browser/lib/runtime.ts +32 -8
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 0.2.8 - 2026-04-16
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- updated the tab-correction and tab-pinning wrapper paths for `agent-browser` `0.26.0` tab metadata, so profiled launches and follow-up commands now re-select tabs using stable upstream tab ids instead of the retired numeric index shape
|
|
9
|
+
- updated tab-list rendering and tool guidance to show `agent-browser`'s stable tab ids/labels instead of suggesting `tab <n>` commands that no longer work in `0.26.0`
|
|
10
|
+
- extended the narrow ChatGPT/OpenAI headless user-agent compatibility fallback to cover `chat.com`, so `chat.com` redirects reuse the same authenticated headless path as `chatgpt.com`
|
|
11
|
+
|
|
5
12
|
## 0.2.7 - 2026-04-16
|
|
6
13
|
|
|
7
14
|
### Changed
|
package/README.md
CHANGED
|
@@ -178,7 +178,7 @@ Validated workflow examples:
|
|
|
178
178
|
- click a link and confirm the destination title
|
|
179
179
|
- use an explicit `--session` across multiple tool calls
|
|
180
180
|
- use an explicit `--profile` and verify persisted browser storage across restarts
|
|
181
|
-
- open `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
|
|
181
|
+
- open `chat.com` or `chatgpt.com` headlessly with `--profile Default` without forcing `--headed` or `--auto-connect`
|
|
182
182
|
- verify `/reload` and full restart + `/resume` keep following the same implicit managed browser session
|
|
183
183
|
- run `batch` with JSON via `stdin`
|
|
184
184
|
- run `eval --stdin`
|
|
@@ -193,7 +193,7 @@ Current cautions:
|
|
|
193
193
|
- implicit `piab-*` sessions are extension-managed convenience sessions; they stay alive across `pi` shutdown/reload so later default calls can keep following the active managed browser on `/reload` or `/resume`, rely on the configured idle timeout to reduce stale background daemons, store persisted-session large snapshot spill files under a private session-scoped artifact directory with a bounded per-session budget so `details.fullOutputPath` survives reload/resume without unbounded growth, and still clean up process-private temp spill artifacts on shutdown
|
|
194
194
|
- `sessionMode: "fresh"` without an explicit `--session` rotates that extension-managed session to the new browser so later auto calls keep using it
|
|
195
195
|
- for local Unix launches, the wrapper uses a short private socket directory under `/tmp` so extension-generated session names do not trip upstream Unix socket-path limits in longer cwd/session-name combinations
|
|
196
|
-
- for direct headless local Chrome launches to `chatgpt.com
|
|
196
|
+
- for direct headless local Chrome launches to `chat.com`, `chatgpt.com`, and `chat.openai.com`, the extension injects a normal Chrome user agent when the caller did not explicitly provide `--user-agent`; this keeps the default headless workflow usable without forcing `--headed` or `--auto-connect`
|
|
197
197
|
- after profiled `open` calls, the extension best-effort re-selects the tab that matches the returned page URL when restored profile tabs steal focus during launch
|
|
198
198
|
- after a target tab is known, later active-tab commands like `click` and `snapshot -i` best-effort pin that same tab inside the same upstream invocation when a reconnect would otherwise drift to a restored tab
|
|
199
199
|
- explicit caller-provided `--session` values are treated as user-managed and are not auto-closed by the extension
|
package/docs/REQUIREMENTS.md
CHANGED
|
@@ -85,7 +85,7 @@ The design should comfortably support workflows such as:
|
|
|
85
85
|
- web research
|
|
86
86
|
- using browser UIs for other LLMs such as ChatGPT, Grok, Gemini, and Claude
|
|
87
87
|
- isolated authenticated browser sessions
|
|
88
|
-
- headless authenticated ChatGPT/OpenAI browsing without forcing `--headed` or `--auto-connect`
|
|
88
|
+
- headless authenticated `chat.com` / ChatGPT / OpenAI browsing without forcing `--headed` or `--auto-connect`
|
|
89
89
|
- upstream profile/debug workflows without adding a local profile-cloning layer in this package
|
|
90
90
|
|
|
91
91
|
## Implications for the implementation
|
package/docs/TOOL_CONTRACT.md
CHANGED
|
@@ -24,7 +24,7 @@ It also keeps the main UX where it belongs: the agent invokes the tool directly
|
|
|
24
24
|
|
|
25
25
|
The tool guidance should be written for task discovery first, not wrapper implementation first. That means the description should emphasize browser use cases like web research, reading live docs, clicking, filling, screenshots, extraction, and authenticated/profile-based workflows. Low-level wrapper details like `stdin` and exact CLI args belong in the schema and guidelines, not the lead description.
|
|
26
26
|
|
|
27
|
-
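The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <n>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.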
|
|
27
|
+
The tool also needs an operating playbook, not just a capability list. The model should not have to rediscover basics each session. Guidance should explicitly encode the normal browser workflow (`open` -> `snapshot -i` -> interact -> re-snapshot), the authenticated-content workflow (prefer `--profile Default` on the first browser call and let the implicit session carry continuity; use `--auto-connect` as a fallback when profile reuse is unavailable), and the preferred recovery path when a session opens on the wrong tab, an action changes origin unexpectedly, or an `open` call returns blocked/blank/unexpected results (`tab list` / `tab <tab-id-or-label>` / `snapshot -i` before retrying different URLs or fallback strategies). It should also discourage inventing fixed explicit session names for routine tasks, because those names leak stale browser state across otherwise unrelated `pi` sessions. For read-only browsing tasks, guidance should prefer answering from the current page state first: use the current snapshot, structured ref labels, or `eval --stdin` on the current page before navigating into media viewers, detail routes, or other new pages unless the current view lacks the needed information. When using `eval --stdin`, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics. When using `eval --stdin` for extraction, return the intended value instead of relying on `console.log` as the primary result channel.
|
|
28
28
|
|
|
29
29
|
## Parameters
|
|
30
30
|
|
|
@@ -187,7 +187,7 @@ If `agent-browser` is not on `PATH`, fail with a message that:
|
|
|
187
187
|
- on local Unix launches, set a short private socket directory for wrapper-spawned `agent-browser` processes so extension-generated session names do not fail the upstream Unix socket-path length limit in longer cwd/session-name combinations
|
|
188
188
|
- treat successful plain-text inspection commands like `--help` and `--version` as stateless: do not inject the implicit managed session and do not let those calls claim the managed-session slot
|
|
189
189
|
- if startup-scoped flags like `--profile`, `--session-name`, or `--cdp` are supplied after the implicit session is already active while `sessionMode` is `"auto"`, return a validation error with a structured recovery hint that recommends `sessionMode: "fresh"`
|
|
190
|
-
- for direct headless local Chrome launches to `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
|
|
190
|
+
- for direct headless local Chrome launches to `chat.com` / `chatgpt.com` / `chat.openai.com`, allow a narrow compatibility fallback that injects a normal Chrome `--user-agent` only when the caller did not explicitly provide one and did not choose `--headed`, `--cdp`, `--auto-connect`, or a provider-backed launch
|
|
191
191
|
|
|
192
192
|
## Non-goals
|
|
193
193
|
|
|
package/extensions/agent-browser/index.ts
CHANGED
|
@@ -74,7 +74,7 @@ const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
|
74
74
|
"Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
|
|
75
75
|
"When using --profile, --session-name, or --cdp, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
|
|
76
76
|
"If you already used the implicit session and now need startup-scoped flags like --profile, --session-name, or --cdp, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
|
|
77
|
-
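"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <n> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",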
|
|
77
|
+
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load, --url, --fn, or --text.",
|
|
78
78
|
"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
|
|
79
79
|
"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
|
|
80
80
|
"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
|
|
@@ -414,25 +414,14 @@ function shouldPinSessionTabForCommand(options: { command?: string; sessionName?
|
|
|
414
414
|
}
|
|
415
415
|
|
|
416
416
|
function selectSessionTargetTab(options: {
|
|
417
|
-
tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
|
|
417
|
+
tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
|
|
418
418
|
target: SessionTabTarget;
|
|
419
419
|
}): OpenResultTabCorrection | undefined {
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
typeof options.target.title === "string"
|
|
426
|
-
? matchingTabs.find((tab) => tab.title?.trim() === options.target.title)
|
|
427
|
-
: undefined;
|
|
428
|
-
const selectedTab = titledMatch ?? matchingTabs[0];
|
|
429
|
-
return typeof selectedTab.index === "number"
|
|
430
|
-
? {
|
|
431
|
-
selectedIndex: selectedTab.index,
|
|
432
|
-
targetTitle: options.target.title,
|
|
433
|
-
targetUrl: options.target.url,
|
|
434
|
-
}
|
|
435
|
-
: undefined;
|
|
420
|
+
return chooseOpenResultTabCorrection({
|
|
421
|
+
tabs: options.tabs,
|
|
422
|
+
targetTitle: options.target.title,
|
|
423
|
+
targetUrl: options.target.url,
|
|
424
|
+
});
|
|
436
425
|
}
|
|
437
426
|
|
|
438
427
|
function deriveSessionTabTarget(options: {
|
|
@@ -570,9 +559,11 @@ async function collectOpenResultTabCorrection(options: {
|
|
|
570
559
|
if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
|
|
571
560
|
return undefined;
|
|
572
561
|
}
|
|
573
|
-
const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
|
|
562
|
+
const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
|
|
574
563
|
active: tab.active === true,
|
|
575
|
-
index: typeof tab.index === "number" ? tab.index :
|
|
564
|
+
index: typeof tab.index === "number" ? tab.index : index,
|
|
565
|
+
label: typeof tab.label === "string" ? tab.label : undefined,
|
|
566
|
+
tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
|
|
576
567
|
title: typeof tab.title === "string" ? tab.title : undefined,
|
|
577
568
|
url: typeof tab.url === "string" ? tab.url : undefined,
|
|
578
569
|
}));
|
|
@@ -590,9 +581,11 @@ async function collectSessionTabSelection(options: {
|
|
|
590
581
|
if (!isRecord(tabData) || !Array.isArray(tabData.tabs)) {
|
|
591
582
|
return undefined;
|
|
592
583
|
}
|
|
593
|
-
const tabs = tabData.tabs.filter(isRecord).map((tab) => ({
|
|
584
|
+
const tabs = tabData.tabs.filter(isRecord).map((tab, index) => ({
|
|
594
585
|
active: tab.active === true,
|
|
595
|
-
index: typeof tab.index === "number" ? tab.index :
|
|
586
|
+
index: typeof tab.index === "number" ? tab.index : index,
|
|
587
|
+
label: typeof tab.label === "string" ? tab.label : undefined,
|
|
588
|
+
tabId: typeof tab.tabId === "string" ? tab.tabId : undefined,
|
|
596
589
|
title: typeof tab.title === "string" ? tab.title : undefined,
|
|
597
590
|
url: typeof tab.url === "string" ? tab.url : undefined,
|
|
598
591
|
}));
|
|
@@ -607,7 +600,7 @@ async function applyOpenResultTabCorrection(options: {
|
|
|
607
600
|
}): Promise<OpenResultTabCorrection | undefined> {
|
|
608
601
|
const { correction, cwd, sessionName, signal } = options;
|
|
609
602
|
const result = await runSessionCommandData({
|
|
610
|
-
args: ["tab",
|
|
603
|
+
args: ["tab", correction.selectedTab],
|
|
611
604
|
cwd,
|
|
612
605
|
sessionName,
|
|
613
606
|
signal,
|
|
@@ -816,7 +809,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
816
809
|
sessionTabCorrection = plannedSessionTabSelection;
|
|
817
810
|
processArgs = ["--json", "--session", executionPlan.sessionName, "batch"];
|
|
818
811
|
processStdin = JSON.stringify([
|
|
819
|
-
["tab",
|
|
812
|
+
["tab", plannedSessionTabSelection.selectedTab],
|
|
820
813
|
commandTokens,
|
|
821
814
|
...(includePinnedNavigationSummary ? [["get", "title"], ["get", "url"]] : []),
|
|
822
815
|
]);
|
|
package/extensions/agent-browser/lib/results/presentation.ts
CHANGED
|
@@ -73,8 +73,15 @@ function getTabSummary(data: Record<string, unknown>): string | undefined {
|
|
|
73
73
|
const marker = tab.active === true ? "*" : "-";
|
|
74
74
|
const title = typeof tab.title === "string" ? tab.title : "(untitled)";
|
|
75
75
|
const url = typeof tab.url === "string" ? tab.url : "(no url)";
|
|
76
|
-
const
|
|
77
|
-
|
|
76
|
+
const tabSelector =
|
|
77
|
+
typeof tab.tabId === "string" && tab.tabId.trim().length > 0
|
|
78
|
+
? tab.tabId.trim()
|
|
79
|
+
: typeof tab.label === "string" && tab.label.trim().length > 0
|
|
80
|
+
? tab.label.trim()
|
|
81
|
+
: typeof tab.index === "number"
|
|
82
|
+
? String(tab.index)
|
|
83
|
+
: String(index);
|
|
84
|
+
return `${marker} [${tabSelector}] ${title} — ${url}`;
|
|
78
85
|
});
|
|
79
86
|
return lines.join("\n");
|
|
80
87
|
}
|
|
package/extensions/agent-browser/lib/runtime.ts
CHANGED
|
@@ -11,7 +11,7 @@ import { basename } from "node:path";
|
|
|
11
11
|
|
|
12
12
|
const STARTUP_SCOPED_FLAGS = ["--cdp", "--profile", "--session-name"] as const;
|
|
13
13
|
const OPEN_COMMANDS = new Set(["goto", "navigate", "open"]);
|
|
14
|
-
const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.openai.com", "chatgpt.com"]);
|
|
14
|
+
const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.com", "chat.openai.com", "chatgpt.com"]);
|
|
15
15
|
const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
|
|
16
16
|
const AGENT_BROWSER_IDLE_TIMEOUT_ENV = "AGENT_BROWSER_IDLE_TIMEOUT_MS";
|
|
17
17
|
const IMPLICIT_SESSION_IDLE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_IDLE_TIMEOUT_MS";
|
|
@@ -106,7 +106,8 @@ export interface CompatibilityWorkaround {
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
export interface OpenResultTabCorrection {
|
|
109
|
-
|
|
109
|
+
selectedTab: string;
|
|
110
|
+
selectionKind: "index" | "label" | "tabId";
|
|
110
111
|
targetTitle?: string;
|
|
111
112
|
targetUrl: string;
|
|
112
113
|
}
|
|
@@ -537,6 +538,26 @@ function normalizeComparableUrl(url: string): string | undefined {
|
|
|
537
538
|
}
|
|
538
539
|
}
|
|
539
540
|
|
|
541
|
+
function normalizeTabSelectionValue(value: string | undefined): string | undefined {
|
|
542
|
+
const normalizedValue = value?.trim();
|
|
543
|
+
return normalizedValue && normalizedValue.length > 0 ? normalizedValue : undefined;
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
function extractTabSelection(tab: { index?: number; label?: string; tabId?: string }): Pick<OpenResultTabCorrection, "selectedTab" | "selectionKind"> | undefined {
|
|
547
|
+
const tabId = normalizeTabSelectionValue(tab.tabId);
|
|
548
|
+
if (tabId) {
|
|
549
|
+
return { selectedTab: tabId, selectionKind: "tabId" };
|
|
550
|
+
}
|
|
551
|
+
const label = normalizeTabSelectionValue(tab.label);
|
|
552
|
+
if (label) {
|
|
553
|
+
return { selectedTab: label, selectionKind: "label" };
|
|
554
|
+
}
|
|
555
|
+
if (typeof tab.index === "number" && Number.isInteger(tab.index) && tab.index >= 0) {
|
|
556
|
+
return { selectedTab: String(tab.index), selectionKind: "index" };
|
|
557
|
+
}
|
|
558
|
+
return undefined;
|
|
559
|
+
}
|
|
560
|
+
|
|
540
561
|
function parseComparableNavigationUrl(url: string): URL | undefined {
|
|
541
562
|
try {
|
|
542
563
|
return new URL(url);
|
|
@@ -727,7 +748,7 @@ export function buildExecutionPlan(
|
|
|
727
748
|
|
|
728
749
|
export function chooseOpenResultTabCorrection(options: {
|
|
729
750
|
activeTabIndex?: number;
|
|
730
|
-
tabs: Array<{ active?: boolean; index?: number; title?: string; url?: string }>;
|
|
751
|
+
tabs: Array<{ active?: boolean; index?: number; label?: string; tabId?: string; title?: string; url?: string }>;
|
|
731
752
|
targetTitle?: string;
|
|
732
753
|
targetUrl?: string;
|
|
733
754
|
}): OpenResultTabCorrection | undefined {
|
|
@@ -740,6 +761,8 @@ export function chooseOpenResultTabCorrection(options: {
|
|
|
740
761
|
const tabsWithIndices = options.tabs.map((tab, index) => ({
|
|
741
762
|
...tab,
|
|
742
763
|
index: typeof tab.index === "number" ? tab.index : index,
|
|
764
|
+
label: normalizeTabSelectionValue(tab.label),
|
|
765
|
+
tabId: normalizeTabSelectionValue(tab.tabId),
|
|
743
766
|
}));
|
|
744
767
|
const activeTab =
|
|
745
768
|
tabsWithIndices.find((tab) => tab.active === true) ??
|
|
@@ -758,13 +781,14 @@ export function chooseOpenResultTabCorrection(options: {
|
|
|
758
781
|
? undefined
|
|
759
782
|
: matchingTabs.find((tab) => typeof tab.title === "string" && tab.title.trim() === trimmedTargetTitle);
|
|
760
783
|
const selectedTab = titledMatch ?? matchingTabs[0];
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
784
|
+
const tabSelection = extractTabSelection(selectedTab);
|
|
785
|
+
return tabSelection
|
|
786
|
+
? {
|
|
787
|
+
...tabSelection,
|
|
765
788
|
targetTitle: trimmedTargetTitle.length > 0 ? trimmedTargetTitle : undefined,
|
|
766
789
|
targetUrl: normalizedTargetUrl,
|
|
767
|
-
}
|
|
790
|
+
}
|
|
791
|
+
: undefined;
|
|
768
792
|
}
|
|
769
793
|
|
|
770
794
|
export function parseCommandInfo(args: string[]): CommandInfo {
|
package/package.json
CHANGED