pi-agent-browser-native 0.2.39 → 0.2.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,198 @@
1
+ /**
2
+ * Purpose: Load pi-agent-browser-native package configuration from Pi-scoped global, project, or explicit paths.
3
+ * Responsibilities: Resolve config layers, resolve secrets without exposing values, and provide redacted status for tools/CLIs.
4
+ * Scope: Package-owned configuration only; canonical config policy lives in config-policy.js, browser command execution and web-search API calls live in focused modules.
5
+ * Invariants/Assumptions: Raw project-local plaintext credentials are unsafe and rejected by the shared config policy; command credentials are resolved lazily at execution time.
6
+ */
7
+
8
+ import { exec as execCallback } from "node:child_process";
9
+ import { readFile } from "node:fs/promises";
10
+ import { promisify } from "node:util";
11
+
12
+ import {
13
+ SECRET_COMMAND_TIMEOUT_MS,
14
+ buildAgentBrowserConfigState,
15
+ getAgentBrowserConfigPaths,
16
+ getWebSearchCredentialSource,
17
+ getWebSearchProviderOrder,
18
+ loadAgentBrowserConfigStateSync,
19
+ mergeAgentBrowserConfig,
20
+ parseAgentBrowserConfigLayer,
21
+ resolveEnvInterpolations,
22
+ } from "./config-policy.js";
23
+ import type {
24
+ AgentBrowserConfig,
25
+ AgentBrowserConfigScope,
26
+ AgentBrowserConfigState,
27
+ BrowserDefaultProfileConfig,
28
+ BrowserDefaultProfilePolicy,
29
+ ConfigLayer,
30
+ CredentialSource,
31
+ CredentialSourceKind,
32
+ WebSearchProvider,
33
+ } from "./config-policy.js";
34
+
35
+ export {
36
+ AGENT_BROWSER_CONFIG_ENV,
37
+ BRAVE_API_KEY_ENV,
38
+ CONFIG_RELATIVE_PATH,
39
+ DEFAULT_WEB_SEARCH_PROVIDER,
40
+ EXA_API_KEY_ENV,
41
+ GLOBAL_CONFIG_RELATIVE_PATH,
42
+ SECRET_COMMAND_TIMEOUT_MS,
43
+ WEB_SEARCH_PROVIDER_CONFIG_KEYS,
44
+ WEB_SEARCH_PROVIDER_DESCRIPTORS,
45
+ WEB_SEARCH_PROVIDER_ENV_VARS,
46
+ WEB_SEARCH_PROVIDERS,
47
+ buildAgentBrowserConfigState,
48
+ buildWebSearchCredentialSources,
49
+ canRegisterWebSearchTool,
50
+ classifyCredentialSource,
51
+ formatBrowserExecutableStatus,
52
+ formatBrowserProfileStatus,
53
+ getAgentBrowserConfigPaths,
54
+ getCredentialSourceSummary,
55
+ getGlobalAgentBrowserConfigPath,
56
+ getProjectAgentBrowserConfigPath,
57
+ getWebSearchCredentialSource,
58
+ getWebSearchProviderConfigKey,
59
+ getWebSearchProviderDescriptor,
60
+ getWebSearchProviderEnvVar,
61
+ getWebSearchProviderLabel,
62
+ getWebSearchProviderOrder,
63
+ hasPotentialCredentialSource,
64
+ isPlaintextCredentialValue,
65
+ isProjectSafeCredentialValueForProvider,
66
+ isWebSearchProvider,
67
+ loadAgentBrowserConfigStateSync,
68
+ mergeAgentBrowserConfig,
69
+ parseAgentBrowserConfigLayer,
70
+ resolveEnvInterpolations,
71
+ summarizeConfigFiles,
72
+ validateAgentBrowserConfig,
73
+ validateWebSearchProvider,
74
+ } from "./config-policy.js";
75
+ export type {
76
+ AgentBrowserConfig,
77
+ AgentBrowserConfigScope,
78
+ AgentBrowserConfigState,
79
+ BrowserDefaultProfileConfig,
80
+ BrowserDefaultProfilePolicy,
81
+ ConfigLayer,
82
+ CredentialSource,
83
+ CredentialSourceKind,
84
+ WebSearchProvider,
85
+ WebSearchProviderDescriptor,
86
+ } from "./config-policy.js";
87
+
88
+ const exec = promisify(execCallback); // Promise-returning form of child_process.exec; the resolved value exposes stdout (read by resolveCommandCredential).
89
+
90
+ export interface ResolvedCredential { // A resolved secret paired with the source descriptor it was resolved from.
91
+ source: CredentialSource; // The descriptor that was resolved (passed through unchanged by resolveCredentialSource).
92
+ value: string; // Secret text; when produced by resolveCredentialSource it is trimmed and non-empty.
93
+ }
94
+
95
+ async function readConfigLayer(path: string, scope: ConfigLayer["scope"], errors: string[], warnings: string[]): Promise<ConfigLayer | undefined> { // Reads one config file as UTF-8 and parses it via parseAgentBrowserConfigLayer; resolves to undefined when the file cannot be read.
96
+ let raw: string;
97
+ try {
98
+ raw = await readFile(path, "utf8");
99
+ } catch (error) {
100
+ if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") { // Narrow the unknown catch value before reading its code property.
101
+ return undefined; // A missing file is not an error: the layer is simply absent and nothing is recorded.
102
+ }
103
+ errors.push(`Could not read ${scope} config ${path}: ${error instanceof Error ? error.message : String(error)}`);
104
+ return undefined; // Any other read failure is appended to the caller's errors array and the layer is skipped.
105
+ }
106
+ return parseAgentBrowserConfigLayer(raw, path, scope, errors, warnings); // Parsing problems are reported through the same errors/warnings arrays (handled in config-policy.js).
107
+ }
108
+
109
+ export async function loadAgentBrowserConfig(options: { cwd?: string; env?: NodeJS.ProcessEnv } = {}): Promise<AgentBrowserConfigState> { // Reads the global, project, and optional override config files in that order and builds the combined config state.
110
+ const env = options.env ?? process.env; // Defaults to the live process environment when no env is supplied.
111
+ const paths = getAgentBrowserConfigPaths({ cwd: options.cwd, env });
112
+ const errors: string[] = []; // Shared accumulators: every layer read/parse appends to these.
113
+ const warnings: string[] = [];
114
+ const layerCandidates = [
115
+ { path: paths.global, scope: "global" as const },
116
+ { path: paths.project, scope: "project" as const },
117
+ ...(paths.override ? [{ path: paths.override, scope: "override" as const }] : []), // The override layer is only attempted when an override path was resolved.
118
+ ];
119
+ const layers: ConfigLayer[] = [];
120
+ let mergedConfig: AgentBrowserConfig = {};
121
+ for (const candidate of layerCandidates) { // Sequential on purpose of ordering: layers are read and merged in candidate order.
122
+ const layer = await readConfigLayer(candidate.path, candidate.scope, errors, warnings);
123
+ if (!layer) continue; // No layer was produced for this candidate (e.g. missing or unreadable file); skip it.
124
+ layers.push(layer);
125
+ mergedConfig = mergeAgentBrowserConfig(mergedConfig, layer.config); // NOTE(review): presumably later layers take precedence; confirm in config-policy.js.
126
+ }
127
+ return buildAgentBrowserConfigState({ env, errors, layers, mergedConfig, paths, warnings });
128
+ }
129
+
130
+ export function loadAgentBrowserConfigSync(options: { cwd?: string; env?: NodeJS.ProcessEnv } = {}): AgentBrowserConfigState { // Synchronous counterpart of loadAgentBrowserConfig with the same options shape.
131
+ return loadAgentBrowserConfigStateSync(options); // Pure delegation: all loading logic lives in config-policy.js.
132
+ }
133
+
134
+ async function resolveCommandCredential(rawValue: string, signal?: AbortSignal): Promise<string | undefined> { // Runs a configured secret command and resolves to its trimmed stdout, or undefined when there is no command or no output.
135
+ const command = rawValue.slice(1).trim(); // Drops the first character (presumably the command-marker prefix; confirm against config-policy.js) and surrounding whitespace.
136
+ if (!command) return undefined; // Nothing left to execute after stripping.
137
+ try {
138
+ const result = await exec(command, { // Executed through child_process.exec, i.e. via a shell.
139
+ signal, // Lets the caller cancel the running command.
140
+ timeout: SECRET_COMMAND_TIMEOUT_MS,
141
+ maxBuffer: 1024 * 1024, // Cap captured output at 1 MiB.
142
+ });
143
+ const value = result.stdout.trim();
144
+ return value.length > 0 ? value : undefined; // Empty output is treated as "no credential".
145
+ } catch (error) {
146
+ if (signal?.aborted) throw error; // Caller-initiated aborts propagate with the original error.
147
+ throw new Error("Credential command failed without exposing command output. Check pi-agent-browser-config web-search status and the configured secret manager command."); // Deliberately generic: the original error is discarded so its details are not surfaced.
148
+ }
149
+ }
150
+
151
+ export async function resolveCredentialSource( // Turns a credential source descriptor into its concrete value; resolves to undefined when there is no source or no usable value.
152
+ source: CredentialSource | undefined,
153
+ options: { env?: NodeJS.ProcessEnv; signal?: AbortSignal } = {}, // env feeds "env" sources; signal is forwarded to "command" sources.
154
+ ): Promise<ResolvedCredential | undefined> {
155
+ if (!source) return undefined;
156
+ let value: string | undefined;
157
+ if (source.kind === "command") {
158
+ value = await resolveCommandCredential(source.rawValue, options.signal); // May reject if the command fails or is aborted.
159
+ } else if (source.kind === "env") {
160
+ value = resolveEnvInterpolations(source.rawValue, options.env ?? process.env)?.trim(); // Falls back to the live process environment.
161
+ } else {
162
+ value = source.rawValue.trim(); // Any other kind: the raw value itself is the credential.
163
+ }
164
+ return value ? { source, value } : undefined; // Empty or missing values resolve to undefined.
165
+ }
166
+
167
+ export async function resolveWebSearchCredential( // Resolves the credential configured for one web-search provider.
168
+ state: AgentBrowserConfigState,
169
+ provider: WebSearchProvider,
170
+ options: { env?: NodeJS.ProcessEnv; signal?: AbortSignal } = {},
171
+ ): Promise<ResolvedCredential | undefined> {
172
+ return resolveCredentialSource(getWebSearchCredentialSource(state, provider), options); // A provider with no credential source resolves to undefined.
173
+ }
174
+
175
+ export async function resolvePreferredWebSearchCredential( // Walks providers in getWebSearchProviderOrder order and resolves to the first one that yields a credential.
176
+ state: AgentBrowserConfigState,
177
+ options: { env?: NodeJS.ProcessEnv; provider?: WebSearchProvider | "auto"; signal?: AbortSignal } = {}, // options.provider is passed to getWebSearchProviderOrder to shape the search order.
178
+ ): Promise<{ provider: WebSearchProvider; credential: ResolvedCredential } | undefined> {
179
+ for (const provider of getWebSearchProviderOrder(state, options.provider)) {
180
+ const credential = await resolveWebSearchCredential(state, provider, options); // NOTE(review): a failing credential command rejects here instead of falling through to the next provider.
181
+ if (credential) return { provider, credential }; // First provider with a usable credential wins.
182
+ }
183
+ return undefined; // No provider produced a credential.
184
+ }
185
+
186
+ export async function hasResolvableCredentialSource( // Reports whether at least one web-search provider has a credential source that could yield a value.
187
+ state: AgentBrowserConfigState,
188
+ options: { env?: NodeJS.ProcessEnv } = {},
189
+ ): Promise<boolean> {
190
+ if (!state.webSearchEnabled || state.errors.length > 0) return false; // Disabled web search or any recorded config error means nothing counts as resolvable.
191
+ for (const provider of getWebSearchProviderOrder(state)) {
192
+ const source = getWebSearchCredentialSource(state, provider);
193
+ if (!source) continue;
194
+ if (source.kind === "command") return true; // Command sources count as resolvable without being executed here.
195
+ if ((await resolveCredentialSource(source, options))?.value) return true; // Other sources must actually resolve to a non-empty value.
196
+ }
197
+ return false;
198
+ }
@@ -181,7 +181,7 @@ export const AGENT_BROWSER_PARAMS = Type.Object({
181
181
  sessionMode: Type.Optional(
182
182
  StringEnum(["auto", "fresh"] as const, {
183
183
  description:
184
- "Session handling mode. `auto` reuses the extension-managed pi-scoped session when possible. `fresh` switches that managed session to a fresh upstream launch so launch-scoped flags like --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device apply and later auto calls follow the new browser.",
184
+ "Session handling mode. `auto` reuses the extension-managed pi-scoped session when possible. `fresh` switches that managed session to a fresh upstream launch so launch-scoped flags like --profile, --executable-path, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device apply and later auto calls follow the new browser.",
185
185
  default: DEFAULT_SESSION_MODE,
186
186
  }),
187
187
  ),
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Purpose: Canonical launch-scoped agent-browser flag metadata shared by runtime planning and agent-facing guidance.
3
+ * Responsibilities: Define which upstream flags require a fresh launch, explain why, and expose stable guidance labels.
4
+ * Scope: Metadata only; argv parsing and execution planning live in runtime.ts.
5
+ */
6
+
7
+ export interface LaunchScopedFlagDefinition { // One launch-scoped upstream flag together with the explanation of why it needs a fresh launch.
8
+ flag: string; // The flag exactly as written on the command line, e.g. "--profile" or "-p".
9
+ reason: string; // Human-readable explanation fragment for guidance text.
10
+ }
11
+
12
+ export const LAUNCH_SCOPED_FLAG_DEFINITIONS = [ // Ordered list of launch-scoped flags; array order is the order in which derived flag lists are produced.
13
+ {
14
+ flag: "--auto-connect",
15
+ reason: "attaches to an already-running browser at launch time instead of reusing an existing named session",
16
+ },
17
+ {
18
+ flag: "--cdp",
19
+ reason: "selects the browser/CDP endpoint used when an upstream session is launched",
20
+ },
21
+ {
22
+ flag: "--enable",
23
+ reason: "selects built-in page init scripts before the upstream browser session is launched",
24
+ },
25
+ {
26
+ flag: "--executable-path",
27
+ reason: "selects the browser executable used for the upstream launch",
28
+ },
29
+ {
30
+ flag: "--init-script",
31
+ reason: "registers page init scripts before the upstream browser session is launched",
32
+ },
33
+ {
34
+ flag: "--device",
35
+ reason: "selects the provider device for the upstream launch",
36
+ },
37
+ {
38
+ flag: "--profile",
39
+ reason: "selects Chrome profile state for the upstream launch",
40
+ },
41
+ {
42
+ flag: "--provider",
43
+ reason: "selects the upstream browser provider for the launch",
44
+ },
45
+ {
46
+ flag: "-p", // Listed as its own entry with the same reason text as --provider.
47
+ reason: "selects the upstream browser provider for the launch",
48
+ },
49
+ {
50
+ flag: "--session-name",
51
+ reason: "selects upstream saved auth/session state for the launch",
52
+ },
53
+ {
54
+ flag: "--state",
55
+ reason: "loads persisted upstream browser/auth state at launch time",
56
+ },
57
+ ] as const satisfies readonly LaunchScopedFlagDefinition[]; // as const keeps literal flag types; satisfies validates each entry's shape without widening.
58
+
59
+ export const LAUNCH_SCOPED_FLAGS = LAUNCH_SCOPED_FLAG_DEFINITIONS.map((definition) => definition.flag); // Flag strings only, in definition order.
60
+ export const LAUNCH_SCOPED_FLAG_LABEL = LAUNCH_SCOPED_FLAGS.join(", "); // Comma-separated flag list for embedding in guidance text.
61
+
62
+ /**
63
+ * The subset of launch-scoped flags that can restore browser/auth state with pre-existing tabs
64
+ * and are plausible wrong-active-tab sources after a fresh launch. These trigger post-open
65
+ * tab-correction (the `tab list` + re-select cycle).
66
+ */
67
+ export const LAUNCH_SCOPED_TAB_CORRECTION_FLAGS = new Set(["--profile", "--session-name", "--state"] as const);
@@ -1,3 +1,5 @@
1
+ import { LAUNCH_SCOPED_FLAG_LABEL } from "./launch-scoped-flags.js";
2
+
1
3
  /**
2
4
  * Purpose: Provide the canonical agent_browser operating playbook shared by runtime prompt metadata and generated documentation fragments.
3
5
  * Responsibilities: Define stable guidance bullets, native tool-call examples, and wrapper-behavior notes without importing runtime/browser process code.
@@ -18,7 +20,7 @@ export function buildInstalledDocsGuideline(paths: { readmePath: string; command
18
20
  }
19
21
 
20
22
  export const QUICK_START_GUIDELINES = [
21
- "Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch, including qa.attached for current sessions), electron (desktop Electron list/launch/status/cleanup/probe), or the experimental sourceLookup / networkSourceLookup helpers (candidates only; each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and is rejected with electron; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state. Do not pass --json in args; the wrapper injects it.",
23
+ `Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch, including qa.attached for current sessions), electron (desktop Electron list/launch/status/cleanup/probe), or the experimental sourceLookup / networkSourceLookup helpers (candidates only; each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and is rejected with electron; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}) to apply. Do not pass --json in args; the wrapper injects it.`,
22
24
  "There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
23
25
  "Common first calls (first-call recipe): { args: [\"open\", \"<url>\"] } → { args: [\"snapshot\", \"-i\"] } → { args: [\"click\", \"@eN\"] } or { args: [\"fill\", \"@eN\", \"<text>\"] } using @refs and visible labels from that snapshot, then { args: [\"snapshot\", \"-i\"] } after navigation or DOM changes. On https://example.com/ the main link label is Learn more (use exact snapshot text, not guessed link copy).",
24
26
  "Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded click try-*-candidate next actions or, for fill misses with current editable refs, details.richInputRecovery with focus/click actions that do not copy fill text; stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
@@ -29,8 +31,9 @@ export const QUICK_START_GUIDELINES = [
29
31
  "When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
30
32
  ] as const;
31
33
 
32
- export const BRAVE_SEARCH_PROMPT_GUIDELINE =
33
- "With BRAVE_API_KEY set, use Brave Search via bash/curl to find exact destination URLs, then open the chosen URL with agent_browser; do not browse search results just to locate a target.";
34
+ export const WEB_SEARCH_PROMPT_GUIDELINE =
35
+ "Use agent_browser_web_search for quick live search/URL discovery; it chooses Exa or Brave, preferring Exa unless configured otherwise. Use agent_browser for interaction/DOM/screenshots/auth. Do not run parallel searches: one good query, inspect results, then one follow-up max; on HTTP 429 stop and report provider limits.";
36
+
34
37
 
35
38
  export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
36
39
  "Standard workflow: open the page, snapshot -i, interact using current @refs from that snapshot, and re-snapshot after navigation, scrolling, rerendering, or other major DOM changes because refs are page-scoped; the wrapper fails mutation-prone stale/recycled refs before upstream can silently target a different current-page element.",
@@ -39,10 +42,10 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
39
42
  "When a visible text or accessible-name target should survive ref churn, prefer find locators such as role, text, label, placeholder, alt, title, or testid with the intended action instead of guessing a CSS selector.",
40
43
  "For desktop or host-controlled rich inputs, if semanticAction fill misses, refresh refs and prefer a current editable @ref from details.richInputRecovery or the latest snapshot; focus or click that ref, then use keyboard inserttext or keyboard type with the intended text. Do not auto-submit with Enter or a submit button unless the user flow explicitly calls for it.",
41
44
  "Do not assume Playwright selector dialects such as text=Close or button:has-text('Close') are supported wrapper syntax unless current upstream agent-browser behavior has been verified.",
42
- "For authenticated or user-specific content explicitly requested by the user, such as feeds, inboxes, account pages, or private dashboards, prefer --profile Default on the first browser call and let the implicit session carry continuity. Do not use a real profile for public pages just because they are dashboards. Treat visible page content from real profiles as model-visible transcript data; use --auto-connect only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser.",
45
+ "For authenticated or user-specific content explicitly requested by the user, such as feeds, inboxes, account pages, or private dashboards, use a real profile only when the user/config asks for it or profiles have been inspected; do not assume --profile Default exists on every machine. Do not use a real profile for public pages just because they are dashboards. Treat visible page content from real profiles as model-visible transcript data; use --auto-connect only if profile-based reuse is unavailable or the task is specifically about attaching to a running debug-enabled browser. If profile/user-data-dir resolution fails, stop retrying opens, run profiles and/or doctor through agent_browser, then report what the user needs to configure.",
43
46
  "Do not invent fixed explicit session names for routine tasks. Use the implicit session unless you truly need multiple isolated browser sessions in the same conversation.",
44
- "When using --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device, put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.",
45
- "If you already used the implicit session and now need launch-scoped flags like --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device, retry with sessionMode set to fresh or pass an explicit --session for the new launch. After a successful unnamed fresh launch, later auto calls follow that new session.",
47
+ `When using launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}), put them on the first command for that session. If you intentionally use an explicit --session, keep using that same explicit session for follow-ups.`,
48
+ `If you already used the implicit session and now need launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}), retry with top-level sessionMode set to fresh or pass an explicit --session for the new launch; never pass --session-mode inside args. After a successful unnamed fresh launch, later auto calls follow that new session.`,
46
49
  "For React introspection, launch the page with --enable react-devtools before first navigation, then use react tree, react inspect <fiberId>, sourceLookup candidates for local UI source hints, react renders start/stop, or react suspense; sourceLookup is experimental and reports confidence/evidence instead of guaranteed DOM-to-file mappings. For failed fetches and APIs, networkSourceLookup (experimental) correlates failed network requests with initiator metadata and bounded workspace URL literals—candidates only, not definitive blame. Use vitals [url] for Core Web Vitals and hydration timing, and pushstate <url> for client-side SPA navigation.",
47
50
  "For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigate/opening the target page.",
48
51
  "For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, auth list/show/delete/remove for local auth-profile maintenance, auth login when you need the browser to fill a saved profile, state save/load for portable test state, state list/show/rename/clear/clear -a/clean for saved-state lifecycle cleanup, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.",
@@ -67,12 +70,12 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
67
70
  ] as const;
68
71
 
69
72
  export const TOOL_PROMPT_GUIDELINES_SUFFIX = [
70
- "Prefer agent_browser over bash for opening sites, reading docs on the web, clicking, filling, screenshots, eval, and batch workflows.",
73
+ "Prefer agent_browser over bash for opening sites, docs, clicking, filling, screenshots, eval, and batch workflows.",
71
74
  "Do not fall back to osascript, AppleScript, or generic browser-driving bash commands when agent_browser can do the job.",
72
75
  "Pass exact agent-browser CLI arguments in args when you are not using semanticAction, job, or qa, excluding the binary name and --json (the wrapper injects --json automatically).",
73
76
  "Use stdin only for eval --stdin, batch, auth save --password-stdin, or wrapper-generated job/qa batches instead of shell heredocs or password args; other command/stdin combinations are rejected before launch.",
74
- "Let the extension-managed session handle the common path unless you explicitly need a fresh launch for upstream flags like --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device.",
75
- "Use sessionMode=fresh when switching from an existing implicit session to a new profile/debug/init-script/provider launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
77
+ `Let the extension-managed session handle the common path unless you explicitly need a fresh launch for launch-scoped flags (${LAUNCH_SCOPED_FLAG_LABEL}).`,
78
+ "Use sessionMode=fresh when switching from an existing implicit session to a new profile/browser executable/debug/init-script/provider launch without inventing a fixed explicit session name; later auto calls will follow that new session.",
76
79
  ] as const;
77
80
 
78
81
  export const INSPECTION_TOOL_CALL_EXAMPLES = [
@@ -87,30 +90,52 @@ export const WRAPPER_TAB_RECOVERY_BEHAVIOR = [
87
90
  "If a known session target unexpectedly reports about:blank, agent_browser best-effort re-selects the prior intended target when it still exists; if recovery fails, it records the observed about:blank target and reports exact recovery guidance instead of treating the prior page as active.",
88
91
  ] as const;
89
92
 
90
- export function buildSharedBrowserPlaybookGuidelines(options: { includeBraveSearch: boolean }): string[] {
93
+ export function buildSharedBrowserPlaybookGuidelines(options: { includeWebSearch: boolean }): string[] {
91
94
  return [
92
95
  SHARED_BROWSER_PLAYBOOK_GUIDELINES[0],
93
- ...(options.includeBraveSearch ? [BRAVE_SEARCH_PROMPT_GUIDELINE] : []),
96
+ ...(options.includeWebSearch ? [WEB_SEARCH_PROMPT_GUIDELINE] : []),
94
97
  ...SHARED_BROWSER_PLAYBOOK_GUIDELINES.slice(1),
95
98
  ];
96
99
  }
97
100
 
98
101
  /** Tier A: always-on tool promptGuidelines (keep small; Tier B lives in SHARED_BROWSER_PLAYBOOK_GUIDELINES and docs). */
99
102
  export const RUNTIME_PROMPT_GUIDELINES = [
100
- "Use exactly one input mode: args (open→snapshot -i→@refs), semanticAction, job, qa, sourceLookup/networkSourceLookup (candidate hints), or electron. stdin only for batch/eval/auth or wrapper batch; electron rejects stdin. Do not pass --json in args; wrapper injects it.",
101
- "Common flow: open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot fills unless they may submit/navigate/rerender. Respect explicit stop boundaries: if the user says stop before order/post/purchase/submit, do not click the final action.",
102
- "Use sessionMode=fresh for launch-scoped flags on an active implicit session. For signed-in/account-specific content, start with --profile Default plus sessionMode=fresh unless asked otherwise; visible content is model-visible.",
103
- "For artifacts, save the exact user path and check details.artifactVerification/details.artifacts before claiming success. If close is blocked by details.promptGuard, save the required artifact first. record stop needs ffmpeg on PATH; close does not delete saved files; \"waited\":\"timeout\" is not proof.",
103
+ "Use exactly one input mode: args, semanticAction, job, qa, sourceLookup/networkSourceLookup, or electron. stdin only for batch/eval/auth or wrapper batch; electron rejects stdin. Do not pass --json in args; wrapper injects it.",
104
+ "Common flow: open, snapshot -i, use current @refs or semanticAction, then re-snapshot after navigation/scroll/rerender/DOM change. Batch same-snapshot fills unless they may submit/navigate/rerender. Respect explicit stop boundaries: stop before order/post/purchase/submit.",
105
+ "Use top-level sessionMode=fresh for launch-scoped flags; never put --session-mode in args. For signed-in/account-specific content, use requested/configured profiles, never assume --profile Default; on profile failures, run profiles/doctor and tell the user what to configure. Use --executable-path for configured Chromium. Profile content is model-visible.",
106
+ "For artifacts, save the exact user path and verify details.artifactVerification/details.artifacts before claiming success. If close is blocked by details.promptGuard, save the required artifact first. record stop needs ffmpeg; close does not delete saved files; waited:timeout is not proof.",
104
107
  "When details.nextActions is present, prefer exact payloads over prose/guessed selectors. For dense snapshots, check Omitted high-value controls/details.data.highValueControlRefIds. For dashboards, verify scroll with screenshot/snapshot; if nothing moved, target the real scroll region.",
105
108
  "For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with plain expression, not console.log. Batch three or more known refs/selectors (e.g. [[\"get\",\"text\",\"@e1\"],[\"get\",\"text\",\"@e2\"]]); selector visibility warnings → visible @refs/nextActions.",
106
109
  ] as const;
107
110
 
108
- export function buildToolPromptGuidelines(options: { includeBraveSearch: boolean; docs?: { readmePath: string; commandReferencePath: string; toolContractPath: string } }): string[] {
111
+ export function buildBrowserExecutablePathGuideline(executablePath: string | undefined): string | undefined { // Builds the prompt guideline for a configured browser.executablePath, or undefined when none is set.
112
+ if (!executablePath) return undefined; // Undefined and empty-string paths both produce no guideline.
113
+ return `Agent-browser config sets browser.executablePath to ${JSON.stringify(executablePath)}; for fresh browser launches that should use that Chromium-compatible executable, add --executable-path ${JSON.stringify(executablePath)} with sessionMode:fresh. The upstream profiles command still lists Chrome profiles only; for non-Chrome Chromium login state, ask the user for an explicit profile/user-data directory path or inspect local setup with profiles/doctor before recommending a profile value.`;
114
+ }
115
+
116
+ export function buildBrowserDefaultProfileGuideline(profile: { name: string; policy: "explicit-only" | "authenticated-only" | "always" } | undefined): string | undefined { // Builds the prompt guideline for a configured default browser profile, keyed on its policy.
117
+ if (!profile || profile.policy === "explicit-only") return undefined; // No profile, or a profile that must be requested explicitly: emit no guideline.
118
+ if (profile.policy === "always") { // "always": guideline for any fresh launch that should use the configured profile.
119
+ return `Agent-browser config sets browser.defaultProfile.name to ${JSON.stringify(profile.name)} with policy always; use --profile ${JSON.stringify(profile.name)} with sessionMode:fresh when a fresh browser launch should use the configured profile, and treat profile content as model-visible user data.`;
120
+ }
121
+ return `Agent-browser config sets browser.defaultProfile.name to ${JSON.stringify(profile.name)}; for signed-in/account-specific browser tasks, start with --profile ${JSON.stringify(profile.name)} plus sessionMode:fresh unless the user asks for a different profile.`;
122
+ }
123
+
124
+ export function buildToolPromptGuidelines(options: {
125
+ browserDefaultProfile?: { name: string; policy: "explicit-only" | "authenticated-only" | "always" };
126
+ browserExecutablePath?: string;
127
+ docs?: { readmePath: string; commandReferencePath: string; toolContractPath: string };
128
+ includeWebSearch: boolean;
129
+ }): string[] {
130
+ const browserDefaultProfileGuideline = buildBrowserDefaultProfileGuideline(options.browserDefaultProfile);
131
+ const browserExecutablePathGuideline = buildBrowserExecutablePathGuideline(options.browserExecutablePath);
109
132
  return [
110
133
  ...TOOL_PROMPT_GUIDELINES_PREFIX,
111
134
  ...(options.docs ? [buildInstalledDocsGuideline(options.docs)] : []),
112
135
  ...RUNTIME_PROMPT_GUIDELINES,
113
- ...(options.includeBraveSearch ? [BRAVE_SEARCH_PROMPT_GUIDELINE] : []),
136
+ ...(browserExecutablePathGuideline ? [browserExecutablePathGuideline] : []),
137
+ ...(browserDefaultProfileGuideline ? [browserDefaultProfileGuideline] : []),
138
+ ...(options.includeWebSearch ? [WEB_SEARCH_PROMPT_GUIDELINE] : []),
114
139
  TOOL_PROMPT_GUIDELINES_SUFFIX[0],
115
140
  TOOL_PROMPT_GUIDELINES_SUFFIX[1],
116
141
  ];
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Purpose: Build browser profile/config recovery hints for launch/setup failures.
3
+ * Responsibilities: Recognize local Chrome/profile setup errors, avoid self-loop diagnostics, and emit canonical next actions.
4
+ * Scope: Error presentation only; upstream profile discovery and doctor execution remain normal agent_browser commands.
5
+ */
6
+
7
+ import { isOpenNavigationCommand } from "../../command-taxonomy.js";
8
+ import { getStartupScopedFlags, type CommandInfo } from "../../runtime.js";
9
+ import type { AgentBrowserNextAction } from "../contracts.js";
10
+ import { buildNextToolAction } from "../next-actions.js";
11
+
12
/**
 * Model-facing hint appended to errors that look like local browser profile / user-data-dir
 * setup problems. It is three sentences separated by single spaces: the diagnosis, the
 * recommended next steps, and a reminder about the top-level `sessionMode` field.
 */
const BROWSER_PROFILE_CONFIG_HINT =
  "Agent-browser profile/config hint: this looks like a local browser profile or Chrome user-data-dir setup problem, not a page-specific failure." +
  " " +
  "Do not keep retrying the same open/profile call. Run `profiles` and/or `doctor` through agent_browser, then tell the user whether Chrome/Chromium is installed, which Chrome profile directory names are available, or whether they need to configure a full profile/user-data directory path, a non-default Chromium-compatible `--executable-path`, or remove the profile requirement for public-page browsing." +
  " " +
  "Use the top-level `sessionMode: \"fresh\"` field for launch-scoped profile/debug/provider flags; do not pass `--session-mode` inside args.";
17
+
18
/**
 * Regex source fragments for error phrases treated as browser profile / user-data-dir
 * setup failures. They are joined into one alternation, so order and text are significant.
 */
const BROWSER_PROFILE_CONFIG_ERROR_ALTERNATIVES = [
  "No Chrome user data directory found",
  "Cannot resolve profile name",
  "Chrome user data directory",
  String.raw`Chrome profile\s+.+?\s+not found`,
  "Available profiles",
  "If you meant a directory path",
];

/** Case-insensitive, word-boundary-delimited matcher compiled once from the alternatives above. */
const BROWSER_PROFILE_CONFIG_ERROR_PATTERN = new RegExp(
  `\\b(?:${BROWSER_PROFILE_CONFIG_ERROR_ALTERNATIVES.join("|")})\\b`,
  "i",
);

/**
 * Reports whether an error message contains one of the known browser profile/config phrases.
 *
 * @param errorText - Error text to inspect.
 * @returns `true` when any known phrase occurs on word boundaries, ignoring case.
 */
function looksLikeBrowserProfileConfigError(errorText: string): boolean {
  // The pattern has no global/sticky flag, so `test` keeps no state between calls.
  return BROWSER_PROFILE_CONFIG_ERROR_PATTERN.test(errorText);
}
21
+
22
/**
 * Decides whether a failed call was a browser launch/setup call, so a profile/config
 * hint is relevant to it.
 *
 * @param args - Raw tool args, if available.
 * @param commandInfo - Parsed command descriptor for the call.
 * @returns `true` for the `profiles` and `doctor` commands, for commands accepted by
 *   `isOpenNavigationCommand`, or when `getStartupScopedFlags` reports at least one flag in `args`.
 */
function isLaunchOrSetupContext(args: string[] | undefined, commandInfo: CommandInfo): boolean {
  const { command } = commandInfo;

  const isSetupCommand = command === "profiles" || command === "doctor";
  if (isSetupCommand) return true;

  if (command && isOpenNavigationCommand(command)) return true;

  // Without args there are no startup-scoped flags to find.
  if (!args) return false;
  return getStartupScopedFlags(args).length > 0;
}
28
+
29
/**
 * Builds the diagnostic next actions offered after a browser profile/config failure.
 *
 * An action is left out when its command is the one that just failed, so the caller is
 * never told to re-run the same command.
 *
 * @param commandInfo - Parsed command descriptor for the failed call.
 * @returns Up to two actions, `profiles` first and `doctor` second.
 */
function buildBrowserProfileConfigActions(commandInfo: CommandInfo): AgentBrowserNextAction[] {
  const failedCommand = commandInfo.command;

  return [
    ...(failedCommand === "profiles"
      ? []
      : [
          buildNextToolAction({
            args: ["profiles"],
            id: "inspect-browser-profiles",
            reason: "List browser profiles/user-data-dir candidates before retrying profile-based launch.",
            safety: "Read-only local setup inspection; does not open a page or mutate browser state.",
          }),
        ]),
    ...(failedCommand === "doctor"
      ? []
      : [
          buildNextToolAction({
            args: ["doctor"],
            id: "run-agent-browser-doctor",
            reason: "Inspect local agent-browser browser installation/configuration before retrying.",
            safety: "Read-only local diagnostics; report findings to the user before changing setup.",
          }),
        ]),
  ];
}
49
+
50
/** Recovery guidance produced for errors recognized as browser profile/config setup failures. */
export interface BrowserProfileConfigRecovery {
  /** Hint text to append to the presented error. */
  hint: string;
  /** Suggested follow-up tool actions; omitted or `undefined` when there are none to offer. */
  actions?: AgentBrowserNextAction[];
}
54
+
55
/**
 * Produces profile/config recovery guidance for a failed call, when applicable.
 *
 * @param options.args - Raw tool args of the failed call, if available.
 * @param options.commandInfo - Parsed command descriptor of the failed call.
 * @param options.errorText - Error text to classify.
 * @returns `undefined` unless the error text matches a known profile/config phrase and the
 *   call was a launch/setup call; otherwise the canonical hint plus any follow-up actions.
 */
export function buildBrowserProfileConfigRecovery(options: {
  args?: string[];
  commandInfo: CommandInfo;
  errorText: string;
}): BrowserProfileConfigRecovery | undefined {
  const { args, commandInfo, errorText } = options;

  // Short-circuit keeps the original order: classify the error text first, then the call context.
  const applies = looksLikeBrowserProfileConfigError(errorText) && isLaunchOrSetupContext(args, commandInfo);
  if (!applies) return undefined;

  const followUps = buildBrowserProfileConfigActions(commandInfo);
  return {
    actions: followUps.length === 0 ? undefined : followUps,
    hint: BROWSER_PROFILE_CONFIG_HINT,
  };
}
@@ -1,5 +1,6 @@
1
1
  import { isOpenNavigationCommand } from "../../command-taxonomy.js";
2
2
  import type { CommandInfo } from "../../runtime.js";
3
+ import { buildBrowserProfileConfigRecovery } from "./browser-profile-recovery.js";
3
4
  import { redactModelFacingText } from "./common.js";
4
5
  import { buildAgentBrowserNextActions } from "../action-recommendations.js";
5
6
  import { buildAgentBrowserResultCategoryDetails } from "../categories.js";
@@ -119,10 +120,12 @@ export function buildErrorPresentation(options: {
119
120
  const selectorHintedErrorText = appendSelectorRecoveryHint(safeErrorText);
120
121
  const unknownCommandSuggestions = getUnknownCommandSuggestions(commandInfo.command, safeErrorText);
121
122
  const unknownCommandSuggestionText = formatUnknownCommandSuggestionText(unknownCommandSuggestions);
123
+ const browserProfileConfigRecovery = buildBrowserProfileConfigRecovery({ args, commandInfo, errorText: safeErrorText });
122
124
  const localhostNavigationHint = getLocalhostNavigationHint(commandInfo, safeErrorText);
123
125
  const hintedErrorParts = [
124
126
  selectorHintedErrorText,
125
127
  unknownCommandSuggestionText && !selectorHintedErrorText.includes("Agent-browser hint:") ? unknownCommandSuggestionText : undefined,
128
+ browserProfileConfigRecovery?.hint,
126
129
  localhostNavigationHint,
127
130
  ].filter((part): part is string => Boolean(part));
128
131
  const hintedErrorText = hintedErrorParts.join("\n\n");
@@ -134,6 +137,7 @@ export function buildErrorPresentation(options: {
134
137
  });
135
138
  const nextActions = [
136
139
  ...(buildUnknownCommandSuggestionActions(unknownCommandSuggestions, sessionName) ?? []),
140
+ ...(browserProfileConfigRecovery?.actions ?? []),
137
141
  ...(buildAgentBrowserNextActions({
138
142
  args,
139
143
  command: commandInfo.command,
@@ -24,82 +24,14 @@ import {
24
24
  } from "./argv-grammar.js";
25
25
  import { needsManagedSession } from "./command-policy.js";
26
26
  import { isCloseCommand, isOpenNavigationCommand } from "./command-taxonomy.js";
27
+ import { LAUNCH_SCOPED_FLAG_DEFINITIONS, LAUNCH_SCOPED_FLAG_LABEL, LAUNCH_SCOPED_TAB_CORRECTION_FLAGS } from "./launch-scoped-flags.js";
27
28
 
28
29
  export type { CommandInfo } from "./argv-descriptor.js";
29
30
  export { extractCommandTokens, findCommandStartIndex, parseArgvDescriptor, parseCommandInfo } from "./argv-descriptor.js";
30
31
 
31
32
  import { isRecord } from "./parsing.js";
32
33
 
33
- /**
34
- * Launch-scoped flags that select the upstream browser session/auth mechanism at launch time.
35
- *
36
- * These flags must not be silently appended after an already-active extension-managed session
37
- * because upstream ignores or conflicts with them once a session is reused. Every flag here
38
- * participates in implicit-session validation blocking and recovery-hint generation.
39
- *
40
- * Intentionally excluded from the tab-correction subset:
41
- * - `--auto-connect` attaches to a running browser but is a general-purpose debug/attach mode,
42
- * not a state-restore mechanism that typically leaves restored tabs stealing focus.
43
- * - `--cdp` connects to an arbitrary endpoint; similar reasoning to `--auto-connect`.
44
- *
45
- * Other flags like `--headed`, `--engine`, `--executable-path`, `--user-agent`, and
46
- * `--download-path` are first-launch-sensitive but not alternate session/auth attach
47
- * mechanisms and do not inject pre-page JavaScript, so they are intentionally excluded
48
- * from the full launch-scoped set.
49
- */
50
- const LAUNCH_SCOPED_FLAG_DEFINITIONS = [
51
- {
52
- flag: "--auto-connect",
53
- reason: "attaches to an already-running browser at launch time instead of reusing an existing named session",
54
- },
55
- {
56
- flag: "--cdp",
57
- reason: "selects the browser/CDP endpoint used when an upstream session is launched",
58
- },
59
- {
60
- flag: "--enable",
61
- reason: "selects built-in page init scripts before the upstream browser session is launched",
62
- },
63
- {
64
- flag: "--init-script",
65
- reason: "registers page init scripts before the upstream browser session is launched",
66
- },
67
- {
68
- flag: "--device",
69
- reason: "selects the provider device for the upstream launch",
70
- },
71
- {
72
- flag: "--profile",
73
- reason: "selects Chrome profile state for the upstream launch",
74
- },
75
- {
76
- flag: "--provider",
77
- reason: "selects the upstream browser provider for the launch",
78
- },
79
- {
80
- flag: "-p",
81
- reason: "selects the upstream browser provider for the launch",
82
- },
83
- {
84
- flag: "--session-name",
85
- reason: "selects upstream saved auth/session state for the launch",
86
- },
87
- {
88
- flag: "--state",
89
- reason: "loads persisted upstream browser/auth state at launch time",
90
- },
91
- ] as const;
92
-
93
- const LAUNCH_SCOPED_FLAG_LABEL = LAUNCH_SCOPED_FLAG_DEFINITIONS.map((definition) => definition.flag).join(", ");
94
-
95
- /**
96
- * The subset of launch-scoped flags that can restore browser/auth state with pre-existing tabs
97
- * and are plausible wrong-active-tab sources after a fresh launch. These trigger post-open
98
- * tab-correction (the `tab list` + re-select cycle).
99
- */
100
- const LAUNCH_SCOPED_TAB_CORRECTION_FLAGS = new Set(["--profile", "--session-name", "--state"] as const);
101
34
  const OPENAI_HEADLESS_COMPAT_HOSTS = new Set(["chat.com", "chat.openai.com", "chatgpt.com"]);
102
- const BRAVE_API_KEY_ENV = "BRAVE_API_KEY";
103
35
  const AGENT_BROWSER_IDLE_TIMEOUT_ENV = "AGENT_BROWSER_IDLE_TIMEOUT_MS";
104
36
  const IMPLICIT_SESSION_IDLE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_IDLE_TIMEOUT_MS";
105
37
  const IMPLICIT_SESSION_CLOSE_TIMEOUT_ENV = "PI_AGENT_BROWSER_IMPLICIT_SESSION_CLOSE_TIMEOUT_MS";
@@ -430,10 +362,6 @@ export function isPlainTextInspectionArgs(args: string[]): boolean {
430
362
  return args.some((token) => INSPECTION_FLAGS.has(token));
431
363
  }
432
364
 
433
- export function hasUsableBraveApiKey(apiKey: string | null | undefined = process.env[BRAVE_API_KEY_ENV]): boolean {
434
- return typeof apiKey === "string" && apiKey.trim().length > 0;
435
- }
436
-
437
365
  function parseTimeoutMs(rawValue: string | undefined, minimumValue: number): number | undefined {
438
366
  if (typeof rawValue !== "string") return undefined;
439
367
  const normalizedValue = rawValue.trim();
@@ -695,6 +623,11 @@ export function validateToolArgs(args: string[]): string | undefined {
695
623
  return `Do not pass shell operators like \`${shellOperator}\`. Pass exact agent-browser CLI arguments only.`;
696
624
  }
697
625
 
626
+ const sessionModeArg = args.find((token) => token === "--session-mode" || token.startsWith("--session-mode="));
627
+ if (sessionModeArg) {
628
+ return "Do not pass `--session-mode` in args. Use the top-level agent_browser `sessionMode` field instead, for example { args: [\"--profile\", \"Default\", \"open\", \"https://example.com\"], sessionMode: \"fresh\" }.";
629
+ }
630
+
698
631
  return undefined;
699
632
  }
700
633