pi-agent-browser-native 0.2.44 → 0.2.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/README.md +20 -15
  3. package/docs/ARCHITECTURE.md +12 -10
  4. package/docs/COMMAND_REFERENCE.md +49 -27
  5. package/docs/ELECTRON.md +1 -1
  6. package/docs/RELEASE.md +6 -5
  7. package/docs/REQUIREMENTS.md +6 -3
  8. package/docs/SUPPORT_MATRIX.md +17 -13
  9. package/docs/TOOL_CONTRACT.md +87 -46
  10. package/docs/platform-smoke.md +4 -3
  11. package/extensions/agent-browser/index.ts +43 -450
  12. package/extensions/agent-browser/lib/bash-guard.ts +205 -0
  13. package/extensions/agent-browser/lib/electron/cdp.ts +69 -0
  14. package/extensions/agent-browser/lib/electron/cleanup.ts +5 -58
  15. package/extensions/agent-browser/lib/electron/discovery.ts +2 -9
  16. package/extensions/agent-browser/lib/electron/launch.ts +11 -65
  17. package/extensions/agent-browser/lib/electron/text.ts +13 -0
  18. package/extensions/agent-browser/lib/fs-utils.ts +18 -0
  19. package/extensions/agent-browser/lib/input-modes/job.ts +207 -21
  20. package/extensions/agent-browser/lib/input-modes/params.ts +28 -11
  21. package/extensions/agent-browser/lib/input-modes/semantic-action.ts +22 -2
  22. package/extensions/agent-browser/lib/input-modes/types.ts +5 -1
  23. package/extensions/agent-browser/lib/input-modes.ts +1 -0
  24. package/extensions/agent-browser/lib/json-schema.ts +73 -0
  25. package/extensions/agent-browser/lib/orchestration/browser-run/click-dispatch.ts +82 -11
  26. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +159 -30
  27. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +53 -2
  28. package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +1 -0
  29. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +751 -32
  30. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +38 -7
  31. package/extensions/agent-browser/lib/orchestration/browser-run/prompt-guards.ts +0 -46
  32. package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +10 -1
  33. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +28 -1
  34. package/extensions/agent-browser/lib/orchestration/electron-host/index.ts +1 -6
  35. package/extensions/agent-browser/lib/orchestration/input-plan.ts +15 -3
  36. package/extensions/agent-browser/lib/orchestration/output-file.ts +86 -0
  37. package/extensions/agent-browser/lib/pi-tool-rendering.ts +252 -0
  38. package/extensions/agent-browser/lib/playbook.ts +26 -26
  39. package/extensions/agent-browser/lib/process.ts +1 -1
  40. package/extensions/agent-browser/lib/prompt-policy.ts +1 -18
  41. package/extensions/agent-browser/lib/results/artifact-manifest.ts +1 -4
  42. package/extensions/agent-browser/lib/results/artifact-state.ts +7 -3
  43. package/extensions/agent-browser/lib/results/contracts.ts +6 -2
  44. package/extensions/agent-browser/lib/results/envelope.ts +11 -2
  45. package/extensions/agent-browser/lib/results/network-routes.ts +7 -4
  46. package/extensions/agent-browser/lib/results/network.ts +7 -1
  47. package/extensions/agent-browser/lib/results/presentation/artifacts.ts +88 -20
  48. package/extensions/agent-browser/lib/results/presentation/batch.ts +84 -12
  49. package/extensions/agent-browser/lib/results/presentation/diagnostics.ts +81 -26
  50. package/extensions/agent-browser/lib/results/presentation/errors.ts +13 -0
  51. package/extensions/agent-browser/lib/results/presentation/registry.ts +60 -0
  52. package/extensions/agent-browser/lib/results/presentation.ts +10 -1
  53. package/extensions/agent-browser/lib/results/snapshot-high-value-controls.ts +16 -5
  54. package/extensions/agent-browser/lib/results/snapshot.ts +2 -0
  55. package/extensions/agent-browser/lib/runtime.ts +10 -1
  56. package/extensions/agent-browser/lib/session-page-state.ts +15 -6
  57. package/extensions/agent-browser/lib/string-enum-schema.ts +20 -0
  58. package/extensions/agent-browser/lib/web-search.ts +31 -13
  59. package/package.json +2 -2
  60. package/platform-smoke.config.mjs +5 -2
  61. package/scripts/platform-smoke/build-ubuntu-image.mjs +25 -0
  62. package/scripts/platform-smoke/crabbox-runner.mjs +5 -1
  63. package/scripts/platform-smoke/doctor.mjs +6 -2
  64. package/scripts/platform-smoke/linux-image/Dockerfile +3 -5
  65. package/scripts/platform-smoke/targets.mjs +2 -1
  66. package/extensions/agent-browser/lib/orchestration/browser-run/browser-action-model.ts +0 -154
@@ -33,6 +33,58 @@ function formatConfirmationRequiredSummary(confirmation: ConfirmationRequiredPre
33
33
  return `Confirmation required: ${confirmation.id}`;
34
34
  }
35
35
 
36
+ const VITALS_METRICS = ["lcp", "fcp", "ttfb", "inp", "cls"] as const;
37
+
38
+ function coerceVitalsMetricValue(value: unknown): number | undefined {
39
+ if (typeof value === "number" && Number.isFinite(value)) return value;
40
+ if (isRecord(value)) {
41
+ for (const nestedKey of ["value", "duration", "startTime", "score"] as const) {
42
+ const nestedValue = value[nestedKey];
43
+ if (typeof nestedValue === "number" && Number.isFinite(nestedValue)) return nestedValue;
44
+ }
45
+ }
46
+ return undefined;
47
+ }
48
+
49
+ function getVitalsMetric(data: Record<string, unknown>, key: string): number | undefined {
50
+ const metrics = isRecord(data.metrics) ? data.metrics : undefined;
51
+ return coerceVitalsMetricValue(data[key] ?? data[key.toUpperCase()] ?? metrics?.[key] ?? metrics?.[key.toUpperCase()]);
52
+ }
53
+
54
+ function formatVitalsMetric(key: string, value: number): string {
55
+ return key === "cls" ? `${key.toUpperCase()}: ${value}` : `${key.toUpperCase()}: ${Math.round(value)}ms`;
56
+ }
57
+
58
+ function getVitalsMetrics(data: Record<string, unknown>): string[] {
59
+ return VITALS_METRICS.flatMap((key) => {
60
+ const value = getVitalsMetric(data, key);
61
+ return value === undefined ? [] : [formatVitalsMetric(key, value)];
62
+ });
63
+ }
64
+
65
+ function getVitalsUnavailableReason(data: Record<string, unknown>): string {
66
+ for (const key of ["reason", "message", "error", "status"] as const) {
67
+ const value = data[key];
68
+ if (typeof value === "string" && value.trim().length > 0) return redactModelFacingText(value.trim());
69
+ }
70
+ return "No Core Web Vitals metric fields were present in the upstream result.";
71
+ }
72
+
73
+ function formatVitalsText(data: Record<string, unknown>): string {
74
+ const url = typeof data.url === "string" && data.url.trim().length > 0 ? redactModelFacingText(data.url.trim()) : undefined;
75
+ const metrics = getVitalsMetrics(data);
76
+ const lines = [url ? `Vitals for ${url}` : "Vitals result"];
77
+ if (metrics.length > 0) lines.push(...metrics.map((metric) => `- ${metric}`));
78
+ else lines.push(`Metrics unavailable: ${getVitalsUnavailableReason(data)}`);
79
+ return lines.join("\n");
80
+ }
81
+
82
+ function formatVitalsSummary(data: Record<string, unknown>): string | undefined {
83
+ const metrics = getVitalsMetrics(data);
84
+ if (metrics.length > 0) return `Vitals: ${metrics.join(", ")}`;
85
+ return "Vitals: metrics unavailable";
86
+ }
87
+
36
88
  function formatConfirmationRequiredText(confirmation: ConfirmationRequiredPresentation): string {
37
89
  const lines = [
38
90
  "Confirmation required.",
@@ -87,6 +139,14 @@ const COMMAND_PRESENTERS: Record<string, CommandPresenter> = {
87
139
  summary: (_commandInfo, data) => isRecord(data) && Array.isArray(data.tabs) ? `Tabs: ${data.tabs.length}` : undefined,
88
140
  text: (_commandInfo, data) => isRecord(data) ? getTabSummary(data) : undefined,
89
141
  },
142
+ vitals: {
143
+ summary: (_commandInfo, data) => isRecord(data) ? formatVitalsSummary(data) : undefined,
144
+ text: (_commandInfo, data) => isRecord(data) ? formatVitalsText(data) : undefined,
145
+ },
146
+ "web-vitals": {
147
+ summary: (_commandInfo, data) => isRecord(data) ? formatVitalsSummary(data) : undefined,
148
+ text: (_commandInfo, data) => isRecord(data) ? formatVitalsText(data) : undefined,
149
+ },
90
150
  };
91
151
 
92
152
  function formatBatchSummary(data: unknown): string | undefined {
@@ -66,6 +66,10 @@ function mergeNextActions(...groups: Array<AgentBrowserNextAction[] | undefined>
66
66
  return merged.length > 0 ? merged : undefined;
67
67
  }
68
68
 
69
+ function shouldAddAnnotatedScreenshotGuidance(commandInfo: CommandInfo, args: string[] | undefined): boolean {
70
+ return commandInfo.command === "screenshot" && (args?.includes("--annotate") ?? false);
71
+ }
72
+
69
73
  export async function buildToolPresentation(options: {
70
74
  artifactManifest?: SessionArtifactManifest;
71
75
  args?: string[];
@@ -103,7 +107,7 @@ export async function buildToolPresentation(options: {
103
107
 
104
108
  const data = enrichStreamStatusData(commandInfo, envelope?.data);
105
109
  const presentationData = redactPresentationData(commandInfo, data);
106
- const artifacts = await extractFileArtifacts({ artifactRequest, commandInfo: presentationCommandInfo, cwd, data, sessionName });
110
+ const artifacts = await extractFileArtifacts({ artifactManifest, artifactRequest, commandInfo: presentationCommandInfo, cwd, data, sessionName });
107
111
  const artifactVerification = buildArtifactVerificationSummary(artifacts);
108
112
  const artifactSummary = formatArtifactSummary(artifacts);
109
113
  const summary = artifactSummary ?? formatPresentationSummary(commandInfo, data, compiledSemanticAction);
@@ -151,6 +155,11 @@ export async function buildToolPresentation(options: {
151
155
  }
152
156
  }
153
157
 
158
+ if (shouldAddAnnotatedScreenshotGuidance(commandInfo, args) && presentation.content[0]?.type === "text") {
159
+ const guidance = "Annotated screenshot note: dense pages can produce overlapping labels. If the labels are noisy, capture a scoped element screenshot, take a non-annotated screenshot, or use snapshot -i high-value refs as the machine-readable map.";
160
+ presentation.content[0] = { ...presentation.content[0], text: `${presentation.content[0].text}\n\n${guidance}` };
161
+ }
162
+
154
163
  const imagePath = artifactRequest?.absolutePath ?? extractImagePath(commandInfo, cwd, data);
155
164
  const presentationWithImage = imagePath ? await attachInlineImage(presentation, imagePath) : presentation;
156
165
  const compactedPresentation = await compactLargePresentationOutput({
@@ -17,6 +17,7 @@ const SNAPSHOT_HIGH_VALUE_CONTROL_ROLES = new Set([
17
17
  "button",
18
18
  "checkbox",
19
19
  "combobox",
20
+ "link",
20
21
  "menuitem",
21
22
  "option",
22
23
  "radio",
@@ -30,11 +31,12 @@ const SNAPSHOT_HIGH_VALUE_CONTROL_ROLE_PRIORITY: Record<string, number> = {
30
31
  textbox: 1,
31
32
  combobox: 2,
32
33
  button: 3,
33
- tab: 4,
34
- checkbox: 5,
35
- radio: 6,
36
- option: 7,
37
- menuitem: 8,
34
+ link: 4,
35
+ tab: 5,
36
+ checkbox: 6,
37
+ radio: 7,
38
+ option: 8,
39
+ menuitem: 9,
38
40
  };
39
41
 
40
42
  const SNAPSHOT_SURFACE_CONTROL_NAME_PATTERNS = [
@@ -46,6 +48,10 @@ const SNAPSHOT_PRIMARY_ACTION_BUTTON_NAME_PATTERNS = [
46
48
  /^(?:add|apply|ask|confirm|connect|continue|create|launch|new|open|refresh|retry|run|save|search|send|start|submit)\b/i,
47
49
  ];
48
50
 
51
+ const SNAPSHOT_HIGH_VALUE_LINK_NAME_PATTERNS = [
52
+ /^[a-z0-9_.-]+\/[a-z0-9_.-]+$/i,
53
+ ];
54
+
49
55
  function getHighValueControlRole(entry: SnapshotRefEntry): string {
50
56
  return entry.isEditable === true && (entry.role === "unknown" || entry.role === "generic") ? "textbox" : entry.role;
51
57
  }
@@ -129,9 +135,14 @@ const SNAPSHOT_HIGH_VALUE_CONTROL_CATEGORY_RULES: readonly HighValueControlCateg
129
135
  },
130
136
  ] as const;
131
137
 
138
+ function isHighValueLinkRef(entry: SnapshotRefEntry): boolean {
139
+ return entry.name.length > 0 && SNAPSHOT_HIGH_VALUE_LINK_NAME_PATTERNS.some((pattern) => pattern.test(entry.name));
140
+ }
141
+
132
142
  export function isHighValueControlEntry(entry: SnapshotRefEntry): boolean {
133
143
  const role = getHighValueControlRole(entry);
134
144
  if (!SNAPSHOT_HIGH_VALUE_CONTROL_ROLES.has(role)) return false;
145
+ if (role === "link") return isHighValueLinkRef(entry);
135
146
  if (entry.isEditable === false && (role === "searchbox" || role === "textbox" || role === "combobox")) return false;
136
147
  return entry.name.length > 0 || isEditableControlRef(entry);
137
148
  }
@@ -221,6 +221,7 @@ export async function buildSnapshotPresentation(
221
221
  ...(roleCountsText ? [`Top roles: ${roleCountsText}`] : []),
222
222
  "",
223
223
  "Compact snapshot view.",
224
+ "Viewport note: compact snapshots are DOM/signal-prioritized, not guaranteed to start with the currently scrolled viewport; use the full raw snapshot, a screenshot, or listed high-value refs when viewport context matters.",
224
225
  ];
225
226
 
226
227
  if (fallbackPreview) {
@@ -294,6 +295,7 @@ export async function buildSnapshotPresentation(
294
295
  fullOutputPath,
295
296
  origin,
296
297
  previewMode: fallbackPreview ? "outline" : "structured",
298
+ viewportOrdering: "dom-signal-prioritized",
297
299
  spillError: spillErrorText,
298
300
  previewRefIds: [...previewRefIds],
299
301
  highValueControlRefIds: visibleHighValueControlEntries.map((entry) => entry.id),
@@ -619,6 +619,15 @@ export function createFreshSessionName(baseSessionName: string, ephemeralSeed: s
619
619
  return `${baseSessionName}-fresh-${suffix}`;
620
620
  }
621
621
 
622
+ function getSingleKeyCommandValidationError(args: string[]): string | undefined {
623
+ const { commandInfo, commandTokens } = parseArgvDescriptor(args);
624
+ const command = commandInfo.command;
625
+ if (command !== "press" && command !== "key" && command !== "keydown" && command !== "keyup") return undefined;
626
+ if (commandTokens.length === 2) return undefined;
627
+ const label = command === "key" ? "key/press" : command;
628
+ return `agent-browser ${label} accepts exactly one key argument. Do not pass a selector or ref to ${label}; focus or click the target first, then run ${command} <key> (for example: focus @e1, then press Enter).`;
629
+ }
630
+
622
631
  export function validateToolArgs(args: string[]): string | undefined {
623
632
  if (args.length === 0) {
624
633
  return "`args` must contain at least one agent-browser command token.";
@@ -634,7 +643,7 @@ export function validateToolArgs(args: string[]): string | undefined {
634
643
  return "Do not pass `--session-mode` in args. Use the top-level agent_browser `sessionMode` field instead, for example { args: [\"--profile\", \"Default\", \"open\", \"https://example.com\"], sessionMode: \"fresh\" }.";
635
644
  }
636
645
 
637
- return undefined;
646
+ return getSingleKeyCommandValidationError(args);
638
647
  }
639
648
 
640
649
  function getInvalidValueFlagDetails(args: string[]): InvalidValueFlagDetails | undefined {
@@ -8,6 +8,9 @@
8
8
 
9
9
  import { isCloseCommand, isReadOnlyDiagnosticSessionTargetCommand } from "./command-taxonomy.js";
10
10
  import { isRecord } from "./parsing.js";
11
+ import { getEditableRefEvidence } from "./results/editable-ref-evidence.js";
12
+ import { enrichSnapshotRefEntries, getSnapshotRefEntries } from "./results/snapshot-refs.js";
13
+ import { parseSnapshotLines } from "./results/snapshot-segments.js";
11
14
 
12
15
  export interface SessionTabTarget {
13
16
  title?: string;
@@ -21,7 +24,7 @@ interface OrderedSessionTabTarget {
21
24
 
22
25
  export interface SessionRefSnapshot {
23
26
  refIds: string[];
24
- refs?: Record<string, { name: string; role: string }>;
27
+ refs?: Record<string, { isContentEditable?: boolean; isEditable?: boolean; name: string; role: string }>;
25
28
  target?: SessionTabTarget;
26
29
  }
27
30
 
@@ -230,11 +233,15 @@ function getRestoredSessionTabTarget(details: Record<string, unknown>, command:
230
233
  return storedTarget;
231
234
  }
232
235
 
233
- function extractRefSnapshotRefs(data: unknown): Record<string, { name: string; role: string }> | undefined {
236
+ function extractRefSnapshotRefs(data: unknown): Record<string, { isContentEditable?: boolean; isEditable?: boolean; name: string; role: string }> | undefined {
234
237
  if (!isRecord(data) || !isRecord(data.refs)) return undefined;
235
- const refs = Object.fromEntries(Object.entries(data.refs).flatMap(([refId, entry]) => {
236
- if (!/^e\d+$/.test(refId) || !isRecord(entry) || typeof entry.name !== "string" || typeof entry.role !== "string") return [];
237
- return [[refId, { name: entry.name, role: entry.role }] as const];
238
+ const snapshotLines = typeof data.snapshot === "string" ? parseSnapshotLines(data.snapshot) : [];
239
+ const lineByRef = new Map(snapshotLines.flatMap((line) => line.ref ? [[line.ref, line.raw] as const] : []));
240
+ const entries = enrichSnapshotRefEntries(getSnapshotRefEntries(data), snapshotLines);
241
+ const refs = Object.fromEntries(entries.flatMap((entry) => {
242
+ if (!/^e\d+$/.test(entry.id) || entry.role.length === 0) return [];
243
+ const isContentEditable = getEditableRefEvidence({ ref: entry.refData, text: lineByRef.get(entry.id) });
244
+ return [[entry.id, { ...(isContentEditable === true ? { isContentEditable: true } : {}), ...(entry.isEditable !== undefined ? { isEditable: entry.isEditable } : {}), name: entry.name, role: entry.role }] as const];
238
245
  }));
239
246
  return Object.keys(refs).length > 0 ? refs : undefined;
240
247
  }
@@ -310,7 +317,9 @@ function getRestoredRefSnapshot(details: Record<string, unknown>): SessionRefSna
310
317
  ? Object.fromEntries(refIds.flatMap((refId) => {
311
318
  const entry = refRecord[refId];
312
319
  if (!isRecord(entry) || typeof entry.name !== "string" || typeof entry.role !== "string") return [];
313
- return [[refId, { name: entry.name, role: entry.role }] as const];
320
+ const isContentEditable = typeof entry.isContentEditable === "boolean" ? entry.isContentEditable : undefined;
321
+ const isEditable = typeof entry.isEditable === "boolean" ? entry.isEditable : undefined;
322
+ return [[refId, { ...(isContentEditable !== undefined ? { isContentEditable } : {}), ...(isEditable !== undefined ? { isEditable } : {}), name: entry.name, role: entry.role }] as const];
314
323
  }))
315
324
  : undefined;
316
325
  return {
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Purpose: Build compact JSON-schema string enums without importing pi runtime helpers.
3
+ * Responsibilities: Mirror pi-ai StringEnum's `{ type: "string", enum: [...] }` shape while keeping extension startup imports light.
4
+ * Scope: Schema construction only.
5
+ */
6
+
7
+ import { JsonSchema, type TSchemaOptions, type TUnsafe } from "./json-schema.js";
8
+
9
+ export type StringEnumBuilder = typeof StringEnum;
10
+
11
+ export function StringEnum<const Values extends readonly string[]>(
12
+ values: Values,
13
+ options?: TSchemaOptions,
14
+ ): TUnsafe<Values[number]> {
15
+ return JsonSchema.Unsafe<Values[number]>({
16
+ type: "string",
17
+ enum: [...values],
18
+ ...options,
19
+ });
20
+ }
@@ -4,9 +4,8 @@
4
4
  * Scope: Live web search only; browser automation remains in the `agent_browser` tool.
5
5
  */
6
6
 
7
- import { StringEnum } from "@earendil-works/pi-ai";
8
- import { defineTool } from "@earendil-works/pi-coding-agent";
9
- import { Type } from "typebox";
7
+ import { JsonSchema, type JsonSchemaBuilder } from "./json-schema.js";
8
+ import { StringEnum as localStringEnum, type StringEnumBuilder } from "./string-enum-schema.js";
10
9
  import {
11
10
  DEFAULT_WEB_SEARCH_PROVIDER,
12
11
  WEB_SEARCH_PROVIDERS,
@@ -119,8 +118,12 @@ export interface WebSearchProviderAdapter<Request = unknown, Response = unknown>
119
118
  provider: WebSearchProvider;
120
119
  }
121
120
 
122
- export const AgentBrowserWebSearchParams = Type.Object(
123
- {
121
+ export function createAgentBrowserWebSearchParamsSchema(
122
+ Type: JsonSchemaBuilder = JsonSchema,
123
+ StringEnum: StringEnumBuilder = localStringEnum,
124
+ ) {
125
+ return Type.Object(
126
+ {
124
127
  query: Type.String({
125
128
  minLength: 1,
126
129
  description: "Search query to run with the configured Exa or Brave web search provider.",
@@ -172,9 +175,12 @@ export const AgentBrowserWebSearchParams = Type.Object(
172
175
  description: "Optional freshness window: pd=past day, pw=past week, pm=past month, py=past year.",
173
176
  }),
174
177
  ),
175
- },
176
- { additionalProperties: false },
177
- );
178
+ },
179
+ { additionalProperties: false },
180
+ );
181
+ }
182
+
183
+ export const AgentBrowserWebSearchParams = createAgentBrowserWebSearchParamsSchema();
178
184
 
179
185
  const HTML_ENTITY_REPLACEMENTS: Readonly<Record<string, string>> = {
180
186
  amp: "&",
@@ -644,22 +650,34 @@ function buildMissingCredentialError(provider: WebSearchProviderParam): string {
644
650
  return "No Exa or Brave web search credential resolved. Configure webSearch.exaApiKey or webSearch.braveApiKey, or load EXA_API_KEY/BRAVE_API_KEY in the runtime environment.";
645
651
  }
646
652
 
653
+ type AgentBrowserWebSearchParamsInput = {
654
+ country?: string;
655
+ count?: number;
656
+ freshness?: SearchFreshness;
657
+ offset?: number;
658
+ provider?: WebSearchProviderParam;
659
+ query: string;
660
+ safesearch?: "off" | "moderate" | "strict";
661
+ searchLang?: string;
662
+ searchType?: ExaSearchType;
663
+ };
664
+
647
665
  export function createAgentBrowserWebSearchTool(configState: AgentBrowserConfigState) {
648
666
  const requestGate = new WebSearchRequestGate();
649
- return defineTool({
667
+ return {
650
668
  name: AGENT_BROWSER_WEB_SEARCH_TOOL_NAME,
651
669
  label: "Agent Browser Web Search",
652
670
  description: `Search the web with Exa or Brave when configured. Returns up to ${MAX_SEARCH_RESULT_COUNT} concise web results.`,
653
671
  promptSnippet: "Search the live web with Exa or Brave for current or external information.",
654
672
  promptGuidelines: [
655
673
  "Use agent_browser_web_search when live web search would help answer the task, find current external information, or discover candidate URLs for agent_browser.",
656
- "The tool chooses Exa or Brave from configured keys; when both are available, Exa is preferred by default unless webSearch.preferredProvider says otherwise. Use provider only when the user/config calls for a specific provider.",
674
+ "agent_browser_web_search chooses Exa or Brave from configured keys; when both are available, Exa is preferred by default unless webSearch.preferredProvider says otherwise. Use provider only when the user/config calls for a specific provider.",
657
675
  "Prefer agent_browser_web_search over opening a search engine results page with agent_browser when a quick result list is enough; use agent_browser for interaction, DOM, screenshots, or auth.",
658
676
  "Do not issue parallel or repeated agent_browser_web_search calls; use one high-signal query, inspect the results, then only run a focused follow-up if needed. If the provider returns HTTP 429, stop searching and tell the user the API plan/rate limit needs time or a plan change.",
659
677
  "After using agent_browser_web_search, cite result URLs in the final answer when web evidence informed the answer.",
660
678
  ],
661
679
  parameters: AgentBrowserWebSearchParams,
662
- async execute(_toolCallId, params, signal) {
680
+ async execute(_toolCallId: string, params: AgentBrowserWebSearchParamsInput, signal?: AbortSignal) {
663
681
  if (!configState.webSearchEnabled) {
664
682
  throw new Error("agent_browser_web_search is disabled by pi-agent-browser-native config.");
665
683
  }
@@ -695,9 +713,9 @@ export function createAgentBrowserWebSearchTool(configState: AgentBrowserConfigS
695
713
  results: normalized.results,
696
714
  };
697
715
  return {
698
- content: [{ type: "text", text: formatSearchResults(adapter.provider, normalized.returnedQuery, normalized.results) }],
716
+ content: [{ type: "text" as const, text: formatSearchResults(adapter.provider, normalized.returnedQuery, normalized.results) }],
699
717
  details,
700
718
  };
701
719
  },
702
- });
720
+ };
703
721
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-agent-browser-native",
3
- "version": "0.2.44",
3
+ "version": "0.2.46",
4
4
  "description": "pi extension that exposes agent-browser as a native tool for browser automation",
5
5
  "type": "module",
6
6
  "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -80,7 +80,7 @@
80
80
  "check:platform-smoke": "node --check platform-smoke.config.mjs && node --check scripts/platform-smoke.mjs && node --check scripts/platform-smoke/doctor.mjs && node --check scripts/platform-smoke/crabbox-runner.mjs && node --check scripts/platform-smoke/targets.mjs && node --check scripts/platform-smoke/artifacts.mjs && tsx --test test/platform-smoke.test.ts",
81
81
  "smoke:platform": "node scripts/platform-smoke.mjs",
82
82
  "smoke:platform:doctor": "node scripts/platform-smoke.mjs doctor",
83
- "smoke:platform:ubuntu-image": "docker build -t pi-agent-browser-native-platform:node24-agent-browser0.27.1 --build-arg AGENT_BROWSER_VERSION=0.27.1 -f scripts/platform-smoke/linux-image/Dockerfile .",
83
+ "smoke:platform:ubuntu-image": "node scripts/platform-smoke/build-ubuntu-image.mjs",
84
84
  "smoke:platform:macos": "node scripts/platform-smoke.mjs run --target macos",
85
85
  "smoke:platform:ubuntu": "node scripts/platform-smoke.mjs run --target ubuntu",
86
86
  "smoke:platform:windows-native": "node scripts/platform-smoke.mjs run --target windows-native",
@@ -3,6 +3,9 @@
3
3
 
4
4
  import { CAPABILITY_BASELINE } from "./scripts/agent-browser-capability-baseline.mjs";
5
5
 
6
+ export const PLATFORM_SMOKE_AGENT_BROWSER_VERSION = CAPABILITY_BASELINE.targetVersion;
7
+ export const PLATFORM_SMOKE_UBUNTU_IMAGE = `pi-agent-browser-native-platform:node24-agent-browser${PLATFORM_SMOKE_AGENT_BROWSER_VERSION}`;
8
+
6
9
  export default {
7
10
  packageName: "pi-agent-browser-native",
8
11
  artifactRoot: ".artifacts/platform-smoke",
@@ -17,11 +20,11 @@ export default {
17
20
  host: "localhost",
18
21
  port: 22,
19
22
  },
20
- ubuntuContainerImage: "pi-agent-browser-native-platform:node24-agent-browser0.27.1",
23
+ ubuntuContainerImage: PLATFORM_SMOKE_UBUNTU_IMAGE,
21
24
  windowsParallels: {
22
25
  sourceVm: "pi-extension-windows-template",
23
26
  snapshot: "crabbox-ready",
24
27
  },
25
28
  nodeValidationMajor: 22,
26
- agentBrowserVersion: CAPABILITY_BASELINE.targetVersion,
29
+ agentBrowserVersion: PLATFORM_SMOKE_AGENT_BROWSER_VERSION,
27
30
  };
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env node
2
+ import { spawnSync } from "node:child_process";
3
+
4
+ import { CAPABILITY_BASELINE } from "../agent-browser-capability-baseline.mjs";
5
+
6
+ const version = CAPABILITY_BASELINE.targetVersion;
7
+ const image = `pi-agent-browser-native-platform:node24-agent-browser${version}`;
8
+ const args = [
9
+ "build",
10
+ "-t",
11
+ image,
12
+ "--build-arg",
13
+ `AGENT_BROWSER_VERSION=${version}`,
14
+ "-f",
15
+ "scripts/platform-smoke/linux-image/Dockerfile",
16
+ ".",
17
+ ];
18
+
19
+ console.log(`Building ${image}`);
20
+ const result = spawnSync("docker", args, { stdio: "inherit" });
21
+ if (result.error) {
22
+ console.error(result.error.message);
23
+ process.exit(1);
24
+ }
25
+ process.exit(result.status ?? 1);
@@ -2,6 +2,10 @@
2
2
 
3
3
  import { spawn } from "node:child_process";
4
4
 
5
+ import { CAPABILITY_BASELINE } from "../agent-browser-capability-baseline.mjs";
6
+
7
+ const DEFAULT_UBUNTU_IMAGE = `pi-agent-browser-native-platform:node24-agent-browser${CAPABILITY_BASELINE.targetVersion}`;
8
+
5
9
  function env(name) {
6
10
  return process.env[name] ?? "";
7
11
  }
@@ -38,7 +42,7 @@ export function describeTarget(targetName, config = {}) {
38
42
  };
39
43
  }
40
44
  case "ubuntu": {
41
- const image = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config.ubuntuContainerImage || "pi-agent-browser-native-platform:node24-agent-browser0.27.1";
45
+ const image = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config.ubuntuContainerImage || DEFAULT_UBUNTU_IMAGE;
42
46
  return {
43
47
  provider: "local-container",
44
48
  crabboxTarget: "linux",
@@ -4,6 +4,10 @@ import { execFileSync, execSync } from "node:child_process";
4
4
  import { accessSync, constants, mkdirSync, unlinkSync, writeFileSync } from "node:fs";
5
5
  import { resolve } from "node:path";
6
6
 
7
+ import { CAPABILITY_BASELINE } from "../agent-browser-capability-baseline.mjs";
8
+
9
+ const DEFAULT_UBUNTU_IMAGE = `pi-agent-browser-native-platform:node24-agent-browser${CAPABILITY_BASELINE.targetVersion}`;
10
+
7
11
  function env(name) {
8
12
  return process.env[name] ?? "";
9
13
  }
@@ -220,7 +224,7 @@ export async function runDoctor(config) {
220
224
  console.log("\n── Crabbox providers ──");
221
225
  if (cboxPath) {
222
226
  checkRequiredProviders(cbox, failures);
223
- const ubuntuImage = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config?.ubuntuContainerImage || "pi-agent-browser-native-platform:node24-agent-browser0.27.1";
227
+ const ubuntuImage = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config?.ubuntuContainerImage || DEFAULT_UBUNTU_IMAGE;
224
228
  checkCrabboxProvider(cbox, ["--provider", "local-container", "--local-container-image", ubuntuImage], "ubuntu local-container", failures);
225
229
  const macUser = env("PLATFORM_SMOKE_MAC_USER") || env("USER");
226
230
  const macHost = env("PLATFORM_SMOKE_MAC_HOST") || config?.macos?.host || "localhost";
@@ -233,7 +237,7 @@ export async function runDoctor(config) {
233
237
  const dockerVersion = shell("docker info --format '{{.ServerVersion}}'");
234
238
  if (dockerVersion) ok(`Docker ${dockerVersion}`);
235
239
  else fail("Docker is not available or not running", failures);
236
- const ubuntuImage = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config?.ubuntuContainerImage || "pi-agent-browser-native-platform:node24-agent-browser0.27.1";
240
+ const ubuntuImage = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config?.ubuntuContainerImage || DEFAULT_UBUNTU_IMAGE;
237
241
  ok(`Ubuntu image: ${ubuntuImage}`);
238
242
 
239
243
  console.log("\n── macOS SSH ──");
@@ -1,12 +1,10 @@
1
1
  # Local Crabbox Ubuntu/Linux target image for pi-agent-browser-native platform smoke.
2
- # Build with:
3
- # docker build -t pi-agent-browser-native-platform:node24-agent-browser0.27.1 \
4
- # --build-arg AGENT_BROWSER_VERSION=0.27.1 \
5
- # -f scripts/platform-smoke/linux-image/Dockerfile .
2
+ # Build with npm run smoke:platform:ubuntu-image so the agent-browser version
3
+ # comes from scripts/agent-browser-capability-baseline.mjs.
6
4
 
7
5
  FROM node:24-bookworm
8
6
 
9
- ARG AGENT_BROWSER_VERSION=0.27.1
7
+ ARG AGENT_BROWSER_VERSION
10
8
 
11
9
  USER root
12
10
  RUN apt-get update \
@@ -15,6 +15,7 @@ import {
15
15
  writeManifest,
16
16
  writeSummary,
17
17
  } from "./artifacts.mjs";
18
+ import { CAPABILITY_BASELINE } from "../agent-browser-capability-baseline.mjs";
18
19
  import { cleanupStaleTargetState, crabboxBin, describeTarget, runOnLease, stopLease, warmupLease } from "./crabbox-runner.mjs";
19
20
 
20
21
  export function platformFor(targetName) {
@@ -300,7 +301,7 @@ export function buildPlatformBuildCommand(targetName, packageName = "pi-agent-br
300
301
  return lines.join("\n");
301
302
  }
302
303
 
303
- export function buildBrowserDogfoodCommand(targetName, agentBrowserVersion = "0.27.1") {
304
+ export function buildBrowserDogfoodCommand(targetName, agentBrowserVersion = CAPABILITY_BASELINE.targetVersion) {
304
305
  if (platformFor(targetName) === "powershell") {
305
306
  return `powershell.exe -NoLogo -NoProfile -ExecutionPolicy Bypass -File .\\scripts\\platform-smoke\\browser-dogfood-windows.ps1 -AgentBrowserVersion ${psSingleQuote(agentBrowserVersion)}`;
306
307
  }