pi-agent-browser-native 0.2.30 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -84,9 +84,9 @@ const DEFAULT_SESSION_MODE = "auto" as const;
84
84
  const DIRECT_AGENT_BROWSER_BASH_BYPASS_ENV = "PI_AGENT_BROWSER_ALLOW_DIRECT_BASH";
85
85
  const PACKAGE_NAME = "pi-agent-browser-native";
86
86
 
87
- const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "uncheck"] as const;
87
+ const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select", "uncheck"] as const;
88
88
  const AGENT_BROWSER_SEMANTIC_LOCATORS = ["alt", "label", "placeholder", "role", "testid", "text", "title"] as const;
89
- const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
89
+ const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
90
90
  const AGENT_BROWSER_QA_LOAD_STATES = ["domcontentloaded", "load", "networkidle"] as const;
91
91
  const SOURCE_LOOKUP_WORKSPACE_EXTENSIONS = new Set([".ts", ".tsx", ".js", ".jsx"]);
92
92
  const SOURCE_LOOKUP_IGNORED_DIRECTORIES = new Set([".git", "node_modules", "dist", "build", "coverage", ".next", "out", "tmp", "temp"]);
@@ -102,8 +102,10 @@ type AgentBrowserNetworkSourceLookupStatus = "failed-requests-found" | "no-faile
102
102
 
103
103
  interface AgentBrowserSemanticActionInput {
104
104
  action: AgentBrowserSemanticActionName;
105
- locator: AgentBrowserSemanticLocator;
106
- value: string;
105
+ locator?: AgentBrowserSemanticLocator;
106
+ value?: string;
107
+ values?: string[];
108
+ selector?: string;
107
109
  text?: string;
108
110
  role?: string;
109
111
  name?: string;
@@ -112,7 +114,9 @@ interface AgentBrowserSemanticActionInput {
112
114
 
113
115
  interface CompiledAgentBrowserSemanticAction {
114
116
  action: AgentBrowserSemanticActionName;
115
- locator: AgentBrowserSemanticLocator;
117
+ locator?: AgentBrowserSemanticLocator;
118
+ selector?: string;
119
+ values?: string[];
116
120
  args: string[];
117
121
  }
118
122
 
@@ -225,6 +229,7 @@ interface CompiledAgentBrowserNetworkSourceLookup {
225
229
  filter?: string;
226
230
  maxWorkspaceFiles: number;
227
231
  requestId?: string;
232
+ session?: string;
228
233
  url?: string;
229
234
  };
230
235
  }
@@ -265,16 +270,18 @@ const AGENT_BROWSER_PARAMS = Type.Object({
265
270
  semanticAction: Type.Optional(
266
271
  Type.Object({
267
272
  action: StringEnum(AGENT_BROWSER_SEMANTIC_ACTIONS, {
268
- description: "Intent action to compile to an existing agent-browser find command.",
273
+ description: "Intent action to compile to an existing agent-browser find command, or to upstream select when action=select.",
269
274
  }),
270
- locator: StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
271
- description: "Upstream find locator family to use.",
272
- }),
273
- value: Type.String({ description: "Locator value, such as visible text, label text, placeholder text, test id, title, alt text, or role." }),
275
+ locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
276
+ description: "Upstream find locator family to use for check/click/fill/uncheck actions.",
277
+ })),
278
+ value: Type.Optional(Type.String({ description: "Locator value for find actions, or a single option value for select actions." })),
279
+ values: Type.Optional(Type.Array(Type.String({ description: "Option value for select actions." }), { description: "One or more option values for select actions.", minItems: 1 })),
280
+ selector: Type.Optional(Type.String({ description: "Selector or @ref for select actions; compiled to select <selector> <value...>." })),
274
281
  text: Type.Optional(Type.String({ description: "Text/value argument for fill actions." })),
275
282
  role: Type.Optional(Type.String({ description: "Role locator value; when set it must match value for locator=role." })),
276
283
  name: Type.Optional(Type.String({ description: "Accessible name filter for locator=role; compiles to --name <name>." })),
277
- session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled find command." })),
284
+ session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled command." })),
278
285
  }),
279
286
  ),
280
287
  qa: Type.Optional(
@@ -302,6 +309,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
302
309
  Type.Object({
303
310
  filter: Type.Optional(Type.String({ description: "Optional upstream network requests filter pattern." })),
304
311
  requestId: Type.Optional(Type.String({ description: "Optional network request id to inspect with network request <id>." })),
312
+ session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the generated batch." })),
305
313
  url: Type.Optional(Type.String({ description: "Optional failed request URL or URL fragment to correlate with local source." })),
306
314
  maxWorkspaceFiles: Type.Optional(Type.Number({ description: "Maximum local source files to scan for URL literals. Defaults to 2000 and cannot exceed 5000.", minimum: 1, maximum: SOURCE_LOOKUP_MAX_WORKSPACE_FILES })),
307
315
  }),
@@ -314,8 +322,10 @@ const AGENT_BROWSER_PARAMS = Type.Object({
314
322
  description: "Constrained one-call job step compiled to existing upstream batch commands.",
315
323
  }),
316
324
  url: Type.Optional(Type.String({ description: "URL for open steps, or URL pattern for assertUrl steps." })),
317
- selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/get-like steps." })),
325
+ selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/select-like steps." })),
318
326
  text: Type.Optional(Type.String({ description: "Text for fill steps or visible text for assertText steps." })),
327
+ value: Type.Optional(Type.String({ description: "Single option value for select steps." })),
328
+ values: Type.Optional(Type.Array(Type.String({ description: "Option value for select steps." }), { description: "One or more option values for select steps.", minItems: 1 })),
319
329
  path: Type.Optional(Type.String({ description: "Artifact/download path for waitForDownload or screenshot steps." })),
320
330
  milliseconds: Type.Optional(Type.Number({ description: "Milliseconds for wait steps." })),
321
331
  }),
@@ -355,6 +365,24 @@ function getRequiredJobString(step: Record<string, unknown>, field: "path" | "se
355
365
  return { value };
356
366
  }
357
367
 
368
/**
 * Resolves the option values for a `select` action from `input.value` or `input.values`.
 *
 * Exactly one of the two fields may be supplied. `value` must be a string that is
 * non-empty after trimming and is wrapped in a one-element array; `values` must be a
 * non-empty array whose every element is such a string.
 *
 * @param input - Raw, not yet validated record holding the `value` / `values` fields.
 * @param context - Field-path prefix used in error messages (for example `semanticAction`).
 * @returns `{ values }` on success, otherwise `{ error }` naming the offending field.
 */
function getSelectValues(input: Record<string, unknown>, context: string): { values?: string[]; error?: string } {
  const isNonBlankString = (candidate: unknown): candidate is string =>
    typeof candidate === "string" && candidate.trim().length > 0;
  const rawValue = input.value;
  const rawValues = input.values;

  if (rawValue !== undefined && rawValues !== undefined) {
    return { error: `${context}.value and ${context}.values cannot both be provided for select.` };
  }

  if (rawValues !== undefined) {
    const invalidValues = { error: `${context}.values must be a non-empty array of non-empty strings for select.` };
    if (!Array.isArray(rawValues) || rawValues.length === 0) {
      return invalidValues;
    }
    // Build a fresh array: the result never aliases the caller's input, and
    // iterating with for-of also rejects holes in a sparse array.
    const optionValues: string[] = [];
    for (const candidate of rawValues) {
      if (!isNonBlankString(candidate)) {
        return invalidValues;
      }
      optionValues.push(candidate);
    }
    return { values: optionValues };
  }

  if (rawValue === undefined) {
    return { error: `${context}.value or ${context}.values is required for select.` };
  }
  // A value was supplied but is unusable; say so instead of claiming it is missing.
  if (!isNonBlankString(rawValue)) {
    return { error: `${context}.value must be a non-empty string for select.` };
  }
  return { values: [rawValue] };
}
385
+
358
386
  function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrowserJob; error?: string } {
359
387
  if (!isRecord(input)) {
360
388
  return { error: "job must be an object." };
@@ -388,6 +416,12 @@ function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrows
388
416
  const text = getRequiredJobString(rawStep, "text", jobAction);
389
417
  if (text.error) return { error: `job.steps[${index}]: ${text.error}` };
390
418
  args = ["fill", selector.value as string, text.value as string];
419
+ } else if (jobAction === "select") {
420
+ const selector = getRequiredJobString(rawStep, "selector", jobAction);
421
+ if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
422
+ const values = getSelectValues(rawStep, `job.steps[${index}]`);
423
+ if (values.error) return { error: values.error };
424
+ args = ["select", selector.value as string, ...(values.values as string[])];
391
425
  } else if (jobAction === "wait") {
392
426
  const milliseconds = rawStep.milliseconds;
393
427
  if (typeof milliseconds !== "number" || !Number.isInteger(milliseconds) || milliseconds <= 0) {
@@ -781,9 +815,11 @@ function compileAgentBrowserNetworkSourceLookup(input: unknown): { compiled?: Co
781
815
  if (!isRecord(input)) return { error: "networkSourceLookup must be an object." };
782
816
  const filter = input.filter;
783
817
  const requestId = input.requestId;
818
+ const session = input.session;
784
819
  const url = input.url;
785
820
  if (filter !== undefined && (typeof filter !== "string" || filter.trim().length === 0)) return { error: "networkSourceLookup.filter must be a non-empty string when provided." };
786
821
  if (requestId !== undefined && (typeof requestId !== "string" || requestId.trim().length === 0)) return { error: "networkSourceLookup.requestId must be a non-empty string when provided." };
822
+ if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) return { error: "networkSourceLookup.session must be a non-empty string when provided." };
787
823
  if (url !== undefined && (typeof url !== "string" || url.trim().length === 0)) return { error: "networkSourceLookup.url must be a non-empty string when provided." };
788
824
  if (filter === undefined && requestId === undefined && url === undefined) return { error: "networkSourceLookup requires requestId, filter, or url." };
789
825
  const maxWorkspaceFiles = validateLookupMaxWorkspaceFiles(input.maxWorkspaceFiles, "networkSourceLookup.maxWorkspaceFiles");
@@ -796,7 +832,8 @@ function compileAgentBrowserNetworkSourceLookup(input: unknown): { compiled?: Co
796
832
  if (effectiveFilter) {
797
833
  steps.push({ action: "network", args: ["network", "requests", "--filter", effectiveFilter] });
798
834
  }
799
- return { compiled: { args: ["batch"], query: { filter, maxWorkspaceFiles: maxWorkspaceFiles.value as number, requestId, url }, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
835
+ const args = typeof session === "string" ? ["--session", session, "batch"] : ["batch"];
836
+ return { compiled: { args, query: { filter, maxWorkspaceFiles: maxWorkspaceFiles.value as number, requestId, session, url }, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
800
837
  }
801
838
 
802
839
  function getResultPayload(item: Record<string, unknown>): unknown {
@@ -967,6 +1004,11 @@ function getCompiledSemanticActionSessionPrefix(compiled: CompiledAgentBrowserSe
967
1004
  return commandIndex > 0 ? compiled.args.slice(0, commandIndex) : [];
968
1005
  }
969
1006
 
1007
/**
 * Reports whether a compiled semantic action's command token is `find`.
 *
 * The command position is taken from `getCompiledSemanticActionCommandIndex`;
 * a missing `compiled` value yields `false`.
 */
function isCompiledSemanticActionFindCommand(compiled: CompiledAgentBrowserSemanticAction | undefined): boolean {
  if (!compiled) {
    return false;
  }
  const argv = compiled.args;
  const commandIndex = getCompiledSemanticActionCommandIndex(compiled);
  const commandToken = argv[commandIndex];
  return commandToken === "find";
}
1011
+
970
1012
  const SEMANTIC_ACTION_CANDIDATE_ACTION_IDS = new Set([
971
1013
  "try-searchbox-name-candidate",
972
1014
  "try-textbox-name-candidate",
@@ -986,7 +1028,7 @@ function formatSemanticActionCandidateText(actions: AgentBrowserNextAction[]): s
986
1028
 
987
1029
  function buildSemanticActionCandidateActions(compiled: CompiledAgentBrowserSemanticAction): AgentBrowserNextAction[] {
988
1030
  const commandIndex = getCompiledSemanticActionCommandIndex(compiled);
989
- if (commandIndex < 0) return [];
1031
+ if (commandIndex < 0 || compiled.args[commandIndex] !== "find") return [];
990
1032
  const locator = compiled.args[commandIndex + 1];
991
1033
  const value = compiled.args[commandIndex + 2];
992
1034
  if (!locator || !value) return [];
@@ -1034,12 +1076,12 @@ function getFindNameFlagValue(args: string[], startIndex: number): string | unde
1034
1076
  }
1035
1077
 
1036
1078
  function getFindVisibleRefFallbackTarget(args: string[]): VisibleRefFallbackTarget | undefined {
1037
- const findIndex = args[0] === "--session" ? 2 : args.indexOf("find");
1038
- if (findIndex < 0) return undefined;
1079
+ const findIndex = args[0] === "--session" ? 2 : 0;
1080
+ if (args[findIndex] !== "find") return undefined;
1039
1081
  const locator = args[findIndex + 1];
1040
1082
  const value = args[findIndex + 2];
1041
1083
  const action = args[findIndex + 3];
1042
- if (!locator || !value || !isAgentBrowserSemanticActionName(action)) return undefined;
1084
+ if (!locator || !value || !isAgentBrowserSemanticActionName(action) || action === "select") return undefined;
1043
1085
  const text = action === "fill" ? args[findIndex + 4] : undefined;
1044
1086
  if (action === "fill" && (!text || text.startsWith("-"))) return undefined;
1045
1087
  if (locator === "role") {
@@ -1200,6 +1242,8 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
1200
1242
  const action = input.action;
1201
1243
  const locator = input.locator;
1202
1244
  const value = input.value;
1245
+ const values = input.values;
1246
+ const selector = input.selector;
1203
1247
  const text = input.text;
1204
1248
  const role = input.role;
1205
1249
  const name = input.name;
@@ -1207,6 +1251,27 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
1207
1251
  if (typeof action !== "string" || !AGENT_BROWSER_SEMANTIC_ACTIONS.includes(action as AgentBrowserSemanticActionName)) {
1208
1252
  return { error: `semanticAction.action must be one of: ${AGENT_BROWSER_SEMANTIC_ACTIONS.join(", ")}.` };
1209
1253
  }
1254
+ if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) {
1255
+ return { error: "semanticAction.session must be a non-empty string when provided." };
1256
+ }
1257
+ if (action === "select") {
1258
+ if (locator !== undefined || role !== undefined || name !== undefined) {
1259
+ return { error: "semanticAction.locator, role, and name are not supported for select; use selector plus value or values." };
1260
+ }
1261
+ if (text !== undefined) {
1262
+ return { error: "semanticAction.text is not supported for select; use value or values for option values." };
1263
+ }
1264
+ if (typeof selector !== "string" || selector.trim().length === 0) {
1265
+ return { error: "semanticAction.selector is required for select." };
1266
+ }
1267
+ const selectedValues = getSelectValues(input, "semanticAction");
1268
+ if (selectedValues.error) return { error: selectedValues.error };
1269
+ const args = typeof session === "string" ? ["--session", session, "select", selector, ...(selectedValues.values as string[])] : ["select", selector, ...(selectedValues.values as string[])];
1270
+ return { compiled: { action: "select", selector, values: selectedValues.values, args } };
1271
+ }
1272
+ if (selector !== undefined || values !== undefined) {
1273
+ return { error: "semanticAction.selector and values are only supported for select actions." };
1274
+ }
1210
1275
  if (typeof locator !== "string" || !AGENT_BROWSER_SEMANTIC_LOCATORS.includes(locator as AgentBrowserSemanticLocator)) {
1211
1276
  return { error: `semanticAction.locator must be one of: ${AGENT_BROWSER_SEMANTIC_LOCATORS.join(", ")}.` };
1212
1277
  }
@@ -1228,9 +1293,6 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
1228
1293
  if (name !== undefined && (locator !== "role" || typeof name !== "string" || name.length === 0)) {
1229
1294
  return { error: "semanticAction.name is only supported as a non-empty string for locator=role." };
1230
1295
  }
1231
- if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) {
1232
- return { error: "semanticAction.session must be a non-empty string when provided." };
1233
- }
1234
1296
  const args = typeof session === "string" ? ["--session", session, "find", locator, value, action] : ["find", locator, value, action];
1235
1297
  if (action === "fill") {
1236
1298
  args.push(text as string);
@@ -1614,6 +1676,9 @@ async function isDirectAgentBrowserBashAllowed(cwd: string): Promise<boolean> {
1614
1676
 
1615
1677
  const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
1616
1678
  const NAVIGATION_SUMMARY_EVAL = `({ title: document.title, url: location.href })`;
1679
+ // These commands can expose URLs for inspected resources (request URLs, cookie/storage scope, or log sources),
1680
+ // but they do not navigate the active tab and must not poison page-scoped ref guards.
1681
+ const READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS = new Set(["console", "cookies", "errors", "network", "storage"]);
1617
1682
 
1618
1683
  interface NavigationSummary {
1619
1684
  title?: string;
@@ -2259,6 +2324,15 @@ function extractSessionTabTargetFromData(data: unknown): SessionTabTarget | unde
2259
2324
  return undefined;
2260
2325
  }
2261
2326
 
2327
/**
 * Reports whether `command` is listed in READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS.
 *
 * `_subcommand` is accepted for signature parity with callers but is not consulted.
 * An undefined `command` is never treated as a diagnostic command.
 */
function isReadOnlyDiagnosticSessionTargetCommand(command: string | undefined, _subcommand: string | undefined): boolean {
  if (command === undefined) {
    return false;
  }
  return READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS.has(command);
}
2330
+
2331
/**
 * Extracts a session tab target from command result data, unless the command
 * (first token) is a read-only diagnostic command, in which case no target is reported.
 *
 * @param commandTokens - Command tokens; index 0 is the command, index 1 the subcommand.
 * @param data - Result payload handed to `extractSessionTabTargetFromData`.
 */
function extractSessionTabTargetFromCommandData(commandTokens: string[], data: unknown): SessionTabTarget | undefined {
  const command = commandTokens[0];
  const subcommand = commandTokens[1];
  if (isReadOnlyDiagnosticSessionTargetCommand(command, subcommand)) {
    return undefined;
  }
  return extractSessionTabTargetFromData(data);
}
2335
+
2262
2336
  function extractBatchResultCommand(item: Record<string, unknown>): string[] {
2263
2337
  return Array.isArray(item.command) ? item.command.filter((token): token is string => typeof token === "string") : [];
2264
2338
  }
@@ -2290,7 +2364,7 @@ function extractSessionTabTargetFromBatchResults(data: unknown): SessionTabTarge
2290
2364
  pendingTitle = undefined;
2291
2365
  continue;
2292
2366
  }
2293
- const resultTarget = extractSessionTabTargetFromData(result);
2367
+ const resultTarget = extractSessionTabTargetFromCommandData([name, subcommand].filter((token): token is string => token !== undefined), result);
2294
2368
  if (resultTarget) {
2295
2369
  currentTarget = resultTarget;
2296
2370
  }
@@ -2299,6 +2373,40 @@ function extractSessionTabTargetFromBatchResults(data: unknown): SessionTabTarge
2299
2373
  return currentTarget;
2300
2374
  }
2301
2375
 
2376
/**
 * Reports whether `data` is a non-empty array in which every entry is a record
 * whose command is a read-only diagnostic command.
 *
 * Anything that is not an array, or an empty array, yields `false`.
 */
function batchContainsOnlyReadOnlyDiagnosticTargets(data: unknown): boolean {
  // True for any entry that disqualifies the batch from being diagnostic-only.
  const isDisqualifying = (entry: unknown): boolean => {
    if (!isRecord(entry)) {
      return true;
    }
    const [commandName, subcommandName] = extractBatchResultCommand(entry);
    return !isReadOnlyDiagnosticSessionTargetCommand(commandName, subcommandName);
  };
  return Array.isArray(data) && data.length > 0 && !data.some(isDisqualifying);
}
2386
+
2387
/**
 * Chooses the session tab target to restore from a stored result's `details`.
 *
 * - Read-only diagnostic commands never contribute a target.
 * - Non-batch commands use the normalized `details.sessionTabTarget`, when it is a record.
 * - Batch commands prefer the target derived from `details.data`; if none is found and the
 *   batch is a compiled network source lookup or contains only read-only diagnostic steps,
 *   no target is restored; otherwise the stored target is used.
 */
function getRestoredSessionTabTarget(details: Record<string, unknown>, command: string | undefined, subcommand: string | undefined): SessionTabTarget | undefined {
  if (isReadOnlyDiagnosticSessionTargetCommand(command, subcommand)) {
    return undefined;
  }

  let persistedTarget: SessionTabTarget | undefined;
  const rawTarget = details.sessionTabTarget;
  if (isRecord(rawTarget)) {
    const rawTitle = rawTarget.title;
    const rawUrl = rawTarget.url;
    persistedTarget = normalizeSessionTabTarget({
      title: typeof rawTitle === "string" ? rawTitle : undefined,
      url: typeof rawUrl === "string" ? rawUrl : undefined,
    });
  }
  if (command !== "batch") {
    return persistedTarget;
  }

  const replayedTarget = extractSessionTabTargetFromBatchResults(details.data);
  if (replayedTarget) {
    return replayedTarget;
  }
  const diagnosticOnlyBatch =
    isRecord(details.compiledNetworkSourceLookup) || batchContainsOnlyReadOnlyDiagnosticTargets(details.data);
  return diagnosticOnlyBatch ? undefined : persistedTarget;
}
2409
+
2302
2410
  function restoreSessionTabTargetsFromBranch(branch: unknown[]): Map<string, OrderedSessionTabTarget> {
2303
2411
  const restoredTargets = new Map<string, OrderedSessionTabTarget>();
2304
2412
  let restoredOrder = 0;
@@ -2319,17 +2427,13 @@ function restoreSessionTabTargetsFromBranch(branch: unknown[]): Map<string, Orde
2319
2427
  continue;
2320
2428
  }
2321
2429
  const command = typeof details.command === "string" ? details.command : undefined;
2430
+ const subcommand = typeof details.subcommand === "string" ? details.subcommand : undefined;
2322
2431
  if (command === "close" && message.isError !== true) {
2323
2432
  restoredOrder += 1;
2324
2433
  restoredTargets.delete(sessionName);
2325
2434
  continue;
2326
2435
  }
2327
- const sessionTabTarget = isRecord(details.sessionTabTarget)
2328
- ? normalizeSessionTabTarget({
2329
- title: typeof details.sessionTabTarget.title === "string" ? details.sessionTabTarget.title : undefined,
2330
- url: typeof details.sessionTabTarget.url === "string" ? details.sessionTabTarget.url : undefined,
2331
- })
2332
- : undefined;
2436
+ const sessionTabTarget = getRestoredSessionTabTarget(details, command, subcommand);
2333
2437
  if (sessionTabTarget) {
2334
2438
  restoredOrder += 1;
2335
2439
  restoredTargets.set(sessionName, { order: restoredOrder, target: sessionTabTarget });
@@ -2751,14 +2855,18 @@ function deriveSessionTabTarget(options: {
2751
2855
  data: unknown;
2752
2856
  navigationSummary?: NavigationSummary;
2753
2857
  previousTarget?: SessionTabTarget;
2858
+ subcommand?: string;
2754
2859
  }): SessionTabTarget | undefined {
2755
2860
  if (options.command === "close") {
2756
2861
  return undefined;
2757
2862
  }
2863
+ const commandDataTarget = isReadOnlyDiagnosticSessionTargetCommand(options.command, options.subcommand)
2864
+ ? undefined
2865
+ : extractSessionTabTargetFromData(options.data);
2758
2866
  return (
2759
2867
  normalizeSessionTabTarget(options.navigationSummary) ??
2760
2868
  extractSessionTabTargetFromBatchResults(options.data) ??
2761
- extractSessionTabTargetFromData(options.data) ??
2869
+ commandDataTarget ??
2762
2870
  options.previousTarget
2763
2871
  );
2764
2872
  }
@@ -3353,14 +3461,16 @@ function looksLikeFunctionEvalStdin(stdin: string | undefined): boolean {
3353
3461
  return /^(?:async\s+)?function\b/.test(trimmed) || /^(?:async\s*)?\([^)]*\)\s*=>/.test(trimmed) || /^(?:async\s+)?[A-Za-z_$][\w$]*\s*=>/.test(trimmed);
3354
3462
  }
3355
3463
 
3356
- function isEmptyRecord(value: unknown): boolean {
3357
- return isRecord(value) && Object.keys(value).length === 0;
3464
/**
 * Reports whether `value` is a plain object with no own enumerable string keys.
 *
 * Arrays are rejected, as is any object whose prototype is neither
 * `Object.prototype` nor `null`.
 */
function isPlainEmptyObject(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  const proto: unknown = Object.getPrototypeOf(value);
  const hasPlainPrototype = proto === Object.prototype || proto === null;
  if (!hasPlainPrototype) {
    return false;
  }
  return Object.keys(value).length === 0;
}
3359
3469
 
3360
3470
  function getEvalStdinHint(options: { command?: string; data: unknown; stdin?: string }): EvalStdinHint | undefined {
3361
3471
  if (options.command !== "eval" || !looksLikeFunctionEvalStdin(options.stdin) || !isRecord(options.data)) return undefined;
3362
3472
  const result = options.data.result;
3363
- if (!isEmptyRecord(result)) return undefined;
3473
+ if (!isPlainEmptyObject(result)) return undefined;
3364
3474
  return {
3365
3475
  reason: "eval --stdin received a function-shaped snippet and the upstream JSON result was an empty object, which often means the function itself was returned or serialized instead of invoked.",
3366
3476
  suggestion: "Pass a plain expression such as `({ title: document.title })`, or invoke the function explicitly, for example `(() => ({ title: document.title }))()`.",
@@ -4009,6 +4119,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4009
4119
  const redactedCompiledNetworkSourceLookup = compiledNetworkSourceLookup && redactedCompiledNetworkSourceLookupSteps
4010
4120
  ? {
4011
4121
  ...compiledNetworkSourceLookup,
4122
+ args: redactNetworkSourceLookupArgs(compiledNetworkSourceLookup.args),
4012
4123
  query: {
4013
4124
  ...compiledNetworkSourceLookup.query,
4014
4125
  filter: redactNetworkSourceLookupUrl(compiledNetworkSourceLookup.query.filter),
@@ -4435,12 +4546,13 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4435
4546
  const observedSessionTabTarget =
4436
4547
  normalizeSessionTabTarget(navigationSummary) ??
4437
4548
  extractSessionTabTargetFromBatchResults(presentationEnvelope?.data) ??
4438
- extractSessionTabTargetFromData(presentationEnvelope?.data);
4549
+ extractSessionTabTargetFromCommandData(commandTokens, presentationEnvelope?.data);
4439
4550
  let currentSessionTabTarget = deriveSessionTabTarget({
4440
4551
  command: executionPlan.commandInfo.command,
4441
4552
  data: presentationEnvelope?.data,
4442
4553
  navigationSummary,
4443
4554
  previousTarget: priorSessionTabTarget,
4555
+ subcommand: executionPlan.commandInfo.subcommand,
4444
4556
  });
4445
4557
  let aboutBlankSessionMismatch: AboutBlankSessionMismatch | undefined;
4446
4558
  const shouldTreatAboutBlankAsMismatch =
@@ -4826,7 +4938,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
4826
4938
  if (comboboxFocusDiagnostic) {
4827
4939
  (nextActions ??= []).push(...buildComboboxFocusNextActions(executionPlan.sessionName));
4828
4940
  }
4829
- if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction) {
4941
+ if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction && isCompiledSemanticActionFindCommand(compiledSemanticAction)) {
4830
4942
  (nextActions ??= []).push({
4831
4943
  id: "retry-semantic-action-after-stale-ref",
4832
4944
  params: { args: redactedCompiledSemanticAction.args },
@@ -18,14 +18,14 @@ export function buildInstalledDocsGuideline(paths: { readmePath: string; command
18
18
  }
19
19
 
20
20
  export const QUICK_START_GUIDELINES = [
21
- "Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin find-locator shorthand compiled to find argv), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
21
+ "Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
22
22
  "There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
23
23
  "Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
24
- "Locator-first clicks and fills without hand-building find argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } } or { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded try-*-candidate next actions (and an Agent-browser candidate fallbacks prose block) for specific placeholder/text/label shapes, and stale-ref failures can return retry-semantic-action-after-stale-ref when retry safety is provable.",
25
- "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { job: { steps: [{ action: \"open\", url: \"https://example.com\" }, { action: \"assertText\", text: \"Example Domain\" }, { action: \"screenshot\", path: \".dogfood/example.png\" }] } }, { qa: { url: \"https://example.com\", expectedText: \"Example Domain\", screenshotPath: \".dogfood/qa-example.png\" } }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
26
- "High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
24
+ "Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded try-*-candidate next actions (and an Agent-browser candidate fallbacks prose block) for specific placeholder/text/label shapes, and stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
25
+ "Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { job: { steps: [{ action: \"open\", url: \"https://example.com\" }, { action: \"assertText\", text: \"Example Domain\" }, { action: \"screenshot\", path: \".dogfood/example.png\" }] } }, { qa: { url: \"https://example.com\", expectedText: \"Example Domain\", screenshotPath: \".dogfood/qa-example.png\" } }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }. For app pages with a native dropdown, job steps can include { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } before the dependent assertion.",
26
+ "High-value command reference: select <selector> <value...> changes native dropdown values; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
27
27
  "For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
28
- "When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, safety notes, or artifactPath for saved files.",
28
+ "When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
29
29
  ] as const;
30
30
 
31
31
  export const BRAVE_SEARCH_PROMPT_GUIDELINE =
@@ -45,14 +45,14 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
45
45
  "For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigate/opening the target page.",
46
46
  "For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, state save/load for portable test state, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.",
47
47
  "For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
48
- "For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
48
+ "For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. For compact network requests output, prefer details.nextActions for request detail, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
49
49
  "For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
50
50
  "For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
51
51
  "If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
52
52
  "For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
53
53
  "For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
54
54
  "For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
55
- "On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field; re-snapshot and fall back to type, press Enter/arrow keys, select, or visible option refs.",
55
+ "On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For native selects, use select <selector> <value...> (or semanticAction/job select) instead of clicking option refs; for custom comboboxes, a click/semanticAction may only focus the field, so re-snapshot and fall back to type, press Enter/arrow keys, or visible option refs.",
56
56
  "When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
57
57
  "When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
58
58
  "When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
@@ -92,12 +92,12 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeBraveSear
92
92
  const RUNTIME_PROMPT_GUIDELINES = [
93
93
  "Use exactly one input mode: args, semanticAction, job, qa, sourceLookup, or networkSourceLookup. Use stdin only for batch, eval --stdin, auth save --password-stdin, or wrapper-generated batch modes.",
94
94
  "Common flow: open, snapshot -i, interact with current @refs or semanticAction, then re-snapshot after navigation, scrolling, rerenders, or DOM changes. For ordinary forms, batch same-snapshot fill @refs before the submit/click step; split if a fill may autosubmit, navigate, or rerender later fields. Respect explicit stop boundaries: if the user says to stop before order/post/purchase/submit, do not click that final action.",
95
- "Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. Use current @refs only from the latest same-page snapshot.",
95
+ "Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. For native selects, prefer select <selector> <value...> or semanticAction/job select over clicking option refs. Use current @refs only from the latest same-page snapshot.",
96
96
  "For tasks that explicitly require the user's signed-in/account-specific content, start with --profile Default plus sessionMode=fresh unless the user asks otherwise; visible page content is model-visible. Use sessionMode=fresh for other launch-scoped state such as --session-name, --cdp, --state, --auto-connect, --init-script, --enable, providers, or iOS devices; otherwise let the implicit session carry continuity.",
97
97
  "For requested screenshots, recordings, downloads, PDFs, or HARs, save the exact user path and read details.artifactVerification before claiming success; report unavailable/missing artifacts instead of silently substituting paths. record stop needs ffmpeg on PATH. close does not delete saved files; cleanup is host-owned.",
98
- "When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors.",
98
+ "When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors; network request diagnostics may include request-detail, actionable failed-request networkSourceLookup, filter, or HAR-capture follow-ups.",
99
99
  "For dense snapshots, check Omitted high-value controls and details.data.highValueControlRefIds before opening large spill files.",
100
- "For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. Combobox clicks may only focus; re-snapshot and fall back to type, Enter/arrows, select, or option refs.",
100
+ "For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. For native selects use select/semanticAction/job select instead of option refs; custom combobox clicks may only focus, so re-snapshot and fall back to type, Enter/arrows, or visible option refs.",
101
101
  "For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with a plain expression in the tool stdin field; do not rely on console.log. When reading several known refs/selectors, use batch with JSON-array stdin (for example [[\"get\",\"text\",\"@e1\"]]) or eval --stdin instead of many serial get calls. If selector visibility warnings appear, prefer visible @refs or nextActions.",
102
102
  "For non-core debugging, pass upstream commands through args: network, diff, trace/profiler/record, console/errors, stream, dashboard, chat, react, vitals, pushstate, dialog, frame, tab.",
103
103
  ] as const;