pi-agent-browser-native 0.2.30 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +11 -8
- package/docs/ARCHITECTURE.md +3 -3
- package/docs/COMMAND_REFERENCE.md +12 -8
- package/docs/RELEASE.md +11 -11
- package/docs/REQUIREMENTS.md +4 -3
- package/docs/SUPPORT_MATRIX.md +13 -5
- package/docs/TOOL_CONTRACT.md +30 -20
- package/extensions/agent-browser/index.ts +145 -33
- package/extensions/agent-browser/lib/playbook.ts +10 -10
- package/extensions/agent-browser/lib/results/presentation.ts +154 -2
- package/extensions/agent-browser/lib/results/shared.ts +7 -1
- package/package.json +1 -1
|
@@ -84,9 +84,9 @@ const DEFAULT_SESSION_MODE = "auto" as const;
|
|
|
84
84
|
const DIRECT_AGENT_BROWSER_BASH_BYPASS_ENV = "PI_AGENT_BROWSER_ALLOW_DIRECT_BASH";
|
|
85
85
|
const PACKAGE_NAME = "pi-agent-browser-native";
|
|
86
86
|
|
|
87
|
-
const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "uncheck"] as const;
|
|
87
|
+
const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select", "uncheck"] as const;
|
|
88
88
|
const AGENT_BROWSER_SEMANTIC_LOCATORS = ["alt", "label", "placeholder", "role", "testid", "text", "title"] as const;
|
|
89
|
-
const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
|
|
89
|
+
const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
|
|
90
90
|
const AGENT_BROWSER_QA_LOAD_STATES = ["domcontentloaded", "load", "networkidle"] as const;
|
|
91
91
|
const SOURCE_LOOKUP_WORKSPACE_EXTENSIONS = new Set([".ts", ".tsx", ".js", ".jsx"]);
|
|
92
92
|
const SOURCE_LOOKUP_IGNORED_DIRECTORIES = new Set([".git", "node_modules", "dist", "build", "coverage", ".next", "out", "tmp", "temp"]);
|
|
@@ -102,8 +102,10 @@ type AgentBrowserNetworkSourceLookupStatus = "failed-requests-found" | "no-faile
|
|
|
102
102
|
|
|
103
103
|
interface AgentBrowserSemanticActionInput {
|
|
104
104
|
action: AgentBrowserSemanticActionName;
|
|
105
|
-
locator
|
|
106
|
-
value
|
|
105
|
+
locator?: AgentBrowserSemanticLocator;
|
|
106
|
+
value?: string;
|
|
107
|
+
values?: string[];
|
|
108
|
+
selector?: string;
|
|
107
109
|
text?: string;
|
|
108
110
|
role?: string;
|
|
109
111
|
name?: string;
|
|
@@ -112,7 +114,9 @@ interface AgentBrowserSemanticActionInput {
|
|
|
112
114
|
|
|
113
115
|
interface CompiledAgentBrowserSemanticAction {
|
|
114
116
|
action: AgentBrowserSemanticActionName;
|
|
115
|
-
locator
|
|
117
|
+
locator?: AgentBrowserSemanticLocator;
|
|
118
|
+
selector?: string;
|
|
119
|
+
values?: string[];
|
|
116
120
|
args: string[];
|
|
117
121
|
}
|
|
118
122
|
|
|
@@ -225,6 +229,7 @@ interface CompiledAgentBrowserNetworkSourceLookup {
|
|
|
225
229
|
filter?: string;
|
|
226
230
|
maxWorkspaceFiles: number;
|
|
227
231
|
requestId?: string;
|
|
232
|
+
session?: string;
|
|
228
233
|
url?: string;
|
|
229
234
|
};
|
|
230
235
|
}
|
|
@@ -265,16 +270,18 @@ const AGENT_BROWSER_PARAMS = Type.Object({
|
|
|
265
270
|
semanticAction: Type.Optional(
|
|
266
271
|
Type.Object({
|
|
267
272
|
action: StringEnum(AGENT_BROWSER_SEMANTIC_ACTIONS, {
|
|
268
|
-
description: "Intent action to compile to an existing agent-browser find command.",
|
|
273
|
+
description: "Intent action to compile to an existing agent-browser find command, or to upstream select when action=select.",
|
|
269
274
|
}),
|
|
270
|
-
locator: StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
|
|
271
|
-
description: "Upstream find locator family to use.",
|
|
272
|
-
}),
|
|
273
|
-
value: Type.String({ description: "Locator value
|
|
275
|
+
locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
|
|
276
|
+
description: "Upstream find locator family to use for check/click/fill/uncheck actions.",
|
|
277
|
+
})),
|
|
278
|
+
value: Type.Optional(Type.String({ description: "Locator value for find actions, or a single option value for select actions." })),
|
|
279
|
+
values: Type.Optional(Type.Array(Type.String({ description: "Option value for select actions." }), { description: "One or more option values for select actions.", minItems: 1 })),
|
|
280
|
+
selector: Type.Optional(Type.String({ description: "Selector or @ref for select actions; compiled to select <selector> <value...>." })),
|
|
274
281
|
text: Type.Optional(Type.String({ description: "Text/value argument for fill actions." })),
|
|
275
282
|
role: Type.Optional(Type.String({ description: "Role locator value; when set it must match value for locator=role." })),
|
|
276
283
|
name: Type.Optional(Type.String({ description: "Accessible name filter for locator=role; compiles to --name <name>." })),
|
|
277
|
-
session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled
|
|
284
|
+
session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled command." })),
|
|
278
285
|
}),
|
|
279
286
|
),
|
|
280
287
|
qa: Type.Optional(
|
|
@@ -302,6 +309,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
|
|
|
302
309
|
Type.Object({
|
|
303
310
|
filter: Type.Optional(Type.String({ description: "Optional upstream network requests filter pattern." })),
|
|
304
311
|
requestId: Type.Optional(Type.String({ description: "Optional network request id to inspect with network request <id>." })),
|
|
312
|
+
session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the generated batch." })),
|
|
305
313
|
url: Type.Optional(Type.String({ description: "Optional failed request URL or URL fragment to correlate with local source." })),
|
|
306
314
|
maxWorkspaceFiles: Type.Optional(Type.Number({ description: "Maximum local source files to scan for URL literals. Defaults to 2000 and cannot exceed 5000.", minimum: 1, maximum: SOURCE_LOOKUP_MAX_WORKSPACE_FILES })),
|
|
307
315
|
}),
|
|
@@ -314,8 +322,10 @@ const AGENT_BROWSER_PARAMS = Type.Object({
|
|
|
314
322
|
description: "Constrained one-call job step compiled to existing upstream batch commands.",
|
|
315
323
|
}),
|
|
316
324
|
url: Type.Optional(Type.String({ description: "URL for open steps, or URL pattern for assertUrl steps." })),
|
|
317
|
-
selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/
|
|
325
|
+
selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/select-like steps." })),
|
|
318
326
|
text: Type.Optional(Type.String({ description: "Text for fill steps or visible text for assertText steps." })),
|
|
327
|
+
value: Type.Optional(Type.String({ description: "Single option value for select steps." })),
|
|
328
|
+
values: Type.Optional(Type.Array(Type.String({ description: "Option value for select steps." }), { description: "One or more option values for select steps.", minItems: 1 })),
|
|
319
329
|
path: Type.Optional(Type.String({ description: "Artifact/download path for waitForDownload or screenshot steps." })),
|
|
320
330
|
milliseconds: Type.Optional(Type.Number({ description: "Milliseconds for wait steps." })),
|
|
321
331
|
}),
|
|
@@ -355,6 +365,24 @@ function getRequiredJobString(step: Record<string, unknown>, field: "path" | "se
|
|
|
355
365
|
return { value };
|
|
356
366
|
}
|
|
357
367
|
|
|
368
|
+
/**
 * Normalizes the option values of a `select` action from either `value` or `values`.
 *
 * Exactly one of the two fields may be supplied: `input.value` as a single
 * non-blank string, or `input.values` as a non-empty array of non-blank strings.
 *
 * @param input - Raw step/action record whose `value` and `values` fields are read.
 * @param context - Field-path prefix interpolated into error messages (for example `semanticAction`).
 * @returns `{ values }` holding a freshly allocated array on success, or `{ error }` describing why validation failed.
 */
function getSelectValues(input: Record<string, unknown>, context: string): { values?: string[]; error?: string } {
	const isNonBlankString = (candidate: unknown): candidate is string => typeof candidate === "string" && candidate.trim().length > 0;
	const rawValue = input.value;
	const rawValues = input.values;
	if (rawValue !== undefined && rawValues !== undefined) {
		return { error: `${context}.value and ${context}.values cannot both be provided for select.` };
	}
	if (rawValues !== undefined) {
		if (!Array.isArray(rawValues) || rawValues.length === 0 || !rawValues.every(isNonBlankString)) {
			return { error: `${context}.values must be a non-empty array of non-empty strings for select.` };
		}
		// Copy so the compiled result never aliases (and cannot be changed through) the caller's array.
		const copiedValues: string[] = [...rawValues];
		return { values: copiedValues };
	}
	if (isNonBlankString(rawValue)) {
		return { values: [rawValue] };
	}
	return { error: `${context}.value or ${context}.values is required for select.` };
}
|
|
385
|
+
|
|
358
386
|
function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrowserJob; error?: string } {
|
|
359
387
|
if (!isRecord(input)) {
|
|
360
388
|
return { error: "job must be an object." };
|
|
@@ -388,6 +416,12 @@ function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrows
|
|
|
388
416
|
const text = getRequiredJobString(rawStep, "text", jobAction);
|
|
389
417
|
if (text.error) return { error: `job.steps[${index}]: ${text.error}` };
|
|
390
418
|
args = ["fill", selector.value as string, text.value as string];
|
|
419
|
+
} else if (jobAction === "select") {
|
|
420
|
+
const selector = getRequiredJobString(rawStep, "selector", jobAction);
|
|
421
|
+
if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
|
|
422
|
+
const values = getSelectValues(rawStep, `job.steps[${index}]`);
|
|
423
|
+
if (values.error) return { error: values.error };
|
|
424
|
+
args = ["select", selector.value as string, ...(values.values as string[])];
|
|
391
425
|
} else if (jobAction === "wait") {
|
|
392
426
|
const milliseconds = rawStep.milliseconds;
|
|
393
427
|
if (typeof milliseconds !== "number" || !Number.isInteger(milliseconds) || milliseconds <= 0) {
|
|
@@ -781,9 +815,11 @@ function compileAgentBrowserNetworkSourceLookup(input: unknown): { compiled?: Co
|
|
|
781
815
|
if (!isRecord(input)) return { error: "networkSourceLookup must be an object." };
|
|
782
816
|
const filter = input.filter;
|
|
783
817
|
const requestId = input.requestId;
|
|
818
|
+
const session = input.session;
|
|
784
819
|
const url = input.url;
|
|
785
820
|
if (filter !== undefined && (typeof filter !== "string" || filter.trim().length === 0)) return { error: "networkSourceLookup.filter must be a non-empty string when provided." };
|
|
786
821
|
if (requestId !== undefined && (typeof requestId !== "string" || requestId.trim().length === 0)) return { error: "networkSourceLookup.requestId must be a non-empty string when provided." };
|
|
822
|
+
if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) return { error: "networkSourceLookup.session must be a non-empty string when provided." };
|
|
787
823
|
if (url !== undefined && (typeof url !== "string" || url.trim().length === 0)) return { error: "networkSourceLookup.url must be a non-empty string when provided." };
|
|
788
824
|
if (filter === undefined && requestId === undefined && url === undefined) return { error: "networkSourceLookup requires requestId, filter, or url." };
|
|
789
825
|
const maxWorkspaceFiles = validateLookupMaxWorkspaceFiles(input.maxWorkspaceFiles, "networkSourceLookup.maxWorkspaceFiles");
|
|
@@ -796,7 +832,8 @@ function compileAgentBrowserNetworkSourceLookup(input: unknown): { compiled?: Co
|
|
|
796
832
|
if (effectiveFilter) {
|
|
797
833
|
steps.push({ action: "network", args: ["network", "requests", "--filter", effectiveFilter] });
|
|
798
834
|
}
|
|
799
|
-
|
|
835
|
+
const args = typeof session === "string" ? ["--session", session, "batch"] : ["batch"];
|
|
836
|
+
return { compiled: { args, query: { filter, maxWorkspaceFiles: maxWorkspaceFiles.value as number, requestId, session, url }, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
|
|
800
837
|
}
|
|
801
838
|
|
|
802
839
|
function getResultPayload(item: Record<string, unknown>): unknown {
|
|
@@ -967,6 +1004,11 @@ function getCompiledSemanticActionSessionPrefix(compiled: CompiledAgentBrowserSe
|
|
|
967
1004
|
return commandIndex > 0 ? compiled.args.slice(0, commandIndex) : [];
|
|
968
1005
|
}
|
|
969
1006
|
|
|
1007
|
+
/**
 * Reports whether a compiled semantic action runs the `find` command.
 *
 * The command token is read from `compiled.args` at the index returned by
 * `getCompiledSemanticActionCommandIndex`.
 *
 * @param compiled - Compiled semantic action, or `undefined` when none was compiled.
 * @returns `true` only when an action is present and its command token is `"find"`.
 */
function isCompiledSemanticActionFindCommand(compiled: CompiledAgentBrowserSemanticAction | undefined): boolean {
	if (compiled) {
		const commandToken = compiled.args[getCompiledSemanticActionCommandIndex(compiled)];
		return commandToken === "find";
	}
	return false;
}
|
|
1011
|
+
|
|
970
1012
|
const SEMANTIC_ACTION_CANDIDATE_ACTION_IDS = new Set([
|
|
971
1013
|
"try-searchbox-name-candidate",
|
|
972
1014
|
"try-textbox-name-candidate",
|
|
@@ -986,7 +1028,7 @@ function formatSemanticActionCandidateText(actions: AgentBrowserNextAction[]): s
|
|
|
986
1028
|
|
|
987
1029
|
function buildSemanticActionCandidateActions(compiled: CompiledAgentBrowserSemanticAction): AgentBrowserNextAction[] {
|
|
988
1030
|
const commandIndex = getCompiledSemanticActionCommandIndex(compiled);
|
|
989
|
-
if (commandIndex < 0) return [];
|
|
1031
|
+
if (commandIndex < 0 || compiled.args[commandIndex] !== "find") return [];
|
|
990
1032
|
const locator = compiled.args[commandIndex + 1];
|
|
991
1033
|
const value = compiled.args[commandIndex + 2];
|
|
992
1034
|
if (!locator || !value) return [];
|
|
@@ -1034,12 +1076,12 @@ function getFindNameFlagValue(args: string[], startIndex: number): string | unde
|
|
|
1034
1076
|
}
|
|
1035
1077
|
|
|
1036
1078
|
function getFindVisibleRefFallbackTarget(args: string[]): VisibleRefFallbackTarget | undefined {
|
|
1037
|
-
const findIndex = args[0] === "--session" ? 2 :
|
|
1038
|
-
if (findIndex
|
|
1079
|
+
const findIndex = args[0] === "--session" ? 2 : 0;
|
|
1080
|
+
if (args[findIndex] !== "find") return undefined;
|
|
1039
1081
|
const locator = args[findIndex + 1];
|
|
1040
1082
|
const value = args[findIndex + 2];
|
|
1041
1083
|
const action = args[findIndex + 3];
|
|
1042
|
-
if (!locator || !value || !isAgentBrowserSemanticActionName(action)) return undefined;
|
|
1084
|
+
if (!locator || !value || !isAgentBrowserSemanticActionName(action) || action === "select") return undefined;
|
|
1043
1085
|
const text = action === "fill" ? args[findIndex + 4] : undefined;
|
|
1044
1086
|
if (action === "fill" && (!text || text.startsWith("-"))) return undefined;
|
|
1045
1087
|
if (locator === "role") {
|
|
@@ -1200,6 +1242,8 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
|
|
|
1200
1242
|
const action = input.action;
|
|
1201
1243
|
const locator = input.locator;
|
|
1202
1244
|
const value = input.value;
|
|
1245
|
+
const values = input.values;
|
|
1246
|
+
const selector = input.selector;
|
|
1203
1247
|
const text = input.text;
|
|
1204
1248
|
const role = input.role;
|
|
1205
1249
|
const name = input.name;
|
|
@@ -1207,6 +1251,27 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
|
|
|
1207
1251
|
if (typeof action !== "string" || !AGENT_BROWSER_SEMANTIC_ACTIONS.includes(action as AgentBrowserSemanticActionName)) {
|
|
1208
1252
|
return { error: `semanticAction.action must be one of: ${AGENT_BROWSER_SEMANTIC_ACTIONS.join(", ")}.` };
|
|
1209
1253
|
}
|
|
1254
|
+
if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) {
|
|
1255
|
+
return { error: "semanticAction.session must be a non-empty string when provided." };
|
|
1256
|
+
}
|
|
1257
|
+
if (action === "select") {
|
|
1258
|
+
if (locator !== undefined || role !== undefined || name !== undefined) {
|
|
1259
|
+
return { error: "semanticAction.locator, role, and name are not supported for select; use selector plus value or values." };
|
|
1260
|
+
}
|
|
1261
|
+
if (text !== undefined) {
|
|
1262
|
+
return { error: "semanticAction.text is not supported for select; use value or values for option values." };
|
|
1263
|
+
}
|
|
1264
|
+
if (typeof selector !== "string" || selector.trim().length === 0) {
|
|
1265
|
+
return { error: "semanticAction.selector is required for select." };
|
|
1266
|
+
}
|
|
1267
|
+
const selectedValues = getSelectValues(input, "semanticAction");
|
|
1268
|
+
if (selectedValues.error) return { error: selectedValues.error };
|
|
1269
|
+
const args = typeof session === "string" ? ["--session", session, "select", selector, ...(selectedValues.values as string[])] : ["select", selector, ...(selectedValues.values as string[])];
|
|
1270
|
+
return { compiled: { action: "select", selector, values: selectedValues.values, args } };
|
|
1271
|
+
}
|
|
1272
|
+
if (selector !== undefined || values !== undefined) {
|
|
1273
|
+
return { error: "semanticAction.selector and values are only supported for select actions." };
|
|
1274
|
+
}
|
|
1210
1275
|
if (typeof locator !== "string" || !AGENT_BROWSER_SEMANTIC_LOCATORS.includes(locator as AgentBrowserSemanticLocator)) {
|
|
1211
1276
|
return { error: `semanticAction.locator must be one of: ${AGENT_BROWSER_SEMANTIC_LOCATORS.join(", ")}.` };
|
|
1212
1277
|
}
|
|
@@ -1228,9 +1293,6 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
|
|
|
1228
1293
|
if (name !== undefined && (locator !== "role" || typeof name !== "string" || name.length === 0)) {
|
|
1229
1294
|
return { error: "semanticAction.name is only supported as a non-empty string for locator=role." };
|
|
1230
1295
|
}
|
|
1231
|
-
if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) {
|
|
1232
|
-
return { error: "semanticAction.session must be a non-empty string when provided." };
|
|
1233
|
-
}
|
|
1234
1296
|
const args = typeof session === "string" ? ["--session", session, "find", locator, value, action] : ["find", locator, value, action];
|
|
1235
1297
|
if (action === "fill") {
|
|
1236
1298
|
args.push(text as string);
|
|
@@ -1614,6 +1676,9 @@ async function isDirectAgentBrowserBashAllowed(cwd: string): Promise<boolean> {
|
|
|
1614
1676
|
|
|
1615
1677
|
const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
|
|
1616
1678
|
const NAVIGATION_SUMMARY_EVAL = `({ title: document.title, url: location.href })`;
|
|
1679
|
+
// These commands can expose URLs for inspected resources (request URLs, cookie/storage scope, or log sources),
|
|
1680
|
+
// but they do not navigate the active tab and must not poison page-scoped ref guards.
|
|
1681
|
+
const READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS = new Set(["console", "cookies", "errors", "network", "storage"]);
|
|
1617
1682
|
|
|
1618
1683
|
interface NavigationSummary {
|
|
1619
1684
|
title?: string;
|
|
@@ -2259,6 +2324,15 @@ function extractSessionTabTargetFromData(data: unknown): SessionTabTarget | unde
|
|
|
2259
2324
|
return undefined;
|
|
2260
2325
|
}
|
|
2261
2326
|
|
|
2327
|
+
/**
 * Reports whether `command` is listed in `READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS`.
 *
 * @param command - Top-level command name; `undefined` never matches.
 * @param _subcommand - Accepted for call-site symmetry; not consulted.
 * @returns `true` when the command is a member of the read-only diagnostic set.
 */
function isReadOnlyDiagnosticSessionTargetCommand(command: string | undefined, _subcommand: string | undefined): boolean {
	if (command === undefined) {
		return false;
	}
	return READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS.has(command);
}
|
|
2330
|
+
|
|
2331
|
+
/**
 * Extracts a session tab target from command result data, unless the command
 * is a read-only diagnostic one.
 *
 * @param commandTokens - Command tokens; the first is the command and the second the subcommand.
 * @param data - Result payload handed to `extractSessionTabTargetFromData`.
 * @returns `undefined` for read-only diagnostic commands, otherwise whatever
 * `extractSessionTabTargetFromData` yields for `data`.
 */
function extractSessionTabTargetFromCommandData(commandTokens: string[], data: unknown): SessionTabTarget | undefined {
	const command = commandTokens[0];
	const subcommand = commandTokens[1];
	if (isReadOnlyDiagnosticSessionTargetCommand(command, subcommand)) {
		return undefined;
	}
	return extractSessionTabTargetFromData(data);
}
|
|
2335
|
+
|
|
2262
2336
|
function extractBatchResultCommand(item: Record<string, unknown>): string[] {
|
|
2263
2337
|
return Array.isArray(item.command) ? item.command.filter((token): token is string => typeof token === "string") : [];
|
|
2264
2338
|
}
|
|
@@ -2290,7 +2364,7 @@ function extractSessionTabTargetFromBatchResults(data: unknown): SessionTabTarge
|
|
|
2290
2364
|
pendingTitle = undefined;
|
|
2291
2365
|
continue;
|
|
2292
2366
|
}
|
|
2293
|
-
const resultTarget =
|
|
2367
|
+
const resultTarget = extractSessionTabTargetFromCommandData([name, subcommand].filter((token): token is string => token !== undefined), result);
|
|
2294
2368
|
if (resultTarget) {
|
|
2295
2369
|
currentTarget = resultTarget;
|
|
2296
2370
|
}
|
|
@@ -2299,6 +2373,40 @@ function extractSessionTabTargetFromBatchResults(data: unknown): SessionTabTarge
|
|
|
2299
2373
|
return currentTarget;
|
|
2300
2374
|
}
|
|
2301
2375
|
|
|
2376
|
+
/**
 * Reports whether a batch result list consists solely of read-only diagnostic commands.
 *
 * @param data - Candidate batch result payload.
 * @returns `false` when `data` is not an array or is empty; otherwise `true` only if
 * every entry is a record whose extracted command is a read-only diagnostic command.
 */
function batchContainsOnlyReadOnlyDiagnosticTargets(data: unknown): boolean {
	const isBatchList = Array.isArray(data) && data.length > 0;
	if (!isBatchList) {
		return false;
	}
	// "All entries qualify" expressed as "no entry disqualifies".
	const hasDisqualifyingEntry = data.some((entry) => {
		if (!isRecord(entry)) return true;
		const tokens = extractBatchResultCommand(entry);
		return !isReadOnlyDiagnosticSessionTargetCommand(tokens[0], tokens[1]);
	});
	return !hasDisqualifyingEntry;
}
|
|
2386
|
+
|
|
2387
|
+
/**
 * Chooses the session tab target to restore from one stored tool-result `details` record.
 *
 * Resolution order:
 * 1. Read-only diagnostic commands never yield a target.
 * 2. Non-batch commands yield the normalized `details.sessionTabTarget`, if it is a record.
 * 3. Batch commands prefer a target derived from the batch results in `details.data`.
 * 4. A batch with no derived target yields nothing when it came from a compiled network
 *    source lookup or contains only read-only diagnostic commands; otherwise it falls
 *    back to the stored target.
 *
 * @param details - Stored result details record.
 * @param command - Top-level command recorded for the result.
 * @param subcommand - Subcommand recorded for the result.
 * @returns The target to restore, or `undefined` when none applies.
 */
function getRestoredSessionTabTarget(details: Record<string, unknown>, command: string | undefined, subcommand: string | undefined): SessionTabTarget | undefined {
	if (isReadOnlyDiagnosticSessionTargetCommand(command, subcommand)) return undefined;

	const asOptionalString = (candidate: unknown): string | undefined => (typeof candidate === "string" ? candidate : undefined);
	const rawStoredTarget = details.sessionTabTarget;
	const storedTarget = isRecord(rawStoredTarget)
		? normalizeSessionTabTarget({ title: asOptionalString(rawStoredTarget.title), url: asOptionalString(rawStoredTarget.url) })
		: undefined;

	if (command === "batch") {
		const batchTarget = extractSessionTabTargetFromBatchResults(details.data);
		if (batchTarget) return batchTarget;
		const isDiagnosticOnlyBatch = isRecord(details.compiledNetworkSourceLookup) || batchContainsOnlyReadOnlyDiagnosticTargets(details.data);
		if (isDiagnosticOnlyBatch) return undefined;
	}
	return storedTarget;
}
|
|
2409
|
+
|
|
2302
2410
|
function restoreSessionTabTargetsFromBranch(branch: unknown[]): Map<string, OrderedSessionTabTarget> {
|
|
2303
2411
|
const restoredTargets = new Map<string, OrderedSessionTabTarget>();
|
|
2304
2412
|
let restoredOrder = 0;
|
|
@@ -2319,17 +2427,13 @@ function restoreSessionTabTargetsFromBranch(branch: unknown[]): Map<string, Orde
|
|
|
2319
2427
|
continue;
|
|
2320
2428
|
}
|
|
2321
2429
|
const command = typeof details.command === "string" ? details.command : undefined;
|
|
2430
|
+
const subcommand = typeof details.subcommand === "string" ? details.subcommand : undefined;
|
|
2322
2431
|
if (command === "close" && message.isError !== true) {
|
|
2323
2432
|
restoredOrder += 1;
|
|
2324
2433
|
restoredTargets.delete(sessionName);
|
|
2325
2434
|
continue;
|
|
2326
2435
|
}
|
|
2327
|
-
const sessionTabTarget =
|
|
2328
|
-
? normalizeSessionTabTarget({
|
|
2329
|
-
title: typeof details.sessionTabTarget.title === "string" ? details.sessionTabTarget.title : undefined,
|
|
2330
|
-
url: typeof details.sessionTabTarget.url === "string" ? details.sessionTabTarget.url : undefined,
|
|
2331
|
-
})
|
|
2332
|
-
: undefined;
|
|
2436
|
+
const sessionTabTarget = getRestoredSessionTabTarget(details, command, subcommand);
|
|
2333
2437
|
if (sessionTabTarget) {
|
|
2334
2438
|
restoredOrder += 1;
|
|
2335
2439
|
restoredTargets.set(sessionName, { order: restoredOrder, target: sessionTabTarget });
|
|
@@ -2751,14 +2855,18 @@ function deriveSessionTabTarget(options: {
|
|
|
2751
2855
|
data: unknown;
|
|
2752
2856
|
navigationSummary?: NavigationSummary;
|
|
2753
2857
|
previousTarget?: SessionTabTarget;
|
|
2858
|
+
subcommand?: string;
|
|
2754
2859
|
}): SessionTabTarget | undefined {
|
|
2755
2860
|
if (options.command === "close") {
|
|
2756
2861
|
return undefined;
|
|
2757
2862
|
}
|
|
2863
|
+
const commandDataTarget = isReadOnlyDiagnosticSessionTargetCommand(options.command, options.subcommand)
|
|
2864
|
+
? undefined
|
|
2865
|
+
: extractSessionTabTargetFromData(options.data);
|
|
2758
2866
|
return (
|
|
2759
2867
|
normalizeSessionTabTarget(options.navigationSummary) ??
|
|
2760
2868
|
extractSessionTabTargetFromBatchResults(options.data) ??
|
|
2761
|
-
|
|
2869
|
+
commandDataTarget ??
|
|
2762
2870
|
options.previousTarget
|
|
2763
2871
|
);
|
|
2764
2872
|
}
|
|
@@ -3353,14 +3461,16 @@ function looksLikeFunctionEvalStdin(stdin: string | undefined): boolean {
|
|
|
3353
3461
|
return /^(?:async\s+)?function\b/.test(trimmed) || /^(?:async\s*)?\([^)]*\)\s*=>/.test(trimmed) || /^(?:async\s+)?[A-Za-z_$][\w$]*\s*=>/.test(trimmed);
|
|
3354
3462
|
}
|
|
3355
3463
|
|
|
3356
|
-
function
|
|
3357
|
-
|
|
3464
|
+
/**
 * Reports whether `value` is a plain object with no own enumerable string keys.
 *
 * "Plain" here means a non-array record whose prototype is either
 * `Object.prototype` or `null`.
 *
 * @param value - Candidate value.
 * @returns `true` only for an empty plain object.
 */
function isPlainEmptyObject(value: unknown): boolean {
	if (Array.isArray(value) || !isRecord(value)) {
		return false;
	}
	const proto = Object.getPrototypeOf(value);
	const hasPlainPrototype = proto === Object.prototype || proto === null;
	if (!hasPlainPrototype) {
		return false;
	}
	return Object.keys(value).length === 0;
}
|
|
3359
3469
|
|
|
3360
3470
|
function getEvalStdinHint(options: { command?: string; data: unknown; stdin?: string }): EvalStdinHint | undefined {
|
|
3361
3471
|
if (options.command !== "eval" || !looksLikeFunctionEvalStdin(options.stdin) || !isRecord(options.data)) return undefined;
|
|
3362
3472
|
const result = options.data.result;
|
|
3363
|
-
if (!
|
|
3473
|
+
if (!isPlainEmptyObject(result)) return undefined;
|
|
3364
3474
|
return {
|
|
3365
3475
|
reason: "eval --stdin received a function-shaped snippet and the upstream JSON result was an empty object, which often means the function itself was returned or serialized instead of invoked.",
|
|
3366
3476
|
suggestion: "Pass a plain expression such as `({ title: document.title })`, or invoke the function explicitly, for example `(() => ({ title: document.title }))()`.",
|
|
@@ -4009,6 +4119,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4009
4119
|
const redactedCompiledNetworkSourceLookup = compiledNetworkSourceLookup && redactedCompiledNetworkSourceLookupSteps
|
|
4010
4120
|
? {
|
|
4011
4121
|
...compiledNetworkSourceLookup,
|
|
4122
|
+
args: redactNetworkSourceLookupArgs(compiledNetworkSourceLookup.args),
|
|
4012
4123
|
query: {
|
|
4013
4124
|
...compiledNetworkSourceLookup.query,
|
|
4014
4125
|
filter: redactNetworkSourceLookupUrl(compiledNetworkSourceLookup.query.filter),
|
|
@@ -4435,12 +4546,13 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4435
4546
|
const observedSessionTabTarget =
|
|
4436
4547
|
normalizeSessionTabTarget(navigationSummary) ??
|
|
4437
4548
|
extractSessionTabTargetFromBatchResults(presentationEnvelope?.data) ??
|
|
4438
|
-
|
|
4549
|
+
extractSessionTabTargetFromCommandData(commandTokens, presentationEnvelope?.data);
|
|
4439
4550
|
let currentSessionTabTarget = deriveSessionTabTarget({
|
|
4440
4551
|
command: executionPlan.commandInfo.command,
|
|
4441
4552
|
data: presentationEnvelope?.data,
|
|
4442
4553
|
navigationSummary,
|
|
4443
4554
|
previousTarget: priorSessionTabTarget,
|
|
4555
|
+
subcommand: executionPlan.commandInfo.subcommand,
|
|
4444
4556
|
});
|
|
4445
4557
|
let aboutBlankSessionMismatch: AboutBlankSessionMismatch | undefined;
|
|
4446
4558
|
const shouldTreatAboutBlankAsMismatch =
|
|
@@ -4826,7 +4938,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
|
|
|
4826
4938
|
if (comboboxFocusDiagnostic) {
|
|
4827
4939
|
(nextActions ??= []).push(...buildComboboxFocusNextActions(executionPlan.sessionName));
|
|
4828
4940
|
}
|
|
4829
|
-
if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction) {
|
|
4941
|
+
if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction && isCompiledSemanticActionFindCommand(compiledSemanticAction)) {
|
|
4830
4942
|
(nextActions ??= []).push({
|
|
4831
4943
|
id: "retry-semantic-action-after-stale-ref",
|
|
4832
4944
|
params: { args: redactedCompiledSemanticAction.args },
|
|
@@ -18,14 +18,14 @@ export function buildInstalledDocsGuideline(paths: { readmePath: string; command
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
export const QUICK_START_GUIDELINES = [
|
|
21
|
-
"Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin
|
|
21
|
+
"Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
|
|
22
22
|
"There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
|
|
23
23
|
"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
|
|
24
|
-
"Locator-first clicks and
|
|
25
|
-
"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { job: { steps: [{ action: \"open\", url: \"https://example.com\" }, { action: \"assertText\", text: \"Example Domain\" }, { action: \"screenshot\", path: \".dogfood/example.png\" }] } }, { qa: { url: \"https://example.com\", expectedText: \"Example Domain\", screenshotPath: \".dogfood/qa-example.png\" } }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
|
|
26
|
-
"High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
|
|
24
|
+
"Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded try-*-candidate next actions (and an Agent-browser candidate fallbacks prose block) for specific placeholder/text/label shapes, and stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
|
|
25
|
+
"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { job: { steps: [{ action: \"open\", url: \"https://example.com\" }, { action: \"assertText\", text: \"Example Domain\" }, { action: \"screenshot\", path: \".dogfood/example.png\" }] } }, { qa: { url: \"https://example.com\", expectedText: \"Example Domain\", screenshotPath: \".dogfood/qa-example.png\" } }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }. For app pages with a native dropdown, job steps can include { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } before the dependent assertion.",
|
|
26
|
+
"High-value command reference: select <selector> <value...> changes native dropdown values; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
|
|
27
27
|
"For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
|
|
28
|
-
"When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, safety notes, or artifactPath for saved files.",
|
|
28
|
+
"When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
|
|
29
29
|
] as const;
|
|
30
30
|
|
|
31
31
|
export const BRAVE_SEARCH_PROMPT_GUIDELINE =
|
|
@@ -45,14 +45,14 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
|
|
|
45
45
|
"For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigate/opening the target page.",
|
|
46
46
|
"For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, state save/load for portable test state, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.",
|
|
47
47
|
"For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
|
|
48
|
-
"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
|
|
48
|
+
"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. For compact network requests output, prefer details.nextActions for request detail, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
|
|
49
49
|
"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
|
|
50
50
|
"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
|
|
51
51
|
"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
|
|
52
52
|
"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
|
|
53
53
|
"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
|
|
54
54
|
"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
|
|
55
|
-
"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field
|
|
55
|
+
"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For native selects, use select <selector> <value...> (or semanticAction/job select) instead of clicking option refs; for custom comboboxes, a click/semanticAction may only focus the field, so re-snapshot and fall back to type, press Enter/arrow keys, or visible option refs.",
|
|
56
56
|
"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
|
|
57
57
|
"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
|
|
58
58
|
"When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
|
|
@@ -92,12 +92,12 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeBraveSear
|
|
|
92
92
|
const RUNTIME_PROMPT_GUIDELINES = [
|
|
93
93
|
"Use exactly one input mode: args, semanticAction, job, qa, sourceLookup, or networkSourceLookup. Use stdin only for batch, eval --stdin, auth save --password-stdin, or wrapper-generated batch modes.",
|
|
94
94
|
"Common flow: open, snapshot -i, interact with current @refs or semanticAction, then re-snapshot after navigation, scrolling, rerenders, or DOM changes. For ordinary forms, batch same-snapshot fill @refs before the submit/click step; split if a fill may autosubmit, navigate, or rerender later fields. Respect explicit stop boundaries: if the user says to stop before order/post/purchase/submit, do not click that final action.",
|
|
95
|
-
"Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. Use current @refs only from the latest same-page snapshot.",
|
|
95
|
+
"Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. For native selects, prefer select <selector> <value...> or semanticAction/job select over clicking option refs. Use current @refs only from the latest same-page snapshot.",
|
|
96
96
|
"For tasks that explicitly require the user's signed-in/account-specific content, start with --profile Default plus sessionMode=fresh unless the user asks otherwise; visible page content is model-visible. Use sessionMode=fresh for other launch-scoped state such as --session-name, --cdp, --state, --auto-connect, --init-script, --enable, providers, or iOS devices; otherwise let the implicit session carry continuity.",
|
|
97
97
|
"For requested screenshots, recordings, downloads, PDFs, or HARs, save the exact user path and read details.artifactVerification before claiming success; report unavailable/missing artifacts instead of silently substituting paths. record stop needs ffmpeg on PATH. close does not delete saved files; cleanup is host-owned.",
|
|
98
|
-
"When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors.",
|
|
98
|
+
"When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors; network request diagnostics may include request-detail, actionable failed-request networkSourceLookup, filter, or HAR-capture follow-ups.",
|
|
99
99
|
"For dense snapshots, check Omitted high-value controls and details.data.highValueControlRefIds before opening large spill files.",
|
|
100
|
-
"For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region.
|
|
100
|
+
"For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. For native selects use select/semanticAction/job select instead of option refs; custom combobox clicks may only focus, so re-snapshot and fall back to type, Enter/arrows, or visible option refs.",
|
|
101
101
|
"For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with a plain expression in the tool stdin field; do not rely on console.log. When reading several known refs/selectors, use batch with JSON-array stdin (for example [[\"get\",\"text\",\"@e1\"]]) or eval --stdin instead of many serial get calls. If selector visibility warnings appear, prefer visible @refs or nextActions.",
|
|
102
102
|
"For non-core debugging, pass upstream commands through args: network, diff, trace/profiler/record, console/errors, stream, dashboard, chat, react, vitals, pushstate, dialog, frame, tab.",
|
|
103
103
|
] as const;
|