npm - pi-agent-browser-native - Versions diffs - 0.2.30 → 0.2.31 - Mend

pi-agent-browser-native 0.2.30 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/CHANGELOG.md +13 -0
package/README.md +11 -8
package/docs/ARCHITECTURE.md +3 -3
package/docs/COMMAND_REFERENCE.md +12 -8
package/docs/RELEASE.md +11 -11
package/docs/REQUIREMENTS.md +4 -3
package/docs/SUPPORT_MATRIX.md +13 -5
package/docs/TOOL_CONTRACT.md +30 -20
package/extensions/agent-browser/index.ts +145 -33
package/extensions/agent-browser/lib/playbook.ts +10 -10
package/extensions/agent-browser/lib/results/presentation.ts +154 -2
package/extensions/agent-browser/lib/results/shared.ts +7 -1
package/package.json +1 -1

package/extensions/agent-browser/index.ts CHANGED

@@ -84,9 +84,9 @@ const DEFAULT_SESSION_MODE = "auto" as const;
 const DIRECT_AGENT_BROWSER_BASH_BYPASS_ENV = "PI_AGENT_BROWSER_ALLOW_DIRECT_BASH";
 const PACKAGE_NAME = "pi-agent-browser-native";
-const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "uncheck"] as const;
+const AGENT_BROWSER_SEMANTIC_ACTIONS = ["check", "click", "fill", "select", "uncheck"] as const;
 const AGENT_BROWSER_SEMANTIC_LOCATORS = ["alt", "label", "placeholder", "role", "testid", "text", "title"] as const;
-const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
+const AGENT_BROWSER_JOB_STEP_ACTIONS = ["open", "click", "fill", "select", "wait", "assertText", "assertUrl", "waitForDownload", "screenshot"] as const;
 const AGENT_BROWSER_QA_LOAD_STATES = ["domcontentloaded", "load", "networkidle"] as const;
 const SOURCE_LOOKUP_WORKSPACE_EXTENSIONS = new Set([".ts", ".tsx", ".js", ".jsx"]);
 const SOURCE_LOOKUP_IGNORED_DIRECTORIES = new Set([".git", "node_modules", "dist", "build", "coverage", ".next", "out", "tmp", "temp"]);
@@ -102,8 +102,10 @@ type AgentBrowserNetworkSourceLookupStatus = "failed-requests-found" | "no-faile
 interface AgentBrowserSemanticActionInput {
 	action: AgentBrowserSemanticActionName;
-	locator: AgentBrowserSemanticLocator;
-	value: string;
+	locator?: AgentBrowserSemanticLocator;
+	value?: string;
+	values?: string[];
+	selector?: string;
 	text?: string;
 	role?: string;
 	name?: string;
@@ -112,7 +114,9 @@ interface AgentBrowserSemanticActionInput {
 interface CompiledAgentBrowserSemanticAction {
 	action: AgentBrowserSemanticActionName;
-	locator: AgentBrowserSemanticLocator;
+	locator?: AgentBrowserSemanticLocator;
+	selector?: string;
+	values?: string[];
 	args: string[];
 }
@@ -225,6 +229,7 @@ interface CompiledAgentBrowserNetworkSourceLookup {
 		filter?: string;
 		maxWorkspaceFiles: number;
 		requestId?: string;
+		session?: string;
 		url?: string;
 	};
 }
@@ -265,16 +270,18 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 	semanticAction: Type.Optional(
 		Type.Object({
 			action: StringEnum(AGENT_BROWSER_SEMANTIC_ACTIONS, {
-				description: "Intent action to compile to an existing agent-browser find command.",
+				description: "Intent action to compile to an existing agent-browser find command, or to upstream select when action=select.",
 			}),
-			locator: StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
-				description: "Upstream find locator family to use.",
-			}),
-			value: Type.String({ description: "Locator value, such as visible text, label text, placeholder text, test id, title, alt text, or role." }),
+			locator: Type.Optional(StringEnum(AGENT_BROWSER_SEMANTIC_LOCATORS, {
+				description: "Upstream find locator family to use for check/click/fill/uncheck actions.",
+			})),
+			value: Type.Optional(Type.String({ description: "Locator value for find actions, or a single option value for select actions." })),
+			values: Type.Optional(Type.Array(Type.String({ description: "Option value for select actions." }), { description: "One or more option values for select actions.", minItems: 1 })),
+			selector: Type.Optional(Type.String({ description: "Selector or @ref for select actions; compiled to select <selector> <value...>." })),
 			text: Type.Optional(Type.String({ description: "Text/value argument for fill actions." })),
 			role: Type.Optional(Type.String({ description: "Role locator value; when set it must match value for locator=role." })),
 			name: Type.Optional(Type.String({ description: "Accessible name filter for locator=role; compiles to --name <name>." })),
-			session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled find command." })),
+			session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the compiled command." })),
 		}),
 	),
 	qa: Type.Optional(
@@ -302,6 +309,7 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 		Type.Object({
 			filter: Type.Optional(Type.String({ description: "Optional upstream network requests filter pattern." })),
 			requestId: Type.Optional(Type.String({ description: "Optional network request id to inspect with network request <id>." })),
+			session: Type.Optional(Type.String({ description: "Optional upstream session name; prepends --session <name> before the generated batch." })),
 			url: Type.Optional(Type.String({ description: "Optional failed request URL or URL fragment to correlate with local source." })),
 			maxWorkspaceFiles: Type.Optional(Type.Number({ description: "Maximum local source files to scan for URL literals. Defaults to 2000 and cannot exceed 5000.", minimum: 1, maximum: SOURCE_LOOKUP_MAX_WORKSPACE_FILES })),
 		}),
@@ -314,8 +322,10 @@ const AGENT_BROWSER_PARAMS = Type.Object({
 						description: "Constrained one-call job step compiled to existing upstream batch commands.",
 					}),
 					url: Type.Optional(Type.String({ description: "URL for open steps, or URL pattern for assertUrl steps." })),
-					selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/get-like steps." })),
+					selector: Type.Optional(Type.String({ description: "Selector or @ref for click/fill/select-like steps." })),
 					text: Type.Optional(Type.String({ description: "Text for fill steps or visible text for assertText steps." })),
+					value: Type.Optional(Type.String({ description: "Single option value for select steps." })),
+					values: Type.Optional(Type.Array(Type.String({ description: "Option value for select steps." }), { description: "One or more option values for select steps.", minItems: 1 })),
 					path: Type.Optional(Type.String({ description: "Artifact/download path for waitForDownload or screenshot steps." })),
 					milliseconds: Type.Optional(Type.Number({ description: "Milliseconds for wait steps." })),
 				}),
@@ -355,6 +365,24 @@ function getRequiredJobString(step: Record<string, unknown>, field: "path" | "se
 	return { value };
 }
+function getSelectValues(input: Record<string, unknown>, context: string): { values?: string[]; error?: string } {
+	const rawValue = input.value;
+	const rawValues = input.values;
+	if (rawValue !== undefined && rawValues !== undefined) {
+		return { error: `${context}.value and ${context}.values cannot both be provided for select.` };
+	}
+	if (rawValues !== undefined) {
+		if (!Array.isArray(rawValues) || rawValues.length === 0 || rawValues.some((value) => typeof value !== "string" || value.trim().length === 0)) {
+			return { error: `${context}.values must be a non-empty array of non-empty strings for select.` };
+		}
+		return { values: rawValues };
+	}
+	if (typeof rawValue === "string" && rawValue.trim().length > 0) {
+		return { values: [rawValue] };
+	}
+	return { error: `${context}.value or ${context}.values is required for select.` };
+}
 function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrowserJob; error?: string } {
 	if (!isRecord(input)) {
 		return { error: "job must be an object." };
@@ -388,6 +416,12 @@ function compileAgentBrowserJob(input: unknown): { compiled?: CompiledAgentBrows
 			const text = getRequiredJobString(rawStep, "text", jobAction);
 			if (text.error) return { error: `job.steps[${index}]: ${text.error}` };
 			args = ["fill", selector.value as string, text.value as string];
+		} else if (jobAction === "select") {
+			const selector = getRequiredJobString(rawStep, "selector", jobAction);
+			if (selector.error) return { error: `job.steps[${index}]: ${selector.error}` };
+			const values = getSelectValues(rawStep, `job.steps[${index}]`);
+			if (values.error) return { error: values.error };
+			args = ["select", selector.value as string, ...(values.values as string[])];
 		} else if (jobAction === "wait") {
 			const milliseconds = rawStep.milliseconds;
 			if (typeof milliseconds !== "number" || !Number.isInteger(milliseconds) || milliseconds <= 0) {
@@ -781,9 +815,11 @@ function compileAgentBrowserNetworkSourceLookup(input: unknown): { compiled?: Co
 	if (!isRecord(input)) return { error: "networkSourceLookup must be an object." };
 	const filter = input.filter;
 	const requestId = input.requestId;
+	const session = input.session;
 	const url = input.url;
 	if (filter !== undefined && (typeof filter !== "string" || filter.trim().length === 0)) return { error: "networkSourceLookup.filter must be a non-empty string when provided." };
 	if (requestId !== undefined && (typeof requestId !== "string" || requestId.trim().length === 0)) return { error: "networkSourceLookup.requestId must be a non-empty string when provided." };
+	if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) return { error: "networkSourceLookup.session must be a non-empty string when provided." };
 	if (url !== undefined && (typeof url !== "string" || url.trim().length === 0)) return { error: "networkSourceLookup.url must be a non-empty string when provided." };
 	if (filter === undefined && requestId === undefined && url === undefined) return { error: "networkSourceLookup requires requestId, filter, or url." };
 	const maxWorkspaceFiles = validateLookupMaxWorkspaceFiles(input.maxWorkspaceFiles, "networkSourceLookup.maxWorkspaceFiles");
@@ -796,7 +832,8 @@ function compileAgentBrowserNetworkSourceLookup(input: unknown): { compiled?: Co
 	if (effectiveFilter) {
 		steps.push({ action: "network", args: ["network", "requests", "--filter", effectiveFilter] });
 	}
-	return { compiled: { args: ["batch"], query: { filter, maxWorkspaceFiles: maxWorkspaceFiles.value as number, requestId, url }, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
+	const args = typeof session === "string" ? ["--session", session, "batch"] : ["batch"];
+	return { compiled: { args, query: { filter, maxWorkspaceFiles: maxWorkspaceFiles.value as number, requestId, session, url }, stdin: JSON.stringify(steps.map((step) => step.args)), steps } };
 }
 function getResultPayload(item: Record<string, unknown>): unknown {
@@ -967,6 +1004,11 @@ function getCompiledSemanticActionSessionPrefix(compiled: CompiledAgentBrowserSe
 	return commandIndex > 0 ? compiled.args.slice(0, commandIndex) : [];
 }
+function isCompiledSemanticActionFindCommand(compiled: CompiledAgentBrowserSemanticAction | undefined): boolean {
+	if (!compiled) return false;
+	return compiled.args[getCompiledSemanticActionCommandIndex(compiled)] === "find";
+}
 const SEMANTIC_ACTION_CANDIDATE_ACTION_IDS = new Set([
 	"try-searchbox-name-candidate",
 	"try-textbox-name-candidate",
@@ -986,7 +1028,7 @@ function formatSemanticActionCandidateText(actions: AgentBrowserNextAction[]): s
 function buildSemanticActionCandidateActions(compiled: CompiledAgentBrowserSemanticAction): AgentBrowserNextAction[] {
 	const commandIndex = getCompiledSemanticActionCommandIndex(compiled);
-	if (commandIndex < 0) return [];
+	if (commandIndex < 0 || compiled.args[commandIndex] !== "find") return [];
 	const locator = compiled.args[commandIndex + 1];
 	const value = compiled.args[commandIndex + 2];
 	if (!locator || !value) return [];
@@ -1034,12 +1076,12 @@ function getFindNameFlagValue(args: string[], startIndex: number): string | unde
 }
 function getFindVisibleRefFallbackTarget(args: string[]): VisibleRefFallbackTarget | undefined {
-	const findIndex = args[0] === "--session" ? 2 : args.indexOf("find");
-	if (findIndex < 0) return undefined;
+	const findIndex = args[0] === "--session" ? 2 : 0;
+	if (args[findIndex] !== "find") return undefined;
 	const locator = args[findIndex + 1];
 	const value = args[findIndex + 2];
 	const action = args[findIndex + 3];
-	if (!locator || !value || !isAgentBrowserSemanticActionName(action)) return undefined;
+	if (!locator || !value || !isAgentBrowserSemanticActionName(action) || action === "select") return undefined;
 	const text = action === "fill" ? args[findIndex + 4] : undefined;
 	if (action === "fill" && (!text || text.startsWith("-"))) return undefined;
 	if (locator === "role") {
@@ -1200,6 +1242,8 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
 	const action = input.action;
 	const locator = input.locator;
 	const value = input.value;
+	const values = input.values;
+	const selector = input.selector;
 	const text = input.text;
 	const role = input.role;
 	const name = input.name;
@@ -1207,6 +1251,27 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
 	if (typeof action !== "string" || !AGENT_BROWSER_SEMANTIC_ACTIONS.includes(action as AgentBrowserSemanticActionName)) {
 		return { error: `semanticAction.action must be one of: ${AGENT_BROWSER_SEMANTIC_ACTIONS.join(", ")}.` };
 	}
+	if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) {
+		return { error: "semanticAction.session must be a non-empty string when provided." };
+	}
+	if (action === "select") {
+		if (locator !== undefined || role !== undefined || name !== undefined) {
+			return { error: "semanticAction.locator, role, and name are not supported for select; use selector plus value or values." };
+		}
+		if (text !== undefined) {
+			return { error: "semanticAction.text is not supported for select; use value or values for option values." };
+		}
+		if (typeof selector !== "string" || selector.trim().length === 0) {
+			return { error: "semanticAction.selector is required for select." };
+		}
+		const selectedValues = getSelectValues(input, "semanticAction");
+		if (selectedValues.error) return { error: selectedValues.error };
+		const args = typeof session === "string" ? ["--session", session, "select", selector, ...(selectedValues.values as string[])] : ["select", selector, ...(selectedValues.values as string[])];
+		return { compiled: { action: "select", selector, values: selectedValues.values, args } };
+	}
+	if (selector !== undefined || values !== undefined) {
+		return { error: "semanticAction.selector and values are only supported for select actions." };
+	}
 	if (typeof locator !== "string" || !AGENT_BROWSER_SEMANTIC_LOCATORS.includes(locator as AgentBrowserSemanticLocator)) {
 		return { error: `semanticAction.locator must be one of: ${AGENT_BROWSER_SEMANTIC_LOCATORS.join(", ")}.` };
 	}
@@ -1228,9 +1293,6 @@ function compileAgentBrowserSemanticAction(input: unknown): { compiled?: Compile
 	if (name !== undefined && (locator !== "role" || typeof name !== "string" || name.length === 0)) {
 		return { error: "semanticAction.name is only supported as a non-empty string for locator=role." };
 	}
-	if (session !== undefined && (typeof session !== "string" || session.trim().length === 0)) {
-		return { error: "semanticAction.session must be a non-empty string when provided." };
-	}
 	const args = typeof session === "string" ? ["--session", session, "find", locator, value, action] : ["find", locator, value, action];
 	if (action === "fill") {
 		args.push(text as string);
@@ -1614,6 +1676,9 @@ async function isDirectAgentBrowserBashAllowed(cwd: string): Promise<boolean> {
 const NAVIGATION_SUMMARY_COMMANDS = new Set(["back", "click", "dblclick", "forward", "reload"]);
 const NAVIGATION_SUMMARY_EVAL = `({ title: document.title, url: location.href })`;
+// These commands can expose URLs for inspected resources (request URLs, cookie/storage scope, or log sources),
+// but they do not navigate the active tab and must not poison page-scoped ref guards.
+const READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS = new Set(["console", "cookies", "errors", "network", "storage"]);
 interface NavigationSummary {
 	title?: string;
@@ -2259,6 +2324,15 @@ function extractSessionTabTargetFromData(data: unknown): SessionTabTarget | unde
 	return undefined;
 }
+function isReadOnlyDiagnosticSessionTargetCommand(command: string | undefined, _subcommand: string | undefined): boolean {
+	return command !== undefined && READ_ONLY_DIAGNOSTIC_SESSION_TARGET_COMMANDS.has(command);
+}
+function extractSessionTabTargetFromCommandData(commandTokens: string[], data: unknown): SessionTabTarget | undefined {
+	const [command, subcommand] = commandTokens;
+	return isReadOnlyDiagnosticSessionTargetCommand(command, subcommand) ? undefined : extractSessionTabTargetFromData(data);
+}
 function extractBatchResultCommand(item: Record<string, unknown>): string[] {
 	return Array.isArray(item.command) ? item.command.filter((token): token is string => typeof token === "string") : [];
 }
@@ -2290,7 +2364,7 @@ function extractSessionTabTargetFromBatchResults(data: unknown): SessionTabTarge
 			pendingTitle = undefined;
 			continue;
 		}
-		const resultTarget = extractSessionTabTargetFromData(result);
+		const resultTarget = extractSessionTabTargetFromCommandData([name, subcommand].filter((token): token is string => token !== undefined), result);
 		if (resultTarget) {
 			currentTarget = resultTarget;
 		}
@@ -2299,6 +2373,40 @@ function extractSessionTabTargetFromBatchResults(data: unknown): SessionTabTarge
 	return currentTarget;
 }
+function batchContainsOnlyReadOnlyDiagnosticTargets(data: unknown): boolean {
+	if (!Array.isArray(data) || data.length === 0) {
+		return false;
+	}
+	return data.every((item) => {
+		if (!isRecord(item)) return false;
+		const [command, subcommand] = extractBatchResultCommand(item);
+		return isReadOnlyDiagnosticSessionTargetCommand(command, subcommand);
+	});
+}
+function getRestoredSessionTabTarget(details: Record<string, unknown>, command: string | undefined, subcommand: string | undefined): SessionTabTarget | undefined {
+	if (isReadOnlyDiagnosticSessionTargetCommand(command, subcommand)) {
+		return undefined;
+	}
+	const storedTarget = isRecord(details.sessionTabTarget)
+		? normalizeSessionTabTarget({
+				title: typeof details.sessionTabTarget.title === "string" ? details.sessionTabTarget.title : undefined,
+				url: typeof details.sessionTabTarget.url === "string" ? details.sessionTabTarget.url : undefined,
+		  })
+		: undefined;
+	if (command !== "batch") {
+		return storedTarget;
+	}
+	const batchTarget = extractSessionTabTargetFromBatchResults(details.data);
+	if (batchTarget) {
+		return batchTarget;
+	}
+	if (isRecord(details.compiledNetworkSourceLookup) || batchContainsOnlyReadOnlyDiagnosticTargets(details.data)) {
+		return undefined;
+	}
+	return storedTarget;
+}
 function restoreSessionTabTargetsFromBranch(branch: unknown[]): Map<string, OrderedSessionTabTarget> {
 	const restoredTargets = new Map<string, OrderedSessionTabTarget>();
 	let restoredOrder = 0;
@@ -2319,17 +2427,13 @@ function restoreSessionTabTargetsFromBranch(branch: unknown[]): Map<string, Orde
 			continue;
 		}
 		const command = typeof details.command === "string" ? details.command : undefined;
+		const subcommand = typeof details.subcommand === "string" ? details.subcommand : undefined;
 		if (command === "close" && message.isError !== true) {
 			restoredOrder += 1;
 			restoredTargets.delete(sessionName);
 			continue;
 		}
-		const sessionTabTarget = isRecord(details.sessionTabTarget)
-			? normalizeSessionTabTarget({
-					title: typeof details.sessionTabTarget.title === "string" ? details.sessionTabTarget.title : undefined,
-					url: typeof details.sessionTabTarget.url === "string" ? details.sessionTabTarget.url : undefined,
-			  })
-			: undefined;
+		const sessionTabTarget = getRestoredSessionTabTarget(details, command, subcommand);
 		if (sessionTabTarget) {
 			restoredOrder += 1;
 			restoredTargets.set(sessionName, { order: restoredOrder, target: sessionTabTarget });
@@ -2751,14 +2855,18 @@ function deriveSessionTabTarget(options: {
 	data: unknown;
 	navigationSummary?: NavigationSummary;
 	previousTarget?: SessionTabTarget;
+	subcommand?: string;
 }): SessionTabTarget | undefined {
 	if (options.command === "close") {
 		return undefined;
 	}
+	const commandDataTarget = isReadOnlyDiagnosticSessionTargetCommand(options.command, options.subcommand)
+		? undefined
+		: extractSessionTabTargetFromData(options.data);
 	return (
 		normalizeSessionTabTarget(options.navigationSummary) ??
 		extractSessionTabTargetFromBatchResults(options.data) ??
-		extractSessionTabTargetFromData(options.data) ??
+		commandDataTarget ??
 		options.previousTarget
 	);
 }
@@ -3353,14 +3461,16 @@ function looksLikeFunctionEvalStdin(stdin: string | undefined): boolean {
 	return /^(?:async\s+)?function\b/.test(trimmed) || /^(?:async\s*)?\([^)]*\)\s*=>/.test(trimmed) || /^(?:async\s+)?[A-Za-z_$][\w$]*\s*=>/.test(trimmed);
 }
-function isEmptyRecord(value: unknown): boolean {
-	return isRecord(value) && Object.keys(value).length === 0;
+function isPlainEmptyObject(value: unknown): boolean {
+	if (!isRecord(value) || Array.isArray(value)) return false;
+	const prototype = Object.getPrototypeOf(value);
+	return (prototype === Object.prototype || prototype === null) && Object.keys(value).length === 0;
 }
 function getEvalStdinHint(options: { command?: string; data: unknown; stdin?: string }): EvalStdinHint | undefined {
 	if (options.command !== "eval" || !looksLikeFunctionEvalStdin(options.stdin) || !isRecord(options.data)) return undefined;
 	const result = options.data.result;
-	if (!isEmptyRecord(result)) return undefined;
+	if (!isPlainEmptyObject(result)) return undefined;
 	return {
 		reason: "eval --stdin received a function-shaped snippet and the upstream JSON result was an empty object, which often means the function itself was returned or serialized instead of invoked.",
 		suggestion: "Pass a plain expression such as `({ title: document.title })`, or invoke the function explicitly, for example `(() => ({ title: document.title }))()`.",
@@ -4009,6 +4119,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 			const redactedCompiledNetworkSourceLookup = compiledNetworkSourceLookup && redactedCompiledNetworkSourceLookupSteps
 				? {
 					...compiledNetworkSourceLookup,
+					args: redactNetworkSourceLookupArgs(compiledNetworkSourceLookup.args),
 					query: {
 						...compiledNetworkSourceLookup.query,
 						filter: redactNetworkSourceLookupUrl(compiledNetworkSourceLookup.query.filter),
@@ -4435,12 +4546,13 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					const observedSessionTabTarget =
 						normalizeSessionTabTarget(navigationSummary) ??
 						extractSessionTabTargetFromBatchResults(presentationEnvelope?.data) ??
-						extractSessionTabTargetFromData(presentationEnvelope?.data);
+						extractSessionTabTargetFromCommandData(commandTokens, presentationEnvelope?.data);
 					let currentSessionTabTarget = deriveSessionTabTarget({
 						command: executionPlan.commandInfo.command,
 						data: presentationEnvelope?.data,
 						navigationSummary,
 						previousTarget: priorSessionTabTarget,
+						subcommand: executionPlan.commandInfo.subcommand,
 					});
 					let aboutBlankSessionMismatch: AboutBlankSessionMismatch | undefined;
 					const shouldTreatAboutBlankAsMismatch =
@@ -4826,7 +4938,7 @@ export default function agentBrowserExtension(pi: ExtensionAPI) {
 					if (comboboxFocusDiagnostic) {
 						(nextActions ??= []).push(...buildComboboxFocusNextActions(executionPlan.sessionName));
 					}
-					if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction) {
+					if (categoryDetails.failureCategory === "stale-ref" && redactedCompiledSemanticAction && isCompiledSemanticActionFindCommand(compiledSemanticAction)) {
 						(nextActions ??= []).push({
 							id: "retry-semantic-action-after-stale-ref",
 							params: { args: redactedCompiledSemanticAction.args },

package/extensions/agent-browser/lib/playbook.ts CHANGED

@@ -18,14 +18,14 @@ export function buildInstalledDocsGuideline(paths: { readmePath: string; command
 }
 export const QUICK_START_GUIDELINES = [
-	"Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin find-locator shorthand compiled to find argv), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
+	"Quick start mental model: use exactly one of args (exact agent-browser CLI args after the binary), semanticAction (a thin shorthand compiled to find argv for locator actions or select argv for native dropdowns), job (a constrained short-workflow schema compiled to batch), qa (a lightweight QA preset built on job/batch), or the experimental sourceLookup / networkSourceLookup helpers (each compiled to batch); stdin is only for batch, eval --stdin, auth save --password-stdin, and wrapper-generated batch stdin from job, qa, sourceLookup, or networkSourceLookup, and other command/stdin combinations are rejected before launch; sessionMode=fresh switches the extension-managed pi-scoped session to a fresh upstream launch when you need new --profile, --session-name, --cdp, --state, --auto-connect, --init-script, --enable, -p/--provider, or iOS --device state.",
 	"There is no first-class reusable named browser recipe runtime above top-level job, the qa preset, and raw batch stdin; keep recurring flows in documentation examples or those inputs (closed RQ-0068; see docs/ARCHITECTURE.md#no-reusable-recipe-layer-yet).",
 	"Common first calls: { args: [\"open\", \"https://example.com\"] } then { args: [\"snapshot\", \"-i\"] }; after navigation, use { args: [\"click\", \"@e2\"] } then { args: [\"snapshot\", \"-i\"] }.",
-	"Locator-first clicks and fills without hand-building find argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } } or { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded try-*-candidate next actions (and an Agent-browser candidate fallbacks prose block) for specific placeholder/text/label shapes, and stale-ref failures can return retry-semantic-action-after-stale-ref when retry safety is provable.",
-	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { job: { steps: [{ action: \"open\", url: \"https://example.com\" }, { action: \"assertText\", text: \"Example Domain\" }, { action: \"screenshot\", path: \".dogfood/example.png\" }] } }, { qa: { url: \"https://example.com\", expectedText: \"Example Domain\", screenshotPath: \".dogfood/qa-example.png\" } }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }.",
-	"High-value command reference: download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
+	"Locator-first clicks/fills and native select changes without hand-building argv: { semanticAction: { action: \"click\", locator: \"text\", value: \"Close\" } }, { semanticAction: { action: \"fill\", locator: \"label\", value: \"Email\", text: \"user@example.com\" } }, or { semanticAction: { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } }; add semanticAction.session when targeting a named upstream browser session; details.compiledSemanticAction shows the semantic target, while details.effectiveArgs may show a resolved current @ref for active-session role/name click/check/uncheck actions to avoid hidden duplicate matches; selector-not-found failures may append bounded try-*-candidate next actions (and an Agent-browser candidate fallbacks prose block) for specific placeholder/text/label shapes, and stale-ref failures can return retry-semantic-action-after-stale-ref for compiled find actions when retry safety is provable.",
+	"Common advanced calls: { args: [\"batch\"], stdin: \"[[\\\"open\\\",\\\"https://example.com\\\"],[\\\"snapshot\\\",\\\"-i\\\"]]\" }, { job: { steps: [{ action: \"open\", url: \"https://example.com\" }, { action: \"assertText\", text: \"Example Domain\" }, { action: \"screenshot\", path: \".dogfood/example.png\" }] } }, { qa: { url: \"https://example.com\", expectedText: \"Example Domain\", screenshotPath: \".dogfood/qa-example.png\" } }, { args: [\"eval\", \"--stdin\"], stdin: \"document.title\" }, { args: [\"auth\", \"save\", \"name\", \"--password-stdin\"], stdin: \"<password from user-approved secret source>\" }, { args: [\"--profile\", \"Default\", \"open\", \"https://example.com/account\"], sessionMode: \"fresh\" }, and { args: [\"open\", \"--enable\", \"react-devtools\", \"https://example.com\"], sessionMode: \"fresh\" }. For app pages with a native dropdown, job steps can include { action: \"select\", selector: \"#flavor\", value: \"chocolate\" } before the dependent assertion.",
+	"High-value command reference: select <selector> <value...> changes native dropdown values; download <selector> <path> saves a file triggered by a click; get title/url/text/html/value/attr/count reads page state; screenshot [path] captures an image; pdf <path> saves a PDF; tab list and tab <tab-id-or-label> inspect or recover the active tab; react tree/inspect/renders/suspense introspect React after --enable react-devtools; vitals [url] measures Core Web Vitals; pushstate <url> performs SPA navigation.",
 	"For artifact-producing commands, read the visible artifact block and details.artifactVerification before using files: check requested path, absolute path, existence, size bytes, artifact kind, optional mediaType, status, optional limitation, and verified/missing/pending/unverified counts. details.artifacts contains per-file metadata. Browser close does not delete explicit saved files; if close reports details.artifactCleanup, use host file tools to remove paths listed in explicitArtifactPaths (when non-empty) after inspection. For annotated screenshots inside batch, put --annotate in top-level args (for example { args: [\"--annotate\", \"batch\"], stdin: \"[[\\\"screenshot\\\",\\\"/tmp/page.png\\\"]]\" }) rather than inside the screenshot step.",
-	"When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, safety notes, or artifactPath for saved files.",
+	"When details.nextActions is present, prefer those exact native agent_browser follow-up payloads over prose guidance; they may include args, stdin, sessionMode, networkSourceLookup, safety notes, or artifactPath for saved files.",
 ] as const;
 export const BRAVE_SEARCH_PROMPT_GUIDELINE =
@@ -45,14 +45,14 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"For first-navigation setup, use open without a URL plus network route --resource-type <csv>, cookies set --curl <file>, or --init-script/--enable before navigate/opening the target page.",
 	"For stateful browser context work, prefer purpose-specific page actions before dumping browser data: use auth save --password-stdin with the tool stdin field for credentials, state save/load for portable test state, cookies get/set/clear and storage local|session only when the task needs those values, and expect cookie/storage/auth/state summaries to redact credential-like fields.",
 	"For batch chains that touch cookies, storage, auth, or other secret-bearing commands, use details.batchSteps for per-step artifacts, categories, spill paths, and full structured errors; top-level details.data on batch is only a compact redacted step matrix (success, argv-redacted command, redacted result or scrubbed error text) built from the same presentation rules as standalone calls.",
-	"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
+	"For non-core families, pass current upstream commands through the native tool directly: network route/requests/har, diff snapshot/screenshot/url, trace/profiler/record, console/errors/highlight/inspect/clipboard, stream enable/disable/status, dashboard start/stop, and chat. For compact network requests output, prefer details.nextActions for request detail, actionable failed-request networkSourceLookup, filtering, or HAR capture follow-ups instead of guessing request-id syntax. Artifact-producing commands report details.artifacts and verification state; long-running starts such as stream, dashboard, trace/profiler, and record should be paired with the matching stop/disable command when the task is done.",
 	"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
 	"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
 	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. Only use wait with an explicit argument like milliseconds, --load <state>, --url <matcher>, --fn <js>, or --text <matcher>.",
 	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
 	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
 	"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",
-	"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For comboboxes, a click/semanticAction may only focus the field; re-snapshot and fall back to type, press Enter/arrow keys, select, or visible option refs.",
+	"On dashboards with nested scroll containers, verify scroll with a screenshot or fresh snapshot -i; if the viewport did not move, prefer scrollintoview <@ref> or target the actual scrollable region. For native selects, use select <selector> <value...> (or semanticAction/job select) instead of clicking option refs; for custom comboboxes, a click/semanticAction may only focus the field, so re-snapshot and fall back to type, press Enter/arrow keys, or visible option refs.",
 	"When using eval --stdin, scope checks and actions to the target element or route whenever possible instead of relying on broad page-wide text heuristics.",
 	"When using eval --stdin for extraction, return the value you want instead of relying on console.log as the primary result channel. Prefer plain expressions like ({ title: document.title }) or explicitly invoked functions like (() => ({ title: document.title }))(); if a function-shaped snippet returns {}, details.evalStdinHint may warn that the function was serialized instead of called. If get text on a CSS selector surfaces details.selectorTextVisibility or selectorTextVisibilityAll, prefer a visible @ref, a more specific selector, or the inspect-visible-text-candidates nextAction over hidden tab content.",
 	"When details.pageChangeSummary is present, use changeType and summary as a compact signal for navigation, DOM mutation, confirmations, or artifacts; when nextActionIds is set, match those ids to entries in details.nextActions (or per-step nextActions inside batch) for concrete follow-up payloads instead of inferring from prose alone. If a no-navigation click surfaces details.overlayBlockers, inspect the fresh snapshot evidence before using a close/dismiss candidate nextAction; ordinary page chrome without dialog/alertdialog evidence should not trigger this diagnostic.",
@@ -92,12 +92,12 @@ export function buildSharedBrowserPlaybookGuidelines(options: { includeBraveSear
 const RUNTIME_PROMPT_GUIDELINES = [
 	"Use exactly one input mode: args, semanticAction, job, qa, sourceLookup, or networkSourceLookup. Use stdin only for batch, eval --stdin, auth save --password-stdin, or wrapper-generated batch modes.",
 	"Common flow: open, snapshot -i, interact with current @refs or semanticAction, then re-snapshot after navigation, scrolling, rerenders, or DOM changes. For ordinary forms, batch same-snapshot fill @refs before the submit/click step; split if a fill may autosubmit, navigate, or rerender later fields. Respect explicit stop boundaries: if the user says to stop before order/post/purchase/submit, do not click that final action.",
-	"Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. Use current @refs only from the latest same-page snapshot.",
+	"Prefer stable locators for visible text/names: semanticAction or upstream find with role/text/label/placeholder/alt/title/testid. For native selects, prefer select <selector> <value...> or semanticAction/job select over clicking option refs. Use current @refs only from the latest same-page snapshot.",
 	"For tasks that explicitly require the user's signed-in/account-specific content, start with --profile Default plus sessionMode=fresh unless the user asks otherwise; visible page content is model-visible. Use sessionMode=fresh for other launch-scoped state such as --session-name, --cdp, --state, --auto-connect, --init-script, --enable, providers, or iOS devices; otherwise let the implicit session carry continuity.",
 	"For requested screenshots, recordings, downloads, PDFs, or HARs, save the exact user path and read details.artifactVerification before claiming success; report unavailable/missing artifacts instead of silently substituting paths. record stop needs ffmpeg on PATH. close does not delete saved files; cleanup is host-owned.",
-	"When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors.",
+	"When details.nextActions is present, prefer those exact follow-up payloads over prose or guessed selectors; network request diagnostics may include request-detail, actionable failed-request networkSourceLookup, filter, or HAR-capture follow-ups.",
 	"For dense snapshots, check Omitted high-value controls and details.data.highValueControlRefIds before opening large spill files.",
-	"For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. Combobox clicks may only focus; re-snapshot and fall back to type, Enter/arrows, select, or option refs.",
+	"For dashboards, verify scroll with screenshot/snapshot; if nothing moved, use scrollintoview <@ref> or target the real scroll region. For native selects use select/semanticAction/job select instead of option refs; custom combobox clicks may only focus, so re-snapshot and fall back to type, Enter/arrows, or visible option refs.",
 	"For extraction, prefer get title/url/text/html/value/attr/count or eval --stdin with a plain expression in the tool stdin field; do not rely on console.log. When reading several known refs/selectors, use batch with JSON-array stdin (for example [[\"get\",\"text\",\"@e1\"]]) or eval --stdin instead of many serial get calls. If selector visibility warnings appear, prefer visible @refs or nextActions.",
 	"For non-core debugging, pass upstream commands through args: network, diff, trace/profiler/record, console/errors, stream, dashboard, chat, react, vitals, pushstate, dialog, frame, tab.",
 ] as const;