pi-agent-browser-native 0.2.32 → 0.2.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/CHANGELOG.md +36 -0
  2. package/README.md +61 -20
  3. package/docs/ARCHITECTURE.md +9 -2
  4. package/docs/COMMAND_REFERENCE.md +45 -14
  5. package/docs/ELECTRON.md +23 -4
  6. package/docs/RELEASE.md +15 -5
  7. package/docs/REQUIREMENTS.md +1 -1
  8. package/docs/SUPPORT_MATRIX.md +36 -22
  9. package/docs/TOOL_CONTRACT.md +90 -31
  10. package/extensions/agent-browser/index.ts +407 -4373
  11. package/extensions/agent-browser/lib/input-modes/electron.ts +170 -0
  12. package/extensions/agent-browser/lib/input-modes/job.ts +265 -0
  13. package/extensions/agent-browser/lib/input-modes/lookups.ts +447 -0
  14. package/extensions/agent-browser/lib/input-modes/params.ts +188 -0
  15. package/extensions/agent-browser/lib/input-modes/semantic-action.ts +107 -0
  16. package/extensions/agent-browser/lib/input-modes/shared.ts +46 -0
  17. package/extensions/agent-browser/lib/input-modes/types.ts +221 -0
  18. package/extensions/agent-browser/lib/input-modes.ts +44 -0
  19. package/extensions/agent-browser/lib/orchestration/browser-run/diagnostics.ts +762 -0
  20. package/extensions/agent-browser/lib/orchestration/browser-run/final-result.ts +450 -0
  21. package/extensions/agent-browser/lib/orchestration/browser-run/index.ts +46 -0
  22. package/extensions/agent-browser/lib/orchestration/browser-run/prepare.ts +736 -0
  23. package/extensions/agent-browser/lib/orchestration/browser-run/process-output.ts +413 -0
  24. package/extensions/agent-browser/lib/orchestration/browser-run/session-state.ts +868 -0
  25. package/extensions/agent-browser/lib/orchestration/browser-run/types.ts +482 -0
  26. package/extensions/agent-browser/lib/orchestration/browser-run.ts +1 -0
  27. package/extensions/agent-browser/lib/orchestration/input-plan.ts +338 -0
  28. package/extensions/agent-browser/lib/playbook.ts +22 -20
  29. package/extensions/agent-browser/lib/process.ts +106 -4
  30. package/extensions/agent-browser/lib/results/action-recommendations.ts +269 -0
  31. package/extensions/agent-browser/lib/results/artifact-manifest.ts +114 -0
  32. package/extensions/agent-browser/lib/results/artifact-state.ts +13 -0
  33. package/extensions/agent-browser/lib/results/categories.ts +106 -0
  34. package/extensions/agent-browser/lib/results/contracts.ts +220 -0
  35. package/extensions/agent-browser/lib/results/editable-ref-evidence.ts +72 -0
  36. package/extensions/agent-browser/lib/results/envelope.ts +2 -1
  37. package/extensions/agent-browser/lib/results/network.ts +64 -0
  38. package/extensions/agent-browser/lib/results/next-actions.ts +117 -0
  39. package/extensions/agent-browser/lib/results/presentation/artifacts.ts +506 -0
  40. package/extensions/agent-browser/lib/results/presentation/batch.ts +355 -0
  41. package/extensions/agent-browser/lib/results/presentation/common.ts +53 -0
  42. package/extensions/agent-browser/lib/results/presentation/content.ts +36 -0
  43. package/extensions/agent-browser/lib/results/presentation/diagnostics.ts +730 -0
  44. package/extensions/agent-browser/lib/results/presentation/errors.ts +125 -0
  45. package/extensions/agent-browser/lib/results/presentation/large-output.ts +182 -0
  46. package/extensions/agent-browser/lib/results/presentation/navigation.ts +216 -0
  47. package/extensions/agent-browser/lib/results/presentation/registry.ts +182 -0
  48. package/extensions/agent-browser/lib/results/presentation/semantic-action.ts +133 -0
  49. package/extensions/agent-browser/lib/results/presentation/skills.ts +143 -0
  50. package/extensions/agent-browser/lib/results/presentation.ts +96 -2403
  51. package/extensions/agent-browser/lib/results/recovery-actions.ts +139 -0
  52. package/extensions/agent-browser/lib/results/recovery-next-actions.ts +71 -0
  53. package/extensions/agent-browser/lib/results/selector-recovery.ts +312 -0
  54. package/extensions/agent-browser/lib/results/shared.ts +17 -789
  55. package/extensions/agent-browser/lib/results/snapshot-high-value-controls.ts +262 -0
  56. package/extensions/agent-browser/lib/results/snapshot-refs.ts +100 -0
  57. package/extensions/agent-browser/lib/results/snapshot-segments.ts +366 -0
  58. package/extensions/agent-browser/lib/results/snapshot-spill.ts +63 -0
  59. package/extensions/agent-browser/lib/results/snapshot.ts +37 -489
  60. package/extensions/agent-browser/lib/results/text.ts +40 -0
  61. package/extensions/agent-browser/lib/results.ts +16 -5
  62. package/extensions/agent-browser/lib/session-page-state.ts +486 -0
  63. package/package.json +2 -1
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Purpose: Centralize recovery-oriented nextAction ids and action construction.
3
+ * Responsibilities: Build tab/about:blank/no-active-page/connected-session follow-ups and rich-input recovery ids.
4
+ * Scope: Recovery action contracts only; result category classification and artifact follow-ups live elsewhere.
5
+ * Usage: Imported by shared result action builders and the extension entrypoint.
6
+ * Invariants/Assumptions: Ids are public machine-readable contracts mirrored by docs and tests.
7
+ */
8
+
9
+ import { buildNextToolAction, type AgentBrowserNextAction, withOptionalSessionArgs } from "./next-actions.js";
10
+
11
/** Recovery scenarios for which recovery nextActions can be built. */
export type AgentBrowserRecoveryKind =
  | "about-blank"
  | "connected-session"
  | "no-active-page"
  | "tab-drift";
12
+
13
/** Describes one recovery situation; consumed when recovery nextActions are assembled. */
export interface AgentBrowserRecoveryContext {
  /** Which recovery scenario is being handled. */
  kind: AgentBrowserRecoveryKind;
  /** When exactly `true`, the snapshot follow-up is a plain snapshot rather than a tab-selecting batch. */
  recoveryApplied?: boolean;
  /** Tab id candidate; only values shaped like `t<digits>` yield tab-selection follow-ups. */
  selectedTab?: string;
  /** Session name forwarded to the session-argv helper when building action args. */
  sessionName?: string;
  /** Title used in human-readable reasons, when non-empty. */
  targetTitle?: string;
  /** URL used in human-readable reasons, when non-empty. */
  targetUrl?: string;
}
21
+
22
/**
 * Machine-readable ids for tab/session recovery nextActions.
 * The module header states these ids are public contracts mirrored by docs and tests,
 * so both keys and values must stay stable.
 */
export const AGENT_BROWSER_RECOVERY_NEXT_ACTION_IDS = {
  aboutBlankListTabs: "list-tabs-for-about-blank-recovery",
  connectedSessionListTabs: "list-connected-session-tabs",
  genericTabDriftListTabs: "list-tabs-for-recovery",
  noActivePageListTabs: "list-tabs-after-no-active-page",
  selectIntendedTabAfterDrift: "select-intended-tab-after-drift",
  snapshotAfterTabRecovery: "snapshot-after-tab-recovery",
  tabDriftListTabs: "list-tabs-for-tab-drift-recovery",
} as const;
31
+
32
/**
 * Base ids for rich-input recovery nextActions, keyed by the interaction used
 * to reach the editable ref. Numbered variants are derived from these bases.
 */
export const AGENT_BROWSER_RICH_INPUT_RECOVERY_NEXT_ACTION_IDS = {
  click: "click-current-editable-ref",
  focus: "focus-current-editable-ref",
} as const;
36
+
37
/** Keys of the rich-input recovery id table (currently `"click"` and `"focus"`). */
export type AgentBrowserRichInputRecoveryNextActionKind =
  keyof typeof AGENT_BROWSER_RICH_INPUT_RECOVERY_NEXT_ACTION_IDS;
38
+
39
/**
 * Derives an action id from a base id.
 *
 * @param baseId Id to use as-is or as a prefix.
 * @param index Zero-based position of the candidate.
 * @param total Number of candidates competing for the same base id.
 * @returns `baseId` unchanged unless `total` exceeds one, in which case a one-based suffix is appended.
 */
function getNumberedAgentBrowserNextActionId(baseId: string, index: number, total: number): string {
  if (total > 1) {
    const ordinal = index + 1;
    return `${baseId}-${ordinal}`;
  }
  return baseId;
}
42
+
43
/**
 * Resolves the nextAction id for one rich-input recovery candidate.
 *
 * @param kind Interaction flavour (`"focus"` or `"click"`).
 * @param index Zero-based candidate position.
 * @param candidateCount Total number of candidates; a numeric suffix is only added when it exceeds one.
 * @returns The base id for `kind`, numbered when several candidates exist.
 */
export function getAgentBrowserRichInputRecoveryNextActionId(kind: AgentBrowserRichInputRecoveryNextActionKind, index: number, candidateCount: number): string {
  const baseId = AGENT_BROWSER_RICH_INPUT_RECOVERY_NEXT_ACTION_IDS[kind];
  return getNumberedAgentBrowserNextActionId(baseId, index, candidateCount);
}
46
+
47
/**
 * Lists every rich-input recovery id for a candidate set.
 *
 * @param candidateCount Number of editable candidates.
 * @returns Ids ordered per candidate as focus id followed by click id.
 */
export function getAgentBrowserRichInputRecoveryNextActionIds(candidateCount: number): string[] {
  const collected: string[] = [];
  let position = 0;
  while (position < candidateCount) {
    const focusId = getAgentBrowserRichInputRecoveryNextActionId("focus", position, candidateCount);
    const clickId = getAgentBrowserRichInputRecoveryNextActionId("click", position, candidateCount);
    collected.push(focusId, clickId);
    position += 1;
  }
  return collected;
}
57
+
58
/**
 * Produces the human-readable target phrase used inside recovery reasons.
 *
 * @param recovery Context whose `targetTitle` / `targetUrl` are inspected.
 * @returns The non-empty title and URL joined by `" at "`, or `"the intended tab"` when neither is usable.
 */
function getRecoveryTargetDescription(recovery: AgentBrowserRecoveryContext): string {
  const parts: string[] = [];
  for (const part of [recovery.targetTitle, recovery.targetUrl]) {
    if (part !== undefined && part.length > 0) parts.push(part);
  }
  if (parts.length === 0) return "the intended tab";
  return parts.join(" at ");
}
62
+
63
/**
 * Type guard for tab ids of the form `t` followed by one or more digits.
 *
 * @param tab Candidate tab id; `undefined` is treated as an empty string.
 * @returns `true` only when the whole value matches `t<digits>`.
 */
function isStableTabId(tab: string | undefined): tab is string {
  const candidate = tab ?? "";
  const stableTabPattern = /^t\d+$/;
  return stableTabPattern.test(candidate);
}
66
+
67
/**
 * Builds the "refresh refs" follow-up for a recovered tab.
 *
 * When `recovery.recoveryApplied` is exactly `true` the action is a plain `snapshot -i`.
 * Otherwise it is a `batch` whose stdin selects `tabId` and then snapshots, and the
 * reason/safety strings gain a sentence explaining that.
 *
 * @param options.id Action id to emit.
 * @param options.reason Base reason text.
 * @param options.recovery Recovery context; only `recoveryApplied` is read here.
 * @param options.safety Base safety text.
 * @param options.sessionArgs Callback that maps raw argv to the final argv.
 * @param options.tabId Tab id placed in the batch stdin.
 */
function buildTabSnapshotRecoveryAction(options: {
  id: string;
  reason: string;
  recovery: AgentBrowserRecoveryContext;
  safety: string;
  sessionArgs: (args: string[]) => string[];
  tabId: string;
}): AgentBrowserNextAction {
  const { id, reason, safety, tabId } = options;
  const alreadyRecovered = options.recovery.recoveryApplied === true;
  if (!alreadyRecovered) {
    // Tab selection and snapshot travel together as one batch payload.
    const batchSteps = [["tab", tabId], ["snapshot", "-i"]];
    return buildNextToolAction({
      args: options.sessionArgs(["batch"]),
      id,
      reason: `${reason} The batch selects the stable tab before snapshotting.`,
      safety: `${safety} The snapshot retry is atomic with tab selection, so it does not assume the intended tab is already active.`,
      stdin: JSON.stringify(batchSteps),
    });
  }
  return buildNextToolAction({
    args: options.sessionArgs(["snapshot", "-i"]),
    id,
    reason,
    safety,
  });
}
91
+
92
/**
 * Builds the ordered recovery nextActions for a recovery context.
 *
 * - `connected-session` and `no-active-page` each yield a single `tab list` action.
 * - `about-blank` and `tab-drift` yield a `tab list` action, followed (only when
 *   `selectedTab` looks like `t<digits>`) by a tab-selection action and a snapshot action.
 *
 * @param recovery Recovery context; `sessionName` is forwarded to the session-argv helper.
 * @returns One or three actions, in the order described above.
 */
export function buildRecoveryNextActions(recovery: AgentBrowserRecoveryContext): AgentBrowserNextAction[] {
  const ids = AGENT_BROWSER_RECOVERY_NEXT_ACTION_IDS;
  const sessionArgs = (args: string[]) => withOptionalSessionArgs(recovery.sessionName, args);

  switch (recovery.kind) {
    case "connected-session": {
      const listConnectedTabs = buildNextToolAction({
        args: sessionArgs(["tab", "list"]),
        id: ids.connectedSessionListTabs,
        reason: "Inspect tabs exposed by the connected CDP endpoint before assuming the app surface is active.",
        safety: "Read-only. Raw connect can succeed before the desktop app has an active rendered page.",
      });
      return [listConnectedTabs];
    }
    case "no-active-page": {
      const listSessionTabs = buildNextToolAction({
        args: sessionArgs(["tab", "list"]),
        id: ids.noActivePageListTabs,
        reason: "The snapshot found no active page; inspect the session tabs before retrying refs.",
        safety: "Read-only tab listing for the same connected session.",
      });
      return [listSessionTabs];
    }
    default:
      break;
  }

  // Remaining kinds: "about-blank" and "tab-drift".
  const targetDescription = getRecoveryTargetDescription(recovery);
  const listId = recovery.kind === "about-blank" ? ids.aboutBlankListTabs : ids.tabDriftListTabs;
  const listAction = buildNextToolAction({
    args: sessionArgs(["tab", "list"]),
    id: listId,
    reason: `Inspect tabs for ${targetDescription} before continuing after tab drift.`,
    safety: "Read-only tab listing; prefer stable tN tab ids over positional tab guesses.",
  });

  const knownTab = recovery.selectedTab;
  if (!isStableTabId(knownTab)) return [listAction];

  const selectAction = buildNextToolAction({
    args: sessionArgs(["tab", knownTab]),
    id: ids.selectIntendedTabAfterDrift,
    reason: `Re-select ${targetDescription} with the stable tab id already observed by the wrapper.`,
    safety: "Switches only the active tab in this browser session; it does not mutate page content.",
  });
  const snapshotAction = buildTabSnapshotRecoveryAction({
    id: ids.snapshotAfterTabRecovery,
    reason: "Refresh interactive refs on the recovered tab before using @e refs again.",
    recovery,
    safety: "Read-only snapshot. Treat previous refs as stale until this succeeds.",
    sessionArgs,
    tabId: knownTab,
  });
  return [listAction, selectAction, snapshotAction];
}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Purpose: Build session-aware recovery nextActions that combine result category policy with known session/tab context.
3
+ * Responsibilities: Prefix recovery argv with the active session and adapt tab/about:blank/no-active/stale-ref contexts into stable nextAction lists.
4
+ * Scope: Recovery nextAction assembly only; diagnostic detection and action-list merge ordering stay in the extension entrypoint.
5
+ * Usage: Imported by the extension entrypoint when adding recovery nextActions to tool details.
6
+ * Invariants/Assumptions: Action ids and argv ordering are public contracts; session prefixing must not double-prefix explicit --session args.
7
+ */
8
+
9
+ import { buildAgentBrowserNextActions } from "./action-recommendations.js";
10
+ import { type AgentBrowserNextAction, withOptionalSessionArgs } from "./next-actions.js";
11
+
12
/** Tab correction details; its title/URL take precedence over a plain target when recovery actions are assembled. */
export interface TabRecoveryCorrection {
  /** Tab id forwarded as the recovery context's `selectedTab`. */
  selectedTab?: string;
  /** Title preferred over the fallback target's title. */
  targetTitle?: string;
  /** URL preferred over the fallback target's URL. */
  targetUrl?: string;
}
17
+
18
/** Fallback title/URL of the intended tab, used when no tab correction supplies them. */
export interface TabRecoveryTarget {
  title?: string;
  url?: string;
}
22
+
23
/**
 * Builds connected-session recovery nextActions for a named session.
 *
 * @param sessionName Session to target; a missing or empty name yields no actions.
 * @returns The actions from the shared builder (requested as a completed success result), or `[]`.
 */
export function buildConnectedSessionNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
  if (sessionName) {
    const actions = buildAgentBrowserNextActions({
      recovery: { kind: "connected-session", sessionName },
      resultCategory: "success",
      successCategory: "completed",
    });
    return actions ?? [];
  }
  return [];
}
31
+
32
/**
 * Builds no-active-page recovery nextActions for a named session.
 *
 * @param sessionName Session to target; a missing or empty name yields no actions.
 * @returns The actions from the shared builder (requested as a failure result), or `[]`.
 */
export function buildNoActivePageNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
  if (sessionName) {
    const actions = buildAgentBrowserNextActions({
      recovery: { kind: "no-active-page", sessionName },
      resultCategory: "failure",
    });
    return actions ?? [];
  }
  return [];
}
39
+
40
/**
 * Builds about:blank / tab-drift recovery nextActions from session and tab context.
 *
 * @param options.kind Recovery scenario.
 * @param options.recoveryApplied Forwarded unchanged into the recovery context.
 * @param options.resultCategory Result category; defaults to `"success"`.
 * @param options.sessionName Session name forwarded into the recovery context.
 * @param options.tabCorrection Preferred source of selected tab, title and URL.
 * @param options.target Fallback title/URL when the correction lacks them.
 * @returns The actions from the shared builder, or `[]` when it returns nothing.
 */
export function buildSessionTabRecoveryNextActions(options: {
  kind: "about-blank" | "tab-drift";
  recoveryApplied?: boolean;
  resultCategory?: "failure" | "success";
  sessionName?: string;
  tabCorrection?: TabRecoveryCorrection;
  target?: TabRecoveryTarget;
}): AgentBrowserNextAction[] {
  const { kind, recoveryApplied, sessionName, tabCorrection, target } = options;
  const resultCategory = options.resultCategory ?? "success";
  const targetTitle = tabCorrection?.targetTitle ?? target?.title;
  const targetUrl = tabCorrection?.targetUrl ?? target?.url;
  const actions = buildAgentBrowserNextActions({
    recovery: {
      kind,
      recoveryApplied,
      selectedTab: tabCorrection?.selectedTab,
      sessionName,
      targetTitle,
      targetUrl,
    },
    resultCategory,
    // A success category is only supplied for successful results.
    successCategory: resultCategory === "success" ? "completed" : undefined,
  });
  return actions ?? [];
}
62
+
63
/**
 * Builds stale-ref failure nextActions and passes each action's argv through the session-argv helper.
 *
 * Actions without `params` or without `params.args` are copied with their params untouched.
 *
 * @param sessionName Session name handed to the session-argv helper.
 * @returns A new array of shallow-copied actions.
 */
export function buildSessionAwareStaleRefNextActions(sessionName: string | undefined): AgentBrowserNextAction[] {
  const staleRefActions = buildAgentBrowserNextActions({ failureCategory: "stale-ref", resultCategory: "failure" }) ?? [];
  return staleRefActions.map((action) => {
    const originalParams = action.params;
    const originalArgs = originalParams?.args;
    const params = originalParams && originalArgs
      ? { ...originalParams, args: withOptionalSessionArgs(sessionName, originalArgs) }
      : originalParams;
    return { ...action, params };
  });
}
@@ -0,0 +1,312 @@
1
+ /**
2
+ * Purpose: Own pure selector-miss recovery diagnostics for visible refs and rich editable inputs.
3
+ * Responsibilities: Parse find/semantic action targets, match current snapshot refs, build public diagnostics, text, and safe nextActions.
4
+ * Scope: Selector recovery policy only; subprocess snapshot probing and result orchestration stay in the extension entrypoint.
5
+ * Usage: The extension entrypoint supplies command tokens plus snapshot data after a selector-not-found failure.
6
+ * Invariants/Assumptions: Fill recovery must never echo or auto-submit the user-provided fill text; keyboard insertion remains a separate explicit action.
7
+ */
8
+
9
+ import { isRecord } from "../parsing.js";
10
+ import { extractRefSnapshotFromData, type SessionRefSnapshot } from "../session-page-state.js";
11
+ import { getEditableRefEvidence } from "./editable-ref-evidence.js";
12
+ import { type AgentBrowserNextAction, withOptionalSessionArgs } from "./next-actions.js";
13
+ import {
14
+ getAgentBrowserRichInputRecoveryNextActionId,
15
+ getAgentBrowserRichInputRecoveryNextActionIds,
16
+ } from "./recovery-actions.js";
17
+ import {
18
+ getSnapshotLineTextByRef,
19
+ getSnapshotRefRecord,
20
+ getSnapshotRefRole,
21
+ } from "./snapshot-refs.js";
22
+ import { compareRefIds } from "./text.js";
23
+
24
/** Action verbs that selector-miss recovery recognises. */
export type SelectorRecoveryActionName =
  | "check"
  | "click"
  | "fill"
  | "select"
  | "uncheck";
25
+
26
/** A compiled semantic action; within this module only its `args` are parsed for a `find` command. */
export interface SelectorRecoveryCompiledAction {
  action: SelectorRecoveryActionName;
  /** Command tokens, optionally starting with `--session <name>`. */
  args: string[];
  locator?: string;
  selector?: string;
  values?: string[];
}
33
+
34
/** One snapshot ref that exactly matches a failed locator (internal shape, including editable evidence). */
export interface VisibleRefFallbackCandidate {
  action: SelectorRecoveryActionName;
  /** Direct ref command (`[action, "@eN"]`); omitted for `fill` candidates. */
  args?: string[];
  /** Editable evidence for the ref when known; removed from the public diagnostic. */
  editableEvidence?: boolean;
  name: string;
  reason: string;
  /** `@`-prefixed ref id. */
  ref: string;
  role: string;
}
43
+
44
/** Internal diagnostic describing exact visible-ref matches for a failed locator. */
export interface VisibleRefFallbackDiagnostic {
  candidates: VisibleRefFallbackCandidate[];
  /** Ref snapshot extracted from the same snapshot data the candidates came from. */
  snapshot: SessionRefSnapshot;
  summary: string;
  /** The failed target, without any fill text. */
  target: {
    action: SelectorRecoveryActionName;
    roles: string[];
    targetName: string;
  };
}
54
+
55
/** Public view of a visible-ref candidate: the internal shape minus `editableEvidence`. */
export interface PublicVisibleRefFallbackCandidate {
  action: SelectorRecoveryActionName;
  args?: string[];
  name: string;
  reason: string;
  ref: string;
  role: string;
}
63
+
64
/** Public view of the visible-ref fallback diagnostic, carrying sanitized candidates. */
export interface PublicVisibleRefFallbackDiagnostic {
  candidates: PublicVisibleRefFallbackCandidate[];
  snapshot: SessionRefSnapshot;
  summary: string;
  target: VisibleRefFallbackDiagnostic["target"];
}
70
+
71
/** Parsed description of what a failed `find` command was trying to reach. */
export interface VisibleRefFallbackTarget {
  action: SelectorRecoveryActionName;
  /** Roles to match, in priority order. */
  roles: string[];
  /** Fill text from the command; present only on the parsed target, not on diagnostics. */
  text?: string;
  /** Accessible name to match after normalization. */
  targetName: string;
}
77
+
78
/** An editable ref that can be focused or clicked before keyboard-based text entry. */
export interface RichInputRecoveryCandidate {
  /** `["click", ref]` argv. */
  clickArgs: string[];
  /** `["focus", ref]` argv. */
  focusArgs: string[];
  name: string;
  reason: string;
  ref: string;
  role: string;
}
86
+
87
/** Diagnostic offered when a fill locator missed but editable refs match exactly. */
export interface RichInputRecoveryDiagnostic {
  candidates: RichInputRecoveryCandidate[];
  /** Guidance on entering text after the ref is focused. */
  inputMethodHint: string;
  /** Ids laid out per candidate as focus id followed by click id. */
  nextActionIds: string[];
  summary: string;
  target: {
    roles: string[];
    targetName: string;
  };
}
97
+
98
/** Every action verb accepted by the selector-recovery parser. */
const SELECTOR_RECOVERY_ACTION_NAMES = new Set<SelectorRecoveryActionName>([
  "check",
  "click",
  "fill",
  "select",
  "uncheck",
]);

/** Maximum number of visible-ref candidates reported per diagnostic. */
const VISIBLE_REF_FALLBACK_CANDIDATE_LIMIT = 3;

/** Roles for which negative editable evidence disqualifies a fill candidate. */
const EDITABLE_CONTROL_ROLES = new Set(["combobox", "searchbox", "textbox"]);

/** Roles eligible for focus/click rich-input recovery. */
const RICH_INPUT_RECOVERY_EDITABLE_ROLES = new Set(["searchbox", "textbox"]);

/** Guidance text attached to every rich-input recovery diagnostic. */
const RICH_INPUT_RECOVERY_HINT = "After the editable ref is focused, use keyboard inserttext or keyboard type with the intended text in a separate call, and do not press Enter or otherwise submit unless the user flow explicitly calls for it.";
103
+
104
/**
 * Type guard telling whether a token is one of the recognised recovery action verbs.
 *
 * @param action Token to test.
 * @returns `true` when the token is a member of the recognised action set.
 */
function isSelectorRecoveryActionName(action: string): action is SelectorRecoveryActionName {
  const candidate = action as SelectorRecoveryActionName;
  return SELECTOR_RECOVERY_ACTION_NAMES.has(candidate);
}
107
+
108
/**
 * Reads the value following the first `--name` flag at or after `startIndex`.
 *
 * @param args Command tokens.
 * @param startIndex Index from which to search for `--name`.
 * @returns The value, or `undefined` when the flag is absent, has no value,
 *   has an empty value, or the value starts with `-`.
 */
function getFindNameFlagValue(args: string[], startIndex: number): string | undefined {
  const flagPosition = args.indexOf("--name", startIndex);
  if (flagPosition < 0) return undefined;
  const value = args[flagPosition + 1];
  if (!value) return undefined;
  // A leading dash means the next token is another flag, not a name.
  return value.startsWith("-") ? undefined : value;
}
113
+
114
/**
 * Parses a `find <locator> <value> <action> ...` command into a fallback target.
 *
 * Supported shapes: `role` with any non-`select` action plus a `--name` value,
 * `text` with `click` or `fill`, and `label` / `placeholder` with `fill`.
 * A leading `--session <name>` pair is skipped.
 *
 * @param args Command tokens.
 * @param options.allowLeadingDashFillText When falsy, fill text starting with `-` is rejected.
 * @returns The parsed target, or `undefined` when the command is not a supported `find` form.
 */
function getFindVisibleRefFallbackTarget(args: string[], options: { allowLeadingDashFillText?: boolean } = {}): VisibleRefFallbackTarget | undefined {
  const base = args[0] === "--session" ? 2 : 0;
  if (args[base] !== "find") return undefined;

  const locator = args[base + 1];
  const value = args[base + 2];
  const action = args[base + 3];
  if (!locator || !value) return undefined;
  if (!isSelectorRecoveryActionName(action) || action === "select") return undefined;

  let text: string | undefined;
  if (action === "fill") {
    text = args[base + 4];
    if (!text) return undefined;
    if (text.startsWith("-") && !options.allowLeadingDashFillText) return undefined;
  }

  if (locator === "role") {
    const targetName = getFindNameFlagValue(args, base + 4);
    if (!targetName) return undefined;
    return { action, roles: [value], targetName, text };
  }

  if (action === "click") {
    return locator === "text" ? { action, roles: ["button", "link"], targetName: value } : undefined;
  }
  if (action !== "fill") return undefined;

  switch (locator) {
    case "text":
    case "placeholder":
      return { action, roles: ["searchbox", "textbox"], targetName: value, text };
    case "label":
      return { action, roles: ["textbox"], targetName: value, text };
    default:
      return undefined;
  }
}
141
+
142
/**
 * Resolves the fallback target for a failed command.
 *
 * The raw command tokens are tried first; the compiled semantic action's args are
 * consulted only when the tokens do not parse. Fill text with a leading dash is allowed.
 *
 * @param options.commandTokens Tokens of the command as issued.
 * @param options.compiledSemanticAction Optional compiled action whose args act as a fallback.
 * @returns The parsed target, or `undefined` when neither source parses.
 */
export function getVisibleRefFallbackTarget(options: {
  commandTokens: string[];
  compiledSemanticAction?: SelectorRecoveryCompiledAction;
}): VisibleRefFallbackTarget | undefined {
  const fromTokens = getFindVisibleRefFallbackTarget(options.commandTokens, { allowLeadingDashFillText: true });
  if (fromTokens !== undefined && fromTokens !== null) return fromTokens;
  const compiled = options.compiledSemanticAction;
  if (!compiled) return undefined;
  return getFindVisibleRefFallbackTarget(compiled.args, { allowLeadingDashFillText: true });
}
148
+
149
/**
 * Collects snapshot refs whose role and normalized name exactly match the target.
 *
 * Only `e<digits>` refs with record entries are considered. For `fill`, refs with
 * explicit negative editable evidence and an editable-control role are skipped.
 * Results are ordered by the target's role priority, then by ref id, and capped
 * at the candidate limit.
 *
 * @param target Parsed fallback target.
 * @param snapshotData Raw snapshot payload.
 * @returns Up to the limit of candidates; empty when the snapshot has no ref record.
 */
function getVisibleRefFallbackCandidates(target: VisibleRefFallbackTarget, snapshotData: unknown): VisibleRefFallbackCandidate[] {
  const refs = getSnapshotRefRecord(snapshotData);
  if (!refs) return [];

  const lineByRef = getSnapshotLineTextByRef(snapshotData);
  const rolePriority = target.roles.map((role) => role.toLowerCase());
  const wantedName = normalizeSemanticActionAccessibleName(target.targetName);
  const matches: VisibleRefFallbackCandidate[] = [];

  for (const [refId, entry] of Object.entries(refs)) {
    if (!/^e\d+$/.test(refId) || !isRecord(entry)) continue;
    const lineText = lineByRef.get(refId);
    const editableEvidence = getEditableRefEvidence({ ref: entry, text: lineText });
    const role = getSnapshotRefRole(entry, editableEvidence);
    const name = typeof entry.name === "string" ? entry.name : undefined;
    if (!role || !name) continue;
    if (!rolePriority.includes(role.toLowerCase())) continue;
    if (normalizeSemanticActionAccessibleName(name) !== wantedName) continue;
    if (target.action === "fill" && editableEvidence === false && EDITABLE_CONTROL_ROLES.has(role.toLowerCase())) continue;

    // Fill candidates carry no direct argv.
    const directRefArgs = target.action === "fill" ? undefined : [target.action, `@${refId}`];
    matches.push({
      action: target.action,
      ...(directRefArgs ? { args: directRefArgs } : {}),
      name,
      reason: `Current snapshot shows ${role} ${JSON.stringify(name)} at @${refId}, matching the failed ${target.action} locator exactly.`,
      ref: `@${refId}`,
      role,
      ...(editableEvidence !== undefined ? { editableEvidence } : {}),
    });
  }

  matches.sort((left, right) => {
    const byRole = rolePriority.indexOf(left.role.toLowerCase()) - rolePriority.indexOf(right.role.toLowerCase());
    return byRole || compareRefIds(left.ref.slice(1), right.ref.slice(1));
  });
  return matches.slice(0, VISIBLE_REF_FALLBACK_CANDIDATE_LIMIT);
}
177
+
178
/**
 * Builds the visible-ref fallback diagnostic for a target from snapshot data.
 *
 * @param options.snapshotData Raw snapshot payload.
 * @param options.target Parsed fallback target.
 * @returns The diagnostic, or `undefined` when no ref snapshot can be extracted
 *   or no candidate matches. The returned `target` omits the fill text.
 */
export function buildVisibleRefFallbackDiagnosticFromSnapshot(options: {
  snapshotData: unknown;
  target: VisibleRefFallbackTarget;
}): VisibleRefFallbackDiagnostic | undefined {
  const { snapshotData, target } = options;
  const snapshot = extractRefSnapshotFromData(snapshotData);
  if (!snapshot) return undefined;

  const candidates = getVisibleRefFallbackCandidates(target, snapshotData);
  if (candidates.length === 0) return undefined;

  let summary: string;
  if (candidates.length === 1) {
    summary = `Current snapshot has one exact visible ref match for ${target.action} ${JSON.stringify(target.targetName)}.`;
  } else {
    summary = `Current snapshot has ${candidates.length} exact visible ref matches for ${target.action} ${JSON.stringify(target.targetName)}; choose only if the intended control is unambiguous.`;
  }

  return {
    candidates,
    snapshot,
    summary,
    target: { action: target.action, roles: target.roles, targetName: target.targetName },
  };
}
195
+
196
/** Result of resolving a compiled action to a direct ref command. */
export interface VisibleRefActionResolution {
  /** Direct ref argv taken from the first candidate that has one. */
  args: string[];
  /** Ref snapshot extracted from the snapshot data used for resolution. */
  snapshot: SessionRefSnapshot;
}
200
+
201
/**
 * Resolves a compiled non-fill, non-select action to a direct ref command using snapshot data.
 *
 * @param options.compiledAction Compiled action whose `args` are parsed as a `find` command.
 * @param options.snapshotData Raw snapshot payload.
 * @returns The first candidate's argv plus the ref snapshot, or `undefined` when the
 *   action is unsupported, no ref snapshot can be extracted, or nothing matches.
 */
export function resolveVisibleRefActionFromSnapshot(options: {
  compiledAction: SelectorRecoveryCompiledAction;
  snapshotData: unknown;
}): VisibleRefActionResolution | undefined {
  const target = getFindVisibleRefFallbackTarget(options.compiledAction.args, { allowLeadingDashFillText: true });
  if (!target) return undefined;
  if (target.action === "fill" || target.action === "select") return undefined;

  const snapshot = extractRefSnapshotFromData(options.snapshotData);
  if (!snapshot) return undefined;

  const candidates = getVisibleRefFallbackCandidates(target, options.snapshotData);
  const firstWithArgs = candidates.find((item) => item.args !== undefined);
  const resolvedArgs = firstWithArgs?.args;
  if (!resolvedArgs) return undefined;
  return { args: resolvedArgs, snapshot };
}
213
+
214
/**
 * Turns visible-ref candidates that carry direct argv into nextActions.
 *
 * Candidates without `args` are skipped. When several candidates exist, ids are
 * numbered by the candidate's position in the diagnostic (one-based) and the
 * safety text warns about ambiguity.
 *
 * @param options.diagnostic Diagnostic whose candidates are converted.
 * @param options.sessionName Session name handed to the session-argv helper.
 * @returns One nextAction per candidate with argv.
 */
export function buildVisibleRefFallbackNextActions(options: { diagnostic: VisibleRefFallbackDiagnostic; sessionName?: string }): AgentBrowserNextAction[] {
  const { candidates } = options.diagnostic;
  const ambiguous = candidates.length > 1;
  return candidates.flatMap((candidate, index) => {
    if (!candidate.args) return [];
    return [{
      id: ambiguous ? `try-current-visible-ref-${index + 1}` : "try-current-visible-ref",
      params: { args: withOptionalSessionArgs(options.sessionName, candidate.args) },
      reason: candidate.reason,
      safety: ambiguous
        ? "Several current refs share the same exact role/name. Inspect the snapshot and use only the ref that clearly matches the intended target."
        : "Use only while this current snapshot still represents the page; refresh refs first if the page changed.",
      tool: "agent_browser" as const,
    }];
  });
}
226
+
227
/**
 * Renders the visible-ref fallback diagnostic as plain text.
 *
 * @param diagnostic Diagnostic to render; `undefined` yields `undefined`.
 * @returns A heading line followed by one bullet per candidate, joined with newlines.
 */
export function formatVisibleRefFallbackText(diagnostic: VisibleRefFallbackDiagnostic | undefined): string | undefined {
  if (!diagnostic) return undefined;
  const lines: string[] = ["Current snapshot ref fallback:"];
  for (const candidate of diagnostic.candidates) {
    // The role segment is dropped entirely when the role is empty.
    const roleSegment = candidate.role ? ` ${candidate.role}` : "";
    lines.push(`- ${candidate.ref}${roleSegment} ${JSON.stringify(candidate.name)}: ${candidate.reason}`);
  }
  return lines.join("\n");
}
234
+
235
/**
 * Produces the public form of a visible-ref diagnostic by removing
 * `editableEvidence` from every candidate.
 *
 * @param diagnostic Internal diagnostic.
 * @returns A new diagnostic object; `snapshot`, `summary` and `target` are passed through by reference.
 */
export function sanitizeVisibleRefFallbackDiagnostic(diagnostic: VisibleRefFallbackDiagnostic): PublicVisibleRefFallbackDiagnostic {
  const { snapshot, summary, target } = diagnostic;
  const candidates = diagnostic.candidates.map((candidate) => {
    const { editableEvidence: _internalOnly, ...publicCandidate } = candidate;
    return publicCandidate;
  });
  return { candidates, snapshot, summary, target };
}
243
+
244
/**
 * Tells whether a visible-ref candidate qualifies for rich-input recovery.
 *
 * @param candidate Candidate to inspect.
 * @returns `true` for `fill` candidates that are not marked non-editable and whose
 *   lower-cased role is in the rich-input editable role set.
 */
function isRichInputRecoveryCandidate(candidate: VisibleRefFallbackCandidate): boolean {
  if (candidate.action !== "fill") return false;
  if (candidate.editableEvidence === false) return false;
  return RICH_INPUT_RECOVERY_EDITABLE_ROLES.has(candidate.role.toLowerCase());
}
247
+
248
/**
 * Derives the rich-input recovery diagnostic from a visible-ref diagnostic.
 *
 * Only `fill` targets are considered. The produced candidates carry focus/click
 * argv and never include the fill text.
 *
 * @param diagnostic Visible-ref diagnostic, if any.
 * @returns The rich-input diagnostic, or `undefined` when the input is missing,
 *   is not a fill target, or has no eligible editable candidate.
 */
export function buildRichInputRecoveryDiagnostic(diagnostic: VisibleRefFallbackDiagnostic | undefined): RichInputRecoveryDiagnostic | undefined {
  if (!diagnostic) return undefined;
  if (diagnostic.target.action !== "fill") return undefined;

  const candidates: RichInputRecoveryCandidate[] = [];
  for (const candidate of diagnostic.candidates) {
    if (!isRichInputRecoveryCandidate(candidate)) continue;
    const { name, ref, role } = candidate;
    candidates.push({
      clickArgs: ["click", ref],
      focusArgs: ["focus", ref],
      name,
      reason: `Current snapshot shows editable ${role} ${JSON.stringify(name)} at ${ref}; focus or click it before keyboard insertion instead of retrying fill with copied text.`,
      ref,
      role,
    });
  }
  if (candidates.length === 0) return undefined;

  const summary = candidates.length === 1
    ? "Fill locator missed, but the current snapshot has one exact editable ref candidate for safe keyboard-based recovery."
    : `Fill locator missed, but the current snapshot has ${candidates.length} exact editable ref candidates; choose only if the intended input is unambiguous.`;

  return {
    candidates,
    inputMethodHint: RICH_INPUT_RECOVERY_HINT,
    nextActionIds: getAgentBrowserRichInputRecoveryNextActionIds(candidates.length),
    summary,
    target: { roles: diagnostic.target.roles, targetName: diagnostic.target.targetName },
  };
}
269
+
270
/**
 * Builds focus and click nextActions for each rich-input recovery candidate.
 *
 * Each candidate contributes a focus action followed by a click action. Neither
 * carries fill text; the safety wording changes when several candidates exist.
 *
 * @param options.diagnostic Rich-input diagnostic supplying the candidates.
 * @param options.sessionName Session name handed to the session-argv helper.
 * @returns Two nextActions per candidate, in candidate order.
 */
export function buildRichInputRecoveryNextActions(options: { diagnostic: RichInputRecoveryDiagnostic; sessionName?: string }): AgentBrowserNextAction[] {
  const { candidates } = options.diagnostic;
  const candidateCount = candidates.length;
  const ambiguous = candidateCount > 1;
  const actions: AgentBrowserNextAction[] = [];

  candidates.forEach((candidate, index) => {
    const safety = ambiguous
      ? `Several editable refs share the same exact name. Inspect the current snapshot and use only the ${candidate.ref} ${candidate.role} if it is clearly the intended input. No fill text or submit key is included.`
      : "Does not include fill text or submit the form. After focus/click succeeds, use keyboard inserttext or keyboard type with the intended text only if this is the right input.";

    actions.push({
      id: getAgentBrowserRichInputRecoveryNextActionId("focus", index, candidateCount),
      params: { args: withOptionalSessionArgs(options.sessionName, candidate.focusArgs) },
      reason: candidate.reason,
      safety,
      tool: "agent_browser" as const,
    });
    actions.push({
      id: getAgentBrowserRichInputRecoveryNextActionId("click", index, candidateCount),
      params: { args: withOptionalSessionArgs(options.sessionName, candidate.clickArgs) },
      reason: `Click ${candidate.ref} to focus the editable ${candidate.role} before keyboard insertion when focus alone is insufficient.`,
      safety: `${safety} A click may run normal focus/click handlers, but this action does not press Enter or auto-submit.`,
      tool: "agent_browser" as const,
    });
  });

  return actions;
}
297
+
298
/**
 * Renders the rich-input recovery diagnostic as plain text.
 *
 * Ids are read from `nextActionIds` in pairs: the focus id at `index * 2`
 * and the click id at `index * 2 + 1`.
 *
 * @param diagnostic Diagnostic to render; `undefined` yields `undefined`.
 * @returns A heading, one bullet per candidate, then the input-method hint, joined with newlines.
 */
export function formatRichInputRecoveryText(diagnostic: RichInputRecoveryDiagnostic | undefined): string | undefined {
  if (!diagnostic) return undefined;
  const lines: string[] = ["Rich input recovery:"];
  diagnostic.candidates.forEach((candidate, index) => {
    const pairStart = index * 2;
    const idPair = diagnostic.nextActionIds.slice(pairStart, pairStart + 2);
    const focusId = idPair[0];
    const clickId = idPair[1];
    lines.push(`- ${candidate.ref} ${candidate.role} ${JSON.stringify(candidate.name)}: use ${focusId} or ${clickId}; then use keyboard inserttext/type with the intended text.`);
  });
  lines.push(`- ${diagnostic.inputMethodHint}`);
  return lines.join("\n");
}
309
+
310
/**
 * Normalizes an accessible name for exact comparison.
 *
 * @param name Raw accessible name.
 * @returns The name with whitespace runs collapsed to single spaces, outer whitespace removed, and lower-cased.
 */
export function normalizeSemanticActionAccessibleName(name: string): string {
  const collapsed = name.replace(/\s+/g, " ");
  const trimmed = collapsed.trim();
  return trimmed.toLowerCase();
}