@kata-sh/cli 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +156 -0
  3. package/dist/app-paths.d.ts +4 -0
  4. package/dist/app-paths.js +6 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +56 -0
  7. package/dist/loader.d.ts +2 -0
  8. package/dist/loader.js +95 -0
  9. package/dist/resource-loader.d.ts +18 -0
  10. package/dist/resource-loader.js +50 -0
  11. package/dist/wizard.d.ts +15 -0
  12. package/dist/wizard.js +159 -0
  13. package/package.json +50 -21
  14. package/pkg/dist/modes/interactive/theme/dark.json +85 -0
  15. package/pkg/dist/modes/interactive/theme/light.json +84 -0
  16. package/pkg/dist/modes/interactive/theme/theme-schema.json +335 -0
  17. package/pkg/dist/modes/interactive/theme/theme.d.ts +78 -0
  18. package/pkg/dist/modes/interactive/theme/theme.d.ts.map +1 -0
  19. package/pkg/dist/modes/interactive/theme/theme.js +949 -0
  20. package/pkg/dist/modes/interactive/theme/theme.js.map +1 -0
  21. package/pkg/package.json +8 -0
  22. package/scripts/postinstall.js +45 -0
  23. package/src/resources/AGENTS.md +108 -0
  24. package/src/resources/KATA-WORKFLOW.md +661 -0
  25. package/src/resources/agents/researcher.md +29 -0
  26. package/src/resources/agents/scout.md +56 -0
  27. package/src/resources/agents/worker.md +31 -0
  28. package/src/resources/extensions/ask-user-questions.ts +200 -0
  29. package/src/resources/extensions/bg-shell/index.ts +2758 -0
  30. package/src/resources/extensions/browser-tools/BROWSER-TOOLS-V2-PROPOSAL.md +1277 -0
  31. package/src/resources/extensions/browser-tools/core.js +1057 -0
  32. package/src/resources/extensions/browser-tools/index.ts +4916 -0
  33. package/src/resources/extensions/browser-tools/package.json +20 -0
  34. package/src/resources/extensions/context7/index.ts +428 -0
  35. package/src/resources/extensions/context7/package.json +11 -0
  36. package/src/resources/extensions/get-secrets-from-user.ts +352 -0
  37. package/src/resources/extensions/github/formatters.ts +207 -0
  38. package/src/resources/extensions/github/gh-api.ts +537 -0
  39. package/src/resources/extensions/github/index.ts +778 -0
  40. package/src/resources/extensions/kata/activity-log.ts +88 -0
  41. package/src/resources/extensions/kata/auto.ts +2786 -0
  42. package/src/resources/extensions/kata/commands.ts +355 -0
  43. package/src/resources/extensions/kata/crash-recovery.ts +85 -0
  44. package/src/resources/extensions/kata/dashboard-overlay.ts +516 -0
  45. package/src/resources/extensions/kata/docs/preferences-reference.md +103 -0
  46. package/src/resources/extensions/kata/doctor.ts +683 -0
  47. package/src/resources/extensions/kata/files.ts +730 -0
  48. package/src/resources/extensions/kata/gitignore.ts +165 -0
  49. package/src/resources/extensions/kata/guided-flow.ts +976 -0
  50. package/src/resources/extensions/kata/index.ts +556 -0
  51. package/src/resources/extensions/kata/metrics.ts +397 -0
  52. package/src/resources/extensions/kata/observability-validator.ts +408 -0
  53. package/src/resources/extensions/kata/package.json +11 -0
  54. package/src/resources/extensions/kata/paths.ts +346 -0
  55. package/src/resources/extensions/kata/preferences.ts +695 -0
  56. package/src/resources/extensions/kata/prompt-loader.ts +50 -0
  57. package/src/resources/extensions/kata/prompts/complete-milestone.md +25 -0
  58. package/src/resources/extensions/kata/prompts/complete-slice.md +27 -0
  59. package/src/resources/extensions/kata/prompts/discuss.md +151 -0
  60. package/src/resources/extensions/kata/prompts/doctor-heal.md +29 -0
  61. package/src/resources/extensions/kata/prompts/execute-task.md +64 -0
  62. package/src/resources/extensions/kata/prompts/guided-complete-slice.md +1 -0
  63. package/src/resources/extensions/kata/prompts/guided-discuss-milestone.md +3 -0
  64. package/src/resources/extensions/kata/prompts/guided-discuss-slice.md +59 -0
  65. package/src/resources/extensions/kata/prompts/guided-execute-task.md +1 -0
  66. package/src/resources/extensions/kata/prompts/guided-plan-milestone.md +23 -0
  67. package/src/resources/extensions/kata/prompts/guided-plan-slice.md +1 -0
  68. package/src/resources/extensions/kata/prompts/guided-research-slice.md +11 -0
  69. package/src/resources/extensions/kata/prompts/guided-resume-task.md +1 -0
  70. package/src/resources/extensions/kata/prompts/plan-milestone.md +47 -0
  71. package/src/resources/extensions/kata/prompts/plan-slice.md +63 -0
  72. package/src/resources/extensions/kata/prompts/queue.md +85 -0
  73. package/src/resources/extensions/kata/prompts/reassess-roadmap.md +48 -0
  74. package/src/resources/extensions/kata/prompts/replan-slice.md +39 -0
  75. package/src/resources/extensions/kata/prompts/research-milestone.md +37 -0
  76. package/src/resources/extensions/kata/prompts/research-slice.md +28 -0
  77. package/src/resources/extensions/kata/prompts/run-uat.md +109 -0
  78. package/src/resources/extensions/kata/prompts/system.md +341 -0
  79. package/src/resources/extensions/kata/session-forensics.ts +550 -0
  80. package/src/resources/extensions/kata/skill-discovery.ts +137 -0
  81. package/src/resources/extensions/kata/state.ts +509 -0
  82. package/src/resources/extensions/kata/templates/context.md +76 -0
  83. package/src/resources/extensions/kata/templates/decisions.md +8 -0
  84. package/src/resources/extensions/kata/templates/milestone-summary.md +73 -0
  85. package/src/resources/extensions/kata/templates/plan.md +133 -0
  86. package/src/resources/extensions/kata/templates/preferences.md +15 -0
  87. package/src/resources/extensions/kata/templates/project.md +31 -0
  88. package/src/resources/extensions/kata/templates/reassessment.md +28 -0
  89. package/src/resources/extensions/kata/templates/requirements.md +81 -0
  90. package/src/resources/extensions/kata/templates/research.md +46 -0
  91. package/src/resources/extensions/kata/templates/roadmap.md +118 -0
  92. package/src/resources/extensions/kata/templates/slice-context.md +58 -0
  93. package/src/resources/extensions/kata/templates/slice-summary.md +99 -0
  94. package/src/resources/extensions/kata/templates/state.md +19 -0
  95. package/src/resources/extensions/kata/templates/task-plan.md +52 -0
  96. package/src/resources/extensions/kata/templates/task-summary.md +57 -0
  97. package/src/resources/extensions/kata/templates/uat.md +54 -0
  98. package/src/resources/extensions/kata/tests/activity-log-prune.test.ts +327 -0
  99. package/src/resources/extensions/kata/tests/auto-preflight.test.ts +97 -0
  100. package/src/resources/extensions/kata/tests/auto-supervisor.test.mjs +53 -0
  101. package/src/resources/extensions/kata/tests/complete-milestone.test.ts +317 -0
  102. package/src/resources/extensions/kata/tests/cost-projection.test.ts +160 -0
  103. package/src/resources/extensions/kata/tests/derive-state-deps.test.ts +477 -0
  104. package/src/resources/extensions/kata/tests/derive-state.test.ts +1013 -0
  105. package/src/resources/extensions/kata/tests/doctor.test.ts +718 -0
  106. package/src/resources/extensions/kata/tests/idle-recovery.test.ts +490 -0
  107. package/src/resources/extensions/kata/tests/metrics-io.test.ts +254 -0
  108. package/src/resources/extensions/kata/tests/metrics.test.ts +217 -0
  109. package/src/resources/extensions/kata/tests/must-have-parser.test.ts +309 -0
  110. package/src/resources/extensions/kata/tests/parsers.test.ts +1257 -0
  111. package/src/resources/extensions/kata/tests/plan-milestone.test.ts +185 -0
  112. package/src/resources/extensions/kata/tests/plan-quality-validator.test.ts +386 -0
  113. package/src/resources/extensions/kata/tests/reassess-prompt.test.ts +208 -0
  114. package/src/resources/extensions/kata/tests/replan-slice.test.ts +686 -0
  115. package/src/resources/extensions/kata/tests/requirements.test.ts +151 -0
  116. package/src/resources/extensions/kata/tests/resolve-ts-hooks.mjs +17 -0
  117. package/src/resources/extensions/kata/tests/resolve-ts.mjs +11 -0
  118. package/src/resources/extensions/kata/tests/run-uat.test.ts +383 -0
  119. package/src/resources/extensions/kata/tests/unit-runtime.test.ts +388 -0
  120. package/src/resources/extensions/kata/tests/workspace-index.test.ts +118 -0
  121. package/src/resources/extensions/kata/tests/worktree.test.ts +222 -0
  122. package/src/resources/extensions/kata/types.ts +159 -0
  123. package/src/resources/extensions/kata/unit-runtime.ts +163 -0
  124. package/src/resources/extensions/kata/workspace-index.ts +203 -0
  125. package/src/resources/extensions/kata/worktree.ts +182 -0
  126. package/src/resources/extensions/mac-tools/index.ts +852 -0
  127. package/src/resources/extensions/mac-tools/swift-cli/Package.swift +22 -0
  128. package/src/resources/extensions/mac-tools/swift-cli/Sources/main.swift +1318 -0
  129. package/src/resources/extensions/search-the-web/cache.ts +78 -0
  130. package/src/resources/extensions/search-the-web/format.ts +258 -0
  131. package/src/resources/extensions/search-the-web/http.ts +238 -0
  132. package/src/resources/extensions/search-the-web/index.ts +68 -0
  133. package/src/resources/extensions/search-the-web/tool-fetch-page.ts +519 -0
  134. package/src/resources/extensions/search-the-web/tool-llm-context.ts +404 -0
  135. package/src/resources/extensions/search-the-web/tool-search.ts +503 -0
  136. package/src/resources/extensions/search-the-web/url-utils.ts +91 -0
  137. package/src/resources/extensions/shared/confirm-ui.ts +126 -0
  138. package/src/resources/extensions/shared/interview-ui.ts +822 -0
  139. package/src/resources/extensions/shared/next-action-ui.ts +235 -0
  140. package/src/resources/extensions/shared/progress-widget.ts +282 -0
  141. package/src/resources/extensions/shared/thinking-widget.ts +107 -0
  142. package/src/resources/extensions/shared/ui.ts +400 -0
  143. package/src/resources/extensions/shared/wizard-ui.ts +551 -0
  144. package/src/resources/extensions/slash-commands/audit.ts +92 -0
  145. package/src/resources/extensions/slash-commands/create-extension.ts +375 -0
  146. package/src/resources/extensions/slash-commands/create-slash-command.ts +280 -0
  147. package/src/resources/extensions/slash-commands/index.ts +12 -0
  148. package/src/resources/extensions/slash-commands/kata-run.ts +34 -0
  149. package/src/resources/extensions/subagent/agents.ts +126 -0
  150. package/src/resources/extensions/subagent/index.ts +1293 -0
  151. package/src/resources/skills/debug-like-expert/SKILL.md +231 -0
  152. package/src/resources/skills/debug-like-expert/references/debugging-mindset.md +253 -0
  153. package/src/resources/skills/debug-like-expert/references/hypothesis-testing.md +373 -0
  154. package/src/resources/skills/debug-like-expert/references/investigation-techniques.md +337 -0
  155. package/src/resources/skills/debug-like-expert/references/verification-patterns.md +425 -0
  156. package/src/resources/skills/debug-like-expert/references/when-to-research.md +361 -0
  157. package/src/resources/skills/frontend-design/SKILL.md +45 -0
  158. package/src/resources/skills/swiftui/SKILL.md +208 -0
  159. package/src/resources/skills/swiftui/references/animations.md +921 -0
  160. package/src/resources/skills/swiftui/references/architecture.md +1561 -0
  161. package/src/resources/skills/swiftui/references/layout-system.md +1186 -0
  162. package/src/resources/skills/swiftui/references/navigation.md +1492 -0
  163. package/src/resources/skills/swiftui/references/networking-async.md +214 -0
  164. package/src/resources/skills/swiftui/references/performance.md +1706 -0
  165. package/src/resources/skills/swiftui/references/platform-integration.md +204 -0
  166. package/src/resources/skills/swiftui/references/state-management.md +1443 -0
  167. package/src/resources/skills/swiftui/references/swiftdata.md +297 -0
  168. package/src/resources/skills/swiftui/references/testing-debugging.md +247 -0
  169. package/src/resources/skills/swiftui/references/uikit-appkit-interop.md +218 -0
  170. package/src/resources/skills/swiftui/workflows/add-feature.md +191 -0
  171. package/src/resources/skills/swiftui/workflows/build-new-app.md +311 -0
  172. package/src/resources/skills/swiftui/workflows/debug-swiftui.md +192 -0
  173. package/src/resources/skills/swiftui/workflows/optimize-performance.md +197 -0
  174. package/src/resources/skills/swiftui/workflows/ship-app.md +203 -0
  175. package/src/resources/skills/swiftui/workflows/write-tests.md +235 -0
  176. package/dist/commands/task.d.ts +0 -9
  177. package/dist/commands/task.d.ts.map +0 -1
  178. package/dist/commands/task.js +0 -129
  179. package/dist/commands/task.js.map +0 -1
  180. package/dist/commands/task.test.d.ts +0 -2
  181. package/dist/commands/task.test.d.ts.map +0 -1
  182. package/dist/commands/task.test.js +0 -169
  183. package/dist/commands/task.test.js.map +0 -1
  184. package/dist/e2e/task-e2e.test.d.ts +0 -2
  185. package/dist/e2e/task-e2e.test.d.ts.map +0 -1
  186. package/dist/e2e/task-e2e.test.js +0 -173
  187. package/dist/e2e/task-e2e.test.js.map +0 -1
  188. package/dist/index.d.ts +0 -3
  189. package/dist/index.d.ts.map +0 -1
  190. package/dist/index.js +0 -93
  191. package/dist/index.js.map +0 -1
  192. package/dist/slug.d.ts +0 -2
  193. package/dist/slug.d.ts.map +0 -1
  194. package/dist/slug.js +0 -12
  195. package/dist/slug.js.map +0 -1
  196. package/dist/slug.test.d.ts +0 -2
  197. package/dist/slug.test.d.ts.map +0 -1
  198. package/dist/slug.test.js +0 -32
  199. package/dist/slug.test.js.map +0 -1
@@ -0,0 +1,4916 @@
1
+ /**
2
+ * browser-tools — pi extension
3
+ *
4
+ * Gives the agent full browser interaction capabilities for verifying and testing
5
+ * UI work without requiring a human to look at the screen.
6
+ *
7
+ * Key design principles:
8
+ * - Every action returns feedback (accessibility snapshot, screenshots on navigate)
9
+ * - Errors include visual debugging (screenshots on failure, surfaced JS errors)
10
+ * - Smart waits (domcontentloaded + best-effort settle, not blocking networkidle)
11
+ * - 2x DPI screenshots for readable text
12
+ * - JPEG for viewport screenshots (smaller), PNG for element crops (transparency)
13
+ * - Auto-handles JS dialogs (alert/confirm/prompt) to prevent page freezes
14
+ * - Tracks new tabs (popups, target="_blank") in the page registry without auto-switching to them
15
+ *
16
+ * Architecture:
17
+ * - Single shared Browser + BrowserContext + Page per session
18
+ * - Console, network, and dialog events buffered in memory
19
+ * - Browser launched headed so the user can optionally watch
20
+ * - Cleaned up on session_shutdown
21
+ */
22
+
23
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
24
+ import {
25
+ DEFAULT_MAX_BYTES,
26
+ DEFAULT_MAX_LINES,
27
+ truncateHead,
28
+ } from "@mariozechner/pi-coding-agent";
29
+ import { Type } from "@sinclair/typebox";
30
+ import { StringEnum } from "@mariozechner/pi-ai";
31
+ import type { Browser, BrowserContext, Frame, Page } from "playwright";
32
+ import { mkdir, stat, writeFile, copyFile } from "node:fs/promises";
33
+ import path from "node:path";
34
+ import {
35
+ beginAction,
36
+ createActionTimeline,
37
+ createBoundedLogPusher,
38
+ createPageRegistry,
39
+ diffCompactStates,
40
+ evaluateAssertionChecks,
41
+ finishAction,
42
+ findAction,
43
+ formatTimelineEntries,
44
+ getSnapshotModeConfig,
45
+ buildFailureHypothesis,
46
+ summarizeBrowserSession,
47
+ registryAddPage,
48
+ registryGetActive,
49
+ registryListPages,
50
+ registryRemovePage,
51
+ registrySetActive,
52
+ runBatchSteps,
53
+ SNAPSHOT_MODES,
54
+ toActionParamsSummary,
55
+ validateWaitParams,
56
+ createRegionStableScript,
57
+ parseThreshold,
58
+ meetsThreshold,
59
+ includesNeedle,
60
+ } from "./core.js";
61
+
62
+ // ---------------------------------------------------------------------------
63
+ // Shared state
64
+ // ---------------------------------------------------------------------------
65
+
66
/** Playwright browser for the current session; assigned by ensureBrowser, reset to null by closeBrowser. */
let browser: Browser | null = null;

/** Browser context created alongside `browser`; null whenever no session is running. */
let context: BrowserContext | null = null;

/** Registry of tracked pages and the id of the active one (implementation lives in core.js). */
const pageRegistry = createPageRegistry();

/** Frame selected as the interaction target; null means the active page itself is the target. */
let activeFrame: Frame | null = null;

/** Pusher used for every log buffer below; created with a bound of 1000 (presumably max entries per buffer — see core.js). */
const logPusher = createBoundedLogPusher(1000);
71
+
72
/** One buffered console message or uncaught page error. */
interface ConsoleEntry {
  /** Console message type as reported by Playwright, or "pageerror" for uncaught errors. */
  type: string;
  text: string;
  /** `Date.now()` at the moment the event was received. */
  timestamp: number;
  /** URL of the page when the event was received. */
  url: string;
  /** Registry id of the page that emitted the event. */
  pageId: number;
}

/** One buffered network request outcome (finished or failed). */
interface NetworkEntry {
  method: string;
  url: string;
  /** HTTP status, or null when no response was available (always null for failed requests). */
  status: number | null;
  resourceType: string;
  /** `Date.now()` at the moment the outcome was recorded. */
  timestamp: number;
  failed: boolean;
  failureText?: string;
  responseBody?: string; // Only captured for 4xx/5xx responses, truncated to 2000 chars
  /** Registry id of the page that issued the request. */
  pageId: number;
}

/** Buffered console output for the session; replaced with a fresh array by closeBrowser. */
let consoleLogs: ConsoleEntry[] = [];

/** Buffered network activity for the session; replaced with a fresh array by closeBrowser. */
let networkLogs: NetworkEntry[] = [];
94
+
95
/** One JS dialog that was intercepted and handled automatically. */
interface DialogEntry {
  type: string; // "alert" | "confirm" | "prompt" | "beforeunload"
  message: string;
  /** `Date.now()` at the moment the dialog was intercepted. */
  timestamp: number;
  /** URL of the page when the dialog appeared. */
  url: string;
  defaultValue?: string; // For prompt dialogs
  accepted: boolean; // Whether we auto-accepted or dismissed
  /** Registry id of the page that raised the dialog. */
  pageId: number;
}

/** Buffered dialog history for the session; replaced with a fresh array by closeBrowser. */
let dialogLogs: DialogEntry[] = [];

/** Per-page count of in-flight "critical" requests (see isCriticalResourceType). */
const pendingCriticalRequestsByPage = new WeakMap<Page, number>();
108
+
109
/** Description of a single element captured in a ref snapshot. */
interface RefNode {
  ref: string;
  tag: string;
  role: string;
  name: string;
  selectorHints: string[];
  isVisible: boolean;
  isEnabled: boolean;
  xpathOrPath: string;
  href?: string;
  type?: string;
  path: number[];
  contentHash?: string;
  structuralSignature?: string;
  nearestHeading?: string;
  formOwnership?: string;
}

/** Parameters and context under which the current ref snapshot was taken. */
interface RefMetadata {
  url: string;
  timestamp: number;
  selectorScope?: string;
  interactiveOnly: boolean;
  limit: number;
  version: number;
  frameContext?: string; // Records which frame the snapshot was taken in (name or URL), undefined = main page
  mode?: string; // Snapshot mode used (e.g. "form", "dialog", "navigation"), undefined = no mode (legacy interactiveOnly behavior)
}

/** Ref nodes from the most recent snapshot, keyed by string (presumably the node's `ref` — confirm at the write site). */
let currentRefMap: Record<string, RefNode> = {};

/** Snapshot version counter; reset to 0 by closeBrowser. */
let refVersion = 0;

/** Metadata for the most recent ref snapshot, or null when none exists. */
let refMetadata: RefMetadata | null = null;

/** Timeline of tool actions; created with a bound of 60 (presumably max retained entries — see core.js). */
const actionTimeline = createActionTimeline(60);
142
+
143
/** Condensed state of one watched selector inside a compact page state. */
interface CompactSelectorState {
  exists: boolean;
  visible: boolean;
  value: string;
  /** null when the checked state does not apply or could not be read (TODO confirm at the capture site). */
  checked: boolean | null;
  text: string;
}

/** Condensed description of a page, stored before and after an action. */
interface CompactPageState {
  url: string;
  title: string;
  focus: string;
  headings: string[];
  bodyText: string;
  counts: {
    landmarks: number;
    buttons: number;
    links: number;
    inputs: number;
  };
  dialog: {
    count: number;
    title: string;
  };
  selectorStates: Record<string, CompactSelectorState>;
}

/** Compact state recorded before the most recent action; null until one is captured. */
let lastActionBeforeState: CompactPageState | null = null;

/** Compact state recorded after the most recent action; null until one is captured. */
let lastActionAfterState: CompactPageState | null = null;

/** Absolute directory under the process working directory where session artifacts are stored. */
const ARTIFACT_ROOT = path.resolve(process.cwd(), ".artifacts", "browser");

/** File name of the HAR recording inside a session artifact directory. */
const HAR_FILENAME = "session.har";
175
+
176
/** Bookkeeping for a trace session that is currently in progress. */
interface TraceSessionState {
  startedAt: number;
  name: string;
  title?: string;
  path?: string;
}

/** Bookkeeping for HAR recording and exports of the current session. */
interface HarState {
  enabled: boolean;
  /** True when recording was set up as part of creating the browser context. */
  configuredAtContextCreation: boolean;
  path: string | null;
  exportCount: number;
  lastExportedPath: string | null;
  lastExportedAt: number | null;
}

/** Timestamp at which the session started; null while no session exists. */
let sessionStartedAt: number | null = null;

/** Directory holding this session's artifacts; null until it is first needed. */
let sessionArtifactDir: string | null = null;

/** The trace session currently in progress, if any. */
let activeTraceSession: TraceSessionState | null = null;

/** HAR bookkeeping; starts disabled and is replaced wholesale when a session starts or ends. */
let harState: HarState = {
  enabled: false,
  configuredAtContextCreation: false,
  path: null,
  exportCount: 0,
  lastExportedPath: null,
  lastExportedAt: null,
};
203
+
204
+ // ---------------------------------------------------------------------------
205
+ // Helpers
206
+ // ---------------------------------------------------------------------------
207
+
208
/**
 * Tells whether a request's resource type is one whose in-flight count is tracked.
 *
 * @param resourceType - Resource type string reported for a request.
 * @returns true for "document", "fetch" and "xhr"; false for anything else.
 */
function isCriticalResourceType(resourceType: string): boolean {
  switch (resourceType) {
    case "document":
    case "fetch":
    case "xhr":
      return true;
    default:
      return false;
  }
}
211
+
212
/**
 * Adjusts the in-flight critical request count for a page.
 *
 * @param p - Page whose counter is adjusted; a page with no counter is treated as 0.
 * @param delta - Amount to add (negative to subtract). The stored value never drops below 0.
 */
function updatePendingCriticalRequests(p: Page, delta: number): void {
  const adjusted = (pendingCriticalRequestsByPage.get(p) ?? 0) + delta;
  // Clamp so an unmatched decrement cannot leave the counter negative.
  pendingCriticalRequestsByPage.set(p, Math.max(0, adjusted));
}
216
+
217
/**
 * Reads the in-flight critical request count for a page.
 *
 * @param p - Page to look up.
 * @returns The stored count, or 0 when the page has no counter.
 */
function getPendingCriticalRequests(p: Page): number {
  const tracked = pendingCriticalRequestsByPage.get(p);
  return tracked ?? 0;
}
220
+
221
/**
 * Attach all event listeners to a page. Called on the initial page and on new tabs.
 *
 * Registers handlers for console output, uncaught page errors, the request
 * lifecycle, JS dialogs, frame detachment and page close. Everything recorded is
 * pushed into the module-level log buffers and tagged with `pageId`.
 *
 * @param p - Page to instrument.
 * @param pageId - Registry id stamped on every entry produced by this page.
 */
function attachPageListeners(p: Page, pageId: number): void {
  pendingCriticalRequestsByPage.set(p, 0);

  // A finished or failed critical request is no longer in flight.
  const releaseIfCritical = (resourceType: string): void => {
    if (isCriticalResourceType(resourceType)) {
      updatePendingCriticalRequests(p, -1);
    }
  };

  // Console messages
  p.on("console", (message) => {
    const entry: ConsoleEntry = {
      type: message.type(),
      text: message.text(),
      timestamp: Date.now(),
      url: p.url(),
      pageId,
    };
    logPusher(consoleLogs, entry);
  });

  // Uncaught JS errors share the console buffer under the "pageerror" type
  p.on("pageerror", (error) => {
    const entry: ConsoleEntry = {
      type: "pageerror",
      text: error.message,
      timestamp: Date.now(),
      url: p.url(),
      pageId,
    };
    logPusher(consoleLogs, entry);
  });

  // Network requests — start
  p.on("request", (request) => {
    if (isCriticalResourceType(request.resourceType())) {
      updatePendingCriticalRequests(p, 1);
    }
  });

  // Network requests — completed
  p.on("requestfinished", async (request) => {
    // Release the counter before any await so waiters see the update immediately.
    releaseIfCritical(request.resourceType());
    try {
      const response = await request.response();
      const status = response?.status() ?? null;
      const entry: NetworkEntry = {
        method: request.method(),
        url: request.url(),
        status,
        resourceType: request.resourceType(),
        timestamp: Date.now(),
        failed: false,
        pageId,
      };
      if (response && status !== null && status >= 400) {
        try {
          const bodyText = await response.text();
          entry.responseBody = bodyText.slice(0, 2000);
        } catch {
          // Body unavailable — record the entry without it.
        }
      }
      logPusher(networkLogs, entry);
    } catch {
      // Best-effort logging: a failure to read the response drops this entry.
    }
  });

  // Network requests — failed
  p.on("requestfailed", (request) => {
    releaseIfCritical(request.resourceType());
    const entry: NetworkEntry = {
      method: request.method(),
      url: request.url(),
      status: null,
      resourceType: request.resourceType(),
      timestamp: Date.now(),
      failed: true,
      failureText: request.failure()?.errorText ?? "Unknown failure",
      pageId,
    };
    logPusher(networkLogs, entry);
  });

  // Auto-handle JS dialogs (alert, confirm, prompt, beforeunload)
  p.on("dialog", async (dialog) => {
    const entry: DialogEntry = {
      type: dialog.type(),
      message: dialog.message(),
      timestamp: Date.now(),
      url: p.url(),
      defaultValue: dialog.defaultValue() || undefined,
      accepted: true,
      pageId,
    };
    logPusher(dialogLogs, entry);
    // Auto-accept all dialogs to prevent page freezes
    await dialog.accept().catch(() => {});
  });

  // Frame detach handler — clears activeFrame if the selected frame detaches
  p.on("framedetached", (detached) => {
    if (activeFrame === detached) {
      activeFrame = null;
    }
  });

  // Page close handler — removes page from registry and handles active fallback
  p.on("close", () => {
    try {
      registryRemovePage(pageRegistry, pageId);
    } catch {
      // Page already removed (e.g. during closeBrowser)
    }
  });
}
325
+
326
/**
 * Returns the shared browser session, launching it on first use.
 *
 * On first use this prepares the session artifact directory, launches a headed
 * Chromium with HAR recording into that directory, opens the initial page,
 * registers it as active, and starts tracking pages opened later.
 *
 * If any step of the launch fails, the partially created session is torn down
 * via closeBrowser() before the error is rethrown. Without that, a failure after
 * `chromium.launch` would leave either a leaked browser process (overwritten by
 * the next call) or a browser/context pair with no registered page.
 *
 * @returns The browser, its context and the currently active page.
 * @throws Whatever the failing launch step threw, after cleanup.
 */
async function ensureBrowser(): Promise<{ browser: Browser; context: BrowserContext; page: Page }> {
  if (browser && context) {
    return { browser, context, page: getActivePage() };
  }

  try {
    const startedAt = ensureSessionStartedAt();
    const artifactDir = await ensureSessionArtifactDir();
    const sessionHarPath = path.join(artifactDir, HAR_FILENAME);
    harState = {
      enabled: true,
      configuredAtContextCreation: true,
      path: sessionHarPath,
      exportCount: 0,
      lastExportedPath: null,
      lastExportedAt: null,
    };

    // Lazy import so playwright is only loaded when actually needed
    const { chromium } = await import("playwright");

    const launchedBrowser = await chromium.launch({ headless: false });
    // Publish immediately so the cleanup path below can close it on a later failure.
    browser = launchedBrowser;
    const createdContext = await launchedBrowser.newContext({
      deviceScaleFactor: 2,
      viewport: { width: 1280, height: 800 },
      recordHar: {
        path: sessionHarPath,
        mode: "minimal",
        content: "omit",
      },
    });
    context = createdContext;
    sessionStartedAt = startedAt;
    sessionArtifactDir = artifactDir;

    const initialPage = await createdContext.newPage();
    const pageEntry = registryAddPage(pageRegistry, {
      page: initialPage,
      title: await initialPage.title().catch(() => ""),
      url: initialPage.url(),
      opener: null,
    });
    registrySetActive(pageRegistry, pageEntry.id);
    attachPageListeners(initialPage, pageEntry.id);

    // Register new pages (popups, target="_blank", window.open) but do NOT auto-switch
    createdContext.on("page", (newPage) => {
      // Determine opener page ID — find which registry page opened this one
      const openerPage = newPage.opener();
      let openerId: number | null = null;
      if (openerPage) {
        const openerEntry = pageRegistry.pages.find((e: any) => e.page === openerPage);
        if (openerEntry) openerId = openerEntry.id;
      }
      const entry = registryAddPage(pageRegistry, {
        page: newPage,
        title: "",
        url: newPage.url(),
        opener: openerId,
      });
      attachPageListeners(newPage, entry.id);
      // Update title once loaded
      newPage.waitForLoadState("domcontentloaded", { timeout: 5000 })
        .then(() => newPage.title())
        .then((title) => { entry.title = title; })
        .catch(() => {});
    });

    return { browser: launchedBrowser, context: createdContext, page: getActivePage() };
  } catch (error) {
    // Close anything that was launched and reset session state so the next call starts clean.
    await closeBrowser();
    throw error;
  }
}
393
+
394
/**
 * Get the currently active page from the registry.
 *
 * @returns The `page` of whatever entry registryGetActive yields for the page registry.
 */
function getActivePage(): Page {
  const activeEntry = registryGetActive(pageRegistry);
  return activeEntry.page;
}
398
+
399
/**
 * Get the active target for interactions.
 *
 * @returns The selected frame when one is active, otherwise the active page.
 */
function getActiveTarget(): Page | Frame {
  if (activeFrame != null) {
    return activeFrame;
  }
  return getActivePage();
}
403
+
404
/**
 * Safe accessor for error handling.
 *
 * @returns The active page, or null when getActivePage() throws.
 */
function getActivePageOrNull(): Page | null {
  let page: Page | null = null;
  try {
    page = getActivePage();
  } catch {
    page = null;
  }
  return page;
}
412
+
413
/**
 * Shuts down the browser session and resets every piece of module-level session state.
 *
 * The context is closed before the browser: the context is created with
 * `recordHar`, and Playwright only guarantees the HAR file is saved once
 * `browserContext.close()` has been awaited. Errors from either close call are
 * ignored so the state reset below always runs.
 */
async function closeBrowser(): Promise<void> {
  if (context) {
    // Closing the context lets Playwright write the session HAR before the browser goes away.
    await context.close().catch(() => {});
  }
  if (browser) {
    await browser.close().catch(() => {});
  }

  // Browser handles and page tracking
  browser = null;
  context = null;
  pageRegistry.pages = [];
  pageRegistry.activePageId = null;
  pageRegistry.nextId = 1;
  activeFrame = null;

  // Buffered logs
  consoleLogs = [];
  networkLogs = [];
  dialogLogs = [];

  // Ref snapshot state
  currentRefMap = {};
  refVersion = 0;
  refMetadata = null;

  // Action tracking
  lastActionBeforeState = null;
  lastActionAfterState = null;
  actionTimeline.entries = [];
  actionTimeline.nextId = 1;

  // Session artifacts
  sessionStartedAt = null;
  sessionArtifactDir = null;
  activeTraceSession = null;
  harState = {
    enabled: false,
    configuredAtContextCreation: false,
    path: null,
    exportCount: 0,
    lastExportedPath: null,
    lastExportedAt: null,
  };
}
445
+
446
/**
 * Truncates text with truncateHead using the default line and byte limits.
 *
 * @param text - Text to bound.
 * @returns The (possibly shortened) content; when shortened, a trailing note
 *   states how many of the total lines are shown.
 */
function truncateText(text: string): string {
  const outcome = truncateHead(text, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  });
  if (!outcome.truncated) {
    return outcome.content;
  }
  const notice = `\n\n[Output truncated: ${outcome.outputLines}/${outcome.totalLines} lines shown]`;
  return outcome.content + notice;
}
459
+
460
/**
 * Formats a timestamp for use inside artifact file and directory names.
 *
 * @param timestamp - Milliseconds since the Unix epoch.
 * @returns The ISO-8601 UTC string with every ":" and "." replaced by "-".
 */
function formatArtifactTimestamp(timestamp: number): string {
  const iso = new Date(timestamp).toISOString();
  return iso.replace(/[:.]/g, "-");
}
463
+
464
/**
 * Creates a directory, including any missing parents.
 *
 * @param dirPath - Directory to create.
 * @returns The same `dirPath`, for chaining.
 */
async function ensureDir(dirPath: string): Promise<string> {
  const options = { recursive: true };
  await mkdir(dirPath, options);
  return dirPath;
}
468
+
469
/**
 * Writes content to a file, creating its parent directory first.
 *
 * @param filePath - Destination file.
 * @param content - Text or bytes to write.
 * @returns The destination path and the on-disk size reported by stat after the write.
 */
async function writeArtifactFile(filePath: string, content: string | Uint8Array): Promise<{ path: string; bytes: number }> {
  const parentDir = path.dirname(filePath);
  await ensureDir(parentDir);
  await writeFile(filePath, content);
  const { size } = await stat(filePath);
  return { path: filePath, bytes: size };
}
475
+
476
/**
 * Copies a file, creating the destination's parent directory first.
 *
 * @param sourcePath - Existing file to copy.
 * @param destinationPath - Where the copy is written.
 * @returns The destination path and the on-disk size reported by stat after the copy.
 */
async function copyArtifactFile(sourcePath: string, destinationPath: string): Promise<{ path: string; bytes: number }> {
  const parentDir = path.dirname(destinationPath);
  await ensureDir(parentDir);
  await copyFile(sourcePath, destinationPath);
  const { size } = await stat(destinationPath);
  return { path: destinationPath, bytes: size };
}
482
+
483
/**
 * Returns the session start timestamp, recording the current time on first call.
 *
 * @returns The stored start time in milliseconds since the Unix epoch.
 */
function ensureSessionStartedAt(): number {
  if (sessionStartedAt) {
    return sessionStartedAt;
  }
  const now = Date.now();
  sessionStartedAt = now;
  return now;
}
487
+
488
/**
 * Returns the session artifact directory, choosing it on first call and making
 * sure it exists on disk on every call.
 *
 * The directory lives under ARTIFACT_ROOT and is named from the formatted
 * session start timestamp followed by "-session".
 *
 * @returns Path of the session artifact directory.
 */
async function ensureSessionArtifactDir(): Promise<string> {
  if (!sessionArtifactDir) {
    const stamp = formatArtifactTimestamp(ensureSessionStartedAt());
    sessionArtifactDir = path.join(ARTIFACT_ROOT, `${stamp}-session`);
  }
  // Re-create on every call in case the directory was removed after it was chosen.
  await ensureDir(sessionArtifactDir);
  return sessionArtifactDir;
}
498
+
499
/**
 * Builds a path for a file inside the session artifact directory.
 *
 * @param filename - File name to place in the directory.
 * @returns The joined path.
 * @throws Error when the session artifact directory has not been set yet.
 */
function buildSessionArtifactPath(filename: string): string {
  const baseDir = sessionArtifactDir;
  if (baseDir) {
    return path.join(baseDir, filename);
  }
  throw new Error("browser session artifact directory is not initialized");
}
505
+
506
/**
 * Summarises the active page as recorded in the registry (no live page queries).
 *
 * @returns The entry's id, title and url; `null` / empty strings when there is
 *   no active page id or no registry entry matches it.
 */
function getActivePageMetadata() {
  const activeId = pageRegistry.activePageId;
  const activeEntry = activeId === null
    ? null
    : pageRegistry.pages.find((entry: any) => entry.id === activeId) ?? null;
  const id = activeEntry?.id ?? null;
  const title = activeEntry?.title ?? "";
  const url = activeEntry?.url ?? "";
  return { id, title, url };
}
516
+
517
/**
 * Summarises the selected frame.
 *
 * @returns The frame's name and url, each replaced by null when empty; both are
 *   null when no frame is selected.
 */
function getActiveFrameMetadata() {
  const frame = activeFrame;
  if (frame) {
    return {
      name: frame.name() || null,
      url: frame.url() || null,
    };
  }
  return { name: null, url: null };
}
526
+
527
/**
 * Collects the session's artifact bookkeeping into one object.
 *
 * @returns Artifact root, session start time and directory, the active trace
 *   session, a shallow copy of the HAR state, and active page / frame metadata.
 */
function getSessionArtifactMetadata() {
  // Copy so callers cannot mutate the module-level HAR state through the result.
  const harSnapshot = { ...harState };
  const activePage = getActivePageMetadata();
  const activeFrameInfo = getActiveFrameMetadata();
  return {
    artifactRoot: ARTIFACT_ROOT,
    sessionStartedAt,
    sessionArtifactDir,
    activeTraceSession,
    harState: harSnapshot,
    activePage,
    activeFrame: activeFrameInfo,
  };
}
538
+
539
/**
 * Turns an arbitrary label into a name safe for artifact files.
 *
 * Surrounding whitespace is trimmed, each run of characters outside
 * `a-z A-Z 0-9 . _ -` becomes a single "-", and leading/trailing dashes are removed.
 *
 * @param value - Label to sanitise.
 * @param fallback - Returned when nothing is left after sanitising.
 * @returns The sanitised name, or `fallback` when it would be empty.
 */
function sanitizeArtifactName(value: string, fallback: string): string {
  const trimmed = value.trim();
  const dashed = trimmed.replace(/[^a-zA-Z0-9._-]+/g, "-");
  const stripped = dashed.replace(/^-+|-+$/g, "");
  if (stripped) {
    return stripped;
  }
  return fallback;
}
543
+
544
/**
 * Refreshes the title and url stored for every registered page, then lists them.
 *
 * Launches the browser if needed. Pages are refreshed one at a time; a page that
 * fails to report keeps whatever values were stored before the failure.
 *
 * @returns The result of registryListPages for the page registry.
 */
async function getLivePagesSnapshot() {
  await ensureBrowser();
  for (const tracked of pageRegistry.pages) {
    try {
      const liveTitle = await tracked.page.title();
      tracked.title = liveTitle;
      tracked.url = tracked.page.url();
    } catch {
      // Page may have been closed between snapshots.
    }
  }
  return registryListPages(pageRegistry);
}
556
+
557
/**
 * Decides which part of the page an accessibility snapshot should cover.
 *
 * Precedence: an explicit non-blank selector, then an open dialog, then the
 * selected frame's body, then the whole page body.
 *
 * @param selector - Optional selector supplied by the caller; blank values are ignored.
 * @returns The selector to snapshot, a human-readable scope label, and a
 *   machine-readable source tag.
 */
async function resolveAccessibilityScope(selector?: string): Promise<{ selector?: string; scope: string; source: string }> {
  const explicit = selector?.trim();
  if (explicit) {
    return { selector: explicit, scope: `selector:${explicit}`, source: "explicit_selector" };
  }

  const target = getActiveTarget();
  // A failure while counting dialogs is treated as "no dialog open".
  const openDialogs = await countOpenDialogs(target).catch(() => 0);
  if (openDialogs > 0) {
    return { selector: '[role="dialog"]:not([hidden]),dialog[open]', scope: "active dialog", source: "active_dialog" };
  }

  if (activeFrame) {
    const frameName = activeFrame.name();
    const scope = frameName ? `active frame:${frameName}` : "active frame";
    return { selector: "body", scope, source: "active_frame" };
  }

  return { selector: "body", scope: "full page", source: "full_page" };
}
571
+
572
/**
 * Takes an ARIA snapshot of the resolved accessibility scope on the active target.
 *
 * @param selector Optional CSS selector forwarded to resolveAccessibilityScope.
 * @returns The snapshot text plus the scope label and source tag that were resolved.
 */
async function captureAccessibilityMarkdown(selector?: string): Promise<{ snapshot: string; scope: string; source: string }> {
  const activeTarget = getActiveTarget();
  const { selector: scopedSelector, scope, source } = await resolveAccessibilityScope(selector);
  // Only the first match of the scope selector is snapshotted.
  const snapshot = await activeTarget.locator(scopedSelector ?? "body").first().ariaSnapshot();
  return { snapshot, scope, source };
}
579
+
580
/**
 * Opens a new entry on the shared action timeline.
 *
 * @param tool      Name of the tool performing the action.
 * @param params    Raw tool parameters; only their summary is stored.
 * @param beforeUrl Page URL at the moment the action starts.
 * @returns Whatever beginAction returns for the new entry.
 */
function beginTrackedAction(tool: string, params: unknown, beforeUrl: string) {
  const paramsSummary = toActionParamsSummary(params);
  return beginAction(actionTimeline, { tool, paramsSummary, beforeUrl });
}
587
+
588
/**
 * Closes an entry on the shared action timeline by forwarding the outcome to finishAction.
 *
 * @param actionId Identifier of the action to close.
 * @param updates  Final status plus optional URL, summaries, error text and before/after page state.
 * @returns Whatever finishAction returns.
 */
function finishTrackedAction(
  actionId: number,
  updates: {
    status: "success" | "error";
    afterUrl?: string;
    verificationSummary?: string;
    warningSummary?: string;
    diffSummary?: string;
    changed?: boolean;
    error?: string;
    beforeState?: CompactPageState;
    afterState?: CompactPageState;
  }
) {
  const finished = finishAction(actionTimeline, actionId, updates);
  return finished;
}
604
+
605
/**
 * Resolves an action id to the timestamp at which that action started.
 *
 * @param sinceActionId Action id to look up; a missing or falsy id (including 0) means "from the beginning".
 * @returns The action's `startedAt`, or 0 when the id is absent, unknown, or has no start time.
 */
function getSinceTimestamp(sinceActionId?: number): number {
  return sinceActionId ? findAction(actionTimeline, sinceActionId)?.startedAt ?? 0 : 0;
}
611
+
612
/**
 * Lists console log entries recorded at or after the start of the given action.
 * Without an action id every recorded entry with a timestamp >= 0 qualifies.
 */
function getConsoleEntriesSince(sinceActionId?: number): ConsoleEntry[] {
  const cutoff = getSinceTimestamp(sinceActionId);
  const recent: ConsoleEntry[] = [];
  for (const entry of consoleLogs) {
    if (entry.timestamp >= cutoff) recent.push(entry);
  }
  return recent;
}
616
+
617
/**
 * Lists network log entries recorded at or after the start of the given action.
 * Without an action id every recorded entry with a timestamp >= 0 qualifies.
 */
function getNetworkEntriesSince(sinceActionId?: number): NetworkEntry[] {
  const cutoff = getSinceTimestamp(sinceActionId);
  const recent: NetworkEntry[] = [];
  for (const entry of networkLogs) {
    if (entry.timestamp >= cutoff) recent.push(entry);
  }
  return recent;
}
621
+
622
/**
 * Captures a compact, serializable summary of the current page state.
 *
 * DOM facts (focus, headings, element counts, dialog info, per-selector state) are read inside
 * `options.target` (a page or frame; defaults to `p`), while `url` and `title` always come from `p`.
 *
 * @param p       Page that supplies the reported URL and title.
 * @param options `selectors`: CSS selectors to probe — falsy entries and duplicates are dropped, and a
 *                selector that is invalid or matches nothing is reported as non-existent.
 *                `includeBodyText`: only when exactly `true`, include up to 4000 characters of
 *                whitespace-collapsed body text.
 *                `target`: where the DOM is evaluated.
 * @returns The compact page state. Rejects if the in-page evaluation or the title lookup fails.
 */
async function captureCompactPageState(
  p: Page,
  options: { selectors?: string[]; includeBodyText?: boolean; target?: Page | Frame } = {}
): Promise<CompactPageState> {
  const selectors = Array.from(new Set((options.selectors ?? []).filter(Boolean)));
  const target = options.target ?? p;
  // Everything inside this callback runs in the browser and must not reference outer variables.
  const domState = await target.evaluate(({ selectors, includeBodyText }) => {
    const selectorStates: Record<string, CompactSelectorState> = {};
    for (const selector of selectors) {
      let el: Element | null = null;
      try {
        el = document.querySelector(selector);
      } catch {
        // Invalid selector syntax is reported the same way as "no match".
        el = null;
      }
      if (!el) {
        selectorStates[selector] = {
          exists: false,
          visible: false,
          value: "",
          checked: null,
          text: "",
        };
        continue;
      }
      const htmlEl = el as HTMLElement;
      const style = window.getComputedStyle(htmlEl);
      const rect = htmlEl.getBoundingClientRect();
      const visible = style.display !== "none" && style.visibility !== "hidden" && rect.width > 0 && rect.height > 0;
      const input = el as HTMLInputElement;
      selectorStates[selector] = {
        exists: true,
        visible,
        value:
          el instanceof HTMLInputElement ||
          el instanceof HTMLTextAreaElement ||
          el instanceof HTMLSelectElement
            ? el.value
            : htmlEl.getAttribute("value") || "",
        // `checked` is only meaningful for checkboxes and radios; everything else reports null.
        checked: el instanceof HTMLInputElement && ["checkbox", "radio"].includes(input.type) ? input.checked : null,
        text: (htmlEl.innerText || htmlEl.textContent || "").trim().replace(/\s+/g, " ").slice(0, 160),
      };
    }

    const focused = document.activeElement as HTMLElement | null;
    const focusedDesc = focused && focused !== document.body && focused !== document.documentElement
      ? `${focused.tagName.toLowerCase()}${focused.id ? '#' + focused.id : ''}${focused.getAttribute('aria-label') ? ' "' + focused.getAttribute('aria-label') + '"' : ''}`
      : "";
    const headings = Array.from(document.querySelectorAll('h1,h2,h3')).slice(0, 5).map((h) => (h.textContent || '').trim().replace(/\s+/g, ' ').slice(0, 80));
    const dialog = document.querySelector('[role="dialog"]:not([hidden]),dialog[open]');
    // The title element may be matched through its aria-label alone (e.g. an icon-only control),
    // in which case it has no text of its own: fall back to the label instead of an empty title.
    const dialogTitleEl = dialog?.querySelector('[role="heading"],[aria-label]') ?? null;
    const dialogTitle = (
      dialogTitleEl?.textContent?.trim() ||
      dialogTitleEl?.getAttribute("aria-label")?.trim() ||
      ""
    ).slice(0, 80);
    const bodyText = includeBodyText
      ? (document.body?.innerText || document.body?.textContent || "").trim().replace(/\s+/g, ' ').slice(0, 4000)
      : "";
    return {
      url: window.location.href,
      title: document.title,
      focus: focusedDesc,
      headings,
      bodyText,
      counts: {
        landmarks: document.querySelectorAll('[role="main"],[role="banner"],[role="navigation"],[role="contentinfo"],[role="complementary"],[role="search"],[role="form"],[role="dialog"],[role="alert"],main,header,nav,footer,aside,section,form,dialog').length,
        buttons: document.querySelectorAll('button,[role="button"]').length,
        links: document.querySelectorAll('a[href]').length,
        inputs: document.querySelectorAll('input,textarea,select').length,
      },
      dialog: {
        count: document.querySelectorAll('[role="dialog"]:not([hidden]),dialog[open]').length,
        title: dialogTitle,
      },
      selectorStates,
    };
  }, { selectors, includeBodyText: options.includeBodyText === true });
  // URL and title always come from the Page, not the frame
  return { ...domState, url: p.url(), title: await p.title() };
}
698
+
699
/**
 * Renders a compact page state as a short multi-line text summary.
 *
 * Title, URL and element counts are always present; headings, focused element and active dialog
 * lines appear only when they carry content. Heading labels are positional (H1, H2, ... by list
 * order), not the heading's tag level. The summary ends with a pointer to the detailed tools.
 */
function formatCompactStateSummary(state: CompactPageState): string {
  const { counts, headings, focus, dialog } = state;
  const summary: string[] = [
    `Title: ${state.title}`,
    `URL: ${state.url}`,
    `Elements: ${counts.landmarks} landmarks, ${counts.buttons} buttons, ${counts.links} links, ${counts.inputs} inputs`,
  ];
  if (headings.length > 0) {
    const labelled = headings.map((text, position) => `H${position + 1} "${text}"`);
    summary.push("Headings: " + labelled.join(", "));
  }
  if (focus) summary.push(`Focused: ${focus}`);
  if (dialog.title) summary.push(`Active dialog: "${dialog.title}"`);
  summary.push("Use browser_find for targeted discovery, browser_assert for verification, or browser_get_accessibility_tree for full detail.");
  return summary.join("\n");
}
716
+
717
+ // ---------------------------------------------------------------------------
718
+ // Post-action helpers
719
+ // ---------------------------------------------------------------------------
720
+
721
/**
 * Short page summary produced after an action (roughly 50-150 tokens rather than the full tree).
 * Any failure while capturing or formatting the state yields the placeholder "[summary unavailable]".
 *
 * @param p      Page that supplies URL and title.
 * @param target Optional page/frame whose DOM is inspected.
 */
async function postActionSummary(p: Page, target?: Page | Frame): Promise<string> {
  return captureCompactPageState(p, { target })
    .then((state) => formatCompactStateSummary(state))
    .catch(() => "[summary unavailable]");
}
730
+
731
/**
 * Grabs a quality-60 JPEG of the page for error debugging.
 *
 * @param p Page to capture; a missing page short-circuits to null.
 * @returns Base64 image data with its MIME type, or null when there is no page or the capture fails.
 */
async function captureErrorScreenshot(p: Page | null): Promise<{ data: string; mimeType: string } | null> {
  if (!p) return null;
  try {
    const jpeg = await p.screenshot({ type: "jpeg", quality: 60 });
    const data = jpeg.toString("base64");
    return { data, mimeType: "image/jpeg" };
  } catch {
    // Screenshots are best-effort diagnostics; never let them mask the original error.
    return null;
  }
}
741
+
742
/**
 * Builds a compact warning suffix from console, network and dialog activity of the last 12 seconds
 * that belongs to the current page's origin.
 * Full diagnostics stay pull-based via browser_get_console_logs/network_logs/dialog_logs.
 *
 * @param pageUrl URL of the current page; if it cannot be parsed, entries from every origin are kept.
 * @returns An empty string when there is nothing to report, otherwise a "Warnings: ..." block
 *          preceded by a blank line.
 */
function getRecentErrors(pageUrl: string): string {
  const cutoff = Date.now() - 12_000;

  const originOf = (url: string): string | null => {
    try {
      return new URL(url).origin;
    } catch {
      return null;
    }
  };
  const pageOrigin = originOf(pageUrl);
  const sameOrigin = (url: string): boolean => (pageOrigin ? originOf(url) === pageOrigin : true);

  // Collapses duplicates into "text (xN)", keeps first-seen order, and caps the list at `max` items.
  const collapse = (items: string[], max: number): string[] => {
    const tally = new Map<string, number>();
    for (const item of items) {
      tally.set(item, (tally.get(item) ?? 0) + 1);
    }
    return Array.from(tally.entries())
      .slice(0, max)
      .map(([item, count]) => (count > 1 ? `${item} (x${count})` : item));
  };

  const sections: string[] = [];

  const jsWarnings: string[] = [];
  for (const entry of consoleLogs) {
    const isError = entry.type === "error" || entry.type === "pageerror";
    if (isError && entry.timestamp >= cutoff && sameOrigin(entry.url)) {
      jsWarnings.push(entry.text.slice(0, 120));
    }
  }
  if (jsWarnings.length > 0) {
    sections.push("JS: " + collapse(jsWarnings, 2).join(" | "));
  }

  const actionableStatus = new Set([401, 403, 404, 408, 409, 422, 429]);
  const actionableTypes = new Set(["document", "fetch", "xhr", "script"]);
  const netWarnings: string[] = [];
  for (const entry of networkLogs) {
    if (!(entry.timestamp >= cutoff) || !sameOrigin(entry.url)) continue;

    let actionable: boolean;
    if (entry.failed) {
      actionable = actionableTypes.has(entry.resourceType);
    } else if (entry.status === null) {
      actionable = false;
    } else if (entry.status >= 500) {
      // Server errors are reported for every resource type.
      actionable = true;
    } else {
      actionable = actionableStatus.has(entry.status) && actionableTypes.has(entry.resourceType);
    }
    if (!actionable) continue;

    netWarnings.push(
      entry.failed
        ? `${entry.method} ${entry.resourceType} FAILED`
        : `${entry.method} ${entry.resourceType} ${entry.status}`
    );
  }
  if (netWarnings.length > 0) {
    sections.push("Network: " + collapse(netWarnings, 2).join(" | "));
  }

  const dialogWarnings: string[] = [];
  for (const entry of dialogLogs) {
    if (entry.timestamp >= cutoff && sameOrigin(entry.url)) {
      dialogWarnings.push(`${entry.type}: ${entry.message.slice(0, 80)}`);
    }
  }
  if (dialogWarnings.length > 0) {
    sections.push("Dialogs: " + collapse(dialogWarnings, 1).join(" | "));
  }

  if (sections.length === 0) return "";
  return `\n\nWarnings: ${sections.join("; ")}\nUse browser_get_console_logs/browser_get_network_logs for full diagnostics.`;
}
809
+
810
/** Tuning knobs accepted by settleAfterActionAdaptive; every field is optional. */
interface AdaptiveSettleOptions {
  /** Upper bound on the whole wait, in ms. settleAfterActionAdaptive defaults it to 500 and enforces a minimum of 150. */
  timeoutMs?: number;
  /** Delay between polls, in ms. Defaults to 40 and is clamped to the 20-100 range. */
  pollMs?: number;
  /** How long (ms) without observed activity counts as settled. Defaults to 100, with a minimum of 60. */
  quietWindowMs?: number;
  /** When true, a change of the focused element also counts as activity. Defaults to false. */
  checkFocusStability?: boolean;
}
816
+
817
/** Outcome reported by settleAfterActionAdaptive. */
interface AdaptiveSettleDetails {
  /** Always "adaptive" — the only literal this type allows. */
  settleMode: "adaptive";
  /** Milliseconds elapsed between the start of the settle wait and its return. */
  settleMs: number;
  /**
   * Why the wait ended: the quiet window was reached ("dom_quiet"), it was reached after a URL change
   * had been seen ("url_changed_then_quiet"), or the overall timeout expired ("timeout_fallback").
   */
  settleReason: "dom_quiet" | "url_changed_then_quiet" | "timeout_fallback";
  /** Number of poll iterations performed. */
  settlePolls: number;
}
823
+
824
/**
 * Installs, at most once per document, a MutationObserver that counts DOM mutations in
 * `window.__piMutationCounter`. The observer watches child-list, attribute and character-data
 * changes in the whole subtree of the document element (or the body when there is none).
 *
 * @param p Page in which the counter is installed. Rejects if the in-page evaluation fails.
 */
async function ensureMutationCounter(p: Page): Promise<void> {
  // Runs in the browser: must stay self-contained.
  await p.evaluate(() => {
    const COUNTER = "__piMutationCounter";
    const INSTALLED = "__piMutationCounterInstalled";
    const globals = window as unknown as Record<string, unknown>;

    if (typeof globals[COUNTER] !== "number") globals[COUNTER] = 0;
    if (!globals[INSTALLED]) {
      const bump = (): void => {
        const seen = globals[COUNTER];
        globals[COUNTER] = (typeof seen === "number" ? seen : 0) + 1;
      };
      new MutationObserver(bump).observe(document.documentElement || document.body, {
        subtree: true,
        childList: true,
        attributes: true,
        characterData: true,
      });
      globals[INSTALLED] = true;
    }
  });
}
844
+
845
/**
 * Reads the in-page mutation counter (`window.__piMutationCounter`).
 *
 * @returns The current count, or 0 when the counter is missing, not a number, or the page cannot be evaluated.
 */
async function readMutationCounter(p: Page): Promise<number> {
  try {
    const count = await p.evaluate(() => {
      const counter = (window as unknown as Record<string, unknown>)["__piMutationCounter"];
      if (typeof counter !== "number") return 0;
      return counter;
    });
    return count;
  } catch {
    return 0;
  }
}
856
+
857
/**
 * Describes the focused element of a page or frame as `tag#id|role|name`.
 * The name is the trimmed `aria-label`, falling back to the `name` attribute.
 *
 * @returns The descriptor, or "" when nothing meaningful is focused (no element, body, or root)
 *          or when the target cannot be evaluated.
 */
async function readFocusedDescriptor(target: Page | Frame): Promise<string> {
  try {
    const descriptor = await target.evaluate(() => {
      const focused = document.activeElement as HTMLElement | null;
      if (!focused || focused === document.body || focused === document.documentElement) return "";
      const tagAndId = focused.tagName.toLowerCase() + (focused.id ? `#${focused.id}` : "");
      const role = focused.getAttribute("role") || "";
      const label = (focused.getAttribute("aria-label") || focused.getAttribute("name") || "").trim();
      return [tagAndId, role, label].join("|");
    });
    return descriptor;
  } catch {
    return "";
  }
}
871
+
872
/**
 * Waits until the page looks settled after an action, polling instead of sleeping a fixed time.
 *
 * Activity is any of: a URL change, a change of the in-page mutation counter, a focus change
 * (only with `checkFocusStability`), or pending critical requests. The wait ends as soon as no
 * activity has been seen for `quietWindowMs`, or when `timeoutMs` expires.
 *
 * @param p    Page to observe.
 * @param opts Optional tuning; values are clamped (timeout >= 150 ms, poll 20-100 ms, quiet window >= 60 ms).
 * @returns How long the wait took, why it ended, and how many polls were made.
 */
async function settleAfterActionAdaptive(
  p: Page,
  opts: AdaptiveSettleOptions = {}
): Promise<AdaptiveSettleDetails> {
  const timeoutMs = Math.max(150, opts.timeoutMs ?? 500);
  const pollMs = Math.min(100, Math.max(20, opts.pollMs ?? 40));
  const quietWindowMs = Math.max(60, opts.quietWindowMs ?? 100);
  const checkFocus = opts.checkFocusStability ?? false;

  const startedAt = Date.now();
  let polls = 0;
  let sawUrlChange = false;
  let lastActivityAt = startedAt;
  let previousUrl = p.url();

  // Best-effort: without the counter the loop still tracks URL, focus and pending requests.
  await ensureMutationCounter(p).catch(() => {});
  let previousMutationCount = await readMutationCounter(p);
  let previousFocus = checkFocus ? await readFocusedDescriptor(p) : "";

  while (Date.now() - startedAt < timeoutMs) {
    await new Promise((resolve) => setTimeout(resolve, pollMs));
    polls += 1;
    const now = Date.now();

    const currentUrl = p.url();
    if (currentUrl !== previousUrl) {
      sawUrlChange = true;
      previousUrl = currentUrl;
      lastActivityAt = now;
      // A navigation replaces the document and with it the observer and its counter.
      // Reinstalling is a no-op when the same document is still alive (e.g. hash-only changes).
      await ensureMutationCounter(p).catch(() => {});
    }

    const currentMutationCount = await readMutationCounter(p);
    // Compare with !== rather than >: after a navigation the counter restarts from 0, and a
    // greater-than check would ignore every mutation until the new count passed the old one.
    if (currentMutationCount !== previousMutationCount) {
      previousMutationCount = currentMutationCount;
      lastActivityAt = now;
    }

    if (checkFocus) {
      const currentFocus = await readFocusedDescriptor(p);
      if (currentFocus !== previousFocus) {
        previousFocus = currentFocus;
        lastActivityAt = now;
      }
    }

    // In-flight critical requests keep the page "busy" even if the DOM is momentarily quiet.
    const pendingCritical = getPendingCriticalRequests(p);
    if (pendingCritical > 0) {
      lastActivityAt = now;
      continue;
    }

    if (now - lastActivityAt >= quietWindowMs) {
      return {
        settleMode: "adaptive",
        settleMs: now - startedAt,
        settleReason: sawUrlChange ? "url_changed_then_quiet" : "dom_quiet",
        settlePolls: polls,
      };
    }
  }

  return {
    settleMode: "adaptive",
    settleMs: Date.now() - startedAt,
    settleReason: "timeout_fallback",
    settlePolls: polls,
  };
}
940
+
941
/** Result of parsing a ref string with parseRef. */
interface ParsedRefSpec {
  /** Ref token, lower-cased and without the leading "@" or any "v<N>:" version prefix. */
  key: string;
  /** Version number taken from a "v<N>:" prefix, or null when the ref carries no version. */
  version: number | null;
  /** Normalized form for messages: "@v<N>:<key>" for versioned refs, "@<key>" otherwise. */
  display: string;
}
946
+
947
/**
 * Parses a ref such as "@e12" or "@v3:e12" into its parts.
 *
 * Input is trimmed and lower-cased, and a leading "@" is optional. Only the exact shape
 * `v<digits>:e<digits>` is treated as versioned; anything else becomes the key as-is with a null version.
 */
function parseRef(input: string): ParsedRefSpec {
  const normalized = input.trim().toLowerCase();
  const token = normalized.startsWith("@") ? normalized.slice(1) : normalized;

  const match = /^v(\d+):(e\d+)$/.exec(token);
  if (!match) {
    return { key: token, version: null, display: `@${token}` };
  }

  const [, versionDigits, key] = match;
  const version = parseInt(versionDigits, 10);
  return { key, version, display: `@v${version}:${key}` };
}
958
+
959
/** Formats a snapshot version and ref key as a versioned ref string, e.g. (3, "e12") -> "@v3:e12". */
function formatVersionedRef(version: number, key: string): string {
  return "@v" + version + ":" + key;
}
962
+
963
/**
 * Builds the message shown when a ref cannot be resolved, telling the caller to refresh refs.
 *
 * @param refDisplay Ref as it should appear in the message.
 * @param reason     Short explanation of why resolution failed.
 */
function staleRefGuidance(refDisplay: string, reason: string): string {
  const sentences = [
    `Ref ${refDisplay} could not be resolved (${reason}).`,
    "The ref is likely stale after DOM/navigation changes.",
    "Call browser_snapshot_refs again to refresh refs.",
  ];
  return sentences.join(" ");
}
966
+
967
/** One observation used to decide whether an action had an effect. */
interface VerificationCheck {
  /** Identifier of the check; names of passed checks are listed in the PASS summary. */
  name: string;
  /** Whether this check passed. */
  passed: boolean;
  /** Presumably the observed value — not read by the helpers in this section; confirm against callers. */
  value?: unknown;
  /** Presumably the expected value — not read by the helpers in this section; confirm against callers. */
  expected?: unknown;
}
973
+
974
/** Aggregated verification outcome, as built by verificationFromChecks. */
interface VerificationResult {
  /** True when at least one check passed. */
  verified: boolean;
  /** The checks the result was derived from. */
  checks: VerificationCheck[];
  /** "PASS (<names of passed checks>)" or "SOFT-FAIL (no observable state change)". */
  verificationSummary: string;
  /** Suggestion for a retry; verificationFromChecks only sets it when verification failed. */
  retryHint?: string;
}
980
+
981
/** State of a click target as captured by captureClickTargetState. */
interface ClickTargetStateSnapshot {
  /** False when the selector matched nothing or the capture failed; all other fields are then null. */
  exists: boolean;
  /** Raw `aria-expanded` attribute value, or null when absent. */
  ariaExpanded: string | null;
  /** Raw `aria-pressed` attribute value, or null when absent. */
  ariaPressed: string | null;
  /** Raw `aria-selected` attribute value, or null when absent. */
  ariaSelected: string | null;
  /** For a <dialog>, its `open` property; for other elements, whether an `open` attribute is present. */
  open: boolean | null;
}
988
+
989
/**
 * Folds individual checks into a verification result: a single passing check is enough to verify.
 *
 * @param checks    Checks that were evaluated.
 * @param retryHint Hint attached to the result only when no check passed.
 */
function verificationFromChecks(checks: VerificationCheck[], retryHint?: string): VerificationResult {
  const passedNames: string[] = [];
  for (const check of checks) {
    if (check.passed) passedNames.push(check.name);
  }

  if (passedNames.length === 0) {
    return {
      verified: false,
      checks,
      verificationSummary: "SOFT-FAIL (no observable state change)",
      retryHint,
    };
  }
  return {
    verified: true,
    checks,
    verificationSummary: `PASS (${passedNames.join(", ")})`,
    retryHint: undefined,
  };
}
1001
+
1002
/** Renders a verification result as the single "Verification: ..." line used in tool output. */
function verificationLine(verification: VerificationResult): string {
  return "Verification: " + verification.verificationSummary;
}
1005
+
1006
/** One assertion requested by the caller; consumed by collectAssertionState and the assertion evaluator. */
interface BrowserAssertionCheckInput {
  /** Assertion type; the accepted values are defined by the evaluator, which is not part of this section. */
  kind: string;
  /** CSS selector the check refers to; collectAssertionState captures the state of every selector given. */
  selector?: string;
  /** Presumably the text to look for — interpretation lives in the evaluator; confirm there. */
  text?: string;
  /** Presumably the expected value — interpretation lives in the evaluator; confirm there. */
  value?: string;
  /** Presumably the expected checked state — interpretation lives in the evaluator; confirm there. */
  checked?: boolean;
  /** Limits console/network entries to those since this action; the largest id across all checks is used. */
  sinceActionId?: number;
}
1014
+
1015
/**
 * Gathers everything the assertion evaluator needs: compact page state (including body text and the
 * state of every selector named by the checks), console/network entries since the newest
 * `sinceActionId` among the checks, the complete logs, and the action timeline.
 *
 * @param p      Page that supplies URL and title.
 * @param checks Assertions about to be evaluated.
 * @param target Optional page/frame whose DOM is inspected.
 */
async function collectAssertionState(
  p: Page,
  checks: BrowserAssertionCheckInput[],
  target?: Page | Frame
): Promise<{
  url: string;
  title: string;
  bodyText: string;
  focus: string;
  selectorStates: Record<string, CompactSelectorState>;
  consoleEntries: ConsoleEntry[];
  networkEntries: NetworkEntry[];
  allConsoleEntries: ConsoleEntry[];
  allNetworkEntries: NetworkEntry[];
  actionTimeline: ReturnType<typeof createActionTimeline>;
}> {
  const selectors: string[] = [];
  for (const { selector } of checks) {
    if (selector) selectors.push(selector);
  }
  const { url, title, bodyText, focus, selectorStates } = await captureCompactPageState(p, {
    selectors,
    includeBodyText: true,
    target,
  });

  // Use the most recent action referenced by any check as the log cutoff.
  let newestActionId: number | undefined;
  for (const { sinceActionId } of checks) {
    if (sinceActionId === undefined) continue;
    newestActionId = newestActionId === undefined ? sinceActionId : Math.max(newestActionId, sinceActionId);
  }

  return {
    url,
    title,
    bodyText,
    focus,
    selectorStates,
    consoleEntries: getConsoleEntriesSince(newestActionId),
    networkEntries: getNetworkEntriesSince(newestActionId),
    allConsoleEntries: consoleLogs,
    allNetworkEntries: networkLogs,
    actionTimeline,
  };
}
1051
+
1052
/**
 * Renders an assertion result as text: the summary line, one PASS/FAIL line for each of the
 * first eight checks (expected vs. actual as JSON), and a closing hint line.
 */
function formatAssertionText(result: ReturnType<typeof evaluateAssertionChecks>): string {
  const detailLines = result.checks
    .slice(0, 8)
    .map((check) => `- ${check.passed ? "PASS" : "FAIL"} ${check.name}: expected ${JSON.stringify(check.expected)}, got ${JSON.stringify(check.actual)}`);
  return [result.summary, ...detailLines, `Hint: ${result.agentHint}`].join("\n");
}
1060
+
1061
/**
 * Renders a state diff as text: the summary line followed by one "type: before → after" line
 * for each of the first eight changes. Missing before/after values are printed as null.
 */
function formatDiffText(diff: ReturnType<typeof diffCompactStates>): string {
  const changeLines = diff.changes
    .slice(0, 8)
    .map((change) => `- ${change.type}: ${JSON.stringify(change.before ?? null)} → ${JSON.stringify(change.after ?? null)}`);
  return [diff.summary, ...changeLines].join("\n");
}
1068
+
1069
/**
 * Extracts the fragment (including the leading "#") from a URL.
 *
 * @returns The fragment, or "" when the URL has none or cannot be parsed.
 */
function getUrlHash(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return "";
  }
  return parsed.hash || "";
}
1076
+
1077
/**
 * Counts open dialogs in a page or frame: elements with `role="dialog"` that are not `hidden`,
 * plus `<dialog open>` elements.
 *
 * @returns The count, or 0 when the target cannot be evaluated.
 */
async function countOpenDialogs(target: Page | Frame): Promise<number> {
  try {
    const openCount = await target.evaluate(() => {
      const openDialogs = document.querySelectorAll('[role="dialog"]:not([hidden]),dialog[open]');
      return openDialogs.length;
    });
    return openCount;
  } catch {
    return 0;
  }
}
1086
+
1087
/**
 * Snapshots the toggle-related state of the first element matching `selector`:
 * its aria-expanded / aria-pressed / aria-selected attributes and whether it is open.
 *
 * @returns The snapshot; an all-null snapshot with `exists: false` when nothing matches or the
 *          target cannot be evaluated.
 */
async function captureClickTargetState(target: Page | Frame, selector: string): Promise<ClickTargetStateSnapshot> {
  try {
    // Runs in the browser: must stay self-contained.
    return await target.evaluate((sel) => {
      const node = document.querySelector(sel) as HTMLElement | null;
      if (node === null) {
        return { exists: false, ariaExpanded: null, ariaPressed: null, ariaSelected: null, open: null };
      }
      const attr = (name: string): string | null => node.getAttribute(name);
      return {
        exists: true,
        ariaExpanded: attr("aria-expanded"),
        ariaPressed: attr("aria-pressed"),
        ariaSelected: attr("aria-selected"),
        // <dialog> exposes a real `open` property; other elements (e.g. <details>) only the attribute.
        open: node instanceof HTMLDialogElement ? node.open : attr("open") !== null,
      };
    }, selector);
  } catch {
    return { exists: false, ariaExpanded: null, ariaPressed: null, ariaSelected: null, open: null };
  }
}
1118
+
1119
/**
 * Reads the current value of an input-like element.
 *
 * The element is the first match of `selector` or, without a selector, the focused element
 * (body and root do not count). Inputs, textareas and selects report `.value`; content-editable
 * elements report their trimmed text; anything else reports its `value` attribute.
 *
 * @returns The value, or null when no element is found, the `value` attribute is absent,
 *          or the target cannot be evaluated.
 */
async function readInputLikeValue(target: Page | Frame, selector?: string): Promise<string | null> {
  try {
    // Runs in the browser: must stay self-contained.
    return await target.evaluate((sel) => {
      let node: Element | null;
      if (sel) {
        node = document.querySelector(sel);
      } else {
        const focused = document.activeElement;
        const nothingFocused = !focused || focused === document.body || focused === document.documentElement;
        node = nothingFocused ? null : focused;
      }
      if (!node) return null;

      if (
        node instanceof HTMLInputElement ||
        node instanceof HTMLTextAreaElement ||
        node instanceof HTMLSelectElement
      ) {
        return node.value;
      }
      const htmlNode = node as HTMLElement;
      return htmlNode.isContentEditable ? (node.textContent ?? "").trim() : htmlNode.getAttribute("value");
    }, selector);
  } catch {
    return null;
  }
}
1146
+
1147
/**
 * Reduces any thrown value to the first line of its message.
 *
 * Objects with a `message` property use that message; everything else is stringified.
 * An empty result — and null/undefined input — becomes "unknown error".
 */
function firstErrorLine(err: unknown): string {
  let message: string;
  if (typeof err === "object" && err !== null && "message" in err) {
    const raw = (err as { message?: unknown }).message;
    message = String(raw ?? "");
  } else {
    message = String(err ?? "unknown error");
  }
  const newlineAt = message.indexOf("\n");
  const head = newlineAt === -1 ? message : message.slice(0, newlineAt);
  return head || "unknown error";
}
1153
+
1154
/**
 * Collects descriptors for elements inside a page or frame, to be turned into refs by the caller.
 *
 * Candidate elements are all descendants of the scope root (the first match of `options.selector`,
 * or the document body). They are narrowed by the snapshot mode when `options.mode` is given —
 * mode takes precedence over `interactiveOnly` — otherwise by `interactiveOnly`. Without either
 * filter the scope root itself is included as the first element. At most `options.limit`
 * descriptors are returned.
 *
 * @param target  Page or frame to inspect.
 * @param options `selector`: optional scope root. `interactiveOnly`: keep only interactive elements
 *                (ignored when a mode is given). `limit`: maximum number of descriptors.
 *                `mode`: optional snapshot mode name resolved through getSnapshotModeConfig.
 * @returns Element descriptors without their `ref` field.
 * @throws Rejects when the scope selector matches nothing ("Selector scope not found: ...")
 *         or when the in-page evaluation fails.
 */
async function buildRefSnapshot(
  target: Page | Frame,
  options: { selector?: string; interactiveOnly: boolean; limit: number; mode?: string }
): Promise<Array<Omit<RefNode, "ref">>> {
  // Resolve mode config in Node context and serialize it as plain data for the evaluate callback
  const modeConfig = options.mode ? getSnapshotModeConfig(options.mode) : null;
  // Everything inside this callback runs in the browser and must not reference outer variables.
  return await target.evaluate(({ selector, interactiveOnly, limit, modeConfig: mc }) => {
    const root = selector ? document.querySelector(selector) : document.body;
    if (!root) {
      throw new Error(`Selector scope not found: ${selector}`);
    }

    // String hash: seed 5381, h = h * 31 + char code (32-bit), rendered as unsigned hex.
    // Must match the algorithm in core.js computeContentHash/computeStructuralSignature.
    const simpleHash = (str: string): string => {
      if (!str) return "0";
      let h = 5381;
      for (let i = 0; i < str.length; i++) {
        h = ((h << 5) - h + str.charCodeAt(i)) | 0;
      }
      return (h >>> 0).toString(16);
    };

    const interactiveRoles = new Set([
      "button", "link", "textbox", "searchbox", "combobox", "checkbox", "radio", "switch", "menuitem", "menuitemcheckbox", "menuitemradio", "tab", "option", "slider", "spinbutton",
    ]);

    const isVisible = (el: Element): boolean => {
      const style = window.getComputedStyle(el as HTMLElement);
      if (style.display === "none" || style.visibility === "hidden") return false;
      const rect = (el as HTMLElement).getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    };

    const isEnabled = (el: Element): boolean => {
      const htmlEl = el as HTMLElement;
      const disabledAttr = htmlEl.getAttribute("disabled") !== null;
      const ariaDisabled = (htmlEl.getAttribute("aria-disabled") || "").toLowerCase() === "true";
      return !disabledAttr && !ariaDisabled;
    };

    // Explicit role attribute wins; otherwise a small tag/type based mapping is used.
    const inferRole = (el: Element): string => {
      const explicit = (el.getAttribute("role") || "").trim();
      if (explicit) return explicit;
      const tag = el.tagName.toLowerCase();
      if (tag === "a" && el.getAttribute("href")) return "link";
      if (tag === "button") return "button";
      if (tag === "select") return "combobox";
      if (tag === "textarea") return "textbox";
      if (tag === "input") {
        const type = (el.getAttribute("type") || "text").toLowerCase();
        if (["button", "submit", "reset"].includes(type)) return "button";
        if (type === "checkbox") return "checkbox";
        if (type === "radio") return "radio";
        if (type === "search") return "searchbox";
        return "textbox";
      }
      return "";
    };

    // First non-empty of: aria-label, aria-labelledby text, placeholder, alt, value, text content.
    const accessibleName = (el: Element): string => {
      const ariaLabel = el.getAttribute("aria-label")?.trim();
      if (ariaLabel) return ariaLabel;
      const labelledBy = el.getAttribute("aria-labelledby")?.trim();
      if (labelledBy) {
        const text = labelledBy
          .split(/\s+/)
          .map((id) => document.getElementById(id)?.textContent?.trim() || "")
          .join(" ")
          .trim();
        if (text) return text;
      }
      const htmlEl = el as HTMLElement;
      const placeholder = htmlEl.getAttribute("placeholder")?.trim();
      if (placeholder) return placeholder;
      const alt = htmlEl.getAttribute("alt")?.trim();
      if (alt) return alt;
      const value = (htmlEl as HTMLInputElement).value?.trim();
      if (value) return value.slice(0, 80);
      return (htmlEl.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80);
    };

    const isInteractiveEl = (el: Element): boolean => {
      const tag = el.tagName.toLowerCase();
      const role = inferRole(el);
      if (["button", "input", "select", "textarea", "summary", "option"].includes(tag)) return true;
      if (tag === "a" && !!el.getAttribute("href")) return true;
      if (interactiveRoles.has(role)) return true;
      const tabindex = (el as HTMLElement).tabIndex;
      if (tabindex >= 0) return true;
      if ((el as HTMLElement).isContentEditable) return true;
      return false;
    };

    // CSS path for the element: "#id" when it has an id, otherwise a tag/nth-of-type chain.
    const cssPath = (el: Element): string => {
      const htmlEl = el as HTMLElement;
      if (htmlEl.id) return `#${CSS.escape(htmlEl.id)}`;
      const parts: string[] = [];
      let current: Element | null = el;
      while (current && current.nodeType === Node.ELEMENT_NODE && current !== document.body) {
        const tag = current.tagName.toLowerCase();
        let part = tag;
        const parent = current.parentElement;
        if (parent) {
          const siblings = Array.from(parent.children).filter((c) => c.tagName === current!.tagName);
          if (siblings.length > 1) {
            const idx = siblings.indexOf(current) + 1;
            part += `:nth-of-type(${idx})`;
          }
        }
        parts.unshift(part);
        current = current.parentElement;
      }
      // The walk ended without meeting <body>: the element is <html>, lives in <head>, or is
      // detached. Its chain already starts at its own top-most ancestor, so "body >" would be wrong.
      if (current === null || current !== document.body) return parts.join(" > ");
      // <body> itself produces no parts; "body > " would be an invalid selector.
      return parts.length > 0 ? `body > ${parts.join(" > ")}` : "body";
    };

    // Child-index path from the document element down to the element.
    const domPath = (el: Element): number[] => {
      const path: number[] = [];
      let current: Element | null = el;
      while (current && current !== document.documentElement) {
        const parent = current.parentElement;
        if (!parent) break;
        const idx = Array.from(parent.children).indexOf(current);
        path.unshift(idx);
        current = parent;
      }
      return path;
    };

    // Up to six distinct candidate selectors, from most specific (id) to the structural CSS path.
    const selectorHints = (el: Element): string[] => {
      const hints: string[] = [];
      const htmlEl = el as HTMLElement;
      if (htmlEl.id) hints.push(`#${CSS.escape(htmlEl.id)}`);
      const nameAttr = htmlEl.getAttribute("name");
      if (nameAttr) hints.push(`${el.tagName.toLowerCase()}[name="${CSS.escape(nameAttr)}"]`);
      const aria = htmlEl.getAttribute("aria-label");
      if (aria) hints.push(`${el.tagName.toLowerCase()}[aria-label="${CSS.escape(aria)}"]`);
      const placeholder = htmlEl.getAttribute("placeholder");
      if (placeholder) hints.push(`${el.tagName.toLowerCase()}[placeholder="${CSS.escape(placeholder)}"]`);
      const cls = Array.from(el.classList).slice(0, 2);
      if (cls.length > 0) hints.push(`${el.tagName.toLowerCase()}.${cls.map((c) => CSS.escape(c)).join(".")}`);
      hints.push(cssPath(el));
      return Array.from(new Set(hints)).slice(0, 6);
    };

    // Mode-based element matching — used when a snapshot mode config is provided
    const matchesMode = (el: Element, cfg: { tags: string[]; roles: string[]; selectors: string[]; ariaAttributes: string[] }): boolean => {
      const tag = el.tagName.toLowerCase();
      if (cfg.tags.length > 0 && cfg.tags.includes(tag)) return true;
      const role = inferRole(el);
      if (cfg.roles.length > 0 && cfg.roles.includes(role)) return true;
      for (const sel of cfg.selectors) {
        try { if (el.matches(sel)) return true; } catch { /* invalid selector, skip */ }
      }
      for (const attr of cfg.ariaAttributes) {
        if (el.hasAttribute(attr)) return true;
      }
      return false;
    };

    let elements = Array.from(root.querySelectorAll("*"));

    if (mc) {
      // Mode takes precedence over interactiveOnly
      if (mc.visibleOnly) {
        // visible_only mode: include all elements that are visible
        elements = elements.filter((el) => isVisible(el));
      } else if (mc.useInteractiveFilter) {
        // interactive mode: reuse existing isInteractiveEl
        elements = elements.filter((el) => isInteractiveEl(el));
      } else if (mc.containerExpand) {
        // Container-expanding modes (dialog, errors): match containers, then include
        // all interactive children of those containers, plus the containers themselves
        const containers: Element[] = [];
        const directMatches: Element[] = [];
        for (const el of elements) {
          if (matchesMode(el, mc)) {
            // Check if this is a container element (has children)
            const childEls = el.querySelectorAll("*");
            if (childEls.length > 0) {
              containers.push(el);
            } else {
              directMatches.push(el);
            }
          }
        }
        // Collect container elements + all interactive children inside containers
        const result = new Set<Element>(directMatches);
        for (const container of containers) {
          result.add(container);
          const children = Array.from(container.querySelectorAll("*"));
          for (const child of children) {
            if (isInteractiveEl(child)) result.add(child);
          }
        }
        elements = Array.from(result);
      } else {
        // Standard mode filtering by tag/role/selector/ariaAttribute
        elements = elements.filter((el) => matchesMode(el, mc));
      }
    } else if (!interactiveOnly) {
      // Unfiltered snapshot: the scope root itself is part of the result, ahead of its descendants.
      if (root instanceof Element) elements.unshift(root);
    } else {
      elements = elements.filter((el) => isInteractiveEl(el));
    }

    const seen = new Set<Element>();
    const unique = elements.filter((el) => {
      if (seen.has(el)) return false;
      seen.add(el);
      return true;
    });

    // Fingerprint helpers — computed for each element in the snapshot
    const computeNearestHeading = (el: Element): string => {
      const headingTags = new Set(["H1", "H2", "H3", "H4", "H5", "H6"]);
      // Walk up ancestors looking for heading or preceding-sibling heading
      let current: Element | null = el;
      while (current && current !== document.body) {
        // Check preceding siblings of current
        let sib: Element | null = current.previousElementSibling;
        while (sib) {
          if (headingTags.has(sib.tagName) || sib.getAttribute("role") === "heading") {
            return (sib.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80);
          }
          sib = sib.previousElementSibling;
        }
        // Check if the parent itself is a heading (unlikely but possible)
        const parent = current.parentElement;
        if (parent && (headingTags.has(parent.tagName) || parent.getAttribute("role") === "heading")) {
          return (parent.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80);
        }
        current = parent;
      }
      return "";
    };

    const computeFormOwnership = (el: Element): string => {
      // Check form attribute (explicit form association)
      const formAttr = el.getAttribute("form");
      if (formAttr) return formAttr;
      // Walk up ancestors looking for <form>
      let current: Element | null = el.parentElement;
      while (current && current !== document.body) {
        if (current.tagName === "FORM") {
          return (current as HTMLFormElement).id || (current as HTMLFormElement).name || "form";
        }
        current = current.parentElement;
      }
      return "";
    };

    return unique.slice(0, limit).map((el) => {
      const tag = el.tagName.toLowerCase();
      const role = inferRole(el);
      const textContent = (el.textContent || "").trim().replace(/\s+/g, " ").slice(0, 200);
      const childTags = Array.from(el.children).map((c) => c.tagName.toLowerCase());

      return {
        tag,
        role,
        name: accessibleName(el),
        selectorHints: selectorHints(el),
        isVisible: isVisible(el),
        isEnabled: isEnabled(el),
        xpathOrPath: cssPath(el),
        href: el.getAttribute("href") || undefined,
        type: el.getAttribute("type") || undefined,
        path: domPath(el),
        contentHash: simpleHash(textContent),
        structuralSignature: simpleHash(`${tag}|${role}|${childTags.join(",")}`),
        nearestHeading: computeNearestHeading(el),
        formOwnership: computeFormOwnership(el),
      };
    });
  }, { ...options, modeConfig });
}
1430
+
1431
/**
 * Re-locate a previously snapshotted element in the live DOM of `target` and
 * return a CSS selector that addresses it.
 *
 * Resolution runs inside the page and tries, in order:
 *   1. the child-index path recorded in `node.path` (accepted if the tag matches),
 *   2. each entry of `node.selectorHints` (first match, accepted if the tag matches),
 *   3. a case-insensitive name match (plus explicit `role` attribute match) among
 *      all elements with the same tag,
 *   4. a fingerprint match on `contentHash` + `structuralSignature`, accepted only
 *      when exactly one element matches.
 *
 * @param target Page or frame whose DOM is searched.
 * @param node   Snapshot record of the element to find.
 * @returns `{ ok: true, selector }` on success, otherwise `{ ok: false, reason }`.
 */
async function resolveRefTarget(
  target: Page | Frame,
  node: RefNode
): Promise<{ ok: true; selector: string } | { ok: false; reason: string }> {
  return await target.evaluate((refNode) => {
    // Build a CSS selector for `el`: `#id` when it has an id, otherwise a
    // child-combinator chain of tag names (with :nth-of-type where needed).
    const cssPath = (el: Element): string => {
      const htmlEl = el as HTMLElement;
      if (htmlEl.id) return `#${CSS.escape(htmlEl.id)}`;
      // The root elements have no chain below them; returning "body > " would
      // be an invalid selector.
      if (el === document.body) return "body";
      if (el === document.documentElement) return "html";
      const parts: string[] = [];
      let current: Element | null = el;
      while (
        current &&
        current.nodeType === Node.ELEMENT_NODE &&
        current !== document.body &&
        current !== document.documentElement
      ) {
        const tag = current.tagName.toLowerCase();
        let part = tag;
        const parent: Element | null = current.parentElement;
        if (parent) {
          const self: Element = current;
          const siblings = Array.from(parent.children).filter((c) => c.tagName === self.tagName);
          if (siblings.length > 1) {
            const idx = siblings.indexOf(self) + 1;
            part += `:nth-of-type(${idx})`;
          }
        }
        parts.unshift(part);
        current = parent;
      }
      // Elements outside <body> (e.g. inside <head>) are anchored at <html>
      // so the selector still matches them.
      const rootSelector = current === document.documentElement ? "html" : "body";
      return `${rootSelector} > ${parts.join(" > ")}`;
    };

    // djb2 hash — must match the algorithm in core.js and buildRefSnapshot
    const simpleHash = (str: string): string => {
      if (!str) return "0";
      let h = 5381;
      for (let i = 0; i < str.length; i++) {
        h = ((h << 5) - h + str.charCodeAt(i)) | 0;
      }
      return (h >>> 0).toString(16);
    };

    // Follow the recorded child indexes down from <html>; null if any step is out of range.
    const byPath = (): Element | null => {
      let current: Element | null = document.documentElement;
      for (const idx of refNode.path || []) {
        if (!current || idx < 0 || idx >= current.children.length) return null;
        current = current.children[idx] as Element;
      }
      return current;
    };

    // Best-effort display name: aria-label, then value, then placeholder, then text.
    const nodeName = (el: Element): string => {
      // `value` is a number on <li>, <progress> and <meter>; calling .trim() on
      // it would throw, so only string values are considered.
      const rawValue: unknown = (el as HTMLInputElement).value;
      const valueText = typeof rawValue === "string" ? rawValue.trim() : "";
      return (
        el.getAttribute("aria-label")?.trim() ||
        valueText ||
        el.getAttribute("placeholder")?.trim() ||
        (el.textContent || "").trim().replace(/\s+/g, " ").slice(0, 80)
      );
    };

    // Tier 1: path-based resolution
    const pathEl = byPath();
    if (pathEl && pathEl.tagName.toLowerCase() === refNode.tag) {
      return { ok: true as const, selector: cssPath(pathEl) };
    }

    // Tier 2: selector hints
    for (const hint of refNode.selectorHints || []) {
      try {
        const el = document.querySelector(hint);
        if (!el) continue;
        if (el.tagName.toLowerCase() !== refNode.tag) continue;
        return { ok: true as const, selector: cssPath(el) };
      } catch {
        // ignore malformed selector hint
      }
    }

    // Tier 3: role + name match
    // NOTE(review): buildRefSnapshot records `inferRole(el)`, whereas only the
    // explicit `role` attribute is compared here (and in Tier 4's signature).
    // Elements with an implicit role may therefore never match — confirm.
    const candidates = Array.from(document.querySelectorAll(refNode.tag));
    const wantedName = refNode.name ? refNode.name.toLowerCase() : "";
    const named = candidates.find((el) => {
      const role = el.getAttribute("role") || "";
      const roleMatch = !refNode.role || role === refNode.role;
      const nameMatch = !!refNode.name && nodeName(el).toLowerCase() === wantedName;
      return roleMatch && nameMatch;
    });
    if (named) {
      return { ok: true as const, selector: cssPath(named) };
    }

    // Tier 4: structural signature + content hash fingerprint matching
    if (refNode.contentHash && refNode.structuralSignature) {
      const fpMatches: Element[] = [];
      for (const candidate of candidates) {
        const tag = candidate.tagName.toLowerCase();
        const role = candidate.getAttribute("role") || "";
        const textContent = (candidate.textContent || "").trim().replace(/\s+/g, " ").slice(0, 200);
        const childTags = Array.from(candidate.children).map((c) => c.tagName.toLowerCase());
        const candidateContentHash = simpleHash(textContent);
        const candidateStructSig = simpleHash(`${tag}|${role}|${childTags.join(",")}`);
        if (candidateContentHash === refNode.contentHash && candidateStructSig === refNode.structuralSignature) {
          fpMatches.push(candidate);
        }
      }
      if (fpMatches.length === 1) {
        return { ok: true as const, selector: cssPath(fpMatches[0]) };
      }
      if (fpMatches.length > 1) {
        // Refuse to guess between several identical-looking elements.
        return { ok: false as const, reason: "multiple fingerprint matches — ambiguous" };
      }
    }

    return { ok: false as const, reason: "element not found in current DOM" };
  }, node);
}
1542
+
1543
+ // ---------------------------------------------------------------------------
1544
+ // Extension entry point
1545
+ // ---------------------------------------------------------------------------
1546
+
1547
+ export default function (pi: ExtensionAPI) {
1548
+ // Notify on load
1549
+ // Browser tools announce via tool errors if playwright is missing — no need for startup noise
1550
+
1551
+ // Clean up on exit
1552
+ pi.on("session_shutdown", async () => {
1553
+ await closeBrowser();
1554
+ });
1555
+
1556
+ // -------------------------------------------------------------------------
1557
+ // browser_navigate
1558
+ // -------------------------------------------------------------------------
1559
// Tool: open (or reuse) the browser, load a URL, and report title, viewport,
// a before/after diff, a page summary and a screenshot.
pi.registerTool({
  name: "browser_navigate",
  label: "Browser Navigate",
  description:
    "Open the browser (if not already open) and navigate to a URL. Waits for network idle. Returns page title and current URL. Use ONLY for visually verifying locally-running web apps (e.g. http://localhost:3000). Do NOT use for documentation sites, GitHub, search results, or any external URL — use web_search instead.",
  parameters: Type.Object({
    url: Type.String({ description: "URL to navigate to, e.g. http://localhost:3000" }),
  }),

  /**
   * Navigate to `params.url` and describe the resulting page.
   * The navigation is recorded as a tracked action; failures are returned as
   * an `isError` result (with an error screenshot when one can be captured)
   * rather than thrown.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    let actionId: number | null = null;
    let beforeState: CompactPageState | null = null;
    try {
      const browser = await ensureBrowser();
      const page = browser.page;
      beforeState = await captureCompactPageState(page, { includeBodyText: true });
      actionId = beginTrackedAction("browser_navigate", params, beforeState.url).id;

      // Load quickly, then give the network a bounded chance to go idle.
      // The idle wait is best-effort so pages with WebSockets/polling don't hang.
      await page.goto(params.url, { waitUntil: "domcontentloaded", timeout: 30000 });
      try {
        await page.waitForLoadState("networkidle", { timeout: 5000 });
      } catch {
        // network never settled — carry on with whatever has loaded
      }
      await new Promise((resolve) => setTimeout(resolve, 300));

      const title = await page.title();
      const url = page.url();
      const size = page.viewportSize();
      let vpText = "unknown";
      if (size) {
        vpText = `${size.width}x${size.height}`;
      }
      const summary = await postActionSummary(page);
      const jsErrors = getRecentErrors(page.url());
      const afterState = await captureCompactPageState(page, { includeBodyText: true });
      const diff = diffCompactStates(beforeState, afterState);

      // Remember the states for later inspection and close the tracked action.
      lastActionBeforeState = beforeState;
      lastActionAfterState = afterState;
      finishTrackedAction(actionId, {
        status: "success",
        afterUrl: afterState.url,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState,
        afterState,
      });

      // The screenshot is optional: a capture failure must not fail the navigation.
      const images: any[] = [];
      try {
        const jpeg = await page.screenshot({ type: "jpeg", quality: 80 });
        images.push({ type: "image", data: jpeg.toString("base64"), mimeType: "image/jpeg" });
      } catch {}

      const report = { type: "text", text: `Navigated to: ${url}\nTitle: ${title}\nViewport: ${vpText}\nAction: ${actionId}${jsErrors}\n\nDiff:\n${formatDiffText(diff)}\n\nPage summary:\n${summary}` };
      return {
        content: [report, ...images],
        details: { title, url, status: "loaded", viewport: vpText, actionId, diff },
      };
    } catch (err: any) {
      // Close the tracked action (if one was opened) before reporting the failure.
      if (actionId !== null) {
        finishTrackedAction(actionId, { status: "error", afterUrl: getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
      }
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const failure = { type: "text", text: `Navigation failed: ${err.message}` };
      const content: any[] = errorShot
        ? [failure, { type: "image", data: errorShot.data, mimeType: errorShot.mimeType }]
        : [failure];
      return {
        content,
        details: { status: "error", error: err.message, actionId },
        isError: true,
      };
    }
  },
});
1630
+
1631
+ // -------------------------------------------------------------------------
1632
+ // browser_go_back
1633
+ // -------------------------------------------------------------------------
1634
// Tool: step one entry back in the browser history.
pi.registerTool({
  name: "browser_go_back",
  label: "Browser Go Back",
  description: "Navigate back in browser history. Returns a compact page summary after navigation.",
  parameters: Type.Object({}),

  /**
   * Go back one history entry and summarise the page that results.
   * Returns an `isError` result when `goBack` yields no response or when the
   * navigation throws.
   */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      const page = (await ensureBrowser()).page;
      const navResponse = await page.goBack({ waitUntil: "domcontentloaded", timeout: 10000 });

      if (navResponse) {
        // Best-effort settle; a page that never goes idle is still reported.
        try {
          await page.waitForLoadState("networkidle", { timeout: 5000 });
        } catch {
          // ignore — network did not settle in time
        }

        const title = await page.title();
        const url = page.url();
        const summary = await postActionSummary(page);
        const jsErrors = getRecentErrors(page.url());

        return {
          content: [{ type: "text", text: `Navigated back to: ${url}\nTitle: ${title}${jsErrors}\n\nPage summary:\n${summary}` }],
          details: { title, url },
        };
      }

      return {
        content: [{ type: "text", text: "No previous page in history." }],
        details: {},
        isError: true,
      };
    } catch (err: any) {
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const failure = { type: "text", text: `Go back failed: ${err.message}` };
      const content: any[] = errorShot
        ? [failure, { type: "image", data: errorShot.data, mimeType: errorShot.mimeType }]
        : [failure];
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
1674
+
1675
+ // -------------------------------------------------------------------------
1676
+ // browser_go_forward
1677
+ // -------------------------------------------------------------------------
1678
// Tool: step one entry forward in the browser history.
pi.registerTool({
  name: "browser_go_forward",
  label: "Browser Go Forward",
  description: "Navigate forward in browser history. Returns a compact page summary after navigation.",
  parameters: Type.Object({}),

  /**
   * Go forward one history entry and summarise the page that results.
   * Returns an `isError` result when `goForward` yields no response or when
   * the navigation throws.
   */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      const page = (await ensureBrowser()).page;
      const navResponse = await page.goForward({ waitUntil: "domcontentloaded", timeout: 10000 });

      if (navResponse) {
        // Best-effort settle; a page that never goes idle is still reported.
        try {
          await page.waitForLoadState("networkidle", { timeout: 5000 });
        } catch {
          // ignore — network did not settle in time
        }

        const title = await page.title();
        const url = page.url();
        const summary = await postActionSummary(page);
        const jsErrors = getRecentErrors(page.url());

        return {
          content: [{ type: "text", text: `Navigated forward to: ${url}\nTitle: ${title}${jsErrors}\n\nPage summary:\n${summary}` }],
          details: { title, url },
        };
      }

      return {
        content: [{ type: "text", text: "No forward page in history." }],
        details: {},
        isError: true,
      };
    } catch (err: any) {
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const failure = { type: "text", text: `Go forward failed: ${err.message}` };
      const content: any[] = errorShot
        ? [failure, { type: "image", data: errorShot.data, mimeType: errorShot.mimeType }]
        : [failure];
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
1718
+
1719
+ // -------------------------------------------------------------------------
1720
+ // browser_reload
1721
+ // -------------------------------------------------------------------------
1722
// Tool: reload the current page and report it in the same shape as browser_navigate.
pi.registerTool({
  name: "browser_reload",
  label: "Browser Reload",
  description: "Reload the current page. Returns a screenshot, compact page summary, and page metadata (same shape as browser_navigate).",
  parameters: Type.Object({}),

  /**
   * Reload the active page, wait (best-effort) for the network to settle, and
   * return title, URL, viewport, page summary and an optional JPEG screenshot.
   * Failures are returned as an `isError` result instead of being thrown.
   */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      const page = (await ensureBrowser()).page;
      await page.reload({ waitUntil: "domcontentloaded", timeout: 30000 });
      try {
        await page.waitForLoadState("networkidle", { timeout: 5000 });
      } catch {
        // ignore — network did not settle in time
      }

      const title = await page.title();
      const url = page.url();
      const size = page.viewportSize();
      let vpText = "unknown";
      if (size) {
        vpText = `${size.width}x${size.height}`;
      }
      const summary = await postActionSummary(page);
      const jsErrors = getRecentErrors(page.url());

      // Attach a screenshot like navigate does; skip it silently if capture fails.
      const images: any[] = [];
      try {
        const jpeg = await page.screenshot({ type: "jpeg", quality: 80 });
        images.push({
          type: "image",
          data: jpeg.toString("base64"),
          mimeType: "image/jpeg",
        });
      } catch {}

      const report = {
        type: "text",
        text: `Reloaded: ${url}\nTitle: ${title}\nViewport: ${vpText}${jsErrors}\n\nPage summary:\n${summary}`,
      };
      return {
        content: [report, ...images],
        details: { title, url, viewport: vpText },
      };
    } catch (err: any) {
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const failure = { type: "text", text: `Reload failed: ${err.message}` };
      const content: any[] = errorShot
        ? [failure, { type: "image", data: errorShot.data, mimeType: errorShot.mimeType }]
        : [failure];
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
1772
+
1773
+ // -------------------------------------------------------------------------
1774
+ // browser_screenshot
1775
+ // -------------------------------------------------------------------------
1776
// Tool: capture the viewport, the full page, or a single element as an inline image.
pi.registerTool({
  name: "browser_screenshot",
  label: "Browser Screenshot",
  description:
    "Take a screenshot of the current browser page and return it as an inline image. Uses JPEG for viewport/fullpage (smaller, configurable quality) and PNG for element crops (preserves transparency). Optionally crop to a specific element by CSS selector.",
  parameters: Type.Object({
    fullPage: Type.Optional(
      Type.Boolean({ description: "Capture the full scrollable page (default: false)" })
    ),
    selector: Type.Optional(
      Type.String({
        description:
          "CSS selector of a specific element to screenshot (crops to that element's bounding box). If omitted, screenshots the entire viewport.",
      })
    ),
    quality: Type.Optional(
      Type.Number({
        description:
          "JPEG quality 1-100 (default: 80). Only applies to viewport/fullpage screenshots, not element crops. Lower = smaller image.",
      })
    ),
  }),

  /**
   * Take the screenshot described by `params`.
   *
   * - With `selector`: PNG of the first matching element (`quality`/`fullPage` ignored).
   * - Otherwise: JPEG of the viewport, or of the whole page when `fullPage` is true.
   *
   * `quality` is rounded and clamped to the documented 1-100 range (falling
   * back to 80 when it is not a finite number) so that an out-of-range or
   * fractional value degrades gracefully instead of failing the capture.
   * Failures are returned as an `isError` result instead of being thrown.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page: p } = await ensureBrowser();

      let screenshotBuffer: Buffer;
      let mimeType: string;

      if (params.selector) {
        // Element screenshots: keep PNG (may have transparency)
        const locator = p.locator(params.selector).first();
        screenshotBuffer = await locator.screenshot({ type: "png" });
        mimeType = "image/png";
      } else {
        // Viewport/fullpage: use JPEG (3-5x smaller, fine for AI analysis)
        const requestedQuality = params.quality ?? 80;
        const quality = Number.isFinite(requestedQuality)
          ? Math.min(100, Math.max(1, Math.round(requestedQuality)))
          : 80;
        screenshotBuffer = await p.screenshot({
          fullPage: params.fullPage ?? false,
          type: "jpeg",
          quality,
        });
        mimeType = "image/jpeg";
      }

      const base64Data = screenshotBuffer.toString("base64");
      const title = await p.title();
      const url = p.url();
      const viewport = p.viewportSize();
      const vpText = viewport ? `${viewport.width}x${viewport.height}` : "unknown";
      const scope = params.selector ? `element "${params.selector}"` : params.fullPage ? "full page" : "viewport";

      return {
        content: [
          {
            type: "text",
            text: `Screenshot of ${scope}.\nPage: ${title}\nURL: ${url}\nViewport: ${vpText}`,
          },
          {
            type: "image",
            data: base64Data,
            mimeType,
          },
        ],
        details: { title, url, scope, viewport: vpText },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Screenshot failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
1852
+
1853
+ // -------------------------------------------------------------------------
1854
+ // browser_click
1855
+ // -------------------------------------------------------------------------
1856
// Tool: click an element (by selector, with role-based fallbacks) or a point
// (by coordinates), then report what changed.
pi.registerTool({
  name: "browser_click",
  label: "Browser Click",
  description:
    "Click an element on the page by CSS selector or by x,y coordinates. Returns a compact page summary plus lightweight verification details after clicking. Provide either selector or both x and y. Prefer selector over coordinates — selectors are more reliable because they handle shadow DOM via getByRole fallbacks. Use coordinates only when you have no other option.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({ description: "CSS selector of the element to click. The tool will try getByRole fallbacks if the CSS selector fails (handles shadow DOM)." })
    ),
    x: Type.Optional(Type.Number({ description: "X coordinate to click" })),
    y: Type.Optional(Type.Number({ description: "Y coordinate to click" })),
  }),

  /**
   * Perform the click and verify its effect.
   *
   * Input is validated before anything else so that an unusable call neither
   * launches the browser nor opens a tracked action that would never be
   * finished. After the click the page is allowed to settle, then URL, hash,
   * target ARIA/open state and open-dialog count are compared with their
   * pre-click values to produce the verification result.
   * Failures are returned as an `isError` result instead of being thrown.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    let actionId: number | null = null;
    let beforeState: CompactPageState | null = null;

    if (!params.selector && (params.x === undefined || params.y === undefined)) {
      return {
        content: [
          {
            type: "text",
            text: "Must provide either selector or both x and y coordinates",
          },
        ],
        details: {},
        isError: true,
      };
    }

    // The name hint is matched literally, so regex metacharacters are escaped.
    const escapeRegExp = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    try {
      const { page: p } = await ensureBrowser();
      const target = getActiveTarget();
      const selectors = params.selector ? [params.selector] : [];
      const before = await captureCompactPageState(p, { selectors, includeBodyText: true, target });
      beforeState = before;
      const trackedId = beginTrackedAction("browser_click", params, before.url).id;
      actionId = trackedId;
      const beforeUrl = p.url();
      const beforeHash = getUrlHash(beforeUrl);
      const beforeDialogCount = await countOpenDialogs(target);
      const beforeTargetState = params.selector
        ? await captureClickTargetState(target, params.selector)
        : null;

      if (params.selector) {
        // Try CSS selector first (5s). If it times out or the element is in
        // shadow DOM (e.g. Google search), fall back to getByRole which
        // pierces shadow DOM automatically.
        try {
          await target.locator(params.selector).first().click({ timeout: 5000 });
        } catch {
          // Extract accessible name hint from the selector if present
          const nameMatch = params.selector.match(/\[(?:aria-label|name|placeholder)="([^"]+)"\]/i);
          const roleName = nameMatch?.[1];
          const namePattern = roleName ? new RegExp(escapeRegExp(roleName), "i") : undefined;
          let clicked = false;
          for (const role of ["combobox", "searchbox", "textbox", "button", "link"] as const) {
            try {
              const loc = namePattern
                ? target.getByRole(role, { name: namePattern })
                : target.getByRole(role);
              await loc.first().click({ timeout: 3000 });
              clicked = true;
              break;
            } catch { /* try next role */ }
          }
          if (!clicked) {
            // Absolute last resort: coordinate click (mouse is page-level)
            if (params.x !== undefined && params.y !== undefined) {
              await p.mouse.click(params.x, params.y);
            } else {
              throw new Error(`Could not click selector "${params.selector}" — element not found (shadow DOM?)`);
            }
          }
        }
      } else if (params.x !== undefined && params.y !== undefined) {
        // Always taken when no selector was given (validated above); the
        // condition is repeated so the compiler narrows x/y to numbers.
        await p.mouse.click(params.x, params.y);
      }

      const settle = await settleAfterActionAdaptive(p);

      const url = p.url();
      const hash = getUrlHash(url);
      const afterDialogCount = await countOpenDialogs(target);
      const afterTargetState = params.selector
        ? await captureClickTargetState(target, params.selector)
        : null;
      const targetStateChanged = !!beforeTargetState && !!afterTargetState && (
        beforeTargetState.exists !== afterTargetState.exists ||
        beforeTargetState.ariaExpanded !== afterTargetState.ariaExpanded ||
        beforeTargetState.ariaPressed !== afterTargetState.ariaPressed ||
        beforeTargetState.ariaSelected !== afterTargetState.ariaSelected ||
        beforeTargetState.open !== afterTargetState.open
      );
      const verification = verificationFromChecks(
        [
          { name: "url_changed", passed: url !== beforeUrl, value: url, expected: `!= ${beforeUrl}` },
          { name: "hash_changed", passed: hash !== beforeHash, value: hash, expected: `!= ${beforeHash}` },
          { name: "target_state_changed", passed: targetStateChanged, value: afterTargetState, expected: beforeTargetState },
          { name: "dialog_open", passed: afterDialogCount > beforeDialogCount, value: afterDialogCount, expected: `> ${beforeDialogCount}` },
        ],
        "Try a more specific selector or click a clearly interactive element."
      );
      const clickTarget = params.selector ?? `(${params.x}, ${params.y})`;
      const summary = await postActionSummary(p, target);
      const jsErrors = getRecentErrors(p.url());
      const afterState = await captureCompactPageState(p, { selectors, includeBodyText: true, target });
      const diff = diffCompactStates(before, afterState);
      lastActionBeforeState = before;
      lastActionAfterState = afterState;
      finishTrackedAction(trackedId, {
        status: "success",
        afterUrl: afterState.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: before,
        afterState,
      });

      return {
        content: [{ type: "text", text: `Clicked: ${clickTarget}\nURL: ${url}\nAction: ${actionId}\n${verificationLine(verification)}${jsErrors}\n\nDiff:\n${formatDiffText(diff)}\n\nPage summary:\n${summary}` }],
        details: { target: clickTarget, url, actionId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      // Close the tracked action (if one was opened) before reporting the failure.
      if (actionId !== null) {
        finishTrackedAction(actionId, { status: "error", afterUrl: getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
      }
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Click failed: ${err.message}` }];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        // actionId is included (as browser_navigate does) so a failed click can
        // be correlated with its tracked action.
        details: { error: err.message, actionId },
        isError: true,
      };
    }
  },
});
1992
+
1993
+ // -------------------------------------------------------------------------
1994
+ // browser_drag
1995
+ // -------------------------------------------------------------------------
1996
// Tool: drag one element onto another within the active target.
pi.registerTool({
  name: "browser_drag",
  label: "Browser Drag",
  description:
    "Drag an element and drop it onto another element. Use for sortable lists, kanban boards, sliders, and any drag-and-drop UI.",
  parameters: Type.Object({
    sourceSelector: Type.String({
      description: "CSS selector of the element to drag",
    }),
    targetSelector: Type.String({
      description: "CSS selector of the element to drop onto",
    }),
  }),

  /**
   * Drag `params.sourceSelector` onto `params.targetSelector`, let the page
   * settle, and return a page summary. Failures are returned as an `isError`
   * result (with an error screenshot when one can be captured).
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const from = params.sourceSelector;
    const to = params.targetSelector;
    try {
      const page = (await ensureBrowser()).page;
      const activeTarget = getActiveTarget();
      await activeTarget.dragAndDrop(from, to, { timeout: 10000 });
      const settle = await settleAfterActionAdaptive(page);

      const summary = await postActionSummary(page, activeTarget);
      const jsErrors = getRecentErrors(page.url());

      const report = {
        type: "text",
        text: `Dragged "${params.sourceSelector}" → "${params.targetSelector}"${jsErrors}\n\nPage summary:\n${summary}`,
      };
      return {
        content: [report],
        details: { source: from, target: to, ...settle },
      };
    } catch (err: any) {
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const failure = { type: "text", text: `Drag failed: ${err.message}` };
      const content: any[] = errorShot
        ? [failure, { type: "image", data: errorShot.data, mimeType: errorShot.mimeType }]
        : [failure];
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
2037
+
2038
+ // -------------------------------------------------------------------------
2039
+ // browser_type
2040
+ // -------------------------------------------------------------------------
2041
+ pi.registerTool({
2042
+ name: "browser_type",
2043
+ label: "Browser Type",
2044
+ description:
2045
+ "Type text into an input element. By default uses atomic fill (clears and sets value instantly). Use 'slowly' for character-by-character typing when you need to trigger key handlers (e.g. search autocomplete). Use 'submit' to press Enter after typing. Returns a compact page summary plus lightweight verification details. IMPORTANT: Always provide a selector — do NOT rely on coordinate clicks to focus an input before calling this. CSS attribute selectors like combobox[aria-label='X'] work for most inputs; for shadow DOM inputs (e.g. Google Search), the tool automatically tries getByRole fallbacks.",
2046
+ parameters: Type.Object({
2047
+ text: Type.String({ description: "Text to type" }),
2048
+ selector: Type.Optional(
2049
+ Type.String({ description: "CSS selector of the input to type into (clicks it first). Examples: 'input[name=q]', 'textarea', 'combobox[aria-label=\"Search\"]'. The tool will try getByRole fallbacks if the CSS selector fails." })
2050
+ ),
2051
+ clearFirst: Type.Optional(
2052
+ Type.Boolean({
2053
+ description:
2054
+ "Clear the input's existing value before typing (default: false). Use this when replacing existing text.",
2055
+ })
2056
+ ),
2057
+ submit: Type.Optional(
2058
+ Type.Boolean({
2059
+ description: "Press Enter after typing to submit the form (default: false).",
2060
+ })
2061
+ ),
2062
+ slowly: Type.Optional(
2063
+ Type.Boolean({
2064
+ description:
2065
+ "Type one character at a time instead of filling atomically. Use when you need to trigger key handlers (e.g. search autocomplete). Default: false.",
2066
+ })
2067
+ ),
2068
+ }),
2069
+
2070
+ async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
2071
+ let actionId: number | null = null;
2072
+ let beforeState: CompactPageState | null = null;
2073
+ try {
2074
+ const { page: p } = await ensureBrowser();
2075
+ const target = getActiveTarget();
2076
+ beforeState = await captureCompactPageState(p, { selectors: params.selector ? [params.selector] : [], includeBodyText: true, target });
2077
+ actionId = beginTrackedAction("browser_type", params, beforeState.url).id;
2078
+ const beforeUrl = p.url();
2079
+
2080
+ /** Helper: use getByRole fallbacks when CSS selector fails (shadow DOM etc.) */
2081
+ async function focusViaRole(selector: string): Promise<boolean> {
2082
+ const nameMatch = selector.match(/\[(?:aria-label|name|placeholder)="([^"]+)"\]/i);
2083
+ const roleName = nameMatch?.[1];
2084
+ for (const role of ["combobox", "searchbox", "textbox"] as const) {
2085
+ try {
2086
+ const loc = roleName
2087
+ ? target.getByRole(role, { name: new RegExp(roleName, "i") })
2088
+ : target.getByRole(role);
2089
+ await loc.first().click({ timeout: 3000 });
2090
+ return true;
2091
+ } catch { /* try next */ }
2092
+ }
2093
+ return false;
2094
+ }
2095
+
2096
+ if (params.selector) {
2097
+ if (params.slowly) {
2098
+ // Character-by-character with shadow DOM fallback
2099
+ let focused = false;
2100
+ try {
2101
+ await target.locator(params.selector).first().click({ timeout: 5000 });
2102
+ focused = true;
2103
+ } catch {
2104
+ focused = await focusViaRole(params.selector);
2105
+ }
2106
+ if (!focused) throw new Error(`Could not focus selector "${params.selector}"`);
2107
+ if (params.clearFirst) {
2108
+ await p.keyboard.press("Control+A");
2109
+ await p.keyboard.press("Delete");
2110
+ }
2111
+ await p.keyboard.type(params.text);
2112
+ } else {
2113
+ // 1. Try atomic fill (fast path — replaces value without triggering key events)
2114
+ let filled = false;
2115
+ try {
2116
+ await target.locator(params.selector).first().fill(params.text, { timeout: 5000 });
2117
+ filled = true;
2118
+ } catch { /* fall through */ }
2119
+
2120
+ if (!filled) {
2121
+ // 2. Try fill via getByRole (pierces shadow DOM)
2122
+ const nameMatch = params.selector.match(/\[(?:aria-label|name|placeholder)="([^"]+)"\]/i);
2123
+ const roleName = nameMatch?.[1];
2124
+ for (const role of ["combobox", "searchbox", "textbox"] as const) {
2125
+ try {
2126
+ const loc = roleName
2127
+ ? target.getByRole(role, { name: new RegExp(roleName, "i") })
2128
+ : target.getByRole(role);
2129
+ await loc.first().fill(params.text, { timeout: 3000 });
2130
+ filled = true;
2131
+ break;
2132
+ } catch { /* try next */ }
2133
+ }
2134
+ }
2135
+
2136
+ if (!filled) {
2137
+ // 3. Click to focus (with shadow DOM fallback) then pressSequentially
2138
+ // pressSequentially is more reliable than keyboard.type for complex inputs
2139
+ let focused = false;
2140
+ try {
2141
+ await target.locator(params.selector).first().click({ timeout: 5000 });
2142
+ focused = true;
2143
+ } catch {
2144
+ focused = await focusViaRole(params.selector);
2145
+ }
2146
+ if (!focused) throw new Error(`Could not focus selector "${params.selector}"`);
2147
+ if (params.clearFirst) {
2148
+ await p.keyboard.press("Control+A");
2149
+ await p.keyboard.press("Delete");
2150
+ }
2151
+ await target.locator(":focus").pressSequentially(params.text, { timeout: 5000 }).catch(() =>
2152
+ p.keyboard.type(params.text)
2153
+ );
2154
+ } else if (params.clearFirst) {
2155
+ // fill() already replaced the value; clearFirst is a no-op here
2156
+ }
2157
+ }
2158
+ } else {
2159
+ // No selector — check something is actually focused before typing
2160
+ const hasFocus = await target.evaluate(() => {
2161
+ const el = document.activeElement;
2162
+ return !!(el && el !== document.body && el !== document.documentElement);
2163
+ });
2164
+ if (!hasFocus) {
2165
+ return {
2166
+ content: [{ type: "text", text: "Type failed: no element is focused. Use browser_click to focus an input first, or provide a selector." }],
2167
+ details: { error: "no focused element" },
2168
+ isError: true,
2169
+ };
2170
+ }
2171
+ // Use pressSequentially via the focused element for reliability
2172
+ await target.locator(":focus").pressSequentially(params.text, { timeout: 10000 }).catch(() =>
2173
+ p.keyboard.type(params.text)
2174
+ );
2175
+ }
2176
+
2177
+ if (params.submit) {
2178
+ await p.keyboard.press("Enter");
2179
+ }
2180
+
2181
+ const settle = await settleAfterActionAdaptive(p);
2182
+
2183
+ const typedValue = await readInputLikeValue(target, params.selector);
2184
+ const afterUrl = p.url();
2185
+ const verification = verificationFromChecks(
2186
+ [
2187
+ { name: "value_equals_expected", passed: typedValue === params.text, value: typedValue, expected: params.text },
2188
+ { name: "value_contains_expected", passed: typeof typedValue === "string" && typedValue.includes(params.text), value: typedValue, expected: params.text },
2189
+ { name: "url_changed_after_submit", passed: !!params.submit && afterUrl !== beforeUrl, value: afterUrl, expected: `!= ${beforeUrl}` },
2190
+ ],
2191
+ "Try clearFirst=true, use a more specific selector, or set slowly=true for key-driven inputs."
2192
+ );
2193
+ const typeTarget = params.selector ? ` into "${params.selector}"` : "";
2194
+ const summary = await postActionSummary(p, target);
2195
+ const jsErrors = getRecentErrors(p.url());
2196
+ const afterState = await captureCompactPageState(p, { selectors: params.selector ? [params.selector] : [], includeBodyText: true, target });
2197
+ const diff = diffCompactStates(beforeState!, afterState);
2198
+ lastActionBeforeState = beforeState!;
2199
+ lastActionAfterState = afterState;
2200
+ finishTrackedAction(actionId!, {
2201
+ status: "success",
2202
+ afterUrl: afterState.url,
2203
+ verificationSummary: verification.verificationSummary,
2204
+ warningSummary: jsErrors.trim() || undefined,
2205
+ diffSummary: diff.summary,
2206
+ changed: diff.changed,
2207
+ beforeState: beforeState!,
2208
+ afterState,
2209
+ });
2210
+
2211
+ return {
2212
+ content: [{ type: "text", text: `Typed "${params.text}"${typeTarget}\nAction: ${actionId}\n${verificationLine(verification)}${jsErrors}\n\nDiff:\n${formatDiffText(diff)}\n\nPage summary:\n${summary}` }],
2213
+ details: { text: params.text, selector: params.selector, typedValue, actionId, diff, ...settle, ...verification },
2214
+ };
2215
+ } catch (err: any) {
2216
+ if (actionId !== null) {
2217
+ finishTrackedAction(actionId, { status: "error", afterUrl: getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
2218
+ }
2219
+ const errorShot = await captureErrorScreenshot(getActivePageOrNull());
2220
+ const content: any[] = [{ type: "text", text: `Type failed: ${err.message}` }];
2221
+ if (errorShot) {
2222
+ content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
2223
+ }
2224
+ return {
2225
+ content,
2226
+ details: { error: err.message },
2227
+ isError: true,
2228
+ };
2229
+ }
2230
+ },
2231
+ });
2232
+
2233
+ // -------------------------------------------------------------------------
2234
+ // browser_upload_file
2235
+ // -------------------------------------------------------------------------
2236
pi.registerTool({
  name: "browser_upload_file",
  label: "Browser Upload File",
  description:
    "Set files on a file input element. The selector must target an <input type=\"file\"> element. Accepts one or more absolute file paths.",
  parameters: Type.Object({
    selector: Type.String({
      description: 'CSS selector targeting the <input type="file"> element',
    }),
    files: Type.Array(Type.String({ description: "Absolute path to a file" }), {
      description: "One or more file paths to upload",
    }),
  }),

  /**
   * Assigns the given files to the first element matching `params.selector`
   * on the active target, waits for the page to settle, and reports the
   * result together with a page summary. On failure an error screenshot is
   * attached when one could be captured.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const activePage = browser.page;
      const target = getActiveTarget();

      // Some models prepend "@" to paths; drop a single leading one.
      const uploadPaths = params.files.map((filePath: string) => filePath.replace(/^@/, ""));
      const fileInput = target.locator(params.selector).first();
      await fileInput.setInputFiles(uploadPaths);

      const settle = await settleAfterActionAdaptive(activePage);
      const summary = await postActionSummary(activePage, target);
      const jsErrors = getRecentErrors(activePage.url());

      const message =
        `Uploaded ${uploadPaths.length} file(s) to "${params.selector}": ${uploadPaths.join(", ")}${jsErrors}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text: message }],
        details: { selector: params.selector, files: uploadPaths, ...settle },
      };
    } catch (err: any) {
      const failureShot = await captureErrorScreenshot(getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Upload failed: ${err.message}` }];
      if (failureShot) {
        const { data, mimeType } = failureShot;
        content.push({ type: "image", data, mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
2279
+
2280
+ // -------------------------------------------------------------------------
2281
+ // browser_scroll
2282
+ // -------------------------------------------------------------------------
2283
pi.registerTool({
  name: "browser_scroll",
  label: "Browser Scroll",
  description: "Scroll the page up or down by a given number of pixels. Returns scroll position (px and percentage) and an accessibility snapshot of the visible content.",
  parameters: Type.Object({
    direction: StringEnum(["up", "down"] as const),
    amount: Type.Optional(
      Type.Number({ description: "Pixels to scroll (default: 300)" })
    ),
  }),

  /**
   * Sends a vertical mouse-wheel event of `amount` pixels (300 when omitted),
   * negated for "up", then reads the scroll metrics from the active target
   * and reports position, percentage and a page summary.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const activePage = browser.page;
      const target = getActiveTarget();

      const pixels = params.amount ?? 300;
      let delta = pixels;
      if (params.direction === "up") {
        delta = -pixels;
      }
      await activePage.mouse.wheel(0, delta);

      const settle = await settleAfterActionAdaptive(activePage);

      // Evaluated inside the page: collect the document's scroll metrics.
      const scrollInfo = await target.evaluate(() => {
        const root = document.documentElement;
        return {
          scrollY: Math.round(window.scrollY),
          scrollHeight: root.scrollHeight,
          clientHeight: root.clientHeight,
        };
      });

      // A page that fits in the viewport has no scrollable range; report 0%.
      const scrollableRange = scrollInfo.scrollHeight - scrollInfo.clientHeight;
      let percent = 0;
      if (scrollableRange > 0) {
        percent = Math.round((scrollInfo.scrollY / scrollableRange) * 100);
      }

      const summary = await postActionSummary(activePage, target);
      const jsErrors = getRecentErrors(activePage.url());

      const report = [
        `Scrolled ${params.direction} by ${pixels}px\n`,
        `Position: ${scrollInfo.scrollY}px / ${scrollInfo.scrollHeight}px (${percent}% down)\n`,
        `Viewport height: ${scrollInfo.clientHeight}px${jsErrors}\n\nPage summary:\n${summary}`,
      ].join("");

      return {
        content: [{ type: "text", text: report }],
        details: { direction: params.direction, amount: pixels, ...scrollInfo, percent, ...settle },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `Scroll failed: ${message}` }],
        details: { error: message },
        isError: true,
      };
    }
  },
});
2335
+
2336
+ // -------------------------------------------------------------------------
2337
+ // browser_get_console_logs
2338
+ // -------------------------------------------------------------------------
2339
pi.registerTool({
  name: "browser_get_console_logs",
  label: "Browser Console Logs",
  description:
    "Get all buffered browser console logs and JavaScript errors captured since the last clear. Each entry includes timestamp and page URL. Note: JS errors are also auto-surfaced in interaction tool responses — use this for the full log.",
  parameters: Type.Object({
    clear: Type.Optional(
      Type.Boolean({
        description: "Clear the buffer after returning logs (default: true)",
      })
    ),
  }),

  /**
   * Returns a copy of the buffered console entries, one formatted line per
   * entry, and empties the buffer unless `params.clear` is explicitly false.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Copy first so the returned entries survive the buffer reset below.
    const snapshot = [...consoleLogs];
    if (params.clear !== false) {
      consoleLogs = [];
    }

    if (snapshot.length === 0) {
      return {
        content: [{ type: "text", text: "No console logs captured." }],
        details: { logs: [], count: 0 },
      };
    }

    const lines: string[] = [];
    for (const entry of snapshot) {
      // Characters 11..23 of the ISO string are the HH:mm:ss.SSS portion.
      const clock = new Date(entry.timestamp).toISOString().slice(11, 23);
      lines.push(`[${clock}] [${entry.type.toUpperCase()}] ${entry.text}`);
    }
    const body = truncateText(lines.join("\n"));

    return {
      content: [
        {
          type: "text",
          text: `${snapshot.length} console log(s):\n\n${body}`,
        },
      ],
      details: { logs: snapshot, count: snapshot.length },
    };
  },
});
2387
+
2388
+ // -------------------------------------------------------------------------
2389
+ // browser_get_network_logs
2390
+ // -------------------------------------------------------------------------
2391
pi.registerTool({
  name: "browser_get_network_logs",
  label: "Browser Network Logs",
  description:
    "Get buffered network requests and responses. Shows method, URL, status code, and resource type for all requests. Includes response body for failed requests (4xx/5xx). Use to debug API failures, CORS issues, missing resources, and auth problems.",
  parameters: Type.Object({
    clear: Type.Optional(
      Type.Boolean({
        description: "Clear the buffer after returning logs (default: true)",
      })
    ),
    filter: Type.Optional(
      StringEnum(["all", "errors", "fetch-xhr"] as const)
    ),
  }),

  /**
   * Returns the buffered network entries as one formatted line per request,
   * optionally restricted by `params.filter`:
   *   - "errors": failed requests or responses with status >= 400
   *   - "fetch-xhr": entries whose resource type is "fetch" or "xhr"
   *   - "all" / omitted: no filtering
   * The buffer is emptied unless `params.clear` is explicitly false. Note the
   * whole buffer is cleared, not only the entries that passed the filter.
   * `details` carries only the count, not the entries themselves.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const shouldClear = params.clear !== false;
    let logs = [...networkLogs];

    if (shouldClear) {
      networkLogs = [];
    }

    // Apply filter
    const capturedCount = logs.length;
    if (params.filter === "errors") {
      logs = logs.filter(e => e.failed || (e.status !== null && e.status >= 400));
    } else if (params.filter === "fetch-xhr") {
      logs = logs.filter(e => e.resourceType === "fetch" || e.resourceType === "xhr");
    }

    if (logs.length === 0) {
      // Distinguish "nothing was recorded" from "the filter removed everything"
      // so the caller does not conclude that no traffic happened.
      const emptyText = capturedCount === 0
        ? "No network requests captured."
        : `No network requests matched filter "${params.filter}" (${capturedCount} captured).`;
      return {
        content: [{ type: "text", text: emptyText }],
        details: { logs: [], count: 0 },
      };
    }

    const formatted = logs
      .map((entry) => {
        const time = new Date(entry.timestamp).toISOString().slice(11, 23); // HH:mm:ss.SSS
        // A null status presumably means no response was recorded for the
        // request (verify against the capture code); show that explicitly
        // instead of printing the literal "null".
        let status: string;
        if (entry.failed) {
          status = `FAILED (${entry.failureText})`;
        } else if (entry.status === null || entry.status === undefined) {
          status = "pending";
        } else {
          status = `${entry.status}`;
        }
        let line = `[${time}] ${entry.method} ${entry.url} → ${status} (${entry.resourceType})`;
        if (entry.responseBody) {
          line += `\n Response: ${entry.responseBody}`;
        }
        return line;
      })
      .join("\n");

    const truncated = truncateText(formatted);

    return {
      content: [
        {
          type: "text",
          text: `${logs.length} network request(s):\n\n${truncated}`,
        },
      ],
      details: { count: logs.length },
    };
  },
});
2456
+
2457
+ // -------------------------------------------------------------------------
2458
+ // browser_get_dialog_logs
2459
+ // -------------------------------------------------------------------------
2460
pi.registerTool({
  name: "browser_get_dialog_logs",
  label: "Browser Dialog Logs",
  description:
    "Get buffered JavaScript dialog events (alert, confirm, prompt, beforeunload). Dialogs are auto-accepted to prevent page freezes. Use this to see what dialogs appeared and their messages.",
  parameters: Type.Object({
    clear: Type.Optional(
      Type.Boolean({
        description: "Clear the buffer after returning logs (default: true)",
      })
    ),
  }),

  /**
   * Returns a copy of the buffered dialog entries, one formatted line per
   * dialog, and empties the buffer unless `params.clear` is explicitly false.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Copy first so the returned entries survive the buffer reset below.
    const snapshot = [...dialogLogs];
    if (params.clear !== false) {
      dialogLogs = [];
    }

    if (snapshot.length === 0) {
      return {
        content: [{ type: "text", text: "No dialog events captured." }],
        details: { logs: [], count: 0 },
      };
    }

    const lines: string[] = [];
    for (const entry of snapshot) {
      // Characters 11..23 of the ISO string are the HH:mm:ss.SSS portion.
      const clock = new Date(entry.timestamp).toISOString().slice(11, 23);
      const parts = [`[${clock}] ${entry.type}: "${entry.message}"`];
      if (entry.defaultValue) {
        parts.push(` (default: "${entry.defaultValue}")`);
      }
      parts.push(` → auto-accepted`);
      lines.push(parts.join(""));
    }
    const body = truncateText(lines.join("\n"));

    return {
      content: [
        {
          type: "text",
          text: `${snapshot.length} dialog(s):\n\n${body}`,
        },
      ],
      details: { logs: snapshot, count: snapshot.length },
    };
  },
});
2513
+
2514
+ // -------------------------------------------------------------------------
2515
+ // browser_evaluate
2516
+ // -------------------------------------------------------------------------
2517
pi.registerTool({
  name: "browser_evaluate",
  label: "Browser Evaluate",
  description:
    "Execute a JavaScript expression in the browser context and return the result. Useful for reading DOM state, checking values, etc.",
  parameters: Type.Object({
    expression: Type.String({
      description: "JavaScript expression to evaluate in the page context",
    }),
  }),

  /**
   * Evaluates `params.expression` on the active target and returns the
   * result as (truncated) pretty-printed JSON text.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Turn any evaluation result into text: undefined has no JSON form, and
    // values JSON.stringify rejects fall back to a type placeholder.
    const toText = (value: unknown): string => {
      if (value === undefined) {
        return "undefined";
      }
      try {
        return JSON.stringify(value, null, 2) ?? "undefined";
      } catch {
        // Circular or non-serializable (e.g. window.open() returns a Window ref)
        return `[non-serializable: ${typeof value}]`;
      }
    };

    try {
      await ensureBrowser();
      const target = getActiveTarget();
      const result = await target.evaluate(params.expression);
      const output = truncateText(toText(result));
      return {
        content: [{ type: "text", text: output }],
        details: { expression: params.expression },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [
          {
            type: "text",
            text: `Evaluation failed: ${message}`,
          },
        ],
        details: { error: message },
        isError: true,
      };
    }
  },
});
2566
+
2567
+ // -------------------------------------------------------------------------
2568
+ // browser_close
2569
+ // -------------------------------------------------------------------------
2570
pi.registerTool({
  name: "browser_close",
  label: "Browser Close",
  description: "Close the browser and clean up all resources.",
  parameters: Type.Object({}),

  /** Calls `closeBrowser()` and reports success or the failure message. */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    let failure: any = null;
    try {
      await closeBrowser();
    } catch (err: any) {
      failure = err;
    }

    if (failure === null) {
      return {
        content: [{ type: "text", text: "Browser closed." }],
        details: {},
      };
    }
    return {
      content: [{ type: "text", text: `Close failed: ${failure.message}` }],
      details: { error: failure.message },
      isError: true,
    };
  },
});
2592
+
2593
+ // -------------------------------------------------------------------------
2594
+ // browser_trace_start
2595
+ // -------------------------------------------------------------------------
2596
pi.registerTool({
  name: "browser_trace_start",
  label: "Browser Trace Start",
  description: "Start a Playwright trace for the current browser session and persist trace metadata under the session artifact directory.",
  parameters: Type.Object({
    name: Type.Optional(Type.String({ description: "Optional short trace session name for artifact filenames." })),
    title: Type.Optional(Type.String({ description: "Optional trace title recorded in metadata." })),
  }),
  /**
   * Starts tracing on the browser context and records the session in
   * `activeTraceSession`. Refuses with an error result when a trace session
   * is already recorded as active.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const browserContext = browser.context;

      if (activeTraceSession) {
        return {
          content: [{ type: "text", text: `Trace already active: ${activeTraceSession.name}` }],
          details: { error: "trace_already_active", activeTraceSession, ...getSessionArtifactMetadata() },
          isError: true,
        };
      }

      const startedAt = Date.now();
      // Fall back to a timestamped name, then collapse every run of
      // characters outside [a-zA-Z0-9._-] into a single dash.
      const requestedName = params.name?.trim() || `trace-${formatArtifactTimestamp(startedAt)}`;
      const name = requestedName.replace(/[^a-zA-Z0-9._-]+/g, "-");
      const title = params.title ?? name;

      await browserContext.tracing.start({ screenshots: true, snapshots: true, sources: true, title });
      activeTraceSession = { startedAt, name, title };

      return {
        content: [{ type: "text", text: `Trace started: ${name}\nSession dir: ${sessionArtifactDir}` }],
        details: { activeTraceSession, ...getSessionArtifactMetadata() },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `Trace start failed: ${message}` }],
        details: { error: message, ...getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
2631
+
2632
+ // -------------------------------------------------------------------------
2633
+ // browser_trace_stop
2634
+ // -------------------------------------------------------------------------
2635
pi.registerTool({
  name: "browser_trace_stop",
  label: "Browser Trace Stop",
  description: "Stop the active Playwright trace and write the trace zip to disk under the session artifact directory.",
  parameters: Type.Object({
    name: Type.Optional(Type.String({ description: "Optional artifact basename override for the trace zip." })),
  }),
  /**
   * Stops tracing on the browser context, writing `<name>.trace.zip` to the
   * path produced by `buildSessionArtifactPath`, and returns the path, file
   * size and elapsed time. Returns an error result when no trace session is
   * recorded as active.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { context: browserContext } = await ensureBrowser();
      if (!activeTraceSession) {
        return {
          content: [{ type: "text", text: "No active trace session to stop." }],
          details: { error: "trace_not_active", ...getSessionArtifactMetadata() },
          isError: true,
        };
      }
      const traceSession = activeTraceSession;
      // Collapse every run of characters outside [a-zA-Z0-9._-] into a dash.
      const traceName = (params.name?.trim() || traceSession.name).replace(/[^a-zA-Z0-9._-]+/g, "-");
      const tracePath = buildSessionArtifactPath(`${traceName}.trace.zip`);
      await browserContext.tracing.stop({ path: tracePath });
      // Tracing has stopped at this point, so clear the session before any
      // further fallible work. If stat() below threw with the session still
      // set, later browser_trace_start calls would be rejected as
      // "already active" although nothing is recording.
      activeTraceSession = null;
      const fileStat = await stat(tracePath);
      return {
        content: [{ type: "text", text: `Trace stopped: ${tracePath}` }],
        details: {
          path: tracePath,
          bytes: fileStat.size,
          elapsedMs: Date.now() - traceSession.startedAt,
          traceName,
          ...getSessionArtifactMetadata(),
        },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Trace stop failed: ${err.message}` }],
        details: { error: err.message, ...getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
2677
+
2678
+ // -------------------------------------------------------------------------
2679
+ // browser_export_har
2680
+ // -------------------------------------------------------------------------
2681
pi.registerTool({
  name: "browser_export_har",
  label: "Browser Export HAR",
  description: "Export the truthfully recorded session HAR from disk to a stable artifact path and return compact metadata.",
  parameters: Type.Object({
    filename: Type.Optional(Type.String({ description: "Optional destination filename within the session artifact directory." })),
  }),
  /**
   * Copies the recorded HAR file to a destination inside the session
   * artifact directory and updates the export bookkeeping in `harState`.
   * Returns an error result when `harState` shows HAR recording was not
   * set up.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();

      if (!harState.enabled || !harState.configuredAtContextCreation || !harState.path) {
        return {
          content: [{ type: "text", text: "HAR export unavailable: HAR recording was not enabled at browser context creation." }],
          details: { error: "har_not_enabled", ...getSessionArtifactMetadata() },
          isError: true,
        };
      }

      const sourcePath = harState.path;
      // Default filename, then collapse every run of characters outside
      // [a-zA-Z0-9._-] into a single dash.
      const requestedName = params.filename?.trim() || `export-${HAR_FILENAME}`;
      const destinationPath = buildSessionArtifactPath(requestedName.replace(/[^a-zA-Z0-9._-]+/g, "-"));

      let exportResult;
      if (sourcePath === destinationPath) {
        // Source and destination coincide: nothing to copy, just measure it.
        const sourceStat = await stat(sourcePath);
        exportResult = { path: sourcePath, bytes: sourceStat.size };
      } else {
        exportResult = await copyArtifactFile(sourcePath, destinationPath);
      }

      harState = {
        ...harState,
        exportCount: harState.exportCount + 1,
        lastExportedPath: exportResult.path,
        lastExportedAt: Date.now(),
      };

      return {
        content: [{ type: "text", text: `HAR exported: ${exportResult.path}` }],
        details: { path: exportResult.path, bytes: exportResult.bytes, ...getSessionArtifactMetadata() },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `HAR export failed: ${message}` }],
        details: { error: message, ...getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
2723
+
2724
+ // -------------------------------------------------------------------------
2725
+ // browser_timeline
2726
+ // -------------------------------------------------------------------------
2727
pi.registerTool({
  name: "browser_timeline",
  label: "Browser Timeline",
  description: "Return a compact structured summary of the tracked browser action timeline and optional on-disk export path.",
  parameters: Type.Object({
    writeToDisk: Type.Optional(Type.Boolean({ description: "Write the timeline JSON to disk under the session artifact directory." })),
    filename: Type.Optional(Type.String({ description: "Optional JSON filename when writeToDisk is true." })),
  }),
  /**
   * Formats the tracked action timeline and, when `params.writeToDisk` is
   * set, also writes it as JSON under the session artifact directory.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();

      const timeline = formatTimelineEntries(actionTimeline.entries, {
        limit: actionTimeline.limit,
        totalActions: actionTimeline.nextId - 1,
      });

      let artifact: { path: string; bytes: number } | null = null;
      if (params.writeToDisk) {
        // Default filename, then collapse every run of characters outside
        // [a-zA-Z0-9._-] into a single dash.
        const requestedName = params.filename?.trim() || "timeline.json";
        const outputPath = buildSessionArtifactPath(requestedName.replace(/[^a-zA-Z0-9._-]+/g, "-"));
        artifact = await writeArtifactFile(outputPath, JSON.stringify(timeline, null, 2));
      }

      let text = timeline.summary;
      if (artifact) {
        text = `${timeline.summary}\nArtifact: ${artifact.path}`;
      }

      return {
        content: [{ type: "text", text }],
        details: { ...timeline, artifact, ...getSessionArtifactMetadata() },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `Timeline failed: ${message}` }],
        details: { error: message, ...getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
2760
+
2761
+ // -------------------------------------------------------------------------
2762
+ // browser_session_summary
2763
+ // -------------------------------------------------------------------------
2764
pi.registerTool({
  name: "browser_session_summary",
  label: "Browser Session Summary",
  description: "Return a compact structured summary of the current browser session, including pages, actions, waits/assertions, bounded-history caveats, and trace/HAR state.",
  parameters: Type.Object({}),
  /**
   * Combines the session summary, a failure hypothesis, and the current
   * trace and HAR state into a single result.
   */
  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();
      const pages = await getLivePagesSnapshot();

      const baseSummary = summarizeBrowserSession({
        timeline: actionTimeline,
        totalActions: actionTimeline.nextId - 1,
        pages,
        activePageId: pageRegistry.activePageId,
        activeFrame: getActiveFrameMetadata(),
        consoleEntries: consoleLogs,
        networkEntries: networkLogs,
        dialogEntries: dialogLogs,
        consoleLimit: 1000,
        networkLimit: 1000,
        dialogLimit: 1000,
        sessionStartedAt,
        now: Date.now(),
      });

      const failureHypothesis = buildFailureHypothesis({
        timeline: actionTimeline,
        consoleEntries: consoleLogs,
        networkEntries: networkLogs,
        dialogEntries: dialogLogs,
      });

      // With no active trace, report a glob under the session artifact
      // directory (or null when that directory is not set).
      let trace;
      if (activeTraceSession) {
        trace = { status: "active", ...activeTraceSession };
      } else {
        const lastTracePath = sessionArtifactDir ? buildSessionArtifactPath("*.trace.zip") : null;
        trace = { status: "inactive", lastTracePath };
      }

      const { enabled, configuredAtContextCreation, path, exportCount, lastExportedPath, lastExportedAt } = harState;
      const har = { enabled, configuredAtContextCreation, path, exportCount, lastExportedPath, lastExportedAt };

      return {
        content: [{ type: "text", text: `${baseSummary.summary}\nFailure hypothesis: ${failureHypothesis}` }],
        details: {
          ...baseSummary,
          failureHypothesis,
          trace,
          har,
          ...getSessionArtifactMetadata(),
        },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `Session summary failed: ${message}` }],
        details: { error: message, ...getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
2824
+
2825
+ // -------------------------------------------------------------------------
2826
+ // browser_debug_bundle
2827
+ // -------------------------------------------------------------------------
2828
pi.registerTool({
  name: "browser_debug_bundle",
  label: "Browser Debug Bundle",
  description: "Write a timestamped debug bundle to disk with screenshot, logs, timeline, pages, session summary, and accessibility output, then return compact paths and counts.",
  parameters: Type.Object({
    selector: Type.Optional(Type.String({ description: "Optional CSS selector to scope the accessibility snapshot before fallback behavior applies." })),
    name: Type.Optional(Type.String({ description: "Optional short bundle name suffix for the output directory." })),
  }),
  /**
   * Creates a timestamped bundle directory under `ARTIFACT_ROOT` and writes
   * a viewport screenshot plus JSON/markdown dumps of the console, network
   * and dialog buffers, the action timeline, the session summary, the live
   * pages and an accessibility snapshot. Files are written one after another
   * in that order.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const activePage = browser.page;
      const startedAt = Date.now();
      const sessionDir = await ensureSessionArtifactDir();

      const bundleSuffix = sanitizeArtifactName(params.name ?? "debug-bundle", "debug-bundle");
      const bundleDir = path.join(ARTIFACT_ROOT, `${formatArtifactTimestamp(startedAt)}-${bundleSuffix}`);
      await ensureDir(bundleDir);

      // Serializes a value as indented JSON into a file inside the bundle.
      const writeJson = (fileName: string, value: unknown) =>
        writeArtifactFile(path.join(bundleDir, fileName), JSON.stringify(value, null, 2));

      const pages = await getLivePagesSnapshot();
      const totalActions = actionTimeline.nextId - 1;
      const timeline = formatTimelineEntries(actionTimeline.entries, {
        limit: actionTimeline.limit,
        totalActions,
      });
      const sessionSummary = summarizeBrowserSession({
        timeline: actionTimeline,
        totalActions: actionTimeline.nextId - 1,
        pages,
        activePageId: pageRegistry.activePageId,
        activeFrame: getActiveFrameMetadata(),
        consoleEntries: consoleLogs,
        networkEntries: networkLogs,
        dialogEntries: dialogLogs,
        consoleLimit: 1000,
        networkLimit: 1000,
        dialogLimit: 1000,
        sessionStartedAt,
        now: Date.now(),
      });
      const failureHypothesis = buildFailureHypothesis({
        timeline: actionTimeline,
        consoleEntries: consoleLogs,
        networkEntries: networkLogs,
        dialogEntries: dialogLogs,
      });
      const accessibility = await captureAccessibilityMarkdown(params.selector);

      const screenshotPath = path.join(bundleDir, "screenshot.jpg");
      await activePage.screenshot({ path: screenshotPath, type: "jpeg", quality: 80, fullPage: false });
      const screenshotBytes = (await stat(screenshotPath)).size;

      // Written sequentially, in the same order the keys appear below.
      const consoleArtifact = await writeJson("console.json", consoleLogs);
      const networkArtifact = await writeJson("network.json", networkLogs);
      const dialogArtifact = await writeJson("dialog.json", dialogLogs);
      const timelineArtifact = await writeJson("timeline.json", timeline);
      const summaryArtifact = await writeJson("summary.json", {
        ...sessionSummary,
        failureHypothesis,
        trace: activeTraceSession,
        har: harState,
        sessionArtifactDir: sessionDir,
      });
      const pagesArtifact = await writeJson("pages.json", pages);
      const accessibilityArtifact = await writeArtifactFile(path.join(bundleDir, "accessibility.md"), accessibility.snapshot);

      const artifacts = {
        screenshot: { path: screenshotPath, bytes: screenshotBytes },
        console: consoleArtifact,
        network: networkArtifact,
        dialog: dialogArtifact,
        timeline: timelineArtifact,
        summary: summaryArtifact,
        pages: pagesArtifact,
        accessibility: accessibilityArtifact,
      };

      return {
        content: [{ type: "text", text: `Debug bundle written: ${bundleDir}\n${sessionSummary.summary}\nFailure hypothesis: ${failureHypothesis}` }],
        details: {
          bundleDir,
          artifacts,
          accessibilityScope: accessibility.scope,
          accessibilitySource: accessibility.source,
          counts: {
            console: consoleLogs.length,
            network: networkLogs.length,
            dialog: dialogLogs.length,
            actions: timeline.count,
            pages: pages.length,
          },
          elapsedMs: Date.now() - startedAt,
          summary: sessionSummary,
          failureHypothesis,
          ...getSessionArtifactMetadata(),
        },
      };
    } catch (err: any) {
      const message = err.message;
      return {
        content: [{ type: "text", text: `Debug bundle failed: ${message}` }],
        details: { error: message, ...getSessionArtifactMetadata() },
        isError: true,
      };
    }
  },
});
2918
+
2919
+ // -------------------------------------------------------------------------
2920
+ // browser_assert
2921
+ // -------------------------------------------------------------------------
2922
// Tool registration: browser_assert.
// Evaluates caller-supplied checks against the current browser state and returns a
// structured verdict; the result is flagged as an error when the checks do not verify.
pi.registerTool({
  name: "browser_assert",
  label: "Browser Assert",
  description:
    "Run one or more explicit browser assertions and return structured PASS/FAIL results. Prefer this for verification instead of inferring success from prose summaries.",
  promptGuidelines: [
    "Prefer browser_assert for browser verification instead of inferring success from summaries.",
    "When finishing UI work, explicit browser assertions should usually be the final verification step.",
    "Use checks for URL, text, selector state, value, and browser diagnostics whenever those signals are available.",
  ],
  parameters: Type.Object({
    checks: Type.Array(
      Type.Object({
        kind: Type.String({ description: "Assertion kind, e.g. url_contains, text_visible, selector_visible, value_equals, no_console_errors, no_failed_requests, request_url_seen, response_status, console_message_matches, network_count, console_count, no_console_errors_since, no_failed_requests_since" }),
        selector: Type.Optional(Type.String()),
        text: Type.Optional(Type.String()),
        value: Type.Optional(Type.String()),
        checked: Type.Optional(Type.Boolean()),
        sinceActionId: Type.Optional(Type.Number()),
      })
    ),
  }),
  /**
   * Collects the state the checks need, evaluates them, and formats the outcome.
   * Any thrown error is converted into an `isError` result instead of propagating.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const activeTarget = getActiveTarget();
      const requestedChecks = params.checks;
      const snapshot = await collectAssertionState(browser.page, requestedChecks, activeTarget);
      const outcome = evaluateAssertionChecks({ checks: requestedChecks, state: snapshot });
      const report = formatAssertionText(outcome);
      return {
        content: [{ type: "text", text: `Browser assert\n\n${report}` }],
        details: { ...outcome, url: snapshot.url, title: snapshot.title },
        // A failed verification is surfaced as a tool error.
        isError: !outcome.verified,
      };
    } catch (failure: any) {
      const reason = failure.message;
      return {
        content: [{ type: "text", text: `Browser assert failed: ${reason}` }],
        details: { error: reason },
        isError: true,
      };
    }
  },
});
2964
+
2965
+ // -------------------------------------------------------------------------
2966
+ // browser_diff
2967
+ // -------------------------------------------------------------------------
2968
// Tool registration: browser_diff.
// Compares the current compact page state against a baseline: the after-state stored
// for `sinceActionId` when one exists, otherwise the most recent tracked action state.
pi.registerTool({
  name: "browser_diff",
  label: "Browser Diff",
  description:
    "Report meaningful browser-state changes. By default compares the current page to the most recent tracked action state. Use this to understand what changed after a click, submit, or navigation.",
  promptGuidelines: [
    "Use browser_diff after ambiguous or high-impact actions when you need to know what changed.",
    "Prefer browser_diff over requesting a broad new page inspection when the question is change detection.",
  ],
  parameters: Type.Object({
    sinceActionId: Type.Optional(Type.Number({ description: "Optional action id to diff against. Uses that action's stored after-state when available." })),
  }),
  /**
   * Captures the current state, picks a baseline, and returns the formatted diff.
   * Returns an `isError` result when no baseline exists or when anything throws.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page: p } = await ensureBrowser();
      const target = getActiveTarget();
      const current = await captureCompactPageState(p, { includeBodyText: true, target });

      // Compare against null/undefined explicitly: a truthiness test would skip the
      // lookup for an action id of 0.
      const requestedId = params.sinceActionId;
      const hasRequestedId = requestedId !== undefined && requestedId !== null;

      let baseline: CompactPageState | null = null;
      if (hasRequestedId) {
        const action = findAction(actionTimeline, requestedId) as { afterState?: CompactPageState } | null;
        baseline = action?.afterState ?? null;
      }

      // Remember whether the requested baseline was unavailable so the caller is told
      // that a different baseline was used.
      const fellBack = hasRequestedId && !baseline;
      if (!baseline) {
        baseline = lastActionAfterState ?? lastActionBeforeState;
      }
      if (!baseline) {
        return {
          content: [{ type: "text", text: "Browser diff unavailable: no prior tracked browser state exists yet." }],
          details: { changed: false, changes: [], summary: "No prior tracked state" },
          isError: true,
        };
      }

      const diff = diffCompactStates(baseline, current);
      const fallbackNote = fellBack
        ? `\n\nNote: no stored after-state for action ${requestedId}; compared against the most recent tracked state instead.`
        : "";
      return {
        content: [{ type: "text", text: `Browser diff\n\n${formatDiffText(diff)}${fallbackNote}` }],
        details: diff,
      };
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Browser diff failed: ${message}` }],
        details: { error: message },
        isError: true,
      };
    }
  },
});
3014
+
3015
+ // -------------------------------------------------------------------------
3016
+ // browser_batch
3017
+ // -------------------------------------------------------------------------
3018
// Tool registration: browser_batch.
// Runs a list of explicit steps (navigate / click / type / key_press / wait_for / assert /
// click_ref / fill_ref) as one tracked action and reports per-step results plus a diff of
// the page state before and after the batch.
pi.registerTool({
  name: "browser_batch",
  label: "Browser Batch",
  description:
    "Execute multiple explicit browser steps in one call. Prefer this for obvious action sequences like click → type → wait → assert to reduce round trips and token usage.",
  promptGuidelines: [
    "If the next 2-5 browser actions are obvious and low-risk, prefer browser_batch over multiple tiny browser calls.",
    "Use browser_batch for explicit sequences like click → type → submit → wait → assert.",
    "Keep browser_batch steps explicit; do not use it as a speculative planner.",
  ],
  parameters: Type.Object({
    steps: Type.Array(
      Type.Object({
        action: StringEnum(["navigate", "click", "type", "key_press", "wait_for", "assert", "click_ref", "fill_ref"] as const),
        selector: Type.Optional(Type.String()),
        text: Type.Optional(Type.String()),
        url: Type.Optional(Type.String()),
        key: Type.Optional(Type.String()),
        condition: Type.Optional(Type.String()),
        value: Type.Optional(Type.String()),
        threshold: Type.Optional(Type.String()),
        timeout: Type.Optional(Type.Number()),
        clearFirst: Type.Optional(Type.Boolean()),
        submit: Type.Optional(Type.Boolean()),
        ref: Type.Optional(Type.String()),
        checks: Type.Optional(Type.Array(Type.Object({
          kind: Type.String({ description: "Assertion kind, e.g. url_contains, text_visible, selector_visible, value_equals, no_console_errors, no_failed_requests, request_url_seen, response_status, console_message_matches, network_count, console_count, no_console_errors_since, no_failed_requests_since" }),
          selector: Type.Optional(Type.String()),
          text: Type.Optional(Type.String()),
          value: Type.Optional(Type.String()),
          checked: Type.Optional(Type.Boolean()),
          sinceActionId: Type.Optional(Type.Number()),
        }))),
      })
    ),
    stopOnFailure: Type.Optional(Type.Boolean({ description: "Stop after the first failing step (default: true)." })),
    finalSummaryOnly: Type.Optional(Type.Boolean({ description: "Return only the compact final batch summary in content while keeping step results in details." })),
  }),
  /**
   * Executes the batch. A failing step is reported as `{ ok: false, ... }` in the step
   * results rather than thrown; failures outside the steps (e.g. state capture) are
   * caught here, recorded on the tracked action, and returned as an `isError` result.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    let actionId: number | null = null;
    let beforeState: CompactPageState | null = null;

    // Normalises anything thrown into a printable message.
    const errorMessage = (err: unknown): string => (err instanceof Error ? err.message : String(err));

    // Fails fast with a readable message when a step omits a field its action needs.
    const requireString = (value: unknown, field: string, action: string): string => {
      if (typeof value !== "string" || value.length === 0) {
        throw new Error(`Batch step "${action}" requires "${field}"`);
      }
      return value;
    };

    try {
      const { page: p } = await ensureBrowser();
      const target = getActiveTarget();
      const initialState = await captureCompactPageState(p, { includeBodyText: true, target });
      beforeState = initialState;
      const trackedId = beginTrackedAction("browser_batch", params, initialState.url).id;
      actionId = trackedId;

      // Shared by click_ref / fill_ref: turns a ref string into a selector on the step target.
      // A versioned ref that does not belong to the current snapshot is rejected, matching the
      // standalone ref tools; otherwise "@v2:e1" would silently resolve to e1 of a newer snapshot.
      const resolveRefSelector = async (stepTarget: ReturnType<typeof getActiveTarget>, step: any) => {
        const rawRef = requireString(step.ref, "ref", step.action);
        const parsedRef = parseRef(rawRef);
        if (parsedRef.version !== null && refMetadata && parsedRef.version !== refMetadata.version) {
          throw new Error(staleRefGuidance(parsedRef.display, `snapshot version mismatch (have v${refMetadata.version})`));
        }
        const node = currentRefMap[parsedRef.key];
        if (!node) throw new Error(`Unknown ref: ${rawRef}`);
        const resolved = await resolveRefTarget(stepTarget, node);
        if (!resolved.ok) throw new Error(resolved.reason);
        return resolved.selector;
      };

      const executeStep = async (step: any, index: number) => {
        // Re-resolve target each step — frame selection may change during batch
        const stepTarget = getActiveTarget();
        try {
          switch (step.action) {
            case "navigate": {
              // Navigation is always page-level
              const url = requireString(step.url, "url", step.action);
              await p.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
              // Best-effort wait: a page that never goes network-idle must not fail the step.
              await p.waitForLoadState("networkidle", { timeout: 5000 }).catch(() => {});
              return { ok: true, action: step.action, url: p.url() };
            }
            case "click": {
              const selector = requireString(step.selector, "selector", step.action);
              await stepTarget.locator(selector).first().click({ timeout: step.timeout ?? 8000 });
              await settleAfterActionAdaptive(p);
              return { ok: true, action: step.action, selector, url: p.url() };
            }
            case "type": {
              const selector = requireString(step.selector, "selector", step.action);
              const timeout = step.timeout ?? 8000;
              const field = stepTarget.locator(selector).first();
              // The clearing fill uses the same timeout as the real fill, so a selector that
              // never matches fails within the step's budget instead of the Playwright default.
              if (step.clearFirst) await field.fill("", { timeout });
              await field.fill(step.text ?? "", { timeout });
              if (step.submit) await p.keyboard.press("Enter");
              await settleAfterActionAdaptive(p);
              return { ok: true, action: step.action, selector, text: step.text };
            }
            case "key_press": {
              // Keyboard is page-level
              const key = requireString(step.key, "key", step.action);
              await p.keyboard.press(key);
              await settleAfterActionAdaptive(p, { checkFocusStability: true });
              return { ok: true, action: step.action, key };
            }
            case "wait_for": {
              const timeout = step.timeout ?? 10000;
              // Validate params for all conditions
              const waitValidation = validateWaitParams({ condition: step.condition, value: step.value, threshold: step.threshold });
              if (waitValidation) throw new Error(waitValidation.error);

              if (step.condition === "selector_visible") await stepTarget.waitForSelector(step.value, { state: "visible", timeout });
              else if (step.condition === "selector_hidden") await stepTarget.waitForSelector(step.value, { state: "hidden", timeout });
              else if (step.condition === "url_contains") await p.waitForURL((url) => url.toString().includes(step.value), { timeout });
              else if (step.condition === "network_idle") await p.waitForLoadState("networkidle", { timeout });
              else if (step.condition === "delay") await new Promise((resolve) => setTimeout(resolve, parseInt(step.value ?? "1000", 10)));
              else if (step.condition === "text_visible") {
                // Case-insensitive substring match against the body's rendered text.
                await stepTarget.waitForFunction(
                  (needle: string) => (document.body?.innerText ?? "").toLowerCase().includes(needle.toLowerCase()),
                  step.value!,
                  { timeout }
                );
              }
              else if (step.condition === "text_hidden") {
                await stepTarget.waitForFunction(
                  (needle: string) => !(document.body?.innerText ?? "").toLowerCase().includes(needle.toLowerCase()),
                  step.value!,
                  { timeout }
                );
              }
              else if (step.condition === "request_completed") {
                await getActivePage().waitForResponse(
                  (resp: any) => resp.url().includes(step.value!),
                  { timeout }
                );
              }
              else if (step.condition === "console_message") {
                // Poll the captured console log every 100ms until a match appears or time runs out.
                const needle = step.value!;
                const startTime = Date.now();
                let found = false;
                while (Date.now() - startTime < timeout) {
                  if (consoleLogs.find((entry) => includesNeedle(entry.text, needle))) { found = true; break; }
                  await new Promise((resolve) => setTimeout(resolve, 100));
                }
                if (!found) throw new Error(`Timed out waiting for console message matching "${needle}" (${timeout}ms)`);
              }
              else if (step.condition === "element_count") {
                const threshold = parseThreshold(step.threshold ?? ">=1");
                if (!threshold) throw new Error(`element_count threshold is malformed: "${step.threshold}"`);
                const selector = step.value!;
                const op = threshold.op;
                const n = threshold.n;
                await stepTarget.waitForFunction(
                  ({ selector, op, n }: { selector: string; op: string; n: number }) => {
                    const count = document.querySelectorAll(selector).length;
                    switch (op) {
                      case ">=": return count >= n;
                      case "<=": return count <= n;
                      case "==": return count === n;
                      case ">": return count > n;
                      case "<": return count < n;
                      default: return false;
                    }
                  },
                  { selector, op, n },
                  { timeout }
                );
              }
              else if (step.condition === "region_stable") {
                const script = createRegionStableScript(step.value!);
                await stepTarget.waitForFunction(script, undefined, { timeout, polling: 200 });
              }
              else throw new Error(`Unsupported wait condition: ${step.condition}`);
              return { ok: true, action: step.action, condition: step.condition, value: step.value };
            }
            case "assert": {
              const checks = step.checks ?? [];
              const state = await collectAssertionState(p, checks, stepTarget);
              const assertion = evaluateAssertionChecks({ checks, state });
              return { ok: assertion.verified, action: step.action, summary: assertion.summary, assertion };
            }
            case "click_ref": {
              const selector = await resolveRefSelector(stepTarget, step);
              await stepTarget.locator(selector).first().click({ timeout: step.timeout ?? 8000 });
              await settleAfterActionAdaptive(p);
              return { ok: true, action: step.action, ref: step.ref };
            }
            case "fill_ref": {
              const selector = await resolveRefSelector(stepTarget, step);
              const timeout = step.timeout ?? 8000;
              const field = stepTarget.locator(selector).first();
              if (step.clearFirst) await field.fill("", { timeout });
              await field.fill(step.text ?? "", { timeout });
              if (step.submit) await p.keyboard.press("Enter");
              await settleAfterActionAdaptive(p);
              return { ok: true, action: step.action, ref: step.ref, text: step.text };
            }
            default:
              throw new Error(`Unsupported batch action: ${step.action}`);
          }
        } catch (err: unknown) {
          // A step failure becomes a result entry; the batch runner decides whether to continue.
          return { ok: false, action: step.action, index, message: errorMessage(err) };
        }
      };

      const run = await runBatchSteps({
        steps: params.steps,
        executeStep,
        stopOnFailure: params.stopOnFailure !== false,
      });

      // Re-resolve target at end of batch since steps may have changed frame selection
      const batchEndTarget = getActiveTarget();
      const afterState = await captureCompactPageState(p, { includeBodyText: true, target: batchEndTarget });
      const diff = diffCompactStates(initialState, afterState);
      lastActionBeforeState = initialState;
      lastActionAfterState = afterState;
      finishTrackedAction(trackedId, {
        status: run.ok ? "success" : "error",
        afterUrl: afterState.url,
        diffSummary: diff.summary,
        changed: diff.changed,
        error: run.ok ? undefined : run.summary,
        beforeState: initialState,
        afterState,
      });

      const summary = `${run.summary}\n${run.stepResults.map((step: any, index: number) => `- ${index + 1}. ${step.action}: ${step.ok ? "PASS" : "FAIL"}${step.message ? ` (${step.message})` : ""}`).join("\n")}`;
      return {
        content: [{ type: "text", text: params.finalSummaryOnly ? run.summary : `Browser batch\nAction: ${actionId}\n\n${summary}\n\nDiff:\n${formatDiffText(diff)}` }],
        details: { actionId, diff, ...run },
        isError: !run.ok,
      };
    } catch (err: unknown) {
      const message = errorMessage(err);
      // Close out the tracked action if it was opened before the failure.
      if (actionId !== null) {
        finishTrackedAction(actionId, { status: "error", afterUrl: getActivePageOrNull()?.url() ?? "", error: message, beforeState: beforeState ?? undefined });
      }
      return {
        content: [{ type: "text", text: `Browser batch failed: ${message}` }],
        details: { error: message, actionId },
        isError: true,
      };
    }
  },
});
3237
+
3238
+ // -------------------------------------------------------------------------
3239
+ // browser_get_accessibility_tree
3240
+ // -------------------------------------------------------------------------
3241
// Tool registration: browser_get_accessibility_tree.
// Returns Playwright's aria snapshot for the body of the active target, or for the first
// element matching an optional CSS selector.
pi.registerTool({
  name: "browser_get_accessibility_tree",
  label: "Browser Accessibility Tree",
  description:
    "Get the accessibility tree of the current page as structured text. Shows roles, names, labels, values, and states of all interactive elements. Use this to understand page structure before clicking — it reveals buttons, inputs, links, and their labels without needing to guess CSS selectors or coordinates. Much more reliable than inspecting the DOM directly.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({
        description:
          "Scope the accessibility tree to a specific element by CSS selector (e.g. 'main', 'form', '#modal'). If omitted, returns the full page tree.",
      })
    ),
  }),

  /**
   * Takes the snapshot and returns it truncated in `content` and in full in `details`.
   * Errors are reported as an `isError` result rather than thrown.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const activeTarget = getActiveTarget();
      const scopedSelector = params.selector;

      // Snapshot root: first match of the selector when given, otherwise the body.
      const rootLocator = scopedSelector
        ? activeTarget.locator(scopedSelector).first()
        : activeTarget.locator("body");
      const snapshot: string = await rootLocator.ariaSnapshot();

      const truncated = truncateText(snapshot);
      const scope = scopedSelector ? `element "${scopedSelector}"` : "full page";

      // viewportSize() may return nothing; report "unknown" in that case.
      const size = browser.page.viewportSize();
      let viewport = "unknown";
      if (size) {
        viewport = `${size.width}x${size.height}`;
      }

      const heading = `Accessibility tree for ${scope} (viewport: ${viewport}):`;
      return {
        content: [
          {
            type: "text",
            text: `${heading}\n\n${truncated}`,
          },
        ],
        details: { scope, snapshot, viewport },
      };
    } catch (failure: any) {
      const reason = failure.message;
      return {
        content: [
          {
            type: "text",
            text: `Accessibility tree failed: ${reason}`,
          },
        ],
        details: { error: reason },
        isError: true,
      };
    }
  },
});
3297
+
3298
+ // -------------------------------------------------------------------------
3299
+ // browser_find
3300
+ // -------------------------------------------------------------------------
3301
// Tool registration: browser_find.
// Finds elements in the active target by role, text, and/or a scoping CSS selector and
// returns a compact one-line description of each match.
pi.registerTool({
  name: "browser_find",
  label: "Browser Find",
  description:
    "Find elements on the page by text content, ARIA role, or CSS selector. Returns only the matched nodes as a compact accessibility snapshot — far cheaper than browser_get_accessibility_tree. Use this after any action to locate a specific button, input, heading, or link before clicking it.",
  promptGuidelines: [
    "Use browser_find for cheap targeted discovery before requesting the full accessibility tree.",
    "Prefer browser_find when you need one button, input, heading, dialog, or alert rather than a full-page structure dump.",
  ],
  parameters: Type.Object({
    text: Type.Optional(
      Type.String({
        description: "Find elements whose visible text contains this string (case-insensitive).",
      })
    ),
    role: Type.Optional(
      Type.String({
        description: "ARIA role to filter by, e.g. 'button', 'link', 'heading', 'textbox', 'dialog', 'alert'.",
      })
    ),
    selector: Type.Optional(
      Type.String({
        description: "CSS selector to scope the search. If omitted, searches the full page.",
      })
    ),
    limit: Type.Optional(
      Type.Number({
        description: "Maximum number of results to return (default: 20).",
      })
    ),
  }),

  /**
   * Runs the search inside the page and formats the matches.
   * Returns a non-error "No elements found" result when nothing matches, and an
   * `isError` result when anything throws.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();
      const target = getActiveTarget();

      // Sanitise the limit before it reaches slice(): a negative value would drop matches
      // from the end, and zero would produce a misleading "no elements" answer.
      const requestedLimit = params.limit ?? 20;
      const limit = Number.isFinite(requestedLimit) ? Math.max(1, Math.floor(requestedLimit)) : 20;

      // NOTE: this callback is executed in the page, so it must stay self-contained.
      const results = await target.evaluate(({ text, role, selector, limit }) => {
        const root = selector ? document.querySelector(selector) : document.body;
        if (!root) return [];

        // Collect candidate elements
        let candidates: Element[];
        if (role) {
          // Query by ARIA role (native + explicit)
          const roleMap: Record<string, string> = {
            button: 'button,[role="button"]',
            link: 'a[href],[role="link"]',
            heading: 'h1,h2,h3,h4,h5,h6,[role="heading"]',
            textbox: 'input:not([type="hidden"]):not([type="checkbox"]):not([type="radio"]):not([type="submit"]):not([type="button"]),textarea,[role="textbox"]',
            checkbox: 'input[type="checkbox"],[role="checkbox"]',
            radio: 'input[type="radio"],[role="radio"]',
            combobox: 'select,[role="combobox"]',
            dialog: 'dialog,[role="dialog"]',
            alert: '[role="alert"]',
            navigation: 'nav,[role="navigation"]',
            listitem: 'li,[role="listitem"]',
          };
          const cssForRole = roleMap[role.toLowerCase()] ?? `[role="${role}"]`;
          candidates = Array.from(root.querySelectorAll(cssForRole));
        } else {
          candidates = Array.from(root.querySelectorAll('*'));
        }

        // Filter by text if provided
        if (text) {
          const lower = text.toLowerCase();
          candidates = candidates.filter(el =>
            (el.textContent ?? "").toLowerCase().includes(lower) ||
            (el.getAttribute("aria-label") ?? "").toLowerCase().includes(lower) ||
            (el.getAttribute("placeholder") ?? "").toLowerCase().includes(lower) ||
            (el.getAttribute("value") ?? "").toLowerCase().includes(lower)
          );
        }

        return candidates.slice(0, limit).map(el => {
          const tag = el.tagName.toLowerCase();
          const id = el.id ? `#${el.id}` : "";
          const classes = Array.from(el.classList).slice(0, 2).map(c => `.${c}`).join("");
          const ariaLabel = el.getAttribute("aria-label") ?? "";
          const placeholder = el.getAttribute("placeholder") ?? "";
          const textContent = (el.textContent ?? "").trim().slice(0, 80);
          const role = el.getAttribute("role") ?? "";
          const type = el.getAttribute("type") ?? "";
          const href = el.getAttribute("href") ?? "";
          // `.value` is a number on <li>, <progress> and <meter>; always hand back a string
          // so the formatter below can call .slice() safely. For non-string values fall back
          // to the authored attribute (empty when absent).
          const rawValue: unknown = (el as HTMLInputElement).value;
          const value = typeof rawValue === "string" ? rawValue : (el.getAttribute("value") ?? "");

          return { tag, id, classes, ariaLabel, placeholder, textContent, role, type, href, value };
        });
      }, { text: params.text, role: params.role, selector: params.selector, limit });

      if (results.length === 0) {
        return {
          content: [{ type: "text", text: "No elements found matching the criteria." }],
          details: { count: 0 },
        };
      }

      // One line per match: tag/id/classes followed by whichever attributes are present.
      const lines = results.map((r: any) => {
        const parts: string[] = [`${r.tag}${r.id}${r.classes}`];
        if (r.role) parts.push(`role="${r.role}"`);
        if (r.type) parts.push(`type="${r.type}"`);
        if (r.ariaLabel) parts.push(`aria-label="${r.ariaLabel}"`);
        if (r.placeholder) parts.push(`placeholder="${r.placeholder}"`);
        if (r.href) parts.push(`href="${r.href.slice(0, 60)}"`);
        if (r.value) parts.push(`value="${r.value.slice(0, 40)}"`);
        // Text is omitted when an aria-label is already shown.
        if (r.textContent && !r.ariaLabel) parts.push(`"${r.textContent}"`);
        return "  " + parts.join(" ");
      });

      const criteria: string[] = [];
      if (params.role) criteria.push(`role="${params.role}"`);
      if (params.text) criteria.push(`text="${params.text}"`);
      if (params.selector) criteria.push(`within="${params.selector}"`);

      return {
        content: [
          {
            type: "text",
            text: `Found ${results.length} element(s) [${criteria.join(", ")}]:\n${lines.join("\n")}`,
          },
        ],
        details: { count: results.length, results },
      };
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Find failed: ${message}` }],
        details: { error: message },
        isError: true,
      };
    }
  },
});
3435
+
3436
+ // -------------------------------------------------------------------------
3437
+ // browser_snapshot_refs
3438
+ // -------------------------------------------------------------------------
3439
// Tool registration: browser_snapshot_refs.
// Builds an element inventory for the active target, bumps the snapshot version, and
// replaces the stored ref map and ref metadata with the new snapshot.
pi.registerTool({
  name: "browser_snapshot_refs",
  label: "Browser Snapshot Refs",
  description:
    "Capture a compact inventory of interactive elements and assign deterministic versioned refs (@vN:e1, @vN:e2, ...). Use these refs with browser_click_ref, browser_fill_ref, and browser_hover_ref.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({
        description: "Optional CSS selector scope for the snapshot (e.g. 'main', 'form', '#modal').",
      })
    ),
    interactiveOnly: Type.Optional(
      Type.Boolean({
        description: "Include only interactive elements (default: true).",
      })
    ),
    limit: Type.Optional(
      Type.Number({
        description: "Maximum number of elements to include (default: 40).",
      })
    ),
    mode: Type.Optional(
      Type.String({
        description: "Semantic snapshot mode that pre-filters elements by category. When set, overrides interactiveOnly. Modes: interactive, form, dialog, navigation, errors, headings, visible_only.",
      })
    ),
  }),

  /**
   * Validates the mode, captures the snapshot, updates the shared ref state, and returns
   * a textual listing of the versioned refs. Errors become an `isError` result.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const browser = await ensureBrowser();
      const activeTarget = getActiveTarget();
      const mode = params.mode;

      // Reject an unrecognised mode before touching any shared ref state.
      if (mode !== undefined && !getSnapshotModeConfig(mode)) {
        const modeNames = Object.keys(SNAPSHOT_MODES);
        return {
          content: [{ type: "text", text: `Unknown snapshot mode: "${mode}". Valid modes: ${modeNames.join(", ")}` }],
          details: { error: `Unknown mode: ${mode}`, validModes: modeNames },
          isError: true,
        };
      }

      const interactiveOnly = params.interactiveOnly !== false;
      // Whole number clamped to the range 1..200.
      const wholeLimit = Math.floor(params.limit ?? 40);
      const limit = Math.max(1, Math.min(200, wholeLimit));

      const rawNodes = await buildRefSnapshot(activeTarget, {
        selector: params.selector,
        interactiveOnly,
        limit,
        mode,
      });

      // Publish the new snapshot: bump the version and swap in the fresh ref map.
      refVersion += 1;
      const nextMap: Record<string, RefNode> = {};
      rawNodes.forEach((raw, position) => {
        const ref = `e${position + 1}`;
        nextMap[ref] = { ref, ...raw };
      });
      currentRefMap = nextMap;

      // Record frame context when snapshot taken inside a frame
      let frameContext: string | undefined;
      if (activeFrame) {
        frameContext = activeFrame.name() || activeFrame.url();
      }
      refMetadata = {
        url: browser.page.url(),
        timestamp: Date.now(),
        selectorScope: params.selector,
        interactiveOnly,
        limit,
        version: refVersion,
        frameContext,
        mode,
      };

      const total = rawNodes.length;
      if (total === 0) {
        return {
          content: [{
            type: "text",
            text: "No elements found for ref snapshot (try interactiveOnly=false or a wider selector scope).",
          }],
          details: {
            count: 0,
            version: refVersion,
            metadata: refMetadata,
            refs: {},
          },
        };
      }

      // One listing line per ref, plus a lookup keyed by the versioned ref string.
      const versionedRefs: Record<string, RefNode> = {};
      const lines: string[] = [];
      for (const node of Object.values(nextMap)) {
        const label = formatVersionedRef(refVersion, node.ref);
        versionedRefs[label] = node;
        const segments: string[] = [label, node.role || node.tag];
        if (node.name) segments.push(`"${node.name}"`);
        if (node.href) segments.push(`href="${node.href.slice(0, 80)}"`);
        if (!node.isVisible) segments.push("(hidden)");
        if (!node.isEnabled) segments.push("(disabled)");
        lines.push(segments.join(" "));
      }

      const header: string[] = [
        `Ref snapshot v${refVersion} (${total} element(s))`,
        `URL: ${browser.page.url()}`,
        `Scope: ${params.selector ?? "body"}`,
      ];
      if (mode) header.push(`Mode: ${mode}`);
      header.push(`Use versioned refs exactly as shown (e.g. @v${refVersion}:e1).`);

      return {
        content: [{
          type: "text",
          text: `${header.join("\n")}\n\n${lines.join("\n")}`,
        }],
        details: {
          count: total,
          version: refVersion,
          metadata: refMetadata,
          refs: nextMap,
          versionedRefs,
        },
      };
    } catch (failure: any) {
      const reason = failure.message;
      return {
        content: [{ type: "text", text: `Snapshot refs failed: ${reason}` }],
        details: { error: reason },
        isError: true,
      };
    }
  },
});
3571
+
3572
+ // -------------------------------------------------------------------------
3573
+ // browser_get_ref
3574
+ // -------------------------------------------------------------------------
3575
// Tool registration: browser_get_ref.
// Looks up one ref in the current ref map and reports its stored metadata; it does not
// touch the browser.
pi.registerTool({
  name: "browser_get_ref",
  label: "Browser Get Ref",
  description: "Inspect stored metadata for one deterministic element ref (prefer versioned format, e.g. @v3:e1).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id, preferably versioned (e.g. '@v3:e1')." }),
  }),

  /**
   * Returns an `isError` result when a versioned ref belongs to a different snapshot
   * version or when the ref key is not in the current map; otherwise describes the node.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsed = parseRef(params.ref);
    const display = parsed.display;

    // A versioned ref must match the version of the stored snapshot metadata.
    if (parsed.version !== null && refMetadata && parsed.version !== refMetadata.version) {
      const have = refMetadata.version;
      return {
        content: [{ type: "text", text: staleRefGuidance(display, `snapshot version mismatch (have v${have})`) }],
        details: { error: "ref_stale", ref: display, expectedVersion: have, receivedVersion: parsed.version },
        isError: true,
      };
    }

    const node = currentRefMap[parsed.key];
    if (!node) {
      return {
        content: [{ type: "text", text: staleRefGuidance(display, "ref not found") }],
        details: { error: "ref_not_found", ref: display, metadata: refMetadata },
        isError: true,
      };
    }

    // Prefer the version recorded with the snapshot; fall back to the running counter.
    const versionedRef = formatVersionedRef(refMetadata?.version ?? refVersion, node.ref);
    const kind = node.role || node.tag;
    const quotedName = node.name ? ` "${node.name}"` : "";
    const report = [
      `${versionedRef}: ${kind}${quotedName}`,
      `Visible: ${node.isVisible}`,
      `Enabled: ${node.isEnabled}`,
      `Path: ${node.xpathOrPath}`,
    ].join("\n");

    return {
      content: [{
        type: "text",
        text: report,
      }],
      details: { ref: versionedRef, node, metadata: refMetadata },
    };
  },
});
3612
+
3613
+ // -------------------------------------------------------------------------
3614
+ // browser_click_ref
3615
+ // -------------------------------------------------------------------------
3616
pi.registerTool({
  name: "browser_click_ref",
  label: "Browser Click Ref",
  description: "Click a previously snapshotted element by deterministic versioned ref (e.g. @v3:e2).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id in versioned format, e.g. '@v3:e2'." }),
  }),

  /**
   * Clicks the element that a versioned snapshot ref points at.
   *
   * The ref is rejected with `isError: true` when it has no version, when its
   * version differs from the current snapshot metadata, when it is absent from
   * the current ref map, when the page URL differs from the snapshot URL, or
   * when `resolveRefTarget` reports failure. After the click, URL, URL hash,
   * open-dialog count and selected target attributes are compared before/after
   * and reported as verification checks alongside a page summary.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsed = parseRef(params.ref);
    const shownRef = parsed.display;

    // Common envelope for every early rejection below.
    const reject = (text: string, details: Record<string, unknown>) => ({
      content: [{ type: "text" as const, text }],
      details,
      isError: true,
    });

    try {
      const { page } = await ensureBrowser();
      const scope = getActiveTarget();

      if (parsed.version === null) {
        return reject(
          `Unversioned ref ${shownRef} is ambiguous. Use a versioned ref (e.g. @v${refMetadata?.version ?? refVersion}:e1) from browser_snapshot_refs.`,
          { error: "ref_unversioned", ref: shownRef, metadata: refMetadata },
        );
      }

      if (refMetadata && parsed.version !== refMetadata.version) {
        return reject(
          staleRefGuidance(shownRef, `snapshot version mismatch (have v${refMetadata.version})`),
          { error: "ref_stale", ref: shownRef, expectedVersion: refMetadata.version, receivedVersion: parsed.version },
        );
      }

      const entry = currentRefMap[parsed.key];
      if (!entry) {
        return reject(
          staleRefGuidance(shownRef, "ref not found"),
          { error: "ref_not_found", ref: shownRef, metadata: refMetadata },
        );
      }

      const snapshotUrl = refMetadata?.url;
      if (snapshotUrl && snapshotUrl !== page.url()) {
        return reject(
          staleRefGuidance(shownRef, "URL changed since snapshot"),
          { error: "ref_stale", ref: shownRef, snapshotUrl, currentUrl: page.url() },
        );
      }

      const resolved = await resolveRefTarget(scope, entry);
      if (!resolved.ok) {
        const reason = (resolved as { ok: false; reason: string }).reason;
        return reject(staleRefGuidance(shownRef, reason), { error: "ref_stale", ref: shownRef, reason });
      }
      const selector = resolved.selector;

      // Samples everything the verification step compares, in a fixed order.
      const observe = async () => {
        const url = page.url();
        return {
          url,
          hash: getUrlHash(url),
          dialogs: await countOpenDialogs(scope),
          state: await captureClickTargetState(scope, selector),
        };
      };

      const pre = await observe();
      await scope.locator(selector).first().click({ timeout: 8000 });
      const settle = await settleAfterActionAdaptive(page);
      const post = await observe();

      const watchedFields = ["exists", "ariaExpanded", "ariaPressed", "ariaSelected", "open"] as const;
      const targetStateChanged = watchedFields.some((field) => pre.state[field] !== post.state[field]);

      const verification = verificationFromChecks(
        [
          { name: "url_changed", passed: post.url !== pre.url, value: post.url, expected: `!= ${pre.url}` },
          { name: "hash_changed", passed: post.hash !== pre.hash, value: post.hash, expected: `!= ${pre.hash}` },
          { name: "target_state_changed", passed: targetStateChanged, value: post.state, expected: pre.state },
          { name: "dialog_open", passed: post.dialogs > pre.dialogs, value: post.dialogs, expected: `> ${pre.dialogs}` },
        ],
        "Ref may now point to an inert element. Refresh refs with browser_snapshot_refs and retry."
      );

      const summary = await postActionSummary(page, scope);
      const jsErrors = getRecentErrors(page.url());
      const versionedRef = formatVersionedRef(refMetadata?.version ?? refVersion, entry.ref);
      const quotedName = entry.name ? ` "${entry.name}"` : "";
      return {
        content: [{
          type: "text",
          text: `Clicked ${versionedRef} (${entry.role || entry.tag}${quotedName})\n${verificationLine(verification)}${jsErrors}\n\nPage summary:\n${summary}`,
        }],
        details: { ref: versionedRef, selector, url: page.url(), ...settle, ...verification },
      };
    } catch (err: any) {
      const shot = await captureErrorScreenshot(getActivePageOrNull());
      const reason = firstErrorLine(err);
      const content: any[] = [
        { type: "text", text: staleRefGuidance(shownRef, `action failed: ${reason}`) },
        { type: "text", text: `Click ref failed: ${err.message}` },
        // Screenshot is attached only when one could be captured.
        ...(shot ? [{ type: "image", data: shot.data, mimeType: shot.mimeType }] : []),
      ];
      return {
        content,
        details: { error: err.message, ref: shownRef, hint: "Run browser_snapshot_refs to refresh refs." },
        isError: true,
      };
    }
  },
});
3726
+
3727
+ // -------------------------------------------------------------------------
3728
+ // browser_hover_ref
3729
+ // -------------------------------------------------------------------------
3730
pi.registerTool({
  name: "browser_hover_ref",
  label: "Browser Hover Ref",
  description: "Hover a previously snapshotted element by deterministic versioned ref (e.g. @v3:e4).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id in versioned format, e.g. '@v3:e4'." }),
  }),

  /**
   * Hovers the element that a versioned snapshot ref points at.
   *
   * Returns an `isError: true` result when the ref is unversioned, belongs to a
   * different snapshot version, is not in the current ref map, was taken on a
   * different URL, or cannot be resolved by `resolveRefTarget`. On success the
   * result carries the versioned ref, the resolved selector, the page URL and
   * the settle details, plus a page summary in the text content.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const refInfo = parseRef(params.ref);
    const displayRef = refInfo.display;

    // Wraps a message and details payload in the tool's error result shape.
    const fail = (message: string, details: Record<string, unknown>) => ({
      content: [{ type: "text" as const, text: message }],
      details,
      isError: true,
    });

    try {
      const { page } = await ensureBrowser();
      const frameOrPage = getActiveTarget();

      if (refInfo.version === null) {
        return fail(
          `Unversioned ref ${displayRef} is ambiguous. Use a versioned ref (e.g. @v${refMetadata?.version ?? refVersion}:e1) from browser_snapshot_refs.`,
          { error: "ref_unversioned", ref: displayRef, metadata: refMetadata },
        );
      }

      if (refMetadata && refInfo.version !== refMetadata.version) {
        return fail(
          staleRefGuidance(displayRef, `snapshot version mismatch (have v${refMetadata.version})`),
          { error: "ref_stale", ref: displayRef, expectedVersion: refMetadata.version, receivedVersion: refInfo.version },
        );
      }

      const snapshotNode = currentRefMap[refInfo.key];
      if (!snapshotNode) {
        return fail(
          staleRefGuidance(displayRef, "ref not found"),
          { error: "ref_not_found", ref: displayRef, metadata: refMetadata },
        );
      }

      const urlAtSnapshot = refMetadata?.url;
      if (urlAtSnapshot && urlAtSnapshot !== page.url()) {
        return fail(
          staleRefGuidance(displayRef, "URL changed since snapshot"),
          { error: "ref_stale", ref: displayRef, snapshotUrl: urlAtSnapshot, currentUrl: page.url() },
        );
      }

      const resolution = await resolveRefTarget(frameOrPage, snapshotNode);
      if (!resolution.ok) {
        const reason = (resolution as { ok: false; reason: string }).reason;
        return fail(staleRefGuidance(displayRef, reason), { error: "ref_stale", ref: displayRef, reason });
      }
      const selector = resolution.selector;

      const hoverTarget = frameOrPage.locator(selector).first();
      await hoverTarget.hover({ timeout: 8000 });
      const settle = await settleAfterActionAdaptive(page);

      const summary = await postActionSummary(page, frameOrPage);
      const jsErrors = getRecentErrors(page.url());
      const versionedRef = formatVersionedRef(refMetadata?.version ?? refVersion, snapshotNode.ref);
      const quotedName = snapshotNode.name ? ` "${snapshotNode.name}"` : "";
      return {
        content: [{
          type: "text",
          text: `Hovered ${versionedRef} (${snapshotNode.role || snapshotNode.tag}${quotedName})${jsErrors}\n\nPage summary:\n${summary}`,
        }],
        details: { ref: versionedRef, selector, url: page.url(), ...settle },
      };
    } catch (err: any) {
      const shot = await captureErrorScreenshot(getActivePageOrNull());
      const reason = firstErrorLine(err);
      const content: any[] = [
        { type: "text", text: staleRefGuidance(displayRef, `action failed: ${reason}`) },
        { type: "text", text: `Hover ref failed: ${err.message}` },
        // Screenshot is attached only when one could be captured.
        ...(shot ? [{ type: "image", data: shot.data, mimeType: shot.mimeType }] : []),
      ];
      return {
        content,
        details: { error: err.message, ref: displayRef, hint: "Run browser_snapshot_refs to refresh refs." },
        isError: true,
      };
    }
  },
});
3816
+
3817
+ // -------------------------------------------------------------------------
3818
+ // browser_fill_ref
3819
+ // -------------------------------------------------------------------------
3820
pi.registerTool({
  name: "browser_fill_ref",
  label: "Browser Fill Ref",
  description: "Fill/type text into an input-like element by deterministic versioned ref (e.g. @v3:e1).",
  parameters: Type.Object({
    ref: Type.String({ description: "Reference id in versioned format, e.g. '@v3:e1'." }),
    text: Type.String({ description: "Text to enter." }),
    clearFirst: Type.Optional(
      Type.Boolean({ description: "Clear existing value first (default: false)." })
    ),
    submit: Type.Optional(
      Type.Boolean({ description: "Press Enter after typing (default: false)." })
    ),
    slowly: Type.Optional(
      Type.Boolean({ description: "Type character-by-character (default: false)." })
    ),
  }),

  /**
   * Enters text into the element that a versioned snapshot ref points at.
   *
   * Validation mirrors the other `*_ref` tools: the ref must be versioned,
   * match the current snapshot version, exist in the current ref map, have been
   * taken on the current URL, and resolve via `resolveRefTarget`; otherwise an
   * `isError: true` result is returned.
   *
   * With `slowly`, the element is clicked and the text is typed through the
   * page keyboard; otherwise `locator.fill` is used. `submit` presses Enter
   * afterwards. The value read back from the element and the URL change (when
   * submitting) are reported as verification checks.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    const parsedRef = parseRef(params.ref);
    const requestedRef = parsedRef.display;
    try {
      const { page: p } = await ensureBrowser();
      const target = getActiveTarget();
      if (parsedRef.version === null) {
        return {
          content: [{ type: "text", text: `Unversioned ref ${requestedRef} is ambiguous. Use a versioned ref (e.g. @v${refMetadata?.version ?? refVersion}:e1) from browser_snapshot_refs.` }],
          details: { error: "ref_unversioned", ref: requestedRef, metadata: refMetadata },
          isError: true,
        };
      }
      if (refMetadata && parsedRef.version !== refMetadata.version) {
        return {
          content: [{ type: "text", text: staleRefGuidance(requestedRef, `snapshot version mismatch (have v${refMetadata.version})`) }],
          details: { error: "ref_stale", ref: requestedRef, expectedVersion: refMetadata.version, receivedVersion: parsedRef.version },
          isError: true,
        };
      }
      const node = currentRefMap[parsedRef.key];
      if (!node) {
        return {
          content: [{ type: "text", text: staleRefGuidance(requestedRef, "ref not found") }],
          details: { error: "ref_not_found", ref: requestedRef, metadata: refMetadata },
          isError: true,
        };
      }
      if (refMetadata?.url && refMetadata.url !== p.url()) {
        return {
          content: [{ type: "text", text: staleRefGuidance(requestedRef, "URL changed since snapshot") }],
          details: { error: "ref_stale", ref: requestedRef, snapshotUrl: refMetadata.url, currentUrl: p.url() },
          isError: true,
        };
      }

      const resolved = await resolveRefTarget(target, node);
      if (!resolved.ok) {
        const reason = (resolved as { ok: false; reason: string }).reason;
        return {
          content: [{ type: "text", text: staleRefGuidance(requestedRef, reason) }],
          details: { error: "ref_stale", ref: requestedRef, reason },
          isError: true,
        };
      }

      const locator = target.locator(resolved.selector).first();
      const beforeUrl = p.url();
      if (params.slowly) {
        // Click first so the subsequent keyboard events land in this element.
        await locator.click({ timeout: 8000 });
        if (params.clearFirst) {
          // Select-all is Cmd+A on macOS and Ctrl+A elsewhere; a hard-coded
          // Control+A leaves the old value in place on macOS.
          // NOTE(review): assumes the browser runs on the same host as this process.
          const selectAll = process.platform === "darwin" ? "Meta+A" : "Control+A";
          await p.keyboard.press(selectAll);
          await p.keyboard.press("Delete");
        }
        await p.keyboard.type(params.text);
      } else {
        if (params.clearFirst) {
          // Same 8s budget as the fill below instead of Playwright's default timeout.
          await locator.fill("", { timeout: 8000 });
        }
        await locator.fill(params.text, { timeout: 8000 });
      }
      if (params.submit) {
        await p.keyboard.press("Enter");
      }
      const settle = await settleAfterActionAdaptive(p);

      const filledValue = await readInputLikeValue(target, resolved.selector);
      const afterUrl = p.url();
      const verification = verificationFromChecks(
        [
          { name: "value_equals_expected", passed: filledValue === params.text, value: filledValue, expected: params.text },
          { name: "value_contains_expected", passed: typeof filledValue === "string" && filledValue.includes(params.text), value: filledValue, expected: params.text },
          { name: "url_changed_after_submit", passed: !!params.submit && afterUrl !== beforeUrl, value: afterUrl, expected: `!= ${beforeUrl}` },
        ],
        "Try refreshing refs and confirm this ref still targets an input-like element."
      );

      const summary = await postActionSummary(p, target);
      const jsErrors = getRecentErrors(p.url());
      const versionedRef = formatVersionedRef(refMetadata?.version ?? refVersion, node.ref);
      return {
        content: [{
          type: "text",
          text: `Filled ${versionedRef} (${node.role || node.tag}${node.name ? ` "${node.name}"` : ""}) with "${params.text}"\n${verificationLine(verification)}${jsErrors}\n\nPage summary:\n${summary}`,
        }],
        details: { ref: versionedRef, selector: resolved.selector, url: p.url(), filledValue, ...settle, ...verification },
      };
    } catch (err: any) {
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const reason = firstErrorLine(err);
      const content: any[] = [
        { type: "text", text: staleRefGuidance(requestedRef, `action failed: ${reason}`) },
        { type: "text", text: `Fill ref failed: ${err.message}` },
      ];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message, ref: requestedRef, hint: "Run browser_snapshot_refs to refresh refs." },
        isError: true,
      };
    }
  },
});
3944
+
3945
+ // -------------------------------------------------------------------------
3946
+ // browser_wait_for
3947
+ // -------------------------------------------------------------------------
3948
pi.registerTool({
  name: "browser_wait_for",
  label: "Browser Wait For",
  description:
    "Wait for a condition before continuing. Use after actions that trigger async updates — data fetches, route changes, animations, loading spinners. Choose the appropriate condition: 'selector_visible' waits for an element to appear, 'selector_hidden' waits for it to disappear, 'url_contains' waits for the URL to match, 'network_idle' waits for all network requests to finish, 'delay' waits a fixed number of milliseconds, 'text_visible' waits for text to appear in the page body, 'text_hidden' waits for text to disappear from the page body, 'request_completed' waits for a network response whose URL contains the given substring, 'console_message' waits for a console log message containing the given substring, 'element_count' waits for the number of elements matching the CSS selector in 'value' to satisfy the 'threshold' expression (e.g. '>=3', '==0', '<5'), 'region_stable' waits for the DOM region matching the CSS selector in 'value' to stop changing.",
  parameters: Type.Object({
    condition: StringEnum([
      "selector_visible",
      "selector_hidden",
      "url_contains",
      "network_idle",
      "delay",
      "text_visible",
      "text_hidden",
      "request_completed",
      "console_message",
      "element_count",
      "region_stable",
    ] as const),
    value: Type.Optional(
      Type.String({
        description:
          "For selector_visible/selector_hidden/element_count/region_stable: CSS selector. For url_contains/request_completed: URL substring. For text_visible/text_hidden/console_message: text substring. For delay: milliseconds as a string (e.g. '1000'). Not used for network_idle.",
      })
    ),
    threshold: Type.Optional(
      Type.String({
        description:
          "Threshold expression for element_count (e.g. '>=3', '==0', '<5', or bare '3' which defaults to >=). Only used with element_count condition.",
      })
    ),
    timeout: Type.Optional(
      Type.Number({
        description: "Maximum milliseconds to wait before failing (default: 10000)",
      })
    ),
  }),

  /**
   * Blocks until the requested condition holds or the timeout elapses.
   *
   * Parameters are first passed through `validateWaitParams`; a validation
   * error is returned as an `isError: true` result. Each condition then waits
   * with the matching Playwright primitive (or by polling `consoleLogs` for
   * `console_message`). Any thrown error, including timeouts, is reported as
   * "Wait failed: …" with `isError: true`.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page: p } = await ensureBrowser();
      const target = getActiveTarget();
      const timeout = params.timeout ?? 10000;

      // Validate params for all conditions using shared validator
      const validation = validateWaitParams({ condition: params.condition, value: params.value, threshold: params.threshold });
      if (validation) {
        return {
          content: [{ type: "text", text: validation.error }],
          details: { error: validation.error, condition: params.condition },
          isError: true,
        };
      }

      const value = params.value;
      // Defensive guard shared by every condition that cannot run without `value`,
      // so no branch relies on a non-null assertion.
      const missingValue = (expected: string) => ({
        content: [{ type: "text" as const, text: `${params.condition} requires a value (${expected})` }],
        details: {},
        isError: true,
      });

      switch (params.condition) {
        case "selector_visible":
        case "selector_hidden": {
          if (!value) return missingValue("CSS selector");
          const state = params.condition === "selector_visible" ? "visible" : "hidden";
          await target.waitForSelector(value, { state, timeout });
          return {
            content: [{ type: "text", text: `Element "${value}" is now ${state}` }],
            details: { condition: params.condition, value },
          };
        }

        case "url_contains": {
          if (!value) return missingValue("URL substring");
          await p.waitForURL((url) => url.toString().includes(value), { timeout });
          return {
            content: [{ type: "text", text: `URL now contains "${value}". Current URL: ${p.url()}` }],
            details: { condition: params.condition, value, url: p.url() },
          };
        }

        case "network_idle": {
          await p.waitForLoadState("networkidle", { timeout });
          return {
            content: [{ type: "text", text: "Network is idle" }],
            details: { condition: params.condition },
          };
        }

        case "delay": {
          const ms = parseInt(value ?? "1000", 10);
          if (isNaN(ms)) {
            return {
              content: [{ type: "text", text: "delay requires a numeric value (milliseconds)" }],
              details: {},
              isError: true,
            };
          }
          if (ms < 0) {
            // A negative duration would resolve immediately and report "Waited -Nms".
            return {
              content: [{ type: "text", text: "delay requires a non-negative value (milliseconds)" }],
              details: {},
              isError: true,
            };
          }
          await new Promise((resolve) => setTimeout(resolve, ms));
          return {
            content: [{ type: "text", text: `Waited ${ms}ms` }],
            details: { condition: params.condition, ms },
          };
        }

        case "text_visible":
        case "text_hidden": {
          if (!value) return missingValue("text substring");
          const shouldBePresent = params.condition === "text_visible";
          // Case-insensitive substring match against the body's rendered text.
          await target.waitForFunction(
            ({ needle, shouldBePresent }: { needle: string; shouldBePresent: boolean }) => {
              const body = document.body?.innerText ?? "";
              return body.toLowerCase().includes(needle.toLowerCase()) === shouldBePresent;
            },
            { needle: value, shouldBePresent },
            { timeout }
          );
          return {
            content: [{
              type: "text",
              text: shouldBePresent
                ? `Text "${value}" is now visible on the page`
                : `Text "${value}" is no longer visible on the page`,
            }],
            details: { condition: params.condition, value },
          };
        }

        case "request_completed": {
          if (!value) return missingValue("URL substring");
          // waitForResponse is Page-only (not available on Frame)
          const response = await getActivePage().waitForResponse(
            (resp) => resp.url().includes(value),
            { timeout }
          );
          return {
            content: [{ type: "text", text: `Request completed: ${response.url()} (status ${response.status()})` }],
            details: { condition: params.condition, value, url: response.url(), status: response.status() },
          };
        }

        case "console_message": {
          if (!value) return missingValue("text substring");
          // Poll consoleLogs array — no Playwright built-in for this.
          // The log is checked at least once (even for timeout <= 0) and the
          // sleep is clamped so the loop never overshoots the deadline.
          const deadline = Date.now() + timeout;
          for (;;) {
            const match = consoleLogs.find((entry) => includesNeedle(entry.text, value));
            if (match) {
              return {
                content: [{ type: "text", text: `Console message matching "${value}" found: "${match.text}"` }],
                details: { condition: params.condition, value, matchedText: match.text, matchedType: match.type },
              };
            }
            const remaining = deadline - Date.now();
            if (remaining <= 0) break;
            await new Promise((resolve) => setTimeout(resolve, Math.min(100, remaining)));
          }
          throw new Error(`Timed out waiting for console message matching "${value}" (${timeout}ms)`);
        }

        case "element_count": {
          if (!value) return missingValue("CSS selector");
          const threshold = parseThreshold(params.threshold ?? ">=1");
          if (!threshold) {
            return {
              content: [{ type: "text", text: `element_count threshold is malformed: "${params.threshold}"` }],
              details: { error: "malformed threshold", condition: params.condition },
              isError: true,
            };
          }
          const { op, n } = threshold;
          await target.waitForFunction(
            ({ selector, op, n }: { selector: string; op: string; n: number }) => {
              const count = document.querySelectorAll(selector).length;
              switch (op) {
                case ">=": return count >= n;
                case "<=": return count <= n;
                case "==": return count === n;
                case ">": return count > n;
                case "<": return count < n;
                default: return false;
              }
            },
            { selector: value, op, n },
            { timeout }
          );
          return {
            content: [{ type: "text", text: `Element count for "${value}" satisfies ${op}${n}` }],
            details: { condition: params.condition, value, threshold: `${op}${n}` },
          };
        }

        case "region_stable": {
          if (!value) return missingValue("CSS selector");
          const script = createRegionStableScript(value);
          await target.waitForFunction(script, undefined, { timeout, polling: 200 });
          return {
            content: [{ type: "text", text: `Region "${value}" is now stable` }],
            details: { condition: params.condition, value },
          };
        }

        default: {
          // Unrecognised condition: report it instead of returning undefined.
          const unknownCondition = String(params.condition);
          return {
            content: [{ type: "text", text: `Unsupported wait condition: ${unknownCondition}` }],
            details: { error: "unsupported condition", condition: unknownCondition },
            isError: true,
          };
        }
      }
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Wait failed: ${err.message}` }],
        details: { error: err.message, condition: params.condition, value: params.value },
        isError: true,
      };
    }
  },
});
4182
+
4183
+ // -------------------------------------------------------------------------
4184
+ // browser_hover
4185
+ // -------------------------------------------------------------------------
4186
pi.registerTool({
  name: "browser_hover",
  label: "Browser Hover",
  description:
    "Move the mouse over an element to trigger hover states — reveals tooltips, dropdown menus, CSS :hover effects, and other hover-dependent UI. Returns a compact page summary showing the resulting hover state.",
  parameters: Type.Object({
    selector: Type.String({
      description: "CSS selector of the element to hover over",
    }),
  }),

  /**
   * Hovers the first element matching `params.selector`, lets the page settle,
   * and returns a page summary. On failure the result is flagged `isError` and
   * carries an error screenshot when one could be captured.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      const { page } = await ensureBrowser();
      const scope = getActiveTarget();

      const hovered = scope.locator(params.selector).first();
      await hovered.hover({ timeout: 10000 });
      const settle = await settleAfterActionAdaptive(page);

      const summary = await postActionSummary(page, scope);
      const jsErrors = getRecentErrors(page.url());
      const message = `Hovering over "${params.selector}"${jsErrors}\n\nPage summary:\n${summary}`;

      return {
        content: [{ type: "text", text: message }],
        details: { selector: params.selector, ...settle },
      };
    } catch (err: any) {
      const shot = await captureErrorScreenshot(getActivePageOrNull());
      const content: any[] = [
        { type: "text", text: `Hover failed: ${err.message}` },
        // Screenshot is attached only when one could be captured.
        ...(shot ? [{ type: "image", data: shot.data, mimeType: shot.mimeType }] : []),
      ];
      return {
        content,
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4225
+
4226
+ // -------------------------------------------------------------------------
4227
+ // browser_key_press
4228
+ // -------------------------------------------------------------------------
4229
pi.registerTool({
  name: "browser_key_press",
  label: "Browser Key Press",
  description:
    "Press a keyboard key or key combination. Returns a compact page summary plus lightweight verification details after the key press. Use for: submitting forms (Enter), closing modals (Escape), navigating focusable elements (Tab / Shift+Tab), operating dropdowns and menus (ArrowDown, ArrowUp, Space), copying/pasting (Meta+C, Meta+V). Key names follow the DOM KeyboardEvent key convention.",
  parameters: Type.Object({
    key: Type.String({
      description:
        "Key or combination to press, e.g. 'Enter', 'Escape', 'Tab', 'ArrowDown', 'ArrowUp', 'Space', 'Meta+A', 'Shift+Tab', 'Control+Enter'",
    }),
  }),

  /**
   * Presses `params.key` on the page keyboard as a tracked action.
   *
   * A compact page state is captured before and after the key press; the two
   * are diffed, stored in `lastActionBeforeState` / `lastActionAfterState`, and
   * recorded through `finishTrackedAction`. URL, focused-element descriptor and
   * open-dialog count are compared before/after as verification checks. If
   * anything throws after the action was registered, the action is finished
   * with status "error".
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Declared outside the try so the catch handler can close out the tracked action.
    let trackedId: number | null = null;
    let stateBefore: CompactPageState | null = null;

    try {
      const { page } = await ensureBrowser();
      const scope = getActiveTarget();

      const initialState = await captureCompactPageState(page, { includeBodyText: true, target: scope });
      stateBefore = initialState;
      const actionId = beginTrackedAction("browser_key_press", params, initialState.url).id;
      trackedId = actionId;

      const beforeUrl = page.url();
      const beforeFocus = await readFocusedDescriptor(scope);
      const dialogsBefore = await countOpenDialogs(scope);

      await page.keyboard.press(params.key);
      const settle = await settleAfterActionAdaptive(page, { checkFocusStability: true });

      const afterUrl = page.url();
      const afterFocus = await readFocusedDescriptor(scope);
      const dialogsAfter = await countOpenDialogs(scope);

      const checks = [
        { name: "url_changed", passed: afterUrl !== beforeUrl, value: afterUrl, expected: `!= ${beforeUrl}` },
        { name: "focus_changed", passed: afterFocus !== beforeFocus, value: afterFocus, expected: `!= ${beforeFocus}` },
        { name: "dialog_open", passed: dialogsAfter > dialogsBefore, value: dialogsAfter, expected: `> ${dialogsBefore}` },
      ];
      const verification = verificationFromChecks(
        checks,
        "If this key should trigger UI changes, confirm focus is on the intended element first."
      );

      const summary = await postActionSummary(page, scope);
      const jsErrors = getRecentErrors(page.url());
      const finalState = await captureCompactPageState(page, { includeBodyText: true, target: scope });
      const diff = diffCompactStates(initialState, finalState);

      lastActionBeforeState = initialState;
      lastActionAfterState = finalState;
      finishTrackedAction(actionId, {
        status: "success",
        afterUrl: finalState.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: initialState,
        afterState: finalState,
      });

      const report = `Pressed "${params.key}"\nAction: ${actionId}\n${verificationLine(verification)}${jsErrors}\n\nDiff:\n${formatDiffText(diff)}\n\nPage summary:\n${summary}`;
      return {
        content: [{ type: "text", text: report }],
        details: { key: params.key, beforeFocus, afterFocus, actionId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      if (trackedId !== null) {
        finishTrackedAction(trackedId, {
          status: "error",
          afterUrl: getActivePageOrNull()?.url() ?? "",
          error: err.message,
          beforeState: stateBefore ?? undefined,
        });
      }
      const shot = await captureErrorScreenshot(getActivePageOrNull());
      const content: any[] = [
        { type: "text", text: `Key press failed: ${err.message}` },
        // Screenshot is attached only when one could be captured.
        ...(shot ? [{ type: "image", data: shot.data, mimeType: shot.mimeType }] : []),
      ];
      return {
        content,
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4306
+
4307
+ // -------------------------------------------------------------------------
4308
+ // browser_select_option
4309
+ // -------------------------------------------------------------------------
4310
pi.registerTool({
  name: "browser_select_option",
  label: "Browser Select Option",
  description:
    "Select an option from a <select> dropdown element by its visible label or value. Returns a compact page summary plus lightweight verification details. For custom-built dropdowns use browser_click to open them then browser_click to pick the option.",
  parameters: Type.Object({
    selector: Type.String({
      description: "CSS selector targeting the <select> element",
    }),
    option: Type.String({
      description:
        "The option to select — can be the visible label text or the value attribute. Will try label first, then value.",
    }),
  }),

  /**
   * Selects an option in a `<select>` element as a tracked action.
   *
   * The option is matched by label first and by value second. If both attempts
   * fail, the thrown error names both causes so the caller can tell a bad
   * selector from a missing option. After selecting, the element's selected
   * values and labels are read back and reported as verification checks, and a
   * before/after compact page state diff is recorded via `finishTrackedAction`.
   */
  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    let actionId: number | null = null;
    let beforeState: CompactPageState | null = null;
    try {
      const { page: p } = await ensureBrowser();
      const target = getActiveTarget();
      beforeState = await captureCompactPageState(p, { selectors: [params.selector], includeBodyText: true, target });
      actionId = beginTrackedAction("browser_select_option", params, beforeState.url).id;

      // Error messages from the selection attempts can span several lines; keep the headline only.
      const headline = (e: any): string => String(e?.message ?? e).split("\n")[0];

      let selected: string[];
      try {
        selected = await target.selectOption(params.selector, { label: params.option }, { timeout: 5000 });
      } catch (labelErr: any) {
        try {
          selected = await target.selectOption(params.selector, { value: params.option }, { timeout: 5000 });
        } catch (valueErr: any) {
          // Surface both failures rather than silently dropping the label attempt.
          throw new Error(
            `could not select "${params.option}" by label (${headline(labelErr)}) or by value (${headline(valueErr)})`
          );
        }
      }

      const settle = await settleAfterActionAdaptive(p);

      // Read back what the element reports as selected; non-<select> elements yield empty lists.
      const selectedState = await target.locator(params.selector).first().evaluate((el) => {
        if (!(el instanceof HTMLSelectElement)) {
          return { selectedValues: [] as string[], selectedLabels: [] as string[] };
        }
        const selectedOptions = Array.from(el.selectedOptions || []);
        return {
          selectedValues: selectedOptions.map((opt) => opt.value),
          selectedLabels: selectedOptions.map((opt) => (opt.textContent || "").trim()),
        };
      });
      const optionNeedle = params.option.toLowerCase();
      const verification = verificationFromChecks(
        [
          { name: "selected_values_include_option", passed: selectedState.selectedValues.includes(params.option), value: selectedState.selectedValues, expected: params.option },
          { name: "selected_labels_include_option", passed: selectedState.selectedLabels.some((label) => label.toLowerCase().includes(optionNeedle)), value: selectedState.selectedLabels, expected: params.option },
        ],
        "Confirm whether the target select uses option label or value, then retry with that exact text."
      );

      const summary = await postActionSummary(p, target);
      const jsErrors = getRecentErrors(p.url());
      const afterState = await captureCompactPageState(p, { selectors: [params.selector], includeBodyText: true, target });
      const diff = diffCompactStates(beforeState, afterState);
      lastActionBeforeState = beforeState;
      lastActionAfterState = afterState;
      finishTrackedAction(actionId, {
        status: "success",
        afterUrl: afterState.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState,
        afterState,
      });

      return {
        content: [
          {
            type: "text",
            text: `Selected "${params.option}" in "${params.selector}". Values: ${selected.join(", ")}\nAction: ${actionId}\n${verificationLine(verification)}${jsErrors}\n\nDiff:\n${formatDiffText(diff)}\n\nPage summary:\n${summary}`,
          },
        ],
        details: { selector: params.selector, option: params.option, selected, selectedState, actionId, diff, ...settle, ...verification },
      };
    } catch (err: any) {
      if (actionId !== null) {
        finishTrackedAction(actionId, { status: "error", afterUrl: getActivePageOrNull()?.url() ?? "", error: err.message, beforeState: beforeState ?? undefined });
      }
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Select option failed: ${err.message}` }];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return {
        content,
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4405
+
4406
+ // -------------------------------------------------------------------------
4407
+ // browser_set_checked
4408
+ // -------------------------------------------------------------------------
4409
// Tool: browser_set_checked
// Forces a checkbox/radio into an explicit checked state, then re-reads the
// state from the page to verify it, and records the action with a
// before/after page-state diff.
pi.registerTool({
  name: "browser_set_checked",
  label: "Browser Set Checked",
  description:
    "Check or uncheck a checkbox or radio button. More reliable than clicking for form elements where you need a specific state.",
  parameters: Type.Object({
    selector: Type.String({
      description: "CSS selector targeting the checkbox or radio input",
    }),
    checked: Type.Boolean({
      description: "true to check, false to uncheck",
    }),
  }),

  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Both are hoisted out of the try so the catch branch can close the
    // tracked action with whatever was captured before the failure.
    let trackedId: number | null = null;
    let before: CompactPageState | null = null;
    try {
      const browser = await ensureBrowser();
      const page = browser.page;
      const target = getActiveTarget();

      // Fresh options object and fresh locator on every use, as in the
      // straight-line version of this flow.
      const snapshot = () =>
        captureCompactPageState(page, { selectors: [params.selector], includeBodyText: true, target });
      const input = () => target.locator(params.selector).first();

      const initial = await snapshot();
      before = initial;
      const tracked = beginTrackedAction("browser_set_checked", params, initial.url).id;
      trackedId = tracked;

      await input().setChecked(params.checked, { timeout: 10000 });
      const settle = await settleAfterActionAdaptive(page);

      // A failed read is reported as null, which can never equal the requested
      // boolean, so the verification check fails instead of throwing.
      const actualChecked = await input()
        .isChecked()
        .catch(() => null);
      const checks = [
        {
          name: "checked_state_matches",
          passed: actualChecked === params.checked,
          value: actualChecked,
          expected: params.checked,
        },
      ];
      const verification = verificationFromChecks(
        checks,
        "Ensure selector points to a checkbox/radio input and retry."
      );

      const summary = await postActionSummary(page, target);
      const jsErrors = getRecentErrors(page.url());
      const final = await snapshot();
      const diff = diffCompactStates(initial, final);
      lastActionBeforeState = initial;
      lastActionAfterState = final;
      finishTrackedAction(tracked, {
        status: "success",
        afterUrl: final.url,
        verificationSummary: verification.verificationSummary,
        warningSummary: jsErrors.trim() || undefined,
        diffSummary: diff.summary,
        changed: diff.changed,
        beforeState: initial,
        afterState: final,
      });

      let state: string;
      if (params.checked) {
        state = "checked";
      } else {
        state = "unchecked";
      }
      const report = `Set "${params.selector}" to ${state}\nAction: ${tracked}\n${verificationLine(verification)}${jsErrors}\n\nDiff:\n${formatDiffText(diff)}\n\nPage summary:\n${summary}`;

      return {
        content: [{ type: "text", text: report }],
        details: {
          selector: params.selector,
          checked: params.checked,
          actualChecked,
          actionId: tracked,
          diff,
          ...settle,
          ...verification,
        },
      };
    } catch (err: any) {
      // Close the tracked action only if one was actually opened.
      if (trackedId !== null) {
        finishTrackedAction(trackedId, {
          status: "error",
          afterUrl: getActivePageOrNull()?.url() ?? "",
          error: err.message,
          beforeState: before ?? undefined,
        });
      }
      const errorShot = await captureErrorScreenshot(getActivePageOrNull());
      const content: any[] = [{ type: "text", text: `Set checked failed: ${err.message}` }];
      if (errorShot) {
        content.push({ type: "image", data: errorShot.data, mimeType: errorShot.mimeType });
      }
      return { content, details: { error: err.message }, isError: true };
    }
  },
});
4480
+
4481
+ // -------------------------------------------------------------------------
4482
+ // browser_set_viewport
4483
+ // -------------------------------------------------------------------------
4484
// Tool: browser_set_viewport
// Resizes the viewport of the page returned by ensureBrowser(), either to a
// named preset or to explicit pixel dimensions.
//
// Resolution rules:
//   - `preset`, when given, wins over `width`/`height`.
//   - otherwise both `width` and `height` are required.
//   - custom dimensions must be positive integers; anything else is rejected
//     up front with a descriptive error instead of being forwarded to
//     setViewportSize, where it would surface as an opaque failure.
//
// Returns `{ width, height, label }` in `details` on success, or an
// `isError: true` result describing what was wrong.
pi.registerTool({
  name: "browser_set_viewport",
  label: "Browser Set Viewport",
  description:
    "Resize the browser viewport to test responsive layouts at different screen sizes. Use presets for common breakpoints or specify exact pixel dimensions. Essential for verifying mobile/tablet/desktop layouts.",
  parameters: Type.Object({
    preset: Type.Optional(
      StringEnum(["mobile", "tablet", "desktop", "wide"] as const)
      // mobile: 390×844 (iPhone 14), tablet: 768×1024 (iPad), desktop: 1280×800, wide: 1920×1080
    ),
    width: Type.Optional(
      Type.Number({ description: "Custom viewport width in pixels (requires height too)" })
    ),
    height: Type.Optional(
      Type.Number({ description: "Custom viewport height in pixels (requires width too)" })
    ),
  }),

  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    // Single source of truth for preset dimensions; labels are derived from it.
    const presetSizes = {
      mobile: { width: 390, height: 844 },
      tablet: { width: 768, height: 1024 },
      desktop: { width: 1280, height: 800 },
      wide: { width: 1920, height: 1080 },
    } as const;
    const isPositiveInteger = (value: number): boolean => Number.isInteger(value) && value > 0;
    const usageError = {
      content: [
        {
          type: "text" as const,
          text: "Provide either a preset (mobile/tablet/desktop/wide) or both width and height.",
        },
      ],
      details: {},
      isError: true,
    };

    try {
      const { page: p } = await ensureBrowser();

      let width: number;
      let height: number;
      let label: string;

      if (params.preset) {
        const size = presetSizes[params.preset];
        if (!size) {
          // Only reachable if schema validation was bypassed with an unknown preset.
          return usageError;
        }
        width = size.width;
        height = size.height;
        label = `${params.preset} (${width}×${height})`;
      } else if (params.width !== undefined && params.height !== undefined) {
        if (!isPositiveInteger(params.width) || !isPositiveInteger(params.height)) {
          return {
            content: [
              {
                type: "text",
                text: `Viewport width and height must be positive integers (got ${params.width}×${params.height}).`,
              },
            ],
            details: { error: "invalid_dimensions", width: params.width, height: params.height },
            isError: true,
          };
        }
        width = params.width;
        height = params.height;
        label = `custom (${width}×${height})`;
      } else {
        return usageError;
      }

      await p.setViewportSize({ width, height });

      return {
        content: [{ type: "text", text: `Viewport set to ${label}` }],
        details: { width, height, label },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Set viewport failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4565
+
4566
+ // -------------------------------------------------------------------------
4567
+ // browser_get_page_source
4568
+ // -------------------------------------------------------------------------
4569
// Tool: browser_get_page_source
// Returns the HTML of the active target: the outerHTML of the first element
// matching `selector` when one is given, otherwise the whole document. The
// HTML is passed through truncateText() before being returned.
pi.registerTool({
  name: "browser_get_page_source",
  label: "Browser Page Source",
  description:
    "Get the current HTML source of the page (or a specific element). Use when you need to inspect the actual DOM structure — verify semantic HTML, check that elements rendered correctly, debug why a selector isn't matching, or audit accessibility markup. Output is truncated for large pages.",
  parameters: Type.Object({
    selector: Type.Optional(
      Type.String({
        description:
          "CSS selector to scope the output to a specific element (e.g. 'main', 'form', '#app'). If omitted, returns the full page HTML.",
      })
    ),
  }),

  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();
      const target = getActiveTarget();
      const scopedSelector = params.selector;

      // An empty-string selector is treated the same as an omitted one.
      const html: string = scopedSelector
        ? await target.locator(scopedSelector).first().evaluate((el: Element) => el.outerHTML)
        : await target.content();

      const scope = scopedSelector ? `element "${scopedSelector}"` : "full page";
      const body = truncateText(html);

      return {
        content: [{ type: "text", text: `HTML source of ${scope}:\n\n${body}` }],
        details: { scope },
      };
    } catch (err: any) {
      const message = `Get page source failed: ${err.message}`;
      return {
        content: [{ type: "text", text: message }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4621
+
4622
+ // -------------------------------------------------------------------------
4623
+ // browser_list_pages
4624
+ // -------------------------------------------------------------------------
4625
// Tool: browser_list_pages
// Refreshes the cached title/URL of every registry entry from its live page,
// then reports the registry listing, one line per page.
pi.registerTool({
  name: "browser_list_pages",
  label: "Browser List Pages",
  description:
    "List all open browser pages/tabs with their IDs, titles, URLs, and active status. Use to see what pages are available before switching.",
  parameters: Type.Object({}),

  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();

      // Sync cached metadata with the live pages, one page at a time.
      for (const tracked of pageRegistry.pages) {
        try {
          const liveTitle = await tracked.page.title();
          tracked.title = liveTitle;
          tracked.url = tracked.page.url();
        } catch {
          // Page may have been closed; keep whatever was cached.
        }
      }

      const pages = registryListPages(pageRegistry);
      const count = pages.length;
      if (count === 0) {
        return {
          content: [{ type: "text", text: "No pages open." }],
          details: { pages: [], count: 0 },
        };
      }

      const lines: string[] = [];
      for (const info of pages as any[]) {
        const openerNote = info.opener !== null ? ` (opener: ${info.opener})` : "";
        const activeNote = info.isActive ? " ← active" : "";
        lines.push(` [${info.id}] ${info.title || "(untitled)"} — ${info.url}${openerNote}${activeNote}`);
      }

      return {
        content: [{ type: "text", text: `${count} page(s):\n${lines.join("\n")}` }],
        details: { pages, count },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `List pages failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4669
+
4670
+ // -------------------------------------------------------------------------
4671
+ // browser_switch_page
4672
+ // -------------------------------------------------------------------------
4673
// Tool: browser_switch_page
// Marks the page with the given registry ID as active, drops any frame
// selection, brings the page to the front, and refreshes its cached
// title/URL in the registry.
pi.registerTool({
  name: "browser_switch_page",
  label: "Browser Switch Page",
  description:
    "Switch the active browser page/tab by page ID. Use browser_list_pages to see available IDs. Clears any active frame selection.",
  parameters: Type.Object({
    id: Type.Number({ description: "Page ID to switch to (from browser_list_pages)" }),
  }),

  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();

      const pageId = params.id;
      registrySetActive(pageRegistry, pageId);
      // A frame selection is tied to the previously active page, so reset it.
      activeFrame = null;

      const current = registryGetActive(pageRegistry);
      await current.page.bringToFront();

      // A title that cannot be read is reported as empty rather than failing the switch.
      const title = await current.page.title().catch(() => "");
      const url = current.page.url();
      current.title = title;
      current.url = url;

      const shownTitle = title || "(untitled)";
      return {
        content: [{ type: "text", text: `Switched to page ${pageId}: ${shownTitle} — ${url}` }],
        details: { id: pageId, title, url },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Switch page failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4707
+
4708
+ // -------------------------------------------------------------------------
4709
+ // browser_close_page
4710
+ // -------------------------------------------------------------------------
4711
// Tool: browser_close_page
// Closes one page by registry ID and reports the remaining pages.
//
// Refuses to close the last remaining page, and returns a "not_found" error
// listing the available IDs when the ID is unknown.
//
// The selected frame is cleared only when it belongs to the page being
// closed. Previously it was cleared unconditionally, which silently dropped
// the frame selection on the active page whenever a background tab was closed.
pi.registerTool({
  name: "browser_close_page",
  label: "Browser Close Page",
  description:
    "Close a specific browser page/tab by ID. Cannot close the last remaining page. The page's close event triggers automatic registry cleanup and active-page fallback.",
  parameters: Type.Object({
    id: Type.Number({ description: "Page ID to close (from browser_list_pages)" }),
  }),

  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();
      if (pageRegistry.pages.length <= 1) {
        return {
          content: [{ type: "text", text: `Cannot close the last remaining page. Use browser_close to close the entire browser.` }],
          details: { error: "last_page", pageCount: pageRegistry.pages.length },
          isError: true,
        };
      }
      const entry = pageRegistry.pages.find((e: any) => e.id === params.id);
      if (!entry) {
        const available = pageRegistry.pages.map((e: any) => e.id);
        return {
          content: [{ type: "text", text: `Page ${params.id} not found. Available page IDs: [${available.join(", ")}].` }],
          details: { error: "not_found", available },
          isError: true,
        };
      }

      // Decide ownership before closing, while the frame/page link is certainly intact.
      const frameOwnedByClosedPage = activeFrame !== null && activeFrame.page() === entry.page;

      // Close the Playwright page — this fires the "close" event handler
      // which calls registryRemovePage and handles active-page fallback
      await entry.page.close();

      // Clear the active frame only if it belonged to the closed page; a
      // selection on a page that is still open stays valid.
      if (frameOwnedByClosedPage) {
        activeFrame = null;
      }

      // Refresh cached titles/URLs of the remaining pages (best effort).
      for (const remaining of pageRegistry.pages) {
        try {
          remaining.title = await remaining.page.title();
          remaining.url = remaining.page.url();
        } catch {
          // Page may have been closed in the meantime; keep the cached values.
        }
      }
      const pages = registryListPages(pageRegistry);
      const lines = pages.map((p: any) => {
        const active = p.isActive ? " ← active" : "";
        return ` [${p.id}] ${p.title || "(untitled)"} — ${p.url}${active}`;
      });
      return {
        content: [{ type: "text", text: `Closed page ${params.id}. ${pages.length} page(s) remaining:\n${lines.join("\n")}` }],
        details: { closedId: params.id, pages, count: pages.length },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Close page failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4769
+
4770
+ // -------------------------------------------------------------------------
4771
+ // browser_list_frames
4772
+ // -------------------------------------------------------------------------
4773
// Tool: browser_list_frames
// Lists every frame of the active page with its index, display name, URL,
// parent and whether it is the currently selected frame.
//
// Display names: the frame's own name if it has one, otherwise "main" for the
// main frame and "(unnamed-<index>)" for any other frame.
//
// Parent names use the same display names, so a frame nested inside an
// unnamed iframe is reported with "(unnamed-<index>)" as its parent.
// Previously every frame whose parent had no name was labelled as a child of
// "main", even when the parent was a nested iframe.
pi.registerTool({
  name: "browser_list_frames",
  label: "Browser List Frames",
  description:
    "List all frames in the active page, including the main frame and any iframes. Shows frame name, URL, and parent frame name. Use before browser_select_frame to identify available frames.",
  parameters: Type.Object({}),

  async execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();
      const p = getActivePage();
      const frames = p.frames();
      const mainFrame = p.mainFrame();

      // Resolve all display names first so parents can be referred to by the
      // exact label they carry in this listing.
      const displayNames = frames.map(
        (f, index) => f.name() || (f === mainFrame ? "main" : `(unnamed-${index})`)
      );

      const frameList = frames.map((f, index) => {
        const isMain = f === mainFrame;
        const parent = f.parentFrame();
        const parentIndex = parent ? frames.indexOf(parent) : -1;
        // Fall back to "main" when the parent cannot be located in the listing.
        const parentName = parentIndex >= 0 ? displayNames[parentIndex] : "main";
        return {
          index,
          name: displayNames[index],
          url: f.url(),
          isMain,
          parentName: isMain ? null : parentName,
          isActive: f === activeFrame,
        };
      });

      const lines = frameList.map((f) => {
        const main = f.isMain ? " [main]" : "";
        const active = f.isActive ? " ← selected" : "";
        const parent = f.parentName ? ` (parent: ${f.parentName})` : "";
        return ` [${f.index}] "${f.name}" — ${f.url}${main}${parent}${active}`;
      });
      const activeInfo = activeFrame
        ? `Active frame: "${activeFrame.name() || "(unnamed)"}"`
        : "No frame selected (operating on main page)";
      return {
        content: [{ type: "text", text: `${frameList.length} frame(s) in active page:\n${lines.join("\n")}\n\n${activeInfo}` }],
        details: { frames: frameList, count: frameList.length, activeFrame: activeFrame?.name() ?? null },
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `List frames failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4818
+
4819
+ // -------------------------------------------------------------------------
4820
+ // browser_select_frame
4821
+ // -------------------------------------------------------------------------
4822
// Tool: browser_select_frame
// Selects the frame of the active page that subsequent tools operate on, by
// storing it in `activeFrame`.
//
// Criteria are evaluated in this order and the first one supplied wins:
//   1. name === "main" / "null" / null  → reset to the main page (activeFrame = null)
//   2. name        → exact match on the frame name
//   3. urlPattern  → first frame whose URL contains the substring
//   4. index       → position in page.frames()
// With no criteria an `isError` result ("no_criteria") is returned.
//
// `index` must be an integer within range. A fractional or non-finite index
// used to pass the range check, index the array to `undefined`, and fail with
// an opaque TypeError; it is now rejected with an "invalid_index" error.
pi.registerTool({
  name: "browser_select_frame",
  label: "Browser Select Frame",
  description:
    "Select a frame within the active page to operate on. Find frames by name, URL pattern, or index. Pass null or \"main\" to reset back to the main page frame. Once a frame is selected, tools like browser_evaluate, browser_find, and browser_click will operate within that frame (after T03 migration).",
  parameters: Type.Object({
    name: Type.Optional(Type.String({ description: "Frame name to select. Use 'main' or 'null' to reset to main frame." })),
    urlPattern: Type.Optional(Type.String({ description: "URL substring to match against frame URLs." })),
    index: Type.Optional(Type.Number({ description: "Frame index from browser_list_frames." })),
  }),

  async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
    try {
      await ensureBrowser();
      const p = getActivePage();
      const frames = p.frames();

      // Shared "not found" result: lists every frame so the caller can retry
      // with a valid name, URL fragment or index.
      const frameNotFound = (message: string) => {
        const available = frames.map((f, i) => `[${i}] "${f.name() || "(unnamed)"}" — ${f.url()}`);
        return {
          content: [{ type: "text" as const, text: `${message}\nAvailable frames:\n ${available.join("\n ")}` }],
          details: { error: "frame_not_found", available },
          isError: true,
        };
      };

      // Reset to main frame
      if (params.name === "main" || params.name === "null" || params.name === null) {
        activeFrame = null;
        return {
          content: [{ type: "text", text: "Reset to main page frame. Tools will operate on the main page." }],
          details: { activeFrame: null },
        };
      }

      // Find frame by name
      if (params.name) {
        const frame = frames.find((f) => f.name() === params.name);
        if (!frame) {
          return frameNotFound(`Frame with name "${params.name}" not found.`);
        }
        activeFrame = frame;
        return {
          content: [{ type: "text", text: `Selected frame "${frame.name()}" — ${frame.url()}` }],
          details: { name: frame.name(), url: frame.url() },
        };
      }

      // Find frame by URL pattern
      if (params.urlPattern) {
        const pattern = params.urlPattern;
        const frame = frames.find((f) => f.url().includes(pattern));
        if (!frame) {
          return frameNotFound(`No frame URL matches "${pattern}".`);
        }
        activeFrame = frame;
        return {
          content: [{ type: "text", text: `Selected frame "${frame.name() || "(unnamed)"}" — ${frame.url()}` }],
          details: { name: frame.name(), url: frame.url() },
        };
      }

      // Find frame by index
      if (params.index !== undefined) {
        if (!Number.isInteger(params.index)) {
          return {
            content: [{ type: "text", text: `Frame index ${params.index} is not an integer. ${frames.length} frame(s) available (0-${frames.length - 1}).` }],
            details: { error: "invalid_index", count: frames.length },
            isError: true,
          };
        }
        if (params.index < 0 || params.index >= frames.length) {
          return {
            content: [{ type: "text", text: `Frame index ${params.index} out of range. ${frames.length} frame(s) available (0-${frames.length - 1}).` }],
            details: { error: "index_out_of_range", count: frames.length },
            isError: true,
          };
        }
        const frame = frames[params.index];
        activeFrame = frame;
        return {
          content: [{ type: "text", text: `Selected frame [${params.index}] "${frame.name() || "(unnamed)"}" — ${frame.url()}` }],
          details: { index: params.index, name: frame.name(), url: frame.url() },
        };
      }

      // No selection criteria provided
      return {
        content: [{ type: "text", text: "Provide name, urlPattern, or index to select a frame. Use name='main' to reset to main frame." }],
        details: { error: "no_criteria" },
        isError: true,
      };
    } catch (err: any) {
      return {
        content: [{ type: "text", text: `Select frame failed: ${err.message}` }],
        details: { error: err.message },
        isError: true,
      };
    }
  },
});
4916
+ }