gsd-pi 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53):
  1. package/README.md +12 -5
  2. package/dist/loader.js +0 -0
  3. package/dist/modes/interactive/theme/dark.json +85 -0
  4. package/dist/modes/interactive/theme/light.json +84 -0
  5. package/dist/modes/interactive/theme/theme-schema.json +335 -0
  6. package/dist/modes/interactive/theme/theme.d.ts +78 -0
  7. package/dist/modes/interactive/theme/theme.d.ts.map +1 -0
  8. package/dist/modes/interactive/theme/theme.js +949 -0
  9. package/dist/modes/interactive/theme/theme.js.map +1 -0
  10. package/node_modules/@gsd/pi-coding-agent/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  11. package/node_modules/@gsd/pi-coding-agent/dist/modes/interactive/interactive-mode.js +1 -1
  12. package/node_modules/@gsd/pi-coding-agent/dist/modes/interactive/interactive-mode.js.map +1 -1
  13. package/node_modules/@gsd/pi-coding-agent/src/modes/interactive/interactive-mode.ts +1 -1
  14. package/node_modules/cliui/CHANGELOG.md +121 -0
  15. package/node_modules/color-convert/CHANGELOG.md +54 -0
  16. package/node_modules/esprima/ChangeLog +235 -0
  17. package/node_modules/mz/HISTORY.md +66 -0
  18. package/node_modules/proper-lockfile/CHANGELOG.md +108 -0
  19. package/node_modules/source-map/CHANGELOG.md +301 -0
  20. package/node_modules/thenify/History.md +11 -0
  21. package/node_modules/thenify-all/History.md +11 -0
  22. package/node_modules/y18n/CHANGELOG.md +100 -0
  23. package/node_modules/yargs/CHANGELOG.md +88 -0
  24. package/node_modules/yargs-parser/CHANGELOG.md +263 -0
  25. package/package.json +5 -2
  26. package/packages/pi-coding-agent/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  27. package/packages/pi-coding-agent/dist/modes/interactive/interactive-mode.js +1 -1
  28. package/packages/pi-coding-agent/dist/modes/interactive/interactive-mode.js.map +1 -1
  29. package/packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts +1 -1
  30. package/src/resources/extensions/browser-tools/capture.ts +165 -0
  31. package/src/resources/extensions/browser-tools/evaluate-helpers.ts +184 -0
  32. package/src/resources/extensions/browser-tools/index.ts +47 -4985
  33. package/src/resources/extensions/browser-tools/lifecycle.ts +265 -0
  34. package/src/resources/extensions/browser-tools/package.json +5 -1
  35. package/src/resources/extensions/browser-tools/refs.ts +264 -0
  36. package/src/resources/extensions/browser-tools/settle.ts +197 -0
  37. package/src/resources/extensions/browser-tools/state.ts +408 -0
  38. package/src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs +652 -0
  39. package/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs +614 -0
  40. package/src/resources/extensions/browser-tools/tools/assertions.ts +342 -0
  41. package/src/resources/extensions/browser-tools/tools/forms.ts +801 -0
  42. package/src/resources/extensions/browser-tools/tools/inspection.ts +492 -0
  43. package/src/resources/extensions/browser-tools/tools/intent.ts +614 -0
  44. package/src/resources/extensions/browser-tools/tools/interaction.ts +865 -0
  45. package/src/resources/extensions/browser-tools/tools/navigation.ts +232 -0
  46. package/src/resources/extensions/browser-tools/tools/pages.ts +303 -0
  47. package/src/resources/extensions/browser-tools/tools/refs.ts +541 -0
  48. package/src/resources/extensions/browser-tools/tools/screenshot.ts +83 -0
  49. package/src/resources/extensions/browser-tools/tools/session.ts +400 -0
  50. package/src/resources/extensions/browser-tools/tools/wait.ts +247 -0
  51. package/src/resources/extensions/browser-tools/utils.ts +660 -0
  52. package/src/resources/extensions/gsd/git-service.ts +3 -0
  53. package/src/resources/extensions/shared/interview-ui.ts +1 -1
@@ -0,0 +1,614 @@
1
+ import type { ExtensionAPI } from "@gsd/pi-coding-agent";
2
+ import { Type } from "@sinclair/typebox";
3
+ import { StringEnum } from "@gsd/pi-ai";
4
+ import { diffCompactStates } from "../core.js";
5
+ import type { ToolDeps, CompactPageState } from "../state.js";
6
+ import {
7
+ setLastActionBeforeState,
8
+ setLastActionAfterState,
9
+ } from "../state.js";
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // Intent definitions
13
+ // ---------------------------------------------------------------------------
14
+
15
/**
 * Canonical snake_case names of the semantic intents accepted by the tools in
 * this file. The tuple is `as const` so the literal names survive in the type.
 */
const INTENTS = [
  "submit_form", "close_dialog", "primary_cta", "search_field",
  "next_step", "dismiss", "auth_action", "back_navigation",
] as const;

/** Union of the literal intent names listed in INTENTS. */
type Intent = typeof INTENTS[number];
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Scoring evaluate script — runs entirely in-browser via page.evaluate()
30
+ // ---------------------------------------------------------------------------
31
+
32
/**
 * Builds the source text of a self-contained IIFE that scores candidate
 * elements for a semantic intent. The script is meant to be evaluated inside
 * the page.
 *
 * The script resolves to either `{ error }` or
 * `{ intent, normalized, count, candidates }`. `candidates` holds at most five
 * entries sorted by score descending, each shaped
 * `{ score, selector, tag, role, name, text, reason }`; `score` is clamped to
 * [0, 1] and rounded to two decimals.
 *
 * Element metadata comes from the `window.__pi` helpers (`cssPath`,
 * `inferRole`, `accessibleName`, `isVisible`, `isEnabled`). The script returns
 * an error object when `window.__pi` is missing.
 *
 * @param intent Intent name. Case, whitespace, "_" and "-" are ignored when
 *   matching (e.g. "submit_form" and "Submit Form" are equivalent).
 * @param scope Optional CSS selector; the search is limited to the first
 *   matching element. The scope element itself is also eligible as a candidate
 *   or dialog container.
 * @returns JavaScript source of the scoring IIFE.
 */
function buildIntentScoringScript(intent: string, scope?: string): string {
  // JSON.stringify produces valid JS literals, so neither value can break out
  // of the generated script regardless of the quotes it contains.
  const intentLiteral = JSON.stringify(intent);
  const scopeLiteral = JSON.stringify(scope ?? null);

  return `(() => {
  var pi = window.__pi;
  if (!pi) return { error: "window.__pi not available — browser helpers not injected" };

  var intentRaw = ${intentLiteral};
  var normalized = intentRaw.toLowerCase().replace(/[\\s_\\-]+/g, "");
  var scopeSel = ${scopeLiteral};
  var root = scopeSel ? document.querySelector(scopeSel) : document.body;
  if (!root) return { error: "Scope selector not found: " + scopeSel };

  // --- Shared helpers ---
  function textOf(el) {
    return (el.textContent || "").trim().replace(/\\s+/g, " ").slice(0, 120).toLowerCase();
  }

  function clamp01(v) { return Math.max(0, Math.min(1, v)); }

  function makeCandidate(el, score, reason) {
    return {
      score: Math.round(clamp01(score) * 100) / 100,
      selector: pi.cssPath(el),
      tag: el.tagName.toLowerCase(),
      role: pi.inferRole(el) || "",
      name: pi.accessibleName(el) || "",
      text: textOf(el).slice(0, 80),
      reason: reason,
    };
  }

  // querySelectorAll only returns descendants, so an explicit scope element
  // that itself matches (e.g. scope is the dialog or the button) is added back.
  function qsa(sel) {
    var found = Array.from(root.querySelectorAll(sel));
    if (scopeSel && typeof root.matches === "function" && root.matches(sel)) found.unshift(root);
    return found;
  }

  function visibleEnabled(el) {
    return pi.isVisible(el) && pi.isEnabled(el);
  }

  // Lower-cased visible text plus accessible name, the haystack for text signals.
  function labelOf(el) {
    return textOf(el) + " " + (pi.accessibleName(el) || "").toLowerCase();
  }

  // Substring match, except that one- and two-letter alphanumeric patterns
  // ("x", "ok", "go") must stand alone as a token. Otherwise "x" would match
  // "next" and "go" would match "google".
  function hasPattern(haystack, pattern) {
    var at = haystack.indexOf(pattern);
    if (!/^[a-z0-9]{1,2}$/.test(pattern)) return at !== -1;
    while (at !== -1) {
      var before = at === 0 ? "" : haystack.charAt(at - 1);
      var after = haystack.charAt(at + pattern.length);
      if (!/[a-z0-9]/.test(before) && !/[a-z0-9]/.test(after)) return true;
      at = haystack.indexOf(pattern, at + 1);
    }
    return false;
  }

  function textMatches(el, patterns) {
    var combined = labelOf(el);
    for (var i = 0; i < patterns.length; i++) {
      if (hasPattern(combined, patterns[i])) return true;
    }
    return false;
  }

  // Fraction of the patterns found in the element's label, in [0, 1].
  function textMatchStrength(el, patterns) {
    var combined = labelOf(el);
    var count = 0;
    for (var i = 0; i < patterns.length; i++) {
      if (hasPattern(combined, patterns[i])) count++;
    }
    return Math.min(count / Math.max(patterns.length, 1), 1);
  }

  // --- Intent-specific scoring ---
  var candidates = [];

  if (normalized === "submitform") {
    var els = qsa('button[type="submit"], input[type="submit"], button:not([type]), button[type="button"]');
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      if (!visibleEnabled(el)) continue;
      var d1 = el.type === "submit" || el.getAttribute("type") === "submit" ? 0.35 : 0;
      var d2 = el.closest("form") ? 0.3 : 0;
      var d3 = textMatches(el, ["submit", "send", "save", "create", "add", "post", "confirm", "ok", "done", "register", "sign up", "log in"]) ? 0.2 : 0;
      var d4 = 0.15;
      var score = d1 + d2 + d3 + d4;
      var reasons = [];
      if (d1 > 0) reasons.push("submit-type");
      if (d2 > 0) reasons.push("inside-form");
      if (d3 > 0) reasons.push("text-suggests-submit");
      reasons.push("visible+enabled");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else if (normalized === "closedialog") {
    var containers = qsa('[role="dialog"], dialog, [aria-modal="true"], [role="alertdialog"]');
    // A button inside nested containers is visited once per container; keep a
    // single entry per element (its best score) so duplicates do not eat the top 5.
    var seen = [];
    for (var ci = 0; ci < containers.length; ci++) {
      var parentRect = containers[ci].getBoundingClientRect();
      var btns = containers[ci].querySelectorAll("button, a, [role='button']");
      for (var bi = 0; bi < btns.length; bi++) {
        var el = btns[bi];
        if (!visibleEnabled(el)) continue;
        var d1 = textMatches(el, ["close", "cancel", "dismiss", "×", "✕", "x", "got it", "ok", "done"]) ? 0.35 : 0;
        var ariaLbl = (el.getAttribute("aria-label") || "").toLowerCase();
        var d2 = (ariaLbl.indexOf("close") !== -1 || ariaLbl.indexOf("dismiss") !== -1) ? 0.25 : 0;
        var d3 = 0.2;
        var rect = el.getBoundingClientRect();
        var isTopRight = rect.top - parentRect.top < 60 && parentRect.right - rect.right < 60;
        var d4 = isTopRight ? 0.2 : 0;
        var score = d1 + d2 + d3 + d4;
        var reasons = [];
        if (d1 > 0) reasons.push("text-matches-close");
        if (d2 > 0) reasons.push("aria-label-close");
        reasons.push("inside-dialog");
        if (d4 > 0) reasons.push("top-right-position");
        var cand = makeCandidate(el, score, reasons.join(", "));
        var prior = seen.indexOf(el);
        if (prior === -1) {
          // seen and candidates grow in lockstep in this branch, so indices align.
          seen.push(el);
          candidates.push(cand);
        } else if (cand.score > candidates[prior].score) {
          candidates[prior] = cand;
        }
      }
    }
  }

  else if (normalized === "primarycta") {
    var els = qsa("button, a, [role='button'], input[type='submit'], input[type='button']");
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      if (!visibleEnabled(el)) continue;
      var rect = el.getBoundingClientRect();
      var area = rect.width * rect.height;
      var d1 = clamp01(area / 12000);
      var role = pi.inferRole(el);
      var d2 = role === "button" ? 0.25 : (role === "link" ? 0.1 : 0.15);
      var isNegative = textMatches(el, ["cancel", "dismiss", "close", "skip", "no thanks", "no, thanks", "maybe later"]);
      var d3 = isNegative ? 0 : 0.2;
      var inMain = !!el.closest("main, [role='main'], article, section, .hero, .content");
      var d4 = inMain ? 0.15 : 0;
      var score = d1 + d2 + d3 + d4;
      var reasons = [];
      reasons.push("size:" + Math.round(area));
      if (d2 >= 0.25) reasons.push("button-role");
      if (d3 > 0) reasons.push("non-dismissive");
      if (d4 > 0) reasons.push("in-main-content");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else if (normalized === "searchfield") {
    var els = qsa("input, textarea, [role='searchbox'], [role='combobox'], [contenteditable='true']");
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      // Disabled fields are kept (they only lose the "enabled" points).
      if (!pi.isVisible(el)) continue;
      var type = (el.getAttribute("type") || "text").toLowerCase();
      if (["hidden", "submit", "button", "reset", "image", "checkbox", "radio", "file"].indexOf(type) !== -1 && el.tagName.toLowerCase() === "input") continue;
      var d1 = type === "search" || pi.inferRole(el) === "searchbox" ? 0.4 : 0;
      var ph = (el.getAttribute("placeholder") || "").toLowerCase();
      var nm = (el.getAttribute("name") || "").toLowerCase();
      var ariaLbl = (el.getAttribute("aria-label") || "").toLowerCase();
      var combined = ph + " " + nm + " " + ariaLbl;
      var d2 = combined.indexOf("search") !== -1 || combined.indexOf("query") !== -1 || combined.indexOf("find") !== -1 ? 0.3 : 0;
      var d3 = pi.isEnabled(el) ? 0.15 : 0;
      var inHeader = !!el.closest("header, nav, [role='banner'], [role='navigation'], [role='search']");
      var d4 = inHeader ? 0.15 : 0;
      var score = d1 + d2 + d3 + d4;
      if (score < 0.1) continue;
      var reasons = [];
      if (d1 > 0) reasons.push("search-type/role");
      if (d2 > 0) reasons.push("name/placeholder-match");
      if (d3 > 0) reasons.push("enabled");
      if (d4 > 0) reasons.push("in-header/nav");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else if (normalized === "nextstep") {
    var els = qsa("button, a, [role='button'], input[type='submit'], input[type='button']");
    var patterns = ["next", "continue", "proceed", "forward", "go", "step"];
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      if (!visibleEnabled(el)) continue;
      var d1 = textMatchStrength(el, patterns) * 0.4;
      if (d1 === 0) continue;
      var role = pi.inferRole(el);
      var d2 = role === "button" ? 0.25 : 0.1;
      var d3 = 0.2;
      // visibleEnabled() already rejected disabled elements, so the enabled
      // points are unconditional here.
      var d4 = 0.15;
      var score = d1 + d2 + d3 + d4;
      var reasons = [];
      reasons.push("text-match");
      if (d2 >= 0.25) reasons.push("button-role");
      reasons.push("visible");
      reasons.push("enabled");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else if (normalized === "dismiss") {
    var els = qsa("button, a, [role='button'], [role='link']");
    var patterns = ["close", "cancel", "dismiss", "skip", "no thanks", "no, thanks", "maybe later", "not now", "×", "✕"];
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      if (!visibleEnabled(el)) continue;
      var d1 = textMatchStrength(el, patterns) * 0.35;
      if (d1 === 0) continue;
      var inOverlay = !!el.closest('[role="dialog"], dialog, [aria-modal="true"], [role="alertdialog"], .modal, .overlay, .popup, .popover, .toast, .banner');
      var d2 = inOverlay ? 0.3 : 0.05;
      var rect = el.getBoundingClientRect();
      var isEdge = rect.top < 80 || rect.right > window.innerWidth - 80;
      var d3 = isEdge ? 0.15 : 0;
      var d4 = 0.15;
      var score = d1 + d2 + d3 + d4;
      var reasons = [];
      reasons.push("text-match");
      if (d2 >= 0.3) reasons.push("inside-overlay");
      if (d3 > 0) reasons.push("edge-position");
      reasons.push("visible+enabled");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else if (normalized === "authaction") {
    var els = qsa("button, a, [role='button'], [role='link'], input[type='submit']");
    var patterns = ["log in", "login", "sign in", "signin", "sign up", "signup", "register", "create account", "join", "get started"];
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      if (!visibleEnabled(el)) continue;
      var d1 = textMatchStrength(el, patterns) * 0.4;
      if (d1 === 0) continue;
      var role = pi.inferRole(el);
      var d2 = (role === "button" || role === "link") ? 0.25 : 0.1;
      var rect = el.getBoundingClientRect();
      var inHeader = !!el.closest("header, nav, [role='banner'], [role='navigation']");
      var isProminent = inHeader || rect.top < 200;
      var d3 = isProminent ? 0.2 : 0.05;
      var d4 = 0.15;
      var score = d1 + d2 + d3 + d4;
      var reasons = [];
      reasons.push("text-match");
      if (d2 >= 0.25) reasons.push("button-or-link");
      if (d3 >= 0.2) reasons.push("prominent-position");
      reasons.push("visible+enabled");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else if (normalized === "backnavigation") {
    var els = qsa("button, a, [role='button'], [role='link']");
    var patterns = ["back", "previous", "prev", "return", "go back"];
    for (var i = 0; i < els.length; i++) {
      var el = els[i];
      if (!visibleEnabled(el)) continue;
      var d1 = textMatchStrength(el, patterns) * 0.35;
      if (d1 === 0) continue;
      var innerHtml = el.innerHTML.toLowerCase();
      var hasArrow = innerHtml.indexOf("←") !== -1 || innerHtml.indexOf("&larr") !== -1 || innerHtml.indexOf("arrow") !== -1 || innerHtml.indexOf("chevron-left") !== -1 || innerHtml.indexOf("back") !== -1;
      var d2 = hasArrow ? 0.25 : 0;
      var inNav = !!el.closest("header, nav, [role='banner'], [role='navigation'], .breadcrumb, .toolbar");
      var d3 = inNav ? 0.25 : 0.05;
      var d4 = 0.15;
      var score = d1 + d2 + d3 + d4;
      var reasons = [];
      reasons.push("text-match");
      if (d2 > 0) reasons.push("has-back-arrow/icon");
      if (d3 >= 0.25) reasons.push("in-nav/header");
      reasons.push("visible+enabled");
      candidates.push(makeCandidate(el, score, reasons.join(", ")));
    }
  }

  else {
    return { error: "Unknown intent: " + intentRaw + ". Valid: submit_form, close_dialog, primary_cta, search_field, next_step, dismiss, auth_action, back_navigation" };
  }

  // Sort by score descending, cap at 5
  candidates.sort(function(a, b) { return b.score - a.score; });
  candidates = candidates.slice(0, 5);

  return { intent: intentRaw, normalized: normalized, count: candidates.length, candidates: candidates };
})()`;
}
305
+
306
+ // ---------------------------------------------------------------------------
307
+ // Result types
308
+ // ---------------------------------------------------------------------------
309
+
310
/** One scored element as reported by the in-page scoring script. */
interface IntentCandidate {
  /** Score after clamping to [0, 1] and rounding to two decimals. */
  score: number;
  /** CSS path of the element, as produced by `pi.cssPath` in the page. */
  selector: string;
  /** Lower-cased tag name. */
  tag: string;
  /** Inferred role, or "" when none was inferred. */
  role: string;
  /** Accessible name, or "" when none was found. */
  name: string;
  /** Lower-cased, whitespace-collapsed text content (at most 80 characters). */
  text: string;
  /** Comma-separated list of the signals that contributed to the score. */
  reason: string;
}

/**
 * Payload returned by the scoring script.
 *
 * NOTE(review): when the script fails it returns only `{ error }`, so the other
 * fields are absent at runtime in that case — check `error` before reading them.
 */
interface IntentScoringResult {
  /** Intent exactly as it was passed in. */
  intent: string;
  /** Intent lower-cased with whitespace, "_" and "-" removed. */
  normalized: string;
  /** Number of entries in `candidates`. */
  count: number;
  /** Up to five candidates, highest score first. */
  candidates: IntentCandidate[];
  /** Set when scoring could not run (missing helpers, scope not found, unknown intent). */
  error?: string;
}
327
+
328
+ // ---------------------------------------------------------------------------
329
+ // Registration
330
+ // ---------------------------------------------------------------------------
331
+
332
/**
 * Registers the two intent-driven browser tools on the extension API.
 *
 * - `browser_find_best` scores the page for an intent and reports up to five
 *   candidates without interacting with them.
 * - `browser_act` scores the page, then focuses (search_field) or clicks (all
 *   other intents) the top candidate, settles the page and reports a
 *   before/after diff.
 *
 * Both tools wrap their work in a tracked action and, on an unexpected error,
 * return an `isError` result that carries an error screenshot when one could
 * be captured.
 *
 * @param pi Extension API used to register the tools.
 * @param deps Browser helpers (page access, state capture, action tracking,
 *   formatting) supplied by the host extension.
 */
export function registerIntentTools(pi: ExtensionAPI, deps: ToolDeps): void {
  type ActiveTarget = ReturnType<ToolDeps["getActiveTarget"]>;
  type ToolContent =
    | { type: "text"; text: string }
    | { type: "image"; data: string; mimeType: string };

  /** Builds a fresh copy of the parameter schema both tools share. */
  const buildParameters = () =>
    Type.Object({
      intent: StringEnum(INTENTS, {
        description:
          "Semantic intent: submit_form, close_dialog, primary_cta, search_field, next_step, dismiss, auth_action, back_navigation",
      }),
      scope: Type.Optional(
        Type.String({
          description:
            "CSS selector to narrow the search area. If omitted, searches the full page.",
        })
      ),
    });

  /** Escapes regex metacharacters so arbitrary label text matches literally. */
  const escapeRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  /** Closes the tracked action (if one was opened) and builds the error result for a thrown failure. */
  async function failureResult(
    toolName: string,
    err: unknown,
    actionId: number | null,
    beforeState: CompactPageState | null,
  ) {
    let activePage: ReturnType<ToolDeps["getActivePage"]> | null;
    try {
      activePage = deps.getActivePage();
    } catch {
      // No page available: still attempt the screenshot call with null, as before.
      activePage = null;
    }
    const screenshot = await deps.captureErrorScreenshot(activePage);
    const errMsg = deps.firstErrorLine(err);

    if (actionId !== null) {
      deps.finishTrackedAction(actionId, {
        status: "error",
        error: errMsg,
        beforeState: beforeState ?? undefined,
      });
    }

    const content: ToolContent[] = [
      { type: "text", text: `${toolName} failed: ${errMsg}` },
    ];
    if (screenshot) {
      content.push({ type: "image", data: screenshot.data, mimeType: screenshot.mimeType });
    }
    return { content, details: {}, isError: true };
  }

  /**
   * Clicks the candidate by selector; if that fails, retries through
   * getByRole using a label taken from the selector or the candidate.
   */
  async function clickCandidate(target: ActiveTarget, top: IntentCandidate, intent: string): Promise<void> {
    let selectorError: unknown;
    try {
      // Click via Playwright locator (D021)
      await target.locator(top.selector).first().click({ timeout: 5000 });
      return;
    } catch (err: unknown) {
      selectorError = err;
    }

    // getByRole fallback from interaction.ts pattern
    const attrLabel = top.selector.match(/\[(?:aria-label|name|placeholder)="([^"]+)"\]/i)?.[1];
    const label = (attrLabel || top.name || top.text || "").trim();

    // An empty label would build a regex that matches every element and click
    // an arbitrary button or link, so the fallback only runs with a real label.
    if (label !== "") {
      const namePattern = new RegExp(escapeRegExp(label), "i");
      for (const role of ["button", "link", "combobox", "textbox"] as const) {
        try {
          await target.getByRole(role, { name: namePattern }).first().click({ timeout: 3000 });
          return;
        } catch { /* try next role */ }
      }
    }

    throw new Error(
      `Could not click top candidate "${top.selector}" for intent "${intent}": ${deps.firstErrorLine(selectorError)}`
    );
  }

  // -----------------------------------------------------------------------
  // browser_find_best
  // -----------------------------------------------------------------------
  pi.registerTool({
    name: "browser_find_best",
    label: "Find Best",
    description:
      "Find the best-matching element for a semantic intent. Returns up to 5 scored candidates (0-1) ranked by structural position, role, text signals, and visibility. Use this to discover which element the agent should interact with for a given goal — e.g. intent=\"submit_form\" finds submit buttons, intent=\"close_dialog\" finds close/dismiss buttons inside dialogs. Each candidate includes a CSS selector usable with browser_click.",
    parameters: buildParameters(),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      let actionId: number | null = null;
      let beforeState: CompactPageState | null = null;
      try {
        const { page: p } = await deps.ensureBrowser();
        const target = deps.getActiveTarget();
        beforeState = await deps.captureCompactPageState(p, {
          selectors: params.scope ? [params.scope] : [],
          includeBodyText: false,
          target,
        });
        actionId = deps.beginTrackedAction("browser_find_best", params, beforeState.url).id;

        const script = buildIntentScoringScript(params.intent, params.scope);
        const result = await target.evaluate(script) as IntentScoringResult;

        if (result.error) {
          deps.finishTrackedAction(actionId, {
            status: "error",
            error: result.error,
            beforeState,
          });
          return {
            content: [{ type: "text" as const, text: result.error }],
            details: {},
            isError: true,
          };
        }

        const afterState = await deps.captureCompactPageState(p, {
          selectors: params.scope ? [params.scope] : [],
          includeBodyText: false,
          target,
        });
        setLastActionBeforeState(beforeState);
        setLastActionAfterState(afterState);

        deps.finishTrackedAction(actionId, {
          status: "success",
          afterUrl: afterState.url,
          beforeState,
          afterState,
        });

        // Format output
        const lines: string[] = [];
        lines.push(`Intent: ${params.intent} → ${result.count} candidate(s)`);
        if (params.scope) lines.push(`Scope: ${params.scope}`);
        lines.push("");

        if (result.candidates.length === 0) {
          lines.push("No candidates found for this intent on the current page.");
        } else {
          result.candidates.forEach((c, i) => {
            lines.push(`${i + 1}. **${c.score}** \`${c.selector}\``);
            lines.push(` ${c.tag}${c.role ? ` [${c.role}]` : ""} — "${c.name || c.text}"`);
            lines.push(` Reason: ${c.reason}`);
          });
        }

        return {
          content: [{ type: "text" as const, text: lines.join("\n") }],
          details: { intentResult: result },
        };
      } catch (err: unknown) {
        return await failureResult("browser_find_best", err, actionId, beforeState);
      }
    },
  });

  // -----------------------------------------------------------------------
  // browser_act
  // -----------------------------------------------------------------------
  pi.registerTool({
    name: "browser_act",
    label: "Browser Act",
    description:
      "Execute a semantic action in one call. Resolves the top candidate for the given intent (same scoring as browser_find_best), performs the action (click for buttons/links, focus for search fields), settles the page, and returns a before/after diff. Use when you know what you want to accomplish semantically — e.g. intent=\"submit_form\" finds and clicks the submit button, intent=\"close_dialog\" dismisses the dialog.",
    parameters: buildParameters(),

    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      let actionId: number | null = null;
      let beforeState: CompactPageState | null = null;
      try {
        const { page: p } = await deps.ensureBrowser();
        const target = deps.getActiveTarget();
        beforeState = await deps.captureCompactPageState(p, {
          selectors: params.scope ? [params.scope] : [],
          includeBodyText: true,
          target,
        });
        actionId = deps.beginTrackedAction("browser_act", params, beforeState.url).id;

        // Score candidates
        const script = buildIntentScoringScript(params.intent, params.scope);
        const result = await target.evaluate(script) as IntentScoringResult;

        if (result.error) {
          deps.finishTrackedAction(actionId, {
            status: "error",
            error: result.error,
            beforeState,
          });
          return {
            content: [{ type: "text" as const, text: `browser_act failed: ${result.error}` }],
            details: {},
            isError: true,
          };
        }

        // Take top candidate (candidates arrive sorted by score descending)
        const [top] = result.candidates;
        if (top === undefined) {
          deps.finishTrackedAction(actionId, {
            status: "error",
            error: `No candidates found for intent "${params.intent}"`,
            beforeState,
          });
          return {
            content: [{
              type: "text" as const,
              text: `browser_act: No candidates found for intent "${params.intent}" on the current page. The page may not have the expected elements (e.g. no dialog for close_dialog, no form for submit_form).`,
            }],
            details: { intentResult: result },
            isError: true,
          };
        }

        // The script reports the normalized intent it matched on; reuse it
        // rather than repeating the normalization here.
        const isSearchField = result.normalized === "searchfield";

        if (isSearchField) {
          // Focus instead of click for search fields
          try {
            await target.locator(top.selector).first().focus({ timeout: 5000 });
          } catch {
            // Fallback: click to focus
            await target.locator(top.selector).first().click({ timeout: 5000 });
          }
        } else {
          await clickCandidate(target, top, params.intent);
        }

        // Settle after action
        await deps.settleAfterActionAdaptive(p);

        // Capture after state and diff
        const afterState = await deps.captureCompactPageState(p, {
          selectors: params.scope ? [params.scope] : [],
          includeBodyText: true,
          target,
        });
        const diff = diffCompactStates(beforeState, afterState);
        const summary = deps.formatCompactStateSummary(afterState);
        const jsErrors = deps.getRecentErrors(p.url());

        setLastActionBeforeState(beforeState);
        setLastActionAfterState(afterState);

        deps.finishTrackedAction(actionId, {
          status: "success",
          afterUrl: afterState.url,
          diffSummary: diff.summary,
          beforeState,
          afterState,
        });

        // Format output
        const lines: string[] = [];
        lines.push(`Intent: ${params.intent}`);
        lines.push(`Action: ${isSearchField ? "focused" : "clicked"} top candidate (score: ${top.score})`);
        lines.push(`Target: \`${top.selector}\` — "${top.name || top.text}"`);
        lines.push(`Reason: ${top.reason}`);
        lines.push("");
        lines.push(`Diff:\n${deps.formatDiffText(diff)}`);
        if (jsErrors.trim()) {
          lines.push(`\nJS Errors:\n${jsErrors}`);
        }
        lines.push(`\nPage summary:\n${summary}`);

        return {
          content: [{ type: "text" as const, text: lines.join("\n") }],
          details: { intentResult: result, topCandidate: top, diff },
        };
      } catch (err: unknown) {
        return await failureResult("browser_act", err, actionId, beforeState);
      }
    },
  });
}