agentic-browser 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -117,15 +117,15 @@ AgenticBrowserCore → ControlApi → SessionManager → BrowserController (CDP)
117
117
 
118
118
  Subcommand: `agentic-browser mcp` (stdio transport). Setup: `agentic-browser setup`. Tools:
119
119
 
120
- | Tool | Purpose |
121
- | ----------------------- | ------------------------------ |
122
- | `browser_start_session` | Start Chrome, return sessionId |
123
- | `browser_navigate` | Navigate to URL |
124
- | `browser_interact` | click / type / press / waitFor |
125
- | `browser_get_content` | Get page title / text / html |
126
- | `browser_get_elements` | Discover interactive elements |
127
- | `browser_search_memory` | Search task memory |
128
- | `browser_stop_session` | Stop Chrome session |
120
+ | Tool | Purpose |
121
+ | ----------------------- | --------------------------------------------------------------------------------------------------------- |
122
+ | `browser_start_session` | Start Chrome, return sessionId |
123
+ | `browser_navigate` | Navigate to URL |
124
+ | `browser_interact` | click / type / press / waitFor / scroll / hover / select / toggle / goBack / goForward / refresh / dialog |
125
+ | `browser_get_content` | Get page title / text / html |
126
+ | `browser_get_elements` | Discover interactive elements |
127
+ | `browser_search_memory` | Search task memory |
128
+ | `browser_stop_session` | Stop Chrome session |
129
129
 
130
130
  ## For Browser Automation Tasks
131
131
 
package/README.md CHANGED
@@ -132,7 +132,7 @@ agentic-browser agent elements --roles button,link,input --visible-only --limit
132
132
  agentic-browser agent elements --selector "#main-content"
133
133
  ```
134
134
 
135
- Returns a JSON array of elements with CSS selectors usable in `agent run interact`:
135
+ Returns a JSON array of elements with CSS selectors and fallback selectors usable in `agent run interact`:
136
136
 
137
137
  ```json
138
138
  {
@@ -141,11 +141,9 @@ Returns a JSON array of elements with CSS selectors usable in `agent run interac
141
141
  "elements": [
142
142
  {
143
143
  "selector": "#login-btn",
144
+ "fallbackSelectors": ["button[aria-label=\"Login\"]"],
144
145
  "role": "button",
145
- "tagName": "button",
146
146
  "text": "Login",
147
- "actions": ["click"],
148
- "visible": true,
149
147
  "enabled": true
150
148
  }
151
149
  ],
@@ -154,13 +152,17 @@ Returns a JSON array of elements with CSS selectors usable in `agent run interac
154
152
  }
155
153
  ```
156
154
 
155
+ MCP responses are compact — `visible`, `actions`, and `tagName` are omitted to reduce token usage. The full element shape is available via the programmatic API.
156
+
157
+ ````
158
+
157
159
  ## MCP Server
158
160
 
159
161
  ### Quick Setup
160
162
 
161
163
  ```bash
162
164
  npx agentic-browser setup
163
- ```
165
+ ````
164
166
 
165
167
  Detects your AI tools (Claude Code, Cursor) and writes the MCP config automatically.
166
168
 
@@ -211,6 +213,12 @@ More `interact` actions:
211
213
  - `{"action":"type","selector":"input[name=q]","text":"innoq"}`
212
214
  - `{"action":"press","key":"Enter"}`
213
215
  - `{"action":"waitFor","selector":"main","timeoutMs":4000}`
216
+ - `{"action":"goBack"}` — browser back
217
+ - `{"action":"goForward"}` — browser forward
218
+ - `{"action":"refresh"}` — reload page
219
+ - `{"action":"dialog"}` — accept a JS dialog (alert/confirm/prompt)
220
+ - `{"action":"dialog","text":"dismiss"}` — dismiss a dialog
221
+ - `{"action":"dialog","value":"answer"}` — respond to a prompt dialog
214
222
 
215
223
  ### 4. Read Page Content
216
224
 
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { r as createCliRuntime } from "../runtime-CODdeRWR.mjs";
2
+ import { r as createCliRuntime } from "../runtime-OsYo7Rh2.mjs";
3
3
  import fs from "node:fs";
4
4
  import path from "node:path";
5
5
  import crypto from "node:crypto";
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env node
2
- import { i as createMockAgenticBrowserCore, n as createAgenticBrowserCore, t as AgenticBrowserCore } from "./runtime-CODdeRWR.mjs";
2
+ import { i as createMockAgenticBrowserCore, n as createAgenticBrowserCore, t as AgenticBrowserCore } from "./runtime-OsYo7Rh2.mjs";
3
3
 
4
4
  export { AgenticBrowserCore, createAgenticBrowserCore, createMockAgenticBrowserCore };
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { n as createAgenticBrowserCore } from "../runtime-CODdeRWR.mjs";
2
+ import { n as createAgenticBrowserCore } from "../runtime-OsYo7Rh2.mjs";
3
3
  import { z } from "zod";
4
4
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
5
5
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
@@ -31,9 +31,10 @@ const server = new McpServer({
31
31
  server.tool("browser_start_session", "Start a new Chrome browser session (or return the existing one if healthy). Sessions auto-start when you call any other browser tool, so you rarely need to call this explicitly. Use this to force a fresh session after stopping the previous one.", {}, async () => {
32
32
  const session = await getCore().startSession();
33
33
  activeSessionId = session.sessionId;
34
+ const { authTokenRef: _, ...compactSession } = session;
34
35
  return { content: [{
35
36
  type: "text",
36
- text: JSON.stringify(session)
37
+ text: JSON.stringify(compactSession)
37
38
  }] };
38
39
  });
39
40
  server.tool("browser_navigate", "Navigate the browser to a URL. A session is auto-started if needed.", {
@@ -47,12 +48,16 @@ server.tool("browser_navigate", "Navigate the browser to a URL. A session is aut
47
48
  type: "navigate",
48
49
  payload: { url }
49
50
  });
51
+ const compact = {
52
+ resultStatus: result.resultStatus,
53
+ resultMessage: result.resultMessage
54
+ };
50
55
  return { content: [{
51
56
  type: "text",
52
- text: JSON.stringify(result)
57
+ text: JSON.stringify(compact)
53
58
  }] };
54
59
  });
55
- server.tool("browser_interact", "Interact with a page element. Actions: \"click\" (click element), \"type\" (type text into input), \"press\" (press a keyboard key like Enter), \"waitFor\" (wait for element to appear), \"scroll\" (scroll page or element), \"hover\" (hover over element), \"select\" (pick option in <select>), \"toggle\" (toggle checkbox/radio/switch). A session is auto-started if needed.", {
60
+ server.tool("browser_interact", "Interact with a page element or perform browser actions. Element actions: \"click\", \"type\", \"press\", \"waitFor\", \"scroll\", \"hover\", \"select\", \"toggle\". Navigation actions: \"goBack\" (browser back), \"goForward\" (browser forward), \"refresh\" (reload page). Dialog action: \"dialog\" (handle JS alert/confirm/prompt — use text=\"dismiss\" to cancel, value=\"...\" for prompt input). Fallback selectors are tried automatically if the primary selector fails. A session is auto-started if needed.", {
56
61
  action: z.enum([
57
62
  "click",
58
63
  "type",
@@ -61,20 +66,26 @@ server.tool("browser_interact", "Interact with a page element. Actions: \"click\
61
66
  "scroll",
62
67
  "hover",
63
68
  "select",
64
- "toggle"
69
+ "toggle",
70
+ "goBack",
71
+ "goForward",
72
+ "refresh",
73
+ "dialog"
65
74
  ]).describe("The interaction type"),
66
75
  selector: z.string().optional().describe("CSS selector for the target element"),
67
- text: z.string().optional().describe("Text to type (required for \"type\" action)"),
76
+ fallbackSelectors: z.array(z.string()).optional().describe("Backup CSS selectors tried if the primary selector fails (from browser_get_elements)"),
77
+ text: z.string().optional().describe("Text to type (for \"type\"), or \"dismiss\" to dismiss a dialog (for \"dialog\")"),
68
78
  key: z.string().optional().describe("Key to press (required for \"press\" action, e.g. \"Enter\", \"Tab\")"),
69
- value: z.string().optional().describe("Option value to select (required for \"select\" action)"),
79
+ value: z.string().optional().describe("Option value to select (for \"select\"), or prompt text (for \"dialog\")"),
70
80
  scrollX: z.number().optional().describe("Horizontal scroll delta in pixels (for \"scroll\" action)"),
71
81
  scrollY: z.number().optional().describe("Vertical scroll delta in pixels (for \"scroll\" action, positive = down)"),
72
82
  timeoutMs: z.number().optional().describe("Timeout in milliseconds (for \"waitFor\" action, default 4000)"),
73
83
  sessionId: z.string().optional().describe("Session ID (auto-resolved if omitted)")
74
- }, async ({ action, selector, text, key, value, scrollX, scrollY, timeoutMs, sessionId }) => {
84
+ }, async ({ action, selector, fallbackSelectors, text, key, value, scrollX, scrollY, timeoutMs, sessionId }) => {
75
85
  const sid = await resolveSession(sessionId);
76
86
  const payload = { action };
77
87
  if (selector) payload.selector = selector;
88
+ if (fallbackSelectors) payload.fallbackSelectors = fallbackSelectors;
78
89
  if (text) payload.text = text;
79
90
  if (key) payload.key = key;
80
91
  if (value) payload.value = value;
@@ -87,9 +98,13 @@ server.tool("browser_interact", "Interact with a page element. Actions: \"click\
87
98
  type: "interact",
88
99
  payload
89
100
  });
101
+ const compact = {
102
+ resultStatus: result.resultStatus,
103
+ resultMessage: result.resultMessage
104
+ };
90
105
  return { content: [{
91
106
  type: "text",
92
- text: JSON.stringify(result)
107
+ text: JSON.stringify(compact)
93
108
  }] };
94
109
  });
95
110
  server.tool("browser_get_content", "Get the current page content. Modes: \"text\" (readable text), \"a11y\" (accessibility tree — best for understanding page structure), \"title\" (page title only), \"html\" (raw HTML). Use \"a11y\" to see the full page hierarchy with roles, names, and states. A session is auto-started if needed.", {
@@ -100,20 +115,42 @@ server.tool("browser_get_content", "Get the current page content. Modes: \"text\
100
115
  "a11y"
101
116
  ]).default("text").describe("Content extraction mode"),
102
117
  selector: z.string().optional().describe("CSS selector to scope content (e.g. \"main\", \"#content\")"),
118
+ maxChars: z.number().optional().describe("Maximum characters to return (default: 12000 for text/a11y, 6000 for html, no cap for title). Use a CSS selector to scope content instead of raising this limit."),
103
119
  sessionId: z.string().optional().describe("Session ID (auto-resolved if omitted)")
104
- }, async ({ mode, selector, sessionId }) => {
120
+ }, async ({ mode, selector, maxChars, sessionId }) => {
105
121
  const sid = await resolveSession(sessionId);
106
122
  const result = await getCore().getPageContent({
107
123
  sessionId: sid,
108
124
  mode,
109
125
  selector
110
126
  });
127
+ const limit = maxChars ?? {
128
+ text: 12e3,
129
+ a11y: 12e3,
130
+ html: 6e3,
131
+ title: void 0
132
+ }[mode];
133
+ if (limit && typeof result.content === "string" && result.content.length > limit) {
134
+ const originalLength = result.content.length;
135
+ const truncatedContent = result.content.slice(0, limit) + `\n\n[Truncated — showing first ${limit} of ${originalLength} characters. Use a CSS selector to scope the content.]`;
136
+ return { content: [{
137
+ type: "text",
138
+ text: JSON.stringify({
139
+ ...result,
140
+ content: truncatedContent,
141
+ truncated: true
142
+ })
143
+ }] };
144
+ }
111
145
  return { content: [{
112
146
  type: "text",
113
- text: JSON.stringify(result)
147
+ text: JSON.stringify({
148
+ ...result,
149
+ truncated: false
150
+ })
114
151
  }] };
115
152
  });
116
- server.tool("browser_get_elements", "Discover all interactive elements on the current page (buttons, links, inputs, etc.). Returns CSS selectors you can use with browser_interact. A session is auto-started if needed.", {
153
+ server.tool("browser_get_elements", "Discover all interactive elements on the current page (buttons, links, inputs, etc.). Returns CSS selectors and fallbackSelectors you can use with browser_interact. Pass fallbackSelectors to browser_interact for automatic retry when the primary selector breaks. Actions are derived from role: link/button/custom→click, input/textarea/contenteditable→click+type+press, select→click+select, checkbox/radio→toggle. A session is auto-started if needed.", {
117
154
  roles: z.array(z.enum([
118
155
  "link",
119
156
  "button",
@@ -138,24 +175,46 @@ server.tool("browser_get_elements", "Discover all interactive elements on the cu
138
175
  limit,
139
176
  selector
140
177
  });
178
+ const compactElements = result.elements.map((el) => {
179
+ const compact = { ...el };
180
+ if (visibleOnly) delete compact.visible;
181
+ delete compact.actions;
182
+ delete compact.tagName;
183
+ if (compact.ariaLabel && compact.ariaLabel === compact.text) delete compact.ariaLabel;
184
+ return compact;
185
+ });
141
186
  return { content: [{
142
187
  type: "text",
143
- text: JSON.stringify(result)
188
+ text: JSON.stringify({
189
+ elements: compactElements,
190
+ totalFound: result.totalFound,
191
+ truncated: result.truncated
192
+ })
144
193
  }] };
145
194
  });
146
- server.tool("browser_search_memory", "Search task memory for previously learned selectors and interaction patterns. Use this before interacting with a known site to reuse proven selectors instead of rediscovering them.", {
195
+ server.tool("browser_search_memory", "Search task memory for previously learned selectors, selector aliases, and interaction patterns. Results include selectorHints (proven selectors) and selectorAliases (human-readable names mapped to selectors with fallbacks). Use this before interacting with a known site to reuse proven selectors instead of rediscovering them.", {
147
196
  taskIntent: z.string().describe("What you want to do, e.g. \"login:github.com\" or \"search:amazon.de\""),
148
197
  siteDomain: z.string().optional().describe("Domain to scope the search"),
149
198
  limit: z.number().default(5).describe("Maximum number of results")
150
199
  }, async ({ taskIntent, siteDomain, limit }) => {
151
- const result = getCore().searchMemory({
200
+ const compactResults = getCore().searchMemory({
152
201
  taskIntent,
153
202
  siteDomain,
154
203
  limit
204
+ }).results.map((r) => {
205
+ const compact = { ...r };
206
+ delete compact.score;
207
+ delete compact.lastVerifiedAt;
208
+ if (Array.isArray(compact.selectorAliases)) compact.selectorAliases = compact.selectorAliases.map((alias) => {
209
+ const compactAlias = { ...alias };
210
+ if (Array.isArray(compactAlias.fallbackSelectors) && compactAlias.fallbackSelectors.length === 0) delete compactAlias.fallbackSelectors;
211
+ return compactAlias;
212
+ });
213
+ return compact;
155
214
  });
156
215
  return { content: [{
157
216
  type: "text",
158
- text: JSON.stringify(result)
217
+ text: JSON.stringify({ results: compactResults })
159
218
  }] };
160
219
  });
161
220
  server.tool("browser_stop_session", "Stop the browser session and terminate Chrome. The next browser tool call will auto-start a fresh session.", { sessionId: z.string().optional().describe("Session ID (uses active session if omitted)") }, async ({ sessionId }) => {
@@ -92,6 +92,16 @@ var CdpConnection = class CdpConnection {
92
92
  this.ws.on("message", onMessage);
93
93
  });
94
94
  }
95
+ onEvent(method, handler) {
96
+ const onMessage = (raw) => {
97
+ const message = JSON.parse(raw.toString("utf8"));
98
+ if (message.method === method) handler(message.params ?? {});
99
+ };
100
+ this.ws.on("message", onMessage);
101
+ return () => {
102
+ this.ws.off("message", onMessage);
103
+ };
104
+ }
95
105
  close() {
96
106
  this.ws.close();
97
107
  }
@@ -150,25 +160,19 @@ async function createTarget(cdpUrl, url = "about:blank") {
150
160
  } catch {}
151
161
  return await ensurePageWebSocketUrl(cdpUrl);
152
162
  }
153
- async function applyUserAgent(targetWsUrl, userAgent) {
154
- const conn = await CdpConnection.connect(targetWsUrl);
155
- try {
156
- await conn.send("Network.enable");
157
- await conn.send("Network.setUserAgentOverride", { userAgent });
158
- } finally {
159
- conn.close();
160
- }
161
- }
162
- async function evaluateExpression(targetWsUrl, expression) {
163
+ /** Verify the page is ready and optionally set a custom user-agent, using a single connection. */
164
+ async function initTarget(targetWsUrl, userAgent) {
163
165
  const conn = await CdpConnection.connect(targetWsUrl);
164
166
  try {
165
- await conn.send("Page.enable");
166
- await conn.send("Runtime.enable");
167
- return (await conn.send("Runtime.evaluate", {
168
- expression,
167
+ const enables = [conn.send("Page.enable"), conn.send("Runtime.enable")];
168
+ if (userAgent) enables.push(conn.send("Network.enable"));
169
+ await Promise.all(enables);
170
+ await conn.send("Runtime.evaluate", {
171
+ expression: "window.location.href",
169
172
  returnByValue: true,
170
173
  awaitPromise: true
171
- })).result.value ?? "";
174
+ });
175
+ if (userAgent) await conn.send("Network.setUserAgentOverride", { userAgent });
172
176
  } finally {
173
177
  conn.close();
174
178
  }
@@ -236,8 +240,7 @@ var ChromeCdpBrowserController = class {
236
240
  if (!port) throw new Error(`Invalid CDP URL: could not extract port from ${cdpUrl}`);
237
241
  await waitForDebugger(port);
238
242
  const targetWsUrl = await createTarget(cdpUrl);
239
- await evaluateExpression(targetWsUrl, "window.location.href");
240
- if (options?.userAgent) await applyUserAgent(targetWsUrl, options.userAgent);
243
+ await initTarget(targetWsUrl, options?.userAgent);
241
244
  return {
242
245
  pid: 0,
243
246
  cdpUrl,
@@ -247,13 +250,27 @@ var ChromeCdpBrowserController = class {
247
250
  async ensureEnabled(targetWsUrl) {
248
251
  const cached = this.connections.get(targetWsUrl);
249
252
  if (!cached) return;
250
- if (!cached.enabled.page) {
251
- await cached.conn.send("Page.enable");
253
+ const promises = [];
254
+ if (!cached.enabled.page) promises.push(cached.conn.send("Page.enable").then(() => {
252
255
  cached.enabled.page = true;
253
- }
254
- if (!cached.enabled.runtime) {
255
- await cached.conn.send("Runtime.enable");
256
+ }));
257
+ if (!cached.enabled.runtime) promises.push(cached.conn.send("Runtime.enable").then(() => {
256
258
  cached.enabled.runtime = true;
259
+ }));
260
+ if (promises.length) await Promise.all(promises);
261
+ if (!cached.dialogListenerAttached && cached.conn.onEvent) {
262
+ cached.conn.onEvent("Page.javascriptDialogOpening", (params) => {
263
+ cached.pendingDialog = {
264
+ type: params.type,
265
+ message: params.message,
266
+ defaultPrompt: params.defaultPrompt
267
+ };
268
+ if (params.type === "alert") {
269
+ cached.conn.send("Page.handleJavaScriptDialog", { accept: true }).catch(() => {});
270
+ cached.pendingDialog = void 0;
271
+ }
272
+ });
273
+ cached.dialogListenerAttached = true;
257
274
  }
258
275
  }
259
276
  async launch(sessionId, options) {
@@ -305,9 +322,8 @@ var ChromeCdpBrowserController = class {
305
322
  await waitForDebugger(port);
306
323
  const cdpUrl = `http://127.0.0.1:${port}`;
307
324
  const targetWsUrl = await createTarget(cdpUrl, "about:blank");
308
- await evaluateExpression(targetWsUrl, "window.location.href");
309
325
  if (!child.pid) throw new Error("Failed to launch Chrome process");
310
- if (userAgent) await applyUserAgent(targetWsUrl, userAgent);
326
+ await initTarget(targetWsUrl, userAgent);
311
327
  return {
312
328
  pid: child.pid,
313
329
  cdpUrl,
@@ -346,12 +362,83 @@ var ChromeCdpBrowserController = class {
346
362
  return navigatedEvent?.frame?.url ?? url;
347
363
  });
348
364
  }
365
+ async handleDialogAction(targetWsUrl, payload) {
366
+ return await this.withRetry(targetWsUrl, async (conn) => {
367
+ await this.ensureEnabled(targetWsUrl);
368
+ const cached = this.connections.get(targetWsUrl);
369
+ if (!cached?.pendingDialog) try {
370
+ await conn.waitForEvent("Page.javascriptDialogOpening", 500);
371
+ await new Promise((r) => setTimeout(r, 50));
372
+ } catch {
373
+ return "no dialog present";
374
+ }
375
+ const dialog = cached?.pendingDialog;
376
+ if (!dialog) return "no dialog present";
377
+ const dismiss = payload.text === "dismiss";
378
+ await conn.send("Page.handleJavaScriptDialog", {
379
+ accept: !dismiss,
380
+ promptText: payload.value
381
+ });
382
+ const result = dismiss ? `dismissed ${dialog.type}` : `accepted ${dialog.type}`;
383
+ if (cached) cached.pendingDialog = void 0;
384
+ return `${result}: ${dialog.message}`;
385
+ });
386
+ }
349
387
  async interact(targetWsUrl, payload) {
388
+ if (payload.action === "goBack") return await this.withRetry(targetWsUrl, async (conn) => {
389
+ await this.ensureEnabled(targetWsUrl);
390
+ const navigatedPromise = conn.waitForEvent("Page.frameNavigated", 3e3).catch(() => void 0);
391
+ await conn.send("Runtime.evaluate", {
392
+ expression: "history.back()",
393
+ returnByValue: true
394
+ });
395
+ const event = await navigatedPromise;
396
+ if (!event) return "no history to go back";
397
+ try {
398
+ await Promise.race([conn.waitForEvent("Page.loadEventFired", 5e3), conn.waitForEvent("Page.frameStoppedLoading", 5e3)]);
399
+ } catch {}
400
+ return `navigated back to ${event.frame?.url ?? "previous page"}`;
401
+ });
402
+ if (payload.action === "goForward") return await this.withRetry(targetWsUrl, async (conn) => {
403
+ await this.ensureEnabled(targetWsUrl);
404
+ const navigatedPromise = conn.waitForEvent("Page.frameNavigated", 3e3).catch(() => void 0);
405
+ await conn.send("Runtime.evaluate", {
406
+ expression: "history.forward()",
407
+ returnByValue: true
408
+ });
409
+ const event = await navigatedPromise;
410
+ if (!event) return "no history to go forward";
411
+ try {
412
+ await Promise.race([conn.waitForEvent("Page.loadEventFired", 5e3), conn.waitForEvent("Page.frameStoppedLoading", 5e3)]);
413
+ } catch {}
414
+ return `navigated forward to ${event.frame?.url ?? "next page"}`;
415
+ });
416
+ if (payload.action === "refresh") return await this.withRetry(targetWsUrl, async (conn) => {
417
+ await this.ensureEnabled(targetWsUrl);
418
+ await conn.send("Page.reload");
419
+ try {
420
+ await Promise.race([conn.waitForEvent("Page.loadEventFired", 1e4), conn.waitForEvent("Page.frameStoppedLoading", 1e4)]);
421
+ } catch {}
422
+ return "page refreshed";
423
+ });
424
+ if (payload.action === "dialog") return await this.handleDialogAction(targetWsUrl, payload);
350
425
  const expression = `(async () => {
351
426
  const payload = ${JSON.stringify(payload)};
427
+
428
+ function resolveElement(selector, fallbacks) {
429
+ let el = selector ? document.querySelector(selector) : null;
430
+ if (el) return el;
431
+ if (fallbacks && fallbacks.length) {
432
+ for (const fb of fallbacks) {
433
+ el = document.querySelector(fb);
434
+ if (el) return el;
435
+ }
436
+ }
437
+ throw new Error('Selector not found');
438
+ }
439
+
352
440
  if (payload.action === 'click') {
353
- const el = document.querySelector(payload.selector);
354
- if (!el) throw new Error('Selector not found');
441
+ const el = resolveElement(payload.selector, payload.fallbackSelectors);
355
442
  const rect = el.getBoundingClientRect();
356
443
  if (rect.width === 0 && rect.height === 0) {
357
444
  throw new Error('Element has zero size – it may be hidden or not rendered');
@@ -369,8 +456,7 @@ var ChromeCdpBrowserController = class {
369
456
  return 'clicked';
370
457
  }
371
458
  if (payload.action === 'type') {
372
- const el = document.querySelector(payload.selector);
373
- if (!el) throw new Error('Selector not found');
459
+ const el = resolveElement(payload.selector, payload.fallbackSelectors);
374
460
  el.focus();
375
461
  el.value = payload.text ?? '';
376
462
  el.dispatchEvent(new Event('input', { bubbles: true }));
@@ -405,8 +491,7 @@ var ChromeCdpBrowserController = class {
405
491
  }
406
492
  if (payload.action === 'scroll') {
407
493
  if (payload.selector) {
408
- const el = document.querySelector(payload.selector);
409
- if (!el) throw new Error('Selector not found');
494
+ const el = resolveElement(payload.selector, payload.fallbackSelectors);
410
495
  el.scrollBy({ left: payload.scrollX ?? 0, top: payload.scrollY ?? 0, behavior: 'smooth' });
411
496
  return 'scrolled element';
412
497
  }
@@ -414,8 +499,7 @@ var ChromeCdpBrowserController = class {
414
499
  return 'scrolled page';
415
500
  }
416
501
  if (payload.action === 'hover') {
417
- const el = document.querySelector(payload.selector);
418
- if (!el) throw new Error('Selector not found');
502
+ const el = resolveElement(payload.selector, payload.fallbackSelectors);
419
503
  const rect = el.getBoundingClientRect();
420
504
  const cx = rect.left + rect.width / 2;
421
505
  const cy = rect.top + rect.height / 2;
@@ -425,8 +509,7 @@ var ChromeCdpBrowserController = class {
425
509
  return 'hovered';
426
510
  }
427
511
  if (payload.action === 'select') {
428
- const el = document.querySelector(payload.selector);
429
- if (!el) throw new Error('Selector not found');
512
+ const el = resolveElement(payload.selector, payload.fallbackSelectors);
430
513
  if (el.tagName.toLowerCase() !== 'select') throw new Error('Element is not a <select>');
431
514
  el.value = payload.value ?? '';
432
515
  el.dispatchEvent(new Event('change', { bubbles: true }));
@@ -434,8 +517,7 @@ var ChromeCdpBrowserController = class {
434
517
  return 'selected ' + el.value;
435
518
  }
436
519
  if (payload.action === 'toggle') {
437
- const el = document.querySelector(payload.selector);
438
- if (!el) throw new Error('Selector not found');
520
+ const el = resolveElement(payload.selector, payload.fallbackSelectors);
439
521
  el.click();
440
522
  const checked = el.checked !== undefined ? el.checked : el.getAttribute('aria-checked') === 'true';
441
523
  return 'toggled to ' + (checked ? 'checked' : 'unchecked');
@@ -608,34 +690,62 @@ var ChromeCdpBrowserController = class {
608
690
  return value.replace(/\\\\/g, '\\\\\\\\').replace(/"/g, '\\\\"');
609
691
  }
610
692
 
693
+ function tryUniqueSelector(sel) {
694
+ try { return document.querySelectorAll(sel).length === 1 ? sel : null; }
695
+ catch { return null; }
696
+ }
697
+
611
698
  function buildSelector(el) {
612
699
  if (el.id) return '#' + CSS.escape(el.id);
613
700
 
614
701
  const name = el.getAttribute('name');
615
702
  if (name) {
616
703
  const tag = el.tagName.toLowerCase();
617
- const sel = tag + '[name="' + escapeAttr(name) + '"]';
618
- if (document.querySelectorAll(sel).length === 1) return sel;
704
+ const sel = tryUniqueSelector(tag + '[name="' + escapeAttr(name) + '"]');
705
+ if (sel) return sel;
619
706
  }
620
707
 
621
708
  const testId = el.getAttribute('data-testid') || el.getAttribute('data-test-id');
622
709
  if (testId) {
623
710
  const attr = el.hasAttribute('data-testid') ? 'data-testid' : 'data-test-id';
624
- const sel = '[' + attr + '="' + escapeAttr(testId) + '"]';
625
- if (document.querySelectorAll(sel).length === 1) return sel;
711
+ const sel = tryUniqueSelector('[' + attr + '="' + escapeAttr(testId) + '"]');
712
+ if (sel) return sel;
626
713
  }
627
714
 
628
715
  const ariaLabel = el.getAttribute('aria-label');
629
716
  if (ariaLabel) {
630
717
  const tag = el.tagName.toLowerCase();
631
- const sel = tag + '[aria-label="' + escapeAttr(ariaLabel) + '"]';
632
- if (document.querySelectorAll(sel).length === 1) return sel;
718
+ const sel = tryUniqueSelector(tag + '[aria-label="' + escapeAttr(ariaLabel) + '"]');
719
+ if (sel) return sel;
720
+ }
721
+
722
+ const dataCy = el.getAttribute('data-cy');
723
+ if (dataCy) {
724
+ const sel = tryUniqueSelector('[data-cy="' + escapeAttr(dataCy) + '"]');
725
+ if (sel) return sel;
726
+ }
727
+
728
+ const dataTest = el.getAttribute('data-test');
729
+ if (dataTest) {
730
+ const sel = tryUniqueSelector('[data-test="' + escapeAttr(dataTest) + '"]');
731
+ if (sel) return sel;
633
732
  }
634
733
 
734
+ const role = el.getAttribute('role');
735
+ if (role && ariaLabel) {
736
+ const sel = tryUniqueSelector('[role="' + escapeAttr(role) + '"][aria-label="' + escapeAttr(ariaLabel) + '"]');
737
+ if (sel) return sel;
738
+ }
739
+
740
+ // Path fallback — anchor at nearest ancestor with an id for shorter selectors
635
741
  const parts = [];
636
742
  let current = el;
637
743
  while (current && current !== document.documentElement) {
638
744
  const tag = current.tagName.toLowerCase();
745
+ if (current !== el && current.id) {
746
+ parts.unshift('#' + CSS.escape(current.id));
747
+ break;
748
+ }
639
749
  const parent = current.parentElement;
640
750
  if (!parent) { parts.unshift(tag); break; }
641
751
  const siblings = Array.from(parent.children).filter(
@@ -652,6 +762,50 @@ var ChromeCdpBrowserController = class {
652
762
  return parts.join(' > ');
653
763
  }
654
764
 
765
+ function buildFallbackSelectors(el, primarySelector) {
766
+ const fallbacks = [];
767
+ const candidates = [];
768
+
769
+ if (el.id) candidates.push('#' + CSS.escape(el.id));
770
+
771
+ const name = el.getAttribute('name');
772
+ if (name) {
773
+ const tag = el.tagName.toLowerCase();
774
+ const sel = tag + '[name="' + escapeAttr(name) + '"]';
775
+ candidates.push(sel);
776
+ }
777
+
778
+ const testId = el.getAttribute('data-testid') || el.getAttribute('data-test-id');
779
+ if (testId) {
780
+ const attr = el.hasAttribute('data-testid') ? 'data-testid' : 'data-test-id';
781
+ candidates.push('[' + attr + '="' + escapeAttr(testId) + '"]');
782
+ }
783
+
784
+ const ariaLabel = el.getAttribute('aria-label');
785
+ if (ariaLabel) {
786
+ const tag = el.tagName.toLowerCase();
787
+ candidates.push(tag + '[aria-label="' + escapeAttr(ariaLabel) + '"]');
788
+ }
789
+
790
+ const dataCy = el.getAttribute('data-cy');
791
+ if (dataCy) candidates.push('[data-cy="' + escapeAttr(dataCy) + '"]');
792
+
793
+ const dataTest = el.getAttribute('data-test');
794
+ if (dataTest) candidates.push('[data-test="' + escapeAttr(dataTest) + '"]');
795
+
796
+ const role = el.getAttribute('role');
797
+ if (role && ariaLabel) {
798
+ candidates.push('[role="' + escapeAttr(role) + '"][aria-label="' + escapeAttr(ariaLabel) + '"]');
799
+ }
800
+
801
+ for (const sel of candidates) {
802
+ if (sel === primarySelector) continue;
803
+ if (tryUniqueSelector(sel)) fallbacks.push(sel);
804
+ if (fallbacks.length >= 3) break;
805
+ }
806
+ return fallbacks;
807
+ }
808
+
655
809
  function isVisible(el) {
656
810
  const style = window.getComputedStyle(el);
657
811
  if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') {
@@ -715,8 +869,9 @@ var ChromeCdpBrowserController = class {
715
869
  totalFound++;
716
870
  if (results.length >= limit) continue;
717
871
 
872
+ const primarySelector = buildSelector(el);
718
873
  const entry = {
719
- selector: buildSelector(el),
874
+ selector: primarySelector,
720
875
  role,
721
876
  tagName: el.tagName.toLowerCase(),
722
877
  text: getText(el),
@@ -725,6 +880,9 @@ var ChromeCdpBrowserController = class {
725
880
  enabled: isEnabled(el),
726
881
  };
727
882
 
883
+ const fb = buildFallbackSelectors(el, primarySelector);
884
+ if (fb.length) entry.fallbackSelectors = fb;
885
+
728
886
  if (role === 'link' && el.href) entry.href = el.href;
729
887
  if (role === 'input') entry.inputType = (el.type || 'text').toLowerCase();
730
888
  const al = el.getAttribute('aria-label');
@@ -799,9 +957,30 @@ var MockBrowserController = class {
799
957
  page.title = url;
800
958
  page.text = `Content of ${url}`;
801
959
  page.html = `<html><body>${page.text}</body></html>`;
960
+ this.history.splice(this.historyIndex + 1);
961
+ this.history.push(url);
962
+ this.historyIndex = this.history.length - 1;
802
963
  return url;
803
964
  }
804
- async interact(_cdpUrl, payload) {
965
+ history = [];
966
+ historyIndex = -1;
967
+ async interact(cdpUrl, payload) {
968
+ if (payload.action === "goBack") {
969
+ if (this.historyIndex <= 0) return "no history to go back";
970
+ this.historyIndex--;
971
+ const page = this.pages.get(cdpUrl);
972
+ if (page) page.url = this.history[this.historyIndex] ?? page.url;
973
+ return `navigated back to ${page?.url ?? "previous page"}`;
974
+ }
975
+ if (payload.action === "goForward") {
976
+ if (this.historyIndex >= this.history.length - 1) return "no history to go forward";
977
+ this.historyIndex++;
978
+ const page = this.pages.get(cdpUrl);
979
+ if (page) page.url = this.history[this.historyIndex] ?? page.url;
980
+ return `navigated forward to ${page?.url ?? "next page"}`;
981
+ }
982
+ if (payload.action === "refresh") return "page refreshed";
983
+ if (payload.action === "dialog") return payload.text === "dismiss" ? "dismissed confirm: mock dialog" : "accepted confirm: mock dialog";
805
984
  return `interacted:${payload.action}`;
806
985
  }
807
986
  async getContent(cdpUrl, options) {
@@ -1401,7 +1580,7 @@ var EventStore = class {
1401
1580
  const existing = this.events.get(event.sessionId) ?? [];
1402
1581
  existing.push(event);
1403
1582
  this.events.set(event.sessionId, existing);
1404
- fs.appendFileSync(this.filePath, `${JSON.stringify(event)}\n`, "utf8");
1583
+ fs.appendFile(this.filePath, `${JSON.stringify(event)}\n`, "utf8", () => {});
1405
1584
  }
1406
1585
  list(sessionId, limit = 100) {
1407
1586
  const entries = this.events.get(sessionId) ?? [];
@@ -1502,6 +1681,9 @@ function buildSelectorHints(insight) {
1502
1681
  }
1503
1682
  return [...weightedSelectors.entries()].sort((a, b) => b[1] - a[1]).slice(0, 5).map(([selector]) => selector);
1504
1683
  }
1684
+ function buildSelectorAliases(insight) {
1685
+ return insight.selectorAliases ?? [];
1686
+ }
1505
1687
  function selectorSignal(insight) {
1506
1688
  const recipeSelectors = insight.actionRecipe.filter((step) => Boolean(step.selector)).length;
1507
1689
  const recipeCoverage = insight.actionRecipe.length > 0 ? recipeSelectors / insight.actionRecipe.length : 0;
@@ -1525,6 +1707,7 @@ function scoreInsight(insight, normalizedIntent, normalizedDomain) {
1525
1707
  freshness: insight.freshness,
1526
1708
  lastVerifiedAt: insight.lastVerifiedAt,
1527
1709
  selectorHints: buildSelectorHints(insight),
1710
+ selectorAliases: buildSelectorAliases(insight),
1528
1711
  score
1529
1712
  };
1530
1713
  }
@@ -1639,6 +1822,11 @@ const EvidenceRecordSchema = z.object({
1639
1822
  url: z.string().optional(),
1640
1823
  recordedAt: z.string().datetime()
1641
1824
  });
1825
+ const SelectorAliasSchema = z.object({
1826
+ alias: z.string().min(1),
1827
+ selector: z.string().min(1),
1828
+ fallbackSelectors: z.array(z.string()).default([])
1829
+ });
1642
1830
  const TaskInsightSchema = z.object({
1643
1831
  insightId: z.string().min(1),
1644
1832
  taskIntent: z.string().min(1),
@@ -1656,7 +1844,8 @@ const TaskInsightSchema = z.object({
1656
1844
  createdAt: z.string().datetime(),
1657
1845
  updatedAt: z.string().datetime(),
1658
1846
  supersedes: z.string().optional(),
1659
- evidence: z.array(EvidenceRecordSchema)
1847
+ evidence: z.array(EvidenceRecordSchema),
1848
+ selectorAliases: z.array(SelectorAliasSchema).default([])
1660
1849
  });
1661
1850
  const MemoryStateSchema = z.object({ insights: z.array(TaskInsightSchema) });
1662
1851
 
@@ -1841,7 +2030,7 @@ var MemoryService = class {
1841
2030
  const evidence = this.createEvidence(input, "success");
1842
2031
  if (!matched) {
1843
2032
  const now = (/* @__PURE__ */ new Date()).toISOString();
1844
- const created = {
2033
+ let created = {
1845
2034
  insightId: crypto.randomUUID(),
1846
2035
  taskIntent: input.taskIntent,
1847
2036
  siteDomain: input.siteDomain,
@@ -1857,8 +2046,10 @@ var MemoryService = class {
1857
2046
  lastVerifiedAt: now,
1858
2047
  createdAt: now,
1859
2048
  updatedAt: now,
1860
- evidence: [evidence]
2049
+ evidence: [evidence],
2050
+ selectorAliases: []
1861
2051
  };
2052
+ created = this.maybeGenerateAliases(created);
1862
2053
  this.store.upsert(created);
1863
2054
  this.invalidateSearchCache();
1864
2055
  return created;
@@ -1872,7 +2063,7 @@ var MemoryService = class {
1872
2063
  });
1873
2064
  if (matched.freshness === "suspect" || matched.freshness === "stale") {
1874
2065
  const now = (/* @__PURE__ */ new Date()).toISOString();
1875
- const versioned = {
2066
+ let versioned = {
1876
2067
  ...refreshed,
1877
2068
  insightId: crypto.randomUUID(),
1878
2069
  supersedes: matched.insightId,
@@ -1880,13 +2071,15 @@ var MemoryService = class {
1880
2071
  createdAt: now,
1881
2072
  updatedAt: now
1882
2073
  };
2074
+ versioned = this.maybeGenerateAliases(versioned);
1883
2075
  this.store.upsert(versioned);
1884
2076
  this.invalidateSearchCache();
1885
2077
  return versioned;
1886
2078
  }
1887
- this.store.upsert(refreshed);
2079
+ const withAliases = this.maybeGenerateAliases(refreshed);
2080
+ this.store.upsert(withAliases);
1888
2081
  this.invalidateSearchCache();
1889
- return refreshed;
2082
+ return withAliases;
1890
2083
  }
1891
2084
  recordFailure(input, errorMessage) {
1892
2085
  const insights = this.store.list();
@@ -1904,7 +2097,13 @@ var MemoryService = class {
1904
2097
  return failed;
1905
2098
  }
1906
2099
  findBestExactMatch(insights, taskIntent, siteDomain) {
1907
- return insights.filter((insight) => insight.taskIntent.toLowerCase() === taskIntent.toLowerCase() && insight.siteDomain.toLowerCase() === siteDomain.toLowerCase()).sort((a, b) => b.updatedAt.localeCompare(a.updatedAt))[0];
2100
+ const intentLower = taskIntent.toLowerCase();
2101
+ const domainLower = siteDomain.toLowerCase();
2102
+ let best;
2103
+ for (const insight of insights) if (insight.taskIntent.toLowerCase() === intentLower && insight.siteDomain.toLowerCase() === domainLower) {
2104
+ if (!best || insight.updatedAt > best.updatedAt) best = insight;
2105
+ }
2106
+ return best;
1908
2107
  }
1909
2108
  createEvidence(input, result, reason) {
1910
2109
  return {
@@ -1917,6 +2116,42 @@ var MemoryService = class {
1917
2116
  recordedAt: (/* @__PURE__ */ new Date()).toISOString()
1918
2117
  };
1919
2118
  }
2119
+ maybeGenerateAliases(insight) {
2120
+ if (insight.confidence < .8 || insight.successCount < 3) return insight;
2121
+ const aliasMap = /* @__PURE__ */ new Map();
2122
+ for (const existing of insight.selectorAliases ?? []) aliasMap.set(existing.selector, existing);
2123
+ const selectors = /* @__PURE__ */ new Set();
2124
+ for (const step of insight.actionRecipe) if (step.selector) selectors.add(step.selector);
2125
+ for (const ev of insight.evidence) if (ev.selector && ev.result === "success") selectors.add(ev.selector);
2126
+ for (const selector of selectors) {
2127
+ if (aliasMap.has(selector)) continue;
2128
+ const alias = this.deriveAliasName(selector);
2129
+ if (alias) aliasMap.set(selector, {
2130
+ alias,
2131
+ selector,
2132
+ fallbackSelectors: []
2133
+ });
2134
+ if (aliasMap.size >= 10) break;
2135
+ }
2136
+ return {
2137
+ ...insight,
2138
+ selectorAliases: [...aliasMap.values()].slice(0, 10)
2139
+ };
2140
+ }
2141
+ deriveAliasName(selector) {
2142
+ const idMatch = selector.match(/^#([\w-]+)$/);
2143
+ if (idMatch) return idMatch[1].replace(/[-_]/g, " ");
2144
+ const nameMatch = selector.match(/\[name="([^"]+)"\]/);
2145
+ if (nameMatch) return nameMatch[1];
2146
+ const ariaMatch = selector.match(/\[aria-label="([^"]+)"\]/);
2147
+ if (ariaMatch) return ariaMatch[1];
2148
+ const testIdMatch = selector.match(/\[data-testid="([^"]+)"\]/);
2149
+ if (testIdMatch) return testIdMatch[1];
2150
+ const cyMatch = selector.match(/\[data-cy="([^"]+)"\]/);
2151
+ if (cyMatch) return cyMatch[1];
2152
+ const testMatch = selector.match(/\[data-test="([^"]+)"\]/);
2153
+ if (testMatch) return testMatch[1];
2154
+ }
1920
2155
  mergeRecipe(recipe, step) {
1921
2156
  if (recipe.find((existing) => existing.summary === step.summary && existing.selector === step.selector)) return recipe;
1922
2157
  return [...recipe, step].slice(-8);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentic-browser",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",