assistme 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/PLAN.md +14 -3
  2. package/dist/{chunk-UWE5WVQI.js → chunk-KX7ITO55.js} +20 -11
  3. package/dist/index.js +1791 -572
  4. package/dist/{job-runner-N4XAAWLJ.js → job-runner-P2L6MOOX.js} +1 -1
  5. package/package.json +5 -3
  6. package/src/agent/job-runner.ts +9 -13
  7. package/src/agent/mcp-servers.ts +6 -1020
  8. package/src/agent/memory.ts +2 -11
  9. package/src/agent/processor.ts +18 -108
  10. package/src/agent/scheduler.ts +2 -3
  11. package/src/agent/session.ts +20 -36
  12. package/src/agent/skills.ts +167 -61
  13. package/src/agent/system-prompt.ts +126 -0
  14. package/src/browser/chrome-launcher.ts +555 -0
  15. package/src/browser/controller.ts +1386 -0
  16. package/src/browser/types.ts +70 -0
  17. package/src/commands/credential.ts +190 -0
  18. package/src/commands/job.ts +14 -45
  19. package/src/commands/memory.ts +16 -29
  20. package/src/commands/schedule.ts +15 -37
  21. package/src/commands/start.ts +11 -43
  22. package/src/credentials/credential-store.test.ts +162 -0
  23. package/src/credentials/credential-store.ts +266 -0
  24. package/src/credentials/encryption.test.ts +98 -0
  25. package/src/credentials/encryption.ts +82 -0
  26. package/src/credentials/index.ts +15 -0
  27. package/src/credentials/local-store.ts +89 -0
  28. package/src/db/action.ts +19 -0
  29. package/src/db/api-client.ts +3 -32
  30. package/src/db/auth-store.ts +41 -0
  31. package/src/db/auth.ts +38 -0
  32. package/src/db/conversation.ts +39 -0
  33. package/src/db/event.ts +52 -0
  34. package/src/db/job-poll.ts +18 -0
  35. package/src/db/session.ts +60 -0
  36. package/src/db/supabase.ts +40 -383
  37. package/src/db/task.ts +69 -0
  38. package/src/db/types.ts +54 -0
  39. package/src/index.ts +2 -0
  40. package/src/mcp/agent-tools-server.ts +1047 -0
  41. package/src/mcp/browser-server.ts +258 -0
  42. package/src/tools/browser.ts +28 -1208
  43. package/src/tools/index.ts +32 -263
  44. package/src/tools/web.ts +0 -73
@@ -0,0 +1,1386 @@
1
+ import { WebSocket } from "ws";
2
+ import { platform } from "node:os";
3
+ import type {
4
+ CDPTab,
5
+ CDPResponse,
6
+ CDPEvalResult,
7
+ CDPScreenshotResult,
8
+ BoundingBox,
9
+ RefEntry,
10
+ SnapshotResult,
11
+ ActionSpec,
12
+ ActionResult,
13
+ } from "./types.js";
14
+
15
+ export class BrowserController {
16
+ private ws: WebSocket | null = null;
17
+ private debugPort: number;
18
+ private messageId = 0;
19
+ private callbacks = new Map<number, (response: CDPResponse) => void>();
20
+ private connected = false;
21
+ private currentTabId: string | null = null;
22
+ private refCache: Map<number, RefEntry> = new Map();
23
+
24
+ constructor(port = 9222) {
25
+ this.debugPort = port;
26
+ }
27
+
28
+ // ── Connection ──────────────────────────────────────────────────
29
+
30
+ async isAvailable(): Promise<boolean> {
31
+ try {
32
+ const res = await fetch(`http://127.0.0.1:${this.debugPort}/json/version`, {
33
+ signal: AbortSignal.timeout(2000),
34
+ });
35
+ return res.ok;
36
+ } catch {
37
+ return false;
38
+ }
39
+ }
40
+
41
+ async connect(tabIndex?: number): Promise<string> {
42
+ // Reuse existing connection if still open and targeting the same tab
43
+ if (this.connected && this.ws?.readyState === WebSocket.OPEN) {
44
+ if (tabIndex === undefined) {
45
+ return "Already connected to browser.";
46
+ }
47
+ // If a specific tab is requested, check if we're already on it
48
+ const tabs = await this.getTabs();
49
+ const pageTabs = tabs.filter((t) => t.type === "page");
50
+ const targetTab = pageTabs[tabIndex];
51
+ if (targetTab && targetTab.id === this.currentTabId) {
52
+ return `Already connected to tab: "${targetTab.title}"`;
53
+ }
54
+ // Need to switch — disconnect first
55
+ await this.disconnect();
56
+ }
57
+
58
+ const available = await this.isAvailable();
59
+ if (!available) {
60
+ throw new Error(
61
+ `Cannot connect to browser on port ${this.debugPort}. ` +
62
+ "Chrome remote debugging is not reachable. " +
63
+ "Please ensure Chrome is running with remote debugging enabled."
64
+ );
65
+ }
66
+
67
+ const tabs = await this.getTabs();
68
+ const pageTabs = tabs.filter((t) => t.type === "page");
69
+
70
+ if (pageTabs.length === 0) {
71
+ throw new Error("No browser tabs found. Please open at least one tab.");
72
+ }
73
+
74
+ const targetTab = pageTabs[tabIndex ?? 0];
75
+ if (!targetTab.webSocketDebuggerUrl) {
76
+ throw new Error("Tab does not expose a WebSocket debugger URL.");
77
+ }
78
+
79
+ this.currentTabId = targetTab.id;
80
+
81
+ return new Promise((resolve, reject) => {
82
+ let settled = false;
83
+ this.ws = new WebSocket(targetTab.webSocketDebuggerUrl!);
84
+
85
+ const connectTimeout = setTimeout(() => {
86
+ if (!settled) {
87
+ settled = true;
88
+ this.ws?.close();
89
+ reject(new Error("Connection timeout (5s)"));
90
+ }
91
+ }, 5000);
92
+
93
+ this.ws.on("open", () => {
94
+ if (settled) return;
95
+ settled = true;
96
+ clearTimeout(connectTimeout);
97
+ this.connected = true;
98
+ // Enable required domains
99
+ this.send("Page.enable").catch(() => {});
100
+ this.send("Runtime.enable").catch(() => {});
101
+ this.send("DOM.enable").catch(() => {});
102
+ resolve(`Connected to tab: "${targetTab.title}" (${targetTab.url})`);
103
+ });
104
+
105
+ this.ws.on("message", (data) => {
106
+ try {
107
+ const msg = JSON.parse(data.toString()) as CDPResponse;
108
+ if (msg.id !== undefined && this.callbacks.has(msg.id)) {
109
+ this.callbacks.get(msg.id)!(msg);
110
+ this.callbacks.delete(msg.id);
111
+ }
112
+ } catch {
113
+ // Ignore non-JSON messages (events)
114
+ }
115
+ });
116
+
117
+ this.ws.on("error", (err) => {
118
+ this.connected = false;
119
+ if (!settled) {
120
+ settled = true;
121
+ clearTimeout(connectTimeout);
122
+ reject(new Error(`WebSocket error: ${err.message}`));
123
+ }
124
+ });
125
+
126
+ this.ws.on("close", () => {
127
+ this.connected = false;
128
+ this.ws = null;
129
+ // Reject pending CDP commands so they don't hang forever
130
+ for (const [id, cb] of this.callbacks) {
131
+ cb({ id, error: { code: -1, message: "WebSocket closed" } });
132
+ }
133
+ this.callbacks.clear();
134
+ });
135
+ });
136
+ }
137
+
138
+ async disconnect(): Promise<string> {
139
+ if (this.ws) {
140
+ this.ws.close();
141
+ this.ws = null;
142
+ this.connected = false;
143
+ }
144
+ return "Disconnected from browser.";
145
+ }
146
+
147
+ // ── CDP Protocol ────────────────────────────────────────────────
148
+
149
+ private async getTabs(): Promise<CDPTab[]> {
150
+ const res = await fetch(`http://127.0.0.1:${this.debugPort}/json`, {
151
+ signal: AbortSignal.timeout(3000),
152
+ });
153
+ return (await res.json()) as CDPTab[];
154
+ }
155
+
156
+ private send(method: string, params?: Record<string, unknown>): Promise<Record<string, unknown>> {
157
+ return new Promise((resolve, reject) => {
158
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
159
+ reject(new Error("Not connected to browser. Call browser_connect first."));
160
+ return;
161
+ }
162
+
163
+ const id = ++this.messageId;
164
+ const timeout = setTimeout(() => {
165
+ this.callbacks.delete(id);
166
+ reject(new Error(`CDP command timed out: ${method}`));
167
+ }, 15000);
168
+
169
+ this.callbacks.set(id, (response) => {
170
+ clearTimeout(timeout);
171
+ if (response.error) {
172
+ reject(new Error(`CDP error: ${response.error.message}`));
173
+ } else {
174
+ resolve(response.result || {});
175
+ }
176
+ });
177
+
178
+ this.ws.send(JSON.stringify({ id, method, params }));
179
+ });
180
+ }
181
+
182
+ private ensureConnected() {
183
+ if (!this.connected || !this.ws || this.ws.readyState !== WebSocket.OPEN) {
184
+ throw new Error("Not connected to browser. Use browser_connect tool first.");
185
+ }
186
+ }
187
+
188
+ // ── Navigation ──────────────────────────────────────────────────
189
+
190
+ async navigate(url: string): Promise<string> {
191
+ this.ensureConnected();
192
+ await this.send("Page.navigate", { url });
193
+ // Wait for load
194
+ await this.waitForLoad();
195
+ const info = await this.getPageInfo();
196
+ return `Navigated to: ${info.title}\nURL: ${info.url}`;
197
+ }
198
+
199
+ async goBack(): Promise<string> {
200
+ this.ensureConnected();
201
+ await this.send("Page.navigateToHistoryEntry", {
202
+ entryId: -1,
203
+ }).catch(() => {});
204
+ // Fallback: use JS
205
+ await this.evaluate("window.history.back()");
206
+ await this.waitForLoad();
207
+ const info = await this.getPageInfo();
208
+ return `Went back to: ${info.title}`;
209
+ }
210
+
211
+ async reload(): Promise<string> {
212
+ this.ensureConnected();
213
+ await this.send("Page.reload");
214
+ await this.waitForLoad();
215
+ return "Page reloaded.";
216
+ }
217
+
218
+ // ── Page Reading ────────────────────────────────────────────────
219
+
220
+ async readPage(): Promise<string> {
221
+ this.ensureConnected();
222
+ const result = await this.send("Runtime.evaluate", {
223
+ expression: `
224
+ (function() {
225
+ // Get page title and URL
226
+ let output = "Title: " + document.title + "\\n";
227
+ output += "URL: " + window.location.href + "\\n\\n";
228
+
229
+ // Get main text content, cleaned up
230
+ const body = document.body.cloneNode(true);
231
+ // Remove scripts, styles, navs that add noise
232
+ body.querySelectorAll('script, style, noscript, svg, iframe').forEach(el => el.remove());
233
+
234
+ const text = body.innerText
235
+ .split('\\n')
236
+ .map(line => line.trim())
237
+ .filter(line => line.length > 0)
238
+ .join('\\n');
239
+
240
+ output += text;
241
+ return output.slice(0, 30000);
242
+ })()
243
+ `,
244
+ returnByValue: true,
245
+ });
246
+
247
+ return ((result as CDPEvalResult).result?.value as string) || "Could not read page content.";
248
+ }
249
+
250
+ async readElement(selector: string): Promise<string> {
251
+ this.ensureConnected();
252
+ const selectorJS = JSON.stringify(selector);
253
+ const result = await this.send("Runtime.evaluate", {
254
+ expression: `
255
+ (function() {
256
+ const el = document.querySelector(${selectorJS});
257
+ if (!el) return 'Element not found: ' + ${selectorJS};
258
+ return el.innerText || el.textContent || el.value || '(empty)';
259
+ })()
260
+ `,
261
+ returnByValue: true,
262
+ });
263
+
264
+ return ((result as CDPEvalResult).result?.value as string) || "Element not found.";
265
+ }
266
+
267
+ async getPageInfo(): Promise<{ title: string; url: string }> {
268
+ const result = await this.send("Runtime.evaluate", {
269
+ expression: `JSON.stringify({ title: document.title, url: window.location.href })`,
270
+ returnByValue: true,
271
+ });
272
+ try {
273
+ return JSON.parse(((result as CDPEvalResult).result?.value as string) || "{}");
274
+ } catch {
275
+ return { title: "Unknown", url: "unknown" };
276
+ }
277
+ }
278
+
279
+ // ── Screenshots (for Claude vision) ─────────────────────────────
280
+
281
+ async screenshot(): Promise<string> {
282
+ this.ensureConnected();
283
+ const result = await this.send("Page.captureScreenshot", {
284
+ format: "png",
285
+ quality: 80,
286
+ captureBeyondViewport: false,
287
+ });
288
+ // Returns base64-encoded PNG
289
+ return (result as CDPScreenshotResult).data || "";
290
+ }
291
+
292
+ // ── Interactions ────────────────────────────────────────────────
293
+
294
+ async click(selector: string): Promise<string> {
295
+ this.ensureConnected();
296
+ const selectorJS = JSON.stringify(selector);
297
+
298
+ const result = await this.send("Runtime.evaluate", {
299
+ expression: `
300
+ (function() {
301
+ var sel = ${selectorJS};
302
+
303
+ // Support :contains('text') pseudo-selector (not native CSS)
304
+ var containsMatch = sel.match(/^(.+?)?:contains\\(['"](.+?)['"]\\)$/);
305
+ if (containsMatch) {
306
+ var baseTag = (containsMatch[1] || '*').toLowerCase();
307
+ var searchText = containsMatch[2];
308
+ var candidates = document.querySelectorAll(baseTag === '*' ? '*' : baseTag);
309
+ var found = null;
310
+ for (var i = 0; i < candidates.length; i++) {
311
+ var c = candidates[i];
312
+ // Prefer exact text match on direct text content (not children)
313
+ var directText = Array.from(c.childNodes)
314
+ .filter(function(n) { return n.nodeType === 3; })
315
+ .map(function(n) { return n.textContent.trim(); })
316
+ .join(' ');
317
+ if (directText === searchText || c.textContent.trim() === searchText) {
318
+ // Prefer the deepest (most specific) matching element
319
+ if (!found || found.contains(c)) found = c;
320
+ }
321
+ }
322
+ if (!found) return 'Element not found: ' + sel;
323
+ found.scrollIntoView({ block: 'center', behavior: 'instant' });
324
+ found.click();
325
+ return 'Clicked: ' + (found.tagName || '') + ' ' + (found.textContent || '').slice(0, 50).trim();
326
+ }
327
+
328
+ var el = document.querySelector(sel);
329
+ if (!el) return 'Element not found: ' + sel;
330
+
331
+ // Scroll into view
332
+ el.scrollIntoView({ block: 'center', behavior: 'instant' });
333
+
334
+ // Click
335
+ el.click();
336
+ return 'Clicked: ' + (el.tagName || '') + ' ' + (el.textContent || '').slice(0, 50).trim();
337
+ })()
338
+ `,
339
+ returnByValue: true,
340
+ });
341
+
342
+ // Small delay for any resulting navigation/animation
343
+ await new Promise((r) => setTimeout(r, 500));
344
+ return ((result as CDPEvalResult).result?.value as string) || "Click executed.";
345
+ }
346
+
347
+ async typeText(selector: string, text: string): Promise<string> {
348
+ this.ensureConnected();
349
+ // Use JSON.stringify for safe string interpolation into JS — handles all
350
+ // special characters (quotes, backslashes, newlines, unicode) correctly.
351
+ const selectorJS = JSON.stringify(selector);
352
+ const textJS = JSON.stringify(text);
353
+
354
+ // First clear and set value via JS, dispatching all relevant events
355
+ const result = await this.send("Runtime.evaluate", {
356
+ expression: `
357
+ (function() {
358
+ const el = document.querySelector(${selectorJS});
359
+ if (!el) return 'Element not found: ' + ${selectorJS};
360
+
361
+ el.focus();
362
+
363
+ // Clear existing value
364
+ const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
365
+ window.HTMLInputElement.prototype, 'value'
366
+ )?.set || Object.getOwnPropertyDescriptor(
367
+ window.HTMLTextAreaElement.prototype, 'value'
368
+ )?.set;
369
+ if (nativeInputValueSetter) {
370
+ nativeInputValueSetter.call(el, ${textJS});
371
+ } else {
372
+ el.value = ${textJS};
373
+ }
374
+
375
+ // Dispatch events that frameworks (React, Angular, Material) listen to
376
+ el.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
377
+ el.dispatchEvent(new Event('change', { bubbles: true, cancelable: true }));
378
+ el.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: ${textJS} }));
379
+ return 'Typed into: ' + (el.tagName || '') + ' [' + (el.name || el.id || '') + ']';
380
+ })()
381
+ `,
382
+ returnByValue: true,
383
+ });
384
+
385
+ return ((result as CDPEvalResult).result?.value as string) || "Text entered.";
386
+ }
387
+
388
+ async pressKey(key: string): Promise<string> {
389
+ this.ensureConnected();
390
+
391
+ // Map common key names to CDP key codes
392
+ const keyMap: Record<string, { keyCode: number; code: string }> = {
393
+ Enter: { keyCode: 13, code: "Enter" },
394
+ Tab: { keyCode: 9, code: "Tab" },
395
+ Escape: { keyCode: 27, code: "Escape" },
396
+ Backspace: { keyCode: 8, code: "Backspace" },
397
+ ArrowDown: { keyCode: 40, code: "ArrowDown" },
398
+ ArrowUp: { keyCode: 38, code: "ArrowUp" },
399
+ };
400
+
401
+ const mapped = keyMap[key];
402
+ if (mapped) {
403
+ await this.send("Input.dispatchKeyEvent", {
404
+ type: "keyDown",
405
+ key,
406
+ code: mapped.code,
407
+ windowsVirtualKeyCode: mapped.keyCode,
408
+ nativeVirtualKeyCode: mapped.keyCode,
409
+ });
410
+ await this.send("Input.dispatchKeyEvent", {
411
+ type: "keyUp",
412
+ key,
413
+ code: mapped.code,
414
+ windowsVirtualKeyCode: mapped.keyCode,
415
+ nativeVirtualKeyCode: mapped.keyCode,
416
+ });
417
+ } else {
418
+ // Single character key
419
+ await this.send("Input.dispatchKeyEvent", {
420
+ type: "char",
421
+ text: key,
422
+ });
423
+ }
424
+
425
+ return `Pressed key: ${key}`;
426
+ }
427
+
428
+ async scrollDown(): Promise<string> {
429
+ this.ensureConnected();
430
+ await this.send("Runtime.evaluate", {
431
+ expression: "window.scrollBy(0, window.innerHeight * 0.8)",
432
+ });
433
+ await new Promise((r) => setTimeout(r, 300));
434
+ return "Scrolled down.";
435
+ }
436
+
437
+ async scrollUp(): Promise<string> {
438
+ this.ensureConnected();
439
+ await this.send("Runtime.evaluate", {
440
+ expression: "window.scrollBy(0, -window.innerHeight * 0.8)",
441
+ });
442
+ await new Promise((r) => setTimeout(r, 300));
443
+ return "Scrolled up.";
444
+ }
445
+
446
+ // ── Annotated Snapshot (ref system) ─────────────────────────────
447
+
448
+ /**
449
+ * Take a snapshot of all interactive elements on the page.
450
+ *
451
+ * Strategy (informed by research — arxiv:2511.19477):
452
+ * - **Text ref table is ALWAYS returned** — compact, low-token, works for
453
+ * all page complexities including dense layouts (date pickers, tables).
454
+ * - **Annotated screenshot is OPTIONAL** (annotate parameter):
455
+ * - true: overlay ref badges on screenshot (best for simple pages with
456
+ * few interactive elements — gives visual context)
457
+ * - false: plain screenshot without overlays (default — avoids label
458
+ * clutter on dense pages; model still sees the page visually)
459
+ * - Research shows text-based grounding outperforms visual annotations
460
+ * on complex pages, and the hybrid approach (a11y text primary +
461
+ * selective vision) achieves ~85% vs ~50% for pure vision.
462
+ */
463
+ async snapshot(annotate = false): Promise<SnapshotResult> {
464
+ this.ensureConnected();
465
+
466
+ // Wait for page to be ready (auto-wait like Playwright)
467
+ await this.waitForLoad(5000);
468
+
469
+ // 1. Find all interactive elements, assign ref IDs, get bounding boxes
470
+ const findResult = await this.send("Runtime.evaluate", {
471
+ expression: `
472
+ (function() {
473
+ // Clean up previous refs
474
+ document.querySelectorAll('[data-assistme-ref]').forEach(function(el) {
475
+ el.removeAttribute('data-assistme-ref');
476
+ });
477
+
478
+ var selectors = [
479
+ 'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
480
+ '[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
481
+ '[role="combobox"]', '[role="listbox"]', '[role="menuitem"]', '[role="tab"]',
482
+ '[role="switch"]', '[role="slider"]', '[role="option"]', '[role="searchbox"]',
483
+ '[onclick]', '[tabindex]:not([tabindex="-1"])',
484
+ '[contenteditable="true"]'
485
+ ].join(', ');
486
+
487
+ // Collect elements from main document AND same-origin iframes
488
+ var all = Array.from(document.querySelectorAll(selectors));
489
+ try {
490
+ var iframes = document.querySelectorAll('iframe');
491
+ for (var fi = 0; fi < iframes.length; fi++) {
492
+ try {
493
+ var iframeDoc = iframes[fi].contentDocument;
494
+ if (iframeDoc) {
495
+ var iframeRect = iframes[fi].getBoundingClientRect();
496
+ var iframeEls = iframeDoc.querySelectorAll(selectors);
497
+ for (var fe = 0; fe < iframeEls.length; fe++) {
498
+ // Tag iframe elements with offset for coordinate correction
499
+ iframeEls[fe].__iframeOffset = { x: iframeRect.x, y: iframeRect.y };
500
+ all.push(iframeEls[fe]);
501
+ }
502
+ }
503
+ } catch(e) { /* cross-origin iframe, skip */ }
504
+ }
505
+ } catch(e) { /* iframe enumeration failed, continue */ }
506
+
507
+ var refs = [];
508
+ var vh = window.innerHeight;
509
+ var vw = window.innerWidth;
510
+
511
+ for (var i = 0; i < all.length && refs.length < 80; i++) {
512
+ var el = all[i];
513
+ var rect = el.getBoundingClientRect();
514
+
515
+ // Skip invisible / tiny elements
516
+ if (rect.width < 5 || rect.height < 5) continue;
517
+ var style = window.getComputedStyle(el);
518
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
519
+
520
+ // Skip elements far outside viewport
521
+ if (rect.bottom < -50 || rect.top > vh + 50) continue;
522
+ if (rect.right < -50 || rect.left > vw + 50) continue;
523
+
524
+ // Determine role
525
+ var role = el.getAttribute('role') || '';
526
+ if (!role) {
527
+ var tag = el.tagName.toLowerCase();
528
+ if (tag === 'a') role = 'link';
529
+ else if (tag === 'button') role = 'button';
530
+ else if (tag === 'input') {
531
+ var t = (el.type || 'text').toLowerCase();
532
+ if (t === 'checkbox') role = 'checkbox';
533
+ else if (t === 'radio') role = 'radio';
534
+ else if (t === 'submit' || t === 'button') role = 'button';
535
+ else role = 'textbox';
536
+ }
537
+ else if (tag === 'select') role = 'combobox';
538
+ else if (tag === 'textarea') role = 'textbox';
539
+ else role = tag;
540
+ }
541
+
542
+ // Determine accessible name
543
+ var name = '';
544
+ var ariaLabel = el.getAttribute('aria-label');
545
+ var ariaLabelledBy = el.getAttribute('aria-labelledby');
546
+ if (ariaLabel) {
547
+ name = ariaLabel;
548
+ } else if (ariaLabelledBy) {
549
+ var labelEl = document.getElementById(ariaLabelledBy);
550
+ if (labelEl) name = labelEl.textContent.trim();
551
+ } else if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
552
+ if (el.id) {
553
+ var lbl = document.querySelector('label[for="' + CSS.escape(el.id) + '"]');
554
+ if (lbl) name = lbl.textContent.trim();
555
+ }
556
+ if (!name) name = el.getAttribute('placeholder') || el.getAttribute('name') || '';
557
+ } else {
558
+ name = (el.textContent || '').trim().slice(0, 60);
559
+ }
560
+
561
+ var refId = refs.length + 1;
562
+ el.setAttribute('data-assistme-ref', String(refId));
563
+
564
+ // Correct coordinates for elements inside iframes
565
+ var offsetX = el.__iframeOffset ? el.__iframeOffset.x : 0;
566
+ var offsetY = el.__iframeOffset ? el.__iframeOffset.y : 0;
567
+
568
+ refs.push({
569
+ id: refId,
570
+ role: role,
571
+ name: name,
572
+ tag: el.tagName.toLowerCase(),
573
+ type: el.getAttribute('type') || '',
574
+ box: {
575
+ x: Math.round(rect.x + offsetX),
576
+ y: Math.round(rect.y + offsetY),
577
+ width: Math.round(rect.width),
578
+ height: Math.round(rect.height)
579
+ }
580
+ });
581
+ }
582
+
583
+ return JSON.stringify(refs);
584
+ })()
585
+ `,
586
+ returnByValue: true,
587
+ });
588
+
589
+ const refs: RefEntry[] = JSON.parse(
590
+ ((findResult as CDPEvalResult).result?.value as string) || "[]"
591
+ ).map((r: Record<string, unknown>) => ({
592
+ id: r.id as number,
593
+ role: r.role as string,
594
+ name: r.name as string,
595
+ tag: r.tag as string,
596
+ inputType: (r.type as string) || "",
597
+ box: r.box as BoundingBox,
598
+ }));
599
+
600
+ // 2. Optionally inject visual overlay with ref labels
601
+ // (Skip for dense pages — labels would overlap and become unreadable)
602
+ if (annotate && refs.length <= 40) {
603
+ const refsJson = JSON.stringify(refs);
604
+ await this.send("Runtime.evaluate", {
605
+ expression: `
606
+ (function() {
607
+ var old = document.getElementById('__assistme_refs__');
608
+ if (old) old.remove();
609
+
610
+ var overlay = document.createElement('div');
611
+ overlay.id = '__assistme_refs__';
612
+ overlay.style.cssText = 'position:fixed;top:0;left:0;width:100%;height:100%;pointer-events:none;z-index:2147483647;';
613
+
614
+ var refs = ${refsJson};
615
+ var vh = window.innerHeight;
616
+ var vw = window.innerWidth;
617
+
618
+ for (var i = 0; i < refs.length; i++) {
619
+ var b = refs[i].box;
620
+ if (b.y + b.height < 0 || b.y > vh || b.x + b.width < 0 || b.x > vw) continue;
621
+
622
+ // Red badge with ref number
623
+ var badge = document.createElement('div');
624
+ var badgeTop = Math.max(0, b.y - 14);
625
+ var badgeLeft = Math.max(0, b.x);
626
+ badge.style.cssText = 'position:fixed;background:#e8384f;color:#fff;font:bold 10px/1.2 monospace;padding:1px 3px;border-radius:2px;white-space:nowrap;'
627
+ + 'left:' + badgeLeft + 'px;top:' + badgeTop + 'px;';
628
+ badge.textContent = String(refs[i].id);
629
+ overlay.appendChild(badge);
630
+
631
+ // Border around element
632
+ var border = document.createElement('div');
633
+ border.style.cssText = 'position:fixed;border:1.5px solid #e8384f;border-radius:2px;'
634
+ + 'left:' + b.x + 'px;top:' + b.y + 'px;width:' + b.width + 'px;height:' + b.height + 'px;';
635
+ overlay.appendChild(border);
636
+ }
637
+
638
+ document.documentElement.appendChild(overlay);
639
+ })()
640
+ `,
641
+ });
642
+ }
643
+
644
+ // 3. Take screenshot (with or without overlay)
645
+ const image = await this.screenshot();
646
+
647
+ // 4. Remove overlay if injected (keep data-assistme-ref attributes for later resolution)
648
+ if (annotate) {
649
+ await this.send("Runtime.evaluate", {
650
+ expression: `(function() { var el = document.getElementById('__assistme_refs__'); if (el) el.remove(); })()`,
651
+ });
652
+ }
653
+
654
+ // 5. Cache refs for subsequent act() calls
655
+ this.refCache.clear();
656
+ for (const ref of refs) {
657
+ this.refCache.set(ref.id, ref);
658
+ }
659
+
660
+ // 6. Get page info
661
+ const pageInfo = await this.getPageInfo();
662
+
663
+ return { image, refs, url: pageInfo.url, title: pageInfo.title };
664
+ }
665
+
666
+ /**
667
+ * Build a compact text table of refs for the model.
668
+ */
669
+ static formatRefTable(result: SnapshotResult): string {
670
+ let table = `Page: ${result.title}\nURL: ${result.url}\n\nRefs:\n`;
671
+ for (const ref of result.refs) {
672
+ const extra = ref.inputType ? ` (${ref.inputType})` : "";
673
+ const nameStr = ref.name ? ` "${ref.name}"` : "";
674
+ table += `[${ref.id}] ${ref.role}${nameStr}${extra}\n`;
675
+ }
676
+ if (result.refs.length === 0) {
677
+ table += "(no interactive elements found)\n";
678
+ }
679
+ return table;
680
+ }
681
+
682
+ // ── Ref Resolution ────────────────────────────────────────────────
683
+
684
+ /**
685
+ * Resolve a ref ID to its current center coordinates in the viewport.
686
+ * Uses two strategies:
687
+ * 1. Fast: find by data-assistme-ref attribute (set during snapshot)
688
+ * 2. Stable: search by role + accessible name (survives DOM changes)
689
+ *
690
+ * Includes actionability checks (like Playwright):
691
+ * - Element must be visible (not display:none, not zero-size)
692
+ * - Element must be in viewport (scrolls into view if needed)
693
+ * - Element must not be covered by another element (checks elementFromPoint)
694
+ *
695
+ * Returns null if the element cannot be found or is not actionable.
696
+ * Returns { error: string } if found but not actionable (for diagnostics).
697
+ */
698
+ private async resolveRef(
699
+ refId: number
700
+ ): Promise<{ x: number; y: number; width: number; height: number; error?: string } | null> {
701
+ const cached = this.refCache.get(refId);
702
+ const role = cached?.role || "";
703
+ const name = cached?.name || "";
704
+ const roleJS = JSON.stringify(role);
705
+ const nameJS = JSON.stringify(name);
706
+
707
+ const result = await this.send("Runtime.evaluate", {
708
+ expression: `
709
+ (function() {
710
+ var refId = ${refId};
711
+ var role = ${roleJS};
712
+ var name = ${nameJS};
713
+
714
+ // Strategy 1: data attribute (fast, from last snapshot)
715
+ var el = document.querySelector('[data-assistme-ref="' + refId + '"]');
716
+
717
+ // Strategy 2: role + name search (stable, survives DOM changes)
718
+ if (!el && role && name) {
719
+ var selectorMap = {
720
+ textbox: 'input, textarea, [role="textbox"], [role="searchbox"]',
721
+ button: 'button, [role="button"], input[type="submit"], input[type="button"]',
722
+ link: 'a[href], [role="link"]',
723
+ combobox: 'select, [role="combobox"]',
724
+ checkbox: 'input[type="checkbox"], [role="checkbox"]',
725
+ radio: 'input[type="radio"], [role="radio"]',
726
+ tab: '[role="tab"]',
727
+ menuitem: '[role="menuitem"]',
728
+ option: '[role="option"], option',
729
+ };
730
+ var sel = selectorMap[role] || '*[role="' + role + '"]';
731
+ var candidates = document.querySelectorAll(sel);
732
+ for (var i = 0; i < candidates.length; i++) {
733
+ var c = candidates[i];
734
+ var cName = c.getAttribute('aria-label')
735
+ || c.getAttribute('placeholder')
736
+ || (c.textContent || '').trim().slice(0, 60);
737
+ if (cName === name) { el = c; break; }
738
+ }
739
+ }
740
+
741
+ if (!el) return 'null';
742
+
743
+ // ── Actionability checks (Playwright-style) ──────────────
744
+
745
+ // Check visibility
746
+ var style = window.getComputedStyle(el);
747
+ if (style.display === 'none')
748
+ return JSON.stringify({ error: 'Element is hidden (display:none)' });
749
+ if (style.visibility === 'hidden')
750
+ return JSON.stringify({ error: 'Element is hidden (visibility:hidden)' });
751
+ if (parseFloat(style.opacity) < 0.05)
752
+ return JSON.stringify({ error: 'Element is hidden (opacity:0)' });
753
+
754
+ // Check disabled
755
+ if (el.disabled || el.getAttribute('aria-disabled') === 'true')
756
+ return JSON.stringify({ error: 'Element is disabled' });
757
+
758
+ // Scroll into view
759
+ el.scrollIntoView({ block: 'center', behavior: 'instant' });
760
+ var r = el.getBoundingClientRect();
761
+
762
+ // Check non-zero size
763
+ if (r.width < 1 || r.height < 1)
764
+ return JSON.stringify({ error: 'Element has zero size (' + r.width + 'x' + r.height + ')' });
765
+
766
+ // Check element is in viewport
767
+ if (r.bottom < 0 || r.top > window.innerHeight || r.right < 0 || r.left > window.innerWidth)
768
+ return JSON.stringify({ error: 'Element is outside viewport after scroll' });
769
+
770
+ var cx = r.x + r.width / 2;
771
+ var cy = r.y + r.height / 2;
772
+
773
+ // Check not covered by another element (hit test)
774
+ var topEl = document.elementFromPoint(cx, cy);
775
+ if (topEl && topEl !== el && !el.contains(topEl) && !topEl.closest('[data-assistme-ref="' + refId + '"]')) {
776
+ // Check if the covering element is the overlay (ignore it)
777
+ if (!topEl.closest('#__assistme_refs__')) {
778
+ var coverTag = topEl.tagName.toLowerCase();
779
+ var coverText = (topEl.textContent || '').trim().slice(0, 30);
780
+ return JSON.stringify({
781
+ error: 'Element is covered by <' + coverTag + '>' + (coverText ? ' "' + coverText + '"' : ''),
782
+ x: cx, y: cy, width: r.width, height: r.height
783
+ });
784
+ }
785
+ }
786
+
787
+ return JSON.stringify({
788
+ x: cx,
789
+ y: cy,
790
+ width: r.width,
791
+ height: r.height
792
+ });
793
+ })()
794
+ `,
795
+ returnByValue: true,
796
+ });
797
+
798
+ const value = (result as CDPEvalResult).result?.value as string;
799
+ if (!value || value === "null") return null;
800
+ try {
801
+ return JSON.parse(value);
802
+ } catch {
803
+ return null;
804
+ }
805
+ }
806
+
807
+ // ── Ref-based Interactions (CDP Input Events) ─────────────────────
808
+
809
/**
 * Click the element behind a snapshot ref using real CDP mouse input.
 *
 * The click is delivered through Input.dispatchMouseEvent (move, press,
 * release) at the coordinates reported by resolveRef, instead of calling
 * el.click() inside the page.
 *
 * Auto-wait: resolveRef is consulted up to 3 times, 500ms apart, for as long
 * as it reports an actionability error (for example, the element is covered
 * by another element).
 *
 * @param refId - Ref number taken from the latest snapshot.
 * @returns A status line: "Clicked ...", "Cannot click ...: <reason>", or a
 *   "Ref [...] not found" hint asking for a new snapshot.
 */
async clickRef(refId: number): Promise<string> {
  this.ensureConnected();

  const maxRetries = 3;
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));
  // Label is built lazily so the ref cache is read at reporting time.
  const label = (): string => {
    const entry = this.refCache.get(refId);
    return `[${refId}] ${entry?.role || ""} "${entry?.name || ""}"`;
  };

  let lastError = "";
  let attempt = 0;

  while (attempt < maxRetries) {
    const target = await this.resolveRef(refId);
    if (!target) {
      return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
    }

    if (target.error) {
      lastError = target.error;
      const outOfAttempts = attempt >= maxRetries - 1;
      if (outOfAttempts) {
        // Last try still not actionable: surface the reason to the caller.
        return `Cannot click ${label()}: ${lastError}`;
      }
      // The element may still be animating or temporarily covered.
      await pause(500);
      attempt += 1;
      continue;
    }

    if (attempt === 0) {
      // Give the page a moment after the scroll, then refresh coordinates.
      await pause(50);
      const settled = await this.resolveRef(refId);
      if (settled && !settled.error) {
        target.x = settled.x;
        target.y = settled.y;
      }
    }

    // Move, press, release: the same sequence a physical click produces.
    await this.send("Input.dispatchMouseEvent", {
      type: "mouseMoved",
      x: target.x,
      y: target.y,
    });
    for (const type of ["mousePressed", "mouseReleased"]) {
      await this.send("Input.dispatchMouseEvent", {
        type,
        x: target.x,
        y: target.y,
        button: "left",
        clickCount: 1,
      });
    }

    await pause(300);
    return `Clicked ${label()}`;
  }

  // Defensive fallback; every loop iteration above returns or retries.
  return `Cannot click ${label()}: ${lastError}`;
}
886
+
887
/**
 * Replace the contents of an element (by ref) with the given text using CDP
 * input events.
 *
 * Steps: click the element to focus it, select all existing text
 * (Cmd+A on macOS, Ctrl+A elsewhere), delete the selection with Backspace,
 * then insert the new text with Input.insertText.
 *
 * If the focusing click does not succeed (ref missing, or the element never
 * became actionable), nothing is typed and the click's failure message is
 * returned. Typing anyway would send select-all/Backspace/insertText to
 * whichever element currently has focus.
 *
 * @param refId - Ref number taken from the latest snapshot.
 * @param text - Text that should end up in the element.
 * @returns "Typed ..." on success, otherwise the failure message from clickRef.
 */
async typeRef(refId: number, text: string): Promise<string> {
  this.ensureConnected();

  // Click to focus the element. Only a "Clicked ..." result means the click
  // was delivered; matching on "not found" would miss "Cannot click ..."
  // and would misfire on element names that contain "not found".
  const clickResult = await this.clickRef(refId);
  if (!clickResult.startsWith("Clicked ")) return clickResult;
  await new Promise((r) => setTimeout(r, 100));

  // CDP modifier bit field: 2 = Ctrl, 4 = Meta (Command on macOS).
  const modifier = platform() === "darwin" ? 4 : 2;
  await this.send("Input.dispatchKeyEvent", {
    type: "keyDown",
    modifiers: modifier,
    key: "a",
    code: "KeyA",
    windowsVirtualKeyCode: 65,
  });
  await this.send("Input.dispatchKeyEvent", {
    type: "keyUp",
    key: "a",
    code: "KeyA",
  });

  // Delete the selected text.
  await this.send("Input.dispatchKeyEvent", {
    type: "keyDown",
    key: "Backspace",
    code: "Backspace",
    windowsVirtualKeyCode: 8,
  });
  await this.send("Input.dispatchKeyEvent", {
    type: "keyUp",
    key: "Backspace",
    code: "Backspace",
  });

  // Insert the text through the browser's input pipeline.
  await this.send("Input.insertText", { text });

  await new Promise((r) => setTimeout(r, 100));
  const ref = this.refCache.get(refId);
  return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
}
935
+
936
/**
 * Select a dropdown option by ref.
 *
 * Delegates to selectOption, addressing the element through its
 * data-assistme-ref attribute, then rewrites any echo of that attribute
 * selector in the result into the friendlier `[ref] role "name"` form.
 *
 * @param refId - Ref number taken from the latest snapshot.
 * @param option - Visible text of the option to choose.
 * @returns The status message from selectOption, or a "Ref [...] not found"
 *   hint when the ref is not in the cache.
 */
async selectRef(refId: number, option: string): Promise<string> {
  this.ensureConnected();

  // The ref must be known from a previous snapshot.
  const cached = this.refCache.get(refId);
  if (!cached) {
    return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
  }

  const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);

  // Use a replacer function: with a plain replacement string, "$&", "$$",
  // "$`" or "$'" inside the element's name would be expanded as special
  // replacement patterns and corrupt the message.
  const label = `[${refId}] ${cached.role} "${cached.name}"`;
  return result.replace(/\[data-assistme-ref="\d+"\]/, () => label);
}
957
+
958
+ // ── Action Pipeline ───────────────────────────────────────────────
959
+
960
/**
 * Execute a batch of ref-based actions sequentially.
 *
 * Reduces round-trips: instead of one tool call per action, the caller
 * passes a list that is run in order. The batch is NOT transactional: it
 * stops at the first failed action (later refs may be stale) and actions
 * already performed are not undone.
 *
 * @param actions - Actions to run, in order.
 * @param takeScreenshot - When true, capture a screenshot after the batch.
 * @returns One result entry per executed action (including the failing one,
 *   if any) and, optionally, the screenshot.
 */
async act(
  actions: ActionSpec[],
  takeScreenshot = false
): Promise<{ results: ActionResult[]; screenshot?: string }> {
  this.ensureConnected();
  const results: ActionResult[] = [];

  for (const spec of actions) {
    let result: string;
    let success = true;

    try {
      switch (spec.action) {
        case "click":
          result = await this.clickRef(spec.ref);
          // Only "Clicked ..." means the click was delivered; "Cannot click
          // ..." and "Ref [...] not found" are both failures.
          success = result.startsWith("Clicked ");
          break;
        case "type":
          result = await this.typeRef(spec.ref, spec.text);
          success = result.startsWith("Typed ");
          break;
        case "select":
          result = await this.selectRef(spec.ref, spec.option);
          // "Selection attempted." is selectOption's fallback when the page
          // script returned no message; it stays counted as success.
          success = result.startsWith("Selected ") || result === "Selection attempted.";
          break;
        case "press":
          result = await this.pressKey(spec.key);
          break;
        case "scroll":
          result = spec.direction === "up" ? await this.scrollUp() : await this.scrollDown();
          break;
        case "wait": {
          // Clamp to [0, 5000] ms and report what was actually waited.
          const waitMs = Math.max(0, Math.min(Number(spec.ms) || 0, 5000));
          await new Promise((r) => setTimeout(r, waitMs));
          result = `Waited ${waitMs}ms`;
          break;
        }
        default:
          result = `Unknown action: ${(spec as { action: string }).action}`;
          success = false;
      }
    } catch (err) {
      result = `Error: ${err instanceof Error ? err.message : String(err)}`;
      success = false;
    }

    results.push({
      action: spec.action,
      ref: "ref" in spec ? (spec as { ref: number }).ref : undefined,
      result,
      success,
    });

    // If an action failed, stop the batch (remaining refs may be stale).
    if (!success) break;

    // Brief pause between actions for the DOM to settle.
    if (spec.action !== "wait") {
      await new Promise((r) => setTimeout(r, 200));
    }
  }

  let screenshot: string | undefined;
  if (takeScreenshot) {
    // Wait a bit for final DOM changes to settle.
    await new Promise((r) => setTimeout(r, 300));
    screenshot = await this.screenshot();
  }

  return { results, screenshot };
}
1036
+
1037
+ // ── Dropdown/Select ─────────────────────────────────────────────
1038
+
1039
/**
 * Select an option from a dropdown. Handles native <select> elements and
 * custom (Material Design / React / Angular style) dropdown components.
 *
 * Strategy:
 * 1. Treat `selector` as CSS. If it matches a native <select>, pick the
 *    option whose trimmed text equals `optionText` and fire change/input.
 * 2. Otherwise use the matched element as the dropdown trigger. If nothing
 *    matched (or `selector` is not valid CSS), look for an element whose own
 *    text or aria-label equals `selector`.
 * 3. Click the trigger, wait 300ms for the menu to render, then click the
 *    entry whose trimmed text equals `optionText`.
 *
 * @param selector - CSS selector, or the label / aria-label text of the trigger.
 * @param optionText - Exact visible text of the option to choose.
 * @returns A status message from the page script, or "Selection attempted."
 *   when the script produced no value.
 */
async selectOption(selector: string, optionText: string): Promise<string> {
  this.ensureConnected();
  // JSON.stringify yields safely quoted JS string literals for the script.
  const selectorJS = JSON.stringify(selector);
  const optionJS = JSON.stringify(optionText);

  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        var sel = ${selectorJS};
        var optText = ${optionJS};

        // querySelector throws on text that is not valid CSS (for example a
        // label such as "What's your age?"). Treat that as "no match" so the
        // label-text lookup below still gets a chance.
        var selectEl = null;
        try {
          selectEl = document.querySelector(sel);
        } catch (e) {
          selectEl = null;
        }

        // Strategy 1: Native <select> element
        if (selectEl && selectEl.tagName === 'SELECT') {
          var options = selectEl.querySelectorAll('option');
          for (var i = 0; i < options.length; i++) {
            if (options[i].textContent.trim() === optText) {
              selectEl.value = options[i].value;
              selectEl.dispatchEvent(new Event('change', { bubbles: true }));
              selectEl.dispatchEvent(new Event('input', { bubbles: true }));
              return 'Selected "' + optText + '" in native select';
            }
          }
          return 'Option "' + optText + '" not found in select. Available: ' +
            Array.from(options).map(function(o) { return o.textContent.trim(); }).join(', ');
        }

        // Strategy 2: Custom dropdown — find the trigger element
        var trigger = selectEl;
        if (!trigger) {
          // Try finding by label/placeholder text
          var allEls = document.querySelectorAll('*');
          for (var j = 0; j < allEls.length; j++) {
            var el = allEls[j];
            var ownText = Array.from(el.childNodes)
              .filter(function(n) { return n.nodeType === 3; })
              .map(function(n) { return n.textContent.trim(); })
              .join('');
            if (ownText === sel || el.getAttribute('aria-label') === sel) {
              trigger = el;
              break;
            }
          }
        }

        if (!trigger) return 'Dropdown not found: ' + sel;

        // Click to open the dropdown
        trigger.scrollIntoView({ block: 'center', behavior: 'instant' });
        trigger.click();

        // Wait for the dropdown menu to render, then select the option
        return new Promise(function(resolve) {
          setTimeout(function() {
            // Look for the option in listbox/menu/dropdown overlays
            var optionContainers = document.querySelectorAll(
              '[role="listbox"], [role="menu"], [role="presentation"], .MuiMenu-list, .MuiList-root, ul.mdc-list, .VfPpkd-xl07Ob'
            );

            // Also check common option elements as fallback
            var searchIn = optionContainers.length > 0
              ? Array.from(optionContainers).flatMap(function(c) { return Array.from(c.querySelectorAll('*')); })
              : Array.from(document.querySelectorAll('li, [role="option"], [role="menuitem"], div[data-value]'));

            for (var k = 0; k < searchIn.length; k++) {
              var opt = searchIn[k];
              var txt = opt.textContent ? opt.textContent.trim() : '';
              if (txt === optText) {
                opt.scrollIntoView({ block: 'center', behavior: 'instant' });
                opt.click();
                resolve('Selected "' + optText + '" from custom dropdown');
                return;
              }
            }

            // Broader search: any visible leaf element with exact text match
            var everything = document.querySelectorAll('*');
            for (var m = 0; m < everything.length; m++) {
              var candidate = everything[m];
              if (candidate.textContent && candidate.textContent.trim() === optText &&
                  candidate.offsetParent !== null && candidate.children.length === 0) {
                candidate.click();
                resolve('Selected "' + optText + '" (broad match)');
                return;
              }
            }

            resolve('Option "' + optText + '" not found in dropdown');
          }, 300);
        });
      })()
    `,
    returnByValue: true,
    awaitPromise: true,
  });

  // Let the page react to the selection before the caller continues.
  await new Promise((r) => setTimeout(r, 500));
  return ((result as CDPEvalResult).result?.value as string) || "Selection attempted.";
}
1146
+
1147
+ // ── JavaScript Evaluation ───────────────────────────────────────
1148
+
1149
/**
 * Evaluate a JavaScript expression in the connected tab.
 *
 * The expression is sent through Runtime.evaluate with returnByValue and
 * awaitPromise enabled.
 *
 * @param expression - JavaScript source to evaluate in the page.
 * @returns The value as-is when it is a string, pretty-printed JSON for any
 *   other defined value, or the remote object's description (falling back to
 *   "(undefined)") when no value came back.
 */
async evaluate(expression: string): Promise<string> {
  this.ensureConnected();

  const response = await this.send("Runtime.evaluate", {
    expression,
    returnByValue: true,
    awaitPromise: true,
  });
  const remote = (response as CDPEvalResult).result;
  const value = remote?.value;

  if (value !== undefined) {
    return typeof value === "string" ? value : JSON.stringify(value, null, 2);
  }
  return remote?.description || "(undefined)";
}
1165
+
1166
+ // ── Tab Management ──────────────────────────────────────────────
1167
+
1168
/**
 * Describe the open page tabs as text.
 *
 * Only targets of type "page" are listed. Each entry shows its index, the
 * title truncated to 60 characters, an "(active)" marker for the tab this
 * controller is attached to, and the URL on a second line.
 *
 * @returns The formatted list, or "No tabs open." when there are no pages.
 */
async listTabs(): Promise<string> {
  const pages = (await this.getTabs()).filter((tab) => tab.type === "page");
  if (pages.length === 0) return "No tabs open.";

  const entries: string[] = [];
  pages.forEach((tab, position) => {
    const activeMark = this.currentTabId === tab.id ? " (active)" : "";
    entries.push(`[${position}] ${tab.title.slice(0, 60)}${activeMark}\n ${tab.url}`);
  });
  return entries.join("\n\n");
}
1181
+
1182
/**
 * Attach the controller to a different page tab.
 *
 * The index refers to the position among targets of type "page", i.e. the
 * same numbering that listTabs prints. The index is validated before the
 * current connection is dropped, so a bad index leaves the session intact.
 *
 * @param index - Zero-based index of the page tab to switch to.
 * @returns The message from connect on success, or an explanation when the
 *   index is invalid or there are no page tabs.
 */
async switchTab(index: number): Promise<string> {
  const tabs = await this.getTabs();
  const pageTabs = tabs.filter((t) => t.type === "page");

  // Without this guard the range below would read "0--1".
  if (pageTabs.length === 0) return "No tabs open.";

  // Reject fractional / NaN indices too: they would pass a pure range check
  // and only fail after the current tab had already been disconnected.
  if (!Number.isInteger(index) || index < 0 || index >= pageTabs.length) {
    return `Invalid tab index. Available: 0-${pageTabs.length - 1}`;
  }

  // Disconnect from the current tab, then connect to the requested one.
  await this.disconnect();
  return this.connect(index);
}
1196
+
1197
/**
 * Open a new browser tab through the DevTools HTTP endpoint and attach to it.
 *
 * @param url - URL to load in the new tab; defaults to "about:blank".
 * @returns A confirmation message. If the new tab cannot be located among
 *   the page targets, the controller stays on its current tab and the
 *   message says so.
 * @throws Error when the DevTools endpoint rejects the request.
 */
async openNewTab(url?: string): Promise<string> {
  const targetUrl = url || "about:blank";

  // Current Chrome only accepts PUT for /json/new (GET is rejected as an
  // unsafe verb). NOTE(review): older builds are believed to accept any
  // verb, which would make PUT safe everywhere; confirm the minimum
  // supported Chrome version.
  const res = await fetch(
    `http://127.0.0.1:${this.debugPort}/json/new?${encodeURIComponent(targetUrl)}`,
    { method: "PUT", signal: AbortSignal.timeout(5000) }
  );
  if (!res.ok) {
    // Report the endpoint's own message instead of a JSON parse error.
    const detail = (await res.text().catch(() => "")).trim();
    throw new Error(
      `Failed to open new tab (HTTP ${res.status})${detail ? `: ${detail}` : ""}`
    );
  }
  const tab = (await res.json()) as CDPTab;

  // Locate the new tab before dropping the current connection, so a failed
  // lookup does not leave the controller disconnected.
  const tabs = await this.getTabs();
  const idx = tabs.filter((t) => t.type === "page").findIndex((t) => t.id === tab.id);
  if (idx < 0) {
    return `Opened new tab: ${targetUrl} (could not switch to it; still attached to the previous tab)`;
  }

  await this.disconnect();
  await this.connect(idx);

  return `Opened new tab: ${targetUrl}`;
}
1215
+
1216
+ // ── Helpers ─────────────────────────────────────────────────────
1217
+
1218
/**
 * Poll the page until document.readyState is "interactive" or "complete".
 *
 * Polls every 300ms. Once the document is ready, waits a further 500ms for
 * dynamic content and returns. Evaluation errors are ignored (the tab may be
 * mid-navigation). Returns silently when the timeout elapses.
 *
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 */
private async waitForLoad(timeoutMs = 8000): Promise<void> {
  const sleep = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
  const startedAt = Date.now();

  while (Date.now() - startedAt < timeoutMs) {
    let readyState: unknown;
    try {
      const reply = await this.send("Runtime.evaluate", {
        expression: "document.readyState",
        returnByValue: true,
      });
      readyState = (reply as CDPEvalResult).result?.value;
    } catch {
      // The tab might be navigating; treat as "not ready yet".
      readyState = undefined;
    }

    if (readyState === "complete" || readyState === "interactive") {
      // Extra small wait for dynamic content.
      await sleep(500);
      return;
    }

    await sleep(300);
  }
}
1238
+
1239
/**
 * List interactive elements on the page so the AI can see what is clickable.
 *
 * Scans links, buttons, form controls, [role="button"] and [onclick]
 * elements, skips those with a zero-size bounding box, and returns at most
 * 50 entries.
 *
 * Each entry carries a CSS selector chosen in this order:
 *   1. `#id` when the element has an id,
 *   2. `[data-testid="..."]` when it has a data-testid,
 *   3. a `parent > child:nth-of-type(n)` path, anchored at the nearest
 *      ancestor with an id or at the document root.
 *
 * @returns A JSON array (as a string) of element descriptors; "[]" when the
 *   page script returned nothing.
 */
async getInteractiveElements(): Promise<string> {
  this.ensureConnected();
  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        // Build a path selector. A bare "tag:nth-of-type(n)" matches that
        // position under every parent on the page, so the path is extended
        // upwards until an id (or the root) anchors it.
        function pathSelector(start) {
          var parts = [];
          var node = start;
          while (node && node.nodeType === 1) {
            if (node.id) {
              parts.unshift('#' + CSS.escape(node.id));
              break;
            }
            var tag = node.tagName.toLowerCase();
            var parent = node.parentElement;
            if (!parent) {
              parts.unshift(tag);
              break;
            }
            var position = 0;
            var siblings = parent.children;
            for (var s = 0; s < siblings.length; s++) {
              if (siblings[s].tagName === node.tagName) {
                position++;
                if (siblings[s] === node) break;
              }
            }
            parts.unshift(tag + ':nth-of-type(' + position + ')');
            node = parent;
          }
          return parts.join(' > ');
        }

        const elements = [];
        const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
        const all = document.querySelectorAll(selectors);
        for (let i = 0; i < all.length && elements.length < 50; i++) {
          const el = all[i];
          const rect = el.getBoundingClientRect();
          if (rect.width === 0 || rect.height === 0) continue; // Skip hidden

          let selector;
          const testId = el.getAttribute('data-testid');
          if (el.id) {
            selector = '#' + CSS.escape(el.id);
          } else if (testId) {
            // Escape the value so quotes or backslashes cannot break the selector.
            selector = '[data-testid="' + CSS.escape(testId) + '"]';
          } else {
            selector = pathSelector(el);
          }

          elements.push({
            tag: el.tagName.toLowerCase(),
            text: (el.textContent || '').trim().slice(0, 80),
            type: el.getAttribute('type') || '',
            name: el.getAttribute('name') || '',
            id: el.id || '',
            href: el.getAttribute('href') || '',
            placeholder: el.getAttribute('placeholder') || '',
            selector: selector,
          });
        }
        return JSON.stringify(elements, null, 2);
      })()
    `,
    returnByValue: true,
  });

  return ((result as CDPEvalResult).result?.value as string) || "[]";
}
1293
+
1294
/**
 * Report whether the controller is attached to a tab: the connected flag
 * must be set and the WebSocket must be in the OPEN state.
 */
isConnected(): boolean {
  if (!this.connected) return false;
  const socketState = this.ws?.readyState;
  return socketState === WebSocket.OPEN;
}
1297
+
1298
+ // ── Login Detection ────────────────────────────────────────────
1299
+
1300
/**
 * Heuristically decide whether the current page is a login page.
 *
 * Checks run inside the page, in this order, and the first hit wins:
 *   1. URL contains a signup/registration pattern -> not a login page.
 *   2. URL contains a known login pattern -> login page.
 *   3. A visible password input exists -> login page.
 *   4. A form whose action mentions login/signin/session/auth -> login page.
 *
 * Any failure (evaluation error, unparsable result) is reported as
 * "not a login page".
 *
 * @returns The verdict and, for positive results, the reason for it.
 */
async detectLoginPage(): Promise<{ isLoginPage: boolean; reason: string }> {
  try {
    const response = await this.send("Runtime.evaluate", {
      expression: `
        (function() {
          var href = window.location.href.toLowerCase();

          function verdict(isLogin, why) {
            return JSON.stringify({ isLoginPage: isLogin, reason: why });
          }

          // First pattern contained in the URL, or null when none is.
          function firstHit(patterns) {
            for (var p = 0; p < patterns.length; p++) {
              if (href.indexOf(patterns[p]) !== -1) return patterns[p];
            }
            return null;
          }

          function isShown(node) {
            var box = node.getBoundingClientRect();
            var css = window.getComputedStyle(node);
            return box.width > 0 && box.height > 0 &&
              css.display !== 'none' && css.visibility !== 'hidden';
          }

          // Signup/registration pages are explicitly NOT login pages.
          var signupHit = firstHit([
            '/signup', '/sign-up', '/sign_up', '/register',
            '/registration', '/create-account', '/create_account',
            '/join', '/enroll',
            'accounts.google.com/lifecycle/steps/signup',
            'signup.live.com'
          ]);
          if (signupHit !== null) return verdict(false, '');

          // URL-based detection.
          var loginHit = firstHit([
            '/login', '/signin', '/sign-in', '/sign_in',
            '/auth/', '/sso/', '/oauth/', '/session/new',
            '/accounts/login', '/users/sign_in',
            'accounts.google.com/v3/signin',
            'accounts.google.com/servicelogin',
            'login.microsoftonline.com',
            'github.com/login', 'github.com/session',
            'login.live.com', 'appleid.apple.com'
          ]);
          if (loginHit !== null) {
            return verdict(true, 'URL contains login pattern: ' + loginHit);
          }

          // Visible password field.
          var passwordFields = document.querySelectorAll('input[type="password"]');
          for (var f = 0; f < passwordFields.length; f++) {
            if (isShown(passwordFields[f])) {
              return verdict(true, 'Page contains visible password input field');
            }
          }

          // Form whose action looks like an authentication endpoint.
          var authForms = document.querySelectorAll(
            'form[action*="login"],form[action*="signin"],form[action*="session"],form[action*="auth"],form[action*="authenticate"]'
          );
          if (authForms.length > 0) {
            return verdict(true, 'Page contains login form');
          }

          return verdict(false, '');
        })()
      `,
      returnByValue: true,
    });

    const payload = (response as CDPEvalResult).result?.value as string;
    return JSON.parse(payload || '{"isLoginPage":false,"reason":""}');
  } catch {
    // Best-effort detection: treat any failure as "not a login page".
    return { isLoginPage: false, reason: "" };
  }
}
1386
+ }