assistme 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PLAN.md +14 -3
- package/dist/{chunk-UWE5WVQI.js → chunk-KX7ITO55.js} +20 -11
- package/dist/index.js +1791 -572
- package/dist/{job-runner-N4XAAWLJ.js → job-runner-P2L6MOOX.js} +1 -1
- package/package.json +5 -3
- package/src/agent/job-runner.ts +9 -13
- package/src/agent/mcp-servers.ts +6 -1020
- package/src/agent/memory.ts +2 -11
- package/src/agent/processor.ts +18 -108
- package/src/agent/scheduler.ts +2 -3
- package/src/agent/session.ts +20 -36
- package/src/agent/skills.ts +167 -61
- package/src/agent/system-prompt.ts +126 -0
- package/src/browser/chrome-launcher.ts +555 -0
- package/src/browser/controller.ts +1386 -0
- package/src/browser/types.ts +70 -0
- package/src/commands/credential.ts +190 -0
- package/src/commands/job.ts +14 -45
- package/src/commands/memory.ts +16 -29
- package/src/commands/schedule.ts +15 -37
- package/src/commands/start.ts +11 -43
- package/src/credentials/credential-store.test.ts +162 -0
- package/src/credentials/credential-store.ts +266 -0
- package/src/credentials/encryption.test.ts +98 -0
- package/src/credentials/encryption.ts +82 -0
- package/src/credentials/index.ts +15 -0
- package/src/credentials/local-store.ts +89 -0
- package/src/db/action.ts +19 -0
- package/src/db/api-client.ts +3 -32
- package/src/db/auth-store.ts +41 -0
- package/src/db/auth.ts +38 -0
- package/src/db/conversation.ts +39 -0
- package/src/db/event.ts +52 -0
- package/src/db/job-poll.ts +18 -0
- package/src/db/session.ts +60 -0
- package/src/db/supabase.ts +40 -383
- package/src/db/task.ts +69 -0
- package/src/db/types.ts +54 -0
- package/src/index.ts +2 -0
- package/src/mcp/agent-tools-server.ts +1047 -0
- package/src/mcp/browser-server.ts +258 -0
- package/src/tools/browser.ts +28 -1208
- package/src/tools/index.ts +32 -263
- package/src/tools/web.ts +0 -73
|
@@ -0,0 +1,1386 @@
|
|
|
1
|
+
import { WebSocket } from "ws";
|
|
2
|
+
import { platform } from "node:os";
|
|
3
|
+
import type {
|
|
4
|
+
CDPTab,
|
|
5
|
+
CDPResponse,
|
|
6
|
+
CDPEvalResult,
|
|
7
|
+
CDPScreenshotResult,
|
|
8
|
+
BoundingBox,
|
|
9
|
+
RefEntry,
|
|
10
|
+
SnapshotResult,
|
|
11
|
+
ActionSpec,
|
|
12
|
+
ActionResult,
|
|
13
|
+
} from "./types.js";
|
|
14
|
+
|
|
15
|
+
export class BrowserController {
|
|
16
|
+
private ws: WebSocket | null = null;
|
|
17
|
+
private debugPort: number;
|
|
18
|
+
private messageId = 0;
|
|
19
|
+
private callbacks = new Map<number, (response: CDPResponse) => void>();
|
|
20
|
+
private connected = false;
|
|
21
|
+
private currentTabId: string | null = null;
|
|
22
|
+
private refCache: Map<number, RefEntry> = new Map();
|
|
23
|
+
|
|
24
|
+
/**
 * Create a controller bound to a Chrome remote-debugging port.
 *
 * No connection is opened here; the port is only stored for later use.
 *
 * @param port - Remote-debugging port to talk to (defaults to 9222).
 */
constructor(port = 9222) {
  this.debugPort = port;
}
|
|
27
|
+
|
|
28
|
+
// ── Connection ──────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
/**
 * Probe the DevTools HTTP endpoint to see whether the browser is reachable.
 *
 * @returns `true` when `/json/version` answers with an OK status within
 *   2 seconds; `false` on a non-OK status, a network failure, or a timeout.
 */
async isAvailable(): Promise<boolean> {
  const versionUrl = `http://127.0.0.1:${this.debugPort}/json/version`;
  try {
    const { ok } = await fetch(versionUrl, { signal: AbortSignal.timeout(2000) });
    return ok;
  } catch {
    // Any failure (refused connection, abort on timeout, ...) means "not available".
    return false;
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Attach to a page tab over its DevTools WebSocket.
 *
 * If a connection is already open it is reused when no tab is requested or
 * when the requested tab is the one currently attached; otherwise the
 * current connection is closed and a new one is opened.
 *
 * @param tabIndex - Zero-based index into the list of targets whose type is
 *   "page". Defaults to the first page tab.
 * @returns A human-readable status message.
 * @throws Error when the browser is unreachable, no page tab exists, the
 *   index is out of range, the tab has no WebSocket debugger URL, the socket
 *   errors or closes before opening, or the connection takes longer than 5 s.
 */
async connect(tabIndex?: number): Promise<string> {
  // Reuse existing connection if still open and targeting the same tab
  if (this.connected && this.ws?.readyState === WebSocket.OPEN) {
    if (tabIndex === undefined) {
      return "Already connected to browser.";
    }
    // If a specific tab is requested, check if we're already on it
    const openTabs = (await this.getTabs()).filter((t) => t.type === "page");
    const requestedTab = openTabs[tabIndex];
    if (!requestedTab) {
      // Validate BEFORE tearing down the working connection.
      throw new Error(
        `Tab index ${tabIndex} is out of range (${openTabs.length} page tab(s) open).`
      );
    }
    if (requestedTab.id === this.currentTabId) {
      return `Already connected to tab: "${requestedTab.title}"`;
    }
    // Need to switch — disconnect first
    await this.disconnect();
  }

  const available = await this.isAvailable();
  if (!available) {
    throw new Error(
      `Cannot connect to browser on port ${this.debugPort}. ` +
        "Chrome remote debugging is not reachable. " +
        "Please ensure Chrome is running with remote debugging enabled."
    );
  }

  const tabs = await this.getTabs();
  const pageTabs = tabs.filter((t) => t.type === "page");

  if (pageTabs.length === 0) {
    throw new Error("No browser tabs found. Please open at least one tab.");
  }

  const index = tabIndex ?? 0;
  const targetTab = pageTabs[index];
  if (!targetTab) {
    // Previously this fell through to a TypeError on `undefined`.
    throw new Error(
      `Tab index ${index} is out of range (${pageTabs.length} page tab(s) open).`
    );
  }
  const debuggerUrl = targetTab.webSocketDebuggerUrl;
  if (!debuggerUrl) {
    throw new Error("Tab does not expose a WebSocket debugger URL.");
  }

  this.currentTabId = targetTab.id;

  return new Promise((resolve, reject) => {
    let settled = false;
    // Keep a local handle: `this.ws` may be replaced by a later connect()
    // while this socket's events are still being delivered.
    const socket = new WebSocket(debuggerUrl);
    this.ws = socket;

    const connectTimeout = setTimeout(() => {
      if (!settled) {
        settled = true;
        socket.close();
        reject(new Error("Connection timeout (5s)"));
      }
    }, 5000);

    socket.on("open", () => {
      if (settled) return;
      settled = true;
      clearTimeout(connectTimeout);
      this.connected = true;
      // Enable required domains (best effort; failures are ignored)
      this.send("Page.enable").catch(() => {});
      this.send("Runtime.enable").catch(() => {});
      this.send("DOM.enable").catch(() => {});
      resolve(`Connected to tab: "${targetTab.title}" (${targetTab.url})`);
    });

    socket.on("message", (data) => {
      try {
        const msg = JSON.parse(data.toString()) as CDPResponse;
        // Only command responses carry an id we registered; messages without
        // a matching id (such as protocol events) are dropped here.
        if (msg.id !== undefined && this.callbacks.has(msg.id)) {
          this.callbacks.get(msg.id)!(msg);
          this.callbacks.delete(msg.id);
        }
      } catch {
        // Ignore payloads that are not valid JSON
      }
    });

    socket.on("error", (err) => {
      // Do not touch the state of a newer connection.
      if (this.ws === socket) {
        this.connected = false;
      }
      if (!settled) {
        settled = true;
        clearTimeout(connectTimeout);
        reject(new Error(`WebSocket error: ${err.message}`));
      }
    });

    socket.on("close", () => {
      if (!settled) {
        // Closed before it ever opened: fail now instead of waiting for the timeout.
        settled = true;
        clearTimeout(connectTimeout);
        reject(new Error("WebSocket closed before the connection was established"));
      }
      // A stale socket (already replaced by a newer connect()) must not reset
      // the new connection's state or reject its pending commands.
      if (this.ws !== null && this.ws !== socket) return;

      this.connected = false;
      this.ws = null;
      // Reject pending CDP commands so they don't hang forever
      for (const [id, cb] of this.callbacks) {
        cb({ id, error: { code: -1, message: "WebSocket closed" } });
      }
      this.callbacks.clear();
    });
  });
}
|
|
137
|
+
|
|
138
|
+
/**
 * Close the current WebSocket, if any, and mark the controller as disconnected.
 *
 * Safe to call when no socket exists.
 *
 * @returns A fixed confirmation message.
 */
async disconnect(): Promise<string> {
  const socket = this.ws;
  if (socket !== null) {
    socket.close();
    this.ws = null;
    this.connected = false;
  }
  return "Disconnected from browser.";
}
|
|
146
|
+
|
|
147
|
+
// ── CDP Protocol ────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
/**
 * Fetch the list of debuggable targets from the DevTools `/json` endpoint.
 *
 * @returns The parsed target list (not filtered by type).
 * @throws Error when the endpoint answers with a non-OK status, the payload
 *   is not an array, or the request fails or exceeds the 3 s timeout.
 */
private async getTabs(): Promise<CDPTab[]> {
  const res = await fetch(`http://127.0.0.1:${this.debugPort}/json`, {
    signal: AbortSignal.timeout(3000),
  });
  if (!res.ok) {
    throw new Error(`Failed to list browser tabs: HTTP ${res.status} ${res.statusText}`);
  }
  const payload: unknown = await res.json();
  if (!Array.isArray(payload)) {
    // Fail here with a clear message instead of a later `.filter is not a function`.
    throw new Error("Unexpected response from the browser tab list endpoint (expected an array).");
  }
  return payload as CDPTab[];
}
|
|
155
|
+
|
|
156
|
+
/**
 * Send one CDP command over the open WebSocket and wait for its response.
 *
 * @param method - CDP method name, e.g. "Page.navigate".
 * @param params - Optional command parameters.
 * @returns The `result` object of the response (an empty object when absent).
 * @throws Error (as a rejection) when no socket is open, the command cannot
 *   be written to the socket, the response carries an error, or no response
 *   arrives within 15 s.
 */
private send(method: string, params?: Record<string, unknown>): Promise<Record<string, unknown>> {
  return new Promise((resolve, reject) => {
    const socket = this.ws;
    if (!socket || socket.readyState !== WebSocket.OPEN) {
      reject(new Error("Not connected to browser. Call browser_connect first."));
      return;
    }

    const id = ++this.messageId;
    const timeout = setTimeout(() => {
      this.callbacks.delete(id);
      reject(new Error(`CDP command timed out: ${method}`));
    }, 15000);

    this.callbacks.set(id, (response) => {
      clearTimeout(timeout);
      if (response.error) {
        reject(new Error(`CDP error: ${response.error.message}`));
      } else {
        resolve(response.result ?? {});
      }
    });

    // If the write fails, release the timer and callback right away instead
    // of leaving them registered until the 15 s timeout fires.
    const failSend = (cause: unknown) => {
      clearTimeout(timeout);
      this.callbacks.delete(id);
      const detail = cause instanceof Error ? cause.message : String(cause);
      reject(new Error(`Failed to send CDP command ${method}: ${detail}`));
    };

    try {
      socket.send(JSON.stringify({ id, method, params }), (err) => {
        if (err) failSend(err);
      });
    } catch (err) {
      // Synchronous failures: unserializable params or a socket that just closed.
      failSend(err);
    }
  });
}
|
|
181
|
+
|
|
182
|
+
/**
 * Guard used by public operations: throws unless the controller is flagged
 * as connected and its WebSocket is in the OPEN state.
 *
 * @throws Error when there is no usable connection.
 */
private ensureConnected() {
  const socketOpen = this.ws !== null && this.ws.readyState === WebSocket.OPEN;
  if (this.connected && socketOpen) {
    return;
  }
  throw new Error("Not connected to browser. Use browser_connect tool first.");
}
|
|
187
|
+
|
|
188
|
+
// ── Navigation ──────────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
/**
 * Navigate the attached tab to `url` and report where it ended up.
 *
 * @param url - Address passed to the `Page.navigate` command.
 * @returns A two-line message with the title and URL read from the page
 *   after `waitForLoad()` returns.
 * @throws Error when not connected or when a CDP command fails.
 */
async navigate(url: string): Promise<string> {
  this.ensureConnected();
  await this.send("Page.navigate", { url });
  // Wait for load
  await this.waitForLoad();
  const { title, url: landedUrl } = await this.getPageInfo();
  return `Navigated to: ${title}\nURL: ${landedUrl}`;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Go back one entry in the tab's navigation history.
 *
 * The previous entry is looked up with `Page.getNavigationHistory` and
 * activated with `Page.navigateToHistoryEntry`. Only when that route fails
 * (or there is no previous entry) does it fall back to
 * `window.history.back()`, so the page never goes back twice.
 *
 * @returns A message with the title of the page after `waitForLoad()` returns.
 * @throws Error when not connected or when the fallback or page-info commands fail.
 */
async goBack(): Promise<string> {
  this.ensureConnected();

  let wentBack = false;
  try {
    const { currentIndex, entries } = await this.send("Page.getNavigationHistory");
    if (typeof currentIndex === "number" && currentIndex > 0 && Array.isArray(entries)) {
      const previous = entries[currentIndex - 1] as { id?: unknown } | undefined;
      if (typeof previous?.id === "number") {
        await this.send("Page.navigateToHistoryEntry", { entryId: previous.id });
        wentBack = true;
      }
    }
  } catch {
    // Best effort: fall through to the JS fallback below.
  }

  if (!wentBack) {
    // Fallback: use JS
    await this.evaluate("window.history.back()");
  }

  await this.waitForLoad();
  const info = await this.getPageInfo();
  return `Went back to: ${info.title}`;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Reload the attached tab and wait for `waitForLoad()` to return.
 *
 * @returns A fixed confirmation message.
 * @throws Error when not connected or when the CDP command fails.
 */
async reload(): Promise<string> {
  this.ensureConnected();

  await this.send("Page.reload");
  await this.waitForLoad();

  return "Page reloaded.";
}
|
|
217
|
+
|
|
218
|
+
// ── Page Reading ────────────────────────────────────────────────
|
|
219
|
+
|
|
220
|
+
/**
 * Extract a plain-text view of the current page.
 *
 * The in-page script emits the title and URL, then the `innerText` of a clone
 * of `document.body` with script/style/noscript/svg/iframe elements removed.
 * Each line is trimmed, blank lines are dropped, and the output is capped at
 * 30000 characters.
 *
 * @returns The extracted text, or "Could not read page content." when the
 *   evaluation yields no (or an empty) value.
 * @throws Error when not connected or when the CDP command fails.
 */
async readPage(): Promise<string> {
  this.ensureConnected();

  const extractionScript = `
    (function() {
      // Get page title and URL
      let output = "Title: " + document.title + "\\n";
      output += "URL: " + window.location.href + "\\n\\n";

      // Get main text content, cleaned up
      const body = document.body.cloneNode(true);
      // Remove scripts, styles, navs that add noise
      body.querySelectorAll('script, style, noscript, svg, iframe').forEach(el => el.remove());

      const text = body.innerText
        .split('\\n')
        .map(line => line.trim())
        .filter(line => line.length > 0)
        .join('\\n');

      output += text;
      return output.slice(0, 30000);
    })()
  `;

  const evaluation = (await this.send("Runtime.evaluate", {
    expression: extractionScript,
    returnByValue: true,
  })) as CDPEvalResult;

  const pageText = evaluation.result?.value as string;
  return pageText || "Could not read page content.";
}
|
|
249
|
+
|
|
250
|
+
/**
 * Read the text of the first element matching a CSS selector.
 *
 * @param selector - CSS selector passed to `document.querySelector`; it is
 *   embedded in the in-page script via `JSON.stringify`, so quotes and
 *   backslashes are safe.
 * @returns The element's `innerText`, `textContent`, or `value` (first
 *   non-empty), "(empty)" when all are empty, or an "Element not found"
 *   message when nothing matches or the evaluation yields no value.
 * @throws Error when not connected or when the CDP command fails.
 */
async readElement(selector: string): Promise<string> {
  this.ensureConnected();

  const quotedSelector = JSON.stringify(selector);
  const lookupScript = `
    (function() {
      const el = document.querySelector(${quotedSelector});
      if (!el) return 'Element not found: ' + ${quotedSelector};
      return el.innerText || el.textContent || el.value || '(empty)';
    })()
  `;

  const evaluation = (await this.send("Runtime.evaluate", {
    expression: lookupScript,
    returnByValue: true,
  })) as CDPEvalResult;

  const elementText = evaluation.result?.value as string;
  return elementText || "Element not found.";
}
|
|
266
|
+
|
|
267
|
+
/**
 * Read the current document title and URL from the page.
 *
 * @returns Always an object with string `title` and `url`. Any field that
 *   cannot be obtained (no value returned, invalid JSON, wrong type) falls
 *   back to "Unknown" / "unknown" instead of being left undefined.
 * @throws Error when the CDP command itself fails (e.g. not connected).
 */
async getPageInfo(): Promise<{ title: string; url: string }> {
  const fallback = { title: "Unknown", url: "unknown" };

  const result = await this.send("Runtime.evaluate", {
    expression: `JSON.stringify({ title: document.title, url: window.location.href })`,
    returnByValue: true,
  });

  const raw: unknown = (result as CDPEvalResult).result?.value;
  if (typeof raw !== "string") {
    // Evaluation produced no value (e.g. the script threw in the page).
    return fallback;
  }

  try {
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed !== "object" || parsed === null) {
      return fallback;
    }
    const { title, url } = parsed as { title?: unknown; url?: unknown };
    return {
      title: typeof title === "string" ? title : fallback.title,
      url: typeof url === "string" ? url : fallback.url,
    };
  } catch {
    return fallback;
  }
}
|
|
278
|
+
|
|
279
|
+
// ── Screenshots (for Claude vision) ─────────────────────────────
|
|
280
|
+
|
|
281
|
+
/**
 * Capture the visible viewport as an image.
 *
 * @returns The `data` field of the `Page.captureScreenshot` response
 *   (base64-encoded PNG per the requested format), or an empty string when
 *   the response carries no data.
 * @throws Error when not connected or when the CDP command fails.
 */
async screenshot(): Promise<string> {
  this.ensureConnected();

  const captureParams = {
    format: "png",
    quality: 80,
    captureBeyondViewport: false,
  };
  const capture = (await this.send("Page.captureScreenshot", captureParams)) as CDPScreenshotResult;

  // Returns base64-encoded PNG
  return capture.data || "";
}
|
|
291
|
+
|
|
292
|
+
// ── Interactions ────────────────────────────────────────────────
|
|
293
|
+
|
|
294
|
+
/**
 * Click the first element matching a selector via an in-page `el.click()`.
 *
 * Besides plain CSS, a trailing `:contains('text')` pseudo-selector is
 * supported (optionally prefixed by a base selector). It matches elements
 * whose own text nodes or whole `textContent` equal the text exactly, and
 * prefers the deepest match.
 *
 * @param selector - CSS selector, or `base:contains('text')`.
 * @returns "Clicked: ..." on success, "Element not found: ..." when nothing
 *   matches, or "Click failed ..." when the in-page script throws (e.g. an
 *   invalid selector). The script error is no longer reported as success.
 * @throws Error when not connected or when the CDP command fails.
 */
async click(selector: string): Promise<string> {
  this.ensureConnected();
  const selectorJS = JSON.stringify(selector);

  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        var sel = ${selectorJS};

        // Support :contains('text') pseudo-selector (not native CSS)
        var containsMatch = sel.match(/^(.+?)?:contains\\(['"](.+?)['"]\\)$/);
        if (containsMatch) {
          var baseTag = (containsMatch[1] || '*').toLowerCase();
          var searchText = containsMatch[2];
          var candidates = document.querySelectorAll(baseTag === '*' ? '*' : baseTag);
          var found = null;
          for (var i = 0; i < candidates.length; i++) {
            var c = candidates[i];
            // Prefer exact text match on direct text content (not children)
            var directText = Array.from(c.childNodes)
              .filter(function(n) { return n.nodeType === 3; })
              .map(function(n) { return n.textContent.trim(); })
              .join(' ');
            if (directText === searchText || c.textContent.trim() === searchText) {
              // Prefer the deepest (most specific) matching element
              if (!found || found.contains(c)) found = c;
            }
          }
          if (!found) return 'Element not found: ' + sel;
          found.scrollIntoView({ block: 'center', behavior: 'instant' });
          found.click();
          return 'Clicked: ' + (found.tagName || '') + ' ' + (found.textContent || '').slice(0, 50).trim();
        }

        var el = document.querySelector(sel);
        if (!el) return 'Element not found: ' + sel;

        // Scroll into view
        el.scrollIntoView({ block: 'center', behavior: 'instant' });

        // Click
        el.click();
        return 'Clicked: ' + (el.tagName || '') + ' ' + (el.textContent || '').slice(0, 50).trim();
      })()
    `,
    returnByValue: true,
  });

  // A script exception arrives as `exceptionDetails` with no result value;
  // report it instead of falling through to the success message.
  const failure = result.exceptionDetails as
    | { text?: string; exception?: { description?: string } }
    | undefined;
  if (failure) {
    const detail = failure.exception?.description ?? failure.text ?? "unknown error";
    return `Click failed for selector ${selectorJS}: ${detail}`;
  }

  // Small delay for any resulting navigation/animation
  await new Promise((r) => setTimeout(r, 500));
  return ((result as CDPEvalResult).result?.value as string) || "Click executed.";
}
|
|
346
|
+
|
|
347
|
+
/**
 * Replace the value of an editable element and fire the events that UI
 * frameworks listen for.
 *
 * The value is written through the native `value` setter of the element's
 * own prototype (`HTMLInputElement` for `<input>`, `HTMLTextAreaElement` for
 * `<textarea>`). Calling the input setter on a textarea throws, which made
 * typing into textareas fail. Content-editable elements get their
 * `textContent` replaced; anything else falls back to assigning `el.value`.
 *
 * @param selector - CSS selector of the target element.
 * @param text - Text that replaces the current value.
 * @returns "Typed into: ..." on success, "Element not found: ..." when
 *   nothing matches, or "Type failed ..." when the in-page script throws.
 * @throws Error when not connected or when the CDP command fails.
 */
async typeText(selector: string, text: string): Promise<string> {
  this.ensureConnected();
  // Use JSON.stringify for safe string interpolation into JS — handles all
  // special characters (quotes, backslashes, newlines, unicode) correctly.
  const selectorJS = JSON.stringify(selector);
  const textJS = JSON.stringify(text);

  // First clear and set value via JS, dispatching all relevant events
  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        const el = document.querySelector(${selectorJS});
        if (!el) return 'Element not found: ' + ${selectorJS};
        const text = ${textJS};

        el.focus();

        // Pick the native setter that belongs to this element's own prototype;
        // a setter from a different element class throws "Illegal invocation".
        const proto = el.tagName === 'TEXTAREA' ? window.HTMLTextAreaElement.prototype
          : el.tagName === 'INPUT' ? window.HTMLInputElement.prototype
          : null;
        const descriptor = proto ? Object.getOwnPropertyDescriptor(proto, 'value') : undefined;
        if (descriptor && descriptor.set) {
          descriptor.set.call(el, text);
        } else if (el.isContentEditable) {
          el.textContent = text;
        } else {
          el.value = text;
        }

        // Dispatch events that frameworks (React, Angular, Material) listen to
        el.dispatchEvent(new Event('input', { bubbles: true, cancelable: true }));
        el.dispatchEvent(new Event('change', { bubbles: true, cancelable: true }));
        el.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: text }));
        return 'Typed into: ' + (el.tagName || '') + ' [' + (el.name || el.id || '') + ']';
      })()
    `,
    returnByValue: true,
  });

  // A script exception arrives as `exceptionDetails` with no result value;
  // report it instead of falling through to the success message.
  const failure = result.exceptionDetails as
    | { text?: string; exception?: { description?: string } }
    | undefined;
  if (failure) {
    const detail = failure.exception?.description ?? failure.text ?? "unknown error";
    return `Type failed for selector ${selectorJS}: ${detail}`;
  }

  return ((result as CDPEvalResult).result?.value as string) || "Text entered.";
}
|
|
387
|
+
|
|
388
|
+
/**
 * Press a key in the attached tab via `Input.dispatchKeyEvent`.
 *
 * Named keys from the table below are sent as a keyDown/keyUp pair. Enter and
 * Space also carry `text` so the browser generates the matching character
 * input. Any other single character is sent as a `char` event.
 *
 * @param key - A supported key name (Enter, Tab, Escape, Backspace, Delete,
 *   Space, Home, End, PageUp, PageDown, ArrowLeft/Up/Right/Down) or a single
 *   character.
 * @returns `Pressed key: <key>`.
 * @throws Error when not connected, when a CDP command fails, or when `key`
 *   is neither a supported name nor a single character. Such names were
 *   previously typed into the page as literal text.
 */
async pressKey(key: string): Promise<string> {
  this.ensureConnected();

  // Map common key names to CDP key codes. A Map avoids accidental hits on
  // Object.prototype members such as "constructor".
  const keyMap = new Map<string, { keyCode: number; code: string; key?: string; text?: string }>([
    ["Enter", { keyCode: 13, code: "Enter", text: "\r" }],
    ["Tab", { keyCode: 9, code: "Tab" }],
    ["Escape", { keyCode: 27, code: "Escape" }],
    ["Backspace", { keyCode: 8, code: "Backspace" }],
    ["Delete", { keyCode: 46, code: "Delete" }],
    ["Space", { keyCode: 32, code: "Space", key: " ", text: " " }],
    ["Home", { keyCode: 36, code: "Home" }],
    ["End", { keyCode: 35, code: "End" }],
    ["PageUp", { keyCode: 33, code: "PageUp" }],
    ["PageDown", { keyCode: 34, code: "PageDown" }],
    ["ArrowLeft", { keyCode: 37, code: "ArrowLeft" }],
    ["ArrowUp", { keyCode: 38, code: "ArrowUp" }],
    ["ArrowRight", { keyCode: 39, code: "ArrowRight" }],
    ["ArrowDown", { keyCode: 40, code: "ArrowDown" }],
  ]);

  const mapped = keyMap.get(key);
  if (mapped) {
    const common = {
      key: mapped.key ?? key,
      code: mapped.code,
      windowsVirtualKeyCode: mapped.keyCode,
      nativeVirtualKeyCode: mapped.keyCode,
    };
    await this.send("Input.dispatchKeyEvent", {
      type: "keyDown",
      ...common,
      // `text` is only attached for keys that produce character input.
      ...(mapped.text !== undefined ? { text: mapped.text } : {}),
    });
    await this.send("Input.dispatchKeyEvent", {
      type: "keyUp",
      ...common,
    });
  } else if (Array.from(key).length === 1) {
    // Single character key (Array.from counts a surrogate pair as one character)
    await this.send("Input.dispatchKeyEvent", {
      type: "char",
      text: key,
    });
  } else {
    throw new Error(
      `Unsupported key: "${key}". Use a single character or one of: ${[...keyMap.keys()].join(", ")}.`
    );
  }

  return `Pressed key: ${key}`;
}
|
|
427
|
+
|
|
428
|
+
/**
 * Scroll the window down by 80% of the viewport height, then pause 300 ms.
 *
 * @returns A fixed confirmation message.
 * @throws Error when not connected or when the CDP command fails.
 */
async scrollDown(): Promise<string> {
  this.ensureConnected();

  const expression = "window.scrollBy(0, window.innerHeight * 0.8)";
  await this.send("Runtime.evaluate", { expression });

  // Short pause after issuing the scroll.
  await new Promise((settle) => setTimeout(settle, 300));
  return "Scrolled down.";
}
|
|
436
|
+
|
|
437
|
+
/**
 * Scroll the window up by 80% of the viewport height, then pause 300 ms.
 *
 * @returns A fixed confirmation message.
 * @throws Error when not connected or when the CDP command fails.
 */
async scrollUp(): Promise<string> {
  this.ensureConnected();

  const expression = "window.scrollBy(0, -window.innerHeight * 0.8)";
  await this.send("Runtime.evaluate", { expression });

  // Short pause after issuing the scroll.
  await new Promise((settle) => setTimeout(settle, 300));
  return "Scrolled up.";
}
|
|
445
|
+
|
|
446
|
+
// ── Annotated Snapshot (ref system) ─────────────────────────────
|
|
447
|
+
|
|
448
|
+
/**
 * Take a snapshot of all interactive elements on the page.
 *
 * Strategy (informed by research — arxiv:2511.19477):
 * - **Text ref table is ALWAYS returned** — compact, low-token, works for
 *   all page complexities including dense layouts (date pickers, tables).
 * - **Annotated screenshot is OPTIONAL** (annotate parameter):
 *   - true: overlay ref badges on screenshot (best for simple pages with
 *     few interactive elements — gives visual context)
 *   - false: plain screenshot without overlays (default — avoids label
 *     clutter on dense pages; model still sees the page visually)
 * - Research shows text-based grounding outperforms visual annotations
 *   on complex pages, and the hybrid approach (a11y text primary +
 *   selective vision) achieves ~85% vs ~50% for pure vision.
 *
 * Side effects: matched elements are tagged with a `data-assistme-ref`
 * attribute (earlier tags in the main document are cleared first) and the
 * controller's ref cache is replaced. At most 80 refs are collected. The
 * overlay is drawn only when `annotate` is true and there are at most 40
 * refs; it is removed in a `finally` block, so a failed screenshot cannot
 * leave badges on the live page.
 *
 * @param annotate - Draw numbered badges and borders on the screenshot.
 * @returns The screenshot (base64), the ref list, and the page URL and title.
 * @throws Error when not connected or when a CDP command fails.
 */
async snapshot(annotate = false): Promise<SnapshotResult> {
  this.ensureConnected();

  // Wait for page to be ready (auto-wait like Playwright)
  await this.waitForLoad(5000);

  // 1. Find all interactive elements, assign ref IDs, get bounding boxes
  const findResult = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        // Clean up previous refs
        document.querySelectorAll('[data-assistme-ref]').forEach(function(el) {
          el.removeAttribute('data-assistme-ref');
        });

        var selectors = [
          'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
          '[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
          '[role="combobox"]', '[role="listbox"]', '[role="menuitem"]', '[role="tab"]',
          '[role="switch"]', '[role="slider"]', '[role="option"]', '[role="searchbox"]',
          '[onclick]', '[tabindex]:not([tabindex="-1"])',
          '[contenteditable="true"]'
        ].join(', ');

        // Collect elements from main document AND same-origin iframes
        var all = Array.from(document.querySelectorAll(selectors));
        try {
          var iframes = document.querySelectorAll('iframe');
          for (var fi = 0; fi < iframes.length; fi++) {
            try {
              var iframeDoc = iframes[fi].contentDocument;
              if (iframeDoc) {
                var iframeRect = iframes[fi].getBoundingClientRect();
                var iframeEls = iframeDoc.querySelectorAll(selectors);
                for (var fe = 0; fe < iframeEls.length; fe++) {
                  // Tag iframe elements with offset for coordinate correction
                  iframeEls[fe].__iframeOffset = { x: iframeRect.x, y: iframeRect.y };
                  all.push(iframeEls[fe]);
                }
              }
            } catch(e) { /* cross-origin iframe, skip */ }
          }
        } catch(e) { /* iframe enumeration failed, continue */ }

        var refs = [];
        var vh = window.innerHeight;
        var vw = window.innerWidth;

        for (var i = 0; i < all.length && refs.length < 80; i++) {
          var el = all[i];
          var rect = el.getBoundingClientRect();

          // Skip invisible / tiny elements
          if (rect.width < 5 || rect.height < 5) continue;
          var style = window.getComputedStyle(el);
          if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;

          // Skip elements far outside viewport
          if (rect.bottom < -50 || rect.top > vh + 50) continue;
          if (rect.right < -50 || rect.left > vw + 50) continue;

          // Determine role
          var role = el.getAttribute('role') || '';
          if (!role) {
            var tag = el.tagName.toLowerCase();
            if (tag === 'a') role = 'link';
            else if (tag === 'button') role = 'button';
            else if (tag === 'input') {
              var t = (el.type || 'text').toLowerCase();
              if (t === 'checkbox') role = 'checkbox';
              else if (t === 'radio') role = 'radio';
              else if (t === 'submit' || t === 'button') role = 'button';
              else role = 'textbox';
            }
            else if (tag === 'select') role = 'combobox';
            else if (tag === 'textarea') role = 'textbox';
            else role = tag;
          }

          // Determine accessible name
          var name = '';
          var ariaLabel = el.getAttribute('aria-label');
          var ariaLabelledBy = el.getAttribute('aria-labelledby');
          if (ariaLabel) {
            name = ariaLabel;
          } else if (ariaLabelledBy) {
            var labelEl = document.getElementById(ariaLabelledBy);
            if (labelEl) name = labelEl.textContent.trim();
          } else if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
            if (el.id) {
              var lbl = document.querySelector('label[for="' + CSS.escape(el.id) + '"]');
              if (lbl) name = lbl.textContent.trim();
            }
            if (!name) name = el.getAttribute('placeholder') || el.getAttribute('name') || '';
          } else {
            name = (el.textContent || '').trim().slice(0, 60);
          }

          var refId = refs.length + 1;
          el.setAttribute('data-assistme-ref', String(refId));

          // Correct coordinates for elements inside iframes
          var offsetX = el.__iframeOffset ? el.__iframeOffset.x : 0;
          var offsetY = el.__iframeOffset ? el.__iframeOffset.y : 0;

          refs.push({
            id: refId,
            role: role,
            name: name,
            tag: el.tagName.toLowerCase(),
            type: el.getAttribute('type') || '',
            box: {
              x: Math.round(rect.x + offsetX),
              y: Math.round(rect.y + offsetY),
              width: Math.round(rect.width),
              height: Math.round(rect.height)
            }
          });
        }

        return JSON.stringify(refs);
      })()
    `,
    returnByValue: true,
  });

  const refs: RefEntry[] = JSON.parse(
    ((findResult as CDPEvalResult).result?.value as string) || "[]"
  ).map((r: Record<string, unknown>) => ({
    id: r.id as number,
    role: r.role as string,
    name: r.name as string,
    tag: r.tag as string,
    inputType: (r.type as string) || "",
    box: r.box as BoundingBox,
  }));

  // 2. Optionally inject visual overlay with ref labels
  //    (Skip for dense pages — labels would overlap and become unreadable)
  const useOverlay = annotate && refs.length <= 40;
  let image: string;
  try {
    if (useOverlay) {
      const refsJson = JSON.stringify(refs);
      await this.send("Runtime.evaluate", {
        expression: `
          (function() {
            var old = document.getElementById('__assistme_refs__');
            if (old) old.remove();

            var overlay = document.createElement('div');
            overlay.id = '__assistme_refs__';
            overlay.style.cssText = 'position:fixed;top:0;left:0;width:100%;height:100%;pointer-events:none;z-index:2147483647;';

            var refs = ${refsJson};
            var vh = window.innerHeight;
            var vw = window.innerWidth;

            for (var i = 0; i < refs.length; i++) {
              var b = refs[i].box;
              if (b.y + b.height < 0 || b.y > vh || b.x + b.width < 0 || b.x > vw) continue;

              // Red badge with ref number
              var badge = document.createElement('div');
              var badgeTop = Math.max(0, b.y - 14);
              var badgeLeft = Math.max(0, b.x);
              badge.style.cssText = 'position:fixed;background:#e8384f;color:#fff;font:bold 10px/1.2 monospace;padding:1px 3px;border-radius:2px;white-space:nowrap;'
                + 'left:' + badgeLeft + 'px;top:' + badgeTop + 'px;';
              badge.textContent = String(refs[i].id);
              overlay.appendChild(badge);

              // Border around element
              var border = document.createElement('div');
              border.style.cssText = 'position:fixed;border:1.5px solid #e8384f;border-radius:2px;'
                + 'left:' + b.x + 'px;top:' + b.y + 'px;width:' + b.width + 'px;height:' + b.height + 'px;';
              overlay.appendChild(border);
            }

            document.documentElement.appendChild(overlay);
          })()
        `,
      });
    }

    // 3. Take screenshot (with or without overlay)
    image = await this.screenshot();
  } finally {
    // 4. Remove overlay if injected (keep data-assistme-ref attributes for later resolution).
    //    Runs even when injection or the screenshot failed. A cleanup failure is
    //    swallowed so it cannot hide the original error.
    if (useOverlay) {
      await this.send("Runtime.evaluate", {
        expression: `(function() { var el = document.getElementById('__assistme_refs__'); if (el) el.remove(); })()`,
      }).catch(() => {});
    }
  }

  // 5. Cache refs for subsequent act() calls
  this.refCache.clear();
  for (const ref of refs) {
    this.refCache.set(ref.id, ref);
  }

  // 6. Get page info
  const pageInfo = await this.getPageInfo();

  return { image, refs, url: pageInfo.url, title: pageInfo.title };
}
|
|
665
|
+
|
|
666
|
+
/**
 * Render a snapshot as a compact, line-oriented text table for the model.
 *
 * The output is a "Page"/"URL" header, a "Refs:" heading, then one line per
 * ref in the form `[id] role "name" (inputType)`. The name and input type
 * are omitted when empty. A placeholder line is added when there are no refs.
 *
 * @param result - Snapshot whose title, URL, and refs are formatted.
 * @returns The formatted table, ending with a newline.
 */
static formatRefTable(result: SnapshotResult): string {
  const header = `Page: ${result.title}\nURL: ${result.url}\n\nRefs:\n`;

  const rows = result.refs.map((ref) => {
    const label = ref.name ? ` "${ref.name}"` : "";
    const kind = ref.inputType ? ` (${ref.inputType})` : "";
    return `[${ref.id}] ${ref.role}${label}${kind}\n`;
  });

  const emptyNote = result.refs.length === 0 ? "(no interactive elements found)\n" : "";
  return header + rows.join("") + emptyNote;
}
|
|
681
|
+
|
|
682
|
+
// ── Ref Resolution ────────────────────────────────────────────────
|
|
683
|
+
|
|
684
|
+
/**
 * Resolve a ref ID to its current center coordinates in the viewport.
 *
 * Lookup uses two strategies, in order:
 *   1. Fast: find by the data-assistme-ref attribute (set during snapshot).
 *   2. Stable: search by role + accessible name taken from the ref cache
 *      (used only when the attribute lookup fails and both values are known).
 *
 * Once an element is found, actionability checks run in the page:
 *   - not display:none / visibility:hidden / near-zero opacity
 *   - not disabled (the `disabled` property or aria-disabled="true")
 *   - scrolled into view, then non-zero size and intersecting the viewport
 *   - not covered by another element at its center (elementFromPoint),
 *     ignoring anything inside the #__assistme_refs__ overlay
 *
 * @param refId Numeric ref assigned by the last snapshot.
 * @returns `null` when the element cannot be found (or the evaluation yields
 *   nothing parseable). When the element is found but not actionable, an
 *   object whose `error` describes why; in that case the coordinates are only
 *   present for the "covered" diagnostic. Otherwise the center point and size.
 */
private async resolveRef(
  refId: number
): Promise<{ x: number; y: number; width: number; height: number; error?: string } | null> {
  // refId is spliced verbatim into the evaluated script below, so anything
  // that is not a plain integer is rejected before the expression is built.
  if (!Number.isInteger(refId)) return null;

  const cached = this.refCache.get(refId);
  const role = cached?.role || "";
  const name = cached?.name || "";
  // JSON.stringify yields safely quoted JS string literals for the script.
  const roleJS = JSON.stringify(role);
  const nameJS = JSON.stringify(name);

  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        var refId = ${refId};
        var role = ${roleJS};
        var name = ${nameJS};

        // Strategy 1: data attribute (fast, from last snapshot)
        var el = document.querySelector('[data-assistme-ref="' + refId + '"]');

        // Strategy 2: role + name search (stable, survives DOM changes)
        if (!el && role && name) {
          var selectorMap = {
            textbox: 'input, textarea, [role="textbox"], [role="searchbox"]',
            button: 'button, [role="button"], input[type="submit"], input[type="button"]',
            link: 'a[href], [role="link"]',
            combobox: 'select, [role="combobox"]',
            checkbox: 'input[type="checkbox"], [role="checkbox"]',
            radio: 'input[type="radio"], [role="radio"]',
            tab: '[role="tab"]',
            menuitem: '[role="menuitem"]',
            option: '[role="option"], option',
          };
          // Own-property check so roles such as "constructor" do not pick up
          // Object.prototype members; escape the role so quotes or other
          // special characters cannot produce an invalid selector.
          var sel = Object.prototype.hasOwnProperty.call(selectorMap, role)
            ? selectorMap[role]
            : '*[role="' + CSS.escape(role) + '"]';
          var candidates = document.querySelectorAll(sel);
          for (var i = 0; i < candidates.length; i++) {
            var c = candidates[i];
            var cName = c.getAttribute('aria-label')
              || c.getAttribute('placeholder')
              || (c.textContent || '').trim().slice(0, 60);
            if (cName === name) { el = c; break; }
          }
        }

        if (!el) return 'null';

        // ── Actionability checks (Playwright-style) ──────────────

        // Check visibility
        var style = window.getComputedStyle(el);
        if (style.display === 'none')
          return JSON.stringify({ error: 'Element is hidden (display:none)' });
        if (style.visibility === 'hidden')
          return JSON.stringify({ error: 'Element is hidden (visibility:hidden)' });
        if (parseFloat(style.opacity) < 0.05)
          return JSON.stringify({ error: 'Element is hidden (opacity:0)' });

        // Check disabled
        if (el.disabled || el.getAttribute('aria-disabled') === 'true')
          return JSON.stringify({ error: 'Element is disabled' });

        // Scroll into view
        el.scrollIntoView({ block: 'center', behavior: 'instant' });
        var r = el.getBoundingClientRect();

        // Check non-zero size
        if (r.width < 1 || r.height < 1)
          return JSON.stringify({ error: 'Element has zero size (' + r.width + 'x' + r.height + ')' });

        // Check element is in viewport
        if (r.bottom < 0 || r.top > window.innerHeight || r.right < 0 || r.left > window.innerWidth)
          return JSON.stringify({ error: 'Element is outside viewport after scroll' });

        var cx = r.x + r.width / 2;
        var cy = r.y + r.height / 2;

        // Check not covered by another element (hit test)
        var topEl = document.elementFromPoint(cx, cy);
        if (topEl && topEl !== el && !el.contains(topEl) && !topEl.closest('[data-assistme-ref="' + refId + '"]')) {
          // Check if the covering element is the overlay (ignore it)
          if (!topEl.closest('#__assistme_refs__')) {
            var coverTag = topEl.tagName.toLowerCase();
            var coverText = (topEl.textContent || '').trim().slice(0, 30);
            return JSON.stringify({
              error: 'Element is covered by <' + coverTag + '>' + (coverText ? ' "' + coverText + '"' : ''),
              x: cx, y: cy, width: r.width, height: r.height
            });
          }
        }

        return JSON.stringify({
          x: cx,
          y: cy,
          width: r.width,
          height: r.height
        });
      })()
    `,
    returnByValue: true,
  });

  // The page script always returns a string: either 'null' or a JSON object.
  const value = (result as CDPEvalResult).result?.value as string;
  if (!value || value === "null") return null;
  try {
    return JSON.parse(value);
  } catch {
    // Unparseable payload is treated the same as "not found".
    return null;
  }
}
|
|
806
|
+
|
|
807
|
+
// ── Ref-based Interactions (CDP Input Events) ─────────────────────
|
|
808
|
+
|
|
809
|
+
/**
 * Click the element behind a ref via CDP Input.dispatchMouseEvent.
 *
 * The click is delivered as a mouseMoved → mousePressed → mouseReleased
 * sequence at the element's resolved center, so it travels through the
 * browser's input pipeline rather than calling el.click().
 *
 * Auto-wait: resolution is tried up to 3 times, 500ms apart, while the
 * element reports an actionability error. On the first attempt only, the
 * position is re-read 50ms after the initial resolve so the click uses the
 * post-scroll coordinates.
 *
 * @param refId Numeric ref from the last snapshot.
 * @returns A "Clicked …" message on success, a "Ref […] not found" message
 *   when the ref cannot be resolved, or a "Cannot click …" message carrying
 *   the last actionability error.
 */
async clickRef(refId: number): Promise<string> {
  this.ensureConnected();

  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));
  // Label is rebuilt from the cache at the moment a message is produced.
  const describe = (): string => {
    const entry = this.refCache.get(refId);
    return `[${refId}] ${entry?.role || ""} "${entry?.name || ""}"`;
  };

  const attemptLimit = 3;
  let failure = "";
  let attempt = 0;

  while (attempt < attemptLimit) {
    const target = await this.resolveRef(refId);

    if (!target) {
      return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
    }

    if (target.error) {
      failure = target.error;
      const outOfAttempts = attempt >= attemptLimit - 1;
      if (outOfAttempts) {
        // Last try also failed: surface the actionability problem.
        return `Cannot click ${describe()}: ${failure}`;
      }
      // Possibly still animating or behind a transient overlay: wait, retry.
      await pause(500);
      attempt += 1;
      continue;
    }

    // Actionable. On the very first attempt give the scroll a moment to
    // settle and prefer the freshly measured position if it is still good.
    if (attempt === 0) {
      await pause(50);
      const remeasured = await this.resolveRef(refId);
      if (remeasured && !remeasured.error) {
        target.x = remeasured.x;
        target.y = remeasured.y;
      }
    }

    // Hover first, then press and release the left button at the same point.
    await this.send("Input.dispatchMouseEvent", {
      type: "mouseMoved",
      x: target.x,
      y: target.y,
    });
    for (const phase of ["mousePressed", "mouseReleased"]) {
      await this.send("Input.dispatchMouseEvent", {
        type: phase,
        x: target.x,
        y: target.y,
        button: "left",
        clickCount: 1,
      });
    }

    await pause(300);
    return `Clicked ${describe()}`;
  }

  // Defensive fallback; every loop path above returns or continues.
  return `Cannot click ${describe()}: ${failure}`;
}
|
|
886
|
+
|
|
887
|
+
/**
 * Type text into an element by ref using CDP Input events.
 *
 * Steps: click the ref to focus it, select all existing text (Cmd+A on
 * macOS, Ctrl+A elsewhere), delete the selection with Backspace, then insert
 * the new text with Input.insertText.
 *
 * The destructive select-all/delete sequence is only sent when the focusing
 * click actually succeeded. If the click failed for any reason (ref not
 * found, element hidden/covered/disabled), the click's message is returned
 * unchanged and nothing is typed, so the keystrokes can never land in
 * whatever other element currently holds focus.
 *
 * @param refId Numeric ref from the last snapshot.
 * @param text Replacement text for the element.
 * @returns A "Typed …" message on success, otherwise the failure message
 *   produced by clickRef.
 */
async typeRef(refId: number, text: string): Promise<string> {
  this.ensureConnected();

  // Click to focus the element. clickRef reports success with a message that
  // starts with "Clicked "; matching on that prefix (rather than searching
  // for "not found") also avoids misfires when the element's own name
  // happens to contain "not found".
  const clickResult = await this.clickRef(refId);
  if (!clickResult.startsWith("Clicked ")) return clickResult;
  await new Promise((r) => setTimeout(r, 100));

  // Select all existing text (Cmd+A on macOS, Ctrl+A elsewhere).
  // CDP modifier bit field: 2 = Ctrl, 4 = Meta/Command.
  // NOTE(review): Chromium on macOS may ignore a synthetic Cmd+A unless an
  // editing command is attached to the key event — confirm select-all works there.
  const modifier = platform() === "darwin" ? 4 : 2;
  await this.send("Input.dispatchKeyEvent", {
    type: "keyDown",
    modifiers: modifier,
    key: "a",
    code: "KeyA",
    windowsVirtualKeyCode: 65,
  });
  await this.send("Input.dispatchKeyEvent", {
    type: "keyUp",
    key: "a",
    code: "KeyA",
  });

  // Delete selected text
  await this.send("Input.dispatchKeyEvent", {
    type: "keyDown",
    key: "Backspace",
    code: "Backspace",
    windowsVirtualKeyCode: 8,
  });
  await this.send("Input.dispatchKeyEvent", {
    type: "keyUp",
    key: "Backspace",
    code: "Backspace",
  });

  // Insert text via CDP (goes through the browser's input pipeline)
  await this.send("Input.insertText", { text });

  await new Promise((r) => setTimeout(r, 100));
  const ref = this.refCache.get(refId);
  return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
}
|
|
935
|
+
|
|
936
|
+
/**
 * Select a dropdown option by ref.
 *
 * Delegates to selectOption, addressing the element through its
 * data-assistme-ref attribute selector, and then rewrites that raw selector
 * in the returned message into the readable `[ref] role "name"` form.
 *
 * @param refId Numeric ref from the last snapshot; must be in the ref cache.
 * @param option Visible text of the option to choose.
 * @returns The message from selectOption (with the selector replaced by the
 *   ref label), or a "Ref […] not found" message when the ref is not cached.
 */
async selectRef(refId: number, option: string): Promise<string> {
  this.ensureConnected();

  // Check if ref exists
  const cached = this.refCache.get(refId);
  if (!cached) {
    return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
  }

  // Use the data attribute selector to find the element
  const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);

  // A replacer function is used so the label is inserted literally. With a
  // plain replacement string, sequences such as "$&" or "$'" inside the
  // element's name would be expanded as substitution patterns.
  const label = `[${refId}] ${cached.role} "${cached.name}"`;
  return result.replace(/\[data-assistme-ref="\d+"\]/, () => label);
}
|
|
957
|
+
|
|
958
|
+
// ── Action Pipeline ───────────────────────────────────────────────
|
|
959
|
+
|
|
960
|
+
/**
 * Execute a batch of ref-based actions sequentially.
 *
 * Reduces round-trips: instead of one tool call per action, the caller can
 * pass a sequence. Actions run in order with a 200ms pause after each
 * non-wait action; the batch stops at the first failed action because later
 * refs may be stale. It is not transactional: actions that already ran are
 * not undone.
 *
 * Success is decided from the positive result prefix of each helper
 * ("Clicked ", "Typed ", "Selected "), not by searching for "not found".
 * That way a "Cannot click …" outcome is correctly treated as a failure, and
 * typed text or element names that merely contain "not found" are not.
 *
 * @param actions Actions to perform, in order.
 * @param takeScreenshot When true, a screenshot is captured 300ms after the
 *   last executed action (also when the batch stopped early).
 * @returns One result entry per executed action, plus the screenshot if requested.
 */
async act(
  actions: ActionSpec[],
  takeScreenshot = false
): Promise<{ results: ActionResult[]; screenshot?: string }> {
  this.ensureConnected();
  const results: ActionResult[] = [];

  for (const spec of actions) {
    let result: string;
    let success = true;

    try {
      switch (spec.action) {
        case "click":
          result = await this.clickRef(spec.ref);
          success = result.startsWith("Clicked ");
          break;
        case "type":
          result = await this.typeRef(spec.ref, spec.text);
          success = result.startsWith("Typed ");
          break;
        case "select":
          result = await this.selectRef(spec.ref, spec.option);
          // "Selection attempted." is selectOption's fallback when the page
          // script produced no message; it keeps counting as success.
          success = result.startsWith("Selected ") || result === "Selection attempted.";
          break;
        case "press":
          result = await this.pressKey(spec.key);
          break;
        case "scroll":
          result = spec.direction === "up" ? await this.scrollUp() : await this.scrollDown();
          break;
        case "wait": {
          // Clamp to [0, 5000] ms and report the duration actually waited.
          const waitMs = Number.isFinite(spec.ms) ? Math.min(Math.max(spec.ms, 0), 5000) : 0;
          await new Promise((r) => setTimeout(r, waitMs));
          result = `Waited ${waitMs}ms`;
          break;
        }
        default:
          result = `Unknown action: ${(spec as { action: string }).action}`;
          success = false;
      }
    } catch (err) {
      result = `Error: ${err instanceof Error ? err.message : String(err)}`;
      success = false;
    }

    results.push({
      action: spec.action,
      ref: "ref" in spec ? (spec as { ref: number }).ref : undefined,
      result,
      success,
    });

    // If an action failed, stop the batch (remaining refs may be stale)
    if (!success) break;

    // Brief pause between actions for DOM to settle
    if (spec.action !== "wait") {
      await new Promise((r) => setTimeout(r, 200));
    }
  }

  let screenshot: string | undefined;
  if (takeScreenshot) {
    // Wait a bit for final DOM changes to settle
    await new Promise((r) => setTimeout(r, 300));
    screenshot = await this.screenshot();
  }

  return { results, screenshot };
}
|
|
1036
|
+
|
|
1037
|
+
// ── Dropdown/Select ─────────────────────────────────────────────
|
|
1038
|
+
|
|
1039
|
+
/**
 * Select an option from a dropdown — handles both native <select> elements
 * and custom dropdown components.
 *
 * Strategy (runs inside the page):
 *   1. Resolve `selector` with querySelector. If it is a native <select>,
 *      pick the option whose trimmed text equals `optionText` and dispatch
 *      `input` then `change` (the order browsers use for user selection).
 *   2. Otherwise treat the match as the dropdown trigger. If nothing matched
 *      (or `selector` is not valid CSS), look for an element whose own text
 *      or aria-label equals `selector`. Click the trigger, wait 300ms, then
 *      click the option by exact text: first inside listbox/menu-style
 *      containers, then any visible leaf element.
 *
 * @param selector CSS selector of the dropdown, or its label / aria-label text.
 * @param optionText Exact (trimmed) text of the option to select.
 * @returns The message produced by the page script, or "Selection attempted."
 *   when the script yielded no value.
 */
async selectOption(selector: string, optionText: string): Promise<string> {
  this.ensureConnected();
  const selectorJS = JSON.stringify(selector);
  const optionJS = JSON.stringify(optionText);

  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        var sel = ${selectorJS};
        var optText = ${optionJS};

        // Strategy 1: Native <select> element.
        // querySelector throws on text that is not a valid selector (for
        // example a label such as "Country (optional)"); treat that as
        // "no match" so the label lookup below still gets a chance.
        var selectEl = null;
        try {
          selectEl = document.querySelector(sel);
        } catch (e) {
          selectEl = null;
        }
        if (selectEl && selectEl.tagName === 'SELECT') {
          var options = selectEl.querySelectorAll('option');
          for (var i = 0; i < options.length; i++) {
            if (options[i].textContent.trim() === optText) {
              selectEl.value = options[i].value;
              selectEl.dispatchEvent(new Event('input', { bubbles: true }));
              selectEl.dispatchEvent(new Event('change', { bubbles: true }));
              return 'Selected "' + optText + '" in native select';
            }
          }
          return 'Option "' + optText + '" not found in select. Available: ' +
            Array.from(options).map(function(o) { return o.textContent.trim(); }).join(', ');
        }

        // Strategy 2: Custom dropdown — find the trigger element
        var trigger = selectEl;
        if (!trigger) {
          // Try finding by label/placeholder text
          var allEls = document.querySelectorAll('*');
          for (var j = 0; j < allEls.length; j++) {
            var el = allEls[j];
            // Only the element's direct text nodes, not its descendants' text.
            var ownText = Array.from(el.childNodes)
              .filter(function(n) { return n.nodeType === 3; })
              .map(function(n) { return n.textContent.trim(); })
              .join('');
            if (ownText === sel || el.getAttribute('aria-label') === sel) {
              trigger = el;
              break;
            }
          }
        }

        if (!trigger) return 'Dropdown not found: ' + sel;

        // Click to open the dropdown
        trigger.scrollIntoView({ block: 'center', behavior: 'instant' });
        trigger.click();

        // Wait for the dropdown menu to render, then select the option
        return new Promise(function(resolve) {
          setTimeout(function() {
            // Look for the option in listbox/menu/dropdown overlays
            var optionContainers = document.querySelectorAll(
              '[role="listbox"], [role="menu"], [role="presentation"], .MuiMenu-list, .MuiList-root, ul.mdc-list, .VfPpkd-xl07Ob'
            );

            // Also check all visible elements as fallback
            var searchIn = optionContainers.length > 0
              ? Array.from(optionContainers).flatMap(function(c) { return Array.from(c.querySelectorAll('*')); })
              : Array.from(document.querySelectorAll('li, [role="option"], [role="menuitem"], div[data-value]'));

            for (var k = 0; k < searchIn.length; k++) {
              var opt = searchIn[k];
              var txt = opt.textContent ? opt.textContent.trim() : '';
              if (txt === optText) {
                opt.scrollIntoView({ block: 'center', behavior: 'instant' });
                opt.click();
                resolve('Selected "' + optText + '" from custom dropdown');
                return;
              }
            }

            // Broader search: any visible leaf element with exact text match
            var everything = document.querySelectorAll('*');
            for (var m = 0; m < everything.length; m++) {
              var candidate = everything[m];
              if (candidate.textContent && candidate.textContent.trim() === optText &&
                  candidate.offsetParent !== null && candidate.children.length === 0) {
                candidate.click();
                resolve('Selected "' + optText + '" (broad match)');
                return;
              }
            }

            resolve('Option "' + optText + '" not found in dropdown');
          }, 300);
        });
      })()
    `,
    returnByValue: true,
    awaitPromise: true,
  });

  // Give the page time to react to the selection before the caller continues.
  await new Promise((r) => setTimeout(r, 500));
  return ((result as CDPEvalResult).result?.value as string) || "Selection attempted.";
}
|
|
1146
|
+
|
|
1147
|
+
// ── JavaScript Evaluation ───────────────────────────────────────
|
|
1148
|
+
|
|
1149
|
+
/**
 * Evaluate a JavaScript expression in the current tab and return the outcome
 * as text.
 *
 * The expression is sent through Runtime.evaluate with returnByValue and
 * awaitPromise enabled. String values come back unchanged, other values are
 * pretty-printed as JSON, and when no value is present the remote object's
 * description (or "(undefined)") is returned instead.
 */
async evaluate(expression: string): Promise<string> {
  this.ensureConnected();

  const reply = await this.send("Runtime.evaluate", {
    expression,
    returnByValue: true,
    awaitPromise: true,
  });
  const remote = (reply as CDPEvalResult).result;
  const payload = remote?.value;

  if (payload !== undefined) {
    if (typeof payload === "string") {
      return payload;
    }
    return JSON.stringify(payload, null, 2);
  }

  // No by-value result: fall back to the textual description, if any.
  const summary = remote?.description;
  return summary || "(undefined)";
}
|
|
1165
|
+
|
|
1166
|
+
// ── Tab Management ──────────────────────────────────────────────
|
|
1167
|
+
|
|
1168
|
+
/**
 * Produce a human-readable listing of the open page tabs.
 *
 * Only targets of type "page" are listed. Each entry shows its index within
 * that filtered list, the title truncated to 60 characters, an "(active)"
 * marker for the tab matching currentTabId, and the URL on a second line.
 * Entries are separated by a blank line.
 */
async listTabs(): Promise<string> {
  const pages = (await this.getTabs()).filter((tab) => tab.type === "page");

  if (pages.length === 0) {
    return "No tabs open.";
  }

  const entries: string[] = [];
  pages.forEach((tab, position) => {
    const activeMark = this.currentTabId === tab.id ? " (active)" : "";
    const heading = `[${position}] ${tab.title.slice(0, 60)}${activeMark}`;
    entries.push(`${heading}\n ${tab.url}`);
  });
  return entries.join("\n\n");
}
|
|
1181
|
+
|
|
1182
|
+
/**
 * Switch the controller to another page tab.
 *
 * The index refers to the list of targets of type "page" (the same ordering
 * listTabs prints). The current connection is dropped before connecting to
 * the requested tab.
 *
 * @param index Zero-based index into the page-tab list.
 * @returns The message from connect(), or an "Invalid tab index. …" message
 *   when the index is not an integer within range or no page tabs exist.
 */
async switchTab(index: number): Promise<string> {
  const tabs = await this.getTabs();
  const pageTabs = tabs.filter((t) => t.type === "page");

  // With no tabs the range text would read "0--1"; say so plainly instead.
  if (pageTabs.length === 0) {
    return "Invalid tab index. No tabs open.";
  }

  // Reject fractional/NaN indexes as well as out-of-range ones before the
  // existing connection is torn down.
  if (!Number.isInteger(index) || index < 0 || index >= pageTabs.length) {
    return `Invalid tab index. Available: 0-${pageTabs.length - 1}`;
  }

  // Disconnect from current tab
  await this.disconnect();

  // Connect to new tab
  return this.connect(index);
}
|
|
1196
|
+
|
|
1197
|
+
/**
 * Open a new tab through the DevTools HTTP endpoint and attach to it.
 *
 * The request uses PUT: current Chrome versions reject other verbs on
 * /json/new with a plain-text error, which previously surfaced as a JSON
 * parse failure.
 *
 * @param url Address to open; defaults to "about:blank".
 * @returns "Opened new tab: <url>", with a warning suffix when the new tab
 *   could not be located among the page tabs and therefore was not attached.
 * @throws Error when the endpoint answers with a non-2xx status, or on
 *   network failure / the 5s timeout (propagated from fetch).
 */
async openNewTab(url?: string): Promise<string> {
  const targetUrl = url || "about:blank";
  const res = await fetch(
    `http://127.0.0.1:${this.debugPort}/json/new?${encodeURIComponent(targetUrl)}`,
    { method: "PUT", signal: AbortSignal.timeout(5000) }
  );
  if (!res.ok) {
    // Include the body: Chrome explains the rejection in plain text.
    const detail = (await res.text()).trim();
    throw new Error(`Failed to open new tab (HTTP ${res.status})${detail ? `: ${detail}` : ""}`);
  }
  const tab = (await res.json()) as CDPTab;

  // Connect to the new tab. connect() takes an index into the page-tab
  // list, so look the new target up by id after refreshing the list.
  await this.disconnect();
  const tabs = await this.getTabs();
  const idx = tabs.filter((t) => t.type === "page").findIndex((t) => t.id === tab.id);
  if (idx < 0) {
    // The tab exists but is not listed yet; the controller is now
    // disconnected, so do not report an unqualified success.
    return `Opened new tab: ${targetUrl} (warning: could not attach to the new tab)`;
  }
  await this.connect(idx);

  return `Opened new tab: ${targetUrl}`;
}
|
|
1215
|
+
|
|
1216
|
+
// ── Helpers ─────────────────────────────────────────────────────
|
|
1217
|
+
|
|
1218
|
+
/**
 * Poll document.readyState until the page reports "complete" or
 * "interactive", or until the timeout elapses.
 *
 * Evaluation errors are ignored (the tab may be mid-navigation) and polling
 * continues every 300ms. Once a ready state is seen, an extra 500ms is
 * waited for dynamic content. Hitting the timeout returns silently.
 *
 * @param timeoutMs Upper bound for polling, in milliseconds.
 */
private async waitForLoad(timeoutMs = 8000): Promise<void> {
  const nap = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
  const began = Date.now();

  while (Date.now() - began < timeoutMs) {
    let ready = false;
    try {
      const reply = await this.send("Runtime.evaluate", {
        expression: "document.readyState",
        returnByValue: true,
      });
      const readyState = (reply as CDPEvalResult).result?.value;
      ready = readyState === "complete" || readyState === "interactive";
    } catch {
      // Tab might be navigating; keep polling.
    }

    if (ready) {
      // Small grace period for content rendered after load.
      await nap(500);
      return;
    }
    await nap(300);
  }
}
|
|
1238
|
+
|
|
1239
|
+
/**
 * List interactive elements on the page so the AI can see what is clickable.
 *
 * Collects up to 50 elements matching links, buttons, form controls,
 * [role="button"] and [onclick], skipping any with a zero-sized bounding box.
 * Each entry carries tag, text (first 80 chars), type, name, id, href,
 * placeholder and a CSS selector for the element.
 *
 * Selector construction, in order of preference:
 *   - `#id` (escaped) when the element has an id
 *   - `[data-testid="…"]` with the attribute value escaped
 *   - otherwise a child-combinator path of `tag:nth-of-type(n)` steps that
 *     is anchored at the nearest ancestor with an id, or at the document
 *     root. A bare `tag:nth-of-type(n)` would match in every parent and so
 *     could address a different element than the one listed.
 *
 * @returns A JSON array (pretty-printed string); "[]" when nothing is returned.
 */
async getInteractiveElements(): Promise<string> {
  this.ensureConnected();
  const result = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        // Path of nth-of-type steps from an id-bearing ancestor (or the
        // root) down to the node.
        function buildPath(node) {
          const parts = [];
          let cur = node;
          while (cur && cur.nodeType === 1) {
            if (cur !== node && cur.id) {
              parts.unshift('#' + CSS.escape(cur.id));
              break;
            }
            const tag = cur.tagName.toLowerCase();
            const parent = cur.parentElement;
            if (!parent) {
              parts.unshift(tag);
              break;
            }
            const current = cur;
            const sameTag = Array.from(parent.children).filter(function(c) {
              return c.tagName === current.tagName;
            });
            parts.unshift(tag + ':nth-of-type(' + (sameTag.indexOf(current) + 1) + ')');
            cur = parent;
          }
          return parts.join(' > ');
        }

        const elements = [];
        const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
        const all = document.querySelectorAll(selectors);
        for (let i = 0; i < all.length && elements.length < 50; i++) {
          const el = all[i];
          const rect = el.getBoundingClientRect();
          if (rect.width === 0 || rect.height === 0) continue; // Skip hidden

          // Build a reliable CSS selector
          let selector;
          const testId = el.getAttribute('data-testid');
          if (el.id) {
            selector = '#' + CSS.escape(el.id);
          } else if (testId) {
            // Escape so quotes/backslashes in the value keep the selector valid.
            selector = '[data-testid="' + CSS.escape(testId) + '"]';
          } else {
            selector = buildPath(el);
          }

          elements.push({
            tag: el.tagName.toLowerCase(),
            text: (el.textContent || '').trim().slice(0, 80),
            type: el.getAttribute('type') || '',
            name: el.getAttribute('name') || '',
            id: el.id || '',
            href: el.getAttribute('href') || '',
            placeholder: el.getAttribute('placeholder') || '',
            selector: selector,
          });
        }
        return JSON.stringify(elements, null, 2);
      })()
    `,
    returnByValue: true,
  });

  return ((result as CDPEvalResult).result?.value as string) || "[]";
}
|
|
1293
|
+
|
|
1294
|
+
/**
 * Report whether the controller is usable: the connected flag is set and
 * the WebSocket (if any) is in the OPEN state.
 */
isConnected(): boolean {
  const socketOpen = this.ws?.readyState === WebSocket.OPEN;
  return this.connected && socketOpen;
}
|
|
1297
|
+
|
|
1298
|
+
// ── Login Detection ────────────────────────────────────────────
|
|
1299
|
+
|
|
1300
|
+
/**
 * Detect if the current page appears to be a login/authentication page.
 *
 * Heuristics, evaluated in the page in this order:
 *   1. Signup/registration URL patterns → not a login page.
 *   2. Login URL patterns → login page.
 *   3. A visible password input → login page.
 *   4. A form whose action contains login/signin/session/auth → login page.
 *
 * URL patterns are matched against host + path + fragment only. The query
 * string is left out on purpose: redirect parameters such as
 * "?next=/signup/" or "?return_to=/login" describe some other page and
 * would otherwise flip the verdict for the page actually being shown.
 *
 * @returns The verdict and a short reason. Any failure (not connected,
 *   evaluation error, malformed payload) yields
 *   `{ isLoginPage: false, reason: "" }`; this method does not throw.
 */
async detectLoginPage(): Promise<{ isLoginPage: boolean; reason: string }> {
  try {
    const result = await this.send("Runtime.evaluate", {
      expression: `
        (function() {
          var loc = window.location;
          var url = (loc.host + loc.pathname + loc.hash).toLowerCase();

          // Exclude signup/registration pages — these are NOT login pages
          var signupPatterns = [
            '/signup', '/sign-up', '/sign_up', '/register',
            '/registration', '/create-account', '/create_account',
            '/join', '/enroll',
            'accounts.google.com/lifecycle/steps/signup',
            'signup.live.com',
          ];
          for (var s = 0; s < signupPatterns.length; s++) {
            if (url.indexOf(signupPatterns[s]) !== -1) {
              return JSON.stringify({ isLoginPage: false, reason: '' });
            }
          }

          // URL-based detection
          var loginPatterns = [
            '/login', '/signin', '/sign-in', '/sign_in',
            '/auth/', '/sso/', '/oauth/', '/session/new',
            '/accounts/login', '/users/sign_in',
            'accounts.google.com/v3/signin',
            'accounts.google.com/servicelogin',
            'login.microsoftonline.com',
            'github.com/login', 'github.com/session',
            'login.live.com', 'appleid.apple.com'
          ];
          for (var i = 0; i < loginPatterns.length; i++) {
            if (url.indexOf(loginPatterns[i]) !== -1) {
              return JSON.stringify({
                isLoginPage: true,
                reason: 'URL contains login pattern: ' + loginPatterns[i]
              });
            }
          }

          // Password input detection (visible only)
          var passwordInputs = document.querySelectorAll('input[type="password"]');
          for (var j = 0; j < passwordInputs.length; j++) {
            var input = passwordInputs[j];
            var rect = input.getBoundingClientRect();
            var style = window.getComputedStyle(input);
            if (rect.width > 0 && rect.height > 0 &&
                style.display !== 'none' && style.visibility !== 'hidden') {
              return JSON.stringify({
                isLoginPage: true,
                reason: 'Page contains visible password input field'
              });
            }
          }

          // Login form action detection
          var formSelectors = [
            'form[action*="login"]', 'form[action*="signin"]',
            'form[action*="session"]', 'form[action*="auth"]',
            'form[action*="authenticate"]'
          ];
          var loginForms = document.querySelectorAll(formSelectors.join(','));
          if (loginForms.length > 0) {
            return JSON.stringify({
              isLoginPage: true,
              reason: 'Page contains login form'
            });
          }

          return JSON.stringify({ isLoginPage: false, reason: '' });
        })()
      `,
      returnByValue: true,
    });

    const value = (result as CDPEvalResult).result?.value;
    if (typeof value !== "string" || value === "") {
      return { isLoginPage: false, reason: "" };
    }

    // Normalize the parsed payload so the declared return shape always holds.
    const parsed = JSON.parse(value) as { isLoginPage?: unknown; reason?: unknown } | null;
    return {
      isLoginPage: parsed?.isLoginPage === true,
      reason: typeof parsed?.reason === "string" ? parsed.reason : "",
    };
  } catch {
    // Best-effort detection: failures are reported as "not a login page".
    return { isLoginPage: false, reason: "" };
  }
}
|
|
1386
|
+
}
|