assistme 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +301 -212
- package/package.json +1 -1
- package/src/agent/processor.ts +63 -7
- package/src/agent/system-prompt.ts +6 -2
- package/src/browser/chrome-launcher.ts +6 -4
- package/src/browser/controller.ts +196 -134
- package/src/browser/types.ts +6 -0
- package/src/db/event.ts +32 -20
- package/src/mcp/agent-tools-server.ts +53 -40
- package/src/mcp/browser-server.ts +16 -33
- package/src/tools/browser.ts +1 -0
- package/src/tools/index.ts +0 -3
package/package.json
CHANGED
package/src/agent/processor.ts
CHANGED
|
@@ -31,6 +31,55 @@ import {
|
|
|
31
31
|
import { createEventHooks } from "./event-hooks.js";
|
|
32
32
|
import { BASE_SYSTEM_PROMPT } from "./system-prompt.js";
|
|
33
33
|
|
|
34
|
+
/**
 * Manages the task wall-clock timeout.
 *
 * The countdown can be paused while the agent is waiting for user input
 * (ask_user) so that idle wait time does not count toward the timeout, and
 * resumed afterwards with whatever budget was left.
 *
 * Lifecycle rules enforced here:
 * - `pause()` is a no-op when no timer is armed.
 * - `resume()` is a no-op when a timer is already armed, after `clear()`,
 *   or once the controller has been aborted. This guarantees that at most one
 *   timer exists at any time and that a late "user wait ended" callback can
 *   never re-arm the timeout of a task that has already finished.
 * - `clear()` is terminal.
 */
class TaskTimeout {
  /** Handle of the currently armed timer, or null when paused/cleared/fired. */
  private timeoutId: ReturnType<typeof setTimeout> | null = null;
  /** Time budget (ms) left as of the last (re)arm or pause. */
  private remainingMs: number;
  /** Timestamp (ms since epoch) at which the current timer was armed. */
  private resumedAt: number;
  /** Set by clear(); once true the timeout can never be re-armed. */
  private cleared = false;

  /**
   * @param abortController Controller that is aborted when the budget runs out.
   * @param timeoutMs Total wall-clock budget in milliseconds.
   */
  constructor(
    private abortController: AbortController,
    timeoutMs: number
  ) {
    this.remainingMs = timeoutMs;
    this.resumedAt = Date.now();
    this.schedule();
  }

  /** Arm a timer for the remaining budget; aborts the controller when it fires. */
  private schedule(): void {
    this.timeoutId = setTimeout(() => {
      // The timer is spent: forget the handle so later calls see a consistent state.
      this.timeoutId = null;
      this.remainingMs = 0;
      this.abortController.abort();
    }, this.remainingMs);
  }

  /** Pause the timeout (e.g. while waiting for user). */
  pause(): void {
    if (this.timeoutId === null) return;
    clearTimeout(this.timeoutId);
    this.timeoutId = null;
    // Charge only the time that elapsed while the timer was actually running.
    const elapsed = Date.now() - this.resumedAt;
    this.remainingMs = Math.max(0, this.remainingMs - elapsed);
  }

  /** Resume the timeout after user interaction completes. */
  resume(): void {
    // Never re-arm a finished task, never stack a second timer on top of a
    // live one, and don't bother counting down for an already-aborted run.
    if (this.cleared || this.timeoutId !== null || this.abortController.signal.aborted) {
      return;
    }
    this.resumedAt = Date.now();
    this.schedule();
  }

  /** Permanently cancel the timeout (call once the task has finished). */
  clear(): void {
    this.cleared = true;
    if (this.timeoutId !== null) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
  }
}
|
|
82
|
+
|
|
34
83
|
const MAX_HISTORY_ENTRIES = 10;
|
|
35
84
|
const MAX_RESPONSE_LENGTH = 1500;
|
|
36
85
|
|
|
@@ -143,6 +192,9 @@ export class TaskProcessor {
|
|
|
143
192
|
systemPrompt += historyPrompt;
|
|
144
193
|
}
|
|
145
194
|
|
|
195
|
+
const abortController = new AbortController();
|
|
196
|
+
const taskTimeout = new TaskTimeout(abortController, taskTimeoutMs);
|
|
197
|
+
|
|
146
198
|
// Create MCP servers for custom tools
|
|
147
199
|
const browserServer = createBrowserMcpServer();
|
|
148
200
|
const agentToolsServer = createAgentToolsServer({
|
|
@@ -150,6 +202,8 @@ export class TaskProcessor {
|
|
|
150
202
|
skillManager: this.skillManager,
|
|
151
203
|
taskId: task.id,
|
|
152
204
|
sessionId: this.sessionId || undefined,
|
|
205
|
+
onUserWaitStart: () => taskTimeout.pause(),
|
|
206
|
+
onUserWaitEnd: () => taskTimeout.resume(),
|
|
153
207
|
});
|
|
154
208
|
|
|
155
209
|
// Create event hooks for Supabase event emission
|
|
@@ -203,7 +257,6 @@ export class TaskProcessor {
|
|
|
203
257
|
};
|
|
204
258
|
}
|
|
205
259
|
|
|
206
|
-
const abortController = new AbortController();
|
|
207
260
|
const options: Options = {
|
|
208
261
|
model: config.model,
|
|
209
262
|
systemPrompt,
|
|
@@ -221,11 +274,7 @@ export class TaskProcessor {
|
|
|
221
274
|
abortController,
|
|
222
275
|
};
|
|
223
276
|
|
|
224
|
-
// Wall-clock timeout via abort
|
|
225
277
|
const taskStartTime = Date.now();
|
|
226
|
-
const timeoutId = setTimeout(() => {
|
|
227
|
-
abortController.abort();
|
|
228
|
-
}, taskTimeoutMs);
|
|
229
278
|
|
|
230
279
|
try {
|
|
231
280
|
for await (const message of query({
|
|
@@ -302,11 +351,18 @@ export class TaskProcessor {
|
|
|
302
351
|
}
|
|
303
352
|
}
|
|
304
353
|
} finally {
|
|
305
|
-
|
|
354
|
+
taskTimeout.clear();
|
|
306
355
|
}
|
|
307
356
|
|
|
357
|
+
// Truncate finalResponse to avoid edge function payload limits
|
|
358
|
+
const MAX_CONTENT_LENGTH = 50_000;
|
|
359
|
+
const truncatedResponse =
|
|
360
|
+
finalResponse.length > MAX_CONTENT_LENGTH
|
|
361
|
+
? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]"
|
|
362
|
+
: finalResponse;
|
|
363
|
+
|
|
308
364
|
// Complete the task (with retry for transient DB failures)
|
|
309
|
-
await withRetry(() => completeTask(task.id,
|
|
365
|
+
await withRetry(() => completeTask(task.id, truncatedResponse, tokenUsage), {
|
|
310
366
|
maxRetries: 2,
|
|
311
367
|
baseDelayMs: 300,
|
|
312
368
|
label: "completeTask",
|
|
@@ -28,7 +28,7 @@ Available capabilities:
|
|
|
28
28
|
- Refs persist across actions unless the page navigates. Re-snapshot after navigation or major DOM changes.
|
|
29
29
|
|
|
30
30
|
**Legacy tools (still available, use when refs don't work):**
|
|
31
|
-
- browser_click, browser_type, browser_select,
|
|
31
|
+
- browser_click, browser_type, browser_select, browser_screenshot, browser_evaluate
|
|
32
32
|
- browser_click supports :contains('text') pseudo-selectors
|
|
33
33
|
- browser_select handles native and custom dropdowns
|
|
34
34
|
|
|
@@ -41,12 +41,16 @@ Available capabilities:
|
|
|
41
41
|
- Bash tool for shell commands
|
|
42
42
|
- Glob and Grep for file search
|
|
43
43
|
|
|
44
|
-
3. MEMORY:
|
|
44
|
+
3. MEMORY & CREDENTIALS:
|
|
45
45
|
- You can remember things about the user using memory_store
|
|
46
46
|
- Use this when you learn preferences, important facts, or standing instructions
|
|
47
47
|
- Your stored memories persist across conversations
|
|
48
48
|
- PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
|
|
49
49
|
- Before completing a task, consider if anything learned should be remembered for future conversations
|
|
50
|
+
- CRITICAL — Credential Storage: When you create, register, or receive any account credentials (username, password, API keys, tokens), you MUST use credential_set to save them locally. NEVER use memory_store for credentials — memory_store is for preferences and facts, credential_set is for secrets. Examples:
|
|
51
|
+
* After registering a new email/account → credential_set with type "login" and data { "username": "...", "password": "...", "email": "..." }
|
|
52
|
+
* After generating an API key → credential_set with type "api_key" and data { "api_key": "..." }
|
|
53
|
+
* Credentials saved via credential_set are encrypted on disk and viewable in the desktop app's Credentials panel
|
|
50
54
|
|
|
51
55
|
4. SKILL-AWARE EXECUTION (CRITICAL — follow this for EVERY task):
|
|
52
56
|
Step A — Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
|
|
@@ -545,11 +545,13 @@ export async function ensureBrowserAvailable(port = 9222): Promise<AutoLaunchRes
|
|
|
545
545
|
|
|
546
546
|
// ── Singleton ───────────────────────────────────────────────────────
|
|
547
547
|
|
|
548
|
-
|
|
548
|
+
const browserInstances = new Map<number, BrowserController>();
|
|
549
549
|
|
|
550
550
|
/**
 * Return the shared BrowserController for a debugging port, creating and
 * caching it in `browserInstances` on first use so each port maps to exactly
 * one controller.
 *
 * @param port Port the controller is constructed with (defaults to 9222).
 * @returns The cached controller for `port`.
 */
export function getBrowser(port = 9222): BrowserController {
  const existing = browserInstances.get(port);
  if (existing) {
    return existing;
  }

  const created = new BrowserController(port);
  browserInstances.set(port, created);
  return created;
}
|
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
SnapshotResult,
|
|
11
11
|
ActionSpec,
|
|
12
12
|
ActionResult,
|
|
13
|
+
RefActionResult,
|
|
13
14
|
} from "./types.js";
|
|
14
15
|
|
|
15
16
|
export class BrowserController {
|
|
@@ -198,11 +199,26 @@ export class BrowserController {
|
|
|
198
199
|
|
|
199
200
|
async goBack(): Promise<string> {
|
|
200
201
|
this.ensureConnected();
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
202
|
+
try {
|
|
203
|
+
// Get navigation history and go to the previous entry
|
|
204
|
+
const history = (await this.send("Page.getNavigationHistory")) as {
|
|
205
|
+
currentIndex?: number;
|
|
206
|
+
entries?: Array<{ id: number }>;
|
|
207
|
+
};
|
|
208
|
+
const idx = history.currentIndex ?? 0;
|
|
209
|
+
const entries = history.entries ?? [];
|
|
210
|
+
if (idx > 0 && entries[idx - 1]) {
|
|
211
|
+
await this.send("Page.navigateToHistoryEntry", {
|
|
212
|
+
entryId: entries[idx - 1].id,
|
|
213
|
+
});
|
|
214
|
+
} else {
|
|
215
|
+
// No previous entry in CDP history — use JS fallback
|
|
216
|
+
await this.evaluate("window.history.back()");
|
|
217
|
+
}
|
|
218
|
+
} catch {
|
|
219
|
+
// CDP history API failed — use JS fallback
|
|
220
|
+
await this.evaluate("window.history.back()");
|
|
221
|
+
}
|
|
206
222
|
await this.waitForLoad();
|
|
207
223
|
const info = await this.getPageInfo();
|
|
208
224
|
return `Went back to: ${info.title}`;
|
|
@@ -394,31 +410,88 @@ export class BrowserController {
|
|
|
394
410
|
Tab: { keyCode: 9, code: "Tab" },
|
|
395
411
|
Escape: { keyCode: 27, code: "Escape" },
|
|
396
412
|
Backspace: { keyCode: 8, code: "Backspace" },
|
|
413
|
+
Delete: { keyCode: 46, code: "Delete" },
|
|
397
414
|
ArrowDown: { keyCode: 40, code: "ArrowDown" },
|
|
398
415
|
ArrowUp: { keyCode: 38, code: "ArrowUp" },
|
|
416
|
+
ArrowLeft: { keyCode: 37, code: "ArrowLeft" },
|
|
417
|
+
ArrowRight: { keyCode: 39, code: "ArrowRight" },
|
|
418
|
+
Home: { keyCode: 36, code: "Home" },
|
|
419
|
+
End: { keyCode: 35, code: "End" },
|
|
420
|
+
Space: { keyCode: 32, code: "Space" },
|
|
399
421
|
};
|
|
400
422
|
|
|
401
|
-
|
|
423
|
+
// CDP modifier bitmask values
|
|
424
|
+
const modifierMap: Record<string, number> = {
|
|
425
|
+
Alt: 1,
|
|
426
|
+
Control: 2,
|
|
427
|
+
Meta: 4,
|
|
428
|
+
Shift: 8,
|
|
429
|
+
};
|
|
430
|
+
|
|
431
|
+
// Parse modifier combos like "Control+a", "Meta+Shift+z"
|
|
432
|
+
const parts = key.split("+");
|
|
433
|
+
let modifiers = 0;
|
|
434
|
+
let actualKey = parts[parts.length - 1];
|
|
435
|
+
for (let i = 0; i < parts.length - 1; i++) {
|
|
436
|
+
const mod = modifierMap[parts[i]];
|
|
437
|
+
if (mod) modifiers |= mod;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
const mapped = keyMap[actualKey];
|
|
402
441
|
if (mapped) {
|
|
403
442
|
await this.send("Input.dispatchKeyEvent", {
|
|
404
443
|
type: "keyDown",
|
|
405
|
-
key,
|
|
444
|
+
key: actualKey,
|
|
406
445
|
code: mapped.code,
|
|
407
446
|
windowsVirtualKeyCode: mapped.keyCode,
|
|
408
447
|
nativeVirtualKeyCode: mapped.keyCode,
|
|
448
|
+
modifiers,
|
|
409
449
|
});
|
|
410
450
|
await this.send("Input.dispatchKeyEvent", {
|
|
411
451
|
type: "keyUp",
|
|
412
|
-
key,
|
|
452
|
+
key: actualKey,
|
|
413
453
|
code: mapped.code,
|
|
414
454
|
windowsVirtualKeyCode: mapped.keyCode,
|
|
415
455
|
nativeVirtualKeyCode: mapped.keyCode,
|
|
456
|
+
modifiers,
|
|
457
|
+
});
|
|
458
|
+
} else if (actualKey.length === 1) {
|
|
459
|
+
// Single character key (e.g., "a", "z")
|
|
460
|
+
const code = `Key${actualKey.toUpperCase()}`;
|
|
461
|
+
const keyCode = actualKey.toUpperCase().charCodeAt(0);
|
|
462
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
463
|
+
type: "keyDown",
|
|
464
|
+
key: actualKey,
|
|
465
|
+
code,
|
|
466
|
+
windowsVirtualKeyCode: keyCode,
|
|
467
|
+
nativeVirtualKeyCode: keyCode,
|
|
468
|
+
modifiers,
|
|
469
|
+
});
|
|
470
|
+
if (!modifiers) {
|
|
471
|
+
// Only insert text for unmodified single characters
|
|
472
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
473
|
+
type: "char",
|
|
474
|
+
text: actualKey,
|
|
475
|
+
modifiers,
|
|
476
|
+
});
|
|
477
|
+
}
|
|
478
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
479
|
+
type: "keyUp",
|
|
480
|
+
key: actualKey,
|
|
481
|
+
code,
|
|
482
|
+
modifiers,
|
|
416
483
|
});
|
|
417
484
|
} else {
|
|
418
|
-
//
|
|
485
|
+
// Unknown key name — try as-is
|
|
419
486
|
await this.send("Input.dispatchKeyEvent", {
|
|
420
|
-
type: "
|
|
421
|
-
|
|
487
|
+
type: "keyDown",
|
|
488
|
+
key: actualKey,
|
|
489
|
+
modifiers,
|
|
490
|
+
});
|
|
491
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
492
|
+
type: "keyUp",
|
|
493
|
+
key: actualKey,
|
|
494
|
+
modifiers,
|
|
422
495
|
});
|
|
423
496
|
}
|
|
424
497
|
|
|
@@ -816,8 +889,10 @@ export class BrowserController {
|
|
|
816
889
|
* element is not yet actionable (e.g., covered by a loading overlay, still
|
|
817
890
|
* animating into view). This matches Playwright's auto-waiting behavior.
|
|
818
891
|
*/
|
|
819
|
-
async clickRef(refId: number): Promise<
|
|
892
|
+
async clickRef(refId: number): Promise<RefActionResult> {
|
|
820
893
|
this.ensureConnected();
|
|
894
|
+
const ref = this.refCache.get(refId);
|
|
895
|
+
const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
821
896
|
|
|
822
897
|
// Auto-wait: retry up to 3 times if element is not actionable yet
|
|
823
898
|
const maxRetries = 3;
|
|
@@ -827,7 +902,10 @@ export class BrowserController {
|
|
|
827
902
|
const resolved = await this.resolveRef(refId);
|
|
828
903
|
|
|
829
904
|
if (!resolved) {
|
|
830
|
-
return
|
|
905
|
+
return {
|
|
906
|
+
success: false,
|
|
907
|
+
message: `Ref ${refLabel} not found. Take a new snapshot with browser_snapshot.`,
|
|
908
|
+
};
|
|
831
909
|
}
|
|
832
910
|
|
|
833
911
|
if (resolved.error) {
|
|
@@ -837,9 +915,7 @@ export class BrowserController {
|
|
|
837
915
|
await new Promise((r) => setTimeout(r, 500));
|
|
838
916
|
continue;
|
|
839
917
|
}
|
|
840
|
-
|
|
841
|
-
const ref = this.refCache.get(refId);
|
|
842
|
-
return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
|
|
918
|
+
return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
|
|
843
919
|
}
|
|
844
920
|
|
|
845
921
|
// Element is actionable — small delay after scroll for rendering
|
|
@@ -875,13 +951,10 @@ export class BrowserController {
|
|
|
875
951
|
});
|
|
876
952
|
|
|
877
953
|
await new Promise((r) => setTimeout(r, 300));
|
|
878
|
-
|
|
879
|
-
return `Clicked [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
954
|
+
return { success: true, message: `Clicked ${refLabel}` };
|
|
880
955
|
}
|
|
881
956
|
|
|
882
|
-
|
|
883
|
-
const ref = this.refCache.get(refId);
|
|
884
|
-
return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
|
|
957
|
+
return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
|
|
885
958
|
}
|
|
886
959
|
|
|
887
960
|
/**
|
|
@@ -889,48 +962,61 @@ export class BrowserController {
|
|
|
889
962
|
* Clicks to focus, selects all existing text (Ctrl/Cmd+A), then uses
|
|
890
963
|
* Input.insertText for reliable text insertion across all frameworks.
|
|
891
964
|
*/
|
|
892
|
-
async typeRef(refId: number, text: string): Promise<
|
|
965
|
+
/**
 * Replace the contents of the element identified by `refId` with `text`.
 *
 * Steps: click the ref to focus it, clear the existing value (select-all +
 * Backspace, with an in-page JS fallback when the keystrokes did not empty
 * the field), then insert the new text through CDP `Input.insertText`.
 *
 * @param refId Ref id from the most recent snapshot.
 * @param text Text to place into the element.
 * @returns `success: false` with an explanatory message when the click fails
 *          or the element can no longer be found; otherwise `success: true`.
 */
async typeRef(refId: number, text: string): Promise<RefActionResult> {
  this.ensureConnected();
  const ref = this.refCache.get(refId);
  const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;

  // Click to focus the element
  const clickResult = await this.clickRef(refId);
  if (!clickResult.success) return clickResult;
  await new Promise((r) => setTimeout(r, 100));

  // Clear existing text using multiple strategies for reliability:
  // 1. Try Ctrl/Cmd+A to select all, then Backspace to delete
  const selectAllKey = platform() === "darwin" ? "Meta+a" : "Control+a";
  await this.pressKey(selectAllKey);
  await new Promise((r) => setTimeout(r, 50));
  await this.pressKey("Backspace");
  await new Promise((r) => setTimeout(r, 50));

  // 2. Verify the field is empty; if not, fall back to JS-based clearing.
  //    The native value setter must come from the prototype that matches the
  //    element: invoking HTMLInputElement's setter on a <textarea> throws
  //    ("Illegal invocation"), which would abort the script and leave the old
  //    text in place.
  const cleared = await this.send("Runtime.evaluate", {
    expression: `
      (function() {
        var el = document.querySelector('[data-assistme-ref="${refId}"]');
        if (!el) return 'no_element';
        if (el.value !== undefined && el.value !== '') {
          // Ctrl+A didn't work (some frameworks intercept it) — clear via JS
          var proto = null;
          if (el instanceof window.HTMLTextAreaElement) {
            proto = window.HTMLTextAreaElement.prototype;
          } else if (el instanceof window.HTMLInputElement) {
            proto = window.HTMLInputElement.prototype;
          }
          var desc = proto ? Object.getOwnPropertyDescriptor(proto, 'value') : null;
          if (desc && desc.set) desc.set.call(el, '');
          else el.value = '';
          el.dispatchEvent(new Event('input', { bubbles: true }));
          el.dispatchEvent(new Event('change', { bubbles: true }));
          return 'js_cleared';
        }
        return 'ok';
      })()
    `,
    returnByValue: true,
  });
  const clearStatus = ((cleared as CDPEvalResult).result?.value as string) || "ok";
  if (clearStatus === "no_element") {
    return {
      success: false,
      message: `Ref ${refLabel} not found after click. Take a new snapshot.`,
    };
  }

  // Insert text via CDP (goes through the browser's input pipeline)
  await this.send("Input.insertText", { text });

  await new Promise((r) => setTimeout(r, 100));
  return { success: true, message: `Typed "${text}" into ${refLabel}` };
}
|
|
935
1021
|
|
|
936
1022
|
/**
|
|
@@ -938,21 +1024,22 @@ export class BrowserController {
|
|
|
938
1024
|
* ref's data attribute as selector, handling both native <select> and
|
|
939
1025
|
* custom dropdown components.
|
|
940
1026
|
*/
|
|
941
|
-
async selectRef(refId: number, option: string): Promise<
|
|
1027
|
+
/**
 * Select `option` in the dropdown identified by `refId`.
 *
 * Delegates to `selectOption` using the ref's data attribute as the CSS
 * selector, then rewrites that selector in the returned text to the
 * human-readable ref label.
 *
 * @param refId Ref id from the most recent snapshot.
 * @param option Option to select, passed through to `selectOption`.
 * @returns `success: false` when the ref is not cached or when the
 *          `selectOption` result text contains "not found".
 */
async selectRef(refId: number, option: string): Promise<RefActionResult> {
  this.ensureConnected();

  const cached = this.refCache.get(refId);
  if (!cached) {
    return {
      success: false,
      message: `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`,
    };
  }

  const refLabel = `[${refId}] ${cached.role} "${cached.name}"`;
  const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);
  // Use a function replacer so the label is inserted literally: an accessible
  // name containing "$&", "$$", "$`" or "$'" would otherwise be interpreted as
  // a replacement pattern and corrupt the message.
  const message = result.replace(/\[data-assistme-ref="\d+"\]/, () => refLabel);
  // NOTE(review): success is inferred from the result text; an option whose own
  // text contains "not found" would presumably be reported as a failure —
  // confirm against selectOption's messages.
  const success = !result.includes("not found");
  return { success, message };
}
|
|
957
1044
|
|
|
958
1045
|
// ── Action Pipeline ───────────────────────────────────────────────
|
|
@@ -977,18 +1064,24 @@ export class BrowserController {
|
|
|
977
1064
|
|
|
978
1065
|
try {
|
|
979
1066
|
switch (spec.action) {
|
|
980
|
-
case "click":
|
|
981
|
-
|
|
982
|
-
|
|
1067
|
+
case "click": {
|
|
1068
|
+
const r = await this.clickRef(spec.ref);
|
|
1069
|
+
result = r.message;
|
|
1070
|
+
success = r.success;
|
|
983
1071
|
break;
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
1072
|
+
}
|
|
1073
|
+
case "type": {
|
|
1074
|
+
const r = await this.typeRef(spec.ref, spec.text);
|
|
1075
|
+
result = r.message;
|
|
1076
|
+
success = r.success;
|
|
987
1077
|
break;
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
1078
|
+
}
|
|
1079
|
+
case "select": {
|
|
1080
|
+
const r = await this.selectRef(spec.ref, spec.option);
|
|
1081
|
+
result = r.message;
|
|
1082
|
+
success = r.success;
|
|
991
1083
|
break;
|
|
1084
|
+
}
|
|
992
1085
|
case "press":
|
|
993
1086
|
result = await this.pressKey(spec.key);
|
|
994
1087
|
break;
|
|
@@ -1074,15 +1167,24 @@ export class BrowserController {
|
|
|
1074
1167
|
// Strategy 2: Custom dropdown — find the trigger element
|
|
1075
1168
|
var trigger = selectEl;
|
|
1076
1169
|
if (!trigger) {
|
|
1077
|
-
// Try finding by label
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1170
|
+
// Try finding by aria-label first (fast, indexed)
|
|
1171
|
+
trigger = document.querySelector('[aria-label="' + sel.replace(/"/g, '\\"') + '"]');
|
|
1172
|
+
}
|
|
1173
|
+
if (!trigger) {
|
|
1174
|
+
// Try finding by label/placeholder text in likely dropdown elements
|
|
1175
|
+
var dropdownCandidates = document.querySelectorAll(
|
|
1176
|
+
'button, [role="combobox"], [role="listbox"], [role="button"], ' +
|
|
1177
|
+
'select, input, .MuiSelect-root, .MuiInput-root, ' +
|
|
1178
|
+
'[class*="select"], [class*="dropdown"], [class*="picker"]'
|
|
1179
|
+
);
|
|
1180
|
+
for (var j = 0; j < dropdownCandidates.length; j++) {
|
|
1181
|
+
var el = dropdownCandidates[j];
|
|
1081
1182
|
var ownText = Array.from(el.childNodes)
|
|
1082
1183
|
.filter(function(n) { return n.nodeType === 3; })
|
|
1083
1184
|
.map(function(n) { return n.textContent.trim(); })
|
|
1084
1185
|
.join('');
|
|
1085
|
-
if (ownText === sel || el.getAttribute('aria-label') === sel
|
|
1186
|
+
if (ownText === sel || el.getAttribute('aria-label') === sel ||
|
|
1187
|
+
el.getAttribute('placeholder') === sel) {
|
|
1086
1188
|
trigger = el;
|
|
1087
1189
|
break;
|
|
1088
1190
|
}
|
|
@@ -1119,10 +1221,13 @@ export class BrowserController {
|
|
|
1119
1221
|
}
|
|
1120
1222
|
}
|
|
1121
1223
|
|
|
1122
|
-
// Broader search:
|
|
1123
|
-
var
|
|
1124
|
-
|
|
1125
|
-
|
|
1224
|
+
// Broader search: visible leaf elements in interactive containers
|
|
1225
|
+
var broadCandidates = document.querySelectorAll(
|
|
1226
|
+
'li, span, div, a, button, label, [role="option"], [role="menuitem"], ' +
|
|
1227
|
+
'[role="menuitemradio"], [role="menuitemcheckbox"], [data-value]'
|
|
1228
|
+
);
|
|
1229
|
+
for (var m = 0; m < broadCandidates.length; m++) {
|
|
1230
|
+
var candidate = broadCandidates[m];
|
|
1126
1231
|
if (candidate.textContent && candidate.textContent.trim() === optText &&
|
|
1127
1232
|
candidate.offsetParent !== null && candidate.children.length === 0) {
|
|
1128
1233
|
candidate.click();
|
|
@@ -1217,6 +1322,7 @@ export class BrowserController {
|
|
|
1217
1322
|
|
|
1218
1323
|
private async waitForLoad(timeoutMs = 8000): Promise<void> {
|
|
1219
1324
|
const start = Date.now();
|
|
1325
|
+
let sawInteractive = false;
|
|
1220
1326
|
while (Date.now() - start < timeoutMs) {
|
|
1221
1327
|
try {
|
|
1222
1328
|
const result = await this.send("Runtime.evaluate", {
|
|
@@ -1224,71 +1330,27 @@ export class BrowserController {
|
|
|
1224
1330
|
returnByValue: true,
|
|
1225
1331
|
});
|
|
1226
1332
|
const state = (result as CDPEvalResult).result?.value;
|
|
1227
|
-
if (state === "complete"
|
|
1228
|
-
//
|
|
1229
|
-
await new Promise((r) => setTimeout(r,
|
|
1333
|
+
if (state === "complete") {
|
|
1334
|
+
// Fully loaded — brief wait for dynamic content
|
|
1335
|
+
await new Promise((r) => setTimeout(r, 300));
|
|
1230
1336
|
return;
|
|
1231
1337
|
}
|
|
1338
|
+
if (state === "interactive") {
|
|
1339
|
+
if (!sawInteractive) {
|
|
1340
|
+
sawInteractive = true;
|
|
1341
|
+
// DOM is ready but sub-resources still loading — give it more
|
|
1342
|
+
// time to reach "complete" before settling for "interactive"
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1232
1345
|
} catch {
|
|
1233
1346
|
// Tab might be navigating
|
|
1234
1347
|
}
|
|
1235
1348
|
await new Promise((r) => setTimeout(r, 300));
|
|
1236
1349
|
}
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
*/
|
|
1242
|
-
async getInteractiveElements(): Promise<string> {
|
|
1243
|
-
this.ensureConnected();
|
|
1244
|
-
const result = await this.send("Runtime.evaluate", {
|
|
1245
|
-
expression: `
|
|
1246
|
-
(function() {
|
|
1247
|
-
const elements = [];
|
|
1248
|
-
const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
|
|
1249
|
-
const all = document.querySelectorAll(selectors);
|
|
1250
|
-
for (let i = 0; i < all.length && elements.length < 50; i++) {
|
|
1251
|
-
const el = all[i];
|
|
1252
|
-
const rect = el.getBoundingClientRect();
|
|
1253
|
-
if (rect.width === 0 || rect.height === 0) continue; // Skip hidden
|
|
1254
|
-
|
|
1255
|
-
// Build a reliable CSS selector
|
|
1256
|
-
let selector;
|
|
1257
|
-
if (el.id) {
|
|
1258
|
-
selector = '#' + CSS.escape(el.id);
|
|
1259
|
-
} else if (el.getAttribute('data-testid')) {
|
|
1260
|
-
selector = '[data-testid="' + el.getAttribute('data-testid') + '"]';
|
|
1261
|
-
} else {
|
|
1262
|
-
// Build a path-based selector: find nth-of-type among siblings
|
|
1263
|
-
const tag = el.tagName.toLowerCase();
|
|
1264
|
-
const parent = el.parentElement;
|
|
1265
|
-
if (parent) {
|
|
1266
|
-
const siblings = parent.querySelectorAll(':scope > ' + tag);
|
|
1267
|
-
const idx = Array.from(siblings).indexOf(el) + 1;
|
|
1268
|
-
selector = tag + ':nth-of-type(' + idx + ')';
|
|
1269
|
-
} else {
|
|
1270
|
-
selector = tag;
|
|
1271
|
-
}
|
|
1272
|
-
}
|
|
1273
|
-
|
|
1274
|
-
elements.push({
|
|
1275
|
-
tag: el.tagName.toLowerCase(),
|
|
1276
|
-
text: (el.textContent || '').trim().slice(0, 80),
|
|
1277
|
-
type: el.getAttribute('type') || '',
|
|
1278
|
-
name: el.getAttribute('name') || '',
|
|
1279
|
-
id: el.id || '',
|
|
1280
|
-
href: el.getAttribute('href') || '',
|
|
1281
|
-
placeholder: el.getAttribute('placeholder') || '',
|
|
1282
|
-
selector: selector,
|
|
1283
|
-
});
|
|
1284
|
-
}
|
|
1285
|
-
return JSON.stringify(elements, null, 2);
|
|
1286
|
-
})()
|
|
1287
|
-
`,
|
|
1288
|
-
returnByValue: true,
|
|
1289
|
-
});
|
|
1290
|
-
|
|
1291
|
-
return ((result as CDPEvalResult).result?.value as string) || "[]";
|
|
1350
|
+
// Timed out — if we at least saw "interactive", that's usually good enough
|
|
1351
|
+
if (sawInteractive) {
|
|
1352
|
+
await new Promise((r) => setTimeout(r, 300));
|
|
1353
|
+
}
|
|
1292
1354
|
}
|
|
1293
1355
|
|
|
1294
1356
|
isConnected(): boolean {
|
package/src/browser/types.ts
CHANGED
|
@@ -62,6 +62,12 @@ export interface ActionResult {
|
|
|
62
62
|
success: boolean;
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
+
/**
 * Outcome of a ref-based interaction (click, type, select), pairing a
 * machine-checkable flag with the text reported back to the caller.
 */
export interface RefActionResult {
  /** True when the interaction was carried out, false when it failed. */
  success: boolean;
  /** Human-readable description of what happened or why it failed. */
  message: string;
}
|
|
70
|
+
|
|
65
71
|
export interface AutoLaunchResult {
|
|
66
72
|
success: boolean;
|
|
67
73
|
action: "already_available" | "launched" | "chrome_not_found" | "launch_failed" | "port_conflict";
|