assistme 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +188 -157
- package/package.json +1 -1
- package/src/agent/processor.ts +8 -1
- package/src/agent/system-prompt.ts +1 -1
- package/src/browser/chrome-launcher.ts +6 -4
- package/src/browser/controller.ts +196 -134
- package/src/browser/types.ts +6 -0
- package/src/mcp/browser-server.ts +16 -33
- package/src/tools/browser.ts +1 -0
- package/src/tools/index.ts +0 -3
package/dist/index.js
CHANGED
|
@@ -493,11 +493,20 @@ URL: ${info.url}`;
|
|
|
493
493
|
}
|
|
494
494
|
async goBack() {
|
|
495
495
|
this.ensureConnected();
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
496
|
+
try {
|
|
497
|
+
const history = await this.send("Page.getNavigationHistory");
|
|
498
|
+
const idx = history.currentIndex ?? 0;
|
|
499
|
+
const entries = history.entries ?? [];
|
|
500
|
+
if (idx > 0 && entries[idx - 1]) {
|
|
501
|
+
await this.send("Page.navigateToHistoryEntry", {
|
|
502
|
+
entryId: entries[idx - 1].id
|
|
503
|
+
});
|
|
504
|
+
} else {
|
|
505
|
+
await this.evaluate("window.history.back()");
|
|
506
|
+
}
|
|
507
|
+
} catch {
|
|
508
|
+
await this.evaluate("window.history.back()");
|
|
509
|
+
}
|
|
501
510
|
await this.waitForLoad();
|
|
502
511
|
const info = await this.getPageInfo();
|
|
503
512
|
return `Went back to: ${info.title}`;
|
|
@@ -665,29 +674,80 @@ URL: ${info.url}`;
|
|
|
665
674
|
Tab: { keyCode: 9, code: "Tab" },
|
|
666
675
|
Escape: { keyCode: 27, code: "Escape" },
|
|
667
676
|
Backspace: { keyCode: 8, code: "Backspace" },
|
|
677
|
+
Delete: { keyCode: 46, code: "Delete" },
|
|
668
678
|
ArrowDown: { keyCode: 40, code: "ArrowDown" },
|
|
669
|
-
ArrowUp: { keyCode: 38, code: "ArrowUp" }
|
|
679
|
+
ArrowUp: { keyCode: 38, code: "ArrowUp" },
|
|
680
|
+
ArrowLeft: { keyCode: 37, code: "ArrowLeft" },
|
|
681
|
+
ArrowRight: { keyCode: 39, code: "ArrowRight" },
|
|
682
|
+
Home: { keyCode: 36, code: "Home" },
|
|
683
|
+
End: { keyCode: 35, code: "End" },
|
|
684
|
+
Space: { keyCode: 32, code: "Space" }
|
|
685
|
+
};
|
|
686
|
+
const modifierMap = {
|
|
687
|
+
Alt: 1,
|
|
688
|
+
Control: 2,
|
|
689
|
+
Meta: 4,
|
|
690
|
+
Shift: 8
|
|
670
691
|
};
|
|
671
|
-
const
|
|
692
|
+
const parts = key.split("+");
|
|
693
|
+
let modifiers = 0;
|
|
694
|
+
let actualKey = parts[parts.length - 1];
|
|
695
|
+
for (let i = 0; i < parts.length - 1; i++) {
|
|
696
|
+
const mod = modifierMap[parts[i]];
|
|
697
|
+
if (mod) modifiers |= mod;
|
|
698
|
+
}
|
|
699
|
+
const mapped = keyMap[actualKey];
|
|
672
700
|
if (mapped) {
|
|
673
701
|
await this.send("Input.dispatchKeyEvent", {
|
|
674
702
|
type: "keyDown",
|
|
675
|
-
key,
|
|
703
|
+
key: actualKey,
|
|
676
704
|
code: mapped.code,
|
|
677
705
|
windowsVirtualKeyCode: mapped.keyCode,
|
|
678
|
-
nativeVirtualKeyCode: mapped.keyCode
|
|
706
|
+
nativeVirtualKeyCode: mapped.keyCode,
|
|
707
|
+
modifiers
|
|
679
708
|
});
|
|
680
709
|
await this.send("Input.dispatchKeyEvent", {
|
|
681
710
|
type: "keyUp",
|
|
682
|
-
key,
|
|
711
|
+
key: actualKey,
|
|
683
712
|
code: mapped.code,
|
|
684
713
|
windowsVirtualKeyCode: mapped.keyCode,
|
|
685
|
-
nativeVirtualKeyCode: mapped.keyCode
|
|
714
|
+
nativeVirtualKeyCode: mapped.keyCode,
|
|
715
|
+
modifiers
|
|
716
|
+
});
|
|
717
|
+
} else if (actualKey.length === 1) {
|
|
718
|
+
const code = `Key${actualKey.toUpperCase()}`;
|
|
719
|
+
const keyCode = actualKey.toUpperCase().charCodeAt(0);
|
|
720
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
721
|
+
type: "keyDown",
|
|
722
|
+
key: actualKey,
|
|
723
|
+
code,
|
|
724
|
+
windowsVirtualKeyCode: keyCode,
|
|
725
|
+
nativeVirtualKeyCode: keyCode,
|
|
726
|
+
modifiers
|
|
727
|
+
});
|
|
728
|
+
if (!modifiers) {
|
|
729
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
730
|
+
type: "char",
|
|
731
|
+
text: actualKey,
|
|
732
|
+
modifiers
|
|
733
|
+
});
|
|
734
|
+
}
|
|
735
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
736
|
+
type: "keyUp",
|
|
737
|
+
key: actualKey,
|
|
738
|
+
code,
|
|
739
|
+
modifiers
|
|
686
740
|
});
|
|
687
741
|
} else {
|
|
688
742
|
await this.send("Input.dispatchKeyEvent", {
|
|
689
|
-
type: "
|
|
690
|
-
|
|
743
|
+
type: "keyDown",
|
|
744
|
+
key: actualKey,
|
|
745
|
+
modifiers
|
|
746
|
+
});
|
|
747
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
748
|
+
type: "keyUp",
|
|
749
|
+
key: actualKey,
|
|
750
|
+
modifiers
|
|
691
751
|
});
|
|
692
752
|
}
|
|
693
753
|
return `Pressed key: ${key}`;
|
|
@@ -1061,12 +1121,17 @@ Refs:
|
|
|
1061
1121
|
*/
|
|
1062
1122
|
async clickRef(refId) {
|
|
1063
1123
|
this.ensureConnected();
|
|
1124
|
+
const ref = this.refCache.get(refId);
|
|
1125
|
+
const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
1064
1126
|
const maxRetries = 3;
|
|
1065
1127
|
let lastError = "";
|
|
1066
1128
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
1067
1129
|
const resolved = await this.resolveRef(refId);
|
|
1068
1130
|
if (!resolved) {
|
|
1069
|
-
return
|
|
1131
|
+
return {
|
|
1132
|
+
success: false,
|
|
1133
|
+
message: `Ref ${refLabel} not found. Take a new snapshot with browser_snapshot.`
|
|
1134
|
+
};
|
|
1070
1135
|
}
|
|
1071
1136
|
if (resolved.error) {
|
|
1072
1137
|
lastError = resolved.error;
|
|
@@ -1074,8 +1139,7 @@ Refs:
|
|
|
1074
1139
|
await new Promise((r) => setTimeout(r, 500));
|
|
1075
1140
|
continue;
|
|
1076
1141
|
}
|
|
1077
|
-
|
|
1078
|
-
return `Cannot click [${refId}] ${ref3?.role || ""} "${ref3?.name || ""}": ${lastError}`;
|
|
1142
|
+
return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
|
|
1079
1143
|
}
|
|
1080
1144
|
if (attempt === 0) {
|
|
1081
1145
|
await new Promise((r) => setTimeout(r, 50));
|
|
@@ -1105,11 +1169,9 @@ Refs:
|
|
|
1105
1169
|
clickCount: 1
|
|
1106
1170
|
});
|
|
1107
1171
|
await new Promise((r) => setTimeout(r, 300));
|
|
1108
|
-
|
|
1109
|
-
return `Clicked [${refId}] ${ref2?.role || ""} "${ref2?.name || ""}"`;
|
|
1172
|
+
return { success: true, message: `Clicked ${refLabel}` };
|
|
1110
1173
|
}
|
|
1111
|
-
|
|
1112
|
-
return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
|
|
1174
|
+
return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
|
|
1113
1175
|
}
|
|
1114
1176
|
/**
|
|
1115
1177
|
* Type text into an element by ref using CDP Input events.
|
|
@@ -1118,37 +1180,49 @@ Refs:
|
|
|
1118
1180
|
*/
|
|
1119
1181
|
async typeRef(refId, text) {
|
|
1120
1182
|
this.ensureConnected();
|
|
1183
|
+
const ref = this.refCache.get(refId);
|
|
1184
|
+
const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
1121
1185
|
const clickResult = await this.clickRef(refId);
|
|
1122
|
-
if (clickResult.
|
|
1186
|
+
if (!clickResult.success) return clickResult;
|
|
1123
1187
|
await new Promise((r) => setTimeout(r, 100));
|
|
1124
|
-
const
|
|
1125
|
-
await this.
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1188
|
+
const selectAllKey = platform() === "darwin" ? "Meta+a" : "Control+a";
|
|
1189
|
+
await this.pressKey(selectAllKey);
|
|
1190
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
1191
|
+
await this.pressKey("Backspace");
|
|
1192
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
1193
|
+
const cleared = await this.send("Runtime.evaluate", {
|
|
1194
|
+
expression: `
|
|
1195
|
+
(function() {
|
|
1196
|
+
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
1197
|
+
if (!el) return 'no_element';
|
|
1198
|
+
if (el.value !== undefined && el.value !== '') {
|
|
1199
|
+
// Ctrl+A didn't work (some frameworks intercept it) \u2014 clear via JS
|
|
1200
|
+
var setter = Object.getOwnPropertyDescriptor(
|
|
1201
|
+
window.HTMLInputElement.prototype, 'value'
|
|
1202
|
+
)?.set || Object.getOwnPropertyDescriptor(
|
|
1203
|
+
window.HTMLTextAreaElement.prototype, 'value'
|
|
1204
|
+
)?.set;
|
|
1205
|
+
if (setter) setter.call(el, '');
|
|
1206
|
+
else el.value = '';
|
|
1207
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
1208
|
+
el.dispatchEvent(new Event('change', { bubbles: true }));
|
|
1209
|
+
return 'js_cleared';
|
|
1210
|
+
}
|
|
1211
|
+
return 'ok';
|
|
1212
|
+
})()
|
|
1213
|
+
`,
|
|
1214
|
+
returnByValue: true
|
|
1147
1215
|
});
|
|
1216
|
+
const clearStatus = cleared.result?.value || "ok";
|
|
1217
|
+
if (clearStatus === "no_element") {
|
|
1218
|
+
return {
|
|
1219
|
+
success: false,
|
|
1220
|
+
message: `Ref ${refLabel} not found after click. Take a new snapshot.`
|
|
1221
|
+
};
|
|
1222
|
+
}
|
|
1148
1223
|
await this.send("Input.insertText", { text });
|
|
1149
1224
|
await new Promise((r) => setTimeout(r, 100));
|
|
1150
|
-
|
|
1151
|
-
return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
1225
|
+
return { success: true, message: `Typed "${text}" into ${refLabel}` };
|
|
1152
1226
|
}
|
|
1153
1227
|
/**
|
|
1154
1228
|
* Select a dropdown option by ref. Delegates to selectOption with the
|
|
@@ -1159,13 +1233,16 @@ Refs:
|
|
|
1159
1233
|
this.ensureConnected();
|
|
1160
1234
|
const cached = this.refCache.get(refId);
|
|
1161
1235
|
if (!cached) {
|
|
1162
|
-
return
|
|
1236
|
+
return {
|
|
1237
|
+
success: false,
|
|
1238
|
+
message: `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`
|
|
1239
|
+
};
|
|
1163
1240
|
}
|
|
1241
|
+
const refLabel = `[${refId}] ${cached.role} "${cached.name}"`;
|
|
1164
1242
|
const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
);
|
|
1243
|
+
const message = result.replace(/\[data-assistme-ref="\d+"\]/, refLabel);
|
|
1244
|
+
const success = !result.includes("not found");
|
|
1245
|
+
return { success, message };
|
|
1169
1246
|
}
|
|
1170
1247
|
// ── Action Pipeline ───────────────────────────────────────────────
|
|
1171
1248
|
/**
|
|
@@ -1183,18 +1260,24 @@ Refs:
|
|
|
1183
1260
|
let success = true;
|
|
1184
1261
|
try {
|
|
1185
1262
|
switch (spec.action) {
|
|
1186
|
-
case "click":
|
|
1187
|
-
|
|
1188
|
-
|
|
1263
|
+
case "click": {
|
|
1264
|
+
const r = await this.clickRef(spec.ref);
|
|
1265
|
+
result = r.message;
|
|
1266
|
+
success = r.success;
|
|
1189
1267
|
break;
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1268
|
+
}
|
|
1269
|
+
case "type": {
|
|
1270
|
+
const r = await this.typeRef(spec.ref, spec.text);
|
|
1271
|
+
result = r.message;
|
|
1272
|
+
success = r.success;
|
|
1193
1273
|
break;
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1274
|
+
}
|
|
1275
|
+
case "select": {
|
|
1276
|
+
const r = await this.selectRef(spec.ref, spec.option);
|
|
1277
|
+
result = r.message;
|
|
1278
|
+
success = r.success;
|
|
1197
1279
|
break;
|
|
1280
|
+
}
|
|
1198
1281
|
case "press":
|
|
1199
1282
|
result = await this.pressKey(spec.key);
|
|
1200
1283
|
break;
|
|
@@ -1269,15 +1352,24 @@ Refs:
|
|
|
1269
1352
|
// Strategy 2: Custom dropdown \u2014 find the trigger element
|
|
1270
1353
|
var trigger = selectEl;
|
|
1271
1354
|
if (!trigger) {
|
|
1272
|
-
// Try finding by label
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1355
|
+
// Try finding by aria-label first (fast, indexed)
|
|
1356
|
+
trigger = document.querySelector('[aria-label="' + sel.replace(/"/g, '\\"') + '"]');
|
|
1357
|
+
}
|
|
1358
|
+
if (!trigger) {
|
|
1359
|
+
// Try finding by label/placeholder text in likely dropdown elements
|
|
1360
|
+
var dropdownCandidates = document.querySelectorAll(
|
|
1361
|
+
'button, [role="combobox"], [role="listbox"], [role="button"], ' +
|
|
1362
|
+
'select, input, .MuiSelect-root, .MuiInput-root, ' +
|
|
1363
|
+
'[class*="select"], [class*="dropdown"], [class*="picker"]'
|
|
1364
|
+
);
|
|
1365
|
+
for (var j = 0; j < dropdownCandidates.length; j++) {
|
|
1366
|
+
var el = dropdownCandidates[j];
|
|
1276
1367
|
var ownText = Array.from(el.childNodes)
|
|
1277
1368
|
.filter(function(n) { return n.nodeType === 3; })
|
|
1278
1369
|
.map(function(n) { return n.textContent.trim(); })
|
|
1279
1370
|
.join('');
|
|
1280
|
-
if (ownText === sel || el.getAttribute('aria-label') === sel
|
|
1371
|
+
if (ownText === sel || el.getAttribute('aria-label') === sel ||
|
|
1372
|
+
el.getAttribute('placeholder') === sel) {
|
|
1281
1373
|
trigger = el;
|
|
1282
1374
|
break;
|
|
1283
1375
|
}
|
|
@@ -1314,10 +1406,13 @@ Refs:
|
|
|
1314
1406
|
}
|
|
1315
1407
|
}
|
|
1316
1408
|
|
|
1317
|
-
// Broader search:
|
|
1318
|
-
var
|
|
1319
|
-
|
|
1320
|
-
|
|
1409
|
+
// Broader search: visible leaf elements in interactive containers
|
|
1410
|
+
var broadCandidates = document.querySelectorAll(
|
|
1411
|
+
'li, span, div, a, button, label, [role="option"], [role="menuitem"], ' +
|
|
1412
|
+
'[role="menuitemradio"], [role="menuitemcheckbox"], [data-value]'
|
|
1413
|
+
);
|
|
1414
|
+
for (var m = 0; m < broadCandidates.length; m++) {
|
|
1415
|
+
var candidate = broadCandidates[m];
|
|
1321
1416
|
if (candidate.textContent && candidate.textContent.trim() === optText &&
|
|
1322
1417
|
candidate.offsetParent !== null && candidate.children.length === 0) {
|
|
1323
1418
|
candidate.click();
|
|
@@ -1390,6 +1485,7 @@ Refs:
|
|
|
1390
1485
|
// ── Helpers ─────────────────────────────────────────────────────
|
|
1391
1486
|
async waitForLoad(timeoutMs = 8e3) {
|
|
1392
1487
|
const start = Date.now();
|
|
1488
|
+
let sawInteractive = false;
|
|
1393
1489
|
while (Date.now() - start < timeoutMs) {
|
|
1394
1490
|
try {
|
|
1395
1491
|
const result = await this.send("Runtime.evaluate", {
|
|
@@ -1397,67 +1493,22 @@ Refs:
|
|
|
1397
1493
|
returnByValue: true
|
|
1398
1494
|
});
|
|
1399
1495
|
const state = result.result?.value;
|
|
1400
|
-
if (state === "complete"
|
|
1401
|
-
await new Promise((r) => setTimeout(r,
|
|
1496
|
+
if (state === "complete") {
|
|
1497
|
+
await new Promise((r) => setTimeout(r, 300));
|
|
1402
1498
|
return;
|
|
1403
1499
|
}
|
|
1500
|
+
if (state === "interactive") {
|
|
1501
|
+
if (!sawInteractive) {
|
|
1502
|
+
sawInteractive = true;
|
|
1503
|
+
}
|
|
1504
|
+
}
|
|
1404
1505
|
} catch {
|
|
1405
1506
|
}
|
|
1406
1507
|
await new Promise((r) => setTimeout(r, 300));
|
|
1407
1508
|
}
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
*/
|
|
1412
|
-
async getInteractiveElements() {
|
|
1413
|
-
this.ensureConnected();
|
|
1414
|
-
const result = await this.send("Runtime.evaluate", {
|
|
1415
|
-
expression: `
|
|
1416
|
-
(function() {
|
|
1417
|
-
const elements = [];
|
|
1418
|
-
const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
|
|
1419
|
-
const all = document.querySelectorAll(selectors);
|
|
1420
|
-
for (let i = 0; i < all.length && elements.length < 50; i++) {
|
|
1421
|
-
const el = all[i];
|
|
1422
|
-
const rect = el.getBoundingClientRect();
|
|
1423
|
-
if (rect.width === 0 || rect.height === 0) continue; // Skip hidden
|
|
1424
|
-
|
|
1425
|
-
// Build a reliable CSS selector
|
|
1426
|
-
let selector;
|
|
1427
|
-
if (el.id) {
|
|
1428
|
-
selector = '#' + CSS.escape(el.id);
|
|
1429
|
-
} else if (el.getAttribute('data-testid')) {
|
|
1430
|
-
selector = '[data-testid="' + el.getAttribute('data-testid') + '"]';
|
|
1431
|
-
} else {
|
|
1432
|
-
// Build a path-based selector: find nth-of-type among siblings
|
|
1433
|
-
const tag = el.tagName.toLowerCase();
|
|
1434
|
-
const parent = el.parentElement;
|
|
1435
|
-
if (parent) {
|
|
1436
|
-
const siblings = parent.querySelectorAll(':scope > ' + tag);
|
|
1437
|
-
const idx = Array.from(siblings).indexOf(el) + 1;
|
|
1438
|
-
selector = tag + ':nth-of-type(' + idx + ')';
|
|
1439
|
-
} else {
|
|
1440
|
-
selector = tag;
|
|
1441
|
-
}
|
|
1442
|
-
}
|
|
1443
|
-
|
|
1444
|
-
elements.push({
|
|
1445
|
-
tag: el.tagName.toLowerCase(),
|
|
1446
|
-
text: (el.textContent || '').trim().slice(0, 80),
|
|
1447
|
-
type: el.getAttribute('type') || '',
|
|
1448
|
-
name: el.getAttribute('name') || '',
|
|
1449
|
-
id: el.id || '',
|
|
1450
|
-
href: el.getAttribute('href') || '',
|
|
1451
|
-
placeholder: el.getAttribute('placeholder') || '',
|
|
1452
|
-
selector: selector,
|
|
1453
|
-
});
|
|
1454
|
-
}
|
|
1455
|
-
return JSON.stringify(elements, null, 2);
|
|
1456
|
-
})()
|
|
1457
|
-
`,
|
|
1458
|
-
returnByValue: true
|
|
1459
|
-
});
|
|
1460
|
-
return result.result?.value || "[]";
|
|
1509
|
+
if (sawInteractive) {
|
|
1510
|
+
await new Promise((r) => setTimeout(r, 300));
|
|
1511
|
+
}
|
|
1461
1512
|
}
|
|
1462
1513
|
isConnected() {
|
|
1463
1514
|
return this.connected && this.ws?.readyState === WebSocket.OPEN;
|
|
@@ -1796,12 +1847,14 @@ async function ensureBrowserAvailable(port = 9222) {
|
|
|
1796
1847
|
detail: "Could not start browser with remote debugging. Possible causes:\n 1) Another assistme debug browser is already using port " + port + "\n 2) The browser crashed on startup\nTry: rm -rf ~/.assistme/browser-profile && assistme"
|
|
1797
1848
|
};
|
|
1798
1849
|
}
|
|
1799
|
-
var
|
|
1850
|
+
var browserInstances = /* @__PURE__ */ new Map();
|
|
1800
1851
|
function getBrowser(port = 9222) {
|
|
1801
|
-
|
|
1802
|
-
|
|
1852
|
+
let instance = browserInstances.get(port);
|
|
1853
|
+
if (!instance) {
|
|
1854
|
+
instance = new BrowserController(port);
|
|
1855
|
+
browserInstances.set(port, instance);
|
|
1803
1856
|
}
|
|
1804
|
-
return
|
|
1857
|
+
return instance;
|
|
1805
1858
|
}
|
|
1806
1859
|
|
|
1807
1860
|
// src/commands/browser.ts
|
|
@@ -3508,9 +3561,6 @@ async function executeTool(name, input) {
|
|
|
3508
3561
|
case "browser_scroll":
|
|
3509
3562
|
await ensureConnected(browser);
|
|
3510
3563
|
return input.direction === "up" ? browser.scrollUp() : browser.scrollDown();
|
|
3511
|
-
case "browser_get_elements":
|
|
3512
|
-
await ensureConnected(browser);
|
|
3513
|
-
return browser.getInteractiveElements();
|
|
3514
3564
|
case "browser_select":
|
|
3515
3565
|
await ensureConnected(browser);
|
|
3516
3566
|
return browser.selectOption(input.selector, input.option);
|
|
@@ -3691,7 +3741,6 @@ var BROWSER_TOOL_NAMES = [
|
|
|
3691
3741
|
"browser_type",
|
|
3692
3742
|
"browser_press_key",
|
|
3693
3743
|
"browser_scroll",
|
|
3694
|
-
"browser_get_elements",
|
|
3695
3744
|
"browser_select",
|
|
3696
3745
|
"browser_snapshot",
|
|
3697
3746
|
"browser_act",
|
|
@@ -3734,13 +3783,7 @@ function createBrowserMcpServer() {
|
|
|
3734
3783
|
const base64 = await executeTool("browser_screenshot", {});
|
|
3735
3784
|
if (base64.length > 100) {
|
|
3736
3785
|
return {
|
|
3737
|
-
content: [
|
|
3738
|
-
{
|
|
3739
|
-
type: "image",
|
|
3740
|
-
data: base64,
|
|
3741
|
-
mimeType: "image/png"
|
|
3742
|
-
}
|
|
3743
|
-
]
|
|
3786
|
+
content: [{ type: "image", data: base64, mimeType: "image/png" }]
|
|
3744
3787
|
};
|
|
3745
3788
|
}
|
|
3746
3789
|
return { content: [{ type: "text", text: base64 }] };
|
|
@@ -3773,12 +3816,6 @@ function createBrowserMcpServer() {
|
|
|
3773
3816
|
{ direction: z.string().describe("'down' or 'up'") },
|
|
3774
3817
|
async (args) => callTool("browser_scroll", args)
|
|
3775
3818
|
),
|
|
3776
|
-
tool(
|
|
3777
|
-
"browser_get_elements",
|
|
3778
|
-
"Find all interactive elements (links, buttons, inputs) on the current page.",
|
|
3779
|
-
{},
|
|
3780
|
-
async () => callTool("browser_get_elements", {})
|
|
3781
|
-
),
|
|
3782
3819
|
tool(
|
|
3783
3820
|
"browser_select",
|
|
3784
3821
|
"Select an option from a dropdown menu. Handles both native <select> elements and custom dropdowns (Material Design, React, Angular). Use this instead of manually clicking dropdown items.",
|
|
@@ -3807,11 +3844,7 @@ function createBrowserMcpServer() {
|
|
|
3807
3844
|
const imageData = parts[1] || "";
|
|
3808
3845
|
const content = [];
|
|
3809
3846
|
if (imageData.length > 100) {
|
|
3810
|
-
content.push({
|
|
3811
|
-
type: "image",
|
|
3812
|
-
data: imageData,
|
|
3813
|
-
mimeType: "image/png"
|
|
3814
|
-
});
|
|
3847
|
+
content.push({ type: "image", data: imageData, mimeType: "image/png" });
|
|
3815
3848
|
}
|
|
3816
3849
|
content.push({ type: "text", text: refTable });
|
|
3817
3850
|
return { content };
|
|
@@ -3847,11 +3880,7 @@ function createBrowserMcpServer() {
|
|
|
3847
3880
|
const content = [];
|
|
3848
3881
|
content.push({ type: "text", text: actionText });
|
|
3849
3882
|
if (screenshotData.length > 100) {
|
|
3850
|
-
content.push({
|
|
3851
|
-
type: "image",
|
|
3852
|
-
data: screenshotData,
|
|
3853
|
-
mimeType: "image/png"
|
|
3854
|
-
});
|
|
3883
|
+
content.push({ type: "image", data: screenshotData, mimeType: "image/png" });
|
|
3855
3884
|
}
|
|
3856
3885
|
return { content };
|
|
3857
3886
|
}
|
|
@@ -5125,7 +5154,7 @@ Available capabilities:
|
|
|
5125
5154
|
- Refs persist across actions unless the page navigates. Re-snapshot after navigation or major DOM changes.
|
|
5126
5155
|
|
|
5127
5156
|
**Legacy tools (still available, use when refs don't work):**
|
|
5128
|
-
- browser_click, browser_type, browser_select,
|
|
5157
|
+
- browser_click, browser_type, browser_select, browser_screenshot, browser_evaluate
|
|
5129
5158
|
- browser_click supports :contains('text') pseudo-selectors
|
|
5130
5159
|
- browser_select handles native and custom dropdowns
|
|
5131
5160
|
|
|
@@ -5441,7 +5470,9 @@ var TaskProcessor = class {
|
|
|
5441
5470
|
} finally {
|
|
5442
5471
|
clearTimeout(timeoutId);
|
|
5443
5472
|
}
|
|
5444
|
-
|
|
5473
|
+
const MAX_CONTENT_LENGTH = 5e4;
|
|
5474
|
+
const truncatedResponse = finalResponse.length > MAX_CONTENT_LENGTH ? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]" : finalResponse;
|
|
5475
|
+
await withRetry(() => completeTask(task.id, truncatedResponse, tokenUsage), {
|
|
5445
5476
|
maxRetries: 2,
|
|
5446
5477
|
baseDelayMs: 300,
|
|
5447
5478
|
label: "completeTask"
|
package/package.json
CHANGED
package/src/agent/processor.ts
CHANGED
|
@@ -305,8 +305,15 @@ export class TaskProcessor {
|
|
|
305
305
|
clearTimeout(timeoutId);
|
|
306
306
|
}
|
|
307
307
|
|
|
308
|
+
// Truncate finalResponse to avoid edge function payload limits
|
|
309
|
+
const MAX_CONTENT_LENGTH = 50_000;
|
|
310
|
+
const truncatedResponse =
|
|
311
|
+
finalResponse.length > MAX_CONTENT_LENGTH
|
|
312
|
+
? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]"
|
|
313
|
+
: finalResponse;
|
|
314
|
+
|
|
308
315
|
// Complete the task (with retry for transient DB failures)
|
|
309
|
-
await withRetry(() => completeTask(task.id,
|
|
316
|
+
await withRetry(() => completeTask(task.id, truncatedResponse, tokenUsage), {
|
|
310
317
|
maxRetries: 2,
|
|
311
318
|
baseDelayMs: 300,
|
|
312
319
|
label: "completeTask",
|
|
@@ -28,7 +28,7 @@ Available capabilities:
|
|
|
28
28
|
- Refs persist across actions unless the page navigates. Re-snapshot after navigation or major DOM changes.
|
|
29
29
|
|
|
30
30
|
**Legacy tools (still available, use when refs don't work):**
|
|
31
|
-
- browser_click, browser_type, browser_select,
|
|
31
|
+
- browser_click, browser_type, browser_select, browser_screenshot, browser_evaluate
|
|
32
32
|
- browser_click supports :contains('text') pseudo-selectors
|
|
33
33
|
- browser_select handles native and custom dropdowns
|
|
34
34
|
|
|
@@ -545,11 +545,13 @@ export async function ensureBrowserAvailable(port = 9222): Promise<AutoLaunchRes
|
|
|
545
545
|
|
|
546
546
|
// ── Singleton ───────────────────────────────────────────────────────
|
|
547
547
|
|
|
548
|
-
|
|
548
|
+
const browserInstances = new Map<number, BrowserController>();
|
|
549
549
|
|
|
550
550
|
export function getBrowser(port = 9222): BrowserController {
|
|
551
|
-
|
|
552
|
-
|
|
551
|
+
let instance = browserInstances.get(port);
|
|
552
|
+
if (!instance) {
|
|
553
|
+
instance = new BrowserController(port);
|
|
554
|
+
browserInstances.set(port, instance);
|
|
553
555
|
}
|
|
554
|
-
return
|
|
556
|
+
return instance;
|
|
555
557
|
}
|
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
SnapshotResult,
|
|
11
11
|
ActionSpec,
|
|
12
12
|
ActionResult,
|
|
13
|
+
RefActionResult,
|
|
13
14
|
} from "./types.js";
|
|
14
15
|
|
|
15
16
|
export class BrowserController {
|
|
@@ -198,11 +199,26 @@ export class BrowserController {
|
|
|
198
199
|
|
|
199
200
|
async goBack(): Promise<string> {
|
|
200
201
|
this.ensureConnected();
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
202
|
+
try {
|
|
203
|
+
// Get navigation history and go to the previous entry
|
|
204
|
+
const history = (await this.send("Page.getNavigationHistory")) as {
|
|
205
|
+
currentIndex?: number;
|
|
206
|
+
entries?: Array<{ id: number }>;
|
|
207
|
+
};
|
|
208
|
+
const idx = history.currentIndex ?? 0;
|
|
209
|
+
const entries = history.entries ?? [];
|
|
210
|
+
if (idx > 0 && entries[idx - 1]) {
|
|
211
|
+
await this.send("Page.navigateToHistoryEntry", {
|
|
212
|
+
entryId: entries[idx - 1].id,
|
|
213
|
+
});
|
|
214
|
+
} else {
|
|
215
|
+
// No previous entry in CDP history — use JS fallback
|
|
216
|
+
await this.evaluate("window.history.back()");
|
|
217
|
+
}
|
|
218
|
+
} catch {
|
|
219
|
+
// CDP history API failed — use JS fallback
|
|
220
|
+
await this.evaluate("window.history.back()");
|
|
221
|
+
}
|
|
206
222
|
await this.waitForLoad();
|
|
207
223
|
const info = await this.getPageInfo();
|
|
208
224
|
return `Went back to: ${info.title}`;
|
|
@@ -394,31 +410,88 @@ export class BrowserController {
|
|
|
394
410
|
Tab: { keyCode: 9, code: "Tab" },
|
|
395
411
|
Escape: { keyCode: 27, code: "Escape" },
|
|
396
412
|
Backspace: { keyCode: 8, code: "Backspace" },
|
|
413
|
+
Delete: { keyCode: 46, code: "Delete" },
|
|
397
414
|
ArrowDown: { keyCode: 40, code: "ArrowDown" },
|
|
398
415
|
ArrowUp: { keyCode: 38, code: "ArrowUp" },
|
|
416
|
+
ArrowLeft: { keyCode: 37, code: "ArrowLeft" },
|
|
417
|
+
ArrowRight: { keyCode: 39, code: "ArrowRight" },
|
|
418
|
+
Home: { keyCode: 36, code: "Home" },
|
|
419
|
+
End: { keyCode: 35, code: "End" },
|
|
420
|
+
Space: { keyCode: 32, code: "Space" },
|
|
399
421
|
};
|
|
400
422
|
|
|
401
|
-
|
|
423
|
+
// CDP modifier bitmask values
|
|
424
|
+
const modifierMap: Record<string, number> = {
|
|
425
|
+
Alt: 1,
|
|
426
|
+
Control: 2,
|
|
427
|
+
Meta: 4,
|
|
428
|
+
Shift: 8,
|
|
429
|
+
};
|
|
430
|
+
|
|
431
|
+
// Parse modifier combos like "Control+a", "Meta+Shift+z"
|
|
432
|
+
const parts = key.split("+");
|
|
433
|
+
let modifiers = 0;
|
|
434
|
+
let actualKey = parts[parts.length - 1];
|
|
435
|
+
for (let i = 0; i < parts.length - 1; i++) {
|
|
436
|
+
const mod = modifierMap[parts[i]];
|
|
437
|
+
if (mod) modifiers |= mod;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
const mapped = keyMap[actualKey];
|
|
402
441
|
if (mapped) {
|
|
403
442
|
await this.send("Input.dispatchKeyEvent", {
|
|
404
443
|
type: "keyDown",
|
|
405
|
-
key,
|
|
444
|
+
key: actualKey,
|
|
406
445
|
code: mapped.code,
|
|
407
446
|
windowsVirtualKeyCode: mapped.keyCode,
|
|
408
447
|
nativeVirtualKeyCode: mapped.keyCode,
|
|
448
|
+
modifiers,
|
|
409
449
|
});
|
|
410
450
|
await this.send("Input.dispatchKeyEvent", {
|
|
411
451
|
type: "keyUp",
|
|
412
|
-
key,
|
|
452
|
+
key: actualKey,
|
|
413
453
|
code: mapped.code,
|
|
414
454
|
windowsVirtualKeyCode: mapped.keyCode,
|
|
415
455
|
nativeVirtualKeyCode: mapped.keyCode,
|
|
456
|
+
modifiers,
|
|
457
|
+
});
|
|
458
|
+
} else if (actualKey.length === 1) {
|
|
459
|
+
// Single character key (e.g., "a", "z")
|
|
460
|
+
const code = `Key${actualKey.toUpperCase()}`;
|
|
461
|
+
const keyCode = actualKey.toUpperCase().charCodeAt(0);
|
|
462
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
463
|
+
type: "keyDown",
|
|
464
|
+
key: actualKey,
|
|
465
|
+
code,
|
|
466
|
+
windowsVirtualKeyCode: keyCode,
|
|
467
|
+
nativeVirtualKeyCode: keyCode,
|
|
468
|
+
modifiers,
|
|
469
|
+
});
|
|
470
|
+
if (!modifiers) {
|
|
471
|
+
// Only insert text for unmodified single characters
|
|
472
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
473
|
+
type: "char",
|
|
474
|
+
text: actualKey,
|
|
475
|
+
modifiers,
|
|
476
|
+
});
|
|
477
|
+
}
|
|
478
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
479
|
+
type: "keyUp",
|
|
480
|
+
key: actualKey,
|
|
481
|
+
code,
|
|
482
|
+
modifiers,
|
|
416
483
|
});
|
|
417
484
|
} else {
|
|
418
|
-
//
|
|
485
|
+
// Unknown key name — try as-is
|
|
419
486
|
await this.send("Input.dispatchKeyEvent", {
|
|
420
|
-
type: "
|
|
421
|
-
|
|
487
|
+
type: "keyDown",
|
|
488
|
+
key: actualKey,
|
|
489
|
+
modifiers,
|
|
490
|
+
});
|
|
491
|
+
await this.send("Input.dispatchKeyEvent", {
|
|
492
|
+
type: "keyUp",
|
|
493
|
+
key: actualKey,
|
|
494
|
+
modifiers,
|
|
422
495
|
});
|
|
423
496
|
}
|
|
424
497
|
|
|
@@ -816,8 +889,10 @@ export class BrowserController {
|
|
|
816
889
|
* element is not yet actionable (e.g., covered by a loading overlay, still
|
|
817
890
|
* animating into view). This matches Playwright's auto-waiting behavior.
|
|
818
891
|
*/
|
|
819
|
-
async clickRef(refId: number): Promise<
|
|
892
|
+
async clickRef(refId: number): Promise<RefActionResult> {
|
|
820
893
|
this.ensureConnected();
|
|
894
|
+
const ref = this.refCache.get(refId);
|
|
895
|
+
const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
821
896
|
|
|
822
897
|
// Auto-wait: retry up to 3 times if element is not actionable yet
|
|
823
898
|
const maxRetries = 3;
|
|
@@ -827,7 +902,10 @@ export class BrowserController {
|
|
|
827
902
|
const resolved = await this.resolveRef(refId);
|
|
828
903
|
|
|
829
904
|
if (!resolved) {
|
|
830
|
-
return
|
|
905
|
+
return {
|
|
906
|
+
success: false,
|
|
907
|
+
message: `Ref ${refLabel} not found. Take a new snapshot with browser_snapshot.`,
|
|
908
|
+
};
|
|
831
909
|
}
|
|
832
910
|
|
|
833
911
|
if (resolved.error) {
|
|
@@ -837,9 +915,7 @@ export class BrowserController {
|
|
|
837
915
|
await new Promise((r) => setTimeout(r, 500));
|
|
838
916
|
continue;
|
|
839
917
|
}
|
|
840
|
-
|
|
841
|
-
const ref = this.refCache.get(refId);
|
|
842
|
-
return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
|
|
918
|
+
return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
|
|
843
919
|
}
|
|
844
920
|
|
|
845
921
|
// Element is actionable — small delay after scroll for rendering
|
|
@@ -875,13 +951,10 @@ export class BrowserController {
|
|
|
875
951
|
});
|
|
876
952
|
|
|
877
953
|
await new Promise((r) => setTimeout(r, 300));
|
|
878
|
-
|
|
879
|
-
return `Clicked [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
954
|
+
return { success: true, message: `Clicked ${refLabel}` };
|
|
880
955
|
}
|
|
881
956
|
|
|
882
|
-
|
|
883
|
-
const ref = this.refCache.get(refId);
|
|
884
|
-
return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
|
|
957
|
+
return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
|
|
885
958
|
}
|
|
886
959
|
|
|
887
960
|
/**
|
|
@@ -889,48 +962,61 @@ export class BrowserController {
|
|
|
889
962
|
* Clicks to focus, selects all existing text (Ctrl/Cmd+A), then uses
|
|
890
963
|
* Input.insertText for reliable text insertion across all frameworks.
|
|
891
964
|
*/
|
|
892
|
-
async typeRef(refId: number, text: string): Promise<
|
|
965
|
+
async typeRef(refId: number, text: string): Promise<RefActionResult> {
|
|
893
966
|
this.ensureConnected();
|
|
967
|
+
const ref = this.refCache.get(refId);
|
|
968
|
+
const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
894
969
|
|
|
895
970
|
// Click to focus the element
|
|
896
971
|
const clickResult = await this.clickRef(refId);
|
|
897
|
-
if (clickResult.
|
|
972
|
+
if (!clickResult.success) return clickResult;
|
|
898
973
|
await new Promise((r) => setTimeout(r, 100));
|
|
899
974
|
|
|
900
|
-
//
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
windowsVirtualKeyCode: 65,
|
|
908
|
-
});
|
|
909
|
-
await this.send("Input.dispatchKeyEvent", {
|
|
910
|
-
type: "keyUp",
|
|
911
|
-
key: "a",
|
|
912
|
-
code: "KeyA",
|
|
913
|
-
});
|
|
975
|
+
// Clear existing text using multiple strategies for reliability:
|
|
976
|
+
// 1. Try Ctrl/Cmd+A to select all, then Backspace to delete
|
|
977
|
+
const selectAllKey = platform() === "darwin" ? "Meta+a" : "Control+a";
|
|
978
|
+
await this.pressKey(selectAllKey);
|
|
979
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
980
|
+
await this.pressKey("Backspace");
|
|
981
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
914
982
|
|
|
915
|
-
//
|
|
916
|
-
await this.send("
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
983
|
+
// 2. Verify the field is empty; if not, fall back to JS-based clearing
|
|
984
|
+
const cleared = await this.send("Runtime.evaluate", {
|
|
985
|
+
expression: `
|
|
986
|
+
(function() {
|
|
987
|
+
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
988
|
+
if (!el) return 'no_element';
|
|
989
|
+
if (el.value !== undefined && el.value !== '') {
|
|
990
|
+
// Ctrl+A didn't work (some frameworks intercept it) — clear via JS
|
|
991
|
+
var setter = Object.getOwnPropertyDescriptor(
|
|
992
|
+
window.HTMLInputElement.prototype, 'value'
|
|
993
|
+
)?.set || Object.getOwnPropertyDescriptor(
|
|
994
|
+
window.HTMLTextAreaElement.prototype, 'value'
|
|
995
|
+
)?.set;
|
|
996
|
+
if (setter) setter.call(el, '');
|
|
997
|
+
else el.value = '';
|
|
998
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
999
|
+
el.dispatchEvent(new Event('change', { bubbles: true }));
|
|
1000
|
+
return 'js_cleared';
|
|
1001
|
+
}
|
|
1002
|
+
return 'ok';
|
|
1003
|
+
})()
|
|
1004
|
+
`,
|
|
1005
|
+
returnByValue: true,
|
|
926
1006
|
});
|
|
1007
|
+
const clearStatus = ((cleared as CDPEvalResult).result?.value as string) || "ok";
|
|
1008
|
+
if (clearStatus === "no_element") {
|
|
1009
|
+
return {
|
|
1010
|
+
success: false,
|
|
1011
|
+
message: `Ref ${refLabel} not found after click. Take a new snapshot.`,
|
|
1012
|
+
};
|
|
1013
|
+
}
|
|
927
1014
|
|
|
928
1015
|
// Insert text via CDP (goes through the browser's input pipeline)
|
|
929
1016
|
await this.send("Input.insertText", { text });
|
|
930
1017
|
|
|
931
1018
|
await new Promise((r) => setTimeout(r, 100));
|
|
932
|
-
|
|
933
|
-
return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
|
|
1019
|
+
return { success: true, message: `Typed "${text}" into ${refLabel}` };
|
|
934
1020
|
}
|
|
935
1021
|
|
|
936
1022
|
/**
|
|
@@ -938,21 +1024,22 @@ export class BrowserController {
|
|
|
938
1024
|
* ref's data attribute as selector, handling both native <select> and
|
|
939
1025
|
* custom dropdown components.
|
|
940
1026
|
*/
|
|
941
|
-
async selectRef(refId: number, option: string): Promise<
|
|
1027
|
+
async selectRef(refId: number, option: string): Promise<RefActionResult> {
|
|
942
1028
|
this.ensureConnected();
|
|
943
1029
|
|
|
944
|
-
// Check if ref exists
|
|
945
1030
|
const cached = this.refCache.get(refId);
|
|
946
1031
|
if (!cached) {
|
|
947
|
-
return
|
|
1032
|
+
return {
|
|
1033
|
+
success: false,
|
|
1034
|
+
message: `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`,
|
|
1035
|
+
};
|
|
948
1036
|
}
|
|
949
1037
|
|
|
950
|
-
|
|
1038
|
+
const refLabel = `[${refId}] ${cached.role} "${cached.name}"`;
|
|
951
1039
|
const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
);
|
|
1040
|
+
const message = result.replace(/\[data-assistme-ref="\d+"\]/, refLabel);
|
|
1041
|
+
const success = !result.includes("not found");
|
|
1042
|
+
return { success, message };
|
|
956
1043
|
}
|
|
957
1044
|
|
|
958
1045
|
// ── Action Pipeline ───────────────────────────────────────────────
|
|
@@ -977,18 +1064,24 @@ export class BrowserController {
|
|
|
977
1064
|
|
|
978
1065
|
try {
|
|
979
1066
|
switch (spec.action) {
|
|
980
|
-
case "click":
|
|
981
|
-
|
|
982
|
-
|
|
1067
|
+
case "click": {
|
|
1068
|
+
const r = await this.clickRef(spec.ref);
|
|
1069
|
+
result = r.message;
|
|
1070
|
+
success = r.success;
|
|
983
1071
|
break;
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
1072
|
+
}
|
|
1073
|
+
case "type": {
|
|
1074
|
+
const r = await this.typeRef(spec.ref, spec.text);
|
|
1075
|
+
result = r.message;
|
|
1076
|
+
success = r.success;
|
|
987
1077
|
break;
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
1078
|
+
}
|
|
1079
|
+
case "select": {
|
|
1080
|
+
const r = await this.selectRef(spec.ref, spec.option);
|
|
1081
|
+
result = r.message;
|
|
1082
|
+
success = r.success;
|
|
991
1083
|
break;
|
|
1084
|
+
}
|
|
992
1085
|
case "press":
|
|
993
1086
|
result = await this.pressKey(spec.key);
|
|
994
1087
|
break;
|
|
@@ -1074,15 +1167,24 @@ export class BrowserController {
|
|
|
1074
1167
|
// Strategy 2: Custom dropdown — find the trigger element
|
|
1075
1168
|
var trigger = selectEl;
|
|
1076
1169
|
if (!trigger) {
|
|
1077
|
-
// Try finding by label
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1170
|
+
// Try finding by aria-label first (fast, indexed)
|
|
1171
|
+
trigger = document.querySelector('[aria-label="' + sel.replace(/"/g, '\\"') + '"]');
|
|
1172
|
+
}
|
|
1173
|
+
if (!trigger) {
|
|
1174
|
+
// Try finding by label/placeholder text in likely dropdown elements
|
|
1175
|
+
var dropdownCandidates = document.querySelectorAll(
|
|
1176
|
+
'button, [role="combobox"], [role="listbox"], [role="button"], ' +
|
|
1177
|
+
'select, input, .MuiSelect-root, .MuiInput-root, ' +
|
|
1178
|
+
'[class*="select"], [class*="dropdown"], [class*="picker"]'
|
|
1179
|
+
);
|
|
1180
|
+
for (var j = 0; j < dropdownCandidates.length; j++) {
|
|
1181
|
+
var el = dropdownCandidates[j];
|
|
1081
1182
|
var ownText = Array.from(el.childNodes)
|
|
1082
1183
|
.filter(function(n) { return n.nodeType === 3; })
|
|
1083
1184
|
.map(function(n) { return n.textContent.trim(); })
|
|
1084
1185
|
.join('');
|
|
1085
|
-
if (ownText === sel || el.getAttribute('aria-label') === sel
|
|
1186
|
+
if (ownText === sel || el.getAttribute('aria-label') === sel ||
|
|
1187
|
+
el.getAttribute('placeholder') === sel) {
|
|
1086
1188
|
trigger = el;
|
|
1087
1189
|
break;
|
|
1088
1190
|
}
|
|
@@ -1119,10 +1221,13 @@ export class BrowserController {
|
|
|
1119
1221
|
}
|
|
1120
1222
|
}
|
|
1121
1223
|
|
|
1122
|
-
// Broader search:
|
|
1123
|
-
var
|
|
1124
|
-
|
|
1125
|
-
|
|
1224
|
+
// Broader search: visible leaf elements in interactive containers
|
|
1225
|
+
var broadCandidates = document.querySelectorAll(
|
|
1226
|
+
'li, span, div, a, button, label, [role="option"], [role="menuitem"], ' +
|
|
1227
|
+
'[role="menuitemradio"], [role="menuitemcheckbox"], [data-value]'
|
|
1228
|
+
);
|
|
1229
|
+
for (var m = 0; m < broadCandidates.length; m++) {
|
|
1230
|
+
var candidate = broadCandidates[m];
|
|
1126
1231
|
if (candidate.textContent && candidate.textContent.trim() === optText &&
|
|
1127
1232
|
candidate.offsetParent !== null && candidate.children.length === 0) {
|
|
1128
1233
|
candidate.click();
|
|
@@ -1217,6 +1322,7 @@ export class BrowserController {
|
|
|
1217
1322
|
|
|
1218
1323
|
private async waitForLoad(timeoutMs = 8000): Promise<void> {
|
|
1219
1324
|
const start = Date.now();
|
|
1325
|
+
let sawInteractive = false;
|
|
1220
1326
|
while (Date.now() - start < timeoutMs) {
|
|
1221
1327
|
try {
|
|
1222
1328
|
const result = await this.send("Runtime.evaluate", {
|
|
@@ -1224,71 +1330,27 @@ export class BrowserController {
|
|
|
1224
1330
|
returnByValue: true,
|
|
1225
1331
|
});
|
|
1226
1332
|
const state = (result as CDPEvalResult).result?.value;
|
|
1227
|
-
if (state === "complete"
|
|
1228
|
-
//
|
|
1229
|
-
await new Promise((r) => setTimeout(r,
|
|
1333
|
+
if (state === "complete") {
|
|
1334
|
+
// Fully loaded — brief wait for dynamic content
|
|
1335
|
+
await new Promise((r) => setTimeout(r, 300));
|
|
1230
1336
|
return;
|
|
1231
1337
|
}
|
|
1338
|
+
if (state === "interactive") {
|
|
1339
|
+
if (!sawInteractive) {
|
|
1340
|
+
sawInteractive = true;
|
|
1341
|
+
// DOM is ready but sub-resources still loading — give it more
|
|
1342
|
+
// time to reach "complete" before settling for "interactive"
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1232
1345
|
} catch {
|
|
1233
1346
|
// Tab might be navigating
|
|
1234
1347
|
}
|
|
1235
1348
|
await new Promise((r) => setTimeout(r, 300));
|
|
1236
1349
|
}
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
*/
|
|
1242
|
-
async getInteractiveElements(): Promise<string> {
|
|
1243
|
-
this.ensureConnected();
|
|
1244
|
-
const result = await this.send("Runtime.evaluate", {
|
|
1245
|
-
expression: `
|
|
1246
|
-
(function() {
|
|
1247
|
-
const elements = [];
|
|
1248
|
-
const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
|
|
1249
|
-
const all = document.querySelectorAll(selectors);
|
|
1250
|
-
for (let i = 0; i < all.length && elements.length < 50; i++) {
|
|
1251
|
-
const el = all[i];
|
|
1252
|
-
const rect = el.getBoundingClientRect();
|
|
1253
|
-
if (rect.width === 0 || rect.height === 0) continue; // Skip hidden
|
|
1254
|
-
|
|
1255
|
-
// Build a reliable CSS selector
|
|
1256
|
-
let selector;
|
|
1257
|
-
if (el.id) {
|
|
1258
|
-
selector = '#' + CSS.escape(el.id);
|
|
1259
|
-
} else if (el.getAttribute('data-testid')) {
|
|
1260
|
-
selector = '[data-testid="' + el.getAttribute('data-testid') + '"]';
|
|
1261
|
-
} else {
|
|
1262
|
-
// Build a path-based selector: find nth-of-type among siblings
|
|
1263
|
-
const tag = el.tagName.toLowerCase();
|
|
1264
|
-
const parent = el.parentElement;
|
|
1265
|
-
if (parent) {
|
|
1266
|
-
const siblings = parent.querySelectorAll(':scope > ' + tag);
|
|
1267
|
-
const idx = Array.from(siblings).indexOf(el) + 1;
|
|
1268
|
-
selector = tag + ':nth-of-type(' + idx + ')';
|
|
1269
|
-
} else {
|
|
1270
|
-
selector = tag;
|
|
1271
|
-
}
|
|
1272
|
-
}
|
|
1273
|
-
|
|
1274
|
-
elements.push({
|
|
1275
|
-
tag: el.tagName.toLowerCase(),
|
|
1276
|
-
text: (el.textContent || '').trim().slice(0, 80),
|
|
1277
|
-
type: el.getAttribute('type') || '',
|
|
1278
|
-
name: el.getAttribute('name') || '',
|
|
1279
|
-
id: el.id || '',
|
|
1280
|
-
href: el.getAttribute('href') || '',
|
|
1281
|
-
placeholder: el.getAttribute('placeholder') || '',
|
|
1282
|
-
selector: selector,
|
|
1283
|
-
});
|
|
1284
|
-
}
|
|
1285
|
-
return JSON.stringify(elements, null, 2);
|
|
1286
|
-
})()
|
|
1287
|
-
`,
|
|
1288
|
-
returnByValue: true,
|
|
1289
|
-
});
|
|
1290
|
-
|
|
1291
|
-
return ((result as CDPEvalResult).result?.value as string) || "[]";
|
|
1350
|
+
// Timed out — if we at least saw "interactive", that's usually good enough
|
|
1351
|
+
if (sawInteractive) {
|
|
1352
|
+
await new Promise((r) => setTimeout(r, 300));
|
|
1353
|
+
}
|
|
1292
1354
|
}
|
|
1293
1355
|
|
|
1294
1356
|
isConnected(): boolean {
|
package/src/browser/types.ts
CHANGED
|
@@ -62,6 +62,12 @@ export interface ActionResult {
|
|
|
62
62
|
success: boolean;
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
+
/** Structured result from ref-based interactions (click, type, select). */
|
|
66
|
+
export interface RefActionResult {
|
|
67
|
+
success: boolean;
|
|
68
|
+
message: string;
|
|
69
|
+
}
|
|
70
|
+
|
|
65
71
|
export interface AutoLaunchResult {
|
|
66
72
|
success: boolean;
|
|
67
73
|
action: "already_available" | "launched" | "chrome_not_found" | "launch_failed" | "port_conflict";
|
|
@@ -9,11 +9,15 @@ import { getLimiterForTool } from "../utils/rate-limiter.js";
|
|
|
9
9
|
|
|
10
10
|
// ── Helper ──────────────────────────────────────────────────────────
|
|
11
11
|
|
|
12
|
+
/** MCP content block — text or image. */
|
|
13
|
+
type ContentBlock =
|
|
14
|
+
| { type: "text"; text: string }
|
|
15
|
+
| { type: "image"; data: string; mimeType: string };
|
|
16
|
+
|
|
17
|
+
type ToolResult = { content: ContentBlock[] };
|
|
18
|
+
|
|
12
19
|
/** Wrap executeTool with rate limiting and text result. */
|
|
13
|
-
async function callTool(
|
|
14
|
-
name: string,
|
|
15
|
-
input: Record<string, unknown>
|
|
16
|
-
): Promise<{ content: Array<{ type: "text"; text: string }> }> {
|
|
20
|
+
async function callTool(name: string, input: Record<string, unknown>): Promise<ToolResult> {
|
|
17
21
|
const limiter = getLimiterForTool(name);
|
|
18
22
|
if (limiter) await limiter.acquire();
|
|
19
23
|
const result = await executeTool(name, input);
|
|
@@ -31,7 +35,6 @@ export const BROWSER_TOOL_NAMES = [
|
|
|
31
35
|
"browser_type",
|
|
32
36
|
"browser_press_key",
|
|
33
37
|
"browser_scroll",
|
|
34
|
-
"browser_get_elements",
|
|
35
38
|
"browser_select",
|
|
36
39
|
"browser_snapshot",
|
|
37
40
|
"browser_act",
|
|
@@ -69,19 +72,13 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
69
72
|
"browser_screenshot",
|
|
70
73
|
"Take a screenshot of the current browser page. Returns a base64-encoded PNG image.",
|
|
71
74
|
{},
|
|
72
|
-
async () => {
|
|
75
|
+
async (): Promise<ToolResult> => {
|
|
73
76
|
const limiter = getLimiterForTool("browser_screenshot");
|
|
74
77
|
if (limiter) await limiter.acquire();
|
|
75
78
|
const base64 = await executeTool("browser_screenshot", {});
|
|
76
79
|
if (base64.length > 100) {
|
|
77
80
|
return {
|
|
78
|
-
content: [
|
|
79
|
-
{
|
|
80
|
-
type: "image" as const,
|
|
81
|
-
data: base64,
|
|
82
|
-
mimeType: "image/png",
|
|
83
|
-
} as unknown as { type: "text"; text: string },
|
|
84
|
-
],
|
|
81
|
+
content: [{ type: "image", data: base64, mimeType: "image/png" }],
|
|
85
82
|
};
|
|
86
83
|
}
|
|
87
84
|
return { content: [{ type: "text", text: base64 }] };
|
|
@@ -114,12 +111,6 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
114
111
|
{ direction: z.string().describe("'down' or 'up'") },
|
|
115
112
|
async (args) => callTool("browser_scroll", args)
|
|
116
113
|
),
|
|
117
|
-
tool(
|
|
118
|
-
"browser_get_elements",
|
|
119
|
-
"Find all interactive elements (links, buttons, inputs) on the current page.",
|
|
120
|
-
{},
|
|
121
|
-
async () => callTool("browser_get_elements", {})
|
|
122
|
-
),
|
|
123
114
|
tool(
|
|
124
115
|
"browser_select",
|
|
125
116
|
"Select an option from a dropdown menu. Handles both native <select> elements and custom dropdowns (Material Design, React, Angular). Use this instead of manually clicking dropdown items.",
|
|
@@ -149,7 +140,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
149
140
|
"Overlay ref badges on the screenshot. Default false. Use true for simple pages where visual context helps."
|
|
150
141
|
),
|
|
151
142
|
},
|
|
152
|
-
async (args) => {
|
|
143
|
+
async (args): Promise<ToolResult> => {
|
|
153
144
|
const limiter = getLimiterForTool("browser_snapshot");
|
|
154
145
|
if (limiter) await limiter.acquire();
|
|
155
146
|
const result = await executeTool("browser_snapshot", args);
|
|
@@ -159,13 +150,9 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
159
150
|
const refTable = parts[0];
|
|
160
151
|
const imageData = parts[1] || "";
|
|
161
152
|
|
|
162
|
-
const content:
|
|
153
|
+
const content: ContentBlock[] = [];
|
|
163
154
|
if (imageData.length > 100) {
|
|
164
|
-
content.push({
|
|
165
|
-
type: "image" as const,
|
|
166
|
-
data: imageData,
|
|
167
|
-
mimeType: "image/png",
|
|
168
|
-
} as unknown as { type: "text"; text: string });
|
|
155
|
+
content.push({ type: "image", data: imageData, mimeType: "image/png" });
|
|
169
156
|
}
|
|
170
157
|
content.push({ type: "text", text: refTable });
|
|
171
158
|
|
|
@@ -197,7 +184,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
197
184
|
.optional()
|
|
198
185
|
.describe("Take screenshot after actions (default: false)"),
|
|
199
186
|
},
|
|
200
|
-
async (args) => {
|
|
187
|
+
async (args): Promise<ToolResult> => {
|
|
201
188
|
const limiter = getLimiterForTool("browser_act");
|
|
202
189
|
if (limiter) await limiter.acquire();
|
|
203
190
|
const result = await executeTool("browser_act", {
|
|
@@ -210,14 +197,10 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
210
197
|
const actionText = parts[0];
|
|
211
198
|
const screenshotData = parts[1] || "";
|
|
212
199
|
|
|
213
|
-
const content:
|
|
200
|
+
const content: ContentBlock[] = [];
|
|
214
201
|
content.push({ type: "text", text: actionText });
|
|
215
202
|
if (screenshotData.length > 100) {
|
|
216
|
-
content.push({
|
|
217
|
-
type: "image" as const,
|
|
218
|
-
data: screenshotData,
|
|
219
|
-
mimeType: "image/png",
|
|
220
|
-
} as unknown as { type: "text"; text: string });
|
|
203
|
+
content.push({ type: "image", data: screenshotData, mimeType: "image/png" });
|
|
221
204
|
}
|
|
222
205
|
|
|
223
206
|
return { content };
|
package/src/tools/browser.ts
CHANGED
package/src/tools/index.ts
CHANGED
|
@@ -169,9 +169,6 @@ export async function executeTool(name: string, input: Record<string, unknown>):
|
|
|
169
169
|
case "browser_scroll":
|
|
170
170
|
await ensureConnected(browser);
|
|
171
171
|
return (input.direction as string) === "up" ? browser.scrollUp() : browser.scrollDown();
|
|
172
|
-
case "browser_get_elements":
|
|
173
|
-
await ensureConnected(browser);
|
|
174
|
-
return browser.getInteractiveElements();
|
|
175
172
|
case "browser_select":
|
|
176
173
|
await ensureConnected(browser);
|
|
177
174
|
return browser.selectOption(input.selector as string, input.option as string);
|