assistme 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +349 -10
- package/package.json +1 -1
- package/src/browser/controller.ts +420 -10
- package/src/mcp/browser-server.ts +1 -1
package/dist/index.js
CHANGED
|
@@ -360,6 +360,8 @@ var BrowserController = class {
|
|
|
360
360
|
connected = false;
|
|
361
361
|
currentTabId = null;
|
|
362
362
|
refCache = /* @__PURE__ */ new Map();
|
|
363
|
+
frameContexts = /* @__PURE__ */ new Map();
|
|
364
|
+
// refId → contextId
|
|
363
365
|
constructor(port = 9222) {
|
|
364
366
|
this.debugPort = port;
|
|
365
367
|
}
|
|
@@ -653,13 +655,46 @@ URL: ${info.url}`;
|
|
|
653
655
|
const result = await this.send("Runtime.evaluate", {
|
|
654
656
|
expression: `
|
|
655
657
|
(function() {
|
|
656
|
-
|
|
658
|
+
var el = document.querySelector(${selectorJS});
|
|
659
|
+
|
|
660
|
+
// If not found in main document, search same-origin iframes
|
|
661
|
+
if (!el) {
|
|
662
|
+
var iframes = document.querySelectorAll('iframe');
|
|
663
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
664
|
+
try {
|
|
665
|
+
var iframeDoc = iframes[i].contentDocument;
|
|
666
|
+
if (iframeDoc) {
|
|
667
|
+
el = iframeDoc.querySelector(${selectorJS});
|
|
668
|
+
if (el) break;
|
|
669
|
+
}
|
|
670
|
+
} catch(e) { /* cross-origin, skip */ }
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
|
|
657
674
|
if (!el) return 'Element not found: ' + ${selectorJS};
|
|
658
675
|
|
|
659
676
|
el.focus();
|
|
660
677
|
|
|
661
|
-
//
|
|
662
|
-
|
|
678
|
+
// Check if this is a contenteditable element (rich text editor)
|
|
679
|
+
var isContentEditable = el.isContentEditable ||
|
|
680
|
+
el.getAttribute('contenteditable') === 'true' ||
|
|
681
|
+
el.getAttribute('contenteditable') === '';
|
|
682
|
+
|
|
683
|
+
if (isContentEditable) {
|
|
684
|
+
// For contenteditable: select all content, then replace
|
|
685
|
+
var ownerDoc = el.ownerDocument;
|
|
686
|
+
var sel = ownerDoc.defaultView.getSelection();
|
|
687
|
+
var range = ownerDoc.createRange();
|
|
688
|
+
range.selectNodeContents(el);
|
|
689
|
+
sel.removeAllRanges();
|
|
690
|
+
sel.addRange(range);
|
|
691
|
+
// Use insertText command which respects undo stack and triggers input events
|
|
692
|
+
ownerDoc.execCommand('insertText', false, ${textJS});
|
|
693
|
+
return 'Typed into: ' + (el.tagName || '') + ' [contenteditable]';
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
// For input/textarea: clear and set value
|
|
697
|
+
var nativeInputValueSetter = Object.getOwnPropertyDescriptor(
|
|
663
698
|
window.HTMLInputElement.prototype, 'value'
|
|
664
699
|
)?.set || Object.getOwnPropertyDescriptor(
|
|
665
700
|
window.HTMLTextAreaElement.prototype, 'value'
|
|
@@ -679,7 +714,27 @@ URL: ${info.url}`;
|
|
|
679
714
|
`,
|
|
680
715
|
returnByValue: true
|
|
681
716
|
});
|
|
682
|
-
|
|
717
|
+
const textResult = result.result?.value || "";
|
|
718
|
+
if (textResult.startsWith("Element not found")) {
|
|
719
|
+
return this.typeAtFocus(text);
|
|
720
|
+
}
|
|
721
|
+
return textResult || "Text entered.";
|
|
722
|
+
}
|
|
723
|
+
/**
|
|
724
|
+
* Type text into the currently focused element using CDP Input.insertText.
|
|
725
|
+
* This bypasses DOM queries entirely and works with any focused element,
|
|
726
|
+
* including those inside cross-origin iframes or shadow DOM.
|
|
727
|
+
*/
|
|
728
|
+
async typeAtFocus(text) {
|
|
729
|
+
this.ensureConnected();
|
|
730
|
+
const modKey = platform() === "darwin" ? "Meta" : "Control";
|
|
731
|
+
await this.pressKey(`${modKey}+a`);
|
|
732
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
733
|
+
await this.pressKey("Backspace");
|
|
734
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
735
|
+
await this.send("Input.insertText", { text });
|
|
736
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
737
|
+
return "Text entered (into focused element).";
|
|
683
738
|
}
|
|
684
739
|
async pressKey(key) {
|
|
685
740
|
this.ensureConnected();
|
|
@@ -929,6 +984,7 @@ URL: ${info.url}`;
|
|
|
929
984
|
inputType: r.type || "",
|
|
930
985
|
box: r.box
|
|
931
986
|
}));
|
|
987
|
+
await this.discoverCrossOriginFrameRefs(refs);
|
|
932
988
|
if (annotate && refs.length <= 40) {
|
|
933
989
|
const refsJson = JSON.stringify(refs);
|
|
934
990
|
await this.send("Runtime.evaluate", {
|
|
@@ -1003,6 +1059,197 @@ Refs:
|
|
|
1003
1059
|
}
|
|
1004
1060
|
return table;
|
|
1005
1061
|
}
|
|
1062
|
+
// ── Cross-Origin Iframe Discovery ────────────────────────────────
|
|
1063
|
+
/**
|
|
1064
|
+
* Use CDP's Page.getFrameTree + Runtime.evaluate with contextId to discover
|
|
1065
|
+
* interactive elements inside cross-origin iframes (e.g., ProtonMail editor,
|
|
1066
|
+
* Google Docs, embedded rich text editors).
|
|
1067
|
+
*
|
|
1068
|
+
* Same-origin iframes are already handled inline by the main snapshot JS.
|
|
1069
|
+
* This method handles the ones that threw cross-origin errors.
|
|
1070
|
+
*/
|
|
1071
|
+
async discoverCrossOriginFrameRefs(refs) {
|
|
1072
|
+
this.frameContexts.clear();
|
|
1073
|
+
try {
|
|
1074
|
+
const frameTree = await this.send("Page.getFrameTree");
|
|
1075
|
+
const mainFrameId = frameTree.frameTree?.frame?.id;
|
|
1076
|
+
const childFrames = frameTree.frameTree?.childFrames || [];
|
|
1077
|
+
if (childFrames.length === 0) return;
|
|
1078
|
+
const contexts = await this.getFrameContexts(mainFrameId || "");
|
|
1079
|
+
for (const child of childFrames) {
|
|
1080
|
+
const frameId = child.frame.id;
|
|
1081
|
+
const contextId = contexts.get(frameId);
|
|
1082
|
+
if (!contextId) continue;
|
|
1083
|
+
const iframeOffsetResult = await this.send("Runtime.evaluate", {
|
|
1084
|
+
expression: `
|
|
1085
|
+
(function() {
|
|
1086
|
+
var iframes = document.querySelectorAll('iframe');
|
|
1087
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
1088
|
+
try {
|
|
1089
|
+
// Match by frame src or name
|
|
1090
|
+
var f = iframes[i];
|
|
1091
|
+
if (f.contentWindow) {
|
|
1092
|
+
var r = f.getBoundingClientRect();
|
|
1093
|
+
if (r.width > 10 && r.height > 10) {
|
|
1094
|
+
return JSON.stringify({ x: r.x, y: r.y, width: r.width, height: r.height, index: i });
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
} catch(e) {}
|
|
1098
|
+
}
|
|
1099
|
+
return 'null';
|
|
1100
|
+
})()
|
|
1101
|
+
`,
|
|
1102
|
+
returnByValue: true
|
|
1103
|
+
});
|
|
1104
|
+
let iframeOffset = { x: 0, y: 0 };
|
|
1105
|
+
try {
|
|
1106
|
+
const parsed = JSON.parse(
|
|
1107
|
+
iframeOffsetResult.result?.value || "null"
|
|
1108
|
+
);
|
|
1109
|
+
if (parsed) iframeOffset = { x: parsed.x, y: parsed.y };
|
|
1110
|
+
} catch {
|
|
1111
|
+
}
|
|
1112
|
+
const startRefId = refs.length + 1;
|
|
1113
|
+
try {
|
|
1114
|
+
const frameResult = await this.send("Runtime.evaluate", {
|
|
1115
|
+
expression: `
|
|
1116
|
+
(function() {
|
|
1117
|
+
var selectors = [
|
|
1118
|
+
'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
|
|
1119
|
+
'[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
|
|
1120
|
+
'[role="combobox"]', '[role="listbox"]', '[role="menuitem"]', '[role="tab"]',
|
|
1121
|
+
'[role="switch"]', '[role="slider"]', '[role="option"]', '[role="searchbox"]',
|
|
1122
|
+
'[onclick]', '[tabindex]:not([tabindex="-1"])',
|
|
1123
|
+
'[contenteditable="true"]', '[contenteditable=""]'
|
|
1124
|
+
].join(', ');
|
|
1125
|
+
|
|
1126
|
+
var all = document.querySelectorAll(selectors);
|
|
1127
|
+
// Also check if the body itself is contenteditable
|
|
1128
|
+
if (document.body && (document.body.isContentEditable || document.body.getAttribute('contenteditable') === 'true')) {
|
|
1129
|
+
all = [document.body].concat(Array.from(all));
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
var refs = [];
|
|
1133
|
+
var startId = ${startRefId};
|
|
1134
|
+
var vh = window.innerHeight;
|
|
1135
|
+
var vw = window.innerWidth;
|
|
1136
|
+
|
|
1137
|
+
for (var i = 0; i < all.length && refs.length < 20; i++) {
|
|
1138
|
+
var el = all[i];
|
|
1139
|
+
var rect = el.getBoundingClientRect();
|
|
1140
|
+
if (rect.width < 5 || rect.height < 5) continue;
|
|
1141
|
+
var style = window.getComputedStyle(el);
|
|
1142
|
+
if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
|
|
1143
|
+
|
|
1144
|
+
var role = el.getAttribute('role') || '';
|
|
1145
|
+
if (!role) {
|
|
1146
|
+
var tag = el.tagName.toLowerCase();
|
|
1147
|
+
if (tag === 'a') role = 'link';
|
|
1148
|
+
else if (tag === 'button') role = 'button';
|
|
1149
|
+
else if (tag === 'input') {
|
|
1150
|
+
var t = (el.type || 'text').toLowerCase();
|
|
1151
|
+
if (t === 'checkbox') role = 'checkbox';
|
|
1152
|
+
else if (t === 'radio') role = 'radio';
|
|
1153
|
+
else if (t === 'submit' || t === 'button') role = 'button';
|
|
1154
|
+
else role = 'textbox';
|
|
1155
|
+
}
|
|
1156
|
+
else if (tag === 'select') role = 'combobox';
|
|
1157
|
+
else if (tag === 'textarea') role = 'textbox';
|
|
1158
|
+
else if (el.isContentEditable) role = 'textbox';
|
|
1159
|
+
else role = tag;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
var name = '';
|
|
1163
|
+
var ariaLabel = el.getAttribute('aria-label');
|
|
1164
|
+
if (ariaLabel) {
|
|
1165
|
+
name = ariaLabel;
|
|
1166
|
+
} else if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
|
|
1167
|
+
name = el.getAttribute('placeholder') || el.getAttribute('name') || '';
|
|
1168
|
+
} else if (el.isContentEditable) {
|
|
1169
|
+
name = 'compose body';
|
|
1170
|
+
} else {
|
|
1171
|
+
name = (el.textContent || '').trim().slice(0, 60);
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
var refId = startId + refs.length;
|
|
1175
|
+
el.setAttribute('data-assistme-ref', String(refId));
|
|
1176
|
+
|
|
1177
|
+
refs.push({
|
|
1178
|
+
id: refId,
|
|
1179
|
+
role: role,
|
|
1180
|
+
name: name,
|
|
1181
|
+
tag: el.tagName.toLowerCase(),
|
|
1182
|
+
type: el.getAttribute('type') || '',
|
|
1183
|
+
box: {
|
|
1184
|
+
x: Math.round(rect.x),
|
|
1185
|
+
y: Math.round(rect.y),
|
|
1186
|
+
width: Math.round(rect.width),
|
|
1187
|
+
height: Math.round(rect.height)
|
|
1188
|
+
},
|
|
1189
|
+
inFrame: true
|
|
1190
|
+
});
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
return JSON.stringify(refs);
|
|
1194
|
+
})()
|
|
1195
|
+
`,
|
|
1196
|
+
contextId,
|
|
1197
|
+
returnByValue: true
|
|
1198
|
+
});
|
|
1199
|
+
const frameRefs = JSON.parse(
|
|
1200
|
+
frameResult.result?.value || "[]"
|
|
1201
|
+
);
|
|
1202
|
+
for (const r of frameRefs) {
|
|
1203
|
+
refs.push({
|
|
1204
|
+
id: r.id,
|
|
1205
|
+
role: r.role,
|
|
1206
|
+
name: r.name,
|
|
1207
|
+
tag: r.tag,
|
|
1208
|
+
inputType: r.type || "",
|
|
1209
|
+
box: {
|
|
1210
|
+
x: Math.round(r.box.x + iframeOffset.x),
|
|
1211
|
+
y: Math.round(r.box.y + iframeOffset.y),
|
|
1212
|
+
width: r.box.width,
|
|
1213
|
+
height: r.box.height
|
|
1214
|
+
}
|
|
1215
|
+
});
|
|
1216
|
+
this.frameContexts.set(r.id, contextId);
|
|
1217
|
+
}
|
|
1218
|
+
} catch {
|
|
1219
|
+
}
|
|
1220
|
+
}
|
|
1221
|
+
} catch {
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
/**
|
|
1225
|
+
* Get execution context IDs for each frame in the page.
|
|
1226
|
+
* Uses Runtime.executionContextCreated events collected during the session,
|
|
1227
|
+
* or falls back to evaluating in known frames.
|
|
1228
|
+
*/
|
|
1229
|
+
async getFrameContexts(_mainFrameId) {
|
|
1230
|
+
const contexts = /* @__PURE__ */ new Map();
|
|
1231
|
+
try {
|
|
1232
|
+
await this.send("Runtime.enable").catch(() => {
|
|
1233
|
+
});
|
|
1234
|
+
const frameTree = await this.send("Page.getFrameTree");
|
|
1235
|
+
const childFrames = frameTree.frameTree?.childFrames || [];
|
|
1236
|
+
for (const child of childFrames) {
|
|
1237
|
+
try {
|
|
1238
|
+
const world = await this.send("Page.createIsolatedWorld", {
|
|
1239
|
+
frameId: child.frame.id,
|
|
1240
|
+
worldName: "assistme-snapshot",
|
|
1241
|
+
grantUniveralAccess: true
|
|
1242
|
+
});
|
|
1243
|
+
if (world.executionContextId) {
|
|
1244
|
+
contexts.set(child.frame.id, world.executionContextId);
|
|
1245
|
+
}
|
|
1246
|
+
} catch {
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
} catch {
|
|
1250
|
+
}
|
|
1251
|
+
return contexts;
|
|
1252
|
+
}
|
|
1006
1253
|
// ── Ref Resolution ────────────────────────────────────────────────
|
|
1007
1254
|
/**
|
|
1008
1255
|
* Resolve a ref ID to its current center coordinates in the viewport.
|
|
@@ -1115,9 +1362,85 @@ Refs:
|
|
|
1115
1362
|
returnByValue: true
|
|
1116
1363
|
});
|
|
1117
1364
|
const value = result.result?.value;
|
|
1118
|
-
if (
|
|
1365
|
+
if (value && value !== "null") {
|
|
1366
|
+
try {
|
|
1367
|
+
return JSON.parse(value);
|
|
1368
|
+
} catch {
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
const frameContextId = this.frameContexts.get(refId);
|
|
1372
|
+
if (frameContextId) {
|
|
1373
|
+
return this.resolveRefInFrame(refId, frameContextId, role, name);
|
|
1374
|
+
}
|
|
1375
|
+
return null;
|
|
1376
|
+
}
|
|
1377
|
+
/**
|
|
1378
|
+
* Resolve a ref inside a cross-origin iframe using its execution context.
|
|
1379
|
+
* Returns coordinates adjusted by the iframe's viewport offset.
|
|
1380
|
+
*/
|
|
1381
|
+
async resolveRefInFrame(refId, contextId, role, name) {
|
|
1382
|
+
const roleJS = JSON.stringify(role);
|
|
1383
|
+
const nameJS = JSON.stringify(name);
|
|
1119
1384
|
try {
|
|
1120
|
-
|
|
1385
|
+
const offsetResult = await this.send("Runtime.evaluate", {
|
|
1386
|
+
expression: `
|
|
1387
|
+
(function() {
|
|
1388
|
+
var iframes = document.querySelectorAll('iframe');
|
|
1389
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
1390
|
+
var r = iframes[i].getBoundingClientRect();
|
|
1391
|
+
if (r.width > 10 && r.height > 10) {
|
|
1392
|
+
return JSON.stringify({ x: r.x, y: r.y });
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
return JSON.stringify({ x: 0, y: 0 });
|
|
1396
|
+
})()
|
|
1397
|
+
`,
|
|
1398
|
+
returnByValue: true
|
|
1399
|
+
});
|
|
1400
|
+
const offset = JSON.parse(
|
|
1401
|
+
offsetResult.result?.value || '{"x":0,"y":0}'
|
|
1402
|
+
);
|
|
1403
|
+
const frameResult = await this.send("Runtime.evaluate", {
|
|
1404
|
+
expression: `
|
|
1405
|
+
(function() {
|
|
1406
|
+
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
1407
|
+
if (!el && ${roleJS} && ${nameJS}) {
|
|
1408
|
+
// Fallback: search by role
|
|
1409
|
+
var candidates = document.querySelectorAll('*');
|
|
1410
|
+
for (var i = 0; i < candidates.length; i++) {
|
|
1411
|
+
var c = candidates[i];
|
|
1412
|
+
if (c.isContentEditable || c.getAttribute('contenteditable') === 'true') {
|
|
1413
|
+
el = c; break;
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
}
|
|
1417
|
+
if (!el) return 'null';
|
|
1418
|
+
|
|
1419
|
+
el.scrollIntoView({ block: 'center', behavior: 'instant' });
|
|
1420
|
+
var r = el.getBoundingClientRect();
|
|
1421
|
+
if (r.width < 1 || r.height < 1) return JSON.stringify({ error: 'Zero size' });
|
|
1422
|
+
|
|
1423
|
+
return JSON.stringify({
|
|
1424
|
+
x: r.x + r.width / 2,
|
|
1425
|
+
y: r.y + r.height / 2,
|
|
1426
|
+
width: r.width,
|
|
1427
|
+
height: r.height
|
|
1428
|
+
});
|
|
1429
|
+
})()
|
|
1430
|
+
`,
|
|
1431
|
+
contextId,
|
|
1432
|
+
returnByValue: true
|
|
1433
|
+
});
|
|
1434
|
+
const value = frameResult.result?.value;
|
|
1435
|
+
if (!value || value === "null") return null;
|
|
1436
|
+
const parsed = JSON.parse(value);
|
|
1437
|
+
if (parsed.error) return parsed;
|
|
1438
|
+
return {
|
|
1439
|
+
x: parsed.x + offset.x,
|
|
1440
|
+
y: parsed.y + offset.y,
|
|
1441
|
+
width: parsed.width,
|
|
1442
|
+
height: parsed.height
|
|
1443
|
+
};
|
|
1121
1444
|
} catch {
|
|
1122
1445
|
return null;
|
|
1123
1446
|
}
|
|
@@ -1204,11 +1527,23 @@ Refs:
|
|
|
1204
1527
|
await new Promise((r) => setTimeout(r, 50));
|
|
1205
1528
|
await this.pressKey("Backspace");
|
|
1206
1529
|
await new Promise((r) => setTimeout(r, 50));
|
|
1207
|
-
const cleared = await this.send("Runtime.evaluate", {
|
|
1530
|
+
const frameContextId = this.frameContexts.get(refId);
|
|
1531
|
+
const clearEvalOpts = {
|
|
1208
1532
|
expression: `
|
|
1209
1533
|
(function() {
|
|
1210
1534
|
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
1211
1535
|
if (!el) return 'no_element';
|
|
1536
|
+
|
|
1537
|
+
// For contenteditable elements, check textContent instead of value
|
|
1538
|
+
if (el.isContentEditable || el.getAttribute('contenteditable') === 'true') {
|
|
1539
|
+
if (el.textContent && el.textContent.trim() !== '') {
|
|
1540
|
+
el.textContent = '';
|
|
1541
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
1542
|
+
return 'js_cleared';
|
|
1543
|
+
}
|
|
1544
|
+
return 'ok';
|
|
1545
|
+
}
|
|
1546
|
+
|
|
1212
1547
|
if (el.value !== undefined && el.value !== '') {
|
|
1213
1548
|
// Ctrl+A didn't work (some frameworks intercept it) \u2014 clear via JS
|
|
1214
1549
|
var setter = Object.getOwnPropertyDescriptor(
|
|
@@ -1226,9 +1561,13 @@ Refs:
|
|
|
1226
1561
|
})()
|
|
1227
1562
|
`,
|
|
1228
1563
|
returnByValue: true
|
|
1229
|
-
});
|
|
1564
|
+
};
|
|
1565
|
+
if (frameContextId) {
|
|
1566
|
+
clearEvalOpts.contextId = frameContextId;
|
|
1567
|
+
}
|
|
1568
|
+
const cleared = await this.send("Runtime.evaluate", clearEvalOpts);
|
|
1230
1569
|
const clearStatus = cleared.result?.value || "ok";
|
|
1231
|
-
if (clearStatus === "no_element") {
|
|
1570
|
+
if (clearStatus === "no_element" && !frameContextId) {
|
|
1232
1571
|
return {
|
|
1233
1572
|
success: false,
|
|
1234
1573
|
message: `Ref ${refLabel} not found after click. Take a new snapshot.`
|
|
@@ -3811,7 +4150,7 @@ function createBrowserMcpServer() {
|
|
|
3811
4150
|
),
|
|
3812
4151
|
tool(
|
|
3813
4152
|
"browser_type",
|
|
3814
|
-
"Type text into an input field in the user's browser.",
|
|
4153
|
+
"Type text into an input field in the user's browser. If the CSS selector fails, automatically falls back to typing into the currently focused element. Works with contenteditable elements (rich text editors) and cross-origin iframes.",
|
|
3815
4154
|
{
|
|
3816
4155
|
selector: z.string().describe("CSS selector of the input element"),
|
|
3817
4156
|
text: z.string().describe("Text to type")
|
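package/package.json
CHANGED
(diff body for package/package.json was not captured in this extract; the summary above lists it as +1 -1)
package/src/browser/controller.ts
CHANGED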
|
@@ -21,6 +21,7 @@ export class BrowserController {
|
|
|
21
21
|
private connected = false;
|
|
22
22
|
private currentTabId: string | null = null;
|
|
23
23
|
private refCache: Map<number, RefEntry> = new Map();
|
|
24
|
+
private frameContexts: Map<number, number> = new Map(); // refId → contextId
|
|
24
25
|
|
|
25
26
|
constructor(port = 9222) {
|
|
26
27
|
this.debugPort = port;
|
|
@@ -367,17 +368,50 @@ export class BrowserController {
|
|
|
367
368
|
const selectorJS = JSON.stringify(selector);
|
|
368
369
|
const textJS = JSON.stringify(text);
|
|
369
370
|
|
|
370
|
-
// First
|
|
371
|
+
// First try to find the element in main document, then in same-origin iframes
|
|
371
372
|
const result = await this.send("Runtime.evaluate", {
|
|
372
373
|
expression: `
|
|
373
374
|
(function() {
|
|
374
|
-
|
|
375
|
+
var el = document.querySelector(${selectorJS});
|
|
376
|
+
|
|
377
|
+
// If not found in main document, search same-origin iframes
|
|
378
|
+
if (!el) {
|
|
379
|
+
var iframes = document.querySelectorAll('iframe');
|
|
380
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
381
|
+
try {
|
|
382
|
+
var iframeDoc = iframes[i].contentDocument;
|
|
383
|
+
if (iframeDoc) {
|
|
384
|
+
el = iframeDoc.querySelector(${selectorJS});
|
|
385
|
+
if (el) break;
|
|
386
|
+
}
|
|
387
|
+
} catch(e) { /* cross-origin, skip */ }
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
|
|
375
391
|
if (!el) return 'Element not found: ' + ${selectorJS};
|
|
376
392
|
|
|
377
393
|
el.focus();
|
|
378
394
|
|
|
379
|
-
//
|
|
380
|
-
|
|
395
|
+
// Check if this is a contenteditable element (rich text editor)
|
|
396
|
+
var isContentEditable = el.isContentEditable ||
|
|
397
|
+
el.getAttribute('contenteditable') === 'true' ||
|
|
398
|
+
el.getAttribute('contenteditable') === '';
|
|
399
|
+
|
|
400
|
+
if (isContentEditable) {
|
|
401
|
+
// For contenteditable: select all content, then replace
|
|
402
|
+
var ownerDoc = el.ownerDocument;
|
|
403
|
+
var sel = ownerDoc.defaultView.getSelection();
|
|
404
|
+
var range = ownerDoc.createRange();
|
|
405
|
+
range.selectNodeContents(el);
|
|
406
|
+
sel.removeAllRanges();
|
|
407
|
+
sel.addRange(range);
|
|
408
|
+
// Use insertText command which respects undo stack and triggers input events
|
|
409
|
+
ownerDoc.execCommand('insertText', false, ${textJS});
|
|
410
|
+
return 'Typed into: ' + (el.tagName || '') + ' [contenteditable]';
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
// For input/textarea: clear and set value
|
|
414
|
+
var nativeInputValueSetter = Object.getOwnPropertyDescriptor(
|
|
381
415
|
window.HTMLInputElement.prototype, 'value'
|
|
382
416
|
)?.set || Object.getOwnPropertyDescriptor(
|
|
383
417
|
window.HTMLTextAreaElement.prototype, 'value'
|
|
@@ -398,7 +432,36 @@ export class BrowserController {
|
|
|
398
432
|
returnByValue: true,
|
|
399
433
|
});
|
|
400
434
|
|
|
401
|
-
|
|
435
|
+
const textResult = ((result as CDPEvalResult).result?.value as string) || "";
|
|
436
|
+
|
|
437
|
+
// If element still not found, try typing into the currently focused element via CDP
|
|
438
|
+
if (textResult.startsWith("Element not found")) {
|
|
439
|
+
return this.typeAtFocus(text);
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
return textResult || "Text entered.";
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
/**
|
|
446
|
+
* Type text into the currently focused element using CDP Input.insertText.
|
|
447
|
+
* This bypasses DOM queries entirely and works with any focused element,
|
|
448
|
+
* including those inside cross-origin iframes or shadow DOM.
|
|
449
|
+
*/
|
|
450
|
+
async typeAtFocus(text: string): Promise<string> {
|
|
451
|
+
this.ensureConnected();
|
|
452
|
+
|
|
453
|
+
// Optionally clear existing content: select all then delete
|
|
454
|
+
const modKey = platform() === "darwin" ? "Meta" : "Control";
|
|
455
|
+
await this.pressKey(`${modKey}+a`);
|
|
456
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
457
|
+
await this.pressKey("Backspace");
|
|
458
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
459
|
+
|
|
460
|
+
// Insert text via CDP — goes through the browser's native input pipeline
|
|
461
|
+
await this.send("Input.insertText", { text });
|
|
462
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
463
|
+
|
|
464
|
+
return "Text entered (into focused element).";
|
|
402
465
|
}
|
|
403
466
|
|
|
404
467
|
async pressKey(key: string): Promise<string> {
|
|
@@ -670,6 +733,9 @@ export class BrowserController {
|
|
|
670
733
|
box: r.box as BoundingBox,
|
|
671
734
|
}));
|
|
672
735
|
|
|
736
|
+
// 1b. Discover elements in cross-origin iframes via CDP frame targeting
|
|
737
|
+
await this.discoverCrossOriginFrameRefs(refs);
|
|
738
|
+
|
|
673
739
|
// 2. Optionally inject visual overlay with ref labels
|
|
674
740
|
// (Skip for dense pages — labels would overlap and become unreadable)
|
|
675
741
|
if (annotate && refs.length <= 40) {
|
|
@@ -725,6 +791,8 @@ export class BrowserController {
|
|
|
725
791
|
}
|
|
726
792
|
|
|
727
793
|
// 5. Cache refs for subsequent act() calls
|
|
794
|
+
// Note: frameContexts is populated by discoverCrossOriginFrameRefs above,
|
|
795
|
+
// so we only clear refCache here (frameContexts was cleared at start of discover)
|
|
728
796
|
this.refCache.clear();
|
|
729
797
|
for (const ref of refs) {
|
|
730
798
|
this.refCache.set(ref.id, ref);
|
|
@@ -752,6 +820,235 @@ export class BrowserController {
|
|
|
752
820
|
return table;
|
|
753
821
|
}
|
|
754
822
|
|
|
823
|
+
// ── Cross-Origin Iframe Discovery ────────────────────────────────
|
|
824
|
+
|
|
825
|
+
/**
|
|
826
|
+
* Use CDP's Page.getFrameTree + Runtime.evaluate with contextId to discover
|
|
827
|
+
* interactive elements inside cross-origin iframes (e.g., ProtonMail editor,
|
|
828
|
+
* Google Docs, embedded rich text editors).
|
|
829
|
+
*
|
|
830
|
+
* Same-origin iframes are already handled inline by the main snapshot JS.
|
|
831
|
+
* This method handles the ones that threw cross-origin errors.
|
|
832
|
+
*/
|
|
833
|
+
private async discoverCrossOriginFrameRefs(refs: RefEntry[]): Promise<void> {
|
|
834
|
+
this.frameContexts.clear();
|
|
835
|
+
try {
|
|
836
|
+
// Get the frame tree to find all child frames
|
|
837
|
+
const frameTree = (await this.send("Page.getFrameTree")) as {
|
|
838
|
+
frameTree?: {
|
|
839
|
+
frame: { id: string };
|
|
840
|
+
childFrames?: Array<{ frame: { id: string; url: string; name?: string } }>;
|
|
841
|
+
};
|
|
842
|
+
};
|
|
843
|
+
|
|
844
|
+
const mainFrameId = frameTree.frameTree?.frame?.id;
|
|
845
|
+
const childFrames = frameTree.frameTree?.childFrames || [];
|
|
846
|
+
if (childFrames.length === 0) return;
|
|
847
|
+
|
|
848
|
+
// Get all execution contexts to map frameId → contextId
|
|
849
|
+
// We need to enable Runtime events and collect contexts
|
|
850
|
+
const contexts = await this.getFrameContexts(mainFrameId || "");
|
|
851
|
+
|
|
852
|
+
for (const child of childFrames) {
|
|
853
|
+
const frameId = child.frame.id;
|
|
854
|
+
const contextId = contexts.get(frameId);
|
|
855
|
+
if (!contextId) continue;
|
|
856
|
+
|
|
857
|
+
// Get the iframe's bounding rect from the parent frame for coordinate offset
|
|
858
|
+
const iframeOffsetResult = await this.send("Runtime.evaluate", {
|
|
859
|
+
expression: `
|
|
860
|
+
(function() {
|
|
861
|
+
var iframes = document.querySelectorAll('iframe');
|
|
862
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
863
|
+
try {
|
|
864
|
+
// Match by frame src or name
|
|
865
|
+
var f = iframes[i];
|
|
866
|
+
if (f.contentWindow) {
|
|
867
|
+
var r = f.getBoundingClientRect();
|
|
868
|
+
if (r.width > 10 && r.height > 10) {
|
|
869
|
+
return JSON.stringify({ x: r.x, y: r.y, width: r.width, height: r.height, index: i });
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
} catch(e) {}
|
|
873
|
+
}
|
|
874
|
+
return 'null';
|
|
875
|
+
})()
|
|
876
|
+
`,
|
|
877
|
+
returnByValue: true,
|
|
878
|
+
});
|
|
879
|
+
|
|
880
|
+
let iframeOffset = { x: 0, y: 0 };
|
|
881
|
+
try {
|
|
882
|
+
const parsed = JSON.parse(
|
|
883
|
+
((iframeOffsetResult as CDPEvalResult).result?.value as string) || "null"
|
|
884
|
+
);
|
|
885
|
+
if (parsed) iframeOffset = { x: parsed.x, y: parsed.y };
|
|
886
|
+
} catch {
|
|
887
|
+
/* ignore */
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
// Evaluate inside the child frame's execution context
|
|
891
|
+
const startRefId = refs.length + 1;
|
|
892
|
+
try {
|
|
893
|
+
const frameResult = await this.send("Runtime.evaluate", {
|
|
894
|
+
expression: `
|
|
895
|
+
(function() {
|
|
896
|
+
var selectors = [
|
|
897
|
+
'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
|
|
898
|
+
'[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
|
|
899
|
+
'[role="combobox"]', '[role="listbox"]', '[role="menuitem"]', '[role="tab"]',
|
|
900
|
+
'[role="switch"]', '[role="slider"]', '[role="option"]', '[role="searchbox"]',
|
|
901
|
+
'[onclick]', '[tabindex]:not([tabindex="-1"])',
|
|
902
|
+
'[contenteditable="true"]', '[contenteditable=""]'
|
|
903
|
+
].join(', ');
|
|
904
|
+
|
|
905
|
+
var all = document.querySelectorAll(selectors);
|
|
906
|
+
// Also check if the body itself is contenteditable
|
|
907
|
+
if (document.body && (document.body.isContentEditable || document.body.getAttribute('contenteditable') === 'true')) {
|
|
908
|
+
all = [document.body].concat(Array.from(all));
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
var refs = [];
|
|
912
|
+
var startId = ${startRefId};
|
|
913
|
+
var vh = window.innerHeight;
|
|
914
|
+
var vw = window.innerWidth;
|
|
915
|
+
|
|
916
|
+
for (var i = 0; i < all.length && refs.length < 20; i++) {
|
|
917
|
+
var el = all[i];
|
|
918
|
+
var rect = el.getBoundingClientRect();
|
|
919
|
+
if (rect.width < 5 || rect.height < 5) continue;
|
|
920
|
+
var style = window.getComputedStyle(el);
|
|
921
|
+
if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
|
|
922
|
+
|
|
923
|
+
var role = el.getAttribute('role') || '';
|
|
924
|
+
if (!role) {
|
|
925
|
+
var tag = el.tagName.toLowerCase();
|
|
926
|
+
if (tag === 'a') role = 'link';
|
|
927
|
+
else if (tag === 'button') role = 'button';
|
|
928
|
+
else if (tag === 'input') {
|
|
929
|
+
var t = (el.type || 'text').toLowerCase();
|
|
930
|
+
if (t === 'checkbox') role = 'checkbox';
|
|
931
|
+
else if (t === 'radio') role = 'radio';
|
|
932
|
+
else if (t === 'submit' || t === 'button') role = 'button';
|
|
933
|
+
else role = 'textbox';
|
|
934
|
+
}
|
|
935
|
+
else if (tag === 'select') role = 'combobox';
|
|
936
|
+
else if (tag === 'textarea') role = 'textbox';
|
|
937
|
+
else if (el.isContentEditable) role = 'textbox';
|
|
938
|
+
else role = tag;
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
var name = '';
|
|
942
|
+
var ariaLabel = el.getAttribute('aria-label');
|
|
943
|
+
if (ariaLabel) {
|
|
944
|
+
name = ariaLabel;
|
|
945
|
+
} else if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
|
|
946
|
+
name = el.getAttribute('placeholder') || el.getAttribute('name') || '';
|
|
947
|
+
} else if (el.isContentEditable) {
|
|
948
|
+
name = 'compose body';
|
|
949
|
+
} else {
|
|
950
|
+
name = (el.textContent || '').trim().slice(0, 60);
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
var refId = startId + refs.length;
|
|
954
|
+
el.setAttribute('data-assistme-ref', String(refId));
|
|
955
|
+
|
|
956
|
+
refs.push({
|
|
957
|
+
id: refId,
|
|
958
|
+
role: role,
|
|
959
|
+
name: name,
|
|
960
|
+
tag: el.tagName.toLowerCase(),
|
|
961
|
+
type: el.getAttribute('type') || '',
|
|
962
|
+
box: {
|
|
963
|
+
x: Math.round(rect.x),
|
|
964
|
+
y: Math.round(rect.y),
|
|
965
|
+
width: Math.round(rect.width),
|
|
966
|
+
height: Math.round(rect.height)
|
|
967
|
+
},
|
|
968
|
+
inFrame: true
|
|
969
|
+
});
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
return JSON.stringify(refs);
|
|
973
|
+
})()
|
|
974
|
+
`,
|
|
975
|
+
contextId,
|
|
976
|
+
returnByValue: true,
|
|
977
|
+
});
|
|
978
|
+
|
|
979
|
+
const frameRefs = JSON.parse(
|
|
980
|
+
((frameResult as CDPEvalResult).result?.value as string) || "[]"
|
|
981
|
+
);
|
|
982
|
+
|
|
983
|
+
for (const r of frameRefs) {
|
|
984
|
+
refs.push({
|
|
985
|
+
id: r.id as number,
|
|
986
|
+
role: r.role as string,
|
|
987
|
+
name: r.name as string,
|
|
988
|
+
tag: r.tag as string,
|
|
989
|
+
inputType: (r.type as string) || "",
|
|
990
|
+
box: {
|
|
991
|
+
x: Math.round((r.box.x as number) + iframeOffset.x),
|
|
992
|
+
y: Math.round((r.box.y as number) + iframeOffset.y),
|
|
993
|
+
width: r.box.width as number,
|
|
994
|
+
height: r.box.height as number,
|
|
995
|
+
},
|
|
996
|
+
});
|
|
997
|
+
// Store frame context for later resolution
|
|
998
|
+
this.frameContexts.set(r.id as number, contextId);
|
|
999
|
+
}
|
|
1000
|
+
} catch {
|
|
1001
|
+
// Frame evaluation failed (e.g., about:blank, pdf viewer) — skip
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
} catch {
|
|
1005
|
+
// Frame tree unavailable — not critical, skip silently
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
|
|
1009
|
+
/**
 * Build a lookup from child frame ID to an execution context ID.
 *
 * The frame tree is fetched with `Page.getFrameTree`; for every *direct* child
 * frame an isolated world named "assistme-snapshot" is requested through
 * `Page.createIsolatedWorld`, and the `executionContextId` of that world is
 * recorded. Frames nested more than one level deep are not visited.
 *
 * The whole routine is best-effort: a frame whose isolated world cannot be
 * created is simply left out of the result, and if the frame tree cannot be
 * fetched at all the returned map is empty. No error is propagated.
 *
 * @param _mainFrameId - Not read by this method.
 * @returns Map of child frame ID → execution context ID (possibly empty).
 */
private async getFrameContexts(_mainFrameId: string): Promise<Map<string, number>> {
  type FrameNode = { frame: { id: string } };
  type FrameTreeReply = { frameTree?: FrameNode & { childFrames?: FrameNode[] } };
  type IsolatedWorldReply = { executionContextId?: number };

  const contextByFrame = new Map<string, number>();

  try {
    // The Runtime domain may already be enabled; a rejection here is ignored.
    await this.send("Runtime.enable").catch(() => {});

    const reply = (await this.send("Page.getFrameTree")) as FrameTreeReply;
    const directChildren = reply.frameTree?.childFrames || [];

    // Frames are processed one at a time, in frame-tree order.
    for (const node of directChildren) {
      try {
        const frameId = node.frame.id;

        // An isolated world in the target frame yields a context ID for it.
        // NOTE: "grantUniveralAccess" is the parameter name as spelled by the
        // DevTools protocol itself — do not "correct" it.
        const world = (await this.send("Page.createIsolatedWorld", {
          frameId,
          worldName: "assistme-snapshot",
          grantUniveralAccess: true,
        })) as IsolatedWorldReply;

        const contextId = world.executionContextId;
        if (!contextId) continue;

        contextByFrame.set(frameId, contextId);
      } catch {
        // Isolated world could not be created for this frame — leave it out.
      }
    }
  } catch {
    // Frame tree could not be obtained — fall through with whatever we have.
  }

  return contextByFrame;
}
|
|
1051
|
+
|
|
755
1052
|
// ── Ref Resolution ────────────────────────────────────────────────
|
|
756
1053
|
|
|
757
1054
|
/**
|
|
@@ -869,9 +1166,103 @@ export class BrowserController {
|
|
|
869
1166
|
});
|
|
870
1167
|
|
|
871
1168
|
const value = (result as CDPEvalResult).result?.value as string;
|
|
872
|
-
if (
|
|
1169
|
+
if (value && value !== "null") {
|
|
1170
|
+
try {
|
|
1171
|
+
return JSON.parse(value);
|
|
1172
|
+
} catch {
|
|
1173
|
+
/* fall through to frame search */
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
// Strategy 3: search in cross-origin iframe contexts
|
|
1178
|
+
const frameContextId = this.frameContexts.get(refId);
|
|
1179
|
+
if (frameContextId) {
|
|
1180
|
+
return this.resolveRefInFrame(refId, frameContextId, role, name);
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
return null;
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1186
|
+
/**
|
|
1187
|
+
* Resolve a ref inside a cross-origin iframe using its execution context.
|
|
1188
|
+
* Returns coordinates adjusted by the iframe's viewport offset.
|
|
1189
|
+
*/
|
|
1190
|
+
private async resolveRefInFrame(
|
|
1191
|
+
refId: number,
|
|
1192
|
+
contextId: number,
|
|
1193
|
+
role: string,
|
|
1194
|
+
name: string
|
|
1195
|
+
): Promise<{ x: number; y: number; width: number; height: number; error?: string } | null> {
|
|
1196
|
+
const roleJS = JSON.stringify(role);
|
|
1197
|
+
const nameJS = JSON.stringify(name);
|
|
1198
|
+
|
|
873
1199
|
try {
|
|
874
|
-
|
|
1200
|
+
// Get iframe offset from main document
|
|
1201
|
+
const offsetResult = await this.send("Runtime.evaluate", {
|
|
1202
|
+
expression: `
|
|
1203
|
+
(function() {
|
|
1204
|
+
var iframes = document.querySelectorAll('iframe');
|
|
1205
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
1206
|
+
var r = iframes[i].getBoundingClientRect();
|
|
1207
|
+
if (r.width > 10 && r.height > 10) {
|
|
1208
|
+
return JSON.stringify({ x: r.x, y: r.y });
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
1211
|
+
return JSON.stringify({ x: 0, y: 0 });
|
|
1212
|
+
})()
|
|
1213
|
+
`,
|
|
1214
|
+
returnByValue: true,
|
|
1215
|
+
});
|
|
1216
|
+
const offset = JSON.parse(
|
|
1217
|
+
((offsetResult as CDPEvalResult).result?.value as string) || '{"x":0,"y":0}'
|
|
1218
|
+
);
|
|
1219
|
+
|
|
1220
|
+
// Resolve element inside the frame
|
|
1221
|
+
const frameResult = await this.send("Runtime.evaluate", {
|
|
1222
|
+
expression: `
|
|
1223
|
+
(function() {
|
|
1224
|
+
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
1225
|
+
if (!el && ${roleJS} && ${nameJS}) {
|
|
1226
|
+
// Fallback: search by role
|
|
1227
|
+
var candidates = document.querySelectorAll('*');
|
|
1228
|
+
for (var i = 0; i < candidates.length; i++) {
|
|
1229
|
+
var c = candidates[i];
|
|
1230
|
+
if (c.isContentEditable || c.getAttribute('contenteditable') === 'true') {
|
|
1231
|
+
el = c; break;
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
if (!el) return 'null';
|
|
1236
|
+
|
|
1237
|
+
el.scrollIntoView({ block: 'center', behavior: 'instant' });
|
|
1238
|
+
var r = el.getBoundingClientRect();
|
|
1239
|
+
if (r.width < 1 || r.height < 1) return JSON.stringify({ error: 'Zero size' });
|
|
1240
|
+
|
|
1241
|
+
return JSON.stringify({
|
|
1242
|
+
x: r.x + r.width / 2,
|
|
1243
|
+
y: r.y + r.height / 2,
|
|
1244
|
+
width: r.width,
|
|
1245
|
+
height: r.height
|
|
1246
|
+
});
|
|
1247
|
+
})()
|
|
1248
|
+
`,
|
|
1249
|
+
contextId,
|
|
1250
|
+
returnByValue: true,
|
|
1251
|
+
});
|
|
1252
|
+
|
|
1253
|
+
const value = (frameResult as CDPEvalResult).result?.value as string;
|
|
1254
|
+
if (!value || value === "null") return null;
|
|
1255
|
+
|
|
1256
|
+
const parsed = JSON.parse(value);
|
|
1257
|
+
if (parsed.error) return parsed;
|
|
1258
|
+
|
|
1259
|
+
// Adjust coordinates by iframe offset
|
|
1260
|
+
return {
|
|
1261
|
+
x: parsed.x + offset.x,
|
|
1262
|
+
y: parsed.y + offset.y,
|
|
1263
|
+
width: parsed.width,
|
|
1264
|
+
height: parsed.height,
|
|
1265
|
+
};
|
|
875
1266
|
} catch {
|
|
876
1267
|
return null;
|
|
877
1268
|
}
|
|
@@ -981,11 +1372,24 @@ export class BrowserController {
|
|
|
981
1372
|
await new Promise((r) => setTimeout(r, 50));
|
|
982
1373
|
|
|
983
1374
|
// 2. Verify the field is empty; if not, fall back to JS-based clearing
|
|
984
|
-
|
|
1375
|
+
// Determine which context to evaluate in (main doc or iframe)
|
|
1376
|
+
const frameContextId = this.frameContexts.get(refId);
|
|
1377
|
+
const clearEvalOpts: Record<string, unknown> = {
|
|
985
1378
|
expression: `
|
|
986
1379
|
(function() {
|
|
987
1380
|
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
988
1381
|
if (!el) return 'no_element';
|
|
1382
|
+
|
|
1383
|
+
// For contenteditable elements, check textContent instead of value
|
|
1384
|
+
if (el.isContentEditable || el.getAttribute('contenteditable') === 'true') {
|
|
1385
|
+
if (el.textContent && el.textContent.trim() !== '') {
|
|
1386
|
+
el.textContent = '';
|
|
1387
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
1388
|
+
return 'js_cleared';
|
|
1389
|
+
}
|
|
1390
|
+
return 'ok';
|
|
1391
|
+
}
|
|
1392
|
+
|
|
989
1393
|
if (el.value !== undefined && el.value !== '') {
|
|
990
1394
|
// Ctrl+A didn't work (some frameworks intercept it) — clear via JS
|
|
991
1395
|
var setter = Object.getOwnPropertyDescriptor(
|
|
@@ -1003,9 +1407,15 @@ export class BrowserController {
|
|
|
1003
1407
|
})()
|
|
1004
1408
|
`,
|
|
1005
1409
|
returnByValue: true,
|
|
1006
|
-
}
|
|
1410
|
+
};
|
|
1411
|
+
// If element is in a cross-origin iframe, evaluate in its context
|
|
1412
|
+
if (frameContextId) {
|
|
1413
|
+
clearEvalOpts.contextId = frameContextId;
|
|
1414
|
+
}
|
|
1415
|
+
const cleared = await this.send("Runtime.evaluate", clearEvalOpts);
|
|
1007
1416
|
const clearStatus = ((cleared as CDPEvalResult).result?.value as string) || "ok";
|
|
1008
|
-
if (clearStatus === "no_element") {
|
|
1417
|
+
if (clearStatus === "no_element" && !frameContextId) {
|
|
1418
|
+
// Element not found in main doc and no frame context — truly missing
|
|
1009
1419
|
return {
|
|
1010
1420
|
success: false,
|
|
1011
1421
|
message: `Ref ${refLabel} not found after click. Take a new snapshot.`,
|
|
@@ -92,7 +92,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
|
|
|
92
92
|
),
|
|
93
93
|
tool(
|
|
94
94
|
"browser_type",
|
|
95
|
-
"Type text into an input field in the user's browser.",
|
|
95
|
+
"Type text into an input field in the user's browser. If the CSS selector fails, automatically falls back to typing into the currently focused element. Works with contenteditable elements (rich text editors) and cross-origin iframes.",
|
|
96
96
|
{
|
|
97
97
|
selector: z.string().describe("CSS selector of the input element"),
|
|
98
98
|
text: z.string().describe("Text to type"),
|