assistme 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +462 -65
- package/package.json +1 -1
- package/src/agent/processor.ts +55 -6
- package/src/agent/system-prompt.ts +5 -1
- package/src/browser/controller.ts +420 -10
- package/src/db/event.ts +32 -20
- package/src/mcp/agent-tools-server.ts +53 -40
- package/src/mcp/browser-server.ts +1 -1
package/dist/index.js
CHANGED
|
@@ -171,22 +171,36 @@ async function getConversationHistory(conversationId, excludeMessageId, limit =
|
|
|
171
171
|
}
|
|
172
172
|
|
|
173
173
|
// src/db/event.ts
|
|
174
|
+
var MAX_EMIT_RETRIES = 2;
|
|
175
|
+
var EMIT_RETRY_DELAY_MS = 500;
|
|
176
|
+
/**
 * Send a single "event.emit" request through callMcpHandler, retrying on failure.
 *
 * The delay before retry k (1-based) is `retryDelayMs * k`. When every attempt
 * fails, the last error is logged with log.warn and the function resolves
 * normally: emission is best-effort and never rejects.
 *
 * @param {string} messageId - Sent as `message_id`.
 * @param {string} eventType - Sent as `event_type`.
 * @param {*} eventData - Sent as `event_data`.
 * @param {number} seq - Sequence number sent as `seq`.
 * @param {number} [maxRetries=MAX_EMIT_RETRIES] - Retries after the first attempt
 *   (total attempts = maxRetries + 1). Negative values are treated as 0.
 * @param {number} [retryDelayMs=EMIT_RETRY_DELAY_MS] - Base delay between attempts.
 * @returns {Promise<void>}
 */
async function emitWithRetry(messageId, eventType, eventData, seq, maxRetries = MAX_EMIT_RETRIES, retryDelayMs = EMIT_RETRY_DELAY_MS) {
  // Always make at least one attempt, even if a caller passes a negative count.
  const retries = Math.max(0, maxRetries);
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      await callMcpHandler("event.emit", {
        message_id: messageId,
        event_type: eventType,
        event_data: eventData,
        seq
      });
      return;
    } catch (err) {
      if (attempt < retries) {
        // Linear backoff: the wait grows with each failed attempt.
        await new Promise((r) => setTimeout(r, retryDelayMs * (attempt + 1)));
      } else {
        log.warn(
          `Failed to emit event after ${retries + 1} attempts: ${err instanceof Error ? err.message : err}`
        );
      }
    }
  }
}
|
|
174
197
|
var eventSequence = 0;
|
|
175
198
|
function resetEventSequence() {
|
|
176
199
|
eventSequence = 0;
|
|
177
200
|
}
|
|
178
201
|
async function emitEvent(messageId, eventType, eventData) {
|
|
179
202
|
eventSequence++;
|
|
180
|
-
|
|
181
|
-
await callMcpHandler("event.emit", {
|
|
182
|
-
message_id: messageId,
|
|
183
|
-
event_type: eventType,
|
|
184
|
-
event_data: eventData,
|
|
185
|
-
seq: eventSequence
|
|
186
|
-
});
|
|
187
|
-
} catch (err) {
|
|
188
|
-
log.warn(`Failed to emit event: ${err instanceof Error ? err.message : err}`);
|
|
189
|
-
}
|
|
203
|
+
await emitWithRetry(messageId, eventType, eventData, eventSequence);
|
|
190
204
|
}
|
|
191
205
|
|
|
192
206
|
// src/db/action.ts
|
|
@@ -346,6 +360,8 @@ var BrowserController = class {
|
|
|
346
360
|
connected = false;
|
|
347
361
|
currentTabId = null;
|
|
348
362
|
refCache = /* @__PURE__ */ new Map();
|
|
363
|
+
frameContexts = /* @__PURE__ */ new Map();
|
|
364
|
+
// refId → contextId
|
|
349
365
|
constructor(port = 9222) {
|
|
350
366
|
this.debugPort = port;
|
|
351
367
|
}
|
|
@@ -639,13 +655,46 @@ URL: ${info.url}`;
|
|
|
639
655
|
const result = await this.send("Runtime.evaluate", {
|
|
640
656
|
expression: `
|
|
641
657
|
(function() {
|
|
642
|
-
|
|
658
|
+
var el = document.querySelector(${selectorJS});
|
|
659
|
+
|
|
660
|
+
// If not found in main document, search same-origin iframes
|
|
661
|
+
if (!el) {
|
|
662
|
+
var iframes = document.querySelectorAll('iframe');
|
|
663
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
664
|
+
try {
|
|
665
|
+
var iframeDoc = iframes[i].contentDocument;
|
|
666
|
+
if (iframeDoc) {
|
|
667
|
+
el = iframeDoc.querySelector(${selectorJS});
|
|
668
|
+
if (el) break;
|
|
669
|
+
}
|
|
670
|
+
} catch(e) { /* cross-origin, skip */ }
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
|
|
643
674
|
if (!el) return 'Element not found: ' + ${selectorJS};
|
|
644
675
|
|
|
645
676
|
el.focus();
|
|
646
677
|
|
|
647
|
-
//
|
|
648
|
-
|
|
678
|
+
// Check if this is a contenteditable element (rich text editor)
|
|
679
|
+
var isContentEditable = el.isContentEditable ||
|
|
680
|
+
el.getAttribute('contenteditable') === 'true' ||
|
|
681
|
+
el.getAttribute('contenteditable') === '';
|
|
682
|
+
|
|
683
|
+
if (isContentEditable) {
|
|
684
|
+
// For contenteditable: select all content, then replace
|
|
685
|
+
var ownerDoc = el.ownerDocument;
|
|
686
|
+
var sel = ownerDoc.defaultView.getSelection();
|
|
687
|
+
var range = ownerDoc.createRange();
|
|
688
|
+
range.selectNodeContents(el);
|
|
689
|
+
sel.removeAllRanges();
|
|
690
|
+
sel.addRange(range);
|
|
691
|
+
// Use insertText command which respects undo stack and triggers input events
|
|
692
|
+
ownerDoc.execCommand('insertText', false, ${textJS});
|
|
693
|
+
return 'Typed into: ' + (el.tagName || '') + ' [contenteditable]';
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
// For input/textarea: clear and set value
|
|
697
|
+
var nativeInputValueSetter = Object.getOwnPropertyDescriptor(
|
|
649
698
|
window.HTMLInputElement.prototype, 'value'
|
|
650
699
|
)?.set || Object.getOwnPropertyDescriptor(
|
|
651
700
|
window.HTMLTextAreaElement.prototype, 'value'
|
|
@@ -665,7 +714,27 @@ URL: ${info.url}`;
|
|
|
665
714
|
`,
|
|
666
715
|
returnByValue: true
|
|
667
716
|
});
|
|
668
|
-
|
|
717
|
+
const textResult = result.result?.value || "";
|
|
718
|
+
if (textResult.startsWith("Element not found")) {
|
|
719
|
+
return this.typeAtFocus(text);
|
|
720
|
+
}
|
|
721
|
+
return textResult || "Text entered.";
|
|
722
|
+
}
|
|
723
|
+
/**
|
|
724
|
+
* Type text into the currently focused element using CDP Input.insertText.
|
|
725
|
+
* This bypasses DOM queries entirely and works with any focused element,
|
|
726
|
+
* including those inside cross-origin iframes or shadow DOM.
|
|
727
|
+
*/
|
|
728
|
+
async typeAtFocus(text) {
|
|
729
|
+
this.ensureConnected();
|
|
730
|
+
const modKey = platform() === "darwin" ? "Meta" : "Control";
|
|
731
|
+
await this.pressKey(`${modKey}+a`);
|
|
732
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
733
|
+
await this.pressKey("Backspace");
|
|
734
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
735
|
+
await this.send("Input.insertText", { text });
|
|
736
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
737
|
+
return "Text entered (into focused element).";
|
|
669
738
|
}
|
|
670
739
|
async pressKey(key) {
|
|
671
740
|
this.ensureConnected();
|
|
@@ -915,6 +984,7 @@ URL: ${info.url}`;
|
|
|
915
984
|
inputType: r.type || "",
|
|
916
985
|
box: r.box
|
|
917
986
|
}));
|
|
987
|
+
await this.discoverCrossOriginFrameRefs(refs);
|
|
918
988
|
if (annotate && refs.length <= 40) {
|
|
919
989
|
const refsJson = JSON.stringify(refs);
|
|
920
990
|
await this.send("Runtime.evaluate", {
|
|
@@ -989,6 +1059,197 @@ Refs:
|
|
|
989
1059
|
}
|
|
990
1060
|
return table;
|
|
991
1061
|
}
|
|
1062
|
+
// ── Cross-Origin Iframe Discovery ────────────────────────────────
|
|
1063
|
+
/**
|
|
1064
|
+
* Use CDP's Page.getFrameTree + Runtime.evaluate with contextId to discover
|
|
1065
|
+
* interactive elements inside cross-origin iframes (e.g., ProtonMail editor,
|
|
1066
|
+
* Google Docs, embedded rich text editors).
|
|
1067
|
+
*
|
|
1068
|
+
* Same-origin iframes are already handled inline by the main snapshot JS.
|
|
1069
|
+
* This method handles the ones that threw cross-origin errors.
|
|
1070
|
+
*/
|
|
1071
|
+
async discoverCrossOriginFrameRefs(refs) {
|
|
1072
|
+
this.frameContexts.clear();
|
|
1073
|
+
try {
|
|
1074
|
+
const frameTree = await this.send("Page.getFrameTree");
|
|
1075
|
+
const mainFrameId = frameTree.frameTree?.frame?.id;
|
|
1076
|
+
const childFrames = frameTree.frameTree?.childFrames || [];
|
|
1077
|
+
if (childFrames.length === 0) return;
|
|
1078
|
+
const contexts = await this.getFrameContexts(mainFrameId || "");
|
|
1079
|
+
for (const child of childFrames) {
|
|
1080
|
+
const frameId = child.frame.id;
|
|
1081
|
+
const contextId = contexts.get(frameId);
|
|
1082
|
+
if (!contextId) continue;
|
|
1083
|
+
const iframeOffsetResult = await this.send("Runtime.evaluate", {
|
|
1084
|
+
expression: `
|
|
1085
|
+
(function() {
|
|
1086
|
+
var iframes = document.querySelectorAll('iframe');
|
|
1087
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
1088
|
+
try {
|
|
1089
|
+
// Match by frame src or name
|
|
1090
|
+
var f = iframes[i];
|
|
1091
|
+
if (f.contentWindow) {
|
|
1092
|
+
var r = f.getBoundingClientRect();
|
|
1093
|
+
if (r.width > 10 && r.height > 10) {
|
|
1094
|
+
return JSON.stringify({ x: r.x, y: r.y, width: r.width, height: r.height, index: i });
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
} catch(e) {}
|
|
1098
|
+
}
|
|
1099
|
+
return 'null';
|
|
1100
|
+
})()
|
|
1101
|
+
`,
|
|
1102
|
+
returnByValue: true
|
|
1103
|
+
});
|
|
1104
|
+
let iframeOffset = { x: 0, y: 0 };
|
|
1105
|
+
try {
|
|
1106
|
+
const parsed = JSON.parse(
|
|
1107
|
+
iframeOffsetResult.result?.value || "null"
|
|
1108
|
+
);
|
|
1109
|
+
if (parsed) iframeOffset = { x: parsed.x, y: parsed.y };
|
|
1110
|
+
} catch {
|
|
1111
|
+
}
|
|
1112
|
+
const startRefId = refs.length + 1;
|
|
1113
|
+
try {
|
|
1114
|
+
const frameResult = await this.send("Runtime.evaluate", {
|
|
1115
|
+
expression: `
|
|
1116
|
+
(function() {
|
|
1117
|
+
var selectors = [
|
|
1118
|
+
'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
|
|
1119
|
+
'[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
|
|
1120
|
+
'[role="combobox"]', '[role="listbox"]', '[role="menuitem"]', '[role="tab"]',
|
|
1121
|
+
'[role="switch"]', '[role="slider"]', '[role="option"]', '[role="searchbox"]',
|
|
1122
|
+
'[onclick]', '[tabindex]:not([tabindex="-1"])',
|
|
1123
|
+
'[contenteditable="true"]', '[contenteditable=""]'
|
|
1124
|
+
].join(', ');
|
|
1125
|
+
|
|
1126
|
+
var all = document.querySelectorAll(selectors);
|
|
1127
|
+
// Also check if the body itself is contenteditable
|
|
1128
|
+
if (document.body && (document.body.isContentEditable || document.body.getAttribute('contenteditable') === 'true')) {
|
|
1129
|
+
all = [document.body].concat(Array.from(all));
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
var refs = [];
|
|
1133
|
+
var startId = ${startRefId};
|
|
1134
|
+
var vh = window.innerHeight;
|
|
1135
|
+
var vw = window.innerWidth;
|
|
1136
|
+
|
|
1137
|
+
for (var i = 0; i < all.length && refs.length < 20; i++) {
|
|
1138
|
+
var el = all[i];
|
|
1139
|
+
var rect = el.getBoundingClientRect();
|
|
1140
|
+
if (rect.width < 5 || rect.height < 5) continue;
|
|
1141
|
+
var style = window.getComputedStyle(el);
|
|
1142
|
+
if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
|
|
1143
|
+
|
|
1144
|
+
var role = el.getAttribute('role') || '';
|
|
1145
|
+
if (!role) {
|
|
1146
|
+
var tag = el.tagName.toLowerCase();
|
|
1147
|
+
if (tag === 'a') role = 'link';
|
|
1148
|
+
else if (tag === 'button') role = 'button';
|
|
1149
|
+
else if (tag === 'input') {
|
|
1150
|
+
var t = (el.type || 'text').toLowerCase();
|
|
1151
|
+
if (t === 'checkbox') role = 'checkbox';
|
|
1152
|
+
else if (t === 'radio') role = 'radio';
|
|
1153
|
+
else if (t === 'submit' || t === 'button') role = 'button';
|
|
1154
|
+
else role = 'textbox';
|
|
1155
|
+
}
|
|
1156
|
+
else if (tag === 'select') role = 'combobox';
|
|
1157
|
+
else if (tag === 'textarea') role = 'textbox';
|
|
1158
|
+
else if (el.isContentEditable) role = 'textbox';
|
|
1159
|
+
else role = tag;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
var name = '';
|
|
1163
|
+
var ariaLabel = el.getAttribute('aria-label');
|
|
1164
|
+
if (ariaLabel) {
|
|
1165
|
+
name = ariaLabel;
|
|
1166
|
+
} else if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
|
|
1167
|
+
name = el.getAttribute('placeholder') || el.getAttribute('name') || '';
|
|
1168
|
+
} else if (el.isContentEditable) {
|
|
1169
|
+
name = 'compose body';
|
|
1170
|
+
} else {
|
|
1171
|
+
name = (el.textContent || '').trim().slice(0, 60);
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
var refId = startId + refs.length;
|
|
1175
|
+
el.setAttribute('data-assistme-ref', String(refId));
|
|
1176
|
+
|
|
1177
|
+
refs.push({
|
|
1178
|
+
id: refId,
|
|
1179
|
+
role: role,
|
|
1180
|
+
name: name,
|
|
1181
|
+
tag: el.tagName.toLowerCase(),
|
|
1182
|
+
type: el.getAttribute('type') || '',
|
|
1183
|
+
box: {
|
|
1184
|
+
x: Math.round(rect.x),
|
|
1185
|
+
y: Math.round(rect.y),
|
|
1186
|
+
width: Math.round(rect.width),
|
|
1187
|
+
height: Math.round(rect.height)
|
|
1188
|
+
},
|
|
1189
|
+
inFrame: true
|
|
1190
|
+
});
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
return JSON.stringify(refs);
|
|
1194
|
+
})()
|
|
1195
|
+
`,
|
|
1196
|
+
contextId,
|
|
1197
|
+
returnByValue: true
|
|
1198
|
+
});
|
|
1199
|
+
const frameRefs = JSON.parse(
|
|
1200
|
+
frameResult.result?.value || "[]"
|
|
1201
|
+
);
|
|
1202
|
+
for (const r of frameRefs) {
|
|
1203
|
+
refs.push({
|
|
1204
|
+
id: r.id,
|
|
1205
|
+
role: r.role,
|
|
1206
|
+
name: r.name,
|
|
1207
|
+
tag: r.tag,
|
|
1208
|
+
inputType: r.type || "",
|
|
1209
|
+
box: {
|
|
1210
|
+
x: Math.round(r.box.x + iframeOffset.x),
|
|
1211
|
+
y: Math.round(r.box.y + iframeOffset.y),
|
|
1212
|
+
width: r.box.width,
|
|
1213
|
+
height: r.box.height
|
|
1214
|
+
}
|
|
1215
|
+
});
|
|
1216
|
+
this.frameContexts.set(r.id, contextId);
|
|
1217
|
+
}
|
|
1218
|
+
} catch {
|
|
1219
|
+
}
|
|
1220
|
+
}
|
|
1221
|
+
} catch {
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
/**
|
|
1225
|
+
* Get execution context IDs for each frame in the page.
|
|
1226
|
+
* Uses Runtime.executionContextCreated events collected during the session,
|
|
1227
|
+
* or falls back to evaluating in known frames.
|
|
1228
|
+
*/
|
|
1229
|
+
async getFrameContexts(_mainFrameId) {
|
|
1230
|
+
const contexts = /* @__PURE__ */ new Map();
|
|
1231
|
+
try {
|
|
1232
|
+
await this.send("Runtime.enable").catch(() => {
|
|
1233
|
+
});
|
|
1234
|
+
const frameTree = await this.send("Page.getFrameTree");
|
|
1235
|
+
const childFrames = frameTree.frameTree?.childFrames || [];
|
|
1236
|
+
for (const child of childFrames) {
|
|
1237
|
+
try {
|
|
1238
|
+
const world = await this.send("Page.createIsolatedWorld", {
|
|
1239
|
+
frameId: child.frame.id,
|
|
1240
|
+
worldName: "assistme-snapshot",
|
|
1241
|
+
grantUniveralAccess: true
|
|
1242
|
+
});
|
|
1243
|
+
if (world.executionContextId) {
|
|
1244
|
+
contexts.set(child.frame.id, world.executionContextId);
|
|
1245
|
+
}
|
|
1246
|
+
} catch {
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
} catch {
|
|
1250
|
+
}
|
|
1251
|
+
return contexts;
|
|
1252
|
+
}
|
|
992
1253
|
// ── Ref Resolution ────────────────────────────────────────────────
|
|
993
1254
|
/**
|
|
994
1255
|
* Resolve a ref ID to its current center coordinates in the viewport.
|
|
@@ -1101,9 +1362,85 @@ Refs:
|
|
|
1101
1362
|
returnByValue: true
|
|
1102
1363
|
});
|
|
1103
1364
|
const value = result.result?.value;
|
|
1104
|
-
if (
|
|
1365
|
+
if (value && value !== "null") {
|
|
1366
|
+
try {
|
|
1367
|
+
return JSON.parse(value);
|
|
1368
|
+
} catch {
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
const frameContextId = this.frameContexts.get(refId);
|
|
1372
|
+
if (frameContextId) {
|
|
1373
|
+
return this.resolveRefInFrame(refId, frameContextId, role, name);
|
|
1374
|
+
}
|
|
1375
|
+
return null;
|
|
1376
|
+
}
|
|
1377
|
+
/**
|
|
1378
|
+
* Resolve a ref inside a cross-origin iframe using its execution context.
|
|
1379
|
+
* Returns coordinates adjusted by the iframe's viewport offset.
|
|
1380
|
+
*/
|
|
1381
|
+
async resolveRefInFrame(refId, contextId, role, name) {
|
|
1382
|
+
const roleJS = JSON.stringify(role);
|
|
1383
|
+
const nameJS = JSON.stringify(name);
|
|
1105
1384
|
try {
|
|
1106
|
-
|
|
1385
|
+
const offsetResult = await this.send("Runtime.evaluate", {
|
|
1386
|
+
expression: `
|
|
1387
|
+
(function() {
|
|
1388
|
+
var iframes = document.querySelectorAll('iframe');
|
|
1389
|
+
for (var i = 0; i < iframes.length; i++) {
|
|
1390
|
+
var r = iframes[i].getBoundingClientRect();
|
|
1391
|
+
if (r.width > 10 && r.height > 10) {
|
|
1392
|
+
return JSON.stringify({ x: r.x, y: r.y });
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
return JSON.stringify({ x: 0, y: 0 });
|
|
1396
|
+
})()
|
|
1397
|
+
`,
|
|
1398
|
+
returnByValue: true
|
|
1399
|
+
});
|
|
1400
|
+
const offset = JSON.parse(
|
|
1401
|
+
offsetResult.result?.value || '{"x":0,"y":0}'
|
|
1402
|
+
);
|
|
1403
|
+
const frameResult = await this.send("Runtime.evaluate", {
|
|
1404
|
+
expression: `
|
|
1405
|
+
(function() {
|
|
1406
|
+
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
1407
|
+
if (!el && ${roleJS} && ${nameJS}) {
|
|
1408
|
+
// Fallback: search by role
|
|
1409
|
+
var candidates = document.querySelectorAll('*');
|
|
1410
|
+
for (var i = 0; i < candidates.length; i++) {
|
|
1411
|
+
var c = candidates[i];
|
|
1412
|
+
if (c.isContentEditable || c.getAttribute('contenteditable') === 'true') {
|
|
1413
|
+
el = c; break;
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
}
|
|
1417
|
+
if (!el) return 'null';
|
|
1418
|
+
|
|
1419
|
+
el.scrollIntoView({ block: 'center', behavior: 'instant' });
|
|
1420
|
+
var r = el.getBoundingClientRect();
|
|
1421
|
+
if (r.width < 1 || r.height < 1) return JSON.stringify({ error: 'Zero size' });
|
|
1422
|
+
|
|
1423
|
+
return JSON.stringify({
|
|
1424
|
+
x: r.x + r.width / 2,
|
|
1425
|
+
y: r.y + r.height / 2,
|
|
1426
|
+
width: r.width,
|
|
1427
|
+
height: r.height
|
|
1428
|
+
});
|
|
1429
|
+
})()
|
|
1430
|
+
`,
|
|
1431
|
+
contextId,
|
|
1432
|
+
returnByValue: true
|
|
1433
|
+
});
|
|
1434
|
+
const value = frameResult.result?.value;
|
|
1435
|
+
if (!value || value === "null") return null;
|
|
1436
|
+
const parsed = JSON.parse(value);
|
|
1437
|
+
if (parsed.error) return parsed;
|
|
1438
|
+
return {
|
|
1439
|
+
x: parsed.x + offset.x,
|
|
1440
|
+
y: parsed.y + offset.y,
|
|
1441
|
+
width: parsed.width,
|
|
1442
|
+
height: parsed.height
|
|
1443
|
+
};
|
|
1107
1444
|
} catch {
|
|
1108
1445
|
return null;
|
|
1109
1446
|
}
|
|
@@ -1190,11 +1527,23 @@ Refs:
|
|
|
1190
1527
|
await new Promise((r) => setTimeout(r, 50));
|
|
1191
1528
|
await this.pressKey("Backspace");
|
|
1192
1529
|
await new Promise((r) => setTimeout(r, 50));
|
|
1193
|
-
const
|
|
1530
|
+
const frameContextId = this.frameContexts.get(refId);
|
|
1531
|
+
const clearEvalOpts = {
|
|
1194
1532
|
expression: `
|
|
1195
1533
|
(function() {
|
|
1196
1534
|
var el = document.querySelector('[data-assistme-ref="${refId}"]');
|
|
1197
1535
|
if (!el) return 'no_element';
|
|
1536
|
+
|
|
1537
|
+
// For contenteditable elements, check textContent instead of value
|
|
1538
|
+
if (el.isContentEditable || el.getAttribute('contenteditable') === 'true') {
|
|
1539
|
+
if (el.textContent && el.textContent.trim() !== '') {
|
|
1540
|
+
el.textContent = '';
|
|
1541
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
1542
|
+
return 'js_cleared';
|
|
1543
|
+
}
|
|
1544
|
+
return 'ok';
|
|
1545
|
+
}
|
|
1546
|
+
|
|
1198
1547
|
if (el.value !== undefined && el.value !== '') {
|
|
1199
1548
|
// Ctrl+A didn't work (some frameworks intercept it) \u2014 clear via JS
|
|
1200
1549
|
var setter = Object.getOwnPropertyDescriptor(
|
|
@@ -1212,9 +1561,13 @@ Refs:
|
|
|
1212
1561
|
})()
|
|
1213
1562
|
`,
|
|
1214
1563
|
returnByValue: true
|
|
1215
|
-
}
|
|
1564
|
+
};
|
|
1565
|
+
if (frameContextId) {
|
|
1566
|
+
clearEvalOpts.contextId = frameContextId;
|
|
1567
|
+
}
|
|
1568
|
+
const cleared = await this.send("Runtime.evaluate", clearEvalOpts);
|
|
1216
1569
|
const clearStatus = cleared.result?.value || "ok";
|
|
1217
|
-
if (clearStatus === "no_element") {
|
|
1570
|
+
if (clearStatus === "no_element" && !frameContextId) {
|
|
1218
1571
|
return {
|
|
1219
1572
|
success: false,
|
|
1220
1573
|
message: `Ref ${refLabel} not found after click. Take a new snapshot.`
|
|
@@ -3797,7 +4150,7 @@ function createBrowserMcpServer() {
|
|
|
3797
4150
|
),
|
|
3798
4151
|
tool(
|
|
3799
4152
|
"browser_type",
|
|
3800
|
-
"Type text into an input field in the user's browser.",
|
|
4153
|
+
"Type text into an input field in the user's browser. If the CSS selector fails, automatically falls back to typing into the currently focused element. Works with contenteditable elements (rich text editors) and cross-origin iframes.",
|
|
3801
4154
|
{
|
|
3802
4155
|
selector: z.string().describe("CSS selector of the input element"),
|
|
3803
4156
|
text: z.string().describe("Text to type")
|
|
@@ -4183,7 +4536,7 @@ function getCredentialStore() {
|
|
|
4183
4536
|
|
|
4184
4537
|
// src/mcp/agent-tools-server.ts
|
|
4185
4538
|
function createAgentToolsServer(deps) {
|
|
4186
|
-
const { memoryManager, skillManager, taskId, sessionId } = deps;
|
|
4539
|
+
const { memoryManager, skillManager, taskId, sessionId, onUserWaitStart, onUserWaitEnd } = deps;
|
|
4187
4540
|
return createSdkMcpServer2({
|
|
4188
4541
|
name: "assistme-agent",
|
|
4189
4542
|
version: "1.0.0",
|
|
@@ -4688,52 +5041,56 @@ Use \`ask_user\` to request these from the user, or create them yourself (e.g. r
|
|
|
4688
5041
|
try {
|
|
4689
5042
|
await setActionRequest(taskId, actionData);
|
|
4690
5043
|
log.info(`Ask user ${actionId}: "${args.question.slice(0, 80)}..."`);
|
|
4691
|
-
emitEvent(taskId, "user_action_request", actionData)
|
|
4692
|
-
|
|
4693
|
-
emitEvent(taskId, "status_change", {
|
|
5044
|
+
await emitEvent(taskId, "user_action_request", actionData);
|
|
5045
|
+
await emitEvent(taskId, "status_change", {
|
|
4694
5046
|
status: "waiting_for_user",
|
|
4695
5047
|
message: args.question
|
|
4696
|
-
}).catch(() => {
|
|
4697
5048
|
});
|
|
5049
|
+
onUserWaitStart?.();
|
|
4698
5050
|
const startTime = Date.now();
|
|
4699
5051
|
const pollInterval = 2e3;
|
|
4700
|
-
|
|
4701
|
-
|
|
4702
|
-
|
|
4703
|
-
|
|
4704
|
-
|
|
4705
|
-
|
|
4706
|
-
|
|
4707
|
-
|
|
4708
|
-
|
|
4709
|
-
|
|
4710
|
-
|
|
4711
|
-
|
|
4712
|
-
|
|
4713
|
-
|
|
4714
|
-
|
|
4715
|
-
|
|
4716
|
-
|
|
4717
|
-
|
|
4718
|
-
|
|
4719
|
-
|
|
5052
|
+
try {
|
|
5053
|
+
while (Date.now() - startTime < timeout) {
|
|
5054
|
+
const response = await pollActionResponse(taskId);
|
|
5055
|
+
if (response && (!response.action_id || response.action_id === actionId)) {
|
|
5056
|
+
const actionKey = response.action_key || "";
|
|
5057
|
+
const text = response.text || "";
|
|
5058
|
+
const label = response.label || actionKey || text;
|
|
5059
|
+
log.info(`User responded: "${label}"`);
|
|
5060
|
+
return {
|
|
5061
|
+
content: [
|
|
5062
|
+
{
|
|
5063
|
+
type: "text",
|
|
5064
|
+
text: JSON.stringify({
|
|
5065
|
+
status: "responded",
|
|
5066
|
+
action_key: actionKey || "custom_input",
|
|
5067
|
+
label,
|
|
5068
|
+
text: text || label
|
|
5069
|
+
})
|
|
5070
|
+
}
|
|
5071
|
+
]
|
|
5072
|
+
};
|
|
5073
|
+
}
|
|
5074
|
+
await new Promise((resolve2) => setTimeout(resolve2, pollInterval));
|
|
4720
5075
|
}
|
|
4721
|
-
|
|
5076
|
+
log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
|
|
5077
|
+
return {
|
|
5078
|
+
content: [
|
|
5079
|
+
{
|
|
5080
|
+
type: "text",
|
|
5081
|
+
text: JSON.stringify({
|
|
5082
|
+
status: "timeout",
|
|
5083
|
+
message: "User did not respond within the timeout period. Continue the task with a reasonable default or skip the step that required user input."
|
|
5084
|
+
})
|
|
5085
|
+
}
|
|
5086
|
+
]
|
|
5087
|
+
};
|
|
5088
|
+
} finally {
|
|
5089
|
+
onUserWaitEnd?.();
|
|
4722
5090
|
}
|
|
4723
|
-
log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
|
|
4724
|
-
return {
|
|
4725
|
-
content: [
|
|
4726
|
-
{
|
|
4727
|
-
type: "text",
|
|
4728
|
-
text: JSON.stringify({
|
|
4729
|
-
status: "timeout",
|
|
4730
|
-
message: "User did not respond within the timeout period."
|
|
4731
|
-
})
|
|
4732
|
-
}
|
|
4733
|
-
]
|
|
4734
|
-
};
|
|
4735
5091
|
} catch (err) {
|
|
4736
5092
|
log.error(`ask_user failed: ${err}`);
|
|
5093
|
+
onUserWaitEnd?.();
|
|
4737
5094
|
return {
|
|
4738
5095
|
content: [
|
|
4739
5096
|
{
|
|
@@ -5167,12 +5524,16 @@ Available capabilities:
|
|
|
5167
5524
|
- Bash tool for shell commands
|
|
5168
5525
|
- Glob and Grep for file search
|
|
5169
5526
|
|
|
5170
|
-
3. MEMORY:
|
|
5527
|
+
3. MEMORY & CREDENTIALS:
|
|
5171
5528
|
- You can remember things about the user using memory_store
|
|
5172
5529
|
- Use this when you learn preferences, important facts, or standing instructions
|
|
5173
5530
|
- Your stored memories persist across conversations
|
|
5174
5531
|
- PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
|
|
5175
5532
|
- Before completing a task, consider if anything learned should be remembered for future conversations
|
|
5533
|
+
- CRITICAL \u2014 Credential Storage: When you create, register, or receive any account credentials (username, password, API keys, tokens), you MUST use credential_set to save them locally. NEVER use memory_store for credentials \u2014 memory_store is for preferences and facts, credential_set is for secrets. Examples:
|
|
5534
|
+
* After registering a new email/account \u2192 credential_set with type "login" and data { "username": "...", "password": "...", "email": "..." }
|
|
5535
|
+
* After generating an API key \u2192 credential_set with type "api_key" and data { "api_key": "..." }
|
|
5536
|
+
* Credentials saved via credential_set are encrypted on disk and viewable in the desktop app's Credentials panel
|
|
5176
5537
|
|
|
5177
5538
|
4. SKILL-AWARE EXECUTION (CRITICAL \u2014 follow this for EVERY task):
|
|
5178
5539
|
Step A \u2014 Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
|
|
@@ -5252,6 +5613,42 @@ CRITICAL \u2014 Ask before you guess:
|
|
|
5252
5613
|
Workspace path: {workspace_path}`;
|
|
5253
5614
|
|
|
5254
5615
|
// src/agent/processor.ts
|
|
5616
|
+
/**
 * Pausable time budget for a task.
 *
 * Starts counting on construction and calls `abortController.abort()` once
 * `timeoutMs` of un-paused time has elapsed. `pause()` stops the clock and
 * remembers how much budget is left; `resume()` restarts it with the
 * remainder; `clear()` cancels it for good.
 *
 * At most one timer is ever pending. `resume()` is a no-op unless the timeout
 * is currently paused, so an unmatched or duplicated `resume()` cannot leave
 * an uncancellable timer behind, and a `resume()` after `clear()` cannot
 * re-arm a finished task's timeout.
 */
var TaskTimeout = class {
  /**
   * @param {AbortController} abortController - Aborted when the budget runs out.
   * @param {number} timeoutMs - Total budget in milliseconds.
   */
  constructor(abortController, timeoutMs) {
    this.abortController = abortController;
    this.remainingMs = timeoutMs;
    this.resumedAt = Date.now();
    this.schedule();
  }
  /** Handle of the pending timer, or null while paused, fired or cleared. */
  timeoutId = null;
  /** Budget left as of the last pause (or the full budget before any pause). */
  remainingMs;
  /** Timestamp (ms) at which the clock last started running. */
  resumedAt;
  /** True once clear() has been called; the timeout can no longer be re-armed. */
  cleared = false;
  /** Arm the timer for the remaining budget, replacing any pending one. */
  schedule() {
    if (this.timeoutId !== null) {
      clearTimeout(this.timeoutId);
    }
    this.timeoutId = setTimeout(() => {
      this.timeoutId = null;
      this.abortController.abort();
    }, this.remainingMs);
  }
  /** Pause the timeout (e.g. while waiting for user). No-op if not running. */
  pause() {
    if (this.timeoutId === null) return;
    clearTimeout(this.timeoutId);
    this.timeoutId = null;
    const elapsed = Date.now() - this.resumedAt;
    this.remainingMs = Math.max(0, this.remainingMs - elapsed);
  }
  /**
   * Resume the timeout after user interaction completes.
   * No-op when the timer is already running, was cleared, or already fired.
   */
  resume() {
    if (this.cleared || this.timeoutId !== null || this.abortController.signal.aborted) {
      return;
    }
    this.resumedAt = Date.now();
    this.schedule();
  }
  /** Cancel the timeout permanently. */
  clear() {
    this.cleared = true;
    if (this.timeoutId !== null) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
  }
};
|
|
5255
5652
|
var MAX_HISTORY_ENTRIES = 10;
|
|
5256
5653
|
var MAX_RESPONSE_LENGTH = 1500;
|
|
5257
5654
|
var TaskProcessor = class {
|
|
@@ -5334,12 +5731,16 @@ var TaskProcessor = class {
|
|
|
5334
5731
|
}
|
|
5335
5732
|
systemPrompt += historyPrompt;
|
|
5336
5733
|
}
|
|
5734
|
+
const abortController = new AbortController();
|
|
5735
|
+
const taskTimeout = new TaskTimeout(abortController, taskTimeoutMs);
|
|
5337
5736
|
const browserServer = createBrowserMcpServer();
|
|
5338
5737
|
const agentToolsServer = createAgentToolsServer({
|
|
5339
5738
|
memoryManager: this.memoryManager,
|
|
5340
5739
|
skillManager: this.skillManager,
|
|
5341
5740
|
taskId: task.id,
|
|
5342
|
-
sessionId: this.sessionId || void 0
|
|
5741
|
+
sessionId: this.sessionId || void 0,
|
|
5742
|
+
onUserWaitStart: () => taskTimeout.pause(),
|
|
5743
|
+
onUserWaitEnd: () => taskTimeout.resume()
|
|
5343
5744
|
});
|
|
5344
5745
|
const eventHooks = createEventHooks(task.id, toolCallRecords);
|
|
5345
5746
|
const allowedTools = [
|
|
@@ -5386,7 +5787,6 @@ var TaskProcessor = class {
|
|
|
5386
5787
|
session_id: ""
|
|
5387
5788
|
};
|
|
5388
5789
|
}
|
|
5389
|
-
const abortController = new AbortController();
|
|
5390
5790
|
const options = {
|
|
5391
5791
|
model: config.model,
|
|
5392
5792
|
systemPrompt,
|
|
@@ -5404,9 +5804,6 @@ var TaskProcessor = class {
|
|
|
5404
5804
|
abortController
|
|
5405
5805
|
};
|
|
5406
5806
|
const taskStartTime = Date.now();
|
|
5407
|
-
const timeoutId = setTimeout(() => {
|
|
5408
|
-
abortController.abort();
|
|
5409
|
-
}, taskTimeoutMs);
|
|
5410
5807
|
try {
|
|
5411
5808
|
for await (const message of query2({
|
|
5412
5809
|
prompt: promptMessages(),
|
|
@@ -5468,7 +5865,7 @@ var TaskProcessor = class {
|
|
|
5468
5865
|
}
|
|
5469
5866
|
}
|
|
5470
5867
|
} finally {
|
|
5471
|
-
|
|
5868
|
+
taskTimeout.clear();
|
|
5472
5869
|
}
|
|
5473
5870
|
const MAX_CONTENT_LENGTH = 5e4;
|
|
5474
5871
|
const truncatedResponse = finalResponse.length > MAX_CONTENT_LENGTH ? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]" : finalResponse;
|