assistme 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -171,22 +171,36 @@ async function getConversationHistory(conversationId, excludeMessageId, limit =
171
171
  }
172
172
 
173
173
  // src/db/event.ts
174
// Retry policy for event emission: one initial try plus MAX_EMIT_RETRIES
// retries, waiting EMIT_RETRY_DELAY_MS * (failed attempts so far) in between.
var MAX_EMIT_RETRIES = 2;
var EMIT_RETRY_DELAY_MS = 500;

/**
 * Send one event through the "event.emit" MCP handler, retrying on failure.
 *
 * Best-effort: this function never rejects because of a handler failure.
 * Once every attempt has failed it logs a warning and resolves normally.
 *
 * @param {*} messageId - Forwarded as `message_id`.
 * @param {*} eventType - Forwarded as `event_type`.
 * @param {*} eventData - Forwarded as `event_data`.
 * @param {*} seq - Forwarded as `seq`.
 * @returns {Promise<void>}
 */
async function emitWithRetry(messageId, eventType, eventData, seq) {
  const totalAttempts = MAX_EMIT_RETRIES + 1;
  let failures = 0;
  while (failures < totalAttempts) {
    try {
      // A fresh payload object is built for every attempt.
      await callMcpHandler("event.emit", {
        message_id: messageId,
        event_type: eventType,
        event_data: eventData,
        seq
      });
      return;
    } catch (err) {
      failures += 1;
      if (failures === totalAttempts) {
        const reason = err instanceof Error ? err.message : err;
        log.warn(
          `Failed to emit event after ${totalAttempts} attempts: ${reason}`
        );
        return;
      }
      // Linear back-off: 500 ms after the first failure, 1000 ms after the second.
      await new Promise((wake) => setTimeout(wake, EMIT_RETRY_DELAY_MS * failures));
    }
  }
}
174
197
  var eventSequence = 0;
175
198
  function resetEventSequence() {
176
199
  eventSequence = 0;
177
200
  }
178
201
  async function emitEvent(messageId, eventType, eventData) {
179
202
  eventSequence++;
180
- try {
181
- await callMcpHandler("event.emit", {
182
- message_id: messageId,
183
- event_type: eventType,
184
- event_data: eventData,
185
- seq: eventSequence
186
- });
187
- } catch (err) {
188
- log.warn(`Failed to emit event: ${err instanceof Error ? err.message : err}`);
189
- }
203
+ await emitWithRetry(messageId, eventType, eventData, eventSequence);
190
204
  }
191
205
 
192
206
  // src/db/action.ts
@@ -346,6 +360,8 @@ var BrowserController = class {
346
360
  connected = false;
347
361
  currentTabId = null;
348
362
  refCache = /* @__PURE__ */ new Map();
363
+ frameContexts = /* @__PURE__ */ new Map();
364
+ // refId → contextId
349
365
  constructor(port = 9222) {
350
366
  this.debugPort = port;
351
367
  }
@@ -639,13 +655,46 @@ URL: ${info.url}`;
639
655
  const result = await this.send("Runtime.evaluate", {
640
656
  expression: `
641
657
  (function() {
642
- const el = document.querySelector(${selectorJS});
658
+ var el = document.querySelector(${selectorJS});
659
+
660
+ // If not found in main document, search same-origin iframes
661
+ if (!el) {
662
+ var iframes = document.querySelectorAll('iframe');
663
+ for (var i = 0; i < iframes.length; i++) {
664
+ try {
665
+ var iframeDoc = iframes[i].contentDocument;
666
+ if (iframeDoc) {
667
+ el = iframeDoc.querySelector(${selectorJS});
668
+ if (el) break;
669
+ }
670
+ } catch(e) { /* cross-origin, skip */ }
671
+ }
672
+ }
673
+
643
674
  if (!el) return 'Element not found: ' + ${selectorJS};
644
675
 
645
676
  el.focus();
646
677
 
647
- // Clear existing value
648
- const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
678
+ // Check if this is a contenteditable element (rich text editor)
679
+ var isContentEditable = el.isContentEditable ||
680
+ el.getAttribute('contenteditable') === 'true' ||
681
+ el.getAttribute('contenteditable') === '';
682
+
683
+ if (isContentEditable) {
684
+ // For contenteditable: select all content, then replace
685
+ var ownerDoc = el.ownerDocument;
686
+ var sel = ownerDoc.defaultView.getSelection();
687
+ var range = ownerDoc.createRange();
688
+ range.selectNodeContents(el);
689
+ sel.removeAllRanges();
690
+ sel.addRange(range);
691
+ // Use insertText command which respects undo stack and triggers input events
692
+ ownerDoc.execCommand('insertText', false, ${textJS});
693
+ return 'Typed into: ' + (el.tagName || '') + ' [contenteditable]';
694
+ }
695
+
696
+ // For input/textarea: clear and set value
697
+ var nativeInputValueSetter = Object.getOwnPropertyDescriptor(
649
698
  window.HTMLInputElement.prototype, 'value'
650
699
  )?.set || Object.getOwnPropertyDescriptor(
651
700
  window.HTMLTextAreaElement.prototype, 'value'
@@ -665,7 +714,27 @@ URL: ${info.url}`;
665
714
  `,
666
715
  returnByValue: true
667
716
  });
668
- return result.result?.value || "Text entered.";
717
+ const textResult = result.result?.value || "";
718
+ if (textResult.startsWith("Element not found")) {
719
+ return this.typeAtFocus(text);
720
+ }
721
+ return textResult || "Text entered.";
722
+ }
723
+ /**
724
+ * Type text into the currently focused element using CDP Input.insertText.
725
+ * This bypasses DOM queries entirely and works with any focused element,
726
+ * including those inside cross-origin iframes or shadow DOM.
727
+ */
728
+ async typeAtFocus(text) {
729
+ this.ensureConnected();
730
+ const modKey = platform() === "darwin" ? "Meta" : "Control";
731
+ await this.pressKey(`${modKey}+a`);
732
+ await new Promise((r) => setTimeout(r, 50));
733
+ await this.pressKey("Backspace");
734
+ await new Promise((r) => setTimeout(r, 50));
735
+ await this.send("Input.insertText", { text });
736
+ await new Promise((r) => setTimeout(r, 100));
737
+ return "Text entered (into focused element).";
669
738
  }
670
739
  async pressKey(key) {
671
740
  this.ensureConnected();
@@ -915,6 +984,7 @@ URL: ${info.url}`;
915
984
  inputType: r.type || "",
916
985
  box: r.box
917
986
  }));
987
+ await this.discoverCrossOriginFrameRefs(refs);
918
988
  if (annotate && refs.length <= 40) {
919
989
  const refsJson = JSON.stringify(refs);
920
990
  await this.send("Runtime.evaluate", {
@@ -989,6 +1059,197 @@ Refs:
989
1059
  }
990
1060
  return table;
991
1061
  }
1062
+ // ── Cross-Origin Iframe Discovery ────────────────────────────────
1063
+ /**
1064
+ * Use CDP's Page.getFrameTree + Runtime.evaluate with contextId to discover
1065
+ * interactive elements inside cross-origin iframes (e.g., ProtonMail editor,
1066
+ * Google Docs, embedded rich text editors).
1067
+ *
1068
+ * Same-origin iframes are already handled inline by the main snapshot JS.
1069
+ * This method handles the ones that threw cross-origin errors.
1070
+ */
1071
+ async discoverCrossOriginFrameRefs(refs) {
1072
+ this.frameContexts.clear();
1073
+ try {
1074
+ const frameTree = await this.send("Page.getFrameTree");
1075
+ const mainFrameId = frameTree.frameTree?.frame?.id;
1076
+ const childFrames = frameTree.frameTree?.childFrames || [];
1077
+ if (childFrames.length === 0) return;
1078
+ const contexts = await this.getFrameContexts(mainFrameId || "");
1079
+ for (const child of childFrames) {
1080
+ const frameId = child.frame.id;
1081
+ const contextId = contexts.get(frameId);
1082
+ if (!contextId) continue;
1083
+ const iframeOffsetResult = await this.send("Runtime.evaluate", {
1084
+ expression: `
1085
+ (function() {
1086
+ var iframes = document.querySelectorAll('iframe');
1087
+ for (var i = 0; i < iframes.length; i++) {
1088
+ try {
1089
+ // Match by frame src or name
1090
+ var f = iframes[i];
1091
+ if (f.contentWindow) {
1092
+ var r = f.getBoundingClientRect();
1093
+ if (r.width > 10 && r.height > 10) {
1094
+ return JSON.stringify({ x: r.x, y: r.y, width: r.width, height: r.height, index: i });
1095
+ }
1096
+ }
1097
+ } catch(e) {}
1098
+ }
1099
+ return 'null';
1100
+ })()
1101
+ `,
1102
+ returnByValue: true
1103
+ });
1104
+ let iframeOffset = { x: 0, y: 0 };
1105
+ try {
1106
+ const parsed = JSON.parse(
1107
+ iframeOffsetResult.result?.value || "null"
1108
+ );
1109
+ if (parsed) iframeOffset = { x: parsed.x, y: parsed.y };
1110
+ } catch {
1111
+ }
1112
+ const startRefId = refs.length + 1;
1113
+ try {
1114
+ const frameResult = await this.send("Runtime.evaluate", {
1115
+ expression: `
1116
+ (function() {
1117
+ var selectors = [
1118
+ 'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
1119
+ '[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
1120
+ '[role="combobox"]', '[role="listbox"]', '[role="menuitem"]', '[role="tab"]',
1121
+ '[role="switch"]', '[role="slider"]', '[role="option"]', '[role="searchbox"]',
1122
+ '[onclick]', '[tabindex]:not([tabindex="-1"])',
1123
+ '[contenteditable="true"]', '[contenteditable=""]'
1124
+ ].join(', ');
1125
+
1126
+ var all = document.querySelectorAll(selectors);
1127
+ // Also check if the body itself is contenteditable
1128
+ if (document.body && (document.body.isContentEditable || document.body.getAttribute('contenteditable') === 'true')) {
1129
+ all = [document.body].concat(Array.from(all));
1130
+ }
1131
+
1132
+ var refs = [];
1133
+ var startId = ${startRefId};
1134
+ var vh = window.innerHeight;
1135
+ var vw = window.innerWidth;
1136
+
1137
+ for (var i = 0; i < all.length && refs.length < 20; i++) {
1138
+ var el = all[i];
1139
+ var rect = el.getBoundingClientRect();
1140
+ if (rect.width < 5 || rect.height < 5) continue;
1141
+ var style = window.getComputedStyle(el);
1142
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
1143
+
1144
+ var role = el.getAttribute('role') || '';
1145
+ if (!role) {
1146
+ var tag = el.tagName.toLowerCase();
1147
+ if (tag === 'a') role = 'link';
1148
+ else if (tag === 'button') role = 'button';
1149
+ else if (tag === 'input') {
1150
+ var t = (el.type || 'text').toLowerCase();
1151
+ if (t === 'checkbox') role = 'checkbox';
1152
+ else if (t === 'radio') role = 'radio';
1153
+ else if (t === 'submit' || t === 'button') role = 'button';
1154
+ else role = 'textbox';
1155
+ }
1156
+ else if (tag === 'select') role = 'combobox';
1157
+ else if (tag === 'textarea') role = 'textbox';
1158
+ else if (el.isContentEditable) role = 'textbox';
1159
+ else role = tag;
1160
+ }
1161
+
1162
+ var name = '';
1163
+ var ariaLabel = el.getAttribute('aria-label');
1164
+ if (ariaLabel) {
1165
+ name = ariaLabel;
1166
+ } else if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
1167
+ name = el.getAttribute('placeholder') || el.getAttribute('name') || '';
1168
+ } else if (el.isContentEditable) {
1169
+ name = 'compose body';
1170
+ } else {
1171
+ name = (el.textContent || '').trim().slice(0, 60);
1172
+ }
1173
+
1174
+ var refId = startId + refs.length;
1175
+ el.setAttribute('data-assistme-ref', String(refId));
1176
+
1177
+ refs.push({
1178
+ id: refId,
1179
+ role: role,
1180
+ name: name,
1181
+ tag: el.tagName.toLowerCase(),
1182
+ type: el.getAttribute('type') || '',
1183
+ box: {
1184
+ x: Math.round(rect.x),
1185
+ y: Math.round(rect.y),
1186
+ width: Math.round(rect.width),
1187
+ height: Math.round(rect.height)
1188
+ },
1189
+ inFrame: true
1190
+ });
1191
+ }
1192
+
1193
+ return JSON.stringify(refs);
1194
+ })()
1195
+ `,
1196
+ contextId,
1197
+ returnByValue: true
1198
+ });
1199
+ const frameRefs = JSON.parse(
1200
+ frameResult.result?.value || "[]"
1201
+ );
1202
+ for (const r of frameRefs) {
1203
+ refs.push({
1204
+ id: r.id,
1205
+ role: r.role,
1206
+ name: r.name,
1207
+ tag: r.tag,
1208
+ inputType: r.type || "",
1209
+ box: {
1210
+ x: Math.round(r.box.x + iframeOffset.x),
1211
+ y: Math.round(r.box.y + iframeOffset.y),
1212
+ width: r.box.width,
1213
+ height: r.box.height
1214
+ }
1215
+ });
1216
+ this.frameContexts.set(r.id, contextId);
1217
+ }
1218
+ } catch {
1219
+ }
1220
+ }
1221
+ } catch {
1222
+ }
1223
+ }
1224
+ /**
1225
+ * Get execution context IDs for each frame in the page.
1226
+ * Uses Runtime.executionContextCreated events collected during the session,
1227
+ * or falls back to evaluating in known frames.
1228
+ */
1229
+ async getFrameContexts(_mainFrameId) {
1230
+ const contexts = /* @__PURE__ */ new Map();
1231
+ try {
1232
+ await this.send("Runtime.enable").catch(() => {
1233
+ });
1234
+ const frameTree = await this.send("Page.getFrameTree");
1235
+ const childFrames = frameTree.frameTree?.childFrames || [];
1236
+ for (const child of childFrames) {
1237
+ try {
1238
+ const world = await this.send("Page.createIsolatedWorld", {
1239
+ frameId: child.frame.id,
1240
+ worldName: "assistme-snapshot",
1241
+ grantUniveralAccess: true
1242
+ });
1243
+ if (world.executionContextId) {
1244
+ contexts.set(child.frame.id, world.executionContextId);
1245
+ }
1246
+ } catch {
1247
+ }
1248
+ }
1249
+ } catch {
1250
+ }
1251
+ return contexts;
1252
+ }
992
1253
  // ── Ref Resolution ────────────────────────────────────────────────
993
1254
  /**
994
1255
  * Resolve a ref ID to its current center coordinates in the viewport.
@@ -1101,9 +1362,85 @@ Refs:
1101
1362
  returnByValue: true
1102
1363
  });
1103
1364
  const value = result.result?.value;
1104
- if (!value || value === "null") return null;
1365
+ if (value && value !== "null") {
1366
+ try {
1367
+ return JSON.parse(value);
1368
+ } catch {
1369
+ }
1370
+ }
1371
+ const frameContextId = this.frameContexts.get(refId);
1372
+ if (frameContextId) {
1373
+ return this.resolveRefInFrame(refId, frameContextId, role, name);
1374
+ }
1375
+ return null;
1376
+ }
1377
+ /**
1378
+ * Resolve a ref inside a cross-origin iframe using its execution context.
1379
+ * Returns coordinates adjusted by the iframe's viewport offset.
1380
+ */
1381
+ async resolveRefInFrame(refId, contextId, role, name) {
1382
+ const roleJS = JSON.stringify(role);
1383
+ const nameJS = JSON.stringify(name);
1105
1384
  try {
1106
- return JSON.parse(value);
1385
+ const offsetResult = await this.send("Runtime.evaluate", {
1386
+ expression: `
1387
+ (function() {
1388
+ var iframes = document.querySelectorAll('iframe');
1389
+ for (var i = 0; i < iframes.length; i++) {
1390
+ var r = iframes[i].getBoundingClientRect();
1391
+ if (r.width > 10 && r.height > 10) {
1392
+ return JSON.stringify({ x: r.x, y: r.y });
1393
+ }
1394
+ }
1395
+ return JSON.stringify({ x: 0, y: 0 });
1396
+ })()
1397
+ `,
1398
+ returnByValue: true
1399
+ });
1400
+ const offset = JSON.parse(
1401
+ offsetResult.result?.value || '{"x":0,"y":0}'
1402
+ );
1403
+ const frameResult = await this.send("Runtime.evaluate", {
1404
+ expression: `
1405
+ (function() {
1406
+ var el = document.querySelector('[data-assistme-ref="${refId}"]');
1407
+ if (!el && ${roleJS} && ${nameJS}) {
1408
+ // Fallback: search by role
1409
+ var candidates = document.querySelectorAll('*');
1410
+ for (var i = 0; i < candidates.length; i++) {
1411
+ var c = candidates[i];
1412
+ if (c.isContentEditable || c.getAttribute('contenteditable') === 'true') {
1413
+ el = c; break;
1414
+ }
1415
+ }
1416
+ }
1417
+ if (!el) return 'null';
1418
+
1419
+ el.scrollIntoView({ block: 'center', behavior: 'instant' });
1420
+ var r = el.getBoundingClientRect();
1421
+ if (r.width < 1 || r.height < 1) return JSON.stringify({ error: 'Zero size' });
1422
+
1423
+ return JSON.stringify({
1424
+ x: r.x + r.width / 2,
1425
+ y: r.y + r.height / 2,
1426
+ width: r.width,
1427
+ height: r.height
1428
+ });
1429
+ })()
1430
+ `,
1431
+ contextId,
1432
+ returnByValue: true
1433
+ });
1434
+ const value = frameResult.result?.value;
1435
+ if (!value || value === "null") return null;
1436
+ const parsed = JSON.parse(value);
1437
+ if (parsed.error) return parsed;
1438
+ return {
1439
+ x: parsed.x + offset.x,
1440
+ y: parsed.y + offset.y,
1441
+ width: parsed.width,
1442
+ height: parsed.height
1443
+ };
1107
1444
  } catch {
1108
1445
  return null;
1109
1446
  }
@@ -1190,11 +1527,23 @@ Refs:
1190
1527
  await new Promise((r) => setTimeout(r, 50));
1191
1528
  await this.pressKey("Backspace");
1192
1529
  await new Promise((r) => setTimeout(r, 50));
1193
- const cleared = await this.send("Runtime.evaluate", {
1530
+ const frameContextId = this.frameContexts.get(refId);
1531
+ const clearEvalOpts = {
1194
1532
  expression: `
1195
1533
  (function() {
1196
1534
  var el = document.querySelector('[data-assistme-ref="${refId}"]');
1197
1535
  if (!el) return 'no_element';
1536
+
1537
+ // For contenteditable elements, check textContent instead of value
1538
+ if (el.isContentEditable || el.getAttribute('contenteditable') === 'true') {
1539
+ if (el.textContent && el.textContent.trim() !== '') {
1540
+ el.textContent = '';
1541
+ el.dispatchEvent(new Event('input', { bubbles: true }));
1542
+ return 'js_cleared';
1543
+ }
1544
+ return 'ok';
1545
+ }
1546
+
1198
1547
  if (el.value !== undefined && el.value !== '') {
1199
1548
  // Ctrl+A didn't work (some frameworks intercept it) \u2014 clear via JS
1200
1549
  var setter = Object.getOwnPropertyDescriptor(
@@ -1212,9 +1561,13 @@ Refs:
1212
1561
  })()
1213
1562
  `,
1214
1563
  returnByValue: true
1215
- });
1564
+ };
1565
+ if (frameContextId) {
1566
+ clearEvalOpts.contextId = frameContextId;
1567
+ }
1568
+ const cleared = await this.send("Runtime.evaluate", clearEvalOpts);
1216
1569
  const clearStatus = cleared.result?.value || "ok";
1217
- if (clearStatus === "no_element") {
1570
+ if (clearStatus === "no_element" && !frameContextId) {
1218
1571
  return {
1219
1572
  success: false,
1220
1573
  message: `Ref ${refLabel} not found after click. Take a new snapshot.`
@@ -3797,7 +4150,7 @@ function createBrowserMcpServer() {
3797
4150
  ),
3798
4151
  tool(
3799
4152
  "browser_type",
3800
- "Type text into an input field in the user's browser.",
4153
+ "Type text into an input field in the user's browser. If the CSS selector fails, automatically falls back to typing into the currently focused element. Works with contenteditable elements (rich text editors) and cross-origin iframes.",
3801
4154
  {
3802
4155
  selector: z.string().describe("CSS selector of the input element"),
3803
4156
  text: z.string().describe("Text to type")
@@ -4183,7 +4536,7 @@ function getCredentialStore() {
4183
4536
 
4184
4537
  // src/mcp/agent-tools-server.ts
4185
4538
  function createAgentToolsServer(deps) {
4186
- const { memoryManager, skillManager, taskId, sessionId } = deps;
4539
+ const { memoryManager, skillManager, taskId, sessionId, onUserWaitStart, onUserWaitEnd } = deps;
4187
4540
  return createSdkMcpServer2({
4188
4541
  name: "assistme-agent",
4189
4542
  version: "1.0.0",
@@ -4688,52 +5041,56 @@ Use \`ask_user\` to request these from the user, or create them yourself (e.g. r
4688
5041
  try {
4689
5042
  await setActionRequest(taskId, actionData);
4690
5043
  log.info(`Ask user ${actionId}: "${args.question.slice(0, 80)}..."`);
4691
- emitEvent(taskId, "user_action_request", actionData).catch(() => {
4692
- });
4693
- emitEvent(taskId, "status_change", {
5044
+ await emitEvent(taskId, "user_action_request", actionData);
5045
+ await emitEvent(taskId, "status_change", {
4694
5046
  status: "waiting_for_user",
4695
5047
  message: args.question
4696
- }).catch(() => {
4697
5048
  });
5049
+ onUserWaitStart?.();
4698
5050
  const startTime = Date.now();
4699
5051
  const pollInterval = 2e3;
4700
- while (Date.now() - startTime < timeout) {
4701
- const response = await pollActionResponse(taskId);
4702
- if (response && (!response.action_id || response.action_id === actionId)) {
4703
- const actionKey = response.action_key || "";
4704
- const text = response.text || "";
4705
- const label = response.label || actionKey || text;
4706
- log.info(`User responded: "${label}"`);
4707
- return {
4708
- content: [
4709
- {
4710
- type: "text",
4711
- text: JSON.stringify({
4712
- status: "responded",
4713
- action_key: actionKey || "custom_input",
4714
- label,
4715
- text: text || label
4716
- })
4717
- }
4718
- ]
4719
- };
5052
+ try {
5053
+ while (Date.now() - startTime < timeout) {
5054
+ const response = await pollActionResponse(taskId);
5055
+ if (response && (!response.action_id || response.action_id === actionId)) {
5056
+ const actionKey = response.action_key || "";
5057
+ const text = response.text || "";
5058
+ const label = response.label || actionKey || text;
5059
+ log.info(`User responded: "${label}"`);
5060
+ return {
5061
+ content: [
5062
+ {
5063
+ type: "text",
5064
+ text: JSON.stringify({
5065
+ status: "responded",
5066
+ action_key: actionKey || "custom_input",
5067
+ label,
5068
+ text: text || label
5069
+ })
5070
+ }
5071
+ ]
5072
+ };
5073
+ }
5074
+ await new Promise((resolve2) => setTimeout(resolve2, pollInterval));
4720
5075
  }
4721
- await new Promise((resolve2) => setTimeout(resolve2, pollInterval));
5076
+ log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
5077
+ return {
5078
+ content: [
5079
+ {
5080
+ type: "text",
5081
+ text: JSON.stringify({
5082
+ status: "timeout",
5083
+ message: "User did not respond within the timeout period. Continue the task with a reasonable default or skip the step that required user input."
5084
+ })
5085
+ }
5086
+ ]
5087
+ };
5088
+ } finally {
5089
+ onUserWaitEnd?.();
4722
5090
  }
4723
- log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
4724
- return {
4725
- content: [
4726
- {
4727
- type: "text",
4728
- text: JSON.stringify({
4729
- status: "timeout",
4730
- message: "User did not respond within the timeout period."
4731
- })
4732
- }
4733
- ]
4734
- };
4735
5091
  } catch (err) {
4736
5092
  log.error(`ask_user failed: ${err}`);
5093
+ onUserWaitEnd?.();
4737
5094
  return {
4738
5095
  content: [
4739
5096
  {
@@ -5167,12 +5524,16 @@ Available capabilities:
5167
5524
  - Bash tool for shell commands
5168
5525
  - Glob and Grep for file search
5169
5526
 
5170
- 3. MEMORY:
5527
+ 3. MEMORY & CREDENTIALS:
5171
5528
  - You can remember things about the user using memory_store
5172
5529
  - Use this when you learn preferences, important facts, or standing instructions
5173
5530
  - Your stored memories persist across conversations
5174
5531
  - PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
5175
5532
  - Before completing a task, consider if anything learned should be remembered for future conversations
5533
+ - CRITICAL \u2014 Credential Storage: When you create, register, or receive any account credentials (username, password, API keys, tokens), you MUST use credential_set to save them locally. NEVER use memory_store for credentials \u2014 memory_store is for preferences and facts, credential_set is for secrets. Examples:
5534
+ * After registering a new email/account \u2192 credential_set with type "login" and data { "username": "...", "password": "...", "email": "..." }
5535
+ * After generating an API key \u2192 credential_set with type "api_key" and data { "api_key": "..." }
5536
+ * Credentials saved via credential_set are encrypted on disk and viewable in the desktop app's Credentials panel
5176
5537
 
5177
5538
  4. SKILL-AWARE EXECUTION (CRITICAL \u2014 follow this for EVERY task):
5178
5539
  Step A \u2014 Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
@@ -5252,6 +5613,42 @@ CRITICAL \u2014 Ask before you guess:
5252
5613
  Workspace path: {workspace_path}`;
5253
5614
 
5254
5615
  // src/agent/processor.ts
5616
/**
 * Pausable time budget for a task.
 *
 * Aborts the given AbortController once `timeoutMs` of *running* time has
 * elapsed. Time spent between `pause()` and `resume()` (e.g. while waiting
 * for the user) does not count against the budget.
 *
 * All methods are idempotent:
 *  - `pause()` while already paused is a no-op;
 *  - `resume()` while the timer is already running is a no-op, so an
 *    unmatched or duplicated resume can no longer stack a second timer and
 *    orphan the first one (which `clear()` could then never cancel);
 *  - after `clear()` or after the timeout has fired, `resume()` does nothing,
 *    so a late caller cannot re-arm a timer for a task that already ended.
 */
var TaskTimeout = class {
  /**
   * @param {AbortController} abortController - Aborted when the budget runs out.
   * @param {number} timeoutMs - Total running time allowed, in milliseconds.
   */
  constructor(abortController, timeoutMs) {
    this.abortController = abortController;
    this.remainingMs = timeoutMs;
    this.resumedAt = Date.now();
    this.schedule();
  }
  // Handle of the armed timer, or null while paused / finished.
  timeoutId = null;
  // Budget left as of the last pause (or the initial budget).
  remainingMs;
  // Timestamp at which the current running period started.
  resumedAt;
  // True once the timeout fired or clear() was called; no re-arming after that.
  finished = false;
  /** Arm the timer for the remaining budget unless one is armed or we are done. */
  schedule() {
    if (this.finished || this.timeoutId !== null) return;
    this.timeoutId = setTimeout(() => {
      this.timeoutId = null;
      this.finished = true;
      this.remainingMs = 0;
      this.abortController.abort();
    }, this.remainingMs);
  }
  /** Pause the timeout (e.g. while waiting for user). */
  pause() {
    if (this.timeoutId === null) return;
    clearTimeout(this.timeoutId);
    this.timeoutId = null;
    const elapsed = Date.now() - this.resumedAt;
    this.remainingMs = Math.max(0, this.remainingMs - elapsed);
  }
  /** Resume the timeout after user interaction completes. */
  resume() {
    if (this.finished || this.timeoutId !== null) return;
    this.resumedAt = Date.now();
    this.schedule();
  }
  /** Cancel the timeout permanently (task finished). */
  clear() {
    this.finished = true;
    if (this.timeoutId !== null) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
  }
};
5255
5652
  var MAX_HISTORY_ENTRIES = 10;
5256
5653
  var MAX_RESPONSE_LENGTH = 1500;
5257
5654
  var TaskProcessor = class {
@@ -5334,12 +5731,16 @@ var TaskProcessor = class {
5334
5731
  }
5335
5732
  systemPrompt += historyPrompt;
5336
5733
  }
5734
+ const abortController = new AbortController();
5735
+ const taskTimeout = new TaskTimeout(abortController, taskTimeoutMs);
5337
5736
  const browserServer = createBrowserMcpServer();
5338
5737
  const agentToolsServer = createAgentToolsServer({
5339
5738
  memoryManager: this.memoryManager,
5340
5739
  skillManager: this.skillManager,
5341
5740
  taskId: task.id,
5342
- sessionId: this.sessionId || void 0
5741
+ sessionId: this.sessionId || void 0,
5742
+ onUserWaitStart: () => taskTimeout.pause(),
5743
+ onUserWaitEnd: () => taskTimeout.resume()
5343
5744
  });
5344
5745
  const eventHooks = createEventHooks(task.id, toolCallRecords);
5345
5746
  const allowedTools = [
@@ -5386,7 +5787,6 @@ var TaskProcessor = class {
5386
5787
  session_id: ""
5387
5788
  };
5388
5789
  }
5389
- const abortController = new AbortController();
5390
5790
  const options = {
5391
5791
  model: config.model,
5392
5792
  systemPrompt,
@@ -5404,9 +5804,6 @@ var TaskProcessor = class {
5404
5804
  abortController
5405
5805
  };
5406
5806
  const taskStartTime = Date.now();
5407
- const timeoutId = setTimeout(() => {
5408
- abortController.abort();
5409
- }, taskTimeoutMs);
5410
5807
  try {
5411
5808
  for await (const message of query2({
5412
5809
  prompt: promptMessages(),
@@ -5468,7 +5865,7 @@ var TaskProcessor = class {
5468
5865
  }
5469
5866
  }
5470
5867
  } finally {
5471
- clearTimeout(timeoutId);
5868
+ taskTimeout.clear();
5472
5869
  }
5473
5870
  const MAX_CONTENT_LENGTH = 5e4;
5474
5871
  const truncatedResponse = finalResponse.length > MAX_CONTENT_LENGTH ? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]" : finalResponse;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.3.3",
3
+ "version": "0.3.5",
4
4
  "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",