assistme 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -171,22 +171,36 @@ async function getConversationHistory(conversationId, excludeMessageId, limit =
171
171
  }
172
172
 
173
173
  // src/db/event.ts
174
+ var MAX_EMIT_RETRIES = 2;
175
+ var EMIT_RETRY_DELAY_MS = 500;
176
+ async function emitWithRetry(messageId, eventType, eventData, seq) {
177
+ for (let attempt = 0; attempt <= MAX_EMIT_RETRIES; attempt++) {
178
+ try {
179
+ await callMcpHandler("event.emit", {
180
+ message_id: messageId,
181
+ event_type: eventType,
182
+ event_data: eventData,
183
+ seq
184
+ });
185
+ return;
186
+ } catch (err) {
187
+ if (attempt < MAX_EMIT_RETRIES) {
188
+ await new Promise((r) => setTimeout(r, EMIT_RETRY_DELAY_MS * (attempt + 1)));
189
+ } else {
190
+ log.warn(
191
+ `Failed to emit event after ${MAX_EMIT_RETRIES + 1} attempts: ${err instanceof Error ? err.message : err}`
192
+ );
193
+ }
194
+ }
195
+ }
196
+ }
174
197
  var eventSequence = 0;
175
198
  function resetEventSequence() {
176
199
  eventSequence = 0;
177
200
  }
178
201
  async function emitEvent(messageId, eventType, eventData) {
179
202
  eventSequence++;
180
- try {
181
- await callMcpHandler("event.emit", {
182
- message_id: messageId,
183
- event_type: eventType,
184
- event_data: eventData,
185
- seq: eventSequence
186
- });
187
- } catch (err) {
188
- log.warn(`Failed to emit event: ${err instanceof Error ? err.message : err}`);
189
- }
203
+ await emitWithRetry(messageId, eventType, eventData, eventSequence);
190
204
  }
191
205
 
192
206
  // src/db/action.ts
@@ -493,11 +507,20 @@ URL: ${info.url}`;
493
507
  }
494
508
  async goBack() {
495
509
  this.ensureConnected();
496
- await this.send("Page.navigateToHistoryEntry", {
497
- entryId: -1
498
- }).catch(() => {
499
- });
500
- await this.evaluate("window.history.back()");
510
+ try {
511
+ const history = await this.send("Page.getNavigationHistory");
512
+ const idx = history.currentIndex ?? 0;
513
+ const entries = history.entries ?? [];
514
+ if (idx > 0 && entries[idx - 1]) {
515
+ await this.send("Page.navigateToHistoryEntry", {
516
+ entryId: entries[idx - 1].id
517
+ });
518
+ } else {
519
+ await this.evaluate("window.history.back()");
520
+ }
521
+ } catch {
522
+ await this.evaluate("window.history.back()");
523
+ }
501
524
  await this.waitForLoad();
502
525
  const info = await this.getPageInfo();
503
526
  return `Went back to: ${info.title}`;
@@ -665,29 +688,80 @@ URL: ${info.url}`;
665
688
  Tab: { keyCode: 9, code: "Tab" },
666
689
  Escape: { keyCode: 27, code: "Escape" },
667
690
  Backspace: { keyCode: 8, code: "Backspace" },
691
+ Delete: { keyCode: 46, code: "Delete" },
668
692
  ArrowDown: { keyCode: 40, code: "ArrowDown" },
669
- ArrowUp: { keyCode: 38, code: "ArrowUp" }
693
+ ArrowUp: { keyCode: 38, code: "ArrowUp" },
694
+ ArrowLeft: { keyCode: 37, code: "ArrowLeft" },
695
+ ArrowRight: { keyCode: 39, code: "ArrowRight" },
696
+ Home: { keyCode: 36, code: "Home" },
697
+ End: { keyCode: 35, code: "End" },
698
+ Space: { keyCode: 32, code: "Space" }
699
+ };
700
+ const modifierMap = {
701
+ Alt: 1,
702
+ Control: 2,
703
+ Meta: 4,
704
+ Shift: 8
670
705
  };
671
- const mapped = keyMap[key];
706
+ const parts = key.split("+");
707
+ let modifiers = 0;
708
+ let actualKey = parts[parts.length - 1];
709
+ for (let i = 0; i < parts.length - 1; i++) {
710
+ const mod = modifierMap[parts[i]];
711
+ if (mod) modifiers |= mod;
712
+ }
713
+ const mapped = keyMap[actualKey];
672
714
  if (mapped) {
673
715
  await this.send("Input.dispatchKeyEvent", {
674
716
  type: "keyDown",
675
- key,
717
+ key: actualKey,
676
718
  code: mapped.code,
677
719
  windowsVirtualKeyCode: mapped.keyCode,
678
- nativeVirtualKeyCode: mapped.keyCode
720
+ nativeVirtualKeyCode: mapped.keyCode,
721
+ modifiers
679
722
  });
680
723
  await this.send("Input.dispatchKeyEvent", {
681
724
  type: "keyUp",
682
- key,
725
+ key: actualKey,
683
726
  code: mapped.code,
684
727
  windowsVirtualKeyCode: mapped.keyCode,
685
- nativeVirtualKeyCode: mapped.keyCode
728
+ nativeVirtualKeyCode: mapped.keyCode,
729
+ modifiers
730
+ });
731
+ } else if (actualKey.length === 1) {
732
+ const code = `Key${actualKey.toUpperCase()}`;
733
+ const keyCode = actualKey.toUpperCase().charCodeAt(0);
734
+ await this.send("Input.dispatchKeyEvent", {
735
+ type: "keyDown",
736
+ key: actualKey,
737
+ code,
738
+ windowsVirtualKeyCode: keyCode,
739
+ nativeVirtualKeyCode: keyCode,
740
+ modifiers
741
+ });
742
+ if (!modifiers) {
743
+ await this.send("Input.dispatchKeyEvent", {
744
+ type: "char",
745
+ text: actualKey,
746
+ modifiers
747
+ });
748
+ }
749
+ await this.send("Input.dispatchKeyEvent", {
750
+ type: "keyUp",
751
+ key: actualKey,
752
+ code,
753
+ modifiers
686
754
  });
687
755
  } else {
688
756
  await this.send("Input.dispatchKeyEvent", {
689
- type: "char",
690
- text: key
757
+ type: "keyDown",
758
+ key: actualKey,
759
+ modifiers
760
+ });
761
+ await this.send("Input.dispatchKeyEvent", {
762
+ type: "keyUp",
763
+ key: actualKey,
764
+ modifiers
691
765
  });
692
766
  }
693
767
  return `Pressed key: ${key}`;
@@ -1061,12 +1135,17 @@ Refs:
1061
1135
  */
1062
1136
  async clickRef(refId) {
1063
1137
  this.ensureConnected();
1138
+ const ref = this.refCache.get(refId);
1139
+ const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1064
1140
  const maxRetries = 3;
1065
1141
  let lastError = "";
1066
1142
  for (let attempt = 0; attempt < maxRetries; attempt++) {
1067
1143
  const resolved = await this.resolveRef(refId);
1068
1144
  if (!resolved) {
1069
- return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
1145
+ return {
1146
+ success: false,
1147
+ message: `Ref ${refLabel} not found. Take a new snapshot with browser_snapshot.`
1148
+ };
1070
1149
  }
1071
1150
  if (resolved.error) {
1072
1151
  lastError = resolved.error;
@@ -1074,8 +1153,7 @@ Refs:
1074
1153
  await new Promise((r) => setTimeout(r, 500));
1075
1154
  continue;
1076
1155
  }
1077
- const ref3 = this.refCache.get(refId);
1078
- return `Cannot click [${refId}] ${ref3?.role || ""} "${ref3?.name || ""}": ${lastError}`;
1156
+ return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
1079
1157
  }
1080
1158
  if (attempt === 0) {
1081
1159
  await new Promise((r) => setTimeout(r, 50));
@@ -1105,11 +1183,9 @@ Refs:
1105
1183
  clickCount: 1
1106
1184
  });
1107
1185
  await new Promise((r) => setTimeout(r, 300));
1108
- const ref2 = this.refCache.get(refId);
1109
- return `Clicked [${refId}] ${ref2?.role || ""} "${ref2?.name || ""}"`;
1186
+ return { success: true, message: `Clicked ${refLabel}` };
1110
1187
  }
1111
- const ref = this.refCache.get(refId);
1112
- return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
1188
+ return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
1113
1189
  }
1114
1190
  /**
1115
1191
  * Type text into an element by ref using CDP Input events.
@@ -1118,37 +1194,49 @@ Refs:
1118
1194
  */
1119
1195
  async typeRef(refId, text) {
1120
1196
  this.ensureConnected();
1197
+ const ref = this.refCache.get(refId);
1198
+ const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1121
1199
  const clickResult = await this.clickRef(refId);
1122
- if (clickResult.includes("not found")) return clickResult;
1200
+ if (!clickResult.success) return clickResult;
1123
1201
  await new Promise((r) => setTimeout(r, 100));
1124
- const modifier = platform() === "darwin" ? 4 : 2;
1125
- await this.send("Input.dispatchKeyEvent", {
1126
- type: "keyDown",
1127
- modifiers: modifier,
1128
- key: "a",
1129
- code: "KeyA",
1130
- windowsVirtualKeyCode: 65
1131
- });
1132
- await this.send("Input.dispatchKeyEvent", {
1133
- type: "keyUp",
1134
- key: "a",
1135
- code: "KeyA"
1136
- });
1137
- await this.send("Input.dispatchKeyEvent", {
1138
- type: "keyDown",
1139
- key: "Backspace",
1140
- code: "Backspace",
1141
- windowsVirtualKeyCode: 8
1142
- });
1143
- await this.send("Input.dispatchKeyEvent", {
1144
- type: "keyUp",
1145
- key: "Backspace",
1146
- code: "Backspace"
1202
+ const selectAllKey = platform() === "darwin" ? "Meta+a" : "Control+a";
1203
+ await this.pressKey(selectAllKey);
1204
+ await new Promise((r) => setTimeout(r, 50));
1205
+ await this.pressKey("Backspace");
1206
+ await new Promise((r) => setTimeout(r, 50));
1207
+ const cleared = await this.send("Runtime.evaluate", {
1208
+ expression: `
1209
+ (function() {
1210
+ var el = document.querySelector('[data-assistme-ref="${refId}"]');
1211
+ if (!el) return 'no_element';
1212
+ if (el.value !== undefined && el.value !== '') {
1213
+ // Ctrl+A didn't work (some frameworks intercept it) \u2014 clear via JS
1214
+ var setter = Object.getOwnPropertyDescriptor(
1215
+ window.HTMLInputElement.prototype, 'value'
1216
+ )?.set || Object.getOwnPropertyDescriptor(
1217
+ window.HTMLTextAreaElement.prototype, 'value'
1218
+ )?.set;
1219
+ if (setter) setter.call(el, '');
1220
+ else el.value = '';
1221
+ el.dispatchEvent(new Event('input', { bubbles: true }));
1222
+ el.dispatchEvent(new Event('change', { bubbles: true }));
1223
+ return 'js_cleared';
1224
+ }
1225
+ return 'ok';
1226
+ })()
1227
+ `,
1228
+ returnByValue: true
1147
1229
  });
1230
+ const clearStatus = cleared.result?.value || "ok";
1231
+ if (clearStatus === "no_element") {
1232
+ return {
1233
+ success: false,
1234
+ message: `Ref ${refLabel} not found after click. Take a new snapshot.`
1235
+ };
1236
+ }
1148
1237
  await this.send("Input.insertText", { text });
1149
1238
  await new Promise((r) => setTimeout(r, 100));
1150
- const ref = this.refCache.get(refId);
1151
- return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1239
+ return { success: true, message: `Typed "${text}" into ${refLabel}` };
1152
1240
  }
1153
1241
  /**
1154
1242
  * Select a dropdown option by ref. Delegates to selectOption with the
@@ -1159,13 +1247,16 @@ Refs:
1159
1247
  this.ensureConnected();
1160
1248
  const cached = this.refCache.get(refId);
1161
1249
  if (!cached) {
1162
- return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
1250
+ return {
1251
+ success: false,
1252
+ message: `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`
1253
+ };
1163
1254
  }
1255
+ const refLabel = `[${refId}] ${cached.role} "${cached.name}"`;
1164
1256
  const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);
1165
- return result.replace(
1166
- /\[data-assistme-ref="\d+"\]/,
1167
- `[${refId}] ${cached.role} "${cached.name}"`
1168
- );
1257
+ const message = result.replace(/\[data-assistme-ref="\d+"\]/, refLabel);
1258
+ const success = !result.includes("not found");
1259
+ return { success, message };
1169
1260
  }
1170
1261
  // ── Action Pipeline ───────────────────────────────────────────────
1171
1262
  /**
@@ -1183,18 +1274,24 @@ Refs:
1183
1274
  let success = true;
1184
1275
  try {
1185
1276
  switch (spec.action) {
1186
- case "click":
1187
- result = await this.clickRef(spec.ref);
1188
- success = !result.includes("not found");
1277
+ case "click": {
1278
+ const r = await this.clickRef(spec.ref);
1279
+ result = r.message;
1280
+ success = r.success;
1189
1281
  break;
1190
- case "type":
1191
- result = await this.typeRef(spec.ref, spec.text);
1192
- success = !result.includes("not found");
1282
+ }
1283
+ case "type": {
1284
+ const r = await this.typeRef(spec.ref, spec.text);
1285
+ result = r.message;
1286
+ success = r.success;
1193
1287
  break;
1194
- case "select":
1195
- result = await this.selectRef(spec.ref, spec.option);
1196
- success = !result.includes("not found");
1288
+ }
1289
+ case "select": {
1290
+ const r = await this.selectRef(spec.ref, spec.option);
1291
+ result = r.message;
1292
+ success = r.success;
1197
1293
  break;
1294
+ }
1198
1295
  case "press":
1199
1296
  result = await this.pressKey(spec.key);
1200
1297
  break;
@@ -1269,15 +1366,24 @@ Refs:
1269
1366
  // Strategy 2: Custom dropdown \u2014 find the trigger element
1270
1367
  var trigger = selectEl;
1271
1368
  if (!trigger) {
1272
- // Try finding by label/placeholder text
1273
- var allEls = document.querySelectorAll('*');
1274
- for (var j = 0; j < allEls.length; j++) {
1275
- var el = allEls[j];
1369
+ // Try finding by aria-label first (fast, indexed)
1370
+ trigger = document.querySelector('[aria-label="' + sel.replace(/"/g, '\\"') + '"]');
1371
+ }
1372
+ if (!trigger) {
1373
+ // Try finding by label/placeholder text in likely dropdown elements
1374
+ var dropdownCandidates = document.querySelectorAll(
1375
+ 'button, [role="combobox"], [role="listbox"], [role="button"], ' +
1376
+ 'select, input, .MuiSelect-root, .MuiInput-root, ' +
1377
+ '[class*="select"], [class*="dropdown"], [class*="picker"]'
1378
+ );
1379
+ for (var j = 0; j < dropdownCandidates.length; j++) {
1380
+ var el = dropdownCandidates[j];
1276
1381
  var ownText = Array.from(el.childNodes)
1277
1382
  .filter(function(n) { return n.nodeType === 3; })
1278
1383
  .map(function(n) { return n.textContent.trim(); })
1279
1384
  .join('');
1280
- if (ownText === sel || el.getAttribute('aria-label') === sel) {
1385
+ if (ownText === sel || el.getAttribute('aria-label') === sel ||
1386
+ el.getAttribute('placeholder') === sel) {
1281
1387
  trigger = el;
1282
1388
  break;
1283
1389
  }
@@ -1314,10 +1420,13 @@ Refs:
1314
1420
  }
1315
1421
  }
1316
1422
 
1317
- // Broader search: any visible element with exact text match
1318
- var everything = document.querySelectorAll('*');
1319
- for (var m = 0; m < everything.length; m++) {
1320
- var candidate = everything[m];
1423
+ // Broader search: visible leaf elements in interactive containers
1424
+ var broadCandidates = document.querySelectorAll(
1425
+ 'li, span, div, a, button, label, [role="option"], [role="menuitem"], ' +
1426
+ '[role="menuitemradio"], [role="menuitemcheckbox"], [data-value]'
1427
+ );
1428
+ for (var m = 0; m < broadCandidates.length; m++) {
1429
+ var candidate = broadCandidates[m];
1321
1430
  if (candidate.textContent && candidate.textContent.trim() === optText &&
1322
1431
  candidate.offsetParent !== null && candidate.children.length === 0) {
1323
1432
  candidate.click();
@@ -1390,6 +1499,7 @@ Refs:
1390
1499
  // ── Helpers ─────────────────────────────────────────────────────
1391
1500
  async waitForLoad(timeoutMs = 8e3) {
1392
1501
  const start = Date.now();
1502
+ let sawInteractive = false;
1393
1503
  while (Date.now() - start < timeoutMs) {
1394
1504
  try {
1395
1505
  const result = await this.send("Runtime.evaluate", {
@@ -1397,67 +1507,22 @@ Refs:
1397
1507
  returnByValue: true
1398
1508
  });
1399
1509
  const state = result.result?.value;
1400
- if (state === "complete" || state === "interactive") {
1401
- await new Promise((r) => setTimeout(r, 500));
1510
+ if (state === "complete") {
1511
+ await new Promise((r) => setTimeout(r, 300));
1402
1512
  return;
1403
1513
  }
1514
+ if (state === "interactive") {
1515
+ if (!sawInteractive) {
1516
+ sawInteractive = true;
1517
+ }
1518
+ }
1404
1519
  } catch {
1405
1520
  }
1406
1521
  await new Promise((r) => setTimeout(r, 300));
1407
1522
  }
1408
- }
1409
- /**
1410
- * Find interactive elements on the page for the AI to understand what's clickable
1411
- */
1412
- async getInteractiveElements() {
1413
- this.ensureConnected();
1414
- const result = await this.send("Runtime.evaluate", {
1415
- expression: `
1416
- (function() {
1417
- const elements = [];
1418
- const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
1419
- const all = document.querySelectorAll(selectors);
1420
- for (let i = 0; i < all.length && elements.length < 50; i++) {
1421
- const el = all[i];
1422
- const rect = el.getBoundingClientRect();
1423
- if (rect.width === 0 || rect.height === 0) continue; // Skip hidden
1424
-
1425
- // Build a reliable CSS selector
1426
- let selector;
1427
- if (el.id) {
1428
- selector = '#' + CSS.escape(el.id);
1429
- } else if (el.getAttribute('data-testid')) {
1430
- selector = '[data-testid="' + el.getAttribute('data-testid') + '"]';
1431
- } else {
1432
- // Build a path-based selector: find nth-of-type among siblings
1433
- const tag = el.tagName.toLowerCase();
1434
- const parent = el.parentElement;
1435
- if (parent) {
1436
- const siblings = parent.querySelectorAll(':scope > ' + tag);
1437
- const idx = Array.from(siblings).indexOf(el) + 1;
1438
- selector = tag + ':nth-of-type(' + idx + ')';
1439
- } else {
1440
- selector = tag;
1441
- }
1442
- }
1443
-
1444
- elements.push({
1445
- tag: el.tagName.toLowerCase(),
1446
- text: (el.textContent || '').trim().slice(0, 80),
1447
- type: el.getAttribute('type') || '',
1448
- name: el.getAttribute('name') || '',
1449
- id: el.id || '',
1450
- href: el.getAttribute('href') || '',
1451
- placeholder: el.getAttribute('placeholder') || '',
1452
- selector: selector,
1453
- });
1454
- }
1455
- return JSON.stringify(elements, null, 2);
1456
- })()
1457
- `,
1458
- returnByValue: true
1459
- });
1460
- return result.result?.value || "[]";
1523
+ if (sawInteractive) {
1524
+ await new Promise((r) => setTimeout(r, 300));
1525
+ }
1461
1526
  }
1462
1527
  isConnected() {
1463
1528
  return this.connected && this.ws?.readyState === WebSocket.OPEN;
@@ -1796,12 +1861,14 @@ async function ensureBrowserAvailable(port = 9222) {
1796
1861
  detail: "Could not start browser with remote debugging. Possible causes:\n 1) Another assistme debug browser is already using port " + port + "\n 2) The browser crashed on startup\nTry: rm -rf ~/.assistme/browser-profile && assistme"
1797
1862
  };
1798
1863
  }
1799
- var browserInstance = null;
1864
+ var browserInstances = /* @__PURE__ */ new Map();
1800
1865
  function getBrowser(port = 9222) {
1801
- if (!browserInstance) {
1802
- browserInstance = new BrowserController(port);
1866
+ let instance = browserInstances.get(port);
1867
+ if (!instance) {
1868
+ instance = new BrowserController(port);
1869
+ browserInstances.set(port, instance);
1803
1870
  }
1804
- return browserInstance;
1871
+ return instance;
1805
1872
  }
1806
1873
 
1807
1874
  // src/commands/browser.ts
@@ -3508,9 +3575,6 @@ async function executeTool(name, input) {
3508
3575
  case "browser_scroll":
3509
3576
  await ensureConnected(browser);
3510
3577
  return input.direction === "up" ? browser.scrollUp() : browser.scrollDown();
3511
- case "browser_get_elements":
3512
- await ensureConnected(browser);
3513
- return browser.getInteractiveElements();
3514
3578
  case "browser_select":
3515
3579
  await ensureConnected(browser);
3516
3580
  return browser.selectOption(input.selector, input.option);
@@ -3691,7 +3755,6 @@ var BROWSER_TOOL_NAMES = [
3691
3755
  "browser_type",
3692
3756
  "browser_press_key",
3693
3757
  "browser_scroll",
3694
- "browser_get_elements",
3695
3758
  "browser_select",
3696
3759
  "browser_snapshot",
3697
3760
  "browser_act",
@@ -3734,13 +3797,7 @@ function createBrowserMcpServer() {
3734
3797
  const base64 = await executeTool("browser_screenshot", {});
3735
3798
  if (base64.length > 100) {
3736
3799
  return {
3737
- content: [
3738
- {
3739
- type: "image",
3740
- data: base64,
3741
- mimeType: "image/png"
3742
- }
3743
- ]
3800
+ content: [{ type: "image", data: base64, mimeType: "image/png" }]
3744
3801
  };
3745
3802
  }
3746
3803
  return { content: [{ type: "text", text: base64 }] };
@@ -3773,12 +3830,6 @@ function createBrowserMcpServer() {
3773
3830
  { direction: z.string().describe("'down' or 'up'") },
3774
3831
  async (args) => callTool("browser_scroll", args)
3775
3832
  ),
3776
- tool(
3777
- "browser_get_elements",
3778
- "Find all interactive elements (links, buttons, inputs) on the current page.",
3779
- {},
3780
- async () => callTool("browser_get_elements", {})
3781
- ),
3782
3833
  tool(
3783
3834
  "browser_select",
3784
3835
  "Select an option from a dropdown menu. Handles both native <select> elements and custom dropdowns (Material Design, React, Angular). Use this instead of manually clicking dropdown items.",
@@ -3807,11 +3858,7 @@ function createBrowserMcpServer() {
3807
3858
  const imageData = parts[1] || "";
3808
3859
  const content = [];
3809
3860
  if (imageData.length > 100) {
3810
- content.push({
3811
- type: "image",
3812
- data: imageData,
3813
- mimeType: "image/png"
3814
- });
3861
+ content.push({ type: "image", data: imageData, mimeType: "image/png" });
3815
3862
  }
3816
3863
  content.push({ type: "text", text: refTable });
3817
3864
  return { content };
@@ -3847,11 +3894,7 @@ function createBrowserMcpServer() {
3847
3894
  const content = [];
3848
3895
  content.push({ type: "text", text: actionText });
3849
3896
  if (screenshotData.length > 100) {
3850
- content.push({
3851
- type: "image",
3852
- data: screenshotData,
3853
- mimeType: "image/png"
3854
- });
3897
+ content.push({ type: "image", data: screenshotData, mimeType: "image/png" });
3855
3898
  }
3856
3899
  return { content };
3857
3900
  }
@@ -4154,7 +4197,7 @@ function getCredentialStore() {
4154
4197
 
4155
4198
  // src/mcp/agent-tools-server.ts
4156
4199
  function createAgentToolsServer(deps) {
4157
- const { memoryManager, skillManager, taskId, sessionId } = deps;
4200
+ const { memoryManager, skillManager, taskId, sessionId, onUserWaitStart, onUserWaitEnd } = deps;
4158
4201
  return createSdkMcpServer2({
4159
4202
  name: "assistme-agent",
4160
4203
  version: "1.0.0",
@@ -4659,52 +4702,56 @@ Use \`ask_user\` to request these from the user, or create them yourself (e.g. r
4659
4702
  try {
4660
4703
  await setActionRequest(taskId, actionData);
4661
4704
  log.info(`Ask user ${actionId}: "${args.question.slice(0, 80)}..."`);
4662
- emitEvent(taskId, "user_action_request", actionData).catch(() => {
4663
- });
4664
- emitEvent(taskId, "status_change", {
4705
+ await emitEvent(taskId, "user_action_request", actionData);
4706
+ await emitEvent(taskId, "status_change", {
4665
4707
  status: "waiting_for_user",
4666
4708
  message: args.question
4667
- }).catch(() => {
4668
4709
  });
4710
+ onUserWaitStart?.();
4669
4711
  const startTime = Date.now();
4670
4712
  const pollInterval = 2e3;
4671
- while (Date.now() - startTime < timeout) {
4672
- const response = await pollActionResponse(taskId);
4673
- if (response && (!response.action_id || response.action_id === actionId)) {
4674
- const actionKey = response.action_key || "";
4675
- const text = response.text || "";
4676
- const label = response.label || actionKey || text;
4677
- log.info(`User responded: "${label}"`);
4678
- return {
4679
- content: [
4680
- {
4681
- type: "text",
4682
- text: JSON.stringify({
4683
- status: "responded",
4684
- action_key: actionKey || "custom_input",
4685
- label,
4686
- text: text || label
4687
- })
4688
- }
4689
- ]
4690
- };
4713
+ try {
4714
+ while (Date.now() - startTime < timeout) {
4715
+ const response = await pollActionResponse(taskId);
4716
+ if (response && (!response.action_id || response.action_id === actionId)) {
4717
+ const actionKey = response.action_key || "";
4718
+ const text = response.text || "";
4719
+ const label = response.label || actionKey || text;
4720
+ log.info(`User responded: "${label}"`);
4721
+ return {
4722
+ content: [
4723
+ {
4724
+ type: "text",
4725
+ text: JSON.stringify({
4726
+ status: "responded",
4727
+ action_key: actionKey || "custom_input",
4728
+ label,
4729
+ text: text || label
4730
+ })
4731
+ }
4732
+ ]
4733
+ };
4734
+ }
4735
+ await new Promise((resolve2) => setTimeout(resolve2, pollInterval));
4691
4736
  }
4692
- await new Promise((resolve2) => setTimeout(resolve2, pollInterval));
4737
+ log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
4738
+ return {
4739
+ content: [
4740
+ {
4741
+ type: "text",
4742
+ text: JSON.stringify({
4743
+ status: "timeout",
4744
+ message: "User did not respond within the timeout period. Continue the task with a reasonable default or skip the step that required user input."
4745
+ })
4746
+ }
4747
+ ]
4748
+ };
4749
+ } finally {
4750
+ onUserWaitEnd?.();
4693
4751
  }
4694
- log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
4695
- return {
4696
- content: [
4697
- {
4698
- type: "text",
4699
- text: JSON.stringify({
4700
- status: "timeout",
4701
- message: "User did not respond within the timeout period."
4702
- })
4703
- }
4704
- ]
4705
- };
4706
4752
  } catch (err) {
4707
4753
  log.error(`ask_user failed: ${err}`);
4754
+ onUserWaitEnd?.();
4708
4755
  return {
4709
4756
  content: [
4710
4757
  {
@@ -5125,7 +5172,7 @@ Available capabilities:
5125
5172
  - Refs persist across actions unless the page navigates. Re-snapshot after navigation or major DOM changes.
5126
5173
 
5127
5174
  **Legacy tools (still available, use when refs don't work):**
5128
- - browser_click, browser_type, browser_select, browser_get_elements, browser_screenshot, browser_evaluate
5175
+ - browser_click, browser_type, browser_select, browser_screenshot, browser_evaluate
5129
5176
  - browser_click supports :contains('text') pseudo-selectors
5130
5177
  - browser_select handles native and custom dropdowns
5131
5178
 
@@ -5138,12 +5185,16 @@ Available capabilities:
5138
5185
  - Bash tool for shell commands
5139
5186
  - Glob and Grep for file search
5140
5187
 
5141
- 3. MEMORY:
5188
+ 3. MEMORY & CREDENTIALS:
5142
5189
  - You can remember things about the user using memory_store
5143
5190
  - Use this when you learn preferences, important facts, or standing instructions
5144
5191
  - Your stored memories persist across conversations
5145
5192
  - PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
5146
5193
  - Before completing a task, consider if anything learned should be remembered for future conversations
5194
+ - CRITICAL \u2014 Credential Storage: When you create, register, or receive any account credentials (username, password, API keys, tokens), you MUST use credential_set to save them locally. NEVER use memory_store for credentials \u2014 memory_store is for preferences and facts, credential_set is for secrets. Examples:
5195
+ * After registering a new email/account \u2192 credential_set with type "login" and data { "username": "...", "password": "...", "email": "..." }
5196
+ * After generating an API key \u2192 credential_set with type "api_key" and data { "api_key": "..." }
5197
+ * Credentials saved via credential_set are encrypted on disk and viewable in the desktop app's Credentials panel
5147
5198
 
5148
5199
  4. SKILL-AWARE EXECUTION (CRITICAL \u2014 follow this for EVERY task):
5149
5200
  Step A \u2014 Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
@@ -5223,6 +5274,42 @@ CRITICAL \u2014 Ask before you guess:
5223
5274
  Workspace path: {workspace_path}`;
5224
5275
 
5225
5276
  // src/agent/processor.ts
5277
+ var TaskTimeout = class {
5278
+ constructor(abortController, timeoutMs) {
5279
+ this.abortController = abortController;
5280
+ this.remainingMs = timeoutMs;
5281
+ this.resumedAt = Date.now();
5282
+ this.schedule();
5283
+ }
5284
+ timeoutId = null;
5285
+ remainingMs;
5286
+ resumedAt;
5287
+ schedule() {
5288
+ this.timeoutId = setTimeout(() => {
5289
+ this.abortController.abort();
5290
+ }, this.remainingMs);
5291
+ }
5292
+ /** Pause the timeout (e.g. while waiting for user). */
5293
+ pause() {
5294
+ if (this.timeoutId) {
5295
+ clearTimeout(this.timeoutId);
5296
+ this.timeoutId = null;
5297
+ const elapsed = Date.now() - this.resumedAt;
5298
+ this.remainingMs = Math.max(0, this.remainingMs - elapsed);
5299
+ }
5300
+ }
5301
+ /** Resume the timeout after user interaction completes. */
5302
+ resume() {
5303
+ this.resumedAt = Date.now();
5304
+ this.schedule();
5305
+ }
5306
+ clear() {
5307
+ if (this.timeoutId) {
5308
+ clearTimeout(this.timeoutId);
5309
+ this.timeoutId = null;
5310
+ }
5311
+ }
5312
+ };
5226
5313
  var MAX_HISTORY_ENTRIES = 10;
5227
5314
  var MAX_RESPONSE_LENGTH = 1500;
5228
5315
  var TaskProcessor = class {
@@ -5305,12 +5392,16 @@ var TaskProcessor = class {
5305
5392
  }
5306
5393
  systemPrompt += historyPrompt;
5307
5394
  }
5395
+ const abortController = new AbortController();
5396
+ const taskTimeout = new TaskTimeout(abortController, taskTimeoutMs);
5308
5397
  const browserServer = createBrowserMcpServer();
5309
5398
  const agentToolsServer = createAgentToolsServer({
5310
5399
  memoryManager: this.memoryManager,
5311
5400
  skillManager: this.skillManager,
5312
5401
  taskId: task.id,
5313
- sessionId: this.sessionId || void 0
5402
+ sessionId: this.sessionId || void 0,
5403
+ onUserWaitStart: () => taskTimeout.pause(),
5404
+ onUserWaitEnd: () => taskTimeout.resume()
5314
5405
  });
5315
5406
  const eventHooks = createEventHooks(task.id, toolCallRecords);
5316
5407
  const allowedTools = [
@@ -5357,7 +5448,6 @@ var TaskProcessor = class {
5357
5448
  session_id: ""
5358
5449
  };
5359
5450
  }
5360
- const abortController = new AbortController();
5361
5451
  const options = {
5362
5452
  model: config.model,
5363
5453
  systemPrompt,
@@ -5375,9 +5465,6 @@ var TaskProcessor = class {
5375
5465
  abortController
5376
5466
  };
5377
5467
  const taskStartTime = Date.now();
5378
- const timeoutId = setTimeout(() => {
5379
- abortController.abort();
5380
- }, taskTimeoutMs);
5381
5468
  try {
5382
5469
  for await (const message of query2({
5383
5470
  prompt: promptMessages(),
@@ -5439,9 +5526,11 @@ var TaskProcessor = class {
5439
5526
  }
5440
5527
  }
5441
5528
  } finally {
5442
- clearTimeout(timeoutId);
5529
+ taskTimeout.clear();
5443
5530
  }
5444
- await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
5531
+ const MAX_CONTENT_LENGTH = 5e4;
5532
+ const truncatedResponse = finalResponse.length > MAX_CONTENT_LENGTH ? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]" : finalResponse;
5533
+ await withRetry(() => completeTask(task.id, truncatedResponse, tokenUsage), {
5445
5534
  maxRetries: 2,
5446
5535
  baseDelayMs: 300,
5447
5536
  label: "completeTask"