neoagent 2.1.11 → 2.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,38 @@ const { v4: uuidv4 } = require('uuid')
3
3
  const INVOKE_OPEN_RE = /(?:[A-Za-z0-9_.-]+:tool_call\s*)?<invoke\s+name="([^"]+)">/g
4
4
  const PARAM_OPEN_RE = /<parameter\s+name="([^"]+)">/g
5
5
  const PARAM_CLOSED_RE = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g
6
+ const TOOL_WRAPPER_RE = /<\/?[A-Za-z0-9_.-]+:tool_call>/g
7
+ const COMPLETE_INLINE_CALL_RE = /(?:[A-Za-z0-9_.-]+:tool_call\s*)?<invoke\s+name="[^"]+">[\s\S]*?<\/invoke>/g
6
8
  const INVOKE_CLOSE = '</invoke>'
7
9
 
8
10
  function trimLooseControlText(text) {
9
11
  return String(text || '')
12
+ .replace(TOOL_WRAPPER_RE, '')
10
13
  .replace(/(?:^|\s)[A-Za-z0-9_.-]+:tool_call\s*$/g, '')
11
14
  .trim()
12
15
  }
13
16
 
17
+ function sanitizeStreamingToolCallText(text) {
18
+ let visible = String(text || '')
19
+ .replace(COMPLETE_INLINE_CALL_RE, '')
20
+ .replace(TOOL_WRAPPER_RE, '')
21
+
22
+ const partialStarts = [
23
+ visible.lastIndexOf('<invoke'),
24
+ visible.lastIndexOf(':tool_call')
25
+ ].filter((index) => index >= 0)
26
+
27
+ if (partialStarts.length > 0) {
28
+ const partialStart = Math.max(...partialStarts)
29
+ const suffix = visible.slice(partialStart)
30
+ if (!suffix.includes(INVOKE_CLOSE)) {
31
+ visible = visible.slice(0, partialStart)
32
+ }
33
+ }
34
+
35
+ return trimLooseControlText(visible).replace(/\n{3,}/g, '\n\n')
36
+ }
37
+
14
38
  function parseParameterMap(body) {
15
39
  const args = {}
16
40
  let sawClosedParam = false
@@ -113,5 +137,6 @@ function salvageTextToolCalls(content, tools = []) {
113
137
  }
114
138
 
115
139
  module.exports = {
116
- salvageTextToolCalls
140
+ salvageTextToolCalls,
141
+ sanitizeStreamingToolCallText
117
142
  }
@@ -90,10 +90,12 @@ function compactToolResult(toolName, toolArgs = {}, toolResult, options = {}) {
90
90
  break;
91
91
 
92
92
  case 'android_dump_ui':
93
+ case 'android_observe':
93
94
  envelope = trimObject({
94
95
  tool: toolName,
95
96
  serial: toolResult?.serial,
96
97
  nodeCount: toolResult?.nodeCount,
98
+ screenshotPath: toolResult?.screenshotPath,
97
99
  uiDumpPath: toolResult?.uiDumpPath,
98
100
  preview: clampText(JSON.stringify(toolResult?.preview || []).slice(0, Math.floor(softLimit * 0.55)), Math.floor(softLimit * 0.55))
99
101
  });
@@ -292,6 +292,16 @@ function getAvailableTools(app, options = {}) {
292
292
  }
293
293
  }
294
294
  },
295
+ {
296
+ name: 'android_observe',
297
+ description: 'Capture the current Android screen end-to-end: fresh screenshot, UI dump path, and a preview of visible UI nodes.',
298
+ parameters: {
299
+ type: 'object',
300
+ properties: {
301
+ includeNodes: { type: 'boolean', description: 'Include a preview of parsed UI nodes (default true)' }
302
+ }
303
+ }
304
+ },
295
305
  {
296
306
  name: 'android_dump_ui',
297
307
  description: 'Capture the current Android UIAutomator XML dump and return a preview of the nodes.',
@@ -763,7 +773,7 @@ function getAvailableTools(app, options = {}) {
763
773
  },
764
774
  {
765
775
  name: 'analyze_image',
766
- description: 'Analyze an image file using Grok vision. Use this to describe photos, read QR codes, extract text from screenshots, or answer any visual question about an image.',
776
+ description: 'Analyze an image file using the best available vision-capable model. Use this to describe photos, read QR codes, extract text from screenshots, or answer visual questions.',
767
777
  parameters: {
768
778
  type: 'object',
769
779
  properties: {
@@ -940,6 +950,12 @@ async function executeTool(toolName, args, context, engine) {
940
950
  return await controller.waitFor(args || {});
941
951
  }
942
952
 
953
+ case 'android_observe': {
954
+ const controller = ac();
955
+ if (!controller) return { error: 'Android controller not available' };
956
+ return await controller.observe(args || {});
957
+ }
958
+
943
959
  case 'android_dump_ui': {
944
960
  const controller = ac();
945
961
  if (!controller) return { error: 'Android controller not available' };
@@ -1541,23 +1557,67 @@ async function executeTool(toolName, args, context, engine) {
1541
1557
  case 'analyze_image': {
1542
1558
  try {
1543
1559
  if (!fs.existsSync(args.image_path)) return { error: `File not found: ${args.image_path}` };
1544
- const b64 = fs.readFileSync(args.image_path).toString('base64');
1545
1560
  const ext = path.extname(args.image_path).toLowerCase();
1546
1561
  const mimeMap = { '.png': 'image/png', '.gif': 'image/gif', '.webp': 'image/webp', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg' };
1547
1562
  const mime = mimeMap[ext] || 'image/jpeg';
1563
+ const question = args.question || 'Describe this image in detail.';
1548
1564
  const { getProviderForUser } = require('./engine');
1549
- const { provider: visionProvider, model: visionModel } = await getProviderForUser(userId);
1550
- const visionResponse = await visionProvider.chat(
1551
- [{
1552
- role: 'user', content: [
1553
- { type: 'text', text: args.question || 'Describe this image in detail.' },
1554
- { type: 'image_url', image_url: { url: `data:${mime};base64,${b64}` } }
1555
- ]
1556
- }],
1557
- [],
1558
- { model: visionModel }
1559
- );
1560
- return { description: visionResponse.content };
1565
+ const { createProviderInstance, getProviderCatalog } = require('./models');
1566
+
1567
+ const attempted = [];
1568
+ const candidates = [];
1569
+
1570
+ try {
1571
+ const preferred = await getProviderForUser(userId);
1572
+ candidates.push({
1573
+ providerName: preferred.providerName,
1574
+ provider: preferred.provider,
1575
+ });
1576
+ } catch (err) {
1577
+ attempted.push(`default-provider lookup failed: ${err.message}`);
1578
+ }
1579
+
1580
+ for (const providerInfo of getProviderCatalog(userId)) {
1581
+ if (!providerInfo.available) continue;
1582
+ if (candidates.some((candidate) => candidate.providerName === providerInfo.id)) continue;
1583
+ if (!['grok', 'openai'].includes(providerInfo.id)) continue;
1584
+ try {
1585
+ candidates.push({
1586
+ providerName: providerInfo.id,
1587
+ provider: createProviderInstance(providerInfo.id, userId),
1588
+ });
1589
+ } catch (err) {
1590
+ attempted.push(`${providerInfo.id}: ${err.message}`);
1591
+ }
1592
+ }
1593
+
1594
+ for (const candidate of candidates) {
1595
+ if (typeof candidate.provider.supportsVision !== 'function' || candidate.provider.supportsVision() !== true) {
1596
+ attempted.push(`${candidate.providerName}: image analysis is not supported by this provider integration`);
1597
+ continue;
1598
+ }
1599
+
1600
+ try {
1601
+ const visionResponse = await candidate.provider.analyzeImage({
1602
+ imagePath: args.image_path,
1603
+ mimeType: mime,
1604
+ question,
1605
+ });
1606
+ return {
1607
+ description: visionResponse.content,
1608
+ model: visionResponse.model || null,
1609
+ provider: candidate.providerName,
1610
+ };
1611
+ } catch (err) {
1612
+ attempted.push(`${candidate.providerName}: ${err.message}`);
1613
+ }
1614
+ }
1615
+
1616
+ return {
1617
+ error: attempted.length > 0
1618
+ ? `Image analysis failed. ${attempted.join(' | ')}`
1619
+ : 'No vision-capable provider is currently available. Configure OpenAI or xAI for image analysis.',
1620
+ };
1561
1621
  } catch (err) {
1562
1622
  return { error: err.message };
1563
1623
  }
@@ -64,6 +64,25 @@ function commandExists(command) {
64
64
  return probe.status === 0;
65
65
  }
66
66
 
67
/**
 * Pick a launchable `package/activity` component out of command output.
 *
 * Every non-empty, trimmed line that has the shape `name/name` (letters,
 * digits, `.`, `_`, `$` on both sides; the activity may start with `.`) is a
 * candidate. When `packageName` is given, the first candidate belonging to
 * that package wins; otherwise, or when none belongs to it, the first
 * candidate of any package is returned.
 *
 * @param {*} output - Raw command output (stdout/stderr text); falsy is treated as ''.
 * @param {*} packageName - Package to prefer; falsy means no preference.
 * @returns {string|null} The component string, or null when no line looks like one.
 */
function parseResolvedLaunchComponent(output, packageName) {
  // Also covers the relative `pkg/.Activity` form, because `.` is in the class.
  const componentPattern = /^[A-Za-z0-9._$]+\/[A-Za-z0-9._$]+$/;
  const components = String(output || '')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => componentPattern.test(line));

  const normalizedPackage = String(packageName || '').trim();
  if (normalizedPackage) {
    // Only well-formed components are considered, so a diagnostic line that
    // merely starts with `pkg/` is never returned as a launch target.
    const exact = components.find((line) => line.startsWith(`${normalizedPackage}/`));
    if (exact) return exact;
  }

  // NOTE(review): with a packageName this can return a component from a
  // different package — confirm callers want that rather than null.
  return components[0] || null;
}
85
+
67
86
  function appendState(patch) {
68
87
  const current = readState();
69
88
  const next = {
@@ -1085,7 +1104,7 @@ class AndroidController {
1085
1104
  }
1086
1105
 
1087
1106
  async screenshot(options = {}) {
1088
- const serial = await this.ensureDevice();
1107
+ const serial = options.serial || await this.ensureDevice();
1089
1108
  const filename = `android_${Date.now()}.png`;
1090
1109
  const fullPath = path.join(SCREENSHOTS_DIR, filename);
1091
1110
  await this.#run(`${quoteShell(adbBinary())} -s ${quoteShell(serial)} exec-out screencap -p > ${quoteShell(fullPath)}`, { timeout: 30000 });
@@ -1098,7 +1117,7 @@ class AndroidController {
1098
1117
  }
1099
1118
 
1100
1119
  async dumpUi(options = {}) {
1101
- const serial = await this.ensureDevice();
1120
+ const serial = options.serial || await this.ensureDevice();
1102
1121
  let xml = await this.#adb(serial, 'shell uiautomator dump --compressed /dev/tty', { timeout: 30000 });
1103
1122
  if (!String(xml || '').includes('<hierarchy')) {
1104
1123
  const remote = '/sdcard/neoagent-ui.xml';
@@ -1121,6 +1140,63 @@ class AndroidController {
1121
1140
  };
1122
1141
  }
1123
1142
 
1143
+ async #captureObservation(serial, options = {}) {
1144
+ const resolvedSerial = serial || await this.ensureDevice();
1145
+ const observation = {
1146
+ serial: resolvedSerial,
1147
+ screenshotPath: null,
1148
+ fullPath: null,
1149
+ uiDumpPath: null,
1150
+ nodeCount: null,
1151
+ preview: undefined,
1152
+ observationWarnings: [],
1153
+ };
1154
+
1155
+ if (options.screenshot !== false) {
1156
+ try {
1157
+ const shot = await this.screenshot({ serial: resolvedSerial });
1158
+ observation.screenshotPath = shot?.screenshotPath || null;
1159
+ observation.fullPath = shot?.fullPath || null;
1160
+ } catch (err) {
1161
+ observation.observationWarnings.push(`screenshot: ${err.message}`);
1162
+ }
1163
+ }
1164
+
1165
+ try {
1166
+ const dump = await this.dumpUi({
1167
+ serial: resolvedSerial,
1168
+ includeNodes: options.includeNodes !== false,
1169
+ });
1170
+ observation.uiDumpPath = dump.uiDumpPath;
1171
+ observation.nodeCount = dump.nodeCount;
1172
+ observation.preview = dump.preview;
1173
+ } catch (err) {
1174
+ observation.observationWarnings.push(`ui_dump: ${err.message}`);
1175
+ }
1176
+
1177
+ if (observation.observationWarnings.length === 0) {
1178
+ delete observation.observationWarnings;
1179
+ }
1180
+
1181
+ return observation;
1182
+ }
1183
+
1184
+ async observe(options = {}) {
1185
+ const serial = options.serial || await this.ensureDevice();
1186
+ const observation = await this.#captureObservation(serial, options);
1187
+ if (!observation.screenshotPath && !observation.uiDumpPath) {
1188
+ throw new Error(
1189
+ Array.isArray(observation.observationWarnings) && observation.observationWarnings.length > 0
1190
+ ? observation.observationWarnings.join(' | ')
1191
+ : 'Unable to capture Android observation',
1192
+ );
1193
+ }
1194
+ return {
1195
+ success: true,
1196
+ ...observation,
1197
+ };
1198
+ }
1199
+
1124
1200
  async #resolveSelector(args = {}) {
1125
1201
  const dump = await this.dumpUi({ includeNodes: false });
1126
1202
  const selector = {
@@ -1145,27 +1221,27 @@ class AndroidController {
1145
1221
  let y = Number(args.y);
1146
1222
  let node = null;
1147
1223
  let serial = await this.ensureDevice();
1148
- let uiDumpPath = null;
1224
+ let resolvedFromUiDumpPath = null;
1149
1225
 
1150
1226
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
1151
1227
  const resolved = await this.#resolveSelector(args);
1152
1228
  serial = resolved.serial;
1153
1229
  node = resolved.node;
1154
- uiDumpPath = resolved.uiDumpPath;
1230
+ resolvedFromUiDumpPath = resolved.uiDumpPath;
1155
1231
  x = node.bounds.centerX;
1156
1232
  y = node.bounds.centerY;
1157
1233
  }
1158
1234
 
1159
1235
  await this.#adb(serial, `shell input tap ${Math.round(x)} ${Math.round(y)}`, { timeout: 15000 });
1160
- const shot = await this.screenshot();
1236
+ const observation = await this.#captureObservation(serial);
1161
1237
  return {
1162
1238
  success: true,
1163
1239
  serial,
1164
1240
  x: Math.round(x),
1165
1241
  y: Math.round(y),
1166
1242
  target: summarizeNode(node),
1167
- uiDumpPath,
1168
- screenshotPath: shot.screenshotPath,
1243
+ resolvedFromUiDumpPath,
1244
+ ...observation,
1169
1245
  };
1170
1246
  }
1171
1247
 
@@ -1174,13 +1250,13 @@ class AndroidController {
1174
1250
  let y = Number(args.y);
1175
1251
  let node = null;
1176
1252
  let serial = await this.ensureDevice();
1177
- let uiDumpPath = null;
1253
+ let resolvedFromUiDumpPath = null;
1178
1254
 
1179
1255
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
1180
1256
  const resolved = await this.#resolveSelector(args);
1181
1257
  serial = resolved.serial;
1182
1258
  node = resolved.node;
1183
- uiDumpPath = resolved.uiDumpPath;
1259
+ resolvedFromUiDumpPath = resolved.uiDumpPath;
1184
1260
  x = node.bounds.centerX;
1185
1261
  y = node.bounds.centerY;
1186
1262
  }
@@ -1191,7 +1267,7 @@ class AndroidController {
1191
1267
  `shell input swipe ${Math.round(x)} ${Math.round(y)} ${Math.round(x)} ${Math.round(y)} ${Math.round(durationMs)}`,
1192
1268
  { timeout: Math.max(15000, durationMs + 5000) },
1193
1269
  );
1194
- const shot = await this.screenshot();
1270
+ const observation = await this.#captureObservation(serial);
1195
1271
  return {
1196
1272
  success: true,
1197
1273
  serial,
@@ -1199,8 +1275,8 @@ class AndroidController {
1199
1275
  y: Math.round(y),
1200
1276
  durationMs,
1201
1277
  target: summarizeNode(node),
1202
- uiDumpPath,
1203
- screenshotPath: shot.screenshotPath,
1278
+ resolvedFromUiDumpPath,
1279
+ ...observation,
1204
1280
  };
1205
1281
  }
1206
1282
 
@@ -1225,12 +1301,12 @@ class AndroidController {
1225
1301
  if (args.pressEnter) {
1226
1302
  await this.#adb(serial, 'shell input keyevent 66', { timeout: 10000 });
1227
1303
  }
1228
- const shot = await this.screenshot();
1304
+ const observation = await this.#captureObservation(serial);
1229
1305
  return {
1230
1306
  success: true,
1231
1307
  serial,
1232
1308
  typed: args.text || '',
1233
- screenshotPath: shot.screenshotPath,
1309
+ ...observation,
1234
1310
  };
1235
1311
  }
1236
1312
 
@@ -1245,11 +1321,11 @@ class AndroidController {
1245
1321
  throw new Error('x1, y1, x2, and y2 are required for android_swipe');
1246
1322
  }
1247
1323
  await this.#adb(serial, `shell input swipe ${Math.round(x1)} ${Math.round(y1)} ${Math.round(x2)} ${Math.round(y2)} ${Math.round(duration)}`, { timeout: 15000 });
1248
- const shot = await this.screenshot();
1324
+ const observation = await this.#captureObservation(serial);
1249
1325
  return {
1250
1326
  success: true,
1251
1327
  serial,
1252
- screenshotPath: shot.screenshotPath,
1328
+ ...observation,
1253
1329
  };
1254
1330
  }
1255
1331
 
@@ -1259,13 +1335,13 @@ class AndroidController {
1259
1335
  const keyCode = Number.isFinite(Number(raw)) ? Number(raw) : (DEFAULT_KEYEVENTS[raw] || null);
1260
1336
  if (!keyCode) throw new Error(`Unsupported Android key: ${args.key}`);
1261
1337
  await this.#adb(serial, `shell input keyevent ${keyCode}`, { timeout: 10000 });
1262
- const shot = await this.screenshot();
1338
+ const observation = await this.#captureObservation(serial);
1263
1339
  return {
1264
1340
  success: true,
1265
1341
  serial,
1266
1342
  key: args.key,
1267
1343
  keyCode,
1268
- screenshotPath: shot.screenshotPath,
1344
+ ...observation,
1269
1345
  };
1270
1346
  }
1271
1347
 
@@ -1285,13 +1361,15 @@ class AndroidController {
1285
1361
  clickable: args.clickable,
1286
1362
  });
1287
1363
  if (node) {
1288
- const shot = args.screenshot === false ? null : await this.screenshot();
1364
+ const observation = await this.#captureObservation(dump.serial, {
1365
+ screenshot: args.screenshot !== false,
1366
+ });
1289
1367
  return {
1290
1368
  success: true,
1291
1369
  serial: dump.serial,
1292
1370
  matched: summarizeNode(node),
1293
- uiDumpPath: dump.uiDumpPath,
1294
- screenshotPath: shot?.screenshotPath || null,
1371
+ matchedFromUiDumpPath: dump.uiDumpPath,
1372
+ ...observation,
1295
1373
  };
1296
1374
  }
1297
1375
  await sleep(intervalMs);
@@ -1305,17 +1383,30 @@ class AndroidController {
1305
1383
  if (args.activity) {
1306
1384
  await this.#adb(serial, `shell am start -n ${quoteShell(`${args.packageName}/${args.activity}`)}`, { timeout: 20000 });
1307
1385
  } else if (args.packageName) {
1308
- await this.#adb(serial, `shell monkey -p ${quoteShell(args.packageName)} -c android.intent.category.LAUNCHER 1`, { timeout: 30000 });
1386
+ const resolved = await this.#runAllowFailure(
1387
+ `${quoteShell(adbBinary())} -s ${quoteShell(serial)} shell cmd package resolve-activity --brief -c android.intent.category.LAUNCHER ${quoteShell(args.packageName)}`,
1388
+ { timeout: 15000 },
1389
+ );
1390
+ const component = parseResolvedLaunchComponent(
1391
+ `${resolved.stdout || ''}\n${resolved.stderr || ''}`,
1392
+ args.packageName,
1393
+ );
1394
+
1395
+ if (component) {
1396
+ await this.#adb(serial, `shell am start -n ${quoteShell(component)}`, { timeout: 20000 });
1397
+ } else {
1398
+ await this.#adb(serial, `shell monkey -p ${quoteShell(args.packageName)} -c android.intent.category.LAUNCHER 1`, { timeout: 30000 });
1399
+ }
1309
1400
  } else {
1310
1401
  throw new Error('packageName is required for android_open_app');
1311
1402
  }
1312
- const shot = await this.screenshot();
1403
+ const observation = await this.#captureObservation(serial);
1313
1404
  return {
1314
1405
  success: true,
1315
1406
  serial,
1316
1407
  packageName: args.packageName,
1317
1408
  activity: args.activity || null,
1318
- screenshotPath: shot.screenshotPath,
1409
+ ...observation,
1319
1410
  };
1320
1411
  }
1321
1412
 
@@ -1335,11 +1426,11 @@ class AndroidController {
1335
1426
  }
1336
1427
 
1337
1428
  await this.#adb(serial, parts.join(' '), { timeout: 20000 });
1338
- const shot = await this.screenshot();
1429
+ const observation = await this.#captureObservation(serial);
1339
1430
  return {
1340
1431
  success: true,
1341
1432
  serial,
1342
- screenshotPath: shot.screenshotPath,
1433
+ ...observation,
1343
1434
  };
1344
1435
  }
1345
1436
 
@@ -1380,13 +1471,19 @@ class AndroidController {
1380
1471
 
1381
1472
  const timeout = Math.max(1000, Number(args.timeoutMs) || 20000);
1382
1473
  const stdout = await this.#adb(serial, `shell ${quoteShell(command)}`, { timeout });
1383
- const shot = args.screenshot === true ? await this.screenshot() : null;
1474
+ const observation = args.screenshot === true
1475
+ ? await this.#captureObservation(serial)
1476
+ : null;
1384
1477
  return {
1385
1478
  success: true,
1386
1479
  serial,
1387
1480
  command,
1388
1481
  stdout,
1389
- screenshotPath: shot?.screenshotPath || null,
1482
+ screenshotPath: observation?.screenshotPath || null,
1483
+ fullPath: observation?.fullPath || null,
1484
+ uiDumpPath: observation?.uiDumpPath || null,
1485
+ nodeCount: observation?.nodeCount,
1486
+ preview: observation?.preview,
1390
1487
  };
1391
1488
  }
1392
1489
 
@@ -1458,6 +1555,7 @@ module.exports = {
1458
1555
  configuredSystemImagePackage,
1459
1556
  configuredSystemImagePlatform,
1460
1557
  formatSystemImageError,
1558
+ parseResolvedLaunchComponent,
1461
1559
  parseLatestCmdlineToolsUrl,
1462
1560
  parseSystemImages,
1463
1561
  sanitizeUiXml,
@@ -76,6 +76,25 @@ function setupWebSocket(io, services) {
76
76
  }
77
77
  }
78
78
 
79
+ const activeRun = agentEngine.findSteerableRunForUser(userId, 'web');
80
+ if (activeRun) {
81
+ const queued = agentEngine.enqueueSteering(activeRun.runId, task, {
82
+ platform: 'web',
83
+ socketId: socket.id
84
+ });
85
+ if (queued) {
86
+ db.prepare('INSERT INTO conversation_history (user_id, agent_run_id, role, content, metadata) VALUES (?, ?, ?, ?, ?)')
87
+ .run(
88
+ userId,
89
+ activeRun.runId,
90
+ 'user',
91
+ task,
92
+ JSON.stringify({ platform: 'web', steering: true })
93
+ );
94
+ return;
95
+ }
96
+ }
97
+
79
98
  db.prepare('INSERT INTO conversation_history (user_id, role, content, metadata) VALUES (?, ?, ?, ?)')
80
99
  .run(userId, 'user', task, JSON.stringify({ platform: 'web' }));
81
100