neoagent 2.1.12 → 2.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,15 @@ router.post('/screenshot', async (req, res) => {
85
85
  }
86
86
  });
87
87
 
88
+ router.post('/observe', async (req, res) => {
89
+ try {
90
+ const controller = req.app.locals.androidController;
91
+ res.json(await controller.observe(req.body || {}));
92
+ } catch (err) {
93
+ res.status(500).json({ error: sanitizeError(err) });
94
+ }
95
+ });
96
+
88
97
  router.post('/ui-dump', async (req, res) => {
89
98
  try {
90
99
  const controller = req.app.locals.androidController;
@@ -163,6 +163,86 @@ class AgentEngine {
163
163
  return this.activeRuns.get(runId) || null;
164
164
  }
165
165
 
166
+ findActiveRunForUser(userId, predicate = null) {
167
+ let candidate = null;
168
+ for (const [runId, runMeta] of this.activeRuns.entries()) {
169
+ if (runMeta.userId !== userId || runMeta.aborted) continue;
170
+ if (typeof predicate === 'function' && !predicate(runMeta, runId)) continue;
171
+ if (!candidate || (runMeta.startedAt || 0) >= (candidate.startedAt || 0)) {
172
+ candidate = { runId, ...runMeta };
173
+ }
174
+ }
175
+ return candidate;
176
+ }
177
+
178
+ findSteerableRunForUser(userId, triggerSource = 'web') {
179
+ return this.findActiveRunForUser(
180
+ userId,
181
+ (runMeta) => runMeta.triggerSource === triggerSource && runMeta.triggerType === 'user'
182
+ );
183
+ }
184
+
185
+ enqueueSteering(runId, content, metadata = {}) {
186
+ const runMeta = this.getRunMeta(runId);
187
+ const trimmed = typeof content === 'string' ? content.trim() : '';
188
+ if (!runMeta || runMeta.aborted || !trimmed) return null;
189
+
190
+ const item = {
191
+ id: uuidv4(),
192
+ content: trimmed,
193
+ metadata,
194
+ createdAt: new Date().toISOString()
195
+ };
196
+
197
+ runMeta.steeringQueue.push(item);
198
+ this.emit(runMeta.userId, 'run:steer_queued', {
199
+ runId,
200
+ content: item.content,
201
+ pendingCount: runMeta.steeringQueue.length
202
+ });
203
+
204
+ return {
205
+ runId,
206
+ pendingCount: runMeta.steeringQueue.length,
207
+ item
208
+ };
209
+ }
210
+
211
+ applyQueuedSteering(runId, messages, { userId, conversationId }) {
212
+ const runMeta = this.getRunMeta(runId);
213
+ if (!runMeta?.steeringQueue?.length) {
214
+ return { messages, appliedCount: 0 };
215
+ }
216
+
217
+ const queued = runMeta.steeringQueue.splice(0, runMeta.steeringQueue.length);
218
+ messages.push({
219
+ role: 'system',
220
+ content: [
221
+ 'The user sent follow-up messages while you were already working.',
222
+ 'Treat them as steering or next-up context for the same conversation.',
223
+ 'If a message materially changes the active task, incorporate it now.',
224
+ 'If it is unrelated or better handled after the current task, finish the current work first and then address it.'
225
+ ].join(' ')
226
+ });
227
+
228
+ for (const entry of queued) {
229
+ messages.push({ role: 'user', content: entry.content });
230
+ if (conversationId) {
231
+ db.prepare('INSERT INTO conversation_messages (conversation_id, role, content) VALUES (?, ?, ?)')
232
+ .run(conversationId, 'user', entry.content);
233
+ }
234
+ }
235
+
236
+ this.emit(userId, 'run:steer_applied', {
237
+ runId,
238
+ count: queued.length,
239
+ pendingCount: runMeta.steeringQueue.length,
240
+ latestContent: queued[queued.length - 1]?.content || ''
241
+ });
242
+
243
+ return { messages, appliedCount: queued.length };
244
+ }
245
+
166
246
  isRunStopped(runId) {
167
247
  return this.getRunMeta(runId)?.aborted === true;
168
248
  }
@@ -321,8 +401,12 @@ class AgentEngine {
321
401
  status: 'running',
322
402
  aborted: false,
323
403
  messagingSent: false,
404
+ triggerType,
405
+ triggerSource,
406
+ startedAt: Date.now(),
324
407
  lastToolName: null,
325
408
  lastToolTarget: null,
409
+ steeringQueue: [],
326
410
  toolPids: new Set()
327
411
  });
328
412
  this.emit(userId, 'run:start', { runId, title: runTitle, model, triggerType, triggerSource });
@@ -372,6 +456,12 @@ class AgentEngine {
372
456
  if (this.isRunStopped(runId)) break;
373
457
  iteration++;
374
458
 
459
+ const steeringAtLoopStart = this.applyQueuedSteering(runId, messages, {
460
+ userId,
461
+ conversationId
462
+ });
463
+ messages = steeringAtLoopStart.messages;
464
+
375
465
  let metrics = this.estimatePromptMetrics(messages, tools);
376
466
  const contextWindow = provider.getContextWindow(model);
377
467
  if (metrics.totalEstimatedTokens > contextWindow * 0.7) {
@@ -513,7 +603,19 @@ class AgentEngine {
513
603
  );
514
604
  }
515
605
 
516
- if (!response.toolCalls || response.toolCalls.length === 0) break;
606
+ if (!response.toolCalls || response.toolCalls.length === 0) {
607
+ const steeringAfterResponse = this.applyQueuedSteering(runId, messages, {
608
+ userId,
609
+ conversationId
610
+ });
611
+ messages = steeringAfterResponse.messages;
612
+ if (steeringAfterResponse.appliedCount > 0) {
613
+ iteration = Math.max(0, iteration - 1);
614
+ lastContent = '';
615
+ continue;
616
+ }
617
+ break;
618
+ }
517
619
 
518
620
  for (const toolCall of response.toolCalls) {
519
621
  if (this.isRunStopped(runId)) break;
@@ -1,4 +1,9 @@
1
1
  class BaseProvider {
2
+ static readImageAsBase64(imagePath) {
3
+ const fs = require('fs');
4
+ return fs.readFileSync(imagePath).toString('base64');
5
+ }
6
+
2
7
  constructor(config = {}) {
3
8
  this.config = config;
4
9
  this.name = 'base';
@@ -36,6 +41,18 @@ class BaseProvider {
36
41
  getContextWindow(model) {
37
42
  return 128000;
38
43
  }
44
+
45
+ supportsVision() {
46
+ return false;
47
+ }
48
+
49
+ getDefaultVisionModel() {
50
+ return null;
51
+ }
52
+
53
+ async analyzeImage(_options = {}) {
54
+ throw new Error(`Provider '${this.name}' does not support image analysis`);
55
+ }
39
56
  }
40
57
 
41
58
  module.exports = { BaseProvider };
@@ -15,6 +15,14 @@ class GrokProvider extends BaseProvider {
15
15
  return 131072; // grok-4 context window
16
16
  }
17
17
 
18
+ supportsVision() {
19
+ return true;
20
+ }
21
+
22
+ getDefaultVisionModel() {
23
+ return 'grok-4.20-beta-latest-non-reasoning';
24
+ }
25
+
18
26
  _buildParams(model, messages, tools, options) {
19
27
  const params = {
20
28
  model,
@@ -116,6 +124,32 @@ class GrokProvider extends BaseProvider {
116
124
  }
117
125
  }));
118
126
  }
127
+
128
+ async analyzeImage(options = {}) {
129
+ const model = options.model || this.getDefaultVisionModel();
130
+ const b64 = BaseProvider.readImageAsBase64(options.imagePath);
131
+ const response = await this.client.chat.completions.create({
132
+ model,
133
+ max_tokens: options.maxTokens || 4096,
134
+ messages: [{
135
+ role: 'user',
136
+ content: [
137
+ { type: 'text', text: options.question || 'Describe this image in detail.' },
138
+ {
139
+ type: 'image_url',
140
+ image_url: {
141
+ url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
142
+ }
143
+ }
144
+ ]
145
+ }]
146
+ });
147
+
148
+ return {
149
+ content: response.choices[0]?.message?.content || '',
150
+ model: response.model || model,
151
+ };
152
+ }
119
153
  }
120
154
 
121
155
  module.exports = { GrokProvider };
@@ -48,6 +48,14 @@ class OpenAIProvider extends BaseProvider {
48
48
  return 128000;
49
49
  }
50
50
 
51
+ supportsVision() {
52
+ return true;
53
+ }
54
+
55
+ getDefaultVisionModel() {
56
+ return 'gpt-4.1-mini';
57
+ }
58
+
51
59
  _buildParams(model, messages, tools, options) {
52
60
  const isReasoning = this.isReasoningModel(model);
53
61
  // Reasoning models (GPT-5, o-series): use developer role for system messages
@@ -163,6 +171,32 @@ class OpenAIProvider extends BaseProvider {
163
171
  }
164
172
  }
165
173
  }
174
+
175
+ async analyzeImage(options = {}) {
176
+ const model = options.model || this.getDefaultVisionModel();
177
+ const b64 = BaseProvider.readImageAsBase64(options.imagePath);
178
+ const response = await this.client.chat.completions.create({
179
+ model,
180
+ max_tokens: options.maxTokens || 4096,
181
+ messages: [{
182
+ role: 'user',
183
+ content: [
184
+ { type: 'text', text: options.question || 'Describe this image in detail.' },
185
+ {
186
+ type: 'image_url',
187
+ image_url: {
188
+ url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
189
+ }
190
+ }
191
+ ]
192
+ }]
193
+ });
194
+
195
+ return {
196
+ content: response.choices[0]?.message?.content || '',
197
+ model: response.model || model,
198
+ };
199
+ }
166
200
  }
167
201
 
168
202
  module.exports = { OpenAIProvider };
@@ -41,6 +41,7 @@ When prior context makes the goal clear, act on it. Only ask a clarifying questi
41
41
 
42
42
  REPORT ACTUAL RESULTS
43
43
  When a tool returns data, share the relevant parts — summarized if large, direct if short. Never paste raw JSON as the answer. Never narrate what you're about to do at length before doing it.
44
+ Never promise an action in the final answer unless you already took that action in this run. Do not say "I'll check", "I'll fix it", or "I'll send it" and then stop. Either do it first or say you have not done it yet.
44
45
 
45
46
  DON'T REPEAT YOURSELF
46
47
  State a limitation or error once. If the user pushes back, try a different approach before restating the same failure. Repeating the same dead-end across five messages is useless.
@@ -90,10 +90,12 @@ function compactToolResult(toolName, toolArgs = {}, toolResult, options = {}) {
90
90
  break;
91
91
 
92
92
  case 'android_dump_ui':
93
+ case 'android_observe':
93
94
  envelope = trimObject({
94
95
  tool: toolName,
95
96
  serial: toolResult?.serial,
96
97
  nodeCount: toolResult?.nodeCount,
98
+ screenshotPath: toolResult?.screenshotPath,
97
99
  uiDumpPath: toolResult?.uiDumpPath,
98
100
  preview: clampText(JSON.stringify(toolResult?.preview || []).slice(0, Math.floor(softLimit * 0.55)), Math.floor(softLimit * 0.55))
99
101
  });
@@ -292,6 +292,16 @@ function getAvailableTools(app, options = {}) {
292
292
  }
293
293
  }
294
294
  },
295
+ {
296
+ name: 'android_observe',
297
+ description: 'Capture the current Android screen end-to-end: fresh screenshot, UI dump path, and a preview of visible UI nodes.',
298
+ parameters: {
299
+ type: 'object',
300
+ properties: {
301
+ includeNodes: { type: 'boolean', description: 'Include a preview of parsed UI nodes (default true)' }
302
+ }
303
+ }
304
+ },
295
305
  {
296
306
  name: 'android_dump_ui',
297
307
  description: 'Capture the current Android UIAutomator XML dump and return a preview of the nodes.',
@@ -763,7 +773,7 @@ function getAvailableTools(app, options = {}) {
763
773
  },
764
774
  {
765
775
  name: 'analyze_image',
766
- description: 'Analyze an image file using Grok vision. Use this to describe photos, read QR codes, extract text from screenshots, or answer any visual question about an image.',
776
+ description: 'Analyze an image file using the best available vision-capable model. Use this to describe photos, read QR codes, extract text from screenshots, or answer visual questions.',
767
777
  parameters: {
768
778
  type: 'object',
769
779
  properties: {
@@ -940,6 +950,12 @@ async function executeTool(toolName, args, context, engine) {
940
950
  return await controller.waitFor(args || {});
941
951
  }
942
952
 
953
+ case 'android_observe': {
954
+ const controller = ac();
955
+ if (!controller) return { error: 'Android controller not available' };
956
+ return await controller.observe(args || {});
957
+ }
958
+
943
959
  case 'android_dump_ui': {
944
960
  const controller = ac();
945
961
  if (!controller) return { error: 'Android controller not available' };
@@ -1541,23 +1557,67 @@ async function executeTool(toolName, args, context, engine) {
1541
1557
  case 'analyze_image': {
1542
1558
  try {
1543
1559
  if (!fs.existsSync(args.image_path)) return { error: `File not found: ${args.image_path}` };
1544
- const b64 = fs.readFileSync(args.image_path).toString('base64');
1545
1560
  const ext = path.extname(args.image_path).toLowerCase();
1546
1561
  const mimeMap = { '.png': 'image/png', '.gif': 'image/gif', '.webp': 'image/webp', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg' };
1547
1562
  const mime = mimeMap[ext] || 'image/jpeg';
1563
+ const question = args.question || 'Describe this image in detail.';
1548
1564
  const { getProviderForUser } = require('./engine');
1549
- const { provider: visionProvider, model: visionModel } = await getProviderForUser(userId);
1550
- const visionResponse = await visionProvider.chat(
1551
- [{
1552
- role: 'user', content: [
1553
- { type: 'text', text: args.question || 'Describe this image in detail.' },
1554
- { type: 'image_url', image_url: { url: `data:${mime};base64,${b64}` } }
1555
- ]
1556
- }],
1557
- [],
1558
- { model: visionModel }
1559
- );
1560
- return { description: visionResponse.content };
1565
+ const { createProviderInstance, getProviderCatalog } = require('./models');
1566
+
1567
+ const attempted = [];
1568
+ const candidates = [];
1569
+
1570
+ try {
1571
+ const preferred = await getProviderForUser(userId);
1572
+ candidates.push({
1573
+ providerName: preferred.providerName,
1574
+ provider: preferred.provider,
1575
+ });
1576
+ } catch (err) {
1577
+ attempted.push(`default-provider lookup failed: ${err.message}`);
1578
+ }
1579
+
1580
+ for (const providerInfo of getProviderCatalog(userId)) {
1581
+ if (!providerInfo.available) continue;
1582
+ if (candidates.some((candidate) => candidate.providerName === providerInfo.id)) continue;
1583
+ if (!['grok', 'openai'].includes(providerInfo.id)) continue;
1584
+ try {
1585
+ candidates.push({
1586
+ providerName: providerInfo.id,
1587
+ provider: createProviderInstance(providerInfo.id, userId),
1588
+ });
1589
+ } catch (err) {
1590
+ attempted.push(`${providerInfo.id}: ${err.message}`);
1591
+ }
1592
+ }
1593
+
1594
+ for (const candidate of candidates) {
1595
+ if (typeof candidate.provider.supportsVision !== 'function' || candidate.provider.supportsVision() !== true) {
1596
+ attempted.push(`${candidate.providerName}: image analysis is not supported by this provider integration`);
1597
+ continue;
1598
+ }
1599
+
1600
+ try {
1601
+ const visionResponse = await candidate.provider.analyzeImage({
1602
+ imagePath: args.image_path,
1603
+ mimeType: mime,
1604
+ question,
1605
+ });
1606
+ return {
1607
+ description: visionResponse.content,
1608
+ model: visionResponse.model || null,
1609
+ provider: candidate.providerName,
1610
+ };
1611
+ } catch (err) {
1612
+ attempted.push(`${candidate.providerName}: ${err.message}`);
1613
+ }
1614
+ }
1615
+
1616
+ return {
1617
+ error: attempted.length > 0
1618
+ ? `Image analysis failed. ${attempted.join(' | ')}`
1619
+ : 'No vision-capable provider is currently available. Configure OpenAI or xAI for image analysis.',
1620
+ };
1561
1621
  } catch (err) {
1562
1622
  return { error: err.message };
1563
1623
  }
@@ -1104,7 +1104,7 @@ class AndroidController {
1104
1104
  }
1105
1105
 
1106
1106
  async screenshot(options = {}) {
1107
- const serial = await this.ensureDevice();
1107
+ const serial = options.serial || await this.ensureDevice();
1108
1108
  const filename = `android_${Date.now()}.png`;
1109
1109
  const fullPath = path.join(SCREENSHOTS_DIR, filename);
1110
1110
  await this.#run(`${quoteShell(adbBinary())} -s ${quoteShell(serial)} exec-out screencap -p > ${quoteShell(fullPath)}`, { timeout: 30000 });
@@ -1117,7 +1117,7 @@ class AndroidController {
1117
1117
  }
1118
1118
 
1119
1119
  async dumpUi(options = {}) {
1120
- const serial = await this.ensureDevice();
1120
+ const serial = options.serial || await this.ensureDevice();
1121
1121
  let xml = await this.#adb(serial, 'shell uiautomator dump --compressed /dev/tty', { timeout: 30000 });
1122
1122
  if (!String(xml || '').includes('<hierarchy')) {
1123
1123
  const remote = '/sdcard/neoagent-ui.xml';
@@ -1140,6 +1140,63 @@ class AndroidController {
1140
1140
  };
1141
1141
  }
1142
1142
 
1143
+ async #captureObservation(serial, options = {}) {
1144
+ const resolvedSerial = serial || await this.ensureDevice();
1145
+ const observation = {
1146
+ serial: resolvedSerial,
1147
+ screenshotPath: null,
1148
+ fullPath: null,
1149
+ uiDumpPath: null,
1150
+ nodeCount: null,
1151
+ preview: undefined,
1152
+ observationWarnings: [],
1153
+ };
1154
+
1155
+ if (options.screenshot !== false) {
1156
+ try {
1157
+ const shot = await this.screenshot({ serial: resolvedSerial });
1158
+ observation.screenshotPath = shot?.screenshotPath || null;
1159
+ observation.fullPath = shot?.fullPath || null;
1160
+ } catch (err) {
1161
+ observation.observationWarnings.push(`screenshot: ${err.message}`);
1162
+ }
1163
+ }
1164
+
1165
+ try {
1166
+ const dump = await this.dumpUi({
1167
+ serial: resolvedSerial,
1168
+ includeNodes: options.includeNodes !== false,
1169
+ });
1170
+ observation.uiDumpPath = dump.uiDumpPath;
1171
+ observation.nodeCount = dump.nodeCount;
1172
+ observation.preview = dump.preview;
1173
+ } catch (err) {
1174
+ observation.observationWarnings.push(`ui_dump: ${err.message}`);
1175
+ }
1176
+
1177
+ if (observation.observationWarnings.length === 0) {
1178
+ delete observation.observationWarnings;
1179
+ }
1180
+
1181
+ return observation;
1182
+ }
1183
+
1184
+ async observe(options = {}) {
1185
+ const serial = options.serial || await this.ensureDevice();
1186
+ const observation = await this.#captureObservation(serial, options);
1187
+ if (!observation.screenshotPath && !observation.uiDumpPath) {
1188
+ throw new Error(
1189
+ Array.isArray(observation.observationWarnings) && observation.observationWarnings.length > 0
1190
+ ? observation.observationWarnings.join(' | ')
1191
+ : 'Unable to capture Android observation',
1192
+ );
1193
+ }
1194
+ return {
1195
+ success: true,
1196
+ ...observation,
1197
+ };
1198
+ }
1199
+
1143
1200
  async #resolveSelector(args = {}) {
1144
1201
  const dump = await this.dumpUi({ includeNodes: false });
1145
1202
  const selector = {
@@ -1164,27 +1221,27 @@ class AndroidController {
1164
1221
  let y = Number(args.y);
1165
1222
  let node = null;
1166
1223
  let serial = await this.ensureDevice();
1167
- let uiDumpPath = null;
1224
+ let resolvedFromUiDumpPath = null;
1168
1225
 
1169
1226
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
1170
1227
  const resolved = await this.#resolveSelector(args);
1171
1228
  serial = resolved.serial;
1172
1229
  node = resolved.node;
1173
- uiDumpPath = resolved.uiDumpPath;
1230
+ resolvedFromUiDumpPath = resolved.uiDumpPath;
1174
1231
  x = node.bounds.centerX;
1175
1232
  y = node.bounds.centerY;
1176
1233
  }
1177
1234
 
1178
1235
  await this.#adb(serial, `shell input tap ${Math.round(x)} ${Math.round(y)}`, { timeout: 15000 });
1179
- const shot = await this.screenshot();
1236
+ const observation = await this.#captureObservation(serial);
1180
1237
  return {
1181
1238
  success: true,
1182
1239
  serial,
1183
1240
  x: Math.round(x),
1184
1241
  y: Math.round(y),
1185
1242
  target: summarizeNode(node),
1186
- uiDumpPath,
1187
- screenshotPath: shot.screenshotPath,
1243
+ resolvedFromUiDumpPath,
1244
+ ...observation,
1188
1245
  };
1189
1246
  }
1190
1247
 
@@ -1193,13 +1250,13 @@ class AndroidController {
1193
1250
  let y = Number(args.y);
1194
1251
  let node = null;
1195
1252
  let serial = await this.ensureDevice();
1196
- let uiDumpPath = null;
1253
+ let resolvedFromUiDumpPath = null;
1197
1254
 
1198
1255
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
1199
1256
  const resolved = await this.#resolveSelector(args);
1200
1257
  serial = resolved.serial;
1201
1258
  node = resolved.node;
1202
- uiDumpPath = resolved.uiDumpPath;
1259
+ resolvedFromUiDumpPath = resolved.uiDumpPath;
1203
1260
  x = node.bounds.centerX;
1204
1261
  y = node.bounds.centerY;
1205
1262
  }
@@ -1210,7 +1267,7 @@ class AndroidController {
1210
1267
  `shell input swipe ${Math.round(x)} ${Math.round(y)} ${Math.round(x)} ${Math.round(y)} ${Math.round(durationMs)}`,
1211
1268
  { timeout: Math.max(15000, durationMs + 5000) },
1212
1269
  );
1213
- const shot = await this.screenshot();
1270
+ const observation = await this.#captureObservation(serial);
1214
1271
  return {
1215
1272
  success: true,
1216
1273
  serial,
@@ -1218,8 +1275,8 @@ class AndroidController {
1218
1275
  y: Math.round(y),
1219
1276
  durationMs,
1220
1277
  target: summarizeNode(node),
1221
- uiDumpPath,
1222
- screenshotPath: shot.screenshotPath,
1278
+ resolvedFromUiDumpPath,
1279
+ ...observation,
1223
1280
  };
1224
1281
  }
1225
1282
 
@@ -1244,12 +1301,12 @@ class AndroidController {
1244
1301
  if (args.pressEnter) {
1245
1302
  await this.#adb(serial, 'shell input keyevent 66', { timeout: 10000 });
1246
1303
  }
1247
- const shot = await this.screenshot();
1304
+ const observation = await this.#captureObservation(serial);
1248
1305
  return {
1249
1306
  success: true,
1250
1307
  serial,
1251
1308
  typed: args.text || '',
1252
- screenshotPath: shot.screenshotPath,
1309
+ ...observation,
1253
1310
  };
1254
1311
  }
1255
1312
 
@@ -1264,11 +1321,11 @@ class AndroidController {
1264
1321
  throw new Error('x1, y1, x2, and y2 are required for android_swipe');
1265
1322
  }
1266
1323
  await this.#adb(serial, `shell input swipe ${Math.round(x1)} ${Math.round(y1)} ${Math.round(x2)} ${Math.round(y2)} ${Math.round(duration)}`, { timeout: 15000 });
1267
- const shot = await this.screenshot();
1324
+ const observation = await this.#captureObservation(serial);
1268
1325
  return {
1269
1326
  success: true,
1270
1327
  serial,
1271
- screenshotPath: shot.screenshotPath,
1328
+ ...observation,
1272
1329
  };
1273
1330
  }
1274
1331
 
@@ -1278,13 +1335,13 @@ class AndroidController {
1278
1335
  const keyCode = Number.isFinite(Number(raw)) ? Number(raw) : (DEFAULT_KEYEVENTS[raw] || null);
1279
1336
  if (!keyCode) throw new Error(`Unsupported Android key: ${args.key}`);
1280
1337
  await this.#adb(serial, `shell input keyevent ${keyCode}`, { timeout: 10000 });
1281
- const shot = await this.screenshot();
1338
+ const observation = await this.#captureObservation(serial);
1282
1339
  return {
1283
1340
  success: true,
1284
1341
  serial,
1285
1342
  key: args.key,
1286
1343
  keyCode,
1287
- screenshotPath: shot.screenshotPath,
1344
+ ...observation,
1288
1345
  };
1289
1346
  }
1290
1347
 
@@ -1304,13 +1361,15 @@ class AndroidController {
1304
1361
  clickable: args.clickable,
1305
1362
  });
1306
1363
  if (node) {
1307
- const shot = args.screenshot === false ? null : await this.screenshot();
1364
+ const observation = await this.#captureObservation(dump.serial, {
1365
+ screenshot: args.screenshot !== false,
1366
+ });
1308
1367
  return {
1309
1368
  success: true,
1310
1369
  serial: dump.serial,
1311
1370
  matched: summarizeNode(node),
1312
- uiDumpPath: dump.uiDumpPath,
1313
- screenshotPath: shot?.screenshotPath || null,
1371
+ matchedFromUiDumpPath: dump.uiDumpPath,
1372
+ ...observation,
1314
1373
  };
1315
1374
  }
1316
1375
  await sleep(intervalMs);
@@ -1341,13 +1400,13 @@ class AndroidController {
1341
1400
  } else {
1342
1401
  throw new Error('packageName is required for android_open_app');
1343
1402
  }
1344
- const shot = await this.screenshot();
1403
+ const observation = await this.#captureObservation(serial);
1345
1404
  return {
1346
1405
  success: true,
1347
1406
  serial,
1348
1407
  packageName: args.packageName,
1349
1408
  activity: args.activity || null,
1350
- screenshotPath: shot.screenshotPath,
1409
+ ...observation,
1351
1410
  };
1352
1411
  }
1353
1412
 
@@ -1367,11 +1426,11 @@ class AndroidController {
1367
1426
  }
1368
1427
 
1369
1428
  await this.#adb(serial, parts.join(' '), { timeout: 20000 });
1370
- const shot = await this.screenshot();
1429
+ const observation = await this.#captureObservation(serial);
1371
1430
  return {
1372
1431
  success: true,
1373
1432
  serial,
1374
- screenshotPath: shot.screenshotPath,
1433
+ ...observation,
1375
1434
  };
1376
1435
  }
1377
1436
 
@@ -1412,13 +1471,19 @@ class AndroidController {
1412
1471
 
1413
1472
  const timeout = Math.max(1000, Number(args.timeoutMs) || 20000);
1414
1473
  const stdout = await this.#adb(serial, `shell ${quoteShell(command)}`, { timeout });
1415
- const shot = args.screenshot === true ? await this.screenshot() : null;
1474
+ const observation = args.screenshot === true
1475
+ ? await this.#captureObservation(serial)
1476
+ : null;
1416
1477
  return {
1417
1478
  success: true,
1418
1479
  serial,
1419
1480
  command,
1420
1481
  stdout,
1421
- screenshotPath: shot?.screenshotPath || null,
1482
+ screenshotPath: observation?.screenshotPath || null,
1483
+ fullPath: observation?.fullPath || null,
1484
+ uiDumpPath: observation?.uiDumpPath || null,
1485
+ nodeCount: observation?.nodeCount,
1486
+ preview: observation?.preview,
1422
1487
  };
1423
1488
  }
1424
1489