neoagent 2.1.12 → 2.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/public/assets/fonts/MaterialIcons-Regular.otf +0 -0
- package/server/public/flutter_bootstrap.js +1 -1
- package/server/public/main.dart.js +31420 -31340
- package/server/routes/android.js +15 -4
- package/server/services/ai/engine.js +119 -7
- package/server/services/ai/history.js +63 -1
- package/server/services/ai/providers/base.js +17 -0
- package/server/services/ai/providers/grok.js +34 -0
- package/server/services/ai/providers/openai.js +34 -0
- package/server/services/ai/systemPrompt.js +1 -0
- package/server/services/ai/toolResult.js +2 -0
- package/server/services/ai/tools.js +76 -16
- package/server/services/android/controller.js +168 -27
- package/server/services/websocket.js +19 -0
package/server/routes/android.js
CHANGED
|
@@ -11,6 +11,7 @@ router.use(requireAuth);
|
|
|
11
11
|
|
|
12
12
|
const androidApkUploadDir = path.join(DATA_DIR, 'uploads', 'android-apks');
|
|
13
13
|
fs.mkdirSync(androidApkUploadDir, { recursive: true });
|
|
14
|
+
const INSTALLABLE_ANDROID_PACKAGE_EXTENSIONS = new Set(['.apk', '.apks']);
|
|
14
15
|
|
|
15
16
|
const androidApkUpload = multer({
|
|
16
17
|
storage: multer.diskStorage({
|
|
@@ -28,8 +29,9 @@ const androidApkUpload = multer({
|
|
|
28
29
|
},
|
|
29
30
|
}),
|
|
30
31
|
fileFilter: (_req, file, cb) => {
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
const extension = path.extname(String(file.originalname || '')).toLowerCase();
|
|
33
|
+
if (!INSTALLABLE_ANDROID_PACKAGE_EXTENSIONS.has(extension)) {
|
|
34
|
+
cb(new Error('Only .apk or .apks files can be installed.'));
|
|
33
35
|
return;
|
|
34
36
|
}
|
|
35
37
|
cb(null, true);
|
|
@@ -85,6 +87,15 @@ router.post('/screenshot', async (req, res) => {
|
|
|
85
87
|
}
|
|
86
88
|
});
|
|
87
89
|
|
|
90
|
+
// POST /observe — forwards the request body to the Android controller's
// observe() call and returns whatever it resolves with as JSON.
// Any failure (including a missing controller) is reported as a 500 with a
// sanitized error message.
router.post('/observe', async (req, res) => {
  try {
    const { androidController } = req.app.locals;
    const observation = await androidController.observe(req.body || {});
    res.json(observation);
  } catch (err) {
    res.status(500).json({ error: sanitizeError(err) });
  }
});
|
|
98
|
+
|
|
88
99
|
router.post('/ui-dump', async (req, res) => {
|
|
89
100
|
try {
|
|
90
101
|
const controller = req.app.locals.androidController;
|
|
@@ -183,7 +194,7 @@ router.post('/install-apk', (req, res) => {
|
|
|
183
194
|
const message =
|
|
184
195
|
uploadError instanceof multer.MulterError &&
|
|
185
196
|
uploadError.code === 'LIMIT_FILE_SIZE'
|
|
186
|
-
? '
|
|
197
|
+
? 'Android app upload is too large. Limit is 512MB.'
|
|
187
198
|
: sanitizeError(uploadError);
|
|
188
199
|
res.status(400).json({ error: message });
|
|
189
200
|
return;
|
|
@@ -191,7 +202,7 @@ router.post('/install-apk', (req, res) => {
|
|
|
191
202
|
|
|
192
203
|
const uploadedApkPath = req.file?.path;
|
|
193
204
|
if (!uploadedApkPath) {
|
|
194
|
-
res.status(400).json({ error: 'No APK
|
|
205
|
+
res.status(400).json({ error: 'No APK or APK bundle was uploaded.' });
|
|
195
206
|
return;
|
|
196
207
|
}
|
|
197
208
|
|
|
@@ -2,7 +2,12 @@ const { v4: uuidv4 } = require('uuid');
|
|
|
2
2
|
const fs = require('fs');
|
|
3
3
|
const db = require('../../db/database');
|
|
4
4
|
const { compact } = require('./compaction');
|
|
5
|
-
const {
|
|
5
|
+
const {
|
|
6
|
+
getConversationContext,
|
|
7
|
+
buildSummaryCarrier,
|
|
8
|
+
refreshConversationSummary,
|
|
9
|
+
sanitizeConversationMessages
|
|
10
|
+
} = require('./history');
|
|
6
11
|
const { ensureDefaultAiSettings, getAiSettings } = require('./settings');
|
|
7
12
|
const { selectToolsForTask } = require('./toolSelector');
|
|
8
13
|
const { compactToolResult } = require('./toolResult');
|
|
@@ -163,6 +168,86 @@ class AgentEngine {
|
|
|
163
168
|
return this.activeRuns.get(runId) || null;
|
|
164
169
|
}
|
|
165
170
|
|
|
171
|
+
findActiveRunForUser(userId, predicate = null) {
|
|
172
|
+
let candidate = null;
|
|
173
|
+
for (const [runId, runMeta] of this.activeRuns.entries()) {
|
|
174
|
+
if (runMeta.userId !== userId || runMeta.aborted) continue;
|
|
175
|
+
if (typeof predicate === 'function' && !predicate(runMeta, runId)) continue;
|
|
176
|
+
if (!candidate || (runMeta.startedAt || 0) >= (candidate.startedAt || 0)) {
|
|
177
|
+
candidate = { runId, ...runMeta };
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
return candidate;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
findSteerableRunForUser(userId, triggerSource = 'web') {
|
|
184
|
+
return this.findActiveRunForUser(
|
|
185
|
+
userId,
|
|
186
|
+
(runMeta) => runMeta.triggerSource === triggerSource && runMeta.triggerType === 'user'
|
|
187
|
+
);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
enqueueSteering(runId, content, metadata = {}) {
|
|
191
|
+
const runMeta = this.getRunMeta(runId);
|
|
192
|
+
const trimmed = typeof content === 'string' ? content.trim() : '';
|
|
193
|
+
if (!runMeta || runMeta.aborted || !trimmed) return null;
|
|
194
|
+
|
|
195
|
+
const item = {
|
|
196
|
+
id: uuidv4(),
|
|
197
|
+
content: trimmed,
|
|
198
|
+
metadata,
|
|
199
|
+
createdAt: new Date().toISOString()
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
runMeta.steeringQueue.push(item);
|
|
203
|
+
this.emit(runMeta.userId, 'run:steer_queued', {
|
|
204
|
+
runId,
|
|
205
|
+
content: item.content,
|
|
206
|
+
pendingCount: runMeta.steeringQueue.length
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
return {
|
|
210
|
+
runId,
|
|
211
|
+
pendingCount: runMeta.steeringQueue.length,
|
|
212
|
+
item
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
applyQueuedSteering(runId, messages, { userId, conversationId }) {
|
|
217
|
+
const runMeta = this.getRunMeta(runId);
|
|
218
|
+
if (!runMeta?.steeringQueue?.length) {
|
|
219
|
+
return { messages, appliedCount: 0 };
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
const queued = runMeta.steeringQueue.splice(0, runMeta.steeringQueue.length);
|
|
223
|
+
messages.push({
|
|
224
|
+
role: 'system',
|
|
225
|
+
content: [
|
|
226
|
+
'The user sent follow-up messages while you were already working.',
|
|
227
|
+
'Treat them as steering or next-up context for the same conversation.',
|
|
228
|
+
'If a message materially changes the active task, incorporate it now.',
|
|
229
|
+
'If it is unrelated or better handled after the current task, finish the current work first and then address it.'
|
|
230
|
+
].join(' ')
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
for (const entry of queued) {
|
|
234
|
+
messages.push({ role: 'user', content: entry.content });
|
|
235
|
+
if (conversationId) {
|
|
236
|
+
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content) VALUES (?, ?, ?)')
|
|
237
|
+
.run(conversationId, 'user', entry.content);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
this.emit(userId, 'run:steer_applied', {
|
|
242
|
+
runId,
|
|
243
|
+
count: queued.length,
|
|
244
|
+
pendingCount: runMeta.steeringQueue.length,
|
|
245
|
+
latestContent: queued[queued.length - 1]?.content || ''
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
return { messages, appliedCount: queued.length };
|
|
249
|
+
}
|
|
250
|
+
|
|
166
251
|
isRunStopped(runId) {
|
|
167
252
|
return this.getRunMeta(runId)?.aborted === true;
|
|
168
253
|
}
|
|
@@ -321,8 +406,12 @@ class AgentEngine {
|
|
|
321
406
|
status: 'running',
|
|
322
407
|
aborted: false,
|
|
323
408
|
messagingSent: false,
|
|
409
|
+
triggerType,
|
|
410
|
+
triggerSource,
|
|
411
|
+
startedAt: Date.now(),
|
|
324
412
|
lastToolName: null,
|
|
325
413
|
lastToolTarget: null,
|
|
414
|
+
steeringQueue: [],
|
|
326
415
|
toolPids: new Set()
|
|
327
416
|
});
|
|
328
417
|
this.emit(userId, 'run:start', { runId, title: runTitle, model, triggerType, triggerSource });
|
|
@@ -354,6 +443,7 @@ class AgentEngine {
|
|
|
354
443
|
|
|
355
444
|
let messages = this.buildContextMessages(systemPrompt, summaryMessage, historyMessages, recallMsg);
|
|
356
445
|
messages.push(this.buildUserMessage(userMessage, options));
|
|
446
|
+
messages = sanitizeConversationMessages(messages);
|
|
357
447
|
|
|
358
448
|
if (conversationId) {
|
|
359
449
|
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content) VALUES (?, ?, ?)')
|
|
@@ -372,10 +462,18 @@ class AgentEngine {
|
|
|
372
462
|
if (this.isRunStopped(runId)) break;
|
|
373
463
|
iteration++;
|
|
374
464
|
|
|
465
|
+
const steeringAtLoopStart = this.applyQueuedSteering(runId, messages, {
|
|
466
|
+
userId,
|
|
467
|
+
conversationId
|
|
468
|
+
});
|
|
469
|
+
messages = steeringAtLoopStart.messages;
|
|
470
|
+
messages = sanitizeConversationMessages(messages);
|
|
471
|
+
|
|
375
472
|
let metrics = this.estimatePromptMetrics(messages, tools);
|
|
376
473
|
const contextWindow = provider.getContextWindow(model);
|
|
377
474
|
if (metrics.totalEstimatedTokens > contextWindow * 0.7) {
|
|
378
475
|
messages = await compact(messages, provider, model);
|
|
476
|
+
messages = sanitizeConversationMessages(messages);
|
|
379
477
|
this.emit(userId, 'run:compaction', { runId, iteration });
|
|
380
478
|
metrics = this.estimatePromptMetrics(messages, tools);
|
|
381
479
|
}
|
|
@@ -390,9 +488,10 @@ class AgentEngine {
|
|
|
390
488
|
const callOptions = { model, reasoningEffort: this.getReasoningEffort(providerName, options) };
|
|
391
489
|
|
|
392
490
|
const tryModelCall = async (retryForFallback = true) => {
|
|
491
|
+
const requestMessages = sanitizeConversationMessages(messages);
|
|
393
492
|
try {
|
|
394
493
|
if (options.stream !== false) {
|
|
395
|
-
const gen = provider.stream(
|
|
494
|
+
const gen = provider.stream(requestMessages, tools, callOptions);
|
|
396
495
|
for await (const chunk of gen) {
|
|
397
496
|
if (chunk.type === 'content') {
|
|
398
497
|
streamContent += chunk.content;
|
|
@@ -418,7 +517,7 @@ class AgentEngine {
|
|
|
418
517
|
}
|
|
419
518
|
}
|
|
420
519
|
} else {
|
|
421
|
-
response = await provider.chat(
|
|
520
|
+
response = await provider.chat(requestMessages, tools, callOptions);
|
|
422
521
|
responseModel = model;
|
|
423
522
|
}
|
|
424
523
|
} catch (err) {
|
|
@@ -438,9 +537,10 @@ class AgentEngine {
|
|
|
438
537
|
|
|
439
538
|
// Recursive call once
|
|
440
539
|
const retryOptions = { ...callOptions, model, reasoningEffort: this.getReasoningEffort(providerName, options) };
|
|
540
|
+
const retryMessages = sanitizeConversationMessages(messages);
|
|
441
541
|
|
|
442
542
|
if (options.stream !== false) {
|
|
443
|
-
const gen = provider.stream(
|
|
543
|
+
const gen = provider.stream(retryMessages, tools, retryOptions);
|
|
444
544
|
for await (const chunk of gen) {
|
|
445
545
|
if (chunk.type === 'content') {
|
|
446
546
|
streamContent += chunk.content;
|
|
@@ -466,7 +566,7 @@ class AgentEngine {
|
|
|
466
566
|
}
|
|
467
567
|
}
|
|
468
568
|
} else {
|
|
469
|
-
response = await provider.chat(
|
|
569
|
+
response = await provider.chat(retryMessages, tools, retryOptions);
|
|
470
570
|
responseModel = model;
|
|
471
571
|
}
|
|
472
572
|
} else {
|
|
@@ -513,7 +613,19 @@ class AgentEngine {
|
|
|
513
613
|
);
|
|
514
614
|
}
|
|
515
615
|
|
|
516
|
-
if (!response.toolCalls || response.toolCalls.length === 0)
|
|
616
|
+
if (!response.toolCalls || response.toolCalls.length === 0) {
|
|
617
|
+
const steeringAfterResponse = this.applyQueuedSteering(runId, messages, {
|
|
618
|
+
userId,
|
|
619
|
+
conversationId
|
|
620
|
+
});
|
|
621
|
+
messages = steeringAfterResponse.messages;
|
|
622
|
+
if (steeringAfterResponse.appliedCount > 0) {
|
|
623
|
+
iteration = Math.max(0, iteration - 1);
|
|
624
|
+
lastContent = '';
|
|
625
|
+
continue;
|
|
626
|
+
}
|
|
627
|
+
break;
|
|
628
|
+
}
|
|
517
629
|
|
|
518
630
|
for (const toolCall of response.toolCalls) {
|
|
519
631
|
if (this.isRunStopped(runId)) break;
|
|
@@ -602,7 +714,7 @@ class AgentEngine {
|
|
|
602
714
|
|
|
603
715
|
if ((iteration >= maxIterations && messages[messages.length - 1]?.role === 'tool')
|
|
604
716
|
|| (iteration < maxIterations && stepIndex > 0 && !lastContent.trim() && messages[messages.length - 1]?.role !== 'tool')) {
|
|
605
|
-
const finalResponse = await provider.chat(messages, [], {
|
|
717
|
+
const finalResponse = await provider.chat(sanitizeConversationMessages(messages), [], {
|
|
606
718
|
model,
|
|
607
719
|
reasoningEffort: this.getReasoningEffort(providerName, options)
|
|
608
720
|
});
|
|
@@ -34,6 +34,67 @@ function normalizeHistoryRows(rows) {
|
|
|
34
34
|
});
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
/**
 * Remove broken tool-call exchanges from a chat message list.
 *
 * An assistant message with `tool_calls` is kept only together with the tool
 * messages that answer all of its calls. An exchange that is interrupted by
 * any other message, or still unanswered at the end of the list, is dropped
 * entirely; tool messages that do not belong to the open exchange are dropped
 * as well. Every other message with a `role` passes through unchanged and in
 * order.
 *
 * Calls are matched to results by `tool_call_id`. When none of an assistant
 * message's tool calls carries an id, the following tool messages are matched
 * by position instead (one per call), so the assistant message is never
 * emitted with dangling `tool_calls` while its results are discarded.
 *
 * @param {?Array<object>} messages - Messages to clean; nullish is treated as empty.
 * @returns {Array<object>} A new array; message objects are reused, not copied.
 */
function sanitizeConversationMessages(messages) {
  const sanitized = [];
  // The tool-call exchange currently being collected, or null when none is open.
  let pending = null;

  const isComplete = (sequence) =>
    sequence.awaitingIds.size === 0 && sequence.awaitingUnkeyed === 0;

  for (const msg of messages || []) {
    if (!msg || !msg.role) continue;

    const opensToolSequence =
      msg.role === 'assistant' && Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0;

    if (opensToolSequence) {
      // Starting a new exchange discards any previous one that never completed.
      const toolCallIds = msg.tool_calls
        .map((toolCall) => toolCall?.id)
        .filter(Boolean);
      pending = {
        messages: [msg],
        awaitingIds: new Set(toolCallIds),
        // Without any ids the results can only be paired up by position.
        awaitingUnkeyed: toolCallIds.length === 0 ? msg.tool_calls.length : 0
      };
      continue;
    }

    if (msg.role === 'tool') {
      if (!pending) continue; // orphaned tool result

      if (pending.awaitingUnkeyed > 0) {
        pending.messages.push(msg);
        pending.awaitingUnkeyed -= 1;
      } else if (msg.tool_call_id && pending.awaitingIds.has(msg.tool_call_id)) {
        pending.messages.push(msg);
        pending.awaitingIds.delete(msg.tool_call_id);
      } else {
        continue; // result for a call that is not part of the open exchange
      }

      if (isComplete(pending)) {
        sanitized.push(...pending.messages);
        pending = null;
      }
      continue;
    }

    // Any other message interrupts (and thereby discards) an open exchange.
    pending = null;
    sanitized.push(msg);
  }

  // An exchange still open here is incomplete and is intentionally left out.
  return sanitized;
}
|
|
97
|
+
|
|
37
98
|
function serializeHistoryForSummary(messages) {
|
|
38
99
|
return messages.map((msg) => {
|
|
39
100
|
if (msg.role === 'tool') {
|
|
@@ -143,7 +204,7 @@ function getConversationContext(conversationId, recentLimit) {
|
|
|
143
204
|
return {
|
|
144
205
|
summary: convo?.summary || '',
|
|
145
206
|
summaryCount: Number(convo?.summary_message_count || 0),
|
|
146
|
-
recentMessages: normalizeHistoryRows(recent),
|
|
207
|
+
recentMessages: sanitizeConversationMessages(normalizeHistoryRows(recent)),
|
|
147
208
|
totalMessages: db.prepare('SELECT COUNT(*) AS count FROM conversation_messages WHERE conversation_id = ?').get(conversationId).count
|
|
148
209
|
};
|
|
149
210
|
}
|
|
@@ -184,5 +245,6 @@ module.exports = {
|
|
|
184
245
|
getWebChatContext,
|
|
185
246
|
refreshConversationSummary,
|
|
186
247
|
refreshWebChatSummary,
|
|
248
|
+
sanitizeConversationMessages,
|
|
187
249
|
summarizeMessages
|
|
188
250
|
};
|
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
class BaseProvider {
|
|
2
|
+
static readImageAsBase64(imagePath) {
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
return fs.readFileSync(imagePath).toString('base64');
|
|
5
|
+
}
|
|
6
|
+
|
|
2
7
|
constructor(config = {}) {
|
|
3
8
|
this.config = config;
|
|
4
9
|
this.name = 'base';
|
|
@@ -36,6 +41,18 @@ class BaseProvider {
|
|
|
36
41
|
getContextWindow(model) {
|
|
37
42
|
return 128000;
|
|
38
43
|
}
|
|
44
|
+
|
|
45
|
+
supportsVision() {
|
|
46
|
+
return false;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
getDefaultVisionModel() {
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
async analyzeImage(_options = {}) {
|
|
54
|
+
throw new Error(`Provider '${this.name}' does not support image analysis`);
|
|
55
|
+
}
|
|
39
56
|
}
|
|
40
57
|
|
|
41
58
|
module.exports = { BaseProvider };
|
|
@@ -15,6 +15,14 @@ class GrokProvider extends BaseProvider {
|
|
|
15
15
|
return 131072; // grok-4 context window
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
supportsVision() {
|
|
19
|
+
return true;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
getDefaultVisionModel() {
|
|
23
|
+
return 'grok-4.20-beta-latest-non-reasoning';
|
|
24
|
+
}
|
|
25
|
+
|
|
18
26
|
_buildParams(model, messages, tools, options) {
|
|
19
27
|
const params = {
|
|
20
28
|
model,
|
|
@@ -116,6 +124,32 @@ class GrokProvider extends BaseProvider {
|
|
|
116
124
|
}
|
|
117
125
|
}));
|
|
118
126
|
}
|
|
127
|
+
|
|
128
|
+
async analyzeImage(options = {}) {
|
|
129
|
+
const model = options.model || this.getDefaultVisionModel();
|
|
130
|
+
const b64 = BaseProvider.readImageAsBase64(options.imagePath);
|
|
131
|
+
const response = await this.client.chat.completions.create({
|
|
132
|
+
model,
|
|
133
|
+
max_tokens: options.maxTokens || 4096,
|
|
134
|
+
messages: [{
|
|
135
|
+
role: 'user',
|
|
136
|
+
content: [
|
|
137
|
+
{ type: 'text', text: options.question || 'Describe this image in detail.' },
|
|
138
|
+
{
|
|
139
|
+
type: 'image_url',
|
|
140
|
+
image_url: {
|
|
141
|
+
url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
]
|
|
145
|
+
}]
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
return {
|
|
149
|
+
content: response.choices[0]?.message?.content || '',
|
|
150
|
+
model: response.model || model,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
119
153
|
}
|
|
120
154
|
|
|
121
155
|
module.exports = { GrokProvider };
|
|
@@ -48,6 +48,14 @@ class OpenAIProvider extends BaseProvider {
|
|
|
48
48
|
return 128000;
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
supportsVision() {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
getDefaultVisionModel() {
|
|
56
|
+
return 'gpt-4.1-mini';
|
|
57
|
+
}
|
|
58
|
+
|
|
51
59
|
_buildParams(model, messages, tools, options) {
|
|
52
60
|
const isReasoning = this.isReasoningModel(model);
|
|
53
61
|
// Reasoning models (GPT-5, o-series): use developer role for system messages
|
|
@@ -163,6 +171,32 @@ class OpenAIProvider extends BaseProvider {
|
|
|
163
171
|
}
|
|
164
172
|
}
|
|
165
173
|
}
|
|
174
|
+
|
|
175
|
+
async analyzeImage(options = {}) {
|
|
176
|
+
const model = options.model || this.getDefaultVisionModel();
|
|
177
|
+
const b64 = BaseProvider.readImageAsBase64(options.imagePath);
|
|
178
|
+
const response = await this.client.chat.completions.create({
|
|
179
|
+
model,
|
|
180
|
+
max_tokens: options.maxTokens || 4096,
|
|
181
|
+
messages: [{
|
|
182
|
+
role: 'user',
|
|
183
|
+
content: [
|
|
184
|
+
{ type: 'text', text: options.question || 'Describe this image in detail.' },
|
|
185
|
+
{
|
|
186
|
+
type: 'image_url',
|
|
187
|
+
image_url: {
|
|
188
|
+
url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
]
|
|
192
|
+
}]
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
return {
|
|
196
|
+
content: response.choices[0]?.message?.content || '',
|
|
197
|
+
model: response.model || model,
|
|
198
|
+
};
|
|
199
|
+
}
|
|
166
200
|
}
|
|
167
201
|
|
|
168
202
|
module.exports = { OpenAIProvider };
|
|
@@ -41,6 +41,7 @@ When prior context makes the goal clear, act on it. Only ask a clarifying questi
|
|
|
41
41
|
|
|
42
42
|
REPORT ACTUAL RESULTS
|
|
43
43
|
When a tool returns data, share the relevant parts — summarized if large, direct if short. Never paste raw JSON as the answer. Never narrate what you're about to do at length before doing it.
|
|
44
|
+
Never promise an action in the final answer unless you already took that action in this run. Do not say "I'll check", "I'll fix it", or "I'll send it" and then stop. Either do it first or say you have not done it yet.
|
|
44
45
|
|
|
45
46
|
DON'T REPEAT YOURSELF
|
|
46
47
|
State a limitation or error once. If the user pushes back, try a different approach before restating the same failure. Repeating the same dead-end across five messages is useless.
|
|
@@ -90,10 +90,12 @@ function compactToolResult(toolName, toolArgs = {}, toolResult, options = {}) {
|
|
|
90
90
|
break;
|
|
91
91
|
|
|
92
92
|
case 'android_dump_ui':
|
|
93
|
+
case 'android_observe':
|
|
93
94
|
envelope = trimObject({
|
|
94
95
|
tool: toolName,
|
|
95
96
|
serial: toolResult?.serial,
|
|
96
97
|
nodeCount: toolResult?.nodeCount,
|
|
98
|
+
screenshotPath: toolResult?.screenshotPath,
|
|
97
99
|
uiDumpPath: toolResult?.uiDumpPath,
|
|
98
100
|
preview: clampText(JSON.stringify(toolResult?.preview || []).slice(0, Math.floor(softLimit * 0.55)), Math.floor(softLimit * 0.55))
|
|
99
101
|
});
|
|
@@ -292,6 +292,16 @@ function getAvailableTools(app, options = {}) {
|
|
|
292
292
|
}
|
|
293
293
|
}
|
|
294
294
|
},
|
|
295
|
+
{
|
|
296
|
+
name: 'android_observe',
|
|
297
|
+
description: 'Capture the current Android screen end-to-end: fresh screenshot, UI dump path, and a preview of visible UI nodes.',
|
|
298
|
+
parameters: {
|
|
299
|
+
type: 'object',
|
|
300
|
+
properties: {
|
|
301
|
+
includeNodes: { type: 'boolean', description: 'Include a preview of parsed UI nodes (default true)' }
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
},
|
|
295
305
|
{
|
|
296
306
|
name: 'android_dump_ui',
|
|
297
307
|
description: 'Capture the current Android UIAutomator XML dump and return a preview of the nodes.',
|
|
@@ -322,11 +332,11 @@ function getAvailableTools(app, options = {}) {
|
|
|
322
332
|
},
|
|
323
333
|
{
|
|
324
334
|
name: 'android_install_apk',
|
|
325
|
-
description: 'Install or replace an APK on the Android emulator.',
|
|
335
|
+
description: 'Install or replace an APK or universal .apks bundle on the Android emulator.',
|
|
326
336
|
parameters: {
|
|
327
337
|
type: 'object',
|
|
328
338
|
properties: {
|
|
329
|
-
apkPath: { type: 'string', description: 'Absolute path to
|
|
339
|
+
apkPath: { type: 'string', description: 'Absolute path to an .apk file or universal .apks bundle on disk' }
|
|
330
340
|
},
|
|
331
341
|
required: ['apkPath']
|
|
332
342
|
}
|
|
@@ -763,7 +773,7 @@ function getAvailableTools(app, options = {}) {
|
|
|
763
773
|
},
|
|
764
774
|
{
|
|
765
775
|
name: 'analyze_image',
|
|
766
|
-
description: 'Analyze an image file using
|
|
776
|
+
description: 'Analyze an image file using the best available vision-capable model. Use this to describe photos, read QR codes, extract text from screenshots, or answer visual questions.',
|
|
767
777
|
parameters: {
|
|
768
778
|
type: 'object',
|
|
769
779
|
properties: {
|
|
@@ -940,6 +950,12 @@ async function executeTool(toolName, args, context, engine) {
|
|
|
940
950
|
return await controller.waitFor(args || {});
|
|
941
951
|
}
|
|
942
952
|
|
|
953
|
+
case 'android_observe': {
|
|
954
|
+
const controller = ac();
|
|
955
|
+
if (!controller) return { error: 'Android controller not available' };
|
|
956
|
+
return await controller.observe(args || {});
|
|
957
|
+
}
|
|
958
|
+
|
|
943
959
|
case 'android_dump_ui': {
|
|
944
960
|
const controller = ac();
|
|
945
961
|
if (!controller) return { error: 'Android controller not available' };
|
|
@@ -1541,23 +1557,67 @@ async function executeTool(toolName, args, context, engine) {
|
|
|
1541
1557
|
case 'analyze_image': {
|
|
1542
1558
|
try {
|
|
1543
1559
|
if (!fs.existsSync(args.image_path)) return { error: `File not found: ${args.image_path}` };
|
|
1544
|
-
const b64 = fs.readFileSync(args.image_path).toString('base64');
|
|
1545
1560
|
const ext = path.extname(args.image_path).toLowerCase();
|
|
1546
1561
|
const mimeMap = { '.png': 'image/png', '.gif': 'image/gif', '.webp': 'image/webp', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg' };
|
|
1547
1562
|
const mime = mimeMap[ext] || 'image/jpeg';
|
|
1563
|
+
const question = args.question || 'Describe this image in detail.';
|
|
1548
1564
|
const { getProviderForUser } = require('./engine');
|
|
1549
|
-
const {
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1565
|
+
const { createProviderInstance, getProviderCatalog } = require('./models');
|
|
1566
|
+
|
|
1567
|
+
const attempted = [];
|
|
1568
|
+
const candidates = [];
|
|
1569
|
+
|
|
1570
|
+
try {
|
|
1571
|
+
const preferred = await getProviderForUser(userId);
|
|
1572
|
+
candidates.push({
|
|
1573
|
+
providerName: preferred.providerName,
|
|
1574
|
+
provider: preferred.provider,
|
|
1575
|
+
});
|
|
1576
|
+
} catch (err) {
|
|
1577
|
+
attempted.push(`default-provider lookup failed: ${err.message}`);
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1580
|
+
for (const providerInfo of getProviderCatalog(userId)) {
|
|
1581
|
+
if (!providerInfo.available) continue;
|
|
1582
|
+
if (candidates.some((candidate) => candidate.providerName === providerInfo.id)) continue;
|
|
1583
|
+
if (!['grok', 'openai'].includes(providerInfo.id)) continue;
|
|
1584
|
+
try {
|
|
1585
|
+
candidates.push({
|
|
1586
|
+
providerName: providerInfo.id,
|
|
1587
|
+
provider: createProviderInstance(providerInfo.id, userId),
|
|
1588
|
+
});
|
|
1589
|
+
} catch (err) {
|
|
1590
|
+
attempted.push(`${providerInfo.id}: ${err.message}`);
|
|
1591
|
+
}
|
|
1592
|
+
}
|
|
1593
|
+
|
|
1594
|
+
for (const candidate of candidates) {
|
|
1595
|
+
if (typeof candidate.provider.supportsVision !== 'function' || candidate.provider.supportsVision() !== true) {
|
|
1596
|
+
attempted.push(`${candidate.providerName}: image analysis is not supported by this provider integration`);
|
|
1597
|
+
continue;
|
|
1598
|
+
}
|
|
1599
|
+
|
|
1600
|
+
try {
|
|
1601
|
+
const visionResponse = await candidate.provider.analyzeImage({
|
|
1602
|
+
imagePath: args.image_path,
|
|
1603
|
+
mimeType: mime,
|
|
1604
|
+
question,
|
|
1605
|
+
});
|
|
1606
|
+
return {
|
|
1607
|
+
description: visionResponse.content,
|
|
1608
|
+
model: visionResponse.model || null,
|
|
1609
|
+
provider: candidate.providerName,
|
|
1610
|
+
};
|
|
1611
|
+
} catch (err) {
|
|
1612
|
+
attempted.push(`${candidate.providerName}: ${err.message}`);
|
|
1613
|
+
}
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
return {
|
|
1617
|
+
error: attempted.length > 0
|
|
1618
|
+
? `Image analysis failed. ${attempted.join(' | ')}`
|
|
1619
|
+
: 'No vision-capable provider is currently available. Configure OpenAI or xAI for image analysis.',
|
|
1620
|
+
};
|
|
1561
1621
|
} catch (err) {
|
|
1562
1622
|
return { error: err.message };
|
|
1563
1623
|
}
|