neoagent 2.1.11 → 2.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/public/assets/fonts/MaterialIcons-Regular.otf +0 -0
- package/server/public/flutter_bootstrap.js +1 -1
- package/server/public/main.dart.js +29110 -29050
- package/server/routes/android.js +9 -0
- package/server/services/ai/engine.js +144 -15
- package/server/services/ai/outputSanitizer.js +67 -0
- package/server/services/ai/providers/anthropic.js +130 -1
- package/server/services/ai/providers/base.js +17 -0
- package/server/services/ai/providers/grok.js +34 -0
- package/server/services/ai/providers/openai.js +34 -0
- package/server/services/ai/systemPrompt.js +1 -0
- package/server/services/ai/toolCallSalvage.js +26 -1
- package/server/services/ai/toolResult.js +2 -0
- package/server/services/ai/tools.js +74 -14
- package/server/services/android/controller.js +126 -28
- package/server/services/websocket.js +19 -0
package/server/routes/android.js
CHANGED
|
@@ -85,6 +85,15 @@ router.post('/screenshot', async (req, res) => {
|
|
|
85
85
|
}
|
|
86
86
|
});
|
|
87
87
|
|
|
88
|
+
// POST /observe: hands the request body (an empty object when the body is
// falsy) to the Android controller's observe() and replies with its result as
// JSON. Any thrown error becomes a 500 response carrying sanitizeError(err).
router.post('/observe', async (req, res) => {
  try {
    const { androidController } = req.app.locals;
    const payload = req.body || {};
    const observation = await androidController.observe(payload);
    res.json(observation);
  } catch (err) {
    res.status(500).json({ error: sanitizeError(err) });
  }
});
|
|
96
|
+
|
|
88
97
|
router.post('/ui-dump', async (req, res) => {
|
|
89
98
|
try {
|
|
90
99
|
const controller = req.app.locals.androidController;
|
|
@@ -7,6 +7,7 @@ const { ensureDefaultAiSettings, getAiSettings } = require('./settings');
|
|
|
7
7
|
const { selectToolsForTask } = require('./toolSelector');
|
|
8
8
|
const { compactToolResult } = require('./toolResult');
|
|
9
9
|
const { salvageTextToolCalls } = require('./toolCallSalvage');
|
|
10
|
+
const { sanitizeModelOutput } = require('./outputSanitizer');
|
|
10
11
|
|
|
11
12
|
function generateTitle(task) {
|
|
12
13
|
if (!task || typeof task !== 'string') return 'Untitled';
|
|
@@ -162,6 +163,86 @@ class AgentEngine {
|
|
|
162
163
|
return this.activeRuns.get(runId) || null;
|
|
163
164
|
}
|
|
164
165
|
|
|
166
|
+
findActiveRunForUser(userId, predicate = null) {
|
|
167
|
+
let candidate = null;
|
|
168
|
+
for (const [runId, runMeta] of this.activeRuns.entries()) {
|
|
169
|
+
if (runMeta.userId !== userId || runMeta.aborted) continue;
|
|
170
|
+
if (typeof predicate === 'function' && !predicate(runMeta, runId)) continue;
|
|
171
|
+
if (!candidate || (runMeta.startedAt || 0) >= (candidate.startedAt || 0)) {
|
|
172
|
+
candidate = { runId, ...runMeta };
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
return candidate;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
findSteerableRunForUser(userId, triggerSource = 'web') {
|
|
179
|
+
return this.findActiveRunForUser(
|
|
180
|
+
userId,
|
|
181
|
+
(runMeta) => runMeta.triggerSource === triggerSource && runMeta.triggerType === 'user'
|
|
182
|
+
);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
enqueueSteering(runId, content, metadata = {}) {
|
|
186
|
+
const runMeta = this.getRunMeta(runId);
|
|
187
|
+
const trimmed = typeof content === 'string' ? content.trim() : '';
|
|
188
|
+
if (!runMeta || runMeta.aborted || !trimmed) return null;
|
|
189
|
+
|
|
190
|
+
const item = {
|
|
191
|
+
id: uuidv4(),
|
|
192
|
+
content: trimmed,
|
|
193
|
+
metadata,
|
|
194
|
+
createdAt: new Date().toISOString()
|
|
195
|
+
};
|
|
196
|
+
|
|
197
|
+
runMeta.steeringQueue.push(item);
|
|
198
|
+
this.emit(runMeta.userId, 'run:steer_queued', {
|
|
199
|
+
runId,
|
|
200
|
+
content: item.content,
|
|
201
|
+
pendingCount: runMeta.steeringQueue.length
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
return {
|
|
205
|
+
runId,
|
|
206
|
+
pendingCount: runMeta.steeringQueue.length,
|
|
207
|
+
item
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
applyQueuedSteering(runId, messages, { userId, conversationId }) {
|
|
212
|
+
const runMeta = this.getRunMeta(runId);
|
|
213
|
+
if (!runMeta?.steeringQueue?.length) {
|
|
214
|
+
return { messages, appliedCount: 0 };
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const queued = runMeta.steeringQueue.splice(0, runMeta.steeringQueue.length);
|
|
218
|
+
messages.push({
|
|
219
|
+
role: 'system',
|
|
220
|
+
content: [
|
|
221
|
+
'The user sent follow-up messages while you were already working.',
|
|
222
|
+
'Treat them as steering or next-up context for the same conversation.',
|
|
223
|
+
'If a message materially changes the active task, incorporate it now.',
|
|
224
|
+
'If it is unrelated or better handled after the current task, finish the current work first and then address it.'
|
|
225
|
+
].join(' ')
|
|
226
|
+
});
|
|
227
|
+
|
|
228
|
+
for (const entry of queued) {
|
|
229
|
+
messages.push({ role: 'user', content: entry.content });
|
|
230
|
+
if (conversationId) {
|
|
231
|
+
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content) VALUES (?, ?, ?)')
|
|
232
|
+
.run(conversationId, 'user', entry.content);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
this.emit(userId, 'run:steer_applied', {
|
|
237
|
+
runId,
|
|
238
|
+
count: queued.length,
|
|
239
|
+
pendingCount: runMeta.steeringQueue.length,
|
|
240
|
+
latestContent: queued[queued.length - 1]?.content || ''
|
|
241
|
+
});
|
|
242
|
+
|
|
243
|
+
return { messages, appliedCount: queued.length };
|
|
244
|
+
}
|
|
245
|
+
|
|
165
246
|
isRunStopped(runId) {
|
|
166
247
|
return this.getRunMeta(runId)?.aborted === true;
|
|
167
248
|
}
|
|
@@ -300,13 +381,16 @@ class AgentEngine {
|
|
|
300
381
|
});
|
|
301
382
|
}
|
|
302
383
|
};
|
|
303
|
-
const
|
|
384
|
+
const selectedProvider = await getProviderForUser(
|
|
304
385
|
userId,
|
|
305
386
|
userMessage,
|
|
306
387
|
triggerType === 'subagent',
|
|
307
388
|
_modelOverride,
|
|
308
389
|
providerStatusConfig
|
|
309
390
|
);
|
|
391
|
+
let provider = selectedProvider.provider;
|
|
392
|
+
let model = selectedProvider.model;
|
|
393
|
+
let providerName = selectedProvider.providerName;
|
|
310
394
|
|
|
311
395
|
const runTitle = generateTitle(userMessage);
|
|
312
396
|
db.prepare(`INSERT OR REPLACE INTO agent_runs(id, user_id, title, status, trigger_type, trigger_source, model)
|
|
@@ -317,8 +401,12 @@ class AgentEngine {
|
|
|
317
401
|
status: 'running',
|
|
318
402
|
aborted: false,
|
|
319
403
|
messagingSent: false,
|
|
404
|
+
triggerType,
|
|
405
|
+
triggerSource,
|
|
406
|
+
startedAt: Date.now(),
|
|
320
407
|
lastToolName: null,
|
|
321
408
|
lastToolTarget: null,
|
|
409
|
+
steeringQueue: [],
|
|
322
410
|
toolPids: new Set()
|
|
323
411
|
});
|
|
324
412
|
this.emit(userId, 'run:start', { runId, title: runTitle, model, triggerType, triggerSource });
|
|
@@ -368,6 +456,12 @@ class AgentEngine {
|
|
|
368
456
|
if (this.isRunStopped(runId)) break;
|
|
369
457
|
iteration++;
|
|
370
458
|
|
|
459
|
+
const steeringAtLoopStart = this.applyQueuedSteering(runId, messages, {
|
|
460
|
+
userId,
|
|
461
|
+
conversationId
|
|
462
|
+
});
|
|
463
|
+
messages = steeringAtLoopStart.messages;
|
|
464
|
+
|
|
371
465
|
let metrics = this.estimatePromptMetrics(messages, tools);
|
|
372
466
|
const contextWindow = provider.getContextWindow(model);
|
|
373
467
|
if (metrics.totalEstimatedTokens > contextWindow * 0.7) {
|
|
@@ -381,6 +475,7 @@ class AgentEngine {
|
|
|
381
475
|
this.emit(userId, 'run:thinking', { runId, iteration });
|
|
382
476
|
|
|
383
477
|
let response;
|
|
478
|
+
let responseModel = model;
|
|
384
479
|
let streamContent = '';
|
|
385
480
|
const callOptions = { model, reasoningEffort: this.getReasoningEffort(providerName, options) };
|
|
386
481
|
|
|
@@ -391,22 +486,30 @@ class AgentEngine {
|
|
|
391
486
|
for await (const chunk of gen) {
|
|
392
487
|
if (chunk.type === 'content') {
|
|
393
488
|
streamContent += chunk.content;
|
|
394
|
-
this.emit(userId, 'run:stream', {
|
|
489
|
+
this.emit(userId, 'run:stream', {
|
|
490
|
+
runId,
|
|
491
|
+
content: sanitizeModelOutput(streamContent, { model }),
|
|
492
|
+
iteration
|
|
493
|
+
});
|
|
395
494
|
}
|
|
396
495
|
if (chunk.type === 'done') {
|
|
397
496
|
response = chunk;
|
|
497
|
+
responseModel = model;
|
|
398
498
|
}
|
|
399
499
|
if (chunk.type === 'tool_calls') {
|
|
400
500
|
response = {
|
|
401
501
|
content: chunk.content || streamContent,
|
|
402
502
|
toolCalls: chunk.toolCalls,
|
|
503
|
+
providerContentBlocks: chunk.providerContentBlocks || null,
|
|
403
504
|
finishReason: 'tool_calls',
|
|
404
505
|
usage: chunk.usage || null
|
|
405
506
|
};
|
|
507
|
+
responseModel = model;
|
|
406
508
|
}
|
|
407
509
|
}
|
|
408
510
|
} else {
|
|
409
511
|
response = await provider.chat(messages, tools, callOptions);
|
|
512
|
+
responseModel = model;
|
|
410
513
|
}
|
|
411
514
|
} catch (err) {
|
|
412
515
|
console.error(`[Engine] Model call failed (${model}):`, err.message);
|
|
@@ -419,33 +522,42 @@ class AgentEngine {
|
|
|
419
522
|
aiSettings.fallback_model_id,
|
|
420
523
|
providerStatusConfig
|
|
421
524
|
);
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
const nextProviderName = fallback.providerName;
|
|
525
|
+
provider = fallback.provider;
|
|
526
|
+
model = fallback.model;
|
|
527
|
+
providerName = fallback.providerName;
|
|
426
528
|
|
|
427
529
|
// Recursive call once
|
|
428
|
-
const retryOptions = { ...callOptions, model
|
|
530
|
+
const retryOptions = { ...callOptions, model, reasoningEffort: this.getReasoningEffort(providerName, options) };
|
|
429
531
|
|
|
430
532
|
if (options.stream !== false) {
|
|
431
|
-
const gen =
|
|
533
|
+
const gen = provider.stream(messages, tools, retryOptions);
|
|
432
534
|
for await (const chunk of gen) {
|
|
433
535
|
if (chunk.type === 'content') {
|
|
434
536
|
streamContent += chunk.content;
|
|
435
|
-
this.emit(userId, 'run:stream', {
|
|
537
|
+
this.emit(userId, 'run:stream', {
|
|
538
|
+
runId,
|
|
539
|
+
content: sanitizeModelOutput(streamContent, { model }),
|
|
540
|
+
iteration
|
|
541
|
+
});
|
|
542
|
+
}
|
|
543
|
+
if (chunk.type === 'done') {
|
|
544
|
+
response = chunk;
|
|
545
|
+
responseModel = model;
|
|
436
546
|
}
|
|
437
|
-
if (chunk.type === 'done') response = chunk;
|
|
438
547
|
if (chunk.type === 'tool_calls') {
|
|
439
548
|
response = {
|
|
440
549
|
content: chunk.content || streamContent,
|
|
441
550
|
toolCalls: chunk.toolCalls,
|
|
551
|
+
providerContentBlocks: chunk.providerContentBlocks || null,
|
|
442
552
|
finishReason: 'tool_calls',
|
|
443
553
|
usage: chunk.usage || null
|
|
444
554
|
};
|
|
555
|
+
responseModel = model;
|
|
445
556
|
}
|
|
446
557
|
}
|
|
447
558
|
} else {
|
|
448
|
-
response = await
|
|
559
|
+
response = await provider.chat(messages, tools, retryOptions);
|
|
560
|
+
responseModel = model;
|
|
449
561
|
}
|
|
450
562
|
} else {
|
|
451
563
|
throw err;
|
|
@@ -463,7 +575,7 @@ class AgentEngine {
|
|
|
463
575
|
totalTokens += response.usage.totalTokens || 0;
|
|
464
576
|
}
|
|
465
577
|
|
|
466
|
-
lastContent = response.content || streamContent || '';
|
|
578
|
+
lastContent = sanitizeModelOutput(response.content || streamContent || '', { model: responseModel });
|
|
467
579
|
|
|
468
580
|
if ((!response.toolCalls || response.toolCalls.length === 0) && lastContent) {
|
|
469
581
|
const salvaged = salvageTextToolCalls(lastContent, tools);
|
|
@@ -477,6 +589,7 @@ class AgentEngine {
|
|
|
477
589
|
|
|
478
590
|
const assistantMessage = { role: 'assistant', content: lastContent };
|
|
479
591
|
if (response.toolCalls?.length) assistantMessage.tool_calls = response.toolCalls;
|
|
592
|
+
if (response.providerContentBlocks?.length) assistantMessage.providerContentBlocks = response.providerContentBlocks;
|
|
480
593
|
messages.push(assistantMessage);
|
|
481
594
|
|
|
482
595
|
if (conversationId) {
|
|
@@ -490,7 +603,19 @@ class AgentEngine {
|
|
|
490
603
|
);
|
|
491
604
|
}
|
|
492
605
|
|
|
493
|
-
if (!response.toolCalls || response.toolCalls.length === 0)
|
|
606
|
+
if (!response.toolCalls || response.toolCalls.length === 0) {
|
|
607
|
+
const steeringAfterResponse = this.applyQueuedSteering(runId, messages, {
|
|
608
|
+
userId,
|
|
609
|
+
conversationId
|
|
610
|
+
});
|
|
611
|
+
messages = steeringAfterResponse.messages;
|
|
612
|
+
if (steeringAfterResponse.appliedCount > 0) {
|
|
613
|
+
iteration = Math.max(0, iteration - 1);
|
|
614
|
+
lastContent = '';
|
|
615
|
+
continue;
|
|
616
|
+
}
|
|
617
|
+
break;
|
|
618
|
+
}
|
|
494
619
|
|
|
495
620
|
for (const toolCall of response.toolCalls) {
|
|
496
621
|
if (this.isRunStopped(runId)) break;
|
|
@@ -583,10 +708,14 @@ class AgentEngine {
|
|
|
583
708
|
model,
|
|
584
709
|
reasoningEffort: this.getReasoningEffort(providerName, options)
|
|
585
710
|
});
|
|
586
|
-
lastContent = finalResponse.content || '';
|
|
711
|
+
lastContent = sanitizeModelOutput(finalResponse.content || '', { model });
|
|
587
712
|
forcedFinalResponse = true;
|
|
588
713
|
|
|
589
|
-
|
|
714
|
+
const finalAssistantMessage = { role: 'assistant', content: lastContent };
|
|
715
|
+
if (finalResponse.providerContentBlocks?.length) {
|
|
716
|
+
finalAssistantMessage.providerContentBlocks = finalResponse.providerContentBlocks;
|
|
717
|
+
}
|
|
718
|
+
messages.push(finalAssistantMessage);
|
|
590
719
|
if (conversationId) {
|
|
591
720
|
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
|
|
592
721
|
.run(conversationId, 'assistant', lastContent, finalResponse.usage?.totalTokens || 0);
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
const { sanitizeStreamingToolCallText } = require('./toolCallSalvage');
|
|
2
|
+
|
|
3
|
+
// Single-character matchers used for counting (global + unicode flags).
const HAN_CHAR_REGEX = /\p{Script=Han}/gu;
const LATIN_CHAR_REGEX = /\p{Script=Latin}/gu;
const LETTER_CHAR_REGEX = /\p{L}/gu;
// One or more consecutive Han characters and/or code points U+3000–U+303F.
const HAN_RUN_REGEX = /[\p{Script=Han}\u3000-\u303F]+/gu;
// Fenced (```…```) or single-line inline (`…`) code; the capture group makes
// String.prototype.split keep the matched code spans in its result.
const MARKDOWN_CODE_SPAN_REGEX = /(```[\s\S]*?```|`[^`\n]+`)/g;
|
|
8
|
+
|
|
9
|
+
/**
 * Counts how many entries `text.match(regex)` yields.
 *
 * @param {string} text - String to search.
 * @param {RegExp} regex - Pattern passed to String.prototype.match.
 * @returns {number} Length of the match array, or 0 when there is no match.
 */
function countMatches(text, regex) {
  return (text.match(regex) ?? []).length;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Decides whether Han characters in `text` should be stripped.
 *
 * Returns true only when all of the following hold:
 *   - `model` is exactly 'MiniMax-M2.7';
 *   - the text contains between 1 and 24 Han characters;
 *   - the text contains at least 20 Latin-script characters;
 *   - Han characters make up at most 18% of all letters.
 *
 * @param {string} text - Model output to inspect.
 * @param {string} model - Model identifier.
 * @returns {boolean} Whether the Han characters should be removed.
 */
function shouldStripIncidentalHan(text, model) {
  if (model !== 'MiniMax-M2.7') return false;

  const hanCount = countMatches(text, HAN_CHAR_REGEX);
  const hanInRange = hanCount >= 1 && hanCount <= 24;
  if (!hanInRange) return false;

  if (countMatches(text, LATIN_CHAR_REGEX) < 20) return false;

  const letterCount = countMatches(text, LETTER_CHAR_REGEX);
  const hanShare = letterCount > 0 ? hanCount / letterCount : 0;
  return hanShare <= 0.18;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Removes Han runs from non-code text and tidies the whitespace around them.
 *
 * The replacements are applied in order:
 *   1. a Han run sandwiched between two letters/digits becomes one space;
 *   2. every remaining Han run is deleted;
 *   3. runs of two or more spaces/tabs collapse to a single space;
 *   4. spaces/tabs before a newline are dropped;
 *   5. spaces/tabs after a newline are dropped;
 *   6. spaces/tabs before , . ; : ! ? ) ] } are dropped;
 *   7. whitespace after ( [ { is dropped.
 *
 * @param {string} text - Plain (non-code) text segment.
 * @returns {string} The cleaned text.
 */
function sanitizePlainText(text) {
  const steps = [
    [/([\p{L}\p{N}])[\p{Script=Han}\u3000-\u303F]+([\p{L}\p{N}])/gu, '$1 $2'],
    [HAN_RUN_REGEX, ''],
    [/[ \t]{2,}/g, ' '],
    [/[ \t]+\n/g, '\n'],
    [/\n[ \t]+/g, '\n'],
    [/[ \t]+([,.;:!?)\]}])/g, '$1'],
    [/([([{])\s+/g, '$1'],
  ];

  let result = text;
  for (const [pattern, replacement] of steps) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Applies sanitizePlainText to everything except Markdown code spans.
 *
 * The text is split on MARKDOWN_CODE_SPAN_REGEX, whose single capture group
 * makes split() return alternating segments: even indices are prose, odd
 * indices are the matched code spans. Code spans are passed through untouched.
 *
 * Segments are classified by index rather than by a leading backtick, so a
 * prose segment that merely begins with a stray or unclosed backtick is still
 * sanitized.
 *
 * @param {string} text - Markdown text.
 * @returns {string} Text with prose segments sanitized and code preserved.
 */
function sanitizeMarkdownAware(text) {
  return text
    .split(MARKDOWN_CODE_SPAN_REGEX)
    .map((part, index) => {
      if (!part) return part;
      const isCodeSpan = index % 2 === 1;
      return isCodeSpan ? part : sanitizePlainText(part);
    })
    .join('');
}
|
|
51
|
+
|
|
52
|
+
/**
 * Cleans model output before it is shown or stored.
 *
 * Non-string and empty inputs are returned unchanged. For model
 * 'MiniMax-M2.7', text containing '<invoke' or ':tool_call' is first passed
 * through sanitizeStreamingToolCallText. The result is then run through
 * sanitizeMarkdownAware only when shouldStripIncidentalHan approves it.
 *
 * @param {*} text - Raw model output.
 * @param {object} [options={}] - Options; only `options.model` is read.
 * @returns {*} The sanitized string, or the input itself when not applicable.
 */
function sanitizeModelOutput(text, options = {}) {
  const isNonEmptyString = typeof text === 'string' && text.length > 0;
  if (!isNonEmptyString) return text;

  const { model } = options;
  const hasToolCallMarkup = (value) => value.includes('<invoke') || value.includes(':tool_call');

  const cleaned = model === 'MiniMax-M2.7' && hasToolCallMarkup(text)
    ? sanitizeStreamingToolCallText(text)
    : text;

  return shouldStripIncidentalHan(cleaned, model)
    ? sanitizeMarkdownAware(cleaned)
    : cleaned;
}
|
|
64
|
+
|
|
65
|
+
module.exports = {
|
|
66
|
+
sanitizeModelOutput
|
|
67
|
+
};
|
|
@@ -37,6 +37,50 @@ class AnthropicProvider extends BaseProvider {
|
|
|
37
37
|
}));
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
normalizeContentBlocks(blocks = []) {
|
|
41
|
+
const normalized = [];
|
|
42
|
+
|
|
43
|
+
for (const block of blocks) {
|
|
44
|
+
if (!block || !block.type) continue;
|
|
45
|
+
|
|
46
|
+
if (block.type === 'thinking') {
|
|
47
|
+
normalized.push({
|
|
48
|
+
type: 'thinking',
|
|
49
|
+
thinking: block.thinking || '',
|
|
50
|
+
...(block.signature ? { signature: block.signature } : {})
|
|
51
|
+
});
|
|
52
|
+
continue;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (block.type === 'redacted_thinking') {
|
|
56
|
+
normalized.push({
|
|
57
|
+
type: 'redacted_thinking',
|
|
58
|
+
data: block.data
|
|
59
|
+
});
|
|
60
|
+
continue;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
if (block.type === 'text') {
|
|
64
|
+
normalized.push({
|
|
65
|
+
type: 'text',
|
|
66
|
+
text: block.text || ''
|
|
67
|
+
});
|
|
68
|
+
continue;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
if (block.type === 'tool_use') {
|
|
72
|
+
normalized.push({
|
|
73
|
+
type: 'tool_use',
|
|
74
|
+
id: block.id,
|
|
75
|
+
name: block.name,
|
|
76
|
+
input: block.input || {}
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return normalized;
|
|
82
|
+
}
|
|
83
|
+
|
|
40
84
|
convertMessages(messages) {
|
|
41
85
|
let system = '';
|
|
42
86
|
const converted = [];
|
|
@@ -60,6 +104,14 @@ class AnthropicProvider extends BaseProvider {
|
|
|
60
104
|
}
|
|
61
105
|
|
|
62
106
|
if (msg.role === 'assistant' && msg.tool_calls) {
|
|
107
|
+
if (Array.isArray(msg.providerContentBlocks) && msg.providerContentBlocks.length > 0) {
|
|
108
|
+
converted.push({
|
|
109
|
+
role: 'assistant',
|
|
110
|
+
content: this.normalizeContentBlocks(msg.providerContentBlocks)
|
|
111
|
+
});
|
|
112
|
+
continue;
|
|
113
|
+
}
|
|
114
|
+
|
|
63
115
|
const content = [];
|
|
64
116
|
if (msg.content) content.push({ type: 'text', text: msg.content });
|
|
65
117
|
for (const tc of msg.tool_calls) {
|
|
@@ -100,6 +152,7 @@ class AnthropicProvider extends BaseProvider {
|
|
|
100
152
|
|
|
101
153
|
let content = '';
|
|
102
154
|
const toolCalls = [];
|
|
155
|
+
const providerContentBlocks = this.normalizeContentBlocks(response.content);
|
|
103
156
|
|
|
104
157
|
for (const block of response.content) {
|
|
105
158
|
if (block.type === 'text') {
|
|
@@ -119,6 +172,7 @@ class AnthropicProvider extends BaseProvider {
|
|
|
119
172
|
return {
|
|
120
173
|
content,
|
|
121
174
|
toolCalls,
|
|
175
|
+
providerContentBlocks,
|
|
122
176
|
finishReason: response.stop_reason === 'tool_use' ? 'tool_calls' : 'stop',
|
|
123
177
|
usage: {
|
|
124
178
|
promptTokens: response.usage.input_tokens,
|
|
@@ -148,31 +202,106 @@ class AnthropicProvider extends BaseProvider {
|
|
|
148
202
|
let content = '';
|
|
149
203
|
let currentToolCalls = [];
|
|
150
204
|
let currentToolIndex = -1;
|
|
205
|
+
const providerContentBlocks = [];
|
|
151
206
|
|
|
152
207
|
for await (const event of stream) {
|
|
153
208
|
if (event.type === 'content_block_start') {
|
|
154
|
-
if (event.content_block.type === '
|
|
209
|
+
if (event.content_block.type === 'thinking') {
|
|
210
|
+
providerContentBlocks[event.index] = {
|
|
211
|
+
type: 'thinking',
|
|
212
|
+
thinking: event.content_block.thinking || '',
|
|
213
|
+
signature: event.content_block.signature || ''
|
|
214
|
+
};
|
|
215
|
+
} else if (event.content_block.type === 'redacted_thinking') {
|
|
216
|
+
providerContentBlocks[event.index] = {
|
|
217
|
+
type: 'redacted_thinking',
|
|
218
|
+
data: event.content_block.data
|
|
219
|
+
};
|
|
220
|
+
} else if (event.content_block.type === 'text') {
|
|
221
|
+
providerContentBlocks[event.index] = {
|
|
222
|
+
type: 'text',
|
|
223
|
+
text: event.content_block.text || ''
|
|
224
|
+
};
|
|
225
|
+
} else if (event.content_block.type === 'tool_use') {
|
|
155
226
|
currentToolIndex++;
|
|
156
227
|
currentToolCalls.push({
|
|
157
228
|
id: event.content_block.id,
|
|
158
229
|
type: 'function',
|
|
159
230
|
function: { name: event.content_block.name, arguments: '' }
|
|
160
231
|
});
|
|
232
|
+
providerContentBlocks[event.index] = {
|
|
233
|
+
type: 'tool_use',
|
|
234
|
+
id: event.content_block.id,
|
|
235
|
+
name: event.content_block.name,
|
|
236
|
+
input: {}
|
|
237
|
+
};
|
|
161
238
|
}
|
|
162
239
|
} else if (event.type === 'content_block_delta') {
|
|
163
240
|
if (event.delta.type === 'text_delta') {
|
|
164
241
|
content += event.delta.text;
|
|
242
|
+
if (providerContentBlocks[event.index]?.type === 'text') {
|
|
243
|
+
providerContentBlocks[event.index].text += event.delta.text;
|
|
244
|
+
}
|
|
165
245
|
yield { type: 'content', content: event.delta.text };
|
|
246
|
+
} else if (event.delta.type === 'thinking_delta') {
|
|
247
|
+
if (providerContentBlocks[event.index]?.type === 'thinking') {
|
|
248
|
+
providerContentBlocks[event.index].thinking += event.delta.thinking || '';
|
|
249
|
+
}
|
|
250
|
+
} else if (event.delta.type === 'signature_delta') {
|
|
251
|
+
if (providerContentBlocks[event.index]?.type === 'thinking') {
|
|
252
|
+
providerContentBlocks[event.index].signature = event.delta.signature || '';
|
|
253
|
+
}
|
|
166
254
|
} else if (event.delta.type === 'input_json_delta') {
|
|
167
255
|
if (currentToolCalls[currentToolIndex]) {
|
|
168
256
|
currentToolCalls[currentToolIndex].function.arguments += event.delta.partial_json;
|
|
169
257
|
}
|
|
258
|
+
if (providerContentBlocks[event.index]?.type === 'tool_use') {
|
|
259
|
+
const currentJson = providerContentBlocks[event.index]._inputJson || '';
|
|
260
|
+
providerContentBlocks[event.index]._inputJson = currentJson + (event.delta.partial_json || '');
|
|
261
|
+
}
|
|
170
262
|
}
|
|
171
263
|
} else if (event.type === 'message_stop') {
|
|
264
|
+
const normalizedBlocks = providerContentBlocks
|
|
265
|
+
.filter(Boolean)
|
|
266
|
+
.map((block) => {
|
|
267
|
+
if (block.type === 'tool_use') {
|
|
268
|
+
let parsedInput = block.input || {};
|
|
269
|
+
if (typeof block._inputJson === 'string' && block._inputJson.trim()) {
|
|
270
|
+
try {
|
|
271
|
+
parsedInput = JSON.parse(block._inputJson);
|
|
272
|
+
} catch { }
|
|
273
|
+
}
|
|
274
|
+
return {
|
|
275
|
+
type: 'tool_use',
|
|
276
|
+
id: block.id,
|
|
277
|
+
name: block.name,
|
|
278
|
+
input: parsedInput
|
|
279
|
+
};
|
|
280
|
+
}
|
|
281
|
+
if (block.type === 'thinking') {
|
|
282
|
+
return {
|
|
283
|
+
type: 'thinking',
|
|
284
|
+
thinking: block.thinking || '',
|
|
285
|
+
...(block.signature ? { signature: block.signature } : {})
|
|
286
|
+
};
|
|
287
|
+
}
|
|
288
|
+
if (block.type === 'redacted_thinking') {
|
|
289
|
+
return {
|
|
290
|
+
type: 'redacted_thinking',
|
|
291
|
+
data: block.data
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
return {
|
|
295
|
+
type: 'text',
|
|
296
|
+
text: block.text || ''
|
|
297
|
+
};
|
|
298
|
+
});
|
|
299
|
+
|
|
172
300
|
yield {
|
|
173
301
|
type: 'done',
|
|
174
302
|
content,
|
|
175
303
|
toolCalls: currentToolCalls,
|
|
304
|
+
providerContentBlocks: normalizedBlocks,
|
|
176
305
|
finishReason: currentToolCalls.length > 0 ? 'tool_calls' : 'stop',
|
|
177
306
|
usage: null
|
|
178
307
|
};
|
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
class BaseProvider {
|
|
2
|
+
static readImageAsBase64(imagePath) {
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
return fs.readFileSync(imagePath).toString('base64');
|
|
5
|
+
}
|
|
6
|
+
|
|
2
7
|
constructor(config = {}) {
|
|
3
8
|
this.config = config;
|
|
4
9
|
this.name = 'base';
|
|
@@ -36,6 +41,18 @@ class BaseProvider {
|
|
|
36
41
|
getContextWindow(model) {
|
|
37
42
|
return 128000;
|
|
38
43
|
}
|
|
44
|
+
|
|
45
|
+
supportsVision() {
|
|
46
|
+
return false;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
getDefaultVisionModel() {
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
async analyzeImage(_options = {}) {
|
|
54
|
+
throw new Error(`Provider '${this.name}' does not support image analysis`);
|
|
55
|
+
}
|
|
39
56
|
}
|
|
40
57
|
|
|
41
58
|
module.exports = { BaseProvider };
|
|
@@ -15,6 +15,14 @@ class GrokProvider extends BaseProvider {
|
|
|
15
15
|
return 131072; // grok-4 context window
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
supportsVision() {
|
|
19
|
+
return true;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
getDefaultVisionModel() {
|
|
23
|
+
return 'grok-4.20-beta-latest-non-reasoning';
|
|
24
|
+
}
|
|
25
|
+
|
|
18
26
|
_buildParams(model, messages, tools, options) {
|
|
19
27
|
const params = {
|
|
20
28
|
model,
|
|
@@ -116,6 +124,32 @@ class GrokProvider extends BaseProvider {
|
|
|
116
124
|
}
|
|
117
125
|
}));
|
|
118
126
|
}
|
|
127
|
+
|
|
128
|
+
async analyzeImage(options = {}) {
|
|
129
|
+
const model = options.model || this.getDefaultVisionModel();
|
|
130
|
+
const b64 = BaseProvider.readImageAsBase64(options.imagePath);
|
|
131
|
+
const response = await this.client.chat.completions.create({
|
|
132
|
+
model,
|
|
133
|
+
max_tokens: options.maxTokens || 4096,
|
|
134
|
+
messages: [{
|
|
135
|
+
role: 'user',
|
|
136
|
+
content: [
|
|
137
|
+
{ type: 'text', text: options.question || 'Describe this image in detail.' },
|
|
138
|
+
{
|
|
139
|
+
type: 'image_url',
|
|
140
|
+
image_url: {
|
|
141
|
+
url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
]
|
|
145
|
+
}]
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
return {
|
|
149
|
+
content: response.choices[0]?.message?.content || '',
|
|
150
|
+
model: response.model || model,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
119
153
|
}
|
|
120
154
|
|
|
121
155
|
module.exports = { GrokProvider };
|
|
@@ -48,6 +48,14 @@ class OpenAIProvider extends BaseProvider {
|
|
|
48
48
|
return 128000;
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
supportsVision() {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
getDefaultVisionModel() {
|
|
56
|
+
return 'gpt-4.1-mini';
|
|
57
|
+
}
|
|
58
|
+
|
|
51
59
|
_buildParams(model, messages, tools, options) {
|
|
52
60
|
const isReasoning = this.isReasoningModel(model);
|
|
53
61
|
// Reasoning models (GPT-5, o-series): use developer role for system messages
|
|
@@ -163,6 +171,32 @@ class OpenAIProvider extends BaseProvider {
|
|
|
163
171
|
}
|
|
164
172
|
}
|
|
165
173
|
}
|
|
174
|
+
|
|
175
|
+
async analyzeImage(options = {}) {
|
|
176
|
+
const model = options.model || this.getDefaultVisionModel();
|
|
177
|
+
const b64 = BaseProvider.readImageAsBase64(options.imagePath);
|
|
178
|
+
const response = await this.client.chat.completions.create({
|
|
179
|
+
model,
|
|
180
|
+
max_tokens: options.maxTokens || 4096,
|
|
181
|
+
messages: [{
|
|
182
|
+
role: 'user',
|
|
183
|
+
content: [
|
|
184
|
+
{ type: 'text', text: options.question || 'Describe this image in detail.' },
|
|
185
|
+
{
|
|
186
|
+
type: 'image_url',
|
|
187
|
+
image_url: {
|
|
188
|
+
url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
]
|
|
192
|
+
}]
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
return {
|
|
196
|
+
content: response.choices[0]?.message?.content || '',
|
|
197
|
+
model: response.model || model,
|
|
198
|
+
};
|
|
199
|
+
}
|
|
166
200
|
}
|
|
167
201
|
|
|
168
202
|
module.exports = { OpenAIProvider };
|
|
@@ -41,6 +41,7 @@ When prior context makes the goal clear, act on it. Only ask a clarifying questi
|
|
|
41
41
|
|
|
42
42
|
REPORT ACTUAL RESULTS
|
|
43
43
|
When a tool returns data, share the relevant parts — summarized if large, direct if short. Never paste raw JSON as the answer. Never narrate what you're about to do at length before doing it.
|
|
44
|
+
Never promise an action in the final answer unless you already took that action in this run. Do not say "I'll check", "I'll fix it", or "I'll send it" and then stop. Either do it first or say you have not done it yet.
|
|
44
45
|
|
|
45
46
|
DON'T REPEAT YOURSELF
|
|
46
47
|
State a limitation or error once. If the user pushes back, try a different approach before restating the same failure. Repeating the same dead-end across five messages is useless.
|