neoagent 2.1.11 → 2.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,15 @@ router.post('/screenshot', async (req, res) => {
85
85
  }
86
86
  });
87
87
 
88
+ router.post('/observe', async (req, res) => {
89
+ try {
90
+ const controller = req.app.locals.androidController;
91
+ res.json(await controller.observe(req.body || {}));
92
+ } catch (err) {
93
+ res.status(500).json({ error: sanitizeError(err) });
94
+ }
95
+ });
96
+
88
97
  router.post('/ui-dump', async (req, res) => {
89
98
  try {
90
99
  const controller = req.app.locals.androidController;
@@ -7,6 +7,7 @@ const { ensureDefaultAiSettings, getAiSettings } = require('./settings');
7
7
  const { selectToolsForTask } = require('./toolSelector');
8
8
  const { compactToolResult } = require('./toolResult');
9
9
  const { salvageTextToolCalls } = require('./toolCallSalvage');
10
+ const { sanitizeModelOutput } = require('./outputSanitizer');
10
11
 
11
12
  function generateTitle(task) {
12
13
  if (!task || typeof task !== 'string') return 'Untitled';
@@ -162,6 +163,86 @@ class AgentEngine {
162
163
  return this.activeRuns.get(runId) || null;
163
164
  }
164
165
 
166
+ findActiveRunForUser(userId, predicate = null) {
167
+ let candidate = null;
168
+ for (const [runId, runMeta] of this.activeRuns.entries()) {
169
+ if (runMeta.userId !== userId || runMeta.aborted) continue;
170
+ if (typeof predicate === 'function' && !predicate(runMeta, runId)) continue;
171
+ if (!candidate || (runMeta.startedAt || 0) >= (candidate.startedAt || 0)) {
172
+ candidate = { runId, ...runMeta };
173
+ }
174
+ }
175
+ return candidate;
176
+ }
177
+
178
+ findSteerableRunForUser(userId, triggerSource = 'web') {
179
+ return this.findActiveRunForUser(
180
+ userId,
181
+ (runMeta) => runMeta.triggerSource === triggerSource && runMeta.triggerType === 'user'
182
+ );
183
+ }
184
+
185
+ enqueueSteering(runId, content, metadata = {}) {
186
+ const runMeta = this.getRunMeta(runId);
187
+ const trimmed = typeof content === 'string' ? content.trim() : '';
188
+ if (!runMeta || runMeta.aborted || !trimmed) return null;
189
+
190
+ const item = {
191
+ id: uuidv4(),
192
+ content: trimmed,
193
+ metadata,
194
+ createdAt: new Date().toISOString()
195
+ };
196
+
197
+ runMeta.steeringQueue.push(item);
198
+ this.emit(runMeta.userId, 'run:steer_queued', {
199
+ runId,
200
+ content: item.content,
201
+ pendingCount: runMeta.steeringQueue.length
202
+ });
203
+
204
+ return {
205
+ runId,
206
+ pendingCount: runMeta.steeringQueue.length,
207
+ item
208
+ };
209
+ }
210
+
211
+ applyQueuedSteering(runId, messages, { userId, conversationId }) {
212
+ const runMeta = this.getRunMeta(runId);
213
+ if (!runMeta?.steeringQueue?.length) {
214
+ return { messages, appliedCount: 0 };
215
+ }
216
+
217
+ const queued = runMeta.steeringQueue.splice(0, runMeta.steeringQueue.length);
218
+ messages.push({
219
+ role: 'system',
220
+ content: [
221
+ 'The user sent follow-up messages while you were already working.',
222
+ 'Treat them as steering or next-up context for the same conversation.',
223
+ 'If a message materially changes the active task, incorporate it now.',
224
+ 'If it is unrelated or better handled after the current task, finish the current work first and then address it.'
225
+ ].join(' ')
226
+ });
227
+
228
+ for (const entry of queued) {
229
+ messages.push({ role: 'user', content: entry.content });
230
+ if (conversationId) {
231
+ db.prepare('INSERT INTO conversation_messages (conversation_id, role, content) VALUES (?, ?, ?)')
232
+ .run(conversationId, 'user', entry.content);
233
+ }
234
+ }
235
+
236
+ this.emit(userId, 'run:steer_applied', {
237
+ runId,
238
+ count: queued.length,
239
+ pendingCount: runMeta.steeringQueue.length,
240
+ latestContent: queued[queued.length - 1]?.content || ''
241
+ });
242
+
243
+ return { messages, appliedCount: queued.length };
244
+ }
245
+
165
246
  isRunStopped(runId) {
166
247
  return this.getRunMeta(runId)?.aborted === true;
167
248
  }
@@ -300,13 +381,16 @@ class AgentEngine {
300
381
  });
301
382
  }
302
383
  };
303
- const { provider, model, providerName } = await getProviderForUser(
384
+ const selectedProvider = await getProviderForUser(
304
385
  userId,
305
386
  userMessage,
306
387
  triggerType === 'subagent',
307
388
  _modelOverride,
308
389
  providerStatusConfig
309
390
  );
391
+ let provider = selectedProvider.provider;
392
+ let model = selectedProvider.model;
393
+ let providerName = selectedProvider.providerName;
310
394
 
311
395
  const runTitle = generateTitle(userMessage);
312
396
  db.prepare(`INSERT OR REPLACE INTO agent_runs(id, user_id, title, status, trigger_type, trigger_source, model)
@@ -317,8 +401,12 @@ class AgentEngine {
317
401
  status: 'running',
318
402
  aborted: false,
319
403
  messagingSent: false,
404
+ triggerType,
405
+ triggerSource,
406
+ startedAt: Date.now(),
320
407
  lastToolName: null,
321
408
  lastToolTarget: null,
409
+ steeringQueue: [],
322
410
  toolPids: new Set()
323
411
  });
324
412
  this.emit(userId, 'run:start', { runId, title: runTitle, model, triggerType, triggerSource });
@@ -368,6 +456,12 @@ class AgentEngine {
368
456
  if (this.isRunStopped(runId)) break;
369
457
  iteration++;
370
458
 
459
+ const steeringAtLoopStart = this.applyQueuedSteering(runId, messages, {
460
+ userId,
461
+ conversationId
462
+ });
463
+ messages = steeringAtLoopStart.messages;
464
+
371
465
  let metrics = this.estimatePromptMetrics(messages, tools);
372
466
  const contextWindow = provider.getContextWindow(model);
373
467
  if (metrics.totalEstimatedTokens > contextWindow * 0.7) {
@@ -381,6 +475,7 @@ class AgentEngine {
381
475
  this.emit(userId, 'run:thinking', { runId, iteration });
382
476
 
383
477
  let response;
478
+ let responseModel = model;
384
479
  let streamContent = '';
385
480
  const callOptions = { model, reasoningEffort: this.getReasoningEffort(providerName, options) };
386
481
 
@@ -391,22 +486,30 @@ class AgentEngine {
391
486
  for await (const chunk of gen) {
392
487
  if (chunk.type === 'content') {
393
488
  streamContent += chunk.content;
394
- this.emit(userId, 'run:stream', { runId, content: streamContent, iteration });
489
+ this.emit(userId, 'run:stream', {
490
+ runId,
491
+ content: sanitizeModelOutput(streamContent, { model }),
492
+ iteration
493
+ });
395
494
  }
396
495
  if (chunk.type === 'done') {
397
496
  response = chunk;
497
+ responseModel = model;
398
498
  }
399
499
  if (chunk.type === 'tool_calls') {
400
500
  response = {
401
501
  content: chunk.content || streamContent,
402
502
  toolCalls: chunk.toolCalls,
503
+ providerContentBlocks: chunk.providerContentBlocks || null,
403
504
  finishReason: 'tool_calls',
404
505
  usage: chunk.usage || null
405
506
  };
507
+ responseModel = model;
406
508
  }
407
509
  }
408
510
  } else {
409
511
  response = await provider.chat(messages, tools, callOptions);
512
+ responseModel = model;
410
513
  }
411
514
  } catch (err) {
412
515
  console.error(`[Engine] Model call failed (${model}):`, err.message);
@@ -419,33 +522,42 @@ class AgentEngine {
419
522
  aiSettings.fallback_model_id,
420
523
  providerStatusConfig
421
524
  );
422
- // Update local state for the retry
423
- const nextProvider = fallback.provider;
424
- const nextModel = fallback.model;
425
- const nextProviderName = fallback.providerName;
525
+ provider = fallback.provider;
526
+ model = fallback.model;
527
+ providerName = fallback.providerName;
426
528
 
427
529
  // Recursive call once
428
- const retryOptions = { ...callOptions, model: nextModel, reasoningEffort: this.getReasoningEffort(nextProviderName, options) };
530
+ const retryOptions = { ...callOptions, model, reasoningEffort: this.getReasoningEffort(providerName, options) };
429
531
 
430
532
  if (options.stream !== false) {
431
- const gen = nextProvider.stream(messages, tools, retryOptions);
533
+ const gen = provider.stream(messages, tools, retryOptions);
432
534
  for await (const chunk of gen) {
433
535
  if (chunk.type === 'content') {
434
536
  streamContent += chunk.content;
435
- this.emit(userId, 'run:stream', { runId, content: streamContent, iteration });
537
+ this.emit(userId, 'run:stream', {
538
+ runId,
539
+ content: sanitizeModelOutput(streamContent, { model }),
540
+ iteration
541
+ });
542
+ }
543
+ if (chunk.type === 'done') {
544
+ response = chunk;
545
+ responseModel = model;
436
546
  }
437
- if (chunk.type === 'done') response = chunk;
438
547
  if (chunk.type === 'tool_calls') {
439
548
  response = {
440
549
  content: chunk.content || streamContent,
441
550
  toolCalls: chunk.toolCalls,
551
+ providerContentBlocks: chunk.providerContentBlocks || null,
442
552
  finishReason: 'tool_calls',
443
553
  usage: chunk.usage || null
444
554
  };
555
+ responseModel = model;
445
556
  }
446
557
  }
447
558
  } else {
448
- response = await nextProvider.chat(messages, tools, retryOptions);
559
+ response = await provider.chat(messages, tools, retryOptions);
560
+ responseModel = model;
449
561
  }
450
562
  } else {
451
563
  throw err;
@@ -463,7 +575,7 @@ class AgentEngine {
463
575
  totalTokens += response.usage.totalTokens || 0;
464
576
  }
465
577
 
466
- lastContent = response.content || streamContent || '';
578
+ lastContent = sanitizeModelOutput(response.content || streamContent || '', { model: responseModel });
467
579
 
468
580
  if ((!response.toolCalls || response.toolCalls.length === 0) && lastContent) {
469
581
  const salvaged = salvageTextToolCalls(lastContent, tools);
@@ -477,6 +589,7 @@ class AgentEngine {
477
589
 
478
590
  const assistantMessage = { role: 'assistant', content: lastContent };
479
591
  if (response.toolCalls?.length) assistantMessage.tool_calls = response.toolCalls;
592
+ if (response.providerContentBlocks?.length) assistantMessage.providerContentBlocks = response.providerContentBlocks;
480
593
  messages.push(assistantMessage);
481
594
 
482
595
  if (conversationId) {
@@ -490,7 +603,19 @@ class AgentEngine {
490
603
  );
491
604
  }
492
605
 
493
- if (!response.toolCalls || response.toolCalls.length === 0) break;
606
+ if (!response.toolCalls || response.toolCalls.length === 0) {
607
+ const steeringAfterResponse = this.applyQueuedSteering(runId, messages, {
608
+ userId,
609
+ conversationId
610
+ });
611
+ messages = steeringAfterResponse.messages;
612
+ if (steeringAfterResponse.appliedCount > 0) {
613
+ iteration = Math.max(0, iteration - 1);
614
+ lastContent = '';
615
+ continue;
616
+ }
617
+ break;
618
+ }
494
619
 
495
620
  for (const toolCall of response.toolCalls) {
496
621
  if (this.isRunStopped(runId)) break;
@@ -583,10 +708,14 @@ class AgentEngine {
583
708
  model,
584
709
  reasoningEffort: this.getReasoningEffort(providerName, options)
585
710
  });
586
- lastContent = finalResponse.content || '';
711
+ lastContent = sanitizeModelOutput(finalResponse.content || '', { model });
587
712
  forcedFinalResponse = true;
588
713
 
589
- messages.push({ role: 'assistant', content: lastContent });
714
+ const finalAssistantMessage = { role: 'assistant', content: lastContent };
715
+ if (finalResponse.providerContentBlocks?.length) {
716
+ finalAssistantMessage.providerContentBlocks = finalResponse.providerContentBlocks;
717
+ }
718
+ messages.push(finalAssistantMessage);
590
719
  if (conversationId) {
591
720
  db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
592
721
  .run(conversationId, 'assistant', lastContent, finalResponse.usage?.totalTokens || 0);
@@ -0,0 +1,67 @@
1
+ const { sanitizeStreamingToolCallText } = require('./toolCallSalvage');
2
+
3
+ const HAN_CHAR_REGEX = /\p{Script=Han}/gu;
4
+ const LATIN_CHAR_REGEX = /\p{Script=Latin}/gu;
5
+ const LETTER_CHAR_REGEX = /\p{L}/gu;
6
+ const HAN_RUN_REGEX = /[\p{Script=Han}\u3000-\u303F]+/gu;
7
+ const MARKDOWN_CODE_SPAN_REGEX = /(```[\s\S]*?```|`[^`\n]+`)/g;
8
+
9
+ function countMatches(text, regex) {
10
+ const matches = text.match(regex);
11
+ return matches ? matches.length : 0;
12
+ }
13
+
14
+ function shouldStripIncidentalHan(text, model) {
15
+ if (model !== 'MiniMax-M2.7') return false;
16
+
17
+ const hanCount = countMatches(text, HAN_CHAR_REGEX);
18
+ if (hanCount === 0) return false;
19
+ if (hanCount > 24) return false;
20
+
21
+ const latinCount = countMatches(text, LATIN_CHAR_REGEX);
22
+ if (latinCount < 20) return false;
23
+
24
+ const letterCount = countMatches(text, LETTER_CHAR_REGEX);
25
+ if (letterCount > 0 && (hanCount / letterCount) > 0.18) return false;
26
+
27
+ return true;
28
+ }
29
+
30
+ function sanitizePlainText(text) {
31
+ return text
32
+ .replace(/([\p{L}\p{N}])[\p{Script=Han}\u3000-\u303F]+([\p{L}\p{N}])/gu, '$1 $2')
33
+ .replace(HAN_RUN_REGEX, '')
34
+ .replace(/[ \t]{2,}/g, ' ')
35
+ .replace(/[ \t]+\n/g, '\n')
36
+ .replace(/\n[ \t]+/g, '\n')
37
+ .replace(/[ \t]+([,.;:!?)\]}])/g, '$1')
38
+ .replace(/([([{])\s+/g, '$1');
39
+ }
40
+
41
+ function sanitizeMarkdownAware(text) {
42
+ return text
43
+ .split(MARKDOWN_CODE_SPAN_REGEX)
44
+ .map((part) => {
45
+ if (!part) return part;
46
+ if (part.startsWith('```') || part.startsWith('`')) return part;
47
+ return sanitizePlainText(part);
48
+ })
49
+ .join('');
50
+ }
51
+
52
+ function sanitizeModelOutput(text, options = {}) {
53
+ if (typeof text !== 'string' || text.length === 0) return text;
54
+
55
+ let sanitized = text;
56
+
57
+ if (options.model === 'MiniMax-M2.7' && (sanitized.includes('<invoke') || sanitized.includes(':tool_call'))) {
58
+ sanitized = sanitizeStreamingToolCallText(sanitized);
59
+ }
60
+
61
+ if (!shouldStripIncidentalHan(sanitized, options.model)) return sanitized;
62
+ return sanitizeMarkdownAware(sanitized);
63
+ }
64
+
65
+ module.exports = {
66
+ sanitizeModelOutput
67
+ };
@@ -37,6 +37,50 @@ class AnthropicProvider extends BaseProvider {
37
37
  }));
38
38
  }
39
39
 
40
+ normalizeContentBlocks(blocks = []) {
41
+ const normalized = [];
42
+
43
+ for (const block of blocks) {
44
+ if (!block || !block.type) continue;
45
+
46
+ if (block.type === 'thinking') {
47
+ normalized.push({
48
+ type: 'thinking',
49
+ thinking: block.thinking || '',
50
+ ...(block.signature ? { signature: block.signature } : {})
51
+ });
52
+ continue;
53
+ }
54
+
55
+ if (block.type === 'redacted_thinking') {
56
+ normalized.push({
57
+ type: 'redacted_thinking',
58
+ data: block.data
59
+ });
60
+ continue;
61
+ }
62
+
63
+ if (block.type === 'text') {
64
+ normalized.push({
65
+ type: 'text',
66
+ text: block.text || ''
67
+ });
68
+ continue;
69
+ }
70
+
71
+ if (block.type === 'tool_use') {
72
+ normalized.push({
73
+ type: 'tool_use',
74
+ id: block.id,
75
+ name: block.name,
76
+ input: block.input || {}
77
+ });
78
+ }
79
+ }
80
+
81
+ return normalized;
82
+ }
83
+
40
84
  convertMessages(messages) {
41
85
  let system = '';
42
86
  const converted = [];
@@ -60,6 +104,14 @@ class AnthropicProvider extends BaseProvider {
60
104
  }
61
105
 
62
106
  if (msg.role === 'assistant' && msg.tool_calls) {
107
+ if (Array.isArray(msg.providerContentBlocks) && msg.providerContentBlocks.length > 0) {
108
+ converted.push({
109
+ role: 'assistant',
110
+ content: this.normalizeContentBlocks(msg.providerContentBlocks)
111
+ });
112
+ continue;
113
+ }
114
+
63
115
  const content = [];
64
116
  if (msg.content) content.push({ type: 'text', text: msg.content });
65
117
  for (const tc of msg.tool_calls) {
@@ -100,6 +152,7 @@ class AnthropicProvider extends BaseProvider {
100
152
 
101
153
  let content = '';
102
154
  const toolCalls = [];
155
+ const providerContentBlocks = this.normalizeContentBlocks(response.content);
103
156
 
104
157
  for (const block of response.content) {
105
158
  if (block.type === 'text') {
@@ -119,6 +172,7 @@ class AnthropicProvider extends BaseProvider {
119
172
  return {
120
173
  content,
121
174
  toolCalls,
175
+ providerContentBlocks,
122
176
  finishReason: response.stop_reason === 'tool_use' ? 'tool_calls' : 'stop',
123
177
  usage: {
124
178
  promptTokens: response.usage.input_tokens,
@@ -148,31 +202,106 @@ class AnthropicProvider extends BaseProvider {
148
202
  let content = '';
149
203
  let currentToolCalls = [];
150
204
  let currentToolIndex = -1;
205
+ const providerContentBlocks = [];
151
206
 
152
207
  for await (const event of stream) {
153
208
  if (event.type === 'content_block_start') {
154
- if (event.content_block.type === 'tool_use') {
209
+ if (event.content_block.type === 'thinking') {
210
+ providerContentBlocks[event.index] = {
211
+ type: 'thinking',
212
+ thinking: event.content_block.thinking || '',
213
+ signature: event.content_block.signature || ''
214
+ };
215
+ } else if (event.content_block.type === 'redacted_thinking') {
216
+ providerContentBlocks[event.index] = {
217
+ type: 'redacted_thinking',
218
+ data: event.content_block.data
219
+ };
220
+ } else if (event.content_block.type === 'text') {
221
+ providerContentBlocks[event.index] = {
222
+ type: 'text',
223
+ text: event.content_block.text || ''
224
+ };
225
+ } else if (event.content_block.type === 'tool_use') {
155
226
  currentToolIndex++;
156
227
  currentToolCalls.push({
157
228
  id: event.content_block.id,
158
229
  type: 'function',
159
230
  function: { name: event.content_block.name, arguments: '' }
160
231
  });
232
+ providerContentBlocks[event.index] = {
233
+ type: 'tool_use',
234
+ id: event.content_block.id,
235
+ name: event.content_block.name,
236
+ input: {}
237
+ };
161
238
  }
162
239
  } else if (event.type === 'content_block_delta') {
163
240
  if (event.delta.type === 'text_delta') {
164
241
  content += event.delta.text;
242
+ if (providerContentBlocks[event.index]?.type === 'text') {
243
+ providerContentBlocks[event.index].text += event.delta.text;
244
+ }
165
245
  yield { type: 'content', content: event.delta.text };
246
+ } else if (event.delta.type === 'thinking_delta') {
247
+ if (providerContentBlocks[event.index]?.type === 'thinking') {
248
+ providerContentBlocks[event.index].thinking += event.delta.thinking || '';
249
+ }
250
+ } else if (event.delta.type === 'signature_delta') {
251
+ if (providerContentBlocks[event.index]?.type === 'thinking') {
252
+ providerContentBlocks[event.index].signature = event.delta.signature || '';
253
+ }
166
254
  } else if (event.delta.type === 'input_json_delta') {
167
255
  if (currentToolCalls[currentToolIndex]) {
168
256
  currentToolCalls[currentToolIndex].function.arguments += event.delta.partial_json;
169
257
  }
258
+ if (providerContentBlocks[event.index]?.type === 'tool_use') {
259
+ const currentJson = providerContentBlocks[event.index]._inputJson || '';
260
+ providerContentBlocks[event.index]._inputJson = currentJson + (event.delta.partial_json || '');
261
+ }
170
262
  }
171
263
  } else if (event.type === 'message_stop') {
264
+ const normalizedBlocks = providerContentBlocks
265
+ .filter(Boolean)
266
+ .map((block) => {
267
+ if (block.type === 'tool_use') {
268
+ let parsedInput = block.input || {};
269
+ if (typeof block._inputJson === 'string' && block._inputJson.trim()) {
270
+ try {
271
+ parsedInput = JSON.parse(block._inputJson);
272
+ } catch { }
273
+ }
274
+ return {
275
+ type: 'tool_use',
276
+ id: block.id,
277
+ name: block.name,
278
+ input: parsedInput
279
+ };
280
+ }
281
+ if (block.type === 'thinking') {
282
+ return {
283
+ type: 'thinking',
284
+ thinking: block.thinking || '',
285
+ ...(block.signature ? { signature: block.signature } : {})
286
+ };
287
+ }
288
+ if (block.type === 'redacted_thinking') {
289
+ return {
290
+ type: 'redacted_thinking',
291
+ data: block.data
292
+ };
293
+ }
294
+ return {
295
+ type: 'text',
296
+ text: block.text || ''
297
+ };
298
+ });
299
+
172
300
  yield {
173
301
  type: 'done',
174
302
  content,
175
303
  toolCalls: currentToolCalls,
304
+ providerContentBlocks: normalizedBlocks,
176
305
  finishReason: currentToolCalls.length > 0 ? 'tool_calls' : 'stop',
177
306
  usage: null
178
307
  };
@@ -1,4 +1,9 @@
1
1
  class BaseProvider {
2
+ static readImageAsBase64(imagePath) {
3
+ const fs = require('fs');
4
+ return fs.readFileSync(imagePath).toString('base64');
5
+ }
6
+
2
7
  constructor(config = {}) {
3
8
  this.config = config;
4
9
  this.name = 'base';
@@ -36,6 +41,18 @@ class BaseProvider {
36
41
  getContextWindow(model) {
37
42
  return 128000;
38
43
  }
44
+
45
+ supportsVision() {
46
+ return false;
47
+ }
48
+
49
+ getDefaultVisionModel() {
50
+ return null;
51
+ }
52
+
53
+ async analyzeImage(_options = {}) {
54
+ throw new Error(`Provider '${this.name}' does not support image analysis`);
55
+ }
39
56
  }
40
57
 
41
58
  module.exports = { BaseProvider };
@@ -15,6 +15,14 @@ class GrokProvider extends BaseProvider {
15
15
  return 131072; // grok-4 context window
16
16
  }
17
17
 
18
+ supportsVision() {
19
+ return true;
20
+ }
21
+
22
+ getDefaultVisionModel() {
23
+ return 'grok-4.20-beta-latest-non-reasoning';
24
+ }
25
+
18
26
  _buildParams(model, messages, tools, options) {
19
27
  const params = {
20
28
  model,
@@ -116,6 +124,32 @@ class GrokProvider extends BaseProvider {
116
124
  }
117
125
  }));
118
126
  }
127
+
128
+ async analyzeImage(options = {}) {
129
+ const model = options.model || this.getDefaultVisionModel();
130
+ const b64 = BaseProvider.readImageAsBase64(options.imagePath);
131
+ const response = await this.client.chat.completions.create({
132
+ model,
133
+ max_tokens: options.maxTokens || 4096,
134
+ messages: [{
135
+ role: 'user',
136
+ content: [
137
+ { type: 'text', text: options.question || 'Describe this image in detail.' },
138
+ {
139
+ type: 'image_url',
140
+ image_url: {
141
+ url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
142
+ }
143
+ }
144
+ ]
145
+ }]
146
+ });
147
+
148
+ return {
149
+ content: response.choices[0]?.message?.content || '',
150
+ model: response.model || model,
151
+ };
152
+ }
119
153
  }
120
154
 
121
155
  module.exports = { GrokProvider };
@@ -48,6 +48,14 @@ class OpenAIProvider extends BaseProvider {
48
48
  return 128000;
49
49
  }
50
50
 
51
+ supportsVision() {
52
+ return true;
53
+ }
54
+
55
+ getDefaultVisionModel() {
56
+ return 'gpt-4.1-mini';
57
+ }
58
+
51
59
  _buildParams(model, messages, tools, options) {
52
60
  const isReasoning = this.isReasoningModel(model);
53
61
  // Reasoning models (GPT-5, o-series): use developer role for system messages
@@ -163,6 +171,32 @@ class OpenAIProvider extends BaseProvider {
163
171
  }
164
172
  }
165
173
  }
174
+
175
+ async analyzeImage(options = {}) {
176
+ const model = options.model || this.getDefaultVisionModel();
177
+ const b64 = BaseProvider.readImageAsBase64(options.imagePath);
178
+ const response = await this.client.chat.completions.create({
179
+ model,
180
+ max_tokens: options.maxTokens || 4096,
181
+ messages: [{
182
+ role: 'user',
183
+ content: [
184
+ { type: 'text', text: options.question || 'Describe this image in detail.' },
185
+ {
186
+ type: 'image_url',
187
+ image_url: {
188
+ url: `data:${options.mimeType || 'image/jpeg'};base64,${b64}`
189
+ }
190
+ }
191
+ ]
192
+ }]
193
+ });
194
+
195
+ return {
196
+ content: response.choices[0]?.message?.content || '',
197
+ model: response.model || model,
198
+ };
199
+ }
166
200
  }
167
201
 
168
202
  module.exports = { OpenAIProvider };
@@ -41,6 +41,7 @@ When prior context makes the goal clear, act on it. Only ask a clarifying questi
41
41
 
42
42
  REPORT ACTUAL RESULTS
43
43
  When a tool returns data, share the relevant parts — summarized if large, direct if short. Never paste raw JSON as the answer. Never narrate what you're about to do at length before doing it.
44
+ Never promise an action in the final answer unless you already took that action in this run. Do not say "I'll check", "I'll fix it", or "I'll send it" and then stop. Either do it first or say you have not done it yet.
44
45
 
45
46
  DON'T REPEAT YOURSELF
46
47
  State a limitation or error once. If the user pushes back, try a different approach before restating the same failure. Repeating the same dead-end across five messages is useless.