@hamp10/agentforge 0.2.14 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/OllamaAgent.js +182 -213
package/package.json
CHANGED
package/src/OllamaAgent.js
CHANGED
|
@@ -8,106 +8,84 @@ import { fileURLToPath } from 'url';
|
|
|
8
8
|
const execAsync = promisify(exec);
|
|
9
9
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
// Minimal tool definitions — one compact JSON per line, embedded in system prompt.
// Ollama's `tools` API param is broken for qwen3 (github.com/ollama/ollama/issues/14601).
// Descriptions kept short to fit within qwen3-vl:8b's 4096 token context.
//
// Each entry follows the OpenAI function-tool shape: { type: 'function', function: { name,
// description, parameters } }. Key order inside `function` matters because the objects are
// serialized verbatim into the system prompt via TOOLS_XML below.
const TOOL_DEFS = [
  {
    type: 'function',
    function: {
      name: 'bash',
      description: 'Run a shell command. Returns stdout/stderr.',
      parameters: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] }
    }
  },
  {
    type: 'function',
    function: {
      name: 'read_file',
      description: 'Read a file.',
      parameters: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] }
    }
  },
  {
    type: 'function',
    function: {
      name: 'write_file',
      description: 'Write a file.',
      parameters: { type: 'object', properties: { path: { type: 'string' }, content: { type: 'string' } }, required: ['path', 'content'] }
    }
  },
  {
    type: 'function',
    function: {
      name: 'list_directory',
      description: 'List files in a directory.',
      parameters: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] }
    }
  },
  {
    type: 'function',
    function: {
      name: 'web_fetch',
      description: 'Fetch text content from a URL.',
      parameters: { type: 'object', properties: { url: { type: 'string' } }, required: ['url'] }
    }
  },
  {
    type: 'function',
    function: {
      name: 'take_screenshot',
      // The schema accepts target 'screen' or 'browser'; the description must say what each
      // means, otherwise the model has no way to choose between them.
      description: 'Screenshot the full screen (target=screen) or the agent browser (target=browser). Set send_to_user=true only if user asked to see it.',
      parameters: { type: 'object', properties: { target: { type: 'string', enum: ['screen', 'browser'] }, send_to_user: { type: 'boolean' } }, required: ['target'] }
    }
  }
];

// Minimal <tools> XML for system prompt — one compact JSON per line, no outer array.
// Per qwen3 Hermes chat template (tokenizer_config.json).
// Only the inner `function` object of each definition is serialized; the `type` wrapper is dropped.
const TOOLS_XML = `<tools>\n${TOOL_DEFS.map(t => JSON.stringify(t.function)).join('\n')}\n</tools>`;
|
|
68
|
+
|
|
69
|
+
/**
 * Parse <tool_call>...</tool_call> blocks from streamed content.
 * qwen3-vl native format: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
 *
 * Accepted payload keys: `name` (or `tool`) for the function name and `arguments`
 * (or `args`) for its parameters; missing arguments default to `{}`. If the arguments
 * arrive as a JSON-encoded string (the OpenAI wire convention), they are decoded to an
 * object so callers always receive structured arguments.
 *
 * Blocks whose payload is not valid JSON, is not an object, or has no string name are
 * skipped; the remaining blocks are still returned. An unterminated <tool_call> (no
 * closing tag) is never matched.
 *
 * @param {string} content - Full accumulated model output.
 * @returns {Array<{name: string, arguments: *}>|null} Parsed calls in order of
 *   appearance, or null if no complete tool calls were found.
 */
function _parseToolCallTags(content) {
  if (typeof content !== 'string' || content === '') return null;

  const calls = [];
  for (const [, payload] of content.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
    let obj;
    try {
      obj = JSON.parse(payload.trim());
    } catch {
      // Best-effort parsing: one malformed block must not discard the valid ones around it.
      continue;
    }
    if (obj === null || typeof obj !== 'object') continue;

    const name = obj.name || obj.tool;
    if (typeof name !== 'string') continue;

    let args = obj.arguments ?? obj.args ?? {};
    if (typeof args === 'string') {
      // Some models double-encode arguments as a JSON string; unwrap it when it decodes
      // to an object, otherwise keep the raw value untouched.
      try {
        const decoded = JSON.parse(args);
        if (decoded !== null && typeof decoded === 'object') args = decoded;
      } catch {
        // Not JSON — leave the string as the model produced it.
      }
    }

    calls.push({ name, arguments: args });
  }
  return calls.length > 0 ? calls : null;
}
|
|
88
|
+
|
|
111
89
|
/**
|
|
112
90
|
* Detect text-based tool calls from model content.
|
|
113
91
|
* qwen3-vl:8b outputs tool calls as JSON in content rather than tool_calls field.
|
|
@@ -254,23 +232,34 @@ export class OllamaAgent extends EventEmitter {
|
|
|
254
232
|
// Load conversation history from disk (session persistence)
|
|
255
233
|
const history = this._loadHistory(agentId, workDir, sessionId);
|
|
256
234
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
235
|
+
// System prompt uses the exact format from qwen3's Hermes chat template.
|
|
236
|
+
// Tools are embedded as <tools> XML — never passed via the API `tools` param (broken in Ollama).
|
|
237
|
+
const systemPrompt = isQwen3
|
|
238
|
+
? [
|
|
239
|
+
'/no_think',
|
|
240
|
+
`You are a helpful assistant. Working directory: ${workDir}`,
|
|
241
|
+
``,
|
|
242
|
+
`# Tools`,
|
|
243
|
+
``,
|
|
244
|
+
`You may call one or more functions to complete the task.`,
|
|
245
|
+
``,
|
|
246
|
+
`You are provided with function signatures within <tools></tools> XML tags:`,
|
|
247
|
+
TOOLS_XML,
|
|
248
|
+
``,
|
|
249
|
+
`For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:`,
|
|
250
|
+
`<tool_call>`,
|
|
251
|
+
`{"name": <function-name>, "arguments": <args-json-object>}`,
|
|
252
|
+
`</tool_call>`,
|
|
253
|
+
``,
|
|
254
|
+
`Rules:`,
|
|
255
|
+
`- Call tools to take actions. Do NOT describe what you would do — just do it.`,
|
|
256
|
+
`- For simple conversation (greetings, questions) respond with plain text, no tools.`,
|
|
257
|
+
`- After finishing, write a brief summary.`,
|
|
258
|
+
].join('\n')
|
|
259
|
+
: [
|
|
260
|
+
`You are a helpful AI agent. Working directory: ${workDir}`,
|
|
261
|
+
`Use the provided tools to complete tasks. Don't describe — act.`,
|
|
262
|
+
].join('\n');
|
|
274
263
|
|
|
275
264
|
const messages = [
|
|
276
265
|
{ role: 'system', content: systemPrompt },
|
|
@@ -300,17 +289,16 @@ export class OllamaAgent extends EventEmitter {
|
|
|
300
289
|
const requestBody = {
|
|
301
290
|
model: effectiveModel,
|
|
302
291
|
messages,
|
|
303
|
-
tools: TOOLS,
|
|
304
|
-
tool_choice: 'auto',
|
|
305
292
|
stream: true,
|
|
293
|
+
// qwen3: tools embedded in system prompt — do NOT pass tools param (broken in Ollama for qwen3)
|
|
294
|
+
// Other models: pass tools normally
|
|
295
|
+
...(!isQwen3 ? { tools: TOOL_DEFS, tool_choice: 'auto' } : {}),
|
|
296
|
+
options: {
|
|
297
|
+
num_ctx: 8192, // explicit context — Ollama defaults to 2048 which is too small
|
|
298
|
+
...(isQwen3 ? { think: false } : {}), // CRITICAL: thinking + tools corrupts template
|
|
299
|
+
},
|
|
306
300
|
};
|
|
307
301
|
|
|
308
|
-
// Disable thinking mode for qwen3 — prevents 3-minute silent think phases
|
|
309
|
-
// and makes tool-call JSON output reliable.
|
|
310
|
-
if (isQwen3) {
|
|
311
|
-
requestBody.options = { think: false };
|
|
312
|
-
}
|
|
313
|
-
|
|
314
302
|
response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
|
|
315
303
|
method: 'POST',
|
|
316
304
|
headers: { 'Content-Type': 'application/json' },
|
|
@@ -328,14 +316,15 @@ export class OllamaAgent extends EventEmitter {
|
|
|
328
316
|
}
|
|
329
317
|
|
|
330
318
|
// ── Stream the SSE response ──
|
|
331
|
-
//
|
|
332
|
-
//
|
|
333
|
-
|
|
334
|
-
let
|
|
319
|
+
// For qwen3: model emits text tokens including <tool_call>...</tool_call> blocks.
|
|
320
|
+
// Stream text live to user, but suppress content inside <tool_call> tags.
|
|
321
|
+
// For other models: also handle delta.tool_calls in the standard OpenAI format.
|
|
322
|
+
let streamContent = ''; // full accumulated text (including tool_call tags for qwen3)
|
|
323
|
+
let visibleContent = ''; // text emitted live to user (no tool_call or think blocks)
|
|
324
|
+
let streamToolCalls = {}; // OpenAI-format tool calls (non-qwen3 models)
|
|
335
325
|
let inThinkBlock = false;
|
|
336
|
-
let
|
|
326
|
+
let inToolCallBlock = false; // inside <tool_call>...</tool_call>
|
|
337
327
|
let rawTokenCount = 0;
|
|
338
|
-
let rawThinkChars = 0;
|
|
339
328
|
|
|
340
329
|
const reader = response.body.getReader();
|
|
341
330
|
const decoder = new TextDecoder();
|
|
@@ -348,7 +337,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
348
337
|
|
|
349
338
|
buf += decoder.decode(value, { stream: true });
|
|
350
339
|
const lines = buf.split('\n');
|
|
351
|
-
buf = lines.pop();
|
|
340
|
+
buf = lines.pop();
|
|
352
341
|
|
|
353
342
|
for (const line of lines) {
|
|
354
343
|
if (!line.startsWith('data: ')) continue;
|
|
@@ -360,7 +349,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
360
349
|
const delta = evt.choices?.[0]?.delta;
|
|
361
350
|
if (!delta) continue;
|
|
362
351
|
|
|
363
|
-
//
|
|
352
|
+
// Standard OpenAI tool_calls (non-qwen3 models)
|
|
364
353
|
if (delta.tool_calls) {
|
|
365
354
|
for (const tc of delta.tool_calls) {
|
|
366
355
|
const idx = tc.index ?? 0;
|
|
@@ -371,173 +360,153 @@ export class OllamaAgent extends EventEmitter {
|
|
|
371
360
|
}
|
|
372
361
|
}
|
|
373
362
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
const thinkEnd = thinkBuffer.indexOf('</think>', i);
|
|
396
|
-
if (thinkEnd === -1) {
|
|
397
|
-
// still inside think block, keep buffering
|
|
398
|
-
i = thinkBuffer.length;
|
|
399
|
-
} else {
|
|
400
|
-
inThinkBlock = false;
|
|
401
|
-
i = thinkEnd + 8;
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
}
|
|
405
|
-
thinkBuffer = inThinkBlock ? thinkBuffer.slice(thinkBuffer.lastIndexOf('<think>')) : '';
|
|
406
|
-
|
|
407
|
-
streamContent += out;
|
|
408
|
-
// Stream text tokens live — but only if output clearly isn't JSON tool calls.
|
|
409
|
-
// If the accumulated content starts with '{', it may be a tool call — buffer silently.
|
|
410
|
-
// Otherwise emit immediately so the user sees live output.
|
|
411
|
-
if (out && !streamContent.trimStart().startsWith('{')) {
|
|
412
|
-
this.emit('agent_output', { agentId, output: out, isChunk: true });
|
|
363
|
+
if (!delta.content) continue;
|
|
364
|
+
rawTokenCount++;
|
|
365
|
+
streamContent += delta.content;
|
|
366
|
+
|
|
367
|
+
// Process token through think + tool_call filters, emit visible text live
|
|
368
|
+
// We scan only the new delta token against the current buffer state
|
|
369
|
+
const chunk = delta.content;
|
|
370
|
+
let visible = '';
|
|
371
|
+
// Simple per-token state machine — handles split tags across tokens by tracking state flags
|
|
372
|
+
if (!inThinkBlock && !inToolCallBlock) {
|
|
373
|
+
// Check if this chunk starts a filtered block
|
|
374
|
+
if (streamContent.includes('<think>') && !streamContent.includes('</think>')) {
|
|
375
|
+
inThinkBlock = true;
|
|
376
|
+
// emit text before the <think> tag
|
|
377
|
+
const before = streamContent.lastIndexOf('<think>');
|
|
378
|
+
// already streamed everything before this point; just suppress from here
|
|
379
|
+
} else if (streamContent.includes('<tool_call>') && !streamContent.slice(streamContent.lastIndexOf('<tool_call>')).includes('</tool_call>')) {
|
|
380
|
+
inToolCallBlock = true;
|
|
381
|
+
// Text before <tool_call> on this same token — already emitted or trivial
|
|
382
|
+
} else if (!inThinkBlock && !inToolCallBlock) {
|
|
383
|
+
visible = chunk;
|
|
413
384
|
}
|
|
414
385
|
}
|
|
386
|
+
// Exit think block
|
|
387
|
+
if (inThinkBlock && streamContent.includes('</think>')) {
|
|
388
|
+
inThinkBlock = false;
|
|
389
|
+
}
|
|
390
|
+
// Exit tool_call block
|
|
391
|
+
if (inToolCallBlock && streamContent.slice(streamContent.lastIndexOf('<tool_call>')).includes('</tool_call>')) {
|
|
392
|
+
inToolCallBlock = false;
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
if (visible && !inThinkBlock && !inToolCallBlock) {
|
|
396
|
+
visibleContent += visible;
|
|
397
|
+
this.emit('agent_output', { agentId, output: visible, isChunk: true });
|
|
398
|
+
}
|
|
415
399
|
}
|
|
416
400
|
}
|
|
417
401
|
|
|
418
|
-
console.log(` [${agentId}] 📊 Stream done: ${rawTokenCount} tokens, ${streamContent.length}
|
|
402
|
+
console.log(` [${agentId}] 📊 Stream done: ${rawTokenCount} tokens, ${streamContent.length} chars, ${visibleContent.length} visible, apiToolCalls=${Object.keys(streamToolCalls).length}`);
|
|
419
403
|
if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
|
|
420
404
|
|
|
421
|
-
//
|
|
422
|
-
//
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
// Don't emit here — detection block below handles it
|
|
405
|
+
// ── Extract tool calls from content ───────────────────────────────────
|
|
406
|
+
// For qwen3: parse <tool_call> XML tags from full streamed content.
|
|
407
|
+
// For others: use API-level tool_calls already accumulated above.
|
|
408
|
+
let parsedTagCalls = null;
|
|
409
|
+
if (isQwen3 && Object.keys(streamToolCalls).length === 0) {
|
|
410
|
+
parsedTagCalls = _parseToolCallTags(streamContent);
|
|
411
|
+
if (parsedTagCalls) {
|
|
412
|
+
console.log(` [${agentId}] 🔍 ${parsedTagCalls.length} <tool_call> tag(s) detected`);
|
|
430
413
|
}
|
|
431
414
|
}
|
|
432
415
|
|
|
433
|
-
//
|
|
434
|
-
|
|
435
|
-
// If detected, convert to streamToolCalls so they actually execute.
|
|
436
|
-
// If not tool calls, content was already streamed live token-by-token above.
|
|
437
|
-
if (Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
416
|
+
// Fallback: try legacy JSON-blob detection if no tags found
|
|
417
|
+
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
438
418
|
const textCalls = _parseTextToolCalls(streamContent);
|
|
439
419
|
if (textCalls) {
|
|
440
|
-
console.log(` [${agentId}] 🔍 ${textCalls.length} text
|
|
441
|
-
|
|
442
|
-
streamToolCalls[i] = {
|
|
443
|
-
id: `text-${i}`,
|
|
444
|
-
type: 'function',
|
|
445
|
-
function: { name: tc.name, arguments: JSON.stringify(tc.arguments) }
|
|
446
|
-
};
|
|
447
|
-
});
|
|
448
|
-
streamContent = ''; // Suppress raw JSON from output
|
|
449
|
-
} else {
|
|
450
|
-
// Regular text — already emitted live above, just accumulate
|
|
451
|
-
allOutput += streamContent;
|
|
420
|
+
console.log(` [${agentId}] 🔍 ${textCalls.length} JSON text tool call(s) detected (legacy fallback)`);
|
|
421
|
+
parsedTagCalls = textCalls;
|
|
452
422
|
}
|
|
453
423
|
}
|
|
454
424
|
|
|
425
|
+
// Convert tag/text calls into streamToolCalls structure
|
|
426
|
+
if (parsedTagCalls) {
|
|
427
|
+
parsedTagCalls.forEach((tc, i) => {
|
|
428
|
+
streamToolCalls[i] = { id: `tag-${i}`, type: 'function', function: { name: tc.name, arguments: JSON.stringify(tc.arguments) } };
|
|
429
|
+
});
|
|
430
|
+
// Don't accumulate raw tool_call XML as user-visible output
|
|
431
|
+
} else if (visibleContent) {
|
|
432
|
+
allOutput += visibleContent;
|
|
433
|
+
}
|
|
434
|
+
|
|
455
435
|
this.emit('tool_activity', {
|
|
456
436
|
agentId,
|
|
457
437
|
event: 'api_call_end',
|
|
458
438
|
description: `✅ Ollama responded`
|
|
459
439
|
});
|
|
460
440
|
|
|
461
|
-
//
|
|
441
|
+
// ── Push assistant message ────────────────────────────────────────────
|
|
462
442
|
const toolCallsArray = Object.values(streamToolCalls);
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
content: streamContent ||
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
443
|
+
if (isQwen3) {
|
|
444
|
+
// qwen3: assistant message is the raw streamed content (includes <tool_call> tags)
|
|
445
|
+
messages.push({ role: 'assistant', content: streamContent || '' });
|
|
446
|
+
} else {
|
|
447
|
+
messages.push({
|
|
448
|
+
role: 'assistant',
|
|
449
|
+
content: visibleContent || null,
|
|
450
|
+
tool_calls: toolCallsArray.length > 0 ? toolCallsArray : undefined
|
|
451
|
+
});
|
|
452
|
+
}
|
|
470
453
|
|
|
471
|
-
// ──
|
|
472
|
-
if (
|
|
473
|
-
for (const toolCall of
|
|
454
|
+
// ── Execute tool calls ────────────────────────────────────────────────
|
|
455
|
+
if (toolCallsArray.length > 0) {
|
|
456
|
+
for (const toolCall of toolCallsArray) {
|
|
474
457
|
if (controller.signal.aborted) break;
|
|
475
458
|
|
|
476
459
|
const { name, arguments: args } = toolCall.function;
|
|
477
|
-
|
|
460
|
+
let parsedArgs;
|
|
461
|
+
try { parsedArgs = typeof args === 'string' ? JSON.parse(args) : args; }
|
|
462
|
+
catch { parsedArgs = {}; }
|
|
478
463
|
|
|
479
464
|
this.emit('tool_activity', {
|
|
480
|
-
agentId,
|
|
481
|
-
event: 'tool_start',
|
|
482
|
-
tool: name,
|
|
465
|
+
agentId, event: 'tool_start', tool: name,
|
|
483
466
|
description: this._toolDesc(name, parsedArgs)
|
|
484
467
|
});
|
|
485
|
-
|
|
486
468
|
console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
|
|
487
469
|
toolsUsed.push(name);
|
|
488
470
|
|
|
489
471
|
const result = await this._executeTool(name, parsedArgs, workDir);
|
|
490
472
|
|
|
491
|
-
this.emit('tool_activity', {
|
|
492
|
-
agentId,
|
|
493
|
-
event: 'tool_end',
|
|
494
|
-
tool: name,
|
|
495
|
-
description: `✓ ${name}`
|
|
496
|
-
});
|
|
473
|
+
this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
|
|
497
474
|
|
|
498
|
-
// If the tool returned an image (base64), push it as a vision message
|
|
499
|
-
// so the model can actually see what was captured.
|
|
500
|
-
// Also forward to dashboard so the user sees the screenshot in chat.
|
|
501
475
|
const isImageResult = typeof result === 'string' && result.startsWith('data:image/');
|
|
502
476
|
if (isImageResult && parsedArgs.send_to_user === true) {
|
|
503
477
|
this.emit('agent_image', { agentId, image: result });
|
|
504
478
|
}
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
role: 'user',
|
|
514
|
-
|
|
515
|
-
images: [base64]
|
|
516
|
-
});
|
|
479
|
+
|
|
480
|
+
if (isQwen3) {
|
|
481
|
+
// qwen3 format: tool results go back as user messages with <tool_response> tags
|
|
482
|
+
if (isImageResult && isVision) {
|
|
483
|
+
const base64 = result.replace(/^data:image\/\w+;base64,/, '');
|
|
484
|
+
messages.push({ role: 'user', content: '<tool_response>\n[Screenshot captured]\n</tool_response>', images: [base64] });
|
|
485
|
+
} else {
|
|
486
|
+
const resultText = isImageResult ? '[Screenshot captured — vision model needed to analyze]' : String(result).slice(0, 8000);
|
|
487
|
+
messages.push({ role: 'user', content: `<tool_response>\n${resultText}\n</tool_response>` });
|
|
488
|
+
}
|
|
517
489
|
} else {
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
tool_call_id: toolCall.id || undefined,
|
|
521
|
-
|
|
522
|
-
|
|
490
|
+
// Standard OpenAI format
|
|
491
|
+
if (isImageResult && isVision) {
|
|
492
|
+
messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: '[Screenshot captured — see image attached]' });
|
|
493
|
+
const base64 = result.replace(/^data:image\/\w+;base64,/, '');
|
|
494
|
+
messages.push({ role: 'user', content: 'Here is the screenshot:', images: [base64] });
|
|
495
|
+
} else {
|
|
496
|
+
messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: isImageResult ? '[Screenshot captured]' : String(result).slice(0, 8000) });
|
|
497
|
+
}
|
|
523
498
|
}
|
|
524
499
|
}
|
|
525
|
-
//
|
|
526
|
-
continue;
|
|
500
|
+
continue; // loop back for next model turn
|
|
527
501
|
}
|
|
528
502
|
|
|
529
|
-
// ── No tool calls: final answer
|
|
530
|
-
if (
|
|
531
|
-
finalContent = streamContent;
|
|
532
|
-
}
|
|
503
|
+
// ── No tool calls: final answer ───────────────────────────────────────
|
|
504
|
+
if (visibleContent) finalContent = visibleContent;
|
|
533
505
|
break;
|
|
534
506
|
|
|
535
507
|
}
|
|
536
508
|
|
|
537
|
-
|
|
538
|
-
if (!finalContent && allOutput) {
|
|
539
|
-
finalContent = allOutput;
|
|
540
|
-
}
|
|
509
|
+
if (!finalContent && allOutput) finalContent = allOutput;
|
|
541
510
|
|
|
542
511
|
// If still no output (model did only tool calls, never wrote text), ask for a summary.
|
|
543
512
|
// Use only the last 6 messages to avoid context overflow after many tool-call turns.
|