@hamp10/agentforge 0.2.15 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agentforge.js +25 -2
- package/package.json +5 -1
- package/scripts/postinstall.js +62 -0
- package/src/OllamaAgent.js +938 -252
- package/src/hampagent/browser.js +209 -73
- package/src/selfUpdate.js +7 -2
- package/src/worker.js +68 -36
- package/templates/agent/AGENTFORGE.md +120 -0
package/src/OllamaAgent.js
CHANGED
|
@@ -1,123 +1,99 @@
|
|
|
1
1
|
import { exec } from 'child_process';
|
|
2
|
-
import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync } from 'fs';
|
|
2
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync, appendFileSync } from 'fs';
|
|
3
3
|
import { EventEmitter } from 'events';
|
|
4
4
|
import path from 'path';
|
|
5
5
|
import { promisify } from 'util';
|
|
6
6
|
import { fileURLToPath } from 'url';
|
|
7
|
+
import { browserAction } from './hampagent/browser.js';
|
|
7
8
|
|
|
8
9
|
const execAsync = promisify(exec);
|
|
9
10
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
11
|
|
|
11
|
-
//
|
|
12
|
-
//
|
|
12
|
+
// ── Worker log file — every console.log line is mirrored to
// /tmp/agentforge/worker.log so logs are accessible remotely via SSH
// regardless of how the worker was started.
const WORKER_LOG = '/tmp/agentforge/worker.log';
try { mkdirSync('/tmp/agentforge', { recursive: true }); } catch {}

/**
 * Render one console.log argument as text for the log file.
 *
 * - Error instances are written with their stack (JSON.stringify would
 *   produce "{}" because Error properties are non-enumerable).
 * - Other objects are JSON-encoded; if encoding fails (circular reference,
 *   BigInt member) or yields nothing, String() is used instead so one bad
 *   argument cannot drop the whole log line.
 * - Primitives go through String().
 *
 * @param {*} arg - A single argument passed to console.log.
 * @returns {string} Text representation for the log file.
 */
const _formatLogArg = (arg) => {
  if (arg instanceof Error) return arg.stack || String(arg);
  if (typeof arg !== 'object' || arg === null) return String(arg);
  try {
    return JSON.stringify(arg) ?? String(arg);
  } catch {
    return String(arg);
  }
};

const _origLog = console.log.bind(console);
console.log = (...args) => {
  _origLog(...args);
  try {
    const line = args.map(_formatLogArg).join(' ');
    appendFileSync(WORKER_LOG, `${new Date().toISOString()} ${line}\n`);
  } catch {
    // Best-effort mirror: a failed file write must never break the caller's logging.
  }
};
|
|
24
|
+
|
|
25
|
+
// Tool catalogue. Each row is [name, description, properties, required] and is
// expanded below into the OpenAI-style { type: 'function', function: {...} } shape.
// Ollama's `tools` API param is unreliable, so these definitions are rendered as
// text for the system prompt instead; descriptions stay short to fit a 4096 token
// context window.
const TOOL_DEFS = [
  [
    'bash',
    'Run a shell command. Returns stdout/stderr.',
    { command: { type: 'string' } },
    ['command'],
  ],
  [
    'read_file',
    'Read a file.',
    { path: { type: 'string' } },
    ['path'],
  ],
  [
    'write_file',
    'Write a file.',
    { path: { type: 'string' }, content: { type: 'string' } },
    ['path', 'content'],
  ],
  [
    'list_directory',
    'List files in a directory.',
    { path: { type: 'string' } },
    ['path'],
  ],
  [
    'web_fetch',
    'Fetch text content from a URL.',
    { url: { type: 'string' } },
    ['url'],
  ],
  [
    'take_screenshot',
    'Screenshot the screen. Set send_to_user=true only if user asked to see it.',
    { target: { type: 'string', enum: ['screen', 'browser'] }, send_to_user: { type: 'boolean' } },
    ['target'],
  ],
  [
    'screenshot_and_describe',
    'Screenshot a URL and get AI visual analysis. Use after building any web app to verify it looks correct before reporting done. Set send_to_user:true to show the screenshot to the user in chat.',
    {
      url: { type: 'string', description: 'URL to screenshot (e.g. http://localhost:3458)' },
      check_for: { type: 'string', description: 'What should be visible (e.g. "snake game with canvas, scoreboard, and game controls")' },
      send_to_user: { type: 'boolean', description: 'Send screenshot image to user in chat (true when confirmed working)' },
    },
    ['url'],
  ],
].map(([name, description, properties, required]) => ({
  type: 'function',
  function: {
    name,
    description,
    parameters: { type: 'object', properties, required },
  },
}));

// Minimal <tools> block for the system prompt: one compact JSON object per line
// (only the inner `function` spec of each tool), with no enclosing array.
const TOOLS_XML = [
  '<tools>',
  ...TOOL_DEFS.map(({ function: spec }) => JSON.stringify(spec)),
  '</tools>',
].join('\n');
|
|
117
93
|
|
|
118
94
|
/**
|
|
119
95
|
* Parse <tool_call>...</tool_call> blocks from streamed content.
|
|
120
|
-
*
|
|
96
|
+
* Some models emit: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
|
|
121
97
|
* Returns array of {name, arguments} or null if no complete tool calls found.
|
|
122
98
|
*/
|
|
123
99
|
function _parseToolCallTags(content) {
|
|
@@ -135,67 +111,170 @@ function _parseToolCallTags(content) {
|
|
|
135
111
|
return calls.length > 0 ? calls : null;
|
|
136
112
|
}
|
|
137
113
|
|
|
114
|
+
/**
 * Parse WRITE_FILE code-fence blocks out of model output.
 *
 * Models struggle to JSON-escape large code files (unescaped quotes break
 * JSON.parse). This format sidesteps that: the path is on the header line and
 * the raw content sits in a code fence, with no escaping needed.
 *
 * Accepted formats (keyword is case-insensitive, colon optional):
 *   WRITE_FILE /abs/path/to/file.js
 *   ```
 *   ...raw content...
 *   ```
 *
 *   write_file: /abs/path/to/file.js
 *   ```javascript
 *   ...content...
 *   ```
 *
 * Closing-fence rule: the first ``` that starts a line closes the block, so a
 * triple backtick appearing mid-line inside the file content does not truncate
 * it. If no line-initial fence exists, the first ``` anywhere is used.
 *
 * A path wrapped in matching backticks or quotes is unwrapped.
 *
 * @param {string} content - Full text emitted by the model.
 * @returns {Array<{name: string, arguments: {path: string, content: string}}>|null}
 *   One write_file call per block found, or null when there are none.
 */
function _parseWriteFileFences(content) {
  if (!content) return null;

  const FENCE = '```';
  const calls = [];
  // Keyword, path on the rest of that line, then the opening fence line.
  const headerRe = /(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n/gi;

  let header;
  while ((header = headerRe.exec(content)) !== null) {
    const bodyStart = headerRe.lastIndex;

    let bodyEnd;
    if (content.startsWith(FENCE, bodyStart)) {
      // Fence closes immediately: empty file.
      bodyEnd = bodyStart;
    } else {
      const lineFence = content.indexOf(`\n${FENCE}`, bodyStart);
      // Keep the newline that precedes the closing fence as part of the content.
      bodyEnd = lineFence !== -1 ? lineFence + 1 : content.indexOf(FENCE, bodyStart);
    }
    // No closing fence after this header means none exists for later headers either.
    if (bodyEnd === -1) break;

    // Resume scanning after the closing fence so content is never re-read as a header.
    headerRe.lastIndex = bodyEnd + FENCE.length;

    const filePath = header[1].trim().replace(/^(["'`])(.*)\1$/, '$2').trim();
    if (filePath) {
      calls.push({
        name: 'write_file',
        arguments: { path: filePath, content: content.slice(bodyStart, bodyEnd) },
      });
    }
  }

  return calls.length > 0 ? calls : null;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Fallback parser for "Writing filename...\n```lang\ncontent\n```" blocks.
 *
 * Many local models ignore the WRITE_FILE instruction and emit plain markdown
 * code blocks preceded by a "Writing X..." status line. This extracts the
 * filename from that line and turns each block into a write_file call.
 *
 * Relative filenames are resolved against a project directory inferred from
 * the last `mkdir -p <path>` in the content:
 *   - only the first path argument is taken (double-quoted, single-quoted, or
 *     a bare token that stops at whitespace, quotes, backslashes and the shell
 *     operators & | ; < >), so `mkdir -p /a/b && cd /a/b` yields `/a/b`;
 *   - a leading `~` is expanded to $HOME (or /tmp when HOME is unset);
 *   - paths still containing `$` are ignored.
 * When no usable mkdir is found, workDir is used. If workDir is also missing
 * the filename is left relative instead of being prefixed with "undefined/".
 *
 * Blocks whose trimmed content is shorter than 5 characters are treated as
 * status echoes and skipped.
 *
 * @param {string} content - Full text emitted by the model.
 * @param {string} [workDir] - Default directory for relative filenames.
 * @returns {Array<{name: string, arguments: {path: string, content: string}}>|null}
 *   One write_file call per block found, or null when there are none.
 */
function _parseWritingFallback(content, workDir) {
  if (!content) return null;

  // Infer the project dir from the last mkdir -p command in the stream.
  let projectDir = workDir;
  const mkdirRe = /mkdir\s+-p\s+(?:"([^"\n]+)"|'([^'\n]+)'|([^\s"'&|;<>\\]+))/g;
  const mkdirMatches = [...content.matchAll(mkdirRe)];
  if (mkdirMatches.length > 0) {
    const last = mkdirMatches[mkdirMatches.length - 1];
    const rawDir = (last[1] ?? last[2] ?? last[3]).trim();
    const home = process.env.HOME || '/tmp';
    // Function replacer: HOME is inserted literally even if it contains "$".
    const candidate = rawDir.replace(/^~(?=\/|$)/, () => home);
    if (candidate && !candidate.includes('$')) projectDir = candidate;
  }

  // Match: "Writing filename...\n```lang\ncontent\n```"
  const blockRe = /Writing\s+([\w./\-]+?)(?:\.{3})?\s*\n```[^\n]*\n([\s\S]*?)```(?:\n|$)/gi;
  const calls = [];
  let m;
  while ((m = blockRe.exec(content)) !== null) {
    const filename = m[1].trim();
    const fileContent = m[2];
    if (!filename || fileContent === undefined) continue;
    // Skip status echoes that carry no real code.
    if (fileContent.trim().length < 5) continue;

    let filePath = filename;
    if (!filename.startsWith('/') && projectDir) {
      filePath = `${projectDir}/${filename}`;
    }
    calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
  }

  return calls.length > 0 ? calls : null;
}
|
|
181
|
+
|
|
138
182
|
/**
 * Detect text-based tool calls in model content.
 *
 * Models that don't use native tool_calls emit JSON in their text instead.
 * Every JSON value that begins at the start of a line is examined, which
 * covers both pure-JSON replies and "narration\n{json}" mixed replies.
 * A top-level array contributes each of its recognisable items.
 *
 * Recognised object shapes:
 *   - {name, arguments}                   OpenAI-style; `arguments` may be a JSON string
 *   - {tool, args}                        alternate style
 *   - {action, path|content|command|url|target}   model shorthand; the action is
 *     lower-cased, spaces/hyphens become underscores, and the aliases
 *     write/read/list/run/execute/exec map to write_file/read_file/
 *     list_directory/bash/bash/bash. Other action names are used as-is.
 *
 * A candidate starting with `{` or `[` may span several lines; lines are
 * accumulated until brackets balance (brackets inside JSON strings are
 * ignored). If a candidate never balances, or closes more brackets than it
 * opened, it cannot be valid JSON: scanning restarts on the line after its
 * first line so tool calls that follow a stray `{` are not swallowed.
 *
 * @param {string} content - Text emitted by the model.
 * @returns {Array<{name: string, arguments: *}>|null} Tool calls in order of
 *   appearance, or null when none are found.
 */
function _parseTextToolCalls(content) {
  if (!content) return null;
  const trimmed = content.trim();
  if (!trimmed) return null;

  // Map (not a plain object) so action names like "constructor" cannot hit prototype keys.
  const ACTION_ALIASES = new Map([
    ['write', 'write_file'],
    ['read', 'read_file'],
    ['list', 'list_directory'],
    ['run', 'bash'],
    ['execute', 'bash'],
    ['exec', 'bash'],
  ]);

  // Normalise a single parsed value into {name, arguments}, or null if it is not a tool call.
  const normalise = (obj) => {
    try {
      if (typeof obj.name === 'string' && obj.arguments !== undefined) {
        const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
        return { name: obj.name, arguments: args };
      }
      if (typeof obj.tool === 'string' && obj.args !== undefined) {
        return { name: obj.tool, arguments: obj.args };
      }
      if (typeof obj.action === 'string') {
        const action = obj.action.toLowerCase().replace(/[ -]/g, '_');
        const toolName = ACTION_ALIASES.get(action) ?? action;
        const args = {};
        for (const key of ['path', 'content', 'command', 'url', 'target']) {
          if (obj[key] !== undefined) args[key] = obj[key];
        }
        if (Object.keys(args).length > 0) return { name: toolName, arguments: args };
      }
    } catch {
      // null/primitive input or an unparsable `arguments` string: not a tool call.
    }
    return null;
  };

  const calls = [];
  const lines = trimmed.split('\n');
  let i = 0;
  while (i < lines.length) {
    const first = lines[i].trim();
    if (!first.startsWith('{') && !first.startsWith('[')) {
      i++;
      continue;
    }

    // Accumulate lines until the brackets balance (handles multi-line JSON).
    const start = i;
    let jsonStr = '';
    let depth = 0;
    let closed = false;
    while (i < lines.length) {
      const l = lines[i];
      jsonStr += (jsonStr ? '\n' : '') + l;
      // String state is per line: valid JSON strings cannot contain raw newlines.
      let inString = false;
      let escape = false;
      for (const ch of l) {
        if (escape) { escape = false; continue; }
        if (ch === '\\' && inString) { escape = true; continue; }
        if (ch === '"') { inString = !inString; continue; }
        if (!inString) {
          if (ch === '{' || ch === '[') depth++;
          else if (ch === '}' || ch === ']') depth--;
        }
      }
      i++;
      if (depth <= 0) {
        closed = depth === 0;
        break;
      }
    }

    if (!closed) {
      // Unbalanced candidate: give the lines after its first line their own chance.
      i = start + 1;
      continue;
    }

    try {
      const parsed = JSON.parse(jsonStr.trim());
      const items = Array.isArray(parsed) ? parsed : [parsed];
      for (const item of items) {
        const call = normalise(item);
        if (call) calls.push(call);
      }
    } catch {
      // Balanced but not valid JSON (e.g. a markdown link): ignore and move on.
    }
  }

  return calls.length > 0 ? calls : null;
}
|
|
@@ -255,13 +334,13 @@ export class OllamaAgent extends EventEmitter {
|
|
|
255
334
|
return { agentId, workDir };
|
|
256
335
|
}
|
|
257
336
|
|
|
258
|
-
async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null) {
|
|
337
|
+
async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null) {
|
|
259
338
|
const startTime = Date.now();
|
|
260
339
|
const controller = new AbortController();
|
|
261
340
|
|
|
262
341
|
// Use per-agent model override if provided (and not the placeholder 'Default').
|
|
263
|
-
// Strip 'ollama/' prefix — catalog returns IDs like 'ollama/
|
|
264
|
-
// Ollama's API expects bare names like '
|
|
342
|
+
// Strip 'ollama/' prefix — catalog returns IDs like 'ollama/modelname:tag' but
|
|
343
|
+
// Ollama's API expects bare names like 'modelname:tag'.
|
|
265
344
|
const rawModel = (agentModel && agentModel !== 'Default') ? agentModel : this.model;
|
|
266
345
|
const effectiveModel = rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel;
|
|
267
346
|
|
|
@@ -273,69 +352,153 @@ export class OllamaAgent extends EventEmitter {
|
|
|
273
352
|
console.log(` Task: ${task}`);
|
|
274
353
|
console.log(` Working dir: ${workDir}`);
|
|
275
354
|
|
|
276
|
-
// Detect model capabilities
|
|
277
|
-
const isQwen3 = effectiveModel.startsWith('qwen3');
|
|
278
|
-
const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(effectiveModel);
|
|
279
|
-
|
|
280
355
|
try {
|
|
281
|
-
// Load conversation history
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
356
|
+
// Load conversation history — prefer Railway DB history (sent via task payload, works across
|
|
357
|
+
// any machine/user/model). Fall back to local file for offline or pre-fix sessions.
|
|
358
|
+
const history = (conversationHistory && conversationHistory.length > 0)
|
|
359
|
+
? conversationHistory.slice(-20)
|
|
360
|
+
: this._loadHistory(agentId, workDir, sessionId);
|
|
361
|
+
|
|
362
|
+
// Text-based tool format is used rather than XML schemas — more reliable across models.
|
|
363
|
+
// Use flow's custom system prompt if provided, otherwise fall back to built-in default.
|
|
364
|
+
// ALL models get the same rule set and tool format — no model-specific branching.
|
|
365
|
+
const homeDir = process.env.HOME || '/tmp';
|
|
366
|
+
const projectsDir = `${homeDir}/Desktop/Projects`;
|
|
367
|
+
const universalRules = `
|
|
368
|
+
== WHAT YOU CAN DO ==
|
|
369
|
+
You have these tools:
|
|
370
|
+
|
|
371
|
+
bash: Run any shell command — file ops, servers, packages, logs, system queries.
|
|
372
|
+
read_file: Read a local file.
|
|
373
|
+
WRITE_FILE: Write a local file (code-fence format only).
|
|
374
|
+
list_directory: List a local directory.
|
|
375
|
+
web_fetch: Fetch any public URL — websites, APIs, docs, raw data. Fast, text-only.
|
|
376
|
+
screenshot_and_describe: Navigate a real browser to any URL and screenshot it. Use this when pages require JavaScript, you need visual output, or web_fetch returns nothing useful.
|
|
377
|
+
browser: Control the AgentForge Browser directly (Chrome, always running, logged into user's services). Use for ALL browser interaction — navigating, clicking, typing, reading page content, screenshots.
|
|
378
|
+
|
|
379
|
+
BROWSER TOOL — use this instead of writing CDP scripts:
|
|
380
|
+
{"name":"browser","arguments":{"action":"tabs"}} ← list ALL open tabs with URLs (DO THIS FIRST)
|
|
381
|
+
{"name":"browser","arguments":{"action":"snapshot"}} ← read current page content + interactive elements (also shows all tabs)
|
|
382
|
+
{"name":"browser","arguments":{"action":"navigate","url":"https://..."}} ← go to URL
|
|
383
|
+
{"name":"browser","arguments":{"action":"focus","url":"expireddomains"}} ← switch to a tab by URL fragment
|
|
384
|
+
{"name":"browser","arguments":{"action":"click","ref":3}} ← click element by index from snapshot
|
|
385
|
+
{"name":"browser","arguments":{"action":"click","text":"Show Filter"}} ← click element by visible text
|
|
386
|
+
{"name":"browser","arguments":{"action":"click","selector":"#filter-btn"}} ← click by CSS selector
|
|
387
|
+
{"name":"browser","arguments":{"action":"type","selector":"input","text":"hello"}} ← type text
|
|
388
|
+
{"name":"browser","arguments":{"action":"screenshot"}} ← take screenshot
|
|
389
|
+
{"name":"browser","arguments":{"action":"evaluate","script":"document.title"}} ← run JS
|
|
390
|
+
{"name":"browser","arguments":{"action":"scroll","y":400}} ← scroll down
|
|
391
|
+
|
|
392
|
+
WORKFLOW when user says "the tab is already open":
|
|
393
|
+
1. browser tabs → see ALL open tabs and their URLs
|
|
394
|
+
2. browser focus with the URL fragment of the tab you need (e.g. "expireddomains")
|
|
395
|
+
3. browser snapshot → read page content and get element indices
|
|
396
|
+
4. browser click to interact (by ref index, by text, or by selector)
|
|
397
|
+
5. browser snapshot again to see result
|
|
398
|
+
The browser has the user's sessions and cookies. You CAN click any button, filter, or link visible on the page.
|
|
399
|
+
|
|
400
|
+
== GENERAL RULES (all tasks) ==
|
|
401
|
+
G1. IDENTIFY THE TASK TYPE. Build? Research? Question? Match approach to task.
|
|
402
|
+
G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer.
|
|
403
|
+
G3. ANY WEBSITE/URL IS ACCESSIBLE. User mentions a site or open tab? Use browser snapshot to see what's currently open, then browser navigate/click/type to interact. Never ask "what's the URL?" — find it yourself.
|
|
404
|
+
G4. NEVER ASK PERMISSION. Never say "should I use X or Y?" — pick the right tool and use it.
|
|
405
|
+
G5. IF A TOOL FAILS: Try a different approach. web_fetch empty → screenshot_and_describe. Never repeat a failing call identically.
|
|
406
|
+
G6. RESEARCH TASKS: web_fetch → read → reason → respond in text. No server, no localhost.
|
|
407
|
+
G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data.
|
|
408
|
+
G8. WHEN GENUINELY STUCK: State what you tried, what failed, ask ONE specific question.
|
|
409
|
+
G9. KEEP GOING until the task is fully complete.
|
|
410
|
+
|
|
411
|
+
== BUILD RULES (only when building apps/games/tools) ==
|
|
412
|
+
B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spaces — use underscores).
|
|
413
|
+
B2. WRITE EVERY FILE COMPLETELY — no stubs, no placeholders, no TODOs. Full working code only.
|
|
414
|
+
B3. BUILD FILE BY FILE — write each file completely before writing the next.
|
|
415
|
+
B4. ALWAYS use absolute paths.
|
|
416
|
+
B5. SERVING FILES: Node.js server: nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 & — NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server PORT --directory /abs/path/ > /tmp/server.log 2>&1 &
|
|
417
|
+
B6. npm install: cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express
|
|
418
|
+
B7. After starting server, verify: sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:PORT — if 000, check /tmp/server.log and fix the error.
|
|
419
|
+
B8. PORT MANAGEMENT: Check port before starting: lsof -i :PORT | head -3. If in use: kill old process, restart. If crashed: restart. If busy with something else: pick different port.
|
|
420
|
+
B9. EXPRESS WILDCARD ROUTE: NEVER write app.get('*', ...) — crashes in newer versions. Use app.use((req, res) => { ... }) instead.
|
|
421
|
+
B10. MANDATORY SCREENSHOT QA: After curl returns 200, call screenshot_and_describe with send_to_user:true. You are NOT done until the screenshot shows the real working app.
|
|
422
|
+
B11. ALWAYS open the finished app: bash open http://localhost:PORT
|
|
423
|
+
B12. CANVAS GAMES: canvas 800×600, dark background #1a1a2e, all elements clearly visible. Dark theme, styled UI.
|
|
424
|
+
B13. OBSERVE BEFORE FIXING: Screenshot first, then make targeted edits. Never rewrite an entire file from scratch when the server is running.
|
|
425
|
+
B14. TARGETED EDITS: read_file to see current code, write_file only the changed section. Never throw away working code.
|
|
426
|
+
B15. QUALITY LOOP: After each fix, screenshot again to verify. Iterate until it looks correct.
|
|
427
|
+
B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.`;
|
|
428
|
+
// Text-based tool format works reliably across all local models.
|
|
429
|
+
// WRITE_FILE uses code-fence to avoid JSON-escaping issues; all other tools use JSON.
|
|
430
|
+
const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nDO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nTO WRITE A FILE (only when actually writing code/content to disk):\nWriting server.js...\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\n\nFOR ALL OTHER TOOLS — output JSON on its own line:\nRunning command...\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for","send_to_user":true}} — open ANY URL in a real browser and screenshot it. Use when pages are dynamic/JS-heavy or you need to show the user visuals.\n\n${universalRules}`;
|
|
431
|
+
const systemPrompt = customSystemPrompt || jsonToolFormat;
|
|
302
432
|
|
|
303
433
|
const messages = [
|
|
304
434
|
{ role: 'system', content: systemPrompt },
|
|
305
435
|
...history,
|
|
306
436
|
];
|
|
307
437
|
|
|
308
|
-
// Attach initial image
|
|
438
|
+
// Attach initial image if provided — always include it; models that don't support
|
|
439
|
+
// images will ignore the field, and if they error we catch it below.
|
|
309
440
|
const userMessage = { role: 'user', content: task };
|
|
310
|
-
if (image
|
|
441
|
+
if (image) {
|
|
311
442
|
const base64 = image.replace(/^data:image\/\w+;base64,/, '');
|
|
312
443
|
userMessage.images = [base64];
|
|
313
444
|
}
|
|
314
445
|
messages.push(userMessage);
|
|
315
446
|
|
|
447
|
+
// Force-unload any currently loaded model so it reloads with our num_ctx setting.
|
|
448
|
+
// Model-agnostic and machine-agnostic — guarantees 32K context on every task.
|
|
449
|
+
try {
|
|
450
|
+
await fetch(`${this.baseUrl}/api/generate`, {
|
|
451
|
+
method: 'POST', signal: controller.signal,
|
|
452
|
+
headers: { 'Content-Type': 'application/json' },
|
|
453
|
+
body: JSON.stringify({ model: effectiveModel, keep_alive: 0, prompt: '' })
|
|
454
|
+
});
|
|
455
|
+
} catch { /* ignore — model may not be loaded yet */ }
|
|
456
|
+
|
|
316
457
|
let finalContent = '';
|
|
317
458
|
let allOutput = ''; // accumulate everything streamed across all turns
|
|
318
459
|
const toolsUsed = []; // track tool names called (for fallback summary)
|
|
319
|
-
|
|
460
|
+
// No hard turn limit — agent runs until done, loop-detected, or wall-clock timeout.
|
|
461
|
+
const recentCalls = []; // last N tool calls for loop detection
|
|
462
|
+
let emptyRetries = 0; // consecutive empty-response retries
|
|
320
463
|
|
|
321
|
-
for (let turn = 0;
|
|
464
|
+
for (let turn = 0; ; turn++) {
|
|
322
465
|
if (controller.signal.aborted) break;
|
|
323
466
|
|
|
324
467
|
this.emit('tool_activity', { agentId, event: 'tool_start', tool: 'model', description: `Thinking…` });
|
|
325
468
|
|
|
469
|
+
// All local Ollama models use the native /api/chat endpoint.
|
|
470
|
+
// The OpenAI-compatible /v1/chat/completions endpoint ignores options.num_ctx,
|
|
471
|
+
// causing all models to run at 4096-token context regardless of what we pass.
|
|
472
|
+
const isOllamaBackend = this.baseUrl.includes('11434') || this.baseUrl.includes('localhost') || this.baseUrl.includes('127.0.0.1');
|
|
473
|
+
const useNativeEndpoint = isOllamaBackend; // all local models use native endpoint
|
|
474
|
+
|
|
326
475
|
let response;
|
|
327
476
|
try {
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
//
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
477
|
+
|
|
478
|
+
let requestBody;
|
|
479
|
+
let endpoint;
|
|
480
|
+
|
|
481
|
+
if (useNativeEndpoint) {
|
|
482
|
+
// Ollama native format — supports think:false at top level
|
|
483
|
+
endpoint = `${this.baseUrl}/api/chat`;
|
|
484
|
+
requestBody = {
|
|
485
|
+
model: effectiveModel,
|
|
486
|
+
messages,
|
|
487
|
+
stream: true,
|
|
488
|
+
think: false, // top-level think disable — WORKS on native endpoint
|
|
489
|
+
options: { num_ctx: 32768 },
|
|
490
|
+
};
|
|
491
|
+
} else {
|
|
492
|
+
endpoint = `${this.baseUrl}/v1/chat/completions`;
|
|
493
|
+
requestBody = {
|
|
494
|
+
model: effectiveModel,
|
|
495
|
+
messages,
|
|
496
|
+
stream: true,
|
|
497
|
+
options: { num_ctx: 32768 },
|
|
498
|
+
};
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
response = await fetch(endpoint, {
|
|
339
502
|
method: 'POST',
|
|
340
503
|
headers: { 'Content-Type': 'application/json' },
|
|
341
504
|
signal: controller.signal,
|
|
@@ -351,21 +514,30 @@ export class OllamaAgent extends EventEmitter {
|
|
|
351
514
|
throw new Error(`Local model error ${response.status}: ${body}`);
|
|
352
515
|
}
|
|
353
516
|
|
|
354
|
-
// ── Stream the
|
|
355
|
-
//
|
|
356
|
-
//
|
|
357
|
-
//
|
|
358
|
-
|
|
517
|
+
// ── Stream the response ──
|
|
518
|
+
// Two formats:
|
|
519
|
+
// Ollama native (/api/chat): NDJSON lines — {"message":{"content":"..."},"done":false}
|
|
520
|
+
// OpenAI-compatible (/v1/...): SSE lines — data: {"choices":[{"delta":{"content":"..."}}]}
|
|
521
|
+
// Models may emit <tool_call>...</tool_call> or <think>...</think> blocks in text content.
|
|
522
|
+
// Stream text live to user; suppress think blocks and raw JSON tool call blobs.
|
|
523
|
+
let streamContent = ''; // full accumulated text (including any tool_call/think blocks)
|
|
359
524
|
let visibleContent = ''; // text emitted live to user (no tool_call or think blocks)
|
|
360
|
-
let streamToolCalls = {}; // OpenAI-format tool calls
|
|
525
|
+
let streamToolCalls = {}; // OpenAI-format tool calls from native tool_calls field
|
|
361
526
|
let inThinkBlock = false;
|
|
362
527
|
let inToolCallBlock = false; // inside <tool_call>...</tool_call>
|
|
528
|
+
let inJsonBlob = false; // inside bare JSON tool call — suppress from streaming
|
|
529
|
+
let inFenceBlock = false; // inside WRITE_FILE code fence — suppress content from streaming
|
|
530
|
+
let fenceDepth = 0; // ``` count since last WRITE_FILE (even=closed, odd=open)
|
|
363
531
|
let rawTokenCount = 0;
|
|
532
|
+
let lastVisibleAt = Date.now(); // track when we last got visible output (for think timeout)
|
|
364
533
|
|
|
365
534
|
const reader = response.body.getReader();
|
|
366
535
|
const decoder = new TextDecoder();
|
|
367
536
|
let buf = '';
|
|
368
537
|
|
|
538
|
+
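// No fixed per-turn deadline — a local model may take as long as it needs to produce output.
|
|
539
|
+
// A turn ends on user abort (controller.signal), stream end, or the think-block watchdog below (>90s inside <think> with no visible output), which cancels the stream.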
|
|
540
|
+
let turnRetry = false;
|
|
369
541
|
while (true) {
|
|
370
542
|
if (controller.signal.aborted) break;
|
|
371
543
|
const { done, value } = await reader.read();
|
|
@@ -376,33 +548,47 @@ export class OllamaAgent extends EventEmitter {
|
|
|
376
548
|
buf = lines.pop();
|
|
377
549
|
|
|
378
550
|
for (const line of lines) {
|
|
379
|
-
if (!line.
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
551
|
+
if (!line.trim()) continue;
|
|
552
|
+
|
|
553
|
+
let tokenText = null;
|
|
554
|
+
|
|
555
|
+
if (useNativeEndpoint) {
|
|
556
|
+
// Ollama native NDJSON format
|
|
557
|
+
let nativeEvt;
|
|
558
|
+
try { nativeEvt = JSON.parse(line); } catch { continue; }
|
|
559
|
+
if (nativeEvt.done) continue;
|
|
560
|
+
tokenText = nativeEvt.message?.content ?? null;
|
|
561
|
+
} else {
|
|
562
|
+
// OpenAI SSE format
|
|
563
|
+
if (!line.startsWith('data: ')) continue;
|
|
564
|
+
const payload = line.slice(6).trim();
|
|
565
|
+
if (payload === '[DONE]') continue;
|
|
566
|
+
let evt;
|
|
567
|
+
try { evt = JSON.parse(payload); } catch { continue; }
|
|
568
|
+
|
|
569
|
+
const delta = evt.choices?.[0]?.delta;
|
|
570
|
+
if (!delta) continue;
|
|
571
|
+
|
|
572
|
+
// Standard OpenAI tool_calls from native tool_calls field
|
|
573
|
+
if (delta.tool_calls) {
|
|
574
|
+
for (const tc of delta.tool_calls) {
|
|
575
|
+
const idx = tc.index ?? 0;
|
|
576
|
+
if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
|
|
577
|
+
if (tc.id) streamToolCalls[idx].id = tc.id;
|
|
578
|
+
if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
|
|
579
|
+
if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
|
|
580
|
+
}
|
|
396
581
|
}
|
|
582
|
+
tokenText = delta.content ?? null;
|
|
397
583
|
}
|
|
398
584
|
|
|
399
|
-
if (
|
|
585
|
+
if (tokenText === null) continue;
|
|
400
586
|
rawTokenCount++;
|
|
401
|
-
streamContent +=
|
|
587
|
+
streamContent += tokenText;
|
|
402
588
|
|
|
403
589
|
// Process token through think + tool_call filters, emit visible text live
|
|
404
590
|
// We scan only the new delta token against the current buffer state
|
|
405
|
-
const chunk =
|
|
591
|
+
const chunk = tokenText;
|
|
406
592
|
let visible = '';
|
|
407
593
|
// Simple per-token state machine — handles split tags across tokens by tracking state flags
|
|
408
594
|
if (!inThinkBlock && !inToolCallBlock) {
|
|
@@ -428,9 +614,59 @@ export class OllamaAgent extends EventEmitter {
|
|
|
428
614
|
inToolCallBlock = false;
|
|
429
615
|
}
|
|
430
616
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
617
|
+
// Scan ALL lines completed in this token for state transitions.
|
|
618
|
+
// Multi-char tokens can contain multiple lines (WRITE_FILE + ``` in same token).
|
|
619
|
+
if (tokenText.includes('\n')) {
|
|
620
|
+
const tokenStartIdx = streamContent.length - tokenText.length;
|
|
621
|
+
let nlIdx = streamContent.indexOf('\n', tokenStartIdx);
|
|
622
|
+
while (nlIdx !== -1) {
|
|
623
|
+
// Start of the line that ends at nlIdx: one past the previous '\n', or 0 when there is none.
// lastIndexOf returns -1 for "not found", so +1 already yields 0; clamping to 0 first would skip the line's first character.
const lineStart = streamContent.lastIndexOf('\n', nlIdx - 1) + 1;
|
|
624
|
+
const line = streamContent.slice(lineStart, nlIdx).trim();
|
|
625
|
+
if (/^(WRITE_FILE|write_file)[:\s]+\S/i.test(line)) {
|
|
626
|
+
inFenceBlock = true; fenceDepth = 0;
|
|
627
|
+
} else if (inFenceBlock && /^```/.test(line)) {
|
|
628
|
+
fenceDepth++;
|
|
629
|
+
if (fenceDepth >= 2 && fenceDepth % 2 === 0) inFenceBlock = false;
|
|
630
|
+
} else if (!inFenceBlock && !inJsonBlob && line.length > 1 && (line.startsWith('{') || line.startsWith('['))) {
|
|
631
|
+
inJsonBlob = true;
|
|
632
|
+
}
|
|
633
|
+
nlIdx = streamContent.indexOf('\n', nlIdx + 1);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
// Also check current partial line (mid-token, before next \n)
|
|
638
|
+
if (!inFenceBlock || !inJsonBlob) {
|
|
639
|
+
const cleanSC = streamContent.replace(/<think>[\s\S]*?<\/think>/g, '');
|
|
640
|
+
const lastNL = cleanSC.lastIndexOf('\n');
|
|
641
|
+
const curLine = cleanSC.slice(lastNL + 1).trimStart();
|
|
642
|
+
if (!inFenceBlock && /^(WRITE_FILE|write_file)[:\s]+\S/i.test(curLine)) {
|
|
643
|
+
inFenceBlock = true; fenceDepth = 0;
|
|
644
|
+
}
|
|
645
|
+
if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || curLine.startsWith('['))) {
|
|
646
|
+
inJsonBlob = true;
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
// Emit visible content — safety filter removes any ``` or WRITE_FILE lines
|
|
651
|
+
// that slipped through (e.g. partial token at detection boundary)
|
|
652
|
+
if (visible && !inThinkBlock && !inToolCallBlock && !inJsonBlob && !inFenceBlock) {
|
|
653
|
+
const safe = visible.split('\n').filter(ln => {
|
|
654
|
+
const t = ln.trimStart();
|
|
655
|
+
return !t.startsWith('```') && !/^(WRITE_FILE|write_file)/i.test(t);
|
|
656
|
+
}).join('\n');
|
|
657
|
+
if (safe.trim() || safe.includes('\n')) {
|
|
658
|
+
visibleContent += safe;
|
|
659
|
+
lastVisibleAt = Date.now();
|
|
660
|
+
this.emit('agent_output', { agentId, output: safe, isChunk: true });
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
// Thinking timeout: if the model has been in a <think> block for >90s with no visible output,
|
|
665
|
+
// abort the stream so we can retry with a kick. Prevents infinite thinking loops.
|
|
666
|
+
if (inThinkBlock && (Date.now() - lastVisibleAt) > 90000 && rawTokenCount > 100) {
|
|
667
|
+
console.log(` [${agentId}] ⏱️ Think timeout (>90s, ${rawTokenCount} tokens) — aborting stream`);
|
|
668
|
+
reader.cancel().catch(() => {});
|
|
669
|
+
break;
|
|
434
670
|
}
|
|
435
671
|
}
|
|
436
672
|
}
|
|
@@ -439,17 +675,35 @@ export class OllamaAgent extends EventEmitter {
|
|
|
439
675
|
if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
|
|
440
676
|
|
|
441
677
|
// ── Extract tool calls from content ───────────────────────────────────
|
|
442
|
-
//
|
|
443
|
-
//
|
|
678
|
+
// Try <tool_call> XML tags first (some models emit this format), then fall through
|
|
679
|
+
// to code-fence and JSON text parsers.
|
|
444
680
|
let parsedTagCalls = null;
|
|
445
|
-
if (
|
|
681
|
+
if (Object.keys(streamToolCalls).length === 0) {
|
|
446
682
|
parsedTagCalls = _parseToolCallTags(streamContent);
|
|
447
683
|
if (parsedTagCalls) {
|
|
448
684
|
console.log(` [${agentId}] 🔍 ${parsedTagCalls.length} <tool_call> tag(s) detected`);
|
|
449
685
|
}
|
|
450
686
|
}
|
|
451
687
|
|
|
452
|
-
// Fallback: try
|
|
688
|
+
// Fallback 1: try WRITE_FILE code-fence format (avoids JSON-escaping issues with code)
|
|
689
|
+
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
690
|
+
const fenceCalls = _parseWriteFileFences(streamContent);
|
|
691
|
+
if (fenceCalls) {
|
|
692
|
+
console.log(` [${agentId}] 🔍 ${fenceCalls.length} WRITE_FILE fence(s) detected`);
|
|
693
|
+
parsedTagCalls = fenceCalls;
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Fallback 2: "Writing filename...\n```\ncontent\n```" (model ignored WRITE_FILE instruction)
|
|
698
|
+
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
699
|
+
const writingCalls = _parseWritingFallback(streamContent, workDir);
|
|
700
|
+
if (writingCalls) {
|
|
701
|
+
console.log(` [${agentId}] 🔍 ${writingCalls.length} Writing-block fallback file(s) detected`);
|
|
702
|
+
parsedTagCalls = writingCalls;
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
// Fallback 3: try legacy JSON-blob detection if no tags found
|
|
453
707
|
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
454
708
|
const textCalls = _parseTextToolCalls(streamContent);
|
|
455
709
|
if (textCalls) {
|
|
@@ -458,9 +712,35 @@ export class OllamaAgent extends EventEmitter {
|
|
|
458
712
|
}
|
|
459
713
|
}
|
|
460
714
|
|
|
461
|
-
//
|
|
715
|
+
// Fallback 4: if we found ONLY bash tool calls but content has writing blocks too,
|
|
716
|
+
// merge them so files get written AND bash runs
|
|
717
|
+
if (parsedTagCalls && streamContent) {
|
|
718
|
+
const writingCalls = _parseWritingFallback(streamContent, workDir);
|
|
719
|
+
if (writingCalls) {
|
|
720
|
+
const existingPaths = new Set(parsedTagCalls.filter(c => c.name === 'write_file').map(c => c.arguments.path));
|
|
721
|
+
const newWrites = writingCalls.filter(c => !existingPaths.has(c.arguments.path));
|
|
722
|
+
if (newWrites.length > 0) {
|
|
723
|
+
console.log(` [${agentId}] 🔍 +${newWrites.length} additional Writing-block file(s) merged`);
|
|
724
|
+
// Prepend file writes before bash commands so files exist before server starts
|
|
725
|
+
parsedTagCalls = [...newWrites, ...parsedTagCalls];
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
// Convert tag/text calls into streamToolCalls structure.
|
|
731
|
+
// Deduplicate: if model emits the same tool call N times in one stream, only run it once.
|
|
462
732
|
if (parsedTagCalls) {
|
|
463
|
-
|
|
733
|
+
const seen = new Set();
|
|
734
|
+
const deduped = parsedTagCalls.filter(tc => {
|
|
735
|
+
const key = `${tc.name}:${JSON.stringify(tc.arguments)}`;
|
|
736
|
+
if (seen.has(key)) return false;
|
|
737
|
+
seen.add(key);
|
|
738
|
+
return true;
|
|
739
|
+
});
|
|
740
|
+
if (deduped.length < parsedTagCalls.length) {
|
|
741
|
+
console.log(` [${agentId}] 🔁 Deduplicated ${parsedTagCalls.length} → ${deduped.length} tool call(s)`);
|
|
742
|
+
}
|
|
743
|
+
deduped.forEach((tc, i) => {
|
|
464
744
|
streamToolCalls[i] = { id: `tag-${i}`, type: 'function', function: { name: tc.name, arguments: JSON.stringify(tc.arguments) } };
|
|
465
745
|
});
|
|
466
746
|
// Don't accumulate raw tool_call XML as user-visible output
|
|
@@ -475,17 +755,17 @@ export class OllamaAgent extends EventEmitter {
|
|
|
475
755
|
});
|
|
476
756
|
|
|
477
757
|
// ── Push assistant message ────────────────────────────────────────────
|
|
758
|
+
// All local models now use JSON-in-text format on the native endpoint.
|
|
759
|
+
// Strip <think>...</think> blocks to avoid burning context on reasoning traces.
|
|
478
760
|
const toolCallsArray = Object.values(streamToolCalls);
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
});
|
|
488
|
-
}
|
|
761
|
+
const hasToolCalls = toolCallsArray.length > 0;
|
|
762
|
+
const cleanedContent = (streamContent || '')
|
|
763
|
+
.replace(/<think>[\s\S]*?<\/think>/g, '')
|
|
764
|
+
.trim();
|
|
765
|
+
messages.push({ role: 'assistant', content: cleanedContent || '' });
|
|
766
|
+
|
|
767
|
+
// Incremental save — always, regardless of sessionId (sessionId is null for OllamaAgent)
|
|
768
|
+
this._saveHistory(agentId, workDir, sessionId, messages.slice(1));
|
|
489
769
|
|
|
490
770
|
// ── Execute tool calls ────────────────────────────────────────────────
|
|
491
771
|
if (toolCallsArray.length > 0) {
|
|
@@ -497,14 +777,95 @@ export class OllamaAgent extends EventEmitter {
|
|
|
497
777
|
try { parsedArgs = typeof args === 'string' ? JSON.parse(args) : args; }
|
|
498
778
|
catch { parsedArgs = {}; }
|
|
499
779
|
|
|
780
|
+
// ── Unknown tool name detection ──────────────────────────────────
|
|
781
|
+
// Block calls to tools that don't exist (e.g. model writes {"name":"curl",...}
|
|
782
|
+
// instead of {"name":"bash","arguments":{"command":"curl ..."}})
|
|
783
|
+
const VALID_TOOL_NAMES = new Set(['bash','read_file','write_file','list_directory','web_fetch','screenshot_and_describe','take_screenshot','browser']);
|
|
784
|
+
if (!VALID_TOOL_NAMES.has(name.toLowerCase())) {
|
|
785
|
+
console.log(` [${agentId}] ⚠️ Unknown tool "${name}" — blocked`);
|
|
786
|
+
messages.push({ role: 'user', content: `"${name}" is not a valid tool. Valid tools: bash, read_file, write_file, list_directory, web_fetch, screenshot_and_describe. To run a shell command use bash: {"name":"bash","arguments":{"command":"${name} ..."}}.` });
|
|
787
|
+
continue;
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
// ── Placeholder detection ────────────────────────────────────────
|
|
791
|
+
// Block tool calls where the agent passed a literal placeholder like
|
|
792
|
+
// "[The URL where the auction is being viewed]" instead of a real value.
|
|
793
|
+
// These come from the model reading its own planning text and mistaking it
|
|
794
|
+
// for a concrete argument.
|
|
795
|
+
{
|
|
796
|
+
const argStr = JSON.stringify(parsedArgs);
|
|
797
|
+
const hasPlaceholder = /\[(the |this |your |a |an |current )?(url|path|address|link|tab|page|site|location|file|directory)[^\]]*\]/i.test(argStr);
|
|
798
|
+
if (hasPlaceholder) {
|
|
799
|
+
console.log(` [${agentId}] ⚠️ Placeholder in args — blocked: ${argStr.slice(0, 120)}`);
|
|
800
|
+
messages.push({ role: 'user', content: `Tool call BLOCKED: your argument contains a placeholder "${argStr.slice(0, 100)}" — that is NOT a real URL or path. Look at the tool results already in the conversation (e.g. the curl localhost:9223/json output) and use the actual URL you found there.` });
|
|
801
|
+
continue;
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
500
805
|
this.emit('tool_activity', {
|
|
501
806
|
agentId, event: 'tool_start', tool: name,
|
|
502
807
|
description: this._toolDesc(name, parsedArgs)
|
|
503
808
|
});
|
|
504
809
|
console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
|
|
505
810
|
toolsUsed.push(name);
|
|
811
|
+
emptyRetries = 0; // reset on successful tool call
|
|
812
|
+
|
|
813
|
+
// Loop detection: catch repeated single calls AND alternating A/B/A/B patterns.
|
|
814
|
+
// Normalize curl commands: strip sleep prefix so "sleep 3 && curl ...URL" and
|
|
815
|
+
// "sleep 10 && curl ...URL" both map to the same key "curl:URL".
|
|
816
|
+
let callKey = `${name}:${JSON.stringify(parsedArgs)}`;
|
|
817
|
+
if (name === 'bash' && parsedArgs.command) {
|
|
818
|
+
const curlMatch = parsedArgs.command.match(/curl\s+.*?(https?:\/\/\S+|localhost:\d+)/);
|
|
819
|
+
if (curlMatch) callKey = `curl:${curlMatch[1]}`;
|
|
820
|
+
}
|
|
821
|
+
recentCalls.push(callKey);
|
|
822
|
+
if (recentCalls.length > 6) recentCalls.shift();
|
|
823
|
+
|
|
824
|
+
// Detect: same call 3x in a row (2x for screenshot — never valid to screenshot without a change)
|
|
825
|
+
const screenshotLoop = name === 'screenshot_and_describe' && recentCalls.length >= 2 && recentCalls.slice(-2).every(c => c === callKey);
|
|
826
|
+
const last3Same = screenshotLoop || (recentCalls.length >= 3 && recentCalls.slice(-3).every(c => c === callKey));
|
|
827
|
+
// Detect: alternating A,B,A,B pattern (last 4 calls)
|
|
828
|
+
const last4 = recentCalls.slice(-4);
|
|
829
|
+
const abab = last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1];
|
|
830
|
+
// Detect: A,B,C,A,B,C pattern (last 6)
|
|
831
|
+
const last6 = recentCalls.slice(-6);
|
|
832
|
+
const abcabc = last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5];
|
|
833
|
+
|
|
834
|
+
if (last3Same || abab || abcabc) {
|
|
835
|
+
const pattern = last3Same ? 'same call 3x' : abab ? 'A/B/A/B alternating' : 'A/B/C repeating';
|
|
836
|
+
console.log(` [${agentId}] 🔁 Loop detected (${pattern}) — injecting fix hint`);
|
|
837
|
+
// Generate a context-aware hint based on what's looping
|
|
838
|
+
let loopFixMsg = `You are repeating the same action — STOP looping. Observe first, then act.\n`;
|
|
839
|
+
const loopCmd = parsedArgs.command || parsedArgs.path || '';
|
|
840
|
+
const noThink = '';
|
|
841
|
+
if (name === 'write_file') {
|
|
842
|
+
loopFixMsg += `You keep rewriting the same file. The file already exists with your previous code. Do NOT rewrite it from scratch.\nInstead:\n1. call screenshot_and_describe to SEE what the app looks like right now\n2. Identify the specific thing that is wrong or missing\n3. read_file the file to see current content\n4. Make a TARGETED edit — change only the specific broken section\nNever rewrite an entire file when the server is already running.`;
|
|
843
|
+
} else if (loopCmd.includes('mkdir') || loopCmd.includes('client')) {
|
|
844
|
+
loopFixMsg += `Files/folders already exist. STOP creating them. Call screenshot_and_describe to see the current state of the app, then identify what specifically needs to be improved and fix it with targeted edits.`;
|
|
845
|
+
} else if (loopCmd.includes('open http')) {
|
|
846
|
+
const openPortMatch = loopCmd.match(/:(\d+)/);
|
|
847
|
+
const openPort = openPortMatch ? openPortMatch[1] : '????';
|
|
848
|
+
loopFixMsg += `You are calling 'open http://localhost:${openPort}' repeatedly but the server is not running — opening the browser to a dead port does nothing. You must RESTART THE SERVER first:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${openPort}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${openPort}"}}\nIf curl returns 000, check the crash: bash cat /tmp/server.log. Fix the crash FIRST. Only call 'open' after curl returns 200.`;
|
|
849
|
+
} else if (name === 'bash' && (loopCmd.includes('curl') || loopCmd.includes('http_code'))) {
|
|
850
|
+
loopFixMsg += `The server check is looping. Check /tmp/server.log for errors:\n{"name":"bash","arguments":{"command":"cat /tmp/server.log | tail -20"}}\nThen fix the actual error in the code. NEVER change the port.`;
|
|
851
|
+
} else if (loopCmd.includes('npm install')) {
|
|
852
|
+
loopFixMsg += `npm install is looping — packages likely already installed. Skip it and start the server directly with nohup.`;
|
|
853
|
+
} else if (name === 'bash' && (loopCmd.includes('/tmp/') && (loopCmd.includes('.js') || loopCmd.includes('node')) && loopCmd.includes('9223'))) {
|
|
854
|
+
loopFixMsg += `Your Node.js/CDP script is only READING the page — that is why nothing changes. You need to WRITE A NEW SCRIPT THAT CLICKS.\n\nReplace your /tmp script with one that clicks the target element:\n\nWRITE_FILE /tmp/cdp_click.js\n\`\`\`javascript\nconst ws = new WebSocket('ws://localhost:9223/devtools/page/TAB_ID_HERE');\nws.onopen = () => {\n // Click element containing the text you need (change "Filter" to what you see on the page)\n ws.send(JSON.stringify({id:1, method:'Runtime.evaluate', params:{expression: 'Array.from(document.querySelectorAll("a,button,input,span,div,th")).find(el=>el.textContent.trim().includes("Filter"))?.click() || "not found"', returnByValue:true}}));\n};\nws.onmessage = e => { console.log(JSON.parse(e.data)); ws.close(); };\nsetTimeout(() => ws.close(), 5000);\n\`\`\`\n\nThen run: bash → /usr/local/bin/node --experimental-websocket /tmp/cdp_click.js\n\nYou CAN click. You CAN interact. Stop saying you cannot — write the clicking script.`;
|
|
855
|
+
} else if (name === 'screenshot_and_describe') {
|
|
856
|
+
const loopPort = (parsedArgs.url || '').match(/:(\d+)/)?.[1] || '????';
|
|
857
|
+
loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing. You have two choices:\n\nA) If the user asked a question or gave feedback — answer them with TEXT. You do NOT need a screenshot to reply to a conversation. Just write your response.\n\nB) If the app needs to be improved — make a CODE CHANGE first, then take ONE screenshot to verify:\n1. read_file the file that needs changing\n2. write_file with the improvement\n3. restart the server: bash pkill+nohup\n4. screenshot ONCE to verify\n\nDo NOT take another screenshot without first doing one of the above.`;
|
|
858
|
+
} else {
|
|
859
|
+
loopFixMsg += `Observe the tool results above, identify what is specifically broken, then make a targeted fix. Do not repeat commands that already ran.`;
|
|
860
|
+
}
|
|
861
|
+
loopFixMsg += noThink;
|
|
862
|
+
messages.push({ role: 'user', content: loopFixMsg });
|
|
863
|
+
// Don't fully reset — keep 1 entry so next identical call fires after 2 more (not 3)
|
|
864
|
+
recentCalls.splice(0, recentCalls.length - 1);
|
|
865
|
+
break; // break inner tool loop, let model respond to hint
|
|
866
|
+
}
|
|
506
867
|
|
|
507
|
-
const result = await this._executeTool(name, parsedArgs, workDir);
|
|
868
|
+
const result = await this._executeTool(name, parsedArgs, workDir, agentId);
|
|
508
869
|
|
|
509
870
|
this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
|
|
510
871
|
|
|
@@ -513,30 +874,107 @@ export class OllamaAgent extends EventEmitter {
|
|
|
513
874
|
this.emit('agent_image', { agentId, image: result });
|
|
514
875
|
}
|
|
515
876
|
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
const resultText = isImageResult ? '[Screenshot captured — vision model needed to analyze]' : String(result).slice(0, 8000);
|
|
523
|
-
messages.push({ role: 'user', content: `<tool_response>\n${resultText}\n</tool_response>` });
|
|
524
|
-
}
|
|
525
|
-
} else {
|
|
526
|
-
// Standard OpenAI format
|
|
527
|
-
if (isImageResult && isVision) {
|
|
528
|
-
messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: '[Screenshot captured — see image attached]' });
|
|
877
|
+
// ALL models get tool results fed back — no model should run blind.
|
|
878
|
+
// This is the core of the observe → reason → act loop: every tool result
|
|
879
|
+
// must be in context so the model can see what happened and react correctly.
|
|
880
|
+
{
|
|
881
|
+
const noThink = '';
|
|
882
|
+
if (isImageResult) {
|
|
529
883
|
const base64 = result.replace(/^data:image\/\w+;base64,/, '');
|
|
530
|
-
messages.push({ role: 'user', content:
|
|
884
|
+
messages.push({ role: 'user', content: `[${name} result]: Screenshot captured. Continue with the next step.${noThink}`, images: [base64] });
|
|
531
885
|
} else {
|
|
532
|
-
|
|
886
|
+
// This is the non-image branch (isImageResult is false here), so the result is always plain text.
// Cap at 6000 chars to keep a single tool result from flooding the model's context.
const resultText = String(result).slice(0, 6000);
|
|
887
|
+
messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\nContinue with the next step.${noThink}` });
|
|
888
|
+
|
|
889
|
+
if (name === 'screenshot_and_describe') {
|
|
890
|
+
const screenshotResult = String(result);
|
|
891
|
+
const isLocalhost = (parsedArgs.url || '').includes('localhost') || (parsedArgs.url || '').includes('127.0.0.1');
|
|
892
|
+
// Server unreachable on localhost — force bash restart (only for local servers, not public URLs)
|
|
893
|
+
if (screenshotResult.includes('SERVER IS NOT REACHABLE') && isLocalhost) {
|
|
894
|
+
const portMatch = (parsedArgs.url || '').match(/:(\d+)/);
|
|
895
|
+
const port = portMatch ? portMatch[1] : '????';
|
|
896
|
+
messages.push({ role: 'user', content: `The local server on port ${port} is not running. Restart it with bash — find the project directory, then: pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd /path/to/project && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${port}` });
|
|
897
|
+
}
|
|
898
|
+
// Public URL unreachable — try web_fetch instead
|
|
899
|
+
else if (screenshotResult.includes('SERVER IS NOT REACHABLE') && !isLocalhost) {
|
|
900
|
+
messages.push({ role: 'user', content: `screenshot_and_describe could not reach ${parsedArgs.url}. Try web_fetch instead:\n{"name":"web_fetch","arguments":{"url":"${parsedArgs.url}"}}` });
|
|
901
|
+
}
|
|
902
|
+
// Dependency audit issues — prevent port-hopping
|
|
903
|
+
else if (screenshotResult.includes('DEPENDENCY AUDIT FOUND ISSUES')) {
|
|
904
|
+
messages.push({ role: 'user', content: `CRITICAL: Missing client-side libraries in your HTML. Do NOT change the port. Fix it: (1) read_file the HTML; (2) add the missing script tags; (3) write_file back; (4) restart server same port; (5) screenshot to verify.` });
|
|
905
|
+
}
|
|
906
|
+
// Successful screenshot of a build task — push to make a code change
|
|
907
|
+
else if (isLocalhost) {
|
|
908
|
+
messages.push({ role: 'user', content: `You have seen the current state. Now make your next improvement: read_file the code, write_file the fix, restart server, then screenshot once to verify.` });
|
|
909
|
+
}
|
|
910
|
+
// Successful screenshot of a public URL — agent is doing research, let it reason
|
|
911
|
+
}
|
|
912
|
+
// Catch placeholder/hello world pages — force the model to keep building
|
|
913
|
+
const screenshotText = String(result).toLowerCase();
|
|
914
|
+
const isPlaceholder = (
|
|
915
|
+
screenshotText.includes('hello world') ||
|
|
916
|
+
screenshotText.includes('cannot get /') ||
|
|
917
|
+
(screenshotText.includes('express') && screenshotText.includes('error')) ||
|
|
918
|
+
screenshotText.includes('placeholder') ||
|
|
919
|
+
screenshotText.includes('coming soon') ||
|
|
920
|
+
(screenshotText.includes('blank') && !screenshotText.includes('not blank'))
|
|
921
|
+
);
|
|
922
|
+
if (isPlaceholder) {
|
|
923
|
+
messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
|
|
924
|
+
}
|
|
533
925
|
}
|
|
534
926
|
}
|
|
535
927
|
}
|
|
536
928
|
continue; // loop back for next model turn
|
|
537
929
|
}
|
|
538
930
|
|
|
539
|
-
// ── No tool calls
|
|
931
|
+
// ── No tool calls ────────────────────────────────────────────────────
|
|
932
|
+
{
|
|
933
|
+
const combined = (visibleContent + streamContent).replace(/<think>[\s\S]*?<\/think>/g, '');
|
|
934
|
+
const hasContent = combined.trim().length > 30;
|
|
935
|
+
const isEmpty = combined.trim().length === 0;
|
|
936
|
+
|
|
937
|
+
// Structural: truncated JSON — model started a tool call but stream ended early
|
|
938
|
+
const hasTruncatedJson = /\{"name"\s*:\s*"(bash|web_fetch|screenshot_and_describe|read_file|write_file|list_directory)"/i.test(streamContent) && Object.keys(streamToolCalls).length === 0;
|
|
939
|
+
if (hasTruncatedJson) {
|
|
940
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call — kicking to re-output`);
|
|
941
|
+
messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
|
|
942
|
+
continue;
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
// Structural: empty response — model produced nothing
|
|
946
|
+
if (isEmpty) {
|
|
947
|
+
if (emptyRetries < 3) {
|
|
948
|
+
emptyRetries++;
|
|
949
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: empty response (retry ${emptyRetries}/3) — kicking`);
|
|
950
|
+
messages.push({ role: 'user', content: toolsUsed.length === 0 ? 'Start now — make your first tool call.' : 'You stopped. Make your next tool call.' });
|
|
951
|
+
continue;
|
|
952
|
+
}
|
|
953
|
+
console.log(` [${agentId}] ⚠️ Turn ${turn}: empty after 3 retries`);
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
// Structural: agent hasn't used any tools yet — it must act before it can answer
|
|
957
|
+
if (toolsUsed.length === 0 && hasContent) {
|
|
958
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: no tools used yet — kicking to act`);
|
|
959
|
+
messages.push({ role: 'user', content: 'Make your first tool call now.' });
|
|
960
|
+
continue;
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
// Semantic: ask the LLM whether the task is actually complete.
|
|
964
|
+
// This replaces all regex-based intent detection — the model judges its own output.
|
|
965
|
+
if (hasContent && toolsUsed.length > 0) {
|
|
966
|
+
const originalTask = messages.find(m => m.role === 'user')?.content || task;
|
|
967
|
+
const isDone = await this._isTaskComplete(originalTask, combined, controller.signal);
|
|
968
|
+
if (!isDone) {
|
|
969
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: LLM says task incomplete — kicking`);
|
|
970
|
+
messages.push({ role: 'user', content: 'You have not completed the task yet. Try a different approach and keep going.' });
|
|
971
|
+
continue;
|
|
972
|
+
}
|
|
973
|
+
console.log(` [${agentId}] ✅ Turn ${turn}: LLM confirmed task complete`);
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
|
|
977
|
+
// ── Final answer ──────────────────────────────────────────────────────
|
|
540
978
|
if (visibleContent) finalContent = visibleContent;
|
|
541
979
|
break;
|
|
542
980
|
|
|
@@ -555,7 +993,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
555
993
|
];
|
|
556
994
|
|
|
557
995
|
try {
|
|
558
|
-
const summaryRes = await fetch(`${this.baseUrl}/
|
|
996
|
+
const summaryRes = await fetch(`${this.baseUrl}/api/chat`, {
|
|
559
997
|
method: 'POST',
|
|
560
998
|
headers: { 'Content-Type': 'application/json' },
|
|
561
999
|
signal: controller.signal,
|
|
@@ -563,7 +1001,8 @@ export class OllamaAgent extends EventEmitter {
|
|
|
563
1001
|
model: effectiveModel,
|
|
564
1002
|
messages: summaryMessages,
|
|
565
1003
|
stream: true,
|
|
566
|
-
|
|
1004
|
+
think: false,
|
|
1005
|
+
options: { num_ctx: 32768 }
|
|
567
1006
|
})
|
|
568
1007
|
});
|
|
569
1008
|
|
|
@@ -606,7 +1045,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
606
1045
|
}
|
|
607
1046
|
|
|
608
1047
|
// Persist history for next task
|
|
609
|
-
if (finalContent
|
|
1048
|
+
if (finalContent) {
|
|
610
1049
|
this._saveHistory(agentId, workDir, sessionId, [
|
|
611
1050
|
...history,
|
|
612
1051
|
{ role: 'user', content: task },
|
|
@@ -665,16 +1104,96 @@ export class OllamaAgent extends EventEmitter {
|
|
|
665
1104
|
|
|
666
1105
|
// ─── Tool execution ───────────────────────────────────────────────────────
|
|
667
1106
|
|
|
668
|
-
async _executeTool(name, args, workDir) {
|
|
1107
|
+
async _executeTool(name, args, workDir, agentId = 'agent') {
|
|
669
1108
|
try {
|
|
670
1109
|
switch (name) {
|
|
671
1110
|
case 'bash': {
|
|
1111
|
+
// Block commands that would kill the worker process itself.
|
|
1112
|
+
// "pkill -f node" and "killall node" match the worker's own process.
|
|
1113
|
+
// Rewrite to only kill processes by their specific server log path or port.
|
|
1114
|
+
const cmd = args.command || '';
|
|
1115
|
+
if (/pkill\s+(-\w+\s+)*(-f\s+)?node\b/i.test(cmd) || /killall\s+node\b/i.test(cmd)) {
|
|
1116
|
+
// Safe replacement: kill only the app server on the port, not all node processes
|
|
1117
|
+
const portMatch = cmd.match(/localhost:(\d+)|:(\d+)/);
|
|
1118
|
+
const serverLogMatch = cmd.match(/server\.js/);
|
|
1119
|
+
if (portMatch || serverLogMatch) {
|
|
1120
|
+
const safeCmd = portMatch
|
|
1121
|
+
? `lsof -ti:${portMatch[1] || portMatch[2]} | xargs kill -9 2>/dev/null || true`
|
|
1122
|
+
: `pkill -f "server.js" 2>/dev/null || true`;
|
|
1123
|
+
args = { ...args, command: safeCmd + cmd.slice(cmd.indexOf('&&') !== -1 ? cmd.indexOf('&&') : cmd.length) };
|
|
1124
|
+
} else {
|
|
1125
|
+
// No specific target — skip the pkill entirely, just run what follows &&
|
|
1126
|
+
const afterAnd = cmd.indexOf('&&');
|
|
1127
|
+
if (afterAnd !== -1) {
|
|
1128
|
+
args = { ...args, command: cmd.slice(afterAnd + 2).trim() };
|
|
1129
|
+
} else {
|
|
1130
|
+
return 'Skipped broad pkill to protect worker process. Use: lsof -ti:PORT | xargs kill -9';
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
// Intercept "open http://..." — navigate the AgentForge CDP browser directly,
|
|
1136
|
+
// then auto-screenshot so the agent immediately sees what it built.
|
|
1137
|
+
const openUrlMatch = args.command.trim().match(/^open\s+(https?:\/\/\S+)/);
|
|
1138
|
+
if (openUrlMatch) {
|
|
1139
|
+
const targetUrl = openUrlMatch[1];
|
|
1140
|
+
let openedViaCDP = false;
|
|
1141
|
+
try {
|
|
1142
|
+
const newTabRes = await fetch('http://127.0.0.1:9223/json/new', { method: 'PUT', signal: AbortSignal.timeout(3000) });
|
|
1143
|
+
const newTabData = await newTabRes.json();
|
|
1144
|
+
const tabWs = new WebSocket(`ws://127.0.0.1:9223/devtools/page/${newTabData.id}`);
|
|
1145
|
+
await new Promise(r => tabWs.on('open', r));
|
|
1146
|
+
await new Promise(r => {
|
|
1147
|
+
let navigated = false;
|
|
1148
|
+
tabWs.send(JSON.stringify({ id: 1, method: 'Page.navigate', params: { url: targetUrl } }));
|
|
1149
|
+
tabWs.on('message', () => { if (!navigated) { navigated = true; tabWs.close(); r(); } });
|
|
1150
|
+
setTimeout(() => { tabWs.close(); r(); }, 3000);
|
|
1151
|
+
});
|
|
1152
|
+
openedViaCDP = true;
|
|
1153
|
+
} catch {
|
|
1154
|
+
// CDP unavailable — fall through to OS open
|
|
1155
|
+
try { await execAsync(`open "${targetUrl}"`); } catch {}
|
|
1156
|
+
}
|
|
1157
|
+
// Auto-screenshot after opening so the agent sees what it built.
|
|
1158
|
+
// Wait for page to load, then call screenshot_and_describe.
|
|
1159
|
+
await new Promise(r => setTimeout(r, 2500));
|
|
1160
|
+
try {
|
|
1161
|
+
const screenshotResult = await this._executeTool('screenshot_and_describe', {
|
|
1162
|
+
url: targetUrl,
|
|
1163
|
+
check_for: 'the running application',
|
|
1164
|
+
send_to_user: true
|
|
1165
|
+
}, workDir, agentId);
|
|
1166
|
+
return `Opened ${targetUrl} in browser${openedViaCDP ? ' (AgentForge browser)' : ''}.\n\nVisual snapshot of what is currently visible:\n${screenshotResult}`;
|
|
1167
|
+
} catch {
|
|
1168
|
+
return `Opened ${targetUrl} in browser. (Screenshot failed — verify with screenshot_and_describe)`;
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
// If workDir doesn't exist (e.g. /tmp was cleared after worker restart),
|
|
1173
|
+
// fall back to HOME rather than failing with ENOENT on every bash call.
|
|
1174
|
+
let bashCwd = workDir;
|
|
1175
|
+
const _home = process.env.HOME || '/tmp';
|
|
1176
|
+
try { if (!existsSync(bashCwd)) bashCwd = _home; } catch { bashCwd = _home; }
|
|
1177
|
+
// Background commands (ending with &) return no stdout — the model interprets
|
|
1178
|
+
// silence as failure and loops. Run them, then read back any log file to confirm.
|
|
1179
|
+
const isBackground = /&\s*$/.test(args.command.trim());
|
|
672
1180
|
const { stdout, stderr } = await execAsync(args.command, {
|
|
673
|
-
cwd:
|
|
674
|
-
timeout:
|
|
1181
|
+
cwd: bashCwd,
|
|
1182
|
+
timeout: 120000,
|
|
675
1183
|
maxBuffer: 1024 * 1024 * 2 // 2MB
|
|
676
1184
|
});
|
|
677
|
-
|
|
1185
|
+
const out = (stdout + stderr).trim();
|
|
1186
|
+
if (isBackground && !out) {
|
|
1187
|
+
// Give the process a moment to start, then check /tmp/server.log if it exists
|
|
1188
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
1189
|
+
let confirmation = 'Background process started.';
|
|
1190
|
+
try {
|
|
1191
|
+
const logContent = readFileSync('/tmp/server.log', 'utf-8').trim().split('\n').slice(-3).join('\n');
|
|
1192
|
+
if (logContent) confirmation = `Background process started. Server log:\n${logContent}`;
|
|
1193
|
+
} catch { /* no log yet */ }
|
|
1194
|
+
return confirmation;
|
|
1195
|
+
}
|
|
1196
|
+
return out || '(no output)';
|
|
678
1197
|
}
|
|
679
1198
|
|
|
680
1199
|
case 'read_file': {
|
|
@@ -721,6 +1240,26 @@ export class OllamaAgent extends EventEmitter {
|
|
|
721
1240
|
}
|
|
722
1241
|
}
|
|
723
1242
|
|
|
1243
|
+
case 'screenshot_and_describe': {
|
|
1244
|
+
const result = await this._screenshotAndDescribe(args.url, args.check_for);
|
|
1245
|
+
// Always send screenshot to user — agent called this tool, user should always see it
|
|
1246
|
+
if (this._lastScreenshotData) {
|
|
1247
|
+
this.emit('agent_image', { agentId, image: this._lastScreenshotData });
|
|
1248
|
+
this._lastScreenshotData = null;
|
|
1249
|
+
}
|
|
1250
|
+
return result;
|
|
1251
|
+
}
|
|
1252
|
+
|
|
1253
|
+
case 'browser': {
|
|
1254
|
+
const result = await browserAction(args);
|
|
1255
|
+
if (result && result.__screenshot) {
|
|
1256
|
+
const imgData = `data:image/png;base64,${result.base64}`;
|
|
1257
|
+
this.emit('agent_image', { agentId, image: imgData });
|
|
1258
|
+
return `Screenshot taken (${Math.round(result.base64.length * 0.75 / 1024)}KB). Image sent to chat.`;
|
|
1259
|
+
}
|
|
1260
|
+
return typeof result === 'string' ? result : JSON.stringify(result);
|
|
1261
|
+
}
|
|
1262
|
+
|
|
724
1263
|
default:
|
|
725
1264
|
return `Unknown tool: ${name}`;
|
|
726
1265
|
}
|
|
@@ -733,24 +1272,13 @@ export class OllamaAgent extends EventEmitter {
|
|
|
733
1272
|
|
|
734
1273
|
async _cdpScreenshot(navigateUrl, tmpFile) {
|
|
735
1274
|
const CDP_PORT = 9223;
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
const
|
|
740
|
-
const
|
|
741
|
-
const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
|
|
742
|
-
|
|
743
|
-
if (!usable) {
|
|
744
|
-
// Create new tab
|
|
745
|
-
const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
|
|
746
|
-
const newTabData = await newTab.json();
|
|
747
|
-
tabId = newTabData.id;
|
|
748
|
-
} else {
|
|
749
|
-
tabId = usable.id;
|
|
750
|
-
}
|
|
1275
|
+
|
|
1276
|
+
// Always create a NEW tab — never hijack the dashboard or other existing tabs
|
|
1277
|
+
const newTabRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
|
|
1278
|
+
const newTabData = await newTabRes.json();
|
|
1279
|
+
const tabId = newTabData.id;
|
|
751
1280
|
|
|
752
1281
|
return new Promise((resolve, reject) => {
|
|
753
|
-
// Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
|
|
754
1282
|
const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
|
|
755
1283
|
let msgId = 1;
|
|
756
1284
|
const pending = new Map();
|
|
@@ -775,10 +1303,12 @@ export class OllamaAgent extends EventEmitter {
|
|
|
775
1303
|
try {
|
|
776
1304
|
if (navigateUrl) {
|
|
777
1305
|
await send('Page.navigate', { url: navigateUrl });
|
|
778
|
-
// Wait for
|
|
1306
|
+
// Wait for page to fully render
|
|
779
1307
|
await new Promise(r => setTimeout(r, 3000));
|
|
780
1308
|
}
|
|
781
1309
|
const { data } = await send('Page.captureScreenshot', { format: 'png' });
|
|
1310
|
+
// Close the temporary tab
|
|
1311
|
+
await send('Target.closeTarget', { targetId: tabId }).catch(() => {});
|
|
782
1312
|
ws.close();
|
|
783
1313
|
resolve(`data:image/png;base64,${data}`);
|
|
784
1314
|
} catch (err) {
|
|
@@ -788,10 +1318,129 @@ export class OllamaAgent extends EventEmitter {
|
|
|
788
1318
|
});
|
|
789
1319
|
|
|
790
1320
|
ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
|
|
791
|
-
setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); },
|
|
1321
|
+
setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 25000);
|
|
792
1322
|
});
|
|
793
1323
|
}
|
|
794
1324
|
|
|
1325
|
+
// ─── Screenshot + vision analysis ─────────────────────────────────────────
|
|
1326
|
+
// Takes a screenshot of a URL, then asks the active vision model to describe it.
|
|
1327
|
+
// Returns a plain-text description the main agent can reason about.
|
|
1328
|
+
|
|
1329
|
+
async _screenshotAndDescribe(url, checkFor) {
|
|
1330
|
+
const question = checkFor
|
|
1331
|
+
? `Does this web page look like it's working? Specifically check: ${checkFor}. Describe precisely what you see — the background color, any canvas element, colored shapes (even tiny dots), text, buttons, game elements, or error messages. Is the background dark or white? Are there any colored pixels at all?`
|
|
1332
|
+
: `Describe what you see on this web page. What is the background color? Are there any colored shapes, text, buttons, or UI elements? Is there a canvas? Even tiny colored dots count — be precise about what you see.`;
|
|
1333
|
+
|
|
1334
|
+
// === Server reachability check — fast fail if server is down ===
|
|
1335
|
+
try {
|
|
1336
|
+
await fetch(url, { signal: AbortSignal.timeout(4000) });
|
|
1337
|
+
} catch (reachErr) {
|
|
1338
|
+
const portMatch = url.match(/:(\d+)/);
|
|
1339
|
+
const port = portMatch ? portMatch[1] : '?';
|
|
1340
|
+
return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
|
|
1341
|
+
}
|
|
1342
|
+
|
|
1343
|
+
// === HTML dependency audit (always runs — fast, reliable) ===
|
|
1344
|
+
// Fetches the page HTML and checks for common missing client-side dependencies.
|
|
1345
|
+
// This catches issues that screenshots can't detect (JS errors, missing script tags).
|
|
1346
|
+
let auditNotes = '';
|
|
1347
|
+
try {
|
|
1348
|
+
const htmlRes = await fetch(url, { signal: AbortSignal.timeout(8000) });
|
|
1349
|
+
const html = await htmlRes.text();
|
|
1350
|
+
const missing = [];
|
|
1351
|
+
// Check for socket.io client usage without the script tag
|
|
1352
|
+
if (/\bio\s*\(/.test(html) && !html.includes('/socket.io/socket.io.js')) {
|
|
1353
|
+
missing.push('Missing <script src="/socket.io/socket.io.js"></script> — io() is called but the client library is not loaded');
|
|
1354
|
+
// Also verify the server actually serves it
|
|
1355
|
+
try {
|
|
1356
|
+
const sioRes = await fetch(url.replace(/\/$/, '') + '/socket.io/socket.io.js', { signal: AbortSignal.timeout(5000) });
|
|
1357
|
+
if (!sioRes.ok || (await sioRes.text()).startsWith('<!')) {
|
|
1358
|
+
missing.push('Server does NOT serve /socket.io/socket.io.js — check that socket.io is installed and express-static is set up');
|
|
1359
|
+
}
|
|
1360
|
+
} catch {}
|
|
1361
|
+
}
|
|
1362
|
+
if (missing.length > 0) {
|
|
1363
|
+
auditNotes = `\n\nHTML DEPENDENCY AUDIT FOUND ISSUES:\n${missing.map(m => '- ' + m).join('\n')}`;
|
|
1364
|
+
}
|
|
1365
|
+
} catch {}
|
|
1366
|
+
|
|
1367
|
+
let imageData;
|
|
1368
|
+
const tmpFile = `/tmp/af_verify_${Date.now()}.png`;
|
|
1369
|
+
|
|
1370
|
+
// Try AgentForge browser via CDP first
|
|
1371
|
+
try {
|
|
1372
|
+
imageData = await this._cdpScreenshot(url, null);
|
|
1373
|
+
} catch (cdpErr) {
|
|
1374
|
+
// CDP not available — try puppeteer headless screenshot
|
|
1375
|
+
try {
|
|
1376
|
+
const puppeteerModule = process.env.HOME + '/.npm-global/lib/node_modules/puppeteer';
|
|
1377
|
+
const scriptFile = `/tmp/af_pup_${Date.now()}.js`;
|
|
1378
|
+
const nodeScript = `
|
|
1379
|
+
const puppeteer = require(${JSON.stringify(puppeteerModule)});
|
|
1380
|
+
(async () => {
|
|
1381
|
+
const browser = await puppeteer.launch({headless: true, protocolTimeout: 30000, args: ['--no-sandbox','--disable-setuid-sandbox','--disable-gpu','--disable-dev-shm-usage']});
|
|
1382
|
+
const page = await browser.newPage();
|
|
1383
|
+
await page.setDefaultNavigationTimeout(12000);
|
|
1384
|
+
await page.setViewport({width: 1280, height: 900});
|
|
1385
|
+
try {
|
|
1386
|
+
await page.goto(${JSON.stringify(url)}, {waitUntil: 'domcontentloaded', timeout: 12000}).catch(()=>{});
|
|
1387
|
+
await new Promise(r => setTimeout(r, 2500));
|
|
1388
|
+
await page.screenshot({path: ${JSON.stringify(tmpFile)}, fullPage: true});
|
|
1389
|
+
console.log('puppeteer screenshot ok');
|
|
1390
|
+
} finally {
|
|
1391
|
+
await browser.close();
|
|
1392
|
+
}
|
|
1393
|
+
})().then(() => process.exit(0)).catch(e => { console.error(e.message); process.exit(1); });
|
|
1394
|
+
`;
|
|
1395
|
+
writeFileSync(scriptFile, nodeScript);
|
|
1396
|
+
await execAsync(`/usr/local/bin/node "${scriptFile}"`, { timeout: 45000 });
|
|
1397
|
+
await execAsync(`rm -f "${scriptFile}"`).catch(() => {});
|
|
1398
|
+
const raw = readFileSync(tmpFile).toString('base64');
|
|
1399
|
+
await execAsync(`rm -f "${tmpFile}"`).catch(() => {});
|
|
1400
|
+
imageData = `data:image/png;base64,${raw}`;
|
|
1401
|
+
} catch (pupErr) {
|
|
1402
|
+
console.warn(` [screenshot_and_describe] puppeteer failed: ${pupErr.message}`);
|
|
1403
|
+
// No screenshot possible — return audit notes only
|
|
1404
|
+
return `Cannot take screenshot (CDP: ${cdpErr.message}, puppeteer: ${pupErr.message}). ${auditNotes || 'No dependency issues found in HTML. Check server logs for errors.'}`;
|
|
1405
|
+
}
|
|
1406
|
+
}
|
|
1407
|
+
|
|
1408
|
+
// Store imageData so caller can emit to user if send_to_user=true
|
|
1409
|
+
this._lastScreenshotData = imageData;
|
|
1410
|
+
|
|
1411
|
+
const base64 = imageData.replace(/^data:image\/\w+;base64,/, '');
|
|
1412
|
+
|
|
1413
|
+
// Use the active model for vision analysis.
|
|
1414
|
+
try {
|
|
1415
|
+
// /api/chat with images array — supported by all Ollama vision-capable models
|
|
1416
|
+
const res = await fetch(`${this.baseUrl}/api/chat`, {
|
|
1417
|
+
method: 'POST',
|
|
1418
|
+
headers: { 'Content-Type': 'application/json' },
|
|
1419
|
+
body: JSON.stringify({
|
|
1420
|
+
model: this.model,
|
|
1421
|
+
messages: [{ role: 'user', content: question, images: [base64] }],
|
|
1422
|
+
stream: false,
|
|
1423
|
+
options: { num_ctx: 4096 }
|
|
1424
|
+
}),
|
|
1425
|
+
signal: AbortSignal.timeout(120000)
|
|
1426
|
+
});
|
|
1427
|
+
|
|
1428
|
+
if (res.ok) {
|
|
1429
|
+
const json = await res.json();
|
|
1430
|
+
const description = json.message?.content || json.response || '';
|
|
1431
|
+
const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
1432
|
+
if (clean) {
|
|
1433
|
+
console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
|
|
1434
|
+
return `Screenshot analysis of ${url}:\n${clean}${auditNotes}`;
|
|
1435
|
+
}
|
|
1436
|
+
}
|
|
1437
|
+
} catch (err) {
|
|
1438
|
+
console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}`;
|
|
1442
|
+
}
|
|
1443
|
+
|
|
795
1444
|
_resolvePath(p, workDir) {
|
|
796
1445
|
return path.isAbsolute(p) ? p : path.join(workDir, p);
|
|
797
1446
|
}
|
|
@@ -817,28 +1466,65 @@ export class OllamaAgent extends EventEmitter {
|
|
|
817
1466
|
}
|
|
818
1467
|
|
|
819
1468
|
// ─── History persistence ──────────────────────────────────────────────────
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
1469
|
+
// History lives at ~/.agentforge/history/{agentId}.json — one canonical file
|
|
1470
|
+
// per agent, independent of workDir/sessionId/machine state. Never gets lost
|
|
1471
|
+
// due to workDir changes, worker restarts, or Railway assigning new sessionIds.
|
|
1472
|
+
|
|
1473
|
+
_historyPath(agentId) {
|
|
1474
|
+
const home = process.env.HOME || '/tmp';
|
|
1475
|
+
const dir = path.join(home, '.agentforge', 'history');
|
|
1476
|
+
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
|
|
1477
|
+
return path.join(dir, `${agentId}.json`);
|
|
823
1478
|
}
|
|
824
1479
|
|
|
825
1480
|
_loadHistory(agentId, workDir, sessionId) {
|
|
826
|
-
if (!sessionId) return [];
|
|
827
1481
|
try {
|
|
828
|
-
const fp = this._historyPath(
|
|
1482
|
+
const fp = this._historyPath(agentId);
|
|
829
1483
|
if (existsSync(fp)) {
|
|
830
1484
|
const data = JSON.parse(readFileSync(fp, 'utf-8'));
|
|
831
|
-
|
|
1485
|
+
console.log(` [${agentId}] Loaded ${data.length} history msgs from ~/.agentforge/history/`);
|
|
832
1486
|
return data.slice(-12);
|
|
833
1487
|
}
|
|
834
|
-
} catch {
|
|
1488
|
+
} catch (e) {
|
|
1489
|
+
console.warn(`⚠️ [${agentId}] History load error: ${e.message}`);
|
|
1490
|
+
}
|
|
835
1491
|
return [];
|
|
836
1492
|
}
|
|
837
1493
|
|
|
838
1494
|
_saveHistory(agentId, workDir, sessionId, messages) {
|
|
839
1495
|
try {
|
|
840
|
-
const fp = this._historyPath(
|
|
1496
|
+
const fp = this._historyPath(agentId);
|
|
841
1497
|
writeFileSync(fp, JSON.stringify(messages.slice(-20), null, 2));
|
|
842
|
-
} catch {
|
|
1498
|
+
} catch (e) {
|
|
1499
|
+
console.warn(`⚠️ [${agentId}] History save error: ${e.message}`);
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
async _isTaskComplete(task, output, signal) {
|
|
1504
|
+
try {
|
|
1505
|
+
const res = await fetch(`${this.baseUrl}/api/chat`, {
|
|
1506
|
+
method: 'POST',
|
|
1507
|
+
headers: { 'Content-Type': 'application/json' },
|
|
1508
|
+
signal,
|
|
1509
|
+
body: JSON.stringify({
|
|
1510
|
+
model: this.model,
|
|
1511
|
+
messages: [
|
|
1512
|
+
{ role: 'system', content: 'You determine if a task is complete. Reply with only "yes" or "no".' },
|
|
1513
|
+
{ role: 'user', content: `Task: ${task.slice(0, 300)}\n\nAgent output: ${output.slice(0, 600)}\n\nDid the agent fully complete the task with real results (not excuses, not plans, not partial attempts)?` }
|
|
1514
|
+
],
|
|
1515
|
+
stream: false,
|
|
1516
|
+
think: false,
|
|
1517
|
+
options: { num_ctx: 2048 }
|
|
1518
|
+
})
|
|
1519
|
+
});
|
|
1520
|
+
if (!res.ok) return true;
|
|
1521
|
+
const data = await res.json();
|
|
1522
|
+
const answer = (data.message?.content || '').toLowerCase().trim();
|
|
1523
|
+
console.log(` [_isTaskComplete] verdict: "${answer}"`);
|
|
1524
|
+
return answer.startsWith('yes');
|
|
1525
|
+
} catch (e) {
|
|
1526
|
+
console.warn(`⚠️ [_isTaskComplete] error: ${e.message}`);
|
|
1527
|
+
return true; // assume done on error to avoid infinite loops
|
|
1528
|
+
}
|
|
843
1529
|
}
|
|
844
1530
|
}
|