@hamp10/agentforge 0.2.16 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agentforge.js +25 -2
- package/package.json +5 -1
- package/scripts/postinstall.js +62 -0
- package/src/OllamaAgent.js +923 -201
- package/src/hampagent/browser.js +209 -73
- package/src/selfUpdate.js +7 -2
- package/src/worker.js +68 -36
- package/templates/agent/AGENTFORGE.md +120 -0
package/src/OllamaAgent.js
CHANGED
|
@@ -1,16 +1,30 @@
|
|
|
1
1
|
import { exec } from 'child_process';
|
|
2
|
-
import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync } from 'fs';
|
|
2
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync, appendFileSync } from 'fs';
|
|
3
3
|
import { EventEmitter } from 'events';
|
|
4
4
|
import path from 'path';
|
|
5
5
|
import { promisify } from 'util';
|
|
6
6
|
import { fileURLToPath } from 'url';
|
|
7
|
+
import { browserAction } from './hampagent/browser.js';
|
|
7
8
|
|
|
8
9
|
const execAsync = promisify(exec);
|
|
9
10
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
11
|
|
|
12
|
+
// ── Worker log file — every console.log line is mirrored to /tmp/agentforge/worker.log
// so logs are accessible remotely via SSH regardless of how the worker was started.
const WORKER_LOG = '/tmp/agentforge/worker.log';

/**
 * Render a single console.log argument as text for the worker log file.
 *
 * - Error instances are written with their stack (JSON.stringify would yield "{}").
 * - Other objects are JSON-encoded; if encoding fails (circular reference, BigInt)
 *   or produces nothing, the value falls back to String(arg) so the line is kept.
 * - Primitives are converted with String().
 *
 * @param {*} arg - one argument passed to console.log
 * @returns {string} text representation written to the log
 */
function _formatLogArg(arg) {
  if (arg instanceof Error) {
    return arg.stack || `${arg.name}: ${arg.message}`;
  }
  if (typeof arg === 'object' && arg !== null) {
    try {
      const json = JSON.stringify(arg);
      if (json !== undefined) return json;
    } catch {
      // Not serialisable (circular / BigInt) — fall through to String().
    }
  }
  return String(arg);
}

// Best-effort: if the directory cannot be created, the append below fails quietly too.
try { mkdirSync(path.dirname(WORKER_LOG), { recursive: true }); } catch {}

const _origLog = console.log.bind(console);
console.log = (...args) => {
  _origLog(...args);
  try {
    const line = args.map((a) => _formatLogArg(a)).join(' ');
    appendFileSync(WORKER_LOG, `${new Date().toISOString()} ${line}\n`);
  } catch {
    // Mirroring to disk must never break the worker; the console copy already happened.
  }
};
|
|
24
|
+
|
|
11
25
|
// Minimal tool definitions — one compact JSON per line, embedded in system prompt.
|
|
12
|
-
// Ollama's `tools` API param is
|
|
13
|
-
// Descriptions kept short to fit within
|
|
26
|
+
// Ollama's `tools` API param is unreliable — tools are injected as text in the system prompt.
|
|
27
|
+
// Descriptions kept short to fit within a 4096 token context window.
|
|
14
28
|
const TOOL_DEFS = [
|
|
15
29
|
{
|
|
16
30
|
type: 'function',
|
|
@@ -59,16 +73,27 @@ const TOOL_DEFS = [
|
|
|
59
73
|
description: 'Screenshot the screen. Set send_to_user=true only if user asked to see it.',
|
|
60
74
|
parameters: { type: 'object', properties: { target: { type: 'string', enum: ['screen', 'browser'] }, send_to_user: { type: 'boolean' } }, required: ['target'] }
|
|
61
75
|
}
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
type: 'function',
|
|
79
|
+
function: {
|
|
80
|
+
name: 'screenshot_and_describe',
|
|
81
|
+
description: 'Screenshot a URL and get AI visual analysis. Use after building any web app to verify it looks correct before reporting done. Set send_to_user:true to show the screenshot to the user in chat.',
|
|
82
|
+
parameters: { type: 'object', properties: {
|
|
83
|
+
url: { type: 'string', description: 'URL to screenshot (e.g. http://localhost:3458)' },
|
|
84
|
+
check_for: { type: 'string', description: 'What should be visible (e.g. "snake game with canvas, scoreboard, and game controls")' },
|
|
85
|
+
send_to_user: { type: 'boolean', description: 'Send screenshot image to user in chat (true when confirmed working)' }
|
|
86
|
+
}, required: ['url'] }
|
|
87
|
+
}
|
|
62
88
|
}
|
|
63
89
|
];
|
|
64
90
|
|
|
65
91
|
// Minimal <tools> XML for system prompt — one compact JSON per line, no outer array.
|
|
66
|
-
// Per qwen3 Hermes chat template (tokenizer_config.json).
|
|
67
92
|
const TOOLS_XML = `<tools>\n${TOOL_DEFS.map(t => JSON.stringify(t.function)).join('\n')}\n</tools>`;
|
|
68
93
|
|
|
69
94
|
/**
|
|
70
95
|
* Parse <tool_call>...</tool_call> blocks from streamed content.
|
|
71
|
-
*
|
|
96
|
+
* Some models emit: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
|
|
72
97
|
* Returns array of {name, arguments} or null if no complete tool calls found.
|
|
73
98
|
*/
|
|
74
99
|
function _parseToolCallTags(content) {
|
|
@@ -86,67 +111,170 @@ function _parseToolCallTags(content) {
|
|
|
86
111
|
return calls.length > 0 ? calls : null;
|
|
87
112
|
}
|
|
88
113
|
|
|
114
|
+
/**
 * Parse WRITE_FILE code-fence format.
 * Models struggle to JSON-escape large code files (unescaped quotes break JSON.parse).
 * This format avoids the problem: path on the first line, raw content in a code fence.
 *
 * Accepted formats:
 *   WRITE_FILE /abs/path/to/file.js
 *   ```
 *   ...raw content, no escaping needed...
 *   ```
 *
 *   write_file: /abs/path/to/file.js
 *   ```javascript
 *   ...content...
 *   ```
 *
 * The closing fence is a ``` that is followed only by optional spaces/tabs and then a
 * newline or the end of the input. Triple backticks that appear mid-line inside the
 * file content (e.g. inline markdown or a template string) therefore do not end the
 * file early.
 *
 * @param {string} content - accumulated model output to scan
 * @returns {Array<{name: string, arguments: {path: string, content: string}}>|null}
 *   one write_file call per matched block, or null if no matches found
 */
function _parseWriteFileFences(content) {
  if (!content) return null;
  const text = String(content);
  // Match WRITE_FILE <path> or write_file: <path> followed by a code fence.
  const re = /(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n([\s\S]*?)```(?=[ \t]*(?:\r?\n|$))/gi;
  const calls = [];
  for (const m of text.matchAll(re)) {
    const filePath = m[1].trim();
    const fileContent = m[2]; // raw content, no unescaping needed
    if (!filePath || fileContent === undefined) continue;
    calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
  }
  return calls.length > 0 ? calls : null;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Fallback: parse "Writing filename...\n```lang\ncontent\n```" code blocks.
 * Many local models ignore the WRITE_FILE instruction and use raw markdown blocks.
 * The filename comes from the "Writing X..." line; relative names are placed in the
 * project dir.
 *
 * Project dir is inferred from the most recent "mkdir -p <path>" in the content and
 * defaults to workDir. The path may be double-quoted, single-quoted or bare; a bare
 * path ends at whitespace or a shell operator, so "mkdir -p /a/b && cd /a/b" yields
 * "/a/b". Only a leading "~" is expanded to $HOME (or /tmp when HOME is unset).
 * Candidates containing "$" are ignored because they cannot be resolved here.
 *
 * @param {string} content - accumulated model output to scan
 * @param {string} workDir - directory used for relative filenames when no mkdir is found
 * @returns {Array<{name: string, arguments: {path: string, content: string}}>|null}
 *   one write_file call per matched block, or null if nothing matched
 */
function _parseWritingFallback(content, workDir) {
  if (!content) return null;
  const text = String(content);

  // Infer project dir from the last mkdir -p command in the stream.
  let projectDir = workDir;
  const mkdirRe = /mkdir\s+-p\s+(?:"([^"\n]+)"|'([^'\n]+)'|([^\s"'`;&|)]+))/g;
  const mkdirMatches = [...text.matchAll(mkdirRe)];
  if (mkdirMatches.length > 0) {
    const last = mkdirMatches[mkdirMatches.length - 1];
    const rawDir = (last[1] ?? last[2] ?? last[3] ?? '').trim();
    // Expand "~" only when it is the leading path component ("~" or "~/...").
    const candidate = rawDir.replace(/^~(?=\/|$)/, process.env.HOME || '/tmp');
    if (candidate && !candidate.includes('$')) projectDir = candidate;
  }

  // Match: "Writing filename...\n```lang\ncontent\n```"
  const re = /Writing\s+([\w./\-]+?)(?:\.{3})?\s*\n```[^\n]*\n([\s\S]*?)```(?:\n|$)/gi;
  const calls = [];
  for (const m of text.matchAll(re)) {
    const filename = m[1].trim();
    const fileContent = m[2];
    if (!filename || fileContent === undefined) continue;
    // Skip if this is just a status echo with no real code.
    if (fileContent.trim().length < 5) continue;
    // NOTE(review): filename may contain "../" segments — confirm the write_file
    // executor constrains where files can land.
    const filePath = filename.startsWith('/') ? filename : `${projectDir}/${filename}`;
    calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
  }
  return calls.length > 0 ? calls : null;
}
|
|
181
|
+
|
|
89
182
|
/**
 * Detect text-based tool calls from model content.
 * Models that don't use native tool_calls emit JSON in their text content instead.
 *
 * Supported object schemas (normalised to {name, arguments}):
 *   - {name, arguments}                      OpenAI-style; arguments may be a JSON string
 *   - {tool, args}                           alternate style
 *   - {action, path|content|command|url|target}   shorthand; "write"/"read"/"list" and
 *     "run"/"execute"/"exec" are mapped to write_file/read_file/list_directory/bash
 *
 * Supported layouts:
 *   - Pure JSON (whole content is one or more JSON objects, or an array of them)
 *   - Mixed: "Status line\n{json}" — narration before the tool call JSON
 *   - Multi-line (pretty-printed) JSON starting at the beginning of a line
 *
 * A line that starts with "{" or "[" but does not lead to parseable JSON (unbalanced
 * narration, stray closing brace) is skipped on its own; scanning resumes at the next
 * line so later tool calls are still found.
 *
 * @param {string} content - model text output
 * @returns {Array<{name: string, arguments: *}>|null} tool calls found, else null
 */
function _parseTextToolCalls(content) {
  if (!content) return null;
  const trimmed = content.trim();
  if (!trimmed) return null;

  // Normalise a single parsed value into {name, arguments}, or null if it is not a call.
  const normalise = (obj) => {
    if (obj === null || typeof obj !== 'object') return null;
    try {
      if (typeof obj.name === 'string' && obj.arguments !== undefined) {
        const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
        return { name: obj.name, arguments: args };
      }
      if (typeof obj.tool === 'string' && obj.args !== undefined) {
        return { name: obj.tool, arguments: obj.args };
      }
      // Handle {action, ...} shorthand the model sometimes emits.
      if (typeof obj.action === 'string') {
        const action = obj.action.toLowerCase().replace(/[ -]/g, '_');
        // Map common action names to tool names; anything else is used as-is.
        const toolName = action === 'write' ? 'write_file'
          : action === 'read' ? 'read_file'
          : action === 'list' ? 'list_directory'
          : action === 'run' || action === 'execute' || action === 'exec' ? 'bash'
          : action;
        const args = {};
        if (obj.path !== undefined) args.path = obj.path;
        if (obj.content !== undefined) args.content = obj.content;
        if (obj.command !== undefined) args.command = obj.command;
        if (obj.url !== undefined) args.url = obj.url;
        if (obj.target !== undefined) args.target = obj.target;
        if (Object.keys(args).length > 0) return { name: toolName, arguments: args };
      }
    } catch {
      // arguments was a string that is not valid JSON — not a usable call.
    }
    return null;
  };

  // Extract all JSON values that start at the beginning of a line.
  // This handles both pure-JSON responses and "narration\n{json}" mixed responses.
  const calls = [];
  const lines = trimmed.split('\n');
  let i = 0;
  while (i < lines.length) {
    const first = lines[i].trim();
    if (!first.startsWith('{') && !first.startsWith('[')) {
      i++;
      continue;
    }

    // Accumulate lines until brackets balance (handles multi-line JSON).
    // Brackets inside JSON strings are ignored so CSS/HTML braces don't skew the count.
    const start = i;
    let jsonStr = '';
    let depth = 0;
    while (i < lines.length) {
      const l = lines[i];
      jsonStr += (jsonStr ? '\n' : '') + l;
      let inString = false;
      let escape = false;
      for (const ch of l) {
        if (escape) { escape = false; continue; }
        if (ch === '\\' && inString) { escape = true; continue; }
        if (ch === '"') { inString = !inString; continue; }
        if (!inString) {
          if (ch === '{' || ch === '[') depth++;
          else if (ch === '}' || ch === ']') depth--;
        }
      }
      i++;
      // <= 0: a surplus closing bracket must end the candidate, not swallow the rest.
      if (depth <= 0 && jsonStr.trim()) break;
    }

    let parsed;
    try {
      parsed = JSON.parse(jsonStr.trim());
    } catch {
      // Not JSON after all: discard only the starting line and rescan from the next one.
      i = start + 1;
      continue;
    }

    const items = Array.isArray(parsed) ? parsed : [parsed];
    for (const item of items) {
      const call = normalise(item);
      if (call) calls.push(call);
    }
  }
  return calls.length > 0 ? calls : null;
}
|
|
@@ -206,13 +334,13 @@ export class OllamaAgent extends EventEmitter {
|
|
|
206
334
|
return { agentId, workDir };
|
|
207
335
|
}
|
|
208
336
|
|
|
209
|
-
async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null) {
|
|
337
|
+
async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null) {
|
|
210
338
|
const startTime = Date.now();
|
|
211
339
|
const controller = new AbortController();
|
|
212
340
|
|
|
213
341
|
// Use per-agent model override if provided (and not the placeholder 'Default').
|
|
214
|
-
// Strip 'ollama/' prefix — catalog returns IDs like 'ollama/
|
|
215
|
-
// Ollama's API expects bare names like '
|
|
342
|
+
// Strip 'ollama/' prefix — catalog returns IDs like 'ollama/modelname:tag' but
|
|
343
|
+
// Ollama's API expects bare names like 'modelname:tag'.
|
|
216
344
|
const rawModel = (agentModel && agentModel !== 'Default') ? agentModel : this.model;
|
|
217
345
|
const effectiveModel = rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel;
|
|
218
346
|
|
|
@@ -224,82 +352,153 @@ export class OllamaAgent extends EventEmitter {
|
|
|
224
352
|
console.log(` Task: ${task}`);
|
|
225
353
|
console.log(` Working dir: ${workDir}`);
|
|
226
354
|
|
|
227
|
-
// Detect model capabilities
|
|
228
|
-
const isQwen3 = effectiveModel.startsWith('qwen3');
|
|
229
|
-
const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(effectiveModel);
|
|
230
|
-
|
|
231
355
|
try {
|
|
232
|
-
// Load conversation history
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
356
|
+
// Load conversation history — prefer Railway DB history (sent via task payload, works across
|
|
357
|
+
// any machine/user/model). Fall back to local file for offline or pre-fix sessions.
|
|
358
|
+
const history = (conversationHistory && conversationHistory.length > 0)
|
|
359
|
+
? conversationHistory.slice(-20)
|
|
360
|
+
: this._loadHistory(agentId, workDir, sessionId);
|
|
361
|
+
|
|
362
|
+
// Text-based tool format is used rather than XML schemas — more reliable across models.
|
|
363
|
+
// Use flow's custom system prompt if provided, otherwise fall back to built-in default.
|
|
364
|
+
// ALL models get the same rule set and tool format — no model-specific branching.
|
|
365
|
+
const homeDir = process.env.HOME || '/tmp';
|
|
366
|
+
const projectsDir = `${homeDir}/Desktop/Projects`;
|
|
367
|
+
const universalRules = `
|
|
368
|
+
== WHAT YOU CAN DO ==
|
|
369
|
+
You have these tools:
|
|
370
|
+
|
|
371
|
+
bash: Run any shell command — file ops, servers, packages, logs, system queries.
|
|
372
|
+
read_file: Read a local file.
|
|
373
|
+
WRITE_FILE: Write a local file (code-fence format only).
|
|
374
|
+
list_directory: List a local directory.
|
|
375
|
+
web_fetch: Fetch any public URL — websites, APIs, docs, raw data. Fast, text-only.
|
|
376
|
+
screenshot_and_describe: Navigate a real browser to any URL and screenshot it. Use this when pages require JavaScript, you need visual output, or web_fetch returns nothing useful.
|
|
377
|
+
browser: Control the AgentForge Browser directly (Chrome, always running, logged into user's services). Use for ALL browser interaction — navigating, clicking, typing, reading page content, screenshots.
|
|
378
|
+
|
|
379
|
+
BROWSER TOOL — use this instead of writing CDP scripts:
|
|
380
|
+
{"name":"browser","arguments":{"action":"tabs"}} ← list ALL open tabs with URLs (DO THIS FIRST)
|
|
381
|
+
{"name":"browser","arguments":{"action":"snapshot"}} ← read current page content + interactive elements (also shows all tabs)
|
|
382
|
+
{"name":"browser","arguments":{"action":"navigate","url":"https://..."}} ← go to URL
|
|
383
|
+
{"name":"browser","arguments":{"action":"focus","url":"expireddomains"}} ← switch to a tab by URL fragment
|
|
384
|
+
{"name":"browser","arguments":{"action":"click","ref":3}} ← click element by index from snapshot
|
|
385
|
+
{"name":"browser","arguments":{"action":"click","text":"Show Filter"}} ← click element by visible text
|
|
386
|
+
{"name":"browser","arguments":{"action":"click","selector":"#filter-btn"}} ← click by CSS selector
|
|
387
|
+
{"name":"browser","arguments":{"action":"type","selector":"input","text":"hello"}} ← type text
|
|
388
|
+
{"name":"browser","arguments":{"action":"screenshot"}} ← take screenshot
|
|
389
|
+
{"name":"browser","arguments":{"action":"evaluate","script":"document.title"}} ← run JS
|
|
390
|
+
{"name":"browser","arguments":{"action":"scroll","y":400}} ← scroll down
|
|
391
|
+
|
|
392
|
+
WORKFLOW when user says "the tab is already open":
|
|
393
|
+
1. browser tabs → see ALL open tabs and their URLs
|
|
394
|
+
2. browser focus with the URL fragment of the tab you need (e.g. "expireddomains")
|
|
395
|
+
3. browser snapshot → read page content and get element indices
|
|
396
|
+
4. browser click to interact (by ref index, by text, or by selector)
|
|
397
|
+
5. browser snapshot again to see result
|
|
398
|
+
The browser has the user's sessions and cookies. You CAN click any button, filter, or link visible on the page.
|
|
399
|
+
|
|
400
|
+
== GENERAL RULES (all tasks) ==
|
|
401
|
+
G1. IDENTIFY THE TASK TYPE. Build? Research? Question? Match approach to task.
|
|
402
|
+
G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer.
|
|
403
|
+
G3. ANY WEBSITE/URL IS ACCESSIBLE. User mentions a site or open tab? Use browser snapshot to see what's currently open, then browser navigate/click/type to interact. Never ask "what's the URL?" — find it yourself.
|
|
404
|
+
G4. NEVER ASK PERMISSION. Never say "should I use X or Y?" — pick the right tool and use it.
|
|
405
|
+
G5. IF A TOOL FAILS: Try a different approach. web_fetch empty → screenshot_and_describe. Never repeat a failing call identically.
|
|
406
|
+
G6. RESEARCH TASKS: web_fetch → read → reason → respond in text. No server, no localhost.
|
|
407
|
+
G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data.
|
|
408
|
+
G8. WHEN GENUINELY STUCK: State what you tried, what failed, ask ONE specific question.
|
|
409
|
+
G9. KEEP GOING until the task is fully complete.
|
|
410
|
+
|
|
411
|
+
== BUILD RULES (only when building apps/games/tools) ==
|
|
412
|
+
B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spaces — use underscores).
|
|
413
|
+
B2. WRITE EVERY FILE COMPLETELY — no stubs, no placeholders, no TODOs. Full working code only.
|
|
414
|
+
B3. BUILD FILE BY FILE — write each file completely before writing the next.
|
|
415
|
+
B4. ALWAYS use absolute paths.
|
|
416
|
+
B5. SERVING FILES: Node.js server: nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 & — NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server PORT --directory /abs/path/ > /tmp/server.log 2>&1 &
|
|
417
|
+
B6. npm install: cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express
|
|
418
|
+
B7. After starting server, verify: sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:PORT — if 000, check /tmp/server.log and fix the error.
|
|
419
|
+
B8. PORT MANAGEMENT: Check port before starting: lsof -i :PORT | head -3. If in use: kill old process, restart. If crashed: restart. If busy with something else: pick different port.
|
|
420
|
+
B9. EXPRESS WILDCARD ROUTE: NEVER write app.get('*', ...) — crashes in newer versions. Use app.use((req, res) => { ... }) instead.
|
|
421
|
+
B10. MANDATORY SCREENSHOT QA: After curl returns 200, call screenshot_and_describe with send_to_user:true. You are NOT done until the screenshot shows the real working app.
|
|
422
|
+
B11. ALWAYS open the finished app: bash open http://localhost:PORT
|
|
423
|
+
B12. CANVAS GAMES: canvas 800×600, dark background #1a1a2e, all elements clearly visible. Dark theme, styled UI.
|
|
424
|
+
B13. OBSERVE BEFORE FIXING: Screenshot first, then make targeted edits. Never rewrite an entire file from scratch when the server is running.
|
|
425
|
+
B14. TARGETED EDITS: read_file to see current code, write_file only the changed section. Never throw away working code.
|
|
426
|
+
B15. QUALITY LOOP: After each fix, screenshot again to verify. Iterate until it looks correct.
|
|
427
|
+
B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.`;
|
|
428
|
+
// Text-based tool format works reliably across all local models.
|
|
429
|
+
// WRITE_FILE uses code-fence to avoid JSON-escaping issues; all other tools use JSON.
|
|
430
|
+
const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nDO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nTO WRITE A FILE (only when actually writing code/content to disk):\nWriting server.js...\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\n\nFOR ALL OTHER TOOLS — output JSON on its own line:\nRunning command...\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for","send_to_user":true}} — open ANY URL in a real browser and screenshot it. Use when pages are dynamic/JS-heavy or you need to show the user visuals.\n\n${universalRules}`;
|
|
431
|
+
const systemPrompt = customSystemPrompt || jsonToolFormat;
|
|
263
432
|
|
|
264
433
|
const messages = [
|
|
265
434
|
{ role: 'system', content: systemPrompt },
|
|
266
435
|
...history,
|
|
267
436
|
];
|
|
268
437
|
|
|
269
|
-
// Attach initial image
|
|
438
|
+
// Attach initial image if provided — always include it; models that don't support
|
|
439
|
+
// images will ignore the field, and if they error we catch it below.
|
|
270
440
|
const userMessage = { role: 'user', content: task };
|
|
271
|
-
if (image
|
|
441
|
+
if (image) {
|
|
272
442
|
const base64 = image.replace(/^data:image\/\w+;base64,/, '');
|
|
273
443
|
userMessage.images = [base64];
|
|
274
444
|
}
|
|
275
445
|
messages.push(userMessage);
|
|
276
446
|
|
|
447
|
+
// Force-unload any currently loaded model so it reloads with our num_ctx setting.
|
|
448
|
+
// Model-agnostic and machine-agnostic — guarantees 32K context on every task.
|
|
449
|
+
try {
|
|
450
|
+
await fetch(`${this.baseUrl}/api/generate`, {
|
|
451
|
+
method: 'POST', signal: controller.signal,
|
|
452
|
+
headers: { 'Content-Type': 'application/json' },
|
|
453
|
+
body: JSON.stringify({ model: effectiveModel, keep_alive: 0, prompt: '' })
|
|
454
|
+
});
|
|
455
|
+
} catch { /* ignore — model may not be loaded yet */ }
|
|
456
|
+
|
|
277
457
|
let finalContent = '';
|
|
278
458
|
let allOutput = ''; // accumulate everything streamed across all turns
|
|
279
459
|
const toolsUsed = []; // track tool names called (for fallback summary)
|
|
280
|
-
|
|
460
|
+
// No hard turn limit — agent runs until done, loop-detected, or wall-clock timeout.
|
|
461
|
+
const recentCalls = []; // last N tool calls for loop detection
|
|
462
|
+
let emptyRetries = 0; // consecutive empty-response retries
|
|
281
463
|
|
|
282
|
-
for (let turn = 0;
|
|
464
|
+
for (let turn = 0; ; turn++) {
|
|
283
465
|
if (controller.signal.aborted) break;
|
|
284
466
|
|
|
285
467
|
this.emit('tool_activity', { agentId, event: 'tool_start', tool: 'model', description: `Thinking…` });
|
|
286
468
|
|
|
469
|
+
// All local Ollama models use the native /api/chat endpoint.
|
|
470
|
+
// The OpenAI-compatible /v1/chat/completions endpoint ignores options.num_ctx,
|
|
471
|
+
// causing all models to run at 4096-token context regardless of what we pass.
|
|
472
|
+
const isOllamaBackend = this.baseUrl.includes('11434') || this.baseUrl.includes('localhost') || this.baseUrl.includes('127.0.0.1');
|
|
473
|
+
const useNativeEndpoint = isOllamaBackend; // all local models use native endpoint
|
|
474
|
+
|
|
287
475
|
let response;
|
|
288
476
|
try {
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
//
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
477
|
+
|
|
478
|
+
let requestBody;
|
|
479
|
+
let endpoint;
|
|
480
|
+
|
|
481
|
+
if (useNativeEndpoint) {
|
|
482
|
+
// Ollama native format — supports think:false at top level
|
|
483
|
+
endpoint = `${this.baseUrl}/api/chat`;
|
|
484
|
+
requestBody = {
|
|
485
|
+
model: effectiveModel,
|
|
486
|
+
messages,
|
|
487
|
+
stream: true,
|
|
488
|
+
think: false, // top-level think disable — WORKS on native endpoint
|
|
489
|
+
options: { num_ctx: 32768 },
|
|
490
|
+
};
|
|
491
|
+
} else {
|
|
492
|
+
endpoint = `${this.baseUrl}/v1/chat/completions`;
|
|
493
|
+
requestBody = {
|
|
494
|
+
model: effectiveModel,
|
|
495
|
+
messages,
|
|
496
|
+
stream: true,
|
|
497
|
+
options: { num_ctx: 32768 },
|
|
498
|
+
};
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
response = await fetch(endpoint, {
|
|
303
502
|
method: 'POST',
|
|
304
503
|
headers: { 'Content-Type': 'application/json' },
|
|
305
504
|
signal: controller.signal,
|
|
@@ -315,21 +514,30 @@ export class OllamaAgent extends EventEmitter {
|
|
|
315
514
|
throw new Error(`Local model error ${response.status}: ${body}`);
|
|
316
515
|
}
|
|
317
516
|
|
|
318
|
-
// ── Stream the
|
|
319
|
-
//
|
|
320
|
-
//
|
|
321
|
-
//
|
|
322
|
-
|
|
517
|
+
// ── Stream the response ──
|
|
518
|
+
// Two formats:
|
|
519
|
+
// Ollama native (/api/chat): NDJSON lines — {"message":{"content":"..."},"done":false}
|
|
520
|
+
// OpenAI-compatible (/v1/...): SSE lines — data: {"choices":[{"delta":{"content":"..."}}]}
|
|
521
|
+
// Models may emit <tool_call>...</tool_call> or <think>...</think> blocks in text content.
|
|
522
|
+
// Stream text live to user; suppress think blocks and raw JSON tool call blobs.
|
|
523
|
+
let streamContent = ''; // full accumulated text (including any tool_call/think blocks)
|
|
323
524
|
let visibleContent = ''; // text emitted live to user (no tool_call or think blocks)
|
|
324
|
-
let streamToolCalls = {}; // OpenAI-format tool calls
|
|
525
|
+
let streamToolCalls = {}; // OpenAI-format tool calls from native tool_calls field
|
|
325
526
|
let inThinkBlock = false;
|
|
326
527
|
let inToolCallBlock = false; // inside <tool_call>...</tool_call>
|
|
528
|
+
let inJsonBlob = false; // inside bare JSON tool call — suppress from streaming
|
|
529
|
+
let inFenceBlock = false; // inside WRITE_FILE code fence — suppress content from streaming
|
|
530
|
+
let fenceDepth = 0; // ``` count since last WRITE_FILE (even=closed, odd=open)
|
|
327
531
|
let rawTokenCount = 0;
|
|
532
|
+
let lastVisibleAt = Date.now(); // track when we last got visible output (for think timeout)
|
|
328
533
|
|
|
329
534
|
const reader = response.body.getReader();
|
|
330
535
|
const decoder = new TextDecoder();
|
|
331
536
|
let buf = '';
|
|
332
537
|
|
|
538
|
+
// No timeouts — local model can take as long as it needs on any turn.
|
|
539
|
+
// Only the user abort (controller.signal) or stream end stops a turn.
|
|
540
|
+
let turnRetry = false;
|
|
333
541
|
while (true) {
|
|
334
542
|
if (controller.signal.aborted) break;
|
|
335
543
|
const { done, value } = await reader.read();
|
|
@@ -340,33 +548,47 @@ export class OllamaAgent extends EventEmitter {
|
|
|
340
548
|
buf = lines.pop();
|
|
341
549
|
|
|
342
550
|
for (const line of lines) {
|
|
343
|
-
if (!line.
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
551
|
+
if (!line.trim()) continue;
|
|
552
|
+
|
|
553
|
+
let tokenText = null;
|
|
554
|
+
|
|
555
|
+
if (useNativeEndpoint) {
|
|
556
|
+
// Ollama native NDJSON format
|
|
557
|
+
let nativeEvt;
|
|
558
|
+
try { nativeEvt = JSON.parse(line); } catch { continue; }
|
|
559
|
+
if (nativeEvt.done) continue;
|
|
560
|
+
tokenText = nativeEvt.message?.content ?? null;
|
|
561
|
+
} else {
|
|
562
|
+
// OpenAI SSE format
|
|
563
|
+
if (!line.startsWith('data: ')) continue;
|
|
564
|
+
const payload = line.slice(6).trim();
|
|
565
|
+
if (payload === '[DONE]') continue;
|
|
566
|
+
let evt;
|
|
567
|
+
try { evt = JSON.parse(payload); } catch { continue; }
|
|
568
|
+
|
|
569
|
+
const delta = evt.choices?.[0]?.delta;
|
|
570
|
+
if (!delta) continue;
|
|
571
|
+
|
|
572
|
+
// Standard OpenAI tool_calls from native tool_calls field
|
|
573
|
+
if (delta.tool_calls) {
|
|
574
|
+
for (const tc of delta.tool_calls) {
|
|
575
|
+
const idx = tc.index ?? 0;
|
|
576
|
+
if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
|
|
577
|
+
if (tc.id) streamToolCalls[idx].id = tc.id;
|
|
578
|
+
if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
|
|
579
|
+
if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
|
|
580
|
+
}
|
|
360
581
|
}
|
|
582
|
+
tokenText = delta.content ?? null;
|
|
361
583
|
}
|
|
362
584
|
|
|
363
|
-
if (
|
|
585
|
+
if (tokenText === null) continue;
|
|
364
586
|
rawTokenCount++;
|
|
365
|
-
streamContent +=
|
|
587
|
+
streamContent += tokenText;
|
|
366
588
|
|
|
367
589
|
// Process token through think + tool_call filters, emit visible text live
|
|
368
590
|
// We scan only the new delta token against the current buffer state
|
|
369
|
-
const chunk =
|
|
591
|
+
const chunk = tokenText;
|
|
370
592
|
let visible = '';
|
|
371
593
|
// Simple per-token state machine — handles split tags across tokens by tracking state flags
|
|
372
594
|
if (!inThinkBlock && !inToolCallBlock) {
|
|
@@ -392,9 +614,59 @@ export class OllamaAgent extends EventEmitter {
|
|
|
392
614
|
inToolCallBlock = false;
|
|
393
615
|
}
|
|
394
616
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
617
|
+
// Scan ALL lines completed in this token for state transitions.
|
|
618
|
+
// Multi-char tokens can contain multiple lines (WRITE_FILE + ``` in same token).
|
|
619
|
+
if (tokenText.includes('\n')) {
|
|
620
|
+
const tokenStartIdx = streamContent.length - tokenText.length;
|
|
621
|
+
let nlIdx = streamContent.indexOf('\n', tokenStartIdx);
|
|
622
|
+
while (nlIdx !== -1) {
|
|
623
|
+
const lineStart = Math.max(0, streamContent.lastIndexOf('\n', nlIdx - 1)) + 1;
|
|
624
|
+
const line = streamContent.slice(lineStart, nlIdx).trim();
|
|
625
|
+
if (/^(WRITE_FILE|write_file)[:\s]+\S/i.test(line)) {
|
|
626
|
+
inFenceBlock = true; fenceDepth = 0;
|
|
627
|
+
} else if (inFenceBlock && /^```/.test(line)) {
|
|
628
|
+
fenceDepth++;
|
|
629
|
+
if (fenceDepth >= 2 && fenceDepth % 2 === 0) inFenceBlock = false;
|
|
630
|
+
} else if (!inFenceBlock && !inJsonBlob && line.length > 1 && (line.startsWith('{') || line.startsWith('['))) {
|
|
631
|
+
inJsonBlob = true;
|
|
632
|
+
}
|
|
633
|
+
nlIdx = streamContent.indexOf('\n', nlIdx + 1);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
// Also check current partial line (mid-token, before next \n)
|
|
638
|
+
if (!inFenceBlock || !inJsonBlob) {
|
|
639
|
+
const cleanSC = streamContent.replace(/<think>[\s\S]*?<\/think>/g, '');
|
|
640
|
+
const lastNL = cleanSC.lastIndexOf('\n');
|
|
641
|
+
const curLine = cleanSC.slice(lastNL + 1).trimStart();
|
|
642
|
+
if (!inFenceBlock && /^(WRITE_FILE|write_file)[:\s]+\S/i.test(curLine)) {
|
|
643
|
+
inFenceBlock = true; fenceDepth = 0;
|
|
644
|
+
}
|
|
645
|
+
if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || curLine.startsWith('['))) {
|
|
646
|
+
inJsonBlob = true;
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
// Emit visible content — safety filter removes any ``` or WRITE_FILE lines
|
|
651
|
+
// that slipped through (e.g. partial token at detection boundary)
|
|
652
|
+
if (visible && !inThinkBlock && !inToolCallBlock && !inJsonBlob && !inFenceBlock) {
|
|
653
|
+
const safe = visible.split('\n').filter(ln => {
|
|
654
|
+
const t = ln.trimStart();
|
|
655
|
+
return !t.startsWith('```') && !/^(WRITE_FILE|write_file)/i.test(t);
|
|
656
|
+
}).join('\n');
|
|
657
|
+
if (safe.trim() || safe.includes('\n')) {
|
|
658
|
+
visibleContent += safe;
|
|
659
|
+
lastVisibleAt = Date.now();
|
|
660
|
+
this.emit('agent_output', { agentId, output: safe, isChunk: true });
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
// Thinking timeout: if the model has been in a <think> block for >90s with no visible output,
|
|
665
|
+
// abort the stream so we can retry with a kick. Prevents infinite thinking loops.
|
|
666
|
+
if (inThinkBlock && (Date.now() - lastVisibleAt) > 90000 && rawTokenCount > 100) {
|
|
667
|
+
console.log(` [${agentId}] ⏱️ Think timeout (>90s, ${rawTokenCount} tokens) — aborting stream`);
|
|
668
|
+
reader.cancel().catch(() => {});
|
|
669
|
+
break;
|
|
398
670
|
}
|
|
399
671
|
}
|
|
400
672
|
}
|
|
@@ -403,17 +675,35 @@ export class OllamaAgent extends EventEmitter {
|
|
|
403
675
|
if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
|
|
404
676
|
|
|
405
677
|
// ── Extract tool calls from content ───────────────────────────────────
|
|
406
|
-
//
|
|
407
|
-
//
|
|
678
|
+
// Try <tool_call> XML tags first (some models emit this format), then fall through
|
|
679
|
+
// to code-fence and JSON text parsers.
|
|
408
680
|
let parsedTagCalls = null;
|
|
409
|
-
if (
|
|
681
|
+
if (Object.keys(streamToolCalls).length === 0) {
|
|
410
682
|
parsedTagCalls = _parseToolCallTags(streamContent);
|
|
411
683
|
if (parsedTagCalls) {
|
|
412
684
|
console.log(` [${agentId}] 🔍 ${parsedTagCalls.length} <tool_call> tag(s) detected`);
|
|
413
685
|
}
|
|
414
686
|
}
|
|
415
687
|
|
|
416
|
-
// Fallback: try
|
|
688
|
+
// Fallback 1: try WRITE_FILE code-fence format (avoids JSON-escaping issues with code)
|
|
689
|
+
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
690
|
+
const fenceCalls = _parseWriteFileFences(streamContent);
|
|
691
|
+
if (fenceCalls) {
|
|
692
|
+
console.log(` [${agentId}] 🔍 ${fenceCalls.length} WRITE_FILE fence(s) detected`);
|
|
693
|
+
parsedTagCalls = fenceCalls;
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Fallback 2: "Writing filename...\n```\ncontent\n```" (model ignored WRITE_FILE instruction)
|
|
698
|
+
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
699
|
+
const writingCalls = _parseWritingFallback(streamContent, workDir);
|
|
700
|
+
if (writingCalls) {
|
|
701
|
+
console.log(` [${agentId}] 🔍 ${writingCalls.length} Writing-block fallback file(s) detected`);
|
|
702
|
+
parsedTagCalls = writingCalls;
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
// Fallback 3: try legacy JSON-blob detection if no tags found
|
|
417
707
|
if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
|
|
418
708
|
const textCalls = _parseTextToolCalls(streamContent);
|
|
419
709
|
if (textCalls) {
|
|
@@ -422,9 +712,35 @@ export class OllamaAgent extends EventEmitter {
|
|
|
422
712
|
}
|
|
423
713
|
}
|
|
424
714
|
|
|
425
|
-
//
|
|
715
|
+
// Fallback 4: if we found ONLY bash tool calls but content has writing blocks too,
|
|
716
|
+
// merge them so files get written AND bash runs
|
|
717
|
+
if (parsedTagCalls && streamContent) {
|
|
718
|
+
const writingCalls = _parseWritingFallback(streamContent, workDir);
|
|
719
|
+
if (writingCalls) {
|
|
720
|
+
const existingPaths = new Set(parsedTagCalls.filter(c => c.name === 'write_file').map(c => c.arguments.path));
|
|
721
|
+
const newWrites = writingCalls.filter(c => !existingPaths.has(c.arguments.path));
|
|
722
|
+
if (newWrites.length > 0) {
|
|
723
|
+
console.log(` [${agentId}] 🔍 +${newWrites.length} additional Writing-block file(s) merged`);
|
|
724
|
+
// Prepend file writes before bash commands so files exist before server starts
|
|
725
|
+
parsedTagCalls = [...newWrites, ...parsedTagCalls];
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
// Convert tag/text calls into streamToolCalls structure.
|
|
731
|
+
// Deduplicate: if model emits the same tool call N times in one stream, only run it once.
|
|
426
732
|
if (parsedTagCalls) {
|
|
427
|
-
|
|
733
|
+
const seen = new Set();
|
|
734
|
+
const deduped = parsedTagCalls.filter(tc => {
|
|
735
|
+
const key = `${tc.name}:${JSON.stringify(tc.arguments)}`;
|
|
736
|
+
if (seen.has(key)) return false;
|
|
737
|
+
seen.add(key);
|
|
738
|
+
return true;
|
|
739
|
+
});
|
|
740
|
+
if (deduped.length < parsedTagCalls.length) {
|
|
741
|
+
console.log(` [${agentId}] 🔁 Deduplicated ${parsedTagCalls.length} → ${deduped.length} tool call(s)`);
|
|
742
|
+
}
|
|
743
|
+
deduped.forEach((tc, i) => {
|
|
428
744
|
streamToolCalls[i] = { id: `tag-${i}`, type: 'function', function: { name: tc.name, arguments: JSON.stringify(tc.arguments) } };
|
|
429
745
|
});
|
|
430
746
|
// Don't accumulate raw tool_call XML as user-visible output
|
|
@@ -439,17 +755,17 @@ export class OllamaAgent extends EventEmitter {
|
|
|
439
755
|
});
|
|
440
756
|
|
|
441
757
|
// ── Push assistant message ────────────────────────────────────────────
|
|
758
|
+
// All local models now use JSON-in-text format on the native endpoint.
|
|
759
|
+
// Strip <think>...</think> blocks to avoid burning context on reasoning traces.
|
|
442
760
|
const toolCallsArray = Object.values(streamToolCalls);
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
});
|
|
452
|
-
}
|
|
761
|
+
const hasToolCalls = toolCallsArray.length > 0;
|
|
762
|
+
const cleanedContent = (streamContent || '')
|
|
763
|
+
.replace(/<think>[\s\S]*?<\/think>/g, '')
|
|
764
|
+
.trim();
|
|
765
|
+
messages.push({ role: 'assistant', content: cleanedContent || '' });
|
|
766
|
+
|
|
767
|
+
// Incremental save — always, regardless of sessionId (sessionId is null for OllamaAgent)
|
|
768
|
+
this._saveHistory(agentId, workDir, sessionId, messages.slice(1));
|
|
453
769
|
|
|
454
770
|
// ── Execute tool calls ────────────────────────────────────────────────
|
|
455
771
|
if (toolCallsArray.length > 0) {
|
|
@@ -461,14 +777,95 @@ export class OllamaAgent extends EventEmitter {
|
|
|
461
777
|
try { parsedArgs = typeof args === 'string' ? JSON.parse(args) : args; }
|
|
462
778
|
catch { parsedArgs = {}; }
|
|
463
779
|
|
|
780
|
+
// ── Unknown tool name detection ──────────────────────────────────
|
|
781
|
+
// Block calls to tools that don't exist (e.g. model writes {"name":"curl",...}
|
|
782
|
+
// instead of {"name":"bash","arguments":{"command":"curl ..."}})
|
|
783
|
+
const VALID_TOOL_NAMES = new Set(['bash','read_file','write_file','list_directory','web_fetch','screenshot_and_describe','take_screenshot','browser']);
|
|
784
|
+
if (!VALID_TOOL_NAMES.has(name.toLowerCase())) {
|
|
785
|
+
console.log(` [${agentId}] ⚠️ Unknown tool "${name}" — blocked`);
|
|
786
|
+
messages.push({ role: 'user', content: `"${name}" is not a valid tool. Valid tools: bash, read_file, write_file, list_directory, web_fetch, screenshot_and_describe. To run a shell command use bash: {"name":"bash","arguments":{"command":"${name} ..."}}.` });
|
|
787
|
+
continue;
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
// ── Placeholder detection ────────────────────────────────────────
|
|
791
|
+
// Block tool calls where the agent passed a literal placeholder like
|
|
792
|
+
// "[The URL where the auction is being viewed]" instead of a real value.
|
|
793
|
+
// These come from the model reading its own planning text and mistaking it
|
|
794
|
+
// for a concrete argument.
|
|
795
|
+
{
|
|
796
|
+
const argStr = JSON.stringify(parsedArgs);
|
|
797
|
+
const hasPlaceholder = /\[(the |this |your |a |an |current )?(url|path|address|link|tab|page|site|location|file|directory)[^\]]*\]/i.test(argStr);
|
|
798
|
+
if (hasPlaceholder) {
|
|
799
|
+
console.log(` [${agentId}] ⚠️ Placeholder in args — blocked: ${argStr.slice(0, 120)}`);
|
|
800
|
+
messages.push({ role: 'user', content: `Tool call BLOCKED: your argument contains a placeholder "${argStr.slice(0, 100)}" — that is NOT a real URL or path. Look at the tool results already in the conversation (e.g. the curl localhost:9223/json output) and use the actual URL you found there.` });
|
|
801
|
+
continue;
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
464
805
|
this.emit('tool_activity', {
|
|
465
806
|
agentId, event: 'tool_start', tool: name,
|
|
466
807
|
description: this._toolDesc(name, parsedArgs)
|
|
467
808
|
});
|
|
468
809
|
console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
|
|
469
810
|
toolsUsed.push(name);
|
|
811
|
+
emptyRetries = 0; // reset on successful tool call
|
|
812
|
+
|
|
813
|
+
// Loop detection: catch repeated single calls AND alternating A/B/A/B patterns.
|
|
814
|
+
// Normalize curl commands: strip sleep prefix so "sleep 3 && curl ...URL" and
|
|
815
|
+
// "sleep 10 && curl ...URL" both map to the same key "curl:URL".
|
|
816
|
+
let callKey = `${name}:${JSON.stringify(parsedArgs)}`;
|
|
817
|
+
if (name === 'bash' && parsedArgs.command) {
|
|
818
|
+
const curlMatch = parsedArgs.command.match(/curl\s+.*?(https?:\/\/\S+|localhost:\d+)/);
|
|
819
|
+
if (curlMatch) callKey = `curl:${curlMatch[1]}`;
|
|
820
|
+
}
|
|
821
|
+
recentCalls.push(callKey);
|
|
822
|
+
if (recentCalls.length > 6) recentCalls.shift();
|
|
823
|
+
|
|
824
|
+
// Detect: same call 3x in a row (2x for screenshot — never valid to screenshot without a change)
|
|
825
|
+
const screenshotLoop = name === 'screenshot_and_describe' && recentCalls.length >= 2 && recentCalls.slice(-2).every(c => c === callKey);
|
|
826
|
+
const last3Same = screenshotLoop || (recentCalls.length >= 3 && recentCalls.slice(-3).every(c => c === callKey));
|
|
827
|
+
// Detect: alternating A,B,A,B pattern (last 4 calls)
|
|
828
|
+
const last4 = recentCalls.slice(-4);
|
|
829
|
+
const abab = last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1];
|
|
830
|
+
// Detect: A,B,C,A,B,C pattern (last 6)
|
|
831
|
+
const last6 = recentCalls.slice(-6);
|
|
832
|
+
const abcabc = last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5];
|
|
833
|
+
|
|
834
|
+
if (last3Same || abab || abcabc) {
|
|
835
|
+
const pattern = last3Same ? 'same call 3x' : abab ? 'A/B/A/B alternating' : 'A/B/C repeating';
|
|
836
|
+
console.log(` [${agentId}] 🔁 Loop detected (${pattern}) — injecting fix hint`);
|
|
837
|
+
// Generate a context-aware hint based on what's looping
|
|
838
|
+
let loopFixMsg = `You are repeating the same action — STOP looping. Observe first, then act.\n`;
|
|
839
|
+
const loopCmd = parsedArgs.command || parsedArgs.path || '';
|
|
840
|
+
const noThink = '';
|
|
841
|
+
if (name === 'write_file') {
|
|
842
|
+
loopFixMsg += `You keep rewriting the same file. The file already exists with your previous code. Do NOT rewrite it from scratch.\nInstead:\n1. call screenshot_and_describe to SEE what the app looks like right now\n2. Identify the specific thing that is wrong or missing\n3. read_file the file to see current content\n4. Make a TARGETED edit — change only the specific broken section\nNever rewrite an entire file when the server is already running.`;
|
|
843
|
+
} else if (loopCmd.includes('mkdir') || loopCmd.includes('client')) {
|
|
844
|
+
loopFixMsg += `Files/folders already exist. STOP creating them. Call screenshot_and_describe to see the current state of the app, then identify what specifically needs to be improved and fix it with targeted edits.`;
|
|
845
|
+
} else if (loopCmd.includes('open http')) {
|
|
846
|
+
const openPortMatch = loopCmd.match(/:(\d+)/);
|
|
847
|
+
const openPort = openPortMatch ? openPortMatch[1] : '????';
|
|
848
|
+
loopFixMsg += `You are calling 'open http://localhost:${openPort}' repeatedly but the server is not running — opening the browser to a dead port does nothing. You must RESTART THE SERVER first:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${openPort}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${openPort}"}}\nIf curl returns 000, check the crash: bash cat /tmp/server.log. Fix the crash FIRST. Only call 'open' after curl returns 200.`;
|
|
849
|
+
} else if (name === 'bash' && (loopCmd.includes('curl') || loopCmd.includes('http_code'))) {
|
|
850
|
+
loopFixMsg += `The server check is looping. Check /tmp/server.log for errors:\n{"name":"bash","arguments":{"command":"cat /tmp/server.log | tail -20"}}\nThen fix the actual error in the code. NEVER change the port.`;
|
|
851
|
+
} else if (loopCmd.includes('npm install')) {
|
|
852
|
+
loopFixMsg += `npm install is looping — packages likely already installed. Skip it and start the server directly with nohup.`;
|
|
853
|
+
} else if (name === 'bash' && (loopCmd.includes('/tmp/') && (loopCmd.includes('.js') || loopCmd.includes('node')) && loopCmd.includes('9223'))) {
|
|
854
|
+
loopFixMsg += `Your Node.js/CDP script is only READING the page — that is why nothing changes. You need to WRITE A NEW SCRIPT THAT CLICKS.\n\nReplace your /tmp script with one that clicks the target element:\n\nWRITE_FILE /tmp/cdp_click.js\n\`\`\`javascript\nconst ws = new WebSocket('ws://localhost:9223/devtools/page/TAB_ID_HERE');\nws.onopen = () => {\n // Click element containing the text you need (change "Filter" to what you see on the page)\n ws.send(JSON.stringify({id:1, method:'Runtime.evaluate', params:{expression: 'Array.from(document.querySelectorAll("a,button,input,span,div,th")).find(el=>el.textContent.trim().includes("Filter"))?.click() || "not found"', returnByValue:true}}));\n};\nws.onmessage = e => { console.log(JSON.parse(e.data)); ws.close(); };\nsetTimeout(() => ws.close(), 5000);\n\`\`\`\n\nThen run: bash → /usr/local/bin/node --experimental-websocket /tmp/cdp_click.js\n\nYou CAN click. You CAN interact. Stop saying you cannot — write the clicking script.`;
|
|
855
|
+
} else if (name === 'screenshot_and_describe') {
|
|
856
|
+
const loopPort = (parsedArgs.url || '').match(/:(\d+)/)?.[1] || '????';
|
|
857
|
+
loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing. You have two choices:\n\nA) If the user asked a question or gave feedback — answer them with TEXT. You do NOT need a screenshot to reply to a conversation. Just write your response.\n\nB) If the app needs to be improved — make a CODE CHANGE first, then take ONE screenshot to verify:\n1. read_file the file that needs changing\n2. write_file with the improvement\n3. restart the server: bash pkill+nohup\n4. screenshot ONCE to verify\n\nDo NOT take another screenshot without first doing one of the above.`;
|
|
858
|
+
} else {
|
|
859
|
+
loopFixMsg += `Observe the tool results above, identify what is specifically broken, then make a targeted fix. Do not repeat commands that already ran.`;
|
|
860
|
+
}
|
|
861
|
+
loopFixMsg += noThink;
|
|
862
|
+
messages.push({ role: 'user', content: loopFixMsg });
|
|
863
|
+
// Don't fully reset — keep 1 entry so next identical call fires after 2 more (not 3)
|
|
864
|
+
recentCalls.splice(0, recentCalls.length - 1);
|
|
865
|
+
break; // break inner tool loop, let model respond to hint
|
|
866
|
+
}
|
|
470
867
|
|
|
471
|
-
const result = await this._executeTool(name, parsedArgs, workDir);
|
|
868
|
+
const result = await this._executeTool(name, parsedArgs, workDir, agentId);
|
|
472
869
|
|
|
473
870
|
this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
|
|
474
871
|
|
|
@@ -477,30 +874,107 @@ export class OllamaAgent extends EventEmitter {
|
|
|
477
874
|
this.emit('agent_image', { agentId, image: result });
|
|
478
875
|
}
|
|
479
876
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
877
|
+
// ALL models get tool results fed back — no model should run blind.
|
|
878
|
+
// This is the core of the observe → reason → act loop: every tool result
|
|
879
|
+
// must be in context so the model can see what happened and react correctly.
|
|
880
|
+
{
|
|
881
|
+
const noThink = '';
|
|
882
|
+
if (isImageResult) {
|
|
483
883
|
const base64 = result.replace(/^data:image\/\w+;base64,/, '');
|
|
484
|
-
messages.push({ role: 'user', content:
|
|
884
|
+
messages.push({ role: 'user', content: `[${name} result]: Screenshot captured. Continue with the next step.${noThink}`, images: [base64] });
|
|
485
885
|
} else {
|
|
486
|
-
const resultText = isImageResult ? '[Screenshot captured
|
|
487
|
-
messages.push({ role: 'user', content:
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
886
|
+
const resultText = isImageResult ? '[Screenshot captured]' : String(result).slice(0, 6000);
|
|
887
|
+
messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\nContinue with the next step.${noThink}` });
|
|
888
|
+
|
|
889
|
+
if (name === 'screenshot_and_describe') {
|
|
890
|
+
const screenshotResult = String(result);
|
|
891
|
+
const isLocalhost = (parsedArgs.url || '').includes('localhost') || (parsedArgs.url || '').includes('127.0.0.1');
|
|
892
|
+
// Server unreachable on localhost — force bash restart (only for local servers, not public URLs)
|
|
893
|
+
if (screenshotResult.includes('SERVER IS NOT REACHABLE') && isLocalhost) {
|
|
894
|
+
const portMatch = (parsedArgs.url || '').match(/:(\d+)/);
|
|
895
|
+
const port = portMatch ? portMatch[1] : '????';
|
|
896
|
+
messages.push({ role: 'user', content: `The local server on port ${port} is not running. Restart it with bash — find the project directory, then: pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd /path/to/project && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${port}` });
|
|
897
|
+
}
|
|
898
|
+
// Public URL unreachable — try web_fetch instead
|
|
899
|
+
else if (screenshotResult.includes('SERVER IS NOT REACHABLE') && !isLocalhost) {
|
|
900
|
+
messages.push({ role: 'user', content: `screenshot_and_describe could not reach ${parsedArgs.url}. Try web_fetch instead:\n{"name":"web_fetch","arguments":{"url":"${parsedArgs.url}"}}` });
|
|
901
|
+
}
|
|
902
|
+
// Dependency audit issues — prevent port-hopping
|
|
903
|
+
else if (screenshotResult.includes('DEPENDENCY AUDIT FOUND ISSUES')) {
|
|
904
|
+
messages.push({ role: 'user', content: `CRITICAL: Missing client-side libraries in your HTML. Do NOT change the port. Fix it: (1) read_file the HTML; (2) add the missing script tags; (3) write_file back; (4) restart server same port; (5) screenshot to verify.` });
|
|
905
|
+
}
|
|
906
|
+
// Successful screenshot of a build task — push to make a code change
|
|
907
|
+
else if (isLocalhost) {
|
|
908
|
+
messages.push({ role: 'user', content: `You have seen the current state. Now make your next improvement: read_file the code, write_file the fix, restart server, then screenshot once to verify.` });
|
|
909
|
+
}
|
|
910
|
+
// Successful screenshot of a public URL — agent is doing research, let it reason
|
|
911
|
+
}
|
|
912
|
+
// Catch placeholder/hello world pages — force the model to keep building
|
|
913
|
+
const screenshotText = String(result).toLowerCase();
|
|
914
|
+
const isPlaceholder = (
|
|
915
|
+
screenshotText.includes('hello world') ||
|
|
916
|
+
screenshotText.includes('cannot get /') ||
|
|
917
|
+
(screenshotText.includes('express') && screenshotText.includes('error')) ||
|
|
918
|
+
screenshotText.includes('placeholder') ||
|
|
919
|
+
screenshotText.includes('coming soon') ||
|
|
920
|
+
(screenshotText.includes('blank') && !screenshotText.includes('not blank'))
|
|
921
|
+
);
|
|
922
|
+
if (isPlaceholder) {
|
|
923
|
+
messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
|
|
924
|
+
}
|
|
497
925
|
}
|
|
498
926
|
}
|
|
499
927
|
}
|
|
500
928
|
continue; // loop back for next model turn
|
|
501
929
|
}
|
|
502
930
|
|
|
503
|
-
// ── No tool calls
|
|
931
|
+
// ── No tool calls ────────────────────────────────────────────────────
|
|
932
|
+
{
|
|
933
|
+
const combined = (visibleContent + streamContent).replace(/<think>[\s\S]*?<\/think>/g, '');
|
|
934
|
+
const hasContent = combined.trim().length > 30;
|
|
935
|
+
const isEmpty = combined.trim().length === 0;
|
|
936
|
+
|
|
937
|
+
// Structural: truncated JSON — model started a tool call but stream ended early
|
|
938
|
+
const hasTruncatedJson = /\{"name"\s*:\s*"(bash|web_fetch|screenshot_and_describe|read_file|write_file|list_directory)"/i.test(streamContent) && Object.keys(streamToolCalls).length === 0;
|
|
939
|
+
if (hasTruncatedJson) {
|
|
940
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call — kicking to re-output`);
|
|
941
|
+
messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
|
|
942
|
+
continue;
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
// Structural: empty response — model produced nothing
|
|
946
|
+
if (isEmpty) {
|
|
947
|
+
if (emptyRetries < 3) {
|
|
948
|
+
emptyRetries++;
|
|
949
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: empty response (retry ${emptyRetries}/3) — kicking`);
|
|
950
|
+
messages.push({ role: 'user', content: toolsUsed.length === 0 ? 'Start now — make your first tool call.' : 'You stopped. Make your next tool call.' });
|
|
951
|
+
continue;
|
|
952
|
+
}
|
|
953
|
+
console.log(` [${agentId}] ⚠️ Turn ${turn}: empty after 3 retries`);
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
// Structural: agent hasn't used any tools yet — it must act before it can answer
|
|
957
|
+
if (toolsUsed.length === 0 && hasContent) {
|
|
958
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: no tools used yet — kicking to act`);
|
|
959
|
+
messages.push({ role: 'user', content: 'Make your first tool call now.' });
|
|
960
|
+
continue;
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
// Semantic: ask the LLM whether the task is actually complete.
|
|
964
|
+
// This replaces all regex-based intent detection — the model judges its own output.
|
|
965
|
+
if (hasContent && toolsUsed.length > 0) {
|
|
966
|
+
const originalTask = messages.find(m => m.role === 'user')?.content || task;
|
|
967
|
+
const isDone = await this._isTaskComplete(originalTask, combined, controller.signal);
|
|
968
|
+
if (!isDone) {
|
|
969
|
+
console.log(` [${agentId}] ⚡ Turn ${turn}: LLM says task incomplete — kicking`);
|
|
970
|
+
messages.push({ role: 'user', content: 'You have not completed the task yet. Try a different approach and keep going.' });
|
|
971
|
+
continue;
|
|
972
|
+
}
|
|
973
|
+
console.log(` [${agentId}] ✅ Turn ${turn}: LLM confirmed task complete`);
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
|
|
977
|
+
// ── Final answer ──────────────────────────────────────────────────────
|
|
504
978
|
if (visibleContent) finalContent = visibleContent;
|
|
505
979
|
break;
|
|
506
980
|
|
|
@@ -519,7 +993,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
519
993
|
];
|
|
520
994
|
|
|
521
995
|
try {
|
|
522
|
-
const summaryRes = await fetch(`${this.baseUrl}/
|
|
996
|
+
const summaryRes = await fetch(`${this.baseUrl}/api/chat`, {
|
|
523
997
|
method: 'POST',
|
|
524
998
|
headers: { 'Content-Type': 'application/json' },
|
|
525
999
|
signal: controller.signal,
|
|
@@ -527,7 +1001,8 @@ export class OllamaAgent extends EventEmitter {
|
|
|
527
1001
|
model: effectiveModel,
|
|
528
1002
|
messages: summaryMessages,
|
|
529
1003
|
stream: true,
|
|
530
|
-
|
|
1004
|
+
think: false,
|
|
1005
|
+
options: { num_ctx: 32768 }
|
|
531
1006
|
})
|
|
532
1007
|
});
|
|
533
1008
|
|
|
@@ -570,7 +1045,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
570
1045
|
}
|
|
571
1046
|
|
|
572
1047
|
// Persist history for next task
|
|
573
|
-
if (finalContent
|
|
1048
|
+
if (finalContent) {
|
|
574
1049
|
this._saveHistory(agentId, workDir, sessionId, [
|
|
575
1050
|
...history,
|
|
576
1051
|
{ role: 'user', content: task },
|
|
@@ -629,16 +1104,96 @@ export class OllamaAgent extends EventEmitter {
|
|
|
629
1104
|
|
|
630
1105
|
// ─── Tool execution ───────────────────────────────────────────────────────
|
|
631
1106
|
|
|
632
|
-
async _executeTool(name, args, workDir) {
|
|
1107
|
+
async _executeTool(name, args, workDir, agentId = 'agent') {
|
|
633
1108
|
try {
|
|
634
1109
|
switch (name) {
|
|
635
1110
|
case 'bash': {
|
|
1111
|
+
// Block commands that would kill the worker process itself.
|
|
1112
|
+
// "pkill -f node" and "killall node" match the worker's own process.
|
|
1113
|
+
// Rewrite to only kill processes by their specific server log path or port.
|
|
1114
|
+
const cmd = args.command || '';
|
|
1115
|
+
if (/pkill\s+(-\w+\s+)*(-f\s+)?node\b/i.test(cmd) || /killall\s+node\b/i.test(cmd)) {
|
|
1116
|
+
// Safe replacement: kill only the app server on the port, not all node processes
|
|
1117
|
+
const portMatch = cmd.match(/localhost:(\d+)|:(\d+)/);
|
|
1118
|
+
const serverLogMatch = cmd.match(/server\.js/);
|
|
1119
|
+
if (portMatch || serverLogMatch) {
|
|
1120
|
+
const safeCmd = portMatch
|
|
1121
|
+
? `lsof -ti:${portMatch[1] || portMatch[2]} | xargs kill -9 2>/dev/null || true`
|
|
1122
|
+
: `pkill -f "server.js" 2>/dev/null || true`;
|
|
1123
|
+
args = { ...args, command: safeCmd + cmd.slice(cmd.indexOf('&&') !== -1 ? cmd.indexOf('&&') : cmd.length) };
|
|
1124
|
+
} else {
|
|
1125
|
+
// No specific target — skip the pkill entirely, just run what follows &&
|
|
1126
|
+
const afterAnd = cmd.indexOf('&&');
|
|
1127
|
+
if (afterAnd !== -1) {
|
|
1128
|
+
args = { ...args, command: cmd.slice(afterAnd + 2).trim() };
|
|
1129
|
+
} else {
|
|
1130
|
+
return 'Skipped broad pkill to protect worker process. Use: lsof -ti:PORT | xargs kill -9';
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
// Intercept "open http://..." — navigate the AgentForge CDP browser directly,
|
|
1136
|
+
// then auto-screenshot so the agent immediately sees what it built.
|
|
1137
|
+
const openUrlMatch = args.command.trim().match(/^open\s+(https?:\/\/\S+)/);
|
|
1138
|
+
if (openUrlMatch) {
|
|
1139
|
+
const targetUrl = openUrlMatch[1];
|
|
1140
|
+
let openedViaCDP = false;
|
|
1141
|
+
try {
|
|
1142
|
+
const newTabRes = await fetch('http://127.0.0.1:9223/json/new', { method: 'PUT', signal: AbortSignal.timeout(3000) });
|
|
1143
|
+
const newTabData = await newTabRes.json();
|
|
1144
|
+
const tabWs = new WebSocket(`ws://127.0.0.1:9223/devtools/page/${newTabData.id}`);
|
|
1145
|
+
await new Promise(r => tabWs.on('open', r));
|
|
1146
|
+
await new Promise(r => {
|
|
1147
|
+
let navigated = false;
|
|
1148
|
+
tabWs.send(JSON.stringify({ id: 1, method: 'Page.navigate', params: { url: targetUrl } }));
|
|
1149
|
+
tabWs.on('message', () => { if (!navigated) { navigated = true; tabWs.close(); r(); } });
|
|
1150
|
+
setTimeout(() => { tabWs.close(); r(); }, 3000);
|
|
1151
|
+
});
|
|
1152
|
+
openedViaCDP = true;
|
|
1153
|
+
} catch {
|
|
1154
|
+
// CDP unavailable — fall through to OS open
|
|
1155
|
+
try { await execAsync(`open "${targetUrl}"`); } catch {}
|
|
1156
|
+
}
|
|
1157
|
+
// Auto-screenshot after opening so the agent sees what it built.
|
|
1158
|
+
// Wait for page to load, then call screenshot_and_describe.
|
|
1159
|
+
await new Promise(r => setTimeout(r, 2500));
|
|
1160
|
+
try {
|
|
1161
|
+
const screenshotResult = await this._executeTool('screenshot_and_describe', {
|
|
1162
|
+
url: targetUrl,
|
|
1163
|
+
check_for: 'the running application',
|
|
1164
|
+
send_to_user: true
|
|
1165
|
+
}, workDir, agentId);
|
|
1166
|
+
return `Opened ${targetUrl} in browser${openedViaCDP ? ' (AgentForge browser)' : ''}.\n\nVisual snapshot of what is currently visible:\n${screenshotResult}`;
|
|
1167
|
+
} catch {
|
|
1168
|
+
return `Opened ${targetUrl} in browser. (Screenshot failed — verify with screenshot_and_describe)`;
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
// If workDir doesn't exist (e.g. /tmp was cleared after worker restart),
|
|
1173
|
+
// fall back to HOME rather than failing with ENOENT on every bash call.
|
|
1174
|
+
let bashCwd = workDir;
|
|
1175
|
+
const _home = process.env.HOME || '/tmp';
|
|
1176
|
+
try { if (!existsSync(bashCwd)) bashCwd = _home; } catch { bashCwd = _home; }
|
|
1177
|
+
// Background commands (ending with &) return no stdout — the model interprets
|
|
1178
|
+
// silence as failure and loops. Run them, then read back any log file to confirm.
|
|
1179
|
+
const isBackground = /&\s*$/.test(args.command.trim());
|
|
636
1180
|
const { stdout, stderr } = await execAsync(args.command, {
|
|
637
|
-
cwd:
|
|
638
|
-
timeout:
|
|
1181
|
+
cwd: bashCwd,
|
|
1182
|
+
timeout: 120000,
|
|
639
1183
|
maxBuffer: 1024 * 1024 * 2 // 2MB
|
|
640
1184
|
});
|
|
641
|
-
|
|
1185
|
+
const out = (stdout + stderr).trim();
|
|
1186
|
+
if (isBackground && !out) {
|
|
1187
|
+
// Give the process a moment to start, then check /tmp/server.log if it exists
|
|
1188
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
1189
|
+
let confirmation = 'Background process started.';
|
|
1190
|
+
try {
|
|
1191
|
+
const logContent = readFileSync('/tmp/server.log', 'utf-8').trim().split('\n').slice(-3).join('\n');
|
|
1192
|
+
if (logContent) confirmation = `Background process started. Server log:\n${logContent}`;
|
|
1193
|
+
} catch { /* no log yet */ }
|
|
1194
|
+
return confirmation;
|
|
1195
|
+
}
|
|
1196
|
+
return out || '(no output)';
|
|
642
1197
|
}
|
|
643
1198
|
|
|
644
1199
|
case 'read_file': {
|
|
@@ -685,6 +1240,26 @@ export class OllamaAgent extends EventEmitter {
|
|
|
685
1240
|
}
|
|
686
1241
|
}
|
|
687
1242
|
|
|
1243
|
+
case 'screenshot_and_describe': {
|
|
1244
|
+
const result = await this._screenshotAndDescribe(args.url, args.check_for);
|
|
1245
|
+
// Always send screenshot to user — agent called this tool, user should always see it
|
|
1246
|
+
if (this._lastScreenshotData) {
|
|
1247
|
+
this.emit('agent_image', { agentId, image: this._lastScreenshotData });
|
|
1248
|
+
this._lastScreenshotData = null;
|
|
1249
|
+
}
|
|
1250
|
+
return result;
|
|
1251
|
+
}
|
|
1252
|
+
|
|
1253
|
+
case 'browser': {
|
|
1254
|
+
const result = await browserAction(args);
|
|
1255
|
+
if (result && result.__screenshot) {
|
|
1256
|
+
const imgData = `data:image/png;base64,${result.base64}`;
|
|
1257
|
+
this.emit('agent_image', { agentId, image: imgData });
|
|
1258
|
+
return `Screenshot taken (${Math.round(result.base64.length * 0.75 / 1024)}KB). Image sent to chat.`;
|
|
1259
|
+
}
|
|
1260
|
+
return typeof result === 'string' ? result : JSON.stringify(result);
|
|
1261
|
+
}
|
|
1262
|
+
|
|
688
1263
|
default:
|
|
689
1264
|
return `Unknown tool: ${name}`;
|
|
690
1265
|
}
|
|
@@ -697,24 +1272,13 @@ export class OllamaAgent extends EventEmitter {
|
|
|
697
1272
|
|
|
698
1273
|
async _cdpScreenshot(navigateUrl, tmpFile) {
|
|
699
1274
|
const CDP_PORT = 9223;
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
const
|
|
704
|
-
const
|
|
705
|
-
const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
|
|
706
|
-
|
|
707
|
-
if (!usable) {
|
|
708
|
-
// Create new tab
|
|
709
|
-
const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
|
|
710
|
-
const newTabData = await newTab.json();
|
|
711
|
-
tabId = newTabData.id;
|
|
712
|
-
} else {
|
|
713
|
-
tabId = usable.id;
|
|
714
|
-
}
|
|
1275
|
+
|
|
1276
|
+
// Always create a NEW tab — never hijack the dashboard or other existing tabs
|
|
1277
|
+
const newTabRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
|
|
1278
|
+
const newTabData = await newTabRes.json();
|
|
1279
|
+
const tabId = newTabData.id;
|
|
715
1280
|
|
|
716
1281
|
return new Promise((resolve, reject) => {
|
|
717
|
-
// Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
|
|
718
1282
|
const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
|
|
719
1283
|
let msgId = 1;
|
|
720
1284
|
const pending = new Map();
|
|
@@ -739,10 +1303,12 @@ export class OllamaAgent extends EventEmitter {
|
|
|
739
1303
|
try {
|
|
740
1304
|
if (navigateUrl) {
|
|
741
1305
|
await send('Page.navigate', { url: navigateUrl });
|
|
742
|
-
// Wait for
|
|
1306
|
+
// Wait for page to fully render
|
|
743
1307
|
await new Promise(r => setTimeout(r, 3000));
|
|
744
1308
|
}
|
|
745
1309
|
const { data } = await send('Page.captureScreenshot', { format: 'png' });
|
|
1310
|
+
// Close the temporary tab
|
|
1311
|
+
await send('Target.closeTarget', { targetId: tabId }).catch(() => {});
|
|
746
1312
|
ws.close();
|
|
747
1313
|
resolve(`data:image/png;base64,${data}`);
|
|
748
1314
|
} catch (err) {
|
|
@@ -752,10 +1318,129 @@ export class OllamaAgent extends EventEmitter {
|
|
|
752
1318
|
});
|
|
753
1319
|
|
|
754
1320
|
ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
|
|
755
|
-
setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); },
|
|
1321
|
+
setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 25000);
|
|
756
1322
|
});
|
|
757
1323
|
}
|
|
758
1324
|
|
|
1325
|
+
// ─── Screenshot + vision analysis ─────────────────────────────────────────
|
|
1326
|
+
// Takes a screenshot of a URL, then asks the active vision model to describe it.
|
|
1327
|
+
// Returns a plain-text description the main agent can reason about.
|
|
1328
|
+
|
|
1329
|
+
async _screenshotAndDescribe(url, checkFor) {
|
|
1330
|
+
const question = checkFor
|
|
1331
|
+
? `Does this web page look like it's working? Specifically check: ${checkFor}. Describe precisely what you see — the background color, any canvas element, colored shapes (even tiny dots), text, buttons, game elements, or error messages. Is the background dark or white? Are there any colored pixels at all?`
|
|
1332
|
+
: `Describe what you see on this web page. What is the background color? Are there any colored shapes, text, buttons, or UI elements? Is there a canvas? Even tiny colored dots count — be precise about what you see.`;
|
|
1333
|
+
|
|
1334
|
+
// === Server reachability check — fast fail if server is down ===
|
|
1335
|
+
try {
|
|
1336
|
+
await fetch(url, { signal: AbortSignal.timeout(4000) });
|
|
1337
|
+
} catch (reachErr) {
|
|
1338
|
+
const portMatch = url.match(/:(\d+)/);
|
|
1339
|
+
const port = portMatch ? portMatch[1] : '?';
|
|
1340
|
+
return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
|
|
1341
|
+
}
|
|
1342
|
+
|
|
1343
|
+
// === HTML dependency audit (always runs — fast, reliable) ===
|
|
1344
|
+
// Fetches the page HTML and checks for common missing client-side dependencies.
|
|
1345
|
+
// This catches issues that screenshots can't detect (JS errors, missing script tags).
|
|
1346
|
+
let auditNotes = '';
|
|
1347
|
+
try {
|
|
1348
|
+
const htmlRes = await fetch(url, { signal: AbortSignal.timeout(8000) });
|
|
1349
|
+
const html = await htmlRes.text();
|
|
1350
|
+
const missing = [];
|
|
1351
|
+
// Check for socket.io client usage without the script tag
|
|
1352
|
+
if (/\bio\s*\(/.test(html) && !html.includes('/socket.io/socket.io.js')) {
|
|
1353
|
+
missing.push('Missing <script src="/socket.io/socket.io.js"></script> — io() is called but the client library is not loaded');
|
|
1354
|
+
// Also verify the server actually serves it
|
|
1355
|
+
try {
|
|
1356
|
+
const sioRes = await fetch(url.replace(/\/$/, '') + '/socket.io/socket.io.js', { signal: AbortSignal.timeout(5000) });
|
|
1357
|
+
if (!sioRes.ok || (await sioRes.text()).startsWith('<!')) {
|
|
1358
|
+
missing.push('Server does NOT serve /socket.io/socket.io.js — check that socket.io is installed and express-static is set up');
|
|
1359
|
+
}
|
|
1360
|
+
} catch {}
|
|
1361
|
+
}
|
|
1362
|
+
if (missing.length > 0) {
|
|
1363
|
+
auditNotes = `\n\nHTML DEPENDENCY AUDIT FOUND ISSUES:\n${missing.map(m => '- ' + m).join('\n')}`;
|
|
1364
|
+
}
|
|
1365
|
+
} catch {}
|
|
1366
|
+
|
|
1367
|
+
let imageData;
|
|
1368
|
+
const tmpFile = `/tmp/af_verify_${Date.now()}.png`;
|
|
1369
|
+
|
|
1370
|
+
// Try AgentForge browser via CDP first
|
|
1371
|
+
try {
|
|
1372
|
+
imageData = await this._cdpScreenshot(url, null);
|
|
1373
|
+
} catch (cdpErr) {
|
|
1374
|
+
// CDP not available — try puppeteer headless screenshot
|
|
1375
|
+
try {
|
|
1376
|
+
const puppeteerModule = process.env.HOME + '/.npm-global/lib/node_modules/puppeteer';
|
|
1377
|
+
const scriptFile = `/tmp/af_pup_${Date.now()}.js`;
|
|
1378
|
+
const nodeScript = `
|
|
1379
|
+
const puppeteer = require(${JSON.stringify(puppeteerModule)});
|
|
1380
|
+
(async () => {
|
|
1381
|
+
const browser = await puppeteer.launch({headless: true, protocolTimeout: 30000, args: ['--no-sandbox','--disable-setuid-sandbox','--disable-gpu','--disable-dev-shm-usage']});
|
|
1382
|
+
const page = await browser.newPage();
|
|
1383
|
+
await page.setDefaultNavigationTimeout(12000);
|
|
1384
|
+
await page.setViewport({width: 1280, height: 900});
|
|
1385
|
+
try {
|
|
1386
|
+
await page.goto(${JSON.stringify(url)}, {waitUntil: 'domcontentloaded', timeout: 12000}).catch(()=>{});
|
|
1387
|
+
await new Promise(r => setTimeout(r, 2500));
|
|
1388
|
+
await page.screenshot({path: ${JSON.stringify(tmpFile)}, fullPage: true});
|
|
1389
|
+
console.log('puppeteer screenshot ok');
|
|
1390
|
+
} finally {
|
|
1391
|
+
await browser.close();
|
|
1392
|
+
}
|
|
1393
|
+
})().then(() => process.exit(0)).catch(e => { console.error(e.message); process.exit(1); });
|
|
1394
|
+
`;
|
|
1395
|
+
writeFileSync(scriptFile, nodeScript);
|
|
1396
|
+
await execAsync(`/usr/local/bin/node "${scriptFile}"`, { timeout: 45000 });
|
|
1397
|
+
await execAsync(`rm -f "${scriptFile}"`).catch(() => {});
|
|
1398
|
+
const raw = readFileSync(tmpFile).toString('base64');
|
|
1399
|
+
await execAsync(`rm -f "${tmpFile}"`).catch(() => {});
|
|
1400
|
+
imageData = `data:image/png;base64,${raw}`;
|
|
1401
|
+
} catch (pupErr) {
|
|
1402
|
+
console.warn(` [screenshot_and_describe] puppeteer failed: ${pupErr.message}`);
|
|
1403
|
+
// No screenshot possible — return audit notes only
|
|
1404
|
+
return `Cannot take screenshot (CDP: ${cdpErr.message}, puppeteer: ${pupErr.message}). ${auditNotes || 'No dependency issues found in HTML. Check server logs for errors.'}`;
|
|
1405
|
+
}
|
|
1406
|
+
}
|
|
1407
|
+
|
|
1408
|
+
// Store imageData so caller can emit to user if send_to_user=true
|
|
1409
|
+
this._lastScreenshotData = imageData;
|
|
1410
|
+
|
|
1411
|
+
const base64 = imageData.replace(/^data:image\/\w+;base64,/, '');
|
|
1412
|
+
|
|
1413
|
+
// Use the active model for vision analysis.
|
|
1414
|
+
try {
|
|
1415
|
+
// /api/chat with images array — supported by all Ollama vision-capable models
|
|
1416
|
+
const res = await fetch(`${this.baseUrl}/api/chat`, {
|
|
1417
|
+
method: 'POST',
|
|
1418
|
+
headers: { 'Content-Type': 'application/json' },
|
|
1419
|
+
body: JSON.stringify({
|
|
1420
|
+
model: this.model,
|
|
1421
|
+
messages: [{ role: 'user', content: question, images: [base64] }],
|
|
1422
|
+
stream: false,
|
|
1423
|
+
options: { num_ctx: 4096 }
|
|
1424
|
+
}),
|
|
1425
|
+
signal: AbortSignal.timeout(120000)
|
|
1426
|
+
});
|
|
1427
|
+
|
|
1428
|
+
if (res.ok) {
|
|
1429
|
+
const json = await res.json();
|
|
1430
|
+
const description = json.message?.content || json.response || '';
|
|
1431
|
+
const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
1432
|
+
if (clean) {
|
|
1433
|
+
console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
|
|
1434
|
+
return `Screenshot analysis of ${url}:\n${clean}${auditNotes}`;
|
|
1435
|
+
}
|
|
1436
|
+
}
|
|
1437
|
+
} catch (err) {
|
|
1438
|
+
console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}`;
|
|
1442
|
+
}
|
|
1443
|
+
|
|
759
1444
|
_resolvePath(p, workDir) {
|
|
760
1445
|
return path.isAbsolute(p) ? p : path.join(workDir, p);
|
|
761
1446
|
}
|
|
@@ -781,28 +1466,65 @@ export class OllamaAgent extends EventEmitter {
|
|
|
781
1466
|
}
|
|
782
1467
|
|
|
783
1468
|
// ─── History persistence ──────────────────────────────────────────────────
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
1469
|
+
// History lives at ~/.agentforge/history/{agentId}.json — one canonical file
|
|
1470
|
+
// per agent, independent of workDir/sessionId/machine state. Never gets lost
|
|
1471
|
+
// due to workDir changes, worker restarts, or Railway assigning new sessionIds.
|
|
1472
|
+
|
|
1473
|
+
_historyPath(agentId) {
|
|
1474
|
+
const home = process.env.HOME || '/tmp';
|
|
1475
|
+
const dir = path.join(home, '.agentforge', 'history');
|
|
1476
|
+
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
|
|
1477
|
+
return path.join(dir, `${agentId}.json`);
|
|
787
1478
|
}
|
|
788
1479
|
|
|
789
1480
|
_loadHistory(agentId, workDir, sessionId) {
|
|
790
|
-
if (!sessionId) return [];
|
|
791
1481
|
try {
|
|
792
|
-
const fp = this._historyPath(
|
|
1482
|
+
const fp = this._historyPath(agentId);
|
|
793
1483
|
if (existsSync(fp)) {
|
|
794
1484
|
const data = JSON.parse(readFileSync(fp, 'utf-8'));
|
|
795
|
-
|
|
1485
|
+
console.log(` [${agentId}] Loaded ${data.length} history msgs from ~/.agentforge/history/`);
|
|
796
1486
|
return data.slice(-12);
|
|
797
1487
|
}
|
|
798
|
-
} catch {
|
|
1488
|
+
} catch (e) {
|
|
1489
|
+
console.warn(`⚠️ [${agentId}] History load error: ${e.message}`);
|
|
1490
|
+
}
|
|
799
1491
|
return [];
|
|
800
1492
|
}
|
|
801
1493
|
|
|
802
1494
|
_saveHistory(agentId, workDir, sessionId, messages) {
|
|
803
1495
|
try {
|
|
804
|
-
const fp = this._historyPath(
|
|
1496
|
+
const fp = this._historyPath(agentId);
|
|
805
1497
|
writeFileSync(fp, JSON.stringify(messages.slice(-20), null, 2));
|
|
806
|
-
} catch {
|
|
1498
|
+
} catch (e) {
|
|
1499
|
+
console.warn(`⚠️ [${agentId}] History save error: ${e.message}`);
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
async _isTaskComplete(task, output, signal) {
|
|
1504
|
+
try {
|
|
1505
|
+
const res = await fetch(`${this.baseUrl}/api/chat`, {
|
|
1506
|
+
method: 'POST',
|
|
1507
|
+
headers: { 'Content-Type': 'application/json' },
|
|
1508
|
+
signal,
|
|
1509
|
+
body: JSON.stringify({
|
|
1510
|
+
model: this.model,
|
|
1511
|
+
messages: [
|
|
1512
|
+
{ role: 'system', content: 'You determine if a task is complete. Reply with only "yes" or "no".' },
|
|
1513
|
+
{ role: 'user', content: `Task: ${task.slice(0, 300)}\n\nAgent output: ${output.slice(0, 600)}\n\nDid the agent fully complete the task with real results (not excuses, not plans, not partial attempts)?` }
|
|
1514
|
+
],
|
|
1515
|
+
stream: false,
|
|
1516
|
+
think: false,
|
|
1517
|
+
options: { num_ctx: 2048 }
|
|
1518
|
+
})
|
|
1519
|
+
});
|
|
1520
|
+
if (!res.ok) return true;
|
|
1521
|
+
const data = await res.json();
|
|
1522
|
+
const answer = (data.message?.content || '').toLowerCase().trim();
|
|
1523
|
+
console.log(` [_isTaskComplete] verdict: "${answer}"`);
|
|
1524
|
+
return answer.startsWith('yes');
|
|
1525
|
+
} catch (e) {
|
|
1526
|
+
console.warn(`⚠️ [_isTaskComplete] error: ${e.message}`);
|
|
1527
|
+
return true; // assume done on error to avoid infinite loops
|
|
1528
|
+
}
|
|
807
1529
|
}
|
|
808
1530
|
}
|