@hamp10/agentforge 0.2.16 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,30 @@
1
1
  import { exec } from 'child_process';
2
- import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync } from 'fs';
2
+ import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync, appendFileSync } from 'fs';
3
3
  import { EventEmitter } from 'events';
4
4
  import path from 'path';
5
5
  import { promisify } from 'util';
6
6
  import { fileURLToPath } from 'url';
7
+ import { browserAction } from './hampagent/browser.js';
7
8
 
8
9
  const execAsync = promisify(exec);
9
10
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
11
 
12
+ // ── Worker log file — always write to /tmp/agentforge/worker.log so logs are
13
+ // accessible remotely via SSH regardless of how the worker was started.
14
+ const WORKER_LOG = '/tmp/agentforge/worker.log';
15
+ try { mkdirSync('/tmp/agentforge', { recursive: true }); } catch {}
16
+ const _origLog = console.log.bind(console);
17
+ console.log = (...args) => {
18
+ _origLog(...args);
19
+ try {
20
+ const line = args.map(a => (typeof a === 'object' ? JSON.stringify(a) : String(a))).join(' ');
21
+ appendFileSync(WORKER_LOG, `${new Date().toISOString()} ${line}\n`);
22
+ } catch {}
23
+ };
24
+
11
25
  // Minimal tool definitions — one compact JSON per line, embedded in system prompt.
12
- // Ollama's `tools` API param is broken for qwen3 (github.com/ollama/ollama/issues/14601).
13
- // Descriptions kept short to fit within qwen3-vl:8b's 4096 token context.
26
+ // Ollama's `tools` API param is unreliable; tools are injected as text in the system prompt.
27
+ // Descriptions kept short to fit within a 4096 token context window.
14
28
  const TOOL_DEFS = [
15
29
  {
16
30
  type: 'function',
@@ -59,16 +73,27 @@ const TOOL_DEFS = [
59
73
  description: 'Screenshot the screen. Set send_to_user=true only if user asked to see it.',
60
74
  parameters: { type: 'object', properties: { target: { type: 'string', enum: ['screen', 'browser'] }, send_to_user: { type: 'boolean' } }, required: ['target'] }
61
75
  }
76
+ },
77
+ {
78
+ type: 'function',
79
+ function: {
80
+ name: 'screenshot_and_describe',
81
+ description: 'Screenshot a URL and get AI visual analysis. Use after building any web app to verify it looks correct before reporting done. Set send_to_user:true to show the screenshot to the user in chat.',
82
+ parameters: { type: 'object', properties: {
83
+ url: { type: 'string', description: 'URL to screenshot (e.g. http://localhost:3458)' },
84
+ check_for: { type: 'string', description: 'What should be visible (e.g. "snake game with canvas, scoreboard, and game controls")' },
85
+ send_to_user: { type: 'boolean', description: 'Send screenshot image to user in chat (true when confirmed working)' }
86
+ }, required: ['url'] }
87
+ }
62
88
  }
63
89
  ];
64
90
 
65
91
  // Minimal <tools> XML for system prompt — one compact JSON per line, no outer array.
66
- // Per qwen3 Hermes chat template (tokenizer_config.json).
67
92
  const TOOLS_XML = `<tools>\n${TOOL_DEFS.map(t => JSON.stringify(t.function)).join('\n')}\n</tools>`;
68
93
 
69
94
  /**
70
95
  * Parse <tool_call>...</tool_call> blocks from streamed content.
71
- * qwen3-vl native format: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
96
+ * Some models emit: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
72
97
  * Returns array of {name, arguments} or null if no complete tool calls found.
73
98
  */
74
99
  function _parseToolCallTags(content) {
@@ -86,67 +111,170 @@ function _parseToolCallTags(content) {
86
111
  return calls.length > 0 ? calls : null;
87
112
  }
88
113
 
114
+ /**
115
+ * Parse WRITE_FILE code-fence format.
116
+ * Models struggle to JSON-escape large code files (unescaped quotes break JSON.parse).
117
+ * This format avoids the problem: path on the first line, raw content in a code fence.
118
+ *
119
+ * Accepted formats:
120
+ * WRITE_FILE /abs/path/to/file.js
121
+ * ```
122
+ * ...raw content, no escaping needed...
123
+ * ```
124
+ *
125
+ * write_file: /abs/path/to/file.js
126
+ * ```javascript
127
+ * ...content...
128
+ * ```
129
+ *
130
+ * Returns array of {name, arguments} or null if no matches found.
131
+ */
132
+ function _parseWriteFileFences(content) {
133
+ if (!content) return null;
134
+ const calls = [];
135
+ // Match WRITE_FILE <path> or write_file: <path> followed by a code fence
136
+ const re = /(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n([\s\S]*?)```/gi;
137
+ let m;
138
+ while ((m = re.exec(content)) !== null) {
139
+ const filePath = m[1].trim();
140
+ const fileContent = m[2]; // raw content, no unescaping needed
141
+ if (filePath && fileContent !== undefined) {
142
+ calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
143
+ }
144
+ }
145
+ return calls.length > 0 ? calls : null;
146
+ }
147
+
148
+ /**
149
+ * Fallback: parse "Writing filename...\n```lang\ncontent\n```" code blocks.
150
+ * Many local models ignore the WRITE_FILE instruction and use raw markdown blocks.
151
+ * Extract the filename from the "Writing X..." line and write the file to the project dir.
152
+ * Project dir is inferred from the most recent "mkdir -p /path" in the content.
153
+ */
154
+ function _parseWritingFallback(content, workDir) {
155
+ if (!content) return null;
156
+ const calls = [];
157
+
158
+ // Infer project dir from last mkdir -p command in the stream
159
+ let projectDir = workDir;
160
+ const mkdirMatches = [...content.matchAll(/mkdir\s+-p\s+"?([^"\n]+)"?/g)];
161
+ if (mkdirMatches.length > 0) {
162
+ const lastMkdir = mkdirMatches[mkdirMatches.length - 1];
163
+ const candidate = lastMkdir[1].trim().replace(/~/, process.env.HOME || '/tmp');
164
+ if (candidate && !candidate.includes('$')) projectDir = candidate;
165
+ }
166
+
167
+ // Match: "Writing filename...\n```lang\ncontent\n```"
168
+ const re = /Writing\s+([\w./\-]+?)(?:\.{3})?\s*\n```[^\n]*\n([\s\S]*?)```(?:\n|$)/gi;
169
+ let m;
170
+ while ((m = re.exec(content)) !== null) {
171
+ const filename = m[1].trim();
172
+ const fileContent = m[2];
173
+ if (!filename || fileContent === undefined) continue;
174
+ // Skip if this is just a status echo with no real code
175
+ if (fileContent.trim().length < 5) continue;
176
+ const filePath = filename.startsWith('/') ? filename : `${projectDir}/${filename}`;
177
+ calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
178
+ }
179
+ return calls.length > 0 ? calls : null;
180
+ }
181
+
89
182
  /**
90
183
  * Detect text-based tool calls from model content.
91
- * qwen3-vl:8b outputs tool calls as JSON in content rather than tool_calls field.
184
+ * Models that don't use native tool_calls emit JSON in their text content instead.
92
185
  * Supports two schemas:
93
186
  * - {name, arguments} (OpenAI-style)
94
- * - {tool, args} (qwen3 native style)
95
- * Supports both compact (one JSON per line) and pretty-printed multi-line JSON blocks.
96
- * Returns array of {name, arguments} if content is ONLY tool calls, else null.
187
+ * - {tool, args} (alternate style)
188
+ * Supports:
189
+ * - Pure JSON (whole content is one or more JSON objects)
190
+ * - Mixed: "Status line\n{json}" — narration before the tool call JSON
191
+ * Returns array of {name, arguments} if any tool calls found, else null.
97
192
  */
98
193
  function _parseTextToolCalls(content) {
99
194
  if (!content) return null;
100
195
  const trimmed = content.trim();
101
- if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) return null;
196
+ if (!trimmed) return null;
102
197
 
103
198
  // Normalise a single parsed object into {name, arguments}
199
+ // Handles multiple schemas models may emit:
200
+ // {name, arguments} — OpenAI-style (correct)
201
+ // {tool, args} — alternate native style
202
+ // {action:"write_file", path, content} — model shorthand
203
+ // {action:"bash", command} — model shorthand
204
+ // {action:"read_file", path} — model shorthand
104
205
  const normalise = (obj) => {
105
- if (typeof obj.name === 'string' && obj.arguments !== undefined) {
106
- const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
107
- return { name: obj.name, arguments: args };
108
- }
109
- if (typeof obj.tool === 'string' && obj.args !== undefined) {
110
- return { name: obj.tool, arguments: obj.args };
111
- }
206
+ try {
207
+ if (typeof obj.name === 'string' && obj.arguments !== undefined) {
208
+ const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
209
+ return { name: obj.name, arguments: args };
210
+ }
211
+ if (typeof obj.tool === 'string' && obj.args !== undefined) {
212
+ return { name: obj.tool, arguments: obj.args };
213
+ }
214
+ // Handle {action, ...} shorthand the model sometimes emits
215
+ if (typeof obj.action === 'string') {
216
+ const action = obj.action.toLowerCase().replace(/[ -]/g, '_');
217
+ // Map common action names to tool names
218
+ const toolName = action === 'write' ? 'write_file'
219
+ : action === 'read' ? 'read_file'
220
+ : action === 'list' ? 'list_directory'
221
+ : action === 'run' || action === 'execute' || action === 'exec' ? 'bash'
222
+ : action; // use as-is (write_file, bash, read_file, etc.)
223
+ const args = {};
224
+ if (obj.path !== undefined) args.path = obj.path;
225
+ if (obj.content !== undefined) args.content = obj.content;
226
+ if (obj.command !== undefined) args.command = obj.command;
227
+ if (obj.url !== undefined) args.url = obj.url;
228
+ if (obj.target !== undefined) args.target = obj.target;
229
+ if (Object.keys(args).length > 0) return { name: toolName, arguments: args };
230
+ }
231
+ } catch {}
112
232
  return null;
113
233
  };
114
234
 
115
- // Try parsing the whole content as a single JSON object/array
116
- try {
117
- const obj = JSON.parse(trimmed);
118
- if (Array.isArray(obj)) {
119
- const calls = obj.map(normalise);
120
- if (calls.every(Boolean)) return calls;
121
- return null;
122
- }
123
- const call = normalise(obj);
124
- if (call) return [call];
125
- return null;
126
- } catch {}
127
-
128
- // Try extracting multiple top-level JSON objects (separated by newlines/whitespace)
235
+ // Extract all JSON objects that start at the beginning of a line
236
+ // This handles both pure-JSON responses and "narration\n{json}" mixed responses
129
237
  const calls = [];
238
+ const lines = trimmed.split('\n');
130
239
  let i = 0;
131
- while (i < trimmed.length) {
132
- // Skip whitespace/newlines between objects
133
- while (i < trimmed.length && /\s/.test(trimmed[i])) i++;
134
- if (i >= trimmed.length) break;
135
- if (trimmed[i] !== '{') return null; // Non-JSON between objects bail
136
- // Find matching closing brace
137
- let depth = 0, j = i;
138
- while (j < trimmed.length) {
139
- if (trimmed[j] === '{') depth++;
140
- else if (trimmed[j] === '}') { depth--; if (depth === 0) { j++; break; } }
141
- j++;
240
+ while (i < lines.length) {
241
+ const line = lines[i].trim();
242
+ if (line.startsWith('{') || line.startsWith('[')) {
243
+ // Accumulate lines until we have a complete JSON object (handles multi-line JSON)
244
+ // Skips { } [ ] inside JSON strings so CSS/HTML brace counts don't confuse the parser.
245
+ let jsonStr = '';
246
+ let depth = 0;
247
+ while (i < lines.length) {
248
+ const l = lines[i];
249
+ jsonStr += (jsonStr ? '\n' : '') + l;
250
+ let inString = false, escape = false;
251
+ for (const ch of l) {
252
+ if (escape) { escape = false; continue; }
253
+ if (ch === '\\' && inString) { escape = true; continue; }
254
+ if (ch === '"') { inString = !inString; continue; }
255
+ if (!inString) {
256
+ if (ch === '{' || ch === '[') depth++;
257
+ else if (ch === '}' || ch === ']') depth--;
258
+ }
259
+ }
260
+ i++;
261
+ if (depth === 0 && jsonStr.trim()) break;
262
+ }
263
+ try {
264
+ const obj = JSON.parse(jsonStr.trim());
265
+ if (Array.isArray(obj)) {
266
+ for (const item of obj) {
267
+ const call = normalise(item);
268
+ if (call) calls.push(call);
269
+ }
270
+ } else {
271
+ const call = normalise(obj);
272
+ if (call) calls.push(call);
273
+ }
274
+ } catch {}
275
+ } else {
276
+ i++;
142
277
  }
143
- try {
144
- const obj = JSON.parse(trimmed.slice(i, j));
145
- const call = normalise(obj);
146
- if (!call) return null;
147
- calls.push(call);
148
- i = j;
149
- } catch { return null; }
150
278
  }
151
279
  return calls.length > 0 ? calls : null;
152
280
  }
@@ -206,13 +334,13 @@ export class OllamaAgent extends EventEmitter {
206
334
  return { agentId, workDir };
207
335
  }
208
336
 
209
- async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null) {
337
+ async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null) {
210
338
  const startTime = Date.now();
211
339
  const controller = new AbortController();
212
340
 
213
341
  // Use per-agent model override if provided (and not the placeholder 'Default').
214
- // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/qwen3-vl:8b' but
215
- // Ollama's API expects bare names like 'qwen3-vl:8b'.
342
+ // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/modelname:tag' but
343
+ // Ollama's API expects bare names like 'modelname:tag'.
216
344
  const rawModel = (agentModel && agentModel !== 'Default') ? agentModel : this.model;
217
345
  const effectiveModel = rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel;
218
346
 
@@ -224,82 +352,153 @@ export class OllamaAgent extends EventEmitter {
224
352
  console.log(` Task: ${task}`);
225
353
  console.log(` Working dir: ${workDir}`);
226
354
 
227
- // Detect model capabilities
228
- const isQwen3 = effectiveModel.startsWith('qwen3');
229
- const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(effectiveModel);
230
-
231
355
  try {
232
- // Load conversation history from disk (session persistence)
233
- const history = this._loadHistory(agentId, workDir, sessionId);
234
-
235
- // System prompt uses the exact format from qwen3's Hermes chat template.
236
- // Tools are embedded as <tools> XML — never passed via the API `tools` param (broken in Ollama).
237
- const systemPrompt = isQwen3
238
- ? [
239
- '/no_think',
240
- `You are a helpful assistant. Working directory: ${workDir}`,
241
- ``,
242
- `# Tools`,
243
- ``,
244
- `You may call one or more functions to complete the task.`,
245
- ``,
246
- `You are provided with function signatures within <tools></tools> XML tags:`,
247
- TOOLS_XML,
248
- ``,
249
- `For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:`,
250
- `<tool_call>`,
251
- `{"name": <function-name>, "arguments": <args-json-object>}`,
252
- `</tool_call>`,
253
- ``,
254
- `Rules:`,
255
- `- Call tools to take actions. Do NOT describe what you would do — just do it.`,
256
- `- For simple conversation (greetings, questions) respond with plain text, no tools.`,
257
- `- After finishing, write a brief summary.`,
258
- ].join('\n')
259
- : [
260
- `You are a helpful AI agent. Working directory: ${workDir}`,
261
- `Use the provided tools to complete tasks. Don't describe — act.`,
262
- ].join('\n');
356
+ // Load conversation history — prefer Railway DB history (sent via task payload, works across
357
+ // any machine/user/model). Fall back to local file for offline or pre-fix sessions.
358
+ const history = (conversationHistory && conversationHistory.length > 0)
359
+ ? conversationHistory.slice(-20)
360
+ : this._loadHistory(agentId, workDir, sessionId);
361
+
362
+ // Text-based tool format is used rather than XML schemas — more reliable across models.
363
+ // Use flow's custom system prompt if provided, otherwise fall back to built-in default.
364
+ // ALL models get the same rule set and tool format — no model-specific branching.
365
+ const homeDir = process.env.HOME || '/tmp';
366
+ const projectsDir = `${homeDir}/Desktop/Projects`;
367
+ const universalRules = `
368
+ == WHAT YOU CAN DO ==
369
+ You have these tools:
370
+
371
+ bash: Run any shell command — file ops, servers, packages, logs, system queries.
372
+ read_file: Read a local file.
373
+ WRITE_FILE: Write a local file (code-fence format only).
374
+ list_directory: List a local directory.
375
+ web_fetch: Fetch any public URL — websites, APIs, docs, raw data. Fast, text-only.
376
+ screenshot_and_describe: Navigate a real browser to any URL and screenshot it. Use this when pages require JavaScript, you need visual output, or web_fetch returns nothing useful.
377
+ browser: Control the AgentForge Browser directly (Chrome, always running, logged into user's services). Use for ALL browser interaction — navigating, clicking, typing, reading page content, screenshots.
378
+
379
+ BROWSER TOOL — use this instead of writing CDP scripts:
380
+ {"name":"browser","arguments":{"action":"tabs"}} ← list ALL open tabs with URLs (DO THIS FIRST)
381
+ {"name":"browser","arguments":{"action":"snapshot"}} ← read current page content + interactive elements (also shows all tabs)
382
+ {"name":"browser","arguments":{"action":"navigate","url":"https://..."}} ← go to URL
383
+ {"name":"browser","arguments":{"action":"focus","url":"expireddomains"}} ← switch to a tab by URL fragment
384
+ {"name":"browser","arguments":{"action":"click","ref":3}} ← click element by index from snapshot
385
+ {"name":"browser","arguments":{"action":"click","text":"Show Filter"}} ← click element by visible text
386
+ {"name":"browser","arguments":{"action":"click","selector":"#filter-btn"}} ← click by CSS selector
387
+ {"name":"browser","arguments":{"action":"type","selector":"input","text":"hello"}} ← type text
388
+ {"name":"browser","arguments":{"action":"screenshot"}} ← take screenshot
389
+ {"name":"browser","arguments":{"action":"evaluate","script":"document.title"}} ← run JS
390
+ {"name":"browser","arguments":{"action":"scroll","y":400}} ← scroll down
391
+
392
+ WORKFLOW when user says "the tab is already open":
393
+ 1. browser tabs → see ALL open tabs and their URLs
394
+ 2. browser focus with the URL fragment of the tab you need (e.g. "expireddomains")
395
+ 3. browser snapshot → read page content and get element indices
396
+ 4. browser click to interact (by ref index, by text, or by selector)
397
+ 5. browser snapshot again to see result
398
+ The browser has the user's sessions and cookies. You CAN click any button, filter, or link visible on the page.
399
+
400
+ == GENERAL RULES (all tasks) ==
401
+ G1. IDENTIFY THE TASK TYPE. Build? Research? Question? Match approach to task.
402
+ G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer.
403
+ G3. ANY WEBSITE/URL IS ACCESSIBLE. User mentions a site or open tab? Use browser snapshot to see what's currently open, then browser navigate/click/type to interact. Never ask "what's the URL?" — find it yourself.
404
+ G4. NEVER ASK PERMISSION. Never say "should I use X or Y?" — pick the right tool and use it.
405
+ G5. IF A TOOL FAILS: Try a different approach. web_fetch empty → screenshot_and_describe. Never repeat a failing call identically.
406
+ G6. RESEARCH TASKS: web_fetch → read → reason → respond in text. No server, no localhost.
407
+ G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data.
408
+ G8. WHEN GENUINELY STUCK: State what you tried, what failed, ask ONE specific question.
409
+ G9. KEEP GOING until the task is fully complete.
410
+
411
+ == BUILD RULES (only when building apps/games/tools) ==
412
+ B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spaces — use underscores).
413
+ B2. WRITE EVERY FILE COMPLETELY — no stubs, no placeholders, no TODOs. Full working code only.
414
+ B3. BUILD FILE BY FILE — write each file completely before writing the next.
415
+ B4. ALWAYS use absolute paths.
416
+ B5. SERVING FILES: Node.js server: nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 & — NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server PORT --directory /abs/path/ > /tmp/server.log 2>&1 &
417
+ B6. npm install: cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express
418
+ B7. After starting server, verify: sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:PORT — if 000, check /tmp/server.log and fix the error.
419
+ B8. PORT MANAGEMENT: Check port before starting: lsof -i :PORT | head -3. If in use: kill old process, restart. If crashed: restart. If busy with something else: pick different port.
420
+ B9. EXPRESS WILDCARD ROUTE: NEVER write app.get('*', ...) — crashes in newer versions. Use app.use((req, res) => { ... }) instead.
421
+ B10. MANDATORY SCREENSHOT QA: After curl returns 200, call screenshot_and_describe with send_to_user:true. You are NOT done until the screenshot shows the real working app.
422
+ B11. ALWAYS open the finished app: bash open http://localhost:PORT
423
+ B12. CANVAS GAMES: canvas 800×600, dark background #1a1a2e, all elements clearly visible. Dark theme, styled UI.
424
+ B13. OBSERVE BEFORE FIXING: Screenshot first, then make targeted edits. Never rewrite an entire file from scratch when the server is running.
425
+ B14. TARGETED EDITS: read_file to see current code, write_file only the changed section. Never throw away working code.
426
+ B15. QUALITY LOOP: After each fix, screenshot again to verify. Iterate until it looks correct.
427
+ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.`;
428
+ // Text-based tool format works reliably across all local models.
429
+ // WRITE_FILE uses code-fence to avoid JSON-escaping issues; all other tools use JSON.
430
+ const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nDO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nTO WRITE A FILE (only when actually writing code/content to disk):\nWriting server.js...\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\n\nFOR ALL OTHER TOOLS — output JSON on its own line:\nRunning command...\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for","send_to_user":true}} — open ANY URL in a real browser and screenshot it. Use when pages are dynamic/JS-heavy or you need to show the user visuals.\n\n${universalRules}`;
431
+ const systemPrompt = customSystemPrompt || jsonToolFormat;
263
432
 
264
433
  const messages = [
265
434
  { role: 'system', content: systemPrompt },
266
435
  ...history,
267
436
  ];
268
437
 
269
- // Attach initial image to user message if provided
438
+ // Attach initial image if provided — always include it; models that don't support
439
+ // images will ignore the field, and if they error we catch it below.
270
440
  const userMessage = { role: 'user', content: task };
271
- if (image && isVision) {
441
+ if (image) {
272
442
  const base64 = image.replace(/^data:image\/\w+;base64,/, '');
273
443
  userMessage.images = [base64];
274
444
  }
275
445
  messages.push(userMessage);
276
446
 
447
+ // Force-unload any currently loaded model so it reloads with our num_ctx setting.
448
+ // Model-agnostic and machine-agnostic — guarantees 32K context on every task.
449
+ try {
450
+ await fetch(`${this.baseUrl}/api/generate`, {
451
+ method: 'POST', signal: controller.signal,
452
+ headers: { 'Content-Type': 'application/json' },
453
+ body: JSON.stringify({ model: effectiveModel, keep_alive: 0, prompt: '' })
454
+ });
455
+ } catch { /* ignore — model may not be loaded yet */ }
456
+
277
457
  let finalContent = '';
278
458
  let allOutput = ''; // accumulate everything streamed across all turns
279
459
  const toolsUsed = []; // track tool names called (for fallback summary)
280
- const MAX_TURNS = 15; // reduce from 25 local models get stuck in tool loops
460
+ // No hard turn limit agent runs until done, loop-detected, or wall-clock timeout.
461
+ const recentCalls = []; // last N tool calls for loop detection
462
+ let emptyRetries = 0; // consecutive empty-response retries
281
463
 
282
- for (let turn = 0; turn < MAX_TURNS; turn++) {
464
+ for (let turn = 0; ; turn++) {
283
465
  if (controller.signal.aborted) break;
284
466
 
285
467
  this.emit('tool_activity', { agentId, event: 'tool_start', tool: 'model', description: `Thinking…` });
286
468
 
469
+ // All local Ollama models use the native /api/chat endpoint.
470
+ // The OpenAI-compatible /v1/chat/completions endpoint ignores options.num_ctx,
471
+ // causing all models to run at 4096-token context regardless of what we pass.
472
+ const isOllamaBackend = this.baseUrl.includes('11434') || this.baseUrl.includes('localhost') || this.baseUrl.includes('127.0.0.1');
473
+ const useNativeEndpoint = isOllamaBackend; // all local models use native endpoint
474
+
287
475
  let response;
288
476
  try {
289
- const requestBody = {
290
- model: effectiveModel,
291
- messages,
292
- stream: true,
293
- // qwen3: tools embedded in system prompt — do NOT pass tools param (broken in Ollama for qwen3)
294
- // Other models: pass tools normally
295
- ...(!isQwen3 ? { tools: TOOL_DEFS, tool_choice: 'auto' } : {}),
296
- options: {
297
- num_ctx: 8192, // explicit context — Ollama defaults to 2048 which is too small
298
- ...(isQwen3 ? { think: false } : {}), // CRITICAL: thinking + tools corrupts template
299
- },
300
- };
301
-
302
- response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
477
+
478
+ let requestBody;
479
+ let endpoint;
480
+
481
+ if (useNativeEndpoint) {
482
+ // Ollama native format — supports think:false at top level
483
+ endpoint = `${this.baseUrl}/api/chat`;
484
+ requestBody = {
485
+ model: effectiveModel,
486
+ messages,
487
+ stream: true,
488
+ think: false, // top-level think disable — WORKS on native endpoint
489
+ options: { num_ctx: 32768 },
490
+ };
491
+ } else {
492
+ endpoint = `${this.baseUrl}/v1/chat/completions`;
493
+ requestBody = {
494
+ model: effectiveModel,
495
+ messages,
496
+ stream: true,
497
+ options: { num_ctx: 32768 },
498
+ };
499
+ }
500
+
501
+ response = await fetch(endpoint, {
303
502
  method: 'POST',
304
503
  headers: { 'Content-Type': 'application/json' },
305
504
  signal: controller.signal,
@@ -315,21 +514,30 @@ export class OllamaAgent extends EventEmitter {
315
514
  throw new Error(`Local model error ${response.status}: ${body}`);
316
515
  }
317
516
 
318
- // ── Stream the SSE response ──
319
- // For qwen3: model emits text tokens including <tool_call>...</tool_call> blocks.
320
- // Stream text live to user, but suppress content inside <tool_call> tags.
321
- // For other models: also handle delta.tool_calls in the standard OpenAI format.
322
- let streamContent = ''; // full accumulated text (including tool_call tags for qwen3)
517
+ // ── Stream the response ──
518
+ // Two formats:
519
+ // Ollama native (/api/chat): NDJSON lines {"message":{"content":"..."},"done":false}
520
+ // OpenAI-compatible (/v1/...): SSE lines — data: {"choices":[{"delta":{"content":"..."}}]}
521
+ // Models may emit <tool_call>...</tool_call> or <think>...</think> blocks in text content.
522
+ // Stream text live to user; suppress think blocks and raw JSON tool call blobs.
523
+ let streamContent = ''; // full accumulated text (including any tool_call/think blocks)
323
524
  let visibleContent = ''; // text emitted live to user (no tool_call or think blocks)
324
- let streamToolCalls = {}; // OpenAI-format tool calls (non-qwen3 models)
525
+ let streamToolCalls = {}; // OpenAI-format tool calls from native tool_calls field
325
526
  let inThinkBlock = false;
326
527
  let inToolCallBlock = false; // inside <tool_call>...</tool_call>
528
+ let inJsonBlob = false; // inside bare JSON tool call — suppress from streaming
529
+ let inFenceBlock = false; // inside WRITE_FILE code fence — suppress content from streaming
530
+ let fenceDepth = 0; // ``` count since last WRITE_FILE (even=closed, odd=open)
327
531
  let rawTokenCount = 0;
532
+ let lastVisibleAt = Date.now(); // track when we last got visible output (for think timeout)
328
533
 
329
534
  const reader = response.body.getReader();
330
535
  const decoder = new TextDecoder();
331
536
  let buf = '';
332
537
 
538
+ // No timeouts — local model can take as long as it needs on any turn.
539
+ // Only the user abort (controller.signal) or stream end stops a turn.
540
+ let turnRetry = false;
333
541
  while (true) {
334
542
  if (controller.signal.aborted) break;
335
543
  const { done, value } = await reader.read();
@@ -340,33 +548,47 @@ export class OllamaAgent extends EventEmitter {
340
548
  buf = lines.pop();
341
549
 
342
550
  for (const line of lines) {
343
- if (!line.startsWith('data: ')) continue;
344
- const payload = line.slice(6).trim();
345
- if (payload === '[DONE]') continue;
346
- let evt;
347
- try { evt = JSON.parse(payload); } catch { continue; }
348
-
349
- const delta = evt.choices?.[0]?.delta;
350
- if (!delta) continue;
351
-
352
- // Standard OpenAI tool_calls (non-qwen3 models)
353
- if (delta.tool_calls) {
354
- for (const tc of delta.tool_calls) {
355
- const idx = tc.index ?? 0;
356
- if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
357
- if (tc.id) streamToolCalls[idx].id = tc.id;
358
- if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
359
- if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
551
+ if (!line.trim()) continue;
552
+
553
+ let tokenText = null;
554
+
555
+ if (useNativeEndpoint) {
556
+ // Ollama native NDJSON format
557
+ let nativeEvt;
558
+ try { nativeEvt = JSON.parse(line); } catch { continue; }
559
+ if (nativeEvt.done) continue;
560
+ tokenText = nativeEvt.message?.content ?? null;
561
+ } else {
562
+ // OpenAI SSE format
563
+ if (!line.startsWith('data: ')) continue;
564
+ const payload = line.slice(6).trim();
565
+ if (payload === '[DONE]') continue;
566
+ let evt;
567
+ try { evt = JSON.parse(payload); } catch { continue; }
568
+
569
+ const delta = evt.choices?.[0]?.delta;
570
+ if (!delta) continue;
571
+
572
+ // Standard OpenAI tool_calls from native tool_calls field
573
+ if (delta.tool_calls) {
574
+ for (const tc of delta.tool_calls) {
575
+ const idx = tc.index ?? 0;
576
+ if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
577
+ if (tc.id) streamToolCalls[idx].id = tc.id;
578
+ if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
579
+ if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
580
+ }
360
581
  }
582
+ tokenText = delta.content ?? null;
361
583
  }
362
584
 
363
- if (!delta.content) continue;
585
+ if (tokenText === null) continue;
364
586
  rawTokenCount++;
365
- streamContent += delta.content;
587
+ streamContent += tokenText;
366
588
 
367
589
  // Process token through think + tool_call filters, emit visible text live
368
590
  // We scan only the new delta token against the current buffer state
369
- const chunk = delta.content;
591
+ const chunk = tokenText;
370
592
  let visible = '';
371
593
  // Simple per-token state machine — handles split tags across tokens by tracking state flags
372
594
  if (!inThinkBlock && !inToolCallBlock) {
@@ -392,9 +614,59 @@ export class OllamaAgent extends EventEmitter {
392
614
  inToolCallBlock = false;
393
615
  }
394
616
 
395
- if (visible && !inThinkBlock && !inToolCallBlock) {
396
- visibleContent += visible;
397
- this.emit('agent_output', { agentId, output: visible, isChunk: true });
617
+ // Scan ALL lines completed in this token for state transitions.
618
+ // Multi-char tokens can contain multiple lines (WRITE_FILE + ``` in same token).
619
+ if (tokenText.includes('\n')) {
620
+ const tokenStartIdx = streamContent.length - tokenText.length;
621
+ let nlIdx = streamContent.indexOf('\n', tokenStartIdx);
622
+ while (nlIdx !== -1) {
623
+ const lineStart = Math.max(0, streamContent.lastIndexOf('\n', nlIdx - 1)) + 1;
624
+ const line = streamContent.slice(lineStart, nlIdx).trim();
625
+ if (/^(WRITE_FILE|write_file)[:\s]+\S/i.test(line)) {
626
+ inFenceBlock = true; fenceDepth = 0;
627
+ } else if (inFenceBlock && /^```/.test(line)) {
628
+ fenceDepth++;
629
+ if (fenceDepth >= 2 && fenceDepth % 2 === 0) inFenceBlock = false;
630
+ } else if (!inFenceBlock && !inJsonBlob && line.length > 1 && (line.startsWith('{') || line.startsWith('['))) {
631
+ inJsonBlob = true;
632
+ }
633
+ nlIdx = streamContent.indexOf('\n', nlIdx + 1);
634
+ }
635
+ }
636
+
637
+ // Also check current partial line (mid-token, before next \n)
638
+ if (!inFenceBlock || !inJsonBlob) {
639
+ const cleanSC = streamContent.replace(/<think>[\s\S]*?<\/think>/g, '');
640
+ const lastNL = cleanSC.lastIndexOf('\n');
641
+ const curLine = cleanSC.slice(lastNL + 1).trimStart();
642
+ if (!inFenceBlock && /^(WRITE_FILE|write_file)[:\s]+\S/i.test(curLine)) {
643
+ inFenceBlock = true; fenceDepth = 0;
644
+ }
645
+ if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || curLine.startsWith('['))) {
646
+ inJsonBlob = true;
647
+ }
648
+ }
649
+
650
+ // Emit visible content — safety filter removes any ``` or WRITE_FILE lines
651
+ // that slipped through (e.g. partial token at detection boundary)
652
+ if (visible && !inThinkBlock && !inToolCallBlock && !inJsonBlob && !inFenceBlock) {
653
+ const safe = visible.split('\n').filter(ln => {
654
+ const t = ln.trimStart();
655
+ return !t.startsWith('```') && !/^(WRITE_FILE|write_file)/i.test(t);
656
+ }).join('\n');
657
+ if (safe.trim() || safe.includes('\n')) {
658
+ visibleContent += safe;
659
+ lastVisibleAt = Date.now();
660
+ this.emit('agent_output', { agentId, output: safe, isChunk: true });
661
+ }
662
+ }
663
+
664
+ // Thinking timeout: if the model has been in a <think> block for >90s with no visible output,
665
+ // abort the stream so we can retry with a kick. Prevents infinite thinking loops.
666
+ if (inThinkBlock && (Date.now() - lastVisibleAt) > 90000 && rawTokenCount > 100) {
667
+ console.log(` [${agentId}] ⏱️ Think timeout (>90s, ${rawTokenCount} tokens) — aborting stream`);
668
+ reader.cancel().catch(() => {});
669
+ break;
398
670
  }
399
671
  }
400
672
  }
@@ -403,17 +675,35 @@ export class OllamaAgent extends EventEmitter {
403
675
  if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
404
676
 
405
677
  // ── Extract tool calls from content ───────────────────────────────────
406
- // For qwen3: parse <tool_call> XML tags from full streamed content.
407
- // For others: use API-level tool_calls already accumulated above.
678
+ // Try <tool_call> XML tags first (some models emit this format), then fall through
679
+ // to code-fence and JSON text parsers.
408
680
  let parsedTagCalls = null;
409
- if (isQwen3 && Object.keys(streamToolCalls).length === 0) {
681
+ if (Object.keys(streamToolCalls).length === 0) {
410
682
  parsedTagCalls = _parseToolCallTags(streamContent);
411
683
  if (parsedTagCalls) {
412
684
  console.log(` [${agentId}] 🔍 ${parsedTagCalls.length} <tool_call> tag(s) detected`);
413
685
  }
414
686
  }
415
687
 
416
- // Fallback: try legacy JSON-blob detection if no tags found
688
+ // Fallback 1: try WRITE_FILE code-fence format (avoids JSON-escaping issues with code)
689
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
690
+ const fenceCalls = _parseWriteFileFences(streamContent);
691
+ if (fenceCalls) {
692
+ console.log(` [${agentId}] 🔍 ${fenceCalls.length} WRITE_FILE fence(s) detected`);
693
+ parsedTagCalls = fenceCalls;
694
+ }
695
+ }
696
+
697
+ // Fallback 2: "Writing filename...\n```\ncontent\n```" (model ignored WRITE_FILE instruction)
698
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
699
+ const writingCalls = _parseWritingFallback(streamContent, workDir);
700
+ if (writingCalls) {
701
+ console.log(` [${agentId}] 🔍 ${writingCalls.length} Writing-block fallback file(s) detected`);
702
+ parsedTagCalls = writingCalls;
703
+ }
704
+ }
705
+
706
+ // Fallback 3: try legacy JSON-blob detection if no tags found
417
707
  if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
418
708
  const textCalls = _parseTextToolCalls(streamContent);
419
709
  if (textCalls) {
@@ -422,9 +712,35 @@ export class OllamaAgent extends EventEmitter {
422
712
  }
423
713
  }
424
714
 
425
- // Convert tag/text calls into streamToolCalls structure
715
+ // Fallback 4: if we found ONLY bash tool calls but content has writing blocks too,
716
+ // merge them so files get written AND bash runs
717
+ if (parsedTagCalls && streamContent) {
718
+ const writingCalls = _parseWritingFallback(streamContent, workDir);
719
+ if (writingCalls) {
720
+ const existingPaths = new Set(parsedTagCalls.filter(c => c.name === 'write_file').map(c => c.arguments.path));
721
+ const newWrites = writingCalls.filter(c => !existingPaths.has(c.arguments.path));
722
+ if (newWrites.length > 0) {
723
+ console.log(` [${agentId}] 🔍 +${newWrites.length} additional Writing-block file(s) merged`);
724
+ // Prepend file writes before bash commands so files exist before server starts
725
+ parsedTagCalls = [...newWrites, ...parsedTagCalls];
726
+ }
727
+ }
728
+ }
729
+
730
+ // Convert tag/text calls into streamToolCalls structure.
731
+ // Deduplicate: if model emits the same tool call N times in one stream, only run it once.
426
732
  if (parsedTagCalls) {
427
- parsedTagCalls.forEach((tc, i) => {
733
+ const seen = new Set();
734
+ const deduped = parsedTagCalls.filter(tc => {
735
+ const key = `${tc.name}:${JSON.stringify(tc.arguments)}`;
736
+ if (seen.has(key)) return false;
737
+ seen.add(key);
738
+ return true;
739
+ });
740
+ if (deduped.length < parsedTagCalls.length) {
741
+ console.log(` [${agentId}] 🔁 Deduplicated ${parsedTagCalls.length} → ${deduped.length} tool call(s)`);
742
+ }
743
+ deduped.forEach((tc, i) => {
428
744
  streamToolCalls[i] = { id: `tag-${i}`, type: 'function', function: { name: tc.name, arguments: JSON.stringify(tc.arguments) } };
429
745
  });
430
746
  // Don't accumulate raw tool_call XML as user-visible output
@@ -439,17 +755,17 @@ export class OllamaAgent extends EventEmitter {
439
755
  });
440
756
 
441
757
  // ── Push assistant message ────────────────────────────────────────────
758
+ // All local models now use JSON-in-text format on the native endpoint.
759
+ // Strip <think>...</think> blocks to avoid burning context on reasoning traces.
442
760
  const toolCallsArray = Object.values(streamToolCalls);
443
- if (isQwen3) {
444
- // qwen3: assistant message is the raw streamed content (includes <tool_call> tags)
445
- messages.push({ role: 'assistant', content: streamContent || '' });
446
- } else {
447
- messages.push({
448
- role: 'assistant',
449
- content: visibleContent || null,
450
- tool_calls: toolCallsArray.length > 0 ? toolCallsArray : undefined
451
- });
452
- }
761
+ const hasToolCalls = toolCallsArray.length > 0;
762
+ const cleanedContent = (streamContent || '')
763
+ .replace(/<think>[\s\S]*?<\/think>/g, '')
764
+ .trim();
765
+ messages.push({ role: 'assistant', content: cleanedContent || '' });
766
+
767
+ // Incremental save — always, regardless of sessionId (sessionId is null for OllamaAgent)
768
+ this._saveHistory(agentId, workDir, sessionId, messages.slice(1));
453
769
 
454
770
  // ── Execute tool calls ────────────────────────────────────────────────
455
771
  if (toolCallsArray.length > 0) {
@@ -461,14 +777,95 @@ export class OllamaAgent extends EventEmitter {
461
777
  try { parsedArgs = typeof args === 'string' ? JSON.parse(args) : args; }
462
778
  catch { parsedArgs = {}; }
463
779
 
780
+ // ── Unknown tool name detection ──────────────────────────────────
781
+ // Block calls to tools that don't exist (e.g. model writes {"name":"curl",...}
782
+ // instead of {"name":"bash","arguments":{"command":"curl ..."}})
783
+ const VALID_TOOL_NAMES = new Set(['bash','read_file','write_file','list_directory','web_fetch','screenshot_and_describe','take_screenshot','browser']);
784
+ if (!VALID_TOOL_NAMES.has(name.toLowerCase())) {
785
+ console.log(` [${agentId}] ⚠️ Unknown tool "${name}" — blocked`);
786
+ messages.push({ role: 'user', content: `"${name}" is not a valid tool. Valid tools: bash, read_file, write_file, list_directory, web_fetch, screenshot_and_describe. To run a shell command use bash: {"name":"bash","arguments":{"command":"${name} ..."}}.` });
787
+ continue;
788
+ }
789
+
790
+ // ── Placeholder detection ────────────────────────────────────────
791
+ // Block tool calls where the agent passed a literal placeholder like
792
+ // "[The URL where the auction is being viewed]" instead of a real value.
793
+ // These come from the model reading its own planning text and mistaking it
794
+ // for a concrete argument.
795
+ {
796
+ const argStr = JSON.stringify(parsedArgs);
797
+ const hasPlaceholder = /\[(the |this |your |a |an |current )?(url|path|address|link|tab|page|site|location|file|directory)[^\]]*\]/i.test(argStr);
798
+ if (hasPlaceholder) {
799
+ console.log(` [${agentId}] ⚠️ Placeholder in args — blocked: ${argStr.slice(0, 120)}`);
800
+ messages.push({ role: 'user', content: `Tool call BLOCKED: your argument contains a placeholder "${argStr.slice(0, 100)}" — that is NOT a real URL or path. Look at the tool results already in the conversation (e.g. the curl localhost:9223/json output) and use the actual URL you found there.` });
801
+ continue;
802
+ }
803
+ }
804
+
464
805
  this.emit('tool_activity', {
465
806
  agentId, event: 'tool_start', tool: name,
466
807
  description: this._toolDesc(name, parsedArgs)
467
808
  });
468
809
  console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
469
810
  toolsUsed.push(name);
811
+ emptyRetries = 0; // reset on successful tool call
812
+
813
+ // Loop detection: catch repeated single calls AND alternating A/B/A/B patterns.
814
+ // Normalize curl commands: strip sleep prefix so "sleep 3 && curl ...URL" and
815
+ // "sleep 10 && curl ...URL" both map to the same key "curl:URL".
816
+ let callKey = `${name}:${JSON.stringify(parsedArgs)}`;
817
+ if (name === 'bash' && parsedArgs.command) {
818
+ const curlMatch = parsedArgs.command.match(/curl\s+.*?(https?:\/\/\S+|localhost:\d+)/);
819
+ if (curlMatch) callKey = `curl:${curlMatch[1]}`;
820
+ }
821
+ recentCalls.push(callKey);
822
+ if (recentCalls.length > 6) recentCalls.shift();
823
+
824
+ // Detect: same call 3x in a row (2x for screenshot — never valid to screenshot without a change)
825
+ const screenshotLoop = name === 'screenshot_and_describe' && recentCalls.length >= 2 && recentCalls.slice(-2).every(c => c === callKey);
826
+ const last3Same = screenshotLoop || (recentCalls.length >= 3 && recentCalls.slice(-3).every(c => c === callKey));
827
+ // Detect: alternating A,B,A,B pattern (last 4 calls)
828
+ const last4 = recentCalls.slice(-4);
829
+ const abab = last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1];
830
+ // Detect: A,B,C,A,B,C pattern (last 6)
831
+ const last6 = recentCalls.slice(-6);
832
+ const abcabc = last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5];
833
+
834
+ if (last3Same || abab || abcabc) {
835
+ const pattern = last3Same ? 'same call 3x' : abab ? 'A/B/A/B alternating' : 'A/B/C repeating';
836
+ console.log(` [${agentId}] 🔁 Loop detected (${pattern}) — injecting fix hint`);
837
+ // Generate a context-aware hint based on what's looping
838
+ let loopFixMsg = `You are repeating the same action — STOP looping. Observe first, then act.\n`;
839
+ const loopCmd = parsedArgs.command || parsedArgs.path || '';
840
+ const noThink = '';
841
+ if (name === 'write_file') {
842
+ loopFixMsg += `You keep rewriting the same file. The file already exists with your previous code. Do NOT rewrite it from scratch.\nInstead:\n1. call screenshot_and_describe to SEE what the app looks like right now\n2. Identify the specific thing that is wrong or missing\n3. read_file the file to see current content\n4. Make a TARGETED edit — change only the specific broken section\nNever rewrite an entire file when the server is already running.`;
843
+ } else if (loopCmd.includes('mkdir') || loopCmd.includes('client')) {
844
+ loopFixMsg += `Files/folders already exist. STOP creating them. Call screenshot_and_describe to see the current state of the app, then identify what specifically needs to be improved and fix it with targeted edits.`;
845
+ } else if (loopCmd.includes('open http')) {
846
+ const openPortMatch = loopCmd.match(/:(\d+)/);
847
+ const openPort = openPortMatch ? openPortMatch[1] : '????';
848
+ loopFixMsg += `You are calling 'open http://localhost:${openPort}' repeatedly but the server is not running — opening the browser to a dead port does nothing. You must RESTART THE SERVER first:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${openPort}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${openPort}"}}\nIf curl returns 000, check the crash: bash cat /tmp/server.log. Fix the crash FIRST. Only call 'open' after curl returns 200.`;
849
+ } else if (name === 'bash' && (loopCmd.includes('curl') || loopCmd.includes('http_code'))) {
850
+ loopFixMsg += `The server check is looping. Check /tmp/server.log for errors:\n{"name":"bash","arguments":{"command":"cat /tmp/server.log | tail -20"}}\nThen fix the actual error in the code. NEVER change the port.`;
851
+ } else if (loopCmd.includes('npm install')) {
852
+ loopFixMsg += `npm install is looping — packages likely already installed. Skip it and start the server directly with nohup.`;
853
+ } else if (name === 'bash' && (loopCmd.includes('/tmp/') && (loopCmd.includes('.js') || loopCmd.includes('node')) && loopCmd.includes('9223'))) {
854
+ loopFixMsg += `Your Node.js/CDP script is only READING the page — that is why nothing changes. You need to WRITE A NEW SCRIPT THAT CLICKS.\n\nReplace your /tmp script with one that clicks the target element:\n\nWRITE_FILE /tmp/cdp_click.js\n\`\`\`javascript\nconst ws = new WebSocket('ws://localhost:9223/devtools/page/TAB_ID_HERE');\nws.onopen = () => {\n // Click element containing the text you need (change "Filter" to what you see on the page)\n ws.send(JSON.stringify({id:1, method:'Runtime.evaluate', params:{expression: 'Array.from(document.querySelectorAll("a,button,input,span,div,th")).find(el=>el.textContent.trim().includes("Filter"))?.click() || "not found"', returnByValue:true}}));\n};\nws.onmessage = e => { console.log(JSON.parse(e.data)); ws.close(); };\nsetTimeout(() => ws.close(), 5000);\n\`\`\`\n\nThen run: bash → /usr/local/bin/node --experimental-websocket /tmp/cdp_click.js\n\nYou CAN click. You CAN interact. Stop saying you cannot — write the clicking script.`;
855
+ } else if (name === 'screenshot_and_describe') {
856
+ const loopPort = (parsedArgs.url || '').match(/:(\d+)/)?.[1] || '????';
857
+ loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing. You have two choices:\n\nA) If the user asked a question or gave feedback — answer them with TEXT. You do NOT need a screenshot to reply to a conversation. Just write your response.\n\nB) If the app needs to be improved — make a CODE CHANGE first, then take ONE screenshot to verify:\n1. read_file the file that needs changing\n2. write_file with the improvement\n3. restart the server: bash pkill+nohup\n4. screenshot ONCE to verify\n\nDo NOT take another screenshot without first doing one of the above.`;
858
+ } else {
859
+ loopFixMsg += `Observe the tool results above, identify what is specifically broken, then make a targeted fix. Do not repeat commands that already ran.`;
860
+ }
861
+ loopFixMsg += noThink;
862
+ messages.push({ role: 'user', content: loopFixMsg });
863
+ // Don't fully reset — keep 1 entry so next identical call fires after 2 more (not 3)
864
+ recentCalls.splice(0, recentCalls.length - 1);
865
+ break; // break inner tool loop, let model respond to hint
866
+ }
470
867
 
471
- const result = await this._executeTool(name, parsedArgs, workDir);
868
+ const result = await this._executeTool(name, parsedArgs, workDir, agentId);
472
869
 
473
870
  this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
474
871
 
@@ -477,30 +874,107 @@ export class OllamaAgent extends EventEmitter {
477
874
  this.emit('agent_image', { agentId, image: result });
478
875
  }
479
876
 
480
- if (isQwen3) {
481
- // qwen3 format: tool results go back as user messages with <tool_response> tags
482
- if (isImageResult && isVision) {
877
+ // ALL models get tool results fed back — no model should run blind.
878
+ // This is the core of the observe reason act loop: every tool result
879
+ // must be in context so the model can see what happened and react correctly.
880
+ {
881
+ const noThink = '';
882
+ if (isImageResult) {
483
883
  const base64 = result.replace(/^data:image\/\w+;base64,/, '');
484
- messages.push({ role: 'user', content: '<tool_response>\n[Screenshot captured]\n</tool_response>', images: [base64] });
884
+ messages.push({ role: 'user', content: `[${name} result]: Screenshot captured. Continue with the next step.${noThink}`, images: [base64] });
485
885
  } else {
486
- const resultText = isImageResult ? '[Screenshot captured — vision model needed to analyze]' : String(result).slice(0, 8000);
487
- messages.push({ role: 'user', content: `<tool_response>\n${resultText}\n</tool_response>` });
488
- }
489
- } else {
490
- // Standard OpenAI format
491
- if (isImageResult && isVision) {
492
- messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: '[Screenshot captured see image attached]' });
493
- const base64 = result.replace(/^data:image\/\w+;base64,/, '');
494
- messages.push({ role: 'user', content: 'Here is the screenshot:', images: [base64] });
495
- } else {
496
- messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: isImageResult ? '[Screenshot captured]' : String(result).slice(0, 8000) });
886
+ const resultText = isImageResult ? '[Screenshot captured]' : String(result).slice(0, 6000);
887
+ messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\nContinue with the next step.${noThink}` });
888
+
889
+ if (name === 'screenshot_and_describe') {
890
+ const screenshotResult = String(result);
891
+ const isLocalhost = (parsedArgs.url || '').includes('localhost') || (parsedArgs.url || '').includes('127.0.0.1');
892
+ // Server unreachable on localhost force bash restart (only for local servers, not public URLs)
893
+ if (screenshotResult.includes('SERVER IS NOT REACHABLE') && isLocalhost) {
894
+ const portMatch = (parsedArgs.url || '').match(/:(\d+)/);
895
+ const port = portMatch ? portMatch[1] : '????';
896
+ messages.push({ role: 'user', content: `The local server on port ${port} is not running. Restart it with bash — find the project directory, then: pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd /path/to/project && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${port}` });
897
+ }
898
+ // Public URL unreachable — try web_fetch instead
899
+ else if (screenshotResult.includes('SERVER IS NOT REACHABLE') && !isLocalhost) {
900
+ messages.push({ role: 'user', content: `screenshot_and_describe could not reach ${parsedArgs.url}. Try web_fetch instead:\n{"name":"web_fetch","arguments":{"url":"${parsedArgs.url}"}}` });
901
+ }
902
+ // Dependency audit issues — prevent port-hopping
903
+ else if (screenshotResult.includes('DEPENDENCY AUDIT FOUND ISSUES')) {
904
+ messages.push({ role: 'user', content: `CRITICAL: Missing client-side libraries in your HTML. Do NOT change the port. Fix it: (1) read_file the HTML; (2) add the missing script tags; (3) write_file back; (4) restart server same port; (5) screenshot to verify.` });
905
+ }
906
+ // Successful screenshot of a build task — push to make a code change
907
+ else if (isLocalhost) {
908
+ messages.push({ role: 'user', content: `You have seen the current state. Now make your next improvement: read_file the code, write_file the fix, restart server, then screenshot once to verify.` });
909
+ }
910
+ // Successful screenshot of a public URL — agent is doing research, let it reason
911
+ }
912
+ // Catch placeholder/hello world pages — force the model to keep building
913
+ const screenshotText = String(result).toLowerCase();
914
+ const isPlaceholder = (
915
+ screenshotText.includes('hello world') ||
916
+ screenshotText.includes('cannot get /') ||
917
+ (screenshotText.includes('express') && screenshotText.includes('error')) ||
918
+ screenshotText.includes('placeholder') ||
919
+ screenshotText.includes('coming soon') ||
920
+ (screenshotText.includes('blank') && !screenshotText.includes('not blank'))
921
+ );
922
+ if (isPlaceholder) {
923
+ messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
924
+ }
497
925
  }
498
926
  }
499
927
  }
500
928
  continue; // loop back for next model turn
501
929
  }
502
930
 
503
- // ── No tool calls: final answer ───────────────────────────────────────
931
+ // ── No tool calls ────────────────────────────────────────────────────
932
+ {
933
+ const combined = (visibleContent + streamContent).replace(/<think>[\s\S]*?<\/think>/g, '');
934
+ const hasContent = combined.trim().length > 30;
935
+ const isEmpty = combined.trim().length === 0;
936
+
937
+ // Structural: truncated JSON — model started a tool call but stream ended early
938
+ const hasTruncatedJson = /\{"name"\s*:\s*"(bash|web_fetch|screenshot_and_describe|read_file|write_file|list_directory)"/i.test(streamContent) && Object.keys(streamToolCalls).length === 0;
939
+ if (hasTruncatedJson) {
940
+ console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call — kicking to re-output`);
941
+ messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
942
+ continue;
943
+ }
944
+
945
+ // Structural: empty response — model produced nothing
946
+ if (isEmpty) {
947
+ if (emptyRetries < 3) {
948
+ emptyRetries++;
949
+ console.log(` [${agentId}] ⚡ Turn ${turn}: empty response (retry ${emptyRetries}/3) — kicking`);
950
+ messages.push({ role: 'user', content: toolsUsed.length === 0 ? 'Start now — make your first tool call.' : 'You stopped. Make your next tool call.' });
951
+ continue;
952
+ }
953
+ console.log(` [${agentId}] ⚠️ Turn ${turn}: empty after 3 retries`);
954
+ }
955
+
956
+ // Structural: agent hasn't used any tools yet — it must act before it can answer
957
+ if (toolsUsed.length === 0 && hasContent) {
958
+ console.log(` [${agentId}] ⚡ Turn ${turn}: no tools used yet — kicking to act`);
959
+ messages.push({ role: 'user', content: 'Make your first tool call now.' });
960
+ continue;
961
+ }
962
+
963
+ // Semantic: ask the LLM whether the task is actually complete.
964
+ // This replaces all regex-based intent detection — the model judges its own output.
965
+ if (hasContent && toolsUsed.length > 0) {
966
+ const originalTask = messages.find(m => m.role === 'user')?.content || task;
967
+ const isDone = await this._isTaskComplete(originalTask, combined, controller.signal);
968
+ if (!isDone) {
969
+ console.log(` [${agentId}] ⚡ Turn ${turn}: LLM says task incomplete — kicking`);
970
+ messages.push({ role: 'user', content: 'You have not completed the task yet. Try a different approach and keep going.' });
971
+ continue;
972
+ }
973
+ console.log(` [${agentId}] ✅ Turn ${turn}: LLM confirmed task complete`);
974
+ }
975
+ }
976
+
977
+ // ── Final answer ──────────────────────────────────────────────────────
504
978
  if (visibleContent) finalContent = visibleContent;
505
979
  break;
506
980
 
@@ -519,7 +993,7 @@ export class OllamaAgent extends EventEmitter {
519
993
  ];
520
994
 
521
995
  try {
522
- const summaryRes = await fetch(`${this.baseUrl}/v1/chat/completions`, {
996
+ const summaryRes = await fetch(`${this.baseUrl}/api/chat`, {
523
997
  method: 'POST',
524
998
  headers: { 'Content-Type': 'application/json' },
525
999
  signal: controller.signal,
@@ -527,7 +1001,8 @@ export class OllamaAgent extends EventEmitter {
527
1001
  model: effectiveModel,
528
1002
  messages: summaryMessages,
529
1003
  stream: true,
530
- ...(isQwen3 ? { options: { think: false } } : {})
1004
+ think: false,
1005
+ options: { num_ctx: 32768 }
531
1006
  })
532
1007
  });
533
1008
 
@@ -570,7 +1045,7 @@ export class OllamaAgent extends EventEmitter {
570
1045
  }
571
1046
 
572
1047
  // Persist history for next task
573
- if (finalContent && sessionId) {
1048
+ if (finalContent) {
574
1049
  this._saveHistory(agentId, workDir, sessionId, [
575
1050
  ...history,
576
1051
  { role: 'user', content: task },
@@ -629,16 +1104,96 @@ export class OllamaAgent extends EventEmitter {
629
1104
 
630
1105
  // ─── Tool execution ───────────────────────────────────────────────────────
631
1106
 
632
- async _executeTool(name, args, workDir) {
1107
+ async _executeTool(name, args, workDir, agentId = 'agent') {
633
1108
  try {
634
1109
  switch (name) {
635
1110
  case 'bash': {
1111
+ // Block commands that would kill the worker process itself.
1112
+ // "pkill -f node" and "killall node" match the worker's own process.
1113
+ // Rewrite to only kill processes by their specific server log path or port.
1114
+ const cmd = args.command || '';
1115
+ if (/pkill\s+(-\w+\s+)*(-f\s+)?node\b/i.test(cmd) || /killall\s+node\b/i.test(cmd)) {
1116
+ // Safe replacement: kill only the app server on the port, not all node processes
1117
+ const portMatch = cmd.match(/localhost:(\d+)|:(\d+)/);
1118
+ const serverLogMatch = cmd.match(/server\.js/);
1119
+ if (portMatch || serverLogMatch) {
1120
+ const safeCmd = portMatch
1121
+ ? `lsof -ti:${portMatch[1] || portMatch[2]} | xargs kill -9 2>/dev/null || true`
1122
+ : `pkill -f "server.js" 2>/dev/null || true`;
1123
+ args = { ...args, command: safeCmd + cmd.slice(cmd.indexOf('&&') !== -1 ? cmd.indexOf('&&') : cmd.length) };
1124
+ } else {
1125
+ // No specific target — skip the pkill entirely, just run what follows &&
1126
+ const afterAnd = cmd.indexOf('&&');
1127
+ if (afterAnd !== -1) {
1128
+ args = { ...args, command: cmd.slice(afterAnd + 2).trim() };
1129
+ } else {
1130
+ return 'Skipped broad pkill to protect worker process. Use: lsof -ti:PORT | xargs kill -9';
1131
+ }
1132
+ }
1133
+ }
1134
+
1135
+ // Intercept "open http://..." — navigate the AgentForge CDP browser directly,
1136
+ // then auto-screenshot so the agent immediately sees what it built.
1137
+ const openUrlMatch = args.command.trim().match(/^open\s+(https?:\/\/\S+)/);
1138
+ if (openUrlMatch) {
1139
+ const targetUrl = openUrlMatch[1];
1140
+ let openedViaCDP = false;
1141
+ try {
1142
+ const newTabRes = await fetch('http://127.0.0.1:9223/json/new', { method: 'PUT', signal: AbortSignal.timeout(3000) });
1143
+ const newTabData = await newTabRes.json();
1144
+ const tabWs = new WebSocket(`ws://127.0.0.1:9223/devtools/page/${newTabData.id}`);
1145
+ await new Promise(r => tabWs.on('open', r));
1146
+ await new Promise(r => {
1147
+ let navigated = false;
1148
+ tabWs.send(JSON.stringify({ id: 1, method: 'Page.navigate', params: { url: targetUrl } }));
1149
+ tabWs.on('message', () => { if (!navigated) { navigated = true; tabWs.close(); r(); } });
1150
+ setTimeout(() => { tabWs.close(); r(); }, 3000);
1151
+ });
1152
+ openedViaCDP = true;
1153
+ } catch {
1154
+ // CDP unavailable — fall through to OS open
1155
+ try { await execAsync(`open "${targetUrl}"`); } catch {}
1156
+ }
1157
+ // Auto-screenshot after opening so the agent sees what it built.
1158
+ // Wait for page to load, then call screenshot_and_describe.
1159
+ await new Promise(r => setTimeout(r, 2500));
1160
+ try {
1161
+ const screenshotResult = await this._executeTool('screenshot_and_describe', {
1162
+ url: targetUrl,
1163
+ check_for: 'the running application',
1164
+ send_to_user: true
1165
+ }, workDir, agentId);
1166
+ return `Opened ${targetUrl} in browser${openedViaCDP ? ' (AgentForge browser)' : ''}.\n\nVisual snapshot of what is currently visible:\n${screenshotResult}`;
1167
+ } catch {
1168
+ return `Opened ${targetUrl} in browser. (Screenshot failed — verify with screenshot_and_describe)`;
1169
+ }
1170
+ }
1171
+
1172
+ // If workDir doesn't exist (e.g. /tmp was cleared after worker restart),
1173
+ // fall back to HOME rather than failing with ENOENT on every bash call.
1174
+ let bashCwd = workDir;
1175
+ const _home = process.env.HOME || '/tmp';
1176
+ try { if (!existsSync(bashCwd)) bashCwd = _home; } catch { bashCwd = _home; }
1177
+ // Background commands (ending with &) return no stdout — the model interprets
1178
+ // silence as failure and loops. Run them, then read back any log file to confirm.
1179
+ const isBackground = /&\s*$/.test(args.command.trim());
636
1180
  const { stdout, stderr } = await execAsync(args.command, {
637
- cwd: workDir,
638
- timeout: 60000,
1181
+ cwd: bashCwd,
1182
+ timeout: 120000,
639
1183
  maxBuffer: 1024 * 1024 * 2 // 2MB
640
1184
  });
641
- return (stdout + stderr).trim() || '(no output)';
1185
+ const out = (stdout + stderr).trim();
1186
+ if (isBackground && !out) {
1187
+ // Give the process a moment to start, then check /tmp/server.log if it exists
1188
+ await new Promise(r => setTimeout(r, 1500));
1189
+ let confirmation = 'Background process started.';
1190
+ try {
1191
+ const logContent = readFileSync('/tmp/server.log', 'utf-8').trim().split('\n').slice(-3).join('\n');
1192
+ if (logContent) confirmation = `Background process started. Server log:\n${logContent}`;
1193
+ } catch { /* no log yet */ }
1194
+ return confirmation;
1195
+ }
1196
+ return out || '(no output)';
642
1197
  }
643
1198
 
644
1199
  case 'read_file': {
@@ -685,6 +1240,26 @@ export class OllamaAgent extends EventEmitter {
685
1240
  }
686
1241
  }
687
1242
 
1243
+ case 'screenshot_and_describe': {
1244
+ const result = await this._screenshotAndDescribe(args.url, args.check_for);
1245
+ // Always send screenshot to user — agent called this tool, user should always see it
1246
+ if (this._lastScreenshotData) {
1247
+ this.emit('agent_image', { agentId, image: this._lastScreenshotData });
1248
+ this._lastScreenshotData = null;
1249
+ }
1250
+ return result;
1251
+ }
1252
+
1253
+ case 'browser': {
1254
+ const result = await browserAction(args);
1255
+ if (result && result.__screenshot) {
1256
+ const imgData = `data:image/png;base64,${result.base64}`;
1257
+ this.emit('agent_image', { agentId, image: imgData });
1258
+ return `Screenshot taken (${Math.round(result.base64.length * 0.75 / 1024)}KB). Image sent to chat.`;
1259
+ }
1260
+ return typeof result === 'string' ? result : JSON.stringify(result);
1261
+ }
1262
+
688
1263
  default:
689
1264
  return `Unknown tool: ${name}`;
690
1265
  }
@@ -697,24 +1272,13 @@ export class OllamaAgent extends EventEmitter {
697
1272
 
698
1273
  async _cdpScreenshot(navigateUrl, tmpFile) {
699
1274
  const CDP_PORT = 9223;
700
- let tabId;
701
-
702
- // Get or create a tab
703
- const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
704
- const tabs = await tabsRes.json();
705
- const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
706
-
707
- if (!usable) {
708
- // Create new tab
709
- const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
710
- const newTabData = await newTab.json();
711
- tabId = newTabData.id;
712
- } else {
713
- tabId = usable.id;
714
- }
1275
+
1276
+ // Always create a NEW tab — never hijack the dashboard or other existing tabs
1277
+ const newTabRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
1278
+ const newTabData = await newTabRes.json();
1279
+ const tabId = newTabData.id;
715
1280
 
716
1281
  return new Promise((resolve, reject) => {
717
- // Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
718
1282
  const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
719
1283
  let msgId = 1;
720
1284
  const pending = new Map();
@@ -739,10 +1303,12 @@ export class OllamaAgent extends EventEmitter {
739
1303
  try {
740
1304
  if (navigateUrl) {
741
1305
  await send('Page.navigate', { url: navigateUrl });
742
- // Wait for load
1306
+ // Wait for page to fully render
743
1307
  await new Promise(r => setTimeout(r, 3000));
744
1308
  }
745
1309
  const { data } = await send('Page.captureScreenshot', { format: 'png' });
1310
+ // Close the temporary tab
1311
+ await send('Target.closeTarget', { targetId: tabId }).catch(() => {});
746
1312
  ws.close();
747
1313
  resolve(`data:image/png;base64,${data}`);
748
1314
  } catch (err) {
@@ -752,10 +1318,129 @@ export class OllamaAgent extends EventEmitter {
752
1318
  });
753
1319
 
754
1320
  ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
755
- setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
1321
+ setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 25000);
756
1322
  });
757
1323
  }
758
1324
 
1325
+ // ─── Screenshot + vision analysis ─────────────────────────────────────────
1326
+ // Takes a screenshot of a URL, then asks the active vision model to describe it.
1327
+ // Returns a plain-text description the main agent can reason about.
1328
+
1329
+ async _screenshotAndDescribe(url, checkFor) {
1330
+ const question = checkFor
1331
+ ? `Does this web page look like it's working? Specifically check: ${checkFor}. Describe precisely what you see — the background color, any canvas element, colored shapes (even tiny dots), text, buttons, game elements, or error messages. Is the background dark or white? Are there any colored pixels at all?`
1332
+ : `Describe what you see on this web page. What is the background color? Are there any colored shapes, text, buttons, or UI elements? Is there a canvas? Even tiny colored dots count — be precise about what you see.`;
1333
+
1334
+ // === Server reachability check — fast fail if server is down ===
1335
+ try {
1336
+ await fetch(url, { signal: AbortSignal.timeout(4000) });
1337
+ } catch (reachErr) {
1338
+ const portMatch = url.match(/:(\d+)/);
1339
+ const port = portMatch ? portMatch[1] : '?';
1340
+ return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
1341
+ }
1342
+
1343
+ // === HTML dependency audit (always runs — fast, reliable) ===
1344
+ // Fetches the page HTML and checks for common missing client-side dependencies.
1345
+ // This catches issues that screenshots can't detect (JS errors, missing script tags).
1346
+ let auditNotes = '';
1347
+ try {
1348
+ const htmlRes = await fetch(url, { signal: AbortSignal.timeout(8000) });
1349
+ const html = await htmlRes.text();
1350
+ const missing = [];
1351
+ // Check for socket.io client usage without the script tag
1352
+ if (/\bio\s*\(/.test(html) && !html.includes('/socket.io/socket.io.js')) {
1353
+ missing.push('Missing <script src="/socket.io/socket.io.js"></script> — io() is called but the client library is not loaded');
1354
+ // Also verify the server actually serves it
1355
+ try {
1356
+ const sioRes = await fetch(url.replace(/\/$/, '') + '/socket.io/socket.io.js', { signal: AbortSignal.timeout(5000) });
1357
+ if (!sioRes.ok || (await sioRes.text()).startsWith('<!')) {
1358
+ missing.push('Server does NOT serve /socket.io/socket.io.js — check that socket.io is installed and express-static is set up');
1359
+ }
1360
+ } catch {}
1361
+ }
1362
+ if (missing.length > 0) {
1363
+ auditNotes = `\n\nHTML DEPENDENCY AUDIT FOUND ISSUES:\n${missing.map(m => '- ' + m).join('\n')}`;
1364
+ }
1365
+ } catch {}
1366
+
1367
+ let imageData;
1368
+ const tmpFile = `/tmp/af_verify_${Date.now()}.png`;
1369
+
1370
+ // Try AgentForge browser via CDP first
1371
+ try {
1372
+ imageData = await this._cdpScreenshot(url, null);
1373
+ } catch (cdpErr) {
1374
+ // CDP not available — try puppeteer headless screenshot
1375
+ try {
1376
+ const puppeteerModule = process.env.HOME + '/.npm-global/lib/node_modules/puppeteer';
1377
+ const scriptFile = `/tmp/af_pup_${Date.now()}.js`;
1378
+ const nodeScript = `
1379
+ const puppeteer = require(${JSON.stringify(puppeteerModule)});
1380
+ (async () => {
1381
+ const browser = await puppeteer.launch({headless: true, protocolTimeout: 30000, args: ['--no-sandbox','--disable-setuid-sandbox','--disable-gpu','--disable-dev-shm-usage']});
1382
+ const page = await browser.newPage();
1383
+ await page.setDefaultNavigationTimeout(12000);
1384
+ await page.setViewport({width: 1280, height: 900});
1385
+ try {
1386
+ await page.goto(${JSON.stringify(url)}, {waitUntil: 'domcontentloaded', timeout: 12000}).catch(()=>{});
1387
+ await new Promise(r => setTimeout(r, 2500));
1388
+ await page.screenshot({path: ${JSON.stringify(tmpFile)}, fullPage: true});
1389
+ console.log('puppeteer screenshot ok');
1390
+ } finally {
1391
+ await browser.close();
1392
+ }
1393
+ })().then(() => process.exit(0)).catch(e => { console.error(e.message); process.exit(1); });
1394
+ `;
1395
+ writeFileSync(scriptFile, nodeScript);
1396
+ await execAsync(`/usr/local/bin/node "${scriptFile}"`, { timeout: 45000 });
1397
+ await execAsync(`rm -f "${scriptFile}"`).catch(() => {});
1398
+ const raw = readFileSync(tmpFile).toString('base64');
1399
+ await execAsync(`rm -f "${tmpFile}"`).catch(() => {});
1400
+ imageData = `data:image/png;base64,${raw}`;
1401
+ } catch (pupErr) {
1402
+ console.warn(` [screenshot_and_describe] puppeteer failed: ${pupErr.message}`);
1403
+ // No screenshot possible — return audit notes only
1404
+ return `Cannot take screenshot (CDP: ${cdpErr.message}, puppeteer: ${pupErr.message}). ${auditNotes || 'No dependency issues found in HTML. Check server logs for errors.'}`;
1405
+ }
1406
+ }
1407
+
1408
+ // Store imageData so caller can emit to user if send_to_user=true
1409
+ this._lastScreenshotData = imageData;
1410
+
1411
+ const base64 = imageData.replace(/^data:image\/\w+;base64,/, '');
1412
+
1413
+ // Use the active model for vision analysis.
1414
+ try {
1415
+ // /api/chat with images array — supported by all Ollama vision-capable models
1416
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
1417
+ method: 'POST',
1418
+ headers: { 'Content-Type': 'application/json' },
1419
+ body: JSON.stringify({
1420
+ model: this.model,
1421
+ messages: [{ role: 'user', content: question, images: [base64] }],
1422
+ stream: false,
1423
+ options: { num_ctx: 4096 }
1424
+ }),
1425
+ signal: AbortSignal.timeout(120000)
1426
+ });
1427
+
1428
+ if (res.ok) {
1429
+ const json = await res.json();
1430
+ const description = json.message?.content || json.response || '';
1431
+ const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
1432
+ if (clean) {
1433
+ console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
1434
+ return `Screenshot analysis of ${url}:\n${clean}${auditNotes}`;
1435
+ }
1436
+ }
1437
+ } catch (err) {
1438
+ console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
1439
+ }
1440
+
1441
+ return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}`;
1442
+ }
1443
+
759
1444
  _resolvePath(p, workDir) {
760
1445
  return path.isAbsolute(p) ? p : path.join(workDir, p);
761
1446
  }
@@ -781,28 +1466,65 @@ export class OllamaAgent extends EventEmitter {
781
1466
  }
782
1467
 
783
1468
  // ─── History persistence ──────────────────────────────────────────────────
784
-
785
- _historyPath(workDir, sessionId) {
786
- return path.join(workDir, `.ollama_history_${sessionId}.json`);
1469
+ // History lives at ~/.agentforge/history/{agentId}.json — one canonical file
1470
+ // per agent, independent of workDir/sessionId/machine state. Never gets lost
1471
+ // due to workDir changes, worker restarts, or Railway assigning new sessionIds.
1472
+
1473
+ _historyPath(agentId) {
1474
+ const home = process.env.HOME || '/tmp';
1475
+ const dir = path.join(home, '.agentforge', 'history');
1476
+ if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
1477
+ return path.join(dir, `${agentId}.json`);
787
1478
  }
788
1479
 
789
1480
  _loadHistory(agentId, workDir, sessionId) {
790
- if (!sessionId) return [];
791
1481
  try {
792
- const fp = this._historyPath(workDir, sessionId);
1482
+ const fp = this._historyPath(agentId);
793
1483
  if (existsSync(fp)) {
794
1484
  const data = JSON.parse(readFileSync(fp, 'utf-8'));
795
- // Keep last 20 messages to stay within context
1485
+ console.log(` [${agentId}] Loaded ${data.length} history msgs from ~/.agentforge/history/`);
796
1486
  return data.slice(-12);
797
1487
  }
798
- } catch {}
1488
+ } catch (e) {
1489
+ console.warn(`⚠️ [${agentId}] History load error: ${e.message}`);
1490
+ }
799
1491
  return [];
800
1492
  }
801
1493
 
802
1494
  _saveHistory(agentId, workDir, sessionId, messages) {
803
1495
  try {
804
- const fp = this._historyPath(workDir, sessionId);
1496
+ const fp = this._historyPath(agentId);
805
1497
  writeFileSync(fp, JSON.stringify(messages.slice(-20), null, 2));
806
- } catch {}
1498
+ } catch (e) {
1499
+ console.warn(`⚠️ [${agentId}] History save error: ${e.message}`);
1500
+ }
1501
+ }
1502
+
1503
+ async _isTaskComplete(task, output, signal) {
1504
+ try {
1505
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
1506
+ method: 'POST',
1507
+ headers: { 'Content-Type': 'application/json' },
1508
+ signal,
1509
+ body: JSON.stringify({
1510
+ model: this.model,
1511
+ messages: [
1512
+ { role: 'system', content: 'You determine if a task is complete. Reply with only "yes" or "no".' },
1513
+ { role: 'user', content: `Task: ${task.slice(0, 300)}\n\nAgent output: ${output.slice(0, 600)}\n\nDid the agent fully complete the task with real results (not excuses, not plans, not partial attempts)?` }
1514
+ ],
1515
+ stream: false,
1516
+ think: false,
1517
+ options: { num_ctx: 2048 }
1518
+ })
1519
+ });
1520
+ if (!res.ok) return true;
1521
+ const data = await res.json();
1522
+ const answer = (data.message?.content || '').toLowerCase().trim();
1523
+ console.log(` [_isTaskComplete] verdict: "${answer}"`);
1524
+ return answer.startsWith('yes');
1525
+ } catch (e) {
1526
+ console.warn(`⚠️ [_isTaskComplete] error: ${e.message}`);
1527
+ return true; // assume done on error to avoid infinite loops
1528
+ }
807
1529
  }
808
1530
  }