@hamp10/agentforge 0.2.15 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,123 +1,99 @@
1
1
  import { exec } from 'child_process';
2
- import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync } from 'fs';
2
+ import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync, appendFileSync } from 'fs';
3
3
  import { EventEmitter } from 'events';
4
4
  import path from 'path';
5
5
  import { promisify } from 'util';
6
6
  import { fileURLToPath } from 'url';
7
+ import { browserAction } from './hampagent/browser.js';
7
8
 
8
9
  const execAsync = promisify(exec);
9
10
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
11
 
11
- // Tool definitions — used both for embedded system prompt (qwen3 format)
12
- // and kept as structured objects for _toolDesc lookups.
12
+ // ── Worker log file — always write to /tmp/agentforge/worker.log so logs are
13
+ // accessible remotely via SSH regardless of how the worker was started.
14
+ const WORKER_LOG = '/tmp/agentforge/worker.log';
15
+ try { mkdirSync('/tmp/agentforge', { recursive: true }); } catch {}
16
+ const _origLog = console.log.bind(console);
17
+ console.log = (...args) => {
18
+ _origLog(...args);
19
+ try {
20
+ const line = args.map(a => (typeof a === 'object' ? JSON.stringify(a) : String(a))).join(' ');
21
+ appendFileSync(WORKER_LOG, `${new Date().toISOString()} ${line}\n`);
22
+ } catch {}
23
+ };
24
+
25
+ // Minimal tool definitions — one compact JSON per line, embedded in system prompt.
26
+ // Ollama's `tools` API param is unreliable — tools are injected as text in the system prompt.
27
+ // Descriptions kept short to fit within a 4096 token context window.
13
28
  const TOOL_DEFS = [
14
29
  {
15
30
  type: 'function',
16
31
  function: {
17
32
  name: 'bash',
18
- description: 'Execute a shell command in the working directory. Returns stdout and stderr.',
19
- parameters: {
20
- type: 'object',
21
- properties: {
22
- command: { type: 'string', description: 'The shell command to run' }
23
- },
24
- required: ['command']
25
- }
33
+ description: 'Run a shell command. Returns stdout/stderr.',
34
+ parameters: { type: 'object', properties: { command: { type: 'string' } }, required: ['command'] }
26
35
  }
27
36
  },
28
37
  {
29
38
  type: 'function',
30
39
  function: {
31
40
  name: 'read_file',
32
- description: 'Read the full contents of a file.',
33
- parameters: {
34
- type: 'object',
35
- properties: {
36
- path: { type: 'string', description: 'Path to the file (absolute or relative to workdir)' }
37
- },
38
- required: ['path']
39
- }
41
+ description: 'Read a file.',
42
+ parameters: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] }
40
43
  }
41
44
  },
42
45
  {
43
46
  type: 'function',
44
47
  function: {
45
48
  name: 'write_file',
46
- description: 'Write content to a file, creating it and any missing parent directories.',
47
- parameters: {
48
- type: 'object',
49
- properties: {
50
- path: { type: 'string', description: 'Path to write (absolute or relative to workdir)' },
51
- content: { type: 'string', description: 'File content to write' }
52
- },
53
- required: ['path', 'content']
54
- }
49
+ description: 'Write a file.',
50
+ parameters: { type: 'object', properties: { path: { type: 'string' }, content: { type: 'string' } }, required: ['path', 'content'] }
55
51
  }
56
52
  },
57
53
  {
58
54
  type: 'function',
59
55
  function: {
60
56
  name: 'list_directory',
61
- description: 'List files and subdirectories at a path.',
62
- parameters: {
63
- type: 'object',
64
- properties: {
65
- path: { type: 'string', description: 'Directory path (absolute or relative to workdir)' }
66
- },
67
- required: ['path']
68
- }
57
+ description: 'List files in a directory.',
58
+ parameters: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] }
69
59
  }
70
60
  },
71
61
  {
72
62
  type: 'function',
73
63
  function: {
74
64
  name: 'web_fetch',
75
- description: 'Fetch the text content of a URL (first 4000 chars).',
76
- parameters: {
77
- type: 'object',
78
- properties: {
79
- url: { type: 'string', description: 'URL to fetch' }
80
- },
81
- required: ['url']
82
- }
65
+ description: 'Fetch text content from a URL.',
66
+ parameters: { type: 'object', properties: { url: { type: 'string' } }, required: ['url'] }
83
67
  }
84
68
  },
85
69
  {
86
70
  type: 'function',
87
71
  function: {
88
72
  name: 'take_screenshot',
89
- description: 'Take a screenshot of the current screen or the agent browser (port 9223). Returns base64 image data you can analyze visually. Use this to check what a webpage looks like, verify a build result, or monitor a running process. Set send_to_user=true ONLY when the user explicitly asked to see a screenshot.',
90
- parameters: {
91
- type: 'object',
92
- properties: {
93
- target: {
94
- type: 'string',
95
- enum: ['screen', 'browser'],
96
- description: 'screen = full screen capture. browser = screenshot of the agent browser (port 9223).'
97
- },
98
- url: {
99
- type: 'string',
100
- description: 'Optional: navigate the browser to this URL before taking the screenshot.'
101
- },
102
- send_to_user: {
103
- type: 'boolean',
104
- description: 'If true, send the screenshot to the user\'s chat. Only set this when the user explicitly asked to see a screenshot or visual output.'
105
- }
106
- },
107
- required: ['target']
108
- }
73
+ description: 'Screenshot the screen. Set send_to_user=true only if user asked to see it.',
74
+ parameters: { type: 'object', properties: { target: { type: 'string', enum: ['screen', 'browser'] }, send_to_user: { type: 'boolean' } }, required: ['target'] }
75
+ }
76
+ },
77
+ {
78
+ type: 'function',
79
+ function: {
80
+ name: 'screenshot_and_describe',
81
+ description: 'Screenshot a URL and get AI visual analysis. Use after building any web app to verify it looks correct before reporting done. Set send_to_user:true to show the screenshot to the user in chat.',
82
+ parameters: { type: 'object', properties: {
83
+ url: { type: 'string', description: 'URL to screenshot (e.g. http://localhost:3458)' },
84
+ check_for: { type: 'string', description: 'What should be visible (e.g. "snake game with canvas, scoreboard, and game controls")' },
85
+ send_to_user: { type: 'boolean', description: 'Send screenshot image to user in chat (true when confirmed working)' }
86
+ }, required: ['url'] }
109
87
  }
110
88
  }
111
89
  ];
112
90
 
113
- // Build the <tools> XML block to embed in the system prompt.
114
- // Ollama's `tools` API parameter is broken for qwen3 models (malformed JSON in the prompt).
115
- // The reliable fix is to embed tool definitions directly in the system prompt as XML.
116
- const TOOLS_XML = `<tools>\n${TOOL_DEFS.map(t => JSON.stringify(t)).join('\n')}\n</tools>`;
91
+ // Minimal <tools> XML for system prompt — one compact JSON per line, no outer array.
92
+ const TOOLS_XML = `<tools>\n${TOOL_DEFS.map(t => JSON.stringify(t.function)).join('\n')}\n</tools>`;
117
93
 
118
94
  /**
119
95
  * Parse <tool_call>...</tool_call> blocks from streamed content.
120
- * qwen3-vl native format: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
96
+ * Some models emit: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
121
97
  * Returns array of {name, arguments} or null if no complete tool calls found.
122
98
  */
123
99
  function _parseToolCallTags(content) {
@@ -135,67 +111,170 @@ function _parseToolCallTags(content) {
135
111
  return calls.length > 0 ? calls : null;
136
112
  }
137
113
 
114
+ /**
115
+ * Parse WRITE_FILE code-fence format.
116
+ * Models struggle to JSON-escape large code files (unescaped quotes break JSON.parse).
117
+ * This format avoids the problem: path on the first line, raw content in a code fence.
118
+ *
119
+ * Accepted formats:
120
+ * WRITE_FILE /abs/path/to/file.js
121
+ * ```
122
+ * ...raw content, no escaping needed...
123
+ * ```
124
+ *
125
+ * write_file: /abs/path/to/file.js
126
+ * ```javascript
127
+ * ...content...
128
+ * ```
129
+ *
130
+ * Returns array of {name, arguments} or null if no matches found.
131
+ */
132
+ function _parseWriteFileFences(content) {
133
+ if (!content) return null;
134
+ const calls = [];
135
+ // Match WRITE_FILE <path> or write_file: <path> followed by a code fence
136
+ const re = /(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n([\s\S]*?)```/gi;
137
+ let m;
138
+ while ((m = re.exec(content)) !== null) {
139
+ const filePath = m[1].trim();
140
+ const fileContent = m[2]; // raw content, no unescaping needed
141
+ if (filePath && fileContent !== undefined) {
142
+ calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
143
+ }
144
+ }
145
+ return calls.length > 0 ? calls : null;
146
+ }
147
+
148
+ /**
149
+ * Fallback: parse "Writing filename...\n```lang\ncontent\n```" code blocks.
150
+ * Many local models ignore the WRITE_FILE instruction and use raw markdown blocks.
151
+ * Extract the filename from the "Writing X..." line and write the file to the project dir.
152
+ * Project dir is inferred from the most recent "mkdir -p /path" in the content.
153
+ */
154
+ function _parseWritingFallback(content, workDir) {
155
+ if (!content) return null;
156
+ const calls = [];
157
+
158
+ // Infer project dir from last mkdir -p command in the stream
159
+ let projectDir = workDir;
160
+ const mkdirMatches = [...content.matchAll(/mkdir\s+-p\s+"?([^"\n]+)"?/g)];
161
+ if (mkdirMatches.length > 0) {
162
+ const lastMkdir = mkdirMatches[mkdirMatches.length - 1];
163
+ const candidate = lastMkdir[1].trim().replace(/~/, process.env.HOME || '/tmp');
164
+ if (candidate && !candidate.includes('$')) projectDir = candidate;
165
+ }
166
+
167
+ // Match: "Writing filename...\n```lang\ncontent\n```"
168
+ const re = /Writing\s+([\w./\-]+?)(?:\.{3})?\s*\n```[^\n]*\n([\s\S]*?)```(?:\n|$)/gi;
169
+ let m;
170
+ while ((m = re.exec(content)) !== null) {
171
+ const filename = m[1].trim();
172
+ const fileContent = m[2];
173
+ if (!filename || fileContent === undefined) continue;
174
+ // Skip if this is just a status echo with no real code
175
+ if (fileContent.trim().length < 5) continue;
176
+ const filePath = filename.startsWith('/') ? filename : `${projectDir}/${filename}`;
177
+ calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
178
+ }
179
+ return calls.length > 0 ? calls : null;
180
+ }
181
+
138
182
  /**
139
183
  * Detect text-based tool calls from model content.
140
- * qwen3-vl:8b outputs tool calls as JSON in content rather than tool_calls field.
184
+ * Models that don't use native tool_calls emit JSON in their text content instead.
141
185
  * Supports two schemas:
142
186
  * - {name, arguments} (OpenAI-style)
143
- * - {tool, args} (qwen3 native style)
144
- * Supports both compact (one JSON per line) and pretty-printed multi-line JSON blocks.
145
- * Returns array of {name, arguments} if content is ONLY tool calls, else null.
187
+ * - {tool, args} (alternate style)
188
+ * Supports:
189
+ * - Pure JSON (whole content is one or more JSON objects)
190
+ * - Mixed: "Status line\n{json}" — narration before the tool call JSON
191
+ * Returns array of {name, arguments} if any tool calls found, else null.
146
192
  */
147
193
  function _parseTextToolCalls(content) {
148
194
  if (!content) return null;
149
195
  const trimmed = content.trim();
150
- if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) return null;
196
+ if (!trimmed) return null;
151
197
 
152
198
  // Normalise a single parsed object into {name, arguments}
199
+ // Handles multiple schemas models may emit:
200
+ // {name, arguments} — OpenAI-style (correct)
201
+ // {tool, args} — alternate native style
202
+ // {action:"write_file", path, content} — model shorthand
203
+ // {action:"bash", command} — model shorthand
204
+ // {action:"read_file", path} — model shorthand
153
205
  const normalise = (obj) => {
154
- if (typeof obj.name === 'string' && obj.arguments !== undefined) {
155
- const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
156
- return { name: obj.name, arguments: args };
157
- }
158
- if (typeof obj.tool === 'string' && obj.args !== undefined) {
159
- return { name: obj.tool, arguments: obj.args };
160
- }
206
+ try {
207
+ if (typeof obj.name === 'string' && obj.arguments !== undefined) {
208
+ const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
209
+ return { name: obj.name, arguments: args };
210
+ }
211
+ if (typeof obj.tool === 'string' && obj.args !== undefined) {
212
+ return { name: obj.tool, arguments: obj.args };
213
+ }
214
+ // Handle {action, ...} shorthand the model sometimes emits
215
+ if (typeof obj.action === 'string') {
216
+ const action = obj.action.toLowerCase().replace(/[ -]/g, '_');
217
+ // Map common action names to tool names
218
+ const toolName = action === 'write' ? 'write_file'
219
+ : action === 'read' ? 'read_file'
220
+ : action === 'list' ? 'list_directory'
221
+ : action === 'run' || action === 'execute' || action === 'exec' ? 'bash'
222
+ : action; // use as-is (write_file, bash, read_file, etc.)
223
+ const args = {};
224
+ if (obj.path !== undefined) args.path = obj.path;
225
+ if (obj.content !== undefined) args.content = obj.content;
226
+ if (obj.command !== undefined) args.command = obj.command;
227
+ if (obj.url !== undefined) args.url = obj.url;
228
+ if (obj.target !== undefined) args.target = obj.target;
229
+ if (Object.keys(args).length > 0) return { name: toolName, arguments: args };
230
+ }
231
+ } catch {}
161
232
  return null;
162
233
  };
163
234
 
164
- // Try parsing the whole content as a single JSON object/array
165
- try {
166
- const obj = JSON.parse(trimmed);
167
- if (Array.isArray(obj)) {
168
- const calls = obj.map(normalise);
169
- if (calls.every(Boolean)) return calls;
170
- return null;
171
- }
172
- const call = normalise(obj);
173
- if (call) return [call];
174
- return null;
175
- } catch {}
176
-
177
- // Try extracting multiple top-level JSON objects (separated by newlines/whitespace)
235
+ // Extract all JSON objects that start at the beginning of a line
236
+ // This handles both pure-JSON responses and "narration\n{json}" mixed responses
178
237
  const calls = [];
238
+ const lines = trimmed.split('\n');
179
239
  let i = 0;
180
- while (i < trimmed.length) {
181
- // Skip whitespace/newlines between objects
182
- while (i < trimmed.length && /\s/.test(trimmed[i])) i++;
183
- if (i >= trimmed.length) break;
184
- if (trimmed[i] !== '{') return null; // Non-JSON between objects bail
185
- // Find matching closing brace
186
- let depth = 0, j = i;
187
- while (j < trimmed.length) {
188
- if (trimmed[j] === '{') depth++;
189
- else if (trimmed[j] === '}') { depth--; if (depth === 0) { j++; break; } }
190
- j++;
240
+ while (i < lines.length) {
241
+ const line = lines[i].trim();
242
+ if (line.startsWith('{') || line.startsWith('[')) {
243
+ // Accumulate lines until we have a complete JSON object (handles multi-line JSON)
244
+ // Skips { } [ ] inside JSON strings so CSS/HTML brace counts don't confuse the parser.
245
+ let jsonStr = '';
246
+ let depth = 0;
247
+ while (i < lines.length) {
248
+ const l = lines[i];
249
+ jsonStr += (jsonStr ? '\n' : '') + l;
250
+ let inString = false, escape = false;
251
+ for (const ch of l) {
252
+ if (escape) { escape = false; continue; }
253
+ if (ch === '\\' && inString) { escape = true; continue; }
254
+ if (ch === '"') { inString = !inString; continue; }
255
+ if (!inString) {
256
+ if (ch === '{' || ch === '[') depth++;
257
+ else if (ch === '}' || ch === ']') depth--;
258
+ }
259
+ }
260
+ i++;
261
+ if (depth === 0 && jsonStr.trim()) break;
262
+ }
263
+ try {
264
+ const obj = JSON.parse(jsonStr.trim());
265
+ if (Array.isArray(obj)) {
266
+ for (const item of obj) {
267
+ const call = normalise(item);
268
+ if (call) calls.push(call);
269
+ }
270
+ } else {
271
+ const call = normalise(obj);
272
+ if (call) calls.push(call);
273
+ }
274
+ } catch {}
275
+ } else {
276
+ i++;
191
277
  }
192
- try {
193
- const obj = JSON.parse(trimmed.slice(i, j));
194
- const call = normalise(obj);
195
- if (!call) return null;
196
- calls.push(call);
197
- i = j;
198
- } catch { return null; }
199
278
  }
200
279
  return calls.length > 0 ? calls : null;
201
280
  }
@@ -255,13 +334,13 @@ export class OllamaAgent extends EventEmitter {
255
334
  return { agentId, workDir };
256
335
  }
257
336
 
258
- async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null) {
337
+ async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null) {
259
338
  const startTime = Date.now();
260
339
  const controller = new AbortController();
261
340
 
262
341
  // Use per-agent model override if provided (and not the placeholder 'Default').
263
- // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/qwen3-vl:8b' but
264
- // Ollama's API expects bare names like 'qwen3-vl:8b'.
342
+ // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/modelname:tag' but
343
+ // Ollama's API expects bare names like 'modelname:tag'.
265
344
  const rawModel = (agentModel && agentModel !== 'Default') ? agentModel : this.model;
266
345
  const effectiveModel = rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel;
267
346
 
@@ -273,69 +352,153 @@ export class OllamaAgent extends EventEmitter {
273
352
  console.log(` Task: ${task}`);
274
353
  console.log(` Working dir: ${workDir}`);
275
354
 
276
- // Detect model capabilities
277
- const isQwen3 = effectiveModel.startsWith('qwen3');
278
- const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(effectiveModel);
279
-
280
355
  try {
281
- // Load conversation history from disk (session persistence)
282
- const history = this._loadHistory(agentId, workDir, sessionId);
283
-
284
- // For qwen3 models: embed tool definitions in the system prompt.
285
- // Ollama's `tools` API param is broken for qwen3 (malformed JSON sent to model).
286
- // Embedding as XML matches the model's native Hermes-style chat template.
287
- const toolsBlock = isQwen3 ? `\n\n${TOOLS_XML}\n\nFor each tool call, output ONLY a <tool_call> block with no surrounding text:\n<tool_call>\n{"name": "<tool_name>", "arguments": {<args>}}\n</tool_call>` : '';
288
-
289
- const systemPrompt = [
290
- isQwen3 ? '/no_think' : null,
291
- `You are an AI agent running on AgentForge.ai.`,
292
- `Your working directory is: ${workDir}`,
293
- ``,
294
- `CRITICAL RULES:`,
295
- `1. Use tools to act. Do NOT describe steps or write code blocks — call the actual tool.`,
296
- `2. bash = run shell commands. write_file = write files. read_file = read files. take_screenshot = screenshot.`,
297
- `3. For conversational messages (greetings, casual chat) — respond with plain text. No tools needed.`,
298
- `4. Do not ask for clarification make your best judgment and act immediately.`,
299
- `5. After completing work, write a brief summary of what you did.`,
300
- toolsBlock,
301
- ].filter(Boolean).join('\n');
356
+ // Load conversation history — prefer Railway DB history (sent via task payload, works across
357
+ // any machine/user/model). Fall back to local file for offline or pre-fix sessions.
358
+ const history = (conversationHistory && conversationHistory.length > 0)
359
+ ? conversationHistory.slice(-20)
360
+ : this._loadHistory(agentId, workDir, sessionId);
361
+
362
+ // Text-based tool format is used rather than XML schemas — more reliable across models.
363
+ // Use flow's custom system prompt if provided, otherwise fall back to built-in default.
364
+ // ALL models get the same rule set and tool format — no model-specific branching.
365
+ const homeDir = process.env.HOME || '/tmp';
366
+ const projectsDir = `${homeDir}/Desktop/Projects`;
367
+ const universalRules = `
368
+ == WHAT YOU CAN DO ==
369
+ You have these tools:
370
+
371
+ bash: Run any shell command file ops, servers, packages, logs, system queries.
372
+ read_file: Read a local file.
373
+ WRITE_FILE: Write a local file (code-fence format only).
374
+ list_directory: List a local directory.
375
+ web_fetch: Fetch any public URL — websites, APIs, docs, raw data. Fast, text-only.
376
+ screenshot_and_describe: Navigate a real browser to any URL and screenshot it. Use this when pages require JavaScript, you need visual output, or web_fetch returns nothing useful.
377
+ browser: Control the AgentForge Browser directly (Chrome, always running, logged into user's services). Use for ALL browser interaction — navigating, clicking, typing, reading page content, screenshots.
378
+
379
+ BROWSER TOOL — use this instead of writing CDP scripts:
380
+ {"name":"browser","arguments":{"action":"tabs"}} ← list ALL open tabs with URLs (DO THIS FIRST)
381
+ {"name":"browser","arguments":{"action":"snapshot"}} ← read current page content + interactive elements (also shows all tabs)
382
+ {"name":"browser","arguments":{"action":"navigate","url":"https://..."}} ← go to URL
383
+ {"name":"browser","arguments":{"action":"focus","url":"expireddomains"}} ← switch to a tab by URL fragment
384
+ {"name":"browser","arguments":{"action":"click","ref":3}} ← click element by index from snapshot
385
+ {"name":"browser","arguments":{"action":"click","text":"Show Filter"}} ← click element by visible text
386
+ {"name":"browser","arguments":{"action":"click","selector":"#filter-btn"}} ← click by CSS selector
387
+ {"name":"browser","arguments":{"action":"type","selector":"input","text":"hello"}} ← type text
388
+ {"name":"browser","arguments":{"action":"screenshot"}} ← take screenshot
389
+ {"name":"browser","arguments":{"action":"evaluate","script":"document.title"}} ← run JS
390
+ {"name":"browser","arguments":{"action":"scroll","y":400}} ← scroll down
391
+
392
+ WORKFLOW when user says "the tab is already open":
393
+ 1. browser tabs → see ALL open tabs and their URLs
394
+ 2. browser focus with the URL fragment of the tab you need (e.g. "expireddomains")
395
+ 3. browser snapshot → read page content and get element indices
396
+ 4. browser click to interact (by ref index, by text, or by selector)
397
+ 5. browser snapshot again to see result
398
+ The browser has the user's sessions and cookies. You CAN click any button, filter, or link visible on the page.
399
+
400
+ == GENERAL RULES (all tasks) ==
401
+ G1. IDENTIFY THE TASK TYPE. Build? Research? Question? Match approach to task.
402
+ G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer.
403
+ G3. ANY WEBSITE/URL IS ACCESSIBLE. User mentions a site or open tab? Use browser snapshot to see what's currently open, then browser navigate/click/type to interact. Never ask "what's the URL?" — find it yourself.
404
+ G4. NEVER ASK PERMISSION. Never say "should I use X or Y?" — pick the right tool and use it.
405
+ G5. IF A TOOL FAILS: Try a different approach. web_fetch empty → screenshot_and_describe. Never repeat a failing call identically.
406
+ G6. RESEARCH TASKS: web_fetch → read → reason → respond in text. No server, no localhost.
407
+ G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data.
408
+ G8. WHEN GENUINELY STUCK: State what you tried, what failed, ask ONE specific question.
409
+ G9. KEEP GOING until the task is fully complete.
410
+
411
+ == BUILD RULES (only when building apps/games/tools) ==
412
+ B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spaces — use underscores).
413
+ B2. WRITE EVERY FILE COMPLETELY — no stubs, no placeholders, no TODOs. Full working code only.
414
+ B3. BUILD FILE BY FILE — write each file completely before writing the next.
415
+ B4. ALWAYS use absolute paths.
416
+ B5. SERVING FILES: Node.js server: nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 & — NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server PORT --directory /abs/path/ > /tmp/server.log 2>&1 &
417
+ B6. npm install: cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express
418
+ B7. After starting server, verify: sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:PORT — if 000, check /tmp/server.log and fix the error.
419
+ B8. PORT MANAGEMENT: Check port before starting: lsof -i :PORT | head -3. If in use: kill old process, restart. If crashed: restart. If busy with something else: pick different port.
420
+ B9. EXPRESS WILDCARD ROUTE: NEVER write app.get('*', ...) — crashes in newer versions. Use app.use((req, res) => { ... }) instead.
421
+ B10. MANDATORY SCREENSHOT QA: After curl returns 200, call screenshot_and_describe with send_to_user:true. You are NOT done until the screenshot shows the real working app.
422
+ B11. ALWAYS open the finished app: bash open http://localhost:PORT
423
+ B12. CANVAS GAMES: canvas 800×600, dark background #1a1a2e, all elements clearly visible. Dark theme, styled UI.
424
+ B13. OBSERVE BEFORE FIXING: Screenshot first, then make targeted edits. Never rewrite an entire file from scratch when the server is running.
425
+ B14. TARGETED EDITS: read_file to see current code, write_file only the changed section. Never throw away working code.
426
+ B15. QUALITY LOOP: After each fix, screenshot again to verify. Iterate until it looks correct.
427
+ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.`;
428
+ // Text-based tool format works reliably across all local models.
429
+ // WRITE_FILE uses code-fence to avoid JSON-escaping issues; all other tools use JSON.
430
+ const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nDO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nTO WRITE A FILE (only when actually writing code/content to disk):\nWriting server.js...\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\n\nFOR ALL OTHER TOOLS — output JSON on its own line:\nRunning command...\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for","send_to_user":true}} — open ANY URL in a real browser and screenshot it. Use when pages are dynamic/JS-heavy or you need to show the user visuals.\n\n${universalRules}`;
431
+ const systemPrompt = customSystemPrompt || jsonToolFormat;
302
432
 
303
433
  const messages = [
304
434
  { role: 'system', content: systemPrompt },
305
435
  ...history,
306
436
  ];
307
437
 
308
- // Attach initial image to user message if provided
438
+ // Attach initial image if provided — always include it; models that don't support
439
+ // images will ignore the field, and if they error we catch it below.
309
440
  const userMessage = { role: 'user', content: task };
310
- if (image && isVision) {
441
+ if (image) {
311
442
  const base64 = image.replace(/^data:image\/\w+;base64,/, '');
312
443
  userMessage.images = [base64];
313
444
  }
314
445
  messages.push(userMessage);
315
446
 
447
+ // Force-unload any currently loaded model so it reloads with our num_ctx setting.
448
+ // Model-agnostic and machine-agnostic — guarantees 32K context on every task.
449
+ try {
450
+ await fetch(`${this.baseUrl}/api/generate`, {
451
+ method: 'POST', signal: controller.signal,
452
+ headers: { 'Content-Type': 'application/json' },
453
+ body: JSON.stringify({ model: effectiveModel, keep_alive: 0, prompt: '' })
454
+ });
455
+ } catch { /* ignore — model may not be loaded yet */ }
456
+
316
457
  let finalContent = '';
317
458
  let allOutput = ''; // accumulate everything streamed across all turns
318
459
  const toolsUsed = []; // track tool names called (for fallback summary)
319
- const MAX_TURNS = 15; // reduce from 25 local models get stuck in tool loops
460
+ // No hard turn limit — agent runs until done, loop-detected, or wall-clock timeout.
461
+ const recentCalls = []; // last N tool calls for loop detection
462
+ let emptyRetries = 0; // consecutive empty-response retries
320
463
 
321
- for (let turn = 0; turn < MAX_TURNS; turn++) {
464
+ for (let turn = 0; ; turn++) {
322
465
  if (controller.signal.aborted) break;
323
466
 
324
467
  this.emit('tool_activity', { agentId, event: 'tool_start', tool: 'model', description: `Thinking…` });
325
468
 
469
+ // All local Ollama models use the native /api/chat endpoint.
470
+ // The OpenAI-compatible /v1/chat/completions endpoint ignores options.num_ctx,
471
+ // causing all models to run at 4096-token context regardless of what we pass.
472
+ const isOllamaBackend = this.baseUrl.includes('11434') || this.baseUrl.includes('localhost') || this.baseUrl.includes('127.0.0.1');
473
+ const useNativeEndpoint = isOllamaBackend; // all local models use native endpoint
474
+
326
475
  let response;
327
476
  try {
328
- const requestBody = {
329
- model: effectiveModel,
330
- messages,
331
- stream: true,
332
- // qwen3: tools embedded in system prompt — do NOT pass tools param (broken in Ollama)
333
- // Other models: pass tools normally via API
334
- ...(!isQwen3 ? { tools: TOOL_DEFS, tool_choice: 'auto' } : {}),
335
- ...(isQwen3 ? { options: { think: false } } : {}),
336
- };
337
-
338
- response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
477
+
478
+ let requestBody;
479
+ let endpoint;
480
+
481
+ if (useNativeEndpoint) {
482
+ // Ollama native format supports think:false at top level
483
+ endpoint = `${this.baseUrl}/api/chat`;
484
+ requestBody = {
485
+ model: effectiveModel,
486
+ messages,
487
+ stream: true,
488
+ think: false, // top-level think disable — WORKS on native endpoint
489
+ options: { num_ctx: 32768 },
490
+ };
491
+ } else {
492
+ endpoint = `${this.baseUrl}/v1/chat/completions`;
493
+ requestBody = {
494
+ model: effectiveModel,
495
+ messages,
496
+ stream: true,
497
+ options: { num_ctx: 32768 },
498
+ };
499
+ }
500
+
501
+ response = await fetch(endpoint, {
339
502
  method: 'POST',
340
503
  headers: { 'Content-Type': 'application/json' },
341
504
  signal: controller.signal,
@@ -351,21 +514,30 @@ export class OllamaAgent extends EventEmitter {
351
514
  throw new Error(`Local model error ${response.status}: ${body}`);
352
515
  }
353
516
 
354
- // ── Stream the SSE response ──
355
- // For qwen3: model emits text tokens including <tool_call>...</tool_call> blocks.
356
- // Stream text live to user, but suppress content inside <tool_call> tags.
357
- // For other models: also handle delta.tool_calls in the standard OpenAI format.
358
- let streamContent = ''; // full accumulated text (including tool_call tags for qwen3)
517
+ // ── Stream the response ──
518
+ // Two formats:
519
+ // Ollama native (/api/chat): NDJSON lines {"message":{"content":"..."},"done":false}
520
+ // OpenAI-compatible (/v1/...): SSE lines — data: {"choices":[{"delta":{"content":"..."}}]}
521
+ // Models may emit <tool_call>...</tool_call> or <think>...</think> blocks in text content.
522
+ // Stream text live to user; suppress think blocks and raw JSON tool call blobs.
523
+ let streamContent = ''; // full accumulated text (including any tool_call/think blocks)
359
524
  let visibleContent = ''; // text emitted live to user (no tool_call or think blocks)
360
- let streamToolCalls = {}; // OpenAI-format tool calls (non-qwen3 models)
525
+ let streamToolCalls = {}; // OpenAI-format tool calls from native tool_calls field
361
526
  let inThinkBlock = false;
362
527
  let inToolCallBlock = false; // inside <tool_call>...</tool_call>
528
+ let inJsonBlob = false; // inside bare JSON tool call — suppress from streaming
529
+ let inFenceBlock = false; // inside WRITE_FILE code fence — suppress content from streaming
530
+ let fenceDepth = 0; // ``` count since last WRITE_FILE (even=closed, odd=open)
363
531
  let rawTokenCount = 0;
532
+ let lastVisibleAt = Date.now(); // track when we last got visible output (for think timeout)
364
533
 
365
534
  const reader = response.body.getReader();
366
535
  const decoder = new TextDecoder();
367
536
  let buf = '';
368
537
 
538
+ // No overall turn timeout — the local model can take as long as it needs to produce output on any turn.
539
+ // A turn stops only on user abort (controller.signal), stream end, or the >90s <think>-block timeout further down in this loop.
540
+ let turnRetry = false;
369
541
  while (true) {
370
542
  if (controller.signal.aborted) break;
371
543
  const { done, value } = await reader.read();
@@ -376,33 +548,47 @@ export class OllamaAgent extends EventEmitter {
376
548
  buf = lines.pop();
377
549
 
378
550
  for (const line of lines) {
379
- if (!line.startsWith('data: ')) continue;
380
- const payload = line.slice(6).trim();
381
- if (payload === '[DONE]') continue;
382
- let evt;
383
- try { evt = JSON.parse(payload); } catch { continue; }
384
-
385
- const delta = evt.choices?.[0]?.delta;
386
- if (!delta) continue;
387
-
388
- // Standard OpenAI tool_calls (non-qwen3 models)
389
- if (delta.tool_calls) {
390
- for (const tc of delta.tool_calls) {
391
- const idx = tc.index ?? 0;
392
- if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
393
- if (tc.id) streamToolCalls[idx].id = tc.id;
394
- if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
395
- if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
551
+ if (!line.trim()) continue;
552
+
553
+ let tokenText = null;
554
+
555
+ if (useNativeEndpoint) {
556
+ // Ollama native NDJSON format
557
+ let nativeEvt;
558
+ try { nativeEvt = JSON.parse(line); } catch { continue; }
559
+ if (nativeEvt.done) continue;
560
+ tokenText = nativeEvt.message?.content ?? null;
561
+ } else {
562
+ // OpenAI SSE format
563
+ if (!line.startsWith('data: ')) continue;
564
+ const payload = line.slice(6).trim();
565
+ if (payload === '[DONE]') continue;
566
+ let evt;
567
+ try { evt = JSON.parse(payload); } catch { continue; }
568
+
569
+ const delta = evt.choices?.[0]?.delta;
570
+ if (!delta) continue;
571
+
572
+ // Standard OpenAI tool_calls from native tool_calls field
573
+ if (delta.tool_calls) {
574
+ for (const tc of delta.tool_calls) {
575
+ const idx = tc.index ?? 0;
576
+ if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
577
+ if (tc.id) streamToolCalls[idx].id = tc.id;
578
+ if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
579
+ if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
580
+ }
396
581
  }
582
+ tokenText = delta.content ?? null;
397
583
  }
398
584
 
399
- if (!delta.content) continue;
585
+ if (tokenText === null) continue;
400
586
  rawTokenCount++;
401
- streamContent += delta.content;
587
+ streamContent += tokenText;
402
588
 
403
589
  // Process token through think + tool_call filters, emit visible text live
404
590
  // We scan only the new delta token against the current buffer state
405
- const chunk = delta.content;
591
+ const chunk = tokenText;
406
592
  let visible = '';
407
593
  // Simple per-token state machine — handles split tags across tokens by tracking state flags
408
594
  if (!inThinkBlock && !inToolCallBlock) {
@@ -428,9 +614,59 @@ export class OllamaAgent extends EventEmitter {
428
614
  inToolCallBlock = false;
429
615
  }
430
616
 
431
- if (visible && !inThinkBlock && !inToolCallBlock) {
432
- visibleContent += visible;
433
- this.emit('agent_output', { agentId, output: visible, isChunk: true });
617
+ // Scan ALL lines completed in this token for state transitions.
618
+ // Multi-char tokens can contain multiple lines (WRITE_FILE + ``` in same token).
619
+ if (tokenText.includes('\n')) {
620
+ const tokenStartIdx = streamContent.length - tokenText.length;
621
+ let nlIdx = streamContent.indexOf('\n', tokenStartIdx);
622
+ while (nlIdx !== -1) {
623
+ const lineStart = Math.max(0, streamContent.lastIndexOf('\n', nlIdx - 1)) + 1;
624
+ const line = streamContent.slice(lineStart, nlIdx).trim();
625
+ if (/^(WRITE_FILE|write_file)[:\s]+\S/i.test(line)) {
626
+ inFenceBlock = true; fenceDepth = 0;
627
+ } else if (inFenceBlock && /^```/.test(line)) {
628
+ fenceDepth++;
629
+ if (fenceDepth >= 2 && fenceDepth % 2 === 0) inFenceBlock = false;
630
+ } else if (!inFenceBlock && !inJsonBlob && line.length > 1 && (line.startsWith('{') || line.startsWith('['))) {
631
+ inJsonBlob = true;
632
+ }
633
+ nlIdx = streamContent.indexOf('\n', nlIdx + 1);
634
+ }
635
+ }
636
+
637
+ // Also check current partial line (mid-token, before next \n)
638
+ if (!inFenceBlock || !inJsonBlob) {
639
+ const cleanSC = streamContent.replace(/<think>[\s\S]*?<\/think>/g, '');
640
+ const lastNL = cleanSC.lastIndexOf('\n');
641
+ const curLine = cleanSC.slice(lastNL + 1).trimStart();
642
+ if (!inFenceBlock && /^(WRITE_FILE|write_file)[:\s]+\S/i.test(curLine)) {
643
+ inFenceBlock = true; fenceDepth = 0;
644
+ }
645
+ if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || curLine.startsWith('['))) {
646
+ inJsonBlob = true;
647
+ }
648
+ }
649
+
650
+ // Emit visible content — safety filter removes any ``` or WRITE_FILE lines
651
+ // that slipped through (e.g. partial token at detection boundary)
652
+ if (visible && !inThinkBlock && !inToolCallBlock && !inJsonBlob && !inFenceBlock) {
653
+ const safe = visible.split('\n').filter(ln => {
654
+ const t = ln.trimStart();
655
+ return !t.startsWith('```') && !/^(WRITE_FILE|write_file)/i.test(t);
656
+ }).join('\n');
657
+ if (safe.trim() || safe.includes('\n')) {
658
+ visibleContent += safe;
659
+ lastVisibleAt = Date.now();
660
+ this.emit('agent_output', { agentId, output: safe, isChunk: true });
661
+ }
662
+ }
663
+
664
+ // Thinking timeout: if the model has been in a <think> block for >90s with no visible output,
665
+ // abort the stream so we can retry with a kick. Prevents infinite thinking loops.
666
+ if (inThinkBlock && (Date.now() - lastVisibleAt) > 90000 && rawTokenCount > 100) {
667
+ console.log(` [${agentId}] ⏱️ Think timeout (>90s, ${rawTokenCount} tokens) — aborting stream`);
668
+ reader.cancel().catch(() => {});
669
+ break;
434
670
  }
435
671
  }
436
672
  }
@@ -439,17 +675,35 @@ export class OllamaAgent extends EventEmitter {
439
675
  if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
440
676
 
441
677
  // ── Extract tool calls from content ───────────────────────────────────
442
- // For qwen3: parse <tool_call> XML tags from full streamed content.
443
- // For others: use API-level tool_calls already accumulated above.
678
+ // Try <tool_call> XML tags first (some models emit this format), then fall through
679
+ // to code-fence and JSON text parsers.
444
680
  let parsedTagCalls = null;
445
- if (isQwen3 && Object.keys(streamToolCalls).length === 0) {
681
+ if (Object.keys(streamToolCalls).length === 0) {
446
682
  parsedTagCalls = _parseToolCallTags(streamContent);
447
683
  if (parsedTagCalls) {
448
684
  console.log(` [${agentId}] 🔍 ${parsedTagCalls.length} <tool_call> tag(s) detected`);
449
685
  }
450
686
  }
451
687
 
452
- // Fallback: try legacy JSON-blob detection if no tags found
688
+ // Fallback 1: try WRITE_FILE code-fence format (avoids JSON-escaping issues with code)
689
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
690
+ const fenceCalls = _parseWriteFileFences(streamContent);
691
+ if (fenceCalls) {
692
+ console.log(` [${agentId}] 🔍 ${fenceCalls.length} WRITE_FILE fence(s) detected`);
693
+ parsedTagCalls = fenceCalls;
694
+ }
695
+ }
696
+
697
+ // Fallback 2: "Writing filename...\n```\ncontent\n```" (model ignored WRITE_FILE instruction)
698
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
699
+ const writingCalls = _parseWritingFallback(streamContent, workDir);
700
+ if (writingCalls) {
701
+ console.log(` [${agentId}] 🔍 ${writingCalls.length} Writing-block fallback file(s) detected`);
702
+ parsedTagCalls = writingCalls;
703
+ }
704
+ }
705
+
706
+ // Fallback 3: try legacy JSON-blob detection if no tags found
453
707
  if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
454
708
  const textCalls = _parseTextToolCalls(streamContent);
455
709
  if (textCalls) {
@@ -458,9 +712,35 @@ export class OllamaAgent extends EventEmitter {
458
712
  }
459
713
  }
460
714
 
461
- // Convert tag/text calls into streamToolCalls structure
715
+ // Fallback 4: if tool calls were already found (e.g. bash) but the content also has Writing-blocks,
716
+ // merge in any write_file calls whose path is not already present, so files get written AND the other calls run
717
+ if (parsedTagCalls && streamContent) {
718
+ const writingCalls = _parseWritingFallback(streamContent, workDir);
719
+ if (writingCalls) {
720
+ const existingPaths = new Set(parsedTagCalls.filter(c => c.name === 'write_file').map(c => c.arguments.path));
721
+ const newWrites = writingCalls.filter(c => !existingPaths.has(c.arguments.path));
722
+ if (newWrites.length > 0) {
723
+ console.log(` [${agentId}] 🔍 +${newWrites.length} additional Writing-block file(s) merged`);
724
+ // Prepend file writes before bash commands so files exist before server starts
725
+ parsedTagCalls = [...newWrites, ...parsedTagCalls];
726
+ }
727
+ }
728
+ }
729
+
730
+ // Convert tag/text calls into streamToolCalls structure.
731
+ // Deduplicate: if model emits the same tool call N times in one stream, only run it once.
462
732
  if (parsedTagCalls) {
463
- parsedTagCalls.forEach((tc, i) => {
733
+ const seen = new Set();
734
+ const deduped = parsedTagCalls.filter(tc => {
735
+ const key = `${tc.name}:${JSON.stringify(tc.arguments)}`;
736
+ if (seen.has(key)) return false;
737
+ seen.add(key);
738
+ return true;
739
+ });
740
+ if (deduped.length < parsedTagCalls.length) {
741
+ console.log(` [${agentId}] 🔁 Deduplicated ${parsedTagCalls.length} → ${deduped.length} tool call(s)`);
742
+ }
743
+ deduped.forEach((tc, i) => {
464
744
  streamToolCalls[i] = { id: `tag-${i}`, type: 'function', function: { name: tc.name, arguments: JSON.stringify(tc.arguments) } };
465
745
  });
466
746
  // Don't accumulate raw tool_call XML as user-visible output
@@ -475,17 +755,17 @@ export class OllamaAgent extends EventEmitter {
475
755
  });
476
756
 
477
757
  // ── Push assistant message ────────────────────────────────────────────
758
+ // All local models now use JSON-in-text format on the native endpoint.
759
+ // Strip <think>...</think> blocks to avoid burning context on reasoning traces.
478
760
  const toolCallsArray = Object.values(streamToolCalls);
479
- if (isQwen3) {
480
- // qwen3: assistant message is the raw streamed content (includes <tool_call> tags)
481
- messages.push({ role: 'assistant', content: streamContent || '' });
482
- } else {
483
- messages.push({
484
- role: 'assistant',
485
- content: visibleContent || null,
486
- tool_calls: toolCallsArray.length > 0 ? toolCallsArray : undefined
487
- });
488
- }
761
+ const hasToolCalls = toolCallsArray.length > 0;
762
+ const cleanedContent = (streamContent || '')
763
+ .replace(/<think>[\s\S]*?<\/think>/g, '')
764
+ .trim();
765
+ messages.push({ role: 'assistant', content: cleanedContent || '' });
766
+
767
+ // Incremental save — always, regardless of sessionId (sessionId is null for OllamaAgent)
768
+ this._saveHistory(agentId, workDir, sessionId, messages.slice(1));
489
769
 
490
770
  // ── Execute tool calls ────────────────────────────────────────────────
491
771
  if (toolCallsArray.length > 0) {
@@ -497,14 +777,95 @@ export class OllamaAgent extends EventEmitter {
497
777
  try { parsedArgs = typeof args === 'string' ? JSON.parse(args) : args; }
498
778
  catch { parsedArgs = {}; }
499
779
 
780
+ // ── Unknown tool name detection ──────────────────────────────────
781
+ // Block calls to tools that don't exist (e.g. model writes {"name":"curl",...}
782
+ // instead of {"name":"bash","arguments":{"command":"curl ..."}})
783
+ const VALID_TOOL_NAMES = new Set(['bash','read_file','write_file','list_directory','web_fetch','screenshot_and_describe','take_screenshot','browser']);
784
+ if (!VALID_TOOL_NAMES.has(name.toLowerCase())) {
785
+ console.log(` [${agentId}] ⚠️ Unknown tool "${name}" — blocked`);
786
+ messages.push({ role: 'user', content: `"${name}" is not a valid tool. Valid tools: bash, read_file, write_file, list_directory, web_fetch, screenshot_and_describe. To run a shell command use bash: {"name":"bash","arguments":{"command":"${name} ..."}}.` });
787
+ continue;
788
+ }
789
+
790
+ // ── Placeholder detection ────────────────────────────────────────
791
+ // Block tool calls where the agent passed a literal placeholder like
792
+ // "[The URL where the auction is being viewed]" instead of a real value.
793
+ // These come from the model reading its own planning text and mistaking it
794
+ // for a concrete argument.
795
+ {
796
+ const argStr = JSON.stringify(parsedArgs);
797
+ const hasPlaceholder = /\[(the |this |your |a |an |current )?(url|path|address|link|tab|page|site|location|file|directory)[^\]]*\]/i.test(argStr);
798
+ if (hasPlaceholder) {
799
+ console.log(` [${agentId}] ⚠️ Placeholder in args — blocked: ${argStr.slice(0, 120)}`);
800
+ messages.push({ role: 'user', content: `Tool call BLOCKED: your argument contains a placeholder "${argStr.slice(0, 100)}" — that is NOT a real URL or path. Look at the tool results already in the conversation (e.g. the curl localhost:9223/json output) and use the actual URL you found there.` });
801
+ continue;
802
+ }
803
+ }
804
+
500
805
  this.emit('tool_activity', {
501
806
  agentId, event: 'tool_start', tool: name,
502
807
  description: this._toolDesc(name, parsedArgs)
503
808
  });
504
809
  console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
505
810
  toolsUsed.push(name);
811
+ emptyRetries = 0; // reset on successful tool call
812
+
813
+ // Loop detection: catch repeated single calls AND alternating A/B/A/B patterns.
814
+ // Normalize curl commands: strip sleep prefix so "sleep 3 && curl ...URL" and
815
+ // "sleep 10 && curl ...URL" both map to the same key "curl:URL".
816
+ let callKey = `${name}:${JSON.stringify(parsedArgs)}`;
817
+ if (name === 'bash' && parsedArgs.command) {
818
+ const curlMatch = parsedArgs.command.match(/curl\s+.*?(https?:\/\/\S+|localhost:\d+)/);
819
+ if (curlMatch) callKey = `curl:${curlMatch[1]}`;
820
+ }
821
+ recentCalls.push(callKey);
822
+ if (recentCalls.length > 6) recentCalls.shift();
823
+
824
+ // Detect: same call 3x in a row (2x for screenshot — never valid to screenshot without a change)
825
+ const screenshotLoop = name === 'screenshot_and_describe' && recentCalls.length >= 2 && recentCalls.slice(-2).every(c => c === callKey);
826
+ const last3Same = screenshotLoop || (recentCalls.length >= 3 && recentCalls.slice(-3).every(c => c === callKey));
827
+ // Detect: alternating A,B,A,B pattern (last 4 calls)
828
+ const last4 = recentCalls.slice(-4);
829
+ const abab = last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1];
830
+ // Detect: A,B,C,A,B,C pattern (last 6)
831
+ const last6 = recentCalls.slice(-6);
832
+ const abcabc = last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5];
833
+
834
+ if (last3Same || abab || abcabc) {
835
+ const pattern = last3Same ? 'same call 3x' : abab ? 'A/B/A/B alternating' : 'A/B/C repeating';
836
+ console.log(` [${agentId}] 🔁 Loop detected (${pattern}) — injecting fix hint`);
837
+ // Generate a context-aware hint based on what's looping
838
+ let loopFixMsg = `You are repeating the same action — STOP looping. Observe first, then act.\n`;
839
+ const loopCmd = parsedArgs.command || parsedArgs.path || '';
840
+ const noThink = '';
841
+ if (name === 'write_file') {
842
+ loopFixMsg += `You keep rewriting the same file. The file already exists with your previous code. Do NOT rewrite it from scratch.\nInstead:\n1. call screenshot_and_describe to SEE what the app looks like right now\n2. Identify the specific thing that is wrong or missing\n3. read_file the file to see current content\n4. Make a TARGETED edit — change only the specific broken section\nNever rewrite an entire file when the server is already running.`;
843
+ } else if (loopCmd.includes('mkdir') || loopCmd.includes('client')) {
844
+ loopFixMsg += `Files/folders already exist. STOP creating them. Call screenshot_and_describe to see the current state of the app, then identify what specifically needs to be improved and fix it with targeted edits.`;
845
+ } else if (loopCmd.includes('open http')) {
846
+ const openPortMatch = loopCmd.match(/:(\d+)/);
847
+ const openPort = openPortMatch ? openPortMatch[1] : '????';
848
+ loopFixMsg += `You are calling 'open http://localhost:${openPort}' repeatedly but the server is not running — opening the browser to a dead port does nothing. You must RESTART THE SERVER first:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${openPort}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${openPort}"}}\nIf curl returns 000, check the crash: bash cat /tmp/server.log. Fix the crash FIRST. Only call 'open' after curl returns 200.`;
849
+ } else if (name === 'bash' && (loopCmd.includes('curl') || loopCmd.includes('http_code'))) {
850
+ loopFixMsg += `The server check is looping. Check /tmp/server.log for errors:\n{"name":"bash","arguments":{"command":"cat /tmp/server.log | tail -20"}}\nThen fix the actual error in the code. NEVER change the port.`;
851
+ } else if (loopCmd.includes('npm install')) {
852
+ loopFixMsg += `npm install is looping — packages likely already installed. Skip it and start the server directly with nohup.`;
853
+ } else if (name === 'bash' && (loopCmd.includes('/tmp/') && (loopCmd.includes('.js') || loopCmd.includes('node')) && loopCmd.includes('9223'))) {
854
+ loopFixMsg += `Your Node.js/CDP script is only READING the page — that is why nothing changes. You need to WRITE A NEW SCRIPT THAT CLICKS.\n\nReplace your /tmp script with one that clicks the target element:\n\nWRITE_FILE /tmp/cdp_click.js\n\`\`\`javascript\nconst ws = new WebSocket('ws://localhost:9223/devtools/page/TAB_ID_HERE');\nws.onopen = () => {\n // Click element containing the text you need (change "Filter" to what you see on the page)\n ws.send(JSON.stringify({id:1, method:'Runtime.evaluate', params:{expression: 'Array.from(document.querySelectorAll("a,button,input,span,div,th")).find(el=>el.textContent.trim().includes("Filter"))?.click() || "not found"', returnByValue:true}}));\n};\nws.onmessage = e => { console.log(JSON.parse(e.data)); ws.close(); };\nsetTimeout(() => ws.close(), 5000);\n\`\`\`\n\nThen run: bash → /usr/local/bin/node --experimental-websocket /tmp/cdp_click.js\n\nYou CAN click. You CAN interact. Stop saying you cannot — write the clicking script.`;
855
+ } else if (name === 'screenshot_and_describe') {
856
+ const loopPort = (parsedArgs.url || '').match(/:(\d+)/)?.[1] || '????';
857
+ loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing. You have two choices:\n\nA) If the user asked a question or gave feedback — answer them with TEXT. You do NOT need a screenshot to reply to a conversation. Just write your response.\n\nB) If the app needs to be improved — make a CODE CHANGE first, then take ONE screenshot to verify:\n1. read_file the file that needs changing\n2. write_file with the improvement\n3. restart the server: bash pkill+nohup\n4. screenshot ONCE to verify\n\nDo NOT take another screenshot without first doing one of the above.`;
858
+ } else {
859
+ loopFixMsg += `Observe the tool results above, identify what is specifically broken, then make a targeted fix. Do not repeat commands that already ran.`;
860
+ }
861
+ loopFixMsg += noThink;
862
+ messages.push({ role: 'user', content: loopFixMsg });
863
+ // Don't fully reset — keep 1 entry so next identical call fires after 2 more (not 3)
864
+ recentCalls.splice(0, recentCalls.length - 1);
865
+ break; // break inner tool loop, let model respond to hint
866
+ }
506
867
 
507
- const result = await this._executeTool(name, parsedArgs, workDir);
868
+ const result = await this._executeTool(name, parsedArgs, workDir, agentId);
508
869
 
509
870
  this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
510
871
 
@@ -513,30 +874,107 @@ export class OllamaAgent extends EventEmitter {
513
874
  this.emit('agent_image', { agentId, image: result });
514
875
  }
515
876
 
516
- if (isQwen3) {
517
- // qwen3 format: tool results go back as user messages with <tool_response> tags
518
- if (isImageResult && isVision) {
519
- const base64 = result.replace(/^data:image\/\w+;base64,/, '');
520
- messages.push({ role: 'user', content: '<tool_response>\n[Screenshot captured]\n</tool_response>', images: [base64] });
521
- } else {
522
- const resultText = isImageResult ? '[Screenshot captured — vision model needed to analyze]' : String(result).slice(0, 8000);
523
- messages.push({ role: 'user', content: `<tool_response>\n${resultText}\n</tool_response>` });
524
- }
525
- } else {
526
- // Standard OpenAI format
527
- if (isImageResult && isVision) {
528
- messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: '[Screenshot captured — see image attached]' });
877
+ // ALL models get tool results fed back — no model should run blind.
878
+ // This is the core of the observe → reason → act loop: every tool result
879
+ // must be in context so the model can see what happened and react correctly.
880
+ {
881
+ const noThink = '';
882
+ if (isImageResult) {
529
883
  const base64 = result.replace(/^data:image\/\w+;base64,/, '');
530
- messages.push({ role: 'user', content: 'Here is the screenshot:', images: [base64] });
884
+ messages.push({ role: 'user', content: `[${name} result]: Screenshot captured. Continue with the next step.${noThink}`, images: [base64] });
531
885
  } else {
532
- messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: isImageResult ? '[Screenshot captured]' : String(result).slice(0, 8000) });
886
+ const resultText = isImageResult ? '[Screenshot captured]' : String(result).slice(0, 6000);
887
+ messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\nContinue with the next step.${noThink}` });
888
+
889
+ if (name === 'screenshot_and_describe') {
890
+ const screenshotResult = String(result);
891
+ const isLocalhost = (parsedArgs.url || '').includes('localhost') || (parsedArgs.url || '').includes('127.0.0.1');
892
+ // Server unreachable on localhost — force bash restart (only for local servers, not public URLs)
893
+ if (screenshotResult.includes('SERVER IS NOT REACHABLE') && isLocalhost) {
894
+ const portMatch = (parsedArgs.url || '').match(/:(\d+)/);
895
+ const port = portMatch ? portMatch[1] : '????';
896
+ messages.push({ role: 'user', content: `The local server on port ${port} is not running. Restart it with bash — find the project directory, then: pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd /path/to/project && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${port}` });
897
+ }
898
+ // Public URL unreachable — try web_fetch instead
899
+ else if (screenshotResult.includes('SERVER IS NOT REACHABLE') && !isLocalhost) {
900
+ messages.push({ role: 'user', content: `screenshot_and_describe could not reach ${parsedArgs.url}. Try web_fetch instead:\n{"name":"web_fetch","arguments":{"url":"${parsedArgs.url}"}}` });
901
+ }
902
+ // Dependency audit issues — prevent port-hopping
903
+ else if (screenshotResult.includes('DEPENDENCY AUDIT FOUND ISSUES')) {
904
+ messages.push({ role: 'user', content: `CRITICAL: Missing client-side libraries in your HTML. Do NOT change the port. Fix it: (1) read_file the HTML; (2) add the missing script tags; (3) write_file back; (4) restart server same port; (5) screenshot to verify.` });
905
+ }
906
+ // Successful screenshot of a build task — push to make a code change
907
+ else if (isLocalhost) {
908
+ messages.push({ role: 'user', content: `You have seen the current state. Now make your next improvement: read_file the code, write_file the fix, restart server, then screenshot once to verify.` });
909
+ }
910
+ // Successful screenshot of a public URL — agent is doing research, let it reason
911
+ }
912
+ // Catch placeholder/hello world pages — force the model to keep building
913
+ const screenshotText = String(result).toLowerCase();
914
+ const isPlaceholder = (
915
+ screenshotText.includes('hello world') ||
916
+ screenshotText.includes('cannot get /') ||
917
+ (screenshotText.includes('express') && screenshotText.includes('error')) ||
918
+ screenshotText.includes('placeholder') ||
919
+ screenshotText.includes('coming soon') ||
920
+ (screenshotText.includes('blank') && !screenshotText.includes('not blank'))
921
+ );
922
+ if (isPlaceholder) {
923
+ messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
924
+ }
533
925
  }
534
926
  }
535
927
  }
536
928
  continue; // loop back for next model turn
537
929
  }
538
930
 
539
- // ── No tool calls: final answer ───────────────────────────────────────
931
+ // ── No tool calls ────────────────────────────────────────────────────
932
+ {
933
+ const combined = (visibleContent + streamContent).replace(/<think>[\s\S]*?<\/think>/g, '');
934
+ const hasContent = combined.trim().length > 30;
935
+ const isEmpty = combined.trim().length === 0;
936
+
937
+ // Structural: truncated JSON — model started a tool call but stream ended early
938
+ const hasTruncatedJson = /\{"name"\s*:\s*"(bash|web_fetch|screenshot_and_describe|read_file|write_file|list_directory)"/i.test(streamContent) && Object.keys(streamToolCalls).length === 0;
939
+ if (hasTruncatedJson) {
940
+ console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call — kicking to re-output`);
941
+ messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
942
+ continue;
943
+ }
944
+
945
+ // Structural: empty response — model produced nothing
946
+ if (isEmpty) {
947
+ if (emptyRetries < 3) {
948
+ emptyRetries++;
949
+ console.log(` [${agentId}] ⚡ Turn ${turn}: empty response (retry ${emptyRetries}/3) — kicking`);
950
+ messages.push({ role: 'user', content: toolsUsed.length === 0 ? 'Start now — make your first tool call.' : 'You stopped. Make your next tool call.' });
951
+ continue;
952
+ }
953
+ console.log(` [${agentId}] ⚠️ Turn ${turn}: empty after 3 retries`);
954
+ }
955
+
956
+ // Structural: agent hasn't used any tools yet — it must act before it can answer
957
+ if (toolsUsed.length === 0 && hasContent) {
958
+ console.log(` [${agentId}] ⚡ Turn ${turn}: no tools used yet — kicking to act`);
959
+ messages.push({ role: 'user', content: 'Make your first tool call now.' });
960
+ continue;
961
+ }
962
+
963
+ // Semantic: ask the LLM whether the task is actually complete.
964
+ // This replaces all regex-based intent detection — the model judges its own output.
965
+ if (hasContent && toolsUsed.length > 0) {
966
+ const originalTask = messages.find(m => m.role === 'user')?.content || task;
967
+ const isDone = await this._isTaskComplete(originalTask, combined, controller.signal);
968
+ if (!isDone) {
969
+ console.log(` [${agentId}] ⚡ Turn ${turn}: LLM says task incomplete — kicking`);
970
+ messages.push({ role: 'user', content: 'You have not completed the task yet. Try a different approach and keep going.' });
971
+ continue;
972
+ }
973
+ console.log(` [${agentId}] ✅ Turn ${turn}: LLM confirmed task complete`);
974
+ }
975
+ }
976
+
977
+ // ── Final answer ──────────────────────────────────────────────────────
540
978
  if (visibleContent) finalContent = visibleContent;
541
979
  break;
542
980
 
@@ -555,7 +993,7 @@ export class OllamaAgent extends EventEmitter {
555
993
  ];
556
994
 
557
995
  try {
558
- const summaryRes = await fetch(`${this.baseUrl}/v1/chat/completions`, {
996
+ const summaryRes = await fetch(`${this.baseUrl}/api/chat`, {
559
997
  method: 'POST',
560
998
  headers: { 'Content-Type': 'application/json' },
561
999
  signal: controller.signal,
@@ -563,7 +1001,8 @@ export class OllamaAgent extends EventEmitter {
563
1001
  model: effectiveModel,
564
1002
  messages: summaryMessages,
565
1003
  stream: true,
566
- ...(isQwen3 ? { options: { think: false } } : {})
1004
+ think: false,
1005
+ options: { num_ctx: 32768 }
567
1006
  })
568
1007
  });
569
1008
 
@@ -606,7 +1045,7 @@ export class OllamaAgent extends EventEmitter {
606
1045
  }
607
1046
 
608
1047
  // Persist history for next task
609
- if (finalContent && sessionId) {
1048
+ if (finalContent) {
610
1049
  this._saveHistory(agentId, workDir, sessionId, [
611
1050
  ...history,
612
1051
  { role: 'user', content: task },
@@ -665,16 +1104,96 @@ export class OllamaAgent extends EventEmitter {
665
1104
 
666
1105
  // ─── Tool execution ───────────────────────────────────────────────────────
667
1106
 
668
- async _executeTool(name, args, workDir) {
1107
+ async _executeTool(name, args, workDir, agentId = 'agent') {
669
1108
  try {
670
1109
  switch (name) {
671
1110
  case 'bash': {
1111
+ // Block commands that would kill the worker process itself.
1112
+ // "pkill -f node" and "killall node" match the worker's own process.
1113
+ // Rewrite to only kill processes by their specific server log path or port.
1114
+ const cmd = args.command || '';
1115
+ if (/pkill\s+(-\w+\s+)*(-f\s+)?node\b/i.test(cmd) || /killall\s+node\b/i.test(cmd)) {
1116
+ // Safe replacement: kill only the app server on the port, not all node processes
1117
+ const portMatch = cmd.match(/localhost:(\d+)|:(\d+)/);
1118
+ const serverLogMatch = cmd.match(/server\.js/);
1119
+ if (portMatch || serverLogMatch) {
1120
+ const safeCmd = portMatch
1121
+ ? `lsof -ti:${portMatch[1] || portMatch[2]} | xargs kill -9 2>/dev/null || true`
1122
+ : `pkill -f "server.js" 2>/dev/null || true`;
1123
+ args = { ...args, command: safeCmd + cmd.slice(cmd.indexOf('&&') !== -1 ? cmd.indexOf('&&') : cmd.length) };
1124
+ } else {
1125
+ // No specific target — skip the pkill entirely, just run what follows &&
1126
+ const afterAnd = cmd.indexOf('&&');
1127
+ if (afterAnd !== -1) {
1128
+ args = { ...args, command: cmd.slice(afterAnd + 2).trim() };
1129
+ } else {
1130
+ return 'Skipped broad pkill to protect worker process. Use: lsof -ti:PORT | xargs kill -9';
1131
+ }
1132
+ }
1133
+ }
1134
+
1135
+ // Intercept "open http://..." — navigate the AgentForge CDP browser directly,
1136
+ // then auto-screenshot so the agent immediately sees what it built.
1137
+ const openUrlMatch = args.command.trim().match(/^open\s+(https?:\/\/\S+)/);
1138
+ if (openUrlMatch) {
1139
+ const targetUrl = openUrlMatch[1];
1140
+ let openedViaCDP = false;
1141
+ try {
1142
+ const newTabRes = await fetch('http://127.0.0.1:9223/json/new', { method: 'PUT', signal: AbortSignal.timeout(3000) });
1143
+ const newTabData = await newTabRes.json();
1144
+ const tabWs = new WebSocket(`ws://127.0.0.1:9223/devtools/page/${newTabData.id}`);
1145
+ await new Promise(r => tabWs.on('open', r));
1146
+ await new Promise(r => {
1147
+ let navigated = false;
1148
+ tabWs.send(JSON.stringify({ id: 1, method: 'Page.navigate', params: { url: targetUrl } }));
1149
+ tabWs.on('message', () => { if (!navigated) { navigated = true; tabWs.close(); r(); } });
1150
+ setTimeout(() => { tabWs.close(); r(); }, 3000);
1151
+ });
1152
+ openedViaCDP = true;
1153
+ } catch {
1154
+ // CDP unavailable — fall through to OS open
1155
+ try { await execAsync(`open "${targetUrl}"`); } catch {}
1156
+ }
1157
+ // Auto-screenshot after opening so the agent sees what it built.
1158
+ // Wait for page to load, then call screenshot_and_describe.
1159
+ await new Promise(r => setTimeout(r, 2500));
1160
+ try {
1161
+ const screenshotResult = await this._executeTool('screenshot_and_describe', {
1162
+ url: targetUrl,
1163
+ check_for: 'the running application',
1164
+ send_to_user: true
1165
+ }, workDir, agentId);
1166
+ return `Opened ${targetUrl} in browser${openedViaCDP ? ' (AgentForge browser)' : ''}.\n\nVisual snapshot of what is currently visible:\n${screenshotResult}`;
1167
+ } catch {
1168
+ return `Opened ${targetUrl} in browser. (Screenshot failed — verify with screenshot_and_describe)`;
1169
+ }
1170
+ }
1171
+
1172
+ // If workDir doesn't exist (e.g. /tmp was cleared after worker restart),
1173
+ // fall back to HOME rather than failing with ENOENT on every bash call.
1174
+ let bashCwd = workDir;
1175
+ const _home = process.env.HOME || '/tmp';
1176
+ try { if (!existsSync(bashCwd)) bashCwd = _home; } catch { bashCwd = _home; }
1177
+ // Background commands (ending with &) return no stdout — the model interprets
1178
+ // silence as failure and loops. Run them, then read back any log file to confirm.
1179
+ const isBackground = /&\s*$/.test(args.command.trim());
672
1180
  const { stdout, stderr } = await execAsync(args.command, {
673
- cwd: workDir,
674
- timeout: 60000,
1181
+ cwd: bashCwd,
1182
+ timeout: 120000,
675
1183
  maxBuffer: 1024 * 1024 * 2 // 2MB
676
1184
  });
677
- return (stdout + stderr).trim() || '(no output)';
1185
+ const out = (stdout + stderr).trim();
1186
+ if (isBackground && !out) {
1187
+ // Give the process a moment to start, then check /tmp/server.log if it exists
1188
+ await new Promise(r => setTimeout(r, 1500));
1189
+ let confirmation = 'Background process started.';
1190
+ try {
1191
+ const logContent = readFileSync('/tmp/server.log', 'utf-8').trim().split('\n').slice(-3).join('\n');
1192
+ if (logContent) confirmation = `Background process started. Server log:\n${logContent}`;
1193
+ } catch { /* no log yet */ }
1194
+ return confirmation;
1195
+ }
1196
+ return out || '(no output)';
678
1197
  }
679
1198
 
680
1199
  case 'read_file': {
@@ -721,6 +1240,26 @@ export class OllamaAgent extends EventEmitter {
721
1240
  }
722
1241
  }
723
1242
 
1243
+ case 'screenshot_and_describe': {
1244
+ const result = await this._screenshotAndDescribe(args.url, args.check_for);
1245
+ // Always send screenshot to user — agent called this tool, user should always see it
1246
+ if (this._lastScreenshotData) {
1247
+ this.emit('agent_image', { agentId, image: this._lastScreenshotData });
1248
+ this._lastScreenshotData = null;
1249
+ }
1250
+ return result;
1251
+ }
1252
+
1253
+ case 'browser': {
1254
+ const result = await browserAction(args);
1255
+ if (result && result.__screenshot) {
1256
+ const imgData = `data:image/png;base64,${result.base64}`;
1257
+ this.emit('agent_image', { agentId, image: imgData });
1258
+ return `Screenshot taken (${Math.round(result.base64.length * 0.75 / 1024)}KB). Image sent to chat.`;
1259
+ }
1260
+ return typeof result === 'string' ? result : JSON.stringify(result);
1261
+ }
1262
+
724
1263
  default:
725
1264
  return `Unknown tool: ${name}`;
726
1265
  }
@@ -733,24 +1272,13 @@ export class OllamaAgent extends EventEmitter {
733
1272
 
734
1273
  async _cdpScreenshot(navigateUrl, tmpFile) {
735
1274
  const CDP_PORT = 9223;
736
- let tabId;
737
-
738
- // Get or create a tab
739
- const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
740
- const tabs = await tabsRes.json();
741
- const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
742
-
743
- if (!usable) {
744
- // Create new tab
745
- const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
746
- const newTabData = await newTab.json();
747
- tabId = newTabData.id;
748
- } else {
749
- tabId = usable.id;
750
- }
1275
+
1276
+ // Always create a NEW tab — never hijack the dashboard or other existing tabs
1277
+ const newTabRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
1278
+ const newTabData = await newTabRes.json();
1279
+ const tabId = newTabData.id;
751
1280
 
752
1281
  return new Promise((resolve, reject) => {
753
- // Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
754
1282
  const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
755
1283
  let msgId = 1;
756
1284
  const pending = new Map();
@@ -775,10 +1303,12 @@ export class OllamaAgent extends EventEmitter {
775
1303
  try {
776
1304
  if (navigateUrl) {
777
1305
  await send('Page.navigate', { url: navigateUrl });
778
- // Wait for load
1306
+ // Wait for page to fully render
779
1307
  await new Promise(r => setTimeout(r, 3000));
780
1308
  }
781
1309
  const { data } = await send('Page.captureScreenshot', { format: 'png' });
1310
+ // Close the temporary tab
1311
+ await send('Target.closeTarget', { targetId: tabId }).catch(() => {});
782
1312
  ws.close();
783
1313
  resolve(`data:image/png;base64,${data}`);
784
1314
  } catch (err) {
@@ -788,10 +1318,129 @@ export class OllamaAgent extends EventEmitter {
788
1318
  });
789
1319
 
790
1320
  ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
791
- setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
1321
+ setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 25000);
792
1322
  });
793
1323
  }
794
1324
 
1325
+ // ─── Screenshot + vision analysis ─────────────────────────────────────────
1326
+ // Takes a screenshot of a URL, then asks the active vision model to describe it.
1327
+ // Returns a plain-text description the main agent can reason about.
1328
+
1329
+ async _screenshotAndDescribe(url, checkFor) {
1330
+ const question = checkFor
1331
+ ? `Does this web page look like it's working? Specifically check: ${checkFor}. Describe precisely what you see — the background color, any canvas element, colored shapes (even tiny dots), text, buttons, game elements, or error messages. Is the background dark or white? Are there any colored pixels at all?`
1332
+ : `Describe what you see on this web page. What is the background color? Are there any colored shapes, text, buttons, or UI elements? Is there a canvas? Even tiny colored dots count — be precise about what you see.`;
1333
+
1334
+ // === Server reachability check — fast fail if server is down ===
1335
+ try {
1336
+ await fetch(url, { signal: AbortSignal.timeout(4000) });
1337
+ } catch (reachErr) {
1338
+ const portMatch = url.match(/:(\d+)/);
1339
+ const port = portMatch ? portMatch[1] : '?';
1340
+ return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
1341
+ }
1342
+
1343
+ // === HTML dependency audit (always runs — fast, reliable) ===
1344
+ // Fetches the page HTML and checks for common missing client-side dependencies.
1345
+ // This catches issues that screenshots can't detect (JS errors, missing script tags).
1346
+ let auditNotes = '';
1347
+ try {
1348
+ const htmlRes = await fetch(url, { signal: AbortSignal.timeout(8000) });
1349
+ const html = await htmlRes.text();
1350
+ const missing = [];
1351
+ // Check for socket.io client usage without the script tag
1352
+ if (/\bio\s*\(/.test(html) && !html.includes('/socket.io/socket.io.js')) {
1353
+ missing.push('Missing <script src="/socket.io/socket.io.js"></script> — io() is called but the client library is not loaded');
1354
+ // Also verify the server actually serves it
1355
+ try {
1356
+ const sioRes = await fetch(url.replace(/\/$/, '') + '/socket.io/socket.io.js', { signal: AbortSignal.timeout(5000) });
1357
+ if (!sioRes.ok || (await sioRes.text()).startsWith('<!')) {
1358
+ missing.push('Server does NOT serve /socket.io/socket.io.js — check that socket.io is installed and express-static is set up');
1359
+ }
1360
+ } catch {}
1361
+ }
1362
+ if (missing.length > 0) {
1363
+ auditNotes = `\n\nHTML DEPENDENCY AUDIT FOUND ISSUES:\n${missing.map(m => '- ' + m).join('\n')}`;
1364
+ }
1365
+ } catch {}
1366
+
1367
+ let imageData;
1368
+ const tmpFile = `/tmp/af_verify_${Date.now()}.png`;
1369
+
1370
+ // Try AgentForge browser via CDP first
1371
+ try {
1372
+ imageData = await this._cdpScreenshot(url, null);
1373
+ } catch (cdpErr) {
1374
+ // CDP not available — try puppeteer headless screenshot
1375
+ try {
1376
+ const puppeteerModule = process.env.HOME + '/.npm-global/lib/node_modules/puppeteer';
1377
+ const scriptFile = `/tmp/af_pup_${Date.now()}.js`;
1378
+ const nodeScript = `
1379
+ const puppeteer = require(${JSON.stringify(puppeteerModule)});
1380
+ (async () => {
1381
+ const browser = await puppeteer.launch({headless: true, protocolTimeout: 30000, args: ['--no-sandbox','--disable-setuid-sandbox','--disable-gpu','--disable-dev-shm-usage']});
1382
+ const page = await browser.newPage();
1383
+ await page.setDefaultNavigationTimeout(12000);
1384
+ await page.setViewport({width: 1280, height: 900});
1385
+ try {
1386
+ await page.goto(${JSON.stringify(url)}, {waitUntil: 'domcontentloaded', timeout: 12000}).catch(()=>{});
1387
+ await new Promise(r => setTimeout(r, 2500));
1388
+ await page.screenshot({path: ${JSON.stringify(tmpFile)}, fullPage: true});
1389
+ console.log('puppeteer screenshot ok');
1390
+ } finally {
1391
+ await browser.close();
1392
+ }
1393
+ })().then(() => process.exit(0)).catch(e => { console.error(e.message); process.exit(1); });
1394
+ `;
1395
+ writeFileSync(scriptFile, nodeScript);
1396
+ await execAsync(`/usr/local/bin/node "${scriptFile}"`, { timeout: 45000 });
1397
+ await execAsync(`rm -f "${scriptFile}"`).catch(() => {});
1398
+ const raw = readFileSync(tmpFile).toString('base64');
1399
+ await execAsync(`rm -f "${tmpFile}"`).catch(() => {});
1400
+ imageData = `data:image/png;base64,${raw}`;
1401
+ } catch (pupErr) {
1402
+ console.warn(` [screenshot_and_describe] puppeteer failed: ${pupErr.message}`);
1403
+ // No screenshot possible — return audit notes only
1404
+ return `Cannot take screenshot (CDP: ${cdpErr.message}, puppeteer: ${pupErr.message}). ${auditNotes || 'No dependency issues found in HTML. Check server logs for errors.'}`;
1405
+ }
1406
+ }
1407
+
1408
+ // Store imageData so caller can emit to user if send_to_user=true
1409
+ this._lastScreenshotData = imageData;
1410
+
1411
+ const base64 = imageData.replace(/^data:image\/\w+;base64,/, '');
1412
+
1413
+ // Use the active model for vision analysis.
1414
+ try {
1415
+ // /api/chat with images array — supported by all Ollama vision-capable models
1416
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
1417
+ method: 'POST',
1418
+ headers: { 'Content-Type': 'application/json' },
1419
+ body: JSON.stringify({
1420
+ model: this.model,
1421
+ messages: [{ role: 'user', content: question, images: [base64] }],
1422
+ stream: false,
1423
+ options: { num_ctx: 4096 }
1424
+ }),
1425
+ signal: AbortSignal.timeout(120000)
1426
+ });
1427
+
1428
+ if (res.ok) {
1429
+ const json = await res.json();
1430
+ const description = json.message?.content || json.response || '';
1431
+ const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
1432
+ if (clean) {
1433
+ console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
1434
+ return `Screenshot analysis of ${url}:\n${clean}${auditNotes}`;
1435
+ }
1436
+ }
1437
+ } catch (err) {
1438
+ console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
1439
+ }
1440
+
1441
+ return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}`;
1442
+ }
1443
+
795
1444
  _resolvePath(p, workDir) {
796
1445
  return path.isAbsolute(p) ? p : path.join(workDir, p);
797
1446
  }
@@ -817,28 +1466,65 @@ export class OllamaAgent extends EventEmitter {
817
1466
  }
818
1467
 
819
1468
  // ─── History persistence ──────────────────────────────────────────────────
820
-
821
- _historyPath(workDir, sessionId) {
822
- return path.join(workDir, `.ollama_history_${sessionId}.json`);
1469
+ // History lives at ~/.agentforge/history/{agentId}.json — one canonical file
1470
+ // per agent, independent of workDir/sessionId/machine state. Never gets lost
1471
+ // due to workDir changes, worker restarts, or Railway assigning new sessionIds.
1472
+
1473
+ _historyPath(agentId) {
1474
+ const home = process.env.HOME || '/tmp';
1475
+ const dir = path.join(home, '.agentforge', 'history');
1476
+ if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
1477
+ return path.join(dir, `${agentId}.json`);
823
1478
  }
824
1479
 
825
1480
  _loadHistory(agentId, workDir, sessionId) {
826
- if (!sessionId) return [];
827
1481
  try {
828
- const fp = this._historyPath(workDir, sessionId);
1482
+ const fp = this._historyPath(agentId);
829
1483
  if (existsSync(fp)) {
830
1484
  const data = JSON.parse(readFileSync(fp, 'utf-8'));
831
- // Keep last 20 messages to stay within context
1485
+ console.log(` [${agentId}] Loaded ${data.length} history msgs from ~/.agentforge/history/`);
832
1486
  return data.slice(-12);
833
1487
  }
834
- } catch {}
1488
+ } catch (e) {
1489
+ console.warn(`⚠️ [${agentId}] History load error: ${e.message}`);
1490
+ }
835
1491
  return [];
836
1492
  }
837
1493
 
838
1494
  _saveHistory(agentId, workDir, sessionId, messages) {
839
1495
  try {
840
- const fp = this._historyPath(workDir, sessionId);
1496
+ const fp = this._historyPath(agentId);
841
1497
  writeFileSync(fp, JSON.stringify(messages.slice(-20), null, 2));
842
- } catch {}
1498
+ } catch (e) {
1499
+ console.warn(`⚠️ [${agentId}] History save error: ${e.message}`);
1500
+ }
1501
+ }
1502
+
1503
+ async _isTaskComplete(task, output, signal) {
1504
+ try {
1505
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
1506
+ method: 'POST',
1507
+ headers: { 'Content-Type': 'application/json' },
1508
+ signal,
1509
+ body: JSON.stringify({
1510
+ model: this.model,
1511
+ messages: [
1512
+ { role: 'system', content: 'You determine if a task is complete. Reply with only "yes" or "no".' },
1513
+ { role: 'user', content: `Task: ${task.slice(0, 300)}\n\nAgent output: ${output.slice(0, 600)}\n\nDid the agent fully complete the task with real results (not excuses, not plans, not partial attempts)?` }
1514
+ ],
1515
+ stream: false,
1516
+ think: false,
1517
+ options: { num_ctx: 2048 }
1518
+ })
1519
+ });
1520
+ if (!res.ok) return true;
1521
+ const data = await res.json();
1522
+ const answer = (data.message?.content || '').toLowerCase().trim();
1523
+ console.log(` [_isTaskComplete] verdict: "${answer}"`);
1524
+ return answer.startsWith('yes');
1525
+ } catch (e) {
1526
+ console.warn(`⚠️ [_isTaskComplete] error: ${e.message}`);
1527
+ return true; // assume done on error to avoid infinite loops
1528
+ }
843
1529
  }
844
1530
  }