@hamp10/agentforge 0.2.16 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,30 @@
1
1
  import { exec } from 'child_process';
2
- import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync } from 'fs';
2
+ import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync, appendFileSync } from 'fs';
3
3
  import { EventEmitter } from 'events';
4
4
  import path from 'path';
5
5
  import { promisify } from 'util';
6
6
  import { fileURLToPath } from 'url';
7
+ import { browserAction } from './hampagent/browser.js';
7
8
 
8
9
  const execAsync = promisify(exec);
9
10
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
11
 
12
+ // ── Worker log file — always write to /tmp/agentforge/worker.log so logs are
13
+ // accessible remotely via SSH regardless of how the worker was started.
14
+ const WORKER_LOG = '/tmp/agentforge/worker.log';
15
+ try { mkdirSync('/tmp/agentforge', { recursive: true }); } catch {}
16
+ const _origLog = console.log.bind(console);
17
+ console.log = (...args) => {
18
+ _origLog(...args);
19
+ try {
20
+ const line = args.map(a => (typeof a === 'object' ? JSON.stringify(a) : String(a))).join(' ');
21
+ appendFileSync(WORKER_LOG, `${new Date().toISOString()} ${line}\n`);
22
+ } catch {}
23
+ };
24
+
11
25
  // Minimal tool definitions — one compact JSON per line, embedded in system prompt.
12
- // Ollama's `tools` API param is broken for qwen3 (github.com/ollama/ollama/issues/14601).
13
- // Descriptions kept short to fit within qwen3-vl:8b's 4096 token context.
26
+ // Ollama's `tools` API param is unreliable; tools are injected as text in the system prompt.
27
+ // Descriptions kept short to fit within a 4096 token context window.
14
28
  const TOOL_DEFS = [
15
29
  {
16
30
  type: 'function',
@@ -59,16 +73,27 @@ const TOOL_DEFS = [
59
73
  description: 'Screenshot the screen. Set send_to_user=true only if user asked to see it.',
60
74
  parameters: { type: 'object', properties: { target: { type: 'string', enum: ['screen', 'browser'] }, send_to_user: { type: 'boolean' } }, required: ['target'] }
61
75
  }
76
+ },
77
+ {
78
+ type: 'function',
79
+ function: {
80
+ name: 'screenshot_and_describe',
81
+ description: 'Screenshot a URL and get AI visual analysis. Use after building any web app to verify it looks correct before reporting done. Set send_to_user:true to show the screenshot to the user in chat.',
82
+ parameters: { type: 'object', properties: {
83
+ url: { type: 'string', description: 'URL to screenshot (e.g. http://localhost:3458)' },
84
+ check_for: { type: 'string', description: 'What should be visible (e.g. "snake game with canvas, scoreboard, and game controls")' },
85
+ send_to_user: { type: 'boolean', description: 'Send screenshot image to user in chat (true when confirmed working)' }
86
+ }, required: ['url'] }
87
+ }
62
88
  }
63
89
  ];
64
90
 
65
91
  // Minimal <tools> XML for system prompt — one compact JSON per line, no outer array.
66
- // Per qwen3 Hermes chat template (tokenizer_config.json).
67
92
  const TOOLS_XML = `<tools>\n${TOOL_DEFS.map(t => JSON.stringify(t.function)).join('\n')}\n</tools>`;
68
93
 
69
94
  /**
70
95
  * Parse <tool_call>...</tool_call> blocks from streamed content.
71
- * qwen3-vl native format: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
96
+ * Some models emit: <tool_call>{"name": "bash", "arguments": {"command": "..."}}</tool_call>
72
97
  * Returns array of {name, arguments} or null if no complete tool calls found.
73
98
  */
74
99
  function _parseToolCallTags(content) {
@@ -86,67 +111,170 @@ function _parseToolCallTags(content) {
86
111
  return calls.length > 0 ? calls : null;
87
112
  }
88
113
 
114
+ /**
115
+ * Parse WRITE_FILE code-fence format.
116
+ * Models struggle to JSON-escape large code files (unescaped quotes break JSON.parse).
117
+ * This format avoids the problem: path on the first line, raw content in a code fence.
118
+ *
119
+ * Accepted formats:
120
+ * WRITE_FILE /abs/path/to/file.js
121
+ * ```
122
+ * ...raw content, no escaping needed...
123
+ * ```
124
+ *
125
+ * write_file: /abs/path/to/file.js
126
+ * ```javascript
127
+ * ...content...
128
+ * ```
129
+ *
130
+ * Returns array of {name, arguments} or null if no matches found.
131
+ */
132
+ function _parseWriteFileFences(content) {
133
+ if (!content) return null;
134
+ const calls = [];
135
+ // Match WRITE_FILE <path> or write_file: <path> followed by a code fence
136
+ const re = /(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n([\s\S]*?)```/gi;
137
+ let m;
138
+ while ((m = re.exec(content)) !== null) {
139
+ const filePath = m[1].trim();
140
+ const fileContent = m[2]; // raw content, no unescaping needed
141
+ if (filePath && fileContent !== undefined) {
142
+ calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
143
+ }
144
+ }
145
+ return calls.length > 0 ? calls : null;
146
+ }
147
+
148
+ /**
149
+ * Fallback: parse "Writing filename...\n```lang\ncontent\n```" code blocks.
150
+ * Many local models ignore the WRITE_FILE instruction and use raw markdown blocks.
151
+ * Extract the filename from the "Writing X..." line and write the file to the project dir.
152
+ * Project dir is inferred from the most recent "mkdir -p /path" in the content.
153
+ */
154
+ function _parseWritingFallback(content, workDir) {
155
+ if (!content) return null;
156
+ const calls = [];
157
+
158
+ // Infer project dir from last mkdir -p command in the stream
159
+ let projectDir = workDir;
160
+ const mkdirMatches = [...content.matchAll(/mkdir\s+-p\s+"?([^"\n]+)"?/g)];
161
+ if (mkdirMatches.length > 0) {
162
+ const lastMkdir = mkdirMatches[mkdirMatches.length - 1];
163
+ const candidate = lastMkdir[1].trim().replace(/~/, process.env.HOME || '/tmp');
164
+ if (candidate && !candidate.includes('$')) projectDir = candidate;
165
+ }
166
+
167
+ // Match: "Writing filename...\n```lang\ncontent\n```"
168
+ const re = /Writing\s+([\w./\-]+?)(?:\.{3})?\s*\n```[^\n]*\n([\s\S]*?)```(?:\n|$)/gi;
169
+ let m;
170
+ while ((m = re.exec(content)) !== null) {
171
+ const filename = m[1].trim();
172
+ const fileContent = m[2];
173
+ if (!filename || fileContent === undefined) continue;
174
+ // Skip if this is just a status echo with no real code
175
+ if (fileContent.trim().length < 5) continue;
176
+ const filePath = filename.startsWith('/') ? filename : `${projectDir}/${filename}`;
177
+ calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
178
+ }
179
+ return calls.length > 0 ? calls : null;
180
+ }
181
+
89
182
  /**
90
183
  * Detect text-based tool calls from model content.
91
- * qwen3-vl:8b outputs tool calls as JSON in content rather than tool_calls field.
184
+ * Models that don't use native tool_calls emit JSON in their text content instead.
92
185
  * Supports two schemas:
93
186
  * - {name, arguments} (OpenAI-style)
94
- * - {tool, args} (qwen3 native style)
95
- * Supports both compact (one JSON per line) and pretty-printed multi-line JSON blocks.
96
- * Returns array of {name, arguments} if content is ONLY tool calls, else null.
187
+ * - {tool, args} (alternate style)
188
+ * Supports:
189
+ * - Pure JSON (whole content is one or more JSON objects)
190
+ * - Mixed: "Status line\n{json}" — narration before the tool call JSON
191
+ * Returns array of {name, arguments} if any tool calls found, else null.
97
192
  */
98
193
  function _parseTextToolCalls(content) {
99
194
  if (!content) return null;
100
195
  const trimmed = content.trim();
101
- if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) return null;
196
+ if (!trimmed) return null;
102
197
 
103
198
  // Normalise a single parsed object into {name, arguments}
199
+ // Handles multiple schemas models may emit:
200
+ // {name, arguments} — OpenAI-style (correct)
201
+ // {tool, args} — alternate native style
202
+ // {action:"write_file", path, content} — model shorthand
203
+ // {action:"bash", command} — model shorthand
204
+ // {action:"read_file", path} — model shorthand
104
205
  const normalise = (obj) => {
105
- if (typeof obj.name === 'string' && obj.arguments !== undefined) {
106
- const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
107
- return { name: obj.name, arguments: args };
108
- }
109
- if (typeof obj.tool === 'string' && obj.args !== undefined) {
110
- return { name: obj.tool, arguments: obj.args };
111
- }
206
+ try {
207
+ if (typeof obj.name === 'string' && obj.arguments !== undefined) {
208
+ const args = typeof obj.arguments === 'string' ? JSON.parse(obj.arguments) : obj.arguments;
209
+ return { name: obj.name, arguments: args };
210
+ }
211
+ if (typeof obj.tool === 'string' && obj.args !== undefined) {
212
+ return { name: obj.tool, arguments: obj.args };
213
+ }
214
+ // Handle {action, ...} shorthand the model sometimes emits
215
+ if (typeof obj.action === 'string') {
216
+ const action = obj.action.toLowerCase().replace(/[ -]/g, '_');
217
+ // Map common action names to tool names
218
+ const toolName = action === 'write' ? 'write_file'
219
+ : action === 'read' ? 'read_file'
220
+ : action === 'list' ? 'list_directory'
221
+ : action === 'run' || action === 'execute' || action === 'exec' ? 'bash'
222
+ : action; // use as-is (write_file, bash, read_file, etc.)
223
+ const args = {};
224
+ if (obj.path !== undefined) args.path = obj.path;
225
+ if (obj.content !== undefined) args.content = obj.content;
226
+ if (obj.command !== undefined) args.command = obj.command;
227
+ if (obj.url !== undefined) args.url = obj.url;
228
+ if (obj.target !== undefined) args.target = obj.target;
229
+ if (Object.keys(args).length > 0) return { name: toolName, arguments: args };
230
+ }
231
+ } catch {}
112
232
  return null;
113
233
  };
114
234
 
115
- // Try parsing the whole content as a single JSON object/array
116
- try {
117
- const obj = JSON.parse(trimmed);
118
- if (Array.isArray(obj)) {
119
- const calls = obj.map(normalise);
120
- if (calls.every(Boolean)) return calls;
121
- return null;
122
- }
123
- const call = normalise(obj);
124
- if (call) return [call];
125
- return null;
126
- } catch {}
127
-
128
- // Try extracting multiple top-level JSON objects (separated by newlines/whitespace)
235
+ // Extract all JSON objects that start at the beginning of a line
236
+ // This handles both pure-JSON responses and "narration\n{json}" mixed responses
129
237
  const calls = [];
238
+ const lines = trimmed.split('\n');
130
239
  let i = 0;
131
- while (i < trimmed.length) {
132
- // Skip whitespace/newlines between objects
133
- while (i < trimmed.length && /\s/.test(trimmed[i])) i++;
134
- if (i >= trimmed.length) break;
135
- if (trimmed[i] !== '{') return null; // Non-JSON between objects bail
136
- // Find matching closing brace
137
- let depth = 0, j = i;
138
- while (j < trimmed.length) {
139
- if (trimmed[j] === '{') depth++;
140
- else if (trimmed[j] === '}') { depth--; if (depth === 0) { j++; break; } }
141
- j++;
240
+ while (i < lines.length) {
241
+ const line = lines[i].trim();
242
+ if (line.startsWith('{') || line.startsWith('[')) {
243
+ // Accumulate lines until we have a complete JSON object (handles multi-line JSON)
244
+ // Skips { } [ ] inside JSON strings so CSS/HTML brace counts don't confuse the parser.
245
+ let jsonStr = '';
246
+ let depth = 0;
247
+ while (i < lines.length) {
248
+ const l = lines[i];
249
+ jsonStr += (jsonStr ? '\n' : '') + l;
250
+ let inString = false, escape = false;
251
+ for (const ch of l) {
252
+ if (escape) { escape = false; continue; }
253
+ if (ch === '\\' && inString) { escape = true; continue; }
254
+ if (ch === '"') { inString = !inString; continue; }
255
+ if (!inString) {
256
+ if (ch === '{' || ch === '[') depth++;
257
+ else if (ch === '}' || ch === ']') depth--;
258
+ }
259
+ }
260
+ i++;
261
+ if (depth === 0 && jsonStr.trim()) break;
262
+ }
263
+ try {
264
+ const obj = JSON.parse(jsonStr.trim());
265
+ if (Array.isArray(obj)) {
266
+ for (const item of obj) {
267
+ const call = normalise(item);
268
+ if (call) calls.push(call);
269
+ }
270
+ } else {
271
+ const call = normalise(obj);
272
+ if (call) calls.push(call);
273
+ }
274
+ } catch {}
275
+ } else {
276
+ i++;
142
277
  }
143
- try {
144
- const obj = JSON.parse(trimmed.slice(i, j));
145
- const call = normalise(obj);
146
- if (!call) return null;
147
- calls.push(call);
148
- i = j;
149
- } catch { return null; }
150
278
  }
151
279
  return calls.length > 0 ? calls : null;
152
280
  }
@@ -206,13 +334,13 @@ export class OllamaAgent extends EventEmitter {
206
334
  return { agentId, workDir };
207
335
  }
208
336
 
209
- async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null) {
337
+ async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null) {
210
338
  const startTime = Date.now();
211
339
  const controller = new AbortController();
212
340
 
213
341
  // Use per-agent model override if provided (and not the placeholder 'Default').
214
- // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/qwen3-vl:8b' but
215
- // Ollama's API expects bare names like 'qwen3-vl:8b'.
342
+ // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/modelname:tag' but
343
+ // Ollama's API expects bare names like 'modelname:tag'.
216
344
  const rawModel = (agentModel && agentModel !== 'Default') ? agentModel : this.model;
217
345
  const effectiveModel = rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel;
218
346
 
@@ -224,82 +352,153 @@ export class OllamaAgent extends EventEmitter {
224
352
  console.log(` Task: ${task}`);
225
353
  console.log(` Working dir: ${workDir}`);
226
354
 
227
- // Detect model capabilities
228
- const isQwen3 = effectiveModel.startsWith('qwen3');
229
- const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(effectiveModel);
230
-
231
355
  try {
232
- // Load conversation history from disk (session persistence)
233
- const history = this._loadHistory(agentId, workDir, sessionId);
234
-
235
- // System prompt uses the exact format from qwen3's Hermes chat template.
236
- // Tools are embedded as <tools> XML — never passed via the API `tools` param (broken in Ollama).
237
- const systemPrompt = isQwen3
238
- ? [
239
- '/no_think',
240
- `You are a helpful assistant. Working directory: ${workDir}`,
241
- ``,
242
- `# Tools`,
243
- ``,
244
- `You may call one or more functions to complete the task.`,
245
- ``,
246
- `You are provided with function signatures within <tools></tools> XML tags:`,
247
- TOOLS_XML,
248
- ``,
249
- `For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:`,
250
- `<tool_call>`,
251
- `{"name": <function-name>, "arguments": <args-json-object>}`,
252
- `</tool_call>`,
253
- ``,
254
- `Rules:`,
255
- `- Call tools to take actions. Do NOT describe what you would do — just do it.`,
256
- `- For simple conversation (greetings, questions) respond with plain text, no tools.`,
257
- `- After finishing, write a brief summary.`,
258
- ].join('\n')
259
- : [
260
- `You are a helpful AI agent. Working directory: ${workDir}`,
261
- `Use the provided tools to complete tasks. Don't describe — act.`,
262
- ].join('\n');
356
+ // Load conversation history — prefer Railway DB history (sent via task payload, works across
357
+ // any machine/user/model). Fall back to local file for offline or pre-fix sessions.
358
+ const history = (conversationHistory && conversationHistory.length > 0)
359
+ ? conversationHistory.slice(-20)
360
+ : this._loadHistory(agentId, workDir, sessionId);
361
+
362
+ // Text-based tool format is used rather than XML schemas — more reliable across models.
363
+ // Use flow's custom system prompt if provided, otherwise fall back to built-in default.
364
+ // ALL models get the same rule set and tool format — no model-specific branching.
365
+ const homeDir = process.env.HOME || '/tmp';
366
+ const projectsDir = `${homeDir}/Desktop/Projects`;
367
+ const universalRules = `
368
+ == WHAT YOU CAN DO ==
369
+ You have these tools:
370
+
371
+ bash: Run any shell command — file ops, servers, packages, logs, system queries.
372
+ read_file: Read a local file.
373
+ WRITE_FILE: Write a local file (code-fence format only).
374
+ list_directory: List a local directory.
375
+ web_fetch: Fetch any public URL — websites, APIs, docs, raw data. Fast, text-only.
376
+ screenshot_and_describe: Navigate a real browser to any URL and screenshot it. Use this when pages require JavaScript, you need visual output, or web_fetch returns nothing useful.
377
+ browser: Control the AgentForge Browser directly (Chrome, always running, logged into user's services). Use for ALL browser interaction — navigating, clicking, typing, reading page content, screenshots.
378
+
379
+ BROWSER TOOL — use this instead of writing CDP scripts:
380
+ {"name":"browser","arguments":{"action":"tabs"}} ← list ALL open tabs with URLs (DO THIS FIRST)
381
+ {"name":"browser","arguments":{"action":"snapshot"}} ← read current page content + interactive elements (also shows all tabs)
382
+ {"name":"browser","arguments":{"action":"navigate","url":"https://..."}} ← go to URL
383
+ {"name":"browser","arguments":{"action":"focus","url":"expireddomains"}} ← switch to a tab by URL fragment
384
+ {"name":"browser","arguments":{"action":"click","ref":3}} ← click element by index from snapshot
385
+ {"name":"browser","arguments":{"action":"click","text":"Show Filter"}} ← click element by visible text
386
+ {"name":"browser","arguments":{"action":"click","selector":"#filter-btn"}} ← click by CSS selector
387
+ {"name":"browser","arguments":{"action":"type","selector":"input","text":"hello"}} ← type text
388
+ {"name":"browser","arguments":{"action":"screenshot"}} ← take screenshot
389
+ {"name":"browser","arguments":{"action":"evaluate","script":"document.title"}} ← run JS
390
+ {"name":"browser","arguments":{"action":"scroll","y":400}} ← scroll down
391
+
392
+ WORKFLOW when user says "the tab is already open":
393
+ 1. browser tabs → see ALL open tabs and their URLs
394
+ 2. browser focus with the URL fragment of the tab you need (e.g. "expireddomains")
395
+ 3. browser snapshot → read page content and get element indices
396
+ 4. browser click to interact (by ref index, by text, or by selector)
397
+ 5. browser snapshot again to see result
398
+ The browser has the user's sessions and cookies. You CAN click any button, filter, or link visible on the page.
399
+
400
+ == GENERAL RULES (all tasks) ==
401
+ G1. IDENTIFY THE TASK TYPE. Build? Research? Question? Match approach to task.
402
+ G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer.
403
+ G3. ANY WEBSITE/URL IS ACCESSIBLE. User mentions a site or open tab? Use browser snapshot to see what's currently open, then browser navigate/click/type to interact. Never ask "what's the URL?" — find it yourself.
404
+ G4. NEVER ASK PERMISSION. Never say "should I use X or Y?" — pick the right tool and use it.
405
+ G5. IF A TOOL FAILS: Try a different approach. web_fetch empty → screenshot_and_describe. Never repeat a failing call identically.
406
+ G6. RESEARCH TASKS: web_fetch → read → reason → respond in text. No server, no localhost.
407
+ G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data.
408
+ G8. WHEN GENUINELY STUCK: State what you tried, what failed, ask ONE specific question.
409
+ G9. KEEP GOING until the task is fully complete.
410
+
411
+ == BUILD RULES (only when building apps/games/tools) ==
412
+ B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spaces — use underscores).
413
+ B2. WRITE EVERY FILE COMPLETELY — no stubs, no placeholders, no TODOs. Full working code only.
414
+ B3. BUILD FILE BY FILE — write each file completely before writing the next.
415
+ B4. ALWAYS use absolute paths.
416
+ B5. SERVING FILES: Node.js server: nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 & — NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server PORT --directory /abs/path/ > /tmp/server.log 2>&1 &
417
+ B6. npm install: cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express
418
+ B7. After starting server, verify: sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:PORT — if 000, check /tmp/server.log and fix the error.
419
+ B8. PORT MANAGEMENT: Check port before starting: lsof -i :PORT | head -3. If in use: kill old process, restart. If crashed: restart. If busy with something else: pick different port.
420
+ B9. EXPRESS WILDCARD ROUTE: NEVER write app.get('*', ...) — crashes in newer versions. Use app.use((req, res) => { ... }) instead.
421
+ B10. MANDATORY SCREENSHOT QA: After curl returns 200, call screenshot_and_describe with send_to_user:true. You are NOT done until the screenshot shows the real working app.
422
+ B11. ALWAYS open the finished app: bash open http://localhost:PORT
423
+ B12. CANVAS GAMES: canvas 800×600, dark background #1a1a2e, all elements clearly visible. Dark theme, styled UI.
424
+ B13. OBSERVE BEFORE FIXING: Screenshot first, then make targeted edits. Never rewrite an entire file from scratch when the server is running.
425
+ B14. TARGETED EDITS: read_file to see current code, write_file only the changed section. Never throw away working code.
426
+ B15. QUALITY LOOP: After each fix, screenshot again to verify. Iterate until it looks correct.
427
+ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.`;
428
+ // Text-based tool format works reliably across all local models.
429
+ // WRITE_FILE uses code-fence to avoid JSON-escaping issues; all other tools use JSON.
430
+ const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nDO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nTO WRITE A FILE (only when actually writing code/content to disk):\nWriting server.js...\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\n\nFOR ALL OTHER TOOLS — output JSON on its own line:\nRunning command...\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for","send_to_user":true}} — open ANY URL in a real browser and screenshot it. Use when pages are dynamic/JS-heavy or you need to show the user visuals.\n\n${universalRules}`;
431
+ const systemPrompt = customSystemPrompt || jsonToolFormat;
263
432
 
264
433
  const messages = [
265
434
  { role: 'system', content: systemPrompt },
266
435
  ...history,
267
436
  ];
268
437
 
269
- // Attach initial image to user message if provided
438
+ // Attach initial image if provided — always include it; models that don't support
439
+ // images will ignore the field, and if they error we catch it below.
270
440
  const userMessage = { role: 'user', content: task };
271
- if (image && isVision) {
441
+ if (image) {
272
442
  const base64 = image.replace(/^data:image\/\w+;base64,/, '');
273
443
  userMessage.images = [base64];
274
444
  }
275
445
  messages.push(userMessage);
276
446
 
447
+ // Force-unload any currently loaded model so it reloads with our num_ctx setting.
448
+ // Model-agnostic and machine-agnostic — guarantees 32K context on every task.
449
+ try {
450
+ await fetch(`${this.baseUrl}/api/generate`, {
451
+ method: 'POST', signal: controller.signal,
452
+ headers: { 'Content-Type': 'application/json' },
453
+ body: JSON.stringify({ model: effectiveModel, keep_alive: 0, prompt: '' })
454
+ });
455
+ } catch { /* ignore — model may not be loaded yet */ }
456
+
277
457
  let finalContent = '';
278
458
  let allOutput = ''; // accumulate everything streamed across all turns
279
459
  const toolsUsed = []; // track tool names called (for fallback summary)
280
- const MAX_TURNS = 15; // reduce from 25 local models get stuck in tool loops
460
+ // No hard turn limit agent runs until done, loop-detected, or wall-clock timeout.
461
+ const recentCalls = []; // last N tool calls for loop detection
462
+ let emptyRetries = 0; // consecutive empty-response retries
281
463
 
282
- for (let turn = 0; turn < MAX_TURNS; turn++) {
464
+ for (let turn = 0; ; turn++) {
283
465
  if (controller.signal.aborted) break;
284
466
 
285
467
  this.emit('tool_activity', { agentId, event: 'tool_start', tool: 'model', description: `Thinking…` });
286
468
 
469
+ // All local Ollama models use the native /api/chat endpoint.
470
+ // The OpenAI-compatible /v1/chat/completions endpoint ignores options.num_ctx,
471
+ // causing all models to run at 4096-token context regardless of what we pass.
472
+ const isOllamaBackend = this.baseUrl.includes('11434') || this.baseUrl.includes('localhost') || this.baseUrl.includes('127.0.0.1');
473
+ const useNativeEndpoint = isOllamaBackend; // all local models use native endpoint
474
+
287
475
  let response;
288
476
  try {
289
- const requestBody = {
290
- model: effectiveModel,
291
- messages,
292
- stream: true,
293
- // qwen3: tools embedded in system prompt — do NOT pass tools param (broken in Ollama for qwen3)
294
- // Other models: pass tools normally
295
- ...(!isQwen3 ? { tools: TOOL_DEFS, tool_choice: 'auto' } : {}),
296
- options: {
297
- num_ctx: 8192, // explicit context — Ollama defaults to 2048 which is too small
298
- ...(isQwen3 ? { think: false } : {}), // CRITICAL: thinking + tools corrupts template
299
- },
300
- };
301
-
302
- response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
477
+
478
+ let requestBody;
479
+ let endpoint;
480
+
481
+ if (useNativeEndpoint) {
482
+ // Ollama native format — supports think:false at top level
483
+ endpoint = `${this.baseUrl}/api/chat`;
484
+ requestBody = {
485
+ model: effectiveModel,
486
+ messages,
487
+ stream: true,
488
+ think: false, // top-level think disable — WORKS on native endpoint
489
+ options: { num_ctx: 32768 },
490
+ };
491
+ } else {
492
+ endpoint = `${this.baseUrl}/v1/chat/completions`;
493
+ requestBody = {
494
+ model: effectiveModel,
495
+ messages,
496
+ stream: true,
497
+ options: { num_ctx: 32768 },
498
+ };
499
+ }
500
+
501
+ response = await fetch(endpoint, {
303
502
  method: 'POST',
304
503
  headers: { 'Content-Type': 'application/json' },
305
504
  signal: controller.signal,
@@ -315,21 +514,30 @@ export class OllamaAgent extends EventEmitter {
315
514
  throw new Error(`Local model error ${response.status}: ${body}`);
316
515
  }
317
516
 
318
- // ── Stream the SSE response ──
319
- // For qwen3: model emits text tokens including <tool_call>...</tool_call> blocks.
320
- // Stream text live to user, but suppress content inside <tool_call> tags.
321
- // For other models: also handle delta.tool_calls in the standard OpenAI format.
322
- let streamContent = ''; // full accumulated text (including tool_call tags for qwen3)
517
+ // ── Stream the response ──
518
+ // Two formats:
519
+ // Ollama native (/api/chat): NDJSON lines {"message":{"content":"..."},"done":false}
520
+ // OpenAI-compatible (/v1/...): SSE lines — data: {"choices":[{"delta":{"content":"..."}}]}
521
+ // Models may emit <tool_call>...</tool_call> or <think>...</think> blocks in text content.
522
+ // Stream text live to user; suppress think blocks and raw JSON tool call blobs.
523
+ let streamContent = ''; // full accumulated text (including any tool_call/think blocks)
323
524
  let visibleContent = ''; // text emitted live to user (no tool_call or think blocks)
324
- let streamToolCalls = {}; // OpenAI-format tool calls (non-qwen3 models)
525
+ let streamToolCalls = {}; // OpenAI-format tool calls from native tool_calls field
325
526
  let inThinkBlock = false;
326
527
  let inToolCallBlock = false; // inside <tool_call>...</tool_call>
528
+ let inJsonBlob = false; // inside bare JSON tool call — suppress from streaming
529
+ let inFenceBlock = false; // inside WRITE_FILE code fence — suppress content from streaming
530
+ let fenceDepth = 0; // ``` count since last WRITE_FILE (even=closed, odd=open)
327
531
  let rawTokenCount = 0;
532
+ let lastVisibleAt = Date.now(); // track when we last got visible output (for think timeout)
328
533
 
329
534
  const reader = response.body.getReader();
330
535
  const decoder = new TextDecoder();
331
536
  let buf = '';
332
537
 
538
+ // No timeouts — local model can take as long as it needs on any turn.
539
+ // Only the user abort (controller.signal) or stream end stops a turn.
540
+ let turnRetry = false;
333
541
  while (true) {
334
542
  if (controller.signal.aborted) break;
335
543
  const { done, value } = await reader.read();
@@ -340,33 +548,47 @@ export class OllamaAgent extends EventEmitter {
340
548
  buf = lines.pop();
341
549
 
342
550
  for (const line of lines) {
343
- if (!line.startsWith('data: ')) continue;
344
- const payload = line.slice(6).trim();
345
- if (payload === '[DONE]') continue;
346
- let evt;
347
- try { evt = JSON.parse(payload); } catch { continue; }
348
-
349
- const delta = evt.choices?.[0]?.delta;
350
- if (!delta) continue;
351
-
352
- // Standard OpenAI tool_calls (non-qwen3 models)
353
- if (delta.tool_calls) {
354
- for (const tc of delta.tool_calls) {
355
- const idx = tc.index ?? 0;
356
- if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
357
- if (tc.id) streamToolCalls[idx].id = tc.id;
358
- if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
359
- if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
551
+ if (!line.trim()) continue;
552
+
553
+ let tokenText = null;
554
+
555
+ if (useNativeEndpoint) {
556
+ // Ollama native NDJSON format
557
+ let nativeEvt;
558
+ try { nativeEvt = JSON.parse(line); } catch { continue; }
559
+ if (nativeEvt.done) continue;
560
+ tokenText = nativeEvt.message?.content ?? null;
561
+ } else {
562
+ // OpenAI SSE format
563
+ if (!line.startsWith('data: ')) continue;
564
+ const payload = line.slice(6).trim();
565
+ if (payload === '[DONE]') continue;
566
+ let evt;
567
+ try { evt = JSON.parse(payload); } catch { continue; }
568
+
569
+ const delta = evt.choices?.[0]?.delta;
570
+ if (!delta) continue;
571
+
572
+ // Standard OpenAI tool_calls from native tool_calls field
573
+ if (delta.tool_calls) {
574
+ for (const tc of delta.tool_calls) {
575
+ const idx = tc.index ?? 0;
576
+ if (!streamToolCalls[idx]) streamToolCalls[idx] = { id: tc.id || '', type: 'function', function: { name: '', arguments: '' } };
577
+ if (tc.id) streamToolCalls[idx].id = tc.id;
578
+ if (tc.function?.name) streamToolCalls[idx].function.name += tc.function.name;
579
+ if (tc.function?.arguments) streamToolCalls[idx].function.arguments += tc.function.arguments;
580
+ }
360
581
  }
582
+ tokenText = delta.content ?? null;
361
583
  }
362
584
 
363
- if (!delta.content) continue;
585
+ if (tokenText === null) continue;
364
586
  rawTokenCount++;
365
- streamContent += delta.content;
587
+ streamContent += tokenText;
366
588
 
367
589
  // Process token through think + tool_call filters, emit visible text live
368
590
  // We scan only the new delta token against the current buffer state
369
- const chunk = delta.content;
591
+ const chunk = tokenText;
370
592
  let visible = '';
371
593
  // Simple per-token state machine — handles split tags across tokens by tracking state flags
372
594
  if (!inThinkBlock && !inToolCallBlock) {
@@ -392,9 +614,59 @@ export class OllamaAgent extends EventEmitter {
392
614
  inToolCallBlock = false;
393
615
  }
394
616
 
395
- if (visible && !inThinkBlock && !inToolCallBlock) {
396
- visibleContent += visible;
397
- this.emit('agent_output', { agentId, output: visible, isChunk: true });
617
+ // Scan ALL lines completed in this token for state transitions.
618
+ // Multi-char tokens can contain multiple lines (WRITE_FILE + ``` in same token).
619
+ if (tokenText.includes('\n')) {
620
+ const tokenStartIdx = streamContent.length - tokenText.length;
621
+ let nlIdx = streamContent.indexOf('\n', tokenStartIdx);
622
+ while (nlIdx !== -1) {
623
+ const lineStart = Math.max(0, streamContent.lastIndexOf('\n', nlIdx - 1)) + 1;
624
+ const line = streamContent.slice(lineStart, nlIdx).trim();
625
+ if (/^(WRITE_FILE|write_file)[:\s]+\S/i.test(line)) {
626
+ inFenceBlock = true; fenceDepth = 0;
627
+ } else if (inFenceBlock && /^```/.test(line)) {
628
+ fenceDepth++;
629
+ if (fenceDepth >= 2 && fenceDepth % 2 === 0) inFenceBlock = false;
630
+ } else if (!inFenceBlock && !inJsonBlob && line.length > 1 && (line.startsWith('{') || line.startsWith('['))) {
631
+ inJsonBlob = true;
632
+ }
633
+ nlIdx = streamContent.indexOf('\n', nlIdx + 1);
634
+ }
635
+ }
636
+
637
+ // Also check current partial line (mid-token, before next \n)
638
+ if (!inFenceBlock || !inJsonBlob) {
639
+ const cleanSC = streamContent.replace(/<think>[\s\S]*?<\/think>/g, '');
640
+ const lastNL = cleanSC.lastIndexOf('\n');
641
+ const curLine = cleanSC.slice(lastNL + 1).trimStart();
642
+ if (!inFenceBlock && /^(WRITE_FILE|write_file)[:\s]+\S/i.test(curLine)) {
643
+ inFenceBlock = true; fenceDepth = 0;
644
+ }
645
+ if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || curLine.startsWith('['))) {
646
+ inJsonBlob = true;
647
+ }
648
+ }
649
+
650
+ // Emit visible content — safety filter removes any ``` or WRITE_FILE lines
651
+ // that slipped through (e.g. partial token at detection boundary)
652
+ if (visible && !inThinkBlock && !inToolCallBlock && !inJsonBlob && !inFenceBlock) {
653
+ const safe = visible.split('\n').filter(ln => {
654
+ const t = ln.trimStart();
655
+ return !t.startsWith('```') && !/^(WRITE_FILE|write_file)/i.test(t);
656
+ }).join('\n');
657
+ if (safe.trim() || safe.includes('\n')) {
658
+ visibleContent += safe;
659
+ lastVisibleAt = Date.now();
660
+ this.emit('agent_output', { agentId, output: safe, isChunk: true });
661
+ }
662
+ }
663
+
664
+ // Thinking timeout: if the model has been in a <think> block for >90s with no visible output,
665
+ // abort the stream so we can retry with a kick. Prevents infinite thinking loops.
666
+ if (inThinkBlock && (Date.now() - lastVisibleAt) > 90000 && rawTokenCount > 100) {
667
+ console.log(` [${agentId}] ⏱️ Think timeout (>90s, ${rawTokenCount} tokens) — aborting stream`);
668
+ reader.cancel().catch(() => {});
669
+ break;
398
670
  }
399
671
  }
400
672
  }
@@ -403,17 +675,35 @@ export class OllamaAgent extends EventEmitter {
403
675
  if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
404
676
 
405
677
  // ── Extract tool calls from content ───────────────────────────────────
406
- // For qwen3: parse <tool_call> XML tags from full streamed content.
407
- // For others: use API-level tool_calls already accumulated above.
678
+ // Try <tool_call> XML tags first (some models emit this format), then fall through
679
+ // to code-fence and JSON text parsers.
408
680
  let parsedTagCalls = null;
409
- if (isQwen3 && Object.keys(streamToolCalls).length === 0) {
681
+ if (Object.keys(streamToolCalls).length === 0) {
410
682
  parsedTagCalls = _parseToolCallTags(streamContent);
411
683
  if (parsedTagCalls) {
412
684
  console.log(` [${agentId}] 🔍 ${parsedTagCalls.length} <tool_call> tag(s) detected`);
413
685
  }
414
686
  }
415
687
 
416
- // Fallback: try legacy JSON-blob detection if no tags found
688
+ // Fallback 1: try WRITE_FILE code-fence format (avoids JSON-escaping issues with code)
689
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
690
+ const fenceCalls = _parseWriteFileFences(streamContent);
691
+ if (fenceCalls) {
692
+ console.log(` [${agentId}] 🔍 ${fenceCalls.length} WRITE_FILE fence(s) detected`);
693
+ parsedTagCalls = fenceCalls;
694
+ }
695
+ }
696
+
697
+ // Fallback 2: "Writing filename...\n```\ncontent\n```" (model ignored WRITE_FILE instruction)
698
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
699
+ const writingCalls = _parseWritingFallback(streamContent, workDir);
700
+ if (writingCalls) {
701
+ console.log(` [${agentId}] 🔍 ${writingCalls.length} Writing-block fallback file(s) detected`);
702
+ parsedTagCalls = writingCalls;
703
+ }
704
+ }
705
+
706
+ // Fallback 3: try legacy JSON-blob detection if no tags found
417
707
  if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
418
708
  const textCalls = _parseTextToolCalls(streamContent);
419
709
  if (textCalls) {
@@ -422,9 +712,35 @@ export class OllamaAgent extends EventEmitter {
422
712
  }
423
713
  }
424
714
 
425
- // Convert tag/text calls into streamToolCalls structure
715
+ // Fallback 4: if we found ONLY bash tool calls but content has writing blocks too,
716
+ // merge them so files get written AND bash runs
717
+ if (parsedTagCalls && streamContent) {
718
+ const writingCalls = _parseWritingFallback(streamContent, workDir);
719
+ if (writingCalls) {
720
+ const existingPaths = new Set(parsedTagCalls.filter(c => c.name === 'write_file').map(c => c.arguments.path));
721
+ const newWrites = writingCalls.filter(c => !existingPaths.has(c.arguments.path));
722
+ if (newWrites.length > 0) {
723
+ console.log(` [${agentId}] 🔍 +${newWrites.length} additional Writing-block file(s) merged`);
724
+ // Prepend file writes before bash commands so files exist before server starts
725
+ parsedTagCalls = [...newWrites, ...parsedTagCalls];
726
+ }
727
+ }
728
+ }
729
+
730
+ // Convert tag/text calls into streamToolCalls structure.
731
+ // Deduplicate: if model emits the same tool call N times in one stream, only run it once.
426
732
  if (parsedTagCalls) {
427
- parsedTagCalls.forEach((tc, i) => {
733
+ const seen = new Set();
734
+ const deduped = parsedTagCalls.filter(tc => {
735
+ const key = `${tc.name}:${JSON.stringify(tc.arguments)}`;
736
+ if (seen.has(key)) return false;
737
+ seen.add(key);
738
+ return true;
739
+ });
740
+ if (deduped.length < parsedTagCalls.length) {
741
+ console.log(` [${agentId}] 🔁 Deduplicated ${parsedTagCalls.length} → ${deduped.length} tool call(s)`);
742
+ }
743
+ deduped.forEach((tc, i) => {
428
744
  streamToolCalls[i] = { id: `tag-${i}`, type: 'function', function: { name: tc.name, arguments: JSON.stringify(tc.arguments) } };
429
745
  });
430
746
  // Don't accumulate raw tool_call XML as user-visible output
@@ -439,17 +755,17 @@ export class OllamaAgent extends EventEmitter {
439
755
  });
440
756
 
441
757
  // ── Push assistant message ────────────────────────────────────────────
758
+ // All local models now use JSON-in-text format on the native endpoint.
759
+ // Strip <think>...</think> blocks to avoid burning context on reasoning traces.
442
760
  const toolCallsArray = Object.values(streamToolCalls);
443
- if (isQwen3) {
444
- // qwen3: assistant message is the raw streamed content (includes <tool_call> tags)
445
- messages.push({ role: 'assistant', content: streamContent || '' });
446
- } else {
447
- messages.push({
448
- role: 'assistant',
449
- content: visibleContent || null,
450
- tool_calls: toolCallsArray.length > 0 ? toolCallsArray : undefined
451
- });
452
- }
761
+ const hasToolCalls = toolCallsArray.length > 0;
762
+ const cleanedContent = (streamContent || '')
763
+ .replace(/<think>[\s\S]*?<\/think>/g, '')
764
+ .trim();
765
+ messages.push({ role: 'assistant', content: cleanedContent || '' });
766
+
767
+ // Incremental save — always, regardless of sessionId (sessionId is null for OllamaAgent)
768
+ this._saveHistory(agentId, workDir, sessionId, messages.slice(1));
453
769
 
454
770
  // ── Execute tool calls ────────────────────────────────────────────────
455
771
  if (toolCallsArray.length > 0) {
@@ -461,14 +777,95 @@ export class OllamaAgent extends EventEmitter {
461
777
  try { parsedArgs = typeof args === 'string' ? JSON.parse(args) : args; }
462
778
  catch { parsedArgs = {}; }
463
779
 
780
+ // ── Unknown tool name detection ──────────────────────────────────
781
+ // Block calls to tools that don't exist (e.g. model writes {"name":"curl",...}
782
+ // instead of {"name":"bash","arguments":{"command":"curl ..."}})
783
+ const VALID_TOOL_NAMES = new Set(['bash','read_file','write_file','list_directory','web_fetch','screenshot_and_describe','take_screenshot','browser']);
784
+ if (!VALID_TOOL_NAMES.has(name.toLowerCase())) {
785
+ console.log(` [${agentId}] ⚠️ Unknown tool "${name}" — blocked`);
786
+ messages.push({ role: 'user', content: `"${name}" is not a valid tool. Valid tools: bash, read_file, write_file, list_directory, web_fetch, screenshot_and_describe. To run a shell command use bash: {"name":"bash","arguments":{"command":"${name} ..."}}.` });
787
+ continue;
788
+ }
789
+
790
+ // ── Placeholder detection ────────────────────────────────────────
791
+ // Block tool calls where the agent passed a literal placeholder like
792
+ // "[The URL where the auction is being viewed]" instead of a real value.
793
+ // These come from the model reading its own planning text and mistaking it
794
+ // for a concrete argument.
795
+ {
796
+ const argStr = JSON.stringify(parsedArgs);
797
+ const hasPlaceholder = /\[(the |this |your |a |an |current )?(url|path|address|link|tab|page|site|location|file|directory)[^\]]*\]/i.test(argStr);
798
+ if (hasPlaceholder) {
799
+ console.log(` [${agentId}] ⚠️ Placeholder in args — blocked: ${argStr.slice(0, 120)}`);
800
+ messages.push({ role: 'user', content: `Tool call BLOCKED: your argument contains a placeholder "${argStr.slice(0, 100)}" — that is NOT a real URL or path. Look at the tool results already in the conversation (e.g. the curl localhost:9223/json output) and use the actual URL you found there.` });
801
+ continue;
802
+ }
803
+ }
804
+
464
805
  this.emit('tool_activity', {
465
806
  agentId, event: 'tool_start', tool: name,
466
807
  description: this._toolDesc(name, parsedArgs)
467
808
  });
468
809
  console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
469
810
  toolsUsed.push(name);
811
+ emptyRetries = 0; // reset on successful tool call
812
+
813
+ // Loop detection: catch repeated single calls AND alternating A/B/A/B patterns.
814
+ // Normalize curl commands: strip sleep prefix so "sleep 3 && curl ...URL" and
815
+ // "sleep 10 && curl ...URL" both map to the same key "curl:URL".
816
+ let callKey = `${name}:${JSON.stringify(parsedArgs)}`;
817
+ if (name === 'bash' && parsedArgs.command) {
818
+ const curlMatch = parsedArgs.command.match(/curl\s+.*?(https?:\/\/\S+|localhost:\d+)/);
819
+ if (curlMatch) callKey = `curl:${curlMatch[1]}`;
820
+ }
821
+ recentCalls.push(callKey);
822
+ if (recentCalls.length > 6) recentCalls.shift();
823
+
824
+ // Detect: same call 3x in a row (2x for screenshot — never valid to screenshot without a change)
825
+ const screenshotLoop = name === 'screenshot_and_describe' && recentCalls.length >= 2 && recentCalls.slice(-2).every(c => c === callKey);
826
+ const last3Same = screenshotLoop || (recentCalls.length >= 3 && recentCalls.slice(-3).every(c => c === callKey));
827
+ // Detect: alternating A,B,A,B pattern (last 4 calls)
828
+ const last4 = recentCalls.slice(-4);
829
+ const abab = last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1];
830
+ // Detect: A,B,C,A,B,C pattern (last 6)
831
+ const last6 = recentCalls.slice(-6);
832
+ const abcabc = last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5];
833
+
834
+ if (last3Same || abab || abcabc) {
835
+ const pattern = last3Same ? 'same call 3x' : abab ? 'A/B/A/B alternating' : 'A/B/C repeating';
836
+ console.log(` [${agentId}] 🔁 Loop detected (${pattern}) — injecting fix hint`);
837
+ // Generate a context-aware hint based on what's looping
838
+ let loopFixMsg = `You are repeating the same action — STOP looping. Observe first, then act.\n`;
839
+ const loopCmd = parsedArgs.command || parsedArgs.path || '';
840
+ const noThink = '';
841
+ if (name === 'write_file') {
842
+ loopFixMsg += `You keep rewriting the same file. The file already exists with your previous code. Do NOT rewrite it from scratch.\nInstead:\n1. call screenshot_and_describe to SEE what the app looks like right now\n2. Identify the specific thing that is wrong or missing\n3. read_file the file to see current content\n4. Make a TARGETED edit — change only the specific broken section\nNever rewrite an entire file when the server is already running.`;
843
+ } else if (loopCmd.includes('mkdir') || loopCmd.includes('client')) {
844
+ loopFixMsg += `Files/folders already exist. STOP creating them. Call screenshot_and_describe to see the current state of the app, then identify what specifically needs to be improved and fix it with targeted edits.`;
845
+ } else if (loopCmd.includes('open http')) {
846
+ const openPortMatch = loopCmd.match(/:(\d+)/);
847
+ const openPort = openPortMatch ? openPortMatch[1] : '????';
848
+ loopFixMsg += `You are calling 'open http://localhost:${openPort}' repeatedly but the server is not running — opening the browser to a dead port does nothing. You must RESTART THE SERVER first:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${openPort}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${openPort}"}}\nIf curl returns 000, check the crash: bash cat /tmp/server.log. Fix the crash FIRST. Only call 'open' after curl returns 200.`;
849
+ } else if (name === 'bash' && (loopCmd.includes('curl') || loopCmd.includes('http_code'))) {
850
+ loopFixMsg += `The server check is looping. Check /tmp/server.log for errors:\n{"name":"bash","arguments":{"command":"cat /tmp/server.log | tail -20"}}\nThen fix the actual error in the code. NEVER change the port.`;
851
+ } else if (loopCmd.includes('npm install')) {
852
+ loopFixMsg += `npm install is looping — packages likely already installed. Skip it and start the server directly with nohup.`;
853
+ } else if (name === 'bash' && (loopCmd.includes('/tmp/') && (loopCmd.includes('.js') || loopCmd.includes('node')) && loopCmd.includes('9223'))) {
854
+ loopFixMsg += `Your Node.js/CDP script is only READING the page — that is why nothing changes. You need to WRITE A NEW SCRIPT THAT CLICKS.\n\nReplace your /tmp script with one that clicks the target element:\n\nWRITE_FILE /tmp/cdp_click.js\n\`\`\`javascript\nconst ws = new WebSocket('ws://localhost:9223/devtools/page/TAB_ID_HERE');\nws.onopen = () => {\n // Click element containing the text you need (change "Filter" to what you see on the page)\n ws.send(JSON.stringify({id:1, method:'Runtime.evaluate', params:{expression: 'Array.from(document.querySelectorAll("a,button,input,span,div,th")).find(el=>el.textContent.trim().includes("Filter"))?.click() || "not found"', returnByValue:true}}));\n};\nws.onmessage = e => { console.log(JSON.parse(e.data)); ws.close(); };\nsetTimeout(() => ws.close(), 5000);\n\`\`\`\n\nThen run: bash → /usr/local/bin/node --experimental-websocket /tmp/cdp_click.js\n\nYou CAN click. You CAN interact. Stop saying you cannot — write the clicking script.`;
855
+ } else if (name === 'screenshot_and_describe') {
856
+ const loopPort = (parsedArgs.url || '').match(/:(\d+)/)?.[1] || '????';
857
+ loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing. You have two choices:\n\nA) If the user asked a question or gave feedback — answer them with TEXT. You do NOT need a screenshot to reply to a conversation. Just write your response.\n\nB) If the app needs to be improved — make a CODE CHANGE first, then take ONE screenshot to verify:\n1. read_file the file that needs changing\n2. write_file with the improvement\n3. restart the server: bash pkill+nohup\n4. screenshot ONCE to verify\n\nDo NOT take another screenshot without first doing one of the above.`;
858
+ } else {
859
+ loopFixMsg += `Observe the tool results above, identify what is specifically broken, then make a targeted fix. Do not repeat commands that already ran.`;
860
+ }
861
+ loopFixMsg += noThink;
862
+ messages.push({ role: 'user', content: loopFixMsg });
863
+ // Don't fully reset — keep 1 entry so next identical call fires after 2 more (not 3)
864
+ recentCalls.splice(0, recentCalls.length - 1);
865
+ break; // break inner tool loop, let model respond to hint
866
+ }
470
867
 
471
- const result = await this._executeTool(name, parsedArgs, workDir);
868
+ const result = await this._executeTool(name, parsedArgs, workDir, agentId);
472
869
 
473
870
  this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
474
871
 
@@ -477,30 +874,107 @@ export class OllamaAgent extends EventEmitter {
477
874
  this.emit('agent_image', { agentId, image: result });
478
875
  }
479
876
 
480
- if (isQwen3) {
481
- // qwen3 format: tool results go back as user messages with <tool_response> tags
482
- if (isImageResult && isVision) {
877
+ // ALL models get tool results fed back — no model should run blind.
878
+ // This is the core of the observe reason act loop: every tool result
879
+ // must be in context so the model can see what happened and react correctly.
880
+ {
881
+ const noThink = '';
882
+ if (isImageResult) {
483
883
  const base64 = result.replace(/^data:image\/\w+;base64,/, '');
484
- messages.push({ role: 'user', content: '<tool_response>\n[Screenshot captured]\n</tool_response>', images: [base64] });
884
+ messages.push({ role: 'user', content: `[${name} result]: Screenshot captured. Continue with the next step.${noThink}`, images: [base64] });
485
885
  } else {
486
- const resultText = isImageResult ? '[Screenshot captured — vision model needed to analyze]' : String(result).slice(0, 8000);
487
- messages.push({ role: 'user', content: `<tool_response>\n${resultText}\n</tool_response>` });
488
- }
489
- } else {
490
- // Standard OpenAI format
491
- if (isImageResult && isVision) {
492
- messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: '[Screenshot captured see image attached]' });
493
- const base64 = result.replace(/^data:image\/\w+;base64,/, '');
494
- messages.push({ role: 'user', content: 'Here is the screenshot:', images: [base64] });
495
- } else {
496
- messages.push({ role: 'tool', tool_call_id: toolCall.id || undefined, content: isImageResult ? '[Screenshot captured]' : String(result).slice(0, 8000) });
886
+ const resultText = isImageResult ? '[Screenshot captured]' : String(result).slice(0, 6000);
887
+ messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\nContinue with the next step.${noThink}` });
888
+
889
+ if (name === 'screenshot_and_describe') {
890
+ const screenshotResult = String(result);
891
+ const isLocalhost = (parsedArgs.url || '').includes('localhost') || (parsedArgs.url || '').includes('127.0.0.1');
892
+ // Server unreachable on localhost force bash restart (only for local servers, not public URLs)
893
+ if (screenshotResult.includes('SERVER IS NOT REACHABLE') && isLocalhost) {
894
+ const portMatch = (parsedArgs.url || '').match(/:(\d+)/);
895
+ const port = portMatch ? portMatch[1] : '????';
896
+ messages.push({ role: 'user', content: `The local server on port ${port} is not running. Restart it with bash — find the project directory, then: pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd /path/to/project && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${port}` });
897
+ }
898
+ // Public URL unreachable — try web_fetch instead
899
+ else if (screenshotResult.includes('SERVER IS NOT REACHABLE') && !isLocalhost) {
900
+ messages.push({ role: 'user', content: `screenshot_and_describe could not reach ${parsedArgs.url}. Try web_fetch instead:\n{"name":"web_fetch","arguments":{"url":"${parsedArgs.url}"}}` });
901
+ }
902
+ // Dependency audit issues — prevent port-hopping
903
+ else if (screenshotResult.includes('DEPENDENCY AUDIT FOUND ISSUES')) {
904
+ messages.push({ role: 'user', content: `CRITICAL: Missing client-side libraries in your HTML. Do NOT change the port. Fix it: (1) read_file the HTML; (2) add the missing script tags; (3) write_file back; (4) restart server same port; (5) screenshot to verify.` });
905
+ }
906
+ // Successful screenshot of a build task — push to make a code change
907
+ else if (isLocalhost) {
908
+ messages.push({ role: 'user', content: `You have seen the current state. Now make your next improvement: read_file the code, write_file the fix, restart server, then screenshot once to verify.` });
909
+ }
910
+ // Successful screenshot of a public URL — agent is doing research, let it reason
911
+ }
912
+ // Catch placeholder/hello world pages — force the model to keep building
913
+ const screenshotText = String(result).toLowerCase();
914
+ const isPlaceholder = (
915
+ screenshotText.includes('hello world') ||
916
+ screenshotText.includes('cannot get /') ||
917
+ (screenshotText.includes('express') && screenshotText.includes('error')) ||
918
+ screenshotText.includes('placeholder') ||
919
+ screenshotText.includes('coming soon') ||
920
+ (screenshotText.includes('blank') && !screenshotText.includes('not blank'))
921
+ );
922
+ if (isPlaceholder) {
923
+ messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
924
+ }
497
925
  }
498
926
  }
499
927
  }
500
928
  continue; // loop back for next model turn
501
929
  }
502
930
 
503
- // ── No tool calls: final answer ───────────────────────────────────────
931
+ // ── No tool calls ────────────────────────────────────────────────────
932
+ {
933
+ const combined = (visibleContent + streamContent).replace(/<think>[\s\S]*?<\/think>/g, '');
934
+ const hasContent = combined.trim().length > 30;
935
+ const isEmpty = combined.trim().length === 0;
936
+
937
+ // Structural: truncated JSON — model started a tool call but stream ended early
938
+ const hasTruncatedJson = /\{"name"\s*:\s*"(bash|web_fetch|screenshot_and_describe|read_file|write_file|list_directory)"/i.test(streamContent) && Object.keys(streamToolCalls).length === 0;
939
+ if (hasTruncatedJson) {
940
+ console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call — kicking to re-output`);
941
+ messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
942
+ continue;
943
+ }
944
+
945
+ // Structural: empty response — model produced nothing
946
+ if (isEmpty) {
947
+ if (emptyRetries < 3) {
948
+ emptyRetries++;
949
+ console.log(` [${agentId}] ⚡ Turn ${turn}: empty response (retry ${emptyRetries}/3) — kicking`);
950
+ messages.push({ role: 'user', content: toolsUsed.length === 0 ? 'Start now — make your first tool call.' : 'You stopped. Make your next tool call.' });
951
+ continue;
952
+ }
953
+ console.log(` [${agentId}] ⚠️ Turn ${turn}: empty after 3 retries`);
954
+ }
955
+
956
+ // Structural: agent hasn't used any tools yet — it must act before it can answer
957
+ if (toolsUsed.length === 0 && hasContent) {
958
+ console.log(` [${agentId}] ⚡ Turn ${turn}: no tools used yet — kicking to act`);
959
+ messages.push({ role: 'user', content: 'Make your first tool call now.' });
960
+ continue;
961
+ }
962
+
963
+ // Semantic: ask the LLM whether the task is actually complete.
964
+ // This replaces all regex-based intent detection — the model judges its own output.
965
+ if (hasContent && toolsUsed.length > 0) {
966
+ const originalTask = messages.find(m => m.role === 'user')?.content || task;
967
+ const isDone = await this._isTaskComplete(originalTask, combined, controller.signal);
968
+ if (!isDone) {
969
+ console.log(` [${agentId}] ⚡ Turn ${turn}: LLM says task incomplete — kicking`);
970
+ messages.push({ role: 'user', content: 'You have not completed the task yet. Try a different approach and keep going.' });
971
+ continue;
972
+ }
973
+ console.log(` [${agentId}] ✅ Turn ${turn}: LLM confirmed task complete`);
974
+ }
975
+ }
976
+
977
+ // ── Final answer ──────────────────────────────────────────────────────
504
978
  if (visibleContent) finalContent = visibleContent;
505
979
  break;
506
980
 
@@ -519,7 +993,7 @@ export class OllamaAgent extends EventEmitter {
519
993
  ];
520
994
 
521
995
  try {
522
- const summaryRes = await fetch(`${this.baseUrl}/v1/chat/completions`, {
996
+ const summaryRes = await fetch(`${this.baseUrl}/api/chat`, {
523
997
  method: 'POST',
524
998
  headers: { 'Content-Type': 'application/json' },
525
999
  signal: controller.signal,
@@ -527,7 +1001,8 @@ export class OllamaAgent extends EventEmitter {
527
1001
  model: effectiveModel,
528
1002
  messages: summaryMessages,
529
1003
  stream: true,
530
- ...(isQwen3 ? { options: { think: false } } : {})
1004
+ think: false,
1005
+ options: { num_ctx: 32768 }
531
1006
  })
532
1007
  });
533
1008
 
@@ -570,7 +1045,7 @@ export class OllamaAgent extends EventEmitter {
570
1045
  }
571
1046
 
572
1047
  // Persist history for next task
573
- if (finalContent && sessionId) {
1048
+ if (finalContent) {
574
1049
  this._saveHistory(agentId, workDir, sessionId, [
575
1050
  ...history,
576
1051
  { role: 'user', content: task },
@@ -629,16 +1104,96 @@ export class OllamaAgent extends EventEmitter {
629
1104
 
630
1105
  // ─── Tool execution ───────────────────────────────────────────────────────
631
1106
 
632
- async _executeTool(name, args, workDir) {
1107
+ async _executeTool(name, args, workDir, agentId = 'agent') {
633
1108
  try {
634
1109
  switch (name) {
635
1110
  case 'bash': {
1111
+ // Block commands that would kill the worker process itself.
1112
+ // "pkill -f node" and "killall node" match the worker's own process.
1113
+ // Rewrite to only kill processes by their specific server log path or port.
1114
+ const cmd = args.command || '';
1115
+ if (/pkill\s+(-\w+\s+)*(-f\s+)?node\b/i.test(cmd) || /killall\s+node\b/i.test(cmd)) {
1116
+ // Safe replacement: kill only the app server on the port, not all node processes
1117
+ const portMatch = cmd.match(/localhost:(\d+)|:(\d+)/);
1118
+ const serverLogMatch = cmd.match(/server\.js/);
1119
+ if (portMatch || serverLogMatch) {
1120
+ const safeCmd = portMatch
1121
+ ? `lsof -ti:${portMatch[1] || portMatch[2]} | xargs kill -9 2>/dev/null || true`
1122
+ : `pkill -f "server.js" 2>/dev/null || true`;
1123
+ args = { ...args, command: safeCmd + cmd.slice(cmd.indexOf('&&') !== -1 ? cmd.indexOf('&&') : cmd.length) };
1124
+ } else {
1125
+ // No specific target — skip the pkill entirely, just run what follows &&
1126
+ const afterAnd = cmd.indexOf('&&');
1127
+ if (afterAnd !== -1) {
1128
+ args = { ...args, command: cmd.slice(afterAnd + 2).trim() };
1129
+ } else {
1130
+ return 'Skipped broad pkill to protect worker process. Use: lsof -ti:PORT | xargs kill -9';
1131
+ }
1132
+ }
1133
+ }
1134
+
1135
+ // Intercept "open http://..." — navigate the AgentForge CDP browser directly,
1136
+ // then auto-screenshot so the agent immediately sees what it built.
1137
+ const openUrlMatch = args.command.trim().match(/^open\s+(https?:\/\/\S+)/);
1138
+ if (openUrlMatch) {
1139
+ const targetUrl = openUrlMatch[1];
1140
+ let openedViaCDP = false;
1141
+ try {
1142
+ const newTabRes = await fetch('http://127.0.0.1:9223/json/new', { method: 'PUT', signal: AbortSignal.timeout(3000) });
1143
+ const newTabData = await newTabRes.json();
1144
+ const tabWs = new WebSocket(`ws://127.0.0.1:9223/devtools/page/${newTabData.id}`);
1145
+ await new Promise(r => tabWs.on('open', r));
1146
+ await new Promise(r => {
1147
+ let navigated = false;
1148
+ tabWs.send(JSON.stringify({ id: 1, method: 'Page.navigate', params: { url: targetUrl } }));
1149
+ tabWs.on('message', () => { if (!navigated) { navigated = true; tabWs.close(); r(); } });
1150
+ setTimeout(() => { tabWs.close(); r(); }, 3000);
1151
+ });
1152
+ openedViaCDP = true;
1153
+ } catch {
1154
+ // CDP unavailable — fall through to OS open
1155
+ try { await execAsync(`open "${targetUrl}"`); } catch {}
1156
+ }
1157
+ // Auto-screenshot after opening so the agent sees what it built.
1158
+ // Wait for page to load, then call screenshot_and_describe.
1159
+ await new Promise(r => setTimeout(r, 2500));
1160
+ try {
1161
+ const screenshotResult = await this._executeTool('screenshot_and_describe', {
1162
+ url: targetUrl,
1163
+ check_for: 'the running application',
1164
+ send_to_user: true
1165
+ }, workDir, agentId);
1166
+ return `Opened ${targetUrl} in browser${openedViaCDP ? ' (AgentForge browser)' : ''}.\n\nVisual snapshot of what is currently visible:\n${screenshotResult}`;
1167
+ } catch {
1168
+ return `Opened ${targetUrl} in browser. (Screenshot failed — verify with screenshot_and_describe)`;
1169
+ }
1170
+ }
1171
+
1172
+ // If workDir doesn't exist (e.g. /tmp was cleared after worker restart),
1173
+ // fall back to HOME rather than failing with ENOENT on every bash call.
1174
+ let bashCwd = workDir;
1175
+ const _home = process.env.HOME || '/tmp';
1176
+ try { if (!existsSync(bashCwd)) bashCwd = _home; } catch { bashCwd = _home; }
1177
+ // Background commands (ending with &) return no stdout — the model interprets
1178
+ // silence as failure and loops. Run them, then read back any log file to confirm.
1179
+ const isBackground = /&\s*$/.test(args.command.trim());
636
1180
  const { stdout, stderr } = await execAsync(args.command, {
637
- cwd: workDir,
638
- timeout: 60000,
1181
+ cwd: bashCwd,
1182
+ timeout: 120000,
639
1183
  maxBuffer: 1024 * 1024 * 2 // 2MB
640
1184
  });
641
- return (stdout + stderr).trim() || '(no output)';
1185
+ const out = (stdout + stderr).trim();
1186
+ if (isBackground && !out) {
1187
+ // Give the process a moment to start, then check /tmp/server.log if it exists
1188
+ await new Promise(r => setTimeout(r, 1500));
1189
+ let confirmation = 'Background process started.';
1190
+ try {
1191
+ const logContent = readFileSync('/tmp/server.log', 'utf-8').trim().split('\n').slice(-3).join('\n');
1192
+ if (logContent) confirmation = `Background process started. Server log:\n${logContent}`;
1193
+ } catch { /* no log yet */ }
1194
+ return confirmation;
1195
+ }
1196
+ return out || '(no output)';
642
1197
  }
643
1198
 
644
1199
  case 'read_file': {
@@ -685,6 +1240,26 @@ export class OllamaAgent extends EventEmitter {
685
1240
  }
686
1241
  }
687
1242
 
1243
+ case 'screenshot_and_describe': {
1244
+ const result = await this._screenshotAndDescribe(args.url, args.check_for);
1245
+ // Always send screenshot to user — agent called this tool, user should always see it
1246
+ if (this._lastScreenshotData) {
1247
+ this.emit('agent_image', { agentId, image: this._lastScreenshotData });
1248
+ this._lastScreenshotData = null;
1249
+ }
1250
+ return result;
1251
+ }
1252
+
1253
+ case 'browser': {
1254
+ const result = await browserAction(args);
1255
+ if (result && result.__screenshot) {
1256
+ const imgData = `data:image/png;base64,${result.base64}`;
1257
+ this.emit('agent_image', { agentId, image: imgData });
1258
+ return `Screenshot taken (${Math.round(result.base64.length * 0.75 / 1024)}KB). Image sent to chat.`;
1259
+ }
1260
+ return typeof result === 'string' ? result : JSON.stringify(result);
1261
+ }
1262
+
688
1263
  default:
689
1264
  return `Unknown tool: ${name}`;
690
1265
  }
@@ -697,24 +1272,13 @@ export class OllamaAgent extends EventEmitter {
697
1272
 
698
1273
  async _cdpScreenshot(navigateUrl, tmpFile) {
699
1274
  const CDP_PORT = 9223;
700
- let tabId;
701
-
702
- // Get or create a tab
703
- const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
704
- const tabs = await tabsRes.json();
705
- const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
706
-
707
- if (!usable) {
708
- // Create new tab
709
- const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
710
- const newTabData = await newTab.json();
711
- tabId = newTabData.id;
712
- } else {
713
- tabId = usable.id;
714
- }
1275
+
1276
+ // Always create a NEW tab — never hijack the dashboard or other existing tabs
1277
+ const newTabRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
1278
+ const newTabData = await newTabRes.json();
1279
+ const tabId = newTabData.id;
715
1280
 
716
1281
  return new Promise((resolve, reject) => {
717
- // Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
718
1282
  const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
719
1283
  let msgId = 1;
720
1284
  const pending = new Map();
@@ -739,10 +1303,12 @@ export class OllamaAgent extends EventEmitter {
739
1303
  try {
740
1304
  if (navigateUrl) {
741
1305
  await send('Page.navigate', { url: navigateUrl });
742
- // Wait for load
1306
+ // Wait for page to fully render
743
1307
  await new Promise(r => setTimeout(r, 3000));
744
1308
  }
745
1309
  const { data } = await send('Page.captureScreenshot', { format: 'png' });
1310
+ // Close the temporary tab
1311
+ await send('Target.closeTarget', { targetId: tabId }).catch(() => {});
746
1312
  ws.close();
747
1313
  resolve(`data:image/png;base64,${data}`);
748
1314
  } catch (err) {
@@ -752,10 +1318,129 @@ export class OllamaAgent extends EventEmitter {
752
1318
  });
753
1319
 
754
1320
  ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
755
- setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
1321
+ setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 25000);
756
1322
  });
757
1323
  }
758
1324
 
1325
+ // ─── Screenshot + vision analysis ─────────────────────────────────────────
1326
+ // Takes a screenshot of a URL, then asks the active vision model to describe it.
1327
+ // Returns a plain-text description the main agent can reason about.
1328
+
1329
+ async _screenshotAndDescribe(url, checkFor) {
1330
+ const question = checkFor
1331
+ ? `Does this web page look like it's working? Specifically check: ${checkFor}. Describe precisely what you see — the background color, any canvas element, colored shapes (even tiny dots), text, buttons, game elements, or error messages. Is the background dark or white? Are there any colored pixels at all?`
1332
+ : `Describe what you see on this web page. What is the background color? Are there any colored shapes, text, buttons, or UI elements? Is there a canvas? Even tiny colored dots count — be precise about what you see.`;
1333
+
1334
+ // === Server reachability check — fast fail if server is down ===
1335
+ try {
1336
+ await fetch(url, { signal: AbortSignal.timeout(4000) });
1337
+ } catch (reachErr) {
1338
+ const portMatch = url.match(/:(\d+)/);
1339
+ const port = portMatch ? portMatch[1] : '?';
1340
+ return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
1341
+ }
1342
+
1343
+ // === HTML dependency audit (always runs — fast, reliable) ===
1344
+ // Fetches the page HTML and checks for common missing client-side dependencies.
1345
+ // This catches issues that screenshots can't detect (JS errors, missing script tags).
1346
+ let auditNotes = '';
1347
+ try {
1348
+ const htmlRes = await fetch(url, { signal: AbortSignal.timeout(8000) });
1349
+ const html = await htmlRes.text();
1350
+ const missing = [];
1351
+ // Check for socket.io client usage without the script tag
1352
+ if (/\bio\s*\(/.test(html) && !html.includes('/socket.io/socket.io.js')) {
1353
+ missing.push('Missing <script src="/socket.io/socket.io.js"></script> — io() is called but the client library is not loaded');
1354
+ // Also verify the server actually serves it
1355
+ try {
1356
+ const sioRes = await fetch(url.replace(/\/$/, '') + '/socket.io/socket.io.js', { signal: AbortSignal.timeout(5000) });
1357
+ if (!sioRes.ok || (await sioRes.text()).startsWith('<!')) {
1358
+ missing.push('Server does NOT serve /socket.io/socket.io.js — check that socket.io is installed and express-static is set up');
1359
+ }
1360
+ } catch {}
1361
+ }
1362
+ if (missing.length > 0) {
1363
+ auditNotes = `\n\nHTML DEPENDENCY AUDIT FOUND ISSUES:\n${missing.map(m => '- ' + m).join('\n')}`;
1364
+ }
1365
+ } catch {}
1366
+
1367
+ let imageData;
1368
+ const tmpFile = `/tmp/af_verify_${Date.now()}.png`;
1369
+
1370
+ // Try AgentForge browser via CDP first
1371
+ try {
1372
+ imageData = await this._cdpScreenshot(url, null);
1373
+ } catch (cdpErr) {
1374
+ // CDP not available — try puppeteer headless screenshot
1375
+ try {
1376
+ const puppeteerModule = process.env.HOME + '/.npm-global/lib/node_modules/puppeteer';
1377
+ const scriptFile = `/tmp/af_pup_${Date.now()}.js`;
1378
+ const nodeScript = `
1379
+ const puppeteer = require(${JSON.stringify(puppeteerModule)});
1380
+ (async () => {
1381
+ const browser = await puppeteer.launch({headless: true, protocolTimeout: 30000, args: ['--no-sandbox','--disable-setuid-sandbox','--disable-gpu','--disable-dev-shm-usage']});
1382
+ const page = await browser.newPage();
1383
+ await page.setDefaultNavigationTimeout(12000);
1384
+ await page.setViewport({width: 1280, height: 900});
1385
+ try {
1386
+ await page.goto(${JSON.stringify(url)}, {waitUntil: 'domcontentloaded', timeout: 12000}).catch(()=>{});
1387
+ await new Promise(r => setTimeout(r, 2500));
1388
+ await page.screenshot({path: ${JSON.stringify(tmpFile)}, fullPage: true});
1389
+ console.log('puppeteer screenshot ok');
1390
+ } finally {
1391
+ await browser.close();
1392
+ }
1393
+ })().then(() => process.exit(0)).catch(e => { console.error(e.message); process.exit(1); });
1394
+ `;
1395
+ writeFileSync(scriptFile, nodeScript);
1396
+ await execAsync(`/usr/local/bin/node "${scriptFile}"`, { timeout: 45000 });
1397
+ await execAsync(`rm -f "${scriptFile}"`).catch(() => {});
1398
+ const raw = readFileSync(tmpFile).toString('base64');
1399
+ await execAsync(`rm -f "${tmpFile}"`).catch(() => {});
1400
+ imageData = `data:image/png;base64,${raw}`;
1401
+ } catch (pupErr) {
1402
+ console.warn(` [screenshot_and_describe] puppeteer failed: ${pupErr.message}`);
1403
+ // No screenshot possible — return audit notes only
1404
+ return `Cannot take screenshot (CDP: ${cdpErr.message}, puppeteer: ${pupErr.message}). ${auditNotes || 'No dependency issues found in HTML. Check server logs for errors.'}`;
1405
+ }
1406
+ }
1407
+
1408
+ // Store imageData so caller can emit to user if send_to_user=true
1409
+ this._lastScreenshotData = imageData;
1410
+
1411
+ const base64 = imageData.replace(/^data:image\/\w+;base64,/, '');
1412
+
1413
+ // Use the active model for vision analysis.
1414
+ try {
1415
+ // /api/chat with images array — supported by all Ollama vision-capable models
1416
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
1417
+ method: 'POST',
1418
+ headers: { 'Content-Type': 'application/json' },
1419
+ body: JSON.stringify({
1420
+ model: this.model,
1421
+ messages: [{ role: 'user', content: question, images: [base64] }],
1422
+ stream: false,
1423
+ options: { num_ctx: 4096 }
1424
+ }),
1425
+ signal: AbortSignal.timeout(120000)
1426
+ });
1427
+
1428
+ if (res.ok) {
1429
+ const json = await res.json();
1430
+ const description = json.message?.content || json.response || '';
1431
+ const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
1432
+ if (clean) {
1433
+ console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
1434
+ return `Screenshot analysis of ${url}:\n${clean}${auditNotes}`;
1435
+ }
1436
+ }
1437
+ } catch (err) {
1438
+ console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
1439
+ }
1440
+
1441
+ return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}`;
1442
+ }
1443
+
759
1444
  _resolvePath(p, workDir) {
760
1445
  return path.isAbsolute(p) ? p : path.join(workDir, p);
761
1446
  }
@@ -781,28 +1466,65 @@ export class OllamaAgent extends EventEmitter {
781
1466
  }
782
1467
 
783
1468
  // ─── History persistence ──────────────────────────────────────────────────
784
-
785
- _historyPath(workDir, sessionId) {
786
- return path.join(workDir, `.ollama_history_${sessionId}.json`);
1469
+ // History lives at ~/.agentforge/history/{agentId}.json — one canonical file
1470
+ // per agent, independent of workDir/sessionId/machine state. Never gets lost
1471
+ // due to workDir changes, worker restarts, or Railway assigning new sessionIds.
1472
+
1473
+ _historyPath(agentId) {
1474
+ const home = process.env.HOME || '/tmp';
1475
+ const dir = path.join(home, '.agentforge', 'history');
1476
+ if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
1477
+ return path.join(dir, `${agentId}.json`);
787
1478
  }
788
1479
 
789
1480
  _loadHistory(agentId, workDir, sessionId) {
790
- if (!sessionId) return [];
791
1481
  try {
792
- const fp = this._historyPath(workDir, sessionId);
1482
+ const fp = this._historyPath(agentId);
793
1483
  if (existsSync(fp)) {
794
1484
  const data = JSON.parse(readFileSync(fp, 'utf-8'));
795
- // Keep last 20 messages to stay within context
1485
+ console.log(` [${agentId}] Loaded ${data.length} history msgs from ~/.agentforge/history/`);
796
1486
  return data.slice(-12);
797
1487
  }
798
- } catch {}
1488
+ } catch (e) {
1489
+ console.warn(`⚠️ [${agentId}] History load error: ${e.message}`);
1490
+ }
799
1491
  return [];
800
1492
  }
801
1493
 
802
1494
  _saveHistory(agentId, workDir, sessionId, messages) {
803
1495
  try {
804
- const fp = this._historyPath(workDir, sessionId);
1496
+ const fp = this._historyPath(agentId);
805
1497
  writeFileSync(fp, JSON.stringify(messages.slice(-20), null, 2));
806
- } catch {}
1498
+ } catch (e) {
1499
+ console.warn(`⚠️ [${agentId}] History save error: ${e.message}`);
1500
+ }
1501
+ }
1502
+
1503
+ async _isTaskComplete(task, output, signal) {
1504
+ try {
1505
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
1506
+ method: 'POST',
1507
+ headers: { 'Content-Type': 'application/json' },
1508
+ signal,
1509
+ body: JSON.stringify({
1510
+ model: this.model,
1511
+ messages: [
1512
+ { role: 'system', content: 'You determine if a task is complete. Reply with only "yes" or "no".' },
1513
+ { role: 'user', content: `Task: ${task.slice(0, 300)}\n\nAgent output: ${output.slice(0, 600)}\n\nDid the agent fully complete the task with real results (not excuses, not plans, not partial attempts)?` }
1514
+ ],
1515
+ stream: false,
1516
+ think: false,
1517
+ options: { num_ctx: 2048 }
1518
+ })
1519
+ });
1520
+ if (!res.ok) return true;
1521
+ const data = await res.json();
1522
+ const answer = (data.message?.content || '').toLowerCase().trim();
1523
+ console.log(` [_isTaskComplete] verdict: "${answer}"`);
1524
+ return answer.startsWith('yes');
1525
+ } catch (e) {
1526
+ console.warn(`⚠️ [_isTaskComplete] error: ${e.message}`);
1527
+ return true; // assume done on error to avoid infinite loops
1528
+ }
807
1529
  }
808
1530
  }