@hamp10/agentforge 0.2.21 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,11 @@
1
- import { exec } from 'child_process';
1
+ import { exec, spawn } from 'child_process';
2
2
  import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync, statSync, appendFileSync } from 'fs';
3
3
  import { EventEmitter } from 'events';
4
4
  import path from 'path';
5
+ import { homedir } from 'os';
5
6
  import { promisify } from 'util';
6
7
  import { fileURLToPath } from 'url';
7
- import { browserAction } from './hampagent/browser.js';
8
+ import { browserAction, releaseAgentTab } from './browser.js';
8
9
 
9
10
  const execAsync = promisify(exec);
10
11
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -136,8 +137,13 @@ function _parseWriteFileFences(content) {
136
137
  const re = /(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n([\s\S]*?)```/gi;
137
138
  let m;
138
139
  while ((m = re.exec(content)) !== null) {
139
- const filePath = m[1].trim();
140
+ const filePath = m[1].trim().replace(/\]$/, ''); // strip trailing ] if model used [write_file: /path] bracket notation
140
141
  const fileContent = m[2]; // raw content, no unescaping needed
142
+ // Reject compaction placeholders — model echoed the summary as content
143
+ if (/^\[wrote:/.test(fileContent.trim()) || /^\(\d+ chars, \d+ lines —/.test(fileContent.trim())) {
144
+ console.log(` ⚠️ WRITE_FILE skipped: content is a compaction placeholder, not real file content (${filePath})`);
145
+ continue;
146
+ }
141
147
  if (filePath && fileContent !== undefined) {
142
148
  calls.push({ name: 'write_file', arguments: { path: filePath, content: fileContent } });
143
149
  }
@@ -261,7 +267,10 @@ function _parseTextToolCalls(content) {
261
267
  if (depth === 0 && jsonStr.trim()) break;
262
268
  }
263
269
  try {
264
- const obj = JSON.parse(jsonStr.trim());
270
+ // Strip Gemma4 model artifacts that can appear after a complete JSON object:
271
+ // <tool_call|>, <|end_of_turn|>, <|end|>, etc.
272
+ const cleanJson = jsonStr.trim().replace(/<[^>]*>$/g, '').trimEnd();
273
+ const obj = JSON.parse(cleanJson);
265
274
  if (Array.isArray(obj)) {
266
275
  for (const item of obj) {
267
276
  const call = normalise(item);
@@ -334,15 +343,27 @@ export class OllamaAgent extends EventEmitter {
334
343
  return { agentId, workDir };
335
344
  }
336
345
 
337
- async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null) {
346
+ async runAgentTask(agentId, task, workDir, sessionId = null, image = null, browserProfile = null, actualWorkDir = null, agentModel = null, customSystemPrompt = null, conversationHistory = null, allImages = null, visionModel = null, providerKeys = null) {
338
347
  const startTime = Date.now();
339
348
  const controller = new AbortController();
340
349
 
341
350
  // Use per-agent model override if provided (and not the placeholder 'Default').
342
351
  // Strip 'ollama/' prefix — catalog returns IDs like 'ollama/modelname:tag' but
343
352
  // Ollama's API expects bare names like 'modelname:tag'.
353
+ // Cloud model IDs (google/..., anthropic/..., openai/...) are not valid Ollama names —
354
+ // fall back to the configured local model so a mismatch doesn't crash the task.
344
355
  const rawModel = (agentModel && agentModel !== 'Default') ? agentModel : this.model;
345
- const effectiveModel = rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel;
356
+ const isCloudModel = /^(google|anthropic|openai|mistral|cohere|azure)\//i.test(rawModel);
357
+ const effectiveModel = isCloudModel ? this.model : (rawModel.startsWith('ollama/') ? rawModel.slice(7) : rawModel);
358
+ if (isCloudModel) console.log(` [${agentId}] ⚠️ Cloud model ID "${rawModel}" ignored by local runner — using ${effectiveModel}`);
359
+
360
+ // Store per-task vision settings — used by _screenshotAndDescribe during this task
361
+ // Cleared at the end of the task so stale keys don't leak between tasks
362
+ this._taskVisionModel = visionModel || null;
363
+ this._taskProviderKeys = providerKeys || null;
364
+ const googleKey = providerKeys?.google || null;
365
+ if (visionModel) console.log(` [${agentId}] 👁️ Vision model: ${visionModel} (google key: ${googleKey ? 'present' : 'MISSING'})`);
366
+ else console.log(` [${agentId}] 👁️ Vision: Ollama default (no vision_model in flow config)`);
346
367
 
347
368
  // Fake proc-like object so worker.js pid checks don't crash
348
369
  const fakeProc = { pid: null };
@@ -356,7 +377,7 @@ export class OllamaAgent extends EventEmitter {
356
377
  // Load conversation history — prefer Railway DB history (sent via task payload, works across
357
378
  // any machine/user/model). Fall back to local file for offline or pre-fix sessions.
358
379
  const history = (conversationHistory && conversationHistory.length > 0)
359
- ? conversationHistory.slice(-20)
380
+ ? conversationHistory.slice(-60)
360
381
  : this._loadHistory(agentId, workDir, sessionId);
361
382
 
362
383
  // Text-based tool format is used rather than XML schemas — more reliable across models.
@@ -364,6 +385,27 @@ export class OllamaAgent extends EventEmitter {
364
385
  // ALL models get the same rule set and tool format — no model-specific branching.
365
386
  const homeDir = process.env.HOME || '/tmp';
366
387
  const projectsDir = `${homeDir}/Desktop/Projects`;
388
+
389
+ // ── Per-agent port assignment ──────────────────────────────────────────
390
+ // Each agent gets a deterministic port in range 3100-59099 derived from its ID
391
+ // (56000-port space — handles tens of thousands of projects before any collision).
392
+ // Port 3000 is reserved for agent_dashboard. At task start, any stale process
393
+ // on the assigned port is killed. If the port is still occupied by a live
394
+ // unrelated process, we walk up until we find a free one.
395
+ const agentPortOffset = parseInt(agentId.replace(/\D/g, '').slice(-5) || '0') % 56000;
396
+ let assignedPort = 3100 + agentPortOffset;
397
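+ // Kill whatever is using the assigned port — intended for a stale server from a previous run of THIS agent, but kill -9 is sent to every PID lsof reports for that port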
398
+ try { await execAsync(`lsof -t -i:${assignedPort} | xargs kill -9 2>/dev/null || true`); } catch {}
399
+ // If something else is still on that port, scan upward for a free one
400
+ for (let attempts = 0; attempts < 100; attempts++) {
401
+ try {
402
+ const { stdout } = await execAsync(`lsof -t -i:${assignedPort} 2>/dev/null || true`);
403
+ if (!stdout.trim()) break; // port is free
404
+ assignedPort++;
405
+ if (assignedPort > 59099) assignedPort = 3100;
406
+ } catch { break; }
407
+ }
408
+ console.log(` [${agentId}] 🔌 Assigned port: ${assignedPort}`);
367
409
  const universalRules = `
368
410
  == WHAT YOU CAN DO ==
369
411
  You have these tools:
@@ -373,7 +415,7 @@ read_file: Read a local file.
373
415
  WRITE_FILE: Write a local file (code-fence format only).
374
416
  list_directory: List a local directory.
375
417
  web_fetch: Fetch any public URL — websites, APIs, docs, raw data. Fast, text-only.
376
- screenshot_and_describe: Navigate a real browser to any URL and screenshot it. Use this when pages require JavaScript, you need visual output, or web_fetch returns nothing useful.
418
+ screenshot_and_describe: Take a screenshot and analyze it with vision so YOU can see and reason about what's on screen. Use this when: pages are JS-heavy, snapshot gives partial/empty data, you need to read numbers/text that aren't in the DOM, or you want to verify what's actually visible. Pass url to navigate first, or omit url to screenshot the current browser tab. Returns a text description YOU can reason about — this is NOT just for the user, it is how YOU SEE THE PAGE.
377
419
  browser: Control the AgentForge Browser directly (Chrome, always running, logged into user's services). Use for ALL browser interaction — navigating, clicking, typing, reading page content, screenshots.
378
420
 
379
421
  BROWSER TOOL — use this instead of writing CDP scripts:
@@ -385,7 +427,7 @@ BROWSER TOOL — use this instead of writing CDP scripts:
385
427
  {"name":"browser","arguments":{"action":"click","text":"Show Filter"}} ← click element by visible text
386
428
  {"name":"browser","arguments":{"action":"click","selector":"#filter-btn"}} ← click by CSS selector
387
429
  {"name":"browser","arguments":{"action":"type","selector":"input","text":"hello"}} ← type text
388
- {"name":"browser","arguments":{"action":"screenshot"}} ← take screenshot
430
+ {"name":"browser","arguments":{"action":"screenshot"}} ← sends screenshot to user (YOU cannot see it — use screenshot_and_describe to see the page yourself)
389
431
  {"name":"browser","arguments":{"action":"evaluate","script":"document.title"}} ← run JS
390
432
  {"name":"browser","arguments":{"action":"scroll","y":400}} ← scroll down
391
433
 
@@ -398,46 +440,181 @@ WORKFLOW when user says "the tab is already open":
398
440
  The browser has the user's sessions and cookies. You CAN click any button, filter, or link visible on the page.
399
441
 
400
442
  == GENERAL RULES (all tasks) ==
401
- G1. IDENTIFY THE TASK TYPE. Build? Research? Question? Match approach to task.
402
- G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer.
443
+ G1. IDENTIFY THE TASK TYPE FIRST:
444
+ - CONVERSATIONAL/QUESTION (asking for names, opinions, definitions, advice, comparisons, brainstorming): Answer in text. NO tools. Do NOT use browser, bash, screenshot, or any tool. Match the depth of your response to the complexity of the question — a simple factual question gets a concise answer, an open-ended or creative question gets a full, substantive response with reasoning.
445
+ - RESEARCH (look something up online): Use web_fetch or browser to find info, then answer in text.
446
+ - BUILD (create an app, game, script, file): Use bash, WRITE_FILE, browser as needed.
447
+ - BROWSER TASK (interact with a website): Use browser tools.
448
+ G2. START IMMEDIATELY. No intro text, no plans, no asking permission. First output = first tool call or direct answer. DO NOT repeat the user's question or task back to them — just respond.
403
449
  G3. ANY WEBSITE/URL IS ACCESSIBLE. User mentions a site or open tab? Use browser snapshot to see what's currently open, then browser navigate/click/type to interact. Never ask "what's the URL?" — find it yourself.
404
450
  G4. NEVER ASK PERMISSION. Never say "should I use X or Y?" — pick the right tool and use it.
405
- G5. IF A TOOL FAILS: Try a different approach. web_fetch empty screenshot_and_describe. Never repeat a failing call identically.
451
+ G4a. STOP WHEN DONE. After completing the task, STOP. Do NOT add meta-commentary about your capabilities, limitations, or what information you don't have. Do NOT explain what you cannot do. Answer and stop.
452
+ G4b. FORMATTING: Use **bold** for section labels and emphasis. Do NOT use markdown headers (# ## ### ####) — use **bold** instead. For bullet lists, ALWAYS write "- item" (dash + space + text). NEVER write "*item" (asterisk directly before text with no space) — that is not valid markdown and shows as a raw asterisk.
453
+ G5. IF A TOOL FAILS: Try a different approach. Browser snapshot empty? → try web_fetch on the same URL. web_fetch empty? → try screenshot_and_describe. NEVER repeat a failing call more than twice with different selectors — take a snapshot to see what's actually on the page. IF WEB BROWSING FAILS REPEATEDLY: fall back to web_fetch on the site's URL, or try a different URL entirely. NEVER write files, build code, or start a server as a fallback for web research — stay in browser/web_fetch tools until you have the data.
454
+ G5a. BROWSER FORM SUBMISSION: After typing into a search/input field, ALWAYS submit with {"action":"press","key":"Enter","selector":"<same-selector-you-typed-into>"} — pass the selector of the field you just typed in so Enter fires in the right element. NEVER try to click submit/compute/search buttons by ref, text, or selector. Buttons shift, break, or trigger ads. Enter always works.
455
+ G5b. BROWSER INTERACTION RULE: After navigating to a page, ALWAYS take a snapshot FIRST to see real element text, IDs, and indices before attempting to click or type. Do NOT guess selectors from memory — selectors change. Snapshot → read elements → interact.
456
+ G5c. READING PAGE CONTENT: For reading text on a page (titles, scores, prices, numbers), use browser → snapshot — it returns all DOM text fast. Use screenshot_and_describe only when you need to visually verify something rendered (canvas, image, CSS layout) OR when snapshot body text is under 200 chars (JS-heavy page, results not yet in DOM). When using screenshot_and_describe to find specific data, ALWAYS pass check_for with exactly what you need.
406
457
  G6. RESEARCH TASKS: web_fetch → read → reason → respond in text. No server, no localhost.
407
- G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data.
458
+ G7. NEVER INVENT TASKS. Do exactly what was asked. Do not build a web app when asked to analyze data. Do not write files when asked to look up information. Do not start coding when the task is browsing.
408
459
  G8. WHEN GENUINELY STUCK: State what you tried, what failed, ask ONE specific question.
409
460
  G9. KEEP GOING until the task is fully complete.
410
461
 
411
462
  == BUILD RULES (only when building apps/games/tools) ==
412
- B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spacesuse underscores).
463
+ B0. STATIC-FILE TASKS (saves to a local path, no deployment/hosting mentioned): If the task says "save to ~/some/path.html" or "create a file at ~/some/path" and does NOT mention serving, hosting, or deploying — just WRITE_FILE to that exact path, then open it with {"name":"browser","arguments":{"action":"navigate","url":"file:///abs/path/index.html"}} and screenshot to visually verify. Do NOT spin up a server, do NOT run npm init, do NOT install packages. Pure HTML/CSS/JS files run directly in browsers via file:// URLs no server needed.
464
+ B1. PROJECT LOCATION: Always put projects in ${projectsDir}/PROJECT_NAME/ (no spaces — use underscores). NEVER create directories or write project files under /tmp/agentforge/ — that path is platform-managed. Your Working directory (${workDir}) is only for tool execution context, NOT for storing project files.
413
465
  B2. WRITE EVERY FILE COMPLETELY — no stubs, no placeholders, no TODOs. Full working code only.
466
+ B2a. NEVER use echo or cat to append code line-by-line (e.g. echo 'code' >> file.js). Always use WRITE_FILE with the COMPLETE file content in one call. Appending one line per bash call wastes 100 turns to write what one WRITE_FILE does instantly.
414
467
  B3. BUILD FILE BY FILE — write each file completely before writing the next.
415
468
  B4. ALWAYS use absolute paths.
416
- B5. SERVING FILES: Node.js server: nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 & — NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server PORT --directory /abs/path/ > /tmp/server.log 2>&1 &
417
- B6. npm install: cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express
418
- B7. After starting server, verify: sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:PORT if 000, check /tmp/server.log and fix the error.
419
- B8. PORT MANAGEMENT: Check port before starting: lsof -i :PORT | head -3. If in use: kill old process, restart. If crashed: restart. If busy with something else: pick different port.
469
+ B5. SERVING FILES: Node.js server MUST cd into the project dir first — ALWAYS use this exact pattern: cd /abs/project/path && nohup /usr/local/bin/node /abs/project/path/server.js > /tmp/server.log 2>&1 & — NEVER use a bare filename like "nohup node server.js" without cd, or Node will look for server.js in the wrong directory and crash. NEVER blocking. Pure HTML/JS (no backend): nohup python3 -m http.server ${assignedPort} --directory /abs/path/ > /tmp/server.log 2>&1 &
470
+ B5b. STOPPING A SERVER: NEVER use "pkill -f node" — it kills the platform itself. To stop a running server: kill $(lsof -ti:PORT) 2>/dev/null || true
471
+ B6. npm install: ALL npm commands MUST be in ONE bash call with cd: {"name":"bash","arguments":{"command":"cd /abs/project/path && /usr/local/bin/npm init -y && /usr/local/bin/npm install express"}}NEVER run npm init or npm install as a separate bash call without cd, or packages install in the wrong directory and the server will crash with "Cannot find module".
472
+ B7. After starting server, verify using the ACTUAL PORT the server is listening on (not the assigned port): sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:ACTUAL_PORT
473
+ - If 000: server crashed. Read /tmp/server.log, fix the error, restart server (kill $(lsof -ti:PORT) 2>/dev/null || true && cd /abs/project/path && nohup /usr/local/bin/node /abs/path/server.js > /tmp/server.log 2>&1 &), then curl again.
474
+ - If 404: server is running but missing a file. Read /tmp/server.log — if you see "ENOENT" for public/index.html, that HTML file was NOT written. Write it immediately, then curl again. Do NOT rewrite server.js for a 404.
475
+ - If 200: server is up. Proceed to B10 screenshot QA.
476
+ After fixing any error, ALWAYS restart the server AND re-verify with curl before proceeding.
477
+ B8. PORT RULE: If the user's task explicitly specifies a port number, use that exact port everywhere — in server.js, in the verification curl, everywhere. If no port is specified, use your ASSIGNED PORT ${assignedPort}. In server.js: const PORT = process.env.PORT || YOUR_CHOSEN_PORT; Never use port 3000 (reserved by system).
420
478
  B9. EXPRESS WILDCARD ROUTE: NEVER write app.get('*', ...) — crashes in newer versions. Use app.use((req, res) => { ... }) instead.
421
- B10. MANDATORY SCREENSHOT QA: After curl returns 200, call screenshot_and_describe with send_to_user:true. You are NOT done until the screenshot shows the real working app.
422
- B11. ALWAYS open the finished app: bash open http://localhost:PORT
479
+ B10a. STATIC FILE PATHS: ALWAYS use path.join(__dirname, 'public') for express.static NEVER './public' or 'public'. For res.sendFile on the root route: ALWAYS path.join(__dirname, 'public', 'index.html') — NEVER path.join(__dirname, 'index.html'). Relative paths break under nohup.
480
+ B10b. server.js IS FOR LOGIC ONLY — NEVER EMBED HTML: All HTML belongs in public/index.html. Route handlers must NOT contain template literals with HTML (backtick strings with <div>, <h1>, etc.) — these cause SyntaxErrors. server.js should only have: require/import, middleware, JSON API routes, express.static, and app.listen. Anything visual goes in public/.
481
+ B10. MANDATORY SCREENSHOT QA — KEEP ITERATING UNTIL THE DESIGN PASSES:
482
+ After curl returns 200, call screenshot_and_describe(url:"http://localhost:PORT", send_to_user:true).
483
+ Evaluate against these pass/fail criteria. If ANY fail, fix immediately and screenshot again:
484
+ ✗ FAIL: Plain/unstyled HTML — no colors, raw browser defaults, looks like a text document
485
+ ✗ FAIL: Text barely visible or poor contrast against the background
486
+ ✗ FAIL: Layout broken, elements overlapping, or content spilling outside containers
487
+ ✗ FAIL: Buttons are plain gray browser defaults — unstyled
488
+ ✗ FAIL: Inputs are plain white browser defaults — unstyled
489
+ ✗ FAIL: No consistent color theme applied throughout
490
+ ✓ PASS: All of the above are satisfied — consistent theme, readable text, styled controls, proper layout
491
+ Stop only when ALL criteria pass. There is no fixed iteration count — stop when it genuinely looks good, whether that takes 1 screenshot or 10. Do NOT stop just because the server is running.
492
+ B11. CSS DESIGN STANDARDS — apply from the start, before any screenshot:
493
+ Use a dark background (#1a1a2e or #0d1117 or similar), white/light text, colored accents (#00b4d8, #4ade80, #f472b6, etc.). Style ALL inputs and buttons — no raw browser defaults. Use border-radius, padding, box-shadow, and flex/grid layout. Minimum: background gradient or solid dark color, styled form inputs (border: 1px solid #444, bg: #1e1e2e, color: #fff), primary buttons with colored background. The first version should already look good — not a plain HTML skeleton.
423
494
  B12. CANVAS GAMES: canvas 800×600, dark background #1a1a2e, all elements clearly visible. Dark theme, styled UI.
424
495
  B13. OBSERVE BEFORE FIXING: Screenshot first, then make targeted edits. Never rewrite an entire file from scratch when the server is running.
425
496
  B14. TARGETED EDITS: read_file to see current code, write_file only the changed section. Never throw away working code.
426
497
  B15. QUALITY LOOP: After each fix, screenshot again to verify. Iterate until it looks correct.
427
- B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.`;
498
+ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different states. Not just the header.
499
+ B17. AFTER DEPLOYING: Once a deployment command succeeds, immediately run the platform's URL command (e.g. railway domain, vercel --prod, netlify open:deploy, fly status) to get the live public URL. Your final message MUST include the full URL so the user can open it.`;
428
500
  // Text-based tool format works reliably across all local models.
429
501
  // WRITE_FILE uses code-fence to avoid JSON-escaping issues; all other tools use JSON.
430
- const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nDO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nTO WRITE A FILE (only when actually writing code/content to disk):\nWriting server.js...\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\n\nFOR ALL OTHER TOOLS — output JSON on its own line:\nRunning command...\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for","send_to_user":true}} — open ANY URL in a real browser and screenshot it. Use when pages are dynamic/JS-heavy or you need to show the user visuals.\n\n${universalRules}`;
502
+ const jsonToolFormat = `You are an AI agent. Working directory: ${workDir}\n\nCONVERSATIONAL QUESTIONS — answer directly with text, NO tools: brainstorming, opinions, explanations, greetings, "what is X", "give me ideas", "how does X work", anything you can answer from knowledge. Only use tools when you need to actually DO something: read/write files, run commands, browse real-time data, build or deploy something.\n\nACTION TASKS — DO NOT describe what you will do. DO NOT write plans. START EXECUTING IMMEDIATELY.\n\nCRITICAL — THINK SILENTLY: Any reasoning, planning, self-doubt, or "I cannot" thoughts MUST go inside <think>...</think> tags and NEVER appear as visible text. Your visible output must be ONLY tool calls and final answers. NEVER output limitations or explanations before calling a tool — think it, don't say it.\n\nTO WRITE A FILE output WRITE_FILE with the FULL ABSOLUTE PATH on the same line, then a code fence:\nWRITE_FILE /abs/path/to/server.js\n\`\`\`\n...complete file content here...\n\`\`\`\nCRITICAL: the path is MANDATORY — WRITE_FILE alone (no path) is invalid and will be ignored.\n\nTO RUN A COMMAND — output JSON on its own line:\n{"name":"bash","arguments":{"command":"shell command here"}}\n\nTools:\n- WRITE_FILE /path — write a local file. ONLY use this when actually creating/editing a file on disk.\n- {"name":"bash","arguments":{"command":"..."}} — run any shell command\n- {"name":"read_file","arguments":{"path":"/abs/path"}} — read a local file\n- {"name":"list_directory","arguments":{"path":"/abs/path"}} — list local directory\n- {"name":"web_fetch","arguments":{"url":"https://any-public-url.com"}} — fetch ANY website or URL and read its content. Use for research, data, docs, scraping public sites.\n- {"name":"screenshot_and_describe","arguments":{"url":"https://any-url.com","check_for":"what to look for"}} — screenshot a page and analyze it with vision so YOU can SEE what's on screen. 
CRITICAL: this is how you visually read a page — use it whenever snapshot returns partial/empty data or you need to read numbers/text from a JS-heavy page. Omit url to screenshot the current browser tab. Returns text description YOU can reason about.\n- {"name":"browser","arguments":{"action":"tabs"}} — control the REAL Chrome browser (pre-logged in with user's sessions). Use for bookmarks, logged-in sites, JavaScript-heavy pages. Actions: tabs, snapshot, navigate, click, type, press, screenshot (sends to user only — YOU cannot see it), evaluate, scroll, focus. SUBMITTING FORMS: after typing into a search box, use {"action":"press","key":"Enter"} to submit — do NOT click ref numbers which can hit ads. CLICKING BUTTONS: prefer {"action":"click","text":"button label"} over {"action":"click","ref":N} — ref numbers shift and can click the wrong element. To visually READ a page yourself, use screenshot_and_describe instead of browser screenshot.\n\n${universalRules}`;
431
503
  const systemPrompt = customSystemPrompt || jsonToolFormat;
432
504
 
505
+ // Build message array. When there is prior history, scan the last few assistant turns
506
+ // for signs the model got stuck (declared inability, looped, gave up). If stuck, trim
507
+ // the history so the user's new instruction lands with full weight rather than being
508
+ // buried under a wall of failed reasoning the model is anchored to.
509
+ let activeHistory = history;
510
+ if (activeHistory.length > 0) {
511
+ const recentAssistant = activeHistory
512
+ .filter(m => m.role === 'assistant')
513
+ .slice(-4)
514
+ .map(m => (m.content || '').toLowerCase());
515
+ const stuckSignals = [
516
+ 'i cannot', 'i am unable', 'unfortunately', 'environment does not',
517
+ 'not possible', 'i lack', 'i do not have the ability', 'i have exhausted',
518
+ 'cannot be done', 'is not supported', 'failed to', 'i have tried',
519
+ 'every attempt', 'cannot complete',
520
+ 'no specific task', 'no task has been given', 'no task was given',
521
+ 'cannot proceed with a meaningful', 'i must wait for a task',
522
+ 'waiting for a task', 'please provide a task', 'specify a task',
523
+ ];
524
+ const isStuck = recentAssistant.some(text =>
525
+ stuckSignals.some(sig => text.includes(sig))
526
+ );
527
+ if (isStuck) {
528
+ // Keep only the last 6 turns (3 exchanges) so the new instruction dominates.
529
+ // The user is course-correcting — don't let stale failure reasoning override them.
530
+ activeHistory = activeHistory.slice(-6);
531
+ console.log(` [${agentId}] 🔄 Stuck signals detected in history — trimmed to last 6 turns so new instruction takes priority`);
532
+ }
533
+ }
534
+
433
535
  const messages = [
434
536
  { role: 'system', content: systemPrompt },
435
- ...history,
537
+ ...activeHistory,
436
538
  ];
437
539
 
540
+ // Inject context the agent needs to work on existing projects.
541
+ // Registry is always injected (small, always relevant).
542
+ // Workspace files list only injected on fresh sessions (no history).
543
+ let taskContent = task;
544
+ {
545
+ const contextParts = [];
546
+
547
+ // 1. Known running projects from the global registry (always inject)
548
+ try {
549
+ const REGISTRY = '/tmp/agentforge/projects.json';
550
+ if (existsSync(REGISTRY)) {
551
+ const registry = JSON.parse(readFileSync(REGISTRY, 'utf8'));
552
+ const entries = Object.values(registry);
553
+ if (entries.length > 0) {
554
+ const lines = entries.map(e => {
555
+ let info = `- "${e.name}" → ${e.path} (running on port ${e.port}`;
556
+ if (e.railwayProject) info += `, Railway project: "${e.railwayProject}"`;
557
+ if (e.liveUrl) info += `, live URL: ${e.liveUrl}`;
558
+ return info + ')';
559
+ });
560
+ contextParts.push(`Known projects on this machine:\n${lines.join('\n')}`);
561
+ }
562
+ }
563
+ } catch {}
564
+
565
+ // 1b. Available deployment/publishing CLIs — probe what's actually installed and authed.
566
+ // Inject so the agent knows it CAN deploy rather than claiming it lacks credentials.
567
+ try {
568
+ const deployTools = [];
569
+ const candidates = [
570
+ { cmd: 'railway', check: 'railway whoami 2>/dev/null', label: 'railway' },
571
+ { cmd: 'vercel', check: 'vercel whoami 2>/dev/null', label: 'vercel' },
572
+ { cmd: 'netlify', check: 'netlify status 2>/dev/null', label: 'netlify' },
573
+ { cmd: 'fly', check: 'fly auth whoami 2>/dev/null',label: 'fly' },
574
+ { cmd: 'surge', check: 'surge whoami 2>/dev/null', label: 'surge' },
575
+ { cmd: 'gh', check: 'gh auth status 2>/dev/null', label: 'gh' },
576
+ ];
577
+ await Promise.all(candidates.map(async ({ cmd, check, label }) => {
578
+ try {
579
+ const { stdout } = await execAsync(`which ${cmd} 2>/dev/null && ${check}`, { timeout: 4000 });
580
+ if (stdout.trim()) deployTools.push(`${label} (authenticated: ${stdout.trim().split('\n')[0].slice(0, 60)})`);
581
+ } catch {}
582
+ }));
583
+ if (deployTools.length > 0) {
584
+ contextParts.push(`Deployment CLIs available and authenticated on this machine:\n${deployTools.map(t => `- ${t}`).join('\n')}\n\nYou can use these tools directly via bash to deploy projects publicly.`);
585
+ }
586
+ } catch {}
587
+
588
+ // 2. Existing files in this agent's workspace (fresh sessions only)
589
+ if (activeHistory.length === 0) {
590
+ try {
591
+ const SKIP_NAMES = new Set(['MEMORY.md', 'AGENTS.md', 'AGENTFORGE.md', 'node_modules', '.git', 'memory', '.npm', 'package-lock.json']);
592
+ const collectFiles = (dir, base = '', depth = 0) => {
593
+ if (depth > 3) return [];
594
+ let files = [];
595
+ for (const e of readdirSync(dir, { withFileTypes: true })) {
596
+ if (SKIP_NAMES.has(e.name)) continue;
597
+ const rel = base ? `${base}/${e.name}` : e.name;
598
+ if (e.isDirectory()) files.push(...collectFiles(path.join(dir, e.name), rel, depth + 1));
599
+ else files.push(rel);
600
+ }
601
+ return files;
602
+ };
603
+ const existingFiles = collectFiles(workDir);
604
+ if (existingFiles.length > 0) {
605
+ contextParts.push(`Your workspace already contains these files:\n${existingFiles.map(f => `- ${workDir}/${f}`).join('\n')}\n\nRead the relevant files before making any changes. Make targeted edits — do NOT rewrite working files from scratch.`);
606
+ }
607
+ } catch {}
608
+ }
609
+
610
+ if (contextParts.length > 0) {
611
+ taskContent = `${contextParts.join('\n\n')}\n\n${task}`;
612
+ }
613
+ }
614
+
438
615
  // Attach initial image if provided — always include it; models that don't support
439
616
  // images will ignore the field, and if they error we catch it below.
440
- const userMessage = { role: 'user', content: task };
617
+ const userMessage = { role: 'user', content: taskContent };
441
618
  if (image) {
442
619
  const base64 = image.replace(/^data:image\/\w+;base64,/, '');
443
620
  userMessage.images = [base64];
@@ -459,10 +636,52 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
459
636
  const toolsUsed = []; // track tool names called (for fallback summary)
460
637
  // No hard turn limit — agent runs until done, loop-detected, or wall-clock timeout.
461
638
  const recentCalls = []; // last N tool calls for loop detection
639
+ const recentBashCalls = []; // bash-only window — write_file doesn't contaminate bash loop detection
462
640
  let emptyRetries = 0; // consecutive empty-response retries
641
+ const recentOutputs = []; // last N no-tool-call outputs for repeated-output detection
642
+ let incompleteKicks = 0; // consecutive times _isTaskComplete returned false
643
+ let noToolKicks = 0; // consecutive turns with content but no tool calls — escalate message
644
+ let taskDoneEarly = false; // set by completion-language detector inside tool loop
645
+ let localBrowserTurns = 0; // consecutive browser tool calls on localhost — capped to prevent infinite QA loops
646
+ let successfulScreenshots = 0; // how many times we've seen a working (non-placeholder) localhost app
647
+ let midRefusalKicks = 0; // how many times we've overridden a mid-task refusal
648
+ let echoAppendCalls = 0; // consecutive bash calls using echo >> to append to a file
649
+ let consecutiveTruncations = 0; // how many times in a row the same truncated JSON was re-output
650
+ const fileReadCounts = new Map(); // path -> # of reads since last write_file (cross-turn read-loop detector)
463
651
 
464
652
  for (let turn = 0; ; turn++) {
465
653
  if (controller.signal.aborted) break;
654
+ let toolsUsedThisTurn = 0; // Fix 10: per-turn tool count — reset each turn so _isTaskComplete
655
+ // only fires when the current turn actually ran tools, not just
656
+ // because prior turns did. Prevents kicking mid-plan text outputs.
657
+
658
+ // Hard turn cap: prevent runaway agents. 60 turns handles complex multi-file projects.
659
+ if (turn >= 60) {
660
+ console.log(` [${agentId}] ⚠️ Turn cap (60) reached — forcing completion`);
661
+ messages.push({ role: 'user', content: 'You have used 60 turns. Provide your final answer now — describe what you built and any important notes. Be concise.' });
662
+ break;
663
+ }
664
+
665
+ // ── Per-turn context trim ────────────────────────────────────────────
666
+ // After large file writes the messages array can accumulate 15K+ tokens
667
+ // making each subsequent Ollama call slower and causing empty responses.
668
+ // When total content exceeds 30K chars (~7.5K tokens), drop middle messages
669
+ // (keep system prompt + first user task + last 8 messages).
670
+ // Threshold lowered from 60K: a single large WRITE_FILE can add 20K chars,
671
+ // causing every subsequent turn to have slow prefill.
672
+ const totalMsgChars = messages.reduce((s, m) => s + (typeof m.content === 'string' ? m.content.length : 0), 0);
673
+ if (totalMsgChars > 30000 && messages.length > 10) {
674
+ const systemMsg = messages[0];
675
+ const firstUserMsg = messages.find(m => m.role === 'user');
676
+ const recentMsgs = messages.slice(-8);
677
+ const trimmed = [systemMsg, firstUserMsg, ...recentMsgs].filter(Boolean);
678
+ // Only trim if it actually reduces messages (avoids trimming to same set)
679
+ if (trimmed.length < messages.length) {
680
+ console.log(` [${agentId}] ✂️ Turn ${turn}: context trim ${messages.length}→${trimmed.length} msgs (${Math.round(totalMsgChars/1000)}KB chars)`);
681
+ messages.length = 0;
682
+ messages.push(...trimmed);
683
+ }
684
+ }
466
685
 
467
686
  this.emit('tool_activity', { agentId, event: 'tool_start', tool: 'model', description: `Thinking…` });
468
687
 
@@ -472,6 +691,7 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
472
691
  const isOllamaBackend = this.baseUrl.includes('11434') || this.baseUrl.includes('localhost') || this.baseUrl.includes('127.0.0.1');
473
692
  const useNativeEndpoint = isOllamaBackend; // all local models use native endpoint
474
693
 
694
+ const inferenceStart = Date.now();
475
695
  let response;
476
696
  try {
477
697
 
@@ -498,14 +718,37 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
498
718
  };
499
719
  }
500
720
 
721
+ // Per-turn inference timeout: 8 minutes. Without this, a huge context (e.g. 37K-char
722
+ // file in messages) can make Ollama spin for 10+ minutes with no output. The context
723
+ // trim (60K char threshold) prevents most cases, but this is a safety valve.
724
+ const turnAbort = new AbortController();
725
+ const turnTimeoutId = setTimeout(() => {
726
+ console.log(` [${agentId}] ⏰ Turn ${turn}: inference timeout (8 min) — aborting and retrying with trimmed context`);
727
+ turnAbort.abort();
728
+ }, 8 * 60 * 1000);
729
+ const combinedSignal = AbortSignal.any
730
+ ? AbortSignal.any([controller.signal, turnAbort.signal])
731
+ : turnAbort.signal; // fallback: use turn signal only if any() unavailable
732
+
501
733
  response = await fetch(endpoint, {
502
734
  method: 'POST',
503
735
  headers: { 'Content-Type': 'application/json' },
504
- signal: controller.signal,
736
+ signal: combinedSignal,
505
737
  body: JSON.stringify(requestBody)
506
738
  });
739
+ clearTimeout(turnTimeoutId);
507
740
  } catch (fetchErr) {
508
- if (fetchErr.name === 'AbortError') break;
741
+ if (fetchErr.name === 'AbortError') {
742
+ // If the task-level controller was aborted, exit cleanly
743
+ if (controller.signal.aborted) break;
744
+ // Otherwise this was a turn-level timeout — treat like empty response and retry
745
+ console.log(` [${agentId}] ⏰ Turn ${turn}: inference timed out — forcing context trim and retry`);
746
+ // Trim aggressively: keep system + first user + last 4 messages
747
+ const _sys = messages[0]; const _usr = messages.find(m => m.role === 'user');
748
+ const _recent = messages.slice(-4);
749
+ messages.length = 0; messages.push(_sys, _usr, ..._recent.filter(Boolean));
750
+ continue; // retry this turn with trimmed context
751
+ }
509
752
  throw new Error(`Cannot reach local model server at ${this.baseUrl}. Is it running? (${fetchErr.message})`);
510
753
  }
511
754
 
@@ -529,18 +772,43 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
529
772
  let inFenceBlock = false; // inside WRITE_FILE code fence — suppress content from streaming
530
773
  let fenceDepth = 0; // ``` count since last WRITE_FILE (even=closed, odd=open)
531
774
  let rawTokenCount = 0;
775
+ let tokenCapTruncatedFile = false; // true when token cap fired mid-WRITE_FILE fence
532
776
  let lastVisibleAt = Date.now(); // track when we last got visible output (for think timeout)
533
777
 
534
778
  const reader = response.body.getReader();
535
779
  const decoder = new TextDecoder();
536
780
  let buf = '';
537
781
 
538
- // No timeouts — local model can take as long as it needs on any turn.
539
- // Only the user abort (controller.signal) or stream end stops a turn.
782
+ // No wall-clock timeout on the streaming phase (the 8-minute turn timer is cleared once fetch() resolves) — the local model can stream as long as it needs.
783
+ // But we DO time out individual reader.read() calls (30s) so a silently-dropped
784
+ // connection never hangs the worker forever. And when Ollama signals done:true we
785
+ // immediately cancel the reader instead of waiting for the HTTP body to close on
786
+ // its own (which can stall indefinitely on keep-alive connections).
540
787
  let turnRetry = false;
541
- while (true) {
788
+ streamLoop: while (true) {
542
789
  if (controller.signal.aborted) break;
543
- const { done, value } = await reader.read();
790
+ // Time-box each individual read() call. If no bytes arrive for 30s the stream
791
+ // has stalled (Ollama crashed / connection dropped silently) — abort it.
792
+ let _readTimer;
793
+ let readResult;
794
+ try {
795
+ readResult = await Promise.race([
796
+ reader.read(),
797
+ new Promise((_, reject) => {
798
+ _readTimer = setTimeout(() => reject(new Error('stream_read_stall')), 30000);
799
+ })
800
+ ]);
801
+ } catch (e) {
802
+ if (e.message === 'stream_read_stall') {
803
+ console.log(` [${agentId}] ⏱️ Stream stalled (no data for 30s) — aborting`);
804
+ reader.cancel().catch(() => {});
805
+ break;
806
+ }
807
+ throw e;
808
+ } finally {
809
+ clearTimeout(_readTimer);
810
+ }
811
+ const { done, value } = readResult;
544
812
  if (done) break;
545
813
 
546
814
  buf += decoder.decode(value, { stream: true });
@@ -556,7 +824,12 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
556
824
  // Ollama native NDJSON format
557
825
  let nativeEvt;
558
826
  try { nativeEvt = JSON.parse(line); } catch { continue; }
559
- if (nativeEvt.done) continue;
827
+ if (nativeEvt.done) {
828
+ // Ollama says generation is complete — cancel the reader and exit now.
829
+ // Do NOT fall back to reader.read() which can hang on keep-alive connections.
830
+ reader.cancel().catch(() => {});
831
+ break streamLoop;
832
+ }
560
833
  tokenText = nativeEvt.message?.content ?? null;
561
834
  } else {
562
835
  // OpenAI SSE format
@@ -586,21 +859,37 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
586
859
  rawTokenCount++;
587
860
  streamContent += tokenText;
588
861
 
862
+ // Per-turn token cap — if a single turn generates >6000 tokens, the model is
863
+ // probably writing multiple large files in one shot or looping. Truncate the stream
864
+ // and let the agent loop handle the (partial) output. Keeps single-turn inference
865
+ // bounded to ~3-5 minutes on local hardware.
866
+ if (rawTokenCount >= 6000) {
867
+ console.log(` [${agentId}] ⚠️ Turn ${turn}: token cap (${rawTokenCount}) — truncating stream`);
868
+ // Close any open code fence so the WRITE_FILE parser can extract partial content.
869
+ // Track whether we truncated mid-write so we can inject a hint after the tool loop.
870
+ if (inFenceBlock && fenceDepth % 2 === 1) {
871
+ streamContent += '\n```\n';
872
+ inFenceBlock = false;
873
+ tokenCapTruncatedFile = true; // token cap cut a WRITE_FILE fence short
874
+ }
875
+ reader.cancel().catch(() => {});
876
+ break streamLoop;
877
+ }
878
+
589
879
  // Process token through think + tool_call filters, emit visible text live
590
880
  // We scan only the new delta token against the current buffer state
591
881
  const chunk = tokenText;
592
882
  let visible = '';
883
+ const wasInThinkBlock = inThinkBlock;
593
884
  // Simple per-token state machine — handles split tags across tokens by tracking state flags
594
885
  if (!inThinkBlock && !inToolCallBlock) {
595
- // Check if this chunk starts a filtered block
596
- if (streamContent.includes('<think>') && !streamContent.includes('</think>')) {
886
+ // Check if this chunk starts a filtered block.
887
+ // Use `<think` (no closing >) to catch split tokens where `>` arrives separately.
888
+ // `<think` won't false-positive on `</think>` since that starts with `</`.
889
+ if (streamContent.includes('<think') && !streamContent.includes('</think>')) {
597
890
  inThinkBlock = true;
598
- // emit text before the <think> tag
599
- const before = streamContent.lastIndexOf('<think>');
600
- // already streamed everything before this point; just suppress from here
601
891
  } else if (streamContent.includes('<tool_call>') && !streamContent.slice(streamContent.lastIndexOf('<tool_call>')).includes('</tool_call>')) {
602
892
  inToolCallBlock = true;
603
- // Text before <tool_call> on this same token — already emitted or trivial
604
893
  } else if (!inThinkBlock && !inToolCallBlock) {
605
894
  visible = chunk;
606
895
  }
@@ -614,6 +903,26 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
614
903
  inToolCallBlock = false;
615
904
  }
616
905
 
906
+ // Stream think block content live — shown in a collapsible "Thinking…" panel in the dashboard
907
+ {
908
+ let thinkChunk = '';
909
+ if (!wasInThinkBlock && inThinkBlock) {
910
+ // Just entered think block — emit content after the opening <think> tag
911
+ const tagEnd = chunk.indexOf('<think>');
912
+ thinkChunk = tagEnd >= 0 ? chunk.slice(tagEnd + 7) : chunk;
913
+ } else if (wasInThinkBlock && inThinkBlock) {
914
+ // Mid-think block — emit raw chunk (strip stray tag fragments)
915
+ thinkChunk = chunk.replace(/<\/?think>/g, '');
916
+ } else if (wasInThinkBlock && !inThinkBlock) {
917
+ // Just exited think block — emit content before the closing </think> tag
918
+ const tagStart = chunk.indexOf('</think>');
919
+ thinkChunk = tagStart >= 0 ? chunk.slice(0, tagStart) : chunk;
920
+ }
921
+ if (thinkChunk) {
922
+ this.emit('agent_output', { agentId, output: thinkChunk, isThinking: true, isChunk: true });
923
+ }
924
+ }
925
+
617
926
  // Scan ALL lines completed in this token for state transitions.
618
927
  // Multi-char tokens can contain multiple lines (WRITE_FILE + ``` in same token).
619
928
  if (tokenText.includes('\n')) {
@@ -622,7 +931,7 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
622
931
  while (nlIdx !== -1) {
623
932
  const lineStart = Math.max(0, streamContent.lastIndexOf('\n', nlIdx - 1)) + 1;
624
933
  const line = streamContent.slice(lineStart, nlIdx).trim();
625
- if (/^(WRITE_FILE|write_file)[:\s]+\S/i.test(line)) {
934
+ if (/^(WRITE_FILE|write_file)/i.test(line)) {
626
935
  inFenceBlock = true; fenceDepth = 0;
627
936
  } else if (inFenceBlock && /^```/.test(line)) {
628
937
  fenceDepth++;
@@ -639,16 +948,28 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
639
948
  const cleanSC = streamContent.replace(/<think>[\s\S]*?<\/think>/g, '');
640
949
  const lastNL = cleanSC.lastIndexOf('\n');
641
950
  const curLine = cleanSC.slice(lastNL + 1).trimStart();
642
- if (!inFenceBlock && /^(WRITE_FILE|write_file)[:\s]+\S/i.test(curLine)) {
951
+ // Suppress as soon as "WRITE_FILE" appears at start of partial line —
952
+ // don't wait for the path to arrive or the word streams char-by-char to the user.
953
+ if (!inFenceBlock && /^(WRITE_FILE|write_file)/i.test(curLine)) {
643
954
  inFenceBlock = true; fenceDepth = 0;
644
955
  }
645
- if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || curLine.startsWith('['))) {
956
+ // Only treat as JSON blob if it looks like actual JSON — `[{` or `["` or `[` followed by quote/brace.
957
+ // Avoid false-positive on `[bash result]:`, `[tool result]:`, etc.
958
+ if (!inJsonBlob && !inFenceBlock && (curLine.startsWith('{') || /^\[[\[{"']/.test(curLine))) {
646
959
  inJsonBlob = true;
647
960
  }
648
961
  }
649
962
 
650
963
  // Emit visible content — safety filter removes any ``` or WRITE_FILE lines
651
- // that slipped through (e.g. partial token at detection boundary)
964
+ // that slipped through (e.g. partial token at detection boundary).
965
+ // If a complete <think>...</think> block arrived in one token (state machine missed it),
966
+ // route its content as a thinking chunk so users can see the agent's reasoning.
967
+ if (visible) {
968
+ visible = visible.replace(/<think>([\s\S]*?)<\/think>/g, (_, content) => {
969
+ if (content.trim()) this.emit('agent_output', { agentId, output: content, isThinking: true, isChunk: true });
970
+ return '';
971
+ }).replace(/<think>[\s\S]*/g, '');
972
+ }
652
973
  if (visible && !inThinkBlock && !inToolCallBlock && !inJsonBlob && !inFenceBlock) {
653
974
  const safe = visible.split('\n').filter(ln => {
654
975
  const t = ln.trimStart();
@@ -666,13 +987,40 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
666
987
  if (inThinkBlock && (Date.now() - lastVisibleAt) > 90000 && rawTokenCount > 100) {
667
988
  console.log(` [${agentId}] ⏱️ Think timeout (>90s, ${rawTokenCount} tokens) — aborting stream`);
668
989
  reader.cancel().catch(() => {});
669
- break;
990
+ break streamLoop;
991
+ }
992
+
993
+ // Repetition loop detection — catches runaway token loops (e.g. hundreds of </li> repeating).
994
+ // Small local models can get stuck when fed malformed HTML or very large context.
995
+ // Check every 50 tokens after a 150-token warmup: if the trailing 4–15 chars recur 30+ times in the last 800 chars → abort.
996
+ if (rawTokenCount % 50 === 0 && rawTokenCount > 150) {
997
+ const tail = streamContent.slice(-800);
998
+ let loopDetected = false;
999
+ for (let pLen = 4; pLen <= 15; pLen++) {
1000
+ const pat = tail.slice(-pLen);
1001
+ if (!pat.trim()) continue;
1002
+ let count = 0, pos = 0;
1003
+ while ((pos = tail.indexOf(pat, pos)) !== -1) { count++; pos += pLen; }
1004
+ if (count >= 30) { loopDetected = true; break; } // Fix 9: raised from 20 — HTML/CSS files have naturally repetitive short patterns (px;, </div>, etc.)
1005
+ }
1006
+ if (loopDetected) {
1007
+ console.log(` [${agentId}] 🔄 Repetition loop detected at ${rawTokenCount} tokens — aborting stream`);
1008
+ reader.cancel().catch(() => {});
1009
+ break streamLoop;
1010
+ }
670
1011
  }
671
1012
  }
672
1013
  }
673
1014
 
674
- console.log(` [${agentId}] 📊 Stream done: ${rawTokenCount} tokens, ${streamContent.length} chars, ${visibleContent.length} visible, apiToolCalls=${Object.keys(streamToolCalls).length}`);
675
- if (streamContent) console.log(` [${agentId}] 📝 First 200 chars: ${streamContent.slice(0, 200)}`);
1015
+ const inferenceMs = Date.now() - inferenceStart;
1016
+ console.log(` [${agentId}] 📊 Turn ${turn}: ${rawTokenCount} tokens, ${streamContent.length} chars raw, ${visibleContent.length} visible, apiToolCalls=${Object.keys(streamToolCalls).length}, inference=${(inferenceMs/1000).toFixed(1)}s`);
1017
+ if (rawTokenCount === 0 && inferenceMs > 10000) {
1018
+ console.log(` [${agentId}] ⚠️ Turn ${turn}: Ollama spent ${(inferenceMs/1000).toFixed(1)}s returning 0 tokens — possible OOM, KV cache eviction, or model degenerate state`);
1019
+ }
1020
+ // Log visible content (what the user sees) — helps diagnose planning vs acting
1021
+ if (visibleContent.trim()) console.log(` [${agentId}] 👁️ Visible: ${visibleContent.trim().replace(/\n/g, ' ').slice(0, 300)}`);
1022
+ // Log raw content if no visible (pure tool call turn) — helps diagnose tool format
1023
+ else if (streamContent.trim()) console.log(` [${agentId}] 📝 Raw: ${streamContent.trim().replace(/\n/g, ' ').slice(0, 200)}`);
676
1024
 
677
1025
  // ── Extract tool calls from content ───────────────────────────────────
678
1026
  // Try <tool_call> XML tags first (some models emit this format), then fall through
@@ -712,6 +1060,19 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
712
1060
  }
713
1061
  }
714
1062
 
1063
+ // Detect model mimicking compaction format: [wrote: /path — N chars, M lines]
1064
+ // This happens after context trim — model sees these summaries and generates them as fake outputs.
1065
+ // The model THINKS it wrote the file but it hasn't. Correct it immediately.
1066
+ if (!parsedTagCalls && Object.keys(streamToolCalls).length === 0 && streamContent) {
1067
+ const fakeWroteMatch = streamContent.match(/\[wrote:\s*([^\s\]]+)[^\]]*\]/i);
1068
+ if (fakeWroteMatch) {
1069
+ const fakePath = fakeWroteMatch[1];
1070
+ console.log(` [${agentId}] ⚠️ Model generated fake [wrote: ...] summary — correcting`);
1071
+ messages.push({ role: 'user', content: `You output "[wrote: ${fakePath}...]" but that is a SUMMARY FORMAT from your context history — you did NOT actually write any file. To actually write a file, you MUST use WRITE_FILE format:\n\nWRITE_FILE ${fakePath}\n\`\`\`\n...complete file content...\n\`\`\`\n\nOutput the full file content now using WRITE_FILE.` });
1072
+ continue;
1073
+ }
1074
+ }
1075
+
715
1076
  // Fallback 4: if we found ONLY bash tool calls but content has writing blocks too,
716
1077
  // merge them so files get written AND bash runs
717
1078
  if (parsedTagCalls && streamContent) {
@@ -757,10 +1118,18 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
757
1118
  // ── Push assistant message ────────────────────────────────────────────
758
1119
  // All local models now use JSON-in-text format on the native endpoint.
759
1120
  // Strip <think>...</think> blocks to avoid burning context on reasoning traces.
1121
+ // Also compact WRITE_FILE fences: replace the file body with a summary line
1122
+ // to prevent large file contents from flooding the context on every future turn.
760
1123
  const toolCallsArray = Object.values(streamToolCalls);
761
1124
  const hasToolCalls = toolCallsArray.length > 0;
762
1125
  const cleanedContent = (streamContent || '')
763
1126
  .replace(/<think>[\s\S]*?<\/think>/g, '')
1127
+ // Compact WRITE_FILE fence bodies: replace with a non-fence note so the model
1128
+ // cannot mistake the summary for real file content and echo it back.
1129
+ .replace(/(?:WRITE_FILE|write_file)[:\s]+([^\n]+)\n```[^\n]*\n([\s\S]*?)```/gi, (match, filePath, fileContent) => {
1130
+ const lines = fileContent.split('\n').length;
1131
+ return `[wrote: ${filePath.trim()} — ${fileContent.length} chars, ${lines} lines]`;
1132
+ })
764
1133
  .trim();
765
1134
  messages.push({ role: 'assistant', content: cleanedContent || '' });
766
1135
 
@@ -769,6 +1138,7 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
769
1138
 
770
1139
  // ── Execute tool calls ────────────────────────────────────────────────
771
1140
  if (toolCallsArray.length > 0) {
1141
+ let completionCheckedThisTurn = false; // deduplicate _isTaskComplete across tool calls in same turn
772
1142
  for (const toolCall of toolCallsArray) {
773
1143
  if (controller.signal.aborted) break;
774
1144
 
@@ -808,7 +1178,37 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
808
1178
  });
809
1179
  console.log(` [${agentId}] 🔧 ${name}: ${JSON.stringify(parsedArgs).slice(0, 120)}`);
810
1180
  toolsUsed.push(name);
1181
+ toolsUsedThisTurn++; // Fix 10: track per-turn for _isTaskComplete gating
811
1182
  emptyRetries = 0; // reset on successful tool call
1183
+ recentOutputs.length = 0; // reset repeated-output tracker on any tool execution
1184
+ // Track consecutive browser/screenshot calls on a locally-built app.
1185
+ // After 6 such calls the agent has browsed enough — check if done and stop.
1186
+ if (name === 'browser' || name === 'screenshot_and_describe') localBrowserTurns++;
1187
+ else if (name === 'write_file' || name === 'bash') localBrowserTurns = 0; // reset on real work
1188
+ if (localBrowserTurns >= 6 && toolsUsed.filter(t => t === 'write_file').length > 0) {
1189
+ const originalTask3 = messages.find(m => m.role === 'user')?.content || task;
1190
+ const isDoneBrowse = await this._isTaskComplete(originalTask3, visibleContent || allOutput, controller.signal);
1191
+ if (isDoneBrowse) {
1192
+ console.log(` [${agentId}] ✅ Done after ${localBrowserTurns} browser interactions — stopping`);
1193
+ if (visibleContent) finalContent = visibleContent;
1194
+ taskDoneEarly = true;
1195
+ break;
1196
+ }
1197
+ // Not done yet after 6 browser calls — push a targeted hint rather than silently resetting.
1198
+ // This fires every 6 browser calls to redirect the agent toward evaluation or completion.
1199
+ console.log(` [${agentId}] ⚠️ ${localBrowserTurns} browser interactions without completion — injecting guidance`);
1200
+ messages.push({ role: 'user', content: `You have taken ${localBrowserTurns} screenshots/browser interactions. You need to complete the task or make progress.
1201
+
1202
+ If you are trying to verify that a DYNAMIC feature works (timer counting down, animation playing, real-time updates), STOP using screenshots — they capture a single frozen moment and CANNOT prove motion or state change.
1203
+
1204
+ Use browser evaluate instead to directly check JavaScript state:
1205
+ {"name":"browser","arguments":{"action":"evaluate","script":"document.querySelector('#display').textContent"}}
1206
+ Or to read a value, wait 2 seconds, and compare:
1207
+ {"name":"browser","arguments":{"action":"evaluate","script":"(function(){ return new Promise(r => { var t1 = document.body.innerText; setTimeout(() => r('before: ' + t1.slice(0,50) + ' | after: ' + document.body.innerText.slice(0,50)), 2000); }); })()"}}
1208
+
1209
+ If the CODE is correct and the app LOOKS right, declare the task DONE — you do not need to prove every dynamic behavior via screenshot. State what you verified and what you built, then stop.` });
1210
+ localBrowserTurns = 0; // reset so hint fires again after 6 more if still stuck
1211
+ }
812
1212
 
813
1213
  // Loop detection: catch repeated single calls AND alternating A/B/A/B patterns.
814
1214
  // Normalize curl commands: strip sleep prefix so "sleep 3 && curl ...URL" and
@@ -821,15 +1221,44 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
821
1221
  recentCalls.push(callKey);
822
1222
  if (recentCalls.length > 6) recentCalls.shift();
823
1223
 
1224
+ // Bash-only window: write_file calls don't contaminate bash loop detection.
1225
+ // A write_file between two bash loops was causing the detector to miss patterns
1226
+ // like curl→cat→nohup→write_file(rewrite)→curl→cat→nohup (server-start loop).
1227
+ if (name === 'bash') {
1228
+ recentBashCalls.push(callKey);
1229
+ if (recentBashCalls.length > 6) recentBashCalls.shift();
1230
+ // Detect echo-append pattern: echo '...' >> file (building file line by line)
1231
+ if (/echo\s+['"]/.test(parsedArgs.command || '') && />>\s*\S/.test(parsedArgs.command || '')) {
1232
+ echoAppendCalls++;
1233
+ if (echoAppendCalls >= 4) {
1234
+ const appendTarget = (parsedArgs.command || '').match(/>>[ ]*(\S+)/)?.[1] || 'the file';
1235
+ console.log(` [${agentId}] ⚠️ echo-append loop (${echoAppendCalls}x) — injecting WRITE_FILE hint`);
1236
+ messages.push({ role: 'user', content: `STOP using echo >> to append code line by line — this wastes turns. You have already called echo >> ${echoAppendCalls} times. Use WRITE_FILE with the COMPLETE content of ${appendTarget} in ONE call instead:\n\nWRITE_FILE /abs/path/to/${appendTarget.split('/').pop()}\n\`\`\`\n...complete file content...\n\`\`\`` });
1237
+ echoAppendCalls = 0; // reset so hint only fires once per burst
1238
+ }
1239
+ } else {
1240
+ echoAppendCalls = 0; // non-echo bash call resets the counter
1241
+ }
1242
+ } else if (name === 'write_file') {
1243
+ // A successful write_file is progress; don't reset entirely but clear bash window
1244
+ // so the loop detector starts fresh for the post-rewrite phase.
1245
+ recentBashCalls.length = 0;
1246
+ echoAppendCalls = 0;
1247
+ }
1248
+
824
1249
  // Detect: same call 3x in a row (2x for screenshot — never valid to screenshot without a change)
825
1250
  const screenshotLoop = name === 'screenshot_and_describe' && recentCalls.length >= 2 && recentCalls.slice(-2).every(c => c === callKey);
826
1251
  const last3Same = screenshotLoop || (recentCalls.length >= 3 && recentCalls.slice(-3).every(c => c === callKey));
827
- // Detect: alternating A,B,A,B pattern (last 4 calls)
1252
+ // Detect: alternating A,B,A,B pattern (last 4 calls) — check both windows
828
1253
  const last4 = recentCalls.slice(-4);
829
- const abab = last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1];
830
- // Detect: A,B,C,A,B,C pattern (last 6)
1254
+ const last4bash = recentBashCalls.slice(-4);
1255
+ const abab = (last4.length === 4 && last4[0] === last4[2] && last4[1] === last4[3] && last4[0] !== last4[1])
1256
+ || (last4bash.length === 4 && last4bash[0] === last4bash[2] && last4bash[1] === last4bash[3] && last4bash[0] !== last4bash[1]);
1257
+ // Detect: A,B,C,A,B,C pattern (last 6) — check both windows
831
1258
  const last6 = recentCalls.slice(-6);
832
- const abcabc = last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5];
1259
+ const last6bash = recentBashCalls.slice(-6);
1260
+ const abcabc = (last6.length === 6 && last6[0] === last6[3] && last6[1] === last6[4] && last6[2] === last6[5])
1261
+ || (last6bash.length === 6 && last6bash[0] === last6bash[3] && last6bash[1] === last6bash[4] && last6bash[2] === last6bash[5]);
833
1262
 
834
1263
  if (last3Same || abab || abcabc) {
835
1264
  const pattern = last3Same ? 'same call 3x' : abab ? 'A/B/A/B alternating' : 'A/B/C repeating';
@@ -847,14 +1276,36 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
847
1276
  const openPort = openPortMatch ? openPortMatch[1] : '????';
848
1277
  loopFixMsg += `You are calling 'open http://localhost:${openPort}' repeatedly but the server is not running — opening the browser to a dead port does nothing. You must RESTART THE SERVER first:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${openPort}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup /usr/local/bin/node server.js > /tmp/server.log 2>&1 & sleep 3 && curl -s -o /dev/null -w '%{http_code}' http://localhost:${openPort}"}}\nIf curl returns 000, check the crash: bash cat /tmp/server.log. Fix the crash FIRST. Only call 'open' after curl returns 200.`;
849
1278
  } else if (name === 'bash' && (loopCmd.includes('curl') || loopCmd.includes('http_code'))) {
850
- loopFixMsg += `The server check is looping. Check /tmp/server.log for errors:\n{"name":"bash","arguments":{"command":"cat /tmp/server.log | tail -20"}}\nThen fix the actual error in the code. NEVER change the port.`;
1279
+ // Auto-read crash log now so the hint can include the actual error
1280
+ let crashLogNow = '';
1281
+ try {
1282
+ crashLogNow = String(await this._executeTool('bash', { command: 'cat /tmp/server.log 2>/dev/null | tail -30 || echo "No server.log"' }, workDir, agentId)).trim();
1283
+ } catch {}
1284
+ let serverLoopHint = `The server is stuck in a crash-restart loop — curl keeps returning 000.\n\nLatest crash log:\n${crashLogNow}\n\n`;
1285
+ // If crash log has a SyntaxError, auto-read the code snippet
1286
+ const synMatch = crashLogNow.match(/^(\/[^\n:]+\.(?:js|ts|mjs|cjs)):(\d+)\n/m);
1287
+ if (synMatch && /SyntaxError/.test(crashLogNow)) {
1288
+ const synFile = synMatch[1];
1289
+ const synLine = parseInt(synMatch[2], 10);
1290
+ let snippet = '';
1291
+ try {
1292
+ snippet = String(await this._executeTool('bash', {
1293
+ command: `awk 'NR>=${Math.max(1, synLine - 5)} && NR<=${synLine + 5} {printf "%4d: %s\\n", NR, $0}' "${synFile}" 2>/dev/null`
1294
+ }, workDir, agentId)).trim();
1295
+ } catch {}
1296
+ serverLoopHint += `⚠️ SyntaxError in ${synFile} at line ${synLine}${snippet ? `:\n\`\`\`\n${snippet}\n\`\`\`` : ''}.\n\n`;
1297
+ serverLoopHint += `Fix the syntax error:\n1. write_file to patch only the broken line (do NOT rewrite the whole file unless it is tiny)\n2. Then restart with nohup\nNEVER restart before fixing the syntax error — it will always crash again.`;
1298
+ } else {
1299
+ serverLoopHint += `The error is shown above. Fix the code, then restart. Do NOT call curl or cat again before making a fix.`;
1300
+ }
1301
+ loopFixMsg += serverLoopHint;
851
1302
  } else if (loopCmd.includes('npm install')) {
852
1303
  loopFixMsg += `npm install is looping — packages likely already installed. Skip it and start the server directly with nohup.`;
853
1304
  } else if (name === 'bash' && (loopCmd.includes('/tmp/') && (loopCmd.includes('.js') || loopCmd.includes('node')) && loopCmd.includes('9223'))) {
854
1305
  loopFixMsg += `Your Node.js/CDP script is only READING the page — that is why nothing changes. You need to WRITE A NEW SCRIPT THAT CLICKS.\n\nReplace your /tmp script with one that clicks the target element:\n\nWRITE_FILE /tmp/cdp_click.js\n\`\`\`javascript\nconst ws = new WebSocket('ws://localhost:9223/devtools/page/TAB_ID_HERE');\nws.onopen = () => {\n // Click element containing the text you need (change "Filter" to what you see on the page)\n ws.send(JSON.stringify({id:1, method:'Runtime.evaluate', params:{expression: 'Array.from(document.querySelectorAll("a,button,input,span,div,th")).find(el=>el.textContent.trim().includes("Filter"))?.click() || "not found"', returnByValue:true}}));\n};\nws.onmessage = e => { console.log(JSON.parse(e.data)); ws.close(); };\nsetTimeout(() => ws.close(), 5000);\n\`\`\`\n\nThen run: bash → /usr/local/bin/node --experimental-websocket /tmp/cdp_click.js\n\nYou CAN click. You CAN interact. Stop saying you cannot — write the clicking script.`;
855
1306
  } else if (name === 'screenshot_and_describe') {
856
1307
  const loopPort = (parsedArgs.url || '').match(/:(\d+)/)?.[1] || '????';
857
- loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing. You have two choices:\n\nA) If the user asked a question or gave feedbackanswer them with TEXT. You do NOT need a screenshot to reply to a conversation. Just write your response.\n\nB) If the app needs to be improved make a CODE CHANGE first, then take ONE screenshot to verify:\n1. read_file the file that needs changing\n2. write_file with the improvement\n3. restart the server: bash pkill+nohup\n4. screenshot ONCE to verify\n\nDo NOT take another screenshot without first doing one of the above.`;
1308
+ loopFixMsg += `You are calling screenshot_and_describe repeatedly — STOP. Taking the same screenshot over and over changes nothing.\n\nIf you are trying to verify DYNAMIC behavior (timer running, animation, countdown, live updates): screenshots CANNOT prove this they capture a frozen moment. Use browser evaluate instead:\n{"name":"browser","arguments":{"action":"evaluate","script":"document.querySelector('#timer-display, .display, #display, [id*=time], [class*=time]')?.textContent || document.body.innerText.slice(0,200)"}}\nOr wait 2s and compare: {"name":"browser","arguments":{"action":"evaluate","script":"(function(){ return new Promise(r => { var t1 = document.body.innerText.slice(0,100); setTimeout(() => r({before:t1, after:document.body.innerText.slice(0,100)}), 2000); }); })()" }}\n\nOtherwise, you have two choices:\nA) If the code is correct and the app looks right — declare the task DONE. You do not need to screenshot every feature.\nB) If something specific is visually wrong make a code change FIRST, then ONE screenshot to verify.`;
858
1309
  } else {
859
1310
  loopFixMsg += `Observe the tool results above, identify what is specifically broken, then make a targeted fix. Do not repeat commands that already ran.`;
860
1311
  }
@@ -862,9 +1313,31 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
862
1313
  messages.push({ role: 'user', content: loopFixMsg });
863
1314
  // Don't fully reset — keep 1 entry so next identical call fires after 2 more (not 3)
864
1315
  recentCalls.splice(0, recentCalls.length - 1);
1316
+ recentBashCalls.splice(0, recentBashCalls.length - 1);
865
1317
  break; // break inner tool loop, let model respond to hint
866
1318
  }
867
1319
 
1320
+ // ── Read-loop detector (cross-turn) ──────────────────────────────
1321
+ // Tracks how many times each file path has been read since the last write_file.
1322
+ // If the agent reads the same file 3+ times without writing anything, it is stuck
1323
+ // in a "read to plan" loop that never produces output — force it to write now.
1324
+ if (name === 'read_file' && parsedArgs.path) {
1325
+ const rp = parsedArgs.path;
1326
+ const readCount = (fileReadCounts.get(rp) || 0) + 1;
1327
+ fileReadCounts.set(rp, readCount);
1328
+ if (readCount >= 3) {
1329
+ const fname = path.basename(rp);
1330
+ console.log(` [${agentId}] 🔁 Read-loop: "${rp}" read ${readCount}x without a write — forcing write`);
1331
+ fileReadCounts.set(rp, 0); // reset so hint can fire again if agent persists
1332
+ messages.push({ role: 'user', content: `STOP. You have read ${fname} ${readCount} times in a row without writing anything. You already have the full file content in your context. Reading it again changes nothing.\n\nSTOP READING. Write your next WRITE_FILE now — put the updated ${fname} content in a fence:\n\nWRITE_FILE ${rp}\n\`\`\`\n...updated content...\n\`\`\`\n\nDo NOT read any more files. Write.` });
1333
+ break; // break inner tool loop
1334
+ }
1335
+ }
1336
+ // Any write_file clears the read counts — fresh slate after actual progress
1337
+ if (name === 'write_file') {
1338
+ fileReadCounts.clear();
1339
+ }
1340
+
868
1341
  const result = await this._executeTool(name, parsedArgs, workDir, agentId);
869
1342
 
870
1343
  this.emit('tool_activity', { agentId, event: 'tool_end', tool: name, description: `✓ ${name}` });
@@ -874,15 +1347,71 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
874
1347
  this.emit('agent_image', { agentId, image: result });
875
1348
  }
876
1349
 
877
- // ── Bash: curl returned 000 = server not running — force log read ──
1350
+ // ── Bash: curl result handling ────────────────────────────────────
878
1351
  if (name === 'bash') {
879
1352
  const resultStr = String(result).trim();
880
1353
  const isCurlZero = resultStr === '000' || resultStr.endsWith('\n000') || /\b000$/.test(resultStr);
881
- if (isCurlZero) {
882
- const logRead = await this._executeTool('bash', { command: 'cat /tmp/server.log 2>/dev/null | tail -30 || echo "No server.log found"' }, workDir, agentId);
883
- messages.push({ role: 'user', content: `[bash result]: 000\n\nThe server is NOT running — curl got 000 (connection refused). Here are the crash logs:\n\n${logRead}\n\nThe server crashed. Read the error above, fix the bug in the code, then restart. Do NOT assume it is running. Do NOT change the port. Fix the actual error.` });
1354
+ const isCurl404 = resultStr === '404' || resultStr.endsWith('\n404') || /\b404$/.test(resultStr);
1355
+ const isCurl200 = resultStr === '200' || resultStr.endsWith('\n200') || /\b200$/.test(resultStr);
1356
+
1357
+ if (isCurlZero || isCurl404) {
1358
+ const logRead = await this._executeTool('bash', { command: 'cat /tmp/server.log 2>/dev/null | tail -40 || echo "No server.log found"' }, workDir, agentId);
1359
+ if (isCurlZero) {
1360
+ // If crash log has a SyntaxError with file:line, auto-read the snippet to save the agent
1361
+ // a read_file round-trip and make the fix obvious
1362
+ let syntaxSnippet = '';
1363
+ const synErrMatch = String(logRead).match(/^(\/[^\n:]+\.(?:js|ts|mjs|cjs)):(\d+)\n/m);
1364
+ if (synErrMatch && /SyntaxError/.test(String(logRead))) {
1365
+ const synFile = synErrMatch[1];
1366
+ const synLine = parseInt(synErrMatch[2], 10);
1367
+ try {
1368
+ const snippet = await this._executeTool('bash', {
1369
+ command: `awk 'NR>=${Math.max(1, synLine - 8)} && NR<=${synLine + 8} {printf "%4d: %s\\n", NR, $0}' "${synFile}" 2>/dev/null`
1370
+ }, workDir, agentId);
1371
+ if (snippet && String(snippet).trim()) {
1372
+ syntaxSnippet = `\n\n⚠️ SYNTAX ERROR in ${synFile} near line ${synLine}. The relevant code:\n\`\`\`\n${snippet}\n\`\`\`\nFix the syntax error in that file BEFORE trying to restart.`;
1373
+ }
1374
+ } catch {}
1375
+ }
1376
+ messages.push({ role: 'user', content: `[bash result]: 000 (connection refused — server is NOT running)\n\nCrash log:\n${logRead}${syntaxSnippet}\n\nThe server crashed or never started. Fix the actual error shown above. Do NOT assume it is running. Do NOT change the port. Make a targeted fix to the code then restart.` });
1377
+ } else {
1378
+ messages.push({ role: 'user', content: `[bash result]: 404 (server is running but root route not found)\n\nServer log:\n${logRead}\n\nCommon cause: static files path is wrong. In server.js: (1) express.static must use path.join(__dirname, 'public'); (2) any res.sendFile for the root route must use path.join(__dirname, 'public', 'index.html') — NEVER path.join(__dirname, 'index.html') or relative paths. Fix and restart. Do NOT rewrite the whole file.` });
1379
+ }
884
1380
  continue;
885
1381
  }
1382
+
1383
+ // ── curl 200: server confirmed running — open in AgentForge browser ──
1384
+ // Platform responsibility: always show the user their app the moment it's live.
1385
+ // Agent does not need to call 'open' — the platform handles it here.
1386
+ if (isCurl200) {
1387
+ const curlCmd = parsedArgs.command || '';
1388
+ const portMatch = curlCmd.match(/localhost:(\d+)/);
1389
+ if (portMatch) {
1390
+ const appUrl = `http://localhost:${portMatch[1]}`;
1391
+ const { opened } = await this._openInBrowser(appUrl, agentId);
1392
+ // ── Register project in global registry so other agents can find it ──
1393
+ try {
1394
+ const REGISTRY = '/tmp/agentforge/projects.json';
1395
+ const registry = existsSync(REGISTRY) ? JSON.parse(readFileSync(REGISTRY, 'utf8')) : {};
1396
+ // Derive a readable project name: prefer Desktop/Projects subdir name, else workDir basename
1397
+ let projectName = path.basename(workDir);
1398
+ const homeDir2 = process.env.HOME || '/tmp';
1399
+ const desktopProjects = `${homeDir2}/Desktop/Projects`;
1400
+ try {
1401
+ // Walk Desktop/Projects for the most recently modified dir — likely the active project
1402
+ const dirs = readdirSync(desktopProjects, { withFileTypes: true })
1403
+ .filter(e => e.isDirectory())
1404
+ .map(e => ({ name: e.name, mtime: statSync(path.join(desktopProjects, e.name)).mtimeMs }))
1405
+ .sort((a, b) => b.mtime - a.mtime);
1406
+ if (dirs.length > 0) projectName = dirs[0].name;
1407
+ } catch {}
1408
+ registry[portMatch[1]] = { port: parseInt(portMatch[1]), path: workDir, agentId, name: projectName, updated: new Date().toISOString() };
1409
+ writeFileSync(REGISTRY, JSON.stringify(registry, null, 2));
1410
+ } catch {}
1411
+ messages.push({ role: 'user', content: `[bash result]: 200 — server is running at ${appUrl}${opened ? '. App opened in browser and screenshot sent to user.' : '.'}\n\nNow call screenshot_and_describe with url:"${appUrl}" and send_to_user:true to verify it looks correct, then iterate to improve it.` });
1412
+ continue;
1413
+ }
1414
+ }
886
1415
  }
887
1416
 
888
1417
  // ALL models get tool results fed back — no model should run blind.
@@ -890,12 +1419,90 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
890
1419
  // must be in context so the model can see what happened and react correctly.
891
1420
  {
892
1421
  const noThink = '';
1422
+
1423
+ // Deployment URL detection: if bash output contains a public HTTPS URL
1424
+ // from a known hosting platform, extract it, persist it to the project registry,
1425
+ // and tell the agent to report it.
1426
+ if (name === 'bash') {
1427
+ const resultStr = String(result);
1428
+ const deployUrlMatch = resultStr.match(/https:\/\/[a-zA-Z0-9._-]+\.(railway\.app|vercel\.app|netlify\.app|fly\.dev|surge\.sh|pages\.dev|web\.app|github\.io|onrender\.com|up\.railway\.app)[^\s]*/);
1429
+ if (deployUrlMatch) {
1430
+ const deployUrl = deployUrlMatch[0];
1431
+ console.log(` [${agentId}] 🌐 Deployment URL detected: ${deployUrl}`);
1432
+ // Persist the live URL (and Railway project name if available) into the registry
1433
+ // so future agents know the deployed URL without re-running CLI commands.
1434
+ try {
1435
+ const REGISTRY = '/tmp/agentforge/projects.json';
1436
+ const registry = existsSync(REGISTRY) ? JSON.parse(readFileSync(REGISTRY, 'utf8')) : {};
1437
+ const entry = Object.values(registry).find(e => e.path === workDir || e.agentId === agentId);
1438
+ if (entry) {
1439
+ entry.liveUrl = deployUrl;
1440
+ // Capture Railway project name if railway status is available
1441
+ try {
1442
+ const { stdout: statusOut } = await execAsync('railway status 2>/dev/null', { cwd: workDir, timeout: 5000 });
1443
+ const projectMatch = statusOut.match(/Project:\s*(.+)/);
1444
+ if (projectMatch) entry.railwayProject = projectMatch[1].trim();
1445
+ } catch {}
1446
+ const key = Object.keys(registry).find(k => registry[k] === entry);
1447
+ if (key) {
1448
+ registry[key] = entry;
1449
+ writeFileSync(REGISTRY, JSON.stringify(registry, null, 2));
1450
+ console.log(` [${agentId}] 💾 Saved live URL to registry: ${deployUrl}`);
1451
+ }
1452
+ }
1453
+ } catch {}
1454
+ messages.push({ role: 'user', content: `[bash result]:\n${resultStr.slice(0, 3000)}\n\nDeployment succeeded. The live URL is: ${deployUrl}\n\nReport this URL to the user as your final response.` });
1455
+ continue;
1456
+ }
1457
+ }
1458
+
1459
+ // After writing files, check if the task is complete — don't just blindly kick "Continue".
1460
+ // Run _isTaskComplete after any write_file call (≥2 tools used so agent has done real work).
1461
+ // Only check once per turn to avoid redundant LLM calls when multiple files are written.
1462
+ // Note: visibleContent may be just "WRITE_FILE" (10 chars) for pure file-write turns — don't
1463
+ // require long visible content here; the write_file result itself is sufficient evidence.
1464
+ if (name === 'write_file' && toolsUsed.length >= 2 && !completionCheckedThisTurn) {
1465
+ completionCheckedThisTurn = true;
1466
+ const originalTask2 = messages.find(m => m.role === 'user')?.content || task;
1467
+ // Use write_file result as context (includes path that was written) + any visible text
1468
+ const completionContext = (visibleContent.length > 10 ? visibleContent + '\n' : '') + 'Just wrote: ' + String(result).slice(0, 500);
1469
+ const isDoneEarly = await this._isTaskComplete(originalTask2, completionContext, controller.signal);
1470
+ if (isDoneEarly) {
1471
+ console.log(` [${agentId}] ✅ Task complete after write_file — stopping`);
1472
+ if (visibleContent) finalContent = visibleContent;
1473
+ taskDoneEarly = true;
1474
+ break; // break inner tool loop; outer loop checks taskDoneEarly
1475
+ }
1476
+ }
893
1477
  if (isImageResult) {
894
1478
  const base64 = result.replace(/^data:image\/\w+;base64,/, '');
895
1479
  messages.push({ role: 'user', content: `[${name} result]: Screenshot captured. Continue with the next step.${noThink}`, images: [base64] });
896
1480
  } else {
897
1481
  const resultText = isImageResult ? '[Screenshot captured]' : String(result).slice(0, 6000);
898
- messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\nContinue with the next step.${noThink}` });
1482
+ // Fix 12/19: after writing an HTML file for a static task, automatically navigate the browser
1483
+ // to the file:// URL so the agent's tab IS on the correct page before the next turn.
1484
+ // Previously we only injected a guidance message, which models often ignored — jumping straight
1485
+ // to screenshot_and_describe with no URL and getting a blank screenshot of the wrong tab.
1486
+ let continueMsg = `Continue with the next step.${noThink}`;
1487
+ if (name === 'write_file' && successfulScreenshots === 0) {
1488
+ const writtenPath = parsedArgs?.path || '';
1489
+ const isHtmlFile = /\.html?$/i.test(writtenPath);
1490
+ const taskLower2 = (messages.find(m => m.role === 'user')?.content || task).toLowerCase();
1491
+ const isStaticTask = isHtmlFile && !/\b(railway|vercel|render|netlify|fly\.io|heroku|deploy|server\.js|express|http\.createserver)\b/.test(taskLower2);
1492
+ if (isStaticTask && writtenPath) {
1493
+ const absolutePath = writtenPath.startsWith('~') ? writtenPath.replace(/^~/, process.env.HOME || '/Users/' + (workDir.split('/')[2] || 'user')) : writtenPath;
1494
+ // Fix 19: auto-navigate the browser tab to the file so it's already loaded.
1495
+ try {
1496
+ await browserAction({ action: 'navigate', url: `file://${absolutePath}` }, agentId);
1497
+ console.log(` [${agentId}] 🌐 Auto-navigated to file://${absolutePath}`);
1498
+ continueMsg = `File written and opened in browser at file://${absolutePath}. Now take a screenshot to verify it looks correct:\n{"name":"browser","arguments":{"action":"screenshot_and_describe","check_for":"the complete app with all required features"}}\n${noThink}`;
1499
+ } catch (navErr) {
1500
+ // Navigation failed — fall back to instruction-only
1501
+ continueMsg = `File written. YOUR NEXT ACTION MUST BE THIS — navigate to the file first, then screenshot:\n1. {"name":"browser","arguments":{"action":"navigate","url":"file://${absolutePath}"}}\n2. {"name":"browser","arguments":{"action":"screenshot_and_describe","check_for":"the complete app"}}\nDO NOT call screenshot_and_describe without url first — you will get a blank screenshot.${noThink}`;
1502
+ }
1503
+ }
1504
+ }
1505
+ messages.push({ role: 'user', content: `[${name} result]:\n${resultText}\n\n${continueMsg}` });
899
1506
 
900
1507
  if (name === 'screenshot_and_describe') {
901
1508
  const screenshotResult = String(result);
@@ -917,26 +1524,45 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
917
1524
  }
918
1525
  // Successful screenshot of a build task — push to make a code change
919
1526
  else if (isLocalhost) {
920
- messages.push({ role: 'user', content: `You have seen the current state. Now make your next improvement: read_file the code, write_file the fix, restart server, then screenshot once to verify.` });
1527
+ // Catch placeholder/hello world pages on localhost — force the model to keep building
1528
+ const screenshotText = String(result).toLowerCase();
1529
+ const isPlaceholder = (
1530
+ screenshotText.includes('hello world') ||
1531
+ screenshotText.includes('cannot get /') ||
1532
+ (screenshotText.includes('express') && screenshotText.includes('error')) ||
1533
+ // Only match "placeholder" as an unbuilt-page indicator, not Gemini describing
1534
+ // a UI element's placeholder attribute (e.g. "Placeholder Text: Start typing...")
1535
+ /\bplaceholder\s*(page|app|content|site)\b/.test(screenshotText) ||
1536
+ screenshotText.includes('coming soon') ||
1537
+ (screenshotText.includes('blank') && !screenshotText.includes('not blank'))
1538
+ );
1539
+ if (isPlaceholder) {
1540
+ messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
1541
+ } else {
1542
+ successfulScreenshots++;
1543
+ if (successfulScreenshots >= 2) {
1544
+ // Agent has confirmed the app works at least twice. Time to wrap up rather
1545
+ // than looping indefinitely on minor improvements.
1546
+ messages.push({ role: 'user', content: `The app is working correctly (confirmed twice). Your task is complete. Write your final reply now: describe what you built, what it does, and how to use it. Do NOT make any more code changes — just reply in text.` });
1547
+ } else {
1548
+ messages.push({ role: 'user', content: `The app is running. If there is one specific thing that is clearly missing or broken, fix it now (read_file → write_file → restart → screenshot). If the app already fulfills all the requirements, skip improvements and write your final reply instead.` });
1549
+ }
1550
+ }
921
1551
  }
922
1552
  // Successful screenshot of a public URL — agent is doing research, let it reason
923
1553
  }
924
- // Catch placeholder/hello world pages — force the model to keep building
925
- const screenshotText = String(result).toLowerCase();
926
- const isPlaceholder = (
927
- screenshotText.includes('hello world') ||
928
- screenshotText.includes('cannot get /') ||
929
- (screenshotText.includes('express') && screenshotText.includes('error')) ||
930
- screenshotText.includes('placeholder') ||
931
- screenshotText.includes('coming soon') ||
932
- (screenshotText.includes('blank') && !screenshotText.includes('not blank'))
933
- );
934
- if (isPlaceholder) {
935
- messages.push({ role: 'user', content: `The screenshot shows a placeholder or empty page — the app is not done yet. Continue writing complete working code. Identify which files still need real implementation and write them now.${noThink}` });
936
- }
937
1554
  }
938
1555
  }
939
1556
  }
1557
+ // Token cap fired mid-WRITE_FILE — the last file written is truncated.
1558
+ // Alert the agent so it knows to complete the file instead of immediately starting the server.
1559
+ if (tokenCapTruncatedFile) {
1560
+ tokenCapTruncatedFile = false;
1561
+ console.log(` [${agentId}] ⚠️ Token cap truncated a file — injecting continuation hint`);
1562
+ messages.push({ role: 'user', content: `⚠️ Your last response was cut off — the file was only partially written. The server will crash with a SyntaxError.\n\nDo NOT run the server yet. First complete the truncated file: read_file it to see where it was cut, then write_file to add the missing code (closing braces, remaining routes, etc.). Make sure the file is syntactically complete before starting the server.` });
1563
+ tokenCapTruncatedFile = false;
1564
+ }
1565
+ if (taskDoneEarly) break; // completion language detected inside tool loop — stop the turn loop
940
1566
  continue; // loop back for next model turn
941
1567
  }
942
1568
 
@@ -946,13 +1572,78 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
946
1572
  const hasContent = combined.trim().length > 30;
947
1573
  const isEmpty = combined.trim().length === 0;
948
1574
 
1575
+ // Structural: agent writing "Running command..." or "WRITE_FILE" headers but no actual tool JSON.
1576
+ // Happens when the model plans multiple steps using the header format but forgets the JSON body.
1577
+ const hasFakeHeaders = (streamContent.match(/^Running command\.\.\./gm) || []).length >= 2 ||
1578
+ /^WRITE_FILE\s*$/m.test(streamContent); // WRITE_FILE with no path on same line
1579
+ if (hasFakeHeaders) {
1580
+ console.log(` [${agentId}] ⚡ Turn ${turn}: agent writing planning headers without tool calls — showing correct format`);
1581
+ messages.push({ role: 'user', content: `You are writing "Running command..." or "WRITE_FILE" as planning text but not outputting actual tool calls.\n\nSTOP PLANNING. Execute now. First step: create the project directory:\n{"name":"bash","arguments":{"command":"mkdir -p ${projectsDir}/PROJECT_NAME && cd ${projectsDir}/PROJECT_NAME && /usr/local/bin/npm init -y && /usr/local/bin/npm install express"}}\n\nThen write files with ABSOLUTE paths:\nWRITE_FILE ${projectsDir}/PROJECT_NAME/server.js\n\`\`\`\nconst express = require('express');\nconst PORT = process.env.PORT || ${assignedPort};\n// complete file here\n\`\`\`\n\nOutput ONLY the bash JSON tool call right now. Nothing else.` });
1582
+ continue;
1583
+ }
1584
+
949
1585
  // Structural: truncated JSON — model started a tool call but stream ended early
950
1586
  const hasTruncatedJson = /\{"name"\s*:\s*"(bash|web_fetch|screenshot_and_describe|read_file|write_file|list_directory)"/i.test(streamContent) && Object.keys(streamToolCalls).length === 0;
951
1587
  if (hasTruncatedJson) {
952
- console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call — kicking to re-output`);
953
- messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
1588
+ consecutiveTruncations++;
1589
+ console.log(` [${agentId}] ⚡ Turn ${turn}: truncated JSON tool call (${consecutiveTruncations}x) — kicking to re-output`);
1590
+
1591
+ // WRITE_FILE called as JSON — model is trying {"name":"WRITE_FILE","path":"...","content":"..."} which
1592
+ // always truncates because file content doesn't fit in a JSON string. Redirect immediately, every time.
1593
+ const isJsonWriteFileCall = /\{"name"\s*:\s*"WRITE_FILE"\s*,\s*"(path|arguments)"/i.test(streamContent);
1594
+ if (isJsonWriteFileCall) {
1595
+ const pathMatch = streamContent.match(/"path"\s*:\s*"([^"]+)"/);
1596
+ const filePath = pathMatch ? pathMatch[1] : '/Users/hamp/Desktop/Projects/PROJECTNAME/filename.js';
1597
+ console.log(` [${agentId}] ⚡ Turn ${turn}: WRITE_FILE used as JSON tool call — correcting to fence format`);
1598
+ consecutiveTruncations = 0;
1599
+ // Permanently inject reminder into system message so it survives all context trims
1600
+ if (messages[0] && messages[0].role === 'system' && !messages[0].content.includes('NEVER use {"name":"WRITE_FILE"')) {
1601
+ messages[0] = { ...messages[0], content: messages[0].content + `\n\n⚠️ WRITE_FILE REMINDER (injected after format error): NEVER use {"name":"WRITE_FILE",...} JSON. ALWAYS use the code-fence format:\nWRITE_FILE /absolute/path/to/file\n\`\`\`\nfull file content\n\`\`\`` };
1602
+ }
1603
+ messages.push({ role: 'user', content: `WRITE_FILE is NOT a JSON tool. It uses a code-fence format — the ONLY correct way:\n\nWRITE_FILE ${filePath}\n\`\`\`\n...complete file content here...\n\`\`\`\n\nOutput ONLY the WRITE_FILE fence now. No JSON, no explanation.` });
1604
+ continue;
1605
+ }
1606
+ // Fix 8: bash command that contains WRITE_FILE — model is confusing WRITE_FILE fence with a shell command.
1607
+ const isBashWriteFile = /\{"name"\s*:\s*"bash"[\s\S]{0,300}WRITE_FILE\s+(\/\S+)/i.test(streamContent);
1608
+ // Fix 18: bash command embedding file content via node -e writeFileSync, echo, cat, etc.
1609
+ // These always truncate because the file content doesn't fit in max_tokens.
1610
+ // After 3+ consecutive truncations, escalate with a firm WRITE_FILE redirect.
1611
+ const isBashEmbedFile = /\{"name"\s*:\s*"bash"[\s\S]{0,200}(writeFileSync|echo|cat\s+<<|printf)[\s\S]{0,200}\.(css|html|js|ts|py|json|txt|md)/i.test(streamContent);
1612
+ const fileNameMatch = streamContent.match(/writeFileSync\s*\(\s*['"`]?([^'"`\s,)]+\.[a-z]{1,5})/i)
1613
+ || streamContent.match(/>\s*['"]?([A-Za-z0-9_.-]+\.(css|html|js|ts|py|json|txt|md))['"]?/i);
1614
+ const fname = fileNameMatch ? fileNameMatch[1] : 'server.js';
1615
+
1616
+ if (isBashWriteFile) {
1617
+ const pathMatch = streamContent.match(/WRITE_FILE\s+(\/[^\s\\'"]+)/);
1618
+ const filePath = pathMatch ? pathMatch[1] : '/path/to/file';
1619
+ console.log(` [${agentId}] ⚡ Turn ${turn}: bash+WRITE_FILE pattern — correcting format`);
1620
+ messages.push({ role: 'user', content: `WRITE_FILE is NOT a bash command. Use the WRITE_FILE fence format directly at the top level (outside any bash call):\n\nWRITE_FILE ${filePath}\n\`\`\`\n...complete file content here...\n\`\`\`\n\nOutput ONLY the WRITE_FILE fence now — do NOT wrap it in a bash tool call.` });
1621
+ } else if (isBashEmbedFile || consecutiveTruncations >= 3) {
1622
+ // Large file embedded in bash always truncates. Redirect to WRITE_FILE.
1623
+ const dirMatch = streamContent.match(/cd\s+([\w/~.-]+)/);
1624
+ const dir = dirMatch ? dirMatch[1] : '/absolute/path/to/dir';
1625
+ const truncCount = consecutiveTruncations;
1626
+ console.log(` [${agentId}] ⚡ Turn ${turn}: bash-embed-file pattern (${truncCount}x) — redirecting to WRITE_FILE`);
1627
+ consecutiveTruncations = 0; // reset after escalation
1628
+ // Check if the task already had a successful write_file — if so, remind agent the file exists
1629
+ const hadPriorWrite = toolsUsed.filter(t => t === 'write_file').length > 0;
1630
+ const priorWriteHint = hadPriorWrite
1631
+ ? `\n\nNOTE: You already wrote a file earlier in this task. Check if you still need to write more files, or if you should instead verify the existing file works.`
1632
+ : '';
1633
+ messages.push({ role: 'user', content: `STOP. Your bash command embeds file content as a string and will ALWAYS be truncated — it cannot work. You have tried this ${truncCount} times in a row.\n\nTo write a file, ALWAYS use WRITE_FILE which handles files of any size:\n\nWRITE_FILE ${dir}/${fname}\n\`\`\`\n...complete file content here...\n\`\`\`${priorWriteHint}` });
1634
+ } else {
1635
+ const isEchoFileWrite = /\{"name"\s*:\s*"bash"[\s\S]{0,300}(echo|cat\s+<<|printf)[\s\S]{0,200}>\s*\S+\.(css|html|js|ts|py|json|txt|md)/i.test(streamContent);
1636
+ if (isEchoFileWrite) {
1637
+ const fnMatch = streamContent.match(/>\s*['"]?(\S+\.(css|html|js|ts|py|json|txt|md))['"]?/i);
1638
+ const fn = fnMatch ? fnMatch[1] : 'the file';
1639
+ messages.push({ role: 'user', content: `Your bash echo/cat command is too large and will always be truncated. You MUST use WRITE_FILE instead — it handles any file size:\n\nWriting ${fn}...\nWRITE_FILE /absolute/path/to/${fn}\n\`\`\`\n...complete file content here...\n\`\`\`` });
1640
+ } else {
1641
+ messages.push({ role: 'user', content: 'Your tool call was cut off. Output the complete JSON on one line now.' });
1642
+ }
1643
+ }
954
1644
  continue;
955
1645
  }
1646
+ consecutiveTruncations = 0; // reset on any successful parse
956
1647
 
957
1648
  // Structural: empty response — model produced nothing
958
1649
  if (isEmpty) {
@@ -965,23 +1656,150 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
965
1656
  console.log(` [${agentId}] ⚠️ Turn ${turn}: empty after 3 retries`);
966
1657
  }
967
1658
 
1659
+ // Structural: model echoed tool result / non-JSON bracket text as plain output.
1660
+ // visibleContent=0 despite having raw content means inJsonBlob fired on a false-positive
1661
+ // (e.g. "[bash result]: ..." starts with "[") or model output was all inside <think>.
1662
+ // Either way: no tool calls, nothing visible, task not done — kick it to continue.
1663
+ if (!isEmpty && visibleContent.length === 0 && toolsUsedThisTurn === 0 && toolsUsed.length > 0) {
1664
+ console.log(` [${agentId}] ⚡ Turn ${turn}: raw output with 0 visible content and no tool calls — model echoed tool result or thought-only response, kicking to continue`);
1665
+ messages.push({ role: 'user', content: 'You echoed a result instead of making your next tool call. Keep going — call the next tool now.' });
1666
+ continue;
1667
+ }
1668
+
1669
+ // Repeated output detection — context overflow causes model to output same text repeatedly.
1670
+ // Normalize whitespace, strip "I will not"/"task is complete" boilerplate, then compare.
1671
+ // If we see the same output 2+ times in a row with no tool calls, hard-stop.
1672
+ if (hasContent) {
1673
+ const normalizedOutput = combined.trim().replace(/\s+/g, ' ').slice(0, 300);
1674
+ recentOutputs.push(normalizedOutput);
1675
+ if (recentOutputs.length > 4) recentOutputs.shift();
1676
+ // Check: last 3 outputs identical (context maxed — repeating same text)
1677
+ const last3Same = recentOutputs.length >= 3 &&
1678
+ recentOutputs[recentOutputs.length - 1] === recentOutputs[recentOutputs.length - 2] &&
1679
+ recentOutputs[recentOutputs.length - 2] === recentOutputs[recentOutputs.length - 3];
1680
+ if (last3Same) {
1681
+ console.log(` [${agentId}] 🛑 Repeated identical output detected — context likely maxed. Hard-stopping.`);
1682
+ finalContent = combined.trim();
1683
+ break;
1684
+ }
1685
+ }
1686
+
1687
+ // Structural: agent outputting code as chat text instead of writing files.
1688
+ // Detected by markdown code fences (```html/css/js) in visible output.
1689
+ // This happens when tool calls fail repeatedly and the agent falls back to showing code.
1690
+ // Redirect to WRITE_FILE — never accept code dumps as a substitute for file writes.
1691
+ if (hasContent && /```(html|css|js|javascript|typescript|python|json)/i.test(visibleContent)) {
1692
+ console.log(` [${agentId}] ⚡ Turn ${turn}: agent dumping code as chat text — redirecting to WRITE_FILE`);
1693
+ messages.push({ role: 'user', content: 'Do NOT show code in chat. You MUST write files to disk using WRITE_FILE:\n\nWriting filename.ext...\nWRITE_FILE /absolute/path/to/filename.ext\n```\n...complete file content here...\n```\n\nWrite every file now.' });
1694
+ continue;
1695
+ }
1696
+
968
1697
  // Structural: agent hasn't used any tools yet — it must act before it can answer
969
1698
  if (toolsUsed.length === 0 && hasContent) {
970
- console.log(` [${agentId}] Turn ${turn}: no tools used yet kicking to act`);
971
- messages.push({ role: 'user', content: 'Make your first tool call now.' });
1699
+ // Ask the LLM whether the text response actually satisfies the task.
1700
+ // If complete, stop. If not, kick into tool use.
1701
+ if (visibleContent.length > 50) {
1702
+ const originalTask = messages.find(m => m.role === 'user')?.content || task;
1703
+ const isDone = await this._isTaskComplete(originalTask, visibleContent, controller.signal);
1704
+ if (isDone) {
1705
+ console.log(` [${agentId}] ✅ Turn ${turn}: text-only response satisfies task — stopping`);
1706
+ if (visibleContent) finalContent = visibleContent;
1707
+ break;
1708
+ }
1709
+ }
1710
+ noToolKicks++;
1711
+ const refusalPhrases = ['i cannot', 'i am unable', 'i don\'t have', 'i do not have', 'not possible', 'impossible', 'cannot be done', 'outside my', 'beyond my', 'you must use', 'you should use', 'you will need to', 'recommend using'];
1712
+ const isRefusing = refusalPhrases.some(p => visibleContent.toLowerCase().includes(p));
1713
+ // Also catch "I can't do X but I'll build a prototype/concept instead" pivot patterns
1714
+ const isDowngrading = /\b(prototype|concept|simplified|scaled.down|limited version|mock.up|demo|without.*backend|without.*server|front.?end only|client.?side only)\b/i.test(visibleContent) && /\b(i (can'?t|cannot|am unable|won'?t|will not)|not possible|too complex|beyond|outside)\b/i.test(visibleContent);
1715
+ if (isRefusing || isDowngrading) {
1716
+ console.log(` [${agentId}] ⚡ Turn ${turn}: agent ${isDowngrading ? 'downgrading task' : 'refusing task'} — overriding`);
1717
+ messages.push({ role: 'user', content: 'Do NOT explain what you cannot do. Do NOT build a prototype or simplified version unless explicitly asked. Build the real thing. You have bash, Node.js, npm, and a full browser. Start executing now — first tool call only, no text.' });
1718
+ } else if (noToolKicks >= 3) {
1719
+ // Agent has been kicked 3+ times and still not calling tools — give an explicit example
1720
+ console.log(` [${agentId}] ⚡ Turn ${turn}: no tools after ${noToolKicks} kicks — showing exact format`);
1721
+ messages.push({ role: 'user', content: `STOP writing plans. You have been asked ${noToolKicks} times and have not called a single tool.\n\nHere is exactly what a tool call looks like — output ONLY this, right now:\n{"name":"bash","arguments":{"command":"ls ${workDir}"}}\n\nNothing before it. Nothing after it. No "Running command...", no explanation, no plan. Just that one line of JSON. DO IT NOW.` });
1722
+ } else {
1723
+ console.log(` [${agentId}] ⚡ Turn ${turn}: no tools used yet — kicking to act (${noToolKicks})`);
1724
+ messages.push({ role: 'user', content: 'Stop planning. Make your first tool call now. Output only the JSON, nothing else.' });
1725
+ }
972
1726
  continue;
973
1727
  }
1728
+ noToolKicks = 0; // reset when tools are actually used
974
1729
 
975
- // Semantic: ask the LLM whether the task is actually complete.
976
- // This replaces all regex-based intent detection the model judges its own output.
1730
+ // Mid-task refusal detection — agent used some tools but then refused to continue.
1731
+ // e.g. "I cannot access X" after reading files. Override with capability reminder.
1732
+ // NEVER accept defeat — always push harder with alternative approaches.
977
1733
  if (hasContent && toolsUsed.length > 0) {
1734
+ const midRefusalPhrases = [
1735
+ 'i cannot', 'i am unable', 'i do not have', "i don't have", 'not possible', 'impossible',
1736
+ 'cannot be done', 'outside my capabilities', 'beyond my capabilities', 'you must use',
1737
+ 'you will need to', 'you should use',
1738
+ // Deployment give-up patterns
1739
+ 'all.*methods failed', 'methods have failed', 'deployment.*failed', 'failed.*deployment',
1740
+ 'cannot provide a', 'i must stop', 'have concluded', 'since all', 'every attempt',
1741
+ 'all automated', 'i have tried', 'methods failed sequentially',
1742
+ ];
1743
+ const isMidRefusing = midRefusalPhrases.some(p => {
1744
+ if (p.includes('.*')) return new RegExp(p, 'i').test(visibleContent);
1745
+ return visibleContent.toLowerCase().includes(p);
1746
+ });
1747
+ // Also catch mid-task downgrade: agent did work but is wrapping up with "I cannot fully
1748
+ // replicate X" or pivoting to a framework/stub instead of real implementation.
1749
+ const isMidDowngrading = /\b(prototype|concept|simplified|scaled.down|limited version|mock.up|demo|without.*backend|without.*server|front.?end only|client.?side only|framework only|basic framework|foundational framework)\b/i.test(visibleContent)
1750
+ || /\bi cannot fully (replicate|implement|build|create)/i.test(visibleContent)
1751
+ || /\bthe application is ready for you to interact\b/i.test(visibleContent);
1752
+ if (isMidRefusing || isMidDowngrading) {
1753
+ midRefusalKicks++;
1754
+ const _taskText = messages.find(m => m.role === 'user')?.content || task;
1755
+ const isDeployTask = /\b(deploy|railway|vercel|render|netlify|fly\.io|heroku|public url|live url|host|hosting)\b/i.test(_taskText);
1756
+ if (midRefusalKicks >= 3 && isDeployTask) {
1757
+ // Deployment specifically — try alternative deploy strategy, never give up
1758
+ console.log(` [${agentId}] ⚡ Turn ${turn}: deployment refusal ${midRefusalKicks}x — forcing alternative strategy`);
1759
+ messages.push({ role: 'user', content: `STOP GIVING UP. Deployment is NOT optional. You have bash and Railway CLI at ~/.npm-global/bin/railway.\n\nExact commands to deploy:\n1. cd to project dir\n2. ~/.npm-global/bin/railway up --detach\n3. If "No service" error: ~/.npm-global/bin/railway up --service PROJECT_NAME --detach\n4. Get URL: ~/.npm-global/bin/railway domain\n\nIf Railway fails entirely, fall back: cd project && nohup node server.js > /tmp/server.log 2>&1 & then use ngrok or use python3 -m http.server in public/ dir and report the local URL.\n\nExecute a bash tool call RIGHT NOW. Do not explain. Do not summarize. Run the command.` });
1760
+ } else if (midRefusalKicks >= 3) {
1761
+ // Genuinely stuck on non-deploy task — try one more different approach before giving up
1762
+ console.log(` [${agentId}] ⚡ Turn ${turn}: mid-task refusal ${midRefusalKicks}x — forcing different approach`);
1763
+ messages.push({ role: 'user', content: `You keep saying you cannot do this, but giving up is NOT acceptable. Try a completely different approach. You have bash, Node.js, npm, WRITE_FILE, and a full browser. What is the simplest possible working solution? Do it now — execute a tool call immediately.` });
1764
+ } else if (isMidDowngrading) {
1765
+ console.log(` [${agentId}] ⚡ Turn ${turn}: mid-task downgrade detected — overriding (${midRefusalKicks})`);
1766
+ messages.push({ role: 'user', content: `Do NOT deliver a framework, prototype, or stub. Build the real thing. You have bash, Node.js, npm, canvas, and a full browser. Keep going — implement it fully now.` });
1767
+ } else {
1768
+ console.log(` [${agentId}] ⚡ Turn ${turn}: mid-task refusal detected — overriding (${midRefusalKicks})`);
1769
+ messages.push({ role: 'user', content: `You have bash access and can run any shell command. Stop saying you cannot. Try a different approach. Execute a tool call now — no explanations.` });
1770
+ }
1771
+ continue;
1772
+ }
1773
+ }
1774
+ midRefusalKicks = 0; // reset when agent proceeds normally
1775
+
1776
+ // Semantic: ask the LLM whether the task is actually complete.
1777
+ // Fix 10: only fire when the CURRENT turn actually used tools (toolsUsedThisTurn > 0),
1778
+ // OR when many turns have passed (turn >= 5). Using the cumulative toolsUsed.length caused
1779
+ // premature kicks on mid-plan text outputs (agent says "I will click the buttons" → gets
1780
+ // kicked → abandons button-click plan and starts a Node server instead).
1781
+ if (hasContent && (toolsUsedThisTurn > 0 || turn >= 5) && visibleContent.length > 100) {
1782
+ if (successfulScreenshots >= 2) {
1783
+ console.log(` [${agentId}] ✅ Turn ${turn}: app confirmed twice by screenshots — accepting final output`);
1784
+ if (visibleContent) finalContent = visibleContent;
1785
+ break;
1786
+ }
978
1787
  const originalTask = messages.find(m => m.role === 'user')?.content || task;
979
1788
  const isDone = await this._isTaskComplete(originalTask, combined, controller.signal);
980
1789
  if (!isDone) {
981
- console.log(` [${agentId}] ⚡ Turn ${turn}: LLM says task incomplete — kicking`);
982
- messages.push({ role: 'user', content: 'You have not completed the task yet. Try a different approach and keep going.' });
1790
+ incompleteKicks++;
1791
+ console.log(` [${agentId}] Turn ${turn}: LLM says task incomplete kicking (${incompleteKicks}/3)`);
1792
+ // After 3 consecutive incomplete verdicts, the agent likely has the answer
1793
+ // but is adding self-doubt text. Force-complete to stop the spiral.
1794
+ if (incompleteKicks >= 3) {
1795
+ console.log(` [${agentId}] 🛑 3 incomplete verdicts — forcing completion with current output`);
1796
+ if (visibleContent) finalContent = visibleContent;
1797
+ break;
1798
+ }
1799
+ messages.push({ role: 'user', content: 'The task is not complete yet. Continue making progress.' });
983
1800
  continue;
984
1801
  }
1802
+ incompleteKicks = 0; // reset on success
985
1803
  console.log(` [${agentId}] ✅ Turn ${turn}: LLM confirmed task complete`);
986
1804
  }
987
1805
  }
@@ -993,6 +1811,19 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
993
1811
  }
994
1812
 
995
1813
  if (!finalContent && allOutput) finalContent = allOutput;
1814
+ // Final safety strip — remove any <think> blocks that leaked through the per-token filter
1815
+ if (finalContent) finalContent = finalContent.replace(/<think>[\s\S]*?<\/think>/g, '').replace(/<\/?think>/g, '').trim();
1816
+
1817
+ // Quality gate: reject finalContent that is just a tool header with no real text.
1818
+ // e.g. "WRITE_FILE" or "{\"name\":" — these are tool invocations, not agent replies.
1819
+ if (finalContent) {
1820
+ const fc = finalContent.trim();
1821
+ const isToolHeader = /^WRITE_FILE\b|^READ_FILE\b|^\{"name":|^{"name":/.test(fc) || fc.length < 15;
1822
+ if (isToolHeader) {
1823
+ console.log(` [${agentId}] ⚠️ finalContent looks like a tool header ("${fc.slice(0, 40)}") — requesting summary`);
1824
+ finalContent = '';
1825
+ }
1826
+ }
996
1827
 
997
1828
  // If still no output (model did only tool calls, never wrote text), ask for a summary.
998
1829
  // Use only the last 6 messages to avoid context overflow after many tool-call turns.
@@ -1067,6 +1898,8 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1067
1898
 
1068
1899
  const duration = Date.now() - startTime;
1069
1900
  this.activeAgents.delete(agentId);
1901
+ this._taskVisionModel = null;
1902
+ this._taskProviderKeys = null;
1070
1903
 
1071
1904
  this.emit('agent_completed', {
1072
1905
  agentId,
@@ -1076,10 +1909,13 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1076
1909
  });
1077
1910
 
1078
1911
  console.log(`\n✅ [Ollama] Agent ${agentId} completed in ${(duration / 1000).toFixed(2)}s\n`);
1912
+ releaseAgentTab(agentId);
1079
1913
  return { success: true, agentId, duration, result: { output: finalContent } };
1080
1914
 
1081
1915
  } catch (err) {
1082
1916
  this.activeAgents.delete(agentId);
1917
+ this._taskVisionModel = null;
1918
+ this._taskProviderKeys = null;
1083
1919
 
1084
1920
  if (err.name === 'AbortError' || controller.signal.aborted) {
1085
1921
  this.emit('agent_cancelled', { agentId });
@@ -1114,6 +1950,26 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1114
1950
  return Array.from(this.activeAgents.values());
1115
1951
  }
1116
1952
 
1953
+ // ─── Open URL in AgentForge browser ──────────────────────────────────────
1954
+ // Single abstraction for navigating the user-facing browser.
1955
+ // Uses browserAction (puppeteer-core) — never raw CDP WebSocket directly.
1956
+ // Called by: bash 'open' intercept, curl 200 auto-launch.
1957
+ async _openInBrowser(url, agentId = 'agent') {
1958
+ try {
1959
+ await browserAction({ action: 'navigate', url }, agentId);
1960
+ await new Promise(r => setTimeout(r, 1500)); // let page render
1961
+ const shot = await browserAction({ action: 'screenshot' }, agentId);
1962
+ if (shot && shot.__screenshot) {
1963
+ this.emit('agent_image', { agentId, image: `data:image/png;base64,${shot.base64}` });
1964
+ }
1965
+ console.log(` [${agentId}] 🌐 Opened ${url} in AgentForge browser`);
1966
+ return { opened: true };
1967
+ } catch (err) {
1968
+ console.log(` [${agentId}] ⚠️ _openInBrowser(${url}): ${err.message}`);
1969
+ return { opened: false };
1970
+ }
1971
+ }
1972
+
1117
1973
  // ─── Tool execution ───────────────────────────────────────────────────────
1118
1974
 
1119
1975
  async _executeTool(name, args, workDir, agentId = 'agent') {
@@ -1144,40 +2000,32 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1144
2000
  }
1145
2001
  }
1146
2002
 
1147
- // Intercept "open http://..." — navigate the AgentForge CDP browser directly,
1148
- // then auto-screenshot so the agent immediately sees what it built.
2003
+ // Intercept bash calls where command is exactly a tool name — model confused tool names
2004
+ // with CLI commands. e.g. {"name":"bash","arguments":{"command":"screenshot_and_describe"}}
2005
+ // Most common pattern: screenshot_and_describe / web_fetch called inside bash.
2006
+ const cmdTrimmed = args.command.trim().replace(/\s+.*$/, ''); // first word only
2007
+ if (cmdTrimmed === 'screenshot_and_describe') {
2008
+ console.log(` [${agentId}] 🔀 bash("screenshot_and_describe") → redirecting to screenshot_and_describe tool`);
2009
+ const urlMatch = args.command.match(/https?:\/\/\S+/);
2010
+ const result = await this._screenshotAndDescribe(urlMatch ? urlMatch[0] : null, null, agentId);
2011
+ if (this._lastScreenshotData) { this.emit('agent_image', { agentId, image: this._lastScreenshotData }); this._lastScreenshotData = null; }
2012
+ return result;
2013
+ }
2014
+
2015
+ // Intercept "open http://..." — navigate the AgentForge browser via _openInBrowser,
2016
+ // then get an AI description so the agent can reason about what it built.
1149
2017
  const openUrlMatch = args.command.trim().match(/^open\s+(https?:\/\/\S+)/);
1150
2018
  if (openUrlMatch) {
1151
2019
  const targetUrl = openUrlMatch[1];
1152
- let openedViaCDP = false;
1153
- try {
1154
- const newTabRes = await fetch('http://127.0.0.1:9223/json/new', { method: 'PUT', signal: AbortSignal.timeout(3000) });
1155
- const newTabData = await newTabRes.json();
1156
- const tabWs = new WebSocket(`ws://127.0.0.1:9223/devtools/page/${newTabData.id}`);
1157
- await new Promise(r => tabWs.on('open', r));
1158
- await new Promise(r => {
1159
- let navigated = false;
1160
- tabWs.send(JSON.stringify({ id: 1, method: 'Page.navigate', params: { url: targetUrl } }));
1161
- tabWs.on('message', () => { if (!navigated) { navigated = true; tabWs.close(); r(); } });
1162
- setTimeout(() => { tabWs.close(); r(); }, 3000);
1163
- });
1164
- openedViaCDP = true;
1165
- } catch {
1166
- // CDP unavailable — fall through to OS open
1167
- try { await execAsync(`open "${targetUrl}"`); } catch {}
1168
- }
1169
- // Auto-screenshot after opening so the agent sees what it built.
1170
- // Wait for page to load, then call screenshot_and_describe.
1171
- await new Promise(r => setTimeout(r, 2500));
2020
+ const { opened } = await this._openInBrowser(targetUrl, agentId);
2021
+ // Get AI description for agent context (screenshot already emitted by _openInBrowser)
2022
+ await new Promise(r => setTimeout(r, 800));
1172
2023
  try {
1173
- const screenshotResult = await this._executeTool('screenshot_and_describe', {
1174
- url: targetUrl,
1175
- check_for: 'the running application',
1176
- send_to_user: true
1177
- }, workDir, agentId);
1178
- return `Opened ${targetUrl} in browser${openedViaCDP ? ' (AgentForge browser)' : ''}.\n\nVisual snapshot of what is currently visible:\n${screenshotResult}`;
2024
+ const desc = await this._screenshotAndDescribe(targetUrl, 'the running application', agentId);
2025
+ this._lastScreenshotData = null; // suppress duplicate emit — raw already sent above
2026
+ return `Opened ${targetUrl}${opened ? ' in AgentForge browser' : ''}.\n\nWhat is currently visible:\n${desc}`;
1179
2027
  } catch {
1180
- return `Opened ${targetUrl} in browser. (Screenshot failed verify with screenshot_and_describe)`;
2028
+ return `Opened ${targetUrl}${opened ? ' in AgentForge browser' : ''}.`;
1181
2029
  }
1182
2030
  }
1183
2031
 
@@ -1186,18 +2034,39 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1186
2034
  let bashCwd = workDir;
1187
2035
  const _home = process.env.HOME || '/tmp';
1188
2036
  try { if (!existsSync(bashCwd)) bashCwd = _home; } catch { bashCwd = _home; }
1189
- // Background commands (ending with &) return no stdout the model interprets
1190
- // silence as failure and loops. Run them, then read back any log file to confirm.
2037
+ // Inject a PATH that includes the directories needed to find node/npm/python3
2038
+ // regardless of how the worker was started (nohup/launchd strip the user PATH).
2039
+ // process.execPath is the node binary running this worker — its directory always
2040
+ // contains npm too, and is correct on any machine/version/install method.
2041
+ const bashEnv = {
2042
+ ...process.env,
2043
+ PATH: [
2044
+ path.dirname(process.execPath), // node + npm, always matches running version
2045
+ '/usr/local/bin', // homebrew, system tools
2046
+ '/usr/local/sbin',
2047
+ process.env.HOME ? `${process.env.HOME}/.npm-global/bin` : '',
2048
+ process.env.PATH || '',
2049
+ ].filter(Boolean).join(':'),
2050
+ };
2051
+ // Background commands (ending with &): use spawn with detached+stdio:ignore so the
2052
+ // child process is fully detached from our pipe FDs and returns immediately.
2053
+ // Using execAsync here hangs for the full 120s timeout because the background process
2054
+ // inherits the exec pipe and keeps it open as long as the server runs.
1191
2055
  const isBackground = /&\s*$/.test(args.command.trim());
1192
- const { stdout, stderr } = await execAsync(args.command, {
1193
- cwd: bashCwd,
1194
- timeout: 120000,
1195
- maxBuffer: 1024 * 1024 * 2 // 2MB
1196
- });
1197
- const out = (stdout + stderr).trim();
1198
- if (isBackground && !out) {
1199
- // Give the process a moment to start, then check /tmp/server.log if it exists
1200
- await new Promise(r => setTimeout(r, 1500));
2056
+ if (isBackground) {
2057
+ // Strip trailing & — spawn will run detached
2058
+ const cmd = args.command.replace(/&\s*$/, '').trim();
2059
+ await new Promise((resolve) => {
2060
+ const child = spawn('/bin/sh', ['-c', cmd], {
2061
+ cwd: bashCwd,
2062
+ env: bashEnv,
2063
+ detached: true,
2064
+ stdio: 'ignore',
2065
+ });
2066
+ child.unref();
2067
+ // Give the process a moment to start up, then read back any log file
2068
+ setTimeout(resolve, 1500);
2069
+ });
1201
2070
  let confirmation = 'Background process started.';
1202
2071
  try {
1203
2072
  const logContent = readFileSync('/tmp/server.log', 'utf-8').trim().split('\n').slice(-3).join('\n');
@@ -1205,12 +2074,33 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1205
2074
  } catch { /* no log yet */ }
1206
2075
  return confirmation;
1207
2076
  }
1208
- return out || '(no output)';
2077
+ const { stdout, stderr } = await execAsync(args.command, {
2078
+ cwd: bashCwd,
2079
+ timeout: 120000,
2080
+ maxBuffer: 1024 * 1024 * 2, // 2MB
2081
+ env: bashEnv,
2082
+ });
2083
+ const rawOut = (stdout + stderr).trim() || '(no output)';
2084
+ // Truncate large outputs to prevent context flooding (e.g. npm install, large file cats)
2085
+ const MAX_BASH_OUTPUT = 3000;
2086
+ if (rawOut.length > MAX_BASH_OUTPUT) {
2087
+ const head = rawOut.slice(0, 500);
2088
+ const tail = rawOut.slice(-2000);
2089
+ return `${head}\n...(${rawOut.length - 2500} chars omitted)...\n${tail}`;
2090
+ }
2091
+ return rawOut;
1209
2092
  }
1210
2093
 
1211
2094
  case 'read_file': {
1212
2095
  const fp = this._resolvePath(args.path, workDir);
1213
- return readFileSync(fp, 'utf-8');
2096
+ const fileContent = readFileSync(fp, 'utf-8');
2097
+ const MAX_READ_OUTPUT = 8000;
2098
+ if (fileContent.length > MAX_READ_OUTPUT) {
2099
+ const head = fileContent.slice(0, 3000);
2100
+ const tail = fileContent.slice(-3000);
2101
+ return `${head}\n...(${fileContent.length - 6000} chars omitted — file is ${fileContent.length} chars total)...\n${tail}`;
2102
+ }
2103
+ return fileContent;
1214
2104
  }
1215
2105
 
1216
2106
  case 'write_file': {
@@ -1242,7 +2132,7 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1242
2132
 
1243
2133
  if (target === 'browser') {
1244
2134
  // Navigate + screenshot via CDP on agent browser (port 9223)
1245
- return await this._cdpScreenshot(args.url, tmpFile);
2135
+ return await this._cdpScreenshot(args.url, tmpFile, agentId);
1246
2136
  } else {
1247
2137
  // Full screen capture
1248
2138
  await execAsync(`screencapture -x "${tmpFile}"`);
@@ -1253,7 +2143,7 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1253
2143
  }
1254
2144
 
1255
2145
  case 'screenshot_and_describe': {
1256
- const result = await this._screenshotAndDescribe(args.url, args.check_for);
2146
+ const result = await this._screenshotAndDescribe(args.url, args.check_for, agentId);
1257
2147
  // Always send screenshot to user — agent called this tool, user should always see it
1258
2148
  if (this._lastScreenshotData) {
1259
2149
  this.emit('agent_image', { agentId, image: this._lastScreenshotData });
@@ -1263,7 +2153,22 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1263
2153
  }
1264
2154
 
1265
2155
  case 'browser': {
1266
- const result = await browserAction(args);
2156
+ // Intercept browser→screenshot_and_describe misuse — agent confused the browser action
2157
+ // namespace with the standalone tool name. Redirect to the real vision handler so the
2158
+ // agent gets back a text description it can reason about, not just "Image sent to chat."
2159
+ if (args.action === 'screenshot_and_describe' || args.action === 'describe') {
2160
+ const result = await this._screenshotAndDescribe(args.url || null, args.check_for || null, agentId);
2161
+ if (this._lastScreenshotData) {
2162
+ this.emit('agent_image', { agentId, image: this._lastScreenshotData });
2163
+ this._lastScreenshotData = null;
2164
+ }
2165
+ return result;
2166
+ }
2167
+ const t0 = Date.now();
2168
+ const result = await browserAction(args, agentId);
2169
+ const elapsed = Date.now() - t0;
2170
+ const resultPreview = typeof result === 'string' ? result.slice(0, 200) : (result?.__screenshot ? `[screenshot ${Math.round((result.base64?.length||0)*0.75/1024)}KB]` : JSON.stringify(result).slice(0,200));
2171
+ console.log(` [${agentId}] 🌐 browser(${args.action}) → ${elapsed}ms → ${resultPreview.replace(/\n/g,' ')}`);
1267
2172
  if (result && result.__screenshot) {
1268
2173
  const imgData = `data:image/png;base64,${result.base64}`;
1269
2174
  this.emit('agent_image', { agentId, image: imgData });
@@ -1281,81 +2186,51 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1281
2186
  }
1282
2187
 
1283
2188
  // ─── CDP browser screenshot ───────────────────────────────────────────────
2189
+ // Uses the persistent browserAction connection (puppeteer-core) — never raw CDP WebSocket.
2190
+ // This reuses the existing connection to port 9223 with ad blocking already active.
1284
2191
 
1285
- async _cdpScreenshot(navigateUrl, tmpFile) {
1286
- const CDP_PORT = 9223;
1287
-
1288
- // Always create a NEW tab — never hijack the dashboard or other existing tabs
1289
- const newTabRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
1290
- const newTabData = await newTabRes.json();
1291
- const tabId = newTabData.id;
1292
-
1293
- return new Promise((resolve, reject) => {
1294
- const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
1295
- let msgId = 1;
1296
- const pending = new Map();
1297
-
1298
- const send = (method, params = {}) => new Promise((res, rej) => {
1299
- const id = msgId++;
1300
- pending.set(id, { resolve: res, reject: rej });
1301
- ws.send(JSON.stringify({ id, method, params }));
1302
- });
1303
-
1304
- ws.addEventListener('message', (evt) => {
1305
- const msg = JSON.parse(evt.data);
1306
- if (msg.id && pending.has(msg.id)) {
1307
- const { resolve: res, reject: rej } = pending.get(msg.id);
1308
- pending.delete(msg.id);
1309
- if (msg.error) rej(new Error(msg.error.message));
1310
- else res(msg.result);
1311
- }
1312
- });
1313
-
1314
- ws.addEventListener('open', async () => {
1315
- try {
1316
- if (navigateUrl) {
1317
- await send('Page.navigate', { url: navigateUrl });
1318
- // Wait for page to fully render
1319
- await new Promise(r => setTimeout(r, 3000));
1320
- }
1321
- const { data } = await send('Page.captureScreenshot', { format: 'png' });
1322
- // Close the temporary tab
1323
- await send('Target.closeTarget', { targetId: tabId }).catch(() => {});
1324
- ws.close();
1325
- resolve(`data:image/png;base64,${data}`);
1326
- } catch (err) {
1327
- ws.close();
1328
- reject(err);
1329
- }
1330
- });
1331
-
1332
- ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
1333
- setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 25000);
1334
- });
2192
+ async _cdpScreenshot(navigateUrl, _tmpFile, agentId = 'agent') {
2193
+ if (navigateUrl) {
2194
+ await browserAction({ action: 'navigate', url: navigateUrl }, agentId);
2195
+ } else {
2196
+ // No navigation page may be mid-render (e.g., after press:Enter form submit or JS SPA update)
2197
+ // Wait for JS to finish rendering before snapping
2198
+ await new Promise(r => setTimeout(r, 1500));
2199
+ }
2200
+ const result = await browserAction({ action: 'screenshot' }, agentId);
2201
+ if (result && result.__screenshot) {
2202
+ return `data:image/png;base64,${result.base64}`;
2203
+ }
2204
+ throw new Error('Screenshot returned no image data');
1335
2205
  }
1336
2206
 
1337
2207
  // ─── Screenshot + vision analysis ─────────────────────────────────────────
1338
2208
  // Takes a screenshot of a URL, then asks the active vision model to describe it.
1339
2209
  // Returns a plain-text description the main agent can reason about.
1340
2210
 
1341
- async _screenshotAndDescribe(url, checkFor) {
2211
+ async _screenshotAndDescribe(url, checkFor, agentId = 'agent') {
1342
2212
  const question = checkFor
1343
- ? `Does this web page look like it's working? Specifically check: ${checkFor}. Describe precisely what you see — the background color, any canvas element, colored shapes (even tiny dots), text, buttons, game elements, or error messages. Is the background dark or white? Are there any colored pixels at all?`
1344
- : `Describe what you see on this web page. What is the background color? Are there any colored shapes, text, buttons, or UI elements? Is there a canvas? Even tiny colored dots count be precise about what you see.`;
2213
+ ? `Look at this web page and specifically find: ${checkFor}. List exactly what you see — exact text, numbers, titles, labels, counts. CRITICAL: Preserve ALL spaces between words exactly as they appear never merge adjacent words or labels together without a space between them. Also note the background color, any canvas element, or visual errors.`
2214
+ : `Describe this web page in full. List ALL visible text content: headlines, titles, labels, numbers, post titles, scores, counts — copy them exactly as shown. CRITICAL: Preserve ALL spaces between words — never concatenate adjacent text elements without a space. If two pieces of text appear next to each other (e.g. a label like "Posted" next to a value like "22 hr. ago"), always write them with a space between them. Then describe the visual layout: background color, UI elements, canvas, any errors.`;
1345
2215
 
1346
- // === Server reachability check — fast fail if server is down ===
1347
- try {
1348
- await fetch(url, { signal: AbortSignal.timeout(4000) });
1349
- } catch (reachErr) {
1350
- const portMatch = url.match(/:(\d+)/);
1351
- const port = portMatch ? portMatch[1] : '?';
1352
- return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
2216
+ // === Server reachability check — only for local dev servers ===
2217
+ // Skipped when url is null (current browser tab) or a public site.
2218
+ const isLocalUrl = url && (url.includes('localhost') || url.includes('127.0.0.1') || url.match(/:\d{4,5}/));
2219
+ if (isLocalUrl) {
2220
+ try {
2221
+ await fetch(url, { signal: AbortSignal.timeout(4000) });
2222
+ } catch (reachErr) {
2223
+ const portMatch = url.match(/:(\d+)/);
2224
+ const port = portMatch ? portMatch[1] : '?';
2225
+ return `SERVER IS NOT REACHABLE at ${url} (${reachErr.message}). The server on port ${port} is not running or crashed. You must restart it using bash before taking a screenshot:\n{"name":"bash","arguments":{"command":"pkill -f 'node.*${port}' 2>/dev/null; sleep 1; cd YOUR_PROJECT_DIR && nohup node server.js > /tmp/server.log 2>&1 & sleep 2 && echo started"}}\nCheck /tmp/server.log for errors if it still fails.`;
2226
+ }
1353
2227
  }
1354
2228
 
1355
- // === HTML dependency audit (always runs fast, reliable) ===
2229
+ // === HTML dependency audit — only for local dev servers ===
1356
2230
  // Fetches the page HTML and checks for common missing client-side dependencies.
1357
- // This catches issues that screenshots can't detect (JS errors, missing script tags).
2231
+ // Skipped for external sites (useless) and null url (current tab).
1358
2232
  let auditNotes = '';
2233
+ if (isLocalUrl) {
1359
2234
  try {
1360
2235
  const htmlRes = await fetch(url, { signal: AbortSignal.timeout(8000) });
1361
2236
  const html = await htmlRes.text();
@@ -1375,46 +2250,31 @@ B16. TEST LIKE A USER: Scroll, click buttons, simulate actions, check different
1375
2250
  auditNotes = `\n\nHTML DEPENDENCY AUDIT FOUND ISSUES:\n${missing.map(m => '- ' + m).join('\n')}`;
1376
2251
  }
1377
2252
  } catch {}
2253
+ } // end isLocalUrl audit block
2254
+
2255
+ // === DOM snapshot (when no URL — current page, or file:// URL) ===
2256
+ // Captures all page text regardless of scroll position. Appended alongside the vision
2257
+ // result so the agent always gets DOM content even when results are below the fold.
2258
+ // Also runs for file:// URLs: vision models sometimes misidentify form inputs or static
2259
+ // elements — the DOM snapshot provides ground-truth element types and values alongside
2260
+ // the visual description so the agent can cross-reference and avoid false rewrites.
2261
+ let domSnapshot = '';
2262
+ if (!url || url.startsWith('file://')) {
2263
+ try {
2264
+ const snap = await browserAction({ action: 'snapshot' }, agentId);
2265
+ if (typeof snap === 'string' && snap.length > 200) {
2266
+ domSnapshot = `\n\n--- DOM snapshot (actual element types and values — use this to verify what is really on the page, not just what it looks like) ---\n${snap}`;
2267
+ }
2268
+ } catch {}
2269
+ }
1378
2270
 
1379
2271
  let imageData;
1380
- const tmpFile = `/tmp/af_verify_${Date.now()}.png`;
1381
2272
 
1382
- // Try AgentForge browser via CDP first
2273
+ // Use the AgentForge browser via browserAction (persistent puppeteer connection, ad blocking active)
1383
2274
  try {
1384
- imageData = await this._cdpScreenshot(url, null);
1385
- } catch (cdpErr) {
1386
- // CDP not available — try puppeteer headless screenshot
1387
- try {
1388
- const puppeteerModule = process.env.HOME + '/.npm-global/lib/node_modules/puppeteer';
1389
- const scriptFile = `/tmp/af_pup_${Date.now()}.js`;
1390
- const nodeScript = `
1391
- const puppeteer = require(${JSON.stringify(puppeteerModule)});
1392
- (async () => {
1393
- const browser = await puppeteer.launch({headless: true, protocolTimeout: 30000, args: ['--no-sandbox','--disable-setuid-sandbox','--disable-gpu','--disable-dev-shm-usage']});
1394
- const page = await browser.newPage();
1395
- await page.setDefaultNavigationTimeout(12000);
1396
- await page.setViewport({width: 1280, height: 900});
1397
- try {
1398
- await page.goto(${JSON.stringify(url)}, {waitUntil: 'domcontentloaded', timeout: 12000}).catch(()=>{});
1399
- await new Promise(r => setTimeout(r, 2500));
1400
- await page.screenshot({path: ${JSON.stringify(tmpFile)}, fullPage: true});
1401
- console.log('puppeteer screenshot ok');
1402
- } finally {
1403
- await browser.close();
1404
- }
1405
- })().then(() => process.exit(0)).catch(e => { console.error(e.message); process.exit(1); });
1406
- `;
1407
- writeFileSync(scriptFile, nodeScript);
1408
- await execAsync(`/usr/local/bin/node "${scriptFile}"`, { timeout: 45000 });
1409
- await execAsync(`rm -f "${scriptFile}"`).catch(() => {});
1410
- const raw = readFileSync(tmpFile).toString('base64');
1411
- await execAsync(`rm -f "${tmpFile}"`).catch(() => {});
1412
- imageData = `data:image/png;base64,${raw}`;
1413
- } catch (pupErr) {
1414
- console.warn(` [screenshot_and_describe] puppeteer failed: ${pupErr.message}`);
1415
- // No screenshot possible — return audit notes only
1416
- return `Cannot take screenshot (CDP: ${cdpErr.message}, puppeteer: ${pupErr.message}). ${auditNotes || 'No dependency issues found in HTML. Check server logs for errors.'}`;
1417
- }
2275
+ imageData = await this._cdpScreenshot(url, null, agentId);
2276
+ } catch (err) {
2277
+ return `Cannot take screenshot: ${err.message}. Is the AgentForge Browser running?${auditNotes}${domSnapshot}`;
1418
2278
  }
1419
2279
 
1420
2280
  // Store imageData so caller can emit to user if send_to_user=true
@@ -1422,39 +2282,95 @@ const puppeteer = require(${JSON.stringify(puppeteerModule)});
1422
2282
 
1423
2283
  const base64 = imageData.replace(/^data:image\/\w+;base64,/, '');
1424
2284
 
1425
- // Use the active model for vision analysis.
1426
- try {
1427
- // /api/chat with images array — supported by all Ollama vision-capable models
1428
- const res = await fetch(`${this.baseUrl}/api/chat`, {
1429
- method: 'POST',
1430
- headers: { 'Content-Type': 'application/json' },
1431
- body: JSON.stringify({
1432
- model: this.model,
1433
- messages: [{ role: 'user', content: question, images: [base64] }],
1434
- stream: false,
1435
- options: { num_ctx: 4096 }
1436
- }),
1437
- signal: AbortSignal.timeout(120000)
1438
- });
2285
+ // Resolve vision backend: use task-level vision model if configured (from modelflow),
2286
+ // otherwise fall back to the agent's primary Ollama model.
2287
+ const taskVisionModel = this._taskVisionModel;
2288
+ const taskGeminiKey = this._taskProviderKeys?.google || null;
2289
+ const isGemini = taskVisionModel && (taskVisionModel.startsWith('google/') || taskVisionModel.startsWith('gemini-'));
2290
+
2291
+ if (isGemini && taskGeminiKey) {
2292
+ // ── Gemini vision via Google AI REST API ──────────────────────────────
2293
+ // Model ID from flow is like "google/gemini-2.5-flash" → strip "google/" prefix
2294
+ const geminiModel = taskVisionModel.startsWith('google/') ? taskVisionModel.slice(7) : taskVisionModel;
2295
+ console.log(` [screenshot_and_describe] Using Gemini vision: ${geminiModel}`);
2296
+ try {
2297
+ const geminiUrl = `https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:generateContent?key=${taskGeminiKey}`;
2298
+ const res = await fetch(geminiUrl, {
2299
+ method: 'POST',
2300
+ headers: { 'Content-Type': 'application/json' },
2301
+ body: JSON.stringify({
2302
+ contents: [{
2303
+ parts: [
2304
+ { text: question },
2305
+ { inline_data: { mime_type: 'image/png', data: base64 } }
2306
+ ]
2307
+ }],
2308
+ generationConfig: { maxOutputTokens: 1024 }
2309
+ }),
2310
+ signal: AbortSignal.timeout(30000)
2311
+ });
2312
+ if (res.ok) {
2313
+ const json = await res.json();
2314
+ const description = json.candidates?.[0]?.content?.parts?.[0]?.text || '';
2315
+ const clean = description.trim();
2316
+ if (clean) {
2317
+ console.log(` [screenshot_and_describe] Gemini: ${clean.slice(0, 200)}`);
2318
+ return `Screenshot analysis of ${url || 'current page'}:\n${clean}${auditNotes}${domSnapshot}`;
2319
+ }
2320
+ } else {
2321
+ const errText = await res.text().catch(() => '');
2322
+ console.warn(` [screenshot_and_describe] Gemini error ${res.status}: ${errText.slice(0, 200)}`);
2323
+ }
2324
+ } catch (err) {
2325
+ console.warn(` [screenshot_and_describe] Gemini vision call failed: ${err.message}`);
2326
+ }
2327
+ } else {
2328
+ // ── Ollama vision (default) ───────────────────────────────────────────
2329
+ try {
2330
+ const res = await fetch(`${this.baseUrl}/api/chat`, {
2331
+ method: 'POST',
2332
+ headers: { 'Content-Type': 'application/json' },
2333
+ body: JSON.stringify({
2334
+ model: this.model,
2335
+ messages: [{ role: 'user', content: question, images: [base64] }],
2336
+ stream: false,
2337
+ options: { num_ctx: 4096 }
2338
+ }),
2339
+ signal: AbortSignal.timeout(120000)
2340
+ });
1439
2341
 
1440
- if (res.ok) {
1441
- const json = await res.json();
1442
- const description = json.message?.content || json.response || '';
1443
- const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
1444
- if (clean) {
1445
- console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
1446
- return `Screenshot analysis of ${url}:\n${clean}${auditNotes}`;
2342
+ if (res.ok) {
2343
+ const json = await res.json();
2344
+ const description = json.message?.content || json.response || '';
2345
+ const clean = description.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
2346
+ if (clean) {
2347
+ console.log(` [screenshot_and_describe] ${clean.slice(0, 200)}`);
2348
+ return `Screenshot analysis of ${url || 'current page'}:\n${clean}${auditNotes}${domSnapshot}`;
2349
+ }
1447
2350
  }
2351
+ } catch (err) {
2352
+ console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
1448
2353
  }
1449
- } catch (err) {
1450
- console.warn(` [screenshot_and_describe] vision call failed: ${err.message}`);
1451
2354
  }
1452
2355
 
1453
- return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}`;
2356
+ return `Screenshot captured but description unavailable. The app is visible at ${url} — use read_file to check the code and make targeted improvements.${auditNotes}${domSnapshot}`;
1454
2357
  }
1455
2358
 
1456
2359
  _resolvePath(p, workDir) {
1457
- return path.isAbsolute(p) ? p : path.join(workDir, p);
2360
+ // Expand ~ to home directory before any other resolution.
2361
+ // path.isAbsolute('~/foo') === false, so without this the path would be
2362
+ // joined with workDir and land in /tmp/agentforge/agents/{id}/~/foo (wrong).
2363
+ if (p.startsWith('~/') || p === '~') {
2364
+ p = p.replace(/^~/, homedir());
2365
+ }
2366
+ if (!path.isAbsolute(p)) return path.join(workDir, p);
2367
+ // Reject paths directly under / (e.g. /index.html, /style.css) — those are filesystem root
2368
+ // and always read-only. Redirect to workDir so the file lands somewhere writable.
2369
+ if (path.dirname(p) === '/') {
2370
+ console.log(` [worker] ⚠️ Path "${p}" is at filesystem root — redirecting to ${workDir}`);
2371
+ return path.join(workDir, path.basename(p));
2372
+ }
2373
+ return p;
1458
2374
  }
1459
2375
 
1460
2376
  _toolDesc(name, args) {
@@ -1472,6 +2388,22 @@ const puppeteer = require(${JSON.stringify(puppeteerModule)});
1472
2388
  }
1473
2389
  case 'take_screenshot':
1474
2390
  return `Screenshot: ${args.url || args.target}`;
2391
+ case 'browser': {
2392
+ const action = args.action || 'browser';
2393
+ if (action === 'navigate' || action === 'open') {
2394
+ try { return `browser → ${new URL(args.url).hostname}`; } catch { return `browser → navigate`; }
2395
+ }
2396
+ if (action === 'snapshot') return 'browser → snapshot page';
2397
+ if (action === 'screenshot') return 'browser → screenshot';
2398
+ if (action === 'click') return `browser → click "${(args.text || args.selector || '').toString().slice(0, 40)}"`;
2399
+ if (action === 'type') return `browser → type into ${(args.selector || 'input').toString().slice(0, 40)}`;
2400
+ if (action === 'tabs') return 'browser → list tabs';
2401
+ if (action === 'evaluate') return 'browser → run JS';
2402
+ if (action === 'scroll') return 'browser → scroll';
2403
+ if (action === 'find_elements') return 'browser → find elements';
2404
+ if (action === 'get_bookmarks') return 'browser → get bookmarks';
2405
+ return `browser → ${action}`;
2406
+ }
1475
2407
  default:
1476
2408
  return name;
1477
2409
  }
@@ -1522,7 +2454,7 @@ const puppeteer = require(${JSON.stringify(puppeteerModule)});
1522
2454
  model: this.model,
1523
2455
  messages: [
1524
2456
  { role: 'system', content: 'You determine if a task is complete. Reply with only "yes" or "no".' },
1525
- { role: 'user', content: `Task: ${task.slice(0, 300)}\n\nAgent output: ${output.slice(0, 600)}\n\nDid the agent fully complete the task with real results (not excuses, not plans, not partial attempts)?` }
2457
+ { role: 'user', content: `Task: ${task.slice(0, 400)}\n\nAgent output (last part):\n${output.slice(-800)}\n\nDid the agent complete ALL requirements of the task? Judge based on evidence of completed actions (files written, commands run, results returned) — NOT based on the agent's own statements about what it can or cannot do. Agent self-assessments and disclaimers are unreliable.\n- For build/server tasks: code must be written AND server must be running locally. Do NOT require cloud deployment (Railway/Vercel/Render/etc.) unless the task explicitly says to deploy or host publicly.\n- For tasks that explicitly mention deploying to Railway/Vercel/Render/Netlify/fly.io/Heroku: there MUST be a live public URL in the output.\n- For research/Q&A tasks: specific facts must be present.\nAnswer "yes" only if ALL stated requirements are done. Answer "no" if ANY required step is missing.` }
1526
2458
  ],
1527
2459
  stream: false,
1528
2460
  think: false,