npm - create-walle - Versions diffs - 0.9.21 → 0.9.23 - Mend

create-walle 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (500) hide show

package/template/wall-e/coding-orchestrator.js CHANGED Viewed

@@ -15,6 +15,24 @@ const agentRunners = require('./agent-runners');
 const { ASK_USER_TOOL, QuestionManager } = require('./tools/question-manager');
 const { detectProject } = require('./tools/project-detector');
 const { normalizeToolCall } = require('./llm/text-tool-calls');
+const {
+  hasVerificationEvidence,
+  hasFailedVerificationAttempt,
+  callName,
+  toolResultSucceeded,
+  normalizeToolCallEvidence,
+  buildAcceptanceContract,
+  collectToolEvidence,
+  validatorFailure,
+  validatorPass,
+  summarizeValidatorFailures,
+} = require('./coding/acceptance-contract');
+const {
+  isFrontendFile,
+  checkFrontendStaticContracts,
+  resolveFrontendEntrypoints,
+} = require('./coding/frontend-verification');
+const { pathToFileURL } = require('node:url');
 // ─── Progress Streaming (Phase 8) ────────────────────────────────────────────
 // Global progress emitter — SSE endpoint and chat handler subscribe to this.
@@ -64,6 +82,10 @@ const {
   resolvePromptCapabilities,
   loadRequestedSkillInstructions,
 } = require('./coding/prompt-capabilities');
+const {
+  routeArtifactCapabilities,
+  hasCapability,
+} = require('./coding/capability-router');
 const { createCodingTranscript } = require('./coding/transcript-writer');
 const { createCodingCapabilities } = require('./coding/capability-broker');
 const {
@@ -72,8 +94,10 @@ const {
 } = require('./coding/compaction-service');
 const {
   emitAgentRunContextWarnings,
-  resolveAgentRunContext,
 } = require('./runtime/agent-run-context');
+const {
+  resolveWallERuntimeProfile,
+} = require('./runtime/walle-runtime');
 const { estimateTokens, estimateMessagesTokens } = require('./context/token-counter');
 const { recoverAllowedTextToolCalls } = require('./llm/text-tool-calls');
@@ -112,17 +136,42 @@ const CODING_TOOLS = [
   {
     name: 'run_shell',
     description: 'Run a shell command. Supports pipes, redirects, and subshells. '
-      + 'Destructive commands (rm, sudo, etc.) are blocked.',
+      + 'Destructive commands (rm, sudo, etc.) are blocked. '
+      + 'For dev servers, watchers, or long builds set background:true (never `&`) and poll with bg_output.',
     input_schema: {
       type: 'object',
       properties: {
         command: { type: 'string', description: 'Shell command to run (e.g., "npm test | tail -20")' },
         timeout_ms: { type: 'number', description: 'Timeout in ms (default 30000)' },
         cwd: { type: 'string', description: 'Working directory (optional)' },
+        background: { type: 'boolean', description: 'Run detached in the background; returns resource_id immediately. Use for dev servers/watchers/long builds instead of `&`.' },
       },
       required: ['command'],
     },
   },
+  {
+    name: 'bg_output',
+    description: 'Read the latest output of a background process started with run_shell {background:true}. Returns status (running/exited), exit code, and the log tail.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        resource_id: { type: 'string', description: 'resource_id returned by run_shell {background:true}' },
+        tail_lines: { type: 'number', description: 'Trailing log lines to return (default 100)' },
+      },
+      required: ['resource_id'],
+    },
+  },
+  {
+    name: 'bg_kill',
+    description: 'Stop a background process started with run_shell {background:true}.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        resource_id: { type: 'string', description: 'resource_id returned by run_shell {background:true}' },
+      },
+      required: ['resource_id'],
+    },
+  },
   {
     name: 'glob',
     description: 'Find files matching a glob pattern (e.g., "**/*.js").',
@@ -174,6 +223,140 @@ const CODING_TOOLS = [
       required: ['url'],
     },
   },
+  {
+    name: 'browser_smoke_test',
+    description: 'Render a URL in headless Chrome, collect JavaScript exceptions, console errors, failed requests, and safely click interactive elements. Use after frontend/UI work; screenshots prove appearance, this proves the page does not break when loaded or clicked.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        url: { type: 'string', description: 'URL to validate; supports file:// local HTML files or localhost URLs from start_static_server.' },
+        viewport: { type: 'string', enum: ['desktop', 'mobile', 'tablet'], description: 'Viewport preset. Default: desktop.' },
+        click_selectors: { type: 'array', items: { type: 'string' }, description: 'Optional selectors to click. Defaults to [onclick], button, [role=button], and hash links.' },
+        max_clicks: { type: 'number', description: 'Maximum interactive elements to click. Default: 20.' },
+        settle_ms: { type: 'number', description: 'Milliseconds to wait after load/clicks. Default: 750.' },
+        timeout_ms: { type: 'number', description: 'Overall timeout. Default: 45000.' },
+      },
+      required: ['url'],
+    },
+  },
+  {
+    name: 'check_url',
+    description: 'Fetch an http:// or https:// URL and report whether it returns a 2xx/3xx response. Use this before claiming a local dev/static server is reachable.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        url: { type: 'string', description: 'URL to fetch.' },
+        timeout_ms: { type: 'number', description: 'Timeout in ms (default 5000).' },
+      },
+      required: ['url'],
+    },
+  },
+  {
+    name: 'web_search',
+    description: 'Search the public web and return result titles, URLs, and snippets. Use to find documentation or error-message references when you do not know the URL; then read the page with web_fetch.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query' },
+        max_results: { type: 'number', description: 'Max results (default 8)' },
+      },
+      required: ['query'],
+    },
+  },
+  {
+    name: 'web_fetch',
+    description: 'Fetch a web page or API endpoint and return extracted text. Use for reading documentation or references found via web_search.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        url: { type: 'string', description: 'URL to fetch' },
+        extract_text: { type: 'boolean', description: 'Strip HTML tags (default true)' },
+      },
+      required: ['url'],
+    },
+  },
+  {
+    name: 'start_static_server',
+    description: 'Start a managed local static file server for a directory, wait for its health URL, and return a verified URL plus resource_id. Prefer this over run_shell background servers.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        directory: { type: 'string', description: 'Directory to serve. Defaults to project cwd.' },
+        port: { type: 'number', description: 'Port to bind. Use 0 or omit for an available port.' },
+        route: { type: 'string', description: 'Route to health-check after start. Default: /index.html.' },
+        timeout_ms: { type: 'number', description: 'Startup timeout in ms (default 5000).' },
+      },
+    },
+  },
+  {
+    name: 'stop_static_server',
+    description: 'Stop a static server started by start_static_server using its resource_id.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        resource_id: { type: 'string', description: 'resource_id returned by start_static_server.' },
+      },
+      required: ['resource_id'],
+    },
+  },
+  {
+    name: 'pdf_info',
+    description: 'Validate a PDF file and return structured metadata such as bytes, page count when available, and hash. Use before reading, summarizing, or claiming a PDF artifact is valid.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        file_path: { type: 'string', description: 'Path to the PDF file.' },
+        max_bytes: { type: 'number', description: 'Maximum allowed file size in bytes (default 32MB).' },
+      },
+      required: ['file_path'],
+    },
+  },
+  {
+    name: 'pdf_render_pages',
+    description: 'Render a bounded PDF page range to image previews using pdftoppm when available. Use page previews to visually inspect generated or input PDFs before claiming success.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        file_path: { type: 'string', description: 'Path to the PDF file.' },
+        pages: { type: 'string', description: 'Page range like "1", "1-3", or "2-". Defaults to "1". Maximum 20 pages.' },
+        output_dir: { type: 'string', description: 'Directory for rendered preview images. Defaults to a temp directory.' },
+        dpi: { type: 'number', description: 'Render DPI, 72-200. Default 144.' },
+      },
+      required: ['file_path'],
+    },
+  },
+  {
+    name: 'pdf_read_pages',
+    description: 'Read text from a bounded PDF page range using pdftotext when available, and include PDF metadata. Use for PDF analysis before answering from a document.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        file_path: { type: 'string', description: 'Path to the PDF file.' },
+        pages: { type: 'string', description: 'Page range like "1", "1-3", or "2-". Defaults to "1-5". Maximum 20 pages.' },
+        max_chars: { type: 'number', description: 'Maximum text characters to return (default 20000).' },
+      },
+      required: ['file_path'],
+    },
+  },
+  {
+    name: 'make_pdf',
+    description: 'Generate a PDF from Markdown or HTML through the configured make-pdf renderer, then validate the output and optionally render page previews. Use for PDF creation instead of claiming a document is done from source text alone.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        input_path: { type: 'string', description: 'Markdown or HTML source file to render.' },
+        output_path: { type: 'string', description: 'Desired PDF output path. Defaults next to input.' },
+        title: { type: 'string', description: 'Optional document title metadata.' },
+        page_size: { type: 'string', description: 'Optional page size such as Letter or A4.' },
+        margins: { type: 'string', description: 'Optional margin preset/string if supported by renderer.' },
+        cover: { type: 'boolean', description: 'Ask the renderer for a cover page when supported.' },
+        toc: { type: 'boolean', description: 'Ask the renderer for a table of contents when supported.' },
+        watermark: { type: 'string', description: 'Optional watermark text when supported.' },
+        render_preview: { type: 'boolean', description: 'Render first-page previews after generation. Default true.' },
+      },
+      required: ['input_path'],
+    },
+  },
   {
     name: 'edit_file',
     description: 'Make a targeted edit to a file by replacing a string match. Uses a 9-strategy fuzzy matching chain — tolerates minor whitespace, indentation, and Unicode differences. More efficient than write_file for modifying existing files.',
@@ -332,8 +515,8 @@ const CODING_TOOLS = [
 // Inspired by OpenCode's agent types (build/plan/explore).
 // We simplify to tool filtering per phase since Wall-E has its own permission checker.
-const READ_ONLY_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
-const REVIEW_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
+const READ_ONLY_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
+const REVIEW_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
 // BUILD uses all CODING_TOOLS (default)
 const READ_ONLY_TOOLS = CODING_TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.name));
@@ -420,6 +603,7 @@ function toolRequiresPermission(name) {
     'apply_patch',
     'multi_edit',
     'browser_screenshot',
+    'browser_smoke_test',
     'applescript',
     'claude_code',
     'mail_send',
@@ -486,6 +670,78 @@ function parsePlan(output) {
   throw new Error('Failed to parse plan: no valid JSON with non-empty subtasks array found');
 }
+/**
+ * Turn free-form text into a lowercase, hyphen-separated slug for use in a branch name.
+ *
+ * @param {*} text - Source text; falsy values are treated as an empty string.
+ * @param {string} [fallback='task'] - Returned when no [a-z0-9] characters survive.
+ * @returns {string} Slug of at most 48 characters with no leading/trailing "-", or `fallback`.
+ */
+function safeBranchSlug(text, fallback = 'task') {
+  const slug = String(text || '')
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .slice(0, 48)
+    // Truncation can cut right after a separator; trim again so the slug never ends in "-".
+    .replace(/-+$/, '');
+  return slug || fallback;
+}
+/**
+ * Heuristic: does the planner's (unparseable) output read like a request for more
+ * information from the user rather than a plan?
+ *
+ * @param {*} [output=''] - Planner output; flattened with contentToText and lowercased.
+ * @returns {boolean} True when any clarification phrase pattern matches; false for blank output.
+ */
+function plannerOutputRequestsClarification(output = '') {
+  const lowered = contentToText(output).toLowerCase();
+  if (lowered.trim() === '') return false;
+  const clarificationPatterns = [
+    // "please provide ...", "could you clarify ..."
+    /\b(?:please|can you|could you)\s+(?:provide|clarify|tell me|share)\b/,
+    // "need more information", "requires additional context"
+    /\b(?:need|needs|require|required)\s+(?:more|additional)\s+(?:information|context|details)\b/,
+    // "which file ...?" style questions (question mark within 120 characters)
+    /\b(?:which|what)\s+(?:file|directory|project|repo|repository|path)\b[\s\S]{0,120}\?/,
+  ];
+  return clarificationPatterns.some((pattern) => pattern.test(lowered));
+}
+/**
+ * Heuristic: does the planner's output read like a refusal of the task?
+ *
+ * @param {*} [output=''] - Planner output; flattened with contentToText and lowercased.
+ * @returns {boolean} True when a refusal phrase pattern matches; false for blank output.
+ */
+function plannerOutputRefusesTask(output = '') {
+  const lowered = contentToText(output).toLowerCase();
+  if (lowered.trim() === '') return false;
+  // "cannot / can't / unable to ... help / comply / perform ..." within 160 characters.
+  const declinesWork = /\b(?:i\s+)?(?:cannot|can't|unable to|not able to)\b[\s\S]{0,160}\b(?:help|comply|perform|complete|do this task)\b/;
+  if (declinesWork.test(lowered)) return true;
+  // Policy/safety wording anywhere in the output.
+  const citesPolicy = /\b(?:unsafe|not allowed|forbidden|against policy)\b/;
+  return citesPolicy.test(lowered);
+}
+/**
+ * Decide whether a planner parse failure should fall back to a recovery plan.
+ * Recovery applies only when the request is action-required in build mode, a cwd is
+ * known, and the planner output neither asks for clarification nor refuses the task.
+ *
+ * @param {{request?: *, output?: *, cwd?: string}} [args]
+ * @returns {boolean}
+ */
+function shouldRecoverPlannerParseFailure({ request, output, cwd } = {}) {
+  const actionable = isActionRequiredPrompt(contentToText(request), { mode: 'build' });
+  if (!actionable || !cwd) return false;
+  const plannerText = contentToText(output);
+  const plannerDeclined = plannerOutputRequestsClarification(plannerText)
+    || plannerOutputRefusesTask(plannerText);
+  return !plannerDeclined;
+}
+/**
+ * Build a single-subtask fallback plan for when the planner did not return parseable
+ * plan JSON, so the build agent can attempt the request directly.
+ *
+ * @param {*} request - The user's request; normalized with contentToText.
+ * @param {object} [context={}] - Reads `relevantFiles` (keys become files_hint, max 12)
+ *   and `plannerNotes` (first 2400 characters are embedded in the prompt).
+ * @param {Error} [parseErr] - The parse failure; its message is recorded when present.
+ * @param {*} [plannerOutput=''] - Raw planner output; first 1600 characters are embedded.
+ * @returns {object} Plan with `branch_name`, `estimated_scope`, `planning_recovery`
+ *   and exactly one entry in `subtasks`.
+ */
+function buildPlannerRecoveryPlan(request, context = {}, parseErr, plannerOutput = '') {
+  const filesHint = Object.keys(context.relevantFiles || {}).slice(0, 12);
+  // Normalize once: the prompt below embeds contentToText(request), so the branch slug
+  // must come from the same text. Slugging a non-string request directly would
+  // stringify it (e.g. "[object Object]") instead of using its text.
+  const requestText = contentToText(request);
+  const plannerNotes = [
+    context.plannerNotes ? `Planner exploration notes:\n${String(context.plannerNotes).slice(0, 2400)}` : '',
+    plannerOutput ? `Unstructured planner output excerpt:\n${contentToText(plannerOutput).slice(0, 1600)}` : '',
+  ].filter(Boolean).join('\n\n');
+  const promptLines = [
+    'The planning model failed to return the strict JSON plan, so this is a recovery build pass.',
+    'Do not stop at analysis, an audit, or another implementation plan.',
+    'Inspect the current workspace, make the concrete code/file changes requested by the user, then run the most relevant verification available.',
+    'If verification is blocked, provide tool-backed evidence of the blocker instead of claiming success.',
+    '',
+    `User request:\n${requestText.trim()}`,
+  ];
+  if (plannerNotes) {
+    promptLines.push('', plannerNotes);
+  }
+  if (parseErr?.message) {
+    promptLines.push('', `Planner failure: ${parseErr.message}`);
+  }
+  return {
+    branch_name: `walle/direct-${safeBranchSlug(requestText)}`,
+    estimated_scope: 'recovered-single-pass',
+    planning_recovery: {
+      strategy: 'single_build_subtask',
+      reason: parseErr?.message || 'planner did not return valid plan JSON',
+    },
+    subtasks: [{
+      id: '1',
+      title: 'Implement request directly',
+      prompt: promptLines.join('\n'),
+      depends_on: [],
+      verify: { test: true, review: true },
+      files_hint: filesHint,
+    }],
+  };
+}
 // buildSubtaskPrompt moved to coding-prompts.js (imported above).
 function contentToText(content) {
@@ -541,13 +797,29 @@ function isLegitimateNoEditResponse(content, toolCallHistory = []) {
 }
 const EDIT_TOOL_NAMES = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
-const MEANINGFUL_ACTION_TOOL_NAMES = new Set([
+const CODING_EXECUTION_TOOL_NAMES = new Set([
   ...EDIT_TOOL_NAMES,
+  'start_coding',
+  'run_skill',
+]);
+const MEANINGFUL_ACTION_TOOL_NAMES = new Set([
+  ...CODING_EXECUTION_TOOL_NAMES,
   'run_shell',
   'browser_screenshot',
+  'browser_smoke_test',
+  'check_url',
+  'url_check',
+  'pdf_info',
+  'pdf_render_pages',
+  'pdf_read_pages',
+  'make_pdf',
   'mcp_call',
+]);
+const SETUP_ONLY_TOOL_NAMES = new Set([
   'load_skill',
   'skill',
+  'skill_loaded',
+  'skill_load_failed',
 ]);
 const ACTION_REQUIRED_PROMPT_RE = /\b(fix|implement|improve|update|change|edit|modify|add|remove|refactor|build|create|write|generate|convert|repair|apply|run|test|verify|make)\b/i;
@@ -562,21 +834,12 @@ const PROSPECTIVE_WORK_RE = new RegExp([
 ].join(''), 'i');
 function hasToolCall(toolCallHistory = [], names = new Set()) {
-  return (toolCallHistory || []).some((call) => names.has(call.name));
+  return (toolCallHistory || []).some((call) => names.has(callName(call)));
 }
-function isVerificationToolCall(call = {}) {
-  const name = String(call.name || '');
-  const input = String(call.inputHash || JSON.stringify(call.input || {}));
-  if (name === 'browser_screenshot') return true;
-  if (name === 'run_shell') {
-    return /\b(?:test|spec|lint|build|typecheck|tsc|pytest|jest|mocha|vitest|playwright|node\s+--(?:test|check)|npm\s+(?:test|run)|pnpm\s+(?:test|run)|yarn\s+(?:test|run)|git\s+diff\s+--check)\b/i.test(input);
-  }
-  return /(?:test|verify|screenshot|diagnostic|lint|build)/i.test(name);
-}
-function hasVerificationEvidence(toolCallHistory = []) {
-  return (toolCallHistory || []).some(isVerificationToolCall);
+/**
+ * True when the history is non-empty and every recorded call is a setup-only tool
+ * (a name in SETUP_ONLY_TOOL_NAMES).
+ *
+ * @param {Array} [toolCallHistory=[]] - Tool-call records; names are resolved via callName.
+ * @returns {boolean}
+ */
+function onlySetupToolCalls(toolCallHistory = []) {
+  const history = toolCallHistory || [];
+  if (!(history.length > 0)) return false;
+  return history.every((entry) => SETUP_ONLY_TOOL_NAMES.has(callName(entry)));
 }
 function isVerificationBlockerResponse(content) {
@@ -602,15 +865,60 @@ function isActionRequiredPrompt(prompt, { mode } = {}) {
   return true;
 }
+/**
+ * Decide whether a prompt is expected to end with file changes.
+ * Requires an action-required prompt whose text (with path-like tokens stripped)
+ * matches FILE_CHANGE_PROMPT_RE. A prompt that also matches NO_CHANGE_TASK_RE counts
+ * only when it contains an explicit edit verb.
+ *
+ * @param {*} prompt - Prompt content; flattened with contentToText.
+ * @param {{mode?: string}} [options] - `mode` is forwarded to isActionRequiredPrompt.
+ * @returns {boolean}
+ */
+function promptRequiresFileChanges(prompt, { mode } = {}) {
+  if (!isActionRequiredPrompt(prompt, { mode })) return false;
+  const stripped = stripPathLikeTokens(contentToText(prompt));
+  if (!FILE_CHANGE_PROMPT_RE.test(stripped)) return false;
+  const looksLikeNoChangeTask = NO_CHANGE_TASK_RE.test(stripped);
+  if (!looksLikeNoChangeTask) return true;
+  // A no-change-looking task still requires edits when an explicit edit verb is present.
+  return /\b(improve|fix|implement|update|change|edit|modify|apply|make|write|create|build)\b/i.test(stripped);
+}
+/**
+ * True when a coding-intent record marks the turn as read-only: `readOnly === true`,
+ * `kind === 'read_only'`, or `expectsChange === false` together with the reason
+ * 'conversational_update_language'. Non-object or missing intents are never read-only.
+ *
+ * @param {object|null} [intent=null]
+ * @returns {boolean}
+ */
+function isReadOnlyCodingIntent(intent = null) {
+  const isObject = Boolean(intent) && typeof intent === 'object';
+  if (!isObject) return false;
+  const flaggedReadOnly = intent.readOnly === true || intent.kind === 'read_only';
+  return flaggedReadOnly
+    || (intent.expectsChange === false && intent.reason === 'conversational_update_language');
+}
 function isPrematureActionResponse(content) {
   const text = contentToText(content);
   if (!text.trim()) return false;
+  if (/\btool budget exhausted\b/i.test(text)) return true;
+  if (/\bwhat was not completed\b/i.test(text)) return true;
+  if (/\bnone of the proposed implementations were written\b/i.test(text)) return true;
+  if (/\bno changes were made\b/i.test(text) && /\b(?:not completed|failed|exhausted|recovery path)\b/i.test(text)) return true;
   if (PROSPECTIVE_WORK_RE.test(text)) return true;
   if (/\bwhat['’]?s wrong\b[\s\S]{0,400}\bfix:/i.test(text)) return true;
+  if (/\b(?:should i|shall i|do you want me to)\s+(?:proceed|continue|apply|implement|make|start|do)\b/i.test(text)) return true;
+  if (/\byour call\b[\s\S]{0,220}\b(?:proceed|continue|phase|prioriti[sz]e|pick|choose|apply|implement)\b/i.test(text)) return true;
+  if (/\b(?:implementation|fix|improvement)\s+plan\b/i.test(text)
+    && /\b(?:next steps?|recommendations?|roadmap|proceed|continue|apply|implement)\b/i.test(text)) return true;
   return false;
 }
+// "Screenshot, self-critique, and fix visual issues" is conditional: the
+// agent only writes files if it FINDS a problem in the screenshot. A clean
+// run that finds nothing to fix is the GOOD outcome — but the title contains
+// "fix", which the FILE_CHANGE_PROMPT_RE below would otherwise treat as an
+// edit task and fail with "Subtask ended without file changes".
+//
+// We trigger only when (a) the title contains an explicit visual-verification
+// keyword (screenshot / self-critique / visual review) AND (b) it LEADS with
+// one. "Review and improve UX" is excluded by (a) — the "improve" is a real
+// edit task, not conditional. "Fix issues found in screenshot" is excluded by
+// (b) — the primary verb is "Fix", screenshot is just context.
+// Matches a visual-verification keyword anywhere in a title.
+const VERIFICATION_KEYWORD_RE = /\b(screenshot|self[-\s]?critique|browser[-\s]?screenshot|visual\s*review)\b/i;
+// Matches only when the title starts (after optional whitespace) with such a keyword.
+const VERIFICATION_LEADS_RE = /^\s*(screenshot|self[-\s]?critique|visual\s*review|browser[-\s]?screenshot)\b/i;
+/**
+ * True when a subtask's title both contains and leads with a visual-verification
+ * keyword, i.e. verification is the subtask's primary purpose.
+ *
+ * @param {{title?: string}} [subtask={}]
+ * @returns {boolean}
+ */
+function isVerificationPrimarySubtask(subtask = {}) {
+  const heading = String(subtask.title || '');
+  return VERIFICATION_KEYWORD_RE.test(heading) && VERIFICATION_LEADS_RE.test(heading);
+}
 function subtaskRequiresFileChanges(subtask = {}) {
+  if (isVerificationPrimarySubtask(subtask)) return false;
   const text = stripPathLikeTokens(`${subtask.title || ''}\n${subtask.prompt || ''}`);
   if (!FILE_CHANGE_PROMPT_RE.test(text)) return false;
   if (NO_CHANGE_TASK_RE.test(text) && !/\b(improve|fix|implement|update|change|edit|modify|apply|make|write|create|build)\b/i.test(text)) {
@@ -620,19 +928,81 @@ function subtaskRequiresFileChanges(subtask = {}) {
 }
 function toolCallHistoryFromLog(log = []) {
-  return (log || [])
-    .flatMap((turn) => (turn.toolCalls || []).map((call) => ({
-      name: call.name,
-      inputHash: JSON.stringify(call.input || {}).slice(0, 500),
-    })));
+  return (log || []).flatMap((turn) => {
+    const results = turn.toolResults || [];
+    return (turn.toolCalls || []).map((call, index) => {
+      const resultRecord = results[index] || {};
+      return normalizeToolCallEvidence(call, resultRecord.result || resultRecord);
+    });
+  });
 }
-function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode, toolsAvailable, nudges = 0, maxNudges = 2, cwd } = {}) {
+function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode, toolsAvailable, nudges = 0, maxNudges = 2, cwd, codingIntent, intent } = {}) {
+  if (isReadOnlyCodingIntent(codingIntent || intent)) return null;
   if (!isActionRequiredPrompt(prompt, { mode })) return null;
   const madeEdits = hasToolCall(toolCallHistory, EDIT_TOOL_NAMES);
+  const requiresVisualEvidence = /\b(?:website|web\s*page|frontend|ui|ux|visual|responsive|mobile|layout|css|html)\b/i.test(contentToText(prompt));
+  const hasSuccessfulScreenshot = (toolCallHistory || []).some((call) => (
+    callName(call) === 'browser_screenshot' && toolResultSucceeded(call)
+  ));
+  const hasSuccessfulBrowserSmoke = (toolCallHistory || []).some((call) => (
+    callName(call) === 'browser_smoke_test' && toolResultSucceeded(call)
+  ));
+  const requiresPdfEvidence = /\b(?:make|generate|create|export|render|design|format|style|polish|typeset|print|convert)\b[\s\S]{0,80}\bpdf\b|\bpdf\b[\s\S]{0,80}\b(?:make|generate|create|export|render|design|format|style|polish|typeset|print|convert)\b/i.test(contentToText(prompt));
+  const touchedPdfFlow = madeEdits || hasToolCall(toolCallHistory, new Set(['run_shell', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages']));
+  const hasSuccessfulPdfArtifact = (toolCallHistory || []).some((call) => {
+    const name = callName(call);
+    if (name !== 'make_pdf' && name !== 'pdf_info') return false;
+    if (!toolResultSucceeded(call)) return false;
+    const result = call.result && typeof call.result === 'object' ? call.result : call;
+    return Boolean(result.path || result.artifact?.path || result.bytes || result.sha256);
+  });
+  if (touchedPdfFlow && requiresPdfEvidence && !hasSuccessfulPdfArtifact && !isVerificationBlockerResponse(content)) {
+    const reason = hasFailedVerificationAttempt(toolCallHistory)
+      ? 'The assistant worked on PDF/document output but PDF artifact verification failed or produced no successful evidence.'
+      : 'The assistant worked on PDF/document output but ended before successful PDF artifact verification.';
+    if (!toolsAvailable) return { action: 'fail', reason: `${reason} No tool turns remain.` };
+    if (nudges >= maxNudges) return { action: 'fail', reason: `${reason} Verification continuation limit reached.` };
+    return {
+      action: 'continue',
+      reason,
+      message: `[SYSTEM] ${reason} This is not complete.\n` +
+        `For PDF/document generation, call make_pdf or otherwise validate the generated PDF with pdf_info and render at least one page with pdf_render_pages before claiming success.\n` +
+        `If PDF verification is genuinely impossible, state the exact failed tool result and do not claim the PDF is done.\n` +
+        `Working directory: ${cwd}`,
+    };
+  }
+  if (madeEdits && requiresVisualEvidence && (!hasSuccessfulScreenshot || !hasSuccessfulBrowserSmoke) && !isVerificationBlockerResponse(content)) {
+    const reason = hasFailedVerificationAttempt(toolCallHistory)
+      ? 'The assistant made frontend/UI changes but browser verification failed or produced incomplete evidence.'
+      : 'The assistant made frontend/UI changes but ended before successful browser screenshot and runtime smoke verification.';
+    if (!toolsAvailable) {
+      return {
+        action: 'fail',
+        reason: `${reason} No tool turns remain.`,
+      };
+    }
+    if (nudges >= maxNudges) {
+      return {
+        action: 'fail',
+        reason: `${reason} Verification continuation limit reached.`,
+      };
+    }
+    return {
+      action: 'continue',
+      reason,
+      message: `[SYSTEM] ${reason} This is not complete.\n` +
+        `For website/UI/UX/frontend work, capture browser_screenshot and run browser_smoke_test at the relevant file:// or verified local URL before claiming success. If a server is needed, use start_static_server then check_url before browser verification.\n` +
+        `If browser verification is genuinely impossible, state that blocker explicitly with the failed tool result and do not claim the website is ready.\n` +
+        `Working directory: ${cwd}`,
+    };
+  }
+  const failedVerification = hasFailedVerificationAttempt(toolCallHistory);
   if (madeEdits && !hasVerificationEvidence(toolCallHistory) && !isVerificationBlockerResponse(content)) {
-    const reason = 'The assistant made file changes but ended before running verification.';
+    const reason = failedVerification
+      ? 'The assistant made file changes but verification failed or produced no successful evidence.'
+      : 'The assistant made file changes but ended before running verification.';
     if (!toolsAvailable) {
       return {
         action: 'fail',
@@ -649,7 +1019,7 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
       action: 'continue',
       reason,
       message: `[SYSTEM] ${reason} This is not complete.\n` +
-        `Run the relevant verification now: tests, lint, build, typecheck, browser screenshot, or at minimum git diff --check when no project test exists.\n` +
+        `Run the relevant verification now: tests, lint, build, typecheck, check_url/browser_screenshot for websites, or at minimum git diff --check when no project test exists.\n` +
         `Only summarize success after a tool result proves the work. If verification is genuinely impossible, state the blocker with tool-backed evidence.\n` +
         `Working directory: ${cwd}`,
     };
@@ -657,8 +1027,41 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
   if (madeEdits) return null;
   if (isLegitimateNoEditResponse(content, toolCallHistory)) return null;
+  const ranCodingExecution = hasToolCall(toolCallHistory, CODING_EXECUTION_TOOL_NAMES);
   const didMeaningfulAction = hasToolCall(toolCallHistory, MEANINGFUL_ACTION_TOOL_NAMES);
   const premature = isPrematureActionResponse(content);
+  if (promptRequiresFileChanges(prompt, { mode }) && !ranCodingExecution) {
+    const reason = !toolCallHistory.length
+      ? 'The assistant ended an action-oriented coding turn without using any tools.'
+      : onlySetupToolCalls(toolCallHistory)
+        ? 'The assistant only loaded skills or capability context and did not execute the requested coding change.'
+        : premature
+          ? 'The assistant ended with prospective work instead of executing it.'
+          : 'The assistant inspected or diagnosed the requested change but did not execute a coding change.';
+    if (!toolsAvailable) {
+      return {
+        action: 'fail',
+        reason: `${reason} No tool turns remain.`,
+      };
+    }
+    if (nudges >= maxNudges) {
+      return {
+        action: 'fail',
+        reason: `${reason} Coding-execution continuation limit reached.`,
+      };
+    }
+    return {
+      action: 'continue',
+      reason,
+      message: `[SYSTEM] ${reason} This is not complete.\n` +
+        `Use the available tools now. In Wall-E chat, call start_coding for coding-agent work; in coding mode, inspect files, then edit/write/apply_patch to make the change, and run relevant verification.\n` +
+        `Do not end with a plan, audit, diagnostic report, or "I will..." statement. Finish only after work is executed, or state a concrete blocker/no-change reason backed by tool results.\n` +
+        `Working directory: ${cwd}`,
+    };
+  }
   if (didMeaningfulAction && !premature) return null;
   const reason = !toolCallHistory.length
@@ -692,6 +1095,18 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
 }
 function changedFilesSince(cwd, baseline = new Set()) {
+  // Structured baseline from captureChangedFilesBaseline() — handles git AND non-git cwds.
+  if (baseline && typeof baseline === 'object' && !(baseline instanceof Set)
+      && !Array.isArray(baseline) && typeof baseline.isGit === 'boolean') {
+    if (!baseline.isGit) {
+      // Non-git working dir: `git status` throws here (caught → empty set), so the agent's
+      // real writes would silently report as []. Detect created/modified files by mtime.
+      return collectFilesModifiedSince(cwd, baseline.startedAtMs || 0);
+    }
+    const before = baseline.dirty instanceof Set ? baseline.dirty : new Set(baseline.dirty || []);
+    return [...getGitChangedFiles(cwd)].filter((rel) => !before.has(rel));
+  }
+  // Legacy: a Set/array of pre-existing dirty git paths.
   const before = baseline instanceof Set ? baseline : new Set(baseline || []);
   return [...getGitChangedFiles(cwd)].filter((rel) => !before.has(rel));
 }
@@ -700,6 +1115,25 @@ function isTimeoutOnlyOutput(output) {
   return /^\s*\[Timeout reached\]\s*$/i.test(contentToText(output));
 }
+// Single source of truth for "is a human present this turn?" — used by both the run
+// deadline (resolveRunTimeoutMs) and the in-loop permission/acceptance behavior, so the
+// two can never drift. An explicit boolean opts.interactive wins; otherwise a run is
+// interactive unless it is headless or a benchmark.
+function isInteractiveRun(opts = {}) {
+  if (typeof opts.interactive === 'boolean') return opts.interactive;
+  return !opts.headless && !opts.benchmark;
+}
+// Resolve the run's wall-clock timeout (ms). An explicit positive timeoutMs always wins.
+// Otherwise: headless/automated runs keep the 300s safety cap so CI/background work
+// can't hang; interactive runs (a user is present) get 0 = "no deadline" so they run
+// until the agent finishes or the user stops them (matches Claude Code / opencode).
+// timeoutMs is coerced to a number so a numeric string (e.g. from env/CLI) cannot turn
+// the caller's `Date.now() + timeout` into string concatenation; missing, zero, negative
+// or non-numeric values fall back to the default instead of disabling the safety cap.
+function resolveRunTimeoutMs(opts = {}) {
+  const explicitMs = Number(opts.timeoutMs);
+  // NaN fails this comparison, so undefined / garbage input takes the default path.
+  if (explicitMs > 0) return explicitMs;
+  return isInteractiveRun(opts) ? 0 : 300000;
+}
 function providerSupportsToolCalls(provider) {
   if (!provider) return true;
   if (provider.capabilities?.tools === false) return false;
@@ -763,6 +1197,25 @@ function createCodingCompactionService(provider, modelId, opts = {}) {
   });
 }
+// A `stop` user hook can refuse to let the run finish (e.g. "tests must
+// pass"). Bounded: a flaky or unsatisfiable hook must not loop the agent
+// forever — after MAX_STOP_HOOK_BOUNCES the honest-failure path proceeds.
+const MAX_STOP_HOOK_BOUNCES = 3;
+// Asks the `stop` hook whether the run may finish.
+// Returns null when finishing is allowed: no hooks object, no `stop` hook registered,
+// the bounce budget is spent, or the verdict is anything other than 'deny'.
+// Returns { reason, message } when the hook denies; `message` is the text to feed back
+// to the agent. The bounce counter is stored on `log` as `_stopHookBounces`, so `log`
+// must be a mutable object shared across calls for the bound to hold.
+async function evaluateStopGate({ userHooks, log, sessionId, cwd, mode, turn, text }) {
+  if (typeof userHooks?.hasHooks !== 'function' || !userHooks.hasHooks('stop')) return null;
+  const bounces = log._stopHookBounces || 0;
+  if (bounces >= MAX_STOP_HOOK_BOUNCES) return null;
+  const verdict = await userHooks.run('stop', { sessionId, cwd, mode, turn, text });
+  // A runner that resolves to no verdict (null/undefined) is treated as "allow" rather
+  // than crashing the run on a property read at the very end of the task.
+  if (verdict?.decision !== 'deny') return null;
+  log._stopHookBounces = bounces + 1;
+  return {
+    reason: verdict.reason,
+    message: `A stop hook rejected finishing this task (attempt ${bounces + 1}/${MAX_STOP_HOOK_BOUNCES}): ${verdict.reason || 'no reason given'}\n` +
+      'Address the issue, then finish. If it is genuinely unresolvable, explain the exact blocker in your final summary.',
+  };
+}
 async function maybeCompactCodingContext({
   messages,
   compactionService,
@@ -775,13 +1228,43 @@ async function maybeCompactCodingContext({
   mode,
   step = -1,
   sessionMemory,
+  userHooks = null,
   reason = 'context_threshold',
   opts = {},
 } = {}) {
   if (!compactionService || !Array.isArray(messages) || messages.length < 2) return null;
   const systemTokens = estimateTokens(systemPrompt || '');
   const estimatedInputTokens = systemTokens + estimateMessagesTokens(messages);
-  if (!compactionService.shouldCompact({ messages, systemTokens })) return null;
+  // Cheap layer first: truncate OLD tool outputs before reaching for LLM
+  // summarization. Rewriting old messages resets the prompt-cache prefix,
+  // but pruning fires rarely (threshold crossing) and shrinks input enough
+  // to amortize the one-turn cache miss.
+  let pruneDetail = null;
+  if (typeof compactionService.shouldPrune === 'function'
+    && typeof compactionService.prune === 'function'
+    && compactionService.shouldPrune({ messages, systemTokens })) {
+    const pruneResult = compactionService.prune(messages);
+    if (pruneResult?.pruned && Array.isArray(pruneResult.messages)) {
+      messages.splice(0, messages.length, ...pruneResult.messages);
+      pruneDetail = {
+        prunedBlocks: pruneResult.prunedBlocks,
+        tokensBefore: pruneResult.tokensBefore,
+        tokensAfter: pruneResult.tokensAfter,
+      };
+      events?.emit?.('context.pruned', { sessionId, reason, ...pruneDetail });
+      emitProgress?.({
+        phase: mode || 'executing',
+        step,
+        message: `Pruned ${pruneResult.prunedBlocks} old tool output(s) (~${Math.max(0, pruneResult.tokensBefore - pruneResult.tokensAfter)} tokens)`,
+        detail: pruneDetail,
+      });
+    }
+  }
+  if (!compactionService.shouldCompact({ messages, systemTokens })) {
+    return pruneDetail ? { compacted: false, pruned: true, ...pruneDetail } : null;
+  }
   emitProgress?.({
     phase: mode || 'executing',
@@ -789,6 +1272,8 @@ async function maybeCompactCodingContext({
     message: 'Compacting coding context...',
   });
+  if (userHooks?.runObserved) await userHooks.runObserved('pre_compact', { sessionId, cwd, reason });
   const result = await compactionService.compact(messages, {
     sessionId,
     cwd,
@@ -849,6 +1334,16 @@ async function runCliFallback(prompt, opts = {}, { sid, cwd, reason, fromProvide
       detail: { reason, fromProvider },
     });
   }
+  // Forward the run's auto-approval intent to the spawned CLI. The stream-native path
+  // answers tool-permission requests in-process via headlessPolicy; the CLI fallback
+  // spawns a real `claude`, so unless it is told to bypass permissions it silently
+  // stalls in ask-mode and writes nothing. Mirror runAgentLoop's effective policy
+  // (see headlessPolicy default below): an explicit opts.permissionMode wins, else
+  // headlessPolicy:'allow' (or a benchmark run) maps to bypassPermissions; any other
+  // policy leaves the CLI's default ask-mode intact.
+  const effectiveHeadlessPolicy = opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject');
+  const permissionMode = opts.permissionMode
+    || (effectiveHeadlessPolicy === 'allow' ? 'bypassPermissions' : undefined);
   const result = await runHeadless(prompt, {
     cwd,
     sessionId: sid,
@@ -857,6 +1352,8 @@ async function runCliFallback(prompt, opts = {}, { sid, cwd, reason, fromProvide
     runnerId,
     model,
     mode: opts.mode || 'build',
+    permissionMode,
+    maxTurns: opts.maxTurns,
   });
   return {
     ...result,
@@ -1031,6 +1528,226 @@ function collectEmptyChangedFiles(cwd, changedFiles) {
   return empties;
 }
+// True when at least one changed path is classified as a frontend file by
+// isFrontendFile(); a missing or empty list yields false.
+function changedFilesTouchFrontend(files = []) {
+  const candidates = files || [];
+  const looksFrontend = (entry) => isFrontendFile(entry);
+  return candidates.some(looksFrontend);
+}
+// Best-effort report of one acceptance-validator state change. The normalized payload
+// goes to the onProgress callback (if any), then a 'coding_acceptance_validator'
+// telemetry event is tracked. Failures in either sink are deliberately swallowed so
+// reporting can never break validation itself.
+function emitAcceptanceValidatorProgress(onProgress, event = {}) {
+  const { step, validator, status, message, detail } = event;
+  const payload = {
+    type: 'acceptance_validator',
+    phase: 'validating',
+    step: step ?? -1,
+    validator: validator || '',
+    status: status || '',
+    message: message || '',
+    detail: detail || {},
+  };
+  try {
+    if (onProgress != null) onProgress(payload);
+  } catch {}
+  try {
+    const telemetry = safeTelemetry();
+    telemetry?.track?.('coding_acceptance_validator', {
+      validator: payload.validator,
+      status: payload.status,
+      task_kind: event.taskKind || '',
+      failures: event.failures || 0,
+    });
+  } catch {}
+}
+// Decides whether the run produced screenshot evidence: either a tracked screenshot
+// entry carrying a path or url, or a 'browser_screenshot' tool call in the history
+// whose result counts as successful.
+function screenshotEvidenceExists(screenshots = [], toolCallHistory = []) {
+  const hasTrackedShot = Array.isArray(screenshots)
+    && screenshots.some((entry) => entry && (entry.path || entry.url));
+  if (hasTrackedShot) return true;
+  const history = toolCallHistory || [];
+  return history.some((entry) => {
+    if (callName(entry) !== 'browser_screenshot') return false;
+    return toolResultSucceeded(entry);
+  });
+}
+// Runs the frontend acceptance validators for a coding run and resolves to a report
+// `{ ok, validators, concerns, frontend }`.
+//   - contract: only acted on when contract.requiresFrontendValidation is truthy;
+//     otherwise the empty passing report is returned untouched.
+//   - changedFiles / screenshots / toolCallHistory: evidence gathered during the run.
+//   - requireBrowserRuntime: when false, a missing smoke test is recorded as deferred.
+//   - autoBrowser: when true (and runtime is required), smoke tests are launched here
+//     against up to two resolved entrypoints, in desktop and mobile viewports.
+//   - onProgress / step: forwarded to emitAcceptanceValidatorProgress.
+// Validators run in order: static contract → screenshot evidence → browser runtime.
+// A static-contract or browser-runtime failure returns immediately; a missing
+// screenshot only marks the report failed and lets the runtime check continue.
+async function runAcceptanceValidators({
+  cwd,
+  contract,
+  changedFiles = [],
+  screenshots = [],
+  toolCallHistory = [],
+  autoBrowser = false,
+  requireBrowserRuntime = false,
+  onProgress,
+  step = -1,
+} = {}) {
+  const validators = [];
+  const concerns = [];
+  const report = {
+    ok: true,
+    validators,
+    concerns,
+    frontend: null,
+  };
+  if (!contract?.requiresFrontendValidation) return report;
+  // Default parameters only cover `undefined`; count null-safely so an explicit
+  // `screenshots: null` cannot throw while building validator details.
+  const screenshotCount = Array.isArray(screenshots) ? screenshots.length : 0;
+  emitAcceptanceValidatorProgress(onProgress, {
+    step,
+    validator: 'frontend.static_contract',
+    status: 'started',
+    taskKind: contract.taskKind,
+  });
+  const staticVerdict = checkFrontendStaticContracts(cwd, changedFiles);
+  report.frontend = { static: staticVerdict };
+  if (!staticVerdict.ok) {
+    const failure = validatorFailure(
+      'frontend.static_contract',
+      `Frontend static contract failed: ${staticVerdict.concerns.slice(0, 3).join('; ')}`,
+      staticVerdict
+    );
+    validators.push(failure);
+    concerns.push(...staticVerdict.concerns);
+    emitAcceptanceValidatorProgress(onProgress, {
+      step,
+      validator: failure.name,
+      status: 'failed',
+      message: failure.message,
+      taskKind: contract.taskKind,
+      failures: staticVerdict.concerns.length,
+    });
+    report.ok = false;
+    return report;
+  }
+  validators.push(validatorPass('frontend.static_contract', 'Frontend static contract passed', staticVerdict));
+  emitAcceptanceValidatorProgress(onProgress, {
+    step,
+    validator: 'frontend.static_contract',
+    status: 'passed',
+    taskKind: contract.taskKind,
+  });
+  const hasScreenshot = screenshotEvidenceExists(screenshots, toolCallHistory);
+  if (!hasScreenshot) {
+    const failure = validatorFailure(
+      'frontend.screenshot_evidence',
+      'Frontend verification failed: no successful browser_screenshot evidence captured',
+      { screenshots: screenshotCount }
+    );
+    validators.push(failure);
+    concerns.push('[frontend-visual] No successful browser_screenshot evidence captured for frontend changes');
+    // Not an early return: the runtime check below still runs and may add concerns.
+    report.ok = false;
+  } else {
+    validators.push(validatorPass('frontend.screenshot_evidence', 'Frontend screenshot evidence present', {
+      screenshots: screenshotCount,
+    }));
+  }
+  // Smoke tests the agent already ran during the task take precedence over running new ones.
+  const smokeEvidence = collectToolEvidence(toolCallHistory, 'browser_smoke_test');
+  const failedSmoke = smokeEvidence.find((item) => !item.ok);
+  if (failedSmoke) {
+    const failure = validatorFailure(
+      'frontend.browser_runtime',
+      'Frontend browser runtime smoke test failed',
+      failedSmoke.result
+    );
+    validators.push(failure);
+    concerns.push('[frontend-runtime] Browser runtime smoke test failed');
+    report.ok = false;
+    return report;
+  }
+  if (smokeEvidence.some((item) => item.ok)) {
+    validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke evidence present', {
+      evidence: smokeEvidence.length,
+    }));
+    return report;
+  }
+  if (!requireBrowserRuntime) {
+    validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke deferred to final gate', {
+      deferred: true,
+    }));
+    return report;
+  }
+  if (!autoBrowser) {
+    const failure = validatorFailure(
+      'frontend.browser_runtime',
+      'Frontend verification failed: no successful browser_smoke_test evidence captured',
+      {}
+    );
+    validators.push(failure);
+    concerns.push('[frontend-runtime] No successful browser_smoke_test evidence captured');
+    report.ok = false;
+    return report;
+  }
+  const entrypoints = resolveFrontendEntrypoints(cwd, changedFiles);
+  if (entrypoints.length === 0) {
+    validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke skipped: no HTML entrypoint found', {
+      skipped: true,
+    }));
+    return report;
+  }
+  const localTools = getLocalTools();
+  if (!localTools.findChromeExecutable()) {
+    const failure = validatorFailure(
+      'frontend.browser_runtime',
+      'Frontend browser runtime smoke test blocked: no Chromium-based browser found',
+      { entrypoints }
+    );
+    validators.push(failure);
+    concerns.push('[frontend-runtime] No Chromium-based browser available for browser_smoke_test');
+    report.ok = false;
+    return report;
+  }
+  const smokeResults = [];
+  // Runs are sequential and stop at the first failing entrypoint/viewport pair.
+  for (const entrypoint of entrypoints.slice(0, 2)) {
+    for (const viewport of ['desktop', 'mobile']) {
+      emitAcceptanceValidatorProgress(onProgress, {
+        step,
+        validator: 'frontend.browser_runtime',
+        status: 'started',
+        taskKind: contract.taskKind,
+        detail: { entrypoint, viewport },
+      });
+      const result = await localTools.browserSmokeTest({
+        url: pathToFileURL(entrypoint).href,
+        viewport,
+        max_clicks: 25,
+        settle_ms: 750,
+      });
+      smokeResults.push(result);
+      if (!result.ok) {
+        const failure = validatorFailure(
+          'frontend.browser_runtime',
+          `Frontend browser runtime smoke failed for ${path.relative(cwd, entrypoint)} (${viewport})`,
+          result
+        );
+        validators.push(failure);
+        const runtimeConcerns = (result.failures || []).slice(0, 5).map((item) => (
+          `[frontend-runtime] ${item.type || 'failure'} ${item.exception || item.args || item.errorText || item.error || ''}`.trim()
+        ));
+        // Always record at least one runtime concern. Checking the shared `concerns`
+        // list instead would skip this fallback whenever an earlier validator (e.g.
+        // missing screenshot) had already added an unrelated concern.
+        if (runtimeConcerns.length === 0) runtimeConcerns.push(`[frontend-runtime] ${failure.message}`);
+        concerns.push(...runtimeConcerns);
+        report.ok = false;
+        report.frontend.browserSmoke = smokeResults;
+        emitAcceptanceValidatorProgress(onProgress, {
+          step,
+          validator: failure.name,
+          status: 'failed',
+          message: failure.message,
+          taskKind: contract.taskKind,
+          failures: result.failures?.length || 1,
+        });
+        return report;
+      }
+    }
+  }
+  report.frontend.browserSmoke = smokeResults;
+  validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke passed', {
+    entrypoints: entrypoints.map((file) => path.relative(cwd, file)),
+    runs: smokeResults.length,
+  }));
+  emitAcceptanceValidatorProgress(onProgress, {
+    step,
+    validator: 'frontend.browser_runtime',
+    status: 'passed',
+    taskKind: contract.taskKind,
+  });
+  return report;
+}
 function collectEditedFilePaths(toolName, args = {}, result = {}) {
   const editTools = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
   if (!editTools.has(toolName)) return [];
@@ -1150,10 +1867,11 @@ async function shutdownPostEditMiddleware(state) {
 async function runAgentLoop(prompt, opts = {}) {
   const { cwd, timeoutMs, maxTurns, provider, model, tools, onProgress } = opts;
   const explicitProvider = !!provider;
-  const sid = opts._resumeSessionId || crypto.randomUUID();
+  const sid = opts._resumeSessionId || opts.runSessionId || opts.agentRunId || crypto.randomUUID();
+  const codingIntent = opts.codingIntent || opts.intent || null;
   // Persist activity start (Phase 2: Activity History)
-  const isResume = !!opts._resumeSessionId;
+  const isResume = Boolean(opts._resumeSessionId && opts._resumeMessages);
   try { getActivityLog().log({ session_id: sid, type: isResume ? 'coding_resume' : 'coding_start', title: isResume ? 'Coding session resumed' : 'Coding session started', body: prompt.slice(0, 200) }); } catch {}
   // Helper: emit progress both to callback and global emitter
@@ -1164,8 +1882,34 @@ async function runAgentLoop(prompt, opts = {}) {
     // Also forward to per-task event bus if available (A3 unification)
     if (events) events.emit('progress', full);
   }
-  const timeout = timeoutMs || 300000;
-  const deadline = Date.now() + timeout;
+  const externalSignal = opts.signal || opts.abortSignal || null;
+  const throwIfExternalAbort = () => {
+    if (!externalSignal?.aborted) return;
+    const err = new Error('Cancelled');
+    err.code = 'WALLE_CANCELLED';
+    throw err;
+  };
+  const linkExternalAbort = (controller) => {
+    if (!externalSignal || !controller) return () => {};
+    if (externalSignal.aborted) {
+      try { controller.abort(); } catch {}
+      return () => {};
+    }
+    const onAbort = () => {
+      try { controller.abort(); } catch {}
+    };
+    externalSignal.addEventListener('abort', onAbort, { once: true });
+    return () => {
+      try { externalSignal.removeEventListener('abort', onAbort); } catch {}
+    };
+  };
+  // Interactive sessions (a user is watching and approving) must not be killed by a
+  // wall-clock deadline — they run until the agent finishes or the user stops them,
+  // like Claude Code / opencode. The 300s default is only for headless/automated
+  // runs (so CI/background work can't hang). An explicit timeoutMs always wins.
+  const interactiveRun = isInteractiveRun(opts);
+  const timeout = resolveRunTimeoutMs(opts);
+  const deadline = timeout > 0 ? Date.now() + timeout : Infinity;
   let turns = maxTurns || MAX_AGENT_TURNS;
   const log = []; // training data: every turn logged
@@ -1178,8 +1922,10 @@ async function runAgentLoop(prompt, opts = {}) {
   }
   const modelId = resolveModelId(model, llm);
   const resolvedCwd = realpathBestEffort(cwd || process.cwd());
-  const preRunDirtyFiles = getGitChangedFiles(resolvedCwd);
-  const agentRunContext = resolveAgentRunContext({
+  // Baseline for post-run change detection. Works in non-git cwds too (mtime-based) so
+  // an agent that writes into a plain folder doesn't report changedFiles: [].
+  const preRunFileBaseline = captureChangedFilesBaseline(resolvedCwd);
+  const wallERuntimeProfile = resolveWallERuntimeProfile({
     ...opts,
     channel: opts.channel || 'coding',
     agentMode: opts.agentMode || 'coding',
@@ -1190,16 +1936,30 @@ async function runAgentLoop(prompt, opts = {}) {
     chatSessionId: opts.chatSessionId || opts.session_id || '',
     cwd: resolvedCwd,
   });
-  emitAgentRunContextWarnings(agentRunContext, { telemetry: safeTelemetry() });
+  const agentRunContext = wallERuntimeProfile.context;
+  emitAgentRunContextWarnings({ ...agentRunContext, warnings: wallERuntimeProfile.warnings }, { telemetry: safeTelemetry() });
   const promptCapabilityHints = opts.promptCapabilityHints || parsePromptCapabilityHints(prompt);
   const capabilities = resolveCodingCapabilities({ ...opts, promptCapabilityHints }, {
     cwd: resolvedCwd,
     brain: opts.brain || null,
   });
   const taskFileHints = extractTaskFileHints(prompt);
+  const artifactCapabilities = routeArtifactCapabilities({
+    prompt,
+    taskFileHints,
+    projectInfo: null,
+  });
   const runtimeMode = resolveRuntimeMode(opts);
   const baseTools = Array.isArray(tools) ? tools : getToolsForMode(opts.mode || 'build');
   const requestedTools = filterToolsForRuntimeMode(baseTools, runtimeMode);
+  const transcriptMessageOwner = String(opts.transcriptMessageOwner || opts.transcript_message_owner || '').toLowerCase();
+  const externalTranscriptMessages = opts.externalTranscriptMessages === true
+    || opts.external_transcript_messages === true
+    || opts.skipTranscriptMessages === true
+    || opts.skip_transcript_messages === true
+    || transcriptMessageOwner === 'ctm'
+    || transcriptMessageOwner === 'host'
+    || transcriptMessageOwner === 'external';
   const transcript = createCodingTranscript({
     transcript: opts.transcript,
     persistTranscript: opts.persistTranscript,
@@ -1230,7 +1990,7 @@ async function runAgentLoop(prompt, opts = {}) {
       mode: opts.mode || '',
     });
   }
-  if (!opts._resumeMessages && transcript?.appendUserMessage) {
+  if (!externalTranscriptMessages && !opts._resumeMessages && transcript?.appendUserMessage) {
     transcript.appendUserMessage(prompt, {
       sessionId: sid,
       cwd: resolvedCwd,
@@ -1288,7 +2048,7 @@ async function runAgentLoop(prompt, opts = {}) {
     }
   } catch {}
-  if (isFrontendTask(taskFileHints, prompt)
+  if ((hasCapability(artifactCapabilities, 'frontend_design') || isFrontendTask(taskFileHints, prompt))
       && !projectSkills.some((s) => s && s.name === 'frontend-design')) {
     projectSkills = [
       ...projectSkills,
@@ -1322,6 +2082,24 @@ async function runAgentLoop(prompt, opts = {}) {
   }
   promptCapabilities = await loadRequestedSkillInstructions(promptCapabilities, capabilities.skillRunner);
+  if (artifactCapabilities.length && transcript?.appendPart) {
+    transcript.appendPart({
+      sessionId: sid,
+      cwd: resolvedCwd,
+      partType: 'capability_routed',
+      data: {
+        type: 'capability_routed',
+        capabilities: artifactCapabilities.map((capability) => ({
+          id: capability.id,
+          label: capability.label,
+          tools: capability.tools,
+          requiredArtifacts: capability.requiredArtifacts,
+          completionGate: capability.completionGate,
+        })),
+      },
+    });
+  }
   // Build system prompt with project context.
   const systemPrompt = buildAgentSystemPrompt({
     resolvedCwd,
@@ -1329,9 +2107,13 @@ async function runAgentLoop(prompt, opts = {}) {
     projectSkills,
     taskFileHints,
     runtimeMode,
+    mode: opts.mode,
+    provider: llm.type || '',
+    model: modelId,
     runtimeContext: {
       memoryToolsAvailable: Boolean(capabilities.mcpClient),
       promptCapabilities,
+      artifactCapabilities,
       userTask: prompt,
     },
   });
@@ -1339,6 +2121,7 @@ async function runAgentLoop(prompt, opts = {}) {
   // Resume support: use restored messages if resuming from checkpoint
   const messages = opts._resumeMessages || [{ role: 'user', content: prompt }];
   let finalOutput = '';
+  let finalAnswerDelivered = false;
   let totalInput = 0;
   let totalOutput = 0;
   let consecutiveErrors = 0;
@@ -1372,10 +2155,12 @@ async function runAgentLoop(prompt, opts = {}) {
   mw.use('tool.after', screenshotTrackerHook(screenshotsTaken));
   const events = opts.events || new CodingEvents();
-  const { PermissionService } = require('./coding/permission-service');
+  const { PermissionService, WAIT_FOR_REPLY } = require('./coding/permission-service');
   const permissionService = opts.permissionService || new PermissionService({
     events,
-    timeoutMs: opts.permissionTimeoutMs,
+    // Interactive runs wait for the user to approve (no auto-deny timeout); headless
+    // runs resolve immediately via headlessPolicy, so the timeout never applies there.
+    timeoutMs: opts.permissionTimeoutMs ?? (interactiveRun ? WAIT_FOR_REPLY : undefined),
     headlessPolicy: opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject'),
   });
   const { AgentCatalog } = require('./coding/agent-catalog');
@@ -1401,7 +2186,7 @@ async function runAgentLoop(prompt, opts = {}) {
       headless: opts.headless,
       benchmark: opts.benchmark,
       headlessPolicy: opts.headlessPolicy,
-      _resumeSessionId: taskId,
+      runSessionId: taskId,
       enableTaskTool: false,
       brain: opts.brain || null,
       mcpClient: capabilities.mcpClient,
@@ -1437,6 +2222,102 @@ async function runAgentLoop(prompt, opts = {}) {
   // Inspired by OpenCode Question service (packages/opencode/src/question/index.ts)
   const questionManager = opts.questionManager || new QuestionManager(events);
   const compactionService = createCodingCompactionService(llm, modelId, opts);
+  const { RuntimeEventWriter } = require('./coding/runtime-events');
+  const {
+    appendPromptManifest,
+    buildCodingPromptManifest,
+  } = require('./coding/prompt-section-registry');
+  const runtimeEvents = opts.runtimeEvents || new RuntimeEventWriter({
+    transcript,
+    events,
+    defaults: {
+      sessionId: sid,
+      agentSessionId: agentRunContext.agentSessionId || sid,
+      cwd: resolvedCwd,
+      provider: llm.type || '',
+      model: modelId,
+      actor: agentRunContext.agentKind || 'walle-coding',
+    },
+  });
+  const promptManifest = opts.promptManifest || buildCodingPromptManifest({
+    systemPrompt,
+    userTask: prompt,
+    provider: llm.type || '',
+    model: modelId,
+    runtimeMode: runtimeMode.id,
+    tools: requestedTools,
+    promptCapabilities,
+    metadata: {
+      sessionId: sid,
+      agentKind: agentRunContext.agentKind,
+      agentMode: agentRunContext.agentMode,
+      runtimeProfile: wallERuntimeProfile.profileId,
+      persistenceProfile: wallERuntimeProfile.persistenceProfile,
+      permissionProfile: wallERuntimeProfile.permissionProfile,
+      outputContract: wallERuntimeProfile.outputContract,
+      mode: opts.mode || '',
+    },
+  });
+  appendPromptManifest(transcript, promptManifest, {
+    sessionId: sid,
+    cwd: resolvedCwd,
+    chatSessionId: opts.chatSessionId || '',
+  });
+  runtimeEvents.emit({
+    type: 'prompt_built',
+    payload: {
+      promptManifestId: promptManifest.promptManifestId,
+      stableHash: promptManifest.stableHash,
+      dynamicHash: promptManifest.dynamicHash,
+      stableSectionCount: promptManifest.stableSectionCount,
+      dynamicSectionCount: promptManifest.dynamicSectionCount,
+      tokenEstimate: promptManifest.tokenEstimate,
+    },
+  });
+  const { LifecycleHookBus } = require('./coding/lifecycle-hooks');
+  const { ToolExecutionController } = require('./coding/tool-execution-controller');
+  const lifecycleHooks = opts.lifecycleHooks || new LifecycleHookBus({
+    events,
+    middleware: mw,
+    runtimeEvents,
+    defaults: {
+      sessionId: sid,
+      agentSessionId: agentRunContext.agentSessionId || sid,
+      cwd: resolvedCwd,
+      provider: llm.type || '',
+      model: modelId,
+      actor: agentRunContext.agentKind || 'walle-coding',
+    },
+  });
+  // User-defined lifecycle hooks (.walle/hooks.json). `opts.userHooks` may
+  // inject a prebuilt instance (tests) or `null` to disable.
+  const { createUserHooks } = require('./coding/user-hooks');
+  const userHooks = opts.userHooks !== undefined
+    ? opts.userHooks
+    : createUserHooks({ projectRoot: resolvedCwd, cwd: resolvedCwd });
+  if (userHooks) {
+    emitProgress({ phase: opts.mode || 'executing', step: -1, message: `User hooks active (${userHooks.hooks.length})` });
+    userHooks.runObserved('session_start', { sessionId: sid, cwd: resolvedCwd, mode: opts.mode || 'build' });
+  }
+  const toolExecutionController = opts.toolExecutionController || new ToolExecutionController({
+    toolRegistry,
+    middleware: mw,
+    permissionService,
+    questionManager,
+    events,
+    lifecycleHooks,
+    cwd: resolvedCwd,
+    projectRoot: resolvedCwd,
+    sessionId: sid,
+    provider: llm.type || '',
+    model: modelId,
+    mode: opts.mode || '',
+    runtimeMode: runtimeMode.id,
+    headless: Boolean(opts.headless),
+    benchmark: Boolean(opts.benchmark),
+    userHooks,
+  });
   // projectInfo already detected above (before system prompt)
   const llmCtxRef = { current: null }; // populated each turn (see llmCtx below)
@@ -1448,47 +2329,87 @@ async function runAgentLoop(prompt, opts = {}) {
   // fall back to the legacy whole-response loop.
   if (shouldUseStreamProcessor(opts)) {
     const { StreamProcessor } = require('./coding/stream-processor');
-    const { SnapshotService } = require('./coding/snapshot-service');
+    const { SnapshotService, BoundaryStore } = require('./coding/snapshot-service');
+    const streamToolExecutionController = new ToolExecutionController({
+      toolRegistry,
+      middleware: mw,
+      permissionService: null,
+      questionManager,
+      events,
+      lifecycleHooks,
+      cwd: resolvedCwd,
+      projectRoot: resolvedCwd,
+      sessionId: sid,
+      provider: llm.type || '',
+      model: modelId,
+      mode: opts.mode || '',
+      runtimeMode: runtimeMode.id,
+      headless: Boolean(opts.headless),
+      benchmark: Boolean(opts.benchmark),
+      handlePermissions: false,
+      userHooks,
+    });
     const processor = new StreamProcessor({
       provider: llm,
       model: modelId,
       transcript,
-      snapshotService: opts.snapshotService || new SnapshotService({ cwd: resolvedCwd }),
+      snapshotService: opts.snapshotService || new SnapshotService({
+        cwd: resolvedCwd,
+        // Whole-worktree step snapshots + restart-surviving boundaries for
+        // the rewind API. WALLE_WORKTREE_SNAPSHOTS=0 disables.
+        worktreeSnapshots: process.env.WALLE_WORKTREE_SNAPSHOTS !== '0' && !opts.benchmark,
+        boundaryStore: new BoundaryStore(),
+      }),
       permissionService,
       headless: Boolean(opts.headless || opts.benchmark),
       toolExecutor: async (call) => {
-        const input = { ...(call.input || {}) };
-        if (['read_file', 'write_file', 'edit_file'].includes(call.name)) {
-          if (input.file_path && !path.isAbsolute(input.file_path)) input.file_path = path.join(resolvedCwd, input.file_path);
-          if (!input.file_path && input.path) input.file_path = path.isAbsolute(input.path) ? input.path : path.join(resolvedCwd, input.path);
-        }
-        if (call.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
-          input.directory = path.join(resolvedCwd, input.directory);
-        }
-        if (call.name === 'run_shell' && !input.cwd) {
-          input.cwd = resolvedCwd;
-        }
-        input.sessionId = sid;
-        input.projectRoot = resolvedCwd;
-        const toolCtx = { sessionId: sid, cwd: resolvedCwd, model: modelId, provider: llm.type, runtimeMode: runtimeMode.id };
-        const finalInput = await mw.run('tool.before', toolCtx, call.name, input);
-        const result = await toolRegistry.execute(call.name, finalInput, toolCtx);
-        return mw.run('tool.after', toolCtx, call.name, finalInput, result);
+        const execution = await streamToolExecutionController.execute(call, {
+          sessionId: sid,
+          cwd: resolvedCwd,
+          projectRoot: resolvedCwd,
+          model: modelId,
+          provider: llm.type,
+          mode: opts.mode || '',
+          runtimeMode: runtimeMode.id,
+          interactive: opts.interactive,
+          onTodos: (todos) => { currentTodos = todos; },
+        });
+        return execution.result;
       },
     });
-    processor.on('event', (evt) => emitProgress({
-      phase: opts.mode || 'executing',
-      step: 0,
-      message: evt.type,
-      detail: evt,
-    }));
+    processor.on('event', (evt) => {
+      // Forward structured runtime events with their top-level `type` intact.
+      // CTM (server.js onEvent: event.type === 'lane_event'/'permission_resolved')
+      // and the browser (walle-session.js: switch(ev.type) → case 'permission_request')
+      // both dispatch on the top-level type, so wrapping these into
+      // {phase,step,message,detail} silently swallowed the live approval card and
+      // the "Needs You" wait state — the request then parked until the user
+      // reloaded (the durable restore card in walle-ctm-history.js still worked).
+      // Keep approval + lane events un-wrapped so a watching client surfaces the
+      // card and waiting state without a reload.
+      if (evt && [
+        'tool_call', 'tool_result', 'tool_done', 'skill_loaded', 'skill_load_failed',
+        'permission_request', 'permission_resolved', 'permission_denied', 'lane_event',
+      ].includes(evt.type)) {
+        emitProgress(evt);
+        return;
+      }
+      emitProgress({
+        phase: opts.mode || 'executing',
+        step: 0,
+        message: evt?.type || 'event',
+        detail: evt,
+      });
+    });
     let streamStatus = 'finished';
     let streamStopReason = '';
     let streamModel = modelId;
     const streamErrors = [];
+    let streamProviderError = null;
     let streamHadEdit = false;
     for (let turnIndex = opts._resumeTurn || 0; turnIndex < turns; turnIndex++) {
+      throwIfExternalAbort();
       const remaining = deadline - Date.now();
       if (remaining <= 0) {
         streamStatus = 'error';
@@ -1503,6 +2424,7 @@ async function runAgentLoop(prompt, opts = {}) {
       });
       const perTurnCap = opts.perTurnTimeoutMs || (/ollama|mlx/.test(llm.type || '') ? 600000 : 300000);
       const ac = new AbortController();
+      const unlinkExternalAbort = linkExternalAbort(ac);
       const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
       let turn;
       let toolsForTurn = [];
@@ -1518,7 +2440,7 @@ async function runAgentLoop(prompt, opts = {}) {
           });
         const llmCtx = { params: createInitialLlmParams(opts, taskFileHints.length >= 4 ? 8192 : 4096), system: systemPrompt, cwd: resolvedCwd,
           provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
-          toolsAvailable: toolsForTurn.length > 0, promptCapabilities };
+          toolsAvailable: toolsForTurn.length > 0, promptCapabilities, promptManifest };
         llmCtxRef.current = llmCtx;
         await mw.run('llm.before', llmCtx);
         await maybeCompactCodingContext({
@@ -1533,6 +2455,7 @@ async function runAgentLoop(prompt, opts = {}) {
           mode: opts.mode || 'executing',
           step: turnIndex,
           sessionMemory: opts.sessionMemory,
+          userHooks,
           reason: 'stream_pre_turn',
           opts,
         });
@@ -1549,9 +2472,11 @@ async function runAgentLoop(prompt, opts = {}) {
           thinking: llmCtx.params.thinking,
           reasoningEffort: llmCtx.params.reasoningEffort,
           options: llmCtx.params.options,
+          promptCache: true,
         });
       } finally {
         clearTimeout(timer);
+        unlinkExternalAbort();
       }
       totalInput += turn.usage?.input || 0;
@@ -1560,17 +2485,25 @@ async function runAgentLoop(prompt, opts = {}) {
       streamStopReason = turn.stopReason || streamStopReason;
       streamModel = turn.model || streamModel;
       if (turn.errors?.length) streamErrors.push(...turn.errors);
+      if (turn.providerError) streamProviderError = turn.providerError;
       if (turn.text) finalOutput += turn.text;
       const streamToolCalls = (turn.toolCalls || []).map(tc => ({ name: tc.name, input: tc.input }));
-      toolCallHistory.push(...streamToolCalls.map(tc => ({
-        name: tc.name,
-        inputHash: JSON.stringify(tc.input || {}).slice(0, 500),
-      })));
+      const streamToolResults = turn.toolResults || [];
+      toolCallHistory.push(...streamToolCalls.map((tc, index) => {
+        const resultRecord = streamToolResults[index] || {};
+        return normalizeToolCallEvidence(tc, resultRecord.result || resultRecord);
+      }));
       log.push({
         turn: turnIndex,
         model: turn.model || modelId,
         provider: turn.provider || llm.type,
         toolCalls: streamToolCalls,
+        toolResults: streamToolResults.map((record) => ({
+          name: record.name,
+          ok: normalizeToolCallEvidence(record, record.result || record).ok === true,
+          error: record.error || record.result?.error || null,
+          result: record.result || null,
+        })),
         content: turn.text,
         stopReason: turn.stopReason,
       });
@@ -1586,6 +2519,7 @@ async function runAgentLoop(prompt, opts = {}) {
           toolsAvailable: toolsForTurn.length > 0,
           nudges: log._noActionNudges || 0,
           cwd: resolvedCwd,
+          codingIntent,
         });
         if (continuation?.action === 'continue') {
           log._noActionNudges = (log._noActionNudges || 0) + 1;
@@ -1600,6 +2534,16 @@ async function runAgentLoop(prompt, opts = {}) {
           emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'Action guard failed incomplete no-tool response', detail: { reason: continuation.reason } });
           break;
         }
+        const stopGate = await evaluateStopGate({
+          userHooks, log, sessionId: sid, cwd: resolvedCwd, mode: opts.mode, turn: turnIndex, text: contentToText(turn.text),
+        });
+        if (stopGate) {
+          if (turn.assistantMessage) messages.push(turn.assistantMessage);
+          messages.push({ role: 'user', content: stopGate.message });
+          emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'Stop hook rejected completion — continuing', detail: { reason: stopGate.reason } });
+          continue;
+        }
+        if (contentToText(turn.text).trim()) finalAnswerDelivered = true;
       }
       if (turn.assistantMessage) messages.push(turn.assistantMessage);
       if (turn.toolResultMessage) messages.push(turn.toolResultMessage);
@@ -1627,10 +2571,15 @@ async function runAgentLoop(prompt, opts = {}) {
         sessionId: sid,
         cwd: resolvedCwd,
         partType: 'error',
-        data: { errors: streamErrors },
+        data: streamProviderError
+          // Classified provider failure: surface the friendly, actionable message
+          // (parity with the chat path) instead of a raw "fetch failed". Raw text
+          // is retained in `errors` for debugging.
+          ? { message: streamProviderError.userMessage, providerError: streamProviderError, errors: streamErrors }
+          : { errors: streamErrors },
       });
     }
-    if (finalOutput && transcript?.appendAssistantMessage) {
+    if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
       transcript.appendAssistantMessage(finalOutput, {
         sessionId: sid,
         cwd: resolvedCwd,
@@ -1642,11 +2591,15 @@ async function runAgentLoop(prompt, opts = {}) {
     }
     await shutdownPostEditMiddleware(postEditMiddleware);
-    const changedFiles = changedFilesSince(resolvedCwd, preRunDirtyFiles);
+    const changedFiles = changedFilesSince(resolvedCwd, preRunFileBaseline);
     return {
       success: streamStatus !== 'error',
       output: finalOutput,
-      stderr: streamErrors.join('\n'),
+      // Surface the classified, friendly provider message (e.g. "AI provider network
+      // error: …could not reach the provider endpoint…") to the caller/chat reply
+      // instead of a raw "fetch failed". The raw text stays in `errors` for the
+      // CLI-recoverability pattern match.
+      stderr: (streamProviderError && streamProviderError.userMessage) || streamErrors.join('\n'),
       sessionId: sid,
       exitCode: streamStatus === 'error' ? -1 : 0,
       log,
@@ -1656,6 +2609,7 @@ async function runAgentLoop(prompt, opts = {}) {
       next: 'stop',
       runtimeMode: runtimeMode.id,
       changedFiles,
+      finalAnswerDelivered,
     };
   }
@@ -1674,6 +2628,7 @@ async function runAgentLoop(prompt, opts = {}) {
   try {
     const startTurn = opts._resumeTurn || 0;
     for (let turn = startTurn; turn < turns; turn++) {
+      throwIfExternalAbort();
       const remaining = deadline - Date.now();
       if (remaining <= 0) {
         finalOutput += '\n[Timeout reached]';
@@ -1692,13 +2647,14 @@ async function runAgentLoop(prompt, opts = {}) {
       const isLocal = /ollama|mlx/.test(llm.type || '');
       const perTurnCap = opts.perTurnTimeoutMs || (isLocal ? 600000 : 300000);
       const ac = new AbortController();
+      const unlinkExternalAbort = linkExternalAbort(ac);
       const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
       // Middleware: prepare LLM call
       const turnsRemaining = turns - turn;
       const llmCtx = { params: createInitialLlmParams(opts, taskFileHints.length >= 4 ? 8192 : 4096), system: systemPrompt, cwd: resolvedCwd,
         provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
-        toolsAvailable: turnsRemaining > 1, promptCapabilities };
+        toolsAvailable: turnsRemaining > 1, promptCapabilities, promptManifest };
       llmCtxRef.current = llmCtx; // expose to event bridge (A2)
       await mw.run('llm.before', llmCtx);
       let adaptedTools = await toolRegistry.getDefinitions(llmCtx);
@@ -1715,6 +2671,7 @@ async function runAgentLoop(prompt, opts = {}) {
         mode: opts.mode || 'executing',
         step: turn,
         sessionMemory: opts.sessionMemory,
+        userHooks,
         reason: 'legacy_pre_turn',
         opts,
       });
@@ -1759,10 +2716,12 @@ async function runAgentLoop(prompt, opts = {}) {
           thinking: llmCtx.params.thinking,
           reasoningEffort: llmCtx.params.reasoningEffort,
           options: llmCtx.params.options,
+          promptCache: true,
           signal: ac.signal,
         });
       } finally {
         clearTimeout(timer);
+        unlinkExternalAbort();
       }
       response = recoverAllowedTextToolCalls(response, adaptedTools);
       if (response.textToolCallFormat) {
@@ -1794,13 +2753,18 @@ async function runAgentLoop(prompt, opts = {}) {
       if (response.usage) {
         const inputTokens = response.usage.input || 0;
         const outputTokens = response.usage.output || 0;
+        // Cache hits cost 0.1x input price, cache writes 1.25x (Anthropic).
+        const cacheRead = response.usage.cacheRead || 0;
+        const cacheWrite = response.usage.cacheWrite || 0;
+        const effectiveInput = Math.max(0, inputTokens - cacheRead - cacheWrite)
+          + cacheRead * 0.1 + cacheWrite * 1.25;
         // Cost estimate: rough pricing per 1M tokens
         const costPer1M = {
           input: modelId.includes('haiku') ? 0.25 : modelId.includes('sonnet') ? 3.0 : 15.0,
           output: modelId.includes('haiku') ? 1.25 : modelId.includes('sonnet') ? 15.0 : 75.0,
         };
-        const turnCost = (inputTokens * costPer1M.input + outputTokens * costPer1M.output) / 1_000_000;
-        turnCosts.push({ turn, inputTokens, outputTokens, cost: turnCost });
+        const turnCost = (effectiveInput * costPer1M.input + outputTokens * costPer1M.output) / 1_000_000;
+        turnCosts.push({ turn, inputTokens, outputTokens, cacheRead, cacheWrite, cost: turnCost });
         budgetUsed += turnCost;
         if (opts.budgetUsd && budgetUsed > opts.budgetUsd) {
           finalOutput += '\n[Budget exceeded]';
@@ -1833,6 +2797,7 @@ async function runAgentLoop(prompt, opts = {}) {
           toolsAvailable: adaptedTools.length > 0,
           nudges: log._noActionNudges || 0,
           cwd: resolvedCwd,
+          codingIntent,
         });
         if (continuation?.action === 'continue') {
           log._noActionNudges = (log._noActionNudges || 0) + 1;
@@ -1844,12 +2809,22 @@ async function runAgentLoop(prompt, opts = {}) {
         if (continuation?.action === 'fail') {
           throw new Error(continuation.reason);
         }
+        const stopGate = await evaluateStopGate({
+          userHooks, log, sessionId: sid, cwd: resolvedCwd, mode: opts.mode, turn, text: contentToText(response.content),
+        });
+        if (stopGate) {
+          messages.push({ role: 'assistant', content: assistantHistoryContent(response) });
+          messages.push({ role: 'user', content: stopGate.message });
+          emitProgress({ phase: opts.mode || 'executing', step: turn, message: 'Stop hook rejected completion — continuing', detail: { reason: stopGate.reason } });
+          continue;
+        }
         emitProgress({
           phase: opts.mode || 'executing',
           step: turn,
           message: 'Agent finished',
         });
         finalOutput += (typeof response.content === 'string' ? response.content : '') || '';
+        if (contentToText(response.content).trim()) finalAnswerDelivered = true;
         break;
       }
@@ -1866,125 +2841,34 @@ async function runAgentLoop(prompt, opts = {}) {
           detail: { tool: tc.name, input: tc.input },
         });
-        let result;
-        try {
-          const input = { ...tc.input };
-          // Auto-correct missing file_path: resolve relative paths to cwd
-          if (['read_file', 'write_file', 'edit_file'].includes(tc.name)) {
-            if (input.file_path && !path.isAbsolute(input.file_path)) {
-              input.file_path = path.join(resolvedCwd, input.file_path);
-            } else if (!input.file_path && tc.name === 'read_file' && input.path) {
-              // Some models use 'path' instead of 'file_path'
-              input.file_path = path.isAbsolute(input.path) ? input.path : path.join(resolvedCwd, input.path);
-            }
-          }
-          // Auto-correct list_directory: resolve relative paths
-          if (tc.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
-            input.directory = path.join(resolvedCwd, input.directory);
-          }
-          // Path traversal guard: file tools must stay within cwd
-          if (['read_file', 'write_file', 'edit_file'].includes(tc.name) && input.file_path) {
-            if (!isWithinDirectory(input.file_path, resolvedCwd)) {
-              result = { error: `Path ${input.file_path} is outside allowed directory ${resolvedCwd}` };
-              turnHadError = true;
-              throw new Error('path_blocked'); // skip to result push
-            }
-          }
-          // Override directory for search tools
-          if (tc.name === 'glob' && !input.directory) input.directory = resolvedCwd;
-          if (tc.name === 'grep_files' && !input.directory) input.directory = resolvedCwd;
-          if (tc.name === 'run_shell') {
-            input.timeout_ms = input.timeout_ms || 30000;
-            input.cwd = input.cwd || resolvedCwd;
-          }
-          if (toolRequiresPermission(tc.name)) {
-            const permResult = await permissionService.authorize({
-              sessionId: sid,
-              tool: tc.name,
-              input,
-              cwd: input.cwd || resolvedCwd,
-              projectRoot: resolvedCwd,
-              mode: opts.mode,
-              headless: Boolean(opts.headless || opts.benchmark),
-              metadata: { toolCallId: tc.id || tc.toolCallId || '' },
-            });
-            if (permResult.decision !== 'allow') {
-              result = { error: `Permission denied: ${permResult.reason || permResult.message || permResult.decision}` };
-              turnHadError = true;
-              throw new Error('path_blocked');
-            }
-          }
-          // Middleware: before tool
-          const modifiedInput = await mw.run('tool.before', llmCtx, tc.name, input);
-          const finalInput = (modifiedInput && typeof modifiedInput === 'object') ? modifiedInput : input;
-          if (['read_file', 'write_file', 'edit_file', 'apply_patch', 'multi_edit', 'glob', 'grep_files', 'list_directory'].includes(tc.name)) {
-            finalInput.sessionId = sid;
-            finalInput.projectRoot = resolvedCwd;
-          }
-          // In-flight todo tracking (6m)
-          if (tc.name === 'update_todos') {
-            currentTodos = finalInput.todos || [];
-            result = { ok: true, todos: currentTodos };
-          } else if (tc.name === 'ask_user') {
-            // In headless/benchmark mode, auto-dismiss ask_user to avoid blocking
-            if (opts.mode === 'build' && !opts.interactive) {
-              result = { dismissed: true, message: 'Running in non-interactive mode. Please proceed with your best judgment based on the code you have read.' };
-            } else {
-              // Interactive question (B1) — ask the user and wait for answer
-              try {
-                const answer = await questionManager.ask(sid, {
-                  question: finalInput.question,
-                  header: finalInput.header,
-                  options: finalInput.options,
-                  multiple: finalInput.multiple,
-                });
-                result = answer ? { answers: answer } : { dismissed: true, message: 'Question timed out or was dismissed' };
-              } catch (e) {
-                result = { error: `Question failed: ${e.message}` };
-              }
-            }
-          } else {
-            result = await toolRegistry.execute(tc.name, finalInput, {
-              sessionId: sid,
-              cwd: resolvedCwd,
-              model: modelId,
-              provider: llm.type,
-              llmCtx,
-            });
-          }
-          // Middleware: after tool
-          result = await mw.run('tool.after', llmCtx, tc.name, finalInput, result) || result;
+        const execution = await toolExecutionController.execute(tc, {
+          sessionId: sid,
+          cwd: resolvedCwd,
+          projectRoot: resolvedCwd,
+          model: modelId,
+          provider: llm.type,
+          mode: opts.mode || '',
+          runtimeMode: runtimeMode.id,
+          llmCtx,
+          interactive: opts.interactive,
+          onTodos: (todos) => { currentTodos = todos; },
+        });
+        const result = execution.result;
+        const evidenceInput = execution.evidenceInput || tc.input || {};
+        if (!execution.ok) turnHadError = true;
-          // ── Event bus emissions (A1) ──
-          // Fire events so middleware and subscribers can react to tool outcomes.
-          if (['edit_file', 'write_file', 'multi_edit'].includes(tc.name) && result && !result.error) {
-            events.emit('file.edited', { filePath: finalInput.file_path, sessionId: sid });
-          }
-          if (tc.name === 'apply_patch' && result && !result.error) {
-            const patchFiles = [
-              ...(result.added || []),
-              ...(result.modified || []),
-            ];
-            for (const filePath of patchFiles) {
-              events.emit('file.edited', { filePath, sessionId: sid });
-            }
-          }
-          if (tc.name === 'read_file' && result && !result.error) {
-            events.emit('file.read', { filePath: finalInput.file_path, sessionId: sid });
-          }
-        } catch (err) {
-          if (err.message !== 'path_blocked') {
-            result = { error: err.message };
-          }
-          turnHadError = true;
+        const typedArtifacts = storeTypedArtifactsForTranscript(result, {
+          sessionId: sid,
+          cwd: resolvedCwd,
+          toolCallId: tc.id || '',
+          toolName: tc.name,
+          transcript,
+        });
+        if (typedArtifacts.length && log[log.length - 1]) {
+          log[log.length - 1].artifacts = [
+            ...(log[log.length - 1].artifacts || []),
+            ...typedArtifacts,
+          ];
         }
         const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
@@ -2003,11 +2887,18 @@ async function runAgentLoop(prompt, opts = {}) {
         toolResults.push({ type: 'tool_result', tool_use_id: tc.id, content: capped });
         log[log.length - 1].toolResults = log[log.length - 1].toolResults || [];
-        log[log.length - 1].toolResults.push({ name: tc.name, resultLength: resultStr.length, error: turnHadError });
+        const evidence = normalizeToolCallEvidence({ name: tc.name, input: evidenceInput }, result);
+        log[log.length - 1].toolResults.push({
+          name: tc.name,
+          resultLength: resultStr.length,
+          ok: evidence.ok === true,
+          error: result?.error || null,
+          exitCode: result?.exitCode,
+          result,
+        });
         // Doom loop detection (6a) -- track tool calls for identical pattern
-        const inputHash = JSON.stringify(tc.input);
-        toolCallHistory.push({ name: tc.name, inputHash });
+        toolCallHistory.push(evidence);
         if (toolCallHistory.length >= DOOM_LOOP_THRESHOLD) {
           const recent = toolCallHistory.slice(-DOOM_LOOP_THRESHOLD);
@@ -2066,7 +2957,20 @@ async function runAgentLoop(prompt, opts = {}) {
       if (response.stopReason === 'end_turn' || response.stopReason === 'max_tokens') break;
     }
   } catch (err) {
-    emitProgress({ phase: 'error', step: -1, message: err.message });
+    // Classify provider/LLM failures into a clear, human message (naming the model) so the
+    // coding agent surfaces e.g. "AI provider network error: … could not reach the provider
+    // endpoint…" instead of a raw "fetch failed" — mirrors the stream path
+    // (stream-processor.js), which already decorates. Raw err.message is kept below for the
+    // CLI-recoverability pattern match (which keys on the actual error text).
+    let friendlyError = (err && err.message) || 'Coding session failed';
+    try {
+      const { decorateProviderError } = require('./llm/provider-error');
+      const decorated = decorateProviderError(err, { provider: llm?.type || '', model: modelId || model || '' });
+      if (decorated && decorated.providerError && decorated.providerError.userMessage) {
+        friendlyError = decorated.providerError.userMessage;
+      }
+    } catch {}
+    emitProgress({ phase: 'error', step: -1, message: friendlyError });
     // Persist activity error (Phase 2: Activity History)
     try { getActivityLog().log({ session_id: sid, type: 'coding_error', title: 'Coding session failed', body: err.message, detail: JSON.stringify({ turns: log.length }) }); } catch {}
@@ -2078,10 +2982,10 @@ async function runAgentLoop(prompt, opts = {}) {
         sessionId: sid,
         cwd: resolvedCwd,
         partType: 'error',
-        data: { message: err.message },
+        data: { message: friendlyError },
       });
     }
-    if (finalOutput && transcript?.appendAssistantMessage) {
+    if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
       transcript.appendAssistantMessage(finalOutput, {
         sessionId: sid,
         cwd: resolvedCwd,
@@ -2093,9 +2997,10 @@ async function runAgentLoop(prompt, opts = {}) {
     }
     // Graceful cleanup (6r)
+    try { require('./tools/local-tools').cleanupBackgroundProcesses({ sessionId: sid }); } catch {}
     const cleanup = {
       lastCompletedTurn: log.length - 1,
-      error: err.message,
+      error: friendlyError,
       todosAtAbort: currentTodos,
     };
@@ -2141,7 +3046,7 @@ async function runAgentLoop(prompt, opts = {}) {
     return {
       success: false,
       output: finalOutput,
-      stderr: err.message,
+      stderr: friendlyError,
       sessionId: sid,
       exitCode: -1,
       log,
@@ -2149,10 +3054,11 @@ async function runAgentLoop(prompt, opts = {}) {
       provider: llm?.type,
       model: modelId,
       runtimeMode: runtimeMode.id,
+      finalAnswerDelivered,
       turnCosts,
       budgetUsed,
       cleanup,
-      changedFiles: changedFilesSince(resolvedCwd, preRunDirtyFiles),
+      changedFiles: changedFilesSince(resolvedCwd, preRunFileBaseline),
     };
   }
@@ -2168,6 +3074,20 @@ async function runAgentLoop(prompt, opts = {}) {
   if (questionManager) questionManager.clear();
   try { require('./tools/file-tracker').clearSession(sid); } catch {}
+  // Stop session-scoped background processes (dev servers, watchers).
+  // Persistent ones are reported so the summary can mention them.
+  try {
+    const bg = require('./tools/local-tools').cleanupBackgroundProcesses({ sessionId: sid });
+    if (bg.stopped.length || bg.persisted.length) {
+      emitProgress({
+        phase: 'done',
+        step: -1,
+        message: `Background processes: stopped ${bg.stopped.length}, left running ${bg.persisted.length}`,
+        detail: bg,
+      });
+    }
+  } catch {}
   emitProgress({ phase: 'done', step: -1, message: 'Agent loop finished' });
   // Delete checkpoint on successful completion (no longer needed)
@@ -2175,7 +3095,7 @@ async function runAgentLoop(prompt, opts = {}) {
   // Persist activity completion (Phase 2: Activity History)
   try { getActivityLog().log({ session_id: sid, type: 'coding_complete', title: 'Coding session completed', body: finalOutput.slice(0, 500), detail: JSON.stringify({ turns: log.length, tokens: totalInput + totalOutput }) }); } catch {}
-  if (finalOutput && transcript?.appendAssistantMessage) {
+  if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
     transcript.appendAssistantMessage(finalOutput, {
       sessionId: sid,
       cwd: resolvedCwd,
@@ -2208,7 +3128,8 @@ async function runAgentLoop(prompt, opts = {}) {
     turnCosts,
     budgetUsed,
     screenshots: screenshotsTaken,
-    changedFiles: changedFilesSince(resolvedCwd, preRunDirtyFiles),
+    changedFiles: changedFilesSince(resolvedCwd, preRunFileBaseline),
+    finalAnswerDelivered,
   };
 }
@@ -2542,6 +3463,63 @@ function getGitChangedFiles(cwd) {
   }
 }
+// Directory names the non-git change scan refuses to enter. They hold dependencies,
+// build output or VCS metadata, so descending into them would only inflate the walk
+// without ever surfacing a file the agent edited.
+const _CHANGED_SCAN_SKIP_DIRS = new Set([
+  '.git',
+  '.hg',
+  '.svn',
+  'node_modules',
+  'bower_components',
+  '.next',
+  '.nuxt',
+  'dist',
+  'build',
+  'out',
+  'target',
+  'vendor',
+  '.venv',
+  'venv',
+  '__pycache__',
+  '.cache',
+  '.turbo',
+  '.gradle',
+  '.idea',
+  '.vscode',
+  'coverage',
+  '.parcel-cache',
+]);
+// Records the "before" state that changedFilesSince() later compares against. The
+// returned object is tagged with `isGit` so the consumer knows which strategy applies:
+//   - git cwd:     { isGit: true, dirty } where `dirty` is whatever getGitChangedFiles()
+//                  reports right now, i.e. changes that already existed before the agent ran.
+//   - non-git cwd: { isGit: false, startedAtMs } where `startedAtMs` is the wall-clock time
+//                  of this call, so later creations/modifications can be found by mtime.
+function captureChangedFilesBaseline(cwd) {
+  const insideGitRepo = isGitRepository(cwd);
+  if (!insideGitRepo) {
+    // No VCS to diff against: fall back to a timestamp marker.
+    return { isGit: false, startedAtMs: Date.now() };
+  }
+  return { isGit: true, dirty: getGitChangedFiles(cwd) };
+}
+// Scans the tree rooted at `cwd` (intended for non-git working directories) and returns
+// the '/'-joined relative paths of regular files whose mtime is at or after `sinceMs`.
+// The walk is deliberately bounded so an enormous tree cannot make change detection run
+// away: directories named in _CHANGED_SCAN_SKIP_DIRS are never entered, and the scan
+// returns what it has so far once `maxEntries` directory entries have been examined or
+// `maxResults` paths have been collected. Unreadable directories and files that cannot
+// be stat'ed are silently ignored.
+function collectFilesModifiedSince(cwd, sinceMs, { maxEntries = 60000, maxResults = 5000 } = {}) {
+  const modified = [];
+  const pendingDirs = [''];
+  let examined = 0;
+  // Lists one directory; yields null when it cannot be read.
+  const listDirOrNull = (relDir) => {
+    try {
+      return fs.readdirSync(path.join(cwd, relDir), { withFileTypes: true });
+    } catch {
+      return null;
+    }
+  };
+  // Stats one file; yields null when the stat call fails.
+  const statOrNull = (relPath) => {
+    try {
+      return fs.statSync(path.join(cwd, relPath));
+    } catch {
+      return null;
+    }
+  };
+  while (pendingDirs.length > 0) {
+    const parentRel = pendingDirs.pop();
+    const children = listDirOrNull(parentRel);
+    if (children === null) continue;
+    for (const child of children) {
+      // Both caps are checked before the entry is processed; the counter advances either way.
+      const budgetSpent = examined >= maxEntries || modified.length >= maxResults;
+      examined += 1;
+      if (budgetSpent) return modified;
+      const childRel = parentRel ? `${parentRel}/${child.name}` : child.name;
+      if (child.isDirectory()) {
+        if (!_CHANGED_SCAN_SKIP_DIRS.has(child.name)) pendingDirs.push(childRel);
+        continue;
+      }
+      // Anything that is neither a directory nor a regular file (symlinks included) is
+      // left alone so the walk cannot escape the working tree or loop through a cycle.
+      if (!child.isFile()) continue;
+      if (child.name === '.DS_Store') continue;
+      const info = statOrNull(childRel);
+      if (info === null) continue;
+      if (info.mtimeMs >= sinceMs && _isPathSafeRelative(childRel)) modified.push(childRel);
+    }
+  }
+  return modified;
+}
 function getGitTrackedAndUntrackedFiles(cwd) {
   try {
     const stdout = execFileSync('git', ['ls-files', '-z', '--cached', '--others', '--exclude-standard'], {
@@ -2747,20 +3725,34 @@ async function plan(request, cwd, options = {}) {
     if (!result.success) {
       parseErr.message = `Planning failed before producing valid JSON (${result.stderr || 'provider error'}): ${parseErr.message}`;
     }
-    if (process.env.WALLE_PLAN_DEBUG) {
-      const dumpPath = path.join(
-        process.env.WALL_E_DATA_DIR || '/tmp',
-        `planner-debug-${Date.now()}.txt`,
-      );
-      try {
-        fs.writeFileSync(
-          dumpPath,
-          `=== prompt ===\n${prompt}\n\n=== output ===\n${result.output || ''}\n\n=== outputRaw ===\n${result.outputRaw || ''}\n`,
+    if (shouldRecoverPlannerParseFailure({ request, output: result.output, cwd })) {
+      if (onProgress) {
+        onProgress({
+          type: 'planning_recovery',
+          phase: 'planning',
+          step: -1,
+          message: 'Planner returned unstructured output; recovering with a direct implementation subtask.',
+          detail: { reason: parseErr.message },
+        });
+      }
+      planObj = buildPlannerRecoveryPlan(request, context, parseErr, result.output);
+      config._planningRecovery = planObj.planning_recovery;
+    } else {
+      if (process.env.WALLE_PLAN_DEBUG) {
+        const dumpPath = path.join(
+          process.env.WALL_E_DATA_DIR || '/tmp',
+          `planner-debug-${Date.now()}.txt`,
         );
-        parseErr.message += ` (planner debug dumped to ${dumpPath})`;
-      } catch {}
+        try {
+          fs.writeFileSync(
+            dumpPath,
+            `=== prompt ===\n${prompt}\n\n=== output ===\n${result.output || ''}\n\n=== outputRaw ===\n${result.outputRaw || ''}\n`,
+          );
+          parseErr.message += ` (planner debug dumped to ${dumpPath})`;
+        } catch {}
+      }
+      throw parseErr;
     }
-    throw parseErr;
   }
   // Enforce max_subtasks
@@ -2919,6 +3911,13 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
       const attemptChangedFiles = changedFilesSince(cwd, attemptStartDirtyFiles);
       const reportedChangedFiles = Array.isArray(result.changedFiles) ? result.changedFiles : [];
       const changedFilesForValidation = [...new Set([...reportedChangedFiles, ...attemptChangedFiles])];
+      const acceptanceContract = buildAcceptanceContract({
+        request: subtask.prompt || subtask.title || '',
+        subtask,
+        changedFiles: changedFilesForValidation,
+        frontend: changedFilesTouchFrontend(changedFilesForValidation),
+        requiresFileChanges,
+      });
       if (isTimeoutOnlyOutput(result.output)) {
         lastError = `Subtask timed out before producing a usable result${result.stderr ? `: ${result.stderr}` : ''}`;
@@ -2960,6 +3959,24 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
         continue;
       }
+      const acceptanceReport = await runAcceptanceValidators({
+        cwd,
+        contract: acceptanceContract,
+        changedFiles: changedFilesForValidation,
+        screenshots: result.screenshots || [],
+        toolCallHistory: toolCallHistoryFromLog(result.log),
+        autoBrowser: false,
+        requireBrowserRuntime: false,
+        onProgress,
+        step: i,
+      });
+      if (!acceptanceReport.ok) {
+        lastError = `Acceptance validation failed: ${summarizeValidatorFailures(acceptanceReport).join('; ')}`;
+        if (onProgress) onProgress({ type: 'retry', index: i, retry, error: lastError, acceptance: acceptanceReport });
+        if (retry < config.max_retries) restoreSubtaskSnapshot(snapshot, cwd, baselineUntracked);
+        continue;
+      }
       // Run tests if configured
       let testsOk = true;
       if (config.test_command) {
@@ -3080,6 +4097,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
         failed_subtask: i,
         files_changed: [...agentChangedFiles],
         pre_existing_dirty_files: [...preExistingDirtyFiles],
+        screenshots: allScreenshots,
       };
     }
@@ -3095,6 +4113,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
     state_path: statePath,
     files_changed: [...agentChangedFiles],
     pre_existing_dirty_files: [...preExistingDirtyFiles],
+    screenshots: allScreenshots,
   };
 }
@@ -3104,6 +4123,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
 async function complete(request, planData, executeResult, { cwd, brain, onProgress } = {}) {
   const { plan: planObj, config } = planData;
   const agentFiles = (executeResult.files_changed || []).filter(_isPathSafeRelative);
+  const screenshots = Array.isArray(executeResult.screenshots) ? executeResult.screenshots : [];
   const report = {
     success: executeResult.success,
     branch: planObj.branch_name,
@@ -3124,10 +4144,42 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
     return report;
   }
+  let diff = '';
+  if (agentFiles.length > 0) {
+    diff = await getGitDiffForFiles(cwd, agentFiles);
+  }
+  if (diff && codingReview.diffTouchesFrontend(diff)) {
+    const acceptanceContract = buildAcceptanceContract({
+      request,
+      subtask: { title: 'Final frontend acceptance', prompt: request },
+      changedFiles: agentFiles,
+      frontend: true,
+      requiresFileChanges: config.require_changes !== false,
+    });
+    const acceptanceReport = await runAcceptanceValidators({
+      cwd,
+      contract: acceptanceContract,
+      changedFiles: agentFiles,
+      screenshots,
+      toolCallHistory: [],
+      autoBrowser: config.browser_smoke !== false && config.browserSmoke !== false,
+      requireBrowserRuntime: config.browser_smoke !== false && config.browserSmoke !== false,
+      onProgress,
+      step: -1,
+    });
+    report.frontendVerification = acceptanceReport;
+    if (!acceptanceReport.ok) {
+      report.success = false;
+      report.error = summarizeValidatorFailures(acceptanceReport)[0] || 'Frontend acceptance validation failed';
+      report.concerns.push(...acceptanceReport.concerns.slice(0, 10));
+      return report;
+    }
+  }
   // Final review
   if (config.review) {
     if (onProgress) onProgress({ phase: 'reviewing', step: -1, message: 'Final review...' });
-    const diff = await getGitDiffForFiles(cwd, agentFiles);
     if (diff) {
       const verdict = config.review_quorum
         ? await codingQuorum.runCodingQuorum({
@@ -3155,8 +4207,8 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
         return report;
       }
-      if (codingReview.diffTouchesFrontend(diff) && allScreenshots.length > 0) {
-        const visualVerdict = await codingReview.reviewVisual(request, allScreenshots, diff, {
+      if (codingReview.diffTouchesFrontend(diff) && screenshots.length > 0) {
+        const visualVerdict = await codingReview.reviewVisual(request, screenshots, diff, {
           cwd,
           reviewer: config.reviewer,
           reviewers: config.reviewers,
@@ -3208,7 +4260,7 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
         if (diffErr.code !== 1) throw diffErr;
       }
       const sanitizedRequest = request.replace(/[\r\n]+/g, ' ').trim().slice(0, 72);
-      const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Codex <noreply@openai.com>`;
+      const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Wall-E <noreply@example.invalid>`;
       const { stdout } = await execFileAsync('git', ['commit', '-m', commitMsg], { cwd });
       // Extract commit hash
       const hashMatch = stdout.match(/\[[\w/.-]+ ([a-f0-9]+)\]/);
@@ -3285,6 +4337,56 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
   return report;
 }
+/**
+ * Persist the typed artifacts found in a tool result and record each one in the transcript.
+ *
+ * Best-effort by design: a missing stream-processor or artifact-store module, or a failure
+ * while handling one artifact, is swallowed so that artifact bookkeeping never fails the
+ * tool call that produced the result.
+ *
+ * @param {object} result - Tool result passed to extractTypedArtifactDescriptors.
+ * @param {object} [context]
+ * @param {string} [context.sessionId] - Forwarded to the store and to the transcript entry.
+ * @param {string} [context.cwd] - Forwarded to the transcript entry.
+ * @param {string} [context.toolCallId] - Id of the originating tool call.
+ * @param {string} [context.toolName] - Name of the originating tool.
+ * @param {object} [context.transcript] - Must expose appendArtifact(); otherwise nothing is done.
+ * @returns {Array<object>} Artifacts appended to the transcript, in order, without duplicates.
+ */
+function storeTypedArtifactsForTranscript(result, { sessionId, cwd, toolCallId, toolName, transcript } = {}) {
+  if (!result || typeof result !== 'object' || !transcript?.appendArtifact) return [];
+  let descriptors = [];
+  try {
+    const { extractTypedArtifactDescriptors } = require('./coding/stream-processor');
+    descriptors = extractTypedArtifactDescriptors(result, { id: toolCallId, name: toolName });
+  } catch {
+    // Extractor missing or failed: treat the result as carrying no artifacts.
+    descriptors = [];
+  }
+  // A null/undefined return must not throw here; anything that is not a non-empty array is "nothing to store".
+  if (!Array.isArray(descriptors) || descriptors.length === 0) return [];
+  let artifactStore = null;
+  try {
+    const { ArtifactStore } = require('./coding/artifact-store');
+    artifactStore = new ArtifactStore();
+  } catch {
+    // Store unavailable: fall back to recording the raw descriptors below.
+  }
+  const stored = [];
+  const seen = new Set();
+  for (const descriptor of descriptors) {
+    try {
+      const artifact = artifactStore?.storeArtifact
+        ? artifactStore.storeArtifact({
+          sessionId,
+          toolCallId,
+          toolName,
+          kind: descriptor.kind,
+          sourcePath: descriptor.path || descriptor.sourcePath,
+          content: descriptor.content,
+          mimeType: descriptor.mimeType,
+          bytes: descriptor.bytes,
+          sha256: descriptor.sha256,
+          metadata: descriptor.metadata || {},
+        })
+        : descriptor;
+      // Raw descriptors may carry `sourcePath` (or only a content hash) instead of `path`.
+      // Keying on `path` alone collapsed all of them to `${kind}:undefined`, so every
+      // path-less artifact after the first was dropped as a "duplicate".
+      const locator = artifact.path ?? artifact.sourcePath ?? artifact.sha256;
+      const key = artifact.artifactId || `${artifact.kind}:${locator}`;
+      if (seen.has(key)) continue;
+      seen.add(key);
+      transcript.appendArtifact({
+        sessionId,
+        cwd,
+        type: artifact.kind || 'artifact',
+        toolCallId,
+        name: toolName,
+        artifact,
+      });
+      stored.push(artifact);
+    } catch {
+      // Deliberately ignored: one artifact that cannot be stored or appended must not
+      // prevent the remaining artifacts from being recorded.
+    }
+  }
+  return stored;
+}
 /**
  * Resume a coding session from a saved checkpoint.
  * Loads checkpoint data from brain DB, reconstructs messages, and re-enters runAgentLoop.
@@ -3335,13 +4437,20 @@ module.exports = {
   readCheckpoint,
   formatReport,
   isTimeoutOnlyOutput,
+  resolveRunTimeoutMs,
+  isInteractiveRun,
   isActionRequiredPrompt,
   isPrematureActionResponse,
   getNoActionContinuation,
+  evaluateStopGate,
+  MAX_STOP_HOOK_BOUNCES,
   hasVerificationEvidence,
   subtaskRequiresFileChanges,
   screenshotTrackerHook,
   collectEmptyChangedFiles,
+  changedFilesSince,
+  captureChangedFilesBaseline,
+  collectFilesModifiedSince,
   CODING_TOOLS,
   READ_ONLY_TOOLS,
   BUILD_TOOLS,