npm - codemini-cli - Versions diffs - 0.3.4 → 0.3.6 - Mend

codemini-cli 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/README.md +20 -18
package/package.json +6 -6
package/souls/anime.md +12 -9
package/souls/caveman.md +6 -6
package/souls/ceo.md +10 -9
package/souls/default.md +1 -1
package/souls/pirate.md +6 -6
package/souls/playful.md +7 -7
package/souls/professional.md +1 -1
package/src/cli.js +3 -1
package/src/commands/run.js +229 -16
package/src/core/agent-loop.js +167 -49
package/src/core/ast.js +40 -0
package/src/core/chat-runtime.js +720 -126
package/src/core/command-policy.js +56 -0
package/src/core/config-store.js +0 -3
package/src/core/crypto-utils.js +6 -2
package/src/core/memory-store.js +3 -3
package/src/core/project-index.js +4 -18
package/src/core/provider/anthropic.js +15 -2
package/src/core/provider/anthropic.sdk-backup.js +439 -0
package/src/core/provider/openai-compatible.js +93 -11
package/src/core/provider/openai-compatible.sdk-backup.js +412 -0
package/src/core/session-store.js +90 -25
package/src/core/shell-profile.js +26 -6
package/src/core/string-utils.js +37 -0
package/src/core/tools.js +216 -405
package/src/tui/chat-app.js +490 -146
package/src/tui/tool-activity/presenters/files.js +2 -2
package/src/tui/tool-narration.js +0 -3
package/src/tui/tool-narration/presenters/patch.js +0 -3

package/src/core/chat-runtime.js CHANGED

@@ -1,6 +1,7 @@
 import { parseInput } from './input-parser.js';
 import { loadCommandsAndSkills, renderCommandPrompt } from './command-loader.js';
 import { runAgentLoop, setResultDir, clearResultStore } from './agent-loop.js';
+import { trimInline, normalizePath } from './string-utils.js';
 import fs from 'node:fs/promises';
 import path from 'node:path';
 import {
@@ -27,6 +28,8 @@ import { buildMemorySnapshot } from './memory-prompt.js';
 import { forgetMemory, listMemories, searchMemories } from './memory-store.js';
 import { countActiveTodos, normalizeTodos } from './todo-state.js';
+const STREAM_SAVE_DEBOUNCE_MS = 120;
 function toOpenAIMessages(sessionMessages) {
   const mapped = [];
   for (const msg of sessionMessages) {
@@ -41,6 +44,8 @@ function toOpenAIMessages(sessionMessages) {
     mapped.push({
       role: msg.role,
       content: msg.content,
+      ...(typeof msg.reasoning_content === 'string' && msg.reasoning_content ? { reasoning_content: msg.reasoning_content } : {}),
+      ...(Array.isArray(msg.reasoning_details) && msg.reasoning_details.length > 0 ? { reasoning_details: msg.reasoning_details } : {}),
       ...(msg.tool_calls ? { tool_calls: msg.tool_calls } : {})
     });
   }
@@ -145,7 +150,8 @@ function getCompletionCopy(language = 'zh') {
         memory: '查看/搜索/删除持久记忆',
         history: '查看/恢复会话',
         debug: '运行时调试开关',
-        retry: '重试上一条用户请求'
+        retry: '重试上一条用户请求',
+        stop: '中止当前回答'
       },
       generic: {
         configCommand: '配置命令',
@@ -160,6 +166,7 @@ function getCompletionCopy(language = 'zh') {
         keyboardDebugCommand: '键盘调试命令',
         compactCommand: '上下文压缩命令',
         retryCommand: '重试上一条用户请求',
+        stopCommand: '中止当前回答',
         statusCommand: '查看运行状态',
         resumeSession: '恢复一个已保存的会话'
       }
@@ -232,7 +239,8 @@ function getCompletionCopy(language = 'zh') {
         memory: 'list/search/delete persistent memories',
         history: 'list/resume sessions',
         debug: 'runtime debug switches',
-        retry: 'retry the last user request'
+        retry: 'retry the last user request',
+        stop: 'stop the current response'
       },
       generic: {
         configCommand: 'config command',
@@ -247,6 +255,7 @@ function getCompletionCopy(language = 'zh') {
         keyboardDebugCommand: 'keyboard debug command',
         compactCommand: 'context compaction command',
         retryCommand: 'retry the last user request',
+        stopCommand: 'stop the current response',
         statusCommand: 'show runtime status',
         resumeSession: 'resume a saved session'
       }
@@ -261,62 +270,135 @@ function describeConfigKey(key, mode = 'set', language = 'zh') {
   return mode === 'get' ? copy.describeGet(label, hint) : copy.describeSet(label, hint);
 }
-const SUB_AGENT_ROLES = ['planner', 'coder', 'reviewer', 'tester'];
+const SUB_AGENT_ROLES = ['planner', 'coder', 'reviewer', 'tester', 'summarizer'];
+const ROLE_TOOL_POLICY = {
+  planner: ['read', 'grep', 'list', 'query_project_index', 'tool_search', 'glob', 'ast_query', 'read_ast_node'],
+  coder: ['read', 'grep', 'list', 'edit', 'write', 'run', 'ast_query', 'read_ast_node', 'glob', 'tool_search', 'update_todos'],
+  reviewer: ['read', 'grep', 'list', 'glob', 'tool_search', 'ast_query', 'read_ast_node'],
+  tester: ['read', 'grep', 'list', 'run', 'glob', 'tool_search'],
+  summarizer: ['read', 'grep', 'list', 'glob', 'tool_search']
+};
 const SUB_AGENT_CONTEXT_MAX_MESSAGES = 4;
 const SUB_AGENT_CONTEXT_MAX_CHARS = 1200;
 const SUB_AGENT_EVIDENCE_MAX_ITEMS = 3;
 const SUB_AGENT_HANDOFF_MAX_ITEMS = 6;
+const PLAN_MEMORY_MARKERS = {
+  findings: ['<!-- plan-findings-start -->', '<!-- plan-findings-end -->'],
+  progress: ['<!-- plan-progress-start -->', '<!-- plan-progress-end -->']
+};
 export function getSubAgentRolePrompt(role) {
   if (role === 'planner') {
-    return 'You are a planning sub-agent. Produce a concrete implementation plan with risks and verification.';
+    return [
+      'You are the planner in a multi-step agent pipeline.',
+      'Your job: inspect the codebase and produce a concrete, actionable plan.',
+      'Do not write implementation code.',
+      'Output format — keep it short and direct:',
+      'Findings:',
+      '- <important constraint, dependency, risk, or "none">',
+      'Actions Taken:',
+      '- <what you inspected>',
+      'Open Issues:',
+      '- <blocking uncertainty or "none">',
+      'Next Action:',
+      '- <the concrete next step for the following role>',
+      'Do not summarize your own work or add closing remarks — just deliver the structured handoff and stop.',
+      'IMPORTANT: Stop as soon as you have enough context to produce the plan. Do NOT keep exploring once the plan is clear — deliver it immediately.'
+    ].join('\n');
   }
   if (role === 'reviewer') {
     return [
-      'You are a review sub-agent. Focus on bugs, regressions, edge cases, and missing tests.',
-      'Start with the focused files or directories handed to you. Do not roam unrelated parts of the repo unless the handed-off evidence is insufficient.',
-      'Use this exact output structure:',
-      'Acceptance Status:',
-      '- <met|unmet|unverified> :: <acceptance checklist item or "none">',
+      'You are the reviewer in a multi-step agent pipeline.',
+      'Focus on bugs, regressions, edge cases, and missing tests in the files handed to you.',
+      'Do not roam unrelated parts of the repo unless the handed-off evidence is insufficient.',
+      'Output format — keep it short and direct:',
       'Findings:',
       '- <bug, regression, risk, or "none">',
       'Verified:',
       '- <what you checked>',
       'Not Verified:',
       '- <what remains uncertain>',
-      'Next Action:',
-      '- <single best next step>'
+      'Do not add a closing summary or "Next Action" — the pipeline handles what comes next.'
     ].join('\n');
   }
   if (role === 'tester') {
     return [
-      'You are a testing sub-agent. Focus on verification strategy, real test execution evidence, missing coverage, and whether the work was actually validated.',
-      'Prefer running concrete verification commands over only suggesting them.',
-      'Start with the focused files or directories handed to you. Verify those artifacts first before scanning the wider repo.',
-      'Use this exact output structure:',
-      'Acceptance Status:',
-      '- <met|unmet|unverified> :: <acceptance checklist item or "none">',
+      'You are the tester in a multi-step agent pipeline.',
+      'Run concrete verification commands. Prefer real execution over suggestions.',
+      'Verify the handed-off files first before scanning wider.',
+      'Output format — keep it short and direct:',
       'Verified:',
       '- <commands run and evidence>',
       'Not Verified:',
       '- <what could not be validated>',
       'Failures:',
       '- <failed command or "none">',
-      'Next Action:',
-      '- <single best next step>'
+      'Do not add a closing summary or "Next Action" — the pipeline handles what comes next.'
+    ].join('\n');
+  }
+  if (role === 'summarizer') {
+    return [
+      'You are the summarizer in a multi-step agent pipeline.',
+      'Your job is to synthesize the results of all prior steps into a concise, actionable final summary.',
+      'Do NOT re-analyze the codebase or make new tool calls unless the handed-off evidence is clearly insufficient.',
+      'Instead, read the accumulated step results in the plan file context provided to you.',
+      'Output format — keep it short and direct:',
+      'Summary:',
+      '- <overall result in 2-4 sentences>',
+      'Key Findings:',
+      '- <most important findings from all steps>',
+      'Actions Taken:',
+      '- <what was implemented/changed/verified>',
+      'Remaining Issues:',
+      '- <unresolved items or "none">',
+      'Recommended Next Steps:',
+      '- <concrete follow-up actions if any>',
+      'Do not add greetings, filler, or restate the goal. Deliver the summary and stop.'
     ].join('\n');
   }
   return [
-    'You are an execution sub-agent. Produce practical implementation guidance with code-level detail.',
-    'Stop when: you have produced the code change and verified it compiles/passes basic checks.',
-    'If blocked: report what blocked you and what you tried, then stop.'
+    'You are the coder in a multi-step agent pipeline.',
+    'Produce practical code changes with minimal explanation.',
+    'Output format — keep it short and direct:',
+    'Actions Taken:',
+    '- <file changes, commands, or "none">',
+    'Findings:',
+    '- <important implementation note, regression risk, or "none">',
+    'Verified:',
+    '- <test/check evidence or "none">',
+    'Open Issues:',
+    '- <remaining gap or "none">',
+    'Artifacts:',
+    '- <changed file path or "none">',
+    'Next Action:',
+    '- <the best next step for the following role or "none">',
+    'Do not summarize the goal, recap the plan, or add closing remarks.'
   ].join('\n');
 }
-function trimInlineText(value, maxLen = 220) {
-  const text = String(value || '').replace(/\s+/g, ' ').trim();
-  if (!text) return '';
-  if (text.length <= maxLen) return text;
-  return `${text.slice(0, maxLen - 3)}...`;
+function buildPipelineStepGuidance({ role, stepIndex, totalSteps, isFirst, isLast, priorSteps }) {
+  const lines = [];
+  lines.push(`Pipeline position: step ${stepIndex + 1} of ${totalSteps}.`);
+  if (isFirst) {
+    lines.push('You are the first step. Your output sets direction for the rest of the pipeline.');
+  } else if (isLast) {
+    lines.push('You are the final step. After you, the pipeline will present a combined result to the user.');
+  } else {
+    lines.push('You are in the middle of the pipeline. Your output feeds into the next step.');
+  }
+  if (priorSteps.length > 0) {
+    const prev = priorSteps[priorSteps.length - 1];
+    lines.push(`Previous step was [${prev.role}]: ${prev.title}. Use its output as your starting point.`);
+  }
+  lines.push('Style rules:');
+  lines.push('- Be direct and action-oriented. No greetings, no summaries, no "In conclusion" or "To summarize".');
+  lines.push('- Treat the Findings Ledger and Progress Ledger in the plan file context as the shared working memory for this pipeline.');
+  lines.push('- If you discover something new, record it under the requested headings instead of burying it in prose.');
+  lines.push('- Continue the established direction unless you have concrete contradictory evidence.');
+  lines.push('- Output only what the next step needs to know. Skip obvious observations.');
+  if (isLast) {
+    lines.push('- Since you are the final step, give a concise overall verdict the user can act on.');
+  }
+  return lines.join('\n');
 }
 function buildSubAgentContextPacket(session) {
@@ -330,7 +412,7 @@ function buildSubAgentContextPacket(session) {
   let usedChars = 0;
   for (const msg of recent) {
     const role = msg.role === 'assistant' ? 'assistant' : 'user';
-    const text = trimInlineText(msg.content, 260);
+    const text = trimInline(msg.content, 260);
     if (!text) continue;
     const line = `- ${role}: ${text}`;
     if (usedChars + line.length > SUB_AGENT_CONTEXT_MAX_CHARS) break;
@@ -346,8 +428,8 @@ function buildSubAgentContextPacket(session) {
 }
 function maybePushEvidence(out, seen, filePath, summary) {
-  const pathText = trimInlineText(filePath, 160);
-  const summaryText = trimInlineText(summary, 200);
+  const pathText = trimInline(filePath, 160);
+  const summaryText = trimInline(summary, 200);
   if (!pathText || seen.has(pathText)) return;
   seen.add(pathText);
   out.push(`- ${pathText}${summaryText ? ` :: ${summaryText}` : ''}`);
@@ -413,7 +495,7 @@ function extractLikelyPathsFromText(rawText, out, seen) {
 }
 function summarizeStepOutput(step) {
-  const text = trimInlineText(step?.output || step?.task || '', 220);
+  const text = trimInline(step?.output || step?.task || '', 800);
   return text || 'No concise output captured.';
 }
@@ -586,7 +668,7 @@ function classifyPlanTaskClass(goal = '') {
 }
 function buildGoalRequirementPacket(goal, role) {
-  const rawGoal = trimInlineText(goal, 800);
+  const rawGoal = trimInline(goal, 800);
   if (!rawGoal) return '';
   const requirements = deriveGoalRequirements(goal);
   const lines = ['Original goal:', rawGoal];
@@ -619,7 +701,8 @@ function buildAutoPlanPlannerGuidance() {
     '- Prefer the smallest local approach that satisfies the goal.',
     '- Do not output multiple alternative branches in the final plan.',
     '- Do not assume implementation should begin before the plan is coherent.',
-    '- Available sub-agent roles are planner, coder, reviewer, and tester. Use only the roles the task actually needs.',
+    '- Available sub-agent roles are planner, coder, reviewer, tester, and summarizer. Use only the roles the task actually needs.',
+    '- The summarizer role reads accumulated step results from the plan file and synthesizes a final summary. It does NOT re-analyze the codebase. Prefer summarizer as the final step for multi-step plans.',
     '- For implementation-heavy or risky changes, prefer adding review and/or verification steps.',
     '- For analysis, recommendation, or planning-only goals, you may omit reviewer/tester if they do not add value.',
     '- Prefer 3-5 steps total unless the task is clearly larger.',
@@ -667,6 +750,196 @@ async function readJsonSafe(targetPath) {
   }
 }
+function extractManagedPlanSection(content = '', key = 'findings') {
+  const markers = PLAN_MEMORY_MARKERS[key];
+  if (!markers) return '';
+  const [startMarker, endMarker] = markers;
+  const start = String(content || '').indexOf(startMarker);
+  const end = String(content || '').indexOf(endMarker);
+  if (start === -1 || end === -1 || end <= start) return '';
+  return String(content || '')
+    .slice(start + startMarker.length, end)
+    .trim();
+}
+function replaceManagedPlanSection(content = '', key = 'findings', nextSection = '') {
+  const markers = PLAN_MEMORY_MARKERS[key];
+  if (!markers) return String(content || '');
+  const [startMarker, endMarker] = markers;
+  const sectionBody = `${startMarker}\n${String(nextSection || '').trim()}\n${endMarker}`;
+  const pattern = new RegExp(`${startMarker}[\\s\\S]*?${endMarker}`);
+  if (pattern.test(String(content || ''))) {
+    return String(content || '').replace(pattern, sectionBody);
+  }
+  return `${String(content || '').trimEnd()}\n\n${sectionBody}\n`;
+}
+function normalizeLedgerItems(items = [], fallback = '- None recorded yet.') {
+  const cleaned = [...new Set((Array.isArray(items) ? items : []).map((item) => String(item || '').trim()).filter(Boolean))];
+  return cleaned.length > 0 ? cleaned : [fallback];
+}
+function trimLedger(items = [], maxItems = 10) {
+  const cleaned = normalizeLedgerItems(items, '').filter(Boolean);
+  return cleaned.slice(Math.max(0, cleaned.length - maxItems));
+}
+export function extractStepWorkingMemory(output = '', artifactPaths = []) {
+  const findings = extractSectionBullets(output, 'Findings')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const actionsTaken = extractSectionBullets(output, 'Actions Taken')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const verified = extractSectionBullets(output, 'Verified')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const notVerified = extractSectionBullets(output, 'Not Verified')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const failures = extractSectionBullets(output, 'Failures')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const openIssues = extractSectionBullets(output, 'Open Issues')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const nextAction = extractSectionBullets(output, 'Next Action')
+    .filter((item) => !/^none\b/i.test(item))
+    .map((item) => `- ${item}`);
+  const artifactLines = [
+    ...extractSectionBullets(output, 'Artifacts')
+      .filter((item) => !/^none\b/i.test(item))
+      .map((item) => `- ${item}`),
+    ...(Array.isArray(artifactPaths) ? artifactPaths : []).filter(Boolean).map((item) => `- ${item}`)
+  ];
+  return {
+    findings: trimLedger(findings, 8),
+    actionsTaken: trimLedger(actionsTaken, 8),
+    verified: trimLedger(verified, 6),
+    notVerified: trimLedger(notVerified, 6),
+    failures: trimLedger(failures, 6),
+    openIssues: trimLedger(openIssues, 6),
+    nextAction: trimLedger(nextAction, 3),
+    artifacts: trimLedger(artifactLines, 6)
+  };
+}
+function buildProgressLedgerEntry(stepIndex, stepTitle, role, memory) {
+  const status = memory.failures.length > 0 || memory.openIssues.length > 0 || memory.notVerified.length > 0 ? 'attention-needed' : 'completed';
+  const highlights = [
+    memory.actionsTaken[0],
+    memory.verified[0],
+    memory.nextAction[0],
+    memory.openIssues[0],
+    memory.notVerified[0],
+    memory.failures[0]
+  ]
+    .filter(Boolean)
+    .map((item) => item.replace(/^- /, ''))
+    .slice(0, 2);
+  const suffix = highlights.length > 0 ? ` :: ${highlights.join(' | ')}` : '';
+  return `- Step ${stepIndex + 1} [${role}] ${stepTitle} -> ${status}${suffix}`;
+}
+function buildRecentStepResults(content = '', maxEntries = 2) {
+  const value = String(content || '');
+  const matches = [...value.matchAll(/^## Step \d+ Result: .*$/gm)];
+  if (matches.length === 0) return '';
+  const starts = matches.map((match) => match.index || 0);
+  const chunks = starts.map((start, index) => value.slice(start, starts[index + 1] || value.length).trim());
+  return chunks.slice(-maxEntries).join('\n\n---\n\n');
+}
+export function buildPlanWorkingMemoryContext(content = '', maxChars = 6000) {
+  const value = String(content || '').trim();
+  if (!value) return '';
+  const findings = extractManagedPlanSection(value, 'findings');
+  const progress = extractManagedPlanSection(value, 'progress');
+  if (!findings && !progress) {
+    if (value.length <= maxChars) return value;
+    const headSize = Math.floor(maxChars * 0.3);
+    const tailSize = maxChars - headSize - 50;
+    return `${value.slice(0, headSize)}\n\n... [plan file truncated, showing most recent step results] ...\n\n${value.slice(-tailSize)}`;
+  }
+  const headLimit = Math.max(600, Math.floor(maxChars * 0.35));
+  const head = value.slice(0, headLimit).trimEnd();
+  const recentResults = buildRecentStepResults(value, 2);
+  const sections = [
+    head,
+    '## Working Memory Snapshot',
+    '### Findings Ledger',
+    findings || '- None recorded yet.',
+    '### Progress Ledger',
+    progress || '- No progress recorded yet.'
+  ];
+  if (recentResults) {
+    sections.push('## Recent Step Results');
+    sections.push(recentResults);
+  }
+  const summary = sections.filter(Boolean).join('\n\n').trim();
+  return summary.length <= maxChars ? summary : `${summary.slice(0, maxChars - 42).trimEnd()}\n... [working memory truncated]`;
+}
+async function appendStepResultToPlanFile(planFilePath, stepIndex, stepTitle, role, output, artifactPaths = []) {
+  if (!planFilePath) return;
+  try {
+    const separator = '\n\n---\n\n';
+    const timestamp = new Date().toISOString();
+    const content = await fs.readFile(planFilePath, 'utf8');
+    const memory = extractStepWorkingMemory(output, artifactPaths);
+    const findingsBlock = [
+      ...extractManagedPlanSection(content, 'findings')
+        .split('\n')
+        .map((line) => line.trim())
+        .filter(Boolean),
+      ...memory.findings,
+      ...memory.openIssues,
+      ...memory.notVerified,
+      ...memory.failures
+    ];
+    const progressBlock = [
+      ...extractManagedPlanSection(content, 'progress')
+        .split('\n')
+        .map((line) => line.trim())
+        .filter(Boolean),
+      buildProgressLedgerEntry(stepIndex, stepTitle, role, memory)
+    ];
+    const entry = [
+      `## Step ${stepIndex + 1} Result: ${stepTitle}`,
+      `Role: ${role}`,
+      `Completed: ${timestamp}`,
+      '',
+      output || '(no output)',
+      ''
+    ].join('\n');
+    const nextContent = [
+      replaceManagedPlanSection(content, 'findings', normalizeLedgerItems(trimLedger(findingsBlock, 12)).join('\n')),
+      ''
+    ].join('\n');
+    const nextWithProgress = replaceManagedPlanSection(
+      nextContent,
+      'progress',
+      normalizeLedgerItems(trimLedger(progressBlock, 12), '- No progress recorded yet.').join('\n')
+    );
+    await fs.writeFile(planFilePath, `${nextWithProgress.trimEnd()}${separator}${entry}\n`, 'utf8');
+  } catch {
+    // Non-fatal: plan file handoff is best-effort
+  }
+}
+async function readPlanFileAsContext(planFilePath, maxChars = 6000) {
+  if (!planFilePath) return '';
+  try {
+    const content = await fs.readFile(planFilePath, 'utf8');
+    return buildPlanWorkingMemoryContext(content, maxChars);
+  } catch {
+    return '';
+  }
+}
 async function buildTesterVerificationPacket(focusPaths = []) {
   const cwd = process.cwd();
   const primary = [];
@@ -682,13 +955,13 @@ async function buildTesterVerificationPacket(focusPaths = []) {
     const pkg = await readJsonSafe(packageJsonPath);
     const scripts = pkg?.scripts || {};
     if (typeof scripts.test === 'string' && scripts.test.trim()) {
-      primary.push(`- npm test :: package.json script = ${trimInlineText(scripts.test, 140)}`);
+      primary.push(`- npm test :: package.json script = ${trimInline(scripts.test, 140)}`);
     }
     if (typeof scripts.build === 'string' && scripts.build.trim()) {
-      secondary.push(`- npm run build :: package.json script = ${trimInlineText(scripts.build, 140)}`);
+      secondary.push(`- npm run build :: package.json script = ${trimInline(scripts.build, 140)}`);
     }
     if (typeof scripts.lint === 'string' && scripts.lint.trim()) {
-      secondary.push(`- npm run lint :: package.json script = ${trimInlineText(scripts.lint, 140)}`);
+      secondary.push(`- npm run lint :: package.json script = ${trimInline(scripts.lint, 140)}`);
     }
     fallback.push('- If test/build scripts are not usable, inspect package.json scripts and run the narrowest relevant check.');
   }
@@ -1004,17 +1277,32 @@ function buildFallbackAutoPlan(goal) {
         title: 'Verify the changed flows',
         role: 'tester',
         task: `Verify the completed work for: ${goal}. Run the most relevant checks available, report concrete evidence, and call out anything still not verified.`
+      },
+      {
+        title: 'Synthesize final implementation status',
+        role: 'summarizer',
+        task: `Synthesize the completed work for: ${goal}. Read the accumulated findings, verification evidence, and open issues from earlier steps, then produce a concise final status with remaining risks and the single best next action.`
       }
     ]
   };
 }
+function buildDefaultSummarizerStep(goal, source = []) {
+  const existing = (Array.isArray(source) ? source : []).find((step) => step.role === 'summarizer');
+  if (existing?.title && existing?.task) return existing;
+  return {
+    title: 'Synthesize final implementation status',
+    role: 'summarizer',
+    task: `Synthesize the completed work for: ${goal}. Read the accumulated findings, verification evidence, and open issues from earlier steps, then produce a concise final status with remaining risks and the single best next action.`
+  };
+}
 function enforceAutoPlanGuardrailSteps(plan, goal) {
   const source = Array.isArray(plan?.steps) ? plan.steps : [];
   const requirements = deriveGoalRequirements(goal);
   const lightweightGoal = isLightweightAutoPlanGoal(goal, requirements);
   const taskClass = classifyPlanTaskClass(goal);
-  const implementationSteps = source.filter((step) => step.role !== 'reviewer' && step.role !== 'tester');
+  const implementationSteps = source.filter((step) => step.role !== 'reviewer' && step.role !== 'tester' && step.role !== 'summarizer');
   const primaryImplementationStep =
     implementationSteps.find((step) => step.role === 'coder') ||
     implementationSteps[0] || {
@@ -1032,6 +1320,7 @@ function enforceAutoPlanGuardrailSteps(plan, goal) {
     role: 'tester',
     task: `Test and verify the completed work for: ${goal}. Start with the artifacts produced by earlier implementation steps, run the most relevant checks available, report concrete evidence, and call out anything still unverified.`
   };
+  const summarizerStep = buildDefaultSummarizerStep(goal, source);
   const hasReviewer = source.some((step) => step.role === 'reviewer');
   const hasTester = source.some((step) => step.role === 'tester');
@@ -1050,13 +1339,16 @@ function enforceAutoPlanGuardrailSteps(plan, goal) {
     };
   }
+  const executionSteps = [
+    ...implementationSteps.slice(0, 6),
+    ...(hasReviewer ? [reviewerStep] : []),
+    ...(testerStep ? [testerStep] : [])
+  ];
+  const needsSummarizer = executionSteps.length >= 3;
   return {
     summary: String(plan?.summary || `Auto plan for: ${goal}`).trim(),
-    steps: [
-      ...implementationSteps.slice(0, 6),
-      ...(hasReviewer ? [reviewerStep] : []),
-      ...(testerStep ? [testerStep] : [])
-    ]
+    steps: needsSummarizer ? [...executionSteps, summarizerStep] : executionSteps
   };
 }
@@ -1081,15 +1373,74 @@ function stepOutputHasFailureSignals(role, text = '') {
   const failureBullet = extractSectionFirstBullet(value, 'Failures');
   const findingsBullet = extractSectionFirstBullet(value, 'Findings');
   const nextActionBullet = extractSectionFirstBullet(value, 'Next Action');
+  const notVerifiedBullet = extractSectionFirstBullet(value, 'Not Verified');
+  const remainingIssuesBullet = extractSectionFirstBullet(value, 'Remaining Issues');
+  const actionsTakenBullet = extractSectionFirstBullet(value, 'Actions Taken');
+  const artifactsBullet = extractSectionFirstBullet(value, 'Artifacts');
   const acceptanceFailures = extractAcceptanceStatusItems(value).filter((item) => item.status !== 'met');
   if (errorBullet && !/^none\b/i.test(errorBullet)) return true;
   if (failureBullet && !/^none\b/i.test(failureBullet)) return true;
   if (acceptanceFailures.length > 0) return true;
-  if (role === 'reviewer' && findingsBullet && !/^none\b/i.test(findingsBullet)) return true;
+  if (role === 'coder' && coderOutputLacksImplementationEvidence(actionsTakenBullet, artifactsBullet)) return true;
+  if (role === 'reviewer' && reviewerFindingNeedsAction(findingsBullet)) return true;
+  if ((role === 'tester' || role === 'summarizer') && notVerifiedBullet && !/^none\b/i.test(notVerifiedBullet)) return true;
+  if (role === 'summarizer' && remainingIssuesBullet && !/^none\b/i.test(remainingIssuesBullet)) return true;
   if (nextActionBullet && /^(fix|retry|correct|repair)\b/i.test(nextActionBullet)) return true;
   return false;
 }
+function coderOutputLacksImplementationEvidence(actionsTaken = '', artifacts = '') {
+  const noActions = !String(actionsTaken || '').trim() || /^none\b/i.test(String(actionsTaken || '').trim());
+  const noArtifacts = !String(artifacts || '').trim() || /^none\b/i.test(String(artifacts || '').trim());
+  return noActions && noArtifacts;
+}
+function reviewerFindingNeedsAction(text = '') {
+  const value = String(text || '').trim();
+  if (!value || /^none\b/i.test(value)) return false;
+  const lower = value.toLowerCase();
+  if (
+    /\b(bug|regression|risk|risky|missing|missing test|unsafe|blocker|blocked|incorrect|broken|failure|failing|unverified|mismatch|incomplete|gap|can regress|still regress)\b/i.test(
+      lower
+    )
+  ) {
+    return true;
+  }
+  if (/\b(not covered|not handled|not verified|does not|doesn't|cannot|can't|lacks?)\b/i.test(lower)) {
+    return true;
+  }
+  return false;
+}
+function buildExitCriteriaFailureReason(role, text = '') {
+  const value = String(text || '').trim();
+  if (!value) return 'no structured step output was produced';
+  const errorBullet = extractSectionFirstBullet(value, 'Error');
+  if (errorBullet && !/^none\b/i.test(errorBullet)) return `error: ${errorBullet}`;
+  const failureBullet = extractSectionFirstBullet(value, 'Failures');
+  if (failureBullet && !/^none\b/i.test(failureBullet)) return `failures: ${failureBullet}`;
+  const findingsBullet = extractSectionFirstBullet(value, 'Findings');
+  const actionsTakenBullet = extractSectionFirstBullet(value, 'Actions Taken');
+  const artifactsBullet = extractSectionFirstBullet(value, 'Artifacts');
+  if (role === 'coder' && coderOutputLacksImplementationEvidence(actionsTakenBullet, artifactsBullet)) {
+    return 'coder output did not include implementation evidence';
+  }
+  if (role === 'reviewer' && reviewerFindingNeedsAction(findingsBullet)) return `review findings: ${findingsBullet}`;
+  const nextActionBullet = extractSectionFirstBullet(value, 'Next Action');
+  if (nextActionBullet && /^(fix|retry|correct|repair)\b/i.test(nextActionBullet)) return `next action requires rework: ${nextActionBullet}`;
+  const acceptanceFailure = extractAcceptanceStatusItems(value).find((item) => item.status !== 'met');
+  if (acceptanceFailure) return `acceptance ${acceptanceFailure.status}: ${acceptanceFailure.label}`;
+  const notVerifiedBullet = extractSectionFirstBullet(value, 'Not Verified');
+  if ((role === 'tester' || role === 'summarizer') && notVerifiedBullet && !/^none\b/i.test(notVerifiedBullet)) {
+    return `not verified: ${notVerifiedBullet}`;
+  }
+  const remainingIssuesBullet = extractSectionFirstBullet(value, 'Remaining Issues');
+  if (role === 'summarizer' && remainingIssuesBullet && !/^none\b/i.test(remainingIssuesBullet)) {
+    return `remaining issues: ${remainingIssuesBullet}`;
+  }
+  return 'step output did not satisfy exit criteria';
+}
 function extractSectionFirstBullet(text = '', heading = '') {
   const escaped = String(heading || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
   const match = String(text || '').match(new RegExp(String.raw`(^|\n)\s*${escaped}\s*:\s*(?:\n|\r\n?)+\s*-\s*([^\n\r]+)`, 'i'));
@@ -1148,6 +1499,13 @@ function buildAutoPlanSystemSummary(auto) {
   if (auto.failedTitles?.length) {
     lines.push(`Failed steps: ${auto.failedTitles.slice(0, 5).join(', ')}`);
   }
+  // Always include plan steps for TUI rendering
+  if (Array.isArray(auto.steps) && auto.steps.length > 0) {
+    lines.push('Plan Steps:');
+    auto.steps.forEach((s, idx) => {
+      lines.push(`  ${idx + 1}. [${s.role}] ${s.title}`);
+    });
+  }
   if (auto.approvalStatus === 'pending') {
     lines.push('Next: review the plan summary, then use /plan approve to start implementation, /plan auto run <goal> to plan and run in one step next time, or /plan stay to keep planning.');
   }
@@ -1183,7 +1541,7 @@ function buildAutoPlanFinalSummaryUserPrompt({ goal, autoPlan, runItems, plannin
     if (item.warning) {
       lines.push(`Warning: ${item.warning}`);
     }
-    lines.push(`Output: ${trimInlineText(item.output || '(empty)', 500)}`);
+    lines.push(`Output: ${trimInline(item.output || '(empty)', 500)}`);
     if (Array.isArray(item.artifactPaths) && item.artifactPaths.length > 0) {
       lines.push(`Artifacts: ${item.artifactPaths.slice(0, 5).join(', ')}`);
     }
@@ -1242,7 +1600,7 @@ async function buildAutoPlanFinalSummary({
       timeoutMs: config.gateway.timeout_ms || 90000,
       maxRetries: config.gateway.max_retries ?? 2
     });
-    return trimInlineText(result.text || '', 600) || fallbackSummary;
+    return trimInline(result.text || '', 600) || fallbackSummary;
   } catch {
     return fallbackSummary;
   }
@@ -1423,7 +1781,7 @@ async function collectLikelyImplementationFiles(cwd) {
         continue;
       }
       if (!preferredExts.has(path.extname(entry.name).toLowerCase())) continue;
-      candidates.push(path.relative(cwd, abs).replace(/\\/g, '/'));
+      candidates.push(normalizePath(path.relative(cwd, abs)));
       if (candidates.length >= 8) return;
     }
   }
@@ -1495,8 +1853,10 @@ function effectiveMaxContextTokens(config) {
   return 32000;
 }
-function buildRuntimeStateSnapshot({ currentSession, config, model, executionMode }) {
-  const currentContextTokens = estimateMessagesTokens(currentSession?.messages || []);
+function buildRuntimeStateSnapshot({ currentSession, config, model, executionMode, extraSession }) {
+  const parentTokens = estimateMessagesTokens(currentSession?.messages || []);
+  const subTokens = extraSession ? estimateMessagesTokens(extraSession.messages || []) : 0;
+  const currentContextTokens = parentTokens + subTokens;
   const maxContextTokens = effectiveMaxContextTokens(config);
   const contextUsagePct = maxContextTokens > 0 ? Math.min(100, Math.max(0, (currentContextTokens / maxContextTokens) * 100)) : 0;
   const snapshot = {
@@ -1677,9 +2037,14 @@ async function askModel({
   model,
   systemPrompt,
   onAgentEvent,
+  requestToolApproval,
   persistSession = true,
   executionMode,
-  alwaysAllowTools
+  alwaysAllowTools,
+  signal,
+  allowedTools,
+  maxSteps: maxStepsOverride,
+  skipAnalysisNudge = false
 }) {
   const maxContextTokens = effectiveMaxContextTokens(config);
   const triggerPct = Number(config.context?.preflight_trigger_pct || 92);
@@ -1723,7 +2088,7 @@ async function askModel({
         if (done) done();
         savePromise = null;
       }
-    }, 400);
+    }, STREAM_SAVE_DEBOUNCE_MS);
   };
   const flushScheduledSave = async () => {
     if (!persistSession) return;
@@ -1739,10 +2104,20 @@ async function askModel({
     }
     if (savePromise) await savePromise;
   };
+  if (persistSession && signal) {
+    const flushOnAbort = () => {
+      void flushScheduledSave().catch(() => {});
+    };
+    if (signal.aborted) {
+      flushOnAbort();
+    } else {
+      signal.addEventListener('abort', flushOnAbort, { once: true });
+    }
+  }
-  if (persistSession && text) {
+  if (text) {
     session.messages.push(stampedMessage('user', text));
-    await saveSession(session);
+    if (persistSession) await saveSession(session);
   }
   const projectContextSnippet = await buildProjectContextSnippet(process.cwd(), text).catch(() => '');
@@ -1762,33 +2137,45 @@ async function askModel({
     }
   });
+  const filteredDefinitions = Array.isArray(allowedTools)
+    ? definitions.filter((t) => allowedTools.includes(t.function?.name || t.name))
+    : definitions;
+  const filteredHandlers = Array.isArray(allowedTools)
+    ? Object.fromEntries(Object.entries(handlers).filter(([name]) => allowedTools.includes(name)))
+    : handlers;
+  const filteredDeferred = Array.isArray(allowedTools)
+    ? Object.fromEntries(Object.entries(deferredDefinitions).filter(([name]) => allowedTools.includes(name)))
+    : deferredDefinitions;
   let activeAssistantIndex = -1;
   const wrappedAgentEvent = (event) => {
-    if (!persistSession) {
-      if (onAgentEvent) onAgentEvent(event);
-      return;
-    }
+    // Always accumulate messages in session (for token tracking), only save when persisting
     if (event?.type === 'assistant:start') {
       session.messages.push(stampedMessage('assistant', ''));
       activeAssistantIndex = session.messages.length - 1;
-      scheduleSessionSave();
+      if (persistSession) scheduleSessionSave();
     } else if (event?.type === 'assistant:delta') {
       if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
         const current = session.messages[activeAssistantIndex];
         current.content = `${current.content || ''}${event.text || ''}`;
         current.at = new Date().toISOString();
-        scheduleSessionSave();
+        if (persistSession) scheduleSessionSave();
       }
     } else if (event?.type === 'assistant:response') {
       if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
         const current = session.messages[activeAssistantIndex];
         current.content = event.assistantMessage?.content ?? event.text ?? current.content;
+        if (typeof event.assistantMessage?.reasoning_content === 'string' && event.assistantMessage.reasoning_content) {
+          current.reasoning_content = event.assistantMessage.reasoning_content;
+        }
+        if (Array.isArray(event.assistantMessage?.reasoning_details) && event.assistantMessage.reasoning_details.length > 0) {
+          current.reasoning_details = event.assistantMessage.reasoning_details;
+        }
         if (Array.isArray(event.assistantMessage?.tool_calls) && event.assistantMessage.tool_calls.length > 0) {
           current.tool_calls = event.assistantMessage.tool_calls;
         }
         current.at = new Date().toISOString();
-        scheduleSessionSave();
+        if (persistSession) scheduleSessionSave();
       }
       activeAssistantIndex = -1;
     } else if (event?.type === 'tool:result') {
@@ -1797,7 +2184,7 @@ async function askModel({
           tool_call_id: event.id || ''
         })
       );
-      scheduleSessionSave();
+      if (persistSession) scheduleSessionSave();
     }
     if (onAgentEvent) onAgentEvent(event);
@@ -1808,9 +2195,9 @@ async function askModel({
     systemPrompt: effectiveSystemPrompt,
     userPrompt: loopUserPrompt,
     model: model || config.model.name,
-    maxSteps: Number(config.execution?.max_steps || 16),
-    toolDefinitions: definitions,
-    toolHandlers: handlers,
+    maxSteps: maxStepsOverride ?? Number(config.execution?.max_steps || 16),
+    toolDefinitions: filteredDefinitions,
+    toolHandlers: filteredHandlers,
     initialMessages: toOpenAIMessages(session.messages),
     onEvent: wrappedAgentEvent,
     executionMode: executionMode || config.execution?.mode || 'auto',
@@ -1818,7 +2205,10 @@ async function askModel({
       alwaysAllowTools || config.execution?.always_allow_tools || ['run', 'read', 'write'],
     toolResultMaxChars: config.context?.tool_result_max_chars || 12000,
     toolFormatters: formatters,
-    deferredDefinitions,
+    deferredDefinitions: filteredDeferred,
+    requestToolApproval,
+    signal,
+    skipAnalysisNudge,
     requestCompletion: async ({ messages, tools, model: selectedModel }) => {
       let started = false;
       const startAssistantStream = () => {
@@ -1837,6 +2227,7 @@ async function askModel({
         tools,
         timeoutMs: config.gateway.timeout_ms || 90000,
         maxRetries: config.gateway.max_retries ?? 2,
+        signal,
         onTextDelta: (delta) => {
           startAssistantStream();
           if (onAgentEvent) onAgentEvent({ type: 'assistant:delta', text: delta });
@@ -1867,7 +2258,7 @@ async function askModel({
       // keep chat usable even if pruning fails
     }
   }
-  return { text: loopResult.text };
+  return { text: loopResult.text, aborted: !!loopResult.aborted };
 }
 async function runSubAgentTask({
@@ -1880,7 +2271,10 @@ async function runSubAgentTask({
   model,
   systemPrompt,
   onAgentEvent,
-  extraRolePrompt = ''
+  extraRolePrompt = '',
+  signal,
+  onSessionActive,
+  planFileContext = ''
 }) {
   const subSession = { id: `sub-${Date.now()}`, messages: [] };
   const rolePrompt = getSubAgentRolePrompt(role);
@@ -1891,11 +2285,15 @@ async function runSubAgentTask({
   const focusedTaskNote = buildFocusedTaskNote(role, handoffFocusPaths);
   const goalRequirementPacket = buildGoalRequirementPacket(goal, role);
   const verificationPacket = role === 'tester' ? await buildTesterVerificationPacket(handoffFocusPaths) : '';
+  const planFileSection = planFileContext
+    ? `Accumulated plan file context (results from prior steps):\n${planFileContext}`
+    : '';
   const scopedTask = [
     contextPacket,
     goalRequirementPacket,
     evidencePacket,
     handoffPacket,
+    planFileSection,
     verificationPacket,
     focusedTaskNote,
     'Task:',
@@ -1927,6 +2325,8 @@ async function runSubAgentTask({
     }
     if (onAgentEvent) onAgentEvent(evt);
   };
+  const roleAllowedTools = ROLE_TOOL_POLICY[role];
+  if (onSessionActive) onSessionActive(subSession);
   const subResult = await askModel({
     text: scopedTask,
     session: subSession,
@@ -1935,7 +2335,10 @@ async function runSubAgentTask({
     systemPrompt: `${systemPrompt}\n${rolePrompt}${extraRolePrompt ? `\n${extraRolePrompt}` : ''}`,
     onAgentEvent: wrappedOnAgentEvent,
     persistSession: false,
-    executionMode: 'auto'
+    executionMode: 'auto',
+    allowedTools: roleAllowedTools,
+    skipAnalysisNudge: true,
+    signal
   });
   const text = subResult.text || '';
   const hasErrorLine = /(^|\n)\s*error\s*:/i.test(text);
@@ -1948,6 +2351,142 @@ async function runSubAgentTask({
   };
 }
+/**
+ * Runs the steps of a plan one at a time, each through runSubAgentTask, and
+ * returns a text summary of the outcomes.
+ *
+ * A `plan:steps` event listing every step as 'pending' is emitted before the
+ * loop. For each step a `[plan] Step i/n` delta is emitted; for every step
+ * after the first, the plan file (when planState.filePath is set) is read back
+ * as context; and after the step its result is appended to that file. The loop
+ * ends early when the signal is aborted or when a step other than the last one
+ * is marked as failed.
+ *
+ * @param {object} options
+ * @param {object} options.planState - Plan to execute; `steps`, `goal` and `filePath` are read.
+ * @param {object} options.parentSession - Handed to each sub-agent; on abort the streamed
+ *   text is pushed onto its `messages` and the session is saved.
+ * @param {object} options.config - Forwarded to runSubAgentTask.
+ * @param {*} options.model - Forwarded to runSubAgentTask.
+ * @param {*} options.systemPrompt - Forwarded to runSubAgentTask.
+ * @param {Function} [options.onAgentEvent] - Receives every event passed through emitPlanEvent.
+ * @param {AbortSignal} [options.signal] - Checked before each step and after the loop.
+ * @param {Function} [options.onSubSessionActive] - Forwarded to runSubAgentTask as onSessionActive.
+ * @returns {Promise<{text: string, aborted: boolean, results?: object[]}>} `results` is
+ *   omitted when the plan has no steps.
+ */
+async function executePlanWithSubAgents({
+  planState,
+  parentSession,
+  config,
+  model,
+  systemPrompt,
+  onAgentEvent,
+  signal,
+  onSubSessionActive
+}) {
+  const steps = Array.isArray(planState.steps) ? planState.steps : [];
+  const goal = planState.goal || '';
+  const planFilePath = planState.filePath || '';
+  // Collects the text of every assistant:delta event so it can be stored if the run is aborted.
+  let partialDeltaText = '';
+  // Event relay: records delta text, then forwards the event unchanged to the caller's handler.
+  const emitPlanEvent = (evt) => {
+    if (evt?.type === 'assistant:delta' && evt.text) {
+      partialDeltaText += String(evt.text);
+    }
+    if (onAgentEvent) onAgentEvent(evt);
+  };
+  // Nothing to run: return a placeholder result without emitting any event.
+  if (steps.length === 0) {
+    return { text: '(no steps to execute)', aborted: false };
+  }
+  // Both arrays receive the same step records; priorSteps is what later steps are given.
+  const priorSteps = [];
+  const results = [];
+  // Emit structured plan steps so TUI can show all steps with real role/title
+  emitPlanEvent({
+    type: 'plan:steps',
+    steps: steps.map((s, idx) => ({ index: idx + 1, role: s.role, title: s.title, status: 'pending' }))
+  });
+  for (let i = 0; i < steps.length; i += 1) {
+    const step = steps[i];
+    // Stop before starting another step once the caller has aborted.
+    if (signal?.aborted) break;
+    emitPlanEvent({
+      type: 'assistant:delta',
+      text: `\n[plan] Step ${i + 1}/${steps.length} -> ${step.role}: ${step.title}\n`
+    });
+    // Read accumulated plan file context from prior step results (skip for step 0)
+    let planFileContext = '';
+    if (i > 0 && planFilePath) {
+      planFileContext = await readPlanFileAsContext(planFilePath);
+    }
+    const stepGuidance = buildPipelineStepGuidance({ role: step.role, stepIndex: i, totalSteps: steps.length, isFirst: i === 0, isLast: i === steps.length - 1, priorSteps });
+    const output = await runSubAgentTask({
+      role: step.role,
+      task: step.task,
+      goal,
+      priorSteps,
+      parentSession,
+      config,
+      model,
+      systemPrompt,
+      onAgentEvent: emitPlanEvent,
+      extraRolePrompt: stepGuidance,
+      signal,
+      onSessionActive: onSubSessionActive,
+      planFileContext
+    });
+    // A step counts as failed when the sub-agent reported an error line or when
+    // stepOutputHasFailureSignals flags its output for this role.
+    const stepRecord = {
+      role: step.role,
+      title: step.title,
+      task: step.task,
+      output: output.text || '',
+      blockedCount: output.blockedCount || 0,
+      toolErrorCount: output.toolErrorCount || 0,
+      hasErrorLine: output.hasErrorLine || false,
+      artifactPaths: output.artifactPaths || [],
+      failed:
+        output.hasErrorLine ||
+        stepOutputHasFailureSignals(step.role, output.text || ''),
+      failureReason: ''
+    };
+    // An error line takes precedence; otherwise the reason is derived from the step output.
+    if (stepRecord.failed) {
+      stepRecord.failureReason =
+        output.hasErrorLine
+          ? 'tool or model execution error'
+          : buildExitCriteriaFailureReason(step.role, output.text || '');
+    }
+    priorSteps.push(stepRecord);
+    results.push(stepRecord);
+    // Write step result to plan file for subsequent steps to read
+    if (planFilePath) {
+      await appendStepResultToPlanFile(
+        planFilePath,
+        i,
+        step.title,
+        step.role,
+        stepRecord.output,
+        stepRecord.artifactPaths
+      );
+    }
+    // A failed step ends the pipeline; for the last step the loop is finishing anyway.
+    if (stepRecord.failed && i < steps.length - 1) break;
+  }
+  // Each result contributes a status line, up to 400 characters of output, and a blank separator.
+  const summaryLines = [];
+  for (let i = 0; i < results.length; i += 1) {
+    const r = results[i];
+    const tag = r.failed ? 'FAILED' : 'DONE';
+    summaryLines.push(`[${tag}] ${r.role}: ${r.title}`);
+    summaryLines.push(r.output.slice(0, 400));
+    summaryLines.push('');
+  }
+  const failedSteps = results.filter((r) => r.failed);
+  if (failedSteps.length > 0) {
+    summaryLines.push(`${failedSteps.length} step(s) had errors.`);
+    // Only the first failed step's reason is spelled out.
+    const firstFailed = failedSteps[0];
+    if (firstFailed?.failureReason) {
+      summaryLines.push(`Pipeline stopped after exit criteria failed at [${firstFailed.role}] ${firstFailed.title}: ${firstFailed.failureReason}.`);
+    }
+  }
+  // On abort, keep the streamed text (clipped to 6000 characters) as an assistant
+  // message on the parent session and save it.
+  if (signal?.aborted) {
+    const partial = partialDeltaText.trim();
+    if (partial) {
+      const clipped = partial.length > 6000 ? `${partial.slice(0, 6000)}\n... [partial output truncated]` : partial;
+      parentSession.messages.push(stampedMessage('assistant', clipped));
+      await saveSession(parentSession);
+    }
+  }
+  return {
+    text: summaryLines.join('\n'),
+    aborted: !!signal?.aborted,
+    results
+  };
+}
 async function buildAutoPlanAndRun({
   goal,
   config,
@@ -1973,7 +2512,7 @@ async function buildAutoPlanAndRun({
     '- If the task is purely to inspect the current project and suggest improvements, a lean 2-step or 3-step plan is preferred.',
     '- Example advisory roles: planner -> inspect project shape, coder -> synthesize findings and prioritized recommendations.',
     '- Example implementation roles: planner -> inspect target area, coder -> implement change, tester -> verify changed behavior.',
-    'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.'
+    'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester|summarizer","task":"..."}]}. No markdown.'
   ].join('\n');
   let autoPlan = {
     summary: `Auto plan for: ${goal}`,
@@ -1998,8 +2537,9 @@ async function buildAutoPlanAndRun({
           role: 'user',
           content: [
             'Create an execution plan and assign best sub-agent role for each step.',
-            'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.',
-            'The available roles are planner, coder, reviewer, and tester. Use only the roles the task actually needs.',
+            'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester|summarizer","task":"..."}]}. No markdown.',
+            'The available roles are planner, coder, reviewer, tester, and summarizer. Use only the roles the task actually needs.',
+            'The summarizer role synthesizes prior step results without re-analyzing. Use it as the final step for plans with 3+ steps.',
             `Task class: ${normalizedTaskClass}`,
             'Before choosing roles, decide whether the request is advisory, implementation, or verification-heavy.',
             requirementPacket,
@@ -2024,16 +2564,6 @@ async function buildAutoPlanAndRun({
     autoPlan = buildFallbackAutoPlan(goal);
   }
-  for (let i = 0; i < autoPlan.steps.length; i += 1) {
-    const step = autoPlan.steps[i];
-    if (onAgentEvent) {
-      onAgentEvent({
-        type: 'assistant:delta',
-        text: `\n[plan] Step ${i + 1}/${autoPlan.steps.length} -> ${step.role}: ${step.title}\n`
-      });
-    }
-  }
   const finalSummary = planningError
     ? `Plan created with fallback guidance because planning hit an error: ${planningError}`
     : 'Plan created and waiting for approval before implementation.';
@@ -2059,6 +2589,17 @@ async function buildAutoPlanAndRun({
   lines.push('');
   lines.push('## Approval');
   lines.push('Pending user approval before implementation.');
+  lines.push('');
+  lines.push('## Working Memory');
+  lines.push('### Findings Ledger');
+  lines.push(PLAN_MEMORY_MARKERS.findings[0]);
+  lines.push('- None recorded yet.');
+  lines.push(PLAN_MEMORY_MARKERS.findings[1]);
+  lines.push('');
+  lines.push('### Progress Ledger');
+  lines.push(PLAN_MEMORY_MARKERS.progress[0]);
+  lines.push('- Plan created and waiting for execution.');
+  lines.push(PLAN_MEMORY_MARKERS.progress[1]);
   const filePath = await writeMarkdownInProjectDir(
     'plans',
@@ -2109,8 +2650,10 @@ export async function createChatRuntime({
   session,
   config: initialConfig,
   model,
-  systemPrompt
+  systemPrompt,
+  requestToolApproval
 }) {
+  let activeRequestToolApproval = typeof requestToolApproval === 'function' ? requestToolApproval : null;
   const startupEvents = [];
   const initialIndex = await initializeProjectIndex(process.cwd()).catch(() => null);
   if (initialIndex?.summary) {
@@ -2247,7 +2790,8 @@ export async function createChatRuntime({
       { name: 'memory', description: completionCopy.commands.memory },
       { name: 'history', description: completionCopy.commands.history },
       { name: 'debug', description: completionCopy.commands.debug },
-      { name: 'retry', description: completionCopy.commands.retry }
+      { name: 'retry', description: completionCopy.commands.retry },
+      { name: 'stop', description: completionCopy.commands.stop }
     ];
     const out = [];
     for (const cmd of commands.values()) {
@@ -2291,7 +2835,7 @@ export async function createChatRuntime({
   ];
   const specTemplates = ['/spec <topic>'];
   const planTemplates = ['/plan <goal>', '/plan auto <goal>', '/plan auto run <goal>', '/plan approve', '/plan from-spec <spec-path?>'];
-  const agentTemplates = ['/agents list', '/agents run planner <task>', '/agents run coder <task>', '/agents run reviewer <task>', '/agents run tester <task>'];
+  const agentTemplates = ['/agents list', '/agents run planner <task>', '/agents run coder <task>', '/agents run reviewer <task>', '/agents run tester <task>', '/agents run summarizer <task>'];
   const debugTemplates = ['/debug keys on', '/debug keys off', '/debug keys status'];
   const compactTemplates = compactOptions.map((opt) => `/compact ${opt}`);
   const slashTemplates = [
@@ -2521,7 +3065,7 @@ export async function createChatRuntime({
       if (tokens.length === 1 || (tokens.length === 2 && !hasTrailingSpace)) {
         const sub = tokens[1] || '';
         if (sub === 'run') {
-          return ['planner', 'coder', 'reviewer', 'tester']
+          return ['planner', 'coder', 'reviewer', 'tester', 'summarizer']
             .map((r) => registerSuggestion(`/agents run ${r} `, completionCopy.generic.agentCommand));
         }
         return ['list', 'run']
@@ -2602,6 +3146,22 @@ export async function createChatRuntime({
     await saveSession(currentSession);
   };
+  const persistAssistantExchange = async (userText, assistantText, { includeUser = true } = {}) => {
+    if (includeUser && userText) {
+      currentSession.messages.push(stampedMessage('user', userText));
+    }
+    if (assistantText) {
+      currentSession.messages.push(stampedMessage('assistant', assistantText));
+    }
+    await saveSession(currentSession);
+  };
+  const persistUserExchange = async (userText) => {
+    if (!userText) return;
+    currentSession.messages.push(stampedMessage('user', userText));
+    await saveSession(currentSession);
+  };
   const buildActiveSystemPrompt = async () => {
     const soulPrompt = await buildSystemPromptWithSoul(baseSystemPrompt, config);
     const memorySnapshot = await buildMemorySnapshot({
@@ -2638,7 +3198,14 @@ export async function createChatRuntime({
     return localCommands.has(command);
   };
+  // Reference to the current AbortController, used to abort the reply that is in progress
+  let activeAbortController = null;
+  let activeSubSession = null;
   const submit = async (line, onAgentEvent) => {
+    // Create a new AbortController for every submission, replacing the previous one
+    activeAbortController = new AbortController();
+    const { signal } = activeAbortController;
     const activeReplySystemPrompt = await buildActiveSystemPrompt();
     try {
       await appendInputHistory(line);
@@ -2658,7 +3225,7 @@ export async function createChatRuntime({
       if (parsedInput.command === 'help') {
         return {
           type: 'system',
-          text: 'Commands: /help /exit /commands /status /mode /compact /checkpoint /spec /plan /agents /config /memory /history /debug /retry /<custom> !<shell>'
+          text: 'Commands: /help /exit /stop /commands /status /mode /compact /checkpoint /spec /plan /agents /config /memory /history /debug /retry /<custom> !<shell>'
         };
       }
       if (parsedInput.command === 'status') {
@@ -2765,6 +3332,9 @@ export async function createChatRuntime({
           const runImmediately = (parsedInput.args[1] || '').trim().toLowerCase() === 'run';
           const goal = parsedInput.args.slice(runImmediately ? 2 : 1).join(' ').trim();
           if (!goal) return { type: 'system', text: 'Usage: /plan auto <goal> | /plan auto run <goal>' };
+          if (runImmediately) {
+            await persistUserExchange(line);
+          }
           const auto = await buildAutoPlanAndRun({
             goal,
             session: currentSession,
@@ -2776,30 +3346,30 @@ export async function createChatRuntime({
             taskClass: classifyPlanTaskClass(goal)
           });
           if (runImmediately) {
-            const result = await askModel({
-              text: buildApprovedPlanExecutionPrompt(
-                {
-                  status: 'approved',
-                  source: 'auto',
-                  goal,
-                  filePath: auto.filePath,
-                  summary: auto.summary || '',
-                  finalSummary: auto.finalSummary || auto.summary || '',
-                  steps: Array.isArray(auto.steps) ? auto.steps : []
-                },
-                '/plan auto run'
-              ),
-              session: currentSession,
+            const planState = {
+              status: 'approved',
+              source: 'auto',
+              goal,
+              filePath: auto.filePath,
+              summary: auto.summary || '',
+              finalSummary: auto.finalSummary || auto.summary || '',
+              steps: Array.isArray(auto.steps) ? auto.steps : []
+            };
+            const result = await executePlanWithSubAgents({
+              planState,
+              parentSession: currentSession,
               config,
               model,
-              systemPrompt: activeReplySystemPrompt,
+              systemPrompt: baseSystemPrompt,
               onAgentEvent,
-              executionMode: 'auto'
+              signal,
+              onSubSessionActive: (sub) => { activeSubSession = sub; }
             });
+            activeSubSession = null;
             currentSession.planState = null;
             executionMode = 'auto';
-            await saveSession(currentSession);
-            return { type: 'assistant', text: result.text };
+            await persistAssistantExchange(line, result.text || '', { includeUser: false });
+            return { type: 'assistant', text: result.text, aborted: !!result.aborted };
           }
           currentSession.planState = {
             status: 'pending_approval',
@@ -2822,20 +3392,23 @@ export async function createChatRuntime({
           if (!hasPendingPlanApproval(currentSession)) {
             return { type: 'system', text: 'No pending plan approval. Use /plan auto <goal> or /plan <goal> first.' };
           }
+          await persistUserExchange(line);
           const planState = { ...currentSession.planState };
-          const result = await askModel({
-            text: buildApprovedPlanExecutionPrompt(planState, '/plan approve'),
-            session: currentSession,
+          const result = await executePlanWithSubAgents({
+            planState,
+            parentSession: currentSession,
             config,
             model,
-            systemPrompt: activeReplySystemPrompt,
+            systemPrompt: baseSystemPrompt,
             onAgentEvent,
-            executionMode: 'auto'
+            signal,
+            onSubSessionActive: (sub) => { activeSubSession = sub; }
           });
+          activeSubSession = null;
           currentSession.planState = null;
           executionMode = 'auto';
-          await saveSession(currentSession);
-          return { type: 'assistant', text: result.text };
+          await persistAssistantExchange(line, result.text || '', { includeUser: false });
+          return { type: 'assistant', text: result.text, aborted: !!result.aborted };
         }
         if (sub === 'stay') {
           if (!hasPendingPlanApproval(currentSession)) {
@@ -2898,7 +3471,7 @@ export async function createChatRuntime({
         if (sub === 'list') {
           return {
             type: 'system',
-            text: 'Sub-agent roles: planner, coder, reviewer, tester\nUse: /agents run <role> <task>'
+            text: 'Sub-agent roles: planner, coder, reviewer, tester, summarizer\nUse: /agents run <role> <task>'
           };
         }
         if (sub === 'run') {
@@ -2906,7 +3479,7 @@ export async function createChatRuntime({
           const task = parsedInput.args.slice(2).join(' ').trim();
           if (!role || !task) return { type: 'system', text: 'Usage: /agents run <role> <task>' };
           if (!SUB_AGENT_ROLES.includes(role)) {
-            return { type: 'system', text: 'Unknown role. Allowed: planner|coder|reviewer|tester' };
+            return { type: 'system', text: 'Unknown role. Allowed: planner|coder|reviewer|tester|summarizer' };
           }
           const output = await runSubAgentTask({
             role,
@@ -3036,9 +3609,11 @@ export async function createChatRuntime({
           model,
           systemPrompt: activeReplySystemPrompt,
           onAgentEvent,
-          executionMode
+          requestToolApproval: activeRequestToolApproval,
+          executionMode,
+          signal
         });
-        return { type: 'assistant', text: result.text };
+        return { type: 'assistant', text: result.text, aborted: !!result.aborted };
       }
       if (parsedInput.command === 'config') {
         const sub = parsedInput.args[0];
@@ -3172,7 +3747,9 @@ export async function createChatRuntime({
           model,
           systemPrompt: activeReplySystemPrompt,
           onAgentEvent,
-          executionMode
+          requestToolApproval: activeRequestToolApproval,
+          executionMode,
+          signal
         });
       } catch (error) {
         if (custom.metadata.type === 'skill' && onAgentEvent) {
@@ -3192,20 +3769,23 @@ export async function createChatRuntime({
     if (hasPendingPlanApproval(currentSession)) {
       if (isApprovalText(parsedInput.text)) {
+        await persistUserExchange(line);
         const planState = { ...currentSession.planState };
-        const result = await askModel({
-          text: buildApprovedPlanExecutionPrompt(planState, parsedInput.text),
-          session: currentSession,
+        const result = await executePlanWithSubAgents({
+          planState,
+          parentSession: currentSession,
           config,
           model,
-          systemPrompt: activeReplySystemPrompt,
+          systemPrompt: baseSystemPrompt,
           onAgentEvent,
-          executionMode: 'auto'
+          signal,
+          onSubSessionActive: (sub) => { activeSubSession = sub; }
         });
+        activeSubSession = null;
         currentSession.planState = null;
         executionMode = 'auto';
-        await saveSession(currentSession);
-        return { type: 'assistant', text: result.text };
+        await persistAssistantExchange(line, result.text || '', { includeUser: false });
+        return { type: 'assistant', text: result.text, aborted: !!result.aborted };
       }
       if (isStayInPlanText(parsedInput.text)) {
         const text = buildPendingPlanApprovalMessage(currentSession.planState);
@@ -3291,9 +3871,11 @@ export async function createChatRuntime({
       model,
       systemPrompt: routedSystemPrompt,
       onAgentEvent,
-      executionMode
+      requestToolApproval: activeRequestToolApproval,
+      executionMode,
+      signal
     });
-    return { type: 'assistant', text: result.text };
+    return { type: 'assistant', text: result.text, aborted: !!result.aborted };
   };
   return {
@@ -3301,15 +3883,27 @@ export async function createChatRuntime({
     getCompletionOptions,
     isImmediateLocalInput,
     submit,
+    abort: () => {
+      if (activeAbortController && !activeAbortController.signal.aborted) {
+        activeAbortController.abort();
+        return true;
+      }
+      return false;
+    },
     consumeStartupEvents: () => startupEvents.splice(0, startupEvents.length),
     getInputHistory: () => loadInputHistory(),
     getCurrentSessionId: () => currentSession.id,
+    setRequestToolApproval: (handler) => {
+      activeRequestToolApproval = typeof handler === 'function' ? handler : null;
+      return true;
+    },
     getRuntimeState: () =>
       buildRuntimeStateSnapshot({
         currentSession,
         config,
         model,
-        executionMode
+        executionMode,
+        extraSession: activeSubSession
       })
   };
 }