codemini-cli 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { parseInput } from './input-parser.js';
2
2
  import { loadCommandsAndSkills, renderCommandPrompt } from './command-loader.js';
3
3
  import { runAgentLoop, setResultDir, clearResultStore } from './agent-loop.js';
4
+ import { trimInline, normalizePath } from './string-utils.js';
4
5
  import fs from 'node:fs/promises';
5
6
  import path from 'node:path';
6
7
  import {
@@ -27,6 +28,8 @@ import { buildMemorySnapshot } from './memory-prompt.js';
27
28
  import { forgetMemory, listMemories, searchMemories } from './memory-store.js';
28
29
  import { countActiveTodos, normalizeTodos } from './todo-state.js';
29
30
 
// Delay in milliseconds. Judging by the name this debounces saves performed while a
// response is streaming — TODO confirm against the call site, which is outside this view.
const STREAM_SAVE_DEBOUNCE_MS = 120;
32
+
30
33
  function toOpenAIMessages(sessionMessages) {
31
34
  const mapped = [];
32
35
  for (const msg of sessionMessages) {
@@ -41,6 +44,8 @@ function toOpenAIMessages(sessionMessages) {
41
44
  mapped.push({
42
45
  role: msg.role,
43
46
  content: msg.content,
47
+ ...(typeof msg.reasoning_content === 'string' && msg.reasoning_content ? { reasoning_content: msg.reasoning_content } : {}),
48
+ ...(Array.isArray(msg.reasoning_details) && msg.reasoning_details.length > 0 ? { reasoning_details: msg.reasoning_details } : {}),
44
49
  ...(msg.tool_calls ? { tool_calls: msg.tool_calls } : {})
45
50
  });
46
51
  }
@@ -145,7 +150,8 @@ function getCompletionCopy(language = 'zh') {
145
150
  memory: '查看/搜索/删除持久记忆',
146
151
  history: '查看/恢复会话',
147
152
  debug: '运行时调试开关',
148
- retry: '重试上一条用户请求'
153
+ retry: '重试上一条用户请求',
154
+ stop: '中止当前回答'
149
155
  },
150
156
  generic: {
151
157
  configCommand: '配置命令',
@@ -160,6 +166,7 @@ function getCompletionCopy(language = 'zh') {
160
166
  keyboardDebugCommand: '键盘调试命令',
161
167
  compactCommand: '上下文压缩命令',
162
168
  retryCommand: '重试上一条用户请求',
169
+ stopCommand: '中止当前回答',
163
170
  statusCommand: '查看运行状态',
164
171
  resumeSession: '恢复一个已保存的会话'
165
172
  }
@@ -232,7 +239,8 @@ function getCompletionCopy(language = 'zh') {
232
239
  memory: 'list/search/delete persistent memories',
233
240
  history: 'list/resume sessions',
234
241
  debug: 'runtime debug switches',
235
- retry: 'retry the last user request'
242
+ retry: 'retry the last user request',
243
+ stop: 'stop the current response'
236
244
  },
237
245
  generic: {
238
246
  configCommand: 'config command',
@@ -247,6 +255,7 @@ function getCompletionCopy(language = 'zh') {
247
255
  keyboardDebugCommand: 'keyboard debug command',
248
256
  compactCommand: 'context compaction command',
249
257
  retryCommand: 'retry the last user request',
258
+ stopCommand: 'stop the current response',
250
259
  statusCommand: 'show runtime status',
251
260
  resumeSession: 'resume a saved session'
252
261
  }
@@ -261,62 +270,135 @@ function describeConfigKey(key, mode = 'set', language = 'zh') {
261
270
  return mode === 'get' ? copy.describeGet(label, hint) : copy.describeSet(label, hint);
262
271
  }
263
272
 
// Role identifiers recognised for sub-agents.
const SUB_AGENT_ROLES = ['planner', 'coder', 'reviewer', 'tester', 'summarizer'];

// Tool names listed per role. NOTE(review): presumably the tools each role is allowed to
// call; the consumer of this table is outside this view — confirm.
const ROLE_TOOL_POLICY = {
  planner: [
    'read',
    'grep',
    'list',
    'query_project_index',
    'tool_search',
    'glob',
    'ast_query',
    'read_ast_node'
  ],
  coder: [
    'read',
    'grep',
    'list',
    'edit',
    'write',
    'run',
    'ast_query',
    'read_ast_node',
    'glob',
    'tool_search',
    'update_todos'
  ],
  reviewer: [
    'read',
    'grep',
    'list',
    'glob',
    'tool_search',
    'ast_query',
    'read_ast_node'
  ],
  tester: [
    'read',
    'grep',
    'list',
    'run',
    'glob',
    'tool_search'
  ],
  summarizer: [
    'read',
    'grep',
    'list',
    'glob',
    'tool_search'
  ]
};

// Size limits for the context handed to sub-agents. SUB_AGENT_CONTEXT_MAX_CHARS is the
// character budget checked while building the context packet; the other limits are
// presumably item caps for their respective lists — TODO confirm at their use sites.
const SUB_AGENT_CONTEXT_MAX_MESSAGES = 4;
const SUB_AGENT_CONTEXT_MAX_CHARS = 1200;
const SUB_AGENT_EVIDENCE_MAX_ITEMS = 3;
const SUB_AGENT_HANDOFF_MAX_ITEMS = 6;

// [start, end] HTML-comment marker pairs delimiting the managed ledger sections
// inside the plan file.
const PLAN_MEMORY_MARKERS = {
  findings: [
    '<!-- plan-findings-start -->',
    '<!-- plan-findings-end -->'
  ],
  progress: [
    '<!-- plan-progress-start -->',
    '<!-- plan-progress-end -->'
  ]
};
/**
 * Returns the system prompt text for a sub-agent role.
 *
 * Recognised roles are 'planner', 'reviewer', 'tester' and 'summarizer'; every other
 * value (including 'coder') receives the coder prompt. Each prompt is a newline-joined
 * list of instructions that ends with the structured output headings the role must use.
 *
 * @param {string} role - Role identifier.
 * @returns {string} Prompt text, one instruction per line.
 */
export function getSubAgentRolePrompt(role) {
  // Shared lines are defined once so every role gets the exact same wording
  // (the tester and coder prompts previously dropped the dash from the format header).
  const formatHeader = 'Output format — keep it short and direct:';
  const noClosingSummary = 'Do not add a closing summary or "Next Action" — the pipeline handles what comes next.';

  let lines;
  switch (role) {
    case 'planner':
      lines = [
        'You are the planner in a multi-step agent pipeline.',
        'Your job: inspect the codebase and produce a concrete, actionable plan.',
        'Do not write implementation code.',
        formatHeader,
        'Findings:',
        '- <important constraint, dependency, risk, or "none">',
        'Actions Taken:',
        '- <what you inspected>',
        'Open Issues:',
        '- <blocking uncertainty or "none">',
        'Next Action:',
        '- <the concrete next step for the following role>',
        'Do not summarize your own work or add closing remarks — just deliver the structured handoff and stop.',
        'IMPORTANT: Stop as soon as you have enough context to produce the plan. Do NOT keep exploring once the plan is clear — deliver it immediately.'
      ];
      break;
    case 'reviewer':
      lines = [
        'You are the reviewer in a multi-step agent pipeline.',
        'Focus on bugs, regressions, edge cases, and missing tests in the files handed to you.',
        'Do not roam unrelated parts of the repo unless the handed-off evidence is insufficient.',
        formatHeader,
        'Findings:',
        '- <bug, regression, risk, or "none">',
        'Verified:',
        '- <what you checked>',
        'Not Verified:',
        '- <what remains uncertain>',
        noClosingSummary
      ];
      break;
    case 'tester':
      lines = [
        'You are the tester in a multi-step agent pipeline.',
        'Run concrete verification commands. Prefer real execution over suggestions.',
        'Verify the handed-off files first before scanning wider.',
        formatHeader,
        'Verified:',
        '- <commands run and evidence>',
        'Not Verified:',
        '- <what could not be validated>',
        'Failures:',
        '- <failed command or "none">',
        noClosingSummary
      ];
      break;
    case 'summarizer':
      lines = [
        'You are the summarizer in a multi-step agent pipeline.',
        'Your job is to synthesize the results of all prior steps into a concise, actionable final summary.',
        'Do NOT re-analyze the codebase or make new tool calls unless the handed-off evidence is clearly insufficient.',
        'Instead, read the accumulated step results in the plan file context provided to you.',
        formatHeader,
        'Summary:',
        '- <overall result in 2-4 sentences>',
        'Key Findings:',
        '- <most important findings from all steps>',
        'Actions Taken:',
        '- <what was implemented/changed/verified>',
        'Remaining Issues:',
        '- <unresolved items or "none">',
        'Recommended Next Steps:',
        '- <concrete follow-up actions if any>',
        'Do not add greetings, filler, or restate the goal. Deliver the summary and stop.'
      ];
      break;
    default:
      // Any unrecognised role falls back to the coder prompt.
      lines = [
        'You are the coder in a multi-step agent pipeline.',
        'Produce practical code changes with minimal explanation.',
        formatHeader,
        'Actions Taken:',
        '- <file changes, commands, or "none">',
        'Findings:',
        '- <important implementation note, regression risk, or "none">',
        'Verified:',
        '- <test/check evidence or "none">',
        'Open Issues:',
        '- <remaining gap or "none">',
        'Artifacts:',
        '- <changed file path or "none">',
        'Next Action:',
        '- <the best next step for the following role or "none">',
        'Do not summarize the goal, recap the plan, or add closing remarks.'
      ];
  }
  return lines.join('\n');
}
314
377
 
/**
 * Builds the guidance text that tells a sub-agent where it sits in the pipeline
 * and which style rules to follow.
 *
 * @param {object} options
 * @param {string} options.role - Role of the current step (accepted for interface parity; not used in the text).
 * @param {number} options.stepIndex - Zero-based index of the current step.
 * @param {number} options.totalSteps - Total number of steps in the pipeline.
 * @param {boolean} options.isFirst - Whether this is the first step (takes precedence over isLast for the position line).
 * @param {boolean} options.isLast - Whether this is the final step.
 * @param {Array<{role: string, title: string}>} [options.priorSteps] - Steps already executed; a missing or
 *   non-array value is treated as "no prior steps" instead of throwing.
 * @returns {string} Newline-joined guidance lines.
 */
function buildPipelineStepGuidance({ role, stepIndex, totalSteps, isFirst, isLast, priorSteps }) {
  // Tolerate callers that omit priorSteps; previously this raised a TypeError on `.length`.
  const earlierSteps = Array.isArray(priorSteps) ? priorSteps : [];

  let positionNote = 'You are in the middle of the pipeline. Your output feeds into the next step.';
  if (isFirst) {
    positionNote = 'You are the first step. Your output sets direction for the rest of the pipeline.';
  } else if (isLast) {
    positionNote = 'You are the final step. After you, the pipeline will present a combined result to the user.';
  }

  const lines = [`Pipeline position: step ${stepIndex + 1} of ${totalSteps}.`, positionNote];

  if (earlierSteps.length > 0) {
    // Only the immediately preceding step is mentioned.
    const prev = earlierSteps[earlierSteps.length - 1];
    lines.push(`Previous step was [${prev.role}]: ${prev.title}. Use its output as your starting point.`);
  }

  lines.push(
    'Style rules:',
    '- Be direct and action-oriented. No greetings, no summaries, no "In conclusion" or "To summarize".',
    '- Treat the Findings Ledger and Progress Ledger in the plan file context as the shared working memory for this pipeline.',
    '- If you discover something new, record it under the requested headings instead of burying it in prose.',
    '- Continue the established direction unless you have concrete contradictory evidence.',
    '- Output only what the next step needs to know. Skip obvious observations.'
  );
  if (isLast) {
    lines.push('- Since you are the final step, give a concise overall verdict the user can act on.');
  }
  return lines.join('\n');
}
321
403
 
322
404
  function buildSubAgentContextPacket(session) {
@@ -330,7 +412,7 @@ function buildSubAgentContextPacket(session) {
330
412
  let usedChars = 0;
331
413
  for (const msg of recent) {
332
414
  const role = msg.role === 'assistant' ? 'assistant' : 'user';
333
- const text = trimInlineText(msg.content, 260);
415
+ const text = trimInline(msg.content, 260);
334
416
  if (!text) continue;
335
417
  const line = `- ${role}: ${text}`;
336
418
  if (usedChars + line.length > SUB_AGENT_CONTEXT_MAX_CHARS) break;
@@ -346,8 +428,8 @@ function buildSubAgentContextPacket(session) {
346
428
  }
347
429
 
348
430
  function maybePushEvidence(out, seen, filePath, summary) {
349
- const pathText = trimInlineText(filePath, 160);
350
- const summaryText = trimInlineText(summary, 200);
431
+ const pathText = trimInline(filePath, 160);
432
+ const summaryText = trimInline(summary, 200);
351
433
  if (!pathText || seen.has(pathText)) return;
352
434
  seen.add(pathText);
353
435
  out.push(`- ${pathText}${summaryText ? ` :: ${summaryText}` : ''}`);
@@ -413,7 +495,7 @@ function extractLikelyPathsFromText(rawText, out, seen) {
413
495
  }
414
496
 
/**
 * Produces a one-line summary for a plan step.
 *
 * Uses the step's output, falling back to its task text, and passes it through
 * `trimInline` with an 800-character limit.
 *
 * @param {{output?: string, task?: string}} [step] - Step record; may be null/undefined.
 * @returns {string} The trimmed text, or a fixed placeholder when nothing is available.
 */
function summarizeStepOutput(step) {
  const rawText = step?.output || step?.task || '';
  const condensed = trimInline(rawText, 800);
  if (condensed) {
    return condensed;
  }
  return 'No concise output captured.';
}
419
501
 
@@ -586,7 +668,7 @@ function classifyPlanTaskClass(goal = '') {
586
668
  }
587
669
 
588
670
  function buildGoalRequirementPacket(goal, role) {
589
- const rawGoal = trimInlineText(goal, 800);
671
+ const rawGoal = trimInline(goal, 800);
590
672
  if (!rawGoal) return '';
591
673
  const requirements = deriveGoalRequirements(goal);
592
674
  const lines = ['Original goal:', rawGoal];
@@ -619,7 +701,8 @@ function buildAutoPlanPlannerGuidance() {
619
701
  '- Prefer the smallest local approach that satisfies the goal.',
620
702
  '- Do not output multiple alternative branches in the final plan.',
621
703
  '- Do not assume implementation should begin before the plan is coherent.',
622
- '- Available sub-agent roles are planner, coder, reviewer, and tester. Use only the roles the task actually needs.',
704
+ '- Available sub-agent roles are planner, coder, reviewer, tester, and summarizer. Use only the roles the task actually needs.',
705
+ '- The summarizer role reads accumulated step results from the plan file and synthesizes a final summary. It does NOT re-analyze the codebase. Prefer summarizer as the final step for multi-step plans.',
623
706
  '- For implementation-heavy or risky changes, prefer adding review and/or verification steps.',
624
707
  '- For analysis, recommendation, or planning-only goals, you may omit reviewer/tester if they do not add value.',
625
708
  '- Prefer 3-5 steps total unless the task is clearly larger.',
@@ -667,6 +750,196 @@ async function readJsonSafe(targetPath) {
667
750
  }
668
751
  }
669
752
 
/**
 * Returns the text found between the start and end markers of a managed plan section.
 *
 * @param {string} [content=''] - Full plan file content.
 * @param {string} [key='findings'] - Key into PLAN_MEMORY_MARKERS.
 * @returns {string} Trimmed section body; '' when the key is unknown, a marker is
 *   missing, or the first end marker does not come after the first start marker.
 */
function extractManagedPlanSection(content = '', key = 'findings') {
  const markerPair = PLAN_MEMORY_MARKERS[key];
  if (!markerPair) return '';

  const [openTag, closeTag] = markerPair;
  const text = String(content || '');
  const openAt = text.indexOf(openTag);
  const closeAt = text.indexOf(closeTag);

  const bothPresent = openAt !== -1 && closeAt !== -1;
  if (!bothPresent || closeAt <= openAt) {
    return '';
  }

  const bodyStart = openAt + openTag.length;
  return text.slice(bodyStart, closeAt).trim();
}
764
+
/**
 * Replaces (or appends) a managed, marker-delimited section of the plan file.
 *
 * When both markers for `key` are present, the first start…end span is replaced with
 * the markers wrapped around `nextSection`. Otherwise the wrapped section is appended
 * after the existing content, separated by a blank line.
 *
 * @param {string} [content=''] - Full plan file content.
 * @param {string} [key='findings'] - Key into PLAN_MEMORY_MARKERS.
 * @param {string} [nextSection=''] - New section body (trimmed before insertion).
 * @returns {string} Updated content; the input (as a string) when the key is unknown.
 */
function replaceManagedPlanSection(content = '', key = 'findings', nextSection = '') {
  const text = String(content || '');
  const markers = PLAN_MEMORY_MARKERS[key];
  if (!markers) return text;

  const [startMarker, endMarker] = markers;
  const sectionBody = `${startMarker}\n${String(nextSection || '').trim()}\n${endMarker}`;

  // Escape the markers so they are always matched literally, whatever they contain.
  const escapeRegExp = (raw) => String(raw).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`${escapeRegExp(startMarker)}[\\s\\S]*?${escapeRegExp(endMarker)}`);

  if (pattern.test(text)) {
    // A replacer function is used on purpose: with a replacement *string*, sequences
    // such as `$&`, `$'` or `$1` inside the ledger text would be expanded by
    // String.prototype.replace and corrupt the stored section.
    return text.replace(pattern, () => sectionBody);
  }
  return `${text.trimEnd()}\n\n${sectionBody}\n`;
}
776
+
/**
 * Cleans a list of ledger lines: stringifies and trims every entry, drops empty ones
 * and removes duplicates while keeping first-seen order.
 *
 * @param {Array<*>} [items=[]] - Candidate ledger lines; a non-array is treated as empty.
 * @param {string} [fallback='- None recorded yet.'] - Line returned (as a one-element
 *   array) when nothing survives the clean-up.
 * @returns {string[]} Cleaned lines, or `[fallback]` when there are none.
 */
function normalizeLedgerItems(items = [], fallback = '- None recorded yet.') {
  const unique = new Set();
  if (Array.isArray(items)) {
    for (const raw of items) {
      const line = String(raw || '').trim();
      if (line) {
        unique.add(line);
      }
    }
  }
  if (unique.size === 0) {
    return [fallback];
  }
  return Array.from(unique);
}
781
+
/**
 * Normalizes ledger lines and keeps only the most recent ones.
 *
 * @param {Array<*>} [items=[]] - Candidate ledger lines, oldest first.
 * @param {number} [maxItems=10] - Number of trailing entries to keep.
 * @returns {string[]} At most `maxItems` cleaned lines taken from the end of the list;
 *   empty when there is nothing to keep.
 */
function trimLedger(items = [], maxItems = 10) {
  // An empty fallback is requested and then filtered out, so "no entries" yields [].
  const entries = normalizeLedgerItems(items, '').filter((entry) => Boolean(entry));
  const firstKept = Math.max(0, entries.length - maxItems);
  return entries.slice(firstKept);
}
786
+
/**
 * Pulls the structured bullet sections out of a step's output and turns them into
 * capped ledger lists.
 *
 * For every heading the bullets are read with `extractSectionBullets`, entries that
 * start with "none" are dropped and the rest are prefixed with "- ". Explicit
 * `artifactPaths` are appended to the bullets found under "Artifacts".
 *
 * @param {string} [output=''] - Raw step output.
 * @param {string[]} [artifactPaths=[]] - Extra artifact paths; falsy entries are skipped.
 * @returns {{findings: string[], actionsTaken: string[], verified: string[],
 *   notVerified: string[], failures: string[], openIssues: string[],
 *   nextAction: string[], artifacts: string[]}} Ledger lists, each trimmed to its cap.
 */
export function extractStepWorkingMemory(output = '', artifactPaths = []) {
  const collectBullets = (heading) => {
    const kept = [];
    for (const item of extractSectionBullets(output, heading)) {
      if (/^none\b/i.test(item)) continue;
      kept.push(`- ${item}`);
    }
    return kept;
  };

  // [result field, section heading, maximum entries kept]
  const sectionSpecs = [
    ['findings', 'Findings', 8],
    ['actionsTaken', 'Actions Taken', 8],
    ['verified', 'Verified', 6],
    ['notVerified', 'Not Verified', 6],
    ['failures', 'Failures', 6],
    ['openIssues', 'Open Issues', 6],
    ['nextAction', 'Next Action', 3]
  ];
  const collected = sectionSpecs.map(([field, heading, limit]) => [field, collectBullets(heading), limit]);

  const artifactLines = collectBullets('Artifacts');
  if (Array.isArray(artifactPaths)) {
    for (const artifactPath of artifactPaths) {
      if (artifactPath) {
        artifactLines.push(`- ${artifactPath}`);
      }
    }
  }

  const memory = {};
  for (const [field, lines, limit] of collected) {
    memory[field] = trimLedger(lines, limit);
  }
  memory.artifacts = trimLedger(artifactLines, 6);
  return memory;
}
827
+
/**
 * Formats one line of the progress ledger for a finished step.
 *
 * The status is 'attention-needed' when the step reported failures, open issues or
 * unverified items, otherwise 'completed'. Up to two highlights are appended, taken
 * from the first entry of each memory list in a fixed priority order.
 *
 * @param {number} stepIndex - Zero-based step index (rendered one-based).
 * @param {string} stepTitle - Step title.
 * @param {string} role - Role that executed the step.
 * @param {object} memory - Ledger lists as returned by extractStepWorkingMemory.
 * @returns {string} A "- Step N [role] title -> status[ :: highlights]" line.
 */
function buildProgressLedgerEntry(stepIndex, stepTitle, role, memory) {
  const needsAttention = [memory.failures, memory.openIssues, memory.notVerified]
    .some((list) => list.length > 0);
  const status = needsAttention ? 'attention-needed' : 'completed';

  // Priority order for highlights; only the head of each list is considered.
  const leadItems = [
    memory.actionsTaken[0],
    memory.verified[0],
    memory.nextAction[0],
    memory.openIssues[0],
    memory.notVerified[0],
    memory.failures[0]
  ];
  const stripped = [];
  for (const item of leadItems) {
    if (item) {
      stripped.push(item.replace(/^- /, ''));
    }
  }
  const highlights = stripped.slice(0, 2);

  let suffix = '';
  if (highlights.length > 0) {
    suffix = ` :: ${highlights.join(' | ')}`;
  }
  return `- Step ${stepIndex + 1} [${role}] ${stepTitle} -> ${status}${suffix}`;
}
844
+
/**
 * Extracts the most recent "## Step N Result: …" entries from the plan file.
 *
 * Each entry runs from its heading up to the next result heading (or the end of the
 * content). The `---` separator line that the plan file places between entries is
 * stripped from the end of each chunk so the joined text has exactly one separator
 * between entries.
 *
 * @param {string} [content=''] - Full plan file content.
 * @param {number} [maxEntries=2] - Number of trailing entries to return; a value that
 *   is not greater than zero yields ''.
 * @returns {string} The selected entries joined by a `---` separator, or '' when none.
 */
function buildRecentStepResults(content = '', maxEntries = 2) {
  const value = String(content || '');
  const limit = Math.floor(Number(maxEntries));
  // Guard explicitly: `slice(-0)` is `slice(0)` and would return *every* entry.
  if (!(limit > 0)) return '';

  const starts = Array.from(value.matchAll(/^## Step \d+ Result: .*$/gm), (match) => match.index ?? 0);
  if (starts.length === 0) return '';

  const chunks = starts.map((start, index) => {
    const end = starts[index + 1] ?? value.length;
    return value
      .slice(start, end)
      .trim()
      // Drop the trailing separator belonging to the *next* entry to avoid "---" twice.
      .replace(/(?:\r?\n)+-{3,}$/, '')
      .trimEnd();
  });
  return chunks.slice(-limit).join('\n\n---\n\n');
}
853
+
/**
 * Condenses the plan file into a context string of at most roughly `maxChars` characters.
 *
 * Without managed ledger sections the raw content is returned, truncated in the middle
 * (head + notice + tail) when it is too long. With ledger sections the result is the
 * head of the file, a "Working Memory Snapshot" containing both ledgers and, when
 * present, the most recent step results; an over-long snapshot is cut at the end.
 *
 * @param {string} [content=''] - Full plan file content.
 * @param {number} [maxChars=6000] - Character budget for the returned text. The
 *   middle-truncated fallback never exceeds it; the snapshot path honours it whenever
 *   the budget is large enough to hold the truncation notice.
 * @returns {string} Context text, or '' for empty content.
 */
export function buildPlanWorkingMemoryContext(content = '', maxChars = 6000) {
  const value = String(content || '').trim();
  if (!value) return '';

  const findings = extractManagedPlanSection(value, 'findings');
  const progress = extractManagedPlanSection(value, 'progress');
  if (!findings && !progress) {
    if (value.length <= maxChars) return value;
    const notice = '\n\n... [plan file truncated, showing most recent step results] ...\n\n';
    // Budget the notice by its real length so the result stays within maxChars.
    const budget = maxChars - notice.length;
    if (!(budget > 0)) {
      // Not even the notice fits: return a plain prefix instead of overshooting.
      return value.slice(0, Math.max(0, maxChars));
    }
    const headSize = Math.min(Math.floor(maxChars * 0.3), budget);
    const tailSize = budget - headSize;
    // `slice(-0)` would return the whole string, so an empty tail is handled explicitly.
    const tail = tailSize > 0 ? value.slice(-tailSize) : '';
    return `${value.slice(0, headSize)}${notice}${tail}`;
  }

  const headLimit = Math.max(600, Math.floor(maxChars * 0.35));
  const head = value.slice(0, headLimit).trimEnd();
  const recentResults = buildRecentStepResults(value, 2);
  const sections = [
    head,
    '## Working Memory Snapshot',
    '### Findings Ledger',
    findings || '- None recorded yet.',
    '### Progress Ledger',
    progress || '- No progress recorded yet.'
  ];
  if (recentResults) {
    sections.push('## Recent Step Results');
    sections.push(recentResults);
  }
  const summary = sections.filter(Boolean).join('\n\n').trim();
  if (summary.length <= maxChars) return summary;

  // Clamp at zero: a negative end index would make slice() count from the end instead.
  const keep = Math.max(0, maxChars - 42);
  return `${summary.slice(0, keep).trimEnd()}\n... [working memory truncated]`;
}
885
+
/**
 * Records a finished step in the plan file (best-effort).
 *
 * Reads the file, merges the step's findings/issues into the findings ledger, adds a
 * line to the progress ledger, rewrites both managed sections and appends a
 * "## Step N Result" entry containing the raw output. Any error is swallowed on
 * purpose: the plan file hand-off must never fail the pipeline.
 *
 * @param {string} planFilePath - Path of the plan file; nothing happens when falsy.
 * @param {number} stepIndex - Zero-based step index.
 * @param {string} stepTitle - Step title.
 * @param {string} role - Role that executed the step.
 * @param {string} output - Raw step output.
 * @param {string[]} [artifactPaths=[]] - Artifact paths produced by the step.
 * @returns {Promise<void>}
 */
async function appendStepResultToPlanFile(planFilePath, stepIndex, stepTitle, role, output, artifactPaths = []) {
  if (!planFilePath) return;

  const findingsPlaceholder = '- None recorded yet.';
  const progressPlaceholder = '- No progress recorded yet.';
  // Existing ledger lines, minus the "nothing yet" placeholders. Without this filter a
  // placeholder written by an earlier step would be carried forward as if it were a
  // real entry and sit next to actual findings forever.
  const readLedgerLines = (content, key) =>
    extractManagedPlanSection(content, key)
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line && line !== findingsPlaceholder && line !== progressPlaceholder);

  try {
    const separator = '\n\n---\n\n';
    const timestamp = new Date().toISOString();
    const content = await fs.readFile(planFilePath, 'utf8');
    const memory = extractStepWorkingMemory(output, artifactPaths);

    const findingsBlock = [
      ...readLedgerLines(content, 'findings'),
      ...memory.findings,
      ...memory.openIssues,
      ...memory.notVerified,
      ...memory.failures
    ];
    const progressBlock = [
      ...readLedgerLines(content, 'progress'),
      buildProgressLedgerEntry(stepIndex, stepTitle, role, memory)
    ];
    const entry = [
      `## Step ${stepIndex + 1} Result: ${stepTitle}`,
      `Role: ${role}`,
      `Completed: ${timestamp}`,
      '',
      output || '(no output)',
      ''
    ].join('\n');

    const withFindings = replaceManagedPlanSection(
      content,
      'findings',
      normalizeLedgerItems(trimLedger(findingsBlock, 12), findingsPlaceholder).join('\n')
    );
    const withProgress = replaceManagedPlanSection(
      withFindings,
      'progress',
      normalizeLedgerItems(trimLedger(progressBlock, 12), progressPlaceholder).join('\n')
    );
    await fs.writeFile(planFilePath, `${withProgress.trimEnd()}${separator}${entry}\n`, 'utf8');
  } catch {
    // Non-fatal: plan file handoff is best-effort
  }
}
932
+
/**
 * Loads the plan file and condenses it with buildPlanWorkingMemoryContext.
 *
 * @param {string} planFilePath - Path of the plan file.
 * @param {number} [maxChars=6000] - Character budget forwarded to the condenser.
 * @returns {Promise<string>} The condensed context, or '' when no path is given or
 *   reading/condensing fails.
 */
async function readPlanFileAsContext(planFilePath, maxChars = 6000) {
  let context = '';
  if (planFilePath) {
    try {
      const raw = await fs.readFile(planFilePath, 'utf8');
      context = buildPlanWorkingMemoryContext(raw, maxChars);
    } catch {
      // Unreadable plan file: fall back to an empty context.
      context = '';
    }
  }
  return context;
}
942
+
670
943
  async function buildTesterVerificationPacket(focusPaths = []) {
671
944
  const cwd = process.cwd();
672
945
  const primary = [];
@@ -682,13 +955,13 @@ async function buildTesterVerificationPacket(focusPaths = []) {
682
955
  const pkg = await readJsonSafe(packageJsonPath);
683
956
  const scripts = pkg?.scripts || {};
684
957
  if (typeof scripts.test === 'string' && scripts.test.trim()) {
685
- primary.push(`- npm test :: package.json script = ${trimInlineText(scripts.test, 140)}`);
958
+ primary.push(`- npm test :: package.json script = ${trimInline(scripts.test, 140)}`);
686
959
  }
687
960
  if (typeof scripts.build === 'string' && scripts.build.trim()) {
688
- secondary.push(`- npm run build :: package.json script = ${trimInlineText(scripts.build, 140)}`);
961
+ secondary.push(`- npm run build :: package.json script = ${trimInline(scripts.build, 140)}`);
689
962
  }
690
963
  if (typeof scripts.lint === 'string' && scripts.lint.trim()) {
691
- secondary.push(`- npm run lint :: package.json script = ${trimInlineText(scripts.lint, 140)}`);
964
+ secondary.push(`- npm run lint :: package.json script = ${trimInline(scripts.lint, 140)}`);
692
965
  }
693
966
  fallback.push('- If test/build scripts are not usable, inspect package.json scripts and run the narrowest relevant check.');
694
967
  }
@@ -1004,17 +1277,32 @@ function buildFallbackAutoPlan(goal) {
1004
1277
  title: 'Verify the changed flows',
1005
1278
  role: 'tester',
1006
1279
  task: `Verify the completed work for: ${goal}. Run the most relevant checks available, report concrete evidence, and call out anything still not verified.`
1280
+ },
1281
+ {
1282
+ title: 'Synthesize final implementation status',
1283
+ role: 'summarizer',
1284
+ task: `Synthesize the completed work for: ${goal}. Read the accumulated findings, verification evidence, and open issues from earlier steps, then produce a concise final status with remaining risks and the single best next action.`
1007
1285
  }
1008
1286
  ]
1009
1287
  };
1010
1288
  }
1011
1289
 
/**
 * Chooses the summarizer step for a plan.
 *
 * The first step in `source` whose role is 'summarizer' is reused when it has both a
 * title and a task; otherwise a default summarizer step for `goal` is created.
 *
 * @param {string} goal - Goal text interpolated into the default task.
 * @param {Array<object>} [source=[]] - Existing plan steps; a non-array is treated as empty.
 * @returns {{title: string, role: string, task: string}} The summarizer step.
 */
function buildDefaultSummarizerStep(goal, source = []) {
  const steps = Array.isArray(source) ? source : [];
  for (const step of steps) {
    if (step.role !== 'summarizer') continue;
    // Only the first summarizer is considered, even if it turns out to be incomplete.
    if (step?.title && step?.task) {
      return step;
    }
    break;
  }
  const task = `Synthesize the completed work for: ${goal}. Read the accumulated findings, verification evidence, and open issues from earlier steps, then produce a concise final status with remaining risks and the single best next action.`;
  return {
    title: 'Synthesize final implementation status',
    role: 'summarizer',
    task
  };
}
1299
+
1012
1300
  function enforceAutoPlanGuardrailSteps(plan, goal) {
1013
1301
  const source = Array.isArray(plan?.steps) ? plan.steps : [];
1014
1302
  const requirements = deriveGoalRequirements(goal);
1015
1303
  const lightweightGoal = isLightweightAutoPlanGoal(goal, requirements);
1016
1304
  const taskClass = classifyPlanTaskClass(goal);
1017
- const implementationSteps = source.filter((step) => step.role !== 'reviewer' && step.role !== 'tester');
1305
+ const implementationSteps = source.filter((step) => step.role !== 'reviewer' && step.role !== 'tester' && step.role !== 'summarizer');
1018
1306
  const primaryImplementationStep =
1019
1307
  implementationSteps.find((step) => step.role === 'coder') ||
1020
1308
  implementationSteps[0] || {
@@ -1032,6 +1320,7 @@ function enforceAutoPlanGuardrailSteps(plan, goal) {
1032
1320
  role: 'tester',
1033
1321
  task: `Test and verify the completed work for: ${goal}. Start with the artifacts produced by earlier implementation steps, run the most relevant checks available, report concrete evidence, and call out anything still unverified.`
1034
1322
  };
1323
+ const summarizerStep = buildDefaultSummarizerStep(goal, source);
1035
1324
  const hasReviewer = source.some((step) => step.role === 'reviewer');
1036
1325
  const hasTester = source.some((step) => step.role === 'tester');
1037
1326
 
@@ -1050,13 +1339,16 @@ function enforceAutoPlanGuardrailSteps(plan, goal) {
1050
1339
  };
1051
1340
  }
1052
1341
 
1342
+ const executionSteps = [
1343
+ ...implementationSteps.slice(0, 6),
1344
+ ...(hasReviewer ? [reviewerStep] : []),
1345
+ ...(testerStep ? [testerStep] : [])
1346
+ ];
1347
+ const needsSummarizer = executionSteps.length >= 3;
1348
+
1053
1349
  return {
1054
1350
  summary: String(plan?.summary || `Auto plan for: ${goal}`).trim(),
1055
- steps: [
1056
- ...implementationSteps.slice(0, 6),
1057
- ...(hasReviewer ? [reviewerStep] : []),
1058
- ...(testerStep ? [testerStep] : [])
1059
- ]
1351
+ steps: needsSummarizer ? [...executionSteps, summarizerStep] : executionSteps
1060
1352
  };
1061
1353
  }
1062
1354
 
@@ -1081,15 +1373,74 @@ function stepOutputHasFailureSignals(role, text = '') {
1081
1373
  const failureBullet = extractSectionFirstBullet(value, 'Failures');
1082
1374
  const findingsBullet = extractSectionFirstBullet(value, 'Findings');
1083
1375
  const nextActionBullet = extractSectionFirstBullet(value, 'Next Action');
1376
+ const notVerifiedBullet = extractSectionFirstBullet(value, 'Not Verified');
1377
+ const remainingIssuesBullet = extractSectionFirstBullet(value, 'Remaining Issues');
1378
+ const actionsTakenBullet = extractSectionFirstBullet(value, 'Actions Taken');
1379
+ const artifactsBullet = extractSectionFirstBullet(value, 'Artifacts');
1084
1380
  const acceptanceFailures = extractAcceptanceStatusItems(value).filter((item) => item.status !== 'met');
1085
1381
  if (errorBullet && !/^none\b/i.test(errorBullet)) return true;
1086
1382
  if (failureBullet && !/^none\b/i.test(failureBullet)) return true;
1087
1383
  if (acceptanceFailures.length > 0) return true;
1088
- if (role === 'reviewer' && findingsBullet && !/^none\b/i.test(findingsBullet)) return true;
1384
+ if (role === 'coder' && coderOutputLacksImplementationEvidence(actionsTakenBullet, artifactsBullet)) return true;
1385
+ if (role === 'reviewer' && reviewerFindingNeedsAction(findingsBullet)) return true;
1386
+ if ((role === 'tester' || role === 'summarizer') && notVerifiedBullet && !/^none\b/i.test(notVerifiedBullet)) return true;
1387
+ if (role === 'summarizer' && remainingIssuesBullet && !/^none\b/i.test(remainingIssuesBullet)) return true;
1089
1388
  if (nextActionBullet && /^(fix|retry|correct|repair)\b/i.test(nextActionBullet)) return true;
1090
1389
  return false;
1091
1390
  }
1092
1391
 
/**
 * Tells whether a coder step reported neither actions nor artifacts.
 *
 * A value counts as "missing" when it is empty after trimming or starts with the
 * word "none" (case-insensitive).
 *
 * @param {string} [actionsTaken=''] - First bullet of the "Actions Taken" section.
 * @param {string} [artifacts=''] - First bullet of the "Artifacts" section.
 * @returns {boolean} True only when both values are missing.
 */
function coderOutputLacksImplementationEvidence(actionsTaken = '', artifacts = '') {
  const isMissing = (raw) => {
    const text = String(raw || '').trim();
    return text === '' || /^none\b/i.test(text);
  };
  return isMissing(actionsTaken) && isMissing(artifacts);
}
1397
+
/**
 * Decides whether a reviewer finding describes a problem that needs follow-up.
 *
 * Empty findings and findings starting with "none" never need action. Otherwise the
 * lower-cased text is scanned for problem vocabulary (bug, regression, missing, …)
 * and for negative phrasing (not handled, does not, cannot, lacks, …).
 *
 * @param {string} [text=''] - First bullet of the reviewer's "Findings" section.
 * @returns {boolean} True when the finding matches one of the problem patterns.
 */
function reviewerFindingNeedsAction(text = '') {
  const value = String(text || '').trim();
  if (value === '' || /^none\b/i.test(value)) {
    return false;
  }
  const lower = value.toLowerCase();
  const problemPatterns = [
    /\b(bug|regression|risk|risky|missing|missing test|unsafe|blocker|blocked|incorrect|broken|failure|failing|unverified|mismatch|incomplete|gap|can regress|still regress)\b/i,
    /\b(not covered|not handled|not verified|does not|doesn't|cannot|can't|lacks?)\b/i
  ];
  return problemPatterns.some((pattern) => pattern.test(lower));
}
1414
+
/**
 * Explains, in one short phrase, why a step's output failed its exit criteria.
 *
 * The checks run in a fixed order and the first one that applies wins: empty output,
 * "Error" bullet, "Failures" bullet, coder without implementation evidence, actionable
 * reviewer finding, a "Next Action" that asks for rework, an acceptance item that is
 * not met, "Not Verified" (tester/summarizer) and "Remaining Issues" (summarizer).
 *
 * @param {string} role - Role that produced the output.
 * @param {string} [text=''] - Raw step output.
 * @returns {string} Human-readable reason; a generic message when no check applies.
 */
function buildExitCriteriaFailureReason(role, text = '') {
  const value = String(text || '').trim();
  if (value === '') return 'no structured step output was produced';

  const firstBullet = (heading) => extractSectionFirstBullet(value, heading);
  // A bullet counts as reported when it exists and does not start with "none".
  const isReported = (bullet) => Boolean(bullet) && !/^none\b/i.test(bullet);

  const errorBullet = firstBullet('Error');
  if (isReported(errorBullet)) {
    return `error: ${errorBullet}`;
  }

  const failureBullet = firstBullet('Failures');
  if (isReported(failureBullet)) {
    return `failures: ${failureBullet}`;
  }

  const findingsBullet = firstBullet('Findings');
  const actionsTakenBullet = firstBullet('Actions Taken');
  const artifactsBullet = firstBullet('Artifacts');
  const isCoder = role === 'coder';
  if (isCoder && coderOutputLacksImplementationEvidence(actionsTakenBullet, artifactsBullet)) {
    return 'coder output did not include implementation evidence';
  }
  const isReviewer = role === 'reviewer';
  if (isReviewer && reviewerFindingNeedsAction(findingsBullet)) {
    return `review findings: ${findingsBullet}`;
  }

  const nextActionBullet = firstBullet('Next Action');
  if (nextActionBullet && /^(fix|retry|correct|repair)\b/i.test(nextActionBullet)) {
    return `next action requires rework: ${nextActionBullet}`;
  }

  const acceptanceFailure = extractAcceptanceStatusItems(value).find((item) => item.status !== 'met');
  if (acceptanceFailure) {
    return `acceptance ${acceptanceFailure.status}: ${acceptanceFailure.label}`;
  }

  const isSummarizer = role === 'summarizer';
  const notVerifiedBullet = firstBullet('Not Verified');
  if ((role === 'tester' || isSummarizer) && isReported(notVerifiedBullet)) {
    return `not verified: ${notVerifiedBullet}`;
  }

  const remainingIssuesBullet = firstBullet('Remaining Issues');
  if (isSummarizer && isReported(remainingIssuesBullet)) {
    return `remaining issues: ${remainingIssuesBullet}`;
  }

  return 'step output did not satisfy exit criteria';
}
1443
+
1093
1444
  function extractSectionFirstBullet(text = '', heading = '') {
1094
1445
  const escaped = String(heading || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
1095
1446
  const match = String(text || '').match(new RegExp(String.raw`(^|\n)\s*${escaped}\s*:\s*(?:\n|\r\n?)+\s*-\s*([^\n\r]+)`, 'i'));
@@ -1148,6 +1499,13 @@ function buildAutoPlanSystemSummary(auto) {
1148
1499
  if (auto.failedTitles?.length) {
1149
1500
  lines.push(`Failed steps: ${auto.failedTitles.slice(0, 5).join(', ')}`);
1150
1501
  }
1502
+ // Always include plan steps for TUI rendering
1503
+ if (Array.isArray(auto.steps) && auto.steps.length > 0) {
1504
+ lines.push('Plan Steps:');
1505
+ auto.steps.forEach((s, idx) => {
1506
+ lines.push(` ${idx + 1}. [${s.role}] ${s.title}`);
1507
+ });
1508
+ }
1151
1509
  if (auto.approvalStatus === 'pending') {
1152
1510
  lines.push('Next: review the plan summary, then use /plan approve to start implementation, /plan auto run <goal> to plan and run in one step next time, or /plan stay to keep planning.');
1153
1511
  }
@@ -1183,7 +1541,7 @@ function buildAutoPlanFinalSummaryUserPrompt({ goal, autoPlan, runItems, plannin
1183
1541
  if (item.warning) {
1184
1542
  lines.push(`Warning: ${item.warning}`);
1185
1543
  }
1186
- lines.push(`Output: ${trimInlineText(item.output || '(empty)', 500)}`);
1544
+ lines.push(`Output: ${trimInline(item.output || '(empty)', 500)}`);
1187
1545
  if (Array.isArray(item.artifactPaths) && item.artifactPaths.length > 0) {
1188
1546
  lines.push(`Artifacts: ${item.artifactPaths.slice(0, 5).join(', ')}`);
1189
1547
  }
@@ -1242,7 +1600,7 @@ async function buildAutoPlanFinalSummary({
1242
1600
  timeoutMs: config.gateway.timeout_ms || 90000,
1243
1601
  maxRetries: config.gateway.max_retries ?? 2
1244
1602
  });
1245
- return trimInlineText(result.text || '', 600) || fallbackSummary;
1603
+ return trimInline(result.text || '', 600) || fallbackSummary;
1246
1604
  } catch {
1247
1605
  return fallbackSummary;
1248
1606
  }
@@ -1423,7 +1781,7 @@ async function collectLikelyImplementationFiles(cwd) {
1423
1781
  continue;
1424
1782
  }
1425
1783
  if (!preferredExts.has(path.extname(entry.name).toLowerCase())) continue;
1426
- candidates.push(path.relative(cwd, abs).replace(/\\/g, '/'));
1784
+ candidates.push(normalizePath(path.relative(cwd, abs)));
1427
1785
  if (candidates.length >= 8) return;
1428
1786
  }
1429
1787
  }
@@ -1495,8 +1853,10 @@ function effectiveMaxContextTokens(config) {
1495
1853
  return 32000;
1496
1854
  }
1497
1855
 
1498
- function buildRuntimeStateSnapshot({ currentSession, config, model, executionMode }) {
1499
- const currentContextTokens = estimateMessagesTokens(currentSession?.messages || []);
1856
+ function buildRuntimeStateSnapshot({ currentSession, config, model, executionMode, extraSession }) {
1857
+ const parentTokens = estimateMessagesTokens(currentSession?.messages || []);
1858
+ const subTokens = extraSession ? estimateMessagesTokens(extraSession.messages || []) : 0;
1859
+ const currentContextTokens = parentTokens + subTokens;
1500
1860
  const maxContextTokens = effectiveMaxContextTokens(config);
1501
1861
  const contextUsagePct = maxContextTokens > 0 ? Math.min(100, Math.max(0, (currentContextTokens / maxContextTokens) * 100)) : 0;
1502
1862
  const snapshot = {
@@ -1677,9 +2037,14 @@ async function askModel({
1677
2037
  model,
1678
2038
  systemPrompt,
1679
2039
  onAgentEvent,
2040
+ requestToolApproval,
1680
2041
  persistSession = true,
1681
2042
  executionMode,
1682
- alwaysAllowTools
2043
+ alwaysAllowTools,
2044
+ signal,
2045
+ allowedTools,
2046
+ maxSteps: maxStepsOverride,
2047
+ skipAnalysisNudge = false
1683
2048
  }) {
1684
2049
  const maxContextTokens = effectiveMaxContextTokens(config);
1685
2050
  const triggerPct = Number(config.context?.preflight_trigger_pct || 92);
@@ -1723,7 +2088,7 @@ async function askModel({
1723
2088
  if (done) done();
1724
2089
  savePromise = null;
1725
2090
  }
1726
- }, 400);
2091
+ }, STREAM_SAVE_DEBOUNCE_MS);
1727
2092
  };
1728
2093
  const flushScheduledSave = async () => {
1729
2094
  if (!persistSession) return;
@@ -1739,10 +2104,20 @@ async function askModel({
1739
2104
  }
1740
2105
  if (savePromise) await savePromise;
1741
2106
  };
2107
+ if (persistSession && signal) {
2108
+ const flushOnAbort = () => {
2109
+ void flushScheduledSave().catch(() => {});
2110
+ };
2111
+ if (signal.aborted) {
2112
+ flushOnAbort();
2113
+ } else {
2114
+ signal.addEventListener('abort', flushOnAbort, { once: true });
2115
+ }
2116
+ }
1742
2117
 
1743
- if (persistSession && text) {
2118
+ if (text) {
1744
2119
  session.messages.push(stampedMessage('user', text));
1745
- await saveSession(session);
2120
+ if (persistSession) await saveSession(session);
1746
2121
  }
1747
2122
 
1748
2123
  const projectContextSnippet = await buildProjectContextSnippet(process.cwd(), text).catch(() => '');
@@ -1762,33 +2137,45 @@ async function askModel({
1762
2137
  }
1763
2138
  });
1764
2139
 
2140
+ const filteredDefinitions = Array.isArray(allowedTools)
2141
+ ? definitions.filter((t) => allowedTools.includes(t.function?.name || t.name))
2142
+ : definitions;
2143
+ const filteredHandlers = Array.isArray(allowedTools)
2144
+ ? Object.fromEntries(Object.entries(handlers).filter(([name]) => allowedTools.includes(name)))
2145
+ : handlers;
2146
+ const filteredDeferred = Array.isArray(allowedTools)
2147
+ ? Object.fromEntries(Object.entries(deferredDefinitions).filter(([name]) => allowedTools.includes(name)))
2148
+ : deferredDefinitions;
2149
+
1765
2150
  let activeAssistantIndex = -1;
1766
2151
  const wrappedAgentEvent = (event) => {
1767
- if (!persistSession) {
1768
- if (onAgentEvent) onAgentEvent(event);
1769
- return;
1770
- }
1771
-
2152
+ // Always accumulate messages in session (for token tracking), only save when persisting
1772
2153
  if (event?.type === 'assistant:start') {
1773
2154
  session.messages.push(stampedMessage('assistant', ''));
1774
2155
  activeAssistantIndex = session.messages.length - 1;
1775
- scheduleSessionSave();
2156
+ if (persistSession) scheduleSessionSave();
1776
2157
  } else if (event?.type === 'assistant:delta') {
1777
2158
  if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
1778
2159
  const current = session.messages[activeAssistantIndex];
1779
2160
  current.content = `${current.content || ''}${event.text || ''}`;
1780
2161
  current.at = new Date().toISOString();
1781
- scheduleSessionSave();
2162
+ if (persistSession) scheduleSessionSave();
1782
2163
  }
1783
2164
  } else if (event?.type === 'assistant:response') {
1784
2165
  if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
1785
2166
  const current = session.messages[activeAssistantIndex];
1786
2167
  current.content = event.assistantMessage?.content ?? event.text ?? current.content;
2168
+ if (typeof event.assistantMessage?.reasoning_content === 'string' && event.assistantMessage.reasoning_content) {
2169
+ current.reasoning_content = event.assistantMessage.reasoning_content;
2170
+ }
2171
+ if (Array.isArray(event.assistantMessage?.reasoning_details) && event.assistantMessage.reasoning_details.length > 0) {
2172
+ current.reasoning_details = event.assistantMessage.reasoning_details;
2173
+ }
1787
2174
  if (Array.isArray(event.assistantMessage?.tool_calls) && event.assistantMessage.tool_calls.length > 0) {
1788
2175
  current.tool_calls = event.assistantMessage.tool_calls;
1789
2176
  }
1790
2177
  current.at = new Date().toISOString();
1791
- scheduleSessionSave();
2178
+ if (persistSession) scheduleSessionSave();
1792
2179
  }
1793
2180
  activeAssistantIndex = -1;
1794
2181
  } else if (event?.type === 'tool:result') {
@@ -1797,7 +2184,7 @@ async function askModel({
1797
2184
  tool_call_id: event.id || ''
1798
2185
  })
1799
2186
  );
1800
- scheduleSessionSave();
2187
+ if (persistSession) scheduleSessionSave();
1801
2188
  }
1802
2189
 
1803
2190
  if (onAgentEvent) onAgentEvent(event);
@@ -1808,9 +2195,9 @@ async function askModel({
1808
2195
  systemPrompt: effectiveSystemPrompt,
1809
2196
  userPrompt: loopUserPrompt,
1810
2197
  model: model || config.model.name,
1811
- maxSteps: Number(config.execution?.max_steps || 16),
1812
- toolDefinitions: definitions,
1813
- toolHandlers: handlers,
2198
+ maxSteps: maxStepsOverride ?? Number(config.execution?.max_steps || 16),
2199
+ toolDefinitions: filteredDefinitions,
2200
+ toolHandlers: filteredHandlers,
1814
2201
  initialMessages: toOpenAIMessages(session.messages),
1815
2202
  onEvent: wrappedAgentEvent,
1816
2203
  executionMode: executionMode || config.execution?.mode || 'auto',
@@ -1818,7 +2205,10 @@ async function askModel({
1818
2205
  alwaysAllowTools || config.execution?.always_allow_tools || ['run', 'read', 'write'],
1819
2206
  toolResultMaxChars: config.context?.tool_result_max_chars || 12000,
1820
2207
  toolFormatters: formatters,
1821
- deferredDefinitions,
2208
+ deferredDefinitions: filteredDeferred,
2209
+ requestToolApproval,
2210
+ signal,
2211
+ skipAnalysisNudge,
1822
2212
  requestCompletion: async ({ messages, tools, model: selectedModel }) => {
1823
2213
  let started = false;
1824
2214
  const startAssistantStream = () => {
@@ -1837,6 +2227,7 @@ async function askModel({
1837
2227
  tools,
1838
2228
  timeoutMs: config.gateway.timeout_ms || 90000,
1839
2229
  maxRetries: config.gateway.max_retries ?? 2,
2230
+ signal,
1840
2231
  onTextDelta: (delta) => {
1841
2232
  startAssistantStream();
1842
2233
  if (onAgentEvent) onAgentEvent({ type: 'assistant:delta', text: delta });
@@ -1867,7 +2258,7 @@ async function askModel({
1867
2258
  // keep chat usable even if pruning fails
1868
2259
  }
1869
2260
  }
1870
- return { text: loopResult.text };
2261
+ return { text: loopResult.text, aborted: !!loopResult.aborted };
1871
2262
  }
1872
2263
 
1873
2264
  async function runSubAgentTask({
@@ -1880,7 +2271,10 @@ async function runSubAgentTask({
1880
2271
  model,
1881
2272
  systemPrompt,
1882
2273
  onAgentEvent,
1883
- extraRolePrompt = ''
2274
+ extraRolePrompt = '',
2275
+ signal,
2276
+ onSessionActive,
2277
+ planFileContext = ''
1884
2278
  }) {
1885
2279
  const subSession = { id: `sub-${Date.now()}`, messages: [] };
1886
2280
  const rolePrompt = getSubAgentRolePrompt(role);
@@ -1891,11 +2285,15 @@ async function runSubAgentTask({
1891
2285
  const focusedTaskNote = buildFocusedTaskNote(role, handoffFocusPaths);
1892
2286
  const goalRequirementPacket = buildGoalRequirementPacket(goal, role);
1893
2287
  const verificationPacket = role === 'tester' ? await buildTesterVerificationPacket(handoffFocusPaths) : '';
2288
+ const planFileSection = planFileContext
2289
+ ? `Accumulated plan file context (results from prior steps):\n${planFileContext}`
2290
+ : '';
1894
2291
  const scopedTask = [
1895
2292
  contextPacket,
1896
2293
  goalRequirementPacket,
1897
2294
  evidencePacket,
1898
2295
  handoffPacket,
2296
+ planFileSection,
1899
2297
  verificationPacket,
1900
2298
  focusedTaskNote,
1901
2299
  'Task:',
@@ -1927,6 +2325,8 @@ async function runSubAgentTask({
1927
2325
  }
1928
2326
  if (onAgentEvent) onAgentEvent(evt);
1929
2327
  };
2328
+ const roleAllowedTools = ROLE_TOOL_POLICY[role];
2329
+ if (onSessionActive) onSessionActive(subSession);
1930
2330
  const subResult = await askModel({
1931
2331
  text: scopedTask,
1932
2332
  session: subSession,
@@ -1935,7 +2335,10 @@ async function runSubAgentTask({
1935
2335
  systemPrompt: `${systemPrompt}\n${rolePrompt}${extraRolePrompt ? `\n${extraRolePrompt}` : ''}`,
1936
2336
  onAgentEvent: wrappedOnAgentEvent,
1937
2337
  persistSession: false,
1938
- executionMode: 'auto'
2338
+ executionMode: 'auto',
2339
+ allowedTools: roleAllowedTools,
2340
+ skipAnalysisNudge: true,
2341
+ signal
1939
2342
  });
1940
2343
  const text = subResult.text || '';
1941
2344
  const hasErrorLine = /(^|\n)\s*error\s*:/i.test(text);
@@ -1948,6 +2351,142 @@ async function runSubAgentTask({
1948
2351
  };
1949
2352
  }
1950
2353
 
2354
+ async function executePlanWithSubAgents({
2355
+ planState,
2356
+ parentSession,
2357
+ config,
2358
+ model,
2359
+ systemPrompt,
2360
+ onAgentEvent,
2361
+ signal,
2362
+ onSubSessionActive
2363
+ }) {
2364
+ const steps = Array.isArray(planState.steps) ? planState.steps : [];
2365
+ const goal = planState.goal || '';
2366
+ const planFilePath = planState.filePath || '';
2367
+ let partialDeltaText = '';
2368
+ const emitPlanEvent = (evt) => {
2369
+ if (evt?.type === 'assistant:delta' && evt.text) {
2370
+ partialDeltaText += String(evt.text);
2371
+ }
2372
+ if (onAgentEvent) onAgentEvent(evt);
2373
+ };
2374
+ if (steps.length === 0) {
2375
+ return { text: '(no steps to execute)', aborted: false };
2376
+ }
2377
+
2378
+ const priorSteps = [];
2379
+ const results = [];
2380
+
2381
+ // Emit structured plan steps so TUI can show all steps with real role/title
2382
+ emitPlanEvent({
2383
+ type: 'plan:steps',
2384
+ steps: steps.map((s, idx) => ({ index: idx + 1, role: s.role, title: s.title, status: 'pending' }))
2385
+ });
2386
+
2387
+ for (let i = 0; i < steps.length; i += 1) {
2388
+ const step = steps[i];
2389
+ if (signal?.aborted) break;
2390
+
2391
+ emitPlanEvent({
2392
+ type: 'assistant:delta',
2393
+ text: `\n[plan] Step ${i + 1}/${steps.length} -> ${step.role}: ${step.title}\n`
2394
+ });
2395
+
2396
+ // Read accumulated plan file context from prior step results (skip for step 0)
2397
+ let planFileContext = '';
2398
+ if (i > 0 && planFilePath) {
2399
+ planFileContext = await readPlanFileAsContext(planFilePath);
2400
+ }
2401
+
2402
+ const stepGuidance = buildPipelineStepGuidance({ role: step.role, stepIndex: i, totalSteps: steps.length, isFirst: i === 0, isLast: i === steps.length - 1, priorSteps });
2403
+ const output = await runSubAgentTask({
2404
+ role: step.role,
2405
+ task: step.task,
2406
+ goal,
2407
+ priorSteps,
2408
+ parentSession,
2409
+ config,
2410
+ model,
2411
+ systemPrompt,
2412
+ onAgentEvent: emitPlanEvent,
2413
+ extraRolePrompt: stepGuidance,
2414
+ signal,
2415
+ onSessionActive: onSubSessionActive,
2416
+ planFileContext
2417
+ });
2418
+
2419
+ const stepRecord = {
2420
+ role: step.role,
2421
+ title: step.title,
2422
+ task: step.task,
2423
+ output: output.text || '',
2424
+ blockedCount: output.blockedCount || 0,
2425
+ toolErrorCount: output.toolErrorCount || 0,
2426
+ hasErrorLine: output.hasErrorLine || false,
2427
+ artifactPaths: output.artifactPaths || [],
2428
+ failed:
2429
+ output.hasErrorLine ||
2430
+ stepOutputHasFailureSignals(step.role, output.text || ''),
2431
+ failureReason: ''
2432
+ };
2433
+ if (stepRecord.failed) {
2434
+ stepRecord.failureReason =
2435
+ output.hasErrorLine
2436
+ ? 'tool or model execution error'
2437
+ : buildExitCriteriaFailureReason(step.role, output.text || '');
2438
+ }
2439
+ priorSteps.push(stepRecord);
2440
+ results.push(stepRecord);
2441
+
2442
+ // Write step result to plan file for subsequent steps to read
2443
+ if (planFilePath) {
2444
+ await appendStepResultToPlanFile(
2445
+ planFilePath,
2446
+ i,
2447
+ step.title,
2448
+ step.role,
2449
+ stepRecord.output,
2450
+ stepRecord.artifactPaths
2451
+ );
2452
+ }
2453
+
2454
+ if (stepRecord.failed && i < steps.length - 1) break;
2455
+ }
2456
+
2457
+ const summaryLines = [];
2458
+ for (let i = 0; i < results.length; i += 1) {
2459
+ const r = results[i];
2460
+ const tag = r.failed ? 'FAILED' : 'DONE';
2461
+ summaryLines.push(`[${tag}] ${r.role}: ${r.title}`);
2462
+ summaryLines.push(r.output.slice(0, 400));
2463
+ summaryLines.push('');
2464
+ }
2465
+
2466
+ const failedSteps = results.filter((r) => r.failed);
2467
+ if (failedSteps.length > 0) {
2468
+ summaryLines.push(`${failedSteps.length} step(s) had errors.`);
2469
+ const firstFailed = failedSteps[0];
2470
+ if (firstFailed?.failureReason) {
2471
+ summaryLines.push(`Pipeline stopped after exit criteria failed at [${firstFailed.role}] ${firstFailed.title}: ${firstFailed.failureReason}.`);
2472
+ }
2473
+ }
2474
+ if (signal?.aborted) {
2475
+ const partial = partialDeltaText.trim();
2476
+ if (partial) {
2477
+ const clipped = partial.length > 6000 ? `${partial.slice(0, 6000)}\n... [partial output truncated]` : partial;
2478
+ parentSession.messages.push(stampedMessage('assistant', clipped));
2479
+ await saveSession(parentSession);
2480
+ }
2481
+ }
2482
+
2483
+ return {
2484
+ text: summaryLines.join('\n'),
2485
+ aborted: !!signal?.aborted,
2486
+ results
2487
+ };
2488
+ }
2489
+
1951
2490
  async function buildAutoPlanAndRun({
1952
2491
  goal,
1953
2492
  config,
@@ -1973,7 +2512,7 @@ async function buildAutoPlanAndRun({
1973
2512
  '- If the task is purely to inspect the current project and suggest improvements, a lean 2-step or 3-step plan is preferred.',
1974
2513
  '- Example advisory roles: planner -> inspect project shape, coder -> synthesize findings and prioritized recommendations.',
1975
2514
  '- Example implementation roles: planner -> inspect target area, coder -> implement change, tester -> verify changed behavior.',
1976
- 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.'
2515
+ 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester|summarizer","task":"..."}]}. No markdown.'
1977
2516
  ].join('\n');
1978
2517
  let autoPlan = {
1979
2518
  summary: `Auto plan for: ${goal}`,
@@ -1998,8 +2537,9 @@ async function buildAutoPlanAndRun({
1998
2537
  role: 'user',
1999
2538
  content: [
2000
2539
  'Create an execution plan and assign best sub-agent role for each step.',
2001
- 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.',
2002
- 'The available roles are planner, coder, reviewer, and tester. Use only the roles the task actually needs.',
2540
+ 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester|summarizer","task":"..."}]}. No markdown.',
2541
+ 'The available roles are planner, coder, reviewer, tester, and summarizer. Use only the roles the task actually needs.',
2542
+ 'The summarizer role synthesizes prior step results without re-analyzing. Use it as the final step for plans with 3+ steps.',
2003
2543
  `Task class: ${normalizedTaskClass}`,
2004
2544
  'Before choosing roles, decide whether the request is advisory, implementation, or verification-heavy.',
2005
2545
  requirementPacket,
@@ -2024,16 +2564,6 @@ async function buildAutoPlanAndRun({
2024
2564
  autoPlan = buildFallbackAutoPlan(goal);
2025
2565
  }
2026
2566
 
2027
- for (let i = 0; i < autoPlan.steps.length; i += 1) {
2028
- const step = autoPlan.steps[i];
2029
- if (onAgentEvent) {
2030
- onAgentEvent({
2031
- type: 'assistant:delta',
2032
- text: `\n[plan] Step ${i + 1}/${autoPlan.steps.length} -> ${step.role}: ${step.title}\n`
2033
- });
2034
- }
2035
- }
2036
-
2037
2567
  const finalSummary = planningError
2038
2568
  ? `Plan created with fallback guidance because planning hit an error: ${planningError}`
2039
2569
  : 'Plan created and waiting for approval before implementation.';
@@ -2059,6 +2589,17 @@ async function buildAutoPlanAndRun({
2059
2589
  lines.push('');
2060
2590
  lines.push('## Approval');
2061
2591
  lines.push('Pending user approval before implementation.');
2592
+ lines.push('');
2593
+ lines.push('## Working Memory');
2594
+ lines.push('### Findings Ledger');
2595
+ lines.push(PLAN_MEMORY_MARKERS.findings[0]);
2596
+ lines.push('- None recorded yet.');
2597
+ lines.push(PLAN_MEMORY_MARKERS.findings[1]);
2598
+ lines.push('');
2599
+ lines.push('### Progress Ledger');
2600
+ lines.push(PLAN_MEMORY_MARKERS.progress[0]);
2601
+ lines.push('- Plan created and waiting for execution.');
2602
+ lines.push(PLAN_MEMORY_MARKERS.progress[1]);
2062
2603
 
2063
2604
  const filePath = await writeMarkdownInProjectDir(
2064
2605
  'plans',
@@ -2109,8 +2650,10 @@ export async function createChatRuntime({
2109
2650
  session,
2110
2651
  config: initialConfig,
2111
2652
  model,
2112
- systemPrompt
2653
+ systemPrompt,
2654
+ requestToolApproval
2113
2655
  }) {
2656
+ let activeRequestToolApproval = typeof requestToolApproval === 'function' ? requestToolApproval : null;
2114
2657
  const startupEvents = [];
2115
2658
  const initialIndex = await initializeProjectIndex(process.cwd()).catch(() => null);
2116
2659
  if (initialIndex?.summary) {
@@ -2247,7 +2790,8 @@ export async function createChatRuntime({
2247
2790
  { name: 'memory', description: completionCopy.commands.memory },
2248
2791
  { name: 'history', description: completionCopy.commands.history },
2249
2792
  { name: 'debug', description: completionCopy.commands.debug },
2250
- { name: 'retry', description: completionCopy.commands.retry }
2793
+ { name: 'retry', description: completionCopy.commands.retry },
2794
+ { name: 'stop', description: completionCopy.commands.stop }
2251
2795
  ];
2252
2796
  const out = [];
2253
2797
  for (const cmd of commands.values()) {
@@ -2291,7 +2835,7 @@ export async function createChatRuntime({
2291
2835
  ];
2292
2836
  const specTemplates = ['/spec <topic>'];
2293
2837
  const planTemplates = ['/plan <goal>', '/plan auto <goal>', '/plan auto run <goal>', '/plan approve', '/plan from-spec <spec-path?>'];
2294
- const agentTemplates = ['/agents list', '/agents run planner <task>', '/agents run coder <task>', '/agents run reviewer <task>', '/agents run tester <task>'];
2838
+ const agentTemplates = ['/agents list', '/agents run planner <task>', '/agents run coder <task>', '/agents run reviewer <task>', '/agents run tester <task>', '/agents run summarizer <task>'];
2295
2839
  const debugTemplates = ['/debug keys on', '/debug keys off', '/debug keys status'];
2296
2840
  const compactTemplates = compactOptions.map((opt) => `/compact ${opt}`);
2297
2841
  const slashTemplates = [
@@ -2521,7 +3065,7 @@ export async function createChatRuntime({
2521
3065
  if (tokens.length === 1 || (tokens.length === 2 && !hasTrailingSpace)) {
2522
3066
  const sub = tokens[1] || '';
2523
3067
  if (sub === 'run') {
2524
- return ['planner', 'coder', 'reviewer', 'tester']
3068
+ return ['planner', 'coder', 'reviewer', 'tester', 'summarizer']
2525
3069
  .map((r) => registerSuggestion(`/agents run ${r} `, completionCopy.generic.agentCommand));
2526
3070
  }
2527
3071
  return ['list', 'run']
@@ -2602,6 +3146,22 @@ export async function createChatRuntime({
2602
3146
  await saveSession(currentSession);
2603
3147
  };
2604
3148
 
3149
+ const persistAssistantExchange = async (userText, assistantText, { includeUser = true } = {}) => {
3150
+ if (includeUser && userText) {
3151
+ currentSession.messages.push(stampedMessage('user', userText));
3152
+ }
3153
+ if (assistantText) {
3154
+ currentSession.messages.push(stampedMessage('assistant', assistantText));
3155
+ }
3156
+ await saveSession(currentSession);
3157
+ };
3158
+
3159
+ const persistUserExchange = async (userText) => {
3160
+ if (!userText) return;
3161
+ currentSession.messages.push(stampedMessage('user', userText));
3162
+ await saveSession(currentSession);
3163
+ };
3164
+
2605
3165
  const buildActiveSystemPrompt = async () => {
2606
3166
  const soulPrompt = await buildSystemPromptWithSoul(baseSystemPrompt, config);
2607
3167
  const memorySnapshot = await buildMemorySnapshot({
@@ -2638,7 +3198,14 @@ export async function createChatRuntime({
2638
3198
  return localCommands.has(command);
2639
3199
  };
2640
3200
 
3201
+ // 当前的 AbortController 引用,用于中止正在进行的回答
3202
+ let activeAbortController = null;
3203
+ let activeSubSession = null;
3204
+
2641
3205
  const submit = async (line, onAgentEvent) => {
3206
+ // 每次提交创建新的 AbortController,替代旧的
3207
+ activeAbortController = new AbortController();
3208
+ const { signal } = activeAbortController;
2642
3209
  const activeReplySystemPrompt = await buildActiveSystemPrompt();
2643
3210
  try {
2644
3211
  await appendInputHistory(line);
@@ -2658,7 +3225,7 @@ export async function createChatRuntime({
2658
3225
  if (parsedInput.command === 'help') {
2659
3226
  return {
2660
3227
  type: 'system',
2661
- text: 'Commands: /help /exit /commands /status /mode /compact /checkpoint /spec /plan /agents /config /memory /history /debug /retry /<custom> !<shell>'
3228
+ text: 'Commands: /help /exit /stop /commands /status /mode /compact /checkpoint /spec /plan /agents /config /memory /history /debug /retry /<custom> !<shell>'
2662
3229
  };
2663
3230
  }
2664
3231
  if (parsedInput.command === 'status') {
@@ -2765,6 +3332,9 @@ export async function createChatRuntime({
2765
3332
  const runImmediately = (parsedInput.args[1] || '').trim().toLowerCase() === 'run';
2766
3333
  const goal = parsedInput.args.slice(runImmediately ? 2 : 1).join(' ').trim();
2767
3334
  if (!goal) return { type: 'system', text: 'Usage: /plan auto <goal> | /plan auto run <goal>' };
3335
+ if (runImmediately) {
3336
+ await persistUserExchange(line);
3337
+ }
2768
3338
  const auto = await buildAutoPlanAndRun({
2769
3339
  goal,
2770
3340
  session: currentSession,
@@ -2776,30 +3346,30 @@ export async function createChatRuntime({
2776
3346
  taskClass: classifyPlanTaskClass(goal)
2777
3347
  });
2778
3348
  if (runImmediately) {
2779
- const result = await askModel({
2780
- text: buildApprovedPlanExecutionPrompt(
2781
- {
2782
- status: 'approved',
2783
- source: 'auto',
2784
- goal,
2785
- filePath: auto.filePath,
2786
- summary: auto.summary || '',
2787
- finalSummary: auto.finalSummary || auto.summary || '',
2788
- steps: Array.isArray(auto.steps) ? auto.steps : []
2789
- },
2790
- '/plan auto run'
2791
- ),
2792
- session: currentSession,
3349
+ const planState = {
3350
+ status: 'approved',
3351
+ source: 'auto',
3352
+ goal,
3353
+ filePath: auto.filePath,
3354
+ summary: auto.summary || '',
3355
+ finalSummary: auto.finalSummary || auto.summary || '',
3356
+ steps: Array.isArray(auto.steps) ? auto.steps : []
3357
+ };
3358
+ const result = await executePlanWithSubAgents({
3359
+ planState,
3360
+ parentSession: currentSession,
2793
3361
  config,
2794
3362
  model,
2795
- systemPrompt: activeReplySystemPrompt,
3363
+ systemPrompt: baseSystemPrompt,
2796
3364
  onAgentEvent,
2797
- executionMode: 'auto'
3365
+ signal,
3366
+ onSubSessionActive: (sub) => { activeSubSession = sub; }
2798
3367
  });
3368
+ activeSubSession = null;
2799
3369
  currentSession.planState = null;
2800
3370
  executionMode = 'auto';
2801
- await saveSession(currentSession);
2802
- return { type: 'assistant', text: result.text };
3371
+ await persistAssistantExchange(line, result.text || '', { includeUser: false });
3372
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
2803
3373
  }
2804
3374
  currentSession.planState = {
2805
3375
  status: 'pending_approval',
@@ -2822,20 +3392,23 @@ export async function createChatRuntime({
2822
3392
  if (!hasPendingPlanApproval(currentSession)) {
2823
3393
  return { type: 'system', text: 'No pending plan approval. Use /plan auto <goal> or /plan <goal> first.' };
2824
3394
  }
3395
+ await persistUserExchange(line);
2825
3396
  const planState = { ...currentSession.planState };
2826
- const result = await askModel({
2827
- text: buildApprovedPlanExecutionPrompt(planState, '/plan approve'),
2828
- session: currentSession,
3397
+ const result = await executePlanWithSubAgents({
3398
+ planState,
3399
+ parentSession: currentSession,
2829
3400
  config,
2830
3401
  model,
2831
- systemPrompt: activeReplySystemPrompt,
3402
+ systemPrompt: baseSystemPrompt,
2832
3403
  onAgentEvent,
2833
- executionMode: 'auto'
3404
+ signal,
3405
+ onSubSessionActive: (sub) => { activeSubSession = sub; }
2834
3406
  });
3407
+ activeSubSession = null;
2835
3408
  currentSession.planState = null;
2836
3409
  executionMode = 'auto';
2837
- await saveSession(currentSession);
2838
- return { type: 'assistant', text: result.text };
3410
+ await persistAssistantExchange(line, result.text || '', { includeUser: false });
3411
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
2839
3412
  }
2840
3413
  if (sub === 'stay') {
2841
3414
  if (!hasPendingPlanApproval(currentSession)) {
@@ -2898,7 +3471,7 @@ export async function createChatRuntime({
2898
3471
  if (sub === 'list') {
2899
3472
  return {
2900
3473
  type: 'system',
2901
- text: 'Sub-agent roles: planner, coder, reviewer, tester\nUse: /agents run <role> <task>'
3474
+ text: 'Sub-agent roles: planner, coder, reviewer, tester, summarizer\nUse: /agents run <role> <task>'
2902
3475
  };
2903
3476
  }
2904
3477
  if (sub === 'run') {
@@ -2906,7 +3479,7 @@ export async function createChatRuntime({
2906
3479
  const task = parsedInput.args.slice(2).join(' ').trim();
2907
3480
  if (!role || !task) return { type: 'system', text: 'Usage: /agents run <role> <task>' };
2908
3481
  if (!SUB_AGENT_ROLES.includes(role)) {
2909
- return { type: 'system', text: 'Unknown role. Allowed: planner|coder|reviewer|tester' };
3482
+ return { type: 'system', text: 'Unknown role. Allowed: planner|coder|reviewer|tester|summarizer' };
2910
3483
  }
2911
3484
  const output = await runSubAgentTask({
2912
3485
  role,
@@ -3036,9 +3609,11 @@ export async function createChatRuntime({
3036
3609
  model,
3037
3610
  systemPrompt: activeReplySystemPrompt,
3038
3611
  onAgentEvent,
3039
- executionMode
3612
+ requestToolApproval: activeRequestToolApproval,
3613
+ executionMode,
3614
+ signal
3040
3615
  });
3041
- return { type: 'assistant', text: result.text };
3616
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
3042
3617
  }
3043
3618
  if (parsedInput.command === 'config') {
3044
3619
  const sub = parsedInput.args[0];
@@ -3172,7 +3747,9 @@ export async function createChatRuntime({
3172
3747
  model,
3173
3748
  systemPrompt: activeReplySystemPrompt,
3174
3749
  onAgentEvent,
3175
- executionMode
3750
+ requestToolApproval: activeRequestToolApproval,
3751
+ executionMode,
3752
+ signal
3176
3753
  });
3177
3754
  } catch (error) {
3178
3755
  if (custom.metadata.type === 'skill' && onAgentEvent) {
@@ -3192,20 +3769,23 @@ export async function createChatRuntime({
3192
3769
 
3193
3770
  if (hasPendingPlanApproval(currentSession)) {
3194
3771
  if (isApprovalText(parsedInput.text)) {
3772
+ await persistUserExchange(line);
3195
3773
  const planState = { ...currentSession.planState };
3196
- const result = await askModel({
3197
- text: buildApprovedPlanExecutionPrompt(planState, parsedInput.text),
3198
- session: currentSession,
3774
+ const result = await executePlanWithSubAgents({
3775
+ planState,
3776
+ parentSession: currentSession,
3199
3777
  config,
3200
3778
  model,
3201
- systemPrompt: activeReplySystemPrompt,
3779
+ systemPrompt: baseSystemPrompt,
3202
3780
  onAgentEvent,
3203
- executionMode: 'auto'
3781
+ signal,
3782
+ onSubSessionActive: (sub) => { activeSubSession = sub; }
3204
3783
  });
3784
+ activeSubSession = null;
3205
3785
  currentSession.planState = null;
3206
3786
  executionMode = 'auto';
3207
- await saveSession(currentSession);
3208
- return { type: 'assistant', text: result.text };
3787
+ await persistAssistantExchange(line, result.text || '', { includeUser: false });
3788
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
3209
3789
  }
3210
3790
  if (isStayInPlanText(parsedInput.text)) {
3211
3791
  const text = buildPendingPlanApprovalMessage(currentSession.planState);
@@ -3291,9 +3871,11 @@ export async function createChatRuntime({
3291
3871
  model,
3292
3872
  systemPrompt: routedSystemPrompt,
3293
3873
  onAgentEvent,
3294
- executionMode
3874
+ requestToolApproval: activeRequestToolApproval,
3875
+ executionMode,
3876
+ signal
3295
3877
  });
3296
- return { type: 'assistant', text: result.text };
3878
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
3297
3879
  };
3298
3880
 
3299
3881
  return {
@@ -3301,15 +3883,27 @@ export async function createChatRuntime({
3301
3883
  getCompletionOptions,
3302
3884
  isImmediateLocalInput,
3303
3885
  submit,
3886
+ abort: () => {
3887
+ if (activeAbortController && !activeAbortController.signal.aborted) {
3888
+ activeAbortController.abort();
3889
+ return true;
3890
+ }
3891
+ return false;
3892
+ },
3304
3893
  consumeStartupEvents: () => startupEvents.splice(0, startupEvents.length),
3305
3894
  getInputHistory: () => loadInputHistory(),
3306
3895
  getCurrentSessionId: () => currentSession.id,
3896
+ setRequestToolApproval: (handler) => {
3897
+ activeRequestToolApproval = typeof handler === 'function' ? handler : null;
3898
+ return true;
3899
+ },
3307
3900
  getRuntimeState: () =>
3308
3901
  buildRuntimeStateSnapshot({
3309
3902
  currentSession,
3310
3903
  config,
3311
3904
  model,
3312
- executionMode
3905
+ executionMode,
3906
+ extraSession: activeSubSession
3313
3907
  })
3314
3908
  };
3315
3909
  }