codemini-cli 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { parseInput } from './input-parser.js';
2
2
  import { loadCommandsAndSkills, renderCommandPrompt } from './command-loader.js';
3
3
  import { runAgentLoop, setResultDir, clearResultStore } from './agent-loop.js';
4
+ import { trimInline, normalizePath } from './string-utils.js';
4
5
  import fs from 'node:fs/promises';
5
6
  import path from 'node:path';
6
7
  import {
@@ -27,6 +28,8 @@ import { buildMemorySnapshot } from './memory-prompt.js';
27
28
  import { forgetMemory, listMemories, searchMemories } from './memory-store.js';
28
29
  import { countActiveTodos, normalizeTodos } from './todo-state.js';
29
30
 
31
+ const STREAM_SAVE_DEBOUNCE_MS = 120;
32
+
30
33
  function toOpenAIMessages(sessionMessages) {
31
34
  const mapped = [];
32
35
  for (const msg of sessionMessages) {
@@ -147,7 +150,8 @@ function getCompletionCopy(language = 'zh') {
147
150
  memory: '查看/搜索/删除持久记忆',
148
151
  history: '查看/恢复会话',
149
152
  debug: '运行时调试开关',
150
- retry: '重试上一条用户请求'
153
+ retry: '重试上一条用户请求',
154
+ stop: '中止当前回答'
151
155
  },
152
156
  generic: {
153
157
  configCommand: '配置命令',
@@ -162,6 +166,7 @@ function getCompletionCopy(language = 'zh') {
162
166
  keyboardDebugCommand: '键盘调试命令',
163
167
  compactCommand: '上下文压缩命令',
164
168
  retryCommand: '重试上一条用户请求',
169
+ stopCommand: '中止当前回答',
165
170
  statusCommand: '查看运行状态',
166
171
  resumeSession: '恢复一个已保存的会话'
167
172
  }
@@ -234,7 +239,8 @@ function getCompletionCopy(language = 'zh') {
234
239
  memory: 'list/search/delete persistent memories',
235
240
  history: 'list/resume sessions',
236
241
  debug: 'runtime debug switches',
237
- retry: 'retry the last user request'
242
+ retry: 'retry the last user request',
243
+ stop: 'stop the current response'
238
244
  },
239
245
  generic: {
240
246
  configCommand: 'config command',
@@ -249,6 +255,7 @@ function getCompletionCopy(language = 'zh') {
249
255
  keyboardDebugCommand: 'keyboard debug command',
250
256
  compactCommand: 'context compaction command',
251
257
  retryCommand: 'retry the last user request',
258
+ stopCommand: 'stop the current response',
252
259
  statusCommand: 'show runtime status',
253
260
  resumeSession: 'resume a saved session'
254
261
  }
@@ -263,62 +270,135 @@ function describeConfigKey(key, mode = 'set', language = 'zh') {
263
270
  return mode === 'get' ? copy.describeGet(label, hint) : copy.describeSet(label, hint);
264
271
  }
265
272
 
266
- const SUB_AGENT_ROLES = ['planner', 'coder', 'reviewer', 'tester'];
273
+ const SUB_AGENT_ROLES = ['planner', 'coder', 'reviewer', 'tester', 'summarizer'];
274
+ const ROLE_TOOL_POLICY = {
275
+ planner: ['read', 'grep', 'list', 'query_project_index', 'tool_search', 'glob', 'ast_query', 'read_ast_node'],
276
+ coder: ['read', 'grep', 'list', 'edit', 'write', 'run', 'ast_query', 'read_ast_node', 'glob', 'tool_search', 'update_todos'],
277
+ reviewer: ['read', 'grep', 'list', 'glob', 'tool_search', 'ast_query', 'read_ast_node'],
278
+ tester: ['read', 'grep', 'list', 'run', 'glob', 'tool_search'],
279
+ summarizer: ['read', 'grep', 'list', 'glob', 'tool_search']
280
+ };
267
281
  const SUB_AGENT_CONTEXT_MAX_MESSAGES = 4;
268
282
  const SUB_AGENT_CONTEXT_MAX_CHARS = 1200;
269
283
  const SUB_AGENT_EVIDENCE_MAX_ITEMS = 3;
270
284
  const SUB_AGENT_HANDOFF_MAX_ITEMS = 6;
285
+ const PLAN_MEMORY_MARKERS = {
286
+ findings: ['<!-- plan-findings-start -->', '<!-- plan-findings-end -->'],
287
+ progress: ['<!-- plan-progress-start -->', '<!-- plan-progress-end -->']
288
+ };
271
289
/**
 * Return the system prompt for a sub-agent role.
 *
 * Every prompt shares the same "Output format" header line so that all roles
 * present an identically worded instruction; previously the tester and coder
 * prompts dropped the em dash from that line.
 *
 * @param {string} role - 'planner', 'reviewer', 'tester' or 'summarizer'.
 *   Any other value (including 'coder') yields the coder prompt.
 * @returns {string} Newline-joined prompt text.
 */
export function getSubAgentRolePrompt(role) {
  // Single source for the header keeps the wording identical across roles.
  const outputFormatHeader = 'Output format — keep it short and direct:';

  if (role === 'planner') {
    return [
      'You are the planner in a multi-step agent pipeline.',
      'Your job: inspect the codebase and produce a concrete, actionable plan.',
      'Do not write implementation code.',
      outputFormatHeader,
      'Findings:',
      '- <important constraint, dependency, risk, or "none">',
      'Actions Taken:',
      '- <what you inspected>',
      'Open Issues:',
      '- <blocking uncertainty or "none">',
      'Next Action:',
      '- <the concrete next step for the following role>',
      'Do not summarize your own work or add closing remarks — just deliver the structured handoff and stop.',
      'IMPORTANT: Stop as soon as you have enough context to produce the plan. Do NOT keep exploring once the plan is clear — deliver it immediately.'
    ].join('\n');
  }
  if (role === 'reviewer') {
    return [
      'You are the reviewer in a multi-step agent pipeline.',
      'Focus on bugs, regressions, edge cases, and missing tests in the files handed to you.',
      'Do not roam unrelated parts of the repo unless the handed-off evidence is insufficient.',
      outputFormatHeader,
      'Findings:',
      '- <bug, regression, risk, or "none">',
      'Verified:',
      '- <what you checked>',
      'Not Verified:',
      '- <what remains uncertain>',
      'Do not add a closing summary or "Next Action" — the pipeline handles what comes next.'
    ].join('\n');
  }
  if (role === 'tester') {
    return [
      'You are the tester in a multi-step agent pipeline.',
      'Run concrete verification commands. Prefer real execution over suggestions.',
      'Verify the handed-off files first before scanning wider.',
      outputFormatHeader,
      'Verified:',
      '- <commands run and evidence>',
      'Not Verified:',
      '- <what could not be validated>',
      'Failures:',
      '- <failed command or "none">',
      'Do not add a closing summary or "Next Action" — the pipeline handles what comes next.'
    ].join('\n');
  }
  if (role === 'summarizer') {
    return [
      'You are the summarizer in a multi-step agent pipeline.',
      'Your job is to synthesize the results of all prior steps into a concise, actionable final summary.',
      'Do NOT re-analyze the codebase or make new tool calls unless the handed-off evidence is clearly insufficient.',
      'Instead, read the accumulated step results in the plan file context provided to you.',
      outputFormatHeader,
      'Summary:',
      '- <overall result in 2-4 sentences>',
      'Key Findings:',
      '- <most important findings from all steps>',
      'Actions Taken:',
      '- <what was implemented/changed/verified>',
      'Remaining Issues:',
      '- <unresolved items or "none">',
      'Recommended Next Steps:',
      '- <concrete follow-up actions if any>',
      'Do not add greetings, filler, or restate the goal. Deliver the summary and stop.'
    ].join('\n');
  }
  // Default: the coder prompt (also used for unrecognized roles).
  return [
    'You are the coder in a multi-step agent pipeline.',
    'Produce practical code changes with minimal explanation.',
    outputFormatHeader,
    'Actions Taken:',
    '- <file changes, commands, or "none">',
    'Findings:',
    '- <important implementation note, regression risk, or "none">',
    'Verified:',
    '- <test/check evidence or "none">',
    'Open Issues:',
    '- <remaining gap or "none">',
    'Artifacts:',
    '- <changed file path or "none">',
    'Next Action:',
    '- <the best next step for the following role or "none">',
    'Do not summarize the goal, recap the plan, or add closing remarks.'
  ].join('\n');
}
316
377
 
317
- function trimInlineText(value, maxLen = 220) {
318
- const text = String(value || '').replace(/\s+/g, ' ').trim();
319
- if (!text) return '';
320
- if (text.length <= maxLen) return text;
321
- return `${text.slice(0, maxLen - 3)}...`;
378
/**
 * Build the guidance text that tells a sub-agent where it sits in the pipeline
 * and which style rules to follow.
 *
 * @param {object} [options]
 * @param {string} [options.role] - Role of the current step (accepted for caller
 *   compatibility; not used in the generated text).
 * @param {number} options.stepIndex - Zero-based index of the current step.
 * @param {number} options.totalSteps - Total number of steps in the pipeline.
 * @param {boolean} [options.isFirst] - True when this is the first step.
 * @param {boolean} [options.isLast] - True when this is the final step.
 * @param {Array<{role: string, title: string}>} [options.priorSteps] - Steps that
 *   already ran; only the last one is referenced. Missing or non-array values
 *   are treated as "no prior steps" instead of throwing.
 * @returns {string} Newline-joined guidance.
 */
function buildPipelineStepGuidance({ role, stepIndex, totalSteps, isFirst, isLast, priorSteps = [] } = {}) {
  const lines = [`Pipeline position: step ${stepIndex + 1} of ${totalSteps}.`];

  if (isFirst) {
    lines.push('You are the first step. Your output sets direction for the rest of the pipeline.');
  } else if (isLast) {
    lines.push('You are the final step. After you, the pipeline will present a combined result to the user.');
  } else {
    lines.push('You are in the middle of the pipeline. Your output feeds into the next step.');
  }

  // A default parameter does not cover an explicit null, so guard with isArray as well.
  const completedSteps = Array.isArray(priorSteps) ? priorSteps : [];
  const prev = completedSteps[completedSteps.length - 1];
  if (prev) {
    lines.push(`Previous step was [${prev.role}]: ${prev.title}. Use its output as your starting point.`);
  }

  lines.push(
    'Style rules:',
    '- Be direct and action-oriented. No greetings, no summaries, no "In conclusion" or "To summarize".',
    '- Treat the Findings Ledger and Progress Ledger in the plan file context as the shared working memory for this pipeline.',
    '- If you discover something new, record it under the requested headings instead of burying it in prose.',
    '- Continue the established direction unless you have concrete contradictory evidence.',
    '- Output only what the next step needs to know. Skip obvious observations.'
  );
  if (isLast) {
    lines.push('- Since you are the final step, give a concise overall verdict the user can act on.');
  }
  return lines.join('\n');
}
323
403
 
324
404
  function buildSubAgentContextPacket(session) {
@@ -332,7 +412,7 @@ function buildSubAgentContextPacket(session) {
332
412
  let usedChars = 0;
333
413
  for (const msg of recent) {
334
414
  const role = msg.role === 'assistant' ? 'assistant' : 'user';
335
- const text = trimInlineText(msg.content, 260);
415
+ const text = trimInline(msg.content, 260);
336
416
  if (!text) continue;
337
417
  const line = `- ${role}: ${text}`;
338
418
  if (usedChars + line.length > SUB_AGENT_CONTEXT_MAX_CHARS) break;
@@ -348,8 +428,8 @@ function buildSubAgentContextPacket(session) {
348
428
  }
349
429
 
350
430
  function maybePushEvidence(out, seen, filePath, summary) {
351
- const pathText = trimInlineText(filePath, 160);
352
- const summaryText = trimInlineText(summary, 200);
431
+ const pathText = trimInline(filePath, 160);
432
+ const summaryText = trimInline(summary, 200);
353
433
  if (!pathText || seen.has(pathText)) return;
354
434
  seen.add(pathText);
355
435
  out.push(`- ${pathText}${summaryText ? ` :: ${summaryText}` : ''}`);
@@ -415,7 +495,7 @@ function extractLikelyPathsFromText(rawText, out, seen) {
415
495
  }
416
496
 
417
497
/**
 * Produce a short single-string summary for a plan step.
 *
 * Uses the step's output when present, otherwise its task text, passed through
 * `trimInline` with an 800-character limit (presumably whitespace-collapsing
 * truncation — confirm against string-utils.js).
 *
 * @param {{output?: string, task?: string}} [step]
 * @returns {string} The condensed text, or a fixed placeholder when nothing is available.
 */
function summarizeStepOutput(step) {
  const rawText = step?.output || step?.task || '';
  const condensed = trimInline(rawText, 800);
  if (condensed) {
    return condensed;
  }
  return 'No concise output captured.';
}
421
501
 
@@ -588,7 +668,7 @@ function classifyPlanTaskClass(goal = '') {
588
668
  }
589
669
 
590
670
  function buildGoalRequirementPacket(goal, role) {
591
- const rawGoal = trimInlineText(goal, 800);
671
+ const rawGoal = trimInline(goal, 800);
592
672
  if (!rawGoal) return '';
593
673
  const requirements = deriveGoalRequirements(goal);
594
674
  const lines = ['Original goal:', rawGoal];
@@ -621,7 +701,8 @@ function buildAutoPlanPlannerGuidance() {
621
701
  '- Prefer the smallest local approach that satisfies the goal.',
622
702
  '- Do not output multiple alternative branches in the final plan.',
623
703
  '- Do not assume implementation should begin before the plan is coherent.',
624
- '- Available sub-agent roles are planner, coder, reviewer, and tester. Use only the roles the task actually needs.',
704
+ '- Available sub-agent roles are planner, coder, reviewer, tester, and summarizer. Use only the roles the task actually needs.',
705
+ '- The summarizer role reads accumulated step results from the plan file and synthesizes a final summary. It does NOT re-analyze the codebase. Prefer summarizer as the final step for multi-step plans.',
625
706
  '- For implementation-heavy or risky changes, prefer adding review and/or verification steps.',
626
707
  '- For analysis, recommendation, or planning-only goals, you may omit reviewer/tester if they do not add value.',
627
708
  '- Prefer 3-5 steps total unless the task is clearly larger.',
@@ -669,6 +750,196 @@ async function readJsonSafe(targetPath) {
669
750
  }
670
751
  }
671
752
 
753
/**
 * Return the trimmed text between the start and end markers of a managed
 * plan-file section.
 *
 * The end marker is searched for only after the start marker, so a stray end
 * marker earlier in the file cannot hide a valid section. This matches how
 * replaceManagedPlanSection locates the same span.
 *
 * @param {string} [content] - Full plan file text.
 * @param {string} [key] - Key into PLAN_MEMORY_MARKERS ('findings' or 'progress').
 * @returns {string} Section body, or '' when the key is unknown or the markers
 *   are missing.
 */
function extractManagedPlanSection(content = '', key = 'findings') {
  const markers = PLAN_MEMORY_MARKERS[key];
  // isArray also rejects inherited properties such as 'constructor'.
  if (!Array.isArray(markers)) return '';
  const [startMarker, endMarker] = markers;

  const text = String(content || '');
  const start = text.indexOf(startMarker);
  if (start === -1) return '';

  const bodyStart = start + startMarker.length;
  const end = text.indexOf(endMarker, bodyStart);
  if (end === -1) return '';

  return text.slice(bodyStart, end).trim();
}
764
+
765
/**
 * Replace (or append) a marker-delimited managed section in the plan file text.
 *
 * The section is spliced in by index rather than via `String.replace` with a
 * string replacement: ledger text comes from model output and may contain
 * sequences such as `$&` or `$1`, which `replace` would expand instead of
 * inserting literally. Index-based splicing also avoids building a RegExp from
 * unescaped marker text.
 *
 * @param {string} [content] - Full plan file text.
 * @param {string} [key] - Key into PLAN_MEMORY_MARKERS ('findings' or 'progress').
 * @param {string} [nextSection] - New section body (trimmed before insertion).
 * @returns {string} Updated text. When the key is unknown the content is
 *   returned unchanged; when no complete marker pair exists the section is
 *   appended after a blank line.
 */
function replaceManagedPlanSection(content = '', key = 'findings', nextSection = '') {
  const text = String(content || '');
  const markers = PLAN_MEMORY_MARKERS[key];
  if (!Array.isArray(markers)) return text;
  const [startMarker, endMarker] = markers;
  const sectionBody = `${startMarker}\n${String(nextSection || '').trim()}\n${endMarker}`;

  // First start marker, then the first end marker after it — the same span a
  // lazy `start[\s\S]*?end` match would select.
  const start = text.indexOf(startMarker);
  const end = start === -1 ? -1 : text.indexOf(endMarker, start + startMarker.length);
  if (start !== -1 && end !== -1) {
    return `${text.slice(0, start)}${sectionBody}${text.slice(end + endMarker.length)}`;
  }
  return `${text.trimEnd()}\n\n${sectionBody}\n`;
}
776
+
777
/**
 * Clean a list of ledger lines: stringify, trim, drop empties and remove
 * duplicates while keeping first-seen order.
 *
 * @param {Array<*>} [items] - Candidate lines; non-array input counts as empty.
 * @param {string} [fallback] - Single entry returned when nothing survives.
 * @returns {string[]} The cleaned lines, or `[fallback]` when there are none.
 */
function normalizeLedgerItems(items = [], fallback = '- None recorded yet.') {
  const unique = new Set();
  if (Array.isArray(items)) {
    for (const entry of items) {
      const line = String(entry || '').trim();
      if (line) {
        unique.add(line);
      }
    }
  }
  if (unique.size === 0) {
    return [fallback];
  }
  return [...unique];
}
781
+
782
/**
 * Normalize ledger lines and keep only the most recent `maxItems` of them.
 *
 * @param {Array<*>} [items] - Candidate ledger lines.
 * @param {number} [maxItems] - Maximum number of trailing entries to keep.
 * @returns {string[]} Up to `maxItems` cleaned lines from the end of the list
 *   (empty array when nothing survives normalization).
 */
function trimLedger(items = [], maxItems = 10) {
  // An empty-string fallback is filtered straight back out, so "no items" yields [].
  const entries = normalizeLedgerItems(items, '').filter(Boolean);
  const firstKept = Math.max(0, entries.length - maxItems);
  return entries.slice(firstKept);
}
786
+
787
/**
 * Read the bullets under one heading of a step's output, drop "none"
 * placeholders and format each remaining bullet as a `- ` ledger line.
 */
function collectSectionLedgerLines(output, heading) {
  return extractSectionBullets(output, heading)
    .filter((item) => !/^none\b/i.test(item))
    .map((item) => `- ${item}`);
}

/**
 * Extract the structured "working memory" from a step's output.
 *
 * Each known heading is read via `extractSectionBullets`; bullets beginning
 * with "none" are ignored and every list is capped to its most recent entries.
 * Artifacts combine the output's own "Artifacts" section with the
 * caller-supplied paths.
 *
 * @param {string} [output] - Raw step output text.
 * @param {string[]} [artifactPaths] - Extra artifact paths recorded by the caller.
 * @returns {{findings: string[], actionsTaken: string[], verified: string[],
 *   notVerified: string[], failures: string[], openIssues: string[],
 *   nextAction: string[], artifacts: string[]}}
 */
export function extractStepWorkingMemory(output = '', artifactPaths = []) {
  const findings = collectSectionLedgerLines(output, 'Findings');
  const actionsTaken = collectSectionLedgerLines(output, 'Actions Taken');
  const verified = collectSectionLedgerLines(output, 'Verified');
  const notVerified = collectSectionLedgerLines(output, 'Not Verified');
  const failures = collectSectionLedgerLines(output, 'Failures');
  const openIssues = collectSectionLedgerLines(output, 'Open Issues');
  const nextAction = collectSectionLedgerLines(output, 'Next Action');
  const artifactLines = [
    ...collectSectionLedgerLines(output, 'Artifacts'),
    ...(Array.isArray(artifactPaths) ? artifactPaths : []).filter(Boolean).map((item) => `- ${item}`)
  ];

  return {
    findings: trimLedger(findings, 8),
    actionsTaken: trimLedger(actionsTaken, 8),
    verified: trimLedger(verified, 6),
    notVerified: trimLedger(notVerified, 6),
    failures: trimLedger(failures, 6),
    openIssues: trimLedger(openIssues, 6),
    nextAction: trimLedger(nextAction, 3),
    artifacts: trimLedger(artifactLines, 6)
  };
}
827
+
828
/**
 * Format one progress-ledger line for a finished step.
 *
 * The step is marked `attention-needed` when its memory lists any failures,
 * open issues or unverified items, otherwise `completed`. Up to two
 * highlights are appended, taken in priority order from the first entry of:
 * actions taken, verified, next action, open issues, not verified, failures.
 *
 * @param {number} stepIndex - Zero-based step index (rendered one-based).
 * @param {string} stepTitle - Step title.
 * @param {string} role - Sub-agent role that ran the step.
 * @param {object} memory - Result of extractStepWorkingMemory for the step.
 * @returns {string} A single `- Step N [role] title -> status[ :: a | b]` line.
 */
function buildProgressLedgerEntry(stepIndex, stepTitle, role, memory) {
  const needsAttention = [memory.failures, memory.openIssues, memory.notVerified].some((list) => list.length > 0);
  const status = needsAttention ? 'attention-needed' : 'completed';

  const candidates = [
    memory.actionsTaken[0],
    memory.verified[0],
    memory.nextAction[0],
    memory.openIssues[0],
    memory.notVerified[0],
    memory.failures[0]
  ];
  const highlights = candidates
    .filter(Boolean)
    .slice(0, 2)
    .map((line) => line.replace(/^- /, ''));

  let entry = `- Step ${stepIndex + 1} [${role}] ${stepTitle} -> ${status}`;
  if (highlights.length > 0) {
    entry += ` :: ${highlights.join(' | ')}`;
  }
  return entry;
}
844
+
845
/**
 * Return the last `maxEntries` "## Step N Result: ..." sections of a plan file,
 * joined by a horizontal-rule separator.
 *
 * @param {string} [content] - Full plan file text.
 * @param {number} [maxEntries] - How many trailing result sections to keep.
 *   Zero, negative or non-numeric values yield '' (previously `slice(-0)`
 *   returned every section).
 * @returns {string} The joined sections, or '' when none are found.
 */
function buildRecentStepResults(content = '', maxEntries = 2) {
  const limit = Math.floor(Number(maxEntries));
  if (!(limit > 0)) return '';

  const value = String(content || '');
  const starts = [...value.matchAll(/^## Step \d+ Result: .*$/gm)].map((match) => match.index ?? 0);
  if (starts.length === 0) return '';

  // Each section runs from its heading up to the next heading (or end of file).
  const chunks = starts.map((start, index) => {
    const end = index + 1 < starts.length ? starts[index + 1] : value.length;
    return value.slice(start, end).trim();
  });
  return chunks.slice(-limit).join('\n\n---\n\n');
}
853
+
854
/**
 * Shorten plan text to at most `maxChars` by keeping its head and tail around
 * a truncation marker. The marker's real length is budgeted so the result
 * never exceeds `maxChars`.
 */
function truncatePlanTextMiddle(value, maxChars) {
  if (value.length <= maxChars) return value;
  const marker = '\n\n... [plan file truncated, showing most recent step results] ...\n\n';
  const budget = maxChars - marker.length;
  // Not even the marker fits: fall back to a plain head cut.
  if (!(budget > 0)) return value.slice(0, Math.max(0, maxChars));
  const headSize = Math.min(budget, Math.floor(maxChars * 0.3));
  const tailSize = budget - headSize;
  // slice(-0) would return the whole string, so guard the zero case explicitly.
  const tail = tailSize > 0 ? value.slice(-tailSize) : '';
  return `${value.slice(0, headSize)}${marker}${tail}`;
}

/**
 * Build the plan-file context handed to a sub-agent, bounded by `maxChars`.
 *
 * When the plan file has no managed findings/progress sections, the raw text
 * is returned (truncated in the middle if needed). Otherwise the result is:
 * the head of the file, a "Working Memory Snapshot" with both ledgers, and the
 * two most recent step results, truncated at the end if still too long.
 *
 * @param {string} [content] - Full plan file text.
 * @param {number} [maxChars] - Upper bound on the returned length.
 * @returns {string} Context text, or '' for empty content.
 */
export function buildPlanWorkingMemoryContext(content = '', maxChars = 6000) {
  const value = String(content || '').trim();
  if (!value) return '';

  const findings = extractManagedPlanSection(value, 'findings');
  const progress = extractManagedPlanSection(value, 'progress');
  if (!findings && !progress) {
    return truncatePlanTextMiddle(value, maxChars);
  }

  const headLimit = Math.max(600, Math.floor(maxChars * 0.35));
  const head = value.slice(0, headLimit).trimEnd();
  const recentResults = buildRecentStepResults(value, 2);
  const sections = [
    head,
    '## Working Memory Snapshot',
    '### Findings Ledger',
    findings || '- None recorded yet.',
    '### Progress Ledger',
    progress || '- No progress recorded yet.'
  ];
  if (recentResults) {
    sections.push('## Recent Step Results', recentResults);
  }

  const summary = sections.filter(Boolean).join('\n\n').trim();
  if (summary.length <= maxChars) return summary;
  // Clamp so a very small maxChars cannot turn into a negative slice index.
  const keep = Math.max(0, maxChars - 42);
  return `${summary.slice(0, keep).trimEnd()}\n... [working memory truncated]`;
}
885
+
886
/**
 * Record a finished step in the plan file: merge its findings and progress
 * into the managed ledger sections and append the full step output at the end.
 *
 * Placeholder lines written when a ledger was empty ("- None recorded yet." /
 * "- No progress recorded yet.") are dropped when the existing ledger is read
 * back. Otherwise they would be treated as real entries and stay in the ledger
 * next to the first real finding.
 *
 * Any I/O or parsing error is swallowed on purpose: the plan-file handoff is
 * best-effort and must not fail the pipeline.
 *
 * @param {string} planFilePath - Path of the plan file; falsy values are a no-op.
 * @param {number} stepIndex - Zero-based index of the finished step.
 * @param {string} stepTitle - Title of the finished step.
 * @param {string} role - Sub-agent role that ran the step.
 * @param {string} output - Raw step output.
 * @param {string[]} [artifactPaths] - Artifact paths produced by the step.
 * @returns {Promise<void>}
 */
async function appendStepResultToPlanFile(planFilePath, stepIndex, stepTitle, role, output, artifactPaths = []) {
  if (!planFilePath) return;

  const findingsPlaceholder = '- None recorded yet.';
  const progressPlaceholder = '- No progress recorded yet.';
  // Existing ledger lines for a section, minus blanks and the empty-ledger placeholder.
  const readLedgerLines = (content, key, placeholder) =>
    extractManagedPlanSection(content, key)
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line && line !== placeholder);

  try {
    const separator = '\n\n---\n\n';
    const timestamp = new Date().toISOString();
    const content = await fs.readFile(planFilePath, 'utf8');
    const memory = extractStepWorkingMemory(output, artifactPaths);

    const findingsBlock = [
      ...readLedgerLines(content, 'findings', findingsPlaceholder),
      ...memory.findings,
      ...memory.openIssues,
      ...memory.notVerified,
      ...memory.failures
    ];
    const progressBlock = [
      ...readLedgerLines(content, 'progress', progressPlaceholder),
      buildProgressLedgerEntry(stepIndex, stepTitle, role, memory)
    ];
    const entry = [
      `## Step ${stepIndex + 1} Result: ${stepTitle}`,
      `Role: ${role}`,
      `Completed: ${timestamp}`,
      '',
      output || '(no output)',
      ''
    ].join('\n');

    const withFindings = replaceManagedPlanSection(
      content,
      'findings',
      normalizeLedgerItems(trimLedger(findingsBlock, 12), findingsPlaceholder).join('\n')
    );
    const withProgress = replaceManagedPlanSection(
      `${withFindings}\n`,
      'progress',
      normalizeLedgerItems(trimLedger(progressBlock, 12), progressPlaceholder).join('\n')
    );
    await fs.writeFile(planFilePath, `${withProgress.trimEnd()}${separator}${entry}\n`, 'utf8');
  } catch {
    // Non-fatal: plan file handoff is best-effort
  }
}
932
+
933
/**
 * Load the plan file and convert it into bounded working-memory context.
 *
 * @param {string} planFilePath - Path of the plan file; falsy values yield ''.
 * @param {number} [maxChars] - Upper bound forwarded to buildPlanWorkingMemoryContext.
 * @returns {Promise<string>} The context text, or '' when the file cannot be
 *   read or the context cannot be built.
 */
async function readPlanFileAsContext(planFilePath, maxChars = 6000) {
  let context = '';
  if (!planFilePath) {
    return context;
  }
  try {
    const planText = await fs.readFile(planFilePath, 'utf8');
    context = buildPlanWorkingMemoryContext(planText, maxChars);
  } catch {
    // Any failure here is treated as "no context available".
    context = '';
  }
  return context;
}
942
+
672
943
  async function buildTesterVerificationPacket(focusPaths = []) {
673
944
  const cwd = process.cwd();
674
945
  const primary = [];
@@ -684,13 +955,13 @@ async function buildTesterVerificationPacket(focusPaths = []) {
684
955
  const pkg = await readJsonSafe(packageJsonPath);
685
956
  const scripts = pkg?.scripts || {};
686
957
  if (typeof scripts.test === 'string' && scripts.test.trim()) {
687
- primary.push(`- npm test :: package.json script = ${trimInlineText(scripts.test, 140)}`);
958
+ primary.push(`- npm test :: package.json script = ${trimInline(scripts.test, 140)}`);
688
959
  }
689
960
  if (typeof scripts.build === 'string' && scripts.build.trim()) {
690
- secondary.push(`- npm run build :: package.json script = ${trimInlineText(scripts.build, 140)}`);
961
+ secondary.push(`- npm run build :: package.json script = ${trimInline(scripts.build, 140)}`);
691
962
  }
692
963
  if (typeof scripts.lint === 'string' && scripts.lint.trim()) {
693
- secondary.push(`- npm run lint :: package.json script = ${trimInlineText(scripts.lint, 140)}`);
964
+ secondary.push(`- npm run lint :: package.json script = ${trimInline(scripts.lint, 140)}`);
694
965
  }
695
966
  fallback.push('- If test/build scripts are not usable, inspect package.json scripts and run the narrowest relevant check.');
696
967
  }
@@ -1006,17 +1277,32 @@ function buildFallbackAutoPlan(goal) {
1006
1277
  title: 'Verify the changed flows',
1007
1278
  role: 'tester',
1008
1279
  task: `Verify the completed work for: ${goal}. Run the most relevant checks available, report concrete evidence, and call out anything still not verified.`
1280
+ },
1281
+ {
1282
+ title: 'Synthesize final implementation status',
1283
+ role: 'summarizer',
1284
+ task: `Synthesize the completed work for: ${goal}. Read the accumulated findings, verification evidence, and open issues from earlier steps, then produce a concise final status with remaining risks and the single best next action.`
1009
1285
  }
1010
1286
  ]
1011
1287
  };
1012
1288
  }
1013
1289
 
1290
/**
 * Pick the summarizer step for a plan: reuse a complete summarizer step from
 * the planner's own steps when one exists, otherwise build the default one.
 *
 * Null/undefined entries in `source` are skipped rather than throwing, and a
 * summarizer step missing its title or task no longer hides a later, complete
 * one.
 *
 * @param {string} goal - The user's goal, interpolated into the default task.
 * @param {Array<object>} [source] - Planner-provided steps; non-arrays count as empty.
 * @returns {{title: string, role: string, task: string}} The step to append.
 */
function buildDefaultSummarizerStep(goal, source = []) {
  const steps = Array.isArray(source) ? source : [];
  const existing = steps.find((step) => step?.role === 'summarizer' && step.title && step.task);
  if (existing) return existing;
  return {
    title: 'Synthesize final implementation status',
    role: 'summarizer',
    task: `Synthesize the completed work for: ${goal}. Read the accumulated findings, verification evidence, and open issues from earlier steps, then produce a concise final status with remaining risks and the single best next action.`
  };
}
1299
+
1014
1300
  function enforceAutoPlanGuardrailSteps(plan, goal) {
1015
1301
  const source = Array.isArray(plan?.steps) ? plan.steps : [];
1016
1302
  const requirements = deriveGoalRequirements(goal);
1017
1303
  const lightweightGoal = isLightweightAutoPlanGoal(goal, requirements);
1018
1304
  const taskClass = classifyPlanTaskClass(goal);
1019
- const implementationSteps = source.filter((step) => step.role !== 'reviewer' && step.role !== 'tester');
1305
+ const implementationSteps = source.filter((step) => step.role !== 'reviewer' && step.role !== 'tester' && step.role !== 'summarizer');
1020
1306
  const primaryImplementationStep =
1021
1307
  implementationSteps.find((step) => step.role === 'coder') ||
1022
1308
  implementationSteps[0] || {
@@ -1034,6 +1320,7 @@ function enforceAutoPlanGuardrailSteps(plan, goal) {
1034
1320
  role: 'tester',
1035
1321
  task: `Test and verify the completed work for: ${goal}. Start with the artifacts produced by earlier implementation steps, run the most relevant checks available, report concrete evidence, and call out anything still unverified.`
1036
1322
  };
1323
+ const summarizerStep = buildDefaultSummarizerStep(goal, source);
1037
1324
  const hasReviewer = source.some((step) => step.role === 'reviewer');
1038
1325
  const hasTester = source.some((step) => step.role === 'tester');
1039
1326
 
@@ -1052,13 +1339,16 @@ function enforceAutoPlanGuardrailSteps(plan, goal) {
1052
1339
  };
1053
1340
  }
1054
1341
 
1342
+ const executionSteps = [
1343
+ ...implementationSteps.slice(0, 6),
1344
+ ...(hasReviewer ? [reviewerStep] : []),
1345
+ ...(testerStep ? [testerStep] : [])
1346
+ ];
1347
+ const needsSummarizer = executionSteps.length >= 3;
1348
+
1055
1349
  return {
1056
1350
  summary: String(plan?.summary || `Auto plan for: ${goal}`).trim(),
1057
- steps: [
1058
- ...implementationSteps.slice(0, 6),
1059
- ...(hasReviewer ? [reviewerStep] : []),
1060
- ...(testerStep ? [testerStep] : [])
1061
- ]
1351
+ steps: needsSummarizer ? [...executionSteps, summarizerStep] : executionSteps
1062
1352
  };
1063
1353
  }
1064
1354
 
@@ -1083,15 +1373,74 @@ function stepOutputHasFailureSignals(role, text = '') {
1083
1373
  const failureBullet = extractSectionFirstBullet(value, 'Failures');
1084
1374
  const findingsBullet = extractSectionFirstBullet(value, 'Findings');
1085
1375
  const nextActionBullet = extractSectionFirstBullet(value, 'Next Action');
1376
+ const notVerifiedBullet = extractSectionFirstBullet(value, 'Not Verified');
1377
+ const remainingIssuesBullet = extractSectionFirstBullet(value, 'Remaining Issues');
1378
+ const actionsTakenBullet = extractSectionFirstBullet(value, 'Actions Taken');
1379
+ const artifactsBullet = extractSectionFirstBullet(value, 'Artifacts');
1086
1380
  const acceptanceFailures = extractAcceptanceStatusItems(value).filter((item) => item.status !== 'met');
1087
1381
  if (errorBullet && !/^none\b/i.test(errorBullet)) return true;
1088
1382
  if (failureBullet && !/^none\b/i.test(failureBullet)) return true;
1089
1383
  if (acceptanceFailures.length > 0) return true;
1090
- if (role === 'reviewer' && findingsBullet && !/^none\b/i.test(findingsBullet)) return true;
1384
+ if (role === 'coder' && coderOutputLacksImplementationEvidence(actionsTakenBullet, artifactsBullet)) return true;
1385
+ if (role === 'reviewer' && reviewerFindingNeedsAction(findingsBullet)) return true;
1386
+ if ((role === 'tester' || role === 'summarizer') && notVerifiedBullet && !/^none\b/i.test(notVerifiedBullet)) return true;
1387
+ if (role === 'summarizer' && remainingIssuesBullet && !/^none\b/i.test(remainingIssuesBullet)) return true;
1091
1388
  if (nextActionBullet && /^(fix|retry|correct|repair)\b/i.test(nextActionBullet)) return true;
1092
1389
  return false;
1093
1390
  }
1094
1391
 
1392
/**
 * Decide whether a coder step reported no implementation evidence at all.
 *
 * @param {string} [actionsTaken] - First bullet of the "Actions Taken" section.
 * @param {string} [artifacts] - First bullet of the "Artifacts" section.
 * @returns {boolean} True only when both values are blank or start with "none".
 */
function coderOutputLacksImplementationEvidence(actionsTaken = '', artifacts = '') {
  const isBlankOrNone = (raw) => {
    const text = String(raw || '').trim();
    return text === '' || /^none\b/i.test(text);
  };
  return isBlankOrNone(actionsTaken) && isBlankOrNone(artifacts);
}
1397
+
1398
/**
 * Heuristically decide whether a reviewer's first finding calls for rework.
 *
 * Blank findings and findings starting with "none" never need action.
 * Otherwise the finding is flagged when it contains one of the problem
 * keywords or negative phrases below, matched as whole words.
 *
 * NOTE(review): keywords are matched in singular form only (e.g. "bug" but
 * not "bugs") — confirm whether that is intended.
 *
 * @param {string} [text] - First bullet of the reviewer's "Findings" section.
 * @returns {boolean} True when the finding looks actionable.
 */
function reviewerFindingNeedsAction(text = '') {
  const finding = String(text || '').trim();
  if (finding === '' || /^none\b/i.test(finding)) {
    return false;
  }

  const normalized = finding.toLowerCase();
  const actionSignals = [
    /\b(bug|regression|risk|risky|missing|missing test|unsafe|blocker|blocked|incorrect|broken|failure|failing|unverified|mismatch|incomplete|gap|can regress|still regress)\b/i,
    /\b(not covered|not handled|not verified|does not|doesn't|cannot|can't|lacks?)\b/i
  ];
  return actionSignals.some((signal) => signal.test(normalized));
}
1414
+
1415
/**
 * Explain, in one short phrase, why a step's output failed its exit criteria.
 *
 * Checks run in a fixed priority order and the first match wins: empty
 * output, Error section, Failures section, coder without implementation
 * evidence, actionable reviewer finding, a "Next Action" that begins with
 * fix/retry/correct/repair, an acceptance item that is not "met", unverified
 * items (tester/summarizer), remaining issues (summarizer), then a generic
 * fallback.
 *
 * @param {string} role - Sub-agent role that produced the output.
 * @param {string} [text] - Raw step output.
 * @returns {string} Human-readable failure reason.
 */
function buildExitCriteriaFailureReason(role, text = '') {
  const value = String(text || '').trim();
  if (!value) return 'no structured step output was produced';

  const bulletFor = (heading) => extractSectionFirstBullet(value, heading);
  // A bullet counts as reported when it exists and is not a "none" placeholder.
  const isReported = (bullet) => Boolean(bullet) && !/^none\b/i.test(bullet);

  const errorBullet = bulletFor('Error');
  if (isReported(errorBullet)) return `error: ${errorBullet}`;

  const failureBullet = bulletFor('Failures');
  if (isReported(failureBullet)) return `failures: ${failureBullet}`;

  if (role === 'coder') {
    const actionsTakenBullet = bulletFor('Actions Taken');
    const artifactsBullet = bulletFor('Artifacts');
    if (coderOutputLacksImplementationEvidence(actionsTakenBullet, artifactsBullet)) {
      return 'coder output did not include implementation evidence';
    }
  }

  if (role === 'reviewer') {
    const findingsBullet = bulletFor('Findings');
    if (reviewerFindingNeedsAction(findingsBullet)) return `review findings: ${findingsBullet}`;
  }

  const nextActionBullet = bulletFor('Next Action');
  if (nextActionBullet && /^(fix|retry|correct|repair)\b/i.test(nextActionBullet)) {
    return `next action requires rework: ${nextActionBullet}`;
  }

  const acceptanceFailure = extractAcceptanceStatusItems(value).find((item) => item.status !== 'met');
  if (acceptanceFailure) return `acceptance ${acceptanceFailure.status}: ${acceptanceFailure.label}`;

  if (role === 'tester' || role === 'summarizer') {
    const notVerifiedBullet = bulletFor('Not Verified');
    if (isReported(notVerifiedBullet)) return `not verified: ${notVerifiedBullet}`;
  }

  if (role === 'summarizer') {
    const remainingIssuesBullet = bulletFor('Remaining Issues');
    if (isReported(remainingIssuesBullet)) return `remaining issues: ${remainingIssuesBullet}`;
  }

  return 'step output did not satisfy exit criteria';
}
1443
+
1095
1444
  function extractSectionFirstBullet(text = '', heading = '') {
1096
1445
  const escaped = String(heading || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
1097
1446
  const match = String(text || '').match(new RegExp(String.raw`(^|\n)\s*${escaped}\s*:\s*(?:\n|\r\n?)+\s*-\s*([^\n\r]+)`, 'i'));
@@ -1150,6 +1499,13 @@ function buildAutoPlanSystemSummary(auto) {
1150
1499
  if (auto.failedTitles?.length) {
1151
1500
  lines.push(`Failed steps: ${auto.failedTitles.slice(0, 5).join(', ')}`);
1152
1501
  }
1502
+ // Always include plan steps for TUI rendering
1503
+ if (Array.isArray(auto.steps) && auto.steps.length > 0) {
1504
+ lines.push('Plan Steps:');
1505
+ auto.steps.forEach((s, idx) => {
1506
+ lines.push(` ${idx + 1}. [${s.role}] ${s.title}`);
1507
+ });
1508
+ }
1153
1509
  if (auto.approvalStatus === 'pending') {
1154
1510
  lines.push('Next: review the plan summary, then use /plan approve to start implementation, /plan auto run <goal> to plan and run in one step next time, or /plan stay to keep planning.');
1155
1511
  }
@@ -1185,7 +1541,7 @@ function buildAutoPlanFinalSummaryUserPrompt({ goal, autoPlan, runItems, plannin
1185
1541
  if (item.warning) {
1186
1542
  lines.push(`Warning: ${item.warning}`);
1187
1543
  }
1188
- lines.push(`Output: ${trimInlineText(item.output || '(empty)', 500)}`);
1544
+ lines.push(`Output: ${trimInline(item.output || '(empty)', 500)}`);
1189
1545
  if (Array.isArray(item.artifactPaths) && item.artifactPaths.length > 0) {
1190
1546
  lines.push(`Artifacts: ${item.artifactPaths.slice(0, 5).join(', ')}`);
1191
1547
  }
@@ -1244,7 +1600,7 @@ async function buildAutoPlanFinalSummary({
1244
1600
  timeoutMs: config.gateway.timeout_ms || 90000,
1245
1601
  maxRetries: config.gateway.max_retries ?? 2
1246
1602
  });
1247
- return trimInlineText(result.text || '', 600) || fallbackSummary;
1603
+ return trimInline(result.text || '', 600) || fallbackSummary;
1248
1604
  } catch {
1249
1605
  return fallbackSummary;
1250
1606
  }
@@ -1425,7 +1781,7 @@ async function collectLikelyImplementationFiles(cwd) {
1425
1781
  continue;
1426
1782
  }
1427
1783
  if (!preferredExts.has(path.extname(entry.name).toLowerCase())) continue;
1428
- candidates.push(path.relative(cwd, abs).replace(/\\/g, '/'));
1784
+ candidates.push(normalizePath(path.relative(cwd, abs)));
1429
1785
  if (candidates.length >= 8) return;
1430
1786
  }
1431
1787
  }
@@ -1497,8 +1853,10 @@ function effectiveMaxContextTokens(config) {
1497
1853
  return 32000;
1498
1854
  }
1499
1855
 
1500
- function buildRuntimeStateSnapshot({ currentSession, config, model, executionMode }) {
1501
- const currentContextTokens = estimateMessagesTokens(currentSession?.messages || []);
1856
+ function buildRuntimeStateSnapshot({ currentSession, config, model, executionMode, extraSession }) {
1857
+ const parentTokens = estimateMessagesTokens(currentSession?.messages || []);
1858
+ const subTokens = extraSession ? estimateMessagesTokens(extraSession.messages || []) : 0;
1859
+ const currentContextTokens = parentTokens + subTokens;
1502
1860
  const maxContextTokens = effectiveMaxContextTokens(config);
1503
1861
  const contextUsagePct = maxContextTokens > 0 ? Math.min(100, Math.max(0, (currentContextTokens / maxContextTokens) * 100)) : 0;
1504
1862
  const snapshot = {
@@ -1679,9 +2037,14 @@ async function askModel({
1679
2037
  model,
1680
2038
  systemPrompt,
1681
2039
  onAgentEvent,
2040
+ requestToolApproval,
1682
2041
  persistSession = true,
1683
2042
  executionMode,
1684
- alwaysAllowTools
2043
+ alwaysAllowTools,
2044
+ signal,
2045
+ allowedTools,
2046
+ maxSteps: maxStepsOverride,
2047
+ skipAnalysisNudge = false
1685
2048
  }) {
1686
2049
  const maxContextTokens = effectiveMaxContextTokens(config);
1687
2050
  const triggerPct = Number(config.context?.preflight_trigger_pct || 92);
@@ -1725,7 +2088,7 @@ async function askModel({
1725
2088
  if (done) done();
1726
2089
  savePromise = null;
1727
2090
  }
1728
- }, 400);
2091
+ }, STREAM_SAVE_DEBOUNCE_MS);
1729
2092
  };
1730
2093
  const flushScheduledSave = async () => {
1731
2094
  if (!persistSession) return;
@@ -1741,10 +2104,20 @@ async function askModel({
1741
2104
  }
1742
2105
  if (savePromise) await savePromise;
1743
2106
  };
2107
+ if (persistSession && signal) {
2108
+ const flushOnAbort = () => {
2109
+ void flushScheduledSave().catch(() => {});
2110
+ };
2111
+ if (signal.aborted) {
2112
+ flushOnAbort();
2113
+ } else {
2114
+ signal.addEventListener('abort', flushOnAbort, { once: true });
2115
+ }
2116
+ }
1744
2117
 
1745
- if (persistSession && text) {
2118
+ if (text) {
1746
2119
  session.messages.push(stampedMessage('user', text));
1747
- await saveSession(session);
2120
+ if (persistSession) await saveSession(session);
1748
2121
  }
1749
2122
 
1750
2123
  const projectContextSnippet = await buildProjectContextSnippet(process.cwd(), text).catch(() => '');
@@ -1764,23 +2137,29 @@ async function askModel({
1764
2137
  }
1765
2138
  });
1766
2139
 
2140
+ const filteredDefinitions = Array.isArray(allowedTools)
2141
+ ? definitions.filter((t) => allowedTools.includes(t.function?.name || t.name))
2142
+ : definitions;
2143
+ const filteredHandlers = Array.isArray(allowedTools)
2144
+ ? Object.fromEntries(Object.entries(handlers).filter(([name]) => allowedTools.includes(name)))
2145
+ : handlers;
2146
+ const filteredDeferred = Array.isArray(allowedTools)
2147
+ ? Object.fromEntries(Object.entries(deferredDefinitions).filter(([name]) => allowedTools.includes(name)))
2148
+ : deferredDefinitions;
2149
+
1767
2150
  let activeAssistantIndex = -1;
1768
2151
  const wrappedAgentEvent = (event) => {
1769
- if (!persistSession) {
1770
- if (onAgentEvent) onAgentEvent(event);
1771
- return;
1772
- }
1773
-
2152
+ // Always accumulate messages in session (for token tracking), only save when persisting
1774
2153
  if (event?.type === 'assistant:start') {
1775
2154
  session.messages.push(stampedMessage('assistant', ''));
1776
2155
  activeAssistantIndex = session.messages.length - 1;
1777
- scheduleSessionSave();
2156
+ if (persistSession) scheduleSessionSave();
1778
2157
  } else if (event?.type === 'assistant:delta') {
1779
2158
  if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
1780
2159
  const current = session.messages[activeAssistantIndex];
1781
2160
  current.content = `${current.content || ''}${event.text || ''}`;
1782
2161
  current.at = new Date().toISOString();
1783
- scheduleSessionSave();
2162
+ if (persistSession) scheduleSessionSave();
1784
2163
  }
1785
2164
  } else if (event?.type === 'assistant:response') {
1786
2165
  if (activeAssistantIndex >= 0 && session.messages[activeAssistantIndex]) {
@@ -1796,7 +2175,7 @@ async function askModel({
1796
2175
  current.tool_calls = event.assistantMessage.tool_calls;
1797
2176
  }
1798
2177
  current.at = new Date().toISOString();
1799
- scheduleSessionSave();
2178
+ if (persistSession) scheduleSessionSave();
1800
2179
  }
1801
2180
  activeAssistantIndex = -1;
1802
2181
  } else if (event?.type === 'tool:result') {
@@ -1805,7 +2184,7 @@ async function askModel({
1805
2184
  tool_call_id: event.id || ''
1806
2185
  })
1807
2186
  );
1808
- scheduleSessionSave();
2187
+ if (persistSession) scheduleSessionSave();
1809
2188
  }
1810
2189
 
1811
2190
  if (onAgentEvent) onAgentEvent(event);
@@ -1816,9 +2195,9 @@ async function askModel({
1816
2195
  systemPrompt: effectiveSystemPrompt,
1817
2196
  userPrompt: loopUserPrompt,
1818
2197
  model: model || config.model.name,
1819
- maxSteps: Number(config.execution?.max_steps || 16),
1820
- toolDefinitions: definitions,
1821
- toolHandlers: handlers,
2198
+ maxSteps: maxStepsOverride ?? Number(config.execution?.max_steps || 16),
2199
+ toolDefinitions: filteredDefinitions,
2200
+ toolHandlers: filteredHandlers,
1822
2201
  initialMessages: toOpenAIMessages(session.messages),
1823
2202
  onEvent: wrappedAgentEvent,
1824
2203
  executionMode: executionMode || config.execution?.mode || 'auto',
@@ -1826,7 +2205,10 @@ async function askModel({
1826
2205
  alwaysAllowTools || config.execution?.always_allow_tools || ['run', 'read', 'write'],
1827
2206
  toolResultMaxChars: config.context?.tool_result_max_chars || 12000,
1828
2207
  toolFormatters: formatters,
1829
- deferredDefinitions,
2208
+ deferredDefinitions: filteredDeferred,
2209
+ requestToolApproval,
2210
+ signal,
2211
+ skipAnalysisNudge,
1830
2212
  requestCompletion: async ({ messages, tools, model: selectedModel }) => {
1831
2213
  let started = false;
1832
2214
  const startAssistantStream = () => {
@@ -1845,6 +2227,7 @@ async function askModel({
1845
2227
  tools,
1846
2228
  timeoutMs: config.gateway.timeout_ms || 90000,
1847
2229
  maxRetries: config.gateway.max_retries ?? 2,
2230
+ signal,
1848
2231
  onTextDelta: (delta) => {
1849
2232
  startAssistantStream();
1850
2233
  if (onAgentEvent) onAgentEvent({ type: 'assistant:delta', text: delta });
@@ -1875,7 +2258,7 @@ async function askModel({
1875
2258
  // keep chat usable even if pruning fails
1876
2259
  }
1877
2260
  }
1878
- return { text: loopResult.text };
2261
+ return { text: loopResult.text, aborted: !!loopResult.aborted };
1879
2262
  }
1880
2263
 
1881
2264
  async function runSubAgentTask({
@@ -1888,7 +2271,10 @@ async function runSubAgentTask({
1888
2271
  model,
1889
2272
  systemPrompt,
1890
2273
  onAgentEvent,
1891
- extraRolePrompt = ''
2274
+ extraRolePrompt = '',
2275
+ signal,
2276
+ onSessionActive,
2277
+ planFileContext = ''
1892
2278
  }) {
1893
2279
  const subSession = { id: `sub-${Date.now()}`, messages: [] };
1894
2280
  const rolePrompt = getSubAgentRolePrompt(role);
@@ -1899,11 +2285,15 @@ async function runSubAgentTask({
1899
2285
  const focusedTaskNote = buildFocusedTaskNote(role, handoffFocusPaths);
1900
2286
  const goalRequirementPacket = buildGoalRequirementPacket(goal, role);
1901
2287
  const verificationPacket = role === 'tester' ? await buildTesterVerificationPacket(handoffFocusPaths) : '';
2288
+ const planFileSection = planFileContext
2289
+ ? `Accumulated plan file context (results from prior steps):\n${planFileContext}`
2290
+ : '';
1902
2291
  const scopedTask = [
1903
2292
  contextPacket,
1904
2293
  goalRequirementPacket,
1905
2294
  evidencePacket,
1906
2295
  handoffPacket,
2296
+ planFileSection,
1907
2297
  verificationPacket,
1908
2298
  focusedTaskNote,
1909
2299
  'Task:',
@@ -1935,6 +2325,8 @@ async function runSubAgentTask({
1935
2325
  }
1936
2326
  if (onAgentEvent) onAgentEvent(evt);
1937
2327
  };
2328
+ const roleAllowedTools = ROLE_TOOL_POLICY[role];
2329
+ if (onSessionActive) onSessionActive(subSession);
1938
2330
  const subResult = await askModel({
1939
2331
  text: scopedTask,
1940
2332
  session: subSession,
@@ -1943,7 +2335,10 @@ async function runSubAgentTask({
1943
2335
  systemPrompt: `${systemPrompt}\n${rolePrompt}${extraRolePrompt ? `\n${extraRolePrompt}` : ''}`,
1944
2336
  onAgentEvent: wrappedOnAgentEvent,
1945
2337
  persistSession: false,
1946
- executionMode: 'auto'
2338
+ executionMode: 'auto',
2339
+ allowedTools: roleAllowedTools,
2340
+ skipAnalysisNudge: true,
2341
+ signal
1947
2342
  });
1948
2343
  const text = subResult.text || '';
1949
2344
  const hasErrorLine = /(^|\n)\s*error\s*:/i.test(text);
@@ -1956,6 +2351,142 @@ async function runSubAgentTask({
1956
2351
  };
1957
2352
  }
1958
2353
 
2354
/**
 * Runs the steps of an approved plan one after another, handing each step to a
 * sub-agent via `runSubAgentTask`.
 *
 * For every step the function emits a progress delta, optionally feeds the
 * accumulated plan file back in as context (from the second step onward),
 * records the step outcome, and appends the step result to the plan file.
 * The pipeline stops early when a non-final step fails or when `signal` aborts.
 *
 * @param {object} args
 * @param {object} args.planState - Plan descriptor; `steps`, `goal` and `filePath` are read.
 * @param {object} args.parentSession - Session that receives the clipped partial output on abort.
 * @param {object} args.config - Passed through to `runSubAgentTask`.
 * @param {string} args.model - Passed through to `runSubAgentTask`.
 * @param {string} args.systemPrompt - Passed through to `runSubAgentTask`.
 * @param {(event: object) => void} [args.onAgentEvent] - Receives every plan and sub-agent event.
 * @param {AbortSignal} [args.signal] - Cancels the pipeline between and during steps.
 * @param {(session: object) => void} [args.onSubSessionActive] - Forwarded as `onSessionActive`.
 * @returns {Promise<{text: string, aborted: boolean, results?: object[]}>} Summary text, abort
 *   flag and per-step records (`results` is omitted when there is nothing to execute).
 */
async function executePlanWithSubAgents({
  planState,
  parentSession,
  config,
  model,
  systemPrompt,
  onAgentEvent,
  signal,
  onSubSessionActive
}) {
  // Plans may come from model-generated JSON, so tolerate a missing plan state
  // and drop entries that are not objects instead of crashing on `step.role`.
  const steps = Array.isArray(planState?.steps)
    ? planState.steps.filter((entry) => entry !== null && typeof entry === 'object')
    : [];
  const goal = planState?.goal || '';
  const planFilePath = planState?.filePath || '';

  // Every streamed delta is mirrored here so an aborted run can still persist
  // what the user already saw.
  let partialDeltaText = '';
  const emitPlanEvent = (evt) => {
    if (evt?.type === 'assistant:delta' && evt.text) {
      partialDeltaText += String(evt.text);
    }
    if (onAgentEvent) onAgentEvent(evt);
  };

  if (steps.length === 0) {
    return { text: '(no steps to execute)', aborted: false };
  }

  const priorSteps = [];
  const results = [];

  // Emit structured plan steps so the TUI can show all steps with real role/title.
  emitPlanEvent({
    type: 'plan:steps',
    steps: steps.map((s, idx) => ({ index: idx + 1, role: s.role, title: s.title, status: 'pending' }))
  });

  for (let i = 0; i < steps.length; i += 1) {
    if (signal?.aborted) break;
    const step = steps[i];
    const isLast = i === steps.length - 1;

    emitPlanEvent({
      type: 'assistant:delta',
      text: `\n[plan] Step ${i + 1}/${steps.length} -> ${step.role}: ${step.title}\n`
    });

    // Read accumulated plan file context from prior step results (skip for the first step).
    let planFileContext = '';
    if (i > 0 && planFilePath) {
      planFileContext = await readPlanFileAsContext(planFilePath);
    }

    const stepGuidance = buildPipelineStepGuidance({
      role: step.role,
      stepIndex: i,
      totalSteps: steps.length,
      isFirst: i === 0,
      isLast,
      priorSteps
    });
    const output = await runSubAgentTask({
      role: step.role,
      task: step.task,
      goal,
      priorSteps,
      parentSession,
      config,
      model,
      systemPrompt,
      onAgentEvent: emitPlanEvent,
      extraRolePrompt: stepGuidance,
      signal,
      onSessionActive: onSubSessionActive,
      planFileContext
    });

    const outputText = output.text || '';
    // A step cut short by the abort signal only has partial output; judging it
    // against exit criteria would report a cancellation as a step failure.
    const interrupted = Boolean(signal?.aborted);
    const failed =
      !interrupted &&
      Boolean(output.hasErrorLine || stepOutputHasFailureSignals(step.role, outputText));

    let failureReason = '';
    if (failed) {
      failureReason = output.hasErrorLine
        ? 'tool or model execution error'
        : buildExitCriteriaFailureReason(step.role, outputText);
    }

    const stepRecord = {
      role: step.role,
      title: step.title,
      task: step.task,
      output: outputText,
      blockedCount: output.blockedCount || 0,
      toolErrorCount: output.toolErrorCount || 0,
      hasErrorLine: output.hasErrorLine || false,
      artifactPaths: output.artifactPaths || [],
      failed,
      aborted: interrupted,
      failureReason
    };
    priorSteps.push(stepRecord);
    results.push(stepRecord);

    // Write the step result to the plan file for subsequent steps to read.
    if (planFilePath) {
      await appendStepResultToPlanFile(
        planFilePath,
        i,
        step.title,
        step.role,
        stepRecord.output,
        stepRecord.artifactPaths
      );
    }

    // A failing final step has nothing left to skip, so only earlier failures stop the loop.
    if (interrupted || (failed && !isLast)) break;
  }

  const summaryLines = [];
  for (const record of results) {
    let tag = 'DONE';
    if (record.aborted) tag = 'ABORTED';
    else if (record.failed) tag = 'FAILED';
    summaryLines.push(`[${tag}] ${record.role}: ${record.title}`);
    summaryLines.push(record.output.slice(0, 400));
    summaryLines.push('');
  }

  const failedSteps = results.filter((r) => r.failed);
  if (failedSteps.length > 0) {
    summaryLines.push(`${failedSteps.length} step(s) had errors.`);
    const firstFailed = failedSteps[0];
    if (firstFailed?.failureReason) {
      summaryLines.push(`Pipeline stopped after exit criteria failed at [${firstFailed.role}] ${firstFailed.title}: ${firstFailed.failureReason}.`);
    }
  }

  if (signal?.aborted) {
    // Persist the streamed text (clipped) in the parent session so the
    // interrupted run leaves a record.
    const partial = partialDeltaText.trim();
    if (partial) {
      const clipped = partial.length > 6000 ? `${partial.slice(0, 6000)}\n... [partial output truncated]` : partial;
      parentSession.messages.push(stampedMessage('assistant', clipped));
      await saveSession(parentSession);
    }
  }

  return {
    text: summaryLines.join('\n'),
    aborted: !!signal?.aborted,
    results
  };
}
2489
+
1959
2490
  async function buildAutoPlanAndRun({
1960
2491
  goal,
1961
2492
  config,
@@ -1981,7 +2512,7 @@ async function buildAutoPlanAndRun({
1981
2512
  '- If the task is purely to inspect the current project and suggest improvements, a lean 2-step or 3-step plan is preferred.',
1982
2513
  '- Example advisory roles: planner -> inspect project shape, coder -> synthesize findings and prioritized recommendations.',
1983
2514
  '- Example implementation roles: planner -> inspect target area, coder -> implement change, tester -> verify changed behavior.',
1984
- 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.'
2515
+ 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester|summarizer","task":"..."}]}. No markdown.'
1985
2516
  ].join('\n');
1986
2517
  let autoPlan = {
1987
2518
  summary: `Auto plan for: ${goal}`,
@@ -2006,8 +2537,9 @@ async function buildAutoPlanAndRun({
2006
2537
  role: 'user',
2007
2538
  content: [
2008
2539
  'Create an execution plan and assign best sub-agent role for each step.',
2009
- 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.',
2010
- 'The available roles are planner, coder, reviewer, and tester. Use only the roles the task actually needs.',
2540
+ 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester|summarizer","task":"..."}]}. No markdown.',
2541
+ 'The available roles are planner, coder, reviewer, tester, and summarizer. Use only the roles the task actually needs.',
2542
+ 'The summarizer role synthesizes prior step results without re-analyzing. Use it as the final step for plans with 3+ steps.',
2011
2543
  `Task class: ${normalizedTaskClass}`,
2012
2544
  'Before choosing roles, decide whether the request is advisory, implementation, or verification-heavy.',
2013
2545
  requirementPacket,
@@ -2032,16 +2564,6 @@ async function buildAutoPlanAndRun({
2032
2564
  autoPlan = buildFallbackAutoPlan(goal);
2033
2565
  }
2034
2566
 
2035
- for (let i = 0; i < autoPlan.steps.length; i += 1) {
2036
- const step = autoPlan.steps[i];
2037
- if (onAgentEvent) {
2038
- onAgentEvent({
2039
- type: 'assistant:delta',
2040
- text: `\n[plan] Step ${i + 1}/${autoPlan.steps.length} -> ${step.role}: ${step.title}\n`
2041
- });
2042
- }
2043
- }
2044
-
2045
2567
  const finalSummary = planningError
2046
2568
  ? `Plan created with fallback guidance because planning hit an error: ${planningError}`
2047
2569
  : 'Plan created and waiting for approval before implementation.';
@@ -2067,6 +2589,17 @@ async function buildAutoPlanAndRun({
2067
2589
  lines.push('');
2068
2590
  lines.push('## Approval');
2069
2591
  lines.push('Pending user approval before implementation.');
2592
+ lines.push('');
2593
+ lines.push('## Working Memory');
2594
+ lines.push('### Findings Ledger');
2595
+ lines.push(PLAN_MEMORY_MARKERS.findings[0]);
2596
+ lines.push('- None recorded yet.');
2597
+ lines.push(PLAN_MEMORY_MARKERS.findings[1]);
2598
+ lines.push('');
2599
+ lines.push('### Progress Ledger');
2600
+ lines.push(PLAN_MEMORY_MARKERS.progress[0]);
2601
+ lines.push('- Plan created and waiting for execution.');
2602
+ lines.push(PLAN_MEMORY_MARKERS.progress[1]);
2070
2603
 
2071
2604
  const filePath = await writeMarkdownInProjectDir(
2072
2605
  'plans',
@@ -2117,8 +2650,10 @@ export async function createChatRuntime({
2117
2650
  session,
2118
2651
  config: initialConfig,
2119
2652
  model,
2120
- systemPrompt
2653
+ systemPrompt,
2654
+ requestToolApproval
2121
2655
  }) {
2656
+ let activeRequestToolApproval = typeof requestToolApproval === 'function' ? requestToolApproval : null;
2122
2657
  const startupEvents = [];
2123
2658
  const initialIndex = await initializeProjectIndex(process.cwd()).catch(() => null);
2124
2659
  if (initialIndex?.summary) {
@@ -2255,7 +2790,8 @@ export async function createChatRuntime({
2255
2790
  { name: 'memory', description: completionCopy.commands.memory },
2256
2791
  { name: 'history', description: completionCopy.commands.history },
2257
2792
  { name: 'debug', description: completionCopy.commands.debug },
2258
- { name: 'retry', description: completionCopy.commands.retry }
2793
+ { name: 'retry', description: completionCopy.commands.retry },
2794
+ { name: 'stop', description: completionCopy.commands.stop }
2259
2795
  ];
2260
2796
  const out = [];
2261
2797
  for (const cmd of commands.values()) {
@@ -2299,7 +2835,7 @@ export async function createChatRuntime({
2299
2835
  ];
2300
2836
  const specTemplates = ['/spec <topic>'];
2301
2837
  const planTemplates = ['/plan <goal>', '/plan auto <goal>', '/plan auto run <goal>', '/plan approve', '/plan from-spec <spec-path?>'];
2302
- const agentTemplates = ['/agents list', '/agents run planner <task>', '/agents run coder <task>', '/agents run reviewer <task>', '/agents run tester <task>'];
2838
+ const agentTemplates = ['/agents list', '/agents run planner <task>', '/agents run coder <task>', '/agents run reviewer <task>', '/agents run tester <task>', '/agents run summarizer <task>'];
2303
2839
  const debugTemplates = ['/debug keys on', '/debug keys off', '/debug keys status'];
2304
2840
  const compactTemplates = compactOptions.map((opt) => `/compact ${opt}`);
2305
2841
  const slashTemplates = [
@@ -2529,7 +3065,7 @@ export async function createChatRuntime({
2529
3065
  if (tokens.length === 1 || (tokens.length === 2 && !hasTrailingSpace)) {
2530
3066
  const sub = tokens[1] || '';
2531
3067
  if (sub === 'run') {
2532
- return ['planner', 'coder', 'reviewer', 'tester']
3068
+ return ['planner', 'coder', 'reviewer', 'tester', 'summarizer']
2533
3069
  .map((r) => registerSuggestion(`/agents run ${r} `, completionCopy.generic.agentCommand));
2534
3070
  }
2535
3071
  return ['list', 'run']
@@ -2610,6 +3146,22 @@ export async function createChatRuntime({
2610
3146
  await saveSession(currentSession);
2611
3147
  };
2612
3148
 
3149
+ const persistAssistantExchange = async (userText, assistantText, { includeUser = true } = {}) => {
3150
+ if (includeUser && userText) {
3151
+ currentSession.messages.push(stampedMessage('user', userText));
3152
+ }
3153
+ if (assistantText) {
3154
+ currentSession.messages.push(stampedMessage('assistant', assistantText));
3155
+ }
3156
+ await saveSession(currentSession);
3157
+ };
3158
+
3159
+ const persistUserExchange = async (userText) => {
3160
+ if (!userText) return;
3161
+ currentSession.messages.push(stampedMessage('user', userText));
3162
+ await saveSession(currentSession);
3163
+ };
3164
+
2613
3165
  const buildActiveSystemPrompt = async () => {
2614
3166
  const soulPrompt = await buildSystemPromptWithSoul(baseSystemPrompt, config);
2615
3167
  const memorySnapshot = await buildMemorySnapshot({
@@ -2646,7 +3198,14 @@ export async function createChatRuntime({
2646
3198
  return localCommands.has(command);
2647
3199
  };
2648
3200
 
3201
+ // Reference to the current AbortController, used to abort the in-progress reply
3202
+ let activeAbortController = null;
3203
+ let activeSubSession = null;
3204
+
2649
3205
  const submit = async (line, onAgentEvent) => {
3206
+ // Each submit creates a fresh AbortController that replaces the previous one
3207
+ activeAbortController = new AbortController();
3208
+ const { signal } = activeAbortController;
2650
3209
  const activeReplySystemPrompt = await buildActiveSystemPrompt();
2651
3210
  try {
2652
3211
  await appendInputHistory(line);
@@ -2666,7 +3225,7 @@ export async function createChatRuntime({
2666
3225
  if (parsedInput.command === 'help') {
2667
3226
  return {
2668
3227
  type: 'system',
2669
- text: 'Commands: /help /exit /commands /status /mode /compact /checkpoint /spec /plan /agents /config /memory /history /debug /retry /<custom> !<shell>'
3228
+ text: 'Commands: /help /exit /stop /commands /status /mode /compact /checkpoint /spec /plan /agents /config /memory /history /debug /retry /<custom> !<shell>'
2670
3229
  };
2671
3230
  }
2672
3231
  if (parsedInput.command === 'status') {
@@ -2773,6 +3332,9 @@ export async function createChatRuntime({
2773
3332
  const runImmediately = (parsedInput.args[1] || '').trim().toLowerCase() === 'run';
2774
3333
  const goal = parsedInput.args.slice(runImmediately ? 2 : 1).join(' ').trim();
2775
3334
  if (!goal) return { type: 'system', text: 'Usage: /plan auto <goal> | /plan auto run <goal>' };
3335
+ if (runImmediately) {
3336
+ await persistUserExchange(line);
3337
+ }
2776
3338
  const auto = await buildAutoPlanAndRun({
2777
3339
  goal,
2778
3340
  session: currentSession,
@@ -2784,30 +3346,30 @@ export async function createChatRuntime({
2784
3346
  taskClass: classifyPlanTaskClass(goal)
2785
3347
  });
2786
3348
  if (runImmediately) {
2787
- const result = await askModel({
2788
- text: buildApprovedPlanExecutionPrompt(
2789
- {
2790
- status: 'approved',
2791
- source: 'auto',
2792
- goal,
2793
- filePath: auto.filePath,
2794
- summary: auto.summary || '',
2795
- finalSummary: auto.finalSummary || auto.summary || '',
2796
- steps: Array.isArray(auto.steps) ? auto.steps : []
2797
- },
2798
- '/plan auto run'
2799
- ),
2800
- session: currentSession,
3349
+ const planState = {
3350
+ status: 'approved',
3351
+ source: 'auto',
3352
+ goal,
3353
+ filePath: auto.filePath,
3354
+ summary: auto.summary || '',
3355
+ finalSummary: auto.finalSummary || auto.summary || '',
3356
+ steps: Array.isArray(auto.steps) ? auto.steps : []
3357
+ };
3358
+ const result = await executePlanWithSubAgents({
3359
+ planState,
3360
+ parentSession: currentSession,
2801
3361
  config,
2802
3362
  model,
2803
- systemPrompt: activeReplySystemPrompt,
3363
+ systemPrompt: baseSystemPrompt,
2804
3364
  onAgentEvent,
2805
- executionMode: 'auto'
3365
+ signal,
3366
+ onSubSessionActive: (sub) => { activeSubSession = sub; }
2806
3367
  });
3368
+ activeSubSession = null;
2807
3369
  currentSession.planState = null;
2808
3370
  executionMode = 'auto';
2809
- await saveSession(currentSession);
2810
- return { type: 'assistant', text: result.text };
3371
+ await persistAssistantExchange(line, result.text || '', { includeUser: false });
3372
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
2811
3373
  }
2812
3374
  currentSession.planState = {
2813
3375
  status: 'pending_approval',
@@ -2830,20 +3392,23 @@ export async function createChatRuntime({
2830
3392
  if (!hasPendingPlanApproval(currentSession)) {
2831
3393
  return { type: 'system', text: 'No pending plan approval. Use /plan auto <goal> or /plan <goal> first.' };
2832
3394
  }
3395
+ await persistUserExchange(line);
2833
3396
  const planState = { ...currentSession.planState };
2834
- const result = await askModel({
2835
- text: buildApprovedPlanExecutionPrompt(planState, '/plan approve'),
2836
- session: currentSession,
3397
+ const result = await executePlanWithSubAgents({
3398
+ planState,
3399
+ parentSession: currentSession,
2837
3400
  config,
2838
3401
  model,
2839
- systemPrompt: activeReplySystemPrompt,
3402
+ systemPrompt: baseSystemPrompt,
2840
3403
  onAgentEvent,
2841
- executionMode: 'auto'
3404
+ signal,
3405
+ onSubSessionActive: (sub) => { activeSubSession = sub; }
2842
3406
  });
3407
+ activeSubSession = null;
2843
3408
  currentSession.planState = null;
2844
3409
  executionMode = 'auto';
2845
- await saveSession(currentSession);
2846
- return { type: 'assistant', text: result.text };
3410
+ await persistAssistantExchange(line, result.text || '', { includeUser: false });
3411
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
2847
3412
  }
2848
3413
  if (sub === 'stay') {
2849
3414
  if (!hasPendingPlanApproval(currentSession)) {
@@ -2906,7 +3471,7 @@ export async function createChatRuntime({
2906
3471
  if (sub === 'list') {
2907
3472
  return {
2908
3473
  type: 'system',
2909
- text: 'Sub-agent roles: planner, coder, reviewer, tester\nUse: /agents run <role> <task>'
3474
+ text: 'Sub-agent roles: planner, coder, reviewer, tester, summarizer\nUse: /agents run <role> <task>'
2910
3475
  };
2911
3476
  }
2912
3477
  if (sub === 'run') {
@@ -2914,7 +3479,7 @@ export async function createChatRuntime({
2914
3479
  const task = parsedInput.args.slice(2).join(' ').trim();
2915
3480
  if (!role || !task) return { type: 'system', text: 'Usage: /agents run <role> <task>' };
2916
3481
  if (!SUB_AGENT_ROLES.includes(role)) {
2917
- return { type: 'system', text: 'Unknown role. Allowed: planner|coder|reviewer|tester' };
3482
+ return { type: 'system', text: 'Unknown role. Allowed: planner|coder|reviewer|tester|summarizer' };
2918
3483
  }
2919
3484
  const output = await runSubAgentTask({
2920
3485
  role,
@@ -3044,9 +3609,11 @@ export async function createChatRuntime({
3044
3609
  model,
3045
3610
  systemPrompt: activeReplySystemPrompt,
3046
3611
  onAgentEvent,
3047
- executionMode
3612
+ requestToolApproval: activeRequestToolApproval,
3613
+ executionMode,
3614
+ signal
3048
3615
  });
3049
- return { type: 'assistant', text: result.text };
3616
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
3050
3617
  }
3051
3618
  if (parsedInput.command === 'config') {
3052
3619
  const sub = parsedInput.args[0];
@@ -3180,7 +3747,9 @@ export async function createChatRuntime({
3180
3747
  model,
3181
3748
  systemPrompt: activeReplySystemPrompt,
3182
3749
  onAgentEvent,
3183
- executionMode
3750
+ requestToolApproval: activeRequestToolApproval,
3751
+ executionMode,
3752
+ signal
3184
3753
  });
3185
3754
  } catch (error) {
3186
3755
  if (custom.metadata.type === 'skill' && onAgentEvent) {
@@ -3200,20 +3769,23 @@ export async function createChatRuntime({
3200
3769
 
3201
3770
  if (hasPendingPlanApproval(currentSession)) {
3202
3771
  if (isApprovalText(parsedInput.text)) {
3772
+ await persistUserExchange(line);
3203
3773
  const planState = { ...currentSession.planState };
3204
- const result = await askModel({
3205
- text: buildApprovedPlanExecutionPrompt(planState, parsedInput.text),
3206
- session: currentSession,
3774
+ const result = await executePlanWithSubAgents({
3775
+ planState,
3776
+ parentSession: currentSession,
3207
3777
  config,
3208
3778
  model,
3209
- systemPrompt: activeReplySystemPrompt,
3779
+ systemPrompt: baseSystemPrompt,
3210
3780
  onAgentEvent,
3211
- executionMode: 'auto'
3781
+ signal,
3782
+ onSubSessionActive: (sub) => { activeSubSession = sub; }
3212
3783
  });
3784
+ activeSubSession = null;
3213
3785
  currentSession.planState = null;
3214
3786
  executionMode = 'auto';
3215
- await saveSession(currentSession);
3216
- return { type: 'assistant', text: result.text };
3787
+ await persistAssistantExchange(line, result.text || '', { includeUser: false });
3788
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
3217
3789
  }
3218
3790
  if (isStayInPlanText(parsedInput.text)) {
3219
3791
  const text = buildPendingPlanApprovalMessage(currentSession.planState);
@@ -3299,9 +3871,11 @@ export async function createChatRuntime({
3299
3871
  model,
3300
3872
  systemPrompt: routedSystemPrompt,
3301
3873
  onAgentEvent,
3302
- executionMode
3874
+ requestToolApproval: activeRequestToolApproval,
3875
+ executionMode,
3876
+ signal
3303
3877
  });
3304
- return { type: 'assistant', text: result.text };
3878
+ return { type: 'assistant', text: result.text, aborted: !!result.aborted };
3305
3879
  };
3306
3880
 
3307
3881
  return {
@@ -3309,15 +3883,27 @@ export async function createChatRuntime({
3309
3883
  getCompletionOptions,
3310
3884
  isImmediateLocalInput,
3311
3885
  submit,
3886
+ abort: () => {
3887
+ if (activeAbortController && !activeAbortController.signal.aborted) {
3888
+ activeAbortController.abort();
3889
+ return true;
3890
+ }
3891
+ return false;
3892
+ },
3312
3893
  consumeStartupEvents: () => startupEvents.splice(0, startupEvents.length),
3313
3894
  getInputHistory: () => loadInputHistory(),
3314
3895
  getCurrentSessionId: () => currentSession.id,
3896
+ setRequestToolApproval: (handler) => {
3897
+ activeRequestToolApproval = typeof handler === 'function' ? handler : null;
3898
+ return true;
3899
+ },
3315
3900
  getRuntimeState: () =>
3316
3901
  buildRuntimeStateSnapshot({
3317
3902
  currentSession,
3318
3903
  config,
3319
3904
  model,
3320
- executionMode
3905
+ executionMode,
3906
+ extraSession: activeSubSession
3321
3907
  })
3322
3908
  };
3323
3909
  }