codemini-cli 0.2.3 → 0.2.5

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codemini-cli",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "Coding CLI optimized for small-model workflows and Windows PowerShell",
5
5
  "keywords": [
6
6
  "cli",
@@ -18,24 +18,35 @@ Routing:
18
18
  - execute directly
19
19
  - do not force brainstorming
20
20
 
21
- 2. If the goal is clear but there are multiple reasonable implementation paths:
21
+ 2. If the task is a non-trivial implementation that likely needs codebase exploration, touches multiple areas, changes shared behavior, or needs explicit review/testing before coding:
22
+ - prefer `auto plan`
23
+ - inspect first, then present a short implementation plan for approval
24
+ - do not jump straight into coding
25
+ - do not use `brainstorm` as a substitute for implementation planning
26
+
27
+ 3. If the goal is clear but there are multiple reasonable implementation paths and the missing piece is mainly user preference, tradeoff choice, or one key constraint:
22
28
  - use `brainstorm`
23
29
  - ask exactly one clarifying question first
24
30
  - do not give options, recommendations, or a tentative solution in the same response
25
31
  - stop after the question and wait for the user's answer before continuing
26
32
 
27
- 3. If the request is still missing a key constraint or success condition:
33
+ 4. If the request is still missing a key constraint or success condition:
28
34
  - ask exactly one clarifying question
29
35
  - do not give options yet
30
36
  - do not write code yet
31
37
  - stop after the question and wait for the user's answer
32
38
 
33
- 4. If the request is greenfield and underspecified, such as "build a page", "make a site", "generate an app", or similar:
39
+ 5. If the request is greenfield and underspecified, such as "build a page", "make a site", "generate an app", or similar:
34
40
  - treat it as missing key constraints by default
35
41
  - ask one high-value question before coding
36
42
  - do not assume features, storage model, or scope unless the user already gave them
37
43
  - stop after the question and wait for the user's answer
38
44
 
45
+ Decision boundary:
46
+ - Use `brainstorm` when one focused user answer will determine the direction.
47
+ - Use `auto plan` when the task is already implementation-shaped but the work is large enough that you should explore first and get sign-off on the plan.
48
+ - If both could apply, prefer `brainstorm` first when the core uncertainty is user intent; prefer `auto plan` first when the core uncertainty is codebase impact and execution shape.
49
+
39
50
  Tool order:
40
51
  - prefer `grep` first for content search and candidate discovery
41
52
  - use `read` to inspect the smallest useful code block
@@ -74,7 +85,7 @@ Run the relevant test, check, or command before saying work is fixed or complete
74
85
  Default workflow:
75
86
  - Search with `grep`
76
87
  - Inspect local context with `read`
77
- - If the request is unclear, first decide: ask one question, brainstorm, or proceed
88
+ - If the request is unclear, first decide: ask one question, brainstorm, auto plan, or proceed
78
89
  - Plan the next smallest step
79
90
  - Delegate if the work is independent
80
91
  - Edit with `edit`
package/src/cli.js CHANGED
@@ -4,7 +4,7 @@ import { handleConfig } from './commands/config.js';
4
4
  import { handleDoctor } from './commands/doctor.js';
5
5
  import { handleSkill } from './commands/skill.js';
6
6
 
7
- const VERSION = '0.2.3';
7
+ const VERSION = '0.2.5';
8
8
 
9
9
  function printHelp() {
10
10
  console.log(`codemini ${VERSION}
@@ -450,7 +450,16 @@ export async function runAgentLoop({
450
450
  }
451
451
 
452
452
  if (executionMode === 'plan') {
453
- finalText = `${assistantText || ''}\n\n[plan mode] ${toolCalls.length} tool call(s) were planned but not executed.`;
453
+ const plannedLines = callsToPlanSummary(toolCalls);
454
+ finalText = [
455
+ assistantText || '',
456
+ '',
457
+ `[plan mode] ${toolCalls.length} tool call(s) were planned but not executed.`,
458
+ plannedLines.length > 0 ? 'Planned exploration:' : '',
459
+ ...plannedLines
460
+ ]
461
+ .filter(Boolean)
462
+ .join('\n');
454
463
  return { text: finalText.trim(), messages, steps: step + 1 };
455
464
  }
456
465
 
@@ -598,3 +607,12 @@ export async function runAgentLoop({
598
607
  steps: maxSteps
599
608
  };
600
609
  }
610
+
611
+ function callsToPlanSummary(toolCalls = []) {
612
+ return toolCalls
613
+ .slice(0, 8)
614
+ .map((call) => {
615
+ const args = safeJsonParse(call?.arguments);
616
+ return `- ${formatToolDisplayName(normalizeToolCallName(call?.name), args)}`;
617
+ });
618
+ }
@@ -434,11 +434,15 @@ function buildGoalRequirementPacket(goal, role) {
434
434
 
435
435
  function buildAutoPlanPlannerGuidance() {
436
436
  return [
437
+ 'Design a short implementation plan for a small model.',
437
438
  'Auto-plan planning rules:',
439
+ '- Start with a discovery or clarification step when the current implementation is not yet verified.',
438
440
  '- If the goal still leaves room for multiple approaches, choose one practical direction before planning execution.',
439
441
  '- Prefer the smallest local approach that satisfies the goal.',
440
442
  '- Do not output multiple alternative branches in the final plan.',
441
- '- Turn the chosen direction into concrete execution steps for coder, reviewer, and tester.',
443
+ '- Do not assume implementation should begin before the plan is coherent.',
444
+ '- Turn the chosen direction into concrete execution steps for planner, coder, reviewer, and tester.',
445
+ '- Prefer 3-5 steps total unless the task is clearly larger.',
442
446
  '- Keep the plan ordered, implementation-oriented, and easy for small sub-agents to follow.'
443
447
  ].join('\n');
444
448
  }
@@ -594,8 +598,79 @@ function selectAutoSkillNames(text = '') {
594
598
  return selected;
595
599
  }
596
600
 
601
+ function shouldAutoPlan(text = '') {
602
+ const input = String(text || '').trim();
603
+ if (!input) return false;
604
+
605
+ const lower = input.toLowerCase();
606
+ const explicitPlanning =
607
+ /(\/plan\b|plan first|make a plan|implementation plan|先做计划|先出方案|先规划|先计划)/i.test(lower);
608
+ if (explicitPlanning) return false;
609
+
610
+ const simpleSkip =
611
+ /(typo|readme|console\.log|log this|rename\s+\w+|one line|small tweak|tiny fix|格式化|拼写|注释|文案|小改|微调)/i.test(
612
+ lower
613
+ );
614
+ if (simpleSkip) return false;
615
+
616
+ const discussionFirst =
617
+ /(brainstorm|头脑风暴|方案|思路|怎么做|如何做|which (?:approach|option|way)|best way|trade-?off|not sure|unsure|unclear|whether it should|要不要|不确定|先别写|先不要写|先讨论|先想一下)/i.test(
618
+ lower
619
+ );
620
+ if (discussionFirst) return false;
621
+
622
+ const implementationRequest =
623
+ /\b(add|build|create|implement|support|introduce|design|refactor|rework|migrate|change|update|rewrite|restructure)\b/i.test(
624
+ lower
625
+ ) ||
626
+ /(新增|增加|实现|支持|设计|重构|改造|迁移|调整|重写|重做)/i.test(lower);
627
+ if (!implementationRequest) return false;
628
+
629
+ const nonTrivialSignals =
630
+ /\b(auth|authentication|workflow|flow|system|architecture|api|endpoint|state management|cache|caching|database|migration|service|shared helper|helper module|refactor|multi[- ]file|across files|with tests?|and tests?|with validation|error handling)\b/i.test(
631
+ lower
632
+ ) ||
633
+ /(架构|流程|系统|接口|缓存|数据库|迁移|服务|共享|模块|跨文件|测试|校验|错误处理)/i.test(lower);
634
+
635
+ const multipleActions = /\b(and|plus|also|while|along with)\b/i.test(lower) || /[,、;;].+/.test(input);
636
+ const singleFileScoped =
637
+ /\b(?:in|inside|within|only in)\s+[-_/.\w]+\.(?:[cm]?[jt]sx?|py|go|rb|java|rs|php|md)\b/i.test(lower) ||
638
+ /\b(?:src|app|lib|tests?)\/[-_/.\w]+\.(?:[cm]?[jt]sx?|py|go|rb|java|rs|php|md)\b/i.test(lower);
639
+
640
+ if (singleFileScoped && !multipleActions) return false;
641
+ if (singleFileScoped && !nonTrivialSignals) return false;
642
+
643
+ return nonTrivialSignals || (multipleActions && !singleFileScoped);
644
+ }
645
+
646
+ function classifyAutoRoute(text = '') {
647
+ const selectedSkills = selectAutoSkillNames(text);
648
+ const hasBrainstorm = selectedSkills.includes('brainstorm');
649
+ if (hasBrainstorm) {
650
+ return {
651
+ mode: 'brainstorm',
652
+ autoPlan: false,
653
+ selectedSkills
654
+ };
655
+ }
656
+
657
+ if (shouldAutoPlan(text)) {
658
+ return {
659
+ mode: 'auto_plan',
660
+ autoPlan: true,
661
+ selectedSkills: ['superpowers-lite']
662
+ };
663
+ }
664
+
665
+ return {
666
+ mode: 'direct',
667
+ autoPlan: false,
668
+ selectedSkills
669
+ };
670
+ }
671
+
597
672
  function buildAutoSkillSystemPrompt(baseSystemPrompt, commands, config, text) {
598
- const selected = selectAutoSkillNames(text).filter((name) => isSkillEnabled(config, name));
673
+ const selected = classifyAutoRoute(text).selectedSkills.filter((name) => isSkillEnabled(config, name));
599
674
  if (selected.length === 0) return baseSystemPrompt;
600
675
 
601
676
  const blocks = [];
@@ -662,6 +737,74 @@ function normalizeAutoPlan(parsed, goal) {
662
737
  return enforceAutoPlanGuardrailSteps(basePlan, goal);
663
738
  }
664
739
 
740
+ function summarizeGoalForStepTitle(goal, fallback = 'requested change') {
741
+ const text = String(goal || '')
742
+ .replace(/\s+/g, ' ')
743
+ .trim();
744
+ if (!text) return fallback;
745
+ const compact = text.length > 72 ? `${text.slice(0, 69).trimEnd()}...` : text;
746
+ return compact;
747
+ }
748
+
749
+ function buildFallbackAutoPlan(goal) {
750
+ const requirements = deriveGoalRequirements(goal);
751
+ const lightweightGoal = isLightweightAutoPlanGoal(goal, requirements);
752
+ const focus = summarizeGoalForStepTitle(goal);
753
+ const summary =
754
+ requirements.length > 0
755
+ ? `Auto fallback plan for: ${requirements.join('; ')}`
756
+ : `Auto fallback plan for: ${goal}`;
757
+
758
+ if (lightweightGoal) {
759
+ return {
760
+ summary,
761
+ steps: [
762
+ {
763
+ title: `Implement ${focus}`,
764
+ role: 'coder',
765
+ task: `Implement the requested change for: ${goal}. Follow the acceptance checklist and keep the change narrowly scoped.`
766
+ },
767
+ {
768
+ title: 'Verify the change',
769
+ role: 'tester',
770
+ task: `Verify the completed change for: ${goal}. Run the most relevant focused checks available and report concrete evidence plus anything still unverified.`
771
+ }
772
+ ]
773
+ };
774
+ }
775
+
776
+ return {
777
+ summary,
778
+ steps: [
779
+ {
780
+ title: 'Inspect the target area',
781
+ role: 'planner',
782
+ task: `Inspect the existing code paths, affected files, and current behavior for: ${goal}. Identify constraints, dependencies, and any compatibility risks before implementation.`
783
+ },
784
+ {
785
+ title: `Implement ${focus}`,
786
+ role: 'coder',
787
+ task: `Implement the requested changes for: ${goal}. Keep the behavior aligned with the acceptance checklist and preserve existing external behavior unless the goal explicitly changes it.`
788
+ },
789
+ {
790
+ title: 'Update or add focused verification',
791
+ role: 'coder',
792
+ task: `Add or update the most relevant tests and focused verification coverage for: ${goal}. Prefer narrow checks tied to the changed files and flows.`
793
+ },
794
+ {
795
+ title: 'Review for regressions and gaps',
796
+ role: 'reviewer',
797
+ task: `Review the completed work for: ${goal}. Start with the changed files, then check regressions, risky assumptions, backward compatibility, and missing edge cases.`
798
+ },
799
+ {
800
+ title: 'Verify the changed flows',
801
+ role: 'tester',
802
+ task: `Verify the completed work for: ${goal}. Run the most relevant checks available, report concrete evidence, and call out anything still not verified.`
803
+ }
804
+ ]
805
+ };
806
+ }
807
+
665
808
  function enforceAutoPlanGuardrailSteps(plan, goal) {
666
809
  const source = Array.isArray(plan?.steps) ? plan.steps : [];
667
810
  const requirements = deriveGoalRequirements(goal);
@@ -770,6 +913,7 @@ function buildAutoPlanSystemSummary(auto) {
770
913
  `File: ${auto.filePath}`,
771
914
  `Plan Summary: ${auto.summary || '-'}`,
772
915
  `Final Summary: ${auto.finalSummary || auto.summary || '-'}`,
916
+ `Approval: ${auto.approvalStatus || 'not_required'}`,
773
917
  `Steps: ${auto.steps.length} total`,
774
918
  `Completed: ${auto.completedCount}`,
775
919
  `Warnings: ${auto.warningCount}`,
@@ -781,6 +925,9 @@ function buildAutoPlanSystemSummary(auto) {
781
925
  if (auto.failedTitles?.length) {
782
926
  lines.push(`Failed steps: ${auto.failedTitles.slice(0, 5).join(', ')}`);
783
927
  }
928
+ if (auto.approvalStatus === 'pending') {
929
+ lines.push('Next: review the plan summary, then use /plan approve to start implementation or /plan stay to keep planning.');
930
+ }
784
931
  return lines.join('\n');
785
932
  }
786
933
 
@@ -1142,6 +1289,11 @@ function buildRuntimeStateSnapshot({ currentSession, config, model, executionMod
1142
1289
  value: contextUsagePct,
1143
1290
  enumerable: false,
1144
1291
  writable: false
1292
+ },
1293
+ pendingPlanApproval: {
1294
+ value: currentSession?.planState?.status === 'pending_approval',
1295
+ enumerable: false,
1296
+ writable: false
1145
1297
  }
1146
1298
  });
1147
1299
  return snapshot;
@@ -1164,6 +1316,61 @@ function stampedMessage(role, content, extra = {}) {
1164
1316
  };
1165
1317
  }
1166
1318
 
1319
+ function hasPendingPlanApproval(session) {
1320
+ return session?.planState?.status === 'pending_approval';
1321
+ }
1322
+
1323
+ function isApprovalText(text = '') {
1324
+ const value = String(text || '').trim().toLowerCase();
1325
+ if (!value) return false;
1326
+ return /^(yes|y|ok|okay|approve|approved|continue|proceed|go ahead|start|开始|继续|可以|同意|批准|通过|按这个做)$/.test(value);
1327
+ }
1328
+
1329
+ function isStayInPlanText(text = '') {
1330
+ const value = String(text || '').trim().toLowerCase();
1331
+ if (!value) return false;
1332
+ return /^(stay|keep planning|keep in plan mode|not yet|wait|先别|先等等|继续计划|继续讨论|继续规划|暂不批准)$/.test(value);
1333
+ }
1334
+
1335
+ function buildPendingPlanApprovalMessage(planState) {
1336
+ const lines = [
1337
+ 'Plan approval is still pending.',
1338
+ `Goal: ${planState?.goal || '-'}`,
1339
+ `Plan file: ${planState?.filePath || '-'}`,
1340
+ `Summary: ${planState?.finalSummary || planState?.summary || '-'}`,
1341
+ 'Use /plan approve to start implementation, or /plan stay to keep refining the plan first.'
1342
+ ];
1343
+ return lines.join('\n');
1344
+ }
1345
+
1346
+ function buildApprovedPlanExecutionPrompt(planState, approvalText = '') {
1347
+ const lines = [
1348
+ 'Approved implementation plan:',
1349
+ `Original goal: ${planState?.goal || '-'}`,
1350
+ `Plan file: ${planState?.filePath || '-'}`,
1351
+ `Plan summary: ${planState?.summary || '-'}`,
1352
+ `Final planning summary: ${planState?.finalSummary || planState?.summary || '-'}`,
1353
+ `User approval: ${String(approvalText || '').trim() || 'approved'}`,
1354
+ Array.isArray(planState?.steps) && planState.steps.length > 0 ? 'Planned steps:' : '',
1355
+ ...(Array.isArray(planState?.steps)
1356
+ ? planState.steps.slice(0, 8).map((step, index) => `${index + 1}. [${step.role}] ${step.title} :: ${step.task}`)
1357
+ : []),
1358
+ 'Proceed with implementation now.',
1359
+ 'Follow the approved direction unless a blocking contradiction appears.',
1360
+ 'Output rules for this implementation phase:',
1361
+ '- Be concise and practical.',
1362
+ '- Do not celebrate, praise, or use emojis.',
1363
+ '- Do not restate the full plan back to the user.',
1364
+ '- If the work is already done, say so briefly and cite the verification evidence.',
1365
+ '- After implementation or verification, prefer a short result summary in 3-6 lines.',
1366
+ '- If the work is complete, use this exact structure:',
1367
+ 'Status: <done|partial|blocked>',
1368
+ 'Verified: <tests, checks, or evidence>',
1369
+ 'Next: <none or the single next action>'
1370
+ ];
1371
+ return lines.join('\n');
1372
+ }
1373
+
1167
1374
  async function resolveSpecPath(rawArg = '', sessionId = '') {
1168
1375
  const input = String(rawArg || '').trim();
1169
1376
  const roots = [
@@ -1493,7 +1700,6 @@ async function runSubAgentTask({
1493
1700
 
1494
1701
  async function buildAutoPlanAndRun({
1495
1702
  goal,
1496
- session,
1497
1703
  config,
1498
1704
  model,
1499
1705
  systemPrompt,
@@ -1527,8 +1733,12 @@ async function buildAutoPlanAndRun({
1527
1733
  role: 'user',
1528
1734
  content: [
1529
1735
  'Create an execution plan and assign best sub-agent role for each step.',
1736
+ 'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.',
1737
+ 'Always include final reviewer and tester steps unless the task is explicitly tiny.',
1530
1738
  requirementPacket,
1531
- 'The final steps must include review and testing/verification unless the goal is a tiny single-change task, in which case you may keep only one implementation step plus one testing/verification step.'
1739
+ 'The first step should usually inspect or clarify the target area before implementation.',
1740
+ 'The final steps must include review and testing/verification unless the goal is a tiny single-change task, in which case you may keep only one implementation step plus one testing/verification step.',
1741
+ 'Prefer 3-5 steps total.'
1532
1742
  ]
1533
1743
  .filter(Boolean)
1534
1744
  .join('\n')
@@ -1541,89 +1751,22 @@ async function buildAutoPlanAndRun({
1541
1751
  autoPlan = normalizeAutoPlan(parsed, goal);
1542
1752
  } catch (err) {
1543
1753
  planningError = String(err?.message || err || 'planning failed');
1754
+ autoPlan = buildFallbackAutoPlan(goal);
1544
1755
  }
1545
1756
 
1546
- const runItems = [];
1547
- const totalPlanSteps = autoPlan.steps.length + 1;
1548
1757
  for (let i = 0; i < autoPlan.steps.length; i += 1) {
1549
1758
  const step = autoPlan.steps[i];
1550
1759
  if (onAgentEvent) {
1551
1760
  onAgentEvent({
1552
1761
  type: 'assistant:delta',
1553
- text: `\n[plan] Step ${i + 1}/${totalPlanSteps} -> ${step.role}: ${step.title}\n`
1554
- });
1555
- }
1556
- try {
1557
- const stepResult = await runSubAgentTask({
1558
- role: step.role,
1559
- task: step.task,
1560
- goal,
1561
- priorSteps: runItems,
1562
- parentSession: session,
1563
- config,
1564
- model,
1565
- systemPrompt,
1566
- onAgentEvent,
1567
- extraRolePrompt: buildAutoPlanExecutionGuidance(step.role)
1568
- });
1569
- const outputLooksSuccessful = looksLikeSuccessfulStepOutput(stepResult.text);
1570
- const outputHasFailureSignals = stepOutputHasFailureSignals(step.role, stepResult.text);
1571
- const warningParts = [];
1572
- if (stepResult.blockedCount > 0) warningParts.push(`${stepResult.blockedCount} blocked tool call(s)`);
1573
- if (stepResult.toolErrorCount > 0) warningParts.push(`${stepResult.toolErrorCount} tool error(s)`);
1574
- const warning = warningParts.length > 0 ? `sub-agent recovered after ${warningParts.join(', ')}` : '';
1575
- const failed =
1576
- stepResult.hasErrorLine ||
1577
- outputHasFailureSignals ||
1578
- (!outputLooksSuccessful && (stepResult.blockedCount > 0 || stepResult.toolErrorCount > 0));
1579
- let error = '';
1580
- if (stepResult.hasErrorLine) {
1581
- error = 'sub-agent output contains error line(s)';
1582
- } else if (outputHasFailureSignals) {
1583
- error = 'sub-agent output reports unmet requirements or failed verification';
1584
- } else if (failed && stepResult.blockedCount > 0) {
1585
- error = `sub-agent ended with ${stepResult.blockedCount} blocked tool call(s)`;
1586
- } else if (failed && stepResult.toolErrorCount > 0) {
1587
- error = `sub-agent ended with ${stepResult.toolErrorCount} tool error(s)`;
1588
- }
1589
- runItems.push({
1590
- ...step,
1591
- output: stepResult.text,
1592
- error,
1593
- warning,
1594
- failed,
1595
- artifactPaths: stepResult.artifactPaths || []
1596
- });
1597
- } catch (err) {
1598
- runItems.push({
1599
- ...step,
1600
- output: '',
1601
- error: String(err?.message || err || 'sub-agent step failed'),
1602
- warning: '',
1603
- failed: true
1762
+ text: `\n[plan] Step ${i + 1}/${autoPlan.steps.length} -> ${step.role}: ${step.title}\n`
1604
1763
  });
1605
1764
  }
1606
1765
  }
1607
1766
 
1608
- const failedItems = runItems.filter((s) => s.failed || s.error);
1609
- const warningItems = runItems.filter((s) => !s.failed && s.warning);
1610
- const completedItems = runItems.filter((s) => !s.failed);
1611
-
1612
- if (onAgentEvent) {
1613
- onAgentEvent({
1614
- type: 'assistant:delta',
1615
- text: `\n[plan] Step ${totalPlanSteps}/${totalPlanSteps} -> summarizer: Final summary\n`
1616
- });
1617
- }
1618
- const finalSummary = await buildAutoPlanFinalSummary({
1619
- goal,
1620
- autoPlan,
1621
- runItems,
1622
- planningError,
1623
- config,
1624
- model,
1625
- systemPrompt
1626
- });
1767
+ const finalSummary = planningError
1768
+ ? `Plan created with fallback guidance because planning hit an error: ${planningError}`
1769
+ : 'Plan created and waiting for approval before implementation.';
1627
1770
 
1628
1771
  const lines = [];
1629
1772
  lines.push(`# Auto Plan: ${goal}`);
@@ -1644,25 +1787,8 @@ async function buildAutoPlanAndRun({
1644
1787
  lines.push(` - task: ${s.task}`);
1645
1788
  });
1646
1789
  lines.push('');
1647
- lines.push('## Sub-Agent Outputs');
1648
- runItems.forEach((s, idx) => {
1649
- lines.push(`### ${idx + 1}. [${s.role}] ${s.title}`);
1650
- if (s.error) {
1651
- lines.push(`Error: ${s.error}`);
1652
- if (s.output) {
1653
- lines.push('');
1654
- lines.push(s.output);
1655
- }
1656
- lines.push('');
1657
- return;
1658
- }
1659
- if (s.warning) {
1660
- lines.push(`Note: ${s.warning}`);
1661
- lines.push('');
1662
- }
1663
- lines.push(s.output || '(empty)');
1664
- lines.push('');
1665
- });
1790
+ lines.push('## Approval');
1791
+ lines.push('Pending user approval before implementation.');
1666
1792
 
1667
1793
  const filePath = await writeMarkdownInProjectDir(
1668
1794
  'plans',
@@ -1675,12 +1801,13 @@ async function buildAutoPlanAndRun({
1675
1801
  filePath,
1676
1802
  summary: autoPlan.summary,
1677
1803
  finalSummary,
1804
+ approvalStatus: 'pending',
1678
1805
  steps: autoPlan.steps,
1679
- completedCount: completedItems.length,
1680
- warningCount: warningItems.length,
1681
- failedCount: failedItems.length,
1682
- warningTitles: warningItems.map((s) => `${s.role}:${s.title}`),
1683
- failedTitles: failedItems.map((s) => `${s.role}:${s.title}`)
1806
+ completedCount: 0,
1807
+ warningCount: planningError ? 1 : 0,
1808
+ failedCount: 0,
1809
+ warningTitles: planningError ? ['planner:fallback-plan'] : [],
1810
+ failedTitles: []
1684
1811
  };
1685
1812
  }
1686
1813
 
@@ -1728,6 +1855,9 @@ export async function createChatRuntime({
1728
1855
  let config = initialConfig;
1729
1856
  const baseSystemPrompt = systemPrompt;
1730
1857
  let executionMode = config.execution?.mode || 'auto';
1858
+ if (hasPendingPlanApproval(currentSession)) {
1859
+ executionMode = 'plan';
1860
+ }
1731
1861
  const commands = await loadCommandsAndSkills();
1732
1862
 
1733
1863
  // Set up tool result store under session directory
@@ -2372,6 +2502,16 @@ export async function createChatRuntime({
2372
2502
  onAgentEvent,
2373
2503
  sessionId: currentSession.id
2374
2504
  });
2505
+ currentSession.planState = {
2506
+ status: 'pending_approval',
2507
+ source: 'auto',
2508
+ goal,
2509
+ filePath: auto.filePath,
2510
+ summary: auto.summary || '',
2511
+ finalSummary: auto.finalSummary || auto.summary || '',
2512
+ steps: Array.isArray(auto.steps) ? auto.steps : []
2513
+ };
2514
+ executionMode = 'plan';
2375
2515
  const text = buildAutoPlanSystemSummary(auto);
2376
2516
  await persistLocalExchange(line, text);
2377
2517
  return {
@@ -2379,6 +2519,33 @@ export async function createChatRuntime({
2379
2519
  text
2380
2520
  };
2381
2521
  }
2522
+ if (sub === 'approve') {
2523
+ if (!hasPendingPlanApproval(currentSession)) {
2524
+ return { type: 'system', text: 'No pending plan approval. Use /plan auto <goal> or /plan <goal> first.' };
2525
+ }
2526
+ const planState = { ...currentSession.planState };
2527
+ const result = await askModel({
2528
+ text: buildApprovedPlanExecutionPrompt(planState, '/plan approve'),
2529
+ session: currentSession,
2530
+ config,
2531
+ model,
2532
+ systemPrompt: activeReplySystemPrompt,
2533
+ onAgentEvent,
2534
+ executionMode: 'auto'
2535
+ });
2536
+ currentSession.planState = null;
2537
+ executionMode = 'auto';
2538
+ await saveSession(currentSession);
2539
+ return { type: 'assistant', text: result.text };
2540
+ }
2541
+ if (sub === 'stay') {
2542
+ if (!hasPendingPlanApproval(currentSession)) {
2543
+ return { type: 'system', text: 'No pending plan approval.' };
2544
+ }
2545
+ const text = buildPendingPlanApprovalMessage(currentSession.planState);
2546
+ await persistLocalExchange(line, text);
2547
+ return { type: 'system', text };
2548
+ }
2382
2549
  if (sub === 'from-spec') {
2383
2550
  const specArg = parsedInput.args.slice(1).join(' ').trim();
2384
2551
  const specPath = await resolveSpecPath(specArg, currentSession.id);
@@ -2499,6 +2666,9 @@ export async function createChatRuntime({
2499
2666
  const loaded = await loadSession(targetId);
2500
2667
  currentSession = loaded;
2501
2668
  setResultDir(path.join(getSessionsDir(), String(targetId)));
2669
+ if (hasPendingPlanApproval(currentSession)) {
2670
+ executionMode = 'plan';
2671
+ }
2502
2672
  if (!historyIdCache.includes(targetId)) historyIdCache.unshift(targetId);
2503
2673
  historySessionCache = [
2504
2674
  { id: targetId, messageCount: Array.isArray(loaded.messages) ? loaded.messages.length : 0 },
@@ -2638,6 +2808,7 @@ export async function createChatRuntime({
2638
2808
  renderCommandPrompt(custom, []),
2639
2809
  'Explicit brainstorm mode:',
2640
2810
  '- Ask exactly one clarifying question first if any important uncertainty remains.',
2811
+ '- Stop after the question and wait for the user\'s answer before continuing.',
2641
2812
  '- Do not inspect the repo or generate code unless the user explicitly asks for that.',
2642
2813
  '- If you recommend an option, present it as a suggested decision rather than a final choice for the user.',
2643
2814
  parsedInput.args.length > 0 ? `Current question:\n${parsedInput.args.join(' ')}` : ''
@@ -2676,6 +2847,34 @@ export async function createChatRuntime({
2676
2847
  return { type: 'assistant', text: result.text };
2677
2848
  }
2678
2849
 
2850
+ if (hasPendingPlanApproval(currentSession)) {
2851
+ if (isApprovalText(parsedInput.text)) {
2852
+ const planState = { ...currentSession.planState };
2853
+ const result = await askModel({
2854
+ text: buildApprovedPlanExecutionPrompt(planState, parsedInput.text),
2855
+ session: currentSession,
2856
+ config,
2857
+ model,
2858
+ systemPrompt: activeReplySystemPrompt,
2859
+ onAgentEvent,
2860
+ executionMode: 'auto'
2861
+ });
2862
+ currentSession.planState = null;
2863
+ executionMode = 'auto';
2864
+ await saveSession(currentSession);
2865
+ return { type: 'assistant', text: result.text };
2866
+ }
2867
+ if (isStayInPlanText(parsedInput.text)) {
2868
+ const text = buildPendingPlanApprovalMessage(currentSession.planState);
2869
+ await persistLocalExchange(line, text);
2870
+ return { type: 'system', text };
2871
+ }
2872
+ return {
2873
+ type: 'system',
2874
+ text: buildPendingPlanApprovalMessage(currentSession.planState)
2875
+ };
2876
+ }
2877
+
2679
2878
  if (compactState.autoEnabled) {
2680
2879
  const currentTokens = estimateMessagesTokens(currentSession.messages);
2681
2880
  const maxTokens = effectiveMaxContextTokens(config);
@@ -2703,7 +2902,33 @@ export async function createChatRuntime({
2703
2902
  }
2704
2903
 
2705
2904
  const expandedText = await expandFileMentions(parsedInput.text, process.cwd());
2706
- const selectedAutoSkills = selectAutoSkillNames(expandedText).filter((name) => isSkillEnabled(config, name));
2905
+ const autoRoute = classifyAutoRoute(expandedText);
2906
+ if (autoRoute.autoPlan) {
2907
+ const auto = await buildAutoPlanAndRun({
2908
+ goal: expandedText,
2909
+ session: currentSession,
2910
+ config,
2911
+ model,
2912
+ systemPrompt: activeBaseSystemPrompt,
2913
+ onAgentEvent,
2914
+ sessionId: currentSession.id
2915
+ });
2916
+ currentSession.planState = {
2917
+ status: 'pending_approval',
2918
+ source: 'auto',
2919
+ goal: expandedText,
2920
+ filePath: auto.filePath,
2921
+ summary: auto.summary || '',
2922
+ finalSummary: auto.finalSummary || auto.summary || '',
2923
+ steps: Array.isArray(auto.steps) ? auto.steps : []
2924
+ };
2925
+ executionMode = 'plan';
2926
+ const text = buildAutoPlanSystemSummary(auto);
2927
+ await persistLocalExchange(line, text);
2928
+ return { type: 'system', text };
2929
+ }
2930
+
2931
+ const selectedAutoSkills = autoRoute.selectedSkills.filter((name) => isSkillEnabled(config, name));
2707
2932
  if (selectedAutoSkills.length > 0 && onAgentEvent) {
2708
2933
  onAgentEvent({
2709
2934
  type: 'skill:auto',
@@ -66,6 +66,25 @@ function sanitizeSession(session, fallbackId = '') {
66
66
 
67
67
  if (session?.model) out.model = String(session.model);
68
68
  if (session?.mode) out.mode = String(session.mode);
69
+ if (session?.planState && typeof session.planState === 'object') {
70
+ out.planState = {
71
+ status: String(session.planState.status || '').trim(),
72
+ source: String(session.planState.source || '').trim(),
73
+ goal: String(session.planState.goal || '').trim(),
74
+ filePath: String(session.planState.filePath || '').trim(),
75
+ summary: String(session.planState.summary || '').trim(),
76
+ finalSummary: String(session.planState.finalSummary || '').trim()
77
+ };
78
+ if (Array.isArray(session.planState.steps)) {
79
+ out.planState.steps = session.planState.steps
80
+ .map((step) => ({
81
+ title: String(step?.title || '').trim(),
82
+ role: String(step?.role || '').trim(),
83
+ task: String(step?.task || '').trim()
84
+ }))
85
+ .filter((step) => step.title || step.role || step.task);
86
+ }
87
+ }
69
88
 
70
89
  return out;
71
90
  }
@@ -142,12 +142,22 @@ Some tools are loaded on demand. If a needed tool is not listed, call tool_searc
142
142
 
143
143
  # Doing tasks
144
144
 
145
+ - Search or read before editing unless the exact target is already known
145
146
  - If a command or tool is blocked or fails, inspect the error and retry with allowed commands or tools
146
147
  - For AST-scoped edits, if edit rejects due to missing or stale ast_target, fix arguments and retry
147
148
  - Do not claim filesystem access is impossible unless search/read tools also fail
148
149
  - Prefer editing existing files over creating new ones
149
150
  - Do not add comments, docstrings, or type annotations to code you did not change
150
151
  - Do not add features or refactor code beyond what was asked
152
+ - When a tool result is large, keep only the useful summary in your reply and read the saved output only if it is needed
153
+ - Keep tool results compact in context: prefer short conclusions over re-pasting raw output
154
+
155
+ # Plan mode
156
+
157
+ - In plan mode, explore and propose the next steps first
158
+ - In plan mode, do not start implementation until the user asks you to continue
159
+ - If requirements are still unclear, ask one focused question and stop
160
+ - If there are multiple reasonable approaches, give short options and a suggested direction, then stop for user confirmation
151
161
 
152
162
  # Tone and style
153
163
 
package/src/core/tools.js CHANGED
@@ -1738,7 +1738,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1738
1738
  function: {
1739
1739
  name: 'read',
1740
1740
  description:
1741
- 'Read a file. Call once for metadata and a read_token, then again with include_content=true and the same token to get content. Use this before editing.',
1741
+ 'Inspect a file. Call once for metadata and a read_token, then again with include_content=true and the same token to get content. Use this before editing. Do not use run with cat, head, or tail for file reads.',
1742
1742
  parameters: {
1743
1743
  type: 'object',
1744
1744
  properties: {
@@ -1758,7 +1758,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1758
1758
  function: {
1759
1759
  name: 'grep',
1760
1760
  description:
1761
- 'Search file contents. Use this for code search instead of grep or rg in run.',
1761
+ 'Search file contents. Use this for code search before read or edit. Do not use run with grep or rg for normal code search.',
1762
1762
  parameters: {
1763
1763
  type: 'object',
1764
1764
  properties: {
@@ -1780,7 +1780,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1780
1780
  function: {
1781
1781
  name: 'glob',
1782
1782
  description:
1783
- 'Find files by glob pattern. Use this for file discovery instead of find in run.',
1783
+ 'Find files by glob pattern. Use this for file discovery before read. Do not use run with find for normal file lookup.',
1784
1784
  parameters: {
1785
1785
  type: 'object',
1786
1786
  properties: {
@@ -1797,7 +1797,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1797
1797
  type: 'function',
1798
1798
  function: {
1799
1799
  name: 'list',
1800
- description: 'List files and directories in a workspace path.',
1800
+ description: 'List files and directories in a workspace path. Use this for quick directory discovery before deeper reads.',
1801
1801
  parameters: {
1802
1802
  type: 'object',
1803
1803
  properties: {
@@ -1812,7 +1812,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1812
1812
  function: {
1813
1813
  name: 'edit',
1814
1814
  description:
1815
- 'Edit existing files. Use block edits, exact replacements, or anchored inserts. When ast_target is provided, keep the edit constrained to that node. Prefer this over write for code changes.',
1815
+ 'Edit existing files. Use block edits, exact replacements, or anchored inserts. When ast_target is provided, keep the edit constrained to that node. Read first unless the exact target is already known. Prefer this over write for code changes.',
1816
1816
  parameters: {
1817
1817
  type: 'object',
1818
1818
  properties: {
@@ -1840,11 +1840,11 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1840
1840
  function: {
1841
1841
  name: 'write',
1842
1842
  description:
1843
- 'Create a new file or overwrite a file. Use this for new files or full rewrites. Prefer edit for existing code.',
1843
+ 'Create a new file or overwrite a file. Always include path and content. Use this for new files or explicit full rewrites only. If the file path is not decided yet, do not call write yet. Prefer edit for existing code changes.',
1844
1844
  parameters: {
1845
1845
  type: 'object',
1846
1846
  properties: {
1847
- path: { type: 'string', description: 'File path to create or overwrite' },
1847
+ path: { type: 'string', description: 'Required file path like src/app.js or pages/index.html. Never omit this.' },
1848
1848
  content: { type: 'string', description: 'Content to write' },
1849
1849
  append: { type: 'boolean', description: 'Append instead of overwrite' },
1850
1850
  full_file_rewrite: { type: 'boolean', description: 'Set true for whole-file rewrites' }
@@ -1892,7 +1892,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1892
1892
  function: {
1893
1893
  name: 'ast_query',
1894
1894
  description:
1895
- 'Run a Tree-sitter query on a code file and return ast_target objects for node-scoped reads or edits.',
1895
+ 'Run a Tree-sitter query on a code file and return ast_target objects. Use this when you need node-scoped reads or edits for functions, classes, or methods.',
1896
1896
  parameters: {
1897
1897
  type: 'object',
1898
1898
  properties: {
@@ -1911,7 +1911,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1911
1911
  function: {
1912
1912
  name: 'read_ast_node',
1913
1913
  description:
1914
- 'Read a previously selected AST node with compact structural context.',
1914
+ 'Read a previously selected AST node with compact structural context. Use this after ast_query before a scoped structural edit.',
1915
1915
  parameters: {
1916
1916
  type: 'object',
1917
1917
  properties: {
@@ -1927,7 +1927,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1927
1927
  type: 'function',
1928
1928
  function: {
1929
1929
  name: 'generate_diff',
1930
- description: 'Generate a unified diff for proposed content',
1930
+ description: 'Generate a unified diff for proposed content. Use this when you want to preview or prepare a patch before applying it.',
1931
1931
  parameters: {
1932
1932
  type: 'object',
1933
1933
  properties: {
@@ -1942,7 +1942,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1942
1942
  type: 'function',
1943
1943
  function: {
1944
1944
  name: 'patch',
1945
- description: 'Apply one or more unified diff hunks to workspace files',
1945
+ description: 'Apply one or more unified diff hunks to workspace files. Use this for prepared unified diffs instead of ad-hoc shell patching.',
1946
1946
  parameters: {
1947
1947
  type: 'object',
1948
1948
  properties: {
@@ -1958,7 +1958,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1958
1958
  function: {
1959
1959
  name: 'start_service',
1960
1960
  description:
1961
- 'Start a long-running local service and return a compact handle.',
1961
+ 'Start a long-running local service and return a compact handle. Do not use run for watchers, dev servers, or other persistent processes.',
1962
1962
  parameters: {
1963
1963
  type: 'object',
1964
1964
  properties: {
@@ -1985,7 +1985,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1985
1985
  type: 'function',
1986
1986
  function: {
1987
1987
  name: 'list_services',
1988
- description: 'List tracked local services and their current status.',
1988
+ description: 'List tracked local services and their current status. Use this to find existing service handles before starting another one.',
1989
1989
  parameters: {
1990
1990
  type: 'object',
1991
1991
  properties: {}
@@ -1996,7 +1996,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
1996
1996
  type: 'function',
1997
1997
  function: {
1998
1998
  name: 'get_service_status',
1999
- description: 'Get the status of a started service.',
1999
+ description: 'Get the status of a started service. Use this to confirm startup or diagnose a stalled service.',
2000
2000
  parameters: {
2001
2001
  type: 'object',
2002
2002
  properties: {
@@ -2010,7 +2010,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
2010
2010
  type: 'function',
2011
2011
  function: {
2012
2012
  name: 'get_service_logs',
2013
- description: 'Read recent logs from a started service.',
2013
+ description: 'Read recent logs from a started service. Use this for targeted diagnosis instead of restarting blindly.',
2014
2014
  parameters: {
2015
2015
  type: 'object',
2016
2016
  properties: {
@@ -2026,7 +2026,7 @@ export function getBuiltinTools({ workspaceRoot = process.cwd(), config, onSyste
2026
2026
  type: 'function',
2027
2027
  function: {
2028
2028
  name: 'stop_service',
2029
- description: 'Stop a started service.',
2029
+ description: 'Stop a started service when it is no longer needed or when you need a clean restart.',
2030
2030
  parameters: {
2031
2031
  type: 'object',
2032
2032
  properties: {