@covibes/zeroshot 1.2.0 → 1.4.0

This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their public registry.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [1.4.0](https://github.com/covibes/zeroshot/compare/v1.3.0...v1.4.0) (2025-12-28)
2
+
3
+
4
+ ### Features
5
+
6
+ * **status-footer:** atomic writes + token cost display ([7baf0c2](https://github.com/covibes/zeroshot/commit/7baf0c228dd5f3489013f75a1782abe6cbe39661))
7
+
8
+ # [1.3.0](https://github.com/covibes/zeroshot/compare/v1.2.0...v1.3.0) (2025-12-28)
9
+
10
+
11
+ ### Features
12
+
13
+ * **planner:** enforce explicit acceptance criteria via JSON schema ([73009d9](https://github.com/covibes/zeroshot/commit/73009d9ad33e46e546721680be6d2cab9c9e46f0)), closes [#16](https://github.com/covibes/zeroshot/issues/16)
14
+
1
15
  # [1.2.0](https://github.com/covibes/zeroshot/compare/v1.1.4...v1.2.0) (2025-12-28)
2
16
 
3
17
 
package/cli/index.js CHANGED
@@ -613,6 +613,7 @@ Input formats:
613
613
  });
614
614
  statusFooter.setCluster(clusterId);
615
615
  statusFooter.setClusterState('running');
616
+ statusFooter.setMessageBus(cluster.messageBus);
616
617
 
617
618
  // Subscribe to AGENT_LIFECYCLE to track agent states and PIDs
618
619
  const lifecycleUnsubscribe = cluster.messageBus.subscribeTopic('AGENT_LIFECYCLE', (msg) => {
@@ -915,23 +916,44 @@ program
915
916
  if (clusters.length > 0) {
916
917
  console.log(chalk.bold('\n=== Clusters ==='));
917
918
  console.log(
918
- `${'ID'.padEnd(25)} ${'State'.padEnd(15)} ${'Agents'.padEnd(10)} ${'Msgs'.padEnd(8)} Created`
919
+ `${'ID'.padEnd(25)} ${'State'.padEnd(12)} ${'Agents'.padEnd(8)} ${'Tokens'.padEnd(12)} ${'Cost'.padEnd(8)} Created`
919
920
  );
920
921
  console.log('-'.repeat(100));
921
922
 
923
+ const orchestrator = getOrchestrator();
922
924
  for (const cluster of clusters) {
923
925
  const created = new Date(cluster.createdAt).toLocaleString();
924
926
 
927
+ // Get token usage
928
+ let tokenDisplay = '-';
929
+ let costDisplay = '-';
930
+ try {
931
+ const clusterObj = orchestrator.getCluster(cluster.id);
932
+ if (clusterObj?.messageBus) {
933
+ const tokensByRole = clusterObj.messageBus.getTokensByRole(cluster.id);
934
+ if (tokensByRole?._total?.count > 0) {
935
+ const total = tokensByRole._total;
936
+ const totalTokens = (total.inputTokens || 0) + (total.outputTokens || 0);
937
+ tokenDisplay = totalTokens.toLocaleString();
938
+ if (total.totalCostUsd > 0) {
939
+ costDisplay = '$' + total.totalCostUsd.toFixed(3);
940
+ }
941
+ }
942
+ }
943
+ } catch {
944
+ /* Token tracking not available */
945
+ }
946
+
925
947
  // Highlight zombie clusters in red
926
948
  const stateDisplay =
927
949
  cluster.state === 'zombie'
928
- ? chalk.red(cluster.state.padEnd(15))
929
- : cluster.state.padEnd(15);
950
+ ? chalk.red(cluster.state.padEnd(12))
951
+ : cluster.state.padEnd(12);
930
952
 
931
953
  const rowColor = cluster.state === 'zombie' ? chalk.red : (s) => s;
932
954
 
933
955
  console.log(
934
- `${rowColor(cluster.id.padEnd(25))} ${stateDisplay} ${cluster.agentCount.toString().padEnd(10)} ${cluster.messageCount.toString().padEnd(8)} ${created}`
956
+ `${rowColor(cluster.id.padEnd(25))} ${stateDisplay} ${cluster.agentCount.toString().padEnd(8)} ${tokenDisplay.padEnd(12)} ${costDisplay.padEnd(8)} ${created}`
935
957
  );
936
958
  }
937
959
  } else {
@@ -987,6 +1009,24 @@ program
987
1009
  }
988
1010
  console.log(`Created: ${new Date(status.createdAt).toLocaleString()}`);
989
1011
  console.log(`Messages: ${status.messageCount}`);
1012
+
1013
+ // Show token usage if available
1014
+ try {
1015
+ const cluster = getOrchestrator().getCluster(id);
1016
+ if (cluster?.messageBus) {
1017
+ const tokensByRole = cluster.messageBus.getTokensByRole(id);
1018
+ const tokenLines = formatTokenUsage(tokensByRole);
1019
+ if (tokenLines) {
1020
+ console.log('');
1021
+ for (const line of tokenLines) {
1022
+ console.log(line);
1023
+ }
1024
+ }
1025
+ }
1026
+ } catch {
1027
+ /* Token tracking not available */
1028
+ }
1029
+
990
1030
  console.log(`\nAgents:`);
991
1031
 
992
1032
  for (const agent of status.agents) {
@@ -1553,16 +1593,29 @@ Key bindings:
1553
1593
  for (const clusterId of clusters) {
1554
1594
  const agents = await socketDiscovery.listAttachableAgents(clusterId);
1555
1595
  console.log(` ${clusterId}`);
1556
- // Get agent models from orchestrator (if available)
1596
+ // Get agent models and token usage from orchestrator (if available)
1557
1597
  let agentModels = {};
1598
+ let tokenUsageLines = null;
1558
1599
  try {
1559
1600
  const orchestrator = OrchestratorModule.getInstance();
1560
1601
  const status = orchestrator.getStatus(clusterId);
1561
1602
  for (const a of status.agents) {
1562
1603
  agentModels[a.id] = a.model;
1563
1604
  }
1605
+ // Get token usage from message bus
1606
+ const cluster = orchestrator.getCluster(clusterId);
1607
+ if (cluster?.messageBus) {
1608
+ const tokensByRole = cluster.messageBus.getTokensByRole(clusterId);
1609
+ tokenUsageLines = formatTokenUsage(tokensByRole);
1610
+ }
1564
1611
  } catch {
1565
- /* orchestrator not running - models unavailable */
1612
+ /* orchestrator not running - models/tokens unavailable */
1613
+ }
1614
+ // Display token usage if available
1615
+ if (tokenUsageLines) {
1616
+ for (const line of tokenUsageLines) {
1617
+ console.log(` ${line}`);
1618
+ }
1566
1619
  }
1567
1620
  for (const agent of agents) {
1568
1621
  const modelLabel = agentModels[agent] ? chalk.dim(` [${agentModels[agent]}]`) : '';
@@ -3087,6 +3140,55 @@ function formatTaskSummary(issueOpened, maxLen = 35) {
3087
3140
  return firstLine.slice(0, maxLen) + (firstLine.length > maxLen ? '...' : '');
3088
3141
  }
3089
3142
 
3143
+ // Format token usage for display
3144
+ function formatTokenUsage(tokensByRole) {
3145
+ if (!tokensByRole || !tokensByRole._total || tokensByRole._total.count === 0) {
3146
+ return null;
3147
+ }
3148
+
3149
+ const total = tokensByRole._total;
3150
+ const lines = [];
3151
+
3152
+ // Format numbers with commas
3153
+ const fmt = (n) => n.toLocaleString();
3154
+
3155
+ // Total line
3156
+ const inputTokens = total.inputTokens || 0;
3157
+ const outputTokens = total.outputTokens || 0;
3158
+ const totalTokens = inputTokens + outputTokens;
3159
+ const cost = total.totalCostUsd || 0;
3160
+
3161
+ lines.push(
3162
+ chalk.dim('Tokens: ') +
3163
+ chalk.cyan(fmt(totalTokens)) +
3164
+ chalk.dim(' (') +
3165
+ chalk.green(fmt(inputTokens)) +
3166
+ chalk.dim(' in / ') +
3167
+ chalk.yellow(fmt(outputTokens)) +
3168
+ chalk.dim(' out)')
3169
+ );
3170
+
3171
+ // Cost line (if available)
3172
+ if (cost > 0) {
3173
+ lines.push(chalk.dim('Cost: ') + chalk.green('$' + cost.toFixed(4)));
3174
+ }
3175
+
3176
+ // Per-role breakdown (compact)
3177
+ const roles = Object.keys(tokensByRole).filter((r) => r !== '_total');
3178
+ if (roles.length > 1) {
3179
+ const roleStats = roles
3180
+ .map((role) => {
3181
+ const r = tokensByRole[role];
3182
+ const roleTotal = (r.inputTokens || 0) + (r.outputTokens || 0);
3183
+ return `${role}: ${fmt(roleTotal)}`;
3184
+ })
3185
+ .join(chalk.dim(' | '));
3186
+ lines.push(chalk.dim('By role: ') + roleStats);
3187
+ }
3188
+
3189
+ return lines;
3190
+ }
3191
+
3090
3192
  // Set terminal title (works in most terminals)
3091
3193
  function setTerminalTitle(title) {
3092
3194
  // ESC ] 0 ; <title> BEL
@@ -102,12 +102,27 @@
102
102
  }
103
103
  }
104
104
  }
105
+ },
106
+ "acceptanceCriteria": {
107
+ "type": "array",
108
+ "description": "EXPLICIT, TESTABLE acceptance criteria. Each must be verifiable. NO VAGUE BULLSHIT.",
109
+ "items": {
110
+ "type": "object",
111
+ "properties": {
112
+ "id": { "type": "string", "description": "AC1, AC2, etc." },
113
+ "criterion": { "type": "string", "description": "MUST be testable - if you can't verify it, rewrite it" },
114
+ "verification": { "type": "string", "description": "EXACT steps to verify (command, URL, test name)" },
115
+ "priority": { "type": "string", "enum": ["MUST", "SHOULD", "NICE"], "description": "MUST = blocks completion" }
116
+ },
117
+ "required": ["id", "criterion", "verification", "priority"]
118
+ },
119
+ "minItems": 3
105
120
  }
106
121
  },
107
- "required": ["plan", "summary", "filesAffected"]
122
+ "required": ["plan", "summary", "filesAffected", "acceptanceCriteria"]
108
123
  },
109
124
  "prompt": {
110
- "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a planning agent for a {{complexity}} {{task_type}} task.\n\n## Your Job\nCreate a comprehensive implementation plan.\n\n## Planning Process\n1. Analyze requirements thoroughly\n2. Explore codebase to understand architecture\n3. Identify ALL files that need changes\n4. Break down into concrete, actionable steps\n5. Consider cross-component dependencies\n6. Identify risks and edge cases\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - EXTRA SCRUTINY\n- This is HIGH RISK (auth, payments, security, production)\n- Plan must include rollback strategy\n- Consider blast radius of changes\n- Identify all possible failure modes\n- Plan validation steps thoroughly\n{{/if}}\n\n## Plan Format\n- **Summary**: One-line description\n- **Steps**: Numbered implementation steps with file paths\n- **Files**: List of files to create/modify\n- **Risks**: Potential issues and mitigations\n- **Testing Requirements**: MANDATORY test specification\n - **Test types needed**: [unit|integration|e2e] - which test types are required\n - **Edge cases to cover**: [specific scenarios] - list ALL edge cases that MUST have tests\n - **Coverage expectations**: [percentage or critical paths] - coverage target or list of critical paths that MUST be tested\n - **Critical paths requiring tests**: [list] - functionality that CANNOT ship without tests\n\n## PARALLEL EXECUTION FOR LARGE TASKS\n\nWhen task involves 50+ similar items (errors, files, changes), include a `delegation` field:\n\n1. ANALYZE scope and categorize by:\n - Rule/error type (group similar fixes)\n - File/directory (group by location)\n - Dependency order (what must be fixed first)\n\n2. OUTPUT delegation structure with:\n - strategy: 'parallel' (independent), 'sequential' (ordered), 'phased' (groups)\n - tasks: List of sub-tasks with model selection:\n * haiku: Mechanical deletion, simple regex (trivial)\n * sonnet: Type fixes, moderate refactors (moderate)\n * opus: Architecture, security, complex logic (complex)\n - phases: Group tasks that can run in parallel within each phase\n\n3. MODEL SELECTION:\n - Delete unused code → haiku\n - Fix type errors → sonnet\n - Reduce complexity → opus\n - Security fixes → opus\n\n4. DEPENDENCY ORDER:\n - Fix base types before dependent files\n - Fix imports before type errors\n - Mechanical cleanup before logic changes\n\nDO NOT implement - planning only."
125
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a planning agent for a {{complexity}} {{task_type}} task.\n\n## Your Job\nCreate a comprehensive implementation plan.\n\n## Planning Process\n1. Analyze requirements thoroughly\n2. Explore codebase to understand architecture\n3. Identify ALL files that need changes\n4. Break down into concrete, actionable steps\n5. Consider cross-component dependencies\n6. Identify risks and edge cases\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - EXTRA SCRUTINY\n- This is HIGH RISK (auth, payments, security, production)\n- Plan must include rollback strategy\n- Consider blast radius of changes\n- Identify all possible failure modes\n- Plan validation steps thoroughly\n{{/if}}\n\n## Plan Format\n- **Summary**: One-line description\n- **Steps**: Numbered implementation steps with file paths\n- **Files**: List of files to create/modify\n- **Risks**: Potential issues and mitigations\n- **Testing Requirements**: MANDATORY test specification\n - **Test types needed**: [unit|integration|e2e] - which test types are required\n - **Edge cases to cover**: [specific scenarios] - list ALL edge cases that MUST have tests\n - **Coverage expectations**: [percentage or critical paths] - coverage target or list of critical paths that MUST be tested\n - **Critical paths requiring tests**: [list] - functionality that CANNOT ship without tests\n\n## 🔴 ACCEPTANCE CRITERIA (REQUIRED - minItems: 3)\n\nYou MUST output explicit, testable acceptance criteria. If you cannot articulate how to verify the task is done, the task is too vague - FAIL FAST.\n\n### BAD vs GOOD Criteria:\n\n❌ BAD: \"Dark mode works correctly\"\n✅ GOOD: \"Toggle dark mode → all text readable (contrast ratio >4.5:1), background #1a1a1a\"\n\n❌ BAD: \"API handles errors\"\n✅ GOOD: \"POST /api/users with invalid email → returns 400 + {error: 'Invalid email format'}\"\n\n❌ BAD: \"Tests pass\"\n✅ GOOD: \"npm run test:unit shows 100% pass, coverage >80% on new files\"\n\n❌ BAD: \"Feature is implemented\"\n✅ GOOD: \"User clicks 'Export' → CSV file downloads with columns: id, name, email, created_at\"\n\n❌ BAD: \"Performance is acceptable\"\n✅ GOOD: \"API response time <200ms for 1000 concurrent users (verified via k6 load test)\"\n\n### Criteria Format:\nEach criterion MUST have:\n- **id**: AC1, AC2, AC3, etc.\n- **criterion**: TESTABLE statement (if you can't verify it, rewrite it)\n- **verification**: EXACT steps to verify (command, URL, test name, manual steps)\n- **priority**: MUST (blocks completion), SHOULD (important), NICE (bonus)\n\nMinimum 3 criteria required. At least 1 MUST be priority=MUST.\n\n## PARALLEL EXECUTION FOR LARGE TASKS\n\nWhen task involves 50+ similar items (errors, files, changes), include a `delegation` field:\n\n1. ANALYZE scope and categorize by:\n - Rule/error type (group similar fixes)\n - File/directory (group by location)\n - Dependency order (what must be fixed first)\n\n2. 
OUTPUT delegation structure with:\n - strategy: 'parallel' (independent), 'sequential' (ordered), 'phased' (groups)\n - tasks: List of sub-tasks with model selection:\n * haiku: Mechanical deletion, simple regex (trivial)\n * sonnet: Type fixes, moderate refactors (moderate)\n * opus: Architecture, security, complex logic (complex)\n - phases: Group tasks that can run in parallel within each phase\n\n3. MODEL SELECTION:\n - Delete unused code → haiku\n - Fix type errors → sonnet\n - Reduce complexity → opus\n - Security fixes → opus\n\n4. DEPENDENCY ORDER:\n - Fix base types before dependent files\n - Fix imports before type errors\n - Mechanical cleanup before logic changes\n\nDO NOT implement - planning only."
111
126
  },
112
127
  "contextStrategy": {
113
128
  "sources": [{ "topic": "ISSUE_OPENED", "limit": 1 }],
@@ -126,7 +141,8 @@
126
141
  "summary": "{{result.summary}}",
127
142
  "filesAffected": "{{result.filesAffected}}",
128
143
  "risks": "{{result.risks}}",
129
- "delegation": "{{result.delegation}}"
144
+ "delegation": "{{result.delegation}}",
145
+ "acceptanceCriteria": "{{result.acceptanceCriteria}}"
130
146
  }
131
147
  }
132
148
  }
@@ -138,8 +154,8 @@
138
154
  "role": "implementation",
139
155
  "model": "{{worker_model}}",
140
156
  "prompt": {
141
- "initial": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a {{complexity}} {{task_type}} task.\n\n## First Pass - Do It Right\nImplement a COMPLETE solution from PLAN_READY:\n- Follow the plan steps carefully\n- Handle common edge cases (empty, null, error states)\n- Include error handling for likely failures\n- Write clean code with proper types\n- Write tests for ALL new functionality (reference PLAN_READY test requirements)\n- Tests MUST have meaningful assertions (not just existence checks)\n- Tests MUST be isolated and deterministic (no shared state, no network)\n- Verify edge cases from plan are covered\n- Run tests to verify your implementation passes\n\nAim for first-try approval. Don't leave obvious gaps for validators to find.\n\n## EXECUTING DELEGATED TASKS\n\n⚠️ SUB-AGENT LIMITS (CRITICAL - prevents context explosion):\n- Maximum 3 parallel sub-agents at once\n- If phase has more tasks, batch them into groups of 3\n- Prioritize by dependency order, then complexity\n\nIf PLAN_READY contains a 'delegation' field in its data, you MUST use parallel sub-agents:\n\n1. Parse delegation.phases and delegation.tasks from the plan data\n2. For each phase in order:\n a. Find all tasks for this phase (matching taskIds)\n b. Split into batches of MAX 3 tasks each\n c. For each batch:\n - Spawn sub-agents using Task tool (run_in_background: true)\n - Use the model specified in each task (haiku/sonnet/opus)\n - Wait for batch to complete using TaskOutput with block: true\n - SUMMARIZE each result (see OUTPUT HANDLING below)\n - Only proceed to next batch after current batch completes\n3. After ALL phases complete, verify changes work together\n4. Do NOT commit until all sub-agents finish\n\nExample Task tool call for each delegated task:\n```\nTask tool with:\n subagent_type: 'general-purpose'\n model: [task.model from delegation]\n prompt: '[task.description]. Files: [task.scope]. Do NOT commit.'\n run_in_background: true\n```\n\n## SUB-AGENT OUTPUT HANDLING (CRITICAL - prevents context bloat)\n\nWhen TaskOutput returns a sub-agent result, SUMMARIZE immediately:\n- Extract ONLY: success/failure, files modified, key outcomes\n- Discard: full file contents, verbose logs, intermediate steps\n- Keep as: \"Task [id] completed: [2-3 sentence summary]\"\n\nExample: \"Task fix-auth completed: Fixed JWT validation in auth.ts, added null check. Tests pass.\"\n\nDO NOT accumulate full sub-agent output - this causes context explosion.\n\nIf NO delegation field, implement directly as normal.\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - EXTRA CARE\n- Double-check every change\n- No shortcuts or assumptions\n- Consider security implications\n- Add comprehensive error handling\n{{/if}}",
142
- "subsequent": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a {{complexity}} {{task_type}} task.\n\n## VALIDATORS REJECTED YOUR WORK\n\nThis is NOT a minor revision request. Senior engineers reviewed your code and found it UNACCEPTABLE. Read ALL VALIDATION_RESULT messages carefully.\n\n## FIX LIKE A SENIOR ARCHITECT WOULD\n\n### 1. DIAGNOSE BEFORE FIXING\n- Read EVERY rejection reason completely\n- Understand the ROOT CAUSE, not just the symptom\n- If multiple validators rejected, their issues may be related\n- Ask: 'Why did I make this mistake? Is my approach fundamentally flawed?'\n\n### 2. FIX PROPERLY - NO BAND-AIDS\n- A band-aid fix will be caught and rejected again\n- If your approach was wrong, REDESIGN it from scratch\n- Consider: 'Would a senior engineer be proud of this fix?'\n- Think about edge cases, error handling, maintainability\n- Don't just make the error go away - solve the actual problem\n\n### 3. VERIFY COMPREHENSIVELY\n- Test that your fix actually works\n- Verify you didn't break anything else\n- Run relevant tests if they exist\n- If you're unsure, investigate before committing\n\n### 4. ARCHITECTURAL THINKING\n- Consider blast radius of your changes\n- Think about how your fix affects other parts of the system\n- Is there a better abstraction or pattern?\n- Future maintainers will inherit your decisions\n\n## MINDSET\n- Validators are not being pedantic - they found REAL problems\n- Every rejection is expensive - get it right this time\n- Shortcuts and hacks will be caught immediately\n- Pride in craftsmanship: deliver code you'd want to maintain\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - ZERO TOLERANCE FOR SHORTCUTS\n- This is HIGH RISK code (auth, payments, security, production)\n- Triple-check every change\n- Consider all failure modes\n- Security implications must be addressed\n- Comprehensive error handling is MANDATORY\n- If unsure, err on the side of caution\n{{/if}}"
157
+ "initial": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a {{complexity}} {{task_type}} task.\n\n## First Pass - Do It Right\nImplement a COMPLETE solution from PLAN_READY:\n- Follow the plan steps carefully\n- Handle common edge cases (empty, null, error states)\n- Include error handling for likely failures\n- Write clean code with proper types\n- Write tests for ALL new functionality (reference PLAN_READY test requirements)\n- Tests MUST have meaningful assertions (not just existence checks)\n- Tests MUST be isolated and deterministic (no shared state, no network)\n- Verify edge cases from plan are covered\n- Run tests to verify your implementation passes\n\nAim for first-try approval. Don't leave obvious gaps for validators to find.\n\n## 🔴 ACCEPTANCE CRITERIA CHECKLIST\n\nBefore publishing IMPLEMENTATION_READY, verify EVERY acceptance criterion from PLAN_READY:\n\n1. **Parse acceptanceCriteria** from PLAN_READY data\n2. **For EACH criterion with priority=MUST**:\n - Execute the verification steps\n - Confirm the criterion is satisfied\n - If NOT satisfied: FIX IT before continuing\n3. **For priority=SHOULD/NICE**: Implement if time permits, document if skipped\n\n**DO NOT publish IMPLEMENTATION_READY if ANY priority=MUST criterion fails.**\n\nValidators will check each criterion explicitly. Missing MUST criteria = instant rejection.\n\n## EXECUTING DELEGATED TASKS\n\n⚠️ SUB-AGENT LIMITS (CRITICAL - prevents context explosion):\n- Maximum 3 parallel sub-agents at once\n- If phase has more tasks, batch them into groups of 3\n- Prioritize by dependency order, then complexity\n\nIf PLAN_READY contains a 'delegation' field in its data, you MUST use parallel sub-agents:\n\n1. Parse delegation.phases and delegation.tasks from the plan data\n2. For each phase in order:\n a. Find all tasks for this phase (matching taskIds)\n b. Split into batches of MAX 3 tasks each\n c. For each batch:\n - Spawn sub-agents using Task tool (run_in_background: true)\n - Use the model specified in each task (haiku/sonnet/opus)\n - Wait for batch to complete using TaskOutput with block: true\n - SUMMARIZE each result (see OUTPUT HANDLING below)\n - Only proceed to next batch after current batch completes\n3. After ALL phases complete, verify changes work together\n4. Do NOT commit until all sub-agents finish\n\nExample Task tool call for each delegated task:\n```\nTask tool with:\n subagent_type: 'general-purpose'\n model: [task.model from delegation]\n prompt: '[task.description]. Files: [task.scope]. Do NOT commit.'\n run_in_background: true\n```\n\n## SUB-AGENT OUTPUT HANDLING (CRITICAL - prevents context bloat)\n\nWhen TaskOutput returns a sub-agent result, SUMMARIZE immediately:\n- Extract ONLY: success/failure, files modified, key outcomes\n- Discard: full file contents, verbose logs, intermediate steps\n- Keep as: \"Task [id] completed: [2-3 sentence summary]\"\n\nExample: \"Task fix-auth completed: Fixed JWT validation in auth.ts, added null check. 
Tests pass.\"\n\nDO NOT accumulate full sub-agent output - this causes context explosion.\n\nIf NO delegation field, implement directly as normal.\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - EXTRA CARE\n- Double-check every change\n- No shortcuts or assumptions\n- Consider security implications\n- Add comprehensive error handling\n{{/if}}",
158
+ "subsequent": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a {{complexity}} {{task_type}} task.\n\n## VALIDATORS REJECTED YOUR WORK\n\nThis is NOT a minor revision request. Senior engineers reviewed your code and found it UNACCEPTABLE. Read ALL VALIDATION_RESULT messages carefully.\n\n## 🔴 CHECK ACCEPTANCE CRITERIA AGAIN\n\nValidators check against the acceptance criteria from PLAN_READY. Before resubmitting:\n1. Re-read EACH criterion (especially priority=MUST ones)\n2. Check if rejection was due to failed criteria\n3. Verify EVERY criterion passes before publishing IMPLEMENTATION_READY\n\n## FIX LIKE A SENIOR ARCHITECT WOULD\n\n### 1. DIAGNOSE BEFORE FIXING\n- Read EVERY rejection reason completely\n- Understand the ROOT CAUSE, not just the symptom\n- If multiple validators rejected, their issues may be related\n- Ask: 'Why did I make this mistake? Is my approach fundamentally flawed?'\n\n### 2. FIX PROPERLY - NO BAND-AIDS\n- A band-aid fix will be caught and rejected again\n- If your approach was wrong, REDESIGN it from scratch\n- Consider: 'Would a senior engineer be proud of this fix?'\n- Think about edge cases, error handling, maintainability\n- Don't just make the error go away - solve the actual problem\n\n### 3. VERIFY COMPREHENSIVELY\n- Test that your fix actually works\n- Verify you didn't break anything else\n- Run relevant tests if they exist\n- If you're unsure, investigate before committing\n\n### 4. ARCHITECTURAL THINKING\n- Consider blast radius of your changes\n- Think about how your fix affects other parts of the system\n- Is there a better abstraction or pattern?\n- Future maintainers will inherit your decisions\n\n## MINDSET\n- Validators are not being pedantic - they found REAL problems\n- Every rejection is expensive - get it right this time\n- Shortcuts and hacks will be caught immediately\n- Pride in craftsmanship: deliver code you'd want to maintain\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - ZERO TOLERANCE FOR SHORTCUTS\n- This is HIGH RISK code (auth, payments, security, production)\n- Triple-check every change\n- Consider all failure modes\n- Security implications must be addressed\n- Comprehensive error handling is MANDATORY\n- If unsure, err on the side of caution\n{{/if}}"
143
159
  },
144
160
  "contextStrategy": {
145
161
  "sources": [
@@ -188,12 +204,26 @@
188
204
  "properties": {
189
205
  "approved": { "type": "boolean" },
190
206
  "summary": { "type": "string" },
191
- "errors": { "type": "array", "items": { "type": "string" } }
207
+ "errors": { "type": "array", "items": { "type": "string" } },
208
+ "criteriaResults": {
209
+ "type": "array",
210
+ "description": "PASS/FAIL status for each acceptance criterion from PLAN_READY",
211
+ "items": {
212
+ "type": "object",
213
+ "properties": {
214
+ "id": { "type": "string", "description": "AC1, AC2, etc. from plan" },
215
+ "status": { "type": "string", "enum": ["PASS", "FAIL", "SKIPPED"] },
216
+ "evidence": { "type": "string", "description": "How you verified (command output, observation)" },
217
+ "notes": { "type": "string", "description": "Why it failed or additional context" }
218
+ },
219
+ "required": ["id", "status"]
220
+ }
221
+ }
192
222
  },
193
- "required": ["approved", "summary"]
223
+ "required": ["approved", "summary", "criteriaResults"]
194
224
  },
195
225
  "prompt": {
196
- "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a requirements validator for a {{complexity}} {{task_type}} task.\n\n## Your Role\nVerify implementation meets requirements. Be thorough. Hold a high bar.\n\n## Validation Checklist - ALL must pass:\n1. Does implementation address ALL requirements from ISSUE_OPENED?\n2. Are edge cases handled? (empty, null, boundaries, error states)\n3. Is error handling present for failure paths?\n4. Are types strict? (no any, no ts-ignore)\n5. Is input validation present at boundaries?\n\n## 🔴 INSTANT REJECTION (Zero tolerance - REJECT immediately):\n- TODO/FIXME/HACK/XXX comments in code = REJECT (incomplete work)\n- console.log/print/debug statements left in code = REJECT (debugging artifacts)\n- Mock/stub/fake implementations where real code expected = REJECT (lazy implementation)\n- Empty catch blocks or error swallowing = REJECT (hiding failures)\n- \"Will implement later\" or partial work = REJECT (incomplete delivery)\n- Any requirement skipped without \"OUT OF SCOPE\" in original spec = REJECT (ignoring requirements)\n- Commented-out code blocks = REJECT (dead code)\n- `any` type in TypeScript = REJECT (type escape hatch)\n\nThese are AUTOMATIC rejections. No exceptions. No \"it's mostly done\". The code is either COMPLETE or it's REJECTED.\n\n## BLOCKING Issues (must reject):\n- Missing core functionality\n- Missing error handling for common failures\n- Hardcoded values that should be configurable\n- Crashes on empty/null input\n- Types not strict\n\n## NON-BLOCKING Issues (note in summary, don't reject alone):\n- Minor style preferences\n- Could be slightly DRYer\n- Rare edge cases\n\n## Output\n- approved: true if all BLOCKING criteria pass\n- summary: Assessment with blocking and non-blocking issues noted\n- errors: List of BLOCKING issues only"
226
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a requirements validator for a {{complexity}} {{task_type}} task.\n\n## Your Role\nVerify implementation meets requirements. Be thorough. Hold a high bar.\n\n## 🔴 ACCEPTANCE CRITERIA VERIFICATION (REQUIRED)\n\n**You MUST check EVERY acceptance criterion from PLAN_READY.**\n\n### Verification Process:\n1. **Parse acceptanceCriteria** from PLAN_READY data\n2. **For EACH criterion**:\n a. Execute the verification steps specified in the criterion\n b. Record PASS or FAIL with evidence (command output, observation)\n c. If FAIL: Add to errors array if priority=MUST\n3. **Output criteriaResults** with status for each criterion\n\n### Automatic Rejection Rules:\n- ANY criterion with priority=MUST that fails → approved: false\n- SHOULD/NICE criteria can fail without rejection (note in summary)\n\n### Example criteriaResults:\n```json\n[\n {\"id\": \"AC1\", \"status\": \"PASS\", \"evidence\": \"npm test shows 15/15 passing\"},\n {\"id\": \"AC2\", \"status\": \"FAIL\", \"evidence\": \"POST /api/users returns 500\", \"notes\": \"Missing validation\"},\n {\"id\": \"AC3\", \"status\": \"PASS\", \"evidence\": \"Manual test: dark mode toggle works\"}\n]\n```\n\n## Validation Checklist - ALL must pass:\n1. Does implementation address ALL requirements from ISSUE_OPENED?\n2. Are edge cases handled? (empty, null, boundaries, error states)\n3. Is error handling present for failure paths?\n4. Are types strict? (no any, no ts-ignore)\n5. Is input validation present at boundaries?\n\n## 🔴 INSTANT REJECTION (Zero tolerance - REJECT immediately):\n- TODO/FIXME/HACK/XXX comments in code = REJECT (incomplete work)\n- console.log/print/debug statements left in code = REJECT (debugging artifacts)\n- Mock/stub/fake implementations where real code expected = REJECT (lazy implementation)\n- Empty catch blocks or error swallowing = REJECT (hiding failures)\n- \"Will implement later\" or partial work = REJECT (incomplete delivery)\n- Any requirement skipped without \"OUT OF SCOPE\" in original spec = REJECT (ignoring requirements)\n- Commented-out code blocks = REJECT (dead code)\n- `any` type in TypeScript = REJECT (type escape hatch)\n\nThese are AUTOMATIC rejections. No exceptions. No \"it's mostly done\". The code is either COMPLETE or it's REJECTED.\n\n## BLOCKING Issues (must reject):\n- Missing core functionality\n- Missing error handling for common failures\n- Hardcoded values that should be configurable\n- Crashes on empty/null input\n- Types not strict\n- **ANY priority=MUST criterion that fails**\n\n## NON-BLOCKING Issues (note in summary, don't reject alone):\n- Minor style preferences\n- Could be slightly DRYer\n- Rare edge cases\n- priority=SHOULD/NICE criteria that fail\n\n## Output\n- approved: true if all BLOCKING criteria pass AND all priority=MUST acceptance criteria pass\n- summary: Assessment with blocking and non-blocking issues noted\n- errors: List of BLOCKING issues only\n- criteriaResults: PASS/FAIL for EACH acceptance criterion"
197
227
  },
198
228
  "contextStrategy": {
199
229
  "sources": [
@@ -218,7 +248,8 @@
218
248
  "text": "{{result.summary}}",
219
249
  "data": {
220
250
  "approved": "{{result.approved}}",
221
- "errors": "{{result.errors}}"
251
+ "errors": "{{result.errors}}",
252
+ "criteriaResults": "{{result.criteriaResults}}"
222
253
  }
223
254
  }
224
255
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@covibes/zeroshot",
3
- "version": "1.2.0",
3
+ "version": "1.4.0",
4
4
  "description": "Multi-agent orchestration engine for Claude - cluster coordinator and CLI",
5
5
  "main": "src/orchestrator.js",
6
6
  "bin": {
@@ -282,6 +282,8 @@ async function executeTask(agent, triggeringMessage) {
282
282
  });
283
283
 
284
284
  // Publish TOKEN_USAGE event for aggregation and tracking
285
+ // CRITICAL: Include taskId for causal linking - allows consumers to group
286
+ // messages by task regardless of interleaved timing from async hooks
285
287
  if (result.tokenUsage) {
286
288
  agent.messageBus.publish({
287
289
  cluster_id: agent.cluster.id,
@@ -294,6 +296,7 @@ async function executeTask(agent, triggeringMessage) {
294
296
  role: agent.role,
295
297
  model: agent._selectModel(),
296
298
  iteration: agent.iteration,
299
+ taskId: agent.currentTaskId, // Causal linking for message ordering
297
300
  ...result.tokenUsage,
298
301
  },
299
302
  },
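The comment above explains why `taskId` is added: consumers can regroup TOKEN_USAGE events by task even when async hooks interleave them. A minimal sketch of such a consumer, assuming ledger messages shaped like the publish payload in this hunk (the helper name is hypothetical):

```js
// Sketch: regroup TOKEN_USAGE messages by the taskId field added above.
// Assumes each message carries topic and content.data as published in this diff.
function groupTokenUsageByTask(messages) {
  const byTask = new Map();
  for (const msg of messages) {
    if (msg.topic !== 'TOKEN_USAGE') continue;
    const data = msg.content?.data || {};
    const key = data.taskId || 'unknown';
    const bucket = byTask.get(key) || { inputTokens: 0, outputTokens: 0, count: 0 };
    bucket.inputTokens += data.inputTokens || 0;
    bucket.outputTokens += data.outputTokens || 0;
    bucket.count += 1;
    byTask.set(key, bucket);
  }
  return byTask;
}
```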
package/src/ledger.js CHANGED
@@ -116,6 +116,84 @@ class Ledger extends EventEmitter {
116
116
  }
117
117
  }
118
118
 
119
+ /**
120
+ * Append multiple messages atomically using a transaction
121
+ * All messages get contiguous timestamps and are committed together.
122
+ * If any insert fails, the entire batch is rolled back.
123
+ *
124
+ * Use this for task completion messages to prevent interleaving:
125
+ * - TOKEN_USAGE, TASK_COMPLETED, and hook messages published atomically
126
+ * - Other agents' messages cannot appear between them
127
+ *
128
+ * @param {Array<Object>} messages - Array of message objects
129
+ * @returns {Array<Object>} Array of appended messages with generated IDs
130
+ */
131
+ batchAppend(messages) {
132
+ if (!Array.isArray(messages) || messages.length === 0) {
133
+ return [];
134
+ }
135
+
136
+ // Create transaction function - all inserts happen atomically
137
+ const insertMany = this.db.transaction((msgs) => {
138
+ const results = [];
139
+ const baseTimestamp = Date.now();
140
+
141
+ for (let i = 0; i < msgs.length; i++) {
142
+ const message = msgs[i];
143
+ const id = message.id || `msg_${crypto.randomBytes(16).toString('hex')}`;
144
+ // Use incrementing timestamps to preserve order within batch
145
+ const timestamp = message.timestamp || (baseTimestamp + i);
146
+
147
+ const record = {
148
+ id,
149
+ timestamp,
150
+ topic: message.topic,
151
+ sender: message.sender,
152
+ receiver: message.receiver || 'broadcast',
153
+ content_text: message.content?.text || null,
154
+ content_data: message.content?.data ? JSON.stringify(message.content.data) : null,
155
+ metadata: message.metadata ? JSON.stringify(message.metadata) : null,
156
+ cluster_id: message.cluster_id,
157
+ };
158
+
159
+ this.stmts.insert.run(
160
+ record.id,
161
+ record.timestamp,
162
+ record.topic,
163
+ record.sender,
164
+ record.receiver,
165
+ record.content_text,
166
+ record.content_data,
167
+ record.metadata,
168
+ record.cluster_id
169
+ );
170
+
171
+ results.push(this._deserializeMessage(record));
172
+ }
173
+
174
+ return results;
175
+ });
176
+
177
+ try {
178
+ // Execute transaction (atomic - all or nothing)
179
+ const appendedMessages = insertMany(messages);
180
+
181
+ // Invalidate cache
182
+ this.cache.clear();
183
+
184
+ // Emit events for subscriptions AFTER transaction commits
185
+ // This ensures listeners see consistent state
186
+ for (const fullMessage of appendedMessages) {
187
+ this.emit('message', fullMessage);
188
+ this.emit(`topic:${fullMessage.topic}`, fullMessage);
189
+ }
190
+
191
+ return appendedMessages;
192
+ } catch (error) {
193
+ throw new Error(`Failed to batch append messages: ${error.message}`);
194
+ }
195
+ }
196
+
119
197
  /**
120
198
  * Query messages with filters
121
199
  * @param {Object} criteria - Query criteria
@@ -269,6 +347,77 @@ class Ledger extends EventEmitter {
269
347
  return rows.map((row) => this._deserializeMessage(row));
270
348
  }
271
349
 
350
+ /**
351
+ * Get aggregated token usage by agent role
352
+ * Queries TOKEN_USAGE messages and sums tokens per role
353
+ * @param {String} cluster_id - Cluster ID
354
+ * @returns {Object} Token usage aggregated by role
355
+ * Example: {
356
+ * implementation: { inputTokens: 5000, outputTokens: 2000, totalCostUsd: 0.05, count: 3 },
357
+ * validator: { inputTokens: 3000, outputTokens: 1500, totalCostUsd: 0.03, count: 2 },
358
+ * _total: { inputTokens: 8000, outputTokens: 3500, totalCostUsd: 0.08, count: 5 }
359
+ * }
360
+ */
361
+ getTokensByRole(cluster_id) {
362
+ if (!cluster_id) {
363
+ throw new Error('cluster_id is required for getTokensByRole');
364
+ }
365
+
366
+ // Query all TOKEN_USAGE messages for this cluster
367
+ const sql = `SELECT * FROM messages WHERE cluster_id = ? AND topic = 'TOKEN_USAGE' ORDER BY timestamp ASC`;
368
+ const stmt = this.db.prepare(sql);
369
+ const rows = stmt.all(cluster_id);
370
+
371
+ const byRole = {};
372
+ const total = {
373
+ inputTokens: 0,
374
+ outputTokens: 0,
375
+ cacheReadInputTokens: 0,
376
+ cacheCreationInputTokens: 0,
377
+ totalCostUsd: 0,
378
+ count: 0,
379
+ };
380
+
381
+ for (const row of rows) {
382
+ const message = this._deserializeMessage(row);
383
+ const data = message.content?.data || {};
384
+ const role = data.role || 'unknown';
385
+
386
+ // Initialize role bucket if needed
387
+ if (!byRole[role]) {
388
+ byRole[role] = {
389
+ inputTokens: 0,
390
+ outputTokens: 0,
391
+ cacheReadInputTokens: 0,
392
+ cacheCreationInputTokens: 0,
393
+ totalCostUsd: 0,
394
+ count: 0,
395
+ };
396
+ }
397
+
398
+ // Aggregate tokens for this role
399
+ byRole[role].inputTokens += data.inputTokens || 0;
400
+ byRole[role].outputTokens += data.outputTokens || 0;
401
+ byRole[role].cacheReadInputTokens += data.cacheReadInputTokens || 0;
402
+ byRole[role].cacheCreationInputTokens += data.cacheCreationInputTokens || 0;
403
+ byRole[role].totalCostUsd += data.totalCostUsd || 0;
404
+ byRole[role].count += 1;
405
+
406
+ // Aggregate totals
407
+ total.inputTokens += data.inputTokens || 0;
408
+ total.outputTokens += data.outputTokens || 0;
409
+ total.cacheReadInputTokens += data.cacheReadInputTokens || 0;
410
+ total.cacheCreationInputTokens += data.cacheCreationInputTokens || 0;
411
+ total.totalCostUsd += data.totalCostUsd || 0;
412
+ total.count += 1;
413
+ }
414
+
415
+ // Add total as special _total key
416
+ byRole._total = total;
417
+
418
+ return byRole;
419
+ }
420
+
272
421
  /**
273
422
  * Subscribe to new messages
274
423
  * @param {Function} callback - Called with each new message
@@ -126,6 +126,48 @@ class MessageBus extends EventEmitter {
126
126
  return this.ledger.getAll(cluster_id);
127
127
  }
128
128
 
129
+ /**
130
+ * Publish multiple messages atomically
131
+ * All messages are committed in a single transaction with contiguous timestamps.
132
+ * Use this for task completion to prevent message interleaving between agents.
133
+ *
134
+ * @param {Array<Object>} messages - Array of messages to publish
135
+ * @returns {Array<Object>} Published messages with IDs
136
+ */
137
+ batchPublish(messages) {
138
+ // Validate all messages
139
+ for (const message of messages) {
140
+ if (!message.cluster_id) {
141
+ throw new Error('cluster_id is required for all messages');
142
+ }
143
+ if (!message.topic) {
144
+ throw new Error('topic is required for all messages');
145
+ }
146
+ if (!message.sender) {
147
+ throw new Error('sender is required for all messages');
148
+ }
149
+ }
150
+
151
+ // Delegate to ledger's atomic batchAppend
152
+ const published = this.ledger.batchAppend(messages);
153
+
154
+ // Emit to topic-specific listeners for each message
155
+ for (const msg of published) {
156
+ this.emit(`topic:${msg.topic}`, msg);
157
+ }
158
+
159
+ return published;
160
+ }
161
+
162
+ /**
163
+ * Get aggregated token usage by role (passthrough to ledger)
164
+ * @param {String} cluster_id - Cluster ID
165
+ * @returns {Object} Token usage aggregated by role with _total key
166
+ */
167
+ getTokensByRole(cluster_id) {
168
+ return this.ledger.getTokensByRole(cluster_id);
169
+ }
170
+
129
171
  /**
130
172
  * Register a WebSocket client for broadcasts
131
173
  * @param {WebSocket} ws - WebSocket connection
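As the JSDoc above states, `batchPublish()` commits an agent's end-of-task messages in one transaction so other agents' messages cannot interleave between them. A minimal usage sketch, assuming message payloads like those elsewhere in this diff (token counts and text are illustrative):

```js
// Sketch: publish a task's completion messages atomically.
// cluster_id, topic, and sender are required by batchPublish's validation.
messageBus.batchPublish([
  {
    cluster_id: cluster.id,
    topic: 'TOKEN_USAGE',
    sender: agent.id,
    content: { data: { role: agent.role, taskId: agent.currentTaskId, inputTokens: 1200, outputTokens: 340 } },
  },
  {
    cluster_id: cluster.id,
    topic: 'TASK_COMPLETED',
    sender: agent.id,
    content: { text: 'Task finished', data: { taskId: agent.currentTaskId } },
  },
]);
```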
@@ -86,6 +86,7 @@ class StatusFooter {
86
86
  this.clusterId = null;
87
87
  this.clusterState = 'initializing';
88
88
  this.startTime = Date.now();
89
+ this.messageBus = null; // MessageBus for token usage tracking
89
90
 
90
91
  // Robust resize handling state
91
92
  this.isRendering = false; // Render lock - prevents concurrent renders
@@ -137,23 +138,60 @@ class StatusFooter {
137
138
  }
138
139
 
139
140
  /**
140
- * Clear all footer lines (uses last known height for safety)
141
+ * Generate move cursor ANSI sequence (returns string, doesn't write)
142
+ * Used for atomic buffered writes to prevent interleaving
143
+ * @param {number} row - 1-based row
144
+ * @param {number} col - 1-based column
145
+ * @returns {string} ANSI escape sequence
141
146
  * @private
142
147
  */
143
- _clearFooterArea() {
148
+ _moveToStr(row, col) {
149
+ return `${CSI}${row};${col}H`;
150
+ }
151
+
152
+ /**
153
+ * Generate clear line ANSI sequence (returns string, doesn't write)
154
+ * Used for atomic buffered writes to prevent interleaving
155
+ * @param {number} row - 1-based row number
156
+ * @returns {string} ANSI escape sequence
157
+ * @private
158
+ */
159
+ _clearLineStr(row) {
160
+ return `${CSI}${row};1H${CLEAR_LINE}`;
161
+ }
162
+
163
+ /**
164
+ * Generate ANSI sequences to clear all footer lines (returns string)
165
+ * Used for atomic buffered writes to prevent interleaving
166
+ * @returns {string} ANSI escape sequences
167
+ * @private
168
+ */
169
+ _clearFooterAreaStr() {
144
170
  const { rows } = this.getTerminalSize();
145
171
  // Use max of current and last footer height to ensure full cleanup
146
172
  const heightToClear = Math.max(this.footerHeight, this.lastFooterHeight, 3);
147
173
  const startRow = Math.max(1, rows - heightToClear + 1);
148
174
 
175
+ let buffer = '';
149
176
  for (let row = startRow; row <= rows; row++) {
150
- this._clearLine(row);
177
+ buffer += this._clearLineStr(row);
151
178
  }
179
+ return buffer;
180
+ }
181
+
182
+ /**
183
+ * Clear all footer lines (uses last known height for safety)
184
+ * Uses single atomic write to prevent interleaving with other processes
185
+ * @private
186
+ */
187
+ _clearFooterArea() {
188
+ process.stdout.write(this._clearFooterAreaStr());
152
189
  }
153
190
 
154
191
  /**
155
192
  * Set up scroll region to reserve space for footer
156
193
  * ROBUST: Clears footer area first, resets to full screen, then sets new region
194
+ * Uses single atomic write to prevent interleaving with other processes
157
195
  */
158
196
  setupScrollRegion() {
159
197
  if (!this.isTTY()) return;
@@ -178,39 +216,53 @@ class StatusFooter {
178
216
 
179
217
  const scrollEnd = rows - this.footerHeight;
180
218
 
181
- // CRITICAL: Save cursor before any manipulation
182
- process.stdout.write(SAVE_CURSOR);
183
- process.stdout.write(HIDE_CURSOR);
219
+ // BUILD ENTIRE OUTPUT INTO SINGLE BUFFER for atomic write
220
+ let buffer = '';
184
221
 
185
- // Step 1: Reset scroll region to full screen first (prevents artifacts)
186
- process.stdout.write(`${CSI}1;${rows}r`);
222
+ // Step 1: Save cursor before any manipulation
223
+ buffer += SAVE_CURSOR;
224
+ buffer += HIDE_CURSOR;
187
225
 
188
- // Step 2: Clear footer area completely (prevents ghosting)
189
- this._clearFooterArea();
226
+ // Step 2: Reset scroll region to full screen first (prevents artifacts)
227
+ buffer += `${CSI}1;${rows}r`;
190
228
 
191
- // Step 3: Set new scroll region (lines 1 to scrollEnd)
192
- process.stdout.write(`${CSI}1;${scrollEnd}r`);
229
+ // Step 3: Clear footer area completely (prevents ghosting)
230
+ buffer += this._clearFooterAreaStr();
193
231
 
194
- // Step 4: Move cursor to bottom of scroll region (safe position)
195
- process.stdout.write(`${CSI}${scrollEnd};1H`);
232
+ // Step 4: Set new scroll region (lines 1 to scrollEnd)
233
+ buffer += `${CSI}1;${scrollEnd}r`;
196
234
 
197
- // Restore cursor and show it
198
- process.stdout.write(RESTORE_CURSOR);
199
- process.stdout.write(SHOW_CURSOR);
235
+ // Step 5: Move cursor to bottom of scroll region (safe position)
236
+ buffer += this._moveToStr(scrollEnd, 1);
237
+
238
+ // Step 6: Restore cursor and show it
239
+ buffer += RESTORE_CURSOR;
240
+ buffer += SHOW_CURSOR;
241
+
242
+ // SINGLE ATOMIC WRITE - prevents interleaving
243
+ process.stdout.write(buffer);
200
244
 
201
245
  this.scrollRegionSet = true;
202
246
  this.lastKnownRows = rows;
203
247
  this.lastKnownCols = cols;
204
248
  }
205
249
 
250
+ /**
251
+ * Generate reset scroll region string (returns string, doesn't write)
252
+ * @private
253
+ */
254
+ _resetScrollRegionStr() {
255
+ const { rows } = this.getTerminalSize();
256
+ return `${CSI}1;${rows}r`;
257
+ }
258
+
206
259
  /**
207
260
  * Reset scroll region to full terminal
208
261
  */
209
262
  resetScrollRegion() {
210
263
  if (!this.isTTY()) return;
211
264
 
212
- const { rows } = this.getTerminalSize();
213
- process.stdout.write(`${CSI}1;${rows}r`);
265
+ process.stdout.write(this._resetScrollRegionStr());
214
266
  this.scrollRegionSet = false;
215
267
  }
216
268
 
@@ -251,6 +303,14 @@ class StatusFooter {
251
303
  this.clusterId = clusterId;
252
304
  }
253
305
 
306
+ /**
307
+ * Set message bus for token usage tracking
308
+ * @param {object} messageBus - MessageBus instance with getTokensByRole()
309
+ */
310
+ setMessageBus(messageBus) {
311
+ this.messageBus = messageBus;
312
+ }
313
+
254
314
  /**
255
315
  * Update cluster state
256
316
  * @param {string} state
@@ -401,32 +461,37 @@ class StatusFooter {
401
461
  const agentRows = this.buildAgentRows(executingAgents, cols);
402
462
  const summaryLine = this.buildSummaryLine(cols);
403
463
 
404
- // Save cursor, render footer, restore cursor
405
- process.stdout.write(SAVE_CURSOR);
406
- process.stdout.write(HIDE_CURSOR);
464
+ // BUILD ENTIRE OUTPUT INTO SINGLE BUFFER for atomic write
465
+ // This prevents interleaving with other processes writing to stdout
466
+ let buffer = '';
467
+ buffer += SAVE_CURSOR;
468
+ buffer += HIDE_CURSOR;
407
469
 
408
470
  // Render from top of footer area
409
471
  let currentRow = rows - this.footerHeight + 1;
410
472
 
411
473
  // Header line
412
- this.moveTo(currentRow++, 1);
413
- process.stdout.write(CLEAR_LINE);
414
- process.stdout.write(`${COLORS.bgBlack}${headerLine}${COLORS.reset}`);
474
+ buffer += this._moveToStr(currentRow++, 1);
475
+ buffer += CLEAR_LINE;
476
+ buffer += `${COLORS.bgBlack}${headerLine}${COLORS.reset}`;
415
477
 
416
478
  // Agent rows
417
479
  for (const agentRow of agentRows) {
418
- this.moveTo(currentRow++, 1);
419
- process.stdout.write(CLEAR_LINE);
420
- process.stdout.write(`${COLORS.bgBlack}${agentRow}${COLORS.reset}`);
480
+ buffer += this._moveToStr(currentRow++, 1);
481
+ buffer += CLEAR_LINE;
482
+ buffer += `${COLORS.bgBlack}${agentRow}${COLORS.reset}`;
421
483
  }
422
484
 
423
485
  // Summary line (with bottom border)
424
- this.moveTo(currentRow, 1);
425
- process.stdout.write(CLEAR_LINE);
426
- process.stdout.write(`${COLORS.bgBlack}${summaryLine}${COLORS.reset}`);
486
+ buffer += this._moveToStr(currentRow, 1);
487
+ buffer += CLEAR_LINE;
488
+ buffer += `${COLORS.bgBlack}${summaryLine}${COLORS.reset}`;
489
+
490
+ buffer += RESTORE_CURSOR;
491
+ buffer += SHOW_CURSOR;
427
492
 
428
- process.stdout.write(RESTORE_CURSOR);
429
- process.stdout.write(SHOW_CURSOR);
493
+ // SINGLE ATOMIC WRITE - prevents interleaving
494
+ process.stdout.write(buffer);
430
495
  } finally {
431
496
  this.isRendering = false;
432
497
 
@@ -554,6 +619,21 @@ class StatusFooter {
554
619
  const total = this.agents.size;
555
620
  parts.push(` ${COLORS.gray}│${COLORS.reset} ${COLORS.green}${executing}/${total}${COLORS.reset} active`);
556
621
 
622
+ // Token cost (from message bus)
623
+ if (this.messageBus && this.clusterId) {
624
+ try {
625
+ const tokensByRole = this.messageBus.getTokensByRole(this.clusterId);
626
+ const totalCost = tokensByRole?._total?.totalCostUsd || 0;
627
+ if (totalCost > 0) {
628
+ // Format: $0.05 or $1.23 or $12.34
629
+ const costStr = totalCost < 0.01 ? '<$0.01' : `$${totalCost.toFixed(2)}`;
630
+ parts.push(` ${COLORS.gray}│${COLORS.reset} ${COLORS.yellow}${costStr}${COLORS.reset}`);
631
+ }
632
+ } catch {
633
+ // Ignore errors - token tracking is optional
634
+ }
635
+ }
636
+
557
637
  // Aggregate metrics
558
638
  let totalCpu = 0;
559
639
  let totalMem = 0;
@@ -621,7 +701,9 @@ class StatusFooter {
621
701
  process.stdout.on('resize', this._debouncedResize);
622
702
 
623
703
  // Start refresh interval
704
+ // Guard: Skip if previous render still running (prevents overlapping renders)
624
705
  this.intervalId = setInterval(() => {
706
+ if (this.isRendering) return;
625
707
  this.render();
626
708
  }, this.refreshInterval);
627
709
 
@@ -642,17 +724,25 @@ class StatusFooter {
642
724
  process.stdout.removeListener('resize', this._debouncedResize);
643
725
 
644
726
  if (this.isTTY() && !this.hidden) {
727
+ // BUILD SINGLE BUFFER for atomic shutdown write
728
+ // Prevents interleaving with agent output during cleanup
729
+ let buffer = '';
730
+
645
731
  // Reset scroll region
646
- this.resetScrollRegion();
732
+ buffer += this._resetScrollRegionStr();
733
+ this.scrollRegionSet = false;
647
734
 
648
735
  // Clear all footer lines
649
- this._clearFooterArea();
736
+ buffer += this._clearFooterAreaStr();
650
737
 
651
- // Move cursor to safe position
738
+ // Move cursor to safe position and show cursor
652
739
  const { rows } = this.getTerminalSize();
653
740
  const startRow = rows - this.footerHeight + 1;
654
- this.moveTo(startRow, 1);
655
- process.stdout.write(SHOW_CURSOR);
741
+ buffer += this._moveToStr(startRow, 1);
742
+ buffer += SHOW_CURSOR;
743
+
744
+ // SINGLE ATOMIC WRITE
745
+ process.stdout.write(buffer);
656
746
  }
657
747
  }
658
748
 
@@ -662,8 +752,11 @@ class StatusFooter {
662
752
  hide() {
663
753
  if (!this.isTTY()) return;
664
754
 
665
- this.resetScrollRegion();
666
- this._clearFooterArea();
755
+ // Single atomic write for hide operation
756
+ let buffer = this._resetScrollRegionStr();
757
+ this.scrollRegionSet = false;
758
+ buffer += this._clearFooterAreaStr();
759
+ process.stdout.write(buffer);
667
760
  }
668
761
 
669
762
  /**