attocode 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/CHANGELOG.md +111 -1
  2. package/README.md +7 -0
  3. package/dist/src/adapters.d.ts +6 -1
  4. package/dist/src/adapters.d.ts.map +1 -1
  5. package/dist/src/adapters.js +14 -1
  6. package/dist/src/adapters.js.map +1 -1
  7. package/dist/src/agent.d.ts +50 -0
  8. package/dist/src/agent.d.ts.map +1 -1
  9. package/dist/src/agent.js +734 -316
  10. package/dist/src/agent.js.map +1 -1
  11. package/dist/src/defaults.d.ts +1 -1
  12. package/dist/src/defaults.d.ts.map +1 -1
  13. package/dist/src/defaults.js +2 -0
  14. package/dist/src/defaults.js.map +1 -1
  15. package/dist/src/integrations/agent-registry.d.ts +9 -2
  16. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  17. package/dist/src/integrations/agent-registry.js +30 -4
  18. package/dist/src/integrations/agent-registry.js.map +1 -1
  19. package/dist/src/integrations/async-subagent.d.ts +135 -0
  20. package/dist/src/integrations/async-subagent.d.ts.map +1 -0
  21. package/dist/src/integrations/async-subagent.js +213 -0
  22. package/dist/src/integrations/async-subagent.js.map +1 -0
  23. package/dist/src/integrations/auto-checkpoint.d.ts +98 -0
  24. package/dist/src/integrations/auto-checkpoint.d.ts.map +1 -0
  25. package/dist/src/integrations/auto-checkpoint.js +252 -0
  26. package/dist/src/integrations/auto-checkpoint.js.map +1 -0
  27. package/dist/src/integrations/budget-pool.d.ts +13 -1
  28. package/dist/src/integrations/budget-pool.d.ts.map +1 -1
  29. package/dist/src/integrations/budget-pool.js +17 -0
  30. package/dist/src/integrations/budget-pool.js.map +1 -1
  31. package/dist/src/integrations/complexity-classifier.d.ts +86 -0
  32. package/dist/src/integrations/complexity-classifier.d.ts.map +1 -0
  33. package/dist/src/integrations/complexity-classifier.js +233 -0
  34. package/dist/src/integrations/complexity-classifier.js.map +1 -0
  35. package/dist/src/integrations/delegation-protocol.d.ts +86 -0
  36. package/dist/src/integrations/delegation-protocol.d.ts.map +1 -0
  37. package/dist/src/integrations/delegation-protocol.js +127 -0
  38. package/dist/src/integrations/delegation-protocol.js.map +1 -0
  39. package/dist/src/integrations/dynamic-budget.d.ts +81 -0
  40. package/dist/src/integrations/dynamic-budget.d.ts.map +1 -0
  41. package/dist/src/integrations/dynamic-budget.js +151 -0
  42. package/dist/src/integrations/dynamic-budget.js.map +1 -0
  43. package/dist/src/integrations/economics.d.ts +44 -1
  44. package/dist/src/integrations/economics.d.ts.map +1 -1
  45. package/dist/src/integrations/economics.js +182 -3
  46. package/dist/src/integrations/economics.js.map +1 -1
  47. package/dist/src/integrations/environment-facts.d.ts +52 -0
  48. package/dist/src/integrations/environment-facts.d.ts.map +1 -0
  49. package/dist/src/integrations/environment-facts.js +84 -0
  50. package/dist/src/integrations/environment-facts.js.map +1 -0
  51. package/dist/src/integrations/index.d.ts +16 -1
  52. package/dist/src/integrations/index.d.ts.map +1 -1
  53. package/dist/src/integrations/index.js +31 -1
  54. package/dist/src/integrations/index.js.map +1 -1
  55. package/dist/src/integrations/injection-budget.d.ts +71 -0
  56. package/dist/src/integrations/injection-budget.d.ts.map +1 -0
  57. package/dist/src/integrations/injection-budget.js +136 -0
  58. package/dist/src/integrations/injection-budget.js.map +1 -0
  59. package/dist/src/integrations/mcp-client.d.ts.map +1 -1
  60. package/dist/src/integrations/mcp-client.js +14 -0
  61. package/dist/src/integrations/mcp-client.js.map +1 -1
  62. package/dist/src/integrations/mcp-custom-tools.d.ts +102 -0
  63. package/dist/src/integrations/mcp-custom-tools.d.ts.map +1 -0
  64. package/dist/src/integrations/mcp-custom-tools.js +232 -0
  65. package/dist/src/integrations/mcp-custom-tools.js.map +1 -0
  66. package/dist/src/integrations/mcp-tool-validator.d.ts +60 -0
  67. package/dist/src/integrations/mcp-tool-validator.d.ts.map +1 -0
  68. package/dist/src/integrations/mcp-tool-validator.js +141 -0
  69. package/dist/src/integrations/mcp-tool-validator.js.map +1 -0
  70. package/dist/src/integrations/routing.d.ts +2 -1
  71. package/dist/src/integrations/routing.d.ts.map +1 -1
  72. package/dist/src/integrations/routing.js.map +1 -1
  73. package/dist/src/integrations/self-improvement.d.ts +90 -0
  74. package/dist/src/integrations/self-improvement.d.ts.map +1 -0
  75. package/dist/src/integrations/self-improvement.js +217 -0
  76. package/dist/src/integrations/self-improvement.js.map +1 -0
  77. package/dist/src/integrations/smart-decomposer.d.ts +4 -0
  78. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  79. package/dist/src/integrations/smart-decomposer.js +55 -28
  80. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  81. package/dist/src/integrations/subagent-output-store.d.ts +91 -0
  82. package/dist/src/integrations/subagent-output-store.d.ts.map +1 -0
  83. package/dist/src/integrations/subagent-output-store.js +257 -0
  84. package/dist/src/integrations/subagent-output-store.js.map +1 -0
  85. package/dist/src/integrations/swarm/index.d.ts +1 -1
  86. package/dist/src/integrations/swarm/index.d.ts.map +1 -1
  87. package/dist/src/integrations/swarm/index.js +1 -1
  88. package/dist/src/integrations/swarm/index.js.map +1 -1
  89. package/dist/src/integrations/swarm/model-selector.d.ts +1 -0
  90. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
  91. package/dist/src/integrations/swarm/model-selector.js +37 -3
  92. package/dist/src/integrations/swarm/model-selector.js.map +1 -1
  93. package/dist/src/integrations/swarm/swarm-config-loader.d.ts +10 -1
  94. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
  95. package/dist/src/integrations/swarm/swarm-config-loader.js +72 -6
  96. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
  97. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
  98. package/dist/src/integrations/swarm/swarm-event-bridge.js +26 -4
  99. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
  100. package/dist/src/integrations/swarm/swarm-events.d.ts +11 -0
  101. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
  102. package/dist/src/integrations/swarm/swarm-events.js +4 -0
  103. package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
  104. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +11 -0
  105. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
  106. package/dist/src/integrations/swarm/swarm-orchestrator.js +233 -10
  107. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
  108. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +9 -2
  109. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
  110. package/dist/src/integrations/swarm/swarm-quality-gate.js +128 -11
  111. package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
  112. package/dist/src/integrations/swarm/task-queue.d.ts +11 -1
  113. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
  114. package/dist/src/integrations/swarm/task-queue.js +125 -15
  115. package/dist/src/integrations/swarm/task-queue.js.map +1 -1
  116. package/dist/src/integrations/swarm/types.d.ts +40 -1
  117. package/dist/src/integrations/swarm/types.d.ts.map +1 -1
  118. package/dist/src/integrations/swarm/types.js +6 -1
  119. package/dist/src/integrations/swarm/types.js.map +1 -1
  120. package/dist/src/integrations/swarm/worker-pool.d.ts +9 -3
  121. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
  122. package/dist/src/integrations/swarm/worker-pool.js +89 -17
  123. package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
  124. package/dist/src/integrations/thinking-strategy.d.ts +52 -0
  125. package/dist/src/integrations/thinking-strategy.d.ts.map +1 -0
  126. package/dist/src/integrations/thinking-strategy.js +129 -0
  127. package/dist/src/integrations/thinking-strategy.js.map +1 -0
  128. package/dist/src/integrations/tool-recommendation.d.ts +58 -0
  129. package/dist/src/integrations/tool-recommendation.d.ts.map +1 -0
  130. package/dist/src/integrations/tool-recommendation.js +215 -0
  131. package/dist/src/integrations/tool-recommendation.js.map +1 -0
  132. package/dist/src/integrations/verification-gate.d.ts +80 -0
  133. package/dist/src/integrations/verification-gate.d.ts.map +1 -0
  134. package/dist/src/integrations/verification-gate.js +146 -0
  135. package/dist/src/integrations/verification-gate.js.map +1 -0
  136. package/dist/src/integrations/work-log.d.ts +87 -0
  137. package/dist/src/integrations/work-log.d.ts.map +1 -0
  138. package/dist/src/integrations/work-log.js +275 -0
  139. package/dist/src/integrations/work-log.js.map +1 -0
  140. package/dist/src/main.js +5 -4
  141. package/dist/src/main.js.map +1 -1
  142. package/dist/src/modes.d.ts +6 -0
  143. package/dist/src/modes.d.ts.map +1 -1
  144. package/dist/src/modes.js +73 -2
  145. package/dist/src/modes.js.map +1 -1
  146. package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
  147. package/dist/src/providers/adapters/anthropic.js +20 -3
  148. package/dist/src/providers/adapters/anthropic.js.map +1 -1
  149. package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
  150. package/dist/src/providers/adapters/openrouter.js +3 -1
  151. package/dist/src/providers/adapters/openrouter.js.map +1 -1
  152. package/dist/src/providers/types.d.ts +4 -0
  153. package/dist/src/providers/types.d.ts.map +1 -1
  154. package/dist/src/providers/types.js.map +1 -1
  155. package/dist/src/tools/bash.d.ts +8 -2
  156. package/dist/src/tools/bash.d.ts.map +1 -1
  157. package/dist/src/tools/bash.js +14 -1
  158. package/dist/src/tools/bash.js.map +1 -1
  159. package/dist/src/tools/coercion.d.ts +14 -0
  160. package/dist/src/tools/coercion.d.ts.map +1 -0
  161. package/dist/src/tools/coercion.js +25 -0
  162. package/dist/src/tools/coercion.js.map +1 -0
  163. package/dist/src/tools/file.d.ts +2 -2
  164. package/dist/src/tools/file.d.ts.map +1 -1
  165. package/dist/src/tools/file.js +2 -1
  166. package/dist/src/tools/file.js.map +1 -1
  167. package/dist/src/tools/standard.d.ts +17 -1
  168. package/dist/src/tools/standard.d.ts.map +1 -1
  169. package/dist/src/tools/standard.js +64 -11
  170. package/dist/src/tools/standard.js.map +1 -1
  171. package/dist/src/tui/app.d.ts.map +1 -1
  172. package/dist/src/tui/app.js +8 -1
  173. package/dist/src/tui/app.js.map +1 -1
  174. package/dist/src/tui/event-display.d.ts.map +1 -1
  175. package/dist/src/tui/event-display.js +8 -1
  176. package/dist/src/tui/event-display.js.map +1 -1
  177. package/dist/src/types.d.ts +26 -0
  178. package/dist/src/types.d.ts.map +1 -1
  179. package/package.json +6 -2
package/dist/src/agent.js CHANGED
@@ -21,7 +21,11 @@
21
21
  import { buildConfig, isFeatureEnabled, getEnabledFeatures, getSubagentTimeout, getSubagentMaxIterations, } from './defaults.js';
22
22
  import { createModeManager, formatModeList, parseMode, calculateTaskSimilarity, SUBAGENT_PLAN_MODE_ADDITION, } from './modes.js';
23
23
  import { createLSPFileTools, } from './agent-tools/index.js';
24
- import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, } from './integrations/index.js';
24
+ import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createDynamicBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate,
25
+ // Phase 2: Orchestration
26
+ classifyComplexity, getScalingGuidance, buildDelegationPrompt, createMinimalDelegationSpec, getSubagentQualityPrompt, ToolRecommendationEngine, createToolRecommendationEngine, createInjectionBudgetManager,
27
+ // Phase 3: Advanced
28
+ getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, createSubagentSupervisor, createSubagentHandle, } from './integrations/index.js';
25
29
  // Lesson 26: Tracing & Evaluation integration
26
30
  import { createTraceCollector } from './tracing/trace-collector.js';
27
31
  // Model registry for context window limits
@@ -34,6 +38,119 @@ import { createTaskTools, } from './tools/tasks.js';
34
38
  // =============================================================================
35
39
  // PRODUCTION AGENT
36
40
  // =============================================================================
/**
 * Tools that are safe to execute in parallel (read-only, no side effects).
 * These tools don't modify state, so running them concurrently is safe.
 */
export const PARALLELIZABLE_TOOLS = new Set([
    'read_file', 'glob', 'grep', 'list_files', 'search_files',
    'search_code', 'get_file_info',
]);
/**
 * Tools that can run in parallel IF they target different files.
 * write_file and edit_file on different paths are safe to parallelize.
 */
export const CONDITIONALLY_PARALLEL_TOOLS = new Set([
    'write_file', 'edit_file',
]);
/**
 * Extract the target file path from a tool call.
 *
 * Looks for a string value under `path`, `file_path`, `filename` or `file`,
 * first on the tool call itself, then inside its `args`, `input` and
 * `arguments` objects (in that order). `arguments` is checked last so that
 * every lookup that already succeeded keeps returning the same value.
 *
 * @param {object} toolCall - Tool call to inspect.
 * @returns {string|null} The first matching path, or null if none can be
 *   determined (including when `toolCall` is not an object).
 */
export function extractToolFilePath(toolCall) {
    if (toolCall === null || typeof toolCall !== 'object')
        return null;
    const pathKeys = ['path', 'file_path', 'filename', 'file'];
    // The tool call itself, then each known argument container.
    const candidates = [toolCall, toolCall.args, toolCall.input, toolCall.arguments];
    for (const candidate of candidates) {
        if (!candidate || typeof candidate !== 'object')
            continue;
        for (const key of pathKeys) {
            if (typeof candidate[key] === 'string')
                return candidate[key];
        }
    }
    return null;
}
/**
 * Check if a conditionally-parallel tool call conflicts with any tool
 * in the current accumulator (same file path).
 *
 * @param {object} toolCall - The write/edit call about to be batched.
 * @param {object[]} accumulator - Tool calls already in the pending batch.
 * @returns {boolean} True when the call must not join the pending batch.
 */
function hasFileConflict(toolCall, accumulator) {
    const path = extractToolFilePath(toolCall);
    if (!path)
        return true; // Can't determine path → assume conflict
    // Paths are compared as exact strings; no normalization is applied.
    return accumulator.some((existing) => extractToolFilePath(existing) === path);
}
/**
 * Check if a read-only tool call targets a file that a write already in the
 * accumulator is about to modify. Running both in the same concurrent batch
 * would let the read observe a partially written file.
 *
 * @param {object} toolCall - The read-only call about to be batched.
 * @param {object[]} accumulator - Tool calls already in the pending batch.
 * @param {(tc: object) => boolean} isParallelizable - Read-only predicate;
 *   accumulator entries that fail it are the pending writes.
 * @returns {boolean} True when the read must wait for the pending batch.
 */
function readsPendingWrite(toolCall, accumulator, isParallelizable) {
    const readPath = extractToolFilePath(toolCall);
    if (readPath === null)
        return false; // No determinable target → keep the original batching
    return accumulator.some((existing) => !isParallelizable(existing) && extractToolFilePath(existing) === readPath);
}
/**
 * Groups tool calls into batches for parallel/sequential execution.
 * Uses accumulate-and-flush: parallelizable tools accumulate until a
 * non-parallelizable tool flushes them as a batch.
 *
 * Conditional parallelism: write_file/edit_file on DIFFERENT files can share
 * a batch. A call touching a file that the pending batch already touches
 * (write after read/write, or read after write) starts a new batch, so the
 * original relative order of accesses to one file is preserved.
 *
 * Example: [read1, read2, bash, read3, grep] → [[read1, read2], [bash], [read3, grep]]
 * Example: [write_a, write_b, write_a] → [[write_a, write_b], [write_a]]
 * Example: [write_a, read_a] → [[write_a], [read_a]]
 *
 * @param {object[]} toolCalls - Tool calls in the order the model issued them.
 * @param {(tc: object) => boolean} [isParallelizable] - True for read-only tools.
 * @param {(tc: object) => boolean} [isConditionallyParallel] - True for tools
 *   that may run in parallel only when they target different files.
 * @returns {object[][]} Batches in execution order; members of one batch may
 *   run concurrently.
 */
export function groupToolCallsIntoBatches(toolCalls, isParallelizable = (tc) => PARALLELIZABLE_TOOLS.has(tc.name), isConditionallyParallel = (tc) => CONDITIONALLY_PARALLEL_TOOLS.has(tc.name)) {
    if (toolCalls.length === 0)
        return [];
    const batches = [];
    let parallelAccum = [];
    // Emit the pending batch (if any) and start a fresh accumulator.
    const flush = () => {
        if (parallelAccum.length > 0) {
            batches.push(parallelAccum);
            parallelAccum = [];
        }
    };
    for (const toolCall of toolCalls) {
        if (isParallelizable(toolCall)) {
            // A read of a file with a pending write must not race that write.
            if (readsPendingWrite(toolCall, parallelAccum, isParallelizable)) {
                flush();
            }
            parallelAccum.push(toolCall);
        }
        else if (isConditionallyParallel(toolCall)) {
            // Conflict: flush current batch, start new one with this tool.
            if (hasFileConflict(toolCall, parallelAccum)) {
                flush();
            }
            parallelAccum.push(toolCall);
        }
        else {
            // Non-parallelizable tool gets its own batch.
            flush();
            batches.push([toolCall]);
        }
    }
    // Flush remaining parallel tools
    flush();
    return batches;
}
37
154
  /**
38
155
  * Production-ready agent that composes all features.
39
156
  */
@@ -73,12 +190,22 @@ export class ProductionAgent {
73
190
  fileChangeTracker = null;
74
191
  capabilitiesRegistry = null;
75
192
  toolResolver = null;
193
+ agentId;
76
194
  blackboard = null;
77
195
  fileCache = null;
78
196
  budgetPool = null;
79
197
  taskManager = null;
80
198
  store = null;
81
199
  swarmOrchestrator = null;
200
+ workLog = null;
201
+ verificationGate = null;
202
+ // Phase 2-4 integration modules
203
+ injectionBudget = null;
204
+ selfImprovement = null;
205
+ subagentOutputStore = null;
206
+ autoCheckpointManager = null;
207
+ toolRecommendation = null;
208
+ lastComplexityAssessment = null;
82
209
  // Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
83
210
  // Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
84
211
  spawnedTasks = new Map();
@@ -124,6 +251,8 @@ export class ProductionAgent {
124
251
  // Build complete config with defaults
125
252
  this.config = buildConfig(userConfig);
126
253
  this.provider = userConfig.provider;
254
+ // Set unique agent ID (passed from spawnAgent for subagents, auto-generated for parents)
255
+ this.agentId = userConfig.agentId || `agent-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
127
256
  // Initialize tool registry
128
257
  this.tools = new Map();
129
258
  for (const tool of this.config.tools) {
@@ -271,6 +400,19 @@ export class ProductionAgent {
271
400
  maxIterations: this.config.maxIterations,
272
401
  targetIterations: Math.min(baseBudget.targetIterations ?? 20, this.config.maxIterations),
273
402
  });
403
+ // Work Log - compaction-resilient summary of agent work
404
+ // Always enabled - minimal overhead and critical for long-running tasks
405
+ this.workLog = createWorkLog();
406
+ // Verification Gate - opt-in completion verification
407
+ if (this.config.verificationCriteria) {
408
+ this.verificationGate = createVerificationGate(this.config.verificationCriteria);
409
+ }
410
+ // Phase 2-4: Orchestration & Advanced modules (always enabled, lightweight)
411
+ this.injectionBudget = createInjectionBudgetManager();
412
+ this.selfImprovement = createSelfImprovementProtocol(undefined, this.learningStore ?? undefined);
413
+ this.subagentOutputStore = createSubagentOutputStore({ persistToFile: false });
414
+ this.autoCheckpointManager = createAutoCheckpointManager({ enabled: true });
415
+ this.toolRecommendation = createToolRecommendationEngine();
274
416
  // Agent Registry - always enabled for subagent support
275
417
  this.agentRegistry = new AgentRegistry();
276
418
  // Load user agents asynchronously - tracked for ensureReady()
@@ -301,6 +443,15 @@ export class ProductionAgent {
301
443
  for (const tool of taskTools) {
302
444
  this.tools.set(tool.name, tool);
303
445
  }
446
+ // Built-in web search (Serper API) — gracefully handles missing API key
447
+ const serperCustomTool = createSerperSearchTool();
448
+ this.tools.set('web_search', {
449
+ name: serperCustomTool.name,
450
+ description: serperCustomTool.description,
451
+ parameters: serperCustomTool.inputSchema,
452
+ execute: serperCustomTool.execute,
453
+ dangerLevel: 'safe',
454
+ });
304
455
  // Swarm Mode (experimental)
305
456
  if (this.config.swarm) {
306
457
  const swarmConfig = this.config.swarm;
@@ -797,6 +948,10 @@ export class ProductionAgent {
797
948
  try {
798
949
  // Check for cancellation before starting
799
950
  cancellationToken?.throwIfCancellationRequested();
951
+ // Classify task complexity for scaling guidance
952
+ this.lastComplexityAssessment = classifyComplexity(task, {
953
+ hasActivePlan: !!this.state.plan,
954
+ });
800
955
  // Check if swarm mode should handle this task
801
956
  if (this.swarmOrchestrator) {
802
957
  const swarmResult = await this.runSwarm(task);
@@ -1079,6 +1234,14 @@ export class ProductionAgent {
1079
1234
  content: `[CONTEXT REDUCED: Earlier messages were removed to stay within budget. Conversation continues from recent context.]`,
1080
1235
  });
1081
1236
  messages.push(...recentMessages);
1237
+ // Inject work log after emergency truncation to prevent amnesia
1238
+ if (this.workLog?.hasContent()) {
1239
+ const workLogMessage = {
1240
+ role: 'user',
1241
+ content: this.workLog.toCompactString(),
1242
+ };
1243
+ messages.push(workLogMessage);
1244
+ }
1082
1245
  // Update state messages too
1083
1246
  this.state.messages.length = 0;
1084
1247
  this.state.messages.push(...messages);
@@ -1259,6 +1422,35 @@ export class ProductionAgent {
1259
1422
  }
1260
1423
  }
1261
1424
  // =====================================================================
1425
+ // INJECTION BUDGET ANALYSIS (Phase 2 - monitoring mode)
1426
+ // Collects stats on context injections without gating; logs when
1427
+ // budget would have dropped items. Validates system before enabling gating.
1428
+ // =====================================================================
1429
+ if (this.injectionBudget) {
1430
+ const proposals = [];
1431
+ if (budgetInjectedPrompt) {
1432
+ proposals.push({ name: 'budget_warning', priority: 0, maxTokens: 500, content: budgetInjectedPrompt });
1433
+ }
1434
+ // Approximate recitation content (actual injection handled above)
1435
+ if (this.contextEngineering) {
1436
+ const failureCtx = this.contextEngineering.getFailureContext(5);
1437
+ if (failureCtx) {
1438
+ proposals.push({ name: 'failure_context', priority: 2, maxTokens: 300, content: failureCtx });
1439
+ }
1440
+ }
1441
+ if (proposals.length > 0) {
1442
+ const accepted = this.injectionBudget.allocate(proposals);
1443
+ const stats = this.injectionBudget.getLastStats();
1444
+ if (stats && stats.droppedNames.length > 0 && process.env.DEBUG) {
1445
+ console.log(`[injection-budget] Would drop: ${stats.droppedNames.join(', ')} (${stats.proposedTokens} proposed, ${stats.acceptedTokens} accepted)`);
1446
+ }
1447
+ // Log total injection overhead for observability
1448
+ if (stats && process.env.DEBUG_LLM) {
1449
+ console.log(`[injection-budget] Iteration ${this.state.iteration}: ${accepted.length}/${proposals.length} injections, ~${stats.acceptedTokens} tokens`);
1450
+ }
1451
+ }
1452
+ }
1453
+ // =====================================================================
1262
1454
  // RESILIENT LLM CALL: Empty response retries + max_tokens continuation
1263
1455
  // =====================================================================
1264
1456
  // Get resilience config
@@ -1554,6 +1746,24 @@ export class ProductionAgent {
1554
1746
  });
1555
1747
  incompleteActionRetries = 0;
1556
1748
  }
1749
+ // Verification gate: if criteria not met, nudge agent to verify before completing
1750
+ if (this.verificationGate && !forceTextOnly) {
1751
+ const vResult = this.verificationGate.check();
1752
+ if (!vResult.satisfied && !vResult.forceAllow && vResult.nudge) {
1753
+ // Inject nudge and continue the loop
1754
+ const nudgeMessage = {
1755
+ role: 'user',
1756
+ content: vResult.nudge,
1757
+ };
1758
+ messages.push(nudgeMessage);
1759
+ this.state.messages.push(nudgeMessage);
1760
+ this.observability?.logger?.info('Verification gate nudge', {
1761
+ missing: vResult.missing,
1762
+ nudgeCount: this.verificationGate.getState().nudgeCount,
1763
+ });
1764
+ continue;
1765
+ }
1766
+ }
1557
1767
  // No tool calls (or forced to ignore), agent is done - compact tool outputs to save context
1558
1768
  // The model has "consumed" the tool outputs and produced a response,
1559
1769
  // so we can replace verbose outputs with compact summaries
@@ -1590,12 +1800,33 @@ export class ProductionAgent {
1590
1800
  // Execute tool calls (we know toolCalls is defined here due to the check above)
1591
1801
  const toolCalls = response.toolCalls;
1592
1802
  const toolResults = await this.executeToolCalls(toolCalls);
1593
- // Record tool calls for economics/progress tracking
1803
+ // Record tool calls for economics/progress tracking + work log
1594
1804
  for (let i = 0; i < toolCalls.length; i++) {
1595
1805
  const toolCall = toolCalls[i];
1596
1806
  const result = toolResults[i];
1597
1807
  executedToolNames.add(toolCall.name);
1598
1808
  this.economics?.recordToolCall(toolCall.name, toolCall.arguments, result?.result);
1809
+ // Record in work log for compaction resilience
1810
+ const toolOutput = result?.result && typeof result.result === 'object' && 'output' in result.result
1811
+ ? String(result.result.output)
1812
+ : typeof result?.result === 'string' ? result.result : undefined;
1813
+ this.workLog?.recordToolExecution(toolCall.name, toolCall.arguments, toolOutput);
1814
+ // Record in verification gate
1815
+ if (this.verificationGate) {
1816
+ if (toolCall.name === 'bash') {
1817
+ const toolRes = result?.result;
1818
+ const output = toolRes && typeof toolRes === 'object' && 'output' in toolRes
1819
+ ? String(toolRes.output)
1820
+ : typeof toolRes === 'string' ? toolRes : '';
1821
+ const exitCode = toolRes && typeof toolRes === 'object' && toolRes.metadata
1822
+ ? toolRes.metadata.exitCode ?? null
1823
+ : null;
1824
+ this.verificationGate.recordBashExecution(String(toolCall.arguments.command || ''), output, exitCode);
1825
+ }
1826
+ if (['write_file', 'edit_file'].includes(toolCall.name)) {
1827
+ this.verificationGate.recordFileChange();
1828
+ }
1829
+ }
1599
1830
  }
1600
1831
  // Add tool results to messages (with truncation and proactive budget management)
1601
1832
  const MAX_TOOL_OUTPUT_CHARS = 8000; // ~2000 tokens max per tool output
@@ -1617,6 +1848,15 @@ export class ProductionAgent {
1617
1848
  messages.push(...compactionResult.compactedMessages);
1618
1849
  this.state.messages.length = 0;
1619
1850
  this.state.messages.push(...compactionResult.compactedMessages);
1851
+ // Inject work log after compaction to prevent amnesia
1852
+ if (this.workLog?.hasContent()) {
1853
+ const workLogMessage = {
1854
+ role: 'user',
1855
+ content: this.workLog.toCompactString(),
1856
+ };
1857
+ messages.push(workLogMessage);
1858
+ this.state.messages.push(workLogMessage);
1859
+ }
1620
1860
  }
1621
1861
  else if (compactionResult.status === 'hard_limit') {
1622
1862
  // Hard limit reached - this is serious, emit error
@@ -1811,12 +2051,25 @@ export class ProductionAgent {
1811
2051
  }
1812
2052
  }
1813
2053
  // Build system prompt using cache-aware builder if available (Trick P)
1814
- // Combine memory, learnings, and codebase context
1815
- const combinedContext = [
2054
+ // Combine memory, learnings, codebase context, and environment facts
2055
+ const combinedContextParts = [
2056
+ // Environment facts — temporal/platform grounding (prevents stale date hallucinations)
2057
+ formatFactsBlock(getEnvironmentFacts()),
1816
2058
  ...(memoryContext.length > 0 ? memoryContext : []),
1817
2059
  ...(learningsContext ? [learningsContext] : []),
1818
2060
  ...(codebaseContextStr ? [`\n## Relevant Code\n${codebaseContextStr}`] : []),
1819
- ].join('\n');
2061
+ ];
2062
+ // Inject thinking directives and scaling guidance for non-simple tasks
2063
+ if (this.lastComplexityAssessment) {
2064
+ const thinkingPrompt = getThinkingSystemPrompt(this.lastComplexityAssessment.tier);
2065
+ if (thinkingPrompt) {
2066
+ combinedContextParts.push(thinkingPrompt);
2067
+ }
2068
+ if (this.lastComplexityAssessment.tier !== 'simple') {
2069
+ combinedContextParts.push(getScalingGuidance(this.lastComplexityAssessment));
2070
+ }
2071
+ }
2072
+ const combinedContext = combinedContextParts.join('\n');
1820
2073
  const promptOptions = {
1821
2074
  rules: rulesContent + (skillsPrompt ? '\n\n' + skillsPrompt : ''),
1822
2075
  tools: toolDescriptions,
@@ -1882,9 +2135,12 @@ export class ProductionAgent {
1882
2135
  this.emit({ type: 'llm.start', model: this.config.model || 'default' });
1883
2136
  // Prompt caching (Improvement P1): Replace the system message with structured content
1884
2137
  // that includes cache_control markers, enabling 60-70% cache hit rates.
1885
- // The original Message[] is kept for token estimation; the provider gets MessageWithContent[].
2138
+ // Only use structured cache_control markers for Anthropic models — other providers
2139
+ // (DeepSeek, Grok, etc.) use automatic prefix-based caching and don't understand these markers.
2140
+ const configModel = this.config.model || 'default';
2141
+ const isAnthropicModel = configModel.startsWith('anthropic/') || configModel.startsWith('claude-');
1886
2142
  let providerMessages = messages;
1887
- if (this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
2143
+ if (isAnthropicModel && this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
1888
2144
  providerMessages = messages.map((m, i) => {
1889
2145
  if (i === 0 && m.role === 'system') {
1890
2146
  // Replace system message with structured cacheable content
@@ -1953,6 +2209,8 @@ export class ProductionAgent {
1953
2209
  },
1954
2210
  },
1955
2211
  });
2212
+ // Pause duration budget during LLM call - network time shouldn't count against agent
2213
+ this.economics?.pauseDuration();
1956
2214
  try {
1957
2215
  let response;
1958
2216
  let actualModel = model;
@@ -1967,7 +2225,7 @@ export class ProductionAgent {
1967
2225
  taskType: 'general',
1968
2226
  estimatedTokens: messages.reduce((sum, m) => sum + m.content.length / 4, 0),
1969
2227
  };
1970
- const result = await this.routing.executeWithFallback(messages, context);
2228
+ const result = await this.routing.executeWithFallback(providerMessages, context);
1971
2229
  response = result.response;
1972
2230
  actualModel = result.model;
1973
2231
  // Emit routing insight
@@ -2017,6 +2275,14 @@ export class ProductionAgent {
2017
2275
  });
2018
2276
  }
2019
2277
  const duration = Date.now() - startTime;
2278
+ // Debug cache stats when DEBUG_CACHE is set
2279
+ if (process.env.DEBUG_CACHE) {
2280
+ const cr = response.usage?.cacheReadTokens ?? 0;
2281
+ const cw = response.usage?.cacheWriteTokens ?? 0;
2282
+ const inp = response.usage?.inputTokens ?? 0;
2283
+ const hitRate = inp > 0 ? ((cr / inp) * 100).toFixed(1) : '0.0';
2284
+ console.log(`[Cache] model=${actualModel} read=${cr} write=${cw} input=${inp} hit=${hitRate}%`);
2285
+ }
2020
2286
  // Lesson 26: Record LLM response for tracing
2021
2287
  this.traceCollector?.record({
2022
2288
  type: 'llm.response',
@@ -2084,6 +2350,10 @@ export class ProductionAgent {
2084
2350
  this.observability?.tracer?.endSpan(spanId);
2085
2351
  throw error;
2086
2352
  }
2353
+ finally {
2354
+ // Resume duration budget after LLM call completes (success or failure)
2355
+ this.economics?.resumeDuration();
2356
+ }
2087
2357
  }
2088
2358
  /**
2089
2359
  * Execute an async callback while excluding wall-clock wait time from duration budgeting.
@@ -2100,324 +2370,360 @@ export class ProductionAgent {
2100
2370
  }
2101
2371
  /**
2102
2372
  * Execute tool calls with safety checks and execution policy enforcement.
2373
+ * Parallelizable read-only tools are batched and executed concurrently.
2103
2374
  */
2104
2375
  async executeToolCalls(toolCalls) {
2105
2376
  const results = [];
2106
- for (const toolCall of toolCalls) {
2107
- const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
2108
- const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2109
- this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
2110
- const startTime = Date.now();
2111
- // Lesson 26: Record tool start for tracing
2377
+ // Group consecutive parallelizable tool calls into batches
2378
+ const batches = groupToolCallsIntoBatches(toolCalls);
2379
+ // Execute batches: parallel batches use Promise.allSettled, sequential execute one-by-one
2380
+ for (const batch of batches) {
2381
+ if (batch.length > 1 && PARALLELIZABLE_TOOLS.has(batch[0].name)) {
2382
+ // Execute parallelizable batch concurrently
2383
+ const batchResults = await Promise.allSettled(batch.map(tc => this.executeSingleToolCall(tc)));
2384
+ for (const result of batchResults) {
2385
+ if (result.status === 'fulfilled') {
2386
+ results.push(result.value);
2387
+ }
2388
+ else {
2389
+ // Should not happen since executeSingleToolCall catches errors internally
2390
+ const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
2391
+ results.push({ callId: 'unknown', result: `Error: ${error}`, error });
2392
+ }
2393
+ }
2394
+ }
2395
+ else {
2396
+ // Execute sequentially
2397
+ for (const tc of batch) {
2398
+ results.push(await this.executeSingleToolCall(tc));
2399
+ }
2400
+ }
2401
+ }
2402
+ return results;
2403
+ }
2404
+ /**
2405
+ * Execute a single tool call with all safety checks, tracing, and error handling.
2406
+ */
2407
+ async executeSingleToolCall(toolCall) {
2408
+ const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
2409
+ const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2410
+ this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
2411
+ const startTime = Date.now();
2412
+ // Short-circuit if tool call arguments failed to parse
2413
+ if (toolCall.parseError) {
2414
+ const errorMsg = `Tool arguments could not be parsed: ${toolCall.parseError}. Please retry with complete, valid JSON.`;
2415
+ this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: errorMsg });
2112
2416
  this.traceCollector?.record({
2113
- type: 'tool.start',
2114
- data: {
2115
- executionId,
2116
- toolName: toolCall.name,
2117
- arguments: toolCall.arguments,
2118
- },
2417
+ type: 'tool.end',
2418
+ data: { executionId, status: 'error', error: new Error(errorMsg), durationMs: Date.now() - startTime },
2119
2419
  });
2120
- try {
2121
- // =====================================================================
2122
- // PLAN MODE WRITE INTERCEPTION
2123
- // =====================================================================
2124
- // In plan mode, intercept write operations and queue them as proposed changes
2125
- if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
2126
- // Extract contextual reasoning instead of simple truncation
2127
- const reason = this.extractChangeReasoning(toolCall, this.state.messages);
2128
- // Start a new plan if needed
2129
- if (!this.pendingPlanManager.hasPendingPlan()) {
2130
- const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
2131
- const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
2132
- this.pendingPlanManager.startPlan(task);
2133
- }
2134
- // Queue the write operation
2135
- const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
2136
- // Emit event for UI
2137
- this.emit({
2138
- type: 'plan.change.queued',
2139
- tool: toolCall.name,
2140
- changeId: change?.id,
2141
- summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
2142
- });
2143
- // Return a message indicating the change was queued
2144
- const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
2145
- `Tool: ${toolCall.name}\n` +
2146
- `${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
2147
- `Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
2148
- results.push({
2149
- callId: toolCall.id,
2150
- result: queueMessage,
2151
- });
2152
- this.observability?.tracer?.endSpan(spanId);
2153
- continue; // Skip actual execution
2420
+ this.observability?.tracer?.endSpan(spanId);
2421
+ return { callId: toolCall.id, result: `Error: ${errorMsg}`, error: errorMsg };
2422
+ }
2423
+ // Lesson 26: Record tool start for tracing
2424
+ this.traceCollector?.record({
2425
+ type: 'tool.start',
2426
+ data: {
2427
+ executionId,
2428
+ toolName: toolCall.name,
2429
+ arguments: toolCall.arguments,
2430
+ },
2431
+ });
2432
+ try {
2433
+ // =====================================================================
2434
+ // PLAN MODE WRITE INTERCEPTION
2435
+ // =====================================================================
2436
+ // In plan mode, intercept write operations and queue them as proposed changes
2437
+ if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
2438
+ // Extract contextual reasoning instead of simple truncation
2439
+ const reason = this.extractChangeReasoning(toolCall, this.state.messages);
2440
+ // Start a new plan if needed
2441
+ if (!this.pendingPlanManager.hasPendingPlan()) {
2442
+ const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
2443
+ const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
2444
+ this.pendingPlanManager.startPlan(task);
2154
2445
  }
2155
- // =====================================================================
2156
- // EXECUTION POLICY ENFORCEMENT (Lesson 23)
2157
- // =====================================================================
2158
- let policyApprovedByUser = false;
2159
- if (this.executionPolicy) {
2160
- const policyContext = {
2161
- messages: this.state.messages,
2162
- currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
2163
- previousToolCalls: toolCalls.slice(0, toolCalls.indexOf(toolCall)),
2164
- };
2165
- const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
2166
- // Emit policy event
2167
- this.emit({
2168
- type: 'policy.evaluated',
2169
- tool: toolCall.name,
2170
- policy: evaluation.policy,
2171
- reason: evaluation.reason,
2172
- });
2173
- // Emit decision transparency event
2174
- this.emit({
2175
- type: 'decision.tool',
2176
- tool: toolCall.name,
2177
- decision: evaluation.policy === 'forbidden' ? 'blocked'
2178
- : evaluation.policy === 'prompt' ? 'prompted'
2446
+ // Queue the write operation
2447
+ const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
2448
+ // Emit event for UI
2449
+ this.emit({
2450
+ type: 'plan.change.queued',
2451
+ tool: toolCall.name,
2452
+ changeId: change?.id,
2453
+ summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
2454
+ });
2455
+ // Return a message indicating the change was queued
2456
+ const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
2457
+ `Tool: ${toolCall.name}\n` +
2458
+ `${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
2459
+ `Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
2460
+ this.observability?.tracer?.endSpan(spanId);
2461
+ return { callId: toolCall.id, result: queueMessage };
2462
+ }
2463
+ // =====================================================================
2464
+ // EXECUTION POLICY ENFORCEMENT (Lesson 23)
2465
+ // =====================================================================
2466
+ let policyApprovedByUser = false;
2467
+ if (this.executionPolicy) {
2468
+ const policyContext = {
2469
+ messages: this.state.messages,
2470
+ currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
2471
+ previousToolCalls: [],
2472
+ };
2473
+ const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
2474
+ // Emit policy event
2475
+ this.emit({
2476
+ type: 'policy.evaluated',
2477
+ tool: toolCall.name,
2478
+ policy: evaluation.policy,
2479
+ reason: evaluation.reason,
2480
+ });
2481
+ // Emit decision transparency event
2482
+ this.emit({
2483
+ type: 'decision.tool',
2484
+ tool: toolCall.name,
2485
+ decision: evaluation.policy === 'forbidden' ? 'blocked'
2486
+ : evaluation.policy === 'prompt' ? 'prompted'
2487
+ : 'allowed',
2488
+ policyMatch: evaluation.reason,
2489
+ });
2490
+ // Enhanced tracing: Record policy decision
2491
+ this.traceCollector?.record({
2492
+ type: 'decision',
2493
+ data: {
2494
+ type: 'policy',
2495
+ decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
2496
+ outcome: evaluation.policy === 'forbidden' ? 'blocked'
2497
+ : evaluation.policy === 'prompt' ? 'deferred'
2179
2498
  : 'allowed',
2180
- policyMatch: evaluation.reason,
2181
- });
2182
- // Enhanced tracing: Record policy decision
2183
- this.traceCollector?.record({
2184
- type: 'decision',
2185
- data: {
2186
- type: 'policy',
2187
- decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
2188
- outcome: evaluation.policy === 'forbidden' ? 'blocked'
2189
- : evaluation.policy === 'prompt' ? 'deferred'
2190
- : 'allowed',
2191
- reasoning: evaluation.reason,
2192
- factors: [
2193
- { name: 'policy', value: evaluation.policy },
2194
- { name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
2195
- ],
2196
- confidence: evaluation.intent?.confidence ?? 0.8,
2197
- },
2198
- });
2199
- // Handle forbidden policy - always block
2200
- if (evaluation.policy === 'forbidden') {
2201
- throw new Error(`Forbidden by policy: ${evaluation.reason}`);
2202
- }
2203
- // Handle prompt policy - requires approval
2204
- if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
2205
- // Try to get approval through safety manager's human-in-loop
2206
- const humanInLoop = this.safety?.humanInLoop;
2207
- if (humanInLoop) {
2208
- const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
2209
- if (!approval.approved) {
2210
- throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
2211
- }
2212
- policyApprovedByUser = true;
2213
- // Create a grant for future similar calls if approved
2214
- this.executionPolicy.createGrant({
2215
- toolName: toolCall.name,
2216
- grantedBy: 'user',
2217
- reason: 'Approved during execution',
2218
- maxUsages: 5, // Allow 5 more similar calls
2219
- });
2220
- }
2221
- else {
2222
- // No approval handler - block by default for safety
2223
- throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
2499
+ reasoning: evaluation.reason,
2500
+ factors: [
2501
+ { name: 'policy', value: evaluation.policy },
2502
+ { name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
2503
+ ],
2504
+ confidence: evaluation.intent?.confidence ?? 0.8,
2505
+ },
2506
+ });
2507
+ // Handle forbidden policy - always block
2508
+ if (evaluation.policy === 'forbidden') {
2509
+ throw new Error(`Forbidden by policy: ${evaluation.reason}`);
2510
+ }
2511
+ // Handle prompt policy - requires approval
2512
+ if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
2513
+ // Try to get approval through safety manager's human-in-loop
2514
+ const humanInLoop = this.safety?.humanInLoop;
2515
+ if (humanInLoop) {
2516
+ const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
2517
+ if (!approval.approved) {
2518
+ throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
2224
2519
  }
2225
- }
2226
- // Log intent classification if available
2227
- if (evaluation.intent) {
2228
- this.emit({
2229
- type: 'intent.classified',
2230
- tool: toolCall.name,
2231
- intent: evaluation.intent.type,
2232
- confidence: evaluation.intent.confidence,
2520
+ policyApprovedByUser = true;
2521
+ // Create a grant for future similar calls if approved
2522
+ this.executionPolicy.createGrant({
2523
+ toolName: toolCall.name,
2524
+ grantedBy: 'user',
2525
+ reason: 'Approved during execution',
2526
+ maxUsages: 5, // Allow 5 more similar calls
2233
2527
  });
2234
2528
  }
2235
- }
2236
- // =====================================================================
2237
- // SAFETY VALIDATION (Lesson 20-21)
2238
- // =====================================================================
2239
- if (this.safety) {
2240
- const safety = this.safety;
2241
- const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
2242
- if (!validation.allowed) {
2243
- throw new Error(`Tool call blocked: ${validation.reason}`);
2244
- }
2245
- }
2246
- // Get tool definition (with lazy-loading support for MCP tools)
2247
- let tool = this.tools.get(toolCall.name);
2248
- const wasPreloaded = !!tool;
2249
- if (!tool && this.toolResolver) {
2250
- // Try to resolve and load the tool on-demand
2251
- const resolved = this.toolResolver(toolCall.name);
2252
- if (resolved) {
2253
- this.addTool(resolved);
2254
- tool = resolved;
2255
- if (process.env.DEBUG)
2256
- console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
2257
- this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
2529
+ else {
2530
+ // No approval handler - block by default for safety
2531
+ throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
2258
2532
  }
2259
2533
  }
2260
- if (!tool) {
2261
- throw new Error(`Unknown tool: ${toolCall.name}`);
2534
+ // Log intent classification if available
2535
+ if (evaluation.intent) {
2536
+ this.emit({
2537
+ type: 'intent.classified',
2538
+ tool: toolCall.name,
2539
+ intent: evaluation.intent.type,
2540
+ confidence: evaluation.intent.confidence,
2541
+ });
2262
2542
  }
2263
- // Log whether tool was pre-loaded or auto-loaded (for MCP tools)
2264
- if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
2265
- console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
2543
+ }
2544
+ // =====================================================================
2545
+ // SAFETY VALIDATION (Lesson 20-21)
2546
+ // =====================================================================
2547
+ if (this.safety) {
2548
+ const safety = this.safety;
2549
+ const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
2550
+ if (!validation.allowed) {
2551
+ throw new Error(`Tool call blocked: ${validation.reason}`);
2266
2552
  }
2267
- // =====================================================================
2268
- // BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
2269
- // =====================================================================
2270
- // Claim file resources before write operations to prevent conflicts
2271
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2272
- const args = toolCall.arguments;
2273
- const filePath = String(args.path || args.file_path || '');
2274
- if (filePath) {
2275
- const agentId = this.config.systemPrompt?.slice(0, 50) || 'agent';
2276
- const claimed = this.blackboard.claim(filePath, agentId, 'write', {
2277
- ttl: 60000, // 1 minute claim
2278
- intent: `${toolCall.name}: ${filePath}`,
2279
- });
2280
- if (!claimed) {
2281
- const existingClaim = this.blackboard.getClaim(filePath);
2282
- throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
2283
- `Wait for the other agent to complete or choose a different file.`);
2284
- }
2285
- }
2553
+ }
2554
+ // Get tool definition (with lazy-loading support for MCP tools)
2555
+ let tool = this.tools.get(toolCall.name);
2556
+ const wasPreloaded = !!tool;
2557
+ if (!tool && this.toolResolver) {
2558
+ // Try to resolve and load the tool on-demand
2559
+ const resolved = this.toolResolver(toolCall.name);
2560
+ if (resolved) {
2561
+ this.addTool(resolved);
2562
+ tool = resolved;
2563
+ if (process.env.DEBUG)
2564
+ console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
2565
+ this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
2286
2566
  }
2287
- // FILE CACHE: Check cache for read_file operations before executing
2288
- if (this.fileCache && toolCall.name === 'read_file') {
2289
- const args = toolCall.arguments;
2290
- const readPath = String(args.path || '');
2291
- if (readPath) {
2292
- const cached = this.fileCache.get(readPath);
2293
- if (cached !== undefined) {
2294
- const lines = cached.split('\n').length;
2295
- const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2296
- const duration = Date.now() - startTime;
2297
- this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2298
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2299
- this.state.metrics.toolCalls++;
2300
- this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2301
- results.push({
2302
- callId: toolCall.id,
2303
- result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2304
- });
2305
- this.observability?.tracer?.endSpan(spanId);
2306
- continue; // Skip actual file I/O
2307
- }
2567
+ }
2568
+ if (!tool) {
2569
+ throw new Error(`Unknown tool: ${toolCall.name}`);
2570
+ }
2571
+ // Log whether tool was pre-loaded or auto-loaded (for MCP tools)
2572
+ if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
2573
+ console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
2574
+ }
2575
+ // =====================================================================
2576
+ // BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
2577
+ // =====================================================================
2578
+ // Claim file resources before write operations to prevent conflicts
2579
+ if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2580
+ const args = toolCall.arguments;
2581
+ const filePath = String(args.path || args.file_path || '');
2582
+ if (filePath) {
2583
+ const agentId = this.agentId;
2584
+ const claimed = this.blackboard.claim(filePath, agentId, 'write', {
2585
+ ttl: 60000, // 1 minute claim
2586
+ intent: `${toolCall.name}: ${filePath}`,
2587
+ });
2588
+ if (!claimed) {
2589
+ const existingClaim = this.blackboard.getClaim(filePath);
2590
+ throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
2591
+ `Wait for the other agent to complete or choose a different file.`);
2308
2592
  }
2309
2593
  }
2310
- // Execute tool (with sandbox if available)
2311
- let result;
2312
- if (this.safety?.sandbox) {
2313
- // CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
2314
- // The default 60s sandbox timeout would kill subagents prematurely
2315
- // Subagents may run for minutes (per their own timeout config)
2316
- const isSpawnAgent = toolCall.name === 'spawn_agent';
2317
- const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
2318
- const isSubagentTool = isSpawnAgent || isSpawnParallel;
2319
- const subagentConfig = this.config.subagent;
2320
- const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
2321
- const subagentTimeout = hasSubagentConfig
2322
- ? subagentConfig.defaultTimeout ?? 600000 // 10 min default
2323
- : 600000;
2324
- // Use subagent timeout + buffer for spawn tools, default for others
2325
- // For spawn_agents_parallel, multiply by number of agents (they run in parallel,
2326
- // but the total wall-clock time should still allow the slowest agent to complete)
2327
- const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
2328
- result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
2329
- }
2330
- else {
2331
- result = await tool.execute(toolCall.arguments);
2332
- }
2333
- const duration = Date.now() - startTime;
2334
- // Lesson 26: Record tool completion for tracing
2335
- this.traceCollector?.record({
2336
- type: 'tool.end',
2337
- data: {
2338
- executionId,
2339
- status: 'success',
2340
- result,
2341
- durationMs: duration,
2342
- },
2343
- });
2344
- // Record metrics
2345
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2346
- this.state.metrics.toolCalls++;
2347
- this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2348
- // FILE CACHE: Store read results and invalidate on writes
2349
- if (this.fileCache) {
2350
- const args = toolCall.arguments;
2351
- const filePath = String(args.path || args.file_path || '');
2352
- if (toolCall.name === 'read_file' && filePath) {
2353
- // Cache successful read results
2354
- const resultObj = result;
2355
- if (resultObj?.success && typeof resultObj.output === 'string') {
2356
- this.fileCache.set(filePath, resultObj.output);
2357
- }
2358
- }
2359
- else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2360
- // Invalidate cache when files are modified (including undo operations)
2361
- this.fileCache.invalidate(filePath);
2594
+ }
2595
+ // FILE CACHE: Check cache for read_file operations before executing
2596
+ if (this.fileCache && toolCall.name === 'read_file') {
2597
+ const args = toolCall.arguments;
2598
+ const readPath = String(args.path || '');
2599
+ if (readPath) {
2600
+ const cached = this.fileCache.get(readPath);
2601
+ if (cached !== undefined) {
2602
+ const lines = cached.split('\n').length;
2603
+ const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2604
+ const duration = Date.now() - startTime;
2605
+ this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2606
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2607
+ this.state.metrics.toolCalls++;
2608
+ this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2609
+ this.observability?.tracer?.endSpan(spanId);
2610
+ return {
2611
+ callId: toolCall.id,
2612
+ result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2613
+ };
2362
2614
  }
2363
2615
  }
2364
- // Emit tool insight with result summary
2365
- const summary = this.summarizeToolResult(toolCall.name, result);
2366
- this.emit({
2367
- type: 'insight.tool',
2368
- tool: toolCall.name,
2369
- summary,
2370
- durationMs: duration,
2371
- success: true,
2372
- });
2373
- results.push({
2374
- callId: toolCall.id,
2616
+ }
2617
+ // Execute tool (with sandbox if available)
2618
+ let result;
2619
+ if (this.safety?.sandbox) {
2620
+ // CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
2621
+ // The default 60s sandbox timeout would kill subagents prematurely
2622
+ // Subagents may run for minutes (per their own timeout config)
2623
+ const isSpawnAgent = toolCall.name === 'spawn_agent';
2624
+ const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
2625
+ const isSubagentTool = isSpawnAgent || isSpawnParallel;
2626
+ const subagentConfig = this.config.subagent;
2627
+ const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
2628
+ const subagentTimeout = hasSubagentConfig
2629
+ ? subagentConfig.defaultTimeout ?? 600000 // 10 min default
2630
+ : 600000;
2631
+ // Use subagent timeout + buffer for spawn tools, default for others
2632
+ // For spawn_agents_parallel, multiply by number of agents (they run in parallel,
2633
+ // but the total wall-clock time should still allow the slowest agent to complete)
2634
+ const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
2635
+ result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
2636
+ }
2637
+ else {
2638
+ result = await tool.execute(toolCall.arguments);
2639
+ }
2640
+ const duration = Date.now() - startTime;
2641
+ // Lesson 26: Record tool completion for tracing
2642
+ this.traceCollector?.record({
2643
+ type: 'tool.end',
2644
+ data: {
2645
+ executionId,
2646
+ status: 'success',
2375
2647
  result,
2376
- });
2377
- // Release blackboard claim after successful file write
2378
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2379
- const args = toolCall.arguments;
2380
- const filePath = String(args.path || args.file_path || '');
2381
- if (filePath) {
2382
- const agentId = this.config.systemPrompt?.slice(0, 50) || 'agent';
2383
- this.blackboard.release(filePath, agentId);
2648
+ durationMs: duration,
2649
+ },
2650
+ });
2651
+ // Record metrics
2652
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2653
+ this.state.metrics.toolCalls++;
2654
+ this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2655
+ // FILE CACHE: Store read results and invalidate on writes
2656
+ if (this.fileCache) {
2657
+ const args = toolCall.arguments;
2658
+ const filePath = String(args.path || args.file_path || '');
2659
+ if (toolCall.name === 'read_file' && filePath) {
2660
+ // Cache successful read results
2661
+ const resultObj = result;
2662
+ if (resultObj?.success && typeof resultObj.output === 'string') {
2663
+ this.fileCache.set(filePath, resultObj.output);
2384
2664
  }
2385
2665
  }
2386
- this.observability?.tracer?.endSpan(spanId);
2666
+ else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2667
+ // Invalidate cache when files are modified (including undo operations)
2668
+ this.fileCache.invalidate(filePath);
2669
+ }
2387
2670
  }
2388
- catch (err) {
2389
- const error = err instanceof Error ? err : new Error(String(err));
2390
- const duration = Date.now() - startTime;
2391
- // Lesson 26: Record tool error for tracing
2392
- this.traceCollector?.record({
2393
- type: 'tool.end',
2394
- data: {
2395
- executionId,
2396
- status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
2397
- error,
2398
- durationMs: duration,
2399
- },
2400
- });
2401
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
2402
- this.observability?.tracer?.recordError(error);
2403
- this.observability?.tracer?.endSpan(spanId);
2404
- // FAILURE EVIDENCE RECORDING (Trick S)
2405
- // Track failed tool calls to prevent loops and provide context
2406
- this.contextEngineering?.recordFailure({
2407
- action: toolCall.name,
2408
- args: toolCall.arguments,
2671
+ // Emit tool insight with result summary
2672
+ const summary = this.summarizeToolResult(toolCall.name, result);
2673
+ this.emit({
2674
+ type: 'insight.tool',
2675
+ tool: toolCall.name,
2676
+ summary,
2677
+ durationMs: duration,
2678
+ success: true,
2679
+ });
2680
+ // Release blackboard claim after successful file write
2681
+ if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2682
+ const args = toolCall.arguments;
2683
+ const filePath = String(args.path || args.file_path || '');
2684
+ if (filePath) {
2685
+ const agentId = this.agentId;
2686
+ this.blackboard.release(filePath, agentId);
2687
+ }
2688
+ }
2689
+ // Self-improvement: record success pattern
2690
+ this.selfImprovement?.recordSuccess(toolCall.name, toolCall.arguments, typeof result === 'string' ? result.slice(0, 200) : JSON.stringify(result).slice(0, 200));
2691
+ this.observability?.tracer?.endSpan(spanId);
2692
+ return { callId: toolCall.id, result };
2693
+ }
2694
+ catch (err) {
2695
+ const error = err instanceof Error ? err : new Error(String(err));
2696
+ const duration = Date.now() - startTime;
2697
+ // Lesson 26: Record tool error for tracing
2698
+ this.traceCollector?.record({
2699
+ type: 'tool.end',
2700
+ data: {
2701
+ executionId,
2702
+ status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
2409
2703
  error,
2410
- intent: `Execute tool ${toolCall.name}`,
2411
- });
2412
- results.push({
2413
- callId: toolCall.id,
2414
- result: `Error: ${error.message}`,
2415
- error: error.message,
2416
- });
2417
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
2704
+ durationMs: duration,
2705
+ },
2706
+ });
2707
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
2708
+ this.observability?.tracer?.recordError(error);
2709
+ this.observability?.tracer?.endSpan(spanId);
2710
+ // FAILURE EVIDENCE RECORDING (Trick S)
2711
+ // Track failed tool calls to prevent loops and provide context
2712
+ this.contextEngineering?.recordFailure({
2713
+ action: toolCall.name,
2714
+ args: toolCall.arguments,
2715
+ error,
2716
+ intent: `Execute tool ${toolCall.name}`,
2717
+ });
2718
+ // Self-improvement: enhance error message with diagnosis for better LLM recovery
2719
+ if (this.selfImprovement) {
2720
+ const enhanced = this.selfImprovement.enhanceErrorMessage(toolCall.name, error.message, toolCall.arguments);
2721
+ this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: enhanced });
2722
+ return { callId: toolCall.id, result: `Error: ${enhanced}`, error: enhanced };
2418
2723
  }
2724
+ this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
2725
+ return { callId: toolCall.id, result: `Error: ${error.message}`, error: error.message };
2419
2726
  }
2420
- return results;
2421
2727
  }
2422
2728
  /**
2423
2729
  * Get recently modified file paths from the file change tracker.
@@ -3443,6 +3749,19 @@ export class ProductionAgent {
3443
3749
  }
3444
3750
  // Create the checkpoint
3445
3751
  const label = `auto-iter-${this.state.iteration}`;
3752
+ // Supplementary: also save to AutoCheckpointManager (file-based)
3753
+ if (this.autoCheckpointManager) {
3754
+ try {
3755
+ this.autoCheckpointManager.save({
3756
+ label,
3757
+ sessionId: this.agentId,
3758
+ iteration: this.state.iteration,
3759
+ });
3760
+ }
3761
+ catch {
3762
+ // Non-critical — don't fail the main checkpoint path
3763
+ }
3764
+ }
3446
3765
  return this.createCheckpoint(label);
3447
3766
  }
3448
3767
  // =========================================================================
@@ -3596,7 +3915,18 @@ export class ProductionAgent {
3596
3915
  let workerResultId;
3597
3916
  try {
3598
3917
  // Filter tools for this agent
3599
- const agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
3918
+ let agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
3919
+ // Apply tool recommendations to improve subagent focus (only for large tool sets)
3920
+ if (this.toolRecommendation && agentTools.length > 15) {
3921
+ const taskType = ToolRecommendationEngine.inferTaskType(agentName);
3922
+ const recommendations = this.toolRecommendation.recommendTools(task, taskType, agentTools.map(t => t.name));
3923
+ if (recommendations.length > 0) {
3924
+ const recommendedNames = new Set(recommendations.map(r => r.toolName));
3925
+ // Always keep spawn tools even if not recommended
3926
+ const alwaysKeep = new Set(['spawn_agent', 'spawn_agents_parallel']);
3927
+ agentTools = agentTools.filter(t => recommendedNames.has(t.name) || alwaysKeep.has(t.name));
3928
+ }
3929
+ }
3600
3930
  // Resolve model - abstract tiers (fast/balanced/quality) should use parent's model
3601
3931
  // Only use agentDef.model if it's an actual model ID (contains '/')
3602
3932
  const resolvedModel = (agentDef.model && agentDef.model.includes('/'))
@@ -3688,14 +4018,30 @@ export class ProductionAgent {
3688
4018
  // BUDGET AWARENESS: Always inject so subagent understands its limits
3689
4019
  const subagentBudgetTokens = constraints?.maxTokens ?? SUBAGENT_BUDGET.maxTokens ?? 100000;
3690
4020
  const subagentBudgetMinutes = Math.round((SUBAGENT_BUDGET.maxDuration ?? 240000) / 60000);
3691
- constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
3692
- `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
3693
- `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
3694
- `- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
3695
- `- Do not explore indefinitely - be focused and efficient.\n` +
3696
- `- If approaching limits, summarize findings and return.\n` +
3697
- `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
3698
- ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
4021
+ if (isSwarmWorker) {
4022
+ // V6: Calmer resource awareness for swarm workers — prevents weaker models
4023
+ // from confabulating budget warnings and wrapping up without doing work
4024
+ constraintParts.push(`**Resource Info:**\n` +
4025
+ `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens (you have plenty)\n` +
4026
+ `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
4027
+ `- Focus on completing your task. Do NOT wrap up prematurely.\n` +
4028
+ `- You will receive a system warning IF you approach budget limits. Until then, work normally.\n` +
4029
+ `- **IMPORTANT:** Budget warnings come from the SYSTEM, not from your own assessment. ` +
4030
+ `Do not preemptively claim budget issues.\n` +
4031
+ `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
4032
+ ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
4033
+ }
4034
+ else {
4035
+ // Original RESOURCE AWARENESS text for regular subagents
4036
+ constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
4037
+ `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
4038
+ `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
4039
+ `- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
4040
+ `- Do not explore indefinitely - be focused and efficient.\n` +
4041
+ `- If approaching limits, summarize findings and return.\n` +
4042
+ `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
4043
+ ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
4044
+ }
3699
4045
  if (constraints) {
3700
4046
  if (constraints.focusAreas && constraints.focusAreas.length > 0) {
3701
4047
  constraintParts.push(`**FOCUS AREAS (limit exploration to these paths):**\n${constraints.focusAreas.map(a => ` - ${a}`).join('\n')}`);
@@ -3711,11 +4057,19 @@ export class ProductionAgent {
3711
4057
  }
3712
4058
  }
3713
4059
  const constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
4060
+ // Build delegation-enhanced system prompt
4061
+ let delegationContext = '';
4062
+ if (this.lastComplexityAssessment && this.lastComplexityAssessment.tier !== 'simple') {
4063
+ const spec = createMinimalDelegationSpec(task, agentName);
4064
+ delegationContext = '\n\n' + buildDelegationPrompt(spec);
4065
+ }
4066
+ // Quality self-assessment prompt for subagent
4067
+ const qualityPrompt = '\n\n' + getSubagentQualityPrompt();
3714
4068
  // Build subagent system prompt with subagent-specific plan mode addition
3715
4069
  const parentMode = this.getMode();
3716
4070
  const subagentSystemPrompt = parentMode === 'plan'
3717
- ? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}`
3718
- : `${agentDef.systemPrompt}${blackboardContext}${constraintContext}`;
4071
+ ? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`
4072
+ : `${agentDef.systemPrompt}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`;
3719
4073
  // Allocate budget from pool (or use default) — track allocation ID for release later
3720
4074
  const pooledBudget = this.getSubagentBudget(agentName, constraints);
3721
4075
  const poolAllocationId = pooledBudget.allocationId;
@@ -3765,6 +4119,8 @@ export class ProductionAgent {
3765
4119
  builtIn: { logging: false, timing: false, metrics: false },
3766
4120
  custom: [],
3767
4121
  },
4122
+ // Pass unique agentId for blackboard coordination and tracing
4123
+ agentId,
3768
4124
  // Share parent's blackboard for coordination between parallel subagents
3769
4125
  blackboard: this.blackboard || undefined,
3770
4126
  // Share parent's file cache to eliminate redundant reads across agents
@@ -3925,6 +4281,25 @@ export class ProductionAgent {
3925
4281
  },
3926
4282
  structured,
3927
4283
  };
4284
+ // Save full output to subagent output store (avoids telephone problem)
4285
+ if (this.subagentOutputStore) {
4286
+ const outputEntry = {
4287
+ id: agentId,
4288
+ agentId,
4289
+ agentName,
4290
+ task,
4291
+ fullOutput: finalOutput,
4292
+ structured,
4293
+ filesModified: [],
4294
+ filesCreated: [],
4295
+ timestamp: new Date(),
4296
+ tokensUsed: result.metrics.totalTokens,
4297
+ durationMs: duration,
4298
+ };
4299
+ const storeId = this.subagentOutputStore.save(outputEntry);
4300
+ // Attach reference so downstream consumers can retrieve full output
4301
+ spawnResultFinal.outputStoreId = storeId;
4302
+ }
3928
4303
  if (workerResultId && this.store?.hasWorkerResultsFeature()) {
3929
4304
  try {
3930
4305
  this.store.completeWorkerResult(workerResultId, {
@@ -4229,9 +4604,41 @@ export class ProductionAgent {
4229
4604
  count: tasks.length,
4230
4605
  agents: tasks.map(t => t.agent),
4231
4606
  });
4232
- // Execute all tasks in parallel using allSettled to handle partial failures
4233
- const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
4234
- const settled = await Promise.allSettled(promises);
4607
+ // Use DynamicBudgetPool for parallel spawns (prevents child starvation,
4608
+ // enables priority-based allocation). Falls back to regular pool for single tasks.
4609
+ let settled;
4610
+ const originalPool = this.budgetPool;
4611
+ // SubagentSupervisor for unified monitoring of concurrent subagents
4612
+ const supervisor = tasks.length > 1 ? createSubagentSupervisor() : null;
4613
+ if (this.budgetPool && tasks.length > 1) {
4614
+ // Swap to DynamicBudgetPool for this parallel batch
4615
+ const poolStats = this.budgetPool.getStats();
4616
+ const dynamicPool = createDynamicBudgetPool(poolStats.tokensRemaining, 0.1);
4617
+ dynamicPool.setExpectedChildren(tasks.length);
4618
+ // Temporarily replace the budget pool so spawnAgent's reserve() uses the dynamic one
4619
+ this.budgetPool = dynamicPool;
4620
+ try {
4621
+ const promises = tasks.map(({ agent, task }) => {
4622
+ const spawnPromise = this.spawnAgent(agent, task);
4623
+ // Register with supervisor for monitoring
4624
+ if (supervisor) {
4625
+ const handle = createSubagentHandle(`parallel-${agent}-${Date.now()}`, agent, task, spawnPromise, {});
4626
+ supervisor.add(handle);
4627
+ }
4628
+ return spawnPromise;
4629
+ });
4630
+ settled = await Promise.allSettled(promises);
4631
+ }
4632
+ finally {
4633
+ this.budgetPool = originalPool;
4634
+ supervisor?.stop();
4635
+ }
4636
+ }
4637
+ else {
4638
+ // Single task or no pool - use standard sequential allocation
4639
+ const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
4640
+ settled = await Promise.allSettled(promises);
4641
+ }
4235
4642
  // Convert settled results to SpawnResult array
4236
4643
  const results = settled.map((result, i) => {
4237
4644
  if (result.status === 'fulfilled') {
@@ -4939,8 +5346,19 @@ If the task is a simple question or doesn't need specialized handling, set bestA
4939
5346
  this.unsubscribers = [];
4940
5347
  // Flush trace collector before cleanup
4941
5348
  await this.traceCollector?.flush();
4942
- // Clear blackboard (releases file claim locks)
4943
- this.blackboard?.clear();
5349
+ // Per-agent blackboard cleanup: release only this agent's claims and subscriptions
5350
+ // so parallel siblings don't lose their data. Only root agent clears everything.
5351
+ if (this.blackboard) {
5352
+ if (this.parentIterations > 0 && this.agentId) {
5353
+ // Subagent: release only our claims and subscriptions
5354
+ this.blackboard.releaseAll(this.agentId);
5355
+ this.blackboard.unsubscribeAgent(this.agentId);
5356
+ }
5357
+ else {
5358
+ // Root agent: full clear
5359
+ this.blackboard.clear();
5360
+ }
5361
+ }
4944
5362
  // Wait for any pending init before cleanup
4945
5363
  if (this.initPromises.length > 0) {
4946
5364
  try {