attocode 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/CHANGELOG.md +141 -1
  2. package/dist/src/adapters.d.ts.map +1 -1
  3. package/dist/src/adapters.js +7 -0
  4. package/dist/src/adapters.js.map +1 -1
  5. package/dist/src/agent.d.ts +24 -0
  6. package/dist/src/agent.d.ts.map +1 -1
  7. package/dist/src/agent.js +539 -330
  8. package/dist/src/agent.js.map +1 -1
  9. package/dist/src/cli.d.ts +6 -0
  10. package/dist/src/cli.d.ts.map +1 -1
  11. package/dist/src/cli.js +37 -0
  12. package/dist/src/cli.js.map +1 -1
  13. package/dist/src/commands/init-commands.d.ts.map +1 -1
  14. package/dist/src/commands/init-commands.js +57 -0
  15. package/dist/src/commands/init-commands.js.map +1 -1
  16. package/dist/src/core/protocol/types.d.ts +14 -14
  17. package/dist/src/defaults.d.ts +1 -1
  18. package/dist/src/defaults.d.ts.map +1 -1
  19. package/dist/src/defaults.js +1 -0
  20. package/dist/src/defaults.js.map +1 -1
  21. package/dist/src/integrations/agent-registry.d.ts +7 -2
  22. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  23. package/dist/src/integrations/agent-registry.js +30 -4
  24. package/dist/src/integrations/agent-registry.js.map +1 -1
  25. package/dist/src/integrations/budget-pool.d.ts +13 -1
  26. package/dist/src/integrations/budget-pool.d.ts.map +1 -1
  27. package/dist/src/integrations/budget-pool.js +17 -0
  28. package/dist/src/integrations/budget-pool.js.map +1 -1
  29. package/dist/src/integrations/economics.d.ts +9 -0
  30. package/dist/src/integrations/economics.d.ts.map +1 -1
  31. package/dist/src/integrations/economics.js +25 -0
  32. package/dist/src/integrations/economics.js.map +1 -1
  33. package/dist/src/integrations/index.d.ts +2 -1
  34. package/dist/src/integrations/index.d.ts.map +1 -1
  35. package/dist/src/integrations/index.js +3 -1
  36. package/dist/src/integrations/index.js.map +1 -1
  37. package/dist/src/integrations/learning-store.d.ts.map +1 -1
  38. package/dist/src/integrations/learning-store.js +6 -0
  39. package/dist/src/integrations/learning-store.js.map +1 -1
  40. package/dist/src/integrations/routing.d.ts +2 -1
  41. package/dist/src/integrations/routing.d.ts.map +1 -1
  42. package/dist/src/integrations/routing.js.map +1 -1
  43. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  44. package/dist/src/integrations/smart-decomposer.js +7 -0
  45. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  46. package/dist/src/integrations/swarm/index.d.ts +29 -0
  47. package/dist/src/integrations/swarm/index.d.ts.map +1 -0
  48. package/dist/src/integrations/swarm/index.js +29 -0
  49. package/dist/src/integrations/swarm/index.js.map +1 -0
  50. package/dist/src/integrations/swarm/model-selector.d.ts +55 -0
  51. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -0
  52. package/dist/src/integrations/swarm/model-selector.js +342 -0
  53. package/dist/src/integrations/swarm/model-selector.js.map +1 -0
  54. package/dist/src/integrations/swarm/request-throttle.d.ts +112 -0
  55. package/dist/src/integrations/swarm/request-throttle.d.ts.map +1 -0
  56. package/dist/src/integrations/swarm/request-throttle.js +263 -0
  57. package/dist/src/integrations/swarm/request-throttle.js.map +1 -0
  58. package/dist/src/integrations/swarm/swarm-budget.d.ts +31 -0
  59. package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -0
  60. package/dist/src/integrations/swarm/swarm-budget.js +36 -0
  61. package/dist/src/integrations/swarm/swarm-budget.js.map +1 -0
  62. package/dist/src/integrations/swarm/swarm-config-loader.d.ts +51 -0
  63. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -0
  64. package/dist/src/integrations/swarm/swarm-config-loader.js +458 -0
  65. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -0
  66. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +145 -0
  67. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -0
  68. package/dist/src/integrations/swarm/swarm-event-bridge.js +443 -0
  69. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -0
  70. package/dist/src/integrations/swarm/swarm-events.d.ts +157 -0
  71. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -0
  72. package/dist/src/integrations/swarm/swarm-events.js +81 -0
  73. package/dist/src/integrations/swarm/swarm-events.js.map +1 -0
  74. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +166 -0
  75. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -0
  76. package/dist/src/integrations/swarm/swarm-orchestrator.js +1114 -0
  77. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -0
  78. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +29 -0
  79. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -0
  80. package/dist/src/integrations/swarm/swarm-quality-gate.js +85 -0
  81. package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -0
  82. package/dist/src/integrations/swarm/swarm-state-store.d.ts +31 -0
  83. package/dist/src/integrations/swarm/swarm-state-store.d.ts.map +1 -0
  84. package/dist/src/integrations/swarm/swarm-state-store.js +91 -0
  85. package/dist/src/integrations/swarm/swarm-state-store.js.map +1 -0
  86. package/dist/src/integrations/swarm/task-queue.d.ts +128 -0
  87. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -0
  88. package/dist/src/integrations/swarm/task-queue.js +379 -0
  89. package/dist/src/integrations/swarm/task-queue.js.map +1 -0
  90. package/dist/src/integrations/swarm/types.d.ts +425 -0
  91. package/dist/src/integrations/swarm/types.d.ts.map +1 -0
  92. package/dist/src/integrations/swarm/types.js +96 -0
  93. package/dist/src/integrations/swarm/types.js.map +1 -0
  94. package/dist/src/integrations/swarm/worker-pool.d.ts +96 -0
  95. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -0
  96. package/dist/src/integrations/swarm/worker-pool.js +269 -0
  97. package/dist/src/integrations/swarm/worker-pool.js.map +1 -0
  98. package/dist/src/main.js +88 -0
  99. package/dist/src/main.js.map +1 -1
  100. package/dist/src/modes/repl.d.ts +1 -0
  101. package/dist/src/modes/repl.d.ts.map +1 -1
  102. package/dist/src/modes/repl.js +2 -1
  103. package/dist/src/modes/repl.js.map +1 -1
  104. package/dist/src/modes/tui.d.ts +1 -0
  105. package/dist/src/modes/tui.d.ts.map +1 -1
  106. package/dist/src/modes/tui.js +3 -1
  107. package/dist/src/modes/tui.js.map +1 -1
  108. package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
  109. package/dist/src/providers/adapters/anthropic.js +20 -3
  110. package/dist/src/providers/adapters/anthropic.js.map +1 -1
  111. package/dist/src/providers/adapters/openrouter.d.ts +14 -0
  112. package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
  113. package/dist/src/providers/adapters/openrouter.js +56 -2
  114. package/dist/src/providers/adapters/openrouter.js.map +1 -1
  115. package/dist/src/providers/resilient-fetch.d.ts +2 -0
  116. package/dist/src/providers/resilient-fetch.d.ts.map +1 -1
  117. package/dist/src/providers/resilient-fetch.js +27 -3
  118. package/dist/src/providers/resilient-fetch.js.map +1 -1
  119. package/dist/src/providers/types.d.ts +15 -0
  120. package/dist/src/providers/types.d.ts.map +1 -1
  121. package/dist/src/providers/types.js.map +1 -1
  122. package/dist/src/tools/bash.d.ts +4 -4
  123. package/dist/src/tools/bash.d.ts.map +1 -1
  124. package/dist/src/tools/bash.js +2 -1
  125. package/dist/src/tools/bash.js.map +1 -1
  126. package/dist/src/tools/coercion.d.ts +14 -0
  127. package/dist/src/tools/coercion.d.ts.map +1 -0
  128. package/dist/src/tools/coercion.js +25 -0
  129. package/dist/src/tools/coercion.js.map +1 -0
  130. package/dist/src/tools/file.d.ts +5 -5
  131. package/dist/src/tools/file.d.ts.map +1 -1
  132. package/dist/src/tools/file.js +2 -1
  133. package/dist/src/tools/file.js.map +1 -1
  134. package/dist/src/tui/app.d.ts.map +1 -1
  135. package/dist/src/tui/app.js +83 -5
  136. package/dist/src/tui/app.js.map +1 -1
  137. package/dist/src/tui/components/SwarmStatusPanel.d.ts +27 -0
  138. package/dist/src/tui/components/SwarmStatusPanel.d.ts.map +1 -0
  139. package/dist/src/tui/components/SwarmStatusPanel.js +108 -0
  140. package/dist/src/tui/components/SwarmStatusPanel.js.map +1 -0
  141. package/dist/src/tui/event-display.d.ts.map +1 -1
  142. package/dist/src/tui/event-display.js +8 -1
  143. package/dist/src/tui/event-display.js.map +1 -1
  144. package/dist/src/types.d.ts +11 -1
  145. package/dist/src/types.d.ts.map +1 -1
  146. package/package.json +1 -1
package/dist/src/agent.js CHANGED
@@ -21,7 +21,7 @@
21
21
  import { buildConfig, isFeatureEnabled, getEnabledFeatures, getSubagentTimeout, getSubagentMaxIterations, } from './defaults.js';
22
22
  import { createModeManager, formatModeList, parseMode, calculateTaskSimilarity, SUBAGENT_PLAN_MODE_ADDITION, } from './modes.js';
23
23
  import { createLSPFileTools, } from './agent-tools/index.js';
24
- import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, } from './integrations/index.js';
24
+ import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, } from './integrations/index.js';
25
25
  // Lesson 26: Tracing & Evaluation integration
26
26
  import { createTraceCollector } from './tracing/trace-collector.js';
27
27
  // Model registry for context window limits
@@ -34,6 +34,43 @@ import { createTaskTools, } from './tools/tasks.js';
34
34
  // =============================================================================
35
35
  // PRODUCTION AGENT
36
36
  // =============================================================================
37
+ /**
38
+ * Tools that are safe to execute in parallel (read-only, no side effects).
39
+ * These tools don't modify state, so running them concurrently is safe.
40
+ */
41
+ export const PARALLELIZABLE_TOOLS = new Set([
42
+ 'read_file', 'glob', 'grep', 'list_files', 'search_files',
43
+ 'search_code', 'get_file_info',
44
+ ]);
45
+ /**
46
+ * Groups consecutive tool calls into batches for parallel/sequential execution.
47
+ * Consecutive parallelizable tools form a single parallel batch.
48
+ * Non-parallelizable tools break the sequence, starting a new batch.
49
+ */
50
+ export function groupToolCallsIntoBatches(toolCalls, isParallelizable = (tc) => PARALLELIZABLE_TOOLS.has(tc.name)) {
51
+ const batches = [];
52
+ let currentBatch = [];
53
+ let currentIsParallel = false;
54
+ for (const toolCall of toolCalls) {
55
+ const isParallel = isParallelizable(toolCall);
56
+ if (batches.length === 0 && currentBatch.length === 0) {
57
+ currentBatch.push(toolCall);
58
+ currentIsParallel = isParallel;
59
+ }
60
+ else if (isParallel && currentIsParallel) {
61
+ currentBatch.push(toolCall);
62
+ }
63
+ else {
64
+ batches.push(currentBatch);
65
+ currentBatch = [toolCall];
66
+ currentIsParallel = isParallel;
67
+ }
68
+ }
69
+ if (currentBatch.length > 0) {
70
+ batches.push(currentBatch);
71
+ }
72
+ return batches;
73
+ }
37
74
  /**
38
75
  * Production-ready agent that composes all features.
39
76
  */
@@ -73,11 +110,13 @@ export class ProductionAgent {
73
110
  fileChangeTracker = null;
74
111
  capabilitiesRegistry = null;
75
112
  toolResolver = null;
113
+ agentId;
76
114
  blackboard = null;
77
115
  fileCache = null;
78
116
  budgetPool = null;
79
117
  taskManager = null;
80
118
  store = null;
119
+ swarmOrchestrator = null;
81
120
  // Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
82
121
  // Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
83
122
  spawnedTasks = new Map();
@@ -123,6 +162,8 @@ export class ProductionAgent {
123
162
  // Build complete config with defaults
124
163
  this.config = buildConfig(userConfig);
125
164
  this.provider = userConfig.provider;
165
+ // Set unique agent ID (passed from spawnAgent for subagents, auto-generated for parents)
166
+ this.agentId = userConfig.agentId || `agent-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
126
167
  // Initialize tool registry
127
168
  this.tools = new Map();
128
169
  for (const tool of this.config.tools) {
@@ -300,6 +341,26 @@ export class ProductionAgent {
300
341
  for (const tool of taskTools) {
301
342
  this.tools.set(tool.name, tool);
302
343
  }
344
+ // Swarm Mode (experimental)
345
+ if (this.config.swarm) {
346
+ const swarmConfig = this.config.swarm;
347
+ // Wrap provider with request throttle to prevent 429 rate limiting.
348
+ // All subagents share this.provider by reference (line 4398),
349
+ // so wrapping here throttles ALL downstream LLM calls.
350
+ if (swarmConfig.throttle !== false) {
351
+ const throttleConfig = swarmConfig.throttle === 'paid'
352
+ ? PAID_TIER_THROTTLE
353
+ : swarmConfig.throttle === 'free' || swarmConfig.throttle === undefined
354
+ ? FREE_TIER_THROTTLE
355
+ : swarmConfig.throttle;
356
+ this.provider = createThrottledProvider(this.provider, throttleConfig);
357
+ }
358
+ this.swarmOrchestrator = createSwarmOrchestrator(swarmConfig, this.provider, this.agentRegistry, (name, task) => this.spawnAgent(name, task), this.blackboard ?? undefined);
359
+ // Override parent budget pool with swarm's much larger pool so spawnAgent()
360
+ // allocates from the swarm budget (e.g. 10M tokens) instead of the parent's
361
+ // generic pool (200K tokens). Without this, workers get 5K emergency budget.
362
+ this.budgetPool = this.swarmOrchestrator.getBudgetPool().pool;
363
+ }
303
364
  // Cancellation Support
304
365
  if (isFeatureEnabled(this.config.cancellation)) {
305
366
  this.cancellation = createCancellationManager();
@@ -776,8 +837,14 @@ export class ProductionAgent {
776
837
  try {
777
838
  // Check for cancellation before starting
778
839
  cancellationToken?.throwIfCancellationRequested();
779
- // Check if planning is needed
780
- if (this.planning?.shouldPlan(task)) {
840
+ // Check if swarm mode should handle this task
841
+ if (this.swarmOrchestrator) {
842
+ const swarmResult = await this.runSwarm(task);
843
+ // Store swarm summary as an assistant message for the response
844
+ this.state.messages.push({ role: 'assistant', content: swarmResult.summary });
845
+ }
846
+ else if (this.planning?.shouldPlan(task)) {
847
+ // Check if planning is needed
781
848
  await this.createAndExecutePlan(task);
782
849
  }
783
850
  else {
@@ -902,6 +969,43 @@ export class ProductionAgent {
902
969
  }
903
970
  }
904
971
  }
972
+ /**
973
+ * Run a task in swarm mode using the SwarmOrchestrator.
974
+ */
975
+ async runSwarm(task) {
976
+ if (!this.swarmOrchestrator) {
977
+ throw new Error('Swarm orchestrator not initialized');
978
+ }
979
+ this.observability?.logger?.info('Starting swarm execution', { task: task.slice(0, 100) });
980
+ this.observability?.logger?.info('Starting swarm mode — decomposing task into subtasks...');
981
+ // Forward swarm events to the main agent event system
982
+ const unsubSwarm = this.swarmOrchestrator.subscribe(event => {
983
+ // Forward as a generic agent event for TUI display
984
+ this.emit(event);
985
+ });
986
+ // Bridge events to filesystem for live dashboard
987
+ const { SwarmEventBridge } = await import('./integrations/swarm/swarm-event-bridge.js');
988
+ const bridge = new SwarmEventBridge({ outputDir: '.agent/swarm-live' });
989
+ const unsubBridge = bridge.attach(this.swarmOrchestrator);
990
+ try {
991
+ const result = await this.swarmOrchestrator.execute(task);
992
+ // Populate task DAG for dashboard after execution
993
+ bridge.setTasks(result.tasks);
994
+ this.observability?.logger?.info('Swarm execution complete', {
995
+ success: result.success,
996
+ tasks: result.stats.totalTasks,
997
+ completed: result.stats.completedTasks,
998
+ tokens: result.stats.totalTokens,
999
+ cost: result.stats.totalCost,
1000
+ });
1001
+ return result;
1002
+ }
1003
+ finally {
1004
+ unsubBridge();
1005
+ bridge.close();
1006
+ unsubSwarm();
1007
+ }
1008
+ }
905
1009
  /**
906
1010
  * Execute a task directly without planning.
907
1011
  */
@@ -1247,8 +1351,9 @@ export class ProductionAgent {
1247
1351
  while (resilienceEnabled && emptyRetries < MAX_EMPTY_RETRIES) {
1248
1352
  const hasContent = response.content && response.content.length >= MIN_CONTENT_LENGTH;
1249
1353
  const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
1354
+ const hasThinking = response.thinking && response.thinking.length > 0;
1250
1355
  if (hasContent || hasToolCalls) {
1251
- // Valid response received
1356
+ // Valid visible response
1252
1357
  if (emptyRetries > 0) {
1253
1358
  this.emit({
1254
1359
  type: 'resilience.recovered',
@@ -1261,7 +1366,38 @@ export class ProductionAgent {
1261
1366
  }
1262
1367
  break;
1263
1368
  }
1264
- // Empty response - retry with nudge
1369
+ if (hasThinking && !hasContent && !hasToolCalls) {
1370
+ // Model produced reasoning but no visible output (e.g., DeepSeek-R1, GLM-4, QwQ).
1371
+ // Give ONE targeted nudge, then accept thinking as content.
1372
+ if (emptyRetries === 0) {
1373
+ emptyRetries++;
1374
+ this.emit({
1375
+ type: 'resilience.retry',
1376
+ reason: 'thinking_only_response',
1377
+ attempt: emptyRetries,
1378
+ maxAttempts: MAX_EMPTY_RETRIES,
1379
+ });
1380
+ this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1381
+ this.observability?.logger?.warn('Thinking-only response (no visible content), nudging', {
1382
+ thinkingLength: response.thinking.length,
1383
+ });
1384
+ const thinkingNudge = {
1385
+ role: 'user',
1386
+ content: '[System: You produced reasoning but no visible response. Please provide your answer based on your analysis.]',
1387
+ };
1388
+ messages.push(thinkingNudge);
1389
+ this.state.messages.push(thinkingNudge);
1390
+ response = await this.callLLM(messages);
1391
+ continue;
1392
+ }
1393
+ // Second attempt also thinking-only → accept thinking as content
1394
+ this.observability?.logger?.info('Accepting thinking as content after nudge failed', {
1395
+ thinkingLength: response.thinking.length,
1396
+ });
1397
+ response = { ...response, content: response.thinking };
1398
+ break;
1399
+ }
1400
+ // Truly empty (no content, no tools, no thinking) — existing retry logic
1265
1401
  emptyRetries++;
1266
1402
  this.emit({
1267
1403
  type: 'resilience.retry',
@@ -1378,10 +1514,11 @@ export class ProductionAgent {
1378
1514
  role: 'assistant',
1379
1515
  content: response.content,
1380
1516
  toolCalls: response.toolCalls,
1517
+ ...(response.thinking ? { metadata: { thinking: response.thinking } } : {}),
1381
1518
  };
1382
1519
  messages.push(assistantMessage);
1383
1520
  this.state.messages.push(assistantMessage);
1384
- lastResponse = response.content;
1521
+ lastResponse = response.content || (response.thinking ? response.thinking : '');
1385
1522
  // In plan mode: capture exploration findings as we go (not just at the end)
1386
1523
  // This ensures we collect context from exploration iterations before writes are queued
1387
1524
  if (this.modeManager.getMode() === 'plan' && response.content && response.content.length > 50) {
@@ -1785,9 +1922,12 @@ export class ProductionAgent {
1785
1922
  this.emit({ type: 'llm.start', model: this.config.model || 'default' });
1786
1923
  // Prompt caching (Improvement P1): Replace the system message with structured content
1787
1924
  // that includes cache_control markers, enabling 60-70% cache hit rates.
1788
- // The original Message[] is kept for token estimation; the provider gets MessageWithContent[].
1925
+ // Only use structured cache_control markers for Anthropic models other providers
1926
+ // (DeepSeek, Grok, etc.) use automatic prefix-based caching and don't understand these markers.
1927
+ const configModel = this.config.model || 'default';
1928
+ const isAnthropicModel = configModel.startsWith('anthropic/') || configModel.startsWith('claude-');
1789
1929
  let providerMessages = messages;
1790
- if (this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
1930
+ if (isAnthropicModel && this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
1791
1931
  providerMessages = messages.map((m, i) => {
1792
1932
  if (i === 0 && m.role === 'system') {
1793
1933
  // Replace system message with structured cacheable content
@@ -1870,7 +2010,7 @@ export class ProductionAgent {
1870
2010
  taskType: 'general',
1871
2011
  estimatedTokens: messages.reduce((sum, m) => sum + m.content.length / 4, 0),
1872
2012
  };
1873
- const result = await this.routing.executeWithFallback(messages, context);
2013
+ const result = await this.routing.executeWithFallback(providerMessages, context);
1874
2014
  response = result.response;
1875
2015
  actualModel = result.model;
1876
2016
  // Emit routing insight
@@ -1920,6 +2060,14 @@ export class ProductionAgent {
1920
2060
  });
1921
2061
  }
1922
2062
  const duration = Date.now() - startTime;
2063
+ // Debug cache stats when DEBUG_CACHE is set
2064
+ if (process.env.DEBUG_CACHE) {
2065
+ const cr = response.usage?.cacheReadTokens ?? 0;
2066
+ const cw = response.usage?.cacheWriteTokens ?? 0;
2067
+ const inp = response.usage?.inputTokens ?? 0;
2068
+ const hitRate = inp > 0 ? ((cr / inp) * 100).toFixed(1) : '0.0';
2069
+ console.log(`[Cache] model=${actualModel} read=${cr} write=${cw} input=${inp} hit=${hitRate}%`);
2070
+ }
1923
2071
  // Lesson 26: Record LLM response for tracing
1924
2072
  this.traceCollector?.record({
1925
2073
  type: 'llm.response',
@@ -2003,324 +2151,352 @@ export class ProductionAgent {
2003
2151
  }
2004
2152
  /**
2005
2153
  * Execute tool calls with safety checks and execution policy enforcement.
2154
+ * Parallelizable read-only tools are batched and executed concurrently.
2006
2155
  */
2007
2156
  async executeToolCalls(toolCalls) {
2008
2157
  const results = [];
2009
- for (const toolCall of toolCalls) {
2010
- const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
2011
- const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2012
- this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
2013
- const startTime = Date.now();
2014
- // Lesson 26: Record tool start for tracing
2158
+ // Group consecutive parallelizable tool calls into batches
2159
+ const batches = groupToolCallsIntoBatches(toolCalls);
2160
+ // Execute batches: parallel batches use Promise.allSettled, sequential execute one-by-one
2161
+ for (const batch of batches) {
2162
+ if (batch.length > 1 && PARALLELIZABLE_TOOLS.has(batch[0].name)) {
2163
+ // Execute parallelizable batch concurrently
2164
+ const batchResults = await Promise.allSettled(batch.map(tc => this.executeSingleToolCall(tc)));
2165
+ for (const result of batchResults) {
2166
+ if (result.status === 'fulfilled') {
2167
+ results.push(result.value);
2168
+ }
2169
+ else {
2170
+ // Should not happen since executeSingleToolCall catches errors internally
2171
+ const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
2172
+ results.push({ callId: 'unknown', result: `Error: ${error}`, error });
2173
+ }
2174
+ }
2175
+ }
2176
+ else {
2177
+ // Execute sequentially
2178
+ for (const tc of batch) {
2179
+ results.push(await this.executeSingleToolCall(tc));
2180
+ }
2181
+ }
2182
+ }
2183
+ return results;
2184
+ }
2185
+ /**
2186
+ * Execute a single tool call with all safety checks, tracing, and error handling.
2187
+ */
2188
+ async executeSingleToolCall(toolCall) {
2189
+ const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
2190
+ const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2191
+ this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
2192
+ const startTime = Date.now();
2193
+ // Short-circuit if tool call arguments failed to parse
2194
+ if (toolCall.parseError) {
2195
+ const errorMsg = `Tool arguments could not be parsed: ${toolCall.parseError}. Please retry with complete, valid JSON.`;
2196
+ this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: errorMsg });
2015
2197
  this.traceCollector?.record({
2016
- type: 'tool.start',
2017
- data: {
2018
- executionId,
2019
- toolName: toolCall.name,
2020
- arguments: toolCall.arguments,
2021
- },
2198
+ type: 'tool.end',
2199
+ data: { executionId, status: 'error', error: new Error(errorMsg), durationMs: Date.now() - startTime },
2022
2200
  });
2023
- try {
2024
- // =====================================================================
2025
- // PLAN MODE WRITE INTERCEPTION
2026
- // =====================================================================
2027
- // In plan mode, intercept write operations and queue them as proposed changes
2028
- if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
2029
- // Extract contextual reasoning instead of simple truncation
2030
- const reason = this.extractChangeReasoning(toolCall, this.state.messages);
2031
- // Start a new plan if needed
2032
- if (!this.pendingPlanManager.hasPendingPlan()) {
2033
- const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
2034
- const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
2035
- this.pendingPlanManager.startPlan(task);
2036
- }
2037
- // Queue the write operation
2038
- const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
2039
- // Emit event for UI
2040
- this.emit({
2041
- type: 'plan.change.queued',
2042
- tool: toolCall.name,
2043
- changeId: change?.id,
2044
- summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
2045
- });
2046
- // Return a message indicating the change was queued
2047
- const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
2048
- `Tool: ${toolCall.name}\n` +
2049
- `${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
2050
- `Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
2051
- results.push({
2052
- callId: toolCall.id,
2053
- result: queueMessage,
2054
- });
2055
- this.observability?.tracer?.endSpan(spanId);
2056
- continue; // Skip actual execution
2201
+ this.observability?.tracer?.endSpan(spanId);
2202
+ return { callId: toolCall.id, result: `Error: ${errorMsg}`, error: errorMsg };
2203
+ }
2204
+ // Lesson 26: Record tool start for tracing
2205
+ this.traceCollector?.record({
2206
+ type: 'tool.start',
2207
+ data: {
2208
+ executionId,
2209
+ toolName: toolCall.name,
2210
+ arguments: toolCall.arguments,
2211
+ },
2212
+ });
2213
+ try {
2214
+ // =====================================================================
2215
+ // PLAN MODE WRITE INTERCEPTION
2216
+ // =====================================================================
2217
+ // In plan mode, intercept write operations and queue them as proposed changes
2218
+ if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
2219
+ // Extract contextual reasoning instead of simple truncation
2220
+ const reason = this.extractChangeReasoning(toolCall, this.state.messages);
2221
+ // Start a new plan if needed
2222
+ if (!this.pendingPlanManager.hasPendingPlan()) {
2223
+ const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
2224
+ const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
2225
+ this.pendingPlanManager.startPlan(task);
2057
2226
  }
2058
- // =====================================================================
2059
- // EXECUTION POLICY ENFORCEMENT (Lesson 23)
2060
- // =====================================================================
2061
- let policyApprovedByUser = false;
2062
- if (this.executionPolicy) {
2063
- const policyContext = {
2064
- messages: this.state.messages,
2065
- currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
2066
- previousToolCalls: toolCalls.slice(0, toolCalls.indexOf(toolCall)),
2067
- };
2068
- const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
2069
- // Emit policy event
2070
- this.emit({
2071
- type: 'policy.evaluated',
2072
- tool: toolCall.name,
2073
- policy: evaluation.policy,
2074
- reason: evaluation.reason,
2075
- });
2076
- // Emit decision transparency event
2077
- this.emit({
2078
- type: 'decision.tool',
2079
- tool: toolCall.name,
2080
- decision: evaluation.policy === 'forbidden' ? 'blocked'
2081
- : evaluation.policy === 'prompt' ? 'prompted'
2227
+ // Queue the write operation
2228
+ const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
2229
+ // Emit event for UI
2230
+ this.emit({
2231
+ type: 'plan.change.queued',
2232
+ tool: toolCall.name,
2233
+ changeId: change?.id,
2234
+ summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
2235
+ });
2236
+ // Return a message indicating the change was queued
2237
+ const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
2238
+ `Tool: ${toolCall.name}\n` +
2239
+ `${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
2240
+ `Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
2241
+ this.observability?.tracer?.endSpan(spanId);
2242
+ return { callId: toolCall.id, result: queueMessage };
2243
+ }
2244
+ // =====================================================================
2245
+ // EXECUTION POLICY ENFORCEMENT (Lesson 23)
2246
+ // =====================================================================
2247
+ let policyApprovedByUser = false;
2248
+ if (this.executionPolicy) {
2249
+ const policyContext = {
2250
+ messages: this.state.messages,
2251
+ currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
2252
+ previousToolCalls: [],
2253
+ };
2254
+ const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
2255
+ // Emit policy event
2256
+ this.emit({
2257
+ type: 'policy.evaluated',
2258
+ tool: toolCall.name,
2259
+ policy: evaluation.policy,
2260
+ reason: evaluation.reason,
2261
+ });
2262
+ // Emit decision transparency event
2263
+ this.emit({
2264
+ type: 'decision.tool',
2265
+ tool: toolCall.name,
2266
+ decision: evaluation.policy === 'forbidden' ? 'blocked'
2267
+ : evaluation.policy === 'prompt' ? 'prompted'
2268
+ : 'allowed',
2269
+ policyMatch: evaluation.reason,
2270
+ });
2271
+ // Enhanced tracing: Record policy decision
2272
+ this.traceCollector?.record({
2273
+ type: 'decision',
2274
+ data: {
2275
+ type: 'policy',
2276
+ decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
2277
+ outcome: evaluation.policy === 'forbidden' ? 'blocked'
2278
+ : evaluation.policy === 'prompt' ? 'deferred'
2082
2279
  : 'allowed',
2083
- policyMatch: evaluation.reason,
2084
- });
2085
- // Enhanced tracing: Record policy decision
2086
- this.traceCollector?.record({
2087
- type: 'decision',
2088
- data: {
2089
- type: 'policy',
2090
- decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
2091
- outcome: evaluation.policy === 'forbidden' ? 'blocked'
2092
- : evaluation.policy === 'prompt' ? 'deferred'
2093
- : 'allowed',
2094
- reasoning: evaluation.reason,
2095
- factors: [
2096
- { name: 'policy', value: evaluation.policy },
2097
- { name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
2098
- ],
2099
- confidence: evaluation.intent?.confidence ?? 0.8,
2100
- },
2101
- });
2102
- // Handle forbidden policy - always block
2103
- if (evaluation.policy === 'forbidden') {
2104
- throw new Error(`Forbidden by policy: ${evaluation.reason}`);
2105
- }
2106
- // Handle prompt policy - requires approval
2107
- if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
2108
- // Try to get approval through safety manager's human-in-loop
2109
- const humanInLoop = this.safety?.humanInLoop;
2110
- if (humanInLoop) {
2111
- const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
2112
- if (!approval.approved) {
2113
- throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
2114
- }
2115
- policyApprovedByUser = true;
2116
- // Create a grant for future similar calls if approved
2117
- this.executionPolicy.createGrant({
2118
- toolName: toolCall.name,
2119
- grantedBy: 'user',
2120
- reason: 'Approved during execution',
2121
- maxUsages: 5, // Allow 5 more similar calls
2122
- });
2123
- }
2124
- else {
2125
- // No approval handler - block by default for safety
2126
- throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
2280
+ reasoning: evaluation.reason,
2281
+ factors: [
2282
+ { name: 'policy', value: evaluation.policy },
2283
+ { name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
2284
+ ],
2285
+ confidence: evaluation.intent?.confidence ?? 0.8,
2286
+ },
2287
+ });
2288
+ // Handle forbidden policy - always block
2289
+ if (evaluation.policy === 'forbidden') {
2290
+ throw new Error(`Forbidden by policy: ${evaluation.reason}`);
2291
+ }
2292
+ // Handle prompt policy - requires approval
2293
+ if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
2294
+ // Try to get approval through safety manager's human-in-loop
2295
+ const humanInLoop = this.safety?.humanInLoop;
2296
+ if (humanInLoop) {
2297
+ const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
2298
+ if (!approval.approved) {
2299
+ throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
2127
2300
  }
2128
- }
2129
- // Log intent classification if available
2130
- if (evaluation.intent) {
2131
- this.emit({
2132
- type: 'intent.classified',
2133
- tool: toolCall.name,
2134
- intent: evaluation.intent.type,
2135
- confidence: evaluation.intent.confidence,
2301
+ policyApprovedByUser = true;
2302
+ // Create a grant for future similar calls if approved
2303
+ this.executionPolicy.createGrant({
2304
+ toolName: toolCall.name,
2305
+ grantedBy: 'user',
2306
+ reason: 'Approved during execution',
2307
+ maxUsages: 5, // Allow 5 more similar calls
2136
2308
  });
2137
2309
  }
2138
- }
2139
- // =====================================================================
2140
- // SAFETY VALIDATION (Lesson 20-21)
2141
- // =====================================================================
2142
- if (this.safety) {
2143
- const safety = this.safety;
2144
- const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
2145
- if (!validation.allowed) {
2146
- throw new Error(`Tool call blocked: ${validation.reason}`);
2147
- }
2148
- }
2149
- // Get tool definition (with lazy-loading support for MCP tools)
2150
- let tool = this.tools.get(toolCall.name);
2151
- const wasPreloaded = !!tool;
2152
- if (!tool && this.toolResolver) {
2153
- // Try to resolve and load the tool on-demand
2154
- const resolved = this.toolResolver(toolCall.name);
2155
- if (resolved) {
2156
- this.addTool(resolved);
2157
- tool = resolved;
2158
- if (process.env.DEBUG)
2159
- console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
2160
- this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
2310
+ else {
2311
+ // No approval handler - block by default for safety
2312
+ throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
2161
2313
  }
2162
2314
  }
2163
- if (!tool) {
2164
- throw new Error(`Unknown tool: ${toolCall.name}`);
2315
+ // Log intent classification if available
2316
+ if (evaluation.intent) {
2317
+ this.emit({
2318
+ type: 'intent.classified',
2319
+ tool: toolCall.name,
2320
+ intent: evaluation.intent.type,
2321
+ confidence: evaluation.intent.confidence,
2322
+ });
2165
2323
  }
2166
- // Log whether tool was pre-loaded or auto-loaded (for MCP tools)
2167
- if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
2168
- console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
2324
+ }
2325
+ // =====================================================================
2326
+ // SAFETY VALIDATION (Lesson 20-21)
2327
+ // =====================================================================
2328
+ if (this.safety) {
2329
+ const safety = this.safety;
2330
+ const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
2331
+ if (!validation.allowed) {
2332
+ throw new Error(`Tool call blocked: ${validation.reason}`);
2169
2333
  }
2170
- // =====================================================================
2171
- // BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
2172
- // =====================================================================
2173
- // Claim file resources before write operations to prevent conflicts
2174
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2175
- const args = toolCall.arguments;
2176
- const filePath = String(args.path || args.file_path || '');
2177
- if (filePath) {
2178
- const agentId = this.config.systemPrompt?.slice(0, 50) || 'agent';
2179
- const claimed = this.blackboard.claim(filePath, agentId, 'write', {
2180
- ttl: 60000, // 1 minute claim
2181
- intent: `${toolCall.name}: ${filePath}`,
2182
- });
2183
- if (!claimed) {
2184
- const existingClaim = this.blackboard.getClaim(filePath);
2185
- throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
2186
- `Wait for the other agent to complete or choose a different file.`);
2187
- }
2188
- }
2334
+ }
2335
+ // Get tool definition (with lazy-loading support for MCP tools)
2336
+ let tool = this.tools.get(toolCall.name);
2337
+ const wasPreloaded = !!tool;
2338
+ if (!tool && this.toolResolver) {
2339
+ // Try to resolve and load the tool on-demand
2340
+ const resolved = this.toolResolver(toolCall.name);
2341
+ if (resolved) {
2342
+ this.addTool(resolved);
2343
+ tool = resolved;
2344
+ if (process.env.DEBUG)
2345
+ console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
2346
+ this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
2189
2347
  }
2190
- // FILE CACHE: Check cache for read_file operations before executing
2191
- if (this.fileCache && toolCall.name === 'read_file') {
2192
- const args = toolCall.arguments;
2193
- const readPath = String(args.path || '');
2194
- if (readPath) {
2195
- const cached = this.fileCache.get(readPath);
2196
- if (cached !== undefined) {
2197
- const lines = cached.split('\n').length;
2198
- const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2199
- const duration = Date.now() - startTime;
2200
- this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2201
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2202
- this.state.metrics.toolCalls++;
2203
- this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2204
- results.push({
2205
- callId: toolCall.id,
2206
- result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2207
- });
2208
- this.observability?.tracer?.endSpan(spanId);
2209
- continue; // Skip actual file I/O
2210
- }
2348
+ }
2349
+ if (!tool) {
2350
+ throw new Error(`Unknown tool: ${toolCall.name}`);
2351
+ }
2352
+ // Log whether tool was pre-loaded or auto-loaded (for MCP tools)
2353
+ if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
2354
+ console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
2355
+ }
2356
+ // =====================================================================
2357
+ // BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
2358
+ // =====================================================================
2359
+ // Claim file resources before write operations to prevent conflicts
2360
+ if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2361
+ const args = toolCall.arguments;
2362
+ const filePath = String(args.path || args.file_path || '');
2363
+ if (filePath) {
2364
+ const agentId = this.agentId;
2365
+ const claimed = this.blackboard.claim(filePath, agentId, 'write', {
2366
+ ttl: 60000, // 1 minute claim
2367
+ intent: `${toolCall.name}: ${filePath}`,
2368
+ });
2369
+ if (!claimed) {
2370
+ const existingClaim = this.blackboard.getClaim(filePath);
2371
+ throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
2372
+ `Wait for the other agent to complete or choose a different file.`);
2211
2373
  }
2212
2374
  }
2213
- // Execute tool (with sandbox if available)
2214
- let result;
2215
- if (this.safety?.sandbox) {
2216
- // CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
2217
- // The default 60s sandbox timeout would kill subagents prematurely
2218
- // Subagents may run for minutes (per their own timeout config)
2219
- const isSpawnAgent = toolCall.name === 'spawn_agent';
2220
- const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
2221
- const isSubagentTool = isSpawnAgent || isSpawnParallel;
2222
- const subagentConfig = this.config.subagent;
2223
- const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
2224
- const subagentTimeout = hasSubagentConfig
2225
- ? subagentConfig.defaultTimeout ?? 600000 // 10 min default
2226
- : 600000;
2227
- // Use subagent timeout + buffer for spawn tools, default for others
2228
- // For spawn_agents_parallel, multiply by number of agents (they run in parallel,
2229
- // but the total wall-clock time should still allow the slowest agent to complete)
2230
- const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
2231
- result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
2232
- }
2233
- else {
2234
- result = await tool.execute(toolCall.arguments);
2235
- }
2236
- const duration = Date.now() - startTime;
2237
- // Lesson 26: Record tool completion for tracing
2238
- this.traceCollector?.record({
2239
- type: 'tool.end',
2240
- data: {
2241
- executionId,
2242
- status: 'success',
2243
- result,
2244
- durationMs: duration,
2245
- },
2246
- });
2247
- // Record metrics
2248
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2249
- this.state.metrics.toolCalls++;
2250
- this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2251
- // FILE CACHE: Store read results and invalidate on writes
2252
- if (this.fileCache) {
2253
- const args = toolCall.arguments;
2254
- const filePath = String(args.path || args.file_path || '');
2255
- if (toolCall.name === 'read_file' && filePath) {
2256
- // Cache successful read results
2257
- const resultObj = result;
2258
- if (resultObj?.success && typeof resultObj.output === 'string') {
2259
- this.fileCache.set(filePath, resultObj.output);
2260
- }
2261
- }
2262
- else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2263
- // Invalidate cache when files are modified (including undo operations)
2264
- this.fileCache.invalidate(filePath);
2375
+ }
2376
+ // FILE CACHE: Check cache for read_file operations before executing
2377
+ if (this.fileCache && toolCall.name === 'read_file') {
2378
+ const args = toolCall.arguments;
2379
+ const readPath = String(args.path || '');
2380
+ if (readPath) {
2381
+ const cached = this.fileCache.get(readPath);
2382
+ if (cached !== undefined) {
2383
+ const lines = cached.split('\n').length;
2384
+ const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2385
+ const duration = Date.now() - startTime;
2386
+ this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2387
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2388
+ this.state.metrics.toolCalls++;
2389
+ this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2390
+ this.observability?.tracer?.endSpan(spanId);
2391
+ return {
2392
+ callId: toolCall.id,
2393
+ result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2394
+ };
2265
2395
  }
2266
2396
  }
2267
- // Emit tool insight with result summary
2268
- const summary = this.summarizeToolResult(toolCall.name, result);
2269
- this.emit({
2270
- type: 'insight.tool',
2271
- tool: toolCall.name,
2272
- summary,
2273
- durationMs: duration,
2274
- success: true,
2275
- });
2276
- results.push({
2277
- callId: toolCall.id,
2397
+ }
2398
+ // Execute tool (with sandbox if available)
2399
+ let result;
2400
+ if (this.safety?.sandbox) {
2401
+ // CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
2402
+ // The default 60s sandbox timeout would kill subagents prematurely
2403
+ // Subagents may run for minutes (per their own timeout config)
2404
+ const isSpawnAgent = toolCall.name === 'spawn_agent';
2405
+ const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
2406
+ const isSubagentTool = isSpawnAgent || isSpawnParallel;
2407
+ const subagentConfig = this.config.subagent;
2408
+ const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
2409
+ const subagentTimeout = hasSubagentConfig
2410
+ ? subagentConfig.defaultTimeout ?? 600000 // 10 min default
2411
+ : 600000;
2412
+ // Use subagent timeout + buffer for spawn tools, default for others
2413
+ // spawn_agents_parallel gets the same timeout rather than a per-agent multiple: the agents run in parallel,
2414
+ // so the total wall-clock time only needs to allow the slowest agent to complete.
2415
+ const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
2416
+ result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
2417
+ }
2418
+ else {
2419
+ result = await tool.execute(toolCall.arguments);
2420
+ }
2421
+ const duration = Date.now() - startTime;
2422
+ // Lesson 26: Record tool completion for tracing
2423
+ this.traceCollector?.record({
2424
+ type: 'tool.end',
2425
+ data: {
2426
+ executionId,
2427
+ status: 'success',
2278
2428
  result,
2279
- });
2280
- // Release blackboard claim after successful file write
2281
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2282
- const args = toolCall.arguments;
2283
- const filePath = String(args.path || args.file_path || '');
2284
- if (filePath) {
2285
- const agentId = this.config.systemPrompt?.slice(0, 50) || 'agent';
2286
- this.blackboard.release(filePath, agentId);
2429
+ durationMs: duration,
2430
+ },
2431
+ });
2432
+ // Record metrics
2433
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2434
+ this.state.metrics.toolCalls++;
2435
+ this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2436
+ // FILE CACHE: Store read results and invalidate on writes
2437
+ if (this.fileCache) {
2438
+ const args = toolCall.arguments;
2439
+ const filePath = String(args.path || args.file_path || '');
2440
+ if (toolCall.name === 'read_file' && filePath) {
2441
+ // Cache successful read results
2442
+ const resultObj = result;
2443
+ if (resultObj?.success && typeof resultObj.output === 'string') {
2444
+ this.fileCache.set(filePath, resultObj.output);
2287
2445
  }
2288
2446
  }
2289
- this.observability?.tracer?.endSpan(spanId);
2447
+ else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2448
+ // Invalidate cache when files are modified (including undo operations)
2449
+ this.fileCache.invalidate(filePath);
2450
+ }
2290
2451
  }
2291
- catch (err) {
2292
- const error = err instanceof Error ? err : new Error(String(err));
2293
- const duration = Date.now() - startTime;
2294
- // Lesson 26: Record tool error for tracing
2295
- this.traceCollector?.record({
2296
- type: 'tool.end',
2297
- data: {
2298
- executionId,
2299
- status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
2300
- error,
2301
- durationMs: duration,
2302
- },
2303
- });
2304
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
2305
- this.observability?.tracer?.recordError(error);
2306
- this.observability?.tracer?.endSpan(spanId);
2307
- // FAILURE EVIDENCE RECORDING (Trick S)
2308
- // Track failed tool calls to prevent loops and provide context
2309
- this.contextEngineering?.recordFailure({
2310
- action: toolCall.name,
2311
- args: toolCall.arguments,
2312
- error,
2313
- intent: `Execute tool ${toolCall.name}`,
2314
- });
2315
- results.push({
2316
- callId: toolCall.id,
2317
- result: `Error: ${error.message}`,
2318
- error: error.message,
2319
- });
2320
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
2452
+ // Emit tool insight with result summary
2453
+ const summary = this.summarizeToolResult(toolCall.name, result);
2454
+ this.emit({
2455
+ type: 'insight.tool',
2456
+ tool: toolCall.name,
2457
+ summary,
2458
+ durationMs: duration,
2459
+ success: true,
2460
+ });
2461
+ // Release blackboard claim after successful file write
2462
+ if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2463
+ const args = toolCall.arguments;
2464
+ const filePath = String(args.path || args.file_path || '');
2465
+ if (filePath) {
2466
+ const agentId = this.agentId;
2467
+ this.blackboard.release(filePath, agentId);
2468
+ }
2321
2469
  }
2470
+ this.observability?.tracer?.endSpan(spanId);
2471
+ return { callId: toolCall.id, result };
2472
+ }
2473
+ catch (err) {
2474
+ const error = err instanceof Error ? err : new Error(String(err));
2475
+ const duration = Date.now() - startTime;
2476
+ // Lesson 26: Record tool error for tracing
2477
+ this.traceCollector?.record({
2478
+ type: 'tool.end',
2479
+ data: {
2480
+ executionId,
2481
+ status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
2482
+ error,
2483
+ durationMs: duration,
2484
+ },
2485
+ });
2486
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
2487
+ this.observability?.tracer?.recordError(error);
2488
+ this.observability?.tracer?.endSpan(spanId);
2489
+ // FAILURE EVIDENCE RECORDING (Trick S)
2490
+ // Track failed tool calls to prevent loops and provide context
2491
+ this.contextEngineering?.recordFailure({
2492
+ action: toolCall.name,
2493
+ args: toolCall.arguments,
2494
+ error,
2495
+ intent: `Execute tool ${toolCall.name}`,
2496
+ });
2497
+ this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
2498
+ return { callId: toolCall.id, result: `Error: ${error.message}`, error: error.message };
2322
2499
  }
2323
- return results;
2324
2500
  }
2325
2501
  /**
2326
2502
  * Get recently modified file paths from the file change tracker.
@@ -3425,7 +3601,9 @@ export class ProductionAgent {
3425
3601
  };
3426
3602
  }
3427
3603
  // DUPLICATE SPAWN PREVENTION with SEMANTIC SIMILARITY
3428
- // First try exact string match, then check semantic similarity for similar tasks
3604
+ // Skip for swarm workers — the orchestrator handles retry logic and deduplication
3605
+ // at the task level. Without this bypass, retried swarm tasks return stale results.
3606
+ const isSwarmWorker = agentName.startsWith('swarm-');
3429
3607
  const SEMANTIC_SIMILARITY_THRESHOLD = 0.75; // 75% similarity = duplicate
3430
3608
  const taskKey = `${agentName}:${task.slice(0, 150).toLowerCase().replace(/\s+/g, ' ').trim()}`;
3431
3609
  const now = Date.now();
@@ -3435,30 +3613,33 @@ export class ProductionAgent {
3435
3613
  this.spawnedTasks.delete(key);
3436
3614
  }
3437
3615
  }
3438
- // Check for exact match first
3439
- let existingMatch = this.spawnedTasks.get(taskKey);
3616
+ let existingMatch;
3440
3617
  let matchType = 'exact';
3441
- // If no exact match, check for semantic similarity among same agent's tasks
3442
- if (!existingMatch) {
3443
- for (const [key, entry] of this.spawnedTasks.entries()) {
3444
- // Only compare tasks from the same agent type
3445
- if (!key.startsWith(`${agentName}:`))
3446
- continue;
3447
- if (now - entry.timestamp >= ProductionAgent.SPAWN_DEDUP_WINDOW_MS)
3448
- continue;
3449
- // Extract the task portion from the key
3450
- const existingTask = key.slice(agentName.length + 1);
3451
- const similarity = calculateTaskSimilarity(task, existingTask);
3452
- if (similarity >= SEMANTIC_SIMILARITY_THRESHOLD) {
3453
- existingMatch = entry;
3454
- matchType = 'semantic';
3455
- this.observability?.logger?.debug('Semantic duplicate detected', {
3456
- agent: agentName,
3457
- newTask: task.slice(0, 80),
3458
- existingTask: existingTask.slice(0, 80),
3459
- similarity: (similarity * 100).toFixed(1) + '%',
3460
- });
3461
- break;
3618
+ if (!isSwarmWorker) {
3619
+ // Check for exact match first
3620
+ existingMatch = this.spawnedTasks.get(taskKey);
3621
+ // If no exact match, check for semantic similarity among same agent's tasks
3622
+ if (!existingMatch) {
3623
+ for (const [key, entry] of this.spawnedTasks.entries()) {
3624
+ // Only compare tasks from the same agent type
3625
+ if (!key.startsWith(`${agentName}:`))
3626
+ continue;
3627
+ if (now - entry.timestamp >= ProductionAgent.SPAWN_DEDUP_WINDOW_MS)
3628
+ continue;
3629
+ // Extract the task portion from the key
3630
+ const existingTask = key.slice(agentName.length + 1);
3631
+ const similarity = calculateTaskSimilarity(task, existingTask);
3632
+ if (similarity >= SEMANTIC_SIMILARITY_THRESHOLD) {
3633
+ existingMatch = entry;
3634
+ matchType = 'semantic';
3635
+ this.observability?.logger?.debug('Semantic duplicate detected', {
3636
+ agent: agentName,
3637
+ newTask: task.slice(0, 80),
3638
+ existingTask: existingTask.slice(0, 80),
3639
+ similarity: (similarity * 100).toFixed(1) + '%',
3640
+ });
3641
+ break;
3642
+ }
3462
3643
  }
3463
3644
  }
3464
3645
  }
@@ -3650,7 +3831,12 @@ export class ProductionAgent {
3650
3831
  observability: this.config.observability,
3651
3832
  sandbox: this.config.sandbox,
3652
3833
  humanInLoop: this.config.humanInLoop,
3653
- executionPolicy: this.config.executionPolicy,
3834
+ // Subagents get 'allow' as default policy since they're already
3835
+ // constrained to their registered tool set. The parent's 'prompt'
3836
+ // policy can't work without humanInLoop.
3837
+ executionPolicy: this.config.executionPolicy
3838
+ ? { ...this.config.executionPolicy, defaultPolicy: 'allow' }
3839
+ : this.config.executionPolicy,
3654
3840
  threads: false,
3655
3841
  // Disable hooks console output in subagents - parent handles event display
3656
3842
  hooks: this.config.hooks === false ? false : {
@@ -3658,6 +3844,8 @@ export class ProductionAgent {
3658
3844
  builtIn: { logging: false, timing: false, metrics: false },
3659
3845
  custom: [],
3660
3846
  },
3847
+ // Pass unique agentId for blackboard coordination and tracing
3848
+ agentId,
3661
3849
  // Share parent's blackboard for coordination between parallel subagents
3662
3850
  blackboard: this.blackboard || undefined,
3663
3851
  // Share parent's file cache to eliminate redundant reads across agents
@@ -4122,9 +4310,30 @@ export class ProductionAgent {
4122
4310
  count: tasks.length,
4123
4311
  agents: tasks.map(t => t.agent),
4124
4312
  });
4125
- // Execute all tasks in parallel using allSettled to handle partial failures
4126
- const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
4127
- const settled = await Promise.allSettled(promises);
4313
+ // Pre-divide budget pool equally to prevent first-come starvation.
4314
+ // Temporarily lower maxPerChild so each spawnAgent's normal reserve() call
4315
+ // gets an equal share instead of racing for the full maxPerChild allocation.
4316
+ let settled;
4317
+ if (this.budgetPool && tasks.length > 1) {
4318
+ const poolStats = this.budgetPool.getStats();
4319
+ // equalShare is always ≤ remaining ≤ totalTokens ≤ originalMaxPerChild
4320
+ // (guaranteed by createBudgetPool capping maxPerChild to poolTokens)
4321
+ // so we don't need Math.min(equalShare, originalMaxPerChild) here.
4322
+ const equalShare = Math.floor(poolStats.tokensRemaining / tasks.length);
4323
+ this.budgetPool.setMaxPerChild(equalShare);
4324
+ try {
4325
+ const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
4326
+ settled = await Promise.allSettled(promises);
4327
+ }
4328
+ finally {
4329
+ this.budgetPool.resetMaxPerChild();
4330
+ }
4331
+ }
4332
+ else {
4333
+ // Single task or no pool - use standard sequential allocation
4334
+ const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
4335
+ settled = await Promise.allSettled(promises);
4336
+ }
4128
4337
  // Convert settled results to SpawnResult array
4129
4338
  const results = settled.map((result, i) => {
4130
4339
  if (result.status === 'fulfilled') {