attocode 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/CHANGELOG.md +35 -1
  2. package/dist/src/adapters.d.ts.map +1 -1
  3. package/dist/src/adapters.js +6 -0
  4. package/dist/src/adapters.js.map +1 -1
  5. package/dist/src/agent.d.ts +66 -1
  6. package/dist/src/agent.d.ts.map +1 -1
  7. package/dist/src/agent.js +849 -108
  8. package/dist/src/agent.js.map +1 -1
  9. package/dist/src/commands/handler.d.ts.map +1 -1
  10. package/dist/src/commands/handler.js +6 -0
  11. package/dist/src/commands/handler.js.map +1 -1
  12. package/dist/src/defaults.d.ts +3 -1
  13. package/dist/src/defaults.d.ts.map +1 -1
  14. package/dist/src/defaults.js +9 -1
  15. package/dist/src/defaults.js.map +1 -1
  16. package/dist/src/integrations/agent-registry.d.ts +14 -0
  17. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  18. package/dist/src/integrations/agent-registry.js.map +1 -1
  19. package/dist/src/integrations/budget-pool.d.ts +96 -0
  20. package/dist/src/integrations/budget-pool.d.ts.map +1 -0
  21. package/dist/src/integrations/budget-pool.js +145 -0
  22. package/dist/src/integrations/budget-pool.js.map +1 -0
  23. package/dist/src/integrations/cancellation.d.ts +62 -0
  24. package/dist/src/integrations/cancellation.d.ts.map +1 -1
  25. package/dist/src/integrations/cancellation.js +174 -0
  26. package/dist/src/integrations/cancellation.js.map +1 -1
  27. package/dist/src/integrations/context-engineering.d.ts +16 -1
  28. package/dist/src/integrations/context-engineering.d.ts.map +1 -1
  29. package/dist/src/integrations/context-engineering.js +17 -0
  30. package/dist/src/integrations/context-engineering.js.map +1 -1
  31. package/dist/src/integrations/dead-letter-queue.js +1 -1
  32. package/dist/src/integrations/dead-letter-queue.js.map +1 -1
  33. package/dist/src/integrations/economics.d.ts +41 -0
  34. package/dist/src/integrations/economics.d.ts.map +1 -1
  35. package/dist/src/integrations/economics.js +114 -8
  36. package/dist/src/integrations/economics.js.map +1 -1
  37. package/dist/src/integrations/file-cache.d.ts +90 -0
  38. package/dist/src/integrations/file-cache.d.ts.map +1 -0
  39. package/dist/src/integrations/file-cache.js +164 -0
  40. package/dist/src/integrations/file-cache.js.map +1 -0
  41. package/dist/src/integrations/history.d.ts +72 -0
  42. package/dist/src/integrations/history.d.ts.map +1 -0
  43. package/dist/src/integrations/history.js +165 -0
  44. package/dist/src/integrations/history.js.map +1 -0
  45. package/dist/src/integrations/index.d.ts +8 -5
  46. package/dist/src/integrations/index.d.ts.map +1 -1
  47. package/dist/src/integrations/index.js +8 -2
  48. package/dist/src/integrations/index.js.map +1 -1
  49. package/dist/src/integrations/resources.d.ts +5 -0
  50. package/dist/src/integrations/resources.d.ts.map +1 -1
  51. package/dist/src/integrations/resources.js +7 -0
  52. package/dist/src/integrations/resources.js.map +1 -1
  53. package/dist/src/integrations/safety.d.ts +28 -1
  54. package/dist/src/integrations/safety.d.ts.map +1 -1
  55. package/dist/src/integrations/safety.js +69 -5
  56. package/dist/src/integrations/safety.js.map +1 -1
  57. package/dist/src/modes/tui.d.ts.map +1 -1
  58. package/dist/src/modes/tui.js +6 -0
  59. package/dist/src/modes/tui.js.map +1 -1
  60. package/dist/src/providers/adapters/anthropic.d.ts +1 -1
  61. package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
  62. package/dist/src/providers/adapters/anthropic.js +15 -2
  63. package/dist/src/providers/adapters/anthropic.js.map +1 -1
  64. package/dist/src/providers/adapters/mock.d.ts +2 -2
  65. package/dist/src/providers/adapters/mock.d.ts.map +1 -1
  66. package/dist/src/providers/adapters/mock.js +2 -1
  67. package/dist/src/providers/adapters/mock.js.map +1 -1
  68. package/dist/src/providers/adapters/openai.d.ts +1 -1
  69. package/dist/src/providers/adapters/openai.d.ts.map +1 -1
  70. package/dist/src/providers/adapters/openai.js +2 -2
  71. package/dist/src/providers/adapters/openai.js.map +1 -1
  72. package/dist/src/providers/adapters/openrouter.d.ts +1 -1
  73. package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
  74. package/dist/src/providers/adapters/openrouter.js +9 -6
  75. package/dist/src/providers/adapters/openrouter.js.map +1 -1
  76. package/dist/src/providers/types.d.ts +1 -1
  77. package/dist/src/providers/types.d.ts.map +1 -1
  78. package/dist/src/tools/agent.d.ts.map +1 -1
  79. package/dist/src/tools/agent.js +13 -1
  80. package/dist/src/tools/agent.js.map +1 -1
  81. package/dist/src/tricks/kv-cache-context.d.ts +24 -0
  82. package/dist/src/tricks/kv-cache-context.d.ts.map +1 -1
  83. package/dist/src/tricks/kv-cache-context.js +68 -0
  84. package/dist/src/tricks/kv-cache-context.js.map +1 -1
  85. package/dist/src/tui/app.d.ts.map +1 -1
  86. package/dist/src/tui/app.js +213 -103
  87. package/dist/src/tui/app.js.map +1 -1
  88. package/dist/src/tui/components/DebugPanel.d.ts +41 -0
  89. package/dist/src/tui/components/DebugPanel.d.ts.map +1 -0
  90. package/dist/src/tui/components/DebugPanel.js +104 -0
  91. package/dist/src/tui/components/DebugPanel.js.map +1 -0
  92. package/dist/src/tui/components/ErrorDetailPanel.d.ts +49 -0
  93. package/dist/src/tui/components/ErrorDetailPanel.d.ts.map +1 -0
  94. package/dist/src/tui/components/ErrorDetailPanel.js +109 -0
  95. package/dist/src/tui/components/ErrorDetailPanel.js.map +1 -0
  96. package/dist/src/tui/components/ToolCallItem.d.ts +3 -4
  97. package/dist/src/tui/components/ToolCallItem.d.ts.map +1 -1
  98. package/dist/src/tui/components/ToolCallItem.js +51 -15
  99. package/dist/src/tui/components/ToolCallItem.js.map +1 -1
  100. package/dist/src/tui/components/index.d.ts +2 -0
  101. package/dist/src/tui/components/index.d.ts.map +1 -1
  102. package/dist/src/tui/components/index.js +4 -0
  103. package/dist/src/tui/components/index.js.map +1 -1
  104. package/dist/src/types.d.ts +109 -1
  105. package/dist/src/types.d.ts.map +1 -1
  106. package/package.json +1 -1
package/dist/src/agent.js CHANGED
@@ -21,7 +21,7 @@
21
21
  import { buildConfig, isFeatureEnabled, getEnabledFeatures, getSubagentTimeout, getSubagentMaxIterations, } from './defaults.js';
22
22
  import { createModeManager, formatModeList, parseMode, calculateTaskSimilarity, SUBAGENT_PLAN_MODE_ADDITION, } from './modes.js';
23
23
  import { createLSPFileTools, } from './agent-tools/index.js';
24
- import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createTimeoutToken, createLinkedToken, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, } from './integrations/index.js';
24
+ import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, } from './integrations/index.js';
25
25
  // Lesson 26: Tracing & Evaluation integration
26
26
  import { createTraceCollector } from './tracing/trace-collector.js';
27
27
  // Model registry for context window limits
@@ -74,7 +74,10 @@ export class ProductionAgent {
74
74
  capabilitiesRegistry = null;
75
75
  toolResolver = null;
76
76
  blackboard = null;
77
+ fileCache = null;
78
+ budgetPool = null;
77
79
  taskManager = null;
80
+ store = null;
78
81
  // Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
79
82
  // Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
80
83
  spawnedTasks = new Map();
@@ -84,9 +87,17 @@ export class ProductionAgent {
84
87
  // External cancellation token (for subagent timeout propagation)
85
88
  // When set, the agent will check this token in addition to its own cancellation manager
86
89
  externalCancellationToken = null;
90
+ // Graceful wrapup support (for subagent timeout wrapup phase)
91
+ wrapupRequested = false;
92
+ wrapupReason = null;
93
+ // Cacheable system prompt blocks for prompt caching (Improvement P1)
94
+ // When set, callLLM() will inject these as structured content with cache_control markers
95
+ cacheableSystemBlocks = null;
87
96
  // Initialization tracking
88
97
  initPromises = [];
89
98
  initComplete = false;
99
+ // Event listener cleanup tracking (prevents memory leaks in long sessions)
100
+ unsubscribers = [];
90
101
  // State
91
102
  state = {
92
103
  status: 'idle',
@@ -101,6 +112,10 @@ export class ProductionAgent {
101
112
  llmCalls: 0,
102
113
  toolCalls: 0,
103
114
  duration: 0,
115
+ successCount: 0,
116
+ failureCount: 0,
117
+ cancelCount: 0,
118
+ retryCount: 0,
104
119
  },
105
120
  iteration: 0,
106
121
  };
@@ -131,6 +146,26 @@ export class ProductionAgent {
131
146
  deduplicateFindings: true,
132
147
  });
133
148
  }
149
+ // Shared File Cache - eliminates redundant file reads across parent and subagents
150
+ // Subagents inherit parent's cache; parent agents create their own
151
+ if (userConfig.fileCache) {
152
+ this.fileCache = userConfig.fileCache;
153
+ }
154
+ else if (this.config.subagent !== false) {
155
+ this.fileCache = createSharedFileCache({
156
+ maxCacheBytes: 5 * 1024 * 1024, // 5MB
157
+ ttlMs: 5 * 60 * 1000, // 5 minutes
158
+ });
159
+ }
160
+ // Shared Budget Pool - pools token budget across parent and subagents
161
+ // Only parent agents create the pool; subagents don't need their own
162
+ // The pool is used in spawnAgent() to allocate budgets from the parent's total
163
+ if (this.config.subagent !== false) {
164
+ // Use actual configured budget (custom or default), not always STANDARD_BUDGET
165
+ const baseBudget = this.config.budget ?? STANDARD_BUDGET;
166
+ const parentBudgetTokens = baseBudget.maxTokens ?? STANDARD_BUDGET.maxTokens ?? 200000;
167
+ this.budgetPool = createBudgetPool(parentBudgetTokens, 0.25, 100000);
168
+ }
134
169
  // Initialize enabled features
135
170
  this.initializeFeatures();
136
171
  }
@@ -227,11 +262,13 @@ export class ProductionAgent {
227
262
  }));
228
263
  }
229
264
  // Economics System (Token Budget) - always enabled
265
+ // Use custom budget if provided (subagents use SUBAGENT_BUDGET), otherwise STANDARD_BUDGET
266
+ const baseBudget = this.config.budget ?? STANDARD_BUDGET;
230
267
  this.economics = new ExecutionEconomicsManager({
231
- ...STANDARD_BUDGET,
268
+ ...baseBudget,
232
269
  // Use maxIterations from config as absolute safety cap
233
270
  maxIterations: this.config.maxIterations,
234
- targetIterations: Math.min(20, this.config.maxIterations),
271
+ targetIterations: Math.min(baseBudget.targetIterations ?? 20, this.config.maxIterations),
235
272
  });
236
273
  // Agent Registry - always enabled for subagent support
237
274
  this.agentRegistry = new AgentRegistry();
@@ -247,13 +284,17 @@ export class ProductionAgent {
247
284
  this.tools.set(boundParallelSpawnTool.name, boundParallelSpawnTool);
248
285
  // Task Manager - Claude Code-style task system for coordination
249
286
  this.taskManager = createTaskManager();
250
- // Forward task events
251
- this.taskManager.on('task.created', (data) => {
287
+ // Forward task events (with cleanup tracking for EventEmitter-based managers)
288
+ const taskCreatedHandler = (data) => {
252
289
  this.emit({ type: 'task.created', task: data.task });
253
- });
254
- this.taskManager.on('task.updated', (data) => {
290
+ };
291
+ this.taskManager.on('task.created', taskCreatedHandler);
292
+ this.unsubscribers.push(() => this.taskManager?.off('task.created', taskCreatedHandler));
293
+ const taskUpdatedHandler = (data) => {
255
294
  this.emit({ type: 'task.updated', task: data.task });
256
- });
295
+ };
296
+ this.taskManager.on('task.updated', taskUpdatedHandler);
297
+ this.unsubscribers.push(() => this.taskManager?.off('task.updated', taskUpdatedHandler));
257
298
  // Register task tools
258
299
  const taskTools = createTaskTools(this.taskManager);
259
300
  for (const tool of taskTools) {
@@ -262,12 +303,13 @@ export class ProductionAgent {
262
303
  // Cancellation Support
263
304
  if (isFeatureEnabled(this.config.cancellation)) {
264
305
  this.cancellation = createCancellationManager();
265
- // Forward cancellation events
266
- this.cancellation.subscribe(event => {
306
+ // Forward cancellation events (with cleanup tracking)
307
+ const unsubCancellation = this.cancellation.subscribe(event => {
267
308
  if (event.type === 'cancellation.requested') {
268
309
  this.emit({ type: 'cancellation.requested', reason: event.reason });
269
310
  }
270
311
  });
312
+ this.unsubscribers.push(unsubCancellation);
271
313
  }
272
314
  // Resource Monitoring
273
315
  if (isFeatureEnabled(this.config.resources)) {
@@ -298,8 +340,8 @@ export class ProductionAgent {
298
340
  maxSize: this.config.semanticCache.maxSize,
299
341
  ttl: this.config.semanticCache.ttl,
300
342
  });
301
- // Forward cache events
302
- this.semanticCache.subscribe(event => {
343
+ // Forward cache events (with cleanup tracking)
344
+ const unsubSemanticCache = this.semanticCache.subscribe(event => {
303
345
  if (event.type === 'cache.hit') {
304
346
  this.emit({ type: 'cache.hit', query: event.query, similarity: event.similarity });
305
347
  }
@@ -310,6 +352,7 @@ export class ProductionAgent {
310
352
  this.emit({ type: 'cache.set', query: event.query });
311
353
  }
312
354
  });
355
+ this.unsubscribers.push(unsubSemanticCache);
313
356
  }
314
357
  // Skills Support
315
358
  if (isFeatureEnabled(this.config.skills)) {
@@ -361,8 +404,8 @@ export class ProductionAgent {
361
404
  this.codebaseContext.setLSPManager(this.lspManager);
362
405
  }
363
406
  }
364
- // Forward context engineering events
365
- this.contextEngineering.on(event => {
407
+ // Forward context engineering events (with cleanup tracking)
408
+ const unsubContextEngineering = this.contextEngineering.on(event => {
366
409
  switch (event.type) {
367
410
  case 'failure.recorded':
368
411
  this.observability?.logger?.warn('Failure recorded', {
@@ -384,6 +427,7 @@ export class ProductionAgent {
384
427
  break;
385
428
  }
386
429
  });
430
+ this.unsubscribers.push(unsubContextEngineering);
387
431
  // Interactive Planning (conversational + editable planning)
388
432
  if (isFeatureEnabled(this.config.interactivePlanning)) {
389
433
  const interactiveConfig = typeof this.config.interactivePlanning === 'object'
@@ -395,8 +439,8 @@ export class ProductionAgent {
395
439
  maxCheckpoints: 20,
396
440
  autoPauseAtDecisions: true,
397
441
  });
398
- // Forward planner events to observability
399
- this.interactivePlanner.on(event => {
442
+ // Forward planner events to observability (with cleanup tracking)
443
+ const unsubInteractivePlanner = this.interactivePlanner.on(event => {
400
444
  switch (event.type) {
401
445
  case 'plan.created':
402
446
  this.observability?.logger?.info('Interactive plan created', {
@@ -420,6 +464,7 @@ export class ProductionAgent {
420
464
  break;
421
465
  }
422
466
  });
467
+ this.unsubscribers.push(unsubInteractivePlanner);
423
468
  }
424
469
  // Recursive Context (RLM - Recursive Language Models)
425
470
  // Enables on-demand context exploration for large codebases
@@ -436,8 +481,8 @@ export class ProductionAgent {
436
481
  });
437
482
  // Note: File system source should be registered when needed with proper glob/readFile functions
438
483
  // This is deferred to allow flexible configuration
439
- // Forward RLM events
440
- this.recursiveContext.on(event => {
484
+ // Forward RLM events (with cleanup tracking)
485
+ const unsubRecursiveContext = this.recursiveContext.on(event => {
441
486
  switch (event.type) {
442
487
  case 'process.started':
443
488
  this.observability?.logger?.debug('RLM process started', {
@@ -464,6 +509,7 @@ export class ProductionAgent {
464
509
  break;
465
510
  }
466
511
  });
512
+ this.unsubscribers.push(unsubRecursiveContext);
467
513
  }
468
514
  // Learning Store (cross-session learning from failures)
469
515
  // Connects to the failure tracker in contextEngineering for automatic learning extraction
@@ -484,8 +530,8 @@ export class ProductionAgent {
484
530
  this.learningStore.connectFailureTracker(failureTracker);
485
531
  }
486
532
  }
487
- // Forward learning events to observability
488
- this.learningStore.on(event => {
533
+ // Forward learning events to observability (with cleanup tracking)
534
+ const unsubLearningStore = this.learningStore.on(event => {
489
535
  switch (event.type) {
490
536
  case 'learning.proposed':
491
537
  this.observability?.logger?.info('Learning proposed', {
@@ -523,6 +569,7 @@ export class ProductionAgent {
523
569
  break;
524
570
  }
525
571
  });
572
+ this.unsubscribers.push(unsubLearningStore);
526
573
  }
527
574
  // Auto-Compaction Manager (sophisticated context compaction)
528
575
  // Uses the Compactor for LLM-based summarization with threshold monitoring
@@ -585,8 +632,8 @@ export class ProductionAgent {
585
632
  maxContextTokens, // Dynamic from model registry or config
586
633
  compactHandler, // Use reversible compaction when contextEngineering is available
587
634
  });
588
- // Forward compactor events to observability
589
- this.compactor.on(event => {
635
+ // Forward compactor events to observability (with cleanup tracking)
636
+ const unsubCompactor = this.compactor.on(event => {
590
637
  switch (event.type) {
591
638
  case 'compaction.start':
592
639
  this.observability?.logger?.info('Compaction started', {
@@ -607,8 +654,9 @@ export class ProductionAgent {
607
654
  break;
608
655
  }
609
656
  });
610
- // Forward auto-compaction events
611
- this.autoCompactionManager.on((event) => {
657
+ this.unsubscribers.push(unsubCompactor);
658
+ // Forward auto-compaction events (with cleanup tracking)
659
+ const unsubAutoCompaction = this.autoCompactionManager.on((event) => {
612
660
  switch (event.type) {
613
661
  case 'autocompaction.warning':
614
662
  this.observability?.logger?.warn('Context approaching limit', {
@@ -655,6 +703,7 @@ export class ProductionAgent {
655
703
  break;
656
704
  }
657
705
  });
706
+ this.unsubscribers.push(unsubAutoCompaction);
658
707
  }
659
708
  // Note: FileChangeTracker requires a database instance which is not
660
709
  // available at this point. Use initFileChangeTracker() to enable it
@@ -743,6 +792,7 @@ export class ProductionAgent {
743
792
  // Finalize
744
793
  const duration = Date.now() - startTime;
745
794
  this.state.metrics.duration = duration;
795
+ this.state.metrics.successCount = (this.state.metrics.successCount ?? 0) + 1;
746
796
  await this.observability?.tracer?.endTrace();
747
797
  const result = {
748
798
  success: true,
@@ -773,6 +823,7 @@ export class ProductionAgent {
773
823
  const cleanupDuration = Date.now() - cleanupStart;
774
824
  this.emit({ type: 'cancellation.completed', cleanupDuration });
775
825
  this.observability?.logger?.info('Agent cancelled', { reason: error.message, cleanupDuration });
826
+ this.state.metrics.cancelCount = (this.state.metrics.cancelCount ?? 0) + 1;
776
827
  // Lesson 26: End trace capture on cancellation
777
828
  if (this.traceCollector?.isTaskActive()) {
778
829
  await this.traceCollector.endTask({ success: false, failureReason: `Cancelled: ${error.message}` });
@@ -791,6 +842,7 @@ export class ProductionAgent {
791
842
  }
792
843
  this.observability?.tracer?.recordError(error);
793
844
  await this.observability?.tracer?.endTrace();
845
+ this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
794
846
  this.emit({ type: 'error', error: error.message });
795
847
  this.observability?.logger?.error('Agent failed', { error: error.message });
796
848
  // Lesson 26: End trace capture on error
@@ -870,6 +922,9 @@ export class ProductionAgent {
870
922
  : 0.8;
871
923
  let reflectionAttempt = 0;
872
924
  let lastResponse = '';
925
+ let incompleteActionRetries = 0;
926
+ const requestedArtifact = this.extractRequestedArtifact(task);
927
+ const executedToolNames = new Set();
873
928
  // Outer loop for reflection (if enabled)
874
929
  while (reflectionAttempt < maxReflectionAttempts) {
875
930
  reflectionAttempt++;
@@ -883,16 +938,13 @@ export class ProductionAgent {
883
938
  });
884
939
  // =======================================================================
885
940
  // CANCELLATION CHECK
886
- // Checks both internal cancellation (ESC key) and external cancellation
887
- // (parent timeout when this agent is a subagent)
941
+ // Internal cancellation (ESC key) is always checked immediately.
942
+ // External cancellation (parent timeout) is checked after economics
943
+ // to allow graceful wrapup when wrapup has been requested.
888
944
  // =======================================================================
889
945
  if (this.cancellation?.isCancelled) {
890
946
  this.cancellation.token.throwIfCancellationRequested();
891
947
  }
892
- // Also check external cancellation token (from parent when spawned as subagent)
893
- if (this.externalCancellationToken?.isCancellationRequested) {
894
- this.externalCancellationToken.throwIfCancellationRequested();
895
- }
896
948
  // =======================================================================
897
949
  // RESOURCE CHECK - system resource limits
898
950
  // =======================================================================
@@ -942,6 +994,7 @@ export class ProductionAgent {
942
994
  attempt: 1,
943
995
  maxAttempts: 1,
944
996
  });
997
+ this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
945
998
  // Mark that we've attempted recovery to prevent infinite loops
946
999
  this.state._recoveryAttempted = true;
947
1000
  const tokensBefore = this.estimateContextTokens(messages);
@@ -1035,6 +1088,26 @@ export class ProductionAgent {
1035
1088
  }
1036
1089
  }
1037
1090
  // =======================================================================
1091
+ // GRACEFUL WRAPUP CHECK
1092
+ // If a wrapup has been requested (e.g., timeout approaching), convert
1093
+ // to forceTextOnly + inject wrapup prompt for structured summary.
1094
+ // Must come after economics check (which may also set forceTextOnly).
1095
+ // =======================================================================
1096
+ if (this.wrapupRequested && !forceTextOnly) {
1097
+ forceTextOnly = true;
1098
+ budgetInjectedPrompt = TIMEOUT_WRAPUP_PROMPT;
1099
+ this.wrapupRequested = false;
1100
+ }
1101
+ // =======================================================================
1102
+ // EXTERNAL CANCELLATION CHECK (deferred from above)
1103
+ // Checked after wrapup so that graceful wrapup can intercept the timeout.
1104
+ // If wrapup was already requested and converted to forceTextOnly above,
1105
+ // we skip throwing here to allow one more text-only turn for the summary.
1106
+ // =======================================================================
1107
+ if (this.externalCancellationToken?.isCancellationRequested && !forceTextOnly) {
1108
+ this.externalCancellationToken.throwIfCancellationRequested();
1109
+ }
1110
+ // =======================================================================
1038
1111
  // INTELLIGENT LOOP DETECTION & NUDGE INJECTION
1039
1112
  // Uses economics system for doom loops, exploration saturation, etc.
1040
1113
  // =======================================================================
@@ -1133,6 +1206,40 @@ export class ProductionAgent {
1133
1206
  const MAX_CONTINUATIONS = resilienceConfig.maxContinuations ?? 3;
1134
1207
  const AUTO_CONTINUE = resilienceConfig.autoContinue ?? true;
1135
1208
  const MIN_CONTENT_LENGTH = resilienceConfig.minContentLength ?? 1;
1209
+ const INCOMPLETE_ACTION_RECOVERY = resilienceConfig.incompleteActionRecovery ?? true;
1210
+ const MAX_INCOMPLETE_ACTION_RETRIES = resilienceConfig.maxIncompleteActionRetries ?? 2;
1211
+ const ENFORCE_REQUESTED_ARTIFACTS = resilienceConfig.enforceRequestedArtifacts ?? true;
1212
+ // =================================================================
1213
+ // PRE-FLIGHT BUDGET CHECK: Estimate if LLM call would exceed budget
1214
+ // Catches cases where we're at e.g. 120k and next call adds ~35k
1215
+ // =================================================================
1216
+ if (this.economics && !forceTextOnly) {
1217
+ const estimatedInputTokens = this.estimateContextTokens(messages);
1218
+ const estimatedOutputTokens = 4096; // Conservative output estimate
1219
+ const currentUsage = this.economics.getUsage();
1220
+ const budget = this.economics.getBudget();
1221
+ const projectedTotal = currentUsage.tokens + estimatedInputTokens + estimatedOutputTokens;
1222
+ if (projectedTotal > budget.maxTokens) {
1223
+ this.observability?.logger?.warn('Pre-flight budget check: projected overshoot', {
1224
+ currentTokens: currentUsage.tokens,
1225
+ estimatedInput: estimatedInputTokens,
1226
+ projectedTotal,
1227
+ maxTokens: budget.maxTokens,
1228
+ });
1229
+ // Inject wrap-up prompt if not already injected
1230
+ if (!budgetInjectedPrompt) {
1231
+ messages.push({
1232
+ role: 'user',
1233
+ content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
1234
+ });
1235
+ this.state.messages.push({
1236
+ role: 'user',
1237
+ content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
1238
+ });
1239
+ }
1240
+ forceTextOnly = true;
1241
+ }
1242
+ }
1136
1243
  let response = await this.callLLM(messages);
1137
1244
  let emptyRetries = 0;
1138
1245
  let continuations = 0;
@@ -1162,6 +1269,7 @@ export class ProductionAgent {
1162
1269
  attempt: emptyRetries,
1163
1270
  maxAttempts: MAX_EMPTY_RETRIES,
1164
1271
  });
1272
+ this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1165
1273
  this.observability?.logger?.warn('Empty LLM response, retrying', {
1166
1274
  attempt: emptyRetries,
1167
1275
  maxAttempts: MAX_EMPTY_RETRIES,
@@ -1219,10 +1327,51 @@ export class ProductionAgent {
1219
1327
  });
1220
1328
  }
1221
1329
  }
1330
+ // Phase 2b: Handle truncated tool calls (stopReason=max_tokens with tool calls present)
1331
+ // When a model hits max_tokens mid-tool-call, the JSON arguments are truncated and unparseable.
1332
+ // Instead of executing broken tool calls, strip them and ask the LLM to retry smaller.
1333
+ if (resilienceEnabled && response.stopReason === 'max_tokens' && response.toolCalls?.length) {
1334
+ this.emit({
1335
+ type: 'resilience.truncated_tool_call',
1336
+ toolNames: response.toolCalls.map(tc => tc.name),
1337
+ });
1338
+ this.observability?.logger?.warn('Tool call truncated at max_tokens', {
1339
+ toolNames: response.toolCalls.map(tc => tc.name),
1340
+ outputTokens: response.usage?.outputTokens,
1341
+ });
1342
+ // Strip truncated tool calls, inject recovery message
1343
+ const truncatedResponse = response;
1344
+ response = { ...response, toolCalls: undefined };
1345
+ const recoveryMessage = {
1346
+ role: 'user',
1347
+ content: '[System: Your previous tool call was truncated because the output exceeded the token limit. ' +
1348
+ 'The tool call arguments were cut off and could not be parsed. ' +
1349
+ 'Please retry with a smaller approach: for write_file, break the content into smaller chunks ' +
1350
+ 'or use edit_file for targeted changes instead of rewriting entire files.]',
1351
+ };
1352
+ messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
1353
+ messages.push(recoveryMessage);
1354
+ this.state.messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
1355
+ this.state.messages.push(recoveryMessage);
1356
+ response = await this.callLLM(messages);
1357
+ }
1222
1358
  // Record LLM usage for economics
1223
1359
  if (this.economics && response.usage) {
1224
1360
  this.economics.recordLLMUsage(response.usage.inputTokens, response.usage.outputTokens, this.config.model, response.usage.cost // Use actual cost from provider when available
1225
1361
  );
1362
+ // =================================================================
1363
+ // POST-LLM BUDGET CHECK: Prevent tool execution if over budget
1364
+ // A single LLM call can push us over - catch it before running tools
1365
+ // =================================================================
1366
+ if (!forceTextOnly) {
1367
+ const postCheck = this.economics.checkBudget();
1368
+ if (!postCheck.canContinue) {
1369
+ this.observability?.logger?.warn('Budget exceeded after LLM call, skipping tool execution', {
1370
+ reason: postCheck.reason,
1371
+ });
1372
+ forceTextOnly = true;
1373
+ }
1374
+ }
1226
1375
  }
1227
1376
  // Add assistant message
1228
1377
  const assistantMessage = {
@@ -1253,6 +1402,61 @@ export class ProductionAgent {
1253
1402
  iteration: this.state.iteration,
1254
1403
  });
1255
1404
  }
1405
+ const incompleteAction = this.detectIncompleteActionResponse(response.content || '');
1406
+ const missingRequiredArtifact = ENFORCE_REQUESTED_ARTIFACTS
1407
+ ? this.isRequestedArtifactMissing(requestedArtifact, executedToolNames)
1408
+ : false;
1409
+ const shouldRecoverIncompleteAction = resilienceEnabled
1410
+ && INCOMPLETE_ACTION_RECOVERY
1411
+ && !forceTextOnly
1412
+ && (incompleteAction || missingRequiredArtifact);
1413
+ if (shouldRecoverIncompleteAction) {
1414
+ if (incompleteActionRetries < MAX_INCOMPLETE_ACTION_RETRIES) {
1415
+ incompleteActionRetries++;
1416
+ const reason = missingRequiredArtifact && requestedArtifact
1417
+ ? `missing_requested_artifact:${requestedArtifact}`
1418
+ : 'future_intent_without_action';
1419
+ this.emit({
1420
+ type: 'resilience.incomplete_action_detected',
1421
+ reason,
1422
+ attempt: incompleteActionRetries,
1423
+ maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1424
+ requiresArtifact: missingRequiredArtifact,
1425
+ });
1426
+ this.observability?.logger?.warn('Incomplete action detected, retrying with nudge', {
1427
+ reason,
1428
+ attempt: incompleteActionRetries,
1429
+ maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1430
+ });
1431
+ const nudgeMessage = {
1432
+ role: 'user',
1433
+ content: missingRequiredArtifact && requestedArtifact
1434
+ ? `[System: You said you would complete the next action, but no tool call was made. The task requires creating or updating "${requestedArtifact}". Execute the required tool now, or explicitly explain why it cannot be produced.]`
1435
+ : '[System: You described a next action but did not execute it. If work remains, call the required tool now. If the task is complete, provide a final answer with no pending action language.]',
1436
+ };
1437
+ messages.push(nudgeMessage);
1438
+ this.state.messages.push(nudgeMessage);
1439
+ continue;
1440
+ }
1441
+ const failureReason = missingRequiredArtifact && requestedArtifact
1442
+ ? `incomplete_action_missing_artifact:${requestedArtifact}`
1443
+ : 'incomplete_action_unresolved';
1444
+ this.emit({
1445
+ type: 'resilience.incomplete_action_failed',
1446
+ reason: failureReason,
1447
+ attempts: incompleteActionRetries,
1448
+ maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1449
+ });
1450
+ throw new Error(`LLM failed to complete requested action after ${incompleteActionRetries} retries (${failureReason})`);
1451
+ }
1452
+ if (incompleteActionRetries > 0) {
1453
+ this.emit({
1454
+ type: 'resilience.incomplete_action_recovered',
1455
+ reason: 'incomplete_action',
1456
+ attempts: incompleteActionRetries,
1457
+ });
1458
+ incompleteActionRetries = 0;
1459
+ }
1256
1460
  // No tool calls (or forced to ignore), agent is done - compact tool outputs to save context
1257
1461
  // The model has "consumed" the tool outputs and produced a response,
1258
1462
  // so we can replace verbose outputs with compact summaries
@@ -1293,6 +1497,7 @@ export class ProductionAgent {
1293
1497
  for (let i = 0; i < toolCalls.length; i++) {
1294
1498
  const toolCall = toolCalls[i];
1295
1499
  const result = toolResults[i];
1500
+ executedToolNames.add(toolCall.name);
1296
1501
  this.economics?.recordToolCall(toolCall.name, toolCall.arguments, result?.result);
1297
1502
  }
1298
1503
  // Add tool results to messages (with truncation and proactive budget management)
@@ -1340,8 +1545,11 @@ export class ProductionAgent {
1340
1545
  this.compactToolOutputs();
1341
1546
  }
1342
1547
  }
1548
+ const toolCallNameById = new Map(toolCalls.map(tc => [tc.id, tc.name]));
1343
1549
  for (const result of toolResults) {
1344
1550
  let content = typeof result.result === 'string' ? result.result : stableStringify(result.result);
1551
+ const sourceToolName = toolCallNameById.get(result.callId);
1552
+ const isExpensiveResult = sourceToolName === 'spawn_agent' || sourceToolName === 'spawn_agents_parallel';
1345
1553
  // Truncate long outputs to save context
1346
1554
  if (content.length > MAX_TOOL_OUTPUT_CHARS) {
1347
1555
  content = content.slice(0, MAX_TOOL_OUTPUT_CHARS) + `\n\n... [truncated ${content.length - MAX_TOOL_OUTPUT_CHARS} chars]`;
@@ -1376,6 +1584,15 @@ export class ProductionAgent {
1376
1584
  role: 'tool',
1377
1585
  content,
1378
1586
  toolCallId: result.callId,
1587
+ ...(isExpensiveResult
1588
+ ? {
1589
+ metadata: {
1590
+ preserveFromCompaction: true,
1591
+ costToRegenerate: 'high',
1592
+ source: sourceToolName,
1593
+ },
1594
+ }
1595
+ : {}),
1379
1596
  };
1380
1597
  messages.push(toolMessage);
1381
1598
  this.state.messages.push(toolMessage);
@@ -1497,28 +1714,42 @@ export class ProductionAgent {
1497
1714
  }
1498
1715
  }
1499
1716
  // Build system prompt using cache-aware builder if available (Trick P)
1500
- let systemPrompt;
1501
1717
  // Combine memory, learnings, and codebase context
1502
1718
  const combinedContext = [
1503
1719
  ...(memoryContext.length > 0 ? memoryContext : []),
1504
1720
  ...(learningsContext ? [learningsContext] : []),
1505
1721
  ...(codebaseContextStr ? [`\n## Relevant Code\n${codebaseContextStr}`] : []),
1506
1722
  ].join('\n');
1723
+ const promptOptions = {
1724
+ rules: rulesContent + (skillsPrompt ? '\n\n' + skillsPrompt : ''),
1725
+ tools: toolDescriptions,
1726
+ memory: combinedContext.length > 0 ? combinedContext : undefined,
1727
+ dynamic: {
1728
+ mode: this.modeManager?.getMode() ?? 'default',
1729
+ },
1730
+ };
1507
1731
  if (this.contextEngineering) {
1508
- // Use cache-optimized prompt builder - orders sections for KV-cache reuse:
1509
- // static prefix -> rules -> tools -> memory/codebase -> dynamic
1510
- systemPrompt = this.contextEngineering.buildSystemPrompt({
1511
- rules: rulesContent + (skillsPrompt ? '\n\n' + skillsPrompt : ''),
1512
- tools: toolDescriptions,
1513
- memory: combinedContext.length > 0 ? combinedContext : undefined,
1514
- dynamic: {
1515
- mode: this.modeManager?.getMode() ?? 'default',
1516
- },
1517
- });
1732
+ // Build cache-aware system prompt with cache_control markers (Improvement P1).
1733
+ // Store structured blocks for callLLM() to inject as MessageWithContent.
1734
+ // The string version is still used for token estimation and debugging.
1735
+ const cacheableBlocks = this.contextEngineering.buildCacheableSystemPrompt(promptOptions);
1736
+ // Safety check: ensure we have content (empty array = no cache context configured)
1737
+ if (cacheableBlocks.length === 0 || cacheableBlocks.every(b => b.text.trim().length === 0)) {
1738
+ this.cacheableSystemBlocks = null;
1739
+ messages.push({ role: 'system', content: this.config.systemPrompt || 'You are a helpful AI assistant.' });
1740
+ }
1741
+ else {
1742
+ // Store cacheable blocks for provider injection
1743
+ this.cacheableSystemBlocks = cacheableBlocks;
1744
+ // Push a regular string Message for backward compatibility (token estimation, etc.)
1745
+ const flatPrompt = cacheableBlocks.map(b => b.text).join('');
1746
+ messages.push({ role: 'system', content: flatPrompt });
1747
+ }
1518
1748
  }
1519
1749
  else {
1520
- // Fallback: manual concatenation (original behavior)
1521
- systemPrompt = this.config.systemPrompt;
1750
+ // Fallback: manual concatenation (original behavior) — no cache markers
1751
+ this.cacheableSystemBlocks = null;
1752
+ let systemPrompt = this.config.systemPrompt;
1522
1753
  if (rulesContent)
1523
1754
  systemPrompt += '\n\n' + rulesContent;
1524
1755
  if (skillsPrompt)
@@ -1529,13 +1760,13 @@ export class ProductionAgent {
1529
1760
  if (toolDescriptions) {
1530
1761
  systemPrompt += '\n\nAvailable tools:\n' + toolDescriptions;
1531
1762
  }
1763
+ // Safety check: ensure system prompt is not empty
1764
+ if (!systemPrompt || systemPrompt.trim().length === 0) {
1765
+ console.warn('[buildMessages] Warning: Empty system prompt detected, using fallback');
1766
+ systemPrompt = this.config.systemPrompt || 'You are a helpful AI assistant.';
1767
+ }
1768
+ messages.push({ role: 'system', content: systemPrompt });
1532
1769
  }
1533
- // Safety check: ensure system prompt is not empty
1534
- if (!systemPrompt || systemPrompt.trim().length === 0) {
1535
- console.warn('[buildMessages] Warning: Empty system prompt detected, using fallback');
1536
- systemPrompt = this.config.systemPrompt || 'You are a helpful AI assistant.';
1537
- }
1538
- messages.push({ role: 'system', content: systemPrompt });
1539
1770
  // Add existing conversation
1540
1771
  for (const msg of this.state.messages) {
1541
1772
  if (msg.role !== 'system') {
@@ -1552,6 +1783,22 @@ export class ProductionAgent {
1552
1783
  async callLLM(messages) {
1553
1784
  const spanId = this.observability?.tracer?.startSpan('llm.call');
1554
1785
  this.emit({ type: 'llm.start', model: this.config.model || 'default' });
1786
+ // Prompt caching (Improvement P1): Replace the system message with structured content
1787
+ // that includes cache_control markers, enabling 60-70% cache hit rates.
1788
+ // The original Message[] is kept for token estimation; the provider gets MessageWithContent[].
1789
+ let providerMessages = messages;
1790
+ if (this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
1791
+ providerMessages = messages.map((m, i) => {
1792
+ if (i === 0 && m.role === 'system') {
1793
+ // Replace system message with structured cacheable content
1794
+ return {
1795
+ role: 'system',
1796
+ content: this.cacheableSystemBlocks,
1797
+ };
1798
+ }
1799
+ return m;
1800
+ });
1801
+ }
1555
1802
  // Emit context insight for verbose feedback
1556
1803
  const estimatedTokens = messages.reduce((sum, m) => {
1557
1804
  const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
@@ -1667,7 +1914,7 @@ export class ProductionAgent {
1667
1914
  });
1668
1915
  }
1669
1916
  else {
1670
- response = await this.provider.chat(messages, {
1917
+ response = await this.provider.chat(providerMessages, {
1671
1918
  model: this.config.model,
1672
1919
  tools: Array.from(this.tools.values()),
1673
1920
  });
@@ -1741,6 +1988,19 @@ export class ProductionAgent {
1741
1988
  throw error;
1742
1989
  }
1743
1990
  }
1991
+ /**
1992
+ * Execute an async callback while excluding wall-clock wait time from duration budgeting.
1993
+ * Used for external waits such as approval dialogs and delegation confirmation.
1994
+ */
1995
+ async withPausedDuration(fn) {
1996
+ this.economics?.pauseDuration();
1997
+ try {
1998
+ return await fn();
1999
+ }
2000
+ finally {
2001
+ this.economics?.resumeDuration();
2002
+ }
2003
+ }
1744
2004
  /**
1745
2005
  * Execute tool calls with safety checks and execution policy enforcement.
1746
2006
  */
@@ -1798,6 +2058,7 @@ export class ProductionAgent {
1798
2058
  // =====================================================================
1799
2059
  // EXECUTION POLICY ENFORCEMENT (Lesson 23)
1800
2060
  // =====================================================================
2061
+ let policyApprovedByUser = false;
1801
2062
  if (this.executionPolicy) {
1802
2063
  const policyContext = {
1803
2064
  messages: this.state.messages,
@@ -1845,11 +2106,13 @@ export class ProductionAgent {
1845
2106
  // Handle prompt policy - requires approval
1846
2107
  if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
1847
2108
  // Try to get approval through safety manager's human-in-loop
1848
- if (this.safety?.humanInLoop) {
1849
- const approval = await this.safety.humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`);
2109
+ const humanInLoop = this.safety?.humanInLoop;
2110
+ if (humanInLoop) {
2111
+ const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
1850
2112
  if (!approval.approved) {
1851
2113
  throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
1852
2114
  }
2115
+ policyApprovedByUser = true;
1853
2116
  // Create a grant for future similar calls if approved
1854
2117
  this.executionPolicy.createGrant({
1855
2118
  toolName: toolCall.name,
@@ -1877,7 +2140,8 @@ export class ProductionAgent {
1877
2140
  // SAFETY VALIDATION (Lesson 20-21)
1878
2141
  // =====================================================================
1879
2142
  if (this.safety) {
1880
- const validation = await this.safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`);
2143
+ const safety = this.safety;
2144
+ const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
1881
2145
  if (!validation.allowed) {
1882
2146
  throw new Error(`Tool call blocked: ${validation.reason}`);
1883
2147
  }
@@ -1923,6 +2187,29 @@ export class ProductionAgent {
1923
2187
  }
1924
2188
  }
1925
2189
  }
2190
+ // FILE CACHE: Check cache for read_file operations before executing
2191
+ if (this.fileCache && toolCall.name === 'read_file') {
2192
+ const args = toolCall.arguments;
2193
+ const readPath = String(args.path || '');
2194
+ if (readPath) {
2195
+ const cached = this.fileCache.get(readPath);
2196
+ if (cached !== undefined) {
2197
+ const lines = cached.split('\n').length;
2198
+ const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2199
+ const duration = Date.now() - startTime;
2200
+ this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2201
+ this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2202
+ this.state.metrics.toolCalls++;
2203
+ this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2204
+ results.push({
2205
+ callId: toolCall.id,
2206
+ result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2207
+ });
2208
+ this.observability?.tracer?.endSpan(spanId);
2209
+ continue; // Skip actual file I/O
2210
+ }
2211
+ }
2212
+ }
1926
2213
  // Execute tool (with sandbox if available)
1927
2214
  let result;
1928
2215
  if (this.safety?.sandbox) {
@@ -1961,6 +2248,22 @@ export class ProductionAgent {
1961
2248
  this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
1962
2249
  this.state.metrics.toolCalls++;
1963
2250
  this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2251
+ // FILE CACHE: Store read results and invalidate on writes
2252
+ if (this.fileCache) {
2253
+ const args = toolCall.arguments;
2254
+ const filePath = String(args.path || args.file_path || '');
2255
+ if (toolCall.name === 'read_file' && filePath) {
2256
+ // Cache successful read results
2257
+ const resultObj = result;
2258
+ if (resultObj?.success && typeof resultObj.output === 'string') {
2259
+ this.fileCache.set(filePath, resultObj.output);
2260
+ }
2261
+ }
2262
+ else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2263
+ // Invalidate cache when files are modified (including undo operations)
2264
+ this.fileCache.invalidate(filePath);
2265
+ }
2266
+ }
1964
2267
  // Emit tool insight with result summary
1965
2268
  const summary = this.summarizeToolResult(toolCall.name, result);
1966
2269
  this.emit({
@@ -2286,7 +2589,14 @@ export class ProductionAgent {
2286
2589
  */
2287
2590
  getMetrics() {
2288
2591
  if (this.observability?.metrics) {
2289
- return this.observability.metrics.getMetrics();
2592
+ const observed = this.observability.metrics.getMetrics();
2593
+ return {
2594
+ ...observed,
2595
+ successCount: this.state.metrics.successCount ?? 0,
2596
+ failureCount: this.state.metrics.failureCount ?? 0,
2597
+ cancelCount: this.state.metrics.cancelCount ?? 0,
2598
+ retryCount: this.state.metrics.retryCount ?? 0,
2599
+ };
2290
2600
  }
2291
2601
  return this.state.metrics;
2292
2602
  }
@@ -2418,6 +2728,10 @@ export class ProductionAgent {
2418
2728
  llmCalls: 0,
2419
2729
  toolCalls: 0,
2420
2730
  duration: 0,
2731
+ successCount: 0,
2732
+ failureCount: 0,
2733
+ cancelCount: 0,
2734
+ retryCount: 0,
2421
2735
  },
2422
2736
  iteration: 0,
2423
2737
  };
@@ -2563,6 +2877,10 @@ export class ProductionAgent {
2563
2877
  toolCalls: sanitized.metrics.toolCalls ?? 0,
2564
2878
  duration: sanitized.metrics.duration ?? 0,
2565
2879
  reflectionAttempts: sanitized.metrics.reflectionAttempts,
2880
+ successCount: sanitized.metrics.successCount ?? 0,
2881
+ failureCount: sanitized.metrics.failureCount ?? 0,
2882
+ cancelCount: sanitized.metrics.cancelCount ?? 0,
2883
+ retryCount: sanitized.metrics.retryCount ?? 0,
2566
2884
  };
2567
2885
  }
2568
2886
  // Restore plan if present
@@ -2610,10 +2928,20 @@ export class ProductionAgent {
2610
2928
  */
2611
2929
  compactToolOutputs() {
2612
2930
  const COMPACT_PREVIEW_LENGTH = 200; // Keep first 200 chars as preview
2931
+ const MAX_PRESERVED_EXPENSIVE_RESULTS = 6;
2613
2932
  let compactedCount = 0;
2614
2933
  let savedChars = 0;
2615
- for (const msg of this.state.messages) {
2934
+ const preservedExpensiveIndexes = this.state.messages
2935
+ .map((msg, index) => ({ msg, index }))
2936
+ .filter(({ msg }) => msg.role === 'tool' && msg.metadata?.preserveFromCompaction === true)
2937
+ .map(({ index }) => index);
2938
+ const preserveSet = new Set(preservedExpensiveIndexes.slice(-MAX_PRESERVED_EXPENSIVE_RESULTS));
2939
+ for (let i = 0; i < this.state.messages.length; i++) {
2940
+ const msg = this.state.messages[i];
2616
2941
  if (msg.role === 'tool' && msg.content && msg.content.length > COMPACT_PREVIEW_LENGTH * 2) {
2942
+ if (msg.metadata?.preserveFromCompaction === true && preserveSet.has(i)) {
2943
+ continue;
2944
+ }
2617
2945
  const originalLength = msg.content.length;
2618
2946
  const preview = msg.content.slice(0, COMPACT_PREVIEW_LENGTH).replace(/\n/g, ' ');
2619
2947
  msg.content = `[${preview}...] (${originalLength} chars, compacted)`;
@@ -2645,6 +2973,42 @@ export class ProductionAgent {
2645
2973
  }
2646
2974
  return Math.ceil(totalChars / 4); // ~4 chars per token
2647
2975
  }
2976
+ /**
2977
+ * Extract a requested markdown artifact filename from a task prompt.
2978
+ * Returns null when no explicit artifact requirement is detected.
2979
+ */
2980
+ extractRequestedArtifact(task) {
2981
+ const markdownArtifactMatch = task.match(/(?:write|save|create)[^.\n]{0,120}\b([A-Za-z0-9._/-]+\.md)\b/i);
2982
+ return markdownArtifactMatch?.[1] ?? null;
2983
+ }
2984
+ /**
2985
+ * Check whether a requested artifact appears to be missing based on executed tools.
2986
+ */
2987
+ isRequestedArtifactMissing(requestedArtifact, executedToolNames) {
2988
+ if (!requestedArtifact) {
2989
+ return false;
2990
+ }
2991
+ const artifactWriteTools = ['write_file', 'edit_file', 'apply_patch', 'append_file'];
2992
+ return !artifactWriteTools.some(toolName => executedToolNames.has(toolName));
2993
+ }
2994
+ /**
2995
+ * Detect "future-intent" responses that imply the model has not completed work.
2996
+ */
2997
+ detectIncompleteActionResponse(content) {
2998
+ const trimmed = content.trim();
2999
+ if (!trimmed) {
3000
+ return false;
3001
+ }
3002
+ const lower = trimmed.toLowerCase();
3003
+ const futureIntentPatterns = [
3004
+ /^(now|next|then)\s+(i\s+will|i'll|let me)\b/,
3005
+ /^i\s+(will|am going to|can)\b/,
3006
+ /^(let me|i'll|i will)\s+(create|write|save|do|make|generate|start)\b/,
3007
+ /^(now|next|then)\s+i(?:'ll| will)\b/,
3008
+ ];
3009
+ const completionSignals = /\b(done|completed|finished|here is|created|saved|wrote)\b/;
3010
+ return futureIntentPatterns.some(pattern => pattern.test(lower)) && !completionSignals.test(lower);
3011
+ }
2648
3012
  /**
2649
3013
  * Get audit log (if human-in-loop is enabled).
2650
3014
  */
@@ -2667,8 +3031,8 @@ export class ProductionAgent {
2667
3031
  for (const role of roles) {
2668
3032
  this.multiAgent.registerRole(role);
2669
3033
  }
2670
- // Set up event forwarding
2671
- this.multiAgent.on(event => {
3034
+ // Set up event forwarding (unsubscribe after operation to prevent memory leaks)
3035
+ const unsubMultiAgent = this.multiAgent.on(event => {
2672
3036
  switch (event.type) {
2673
3037
  case 'agent.spawn':
2674
3038
  this.emit({ type: 'multiagent.spawn', agentId: event.agentId, role: event.role });
@@ -2684,14 +3048,19 @@ export class ProductionAgent {
2684
3048
  break;
2685
3049
  }
2686
3050
  });
2687
- const result = await this.multiAgent.runWithTeam(task, {
2688
- roles,
2689
- consensusStrategy: this.config.multiAgent && isFeatureEnabled(this.config.multiAgent)
2690
- ? this.config.multiAgent.consensusStrategy || 'voting'
2691
- : 'voting',
2692
- communicationMode: 'broadcast',
2693
- });
2694
- return result;
3051
+ try {
3052
+ const result = await this.multiAgent.runWithTeam(task, {
3053
+ roles,
3054
+ consensusStrategy: this.config.multiAgent && isFeatureEnabled(this.config.multiAgent)
3055
+ ? this.config.multiAgent.consensusStrategy || 'voting'
3056
+ : 'voting',
3057
+ communicationMode: 'broadcast',
3058
+ });
3059
+ return result;
3060
+ }
3061
+ finally {
3062
+ unsubMultiAgent();
3063
+ }
2695
3064
  }
2696
3065
  /**
2697
3066
  * Add a role to the multi-agent manager.
@@ -2714,8 +3083,8 @@ export class ProductionAgent {
2714
3083
  throw new Error('ReAct not enabled. Enable it in config to use runWithReAct()');
2715
3084
  }
2716
3085
  this.observability?.logger?.info('Running with ReAct', { task });
2717
- // Set up event forwarding
2718
- this.react.on(event => {
3086
+ // Set up event forwarding (unsubscribe after operation to prevent memory leaks)
3087
+ const unsubReact = this.react.on(event => {
2719
3088
  switch (event.type) {
2720
3089
  case 'react.thought':
2721
3090
  this.emit({ type: 'react.thought', step: event.step, thought: event.thought });
@@ -2731,15 +3100,20 @@ export class ProductionAgent {
2731
3100
  break;
2732
3101
  }
2733
3102
  });
2734
- const trace = await this.react.run(task);
2735
- // Store trace in memory if available
2736
- if (this.memory && trace.finalAnswer) {
2737
- this.memory.storeConversation([
2738
- { role: 'user', content: task },
2739
- { role: 'assistant', content: trace.finalAnswer },
2740
- ]);
3103
+ try {
3104
+ const trace = await this.react.run(task);
3105
+ // Store trace in memory if available
3106
+ if (this.memory && trace.finalAnswer) {
3107
+ this.memory.storeConversation([
3108
+ { role: 'user', content: task },
3109
+ { role: 'assistant', content: trace.finalAnswer },
3110
+ ]);
3111
+ }
3112
+ return trace;
3113
+ }
3114
+ finally {
3115
+ unsubReact();
2741
3116
  }
2742
- return trace;
2743
3117
  }
2744
3118
  /**
2745
3119
  * Get the ReAct trace formatted as a string.
@@ -3109,11 +3483,15 @@ export class ProductionAgent {
3109
3483
  metrics: { tokens: 0, duration: 0, toolCalls: 0 },
3110
3484
  };
3111
3485
  }
3112
- this.emit({ type: 'agent.spawn', agentId: `spawn-${Date.now()}`, name: agentName, task });
3486
+ // Generate a unique ID for this agent instance that will be used consistently
3487
+ // throughout the agent's lifecycle (spawn event, token events, completion events)
3488
+ const agentId = `spawn-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
3489
+ this.emit({ type: 'agent.spawn', agentId, name: agentName, task });
3113
3490
  this.observability?.logger?.info('Spawning agent', { name: agentName, task });
3114
3491
  const startTime = Date.now();
3115
3492
  const childSessionId = `subagent-${agentName}-${Date.now()}`;
3116
3493
  const childTraceId = `trace-${childSessionId}`;
3494
+ let workerResultId;
3117
3495
  try {
3118
3496
  // Filter tools for this agent
3119
3497
  const agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
@@ -3122,22 +3500,48 @@ export class ProductionAgent {
3122
3500
  const resolvedModel = (agentDef.model && agentDef.model.includes('/'))
3123
3501
  ? agentDef.model
3124
3502
  : this.config.model;
3503
+ // Persist subagent task lifecycle in durable storage when available
3504
+ if (this.store?.hasWorkerResultsFeature()) {
3505
+ try {
3506
+ workerResultId = this.store.createWorkerResult(agentId, task.slice(0, 500), resolvedModel || 'default');
3507
+ }
3508
+ catch (storeErr) {
3509
+ this.observability?.logger?.warn('Failed to create worker result record', {
3510
+ agentId,
3511
+ error: storeErr.message,
3512
+ });
3513
+ }
3514
+ }
3125
3515
  // Get subagent config with agent-type-specific timeouts and iteration limits
3126
3516
  // Uses dynamic configuration based on agent type (researcher needs more time than reviewer)
3517
+ // Precedence: per-type config > per-type default > global config > hardcoded fallback
3127
3518
  const subagentConfig = this.config.subagent;
3128
3519
  const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
3129
- // Agent-type-specific timeout: researchers get 5min, reviewers get 2min, etc.
3520
+ // Timeout precedence: per-type config override > agent-type default > global config default
3130
3521
  const agentTypeTimeout = getSubagentTimeout(agentName);
3131
- const configTimeout = hasSubagentConfig
3522
+ const rawPerTypeTimeout = hasSubagentConfig
3523
+ ? subagentConfig.timeouts?.[agentName]
3524
+ : undefined;
3525
+ const rawGlobalTimeout = hasSubagentConfig
3132
3526
  ? subagentConfig.defaultTimeout
3133
3527
  : undefined;
3134
- const subagentTimeout = configTimeout ?? agentTypeTimeout;
3135
- // Agent-type-specific iteration limit: researchers get 25, documenters get 10, etc.
3528
+ // Validate: reject negative, NaN, or non-finite timeout values
3529
+ const isValidTimeout = (v) => v !== undefined && Number.isFinite(v) && v > 0;
3530
+ const perTypeConfigTimeout = isValidTimeout(rawPerTypeTimeout) ? rawPerTypeTimeout : undefined;
3531
+ const globalConfigTimeout = isValidTimeout(rawGlobalTimeout) ? rawGlobalTimeout : undefined;
3532
+ const subagentTimeout = perTypeConfigTimeout ?? agentTypeTimeout ?? globalConfigTimeout ?? 300000;
3533
+ // Iteration precedence: per-type config override > agent-type default > global config default
3136
3534
  const agentTypeMaxIter = getSubagentMaxIterations(agentName);
3137
- const configMaxIter = hasSubagentConfig
3535
+ const rawPerTypeMaxIter = hasSubagentConfig
3536
+ ? subagentConfig.maxIterations?.[agentName]
3537
+ : undefined;
3538
+ const rawGlobalMaxIter = hasSubagentConfig
3138
3539
  ? subagentConfig.defaultMaxIterations
3139
3540
  : undefined;
3140
- const defaultMaxIterations = agentDef.maxIterations ?? configMaxIter ?? agentTypeMaxIter;
3541
+ const isValidIter = (v) => v !== undefined && Number.isFinite(v) && v > 0 && Number.isInteger(v);
3542
+ const perTypeConfigMaxIter = isValidIter(rawPerTypeMaxIter) ? rawPerTypeMaxIter : undefined;
3543
+ const globalConfigMaxIter = isValidIter(rawGlobalMaxIter) ? rawGlobalMaxIter : undefined;
3544
+ const defaultMaxIterations = agentDef.maxIterations ?? perTypeConfigMaxIter ?? agentTypeMaxIter ?? globalConfigMaxIter ?? 15;
3141
3545
  // BLACKBOARD CONTEXT INJECTION
3142
3546
  // Gather relevant context from the blackboard for the subagent
3143
3547
  let blackboardContext = '';
@@ -3177,9 +3581,20 @@ export class ProductionAgent {
3177
3581
  }
3178
3582
  // CONSTRAINT INJECTION
3179
3583
  // Add constraints to the subagent's context if provided
3180
- let constraintContext = '';
3584
+ // Also always include budget awareness so subagents know their limits
3585
+ const constraintParts = [];
3586
+ // BUDGET AWARENESS: Always inject so subagent understands its limits
3587
+ const subagentBudgetTokens = constraints?.maxTokens ?? SUBAGENT_BUDGET.maxTokens ?? 100000;
3588
+ const subagentBudgetMinutes = Math.round((SUBAGENT_BUDGET.maxDuration ?? 240000) / 60000);
3589
+ constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
3590
+ `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
3591
+ `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
3592
+ `- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
3593
+ `- Do not explore indefinitely - be focused and efficient.\n` +
3594
+ `- If approaching limits, summarize findings and return.\n` +
3595
+ `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
3596
+ ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
3181
3597
  if (constraints) {
3182
- const constraintParts = [];
3183
3598
  if (constraints.focusAreas && constraints.focusAreas.length > 0) {
3184
3599
  constraintParts.push(`**FOCUS AREAS (limit exploration to these paths):**\n${constraints.focusAreas.map(a => ` - ${a}`).join('\n')}`);
3185
3600
  }
@@ -3189,22 +3604,21 @@ export class ProductionAgent {
3189
3604
  if (constraints.requiredDeliverables && constraints.requiredDeliverables.length > 0) {
3190
3605
  constraintParts.push(`**REQUIRED DELIVERABLES (you must produce these):**\n${constraints.requiredDeliverables.map(d => ` - ${d}`).join('\n')}`);
3191
3606
  }
3192
- if (constraints.maxTokens) {
3193
- constraintParts.push(`**TOKEN BUDGET:** ${constraints.maxTokens} tokens maximum`);
3194
- }
3195
3607
  if (constraints.timeboxMinutes) {
3196
3608
  constraintParts.push(`**TIME LIMIT:** ${constraints.timeboxMinutes} minutes (soft limit - wrap up if approaching)`);
3197
3609
  }
3198
- if (constraintParts.length > 0) {
3199
- constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
3200
- }
3201
3610
  }
3611
+ const constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
3202
3612
  // Build subagent system prompt with subagent-specific plan mode addition
3203
3613
  const parentMode = this.getMode();
3204
3614
  const subagentSystemPrompt = parentMode === 'plan'
3205
3615
  ? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}`
3206
3616
  : `${agentDef.systemPrompt}${blackboardContext}${constraintContext}`;
3617
+ // Allocate budget from pool (or use default) — track allocation ID for release later
3618
+ const pooledBudget = this.getSubagentBudget(agentName, constraints);
3619
+ const poolAllocationId = pooledBudget.allocationId;
3207
3620
  // Create a sub-agent with the agent's config
3621
+ // Use SUBAGENT_BUDGET to constrain resource usage (prevents runaway token consumption)
3208
3622
  const subAgent = new ProductionAgent({
3209
3623
  provider: this.provider,
3210
3624
  tools: agentTools,
@@ -3219,6 +3633,20 @@ export class ProductionAgent {
3219
3633
  memory: false,
3220
3634
  planning: false,
3221
3635
  reflection: false,
3636
+ // Enable lightweight compaction for subagents (Improvement P5)
3637
+ // tokenThreshold configures the Compactor's per-pass size limit
3638
+ // maxContextTokens constrains AutoCompactionManager's percentage thresholds
3639
+ // With maxContextTokens=80000 and default 80% threshold, compaction triggers at ~64K
3640
+ compaction: {
3641
+ enabled: true,
3642
+ mode: 'auto',
3643
+ tokenThreshold: 40000, // Compactor summarization size limit per pass
3644
+ preserveRecentCount: 4, // Preserve fewer messages (splits to 2 user + 2 assistant)
3645
+ preserveToolResults: false, // More aggressive — subagents can re-read files
3646
+ summaryMaxTokens: 500,
3647
+ },
3648
+ // Lower context window for subagents so percentage-based compaction triggers earlier
3649
+ maxContextTokens: 80000,
3222
3650
  observability: this.config.observability,
3223
3651
  sandbox: this.config.sandbox,
3224
3652
  humanInLoop: this.config.humanInLoop,
@@ -3232,6 +3660,11 @@ export class ProductionAgent {
3232
3660
  },
3233
3661
  // Share parent's blackboard for coordination between parallel subagents
3234
3662
  blackboard: this.blackboard || undefined,
3663
+ // Share parent's file cache to eliminate redundant reads across agents
3664
+ fileCache: this.fileCache || undefined,
3665
+ // CONSTRAINED BUDGET: Use pooled budget when available, falling back to SUBAGENT_BUDGET
3666
+ // Pooled budget ensures total tree cost stays bounded by parent's budget
3667
+ budget: pooledBudget.budget,
3235
3668
  });
3236
3669
  // CRITICAL: Subagent inherits parent's mode
3237
3670
  // This ensures that if parent is in plan mode:
@@ -3241,6 +3674,17 @@ export class ProductionAgent {
3241
3674
  if (parentMode !== 'build') {
3242
3675
  subAgent.setMode(parentMode);
3243
3676
  }
3677
+ // APPROVAL BATCHING (Improvement P6): Set approval scope for subagents
3678
+ // Read-only tools are auto-approved; write tools get scoped approval
3679
+ // This reduces interruptions from ~8 per session to ~1-2
3680
+ subAgent.setApprovalScope({
3681
+ autoApprove: ['read_file', 'list_files', 'glob', 'grep', 'show_file_history', 'show_session_changes'],
3682
+ scopedApprove: {
3683
+ write_file: { paths: ['src/', 'tests/', 'tools/'] },
3684
+ edit_file: { paths: ['src/', 'tests/', 'tools/'] },
3685
+ },
3686
+ requireApproval: ['bash', 'delete_file'],
3687
+ });
3244
3688
  // Pass parent's iteration count to subagent for accurate budget tracking
3245
3689
  // This prevents subagents from consuming excessive iterations when parent already used many
3246
3690
  subAgent.setParentIterations(this.getTotalIterations());
@@ -3255,23 +3699,61 @@ export class ProductionAgent {
3255
3699
  });
3256
3700
  subAgent.setTraceCollector(subagentTraceView);
3257
3701
  }
3258
- // Forward events from subagent with context
3259
- subAgent.subscribe(event => {
3260
- // Tag event with subagent source so TUI can display it properly
3261
- const taggedEvent = { ...event, subagent: agentName };
3702
+ // GRACEFUL TIMEOUT with WRAPUP PHASE
3703
+ // Instead of instant death on timeout, the subagent gets a wrapup window
3704
+ // to produce a structured summary before being killed:
3705
+ // 1. Normal operation: progress extends idle timer
3706
+ // 2. Wrapup phase: WRAPUP_WINDOW ms (30s by default) before hard kill, wrapup callback fires → forceTextOnly
3707
+ // 3. Hard kill: race() throws CancellationError after wrapup window
3708
+ const IDLE_TIMEOUT = 120000; // 2 minutes without progress = timeout
3709
+ let WRAPUP_WINDOW = 30000;
3710
+ let IDLE_CHECK_INTERVAL = 5000;
3711
+ if (this.config.subagent) {
3712
+ WRAPUP_WINDOW = this.config.subagent.wrapupWindowMs ?? WRAPUP_WINDOW;
3713
+ IDLE_CHECK_INTERVAL = this.config.subagent.idleCheckIntervalMs ?? IDLE_CHECK_INTERVAL;
3714
+ }
3715
+ const progressAwareTimeout = createGracefulTimeout(subagentTimeout, // Max total time (hard limit from agent type config)
3716
+ IDLE_TIMEOUT, // Idle timeout (soft limit - no progress triggers this)
3717
+ WRAPUP_WINDOW, // Wrapup window before hard kill
3718
+ IDLE_CHECK_INTERVAL);
3719
+ // Register wrapup callback — fires WRAPUP_WINDOW ms (30s by default, configurable) before hard kill
3720
+ // This triggers the subagent's forceTextOnly path for a structured summary
3721
+ progressAwareTimeout.onWrapupWarning(() => {
3722
+ this.emit({
3723
+ type: 'subagent.wrapup.started',
3724
+ agentId,
3725
+ agentType: agentName,
3726
+ reason: 'Timeout approaching - graceful wrapup window opened',
3727
+ elapsedMs: Date.now() - startTime,
3728
+ });
3729
+ subAgent.requestWrapup('Timeout approaching — produce structured summary');
3730
+ });
3731
+ // Forward events from subagent with context (track for cleanup)
3732
+ // Also report progress to the timeout tracker
3733
+ const unsubSubAgent = subAgent.subscribe(event => {
3734
+ // Tag event with subagent source AND unique ID so TUI can properly attribute
3735
+ // events to the specific agent instance (critical for multiple same-type agents)
3736
+ const taggedEvent = { ...event, subagent: agentName, subagentId: agentId };
3262
3737
  this.emit(taggedEvent);
3738
+ // Report progress for timeout extension
3739
+ // Progress events: tool calls, LLM responses, token updates
3740
+ const progressEvents = ['tool.start', 'tool.complete', 'llm.start', 'llm.complete'];
3741
+ if (progressEvents.includes(event.type)) {
3742
+ progressAwareTimeout.reportProgress();
3743
+ }
3263
3744
  });
3264
- // Create timeout token for subagent execution
3265
- const timeoutSource = createTimeoutToken(subagentTimeout);
3266
- // Link parent's cancellation with subagent timeout so ESC propagates to subagents
3745
+ // Link parent's cancellation with progress-aware timeout so ESC propagates to subagents
3267
3746
  const parentSource = this.cancellation?.getSource();
3268
3747
  const effectiveSource = parentSource
3269
- ? createLinkedToken(parentSource, timeoutSource)
3270
- : timeoutSource;
3748
+ ? createLinkedToken(parentSource, progressAwareTimeout)
3749
+ : progressAwareTimeout;
3271
3750
  // CRITICAL: Pass the cancellation token to the subagent so it can check and stop
3272
3751
  // gracefully when timeout fires. Without this, the subagent continues running as
3273
3752
  // a "zombie" even after race() returns with a timeout error.
3274
3753
  subAgent.setExternalCancellation(effectiveSource.token);
3754
+ // Pause parent's duration timer while subagent runs to prevent
3755
+ // the parent from timing out on wall-clock while waiting for subagent
3756
+ this.economics?.pauseDuration();
3275
3757
  try {
3276
3758
  // Run the task with cancellation propagation from parent
3277
3759
  const result = await race(subAgent.run(task), effectiveSource.token);
@@ -3324,6 +3806,8 @@ export class ProductionAgent {
3324
3806
  const finalOutput = queuedChangeSummary
3325
3807
  ? (result.response || '') + queuedChangeSummary
3326
3808
  : (result.response || result.error || '');
3809
+ // Parse structured closure report from agent's response (if it produced one)
3810
+ const structured = parseStructuredClosureReport(result.response || '', 'completed');
3327
3811
  const spawnResultFinal = {
3328
3812
  success: result.success,
3329
3813
  output: finalOutput,
@@ -3332,13 +3816,43 @@ export class ProductionAgent {
3332
3816
  duration,
3333
3817
  toolCalls: result.metrics.toolCalls,
3334
3818
  },
3819
+ structured,
3335
3820
  };
3821
+ if (workerResultId && this.store?.hasWorkerResultsFeature()) {
3822
+ try {
3823
+ this.store.completeWorkerResult(workerResultId, {
3824
+ fullOutput: finalOutput,
3825
+ summary: finalOutput.slice(0, 500),
3826
+ artifacts: structured ? [{ type: 'structured_report', data: structured }] : undefined,
3827
+ metrics: {
3828
+ tokens: result.metrics.totalTokens,
3829
+ duration,
3830
+ toolCalls: result.metrics.toolCalls,
3831
+ },
3832
+ });
3833
+ }
3834
+ catch (storeErr) {
3835
+ this.observability?.logger?.warn('Failed to persist worker result', {
3836
+ agentId,
3837
+ error: storeErr.message,
3838
+ });
3839
+ }
3840
+ }
3336
3841
  this.emit({
3337
3842
  type: 'agent.complete',
3338
- agentId: agentName,
3843
+ agentId, // Use unique spawn ID for precise tracking
3844
+ agentType: agentName, // Keep type for display purposes
3339
3845
  success: result.success,
3340
3846
  output: finalOutput.slice(0, 500), // Include output preview
3341
3847
  });
3848
+ if (progressAwareTimeout.isInWrapupPhase()) {
3849
+ this.emit({
3850
+ type: 'subagent.wrapup.completed',
3851
+ agentId,
3852
+ agentType: agentName,
3853
+ elapsedMs: Date.now() - startTime,
3854
+ });
3855
+ }
3342
3856
  // Enhanced tracing: Record subagent completion
3343
3857
  this.traceCollector?.record({
3344
3858
  type: 'subagent.link',
@@ -3365,6 +3879,8 @@ export class ProductionAgent {
3365
3879
  },
3366
3880
  },
3367
3881
  });
3882
+ // Unsubscribe from subagent events before cleanup
3883
+ unsubSubAgent();
3368
3884
  await subAgent.cleanup();
3369
3885
  // Cache result for duplicate spawn prevention
3370
3886
  // Use the same taskKey from the dedup check above
@@ -3382,8 +3898,17 @@ export class ProductionAgent {
3382
3898
  const isUserCancellation = parentSource?.isCancellationRequested;
3383
3899
  const reason = isUserCancellation
3384
3900
  ? 'User cancelled'
3385
- : `Timed out after ${subagentTimeout}ms`;
3386
- this.emit({ type: 'agent.error', agentId: agentName, error: reason });
3901
+ : err.reason || `Timed out after ${subagentTimeout}ms`;
3902
+ this.emit({ type: 'agent.error', agentId, agentType: agentName, error: reason });
3903
+ if (!isUserCancellation) {
3904
+ this.emit({
3905
+ type: 'subagent.timeout.hard_kill',
3906
+ agentId,
3907
+ agentType: agentName,
3908
+ reason,
3909
+ elapsedMs: Date.now() - startTime,
3910
+ });
3911
+ }
3387
3912
  // =======================================================================
3388
3913
  // PRESERVE PARTIAL RESULTS
3389
3914
  // Instead of discarding all work, capture whatever the subagent produced
@@ -3433,7 +3958,8 @@ export class ProductionAgent {
3433
3958
  this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
3434
3959
  }
3435
3960
  }
3436
- // Try to cleanup the subagent gracefully
3961
+ // Unsubscribe from subagent events and cleanup gracefully
3962
+ unsubSubAgent();
3437
3963
  try {
3438
3964
  await subAgent.cleanup();
3439
3965
  }
@@ -3474,6 +4000,20 @@ export class ProductionAgent {
3474
4000
  },
3475
4001
  },
3476
4002
  });
4003
+ // Parse structured closure report from partial response
4004
+ const exitReason = isUserCancellation ? 'cancelled' : 'timeout_graceful';
4005
+ const structured = parseStructuredClosureReport(partialResponse, exitReason, task);
4006
+ if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4007
+ try {
4008
+ this.store.failWorkerResult(workerResultId, reason);
4009
+ }
4010
+ catch (storeErr) {
4011
+ this.observability?.logger?.warn('Failed to mark cancelled worker result as failed', {
4012
+ agentId,
4013
+ error: storeErr.message,
4014
+ });
4015
+ }
4016
+ }
3477
4017
  return {
3478
4018
  success: false,
3479
4019
  output: baseOutput + partialResultSection + cancelledQueuedSummary,
@@ -3484,19 +4024,40 @@ export class ProductionAgent {
3484
4024
  duration,
3485
4025
  toolCalls: subagentMetrics.toolCalls,
3486
4026
  },
4027
+ structured,
3487
4028
  };
3488
4029
  }
3489
4030
  throw err; // Re-throw non-cancellation errors
3490
4031
  }
3491
4032
  finally {
4033
+ // Resume parent's duration timer now that subagent is done
4034
+ this.economics?.resumeDuration();
3492
4035
  // Dispose both sources (linked source disposes its internal state, timeout source handles its timer)
3493
4036
  effectiveSource.dispose();
3494
- timeoutSource.dispose();
4037
+ progressAwareTimeout.dispose();
4038
+ // BUDGET POOL: Record actual usage and release the allocation
4039
+ // This must happen in finally to ensure cleanup on both success and error paths
4040
+ if (this.budgetPool && poolAllocationId) {
4041
+ const subMetrics = subAgent.getMetrics();
4042
+ this.budgetPool.recordUsage(poolAllocationId, subMetrics.totalTokens, subMetrics.estimatedCost);
4043
+ this.budgetPool.release(poolAllocationId);
4044
+ }
3495
4045
  }
3496
4046
  }
3497
4047
  catch (err) {
3498
4048
  const error = err instanceof Error ? err.message : String(err);
3499
- this.emit({ type: 'agent.error', agentId: agentName, error });
4049
+ this.emit({ type: 'agent.error', agentId, agentType: agentName, error });
4050
+ if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4051
+ try {
4052
+ this.store.failWorkerResult(workerResultId, error);
4053
+ }
4054
+ catch (storeErr) {
4055
+ this.observability?.logger?.warn('Failed to mark worker result as failed', {
4056
+ agentId,
4057
+ error: storeErr.message,
4058
+ });
4059
+ }
4060
+ }
3500
4061
  return {
3501
4062
  success: false,
3502
4063
  output: `Agent error: ${error}`,
@@ -3507,6 +4068,52 @@ export class ProductionAgent {
3507
4068
  /**
3508
4069
  * Spawn multiple agents in parallel to work on independent tasks.
3509
4070
  * Uses the shared blackboard for coordination and conflict prevention.
4071
+ *
4072
+ * Get budget for a subagent, using the pooled budget when available.
4073
+ * Falls back to the static SUBAGENT_BUDGET if no pool is configured.
4074
+ * Returns both the budget and the pool allocation ID (if any) for tracking.
4075
+ */
4076
+ getSubagentBudget(agentName, constraints) {
4077
+ // If explicit maxTokens constraint, use that
4078
+ if (constraints?.maxTokens) {
4079
+ return {
4080
+ budget: { ...SUBAGENT_BUDGET, maxTokens: constraints.maxTokens },
4081
+ allocationId: null,
4082
+ };
4083
+ }
4084
+ // Try to allocate from the shared budget pool
4085
+ if (this.budgetPool) {
4086
+ const allocationId = `${agentName}-${Date.now()}`;
4087
+ const allocation = this.budgetPool.reserve(allocationId);
4088
+ if (allocation) {
4089
+ return {
4090
+ budget: {
4091
+ ...SUBAGENT_BUDGET,
4092
+ maxTokens: allocation.tokenBudget,
4093
+ softTokenLimit: Math.floor(allocation.tokenBudget * 0.7),
4094
+ maxCost: allocation.costBudget,
4095
+ },
4096
+ allocationId,
4097
+ };
4098
+ }
4099
+ // Pool exhausted — give a tiny emergency budget (just enough to report failure)
4100
+ // This does NOT bypass the pool — it's a fixed small cost for error messaging
4101
+ return {
4102
+ budget: {
4103
+ ...SUBAGENT_BUDGET,
4104
+ maxTokens: 5000,
4105
+ softTokenLimit: 3000,
4106
+ maxCost: 0.01,
4107
+ },
4108
+ allocationId: null,
4109
+ };
4110
+ }
4111
+ // No pool — use default subagent budget
4112
+ return { budget: SUBAGENT_BUDGET, allocationId: null };
4113
+ }
4114
+ /**
4115
+ * Uses Promise.allSettled to handle partial failures gracefully - if one
4116
+ * agent fails or times out, others can still complete successfully.
3510
4117
  */
3511
4118
  async spawnAgentsParallel(tasks) {
3512
4119
  // Emit start event for TUI visibility
@@ -3515,9 +4122,28 @@ export class ProductionAgent {
3515
4122
  count: tasks.length,
3516
4123
  agents: tasks.map(t => t.agent),
3517
4124
  });
3518
- // Execute all tasks in parallel
4125
+ // Execute all tasks in parallel using allSettled to handle partial failures
3519
4126
  const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
3520
- const results = await Promise.all(promises);
4127
+ const settled = await Promise.allSettled(promises);
4128
+ // Convert settled results to SpawnResult array
4129
+ const results = settled.map((result, i) => {
4130
+ if (result.status === 'fulfilled') {
4131
+ return result.value;
4132
+ }
4133
+ // Handle rejected promises (shouldn't happen since spawnAgent catches errors internally,
4134
+ // but this is a safety net for unexpected failures)
4135
+ const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
4136
+ this.emit({
4137
+ type: 'agent.error',
4138
+ agentId: tasks[i].agent,
4139
+ error: `Unexpected parallel spawn error: ${error}`,
4140
+ });
4141
+ return {
4142
+ success: false,
4143
+ output: `Parallel spawn error: ${error}`,
4144
+ metrics: { tokens: 0, duration: 0, toolCalls: 0 },
4145
+ };
4146
+ });
3521
4147
  // Emit completion event
3522
4148
  this.emit({
3523
4149
  type: 'parallel.spawn.complete',
@@ -3666,7 +4292,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
3666
4292
  const topSuggestion = suggestions[0];
3667
4293
  // If confirmation callback provided, ask user
3668
4294
  if (confirmDelegate && topSuggestion) {
3669
- const confirmed = await confirmDelegate(topSuggestion.agent, topSuggestion.reason);
4295
+ const confirmed = await this.withPausedDuration(() => confirmDelegate(topSuggestion.agent, topSuggestion.reason));
3670
4296
  if (!confirmed) {
3671
4297
  // User declined, run with main agent
3672
4298
  return this.run(task);
@@ -3723,6 +4349,14 @@ If the task is a simple question or doesn't need specialized handling, set bestA
3723
4349
  getResourceStatus() {
3724
4350
  return this.resourceManager?.getStatusString() || null;
3725
4351
  }
4352
+ /**
4353
+ * Reset CPU time counter for the resource manager.
4354
+ * Call this when starting a new prompt to allow per-prompt time limits
4355
+ * instead of session-wide limits.
4356
+ */
4357
+ resetResourceTimer() {
4358
+ this.resourceManager?.resetCpuTime();
4359
+ }
3726
4360
  // =========================================================================
3727
4361
  // LSP (LANGUAGE SERVER) METHODS
3728
4362
  // =========================================================================
@@ -3873,6 +4507,15 @@ If the task is a simple question or doesn't need specialized handling, set bestA
3873
4507
  setParentIterations(count) {
3874
4508
  this.parentIterations = count;
3875
4509
  }
4510
+ /**
4511
+ * Set an approval scope for this agent (used by parent when spawning subagents).
4512
+ * Enables pre-approved operations within a defined scope, reducing approval prompts.
4513
+ */
4514
+ setApprovalScope(scope) {
4515
+ if (this.safety?.humanInLoop) {
4516
+ this.safety.humanInLoop.setApprovalScope(scope);
4517
+ }
4518
+ }
3876
4519
  /**
3877
4520
  * Set an external cancellation token for this agent.
3878
4521
  * Used when spawning subagents to propagate parent timeout/cancellation.
@@ -3882,6 +4525,12 @@ If the task is a simple question or doesn't need specialized handling, set bestA
3882
4525
  setExternalCancellation(token) {
3883
4526
  this.externalCancellationToken = token;
3884
4527
  }
4528
+ /**
4529
+ * Set a SQLite store instance for durable persistence features.
+ * The instance is kept on `this.store` without validation; the subagent
+ * spawn path checks `hasWorkerResultsFeature()` on it before persisting
+ * worker results.
+ *
+ * @param store - Store instance to use for persistence.
4530
+ */
4531
+ setStore(store) {
4532
+ this.store = store;
4533
+ }
3885
4534
  /**
3886
4535
  * Check if external cancellation has been requested.
3887
4536
  * Returns true if the external token signals cancellation.
@@ -3889,6 +4538,15 @@ If the task is a simple question or doesn't need specialized handling, set bestA
3889
4538
  isExternallyCancelled() {
3890
4539
  return this.externalCancellationToken?.isCancellationRequested ?? false;
3891
4540
  }
4541
+ /**
4542
+ * Request a graceful wrapup of the agent's current work.
4543
+ * On the next main loop iteration, the agent will produce a structured summary
4544
+ * instead of making more tool calls.
4545
+ */
4546
+ requestWrapup(reason) {
4547
+ this.wrapupRequested = true;
4548
+ this.wrapupReason = reason || 'Timeout approaching';
4549
+ }
3892
4550
  /**
3893
4551
  * Get total iterations (this agent + parent).
3894
4552
  * Used for accurate budget tracking across subagent hierarchies.
@@ -4162,6 +4820,29 @@ If the task is a simple question or doesn't need specialized handling, set bestA
4162
4820
  * Cleanup resources.
4163
4821
  */
4164
4822
  async cleanup() {
4823
+ // Unsubscribe all event listeners (prevents memory leaks in long sessions)
4824
+ for (const unsub of this.unsubscribers) {
4825
+ try {
4826
+ unsub();
4827
+ }
4828
+ catch {
4829
+ // Ignore unsubscribe errors during cleanup
4830
+ }
4831
+ }
4832
+ this.unsubscribers = [];
4833
+ // Flush trace collector before cleanup
4834
+ await this.traceCollector?.flush();
4835
+ // Clear blackboard (releases file claim locks)
4836
+ this.blackboard?.clear();
4837
+ // Wait for any pending init before cleanup
4838
+ if (this.initPromises.length > 0) {
4839
+ try {
4840
+ await Promise.all(this.initPromises);
4841
+ }
4842
+ catch {
4843
+ // Ignore init errors during cleanup
4844
+ }
4845
+ }
4165
4846
  this.cancellation?.cleanup();
4166
4847
  this.resourceManager?.cleanup();
4167
4848
  await this.lspManager?.cleanup();
@@ -4369,4 +5050,64 @@ export class ProductionAgentBuilder {
4369
5050
  export function buildAgent() {
4370
5051
  return new ProductionAgentBuilder();
4371
5052
  }
5053
+ // =============================================================================
5054
+ // STRUCTURED CLOSURE REPORT PARSER
5055
+ // =============================================================================
5056
/**
 * Parse a structured closure report from a subagent's text response.
 * The subagent may have produced JSON in response to a TIMEOUT_WRAPUP_PROMPT.
 *
 * The JSON object is located in two steps: first the span from the first `{`
 * to the last `}` is tried; if that is not a valid report (for example because
 * the prose around the report contains stray braces), every balanced `{...}`
 * candidate is tried in order until one parses and looks like a report.
 *
 * @param text - The subagent's last response text
 * @param defaultExitReason - Exit reason to use (completed, timeout_graceful, cancelled, etc.)
 * @param fallbackTask - Original task description for fallback remainingWork
 * @returns Parsed StructuredClosureReport, or undefined if no JSON found and no fallback needed
 */
export function parseStructuredClosureReport(text, defaultExitReason, fallbackTask) {
  if (!text) {
    // No text at all — nothing to report unless we know what the task was
    if (!fallbackTask) {
      return undefined;
    }
    // A user cancellation must not be reported as a timeout
    const wasCancelled = defaultExitReason === 'cancelled';
    return {
      findings: [],
      actionsTaken: [],
      failures: [
        wasCancelled
          ? 'Cancelled before producing structured summary'
          : 'Timeout before producing structured summary',
      ],
      remainingWork: [fallbackTask],
      exitReason: wasCancelled ? 'cancelled' : 'timeout_hard',
    };
  }
  const parsed = extractClosureReportJson(text);
  if (parsed) {
    return {
      findings: Array.isArray(parsed.findings) ? parsed.findings : [],
      actionsTaken: Array.isArray(parsed.actionsTaken) ? parsed.actionsTaken : [],
      failures: Array.isArray(parsed.failures) ? parsed.failures : [],
      remainingWork: Array.isArray(parsed.remainingWork) ? parsed.remainingWork : [],
      exitReason: defaultExitReason,
      suggestedNextSteps: Array.isArray(parsed.suggestedNextSteps) ? parsed.suggestedNextSteps : undefined,
    };
  }
  // For completed agents, don't force a structured report if they didn't produce one
  if (defaultExitReason === 'completed') {
    return undefined;
  }
  // Fallback: LLM didn't produce valid JSON but we have text
  return {
    findings: [text.slice(0, 500)],
    actionsTaken: [],
    failures: ['Did not produce structured JSON summary'],
    remainingWork: fallbackTask ? [fallbackTask] : [],
    // No structured summary was produced, so a "graceful" timeout is downgraded
    exitReason: defaultExitReason === 'timeout_graceful' ? 'timeout_hard' : defaultExitReason,
  };
}

/**
 * Locate the first JSON object in `text` that looks like a closure report.
 * Returns the parsed object, or null when none is found.
 */
function extractClosureReportJson(text) {
  if (typeof text !== 'string') {
    return null;
  }
  const firstBrace = text.indexOf('{');
  const lastBrace = text.lastIndexOf('}');
  if (firstBrace === -1 || lastBrace < firstBrace) {
    return null;
  }
  // Fast path: everything between the first '{' and the last '}'
  const wholeSpan = parseClosureReportCandidate(text.slice(firstBrace, lastBrace + 1));
  if (wholeSpan) {
    return wholeSpan;
  }
  // Slow path: try each balanced {...} span; bounded so odd input stays cheap
  const maxCandidates = 64;
  let start = firstBrace;
  for (let attempt = 0; start !== -1 && attempt < maxCandidates; attempt++) {
    const end = findBalancedObjectEnd(text, start);
    if (end !== -1) {
      const report = parseClosureReportCandidate(text.slice(start, end + 1));
      if (report) {
        return report;
      }
    }
    start = text.indexOf('{', start + 1);
  }
  return null;
}

/**
 * Parse `candidate` as JSON and return it only if it has at least one of the
 * expected closure-report fields; otherwise return null.
 */
function parseClosureReportCandidate(candidate) {
  try {
    const value = JSON.parse(candidate);
    const isReport = Boolean(value) && typeof value === 'object'
      && Boolean(value.findings || value.actionsTaken || value.failures || value.remainingWork);
    return isReport ? value : null;
  }
  catch {
    // Not valid JSON — the caller moves on to the next candidate
    return null;
  }
}

/**
 * Return the index of the `}` that closes the `{` at `start`, skipping braces
 * inside double-quoted strings, or -1 when the object is never closed.
 */
function findBalancedObjectEnd(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      }
      else if (ch === '\\') {
        escaped = true;
      }
      else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    }
    else if (ch === '{') {
      depth++;
    }
    else if (ch === '}') {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}
4372
5113
  //# sourceMappingURL=agent.js.map