attocode 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +111 -1
- package/README.md +7 -0
- package/dist/src/adapters.d.ts +6 -1
- package/dist/src/adapters.d.ts.map +1 -1
- package/dist/src/adapters.js +14 -1
- package/dist/src/adapters.js.map +1 -1
- package/dist/src/agent.d.ts +50 -0
- package/dist/src/agent.d.ts.map +1 -1
- package/dist/src/agent.js +734 -316
- package/dist/src/agent.js.map +1 -1
- package/dist/src/defaults.d.ts +1 -1
- package/dist/src/defaults.d.ts.map +1 -1
- package/dist/src/defaults.js +2 -0
- package/dist/src/defaults.js.map +1 -1
- package/dist/src/integrations/agent-registry.d.ts +9 -2
- package/dist/src/integrations/agent-registry.d.ts.map +1 -1
- package/dist/src/integrations/agent-registry.js +30 -4
- package/dist/src/integrations/agent-registry.js.map +1 -1
- package/dist/src/integrations/async-subagent.d.ts +135 -0
- package/dist/src/integrations/async-subagent.d.ts.map +1 -0
- package/dist/src/integrations/async-subagent.js +213 -0
- package/dist/src/integrations/async-subagent.js.map +1 -0
- package/dist/src/integrations/auto-checkpoint.d.ts +98 -0
- package/dist/src/integrations/auto-checkpoint.d.ts.map +1 -0
- package/dist/src/integrations/auto-checkpoint.js +252 -0
- package/dist/src/integrations/auto-checkpoint.js.map +1 -0
- package/dist/src/integrations/budget-pool.d.ts +13 -1
- package/dist/src/integrations/budget-pool.d.ts.map +1 -1
- package/dist/src/integrations/budget-pool.js +17 -0
- package/dist/src/integrations/budget-pool.js.map +1 -1
- package/dist/src/integrations/complexity-classifier.d.ts +86 -0
- package/dist/src/integrations/complexity-classifier.d.ts.map +1 -0
- package/dist/src/integrations/complexity-classifier.js +233 -0
- package/dist/src/integrations/complexity-classifier.js.map +1 -0
- package/dist/src/integrations/delegation-protocol.d.ts +86 -0
- package/dist/src/integrations/delegation-protocol.d.ts.map +1 -0
- package/dist/src/integrations/delegation-protocol.js +127 -0
- package/dist/src/integrations/delegation-protocol.js.map +1 -0
- package/dist/src/integrations/dynamic-budget.d.ts +81 -0
- package/dist/src/integrations/dynamic-budget.d.ts.map +1 -0
- package/dist/src/integrations/dynamic-budget.js +151 -0
- package/dist/src/integrations/dynamic-budget.js.map +1 -0
- package/dist/src/integrations/economics.d.ts +44 -1
- package/dist/src/integrations/economics.d.ts.map +1 -1
- package/dist/src/integrations/economics.js +182 -3
- package/dist/src/integrations/economics.js.map +1 -1
- package/dist/src/integrations/environment-facts.d.ts +52 -0
- package/dist/src/integrations/environment-facts.d.ts.map +1 -0
- package/dist/src/integrations/environment-facts.js +84 -0
- package/dist/src/integrations/environment-facts.js.map +1 -0
- package/dist/src/integrations/index.d.ts +16 -1
- package/dist/src/integrations/index.d.ts.map +1 -1
- package/dist/src/integrations/index.js +31 -1
- package/dist/src/integrations/index.js.map +1 -1
- package/dist/src/integrations/injection-budget.d.ts +71 -0
- package/dist/src/integrations/injection-budget.d.ts.map +1 -0
- package/dist/src/integrations/injection-budget.js +136 -0
- package/dist/src/integrations/injection-budget.js.map +1 -0
- package/dist/src/integrations/mcp-client.d.ts.map +1 -1
- package/dist/src/integrations/mcp-client.js +14 -0
- package/dist/src/integrations/mcp-client.js.map +1 -1
- package/dist/src/integrations/mcp-custom-tools.d.ts +102 -0
- package/dist/src/integrations/mcp-custom-tools.d.ts.map +1 -0
- package/dist/src/integrations/mcp-custom-tools.js +232 -0
- package/dist/src/integrations/mcp-custom-tools.js.map +1 -0
- package/dist/src/integrations/mcp-tool-validator.d.ts +60 -0
- package/dist/src/integrations/mcp-tool-validator.d.ts.map +1 -0
- package/dist/src/integrations/mcp-tool-validator.js +141 -0
- package/dist/src/integrations/mcp-tool-validator.js.map +1 -0
- package/dist/src/integrations/routing.d.ts +2 -1
- package/dist/src/integrations/routing.d.ts.map +1 -1
- package/dist/src/integrations/routing.js.map +1 -1
- package/dist/src/integrations/self-improvement.d.ts +90 -0
- package/dist/src/integrations/self-improvement.d.ts.map +1 -0
- package/dist/src/integrations/self-improvement.js +217 -0
- package/dist/src/integrations/self-improvement.js.map +1 -0
- package/dist/src/integrations/smart-decomposer.d.ts +4 -0
- package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
- package/dist/src/integrations/smart-decomposer.js +55 -28
- package/dist/src/integrations/smart-decomposer.js.map +1 -1
- package/dist/src/integrations/subagent-output-store.d.ts +91 -0
- package/dist/src/integrations/subagent-output-store.d.ts.map +1 -0
- package/dist/src/integrations/subagent-output-store.js +257 -0
- package/dist/src/integrations/subagent-output-store.js.map +1 -0
- package/dist/src/integrations/swarm/index.d.ts +1 -1
- package/dist/src/integrations/swarm/index.d.ts.map +1 -1
- package/dist/src/integrations/swarm/index.js +1 -1
- package/dist/src/integrations/swarm/index.js.map +1 -1
- package/dist/src/integrations/swarm/model-selector.d.ts +1 -0
- package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
- package/dist/src/integrations/swarm/model-selector.js +37 -3
- package/dist/src/integrations/swarm/model-selector.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts +10 -1
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.js +72 -6
- package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.js +26 -4
- package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.d.ts +11 -0
- package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.js +4 -0
- package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +11 -0
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.js +233 -10
- package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +9 -2
- package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-quality-gate.js +128 -11
- package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
- package/dist/src/integrations/swarm/task-queue.d.ts +11 -1
- package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
- package/dist/src/integrations/swarm/task-queue.js +125 -15
- package/dist/src/integrations/swarm/task-queue.js.map +1 -1
- package/dist/src/integrations/swarm/types.d.ts +40 -1
- package/dist/src/integrations/swarm/types.d.ts.map +1 -1
- package/dist/src/integrations/swarm/types.js +6 -1
- package/dist/src/integrations/swarm/types.js.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.d.ts +9 -3
- package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.js +89 -17
- package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
- package/dist/src/integrations/thinking-strategy.d.ts +52 -0
- package/dist/src/integrations/thinking-strategy.d.ts.map +1 -0
- package/dist/src/integrations/thinking-strategy.js +129 -0
- package/dist/src/integrations/thinking-strategy.js.map +1 -0
- package/dist/src/integrations/tool-recommendation.d.ts +58 -0
- package/dist/src/integrations/tool-recommendation.d.ts.map +1 -0
- package/dist/src/integrations/tool-recommendation.js +215 -0
- package/dist/src/integrations/tool-recommendation.js.map +1 -0
- package/dist/src/integrations/verification-gate.d.ts +80 -0
- package/dist/src/integrations/verification-gate.d.ts.map +1 -0
- package/dist/src/integrations/verification-gate.js +146 -0
- package/dist/src/integrations/verification-gate.js.map +1 -0
- package/dist/src/integrations/work-log.d.ts +87 -0
- package/dist/src/integrations/work-log.d.ts.map +1 -0
- package/dist/src/integrations/work-log.js +275 -0
- package/dist/src/integrations/work-log.js.map +1 -0
- package/dist/src/main.js +5 -4
- package/dist/src/main.js.map +1 -1
- package/dist/src/modes.d.ts +6 -0
- package/dist/src/modes.d.ts.map +1 -1
- package/dist/src/modes.js +73 -2
- package/dist/src/modes.js.map +1 -1
- package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
- package/dist/src/providers/adapters/anthropic.js +20 -3
- package/dist/src/providers/adapters/anthropic.js.map +1 -1
- package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
- package/dist/src/providers/adapters/openrouter.js +3 -1
- package/dist/src/providers/adapters/openrouter.js.map +1 -1
- package/dist/src/providers/types.d.ts +4 -0
- package/dist/src/providers/types.d.ts.map +1 -1
- package/dist/src/providers/types.js.map +1 -1
- package/dist/src/tools/bash.d.ts +8 -2
- package/dist/src/tools/bash.d.ts.map +1 -1
- package/dist/src/tools/bash.js +14 -1
- package/dist/src/tools/bash.js.map +1 -1
- package/dist/src/tools/coercion.d.ts +14 -0
- package/dist/src/tools/coercion.d.ts.map +1 -0
- package/dist/src/tools/coercion.js +25 -0
- package/dist/src/tools/coercion.js.map +1 -0
- package/dist/src/tools/file.d.ts +2 -2
- package/dist/src/tools/file.d.ts.map +1 -1
- package/dist/src/tools/file.js +2 -1
- package/dist/src/tools/file.js.map +1 -1
- package/dist/src/tools/standard.d.ts +17 -1
- package/dist/src/tools/standard.d.ts.map +1 -1
- package/dist/src/tools/standard.js +64 -11
- package/dist/src/tools/standard.js.map +1 -1
- package/dist/src/tui/app.d.ts.map +1 -1
- package/dist/src/tui/app.js +8 -1
- package/dist/src/tui/app.js.map +1 -1
- package/dist/src/tui/event-display.d.ts.map +1 -1
- package/dist/src/tui/event-display.js +8 -1
- package/dist/src/tui/event-display.js.map +1 -1
- package/dist/src/types.d.ts +26 -0
- package/dist/src/types.d.ts.map +1 -1
- package/package.json +6 -2
package/dist/src/agent.js
CHANGED
|
@@ -21,7 +21,11 @@
|
|
|
21
21
|
import { buildConfig, isFeatureEnabled, getEnabledFeatures, getSubagentTimeout, getSubagentMaxIterations, } from './defaults.js';
|
|
22
22
|
import { createModeManager, formatModeList, parseMode, calculateTaskSimilarity, SUBAGENT_PLAN_MODE_ADDITION, } from './modes.js';
|
|
23
23
|
import { createLSPFileTools, } from './agent-tools/index.js';
|
|
24
|
-
import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE,
|
|
24
|
+
import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createDynamicBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate,
|
|
25
|
+
// Phase 2: Orchestration
|
|
26
|
+
classifyComplexity, getScalingGuidance, buildDelegationPrompt, createMinimalDelegationSpec, getSubagentQualityPrompt, ToolRecommendationEngine, createToolRecommendationEngine, createInjectionBudgetManager,
|
|
27
|
+
// Phase 3: Advanced
|
|
28
|
+
getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, createSubagentSupervisor, createSubagentHandle, } from './integrations/index.js';
|
|
25
29
|
// Lesson 26: Tracing & Evaluation integration
|
|
26
30
|
import { createTraceCollector } from './tracing/trace-collector.js';
|
|
27
31
|
// Model registry for context window limits
|
|
@@ -34,6 +38,119 @@ import { createTaskTools, } from './tools/tasks.js';
|
|
|
34
38
|
// =============================================================================
|
|
35
39
|
// PRODUCTION AGENT
|
|
36
40
|
// =============================================================================
|
|
41
|
+
/**
 * Names of the read-only tools.
 *
 * Calls to these tools have no side effects and do not modify state, so
 * several of them may be executed concurrently in the same batch.
 */
export const PARALLELIZABLE_TOOLS = new Set([
    'read_file',
    'glob',
    'grep',
    'list_files',
    'search_files',
    'search_code',
    'get_file_info',
]);
|
|
49
|
+
/**
 * Names of the file-mutating tools that may share a parallel batch only
 * when every call in that batch targets a different file.
 *
 * Two write_file / edit_file calls on distinct paths are safe to run
 * concurrently; two calls on the same path must run one after the other.
 */
export const CONDITIONALLY_PARALLEL_TOOLS = new Set(['write_file', 'edit_file']);
|
|
56
|
+
/**
 * Extract the target file path from a tool call.
 *
 * A path is the first string value found under one of the keys `path`,
 * `file_path`, `filename` or `file` (tried in that order). The keys are
 * looked up in these containers, in order:
 *
 *   1. the tool call object itself,
 *   2. `toolCall.args`,
 *   3. `toolCall.input`,
 *   4. `toolCall.arguments`.
 *
 * `arguments` is checked because that is where this agent's tool calls
 * carry their parameters (e.g. `toolCall.arguments.command` for bash).
 * It is checked last so that every input that already resolved to a path
 * through one of the first three containers keeps resolving to the same
 * value.
 *
 * @param {object|null|undefined} toolCall - Tool call to inspect.
 * @returns {string|null} The path, or null when the tool call is not an
 *   object or none of the containers holds a string under a known key.
 */
export function extractToolFilePath(toolCall) {
    // A missing or non-object tool call has no path; report that instead of throwing.
    if (!toolCall || typeof toolCall !== 'object') {
        return null;
    }
    const pathKeys = ['path', 'file_path', 'filename', 'file'];
    const containers = [toolCall, toolCall.args, toolCall.input, toolCall.arguments];
    for (const container of containers) {
        // Skip containers that are absent or not objects (e.g. a primitive `args`).
        if (!container || typeof container !== 'object') {
            continue;
        }
        for (const key of pathKeys) {
            if (typeof container[key] === 'string') {
                return container[key];
            }
        }
    }
    return null;
}
|
|
85
|
+
/**
 * Tell whether a conditionally-parallel tool call clashes with a call that
 * is already queued in the current batch.
 *
 * A clash is reported when the candidate's target path cannot be
 * determined, or when any queued call resolves to the same path.
 *
 * @param {object} toolCall - Candidate tool call.
 * @param {object[]} accumulator - Calls already queued in the current batch.
 * @returns {boolean} true if the candidate must not join the batch.
 */
function hasFileConflict(toolCall, accumulator) {
    const targetPath = extractToolFilePath(toolCall);
    // Unknown target: be conservative and assume it clashes.
    if (!targetPath) {
        return true;
    }
    // Same file as any queued call means a clash.
    return accumulator.some((queued) => extractToolFilePath(queued) === targetPath);
}
|
|
100
|
+
/**
 * Group tool calls into ordered batches. Calls inside one batch may be run
 * concurrently; the batches themselves are meant to run one after another.
 *
 * Uses accumulate-and-flush: calls that may run in parallel accumulate in
 * the current batch until something forces a flush.
 *
 * Rules, applied to each call in order:
 *  - Parallelizable (read-only) call: joins the current batch, unless the
 *    batch already contains a conditionally-parallel call targeting the same
 *    file path. In that case the batch is flushed first so the read runs
 *    after the write it follows.
 *  - Conditionally-parallel call (write_file / edit_file by default):
 *      - if its target path cannot be determined, it runs in a batch of its
 *        own, because nothing can be proven safe to run beside it;
 *      - otherwise it joins the current batch unless `hasFileConflict`
 *        reports that a queued call targets the same path, in which case
 *        the batch is flushed and the call starts a new one.
 *  - Any other call: flushes the current batch and runs in a batch of its own.
 *
 * Paths are compared as plain strings, so two different spellings of the
 * same file are not recognised as a conflict.
 *
 * Examples (default predicates):
 *   [read1, read2, bash, read3, grep] → [[read1, read2], [bash], [read3, grep]]
 *   [write_a, write_b, write_a]       → [[write_a, write_b], [write_a]]
 *   [read_a, write_a]                 → [[read_a], [write_a]]
 *   [write_a, read_a]                 → [[write_a], [read_a]]
 *
 * @param {object[]} toolCalls - Tool calls in the order they were requested.
 * @param {(tc: object) => boolean} [isParallelizable] - Predicate for calls
 *   that are always safe to parallelize. Defaults to membership of `tc.name`
 *   in PARALLELIZABLE_TOOLS.
 * @param {(tc: object) => boolean} [isConditionallyParallel] - Predicate for
 *   calls that are safe to parallelize only on distinct files. Defaults to
 *   membership of `tc.name` in CONDITIONALLY_PARALLEL_TOOLS.
 * @returns {object[][]} Batches in execution order; empty when there are no
 *   tool calls.
 */
export function groupToolCallsIntoBatches(toolCalls, isParallelizable = (tc) => PARALLELIZABLE_TOOLS.has(tc.name), isConditionallyParallel = (tc) => CONDITIONALLY_PARALLEL_TOOLS.has(tc.name)) {
    if (!toolCalls || toolCalls.length === 0) {
        return [];
    }
    const batches = [];
    let currentBatch = [];
    // Paths targeted by conditionally-parallel calls queued in currentBatch.
    let writePaths = new Set();
    const flush = () => {
        if (currentBatch.length > 0) {
            batches.push(currentBatch);
            currentBatch = [];
            writePaths = new Set();
        }
    };
    for (const toolCall of toolCalls) {
        if (isParallelizable(toolCall)) {
            // A read must not share a batch with an earlier write to the same file.
            if (writePaths.size > 0) {
                const readPath = extractToolFilePath(toolCall);
                if (readPath && writePaths.has(readPath)) {
                    flush();
                }
            }
            currentBatch.push(toolCall);
        }
        else if (isConditionallyParallel(toolCall)) {
            const writePath = extractToolFilePath(toolCall);
            if (!writePath) {
                // Unknown target: isolate it so nothing runs beside it, before or after.
                flush();
                batches.push([toolCall]);
                continue;
            }
            // Conflict: flush the current batch, start a new one with this call.
            if (hasFileConflict(toolCall, currentBatch)) {
                flush();
            }
            currentBatch.push(toolCall);
            writePaths.add(writePath);
        }
        else {
            // Non-parallelizable call: flush what accumulated, then run it alone.
            flush();
            batches.push([toolCall]);
        }
    }
    // Flush whatever is still accumulated.
    flush();
    return batches;
}
|
|
37
154
|
/**
|
|
38
155
|
* Production-ready agent that composes all features.
|
|
39
156
|
*/
|
|
@@ -73,12 +190,22 @@ export class ProductionAgent {
|
|
|
73
190
|
fileChangeTracker = null;
|
|
74
191
|
capabilitiesRegistry = null;
|
|
75
192
|
toolResolver = null;
|
|
193
|
+
agentId;
|
|
76
194
|
blackboard = null;
|
|
77
195
|
fileCache = null;
|
|
78
196
|
budgetPool = null;
|
|
79
197
|
taskManager = null;
|
|
80
198
|
store = null;
|
|
81
199
|
swarmOrchestrator = null;
|
|
200
|
+
workLog = null;
|
|
201
|
+
verificationGate = null;
|
|
202
|
+
// Phase 2-4 integration modules
|
|
203
|
+
injectionBudget = null;
|
|
204
|
+
selfImprovement = null;
|
|
205
|
+
subagentOutputStore = null;
|
|
206
|
+
autoCheckpointManager = null;
|
|
207
|
+
toolRecommendation = null;
|
|
208
|
+
lastComplexityAssessment = null;
|
|
82
209
|
// Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
|
|
83
210
|
// Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
|
|
84
211
|
spawnedTasks = new Map();
|
|
@@ -124,6 +251,8 @@ export class ProductionAgent {
|
|
|
124
251
|
// Build complete config with defaults
|
|
125
252
|
this.config = buildConfig(userConfig);
|
|
126
253
|
this.provider = userConfig.provider;
|
|
254
|
+
// Set unique agent ID (passed from spawnAgent for subagents, auto-generated for parents)
|
|
255
|
+
this.agentId = userConfig.agentId || `agent-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
127
256
|
// Initialize tool registry
|
|
128
257
|
this.tools = new Map();
|
|
129
258
|
for (const tool of this.config.tools) {
|
|
@@ -271,6 +400,19 @@ export class ProductionAgent {
|
|
|
271
400
|
maxIterations: this.config.maxIterations,
|
|
272
401
|
targetIterations: Math.min(baseBudget.targetIterations ?? 20, this.config.maxIterations),
|
|
273
402
|
});
|
|
403
|
+
// Work Log - compaction-resilient summary of agent work
|
|
404
|
+
// Always enabled - minimal overhead and critical for long-running tasks
|
|
405
|
+
this.workLog = createWorkLog();
|
|
406
|
+
// Verification Gate - opt-in completion verification
|
|
407
|
+
if (this.config.verificationCriteria) {
|
|
408
|
+
this.verificationGate = createVerificationGate(this.config.verificationCriteria);
|
|
409
|
+
}
|
|
410
|
+
// Phase 2-4: Orchestration & Advanced modules (always enabled, lightweight)
|
|
411
|
+
this.injectionBudget = createInjectionBudgetManager();
|
|
412
|
+
this.selfImprovement = createSelfImprovementProtocol(undefined, this.learningStore ?? undefined);
|
|
413
|
+
this.subagentOutputStore = createSubagentOutputStore({ persistToFile: false });
|
|
414
|
+
this.autoCheckpointManager = createAutoCheckpointManager({ enabled: true });
|
|
415
|
+
this.toolRecommendation = createToolRecommendationEngine();
|
|
274
416
|
// Agent Registry - always enabled for subagent support
|
|
275
417
|
this.agentRegistry = new AgentRegistry();
|
|
276
418
|
// Load user agents asynchronously - tracked for ensureReady()
|
|
@@ -301,6 +443,15 @@ export class ProductionAgent {
|
|
|
301
443
|
for (const tool of taskTools) {
|
|
302
444
|
this.tools.set(tool.name, tool);
|
|
303
445
|
}
|
|
446
|
+
// Built-in web search (Serper API) — gracefully handles missing API key
|
|
447
|
+
const serperCustomTool = createSerperSearchTool();
|
|
448
|
+
this.tools.set('web_search', {
|
|
449
|
+
name: serperCustomTool.name,
|
|
450
|
+
description: serperCustomTool.description,
|
|
451
|
+
parameters: serperCustomTool.inputSchema,
|
|
452
|
+
execute: serperCustomTool.execute,
|
|
453
|
+
dangerLevel: 'safe',
|
|
454
|
+
});
|
|
304
455
|
// Swarm Mode (experimental)
|
|
305
456
|
if (this.config.swarm) {
|
|
306
457
|
const swarmConfig = this.config.swarm;
|
|
@@ -797,6 +948,10 @@ export class ProductionAgent {
|
|
|
797
948
|
try {
|
|
798
949
|
// Check for cancellation before starting
|
|
799
950
|
cancellationToken?.throwIfCancellationRequested();
|
|
951
|
+
// Classify task complexity for scaling guidance
|
|
952
|
+
this.lastComplexityAssessment = classifyComplexity(task, {
|
|
953
|
+
hasActivePlan: !!this.state.plan,
|
|
954
|
+
});
|
|
800
955
|
// Check if swarm mode should handle this task
|
|
801
956
|
if (this.swarmOrchestrator) {
|
|
802
957
|
const swarmResult = await this.runSwarm(task);
|
|
@@ -1079,6 +1234,14 @@ export class ProductionAgent {
|
|
|
1079
1234
|
content: `[CONTEXT REDUCED: Earlier messages were removed to stay within budget. Conversation continues from recent context.]`,
|
|
1080
1235
|
});
|
|
1081
1236
|
messages.push(...recentMessages);
|
|
1237
|
+
// Inject work log after emergency truncation to prevent amnesia
|
|
1238
|
+
if (this.workLog?.hasContent()) {
|
|
1239
|
+
const workLogMessage = {
|
|
1240
|
+
role: 'user',
|
|
1241
|
+
content: this.workLog.toCompactString(),
|
|
1242
|
+
};
|
|
1243
|
+
messages.push(workLogMessage);
|
|
1244
|
+
}
|
|
1082
1245
|
// Update state messages too
|
|
1083
1246
|
this.state.messages.length = 0;
|
|
1084
1247
|
this.state.messages.push(...messages);
|
|
@@ -1259,6 +1422,35 @@ export class ProductionAgent {
|
|
|
1259
1422
|
}
|
|
1260
1423
|
}
|
|
1261
1424
|
// =====================================================================
|
|
1425
|
+
// INJECTION BUDGET ANALYSIS (Phase 2 - monitoring mode)
|
|
1426
|
+
// Collects stats on context injections without gating; logs when
|
|
1427
|
+
// budget would have dropped items. Validates system before enabling gating.
|
|
1428
|
+
// =====================================================================
|
|
1429
|
+
if (this.injectionBudget) {
|
|
1430
|
+
const proposals = [];
|
|
1431
|
+
if (budgetInjectedPrompt) {
|
|
1432
|
+
proposals.push({ name: 'budget_warning', priority: 0, maxTokens: 500, content: budgetInjectedPrompt });
|
|
1433
|
+
}
|
|
1434
|
+
// Approximate recitation content (actual injection handled above)
|
|
1435
|
+
if (this.contextEngineering) {
|
|
1436
|
+
const failureCtx = this.contextEngineering.getFailureContext(5);
|
|
1437
|
+
if (failureCtx) {
|
|
1438
|
+
proposals.push({ name: 'failure_context', priority: 2, maxTokens: 300, content: failureCtx });
|
|
1439
|
+
}
|
|
1440
|
+
}
|
|
1441
|
+
if (proposals.length > 0) {
|
|
1442
|
+
const accepted = this.injectionBudget.allocate(proposals);
|
|
1443
|
+
const stats = this.injectionBudget.getLastStats();
|
|
1444
|
+
if (stats && stats.droppedNames.length > 0 && process.env.DEBUG) {
|
|
1445
|
+
console.log(`[injection-budget] Would drop: ${stats.droppedNames.join(', ')} (${stats.proposedTokens} proposed, ${stats.acceptedTokens} accepted)`);
|
|
1446
|
+
}
|
|
1447
|
+
// Log total injection overhead for observability
|
|
1448
|
+
if (stats && process.env.DEBUG_LLM) {
|
|
1449
|
+
console.log(`[injection-budget] Iteration ${this.state.iteration}: ${accepted.length}/${proposals.length} injections, ~${stats.acceptedTokens} tokens`);
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
}
|
|
1453
|
+
// =====================================================================
|
|
1262
1454
|
// RESILIENT LLM CALL: Empty response retries + max_tokens continuation
|
|
1263
1455
|
// =====================================================================
|
|
1264
1456
|
// Get resilience config
|
|
@@ -1554,6 +1746,24 @@ export class ProductionAgent {
|
|
|
1554
1746
|
});
|
|
1555
1747
|
incompleteActionRetries = 0;
|
|
1556
1748
|
}
|
|
1749
|
+
// Verification gate: if criteria not met, nudge agent to verify before completing
|
|
1750
|
+
if (this.verificationGate && !forceTextOnly) {
|
|
1751
|
+
const vResult = this.verificationGate.check();
|
|
1752
|
+
if (!vResult.satisfied && !vResult.forceAllow && vResult.nudge) {
|
|
1753
|
+
// Inject nudge and continue the loop
|
|
1754
|
+
const nudgeMessage = {
|
|
1755
|
+
role: 'user',
|
|
1756
|
+
content: vResult.nudge,
|
|
1757
|
+
};
|
|
1758
|
+
messages.push(nudgeMessage);
|
|
1759
|
+
this.state.messages.push(nudgeMessage);
|
|
1760
|
+
this.observability?.logger?.info('Verification gate nudge', {
|
|
1761
|
+
missing: vResult.missing,
|
|
1762
|
+
nudgeCount: this.verificationGate.getState().nudgeCount,
|
|
1763
|
+
});
|
|
1764
|
+
continue;
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1557
1767
|
// No tool calls (or forced to ignore), agent is done - compact tool outputs to save context
|
|
1558
1768
|
// The model has "consumed" the tool outputs and produced a response,
|
|
1559
1769
|
// so we can replace verbose outputs with compact summaries
|
|
@@ -1590,12 +1800,33 @@ export class ProductionAgent {
|
|
|
1590
1800
|
// Execute tool calls (we know toolCalls is defined here due to the check above)
|
|
1591
1801
|
const toolCalls = response.toolCalls;
|
|
1592
1802
|
const toolResults = await this.executeToolCalls(toolCalls);
|
|
1593
|
-
// Record tool calls for economics/progress tracking
|
|
1803
|
+
// Record tool calls for economics/progress tracking + work log
|
|
1594
1804
|
for (let i = 0; i < toolCalls.length; i++) {
|
|
1595
1805
|
const toolCall = toolCalls[i];
|
|
1596
1806
|
const result = toolResults[i];
|
|
1597
1807
|
executedToolNames.add(toolCall.name);
|
|
1598
1808
|
this.economics?.recordToolCall(toolCall.name, toolCall.arguments, result?.result);
|
|
1809
|
+
// Record in work log for compaction resilience
|
|
1810
|
+
const toolOutput = result?.result && typeof result.result === 'object' && 'output' in result.result
|
|
1811
|
+
? String(result.result.output)
|
|
1812
|
+
: typeof result?.result === 'string' ? result.result : undefined;
|
|
1813
|
+
this.workLog?.recordToolExecution(toolCall.name, toolCall.arguments, toolOutput);
|
|
1814
|
+
// Record in verification gate
|
|
1815
|
+
if (this.verificationGate) {
|
|
1816
|
+
if (toolCall.name === 'bash') {
|
|
1817
|
+
const toolRes = result?.result;
|
|
1818
|
+
const output = toolRes && typeof toolRes === 'object' && 'output' in toolRes
|
|
1819
|
+
? String(toolRes.output)
|
|
1820
|
+
: typeof toolRes === 'string' ? toolRes : '';
|
|
1821
|
+
const exitCode = toolRes && typeof toolRes === 'object' && toolRes.metadata
|
|
1822
|
+
? toolRes.metadata.exitCode ?? null
|
|
1823
|
+
: null;
|
|
1824
|
+
this.verificationGate.recordBashExecution(String(toolCall.arguments.command || ''), output, exitCode);
|
|
1825
|
+
}
|
|
1826
|
+
if (['write_file', 'edit_file'].includes(toolCall.name)) {
|
|
1827
|
+
this.verificationGate.recordFileChange();
|
|
1828
|
+
}
|
|
1829
|
+
}
|
|
1599
1830
|
}
|
|
1600
1831
|
// Add tool results to messages (with truncation and proactive budget management)
|
|
1601
1832
|
const MAX_TOOL_OUTPUT_CHARS = 8000; // ~2000 tokens max per tool output
|
|
@@ -1617,6 +1848,15 @@ export class ProductionAgent {
|
|
|
1617
1848
|
messages.push(...compactionResult.compactedMessages);
|
|
1618
1849
|
this.state.messages.length = 0;
|
|
1619
1850
|
this.state.messages.push(...compactionResult.compactedMessages);
|
|
1851
|
+
// Inject work log after compaction to prevent amnesia
|
|
1852
|
+
if (this.workLog?.hasContent()) {
|
|
1853
|
+
const workLogMessage = {
|
|
1854
|
+
role: 'user',
|
|
1855
|
+
content: this.workLog.toCompactString(),
|
|
1856
|
+
};
|
|
1857
|
+
messages.push(workLogMessage);
|
|
1858
|
+
this.state.messages.push(workLogMessage);
|
|
1859
|
+
}
|
|
1620
1860
|
}
|
|
1621
1861
|
else if (compactionResult.status === 'hard_limit') {
|
|
1622
1862
|
// Hard limit reached - this is serious, emit error
|
|
@@ -1811,12 +2051,25 @@ export class ProductionAgent {
|
|
|
1811
2051
|
}
|
|
1812
2052
|
}
|
|
1813
2053
|
// Build system prompt using cache-aware builder if available (Trick P)
|
|
1814
|
-
// Combine memory, learnings, and
|
|
1815
|
-
const
|
|
2054
|
+
// Combine memory, learnings, codebase context, and environment facts
|
|
2055
|
+
const combinedContextParts = [
|
|
2056
|
+
// Environment facts — temporal/platform grounding (prevents stale date hallucinations)
|
|
2057
|
+
formatFactsBlock(getEnvironmentFacts()),
|
|
1816
2058
|
...(memoryContext.length > 0 ? memoryContext : []),
|
|
1817
2059
|
...(learningsContext ? [learningsContext] : []),
|
|
1818
2060
|
...(codebaseContextStr ? [`\n## Relevant Code\n${codebaseContextStr}`] : []),
|
|
1819
|
-
]
|
|
2061
|
+
];
|
|
2062
|
+
// Inject thinking directives and scaling guidance for non-simple tasks
|
|
2063
|
+
if (this.lastComplexityAssessment) {
|
|
2064
|
+
const thinkingPrompt = getThinkingSystemPrompt(this.lastComplexityAssessment.tier);
|
|
2065
|
+
if (thinkingPrompt) {
|
|
2066
|
+
combinedContextParts.push(thinkingPrompt);
|
|
2067
|
+
}
|
|
2068
|
+
if (this.lastComplexityAssessment.tier !== 'simple') {
|
|
2069
|
+
combinedContextParts.push(getScalingGuidance(this.lastComplexityAssessment));
|
|
2070
|
+
}
|
|
2071
|
+
}
|
|
2072
|
+
const combinedContext = combinedContextParts.join('\n');
|
|
1820
2073
|
const promptOptions = {
|
|
1821
2074
|
rules: rulesContent + (skillsPrompt ? '\n\n' + skillsPrompt : ''),
|
|
1822
2075
|
tools: toolDescriptions,
|
|
@@ -1882,9 +2135,12 @@ export class ProductionAgent {
|
|
|
1882
2135
|
this.emit({ type: 'llm.start', model: this.config.model || 'default' });
|
|
1883
2136
|
// Prompt caching (Improvement P1): Replace the system message with structured content
|
|
1884
2137
|
// that includes cache_control markers, enabling 60-70% cache hit rates.
|
|
1885
|
-
//
|
|
2138
|
+
// Only use structured cache_control markers for Anthropic models — other providers
|
|
2139
|
+
// (DeepSeek, Grok, etc.) use automatic prefix-based caching and don't understand these markers.
|
|
2140
|
+
const configModel = this.config.model || 'default';
|
|
2141
|
+
const isAnthropicModel = configModel.startsWith('anthropic/') || configModel.startsWith('claude-');
|
|
1886
2142
|
let providerMessages = messages;
|
|
1887
|
-
if (this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
|
|
2143
|
+
if (isAnthropicModel && this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
|
|
1888
2144
|
providerMessages = messages.map((m, i) => {
|
|
1889
2145
|
if (i === 0 && m.role === 'system') {
|
|
1890
2146
|
// Replace system message with structured cacheable content
|
|
@@ -1953,6 +2209,8 @@ export class ProductionAgent {
|
|
|
1953
2209
|
},
|
|
1954
2210
|
},
|
|
1955
2211
|
});
|
|
2212
|
+
// Pause duration budget during LLM call - network time shouldn't count against agent
|
|
2213
|
+
this.economics?.pauseDuration();
|
|
1956
2214
|
try {
|
|
1957
2215
|
let response;
|
|
1958
2216
|
let actualModel = model;
|
|
@@ -1967,7 +2225,7 @@ export class ProductionAgent {
|
|
|
1967
2225
|
taskType: 'general',
|
|
1968
2226
|
estimatedTokens: messages.reduce((sum, m) => sum + m.content.length / 4, 0),
|
|
1969
2227
|
};
|
|
1970
|
-
const result = await this.routing.executeWithFallback(
|
|
2228
|
+
const result = await this.routing.executeWithFallback(providerMessages, context);
|
|
1971
2229
|
response = result.response;
|
|
1972
2230
|
actualModel = result.model;
|
|
1973
2231
|
// Emit routing insight
|
|
@@ -2017,6 +2275,14 @@ export class ProductionAgent {
|
|
|
2017
2275
|
});
|
|
2018
2276
|
}
|
|
2019
2277
|
const duration = Date.now() - startTime;
|
|
2278
|
+
// Debug cache stats when DEBUG_CACHE is set
|
|
2279
|
+
if (process.env.DEBUG_CACHE) {
|
|
2280
|
+
const cr = response.usage?.cacheReadTokens ?? 0;
|
|
2281
|
+
const cw = response.usage?.cacheWriteTokens ?? 0;
|
|
2282
|
+
const inp = response.usage?.inputTokens ?? 0;
|
|
2283
|
+
const hitRate = inp > 0 ? ((cr / inp) * 100).toFixed(1) : '0.0';
|
|
2284
|
+
console.log(`[Cache] model=${actualModel} read=${cr} write=${cw} input=${inp} hit=${hitRate}%`);
|
|
2285
|
+
}
|
|
2020
2286
|
// Lesson 26: Record LLM response for tracing
|
|
2021
2287
|
this.traceCollector?.record({
|
|
2022
2288
|
type: 'llm.response',
|
|
@@ -2084,6 +2350,10 @@ export class ProductionAgent {
|
|
|
2084
2350
|
this.observability?.tracer?.endSpan(spanId);
|
|
2085
2351
|
throw error;
|
|
2086
2352
|
}
|
|
2353
|
+
finally {
|
|
2354
|
+
// Resume duration budget after LLM call completes (success or failure)
|
|
2355
|
+
this.economics?.resumeDuration();
|
|
2356
|
+
}
|
|
2087
2357
|
}
|
|
2088
2358
|
/**
|
|
2089
2359
|
* Execute an async callback while excluding wall-clock wait time from duration budgeting.
|
|
@@ -2100,324 +2370,360 @@ export class ProductionAgent {
|
|
|
2100
2370
|
}
|
|
2101
2371
|
/**
|
|
2102
2372
|
* Execute tool calls with safety checks and execution policy enforcement.
|
|
2373
|
+
* Parallelizable read-only tools are batched and executed concurrently.
|
|
2103
2374
|
*/
|
|
2104
2375
|
async executeToolCalls(toolCalls) {
|
|
2105
2376
|
const results = [];
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2377
|
+
// Group consecutive parallelizable tool calls into batches
|
|
2378
|
+
const batches = groupToolCallsIntoBatches(toolCalls);
|
|
2379
|
+
// Execute batches: parallel batches use Promise.allSettled, sequential execute one-by-one
|
|
2380
|
+
for (const batch of batches) {
|
|
2381
|
+
if (batch.length > 1 && PARALLELIZABLE_TOOLS.has(batch[0].name)) {
|
|
2382
|
+
// Execute parallelizable batch concurrently
|
|
2383
|
+
const batchResults = await Promise.allSettled(batch.map(tc => this.executeSingleToolCall(tc)));
|
|
2384
|
+
for (const result of batchResults) {
|
|
2385
|
+
if (result.status === 'fulfilled') {
|
|
2386
|
+
results.push(result.value);
|
|
2387
|
+
}
|
|
2388
|
+
else {
|
|
2389
|
+
// Should not happen since executeSingleToolCall catches errors internally
|
|
2390
|
+
const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
|
|
2391
|
+
results.push({ callId: 'unknown', result: `Error: ${error}`, error });
|
|
2392
|
+
}
|
|
2393
|
+
}
|
|
2394
|
+
}
|
|
2395
|
+
else {
|
|
2396
|
+
// Execute sequentially
|
|
2397
|
+
for (const tc of batch) {
|
|
2398
|
+
results.push(await this.executeSingleToolCall(tc));
|
|
2399
|
+
}
|
|
2400
|
+
}
|
|
2401
|
+
}
|
|
2402
|
+
return results;
|
|
2403
|
+
}
|
|
2404
|
+
/**
|
|
2405
|
+
* Execute a single tool call with all safety checks, tracing, and error handling.
|
|
2406
|
+
*/
|
|
2407
|
+
async executeSingleToolCall(toolCall) {
|
|
2408
|
+
const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
|
|
2409
|
+
const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
2410
|
+
this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
|
|
2411
|
+
const startTime = Date.now();
|
|
2412
|
+
// Short-circuit if tool call arguments failed to parse
|
|
2413
|
+
if (toolCall.parseError) {
|
|
2414
|
+
const errorMsg = `Tool arguments could not be parsed: ${toolCall.parseError}. Please retry with complete, valid JSON.`;
|
|
2415
|
+
this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: errorMsg });
|
|
2112
2416
|
this.traceCollector?.record({
|
|
2113
|
-
type: 'tool.
|
|
2114
|
-
data: {
|
|
2115
|
-
executionId,
|
|
2116
|
-
toolName: toolCall.name,
|
|
2117
|
-
arguments: toolCall.arguments,
|
|
2118
|
-
},
|
|
2417
|
+
type: 'tool.end',
|
|
2418
|
+
data: { executionId, status: 'error', error: new Error(errorMsg), durationMs: Date.now() - startTime },
|
|
2119
2419
|
});
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
`Tool: ${toolCall.name}\n` +
|
|
2146
|
-
`${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
|
|
2147
|
-
`Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
|
|
2148
|
-
results.push({
|
|
2149
|
-
callId: toolCall.id,
|
|
2150
|
-
result: queueMessage,
|
|
2151
|
-
});
|
|
2152
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
2153
|
-
continue; // Skip actual execution
|
|
2420
|
+
this.observability?.tracer?.endSpan(spanId);
|
|
2421
|
+
return { callId: toolCall.id, result: `Error: ${errorMsg}`, error: errorMsg };
|
|
2422
|
+
}
|
|
2423
|
+
// Lesson 26: Record tool start for tracing
|
|
2424
|
+
this.traceCollector?.record({
|
|
2425
|
+
type: 'tool.start',
|
|
2426
|
+
data: {
|
|
2427
|
+
executionId,
|
|
2428
|
+
toolName: toolCall.name,
|
|
2429
|
+
arguments: toolCall.arguments,
|
|
2430
|
+
},
|
|
2431
|
+
});
|
|
2432
|
+
try {
|
|
2433
|
+
// =====================================================================
|
|
2434
|
+
// PLAN MODE WRITE INTERCEPTION
|
|
2435
|
+
// =====================================================================
|
|
2436
|
+
// In plan mode, intercept write operations and queue them as proposed changes
|
|
2437
|
+
if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
|
|
2438
|
+
// Extract contextual reasoning instead of simple truncation
|
|
2439
|
+
const reason = this.extractChangeReasoning(toolCall, this.state.messages);
|
|
2440
|
+
// Start a new plan if needed
|
|
2441
|
+
if (!this.pendingPlanManager.hasPendingPlan()) {
|
|
2442
|
+
const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
|
|
2443
|
+
const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
|
|
2444
|
+
this.pendingPlanManager.startPlan(task);
|
|
2154
2445
|
}
|
|
2155
|
-
//
|
|
2156
|
-
|
|
2157
|
-
//
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
this.
|
|
2168
|
-
|
|
2169
|
-
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2446
|
+
// Queue the write operation
|
|
2447
|
+
const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
|
|
2448
|
+
// Emit event for UI
|
|
2449
|
+
this.emit({
|
|
2450
|
+
type: 'plan.change.queued',
|
|
2451
|
+
tool: toolCall.name,
|
|
2452
|
+
changeId: change?.id,
|
|
2453
|
+
summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
|
|
2454
|
+
});
|
|
2455
|
+
// Return a message indicating the change was queued
|
|
2456
|
+
const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
|
|
2457
|
+
`Tool: ${toolCall.name}\n` +
|
|
2458
|
+
`${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
|
|
2459
|
+
`Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
|
|
2460
|
+
this.observability?.tracer?.endSpan(spanId);
|
|
2461
|
+
return { callId: toolCall.id, result: queueMessage };
|
|
2462
|
+
}
|
|
2463
|
+
// =====================================================================
|
|
2464
|
+
// EXECUTION POLICY ENFORCEMENT (Lesson 23)
|
|
2465
|
+
// =====================================================================
|
|
2466
|
+
let policyApprovedByUser = false;
|
|
2467
|
+
if (this.executionPolicy) {
|
|
2468
|
+
const policyContext = {
|
|
2469
|
+
messages: this.state.messages,
|
|
2470
|
+
currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
|
|
2471
|
+
previousToolCalls: [],
|
|
2472
|
+
};
|
|
2473
|
+
const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
|
|
2474
|
+
// Emit policy event
|
|
2475
|
+
this.emit({
|
|
2476
|
+
type: 'policy.evaluated',
|
|
2477
|
+
tool: toolCall.name,
|
|
2478
|
+
policy: evaluation.policy,
|
|
2479
|
+
reason: evaluation.reason,
|
|
2480
|
+
});
|
|
2481
|
+
// Emit decision transparency event
|
|
2482
|
+
this.emit({
|
|
2483
|
+
type: 'decision.tool',
|
|
2484
|
+
tool: toolCall.name,
|
|
2485
|
+
decision: evaluation.policy === 'forbidden' ? 'blocked'
|
|
2486
|
+
: evaluation.policy === 'prompt' ? 'prompted'
|
|
2487
|
+
: 'allowed',
|
|
2488
|
+
policyMatch: evaluation.reason,
|
|
2489
|
+
});
|
|
2490
|
+
// Enhanced tracing: Record policy decision
|
|
2491
|
+
this.traceCollector?.record({
|
|
2492
|
+
type: 'decision',
|
|
2493
|
+
data: {
|
|
2494
|
+
type: 'policy',
|
|
2495
|
+
decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
|
|
2496
|
+
outcome: evaluation.policy === 'forbidden' ? 'blocked'
|
|
2497
|
+
: evaluation.policy === 'prompt' ? 'deferred'
|
|
2179
2498
|
: 'allowed',
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
}
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
if (evaluation.policy === 'forbidden') {
|
|
2201
|
-
throw new Error(`Forbidden by policy: ${evaluation.reason}`);
|
|
2202
|
-
}
|
|
2203
|
-
// Handle prompt policy - requires approval
|
|
2204
|
-
if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
|
|
2205
|
-
// Try to get approval through safety manager's human-in-loop
|
|
2206
|
-
const humanInLoop = this.safety?.humanInLoop;
|
|
2207
|
-
if (humanInLoop) {
|
|
2208
|
-
const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
|
|
2209
|
-
if (!approval.approved) {
|
|
2210
|
-
throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
|
|
2211
|
-
}
|
|
2212
|
-
policyApprovedByUser = true;
|
|
2213
|
-
// Create a grant for future similar calls if approved
|
|
2214
|
-
this.executionPolicy.createGrant({
|
|
2215
|
-
toolName: toolCall.name,
|
|
2216
|
-
grantedBy: 'user',
|
|
2217
|
-
reason: 'Approved during execution',
|
|
2218
|
-
maxUsages: 5, // Allow 5 more similar calls
|
|
2219
|
-
});
|
|
2220
|
-
}
|
|
2221
|
-
else {
|
|
2222
|
-
// No approval handler - block by default for safety
|
|
2223
|
-
throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
|
|
2499
|
+
reasoning: evaluation.reason,
|
|
2500
|
+
factors: [
|
|
2501
|
+
{ name: 'policy', value: evaluation.policy },
|
|
2502
|
+
{ name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
|
|
2503
|
+
],
|
|
2504
|
+
confidence: evaluation.intent?.confidence ?? 0.8,
|
|
2505
|
+
},
|
|
2506
|
+
});
|
|
2507
|
+
// Handle forbidden policy - always block
|
|
2508
|
+
if (evaluation.policy === 'forbidden') {
|
|
2509
|
+
throw new Error(`Forbidden by policy: ${evaluation.reason}`);
|
|
2510
|
+
}
|
|
2511
|
+
// Handle prompt policy - requires approval
|
|
2512
|
+
if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
|
|
2513
|
+
// Try to get approval through safety manager's human-in-loop
|
|
2514
|
+
const humanInLoop = this.safety?.humanInLoop;
|
|
2515
|
+
if (humanInLoop) {
|
|
2516
|
+
const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
|
|
2517
|
+
if (!approval.approved) {
|
|
2518
|
+
throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
|
|
2224
2519
|
}
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
confidence: evaluation.intent.confidence,
|
|
2520
|
+
policyApprovedByUser = true;
|
|
2521
|
+
// Create a grant for future similar calls if approved
|
|
2522
|
+
this.executionPolicy.createGrant({
|
|
2523
|
+
toolName: toolCall.name,
|
|
2524
|
+
grantedBy: 'user',
|
|
2525
|
+
reason: 'Approved during execution',
|
|
2526
|
+
maxUsages: 5, // Allow 5 more similar calls
|
|
2233
2527
|
});
|
|
2234
2528
|
}
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
// =====================================================================
|
|
2239
|
-
if (this.safety) {
|
|
2240
|
-
const safety = this.safety;
|
|
2241
|
-
const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
|
|
2242
|
-
if (!validation.allowed) {
|
|
2243
|
-
throw new Error(`Tool call blocked: ${validation.reason}`);
|
|
2244
|
-
}
|
|
2245
|
-
}
|
|
2246
|
-
// Get tool definition (with lazy-loading support for MCP tools)
|
|
2247
|
-
let tool = this.tools.get(toolCall.name);
|
|
2248
|
-
const wasPreloaded = !!tool;
|
|
2249
|
-
if (!tool && this.toolResolver) {
|
|
2250
|
-
// Try to resolve and load the tool on-demand
|
|
2251
|
-
const resolved = this.toolResolver(toolCall.name);
|
|
2252
|
-
if (resolved) {
|
|
2253
|
-
this.addTool(resolved);
|
|
2254
|
-
tool = resolved;
|
|
2255
|
-
if (process.env.DEBUG)
|
|
2256
|
-
console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
|
|
2257
|
-
this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
|
|
2529
|
+
else {
|
|
2530
|
+
// No approval handler - block by default for safety
|
|
2531
|
+
throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
|
|
2258
2532
|
}
|
|
2259
2533
|
}
|
|
2260
|
-
if
|
|
2261
|
-
|
|
2534
|
+
// Log intent classification if available
|
|
2535
|
+
if (evaluation.intent) {
|
|
2536
|
+
this.emit({
|
|
2537
|
+
type: 'intent.classified',
|
|
2538
|
+
tool: toolCall.name,
|
|
2539
|
+
intent: evaluation.intent.type,
|
|
2540
|
+
confidence: evaluation.intent.confidence,
|
|
2541
|
+
});
|
|
2262
2542
|
}
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2543
|
+
}
|
|
2544
|
+
// =====================================================================
|
|
2545
|
+
// SAFETY VALIDATION (Lesson 20-21)
|
|
2546
|
+
// =====================================================================
|
|
2547
|
+
if (this.safety) {
|
|
2548
|
+
const safety = this.safety;
|
|
2549
|
+
const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
|
|
2550
|
+
if (!validation.allowed) {
|
|
2551
|
+
throw new Error(`Tool call blocked: ${validation.reason}`);
|
|
2266
2552
|
}
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
if (!claimed) {
|
|
2281
|
-
const existingClaim = this.blackboard.getClaim(filePath);
|
|
2282
|
-
throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
|
|
2283
|
-
`Wait for the other agent to complete or choose a different file.`);
|
|
2284
|
-
}
|
|
2285
|
-
}
|
|
2553
|
+
}
|
|
2554
|
+
// Get tool definition (with lazy-loading support for MCP tools)
|
|
2555
|
+
let tool = this.tools.get(toolCall.name);
|
|
2556
|
+
const wasPreloaded = !!tool;
|
|
2557
|
+
if (!tool && this.toolResolver) {
|
|
2558
|
+
// Try to resolve and load the tool on-demand
|
|
2559
|
+
const resolved = this.toolResolver(toolCall.name);
|
|
2560
|
+
if (resolved) {
|
|
2561
|
+
this.addTool(resolved);
|
|
2562
|
+
tool = resolved;
|
|
2563
|
+
if (process.env.DEBUG)
|
|
2564
|
+
console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
|
|
2565
|
+
this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
|
|
2286
2566
|
}
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2567
|
+
}
|
|
2568
|
+
if (!tool) {
|
|
2569
|
+
throw new Error(`Unknown tool: ${toolCall.name}`);
|
|
2570
|
+
}
|
|
2571
|
+
// Log whether tool was pre-loaded or auto-loaded (for MCP tools)
|
|
2572
|
+
if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
|
|
2573
|
+
console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
|
|
2574
|
+
}
|
|
2575
|
+
// =====================================================================
|
|
2576
|
+
// BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
|
|
2577
|
+
// =====================================================================
|
|
2578
|
+
// Claim file resources before write operations to prevent conflicts
|
|
2579
|
+
if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
|
|
2580
|
+
const args = toolCall.arguments;
|
|
2581
|
+
const filePath = String(args.path || args.file_path || '');
|
|
2582
|
+
if (filePath) {
|
|
2583
|
+
const agentId = this.agentId;
|
|
2584
|
+
const claimed = this.blackboard.claim(filePath, agentId, 'write', {
|
|
2585
|
+
ttl: 60000, // 1 minute claim
|
|
2586
|
+
intent: `${toolCall.name}: ${filePath}`,
|
|
2587
|
+
});
|
|
2588
|
+
if (!claimed) {
|
|
2589
|
+
const existingClaim = this.blackboard.getClaim(filePath);
|
|
2590
|
+
throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
|
|
2591
|
+
`Wait for the other agent to complete or choose a different file.`);
|
|
2308
2592
|
}
|
|
2309
2593
|
}
|
|
2310
|
-
|
|
2311
|
-
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
const
|
|
2317
|
-
|
|
2318
|
-
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
else {
|
|
2331
|
-
result = await tool.execute(toolCall.arguments);
|
|
2332
|
-
}
|
|
2333
|
-
const duration = Date.now() - startTime;
|
|
2334
|
-
// Lesson 26: Record tool completion for tracing
|
|
2335
|
-
this.traceCollector?.record({
|
|
2336
|
-
type: 'tool.end',
|
|
2337
|
-
data: {
|
|
2338
|
-
executionId,
|
|
2339
|
-
status: 'success',
|
|
2340
|
-
result,
|
|
2341
|
-
durationMs: duration,
|
|
2342
|
-
},
|
|
2343
|
-
});
|
|
2344
|
-
// Record metrics
|
|
2345
|
-
this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
|
|
2346
|
-
this.state.metrics.toolCalls++;
|
|
2347
|
-
this.emit({ type: 'tool.complete', tool: toolCall.name, result });
|
|
2348
|
-
// FILE CACHE: Store read results and invalidate on writes
|
|
2349
|
-
if (this.fileCache) {
|
|
2350
|
-
const args = toolCall.arguments;
|
|
2351
|
-
const filePath = String(args.path || args.file_path || '');
|
|
2352
|
-
if (toolCall.name === 'read_file' && filePath) {
|
|
2353
|
-
// Cache successful read results
|
|
2354
|
-
const resultObj = result;
|
|
2355
|
-
if (resultObj?.success && typeof resultObj.output === 'string') {
|
|
2356
|
-
this.fileCache.set(filePath, resultObj.output);
|
|
2357
|
-
}
|
|
2358
|
-
}
|
|
2359
|
-
else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
|
|
2360
|
-
// Invalidate cache when files are modified (including undo operations)
|
|
2361
|
-
this.fileCache.invalidate(filePath);
|
|
2594
|
+
}
|
|
2595
|
+
// FILE CACHE: Check cache for read_file operations before executing
|
|
2596
|
+
if (this.fileCache && toolCall.name === 'read_file') {
|
|
2597
|
+
const args = toolCall.arguments;
|
|
2598
|
+
const readPath = String(args.path || '');
|
|
2599
|
+
if (readPath) {
|
|
2600
|
+
const cached = this.fileCache.get(readPath);
|
|
2601
|
+
if (cached !== undefined) {
|
|
2602
|
+
const lines = cached.split('\n').length;
|
|
2603
|
+
const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
|
|
2604
|
+
const duration = Date.now() - startTime;
|
|
2605
|
+
this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
|
|
2606
|
+
this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
|
|
2607
|
+
this.state.metrics.toolCalls++;
|
|
2608
|
+
this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
|
|
2609
|
+
this.observability?.tracer?.endSpan(spanId);
|
|
2610
|
+
return {
|
|
2611
|
+
callId: toolCall.id,
|
|
2612
|
+
result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
|
|
2613
|
+
};
|
|
2362
2614
|
}
|
|
2363
2615
|
}
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
|
|
2616
|
+
}
|
|
2617
|
+
// Execute tool (with sandbox if available)
|
|
2618
|
+
let result;
|
|
2619
|
+
if (this.safety?.sandbox) {
|
|
2620
|
+
// CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
|
|
2621
|
+
// The default 60s sandbox timeout would kill subagents prematurely
|
|
2622
|
+
// Subagents may run for minutes (per their own timeout config)
|
|
2623
|
+
const isSpawnAgent = toolCall.name === 'spawn_agent';
|
|
2624
|
+
const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
|
|
2625
|
+
const isSubagentTool = isSpawnAgent || isSpawnParallel;
|
|
2626
|
+
const subagentConfig = this.config.subagent;
|
|
2627
|
+
const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
|
|
2628
|
+
const subagentTimeout = hasSubagentConfig
|
|
2629
|
+
? subagentConfig.defaultTimeout ?? 600000 // 10 min default
|
|
2630
|
+
: 600000;
|
|
2631
|
+
// Use subagent timeout + buffer for spawn tools, default for others
|
|
2632
|
+
// For spawn_agents_parallel, multiply by number of agents (they run in parallel,
|
|
2633
|
+
// but the total wall-clock time should still allow the slowest agent to complete)
|
|
2634
|
+
const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
|
|
2635
|
+
result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
|
|
2636
|
+
}
|
|
2637
|
+
else {
|
|
2638
|
+
result = await tool.execute(toolCall.arguments);
|
|
2639
|
+
}
|
|
2640
|
+
const duration = Date.now() - startTime;
|
|
2641
|
+
// Lesson 26: Record tool completion for tracing
|
|
2642
|
+
this.traceCollector?.record({
|
|
2643
|
+
type: 'tool.end',
|
|
2644
|
+
data: {
|
|
2645
|
+
executionId,
|
|
2646
|
+
status: 'success',
|
|
2375
2647
|
result,
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2648
|
+
durationMs: duration,
|
|
2649
|
+
},
|
|
2650
|
+
});
|
|
2651
|
+
// Record metrics
|
|
2652
|
+
this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
|
|
2653
|
+
this.state.metrics.toolCalls++;
|
|
2654
|
+
this.emit({ type: 'tool.complete', tool: toolCall.name, result });
|
|
2655
|
+
// FILE CACHE: Store read results and invalidate on writes
|
|
2656
|
+
if (this.fileCache) {
|
|
2657
|
+
const args = toolCall.arguments;
|
|
2658
|
+
const filePath = String(args.path || args.file_path || '');
|
|
2659
|
+
if (toolCall.name === 'read_file' && filePath) {
|
|
2660
|
+
// Cache successful read results
|
|
2661
|
+
const resultObj = result;
|
|
2662
|
+
if (resultObj?.success && typeof resultObj.output === 'string') {
|
|
2663
|
+
this.fileCache.set(filePath, resultObj.output);
|
|
2384
2664
|
}
|
|
2385
2665
|
}
|
|
2386
|
-
|
|
2666
|
+
else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
|
|
2667
|
+
// Invalidate cache when files are modified (including undo operations)
|
|
2668
|
+
this.fileCache.invalidate(filePath);
|
|
2669
|
+
}
|
|
2387
2670
|
}
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
|
|
2671
|
+
// Emit tool insight with result summary
|
|
2672
|
+
const summary = this.summarizeToolResult(toolCall.name, result);
|
|
2673
|
+
this.emit({
|
|
2674
|
+
type: 'insight.tool',
|
|
2675
|
+
tool: toolCall.name,
|
|
2676
|
+
summary,
|
|
2677
|
+
durationMs: duration,
|
|
2678
|
+
success: true,
|
|
2679
|
+
});
|
|
2680
|
+
// Release blackboard claim after successful file write
|
|
2681
|
+
if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
|
|
2682
|
+
const args = toolCall.arguments;
|
|
2683
|
+
const filePath = String(args.path || args.file_path || '');
|
|
2684
|
+
if (filePath) {
|
|
2685
|
+
const agentId = this.agentId;
|
|
2686
|
+
this.blackboard.release(filePath, agentId);
|
|
2687
|
+
}
|
|
2688
|
+
}
|
|
2689
|
+
// Self-improvement: record success pattern
|
|
2690
|
+
this.selfImprovement?.recordSuccess(toolCall.name, toolCall.arguments, typeof result === 'string' ? result.slice(0, 200) : JSON.stringify(result).slice(0, 200));
|
|
2691
|
+
this.observability?.tracer?.endSpan(spanId);
|
|
2692
|
+
return { callId: toolCall.id, result };
|
|
2693
|
+
}
|
|
2694
|
+
catch (err) {
|
|
2695
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
2696
|
+
const duration = Date.now() - startTime;
|
|
2697
|
+
// Lesson 26: Record tool error for tracing
|
|
2698
|
+
this.traceCollector?.record({
|
|
2699
|
+
type: 'tool.end',
|
|
2700
|
+
data: {
|
|
2701
|
+
executionId,
|
|
2702
|
+
status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
|
|
2409
2703
|
error,
|
|
2410
|
-
|
|
2411
|
-
}
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2415
|
-
|
|
2416
|
-
|
|
2417
|
-
|
|
2704
|
+
durationMs: duration,
|
|
2705
|
+
},
|
|
2706
|
+
});
|
|
2707
|
+
this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
|
|
2708
|
+
this.observability?.tracer?.recordError(error);
|
|
2709
|
+
this.observability?.tracer?.endSpan(spanId);
|
|
2710
|
+
// FAILURE EVIDENCE RECORDING (Trick S)
|
|
2711
|
+
// Track failed tool calls to prevent loops and provide context
|
|
2712
|
+
this.contextEngineering?.recordFailure({
|
|
2713
|
+
action: toolCall.name,
|
|
2714
|
+
args: toolCall.arguments,
|
|
2715
|
+
error,
|
|
2716
|
+
intent: `Execute tool ${toolCall.name}`,
|
|
2717
|
+
});
|
|
2718
|
+
// Self-improvement: enhance error message with diagnosis for better LLM recovery
|
|
2719
|
+
if (this.selfImprovement) {
|
|
2720
|
+
const enhanced = this.selfImprovement.enhanceErrorMessage(toolCall.name, error.message, toolCall.arguments);
|
|
2721
|
+
this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: enhanced });
|
|
2722
|
+
return { callId: toolCall.id, result: `Error: ${enhanced}`, error: enhanced };
|
|
2418
2723
|
}
|
|
2724
|
+
this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
|
|
2725
|
+
return { callId: toolCall.id, result: `Error: ${error.message}`, error: error.message };
|
|
2419
2726
|
}
|
|
2420
|
-
return results;
|
|
2421
2727
|
}
|
|
2422
2728
|
/**
|
|
2423
2729
|
* Get recently modified file paths from the file change tracker.
|
|
@@ -3443,6 +3749,19 @@ export class ProductionAgent {
|
|
|
3443
3749
|
}
|
|
3444
3750
|
// Create the checkpoint
|
|
3445
3751
|
const label = `auto-iter-${this.state.iteration}`;
|
|
3752
|
+
// Supplementary: also save to AutoCheckpointManager (file-based)
|
|
3753
|
+
if (this.autoCheckpointManager) {
|
|
3754
|
+
try {
|
|
3755
|
+
this.autoCheckpointManager.save({
|
|
3756
|
+
label,
|
|
3757
|
+
sessionId: this.agentId,
|
|
3758
|
+
iteration: this.state.iteration,
|
|
3759
|
+
});
|
|
3760
|
+
}
|
|
3761
|
+
catch {
|
|
3762
|
+
// Non-critical — don't fail the main checkpoint path
|
|
3763
|
+
}
|
|
3764
|
+
}
|
|
3446
3765
|
return this.createCheckpoint(label);
|
|
3447
3766
|
}
|
|
3448
3767
|
// =========================================================================
|
|
@@ -3596,7 +3915,18 @@ export class ProductionAgent {
|
|
|
3596
3915
|
let workerResultId;
|
|
3597
3916
|
try {
|
|
3598
3917
|
// Filter tools for this agent
|
|
3599
|
-
|
|
3918
|
+
let agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
|
|
3919
|
+
// Apply tool recommendations to improve subagent focus (only for large tool sets)
|
|
3920
|
+
if (this.toolRecommendation && agentTools.length > 15) {
|
|
3921
|
+
const taskType = ToolRecommendationEngine.inferTaskType(agentName);
|
|
3922
|
+
const recommendations = this.toolRecommendation.recommendTools(task, taskType, agentTools.map(t => t.name));
|
|
3923
|
+
if (recommendations.length > 0) {
|
|
3924
|
+
const recommendedNames = new Set(recommendations.map(r => r.toolName));
|
|
3925
|
+
// Always keep spawn tools even if not recommended
|
|
3926
|
+
const alwaysKeep = new Set(['spawn_agent', 'spawn_agents_parallel']);
|
|
3927
|
+
agentTools = agentTools.filter(t => recommendedNames.has(t.name) || alwaysKeep.has(t.name));
|
|
3928
|
+
}
|
|
3929
|
+
}
|
|
3600
3930
|
// Resolve model - abstract tiers (fast/balanced/quality) should use parent's model
|
|
3601
3931
|
// Only use agentDef.model if it's an actual model ID (contains '/')
|
|
3602
3932
|
const resolvedModel = (agentDef.model && agentDef.model.includes('/'))
|
|
@@ -3688,14 +4018,30 @@ export class ProductionAgent {
|
|
|
3688
4018
|
// BUDGET AWARENESS: Always inject so subagent understands its limits
|
|
3689
4019
|
const subagentBudgetTokens = constraints?.maxTokens ?? SUBAGENT_BUDGET.maxTokens ?? 100000;
|
|
3690
4020
|
const subagentBudgetMinutes = Math.round((SUBAGENT_BUDGET.maxDuration ?? 240000) / 60000);
|
|
3691
|
-
|
|
3692
|
-
|
|
3693
|
-
|
|
3694
|
-
|
|
3695
|
-
|
|
3696
|
-
|
|
3697
|
-
|
|
3698
|
-
|
|
4021
|
+
if (isSwarmWorker) {
|
|
4022
|
+
// V6: Calmer resource awareness for swarm workers — prevents weaker models
|
|
4023
|
+
// from confabulating budget warnings and wrapping up without doing work
|
|
4024
|
+
constraintParts.push(`**Resource Info:**\n` +
|
|
4025
|
+
`- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens (you have plenty)\n` +
|
|
4026
|
+
`- Time limit: ~${subagentBudgetMinutes} minutes\n` +
|
|
4027
|
+
`- Focus on completing your task. Do NOT wrap up prematurely.\n` +
|
|
4028
|
+
`- You will receive a system warning IF you approach budget limits. Until then, work normally.\n` +
|
|
4029
|
+
`- **IMPORTANT:** Budget warnings come from the SYSTEM, not from your own assessment. ` +
|
|
4030
|
+
`Do not preemptively claim budget issues.\n` +
|
|
4031
|
+
`- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
|
|
4032
|
+
` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
|
|
4033
|
+
}
|
|
4034
|
+
else {
|
|
4035
|
+
// Original RESOURCE AWARENESS text for regular subagents
|
|
4036
|
+
constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
|
|
4037
|
+
`- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
|
|
4038
|
+
`- Time limit: ~${subagentBudgetMinutes} minutes\n` +
|
|
4039
|
+
`- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
|
|
4040
|
+
`- Do not explore indefinitely - be focused and efficient.\n` +
|
|
4041
|
+
`- If approaching limits, summarize findings and return.\n` +
|
|
4042
|
+
`- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
|
|
4043
|
+
` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
|
|
4044
|
+
}
|
|
3699
4045
|
if (constraints) {
|
|
3700
4046
|
if (constraints.focusAreas && constraints.focusAreas.length > 0) {
|
|
3701
4047
|
constraintParts.push(`**FOCUS AREAS (limit exploration to these paths):**\n${constraints.focusAreas.map(a => ` - ${a}`).join('\n')}`);
|
|
@@ -3711,11 +4057,19 @@ export class ProductionAgent {
|
|
|
3711
4057
|
}
|
|
3712
4058
|
}
|
|
3713
4059
|
const constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
|
|
4060
|
+
// Build delegation-enhanced system prompt
|
|
4061
|
+
let delegationContext = '';
|
|
4062
|
+
if (this.lastComplexityAssessment && this.lastComplexityAssessment.tier !== 'simple') {
|
|
4063
|
+
const spec = createMinimalDelegationSpec(task, agentName);
|
|
4064
|
+
delegationContext = '\n\n' + buildDelegationPrompt(spec);
|
|
4065
|
+
}
|
|
4066
|
+
// Quality self-assessment prompt for subagent
|
|
4067
|
+
const qualityPrompt = '\n\n' + getSubagentQualityPrompt();
|
|
3714
4068
|
// Build subagent system prompt with subagent-specific plan mode addition
|
|
3715
4069
|
const parentMode = this.getMode();
|
|
3716
4070
|
const subagentSystemPrompt = parentMode === 'plan'
|
|
3717
|
-
? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}`
|
|
3718
|
-
: `${agentDef.systemPrompt}${blackboardContext}${constraintContext}`;
|
|
4071
|
+
? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`
|
|
4072
|
+
: `${agentDef.systemPrompt}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`;
|
|
3719
4073
|
// Allocate budget from pool (or use default) — track allocation ID for release later
|
|
3720
4074
|
const pooledBudget = this.getSubagentBudget(agentName, constraints);
|
|
3721
4075
|
const poolAllocationId = pooledBudget.allocationId;
|
|
@@ -3765,6 +4119,8 @@ export class ProductionAgent {
|
|
|
3765
4119
|
builtIn: { logging: false, timing: false, metrics: false },
|
|
3766
4120
|
custom: [],
|
|
3767
4121
|
},
|
|
4122
|
+
// Pass unique agentId for blackboard coordination and tracing
|
|
4123
|
+
agentId,
|
|
3768
4124
|
// Share parent's blackboard for coordination between parallel subagents
|
|
3769
4125
|
blackboard: this.blackboard || undefined,
|
|
3770
4126
|
// Share parent's file cache to eliminate redundant reads across agents
|
|
@@ -3925,6 +4281,25 @@ export class ProductionAgent {
|
|
|
3925
4281
|
},
|
|
3926
4282
|
structured,
|
|
3927
4283
|
};
|
|
4284
|
+
// Save full output to subagent output store (avoids telephone problem)
|
|
4285
|
+
if (this.subagentOutputStore) {
|
|
4286
|
+
const outputEntry = {
|
|
4287
|
+
id: agentId,
|
|
4288
|
+
agentId,
|
|
4289
|
+
agentName,
|
|
4290
|
+
task,
|
|
4291
|
+
fullOutput: finalOutput,
|
|
4292
|
+
structured,
|
|
4293
|
+
filesModified: [],
|
|
4294
|
+
filesCreated: [],
|
|
4295
|
+
timestamp: new Date(),
|
|
4296
|
+
tokensUsed: result.metrics.totalTokens,
|
|
4297
|
+
durationMs: duration,
|
|
4298
|
+
};
|
|
4299
|
+
const storeId = this.subagentOutputStore.save(outputEntry);
|
|
4300
|
+
// Attach reference so downstream consumers can retrieve full output
|
|
4301
|
+
spawnResultFinal.outputStoreId = storeId;
|
|
4302
|
+
}
|
|
3928
4303
|
if (workerResultId && this.store?.hasWorkerResultsFeature()) {
|
|
3929
4304
|
try {
|
|
3930
4305
|
this.store.completeWorkerResult(workerResultId, {
|
|
@@ -4229,9 +4604,41 @@ export class ProductionAgent {
|
|
|
4229
4604
|
count: tasks.length,
|
|
4230
4605
|
agents: tasks.map(t => t.agent),
|
|
4231
4606
|
});
|
|
4232
|
-
//
|
|
4233
|
-
|
|
4234
|
-
|
|
4607
|
+
// Use DynamicBudgetPool for parallel spawns (prevents child starvation,
|
|
4608
|
+
// enables priority-based allocation). Falls back to regular pool for single tasks.
|
|
4609
|
+
let settled;
|
|
4610
|
+
const originalPool = this.budgetPool;
|
|
4611
|
+
// SubagentSupervisor for unified monitoring of concurrent subagents
|
|
4612
|
+
const supervisor = tasks.length > 1 ? createSubagentSupervisor() : null;
|
|
4613
|
+
if (this.budgetPool && tasks.length > 1) {
|
|
4614
|
+
// Swap to DynamicBudgetPool for this parallel batch
|
|
4615
|
+
const poolStats = this.budgetPool.getStats();
|
|
4616
|
+
const dynamicPool = createDynamicBudgetPool(poolStats.tokensRemaining, 0.1);
|
|
4617
|
+
dynamicPool.setExpectedChildren(tasks.length);
|
|
4618
|
+
// Temporarily replace the budget pool so spawnAgent's reserve() uses the dynamic one
|
|
4619
|
+
this.budgetPool = dynamicPool;
|
|
4620
|
+
try {
|
|
4621
|
+
const promises = tasks.map(({ agent, task }) => {
|
|
4622
|
+
const spawnPromise = this.spawnAgent(agent, task);
|
|
4623
|
+
// Register with supervisor for monitoring
|
|
4624
|
+
if (supervisor) {
|
|
4625
|
+
const handle = createSubagentHandle(`parallel-${agent}-${Date.now()}`, agent, task, spawnPromise, {});
|
|
4626
|
+
supervisor.add(handle);
|
|
4627
|
+
}
|
|
4628
|
+
return spawnPromise;
|
|
4629
|
+
});
|
|
4630
|
+
settled = await Promise.allSettled(promises);
|
|
4631
|
+
}
|
|
4632
|
+
finally {
|
|
4633
|
+
this.budgetPool = originalPool;
|
|
4634
|
+
supervisor?.stop();
|
|
4635
|
+
}
|
|
4636
|
+
}
|
|
4637
|
+
else {
|
|
4638
|
+
// Single task or no pool - use standard sequential allocation
|
|
4639
|
+
const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
|
|
4640
|
+
settled = await Promise.allSettled(promises);
|
|
4641
|
+
}
|
|
4235
4642
|
// Convert settled results to SpawnResult array
|
|
4236
4643
|
const results = settled.map((result, i) => {
|
|
4237
4644
|
if (result.status === 'fulfilled') {
|
|
@@ -4939,8 +5346,19 @@ If the task is a simple question or doesn't need specialized handling, set bestA
|
|
|
4939
5346
|
this.unsubscribers = [];
|
|
4940
5347
|
// Flush trace collector before cleanup
|
|
4941
5348
|
await this.traceCollector?.flush();
|
|
4942
|
-
//
|
|
4943
|
-
|
|
5349
|
+
// Per-agent blackboard cleanup: release only this agent's claims and subscriptions
|
|
5350
|
+
// so parallel siblings don't lose their data. Only root agent clears everything.
|
|
5351
|
+
if (this.blackboard) {
|
|
5352
|
+
if (this.parentIterations > 0 && this.agentId) {
|
|
5353
|
+
// Subagent: release only our claims and subscriptions
|
|
5354
|
+
this.blackboard.releaseAll(this.agentId);
|
|
5355
|
+
this.blackboard.unsubscribeAgent(this.agentId);
|
|
5356
|
+
}
|
|
5357
|
+
else {
|
|
5358
|
+
// Root agent: full clear
|
|
5359
|
+
this.blackboard.clear();
|
|
5360
|
+
}
|
|
5361
|
+
}
|
|
4944
5362
|
// Wait for any pending init before cleanup
|
|
4945
5363
|
if (this.initPromises.length > 0) {
|
|
4946
5364
|
try {
|