attocode 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -1
- package/README.md +65 -5
- package/dist/src/adapters.d.ts.map +1 -1
- package/dist/src/adapters.js +15 -11
- package/dist/src/adapters.js.map +1 -1
- package/dist/src/agent.d.ts +38 -98
- package/dist/src/agent.d.ts.map +1 -1
- package/dist/src/agent.js +505 -2892
- package/dist/src/agent.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +2 -1
- package/dist/src/cli.js.map +1 -1
- package/dist/src/commands/handler.d.ts.map +1 -1
- package/dist/src/commands/handler.js +11 -3
- package/dist/src/commands/handler.js.map +1 -1
- package/dist/src/commands/init-commands.d.ts.map +1 -1
- package/dist/src/commands/init-commands.js +16 -1
- package/dist/src/commands/init-commands.js.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/init.js +31 -0
- package/dist/src/commands/init.js.map +1 -1
- package/dist/src/config/base-types.d.ts +45 -0
- package/dist/src/config/base-types.d.ts.map +1 -0
- package/dist/src/config/base-types.js +9 -0
- package/dist/src/config/base-types.js.map +1 -0
- package/dist/src/config/config-manager.d.ts +35 -0
- package/dist/src/config/config-manager.d.ts.map +1 -0
- package/dist/src/config/config-manager.js +108 -0
- package/dist/src/config/config-manager.js.map +1 -0
- package/dist/src/config/index.d.ts +4 -0
- package/dist/src/config/index.d.ts.map +1 -0
- package/dist/src/config/index.js +3 -0
- package/dist/src/config/index.js.map +1 -0
- package/dist/src/config/schema.d.ts +1546 -0
- package/dist/src/config/schema.d.ts.map +1 -0
- package/dist/src/config/schema.js +268 -0
- package/dist/src/config/schema.js.map +1 -0
- package/dist/src/config.d.ts +4 -1
- package/dist/src/config.d.ts.map +1 -1
- package/dist/src/config.js +8 -12
- package/dist/src/config.js.map +1 -1
- package/dist/src/core/agent-state-machine.d.ts +131 -0
- package/dist/src/core/agent-state-machine.d.ts.map +1 -0
- package/dist/src/core/agent-state-machine.js +302 -0
- package/dist/src/core/agent-state-machine.js.map +1 -0
- package/dist/src/core/base-manager.d.ts +79 -0
- package/dist/src/core/base-manager.d.ts.map +1 -0
- package/dist/src/core/base-manager.js +170 -0
- package/dist/src/core/base-manager.js.map +1 -0
- package/dist/src/core/completion-analyzer.d.ts +15 -0
- package/dist/src/core/completion-analyzer.d.ts.map +1 -0
- package/dist/src/core/completion-analyzer.js +53 -0
- package/dist/src/core/completion-analyzer.js.map +1 -0
- package/dist/src/core/execution-loop.d.ts +46 -0
- package/dist/src/core/execution-loop.d.ts.map +1 -0
- package/dist/src/core/execution-loop.js +1258 -0
- package/dist/src/core/execution-loop.js.map +1 -0
- package/dist/src/core/index.d.ts +7 -0
- package/dist/src/core/index.d.ts.map +1 -1
- package/dist/src/core/index.js +9 -0
- package/dist/src/core/index.js.map +1 -1
- package/dist/src/core/process-handlers.d.ts.map +1 -1
- package/dist/src/core/process-handlers.js +14 -0
- package/dist/src/core/process-handlers.js.map +1 -1
- package/dist/src/core/protocol/types.d.ts +12 -12
- package/dist/src/core/response-handler.d.ts +16 -0
- package/dist/src/core/response-handler.d.ts.map +1 -0
- package/dist/src/core/response-handler.js +234 -0
- package/dist/src/core/response-handler.js.map +1 -0
- package/dist/src/core/subagent-spawner.d.ts +43 -0
- package/dist/src/core/subagent-spawner.d.ts.map +1 -0
- package/dist/src/core/subagent-spawner.js +966 -0
- package/dist/src/core/subagent-spawner.js.map +1 -0
- package/dist/src/core/tool-executor.d.ts +59 -0
- package/dist/src/core/tool-executor.d.ts.map +1 -0
- package/dist/src/core/tool-executor.js +677 -0
- package/dist/src/core/tool-executor.js.map +1 -0
- package/dist/src/core/types.d.ts +133 -0
- package/dist/src/core/types.d.ts.map +1 -0
- package/dist/src/core/types.js +12 -0
- package/dist/src/core/types.js.map +1 -0
- package/dist/src/defaults.d.ts +2 -2
- package/dist/src/defaults.d.ts.map +1 -1
- package/dist/src/defaults.js +29 -1
- package/dist/src/defaults.js.map +1 -1
- package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
- package/dist/src/integrations/auto-compaction.js +3 -2
- package/dist/src/integrations/auto-compaction.js.map +1 -1
- package/dist/src/integrations/budget-pool.d.ts +7 -0
- package/dist/src/integrations/budget-pool.d.ts.map +1 -1
- package/dist/src/integrations/budget-pool.js +43 -0
- package/dist/src/integrations/budget-pool.js.map +1 -1
- package/dist/src/integrations/codebase-ast.d.ts +52 -0
- package/dist/src/integrations/codebase-ast.d.ts.map +1 -0
- package/dist/src/integrations/codebase-ast.js +457 -0
- package/dist/src/integrations/codebase-ast.js.map +1 -0
- package/dist/src/integrations/codebase-context.d.ts +18 -0
- package/dist/src/integrations/codebase-context.d.ts.map +1 -1
- package/dist/src/integrations/codebase-context.js +197 -17
- package/dist/src/integrations/codebase-context.js.map +1 -1
- package/dist/src/integrations/compaction.d.ts.map +1 -1
- package/dist/src/integrations/compaction.js +14 -6
- package/dist/src/integrations/compaction.js.map +1 -1
- package/dist/src/integrations/context-engineering.d.ts +8 -0
- package/dist/src/integrations/context-engineering.d.ts.map +1 -1
- package/dist/src/integrations/context-engineering.js +19 -0
- package/dist/src/integrations/context-engineering.js.map +1 -1
- package/dist/src/integrations/economics.d.ts +25 -1
- package/dist/src/integrations/economics.d.ts.map +1 -1
- package/dist/src/integrations/economics.js +217 -38
- package/dist/src/integrations/economics.js.map +1 -1
- package/dist/src/integrations/edit-validator.d.ts +30 -0
- package/dist/src/integrations/edit-validator.d.ts.map +1 -0
- package/dist/src/integrations/edit-validator.js +85 -0
- package/dist/src/integrations/edit-validator.js.map +1 -0
- package/dist/src/integrations/file-cache.d.ts +7 -0
- package/dist/src/integrations/file-cache.d.ts.map +1 -1
- package/dist/src/integrations/file-cache.js +54 -0
- package/dist/src/integrations/file-cache.js.map +1 -1
- package/dist/src/integrations/health-check.d.ts.map +1 -1
- package/dist/src/integrations/health-check.js +3 -2
- package/dist/src/integrations/health-check.js.map +1 -1
- package/dist/src/integrations/hierarchical-config.d.ts +3 -0
- package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
- package/dist/src/integrations/hierarchical-config.js +3 -0
- package/dist/src/integrations/hierarchical-config.js.map +1 -1
- package/dist/src/integrations/hooks.d.ts +2 -0
- package/dist/src/integrations/hooks.d.ts.map +1 -1
- package/dist/src/integrations/hooks.js +99 -15
- package/dist/src/integrations/hooks.js.map +1 -1
- package/dist/src/integrations/index.d.ts +7 -0
- package/dist/src/integrations/index.d.ts.map +1 -1
- package/dist/src/integrations/index.js +9 -1
- package/dist/src/integrations/index.js.map +1 -1
- package/dist/src/integrations/logger.d.ts +104 -0
- package/dist/src/integrations/logger.d.ts.map +1 -0
- package/dist/src/integrations/logger.js +219 -0
- package/dist/src/integrations/logger.js.map +1 -0
- package/dist/src/integrations/lsp.d.ts.map +1 -1
- package/dist/src/integrations/lsp.js +5 -4
- package/dist/src/integrations/lsp.js.map +1 -1
- package/dist/src/integrations/mcp-client.d.ts.map +1 -1
- package/dist/src/integrations/mcp-client.js +8 -7
- package/dist/src/integrations/mcp-client.js.map +1 -1
- package/dist/src/integrations/observability.d.ts.map +1 -1
- package/dist/src/integrations/observability.js +5 -4
- package/dist/src/integrations/observability.js.map +1 -1
- package/dist/src/integrations/openrouter-pricing.d.ts.map +1 -1
- package/dist/src/integrations/openrouter-pricing.js +4 -3
- package/dist/src/integrations/openrouter-pricing.js.map +1 -1
- package/dist/src/integrations/persistence.d.ts.map +1 -1
- package/dist/src/integrations/persistence.js +5 -4
- package/dist/src/integrations/persistence.js.map +1 -1
- package/dist/src/integrations/planning.d.ts.map +1 -1
- package/dist/src/integrations/planning.js +5 -4
- package/dist/src/integrations/planning.js.map +1 -1
- package/dist/src/integrations/retry.d.ts +1 -0
- package/dist/src/integrations/retry.d.ts.map +1 -1
- package/dist/src/integrations/retry.js.map +1 -1
- package/dist/src/integrations/routing.d.ts.map +1 -1
- package/dist/src/integrations/routing.js +2 -1
- package/dist/src/integrations/routing.js.map +1 -1
- package/dist/src/integrations/safety.d.ts.map +1 -1
- package/dist/src/integrations/safety.js +13 -13
- package/dist/src/integrations/safety.js.map +1 -1
- package/dist/src/integrations/sandbox/docker.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/docker.js +2 -1
- package/dist/src/integrations/sandbox/docker.js.map +1 -1
- package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/index.js +5 -4
- package/dist/src/integrations/sandbox/index.js.map +1 -1
- package/dist/src/integrations/session-store.d.ts +1 -0
- package/dist/src/integrations/session-store.d.ts.map +1 -1
- package/dist/src/integrations/session-store.js +1 -0
- package/dist/src/integrations/session-store.js.map +1 -1
- package/dist/src/integrations/shared-blackboard.d.ts +3 -0
- package/dist/src/integrations/shared-blackboard.d.ts.map +1 -1
- package/dist/src/integrations/shared-blackboard.js +47 -0
- package/dist/src/integrations/shared-blackboard.js.map +1 -1
- package/dist/src/integrations/smart-decomposer.d.ts +27 -0
- package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
- package/dist/src/integrations/smart-decomposer.js +414 -30
- package/dist/src/integrations/smart-decomposer.js.map +1 -1
- package/dist/src/integrations/sqlite-store.d.ts +2 -0
- package/dist/src/integrations/sqlite-store.d.ts.map +1 -1
- package/dist/src/integrations/sqlite-store.js +18 -6
- package/dist/src/integrations/sqlite-store.js.map +1 -1
- package/dist/src/integrations/swarm/failure-classifier.d.ts +11 -0
- package/dist/src/integrations/swarm/failure-classifier.d.ts.map +1 -0
- package/dist/src/integrations/swarm/failure-classifier.js +95 -0
- package/dist/src/integrations/swarm/failure-classifier.js.map +1 -0
- package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
- package/dist/src/integrations/swarm/model-selector.js +2 -1
- package/dist/src/integrations/swarm/model-selector.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts +8 -0
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.js +95 -0
- package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +74 -0
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.js +37 -0
- package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.d.ts +3 -0
- package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.js +1 -1
- package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +23 -0
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.js +530 -55
- package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-state-store.d.ts +4 -1
- package/dist/src/integrations/swarm/swarm-state-store.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-state-store.js +8 -1
- package/dist/src/integrations/swarm/swarm-state-store.js.map +1 -1
- package/dist/src/integrations/swarm/task-queue.d.ts +10 -0
- package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
- package/dist/src/integrations/swarm/task-queue.js +36 -1
- package/dist/src/integrations/swarm/task-queue.js.map +1 -1
- package/dist/src/integrations/swarm/types.d.ts +41 -0
- package/dist/src/integrations/swarm/types.d.ts.map +1 -1
- package/dist/src/integrations/swarm/types.js +9 -0
- package/dist/src/integrations/swarm/types.js.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.d.ts +12 -2
- package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.js +53 -4
- package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
- package/dist/src/integrations/task-manager.d.ts +33 -1
- package/dist/src/integrations/task-manager.d.ts.map +1 -1
- package/dist/src/integrations/task-manager.js +78 -4
- package/dist/src/integrations/task-manager.js.map +1 -1
- package/dist/src/main.js +83 -32
- package/dist/src/main.js.map +1 -1
- package/dist/src/modes/repl.d.ts.map +1 -1
- package/dist/src/modes/repl.js +40 -8
- package/dist/src/modes/repl.js.map +1 -1
- package/dist/src/modes/tui.d.ts.map +1 -1
- package/dist/src/modes/tui.js +36 -6
- package/dist/src/modes/tui.js.map +1 -1
- package/dist/src/observability/tracer.d.ts.map +1 -1
- package/dist/src/observability/tracer.js +2 -1
- package/dist/src/observability/tracer.js.map +1 -1
- package/dist/src/persistence/schema.d.ts.map +1 -1
- package/dist/src/persistence/schema.js +11 -0
- package/dist/src/persistence/schema.js.map +1 -1
- package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
- package/dist/src/providers/adapters/anthropic.js +3 -2
- package/dist/src/providers/adapters/anthropic.js.map +1 -1
- package/dist/src/providers/adapters/openai.d.ts.map +1 -1
- package/dist/src/providers/adapters/openai.js +3 -2
- package/dist/src/providers/adapters/openai.js.map +1 -1
- package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
- package/dist/src/providers/adapters/openrouter.js +11 -11
- package/dist/src/providers/adapters/openrouter.js.map +1 -1
- package/dist/src/providers/circuit-breaker.d.ts +1 -0
- package/dist/src/providers/circuit-breaker.d.ts.map +1 -1
- package/dist/src/providers/circuit-breaker.js.map +1 -1
- package/dist/src/providers/provider.d.ts.map +1 -1
- package/dist/src/providers/provider.js +2 -1
- package/dist/src/providers/provider.js.map +1 -1
- package/dist/src/providers/resilient-provider.d.ts.map +1 -1
- package/dist/src/providers/resilient-provider.js +2 -1
- package/dist/src/providers/resilient-provider.js.map +1 -1
- package/dist/src/session-picker.d.ts.map +1 -1
- package/dist/src/session-picker.js +40 -5
- package/dist/src/session-picker.js.map +1 -1
- package/dist/src/shared/budget-tracker.d.ts +65 -0
- package/dist/src/shared/budget-tracker.d.ts.map +1 -0
- package/dist/src/shared/budget-tracker.js +128 -0
- package/dist/src/shared/budget-tracker.js.map +1 -0
- package/dist/src/shared/context-engine.d.ts +64 -0
- package/dist/src/shared/context-engine.d.ts.map +1 -0
- package/dist/src/shared/context-engine.js +117 -0
- package/dist/src/shared/context-engine.js.map +1 -0
- package/dist/src/shared/index.d.ts +12 -0
- package/dist/src/shared/index.d.ts.map +1 -0
- package/dist/src/shared/index.js +12 -0
- package/dist/src/shared/index.js.map +1 -0
- package/dist/src/shared/persistence.d.ts +57 -0
- package/dist/src/shared/persistence.d.ts.map +1 -0
- package/dist/src/shared/persistence.js +168 -0
- package/dist/src/shared/persistence.js.map +1 -0
- package/dist/src/shared/shared-context-state.d.ts +89 -0
- package/dist/src/shared/shared-context-state.d.ts.map +1 -0
- package/dist/src/shared/shared-context-state.js +175 -0
- package/dist/src/shared/shared-context-state.js.map +1 -0
- package/dist/src/shared/shared-economics-state.d.ts +61 -0
- package/dist/src/shared/shared-economics-state.d.ts.map +1 -0
- package/dist/src/shared/shared-economics-state.js +100 -0
- package/dist/src/shared/shared-economics-state.js.map +1 -0
- package/dist/src/tools/bash.d.ts +3 -3
- package/dist/src/tools/bash.d.ts.map +1 -1
- package/dist/src/tools/bash.js +2 -1
- package/dist/src/tools/bash.js.map +1 -1
- package/dist/src/tools/file.d.ts +3 -3
- package/dist/src/tools/permission.d.ts.map +1 -1
- package/dist/src/tools/permission.js +6 -5
- package/dist/src/tools/permission.js.map +1 -1
- package/dist/src/tools/types.d.ts +1 -0
- package/dist/src/tools/types.d.ts.map +1 -1
- package/dist/src/tools/types.js.map +1 -1
- package/dist/src/tracing/trace-collector.d.ts +125 -0
- package/dist/src/tracing/trace-collector.d.ts.map +1 -1
- package/dist/src/tracing/trace-collector.js +112 -5
- package/dist/src/tracing/trace-collector.js.map +1 -1
- package/dist/src/tracing/types.d.ts +96 -1
- package/dist/src/tracing/types.d.ts.map +1 -1
- package/dist/src/tracing/types.js.map +1 -1
- package/dist/src/tricks/failure-evidence.d.ts.map +1 -1
- package/dist/src/tricks/failure-evidence.js +2 -1
- package/dist/src/tricks/failure-evidence.js.map +1 -1
- package/dist/src/tui/app.d.ts +13 -0
- package/dist/src/tui/app.d.ts.map +1 -1
- package/dist/src/tui/app.js +129 -15
- package/dist/src/tui/app.js.map +1 -1
- package/dist/src/tui/components/ErrorBoundary.d.ts.map +1 -1
- package/dist/src/tui/components/ErrorBoundary.js +3 -2
- package/dist/src/tui/components/ErrorBoundary.js.map +1 -1
- package/dist/src/tui/event-display.d.ts.map +1 -1
- package/dist/src/tui/event-display.js +36 -62
- package/dist/src/tui/event-display.js.map +1 -1
- package/dist/src/tui/index.d.ts +4 -0
- package/dist/src/tui/index.d.ts.map +1 -1
- package/dist/src/tui/index.js +17 -0
- package/dist/src/tui/index.js.map +1 -1
- package/dist/src/types.d.ts +143 -1
- package/dist/src/types.d.ts.map +1 -1
- package/package.json +18 -3
package/dist/src/agent.js
CHANGED
|
@@ -18,20 +18,16 @@
|
|
|
18
18
|
* - Execution Policies (Lesson 23)
|
|
19
19
|
* - Thread Management (Lesson 24)
|
|
20
20
|
*/
|
|
21
|
-
import
|
|
22
|
-
import {
|
|
21
|
+
import * as path from 'node:path';
|
|
22
|
+
import { buildConfig, isFeatureEnabled, getEnabledFeatures, } from './defaults.js';
|
|
23
|
+
import { createModeManager, formatModeList, parseMode, } from './modes.js';
|
|
23
24
|
import { createLSPFileTools, } from './agent-tools/index.js';
|
|
24
|
-
import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET,
|
|
25
|
-
|
|
26
|
-
classifyComplexity, getScalingGuidance, buildDelegationPrompt, createMinimalDelegationSpec, getSubagentQualityPrompt, ToolRecommendationEngine, createToolRecommendationEngine, createInjectionBudgetManager,
|
|
27
|
-
// Phase 3: Advanced
|
|
28
|
-
getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, createSubagentSupervisor, createSubagentHandle, } from './integrations/index.js';
|
|
29
|
-
import { mergeApprovalScopeWithProfile, resolvePolicyProfile, } from './integrations/policy-engine.js';
|
|
30
|
-
// Lesson 26: Tracing & Evaluation integration
|
|
25
|
+
import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, AgentRegistry, formatAgentList, createCancellationManager, isCancellationError, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, createCodebaseContext, buildContextFromChunks, generateLightweightRepoMap, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate, classifyComplexity, getScalingGuidance, createToolRecommendationEngine, createInjectionBudgetManager, getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, } from './integrations/index.js';
|
|
26
|
+
import { resolvePolicyProfile, } from './integrations/policy-engine.js';
|
|
31
27
|
import { createTraceCollector } from './tracing/trace-collector.js';
|
|
32
|
-
// Model registry for context window limits
|
|
33
28
|
import { modelRegistry } from './costs/index.js';
|
|
34
29
|
import { getModelContextLength } from './integrations/openrouter-pricing.js';
|
|
30
|
+
import { createComponentLogger } from './integrations/logger.js';
|
|
35
31
|
// Spawn agent tools for LLM-driven subagent delegation
|
|
36
32
|
import { createBoundSpawnAgentTool, createBoundSpawnAgentsParallelTool, } from './tools/agent.js';
|
|
37
33
|
// Task tools for Claude Code-style task management
|
|
@@ -43,115 +39,15 @@ import { createTaskTools, } from './tools/tasks.js';
|
|
|
43
39
|
* Tools that are safe to execute in parallel (read-only, no side effects).
|
|
44
40
|
* These tools don't modify state, so running them concurrently is safe.
|
|
45
41
|
*/
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
'write_file', 'edit_file',
|
|
56
|
-
]);
|
|
57
|
-
/**
|
|
58
|
-
* Extract the target file path from a tool call's arguments.
|
|
59
|
-
* Returns null if no file path can be determined.
|
|
60
|
-
*/
|
|
61
|
-
export function extractToolFilePath(toolCall) {
|
|
62
|
-
// Check common argument patterns
|
|
63
|
-
const args = toolCall;
|
|
64
|
-
for (const key of ['path', 'file_path', 'filename', 'file']) {
|
|
65
|
-
if (typeof args[key] === 'string')
|
|
66
|
-
return args[key];
|
|
67
|
-
}
|
|
68
|
-
// Check nested args object
|
|
69
|
-
if (args.args && typeof args.args === 'object') {
|
|
70
|
-
const nested = args.args;
|
|
71
|
-
for (const key of ['path', 'file_path', 'filename', 'file']) {
|
|
72
|
-
if (typeof nested[key] === 'string')
|
|
73
|
-
return nested[key];
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
// Check input object (common in structured tool calls)
|
|
77
|
-
if (args.input && typeof args.input === 'object') {
|
|
78
|
-
const input = args.input;
|
|
79
|
-
for (const key of ['path', 'file_path', 'filename', 'file']) {
|
|
80
|
-
if (typeof input[key] === 'string')
|
|
81
|
-
return input[key];
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return null;
|
|
85
|
-
}
|
|
86
|
-
/**
|
|
87
|
-
* Check if a conditionally-parallel tool call conflicts with any tool
|
|
88
|
-
* in the current accumulator (same file path).
|
|
89
|
-
*/
|
|
90
|
-
function hasFileConflict(toolCall, accumulator) {
|
|
91
|
-
const path = extractToolFilePath(toolCall);
|
|
92
|
-
if (!path)
|
|
93
|
-
return true; // Can't determine path → assume conflict
|
|
94
|
-
for (const existing of accumulator) {
|
|
95
|
-
const existingPath = extractToolFilePath(existing);
|
|
96
|
-
if (existingPath === path)
|
|
97
|
-
return true; // Same file → conflict
|
|
98
|
-
}
|
|
99
|
-
return false;
|
|
100
|
-
}
|
|
101
|
-
/**
|
|
102
|
-
* Groups tool calls into batches for parallel/sequential execution.
|
|
103
|
-
* Uses accumulate-and-flush: parallelizable tools accumulate until a
|
|
104
|
-
* non-parallelizable tool flushes them as a batch. This produces optimal
|
|
105
|
-
* batching even for non-consecutive parallelizable tools.
|
|
106
|
-
*
|
|
107
|
-
* Enhanced with conditional parallelism: write_file/edit_file on
|
|
108
|
-
* DIFFERENT files can be batched together for parallel execution.
|
|
109
|
-
*
|
|
110
|
-
* Example: [read1, read2, write, read3, grep] → [[read1, read2], [write], [read3, grep]]
|
|
111
|
-
* (Previous algorithm produced 4 batches; this produces 3)
|
|
112
|
-
*
|
|
113
|
-
* Enhanced: [write_a, write_b, write_a] → [[write_a, write_b], [write_a]]
|
|
114
|
-
* (Different files parallelized, same file sequential)
|
|
115
|
-
*/
|
|
116
|
-
export function groupToolCallsIntoBatches(toolCalls, isParallelizable = (tc) => PARALLELIZABLE_TOOLS.has(tc.name), isConditionallyParallel = (tc) => CONDITIONALLY_PARALLEL_TOOLS.has(tc.name)) {
|
|
117
|
-
if (toolCalls.length === 0)
|
|
118
|
-
return [];
|
|
119
|
-
const batches = [];
|
|
120
|
-
let parallelAccum = [];
|
|
121
|
-
for (const toolCall of toolCalls) {
|
|
122
|
-
if (isParallelizable(toolCall)) {
|
|
123
|
-
parallelAccum.push(toolCall);
|
|
124
|
-
}
|
|
125
|
-
else if (isConditionallyParallel(toolCall)) {
|
|
126
|
-
// Can parallelize if no file conflict with existing accumulator
|
|
127
|
-
if (!hasFileConflict(toolCall, parallelAccum)) {
|
|
128
|
-
parallelAccum.push(toolCall);
|
|
129
|
-
}
|
|
130
|
-
else {
|
|
131
|
-
// Conflict: flush current batch, start new one with this tool
|
|
132
|
-
if (parallelAccum.length > 0) {
|
|
133
|
-
batches.push(parallelAccum);
|
|
134
|
-
parallelAccum = [];
|
|
135
|
-
}
|
|
136
|
-
parallelAccum.push(toolCall);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
else {
|
|
140
|
-
// Flush any accumulated parallel tools as a single batch
|
|
141
|
-
if (parallelAccum.length > 0) {
|
|
142
|
-
batches.push(parallelAccum);
|
|
143
|
-
parallelAccum = [];
|
|
144
|
-
}
|
|
145
|
-
// Non-parallelizable tool gets its own batch
|
|
146
|
-
batches.push([toolCall]);
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
// Flush remaining parallel tools
|
|
150
|
-
if (parallelAccum.length > 0) {
|
|
151
|
-
batches.push(parallelAccum);
|
|
152
|
-
}
|
|
153
|
-
return batches;
|
|
154
|
-
}
|
|
42
|
+
const log = createComponentLogger('ProductionAgent');
|
|
43
|
+
// Tool-batching constants (canonical home: core/tool-executor.ts)
|
|
44
|
+
import { PARALLELIZABLE_TOOLS, CONDITIONALLY_PARALLEL_TOOLS, extractToolFilePath, groupToolCallsIntoBatches, } from './core/index.js';
|
|
45
|
+
export { PARALLELIZABLE_TOOLS, CONDITIONALLY_PARALLEL_TOOLS, extractToolFilePath, groupToolCallsIntoBatches };
|
|
46
|
+
// Extracted core modules (Phase 2.1 — thin orchestrator delegates)
|
|
47
|
+
import { executeDirectly as coreExecuteDirectly, spawnAgent as coreSpawnAgent, spawnAgentsParallel as coreSpawnAgentsParallel, } from './core/index.js';
|
|
48
|
+
// Phase 2.2: Agent State Machine
|
|
49
|
+
import { createAgentStateMachine } from './core/agent-state-machine.js';
|
|
50
|
+
import { detectIncompleteActionResponse } from './core/completion-analyzer.js';
|
|
155
51
|
/**
|
|
156
52
|
* Production-ready agent that composes all features.
|
|
157
53
|
*/
|
|
@@ -195,6 +91,8 @@ export class ProductionAgent {
|
|
|
195
91
|
agentId;
|
|
196
92
|
blackboard = null;
|
|
197
93
|
fileCache = null;
|
|
94
|
+
_sharedContextState = null;
|
|
95
|
+
_sharedEconomicsState = null;
|
|
198
96
|
budgetPool = null;
|
|
199
97
|
taskManager = null;
|
|
200
98
|
store = null;
|
|
@@ -207,11 +105,13 @@ export class ProductionAgent {
|
|
|
207
105
|
subagentOutputStore = null;
|
|
208
106
|
autoCheckpointManager = null;
|
|
209
107
|
toolRecommendation = null;
|
|
108
|
+
stateMachine = null;
|
|
210
109
|
lastComplexityAssessment = null;
|
|
110
|
+
lastSystemPromptLength = 0;
|
|
211
111
|
// Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
|
|
212
112
|
// Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
|
|
213
113
|
spawnedTasks = new Map();
|
|
214
|
-
|
|
114
|
+
// SPAWN_DEDUP_WINDOW_MS moved to core/subagent-spawner.ts
|
|
215
115
|
// Parent iteration tracking for total budget calculation
|
|
216
116
|
parentIterations = 0;
|
|
217
117
|
// External cancellation token (for subagent timeout propagation)
|
|
@@ -301,6 +201,9 @@ export class ProductionAgent {
|
|
|
301
201
|
const parentBudgetTokens = baseBudget.maxTokens ?? STANDARD_BUDGET.maxTokens ?? 200000;
|
|
302
202
|
this.budgetPool = createBudgetPool(parentBudgetTokens, 0.25, 100000);
|
|
303
203
|
}
|
|
204
|
+
// Shared state for swarm workers (passed from orchestrator via config)
|
|
205
|
+
this._sharedContextState = userConfig.sharedContextState ?? null;
|
|
206
|
+
this._sharedEconomicsState = userConfig.sharedEconomicsState ?? null;
|
|
304
207
|
// Initialize enabled features
|
|
305
208
|
this.initializeFeatures();
|
|
306
209
|
}
|
|
@@ -311,7 +214,7 @@ export class ProductionAgent {
|
|
|
311
214
|
// Debug output only when DEBUG env var is set
|
|
312
215
|
if (process.env.DEBUG) {
|
|
313
216
|
const features = getEnabledFeatures(this.config);
|
|
314
|
-
|
|
217
|
+
log.debug('Initializing with features', { features: features.join(', ') });
|
|
315
218
|
}
|
|
316
219
|
// Hooks & Plugins
|
|
317
220
|
if (isFeatureEnabled(this.config.hooks) && isFeatureEnabled(this.config.plugins)) {
|
|
@@ -415,7 +318,7 @@ export class ProductionAgent {
|
|
|
415
318
|
});
|
|
416
319
|
// Load rules asynchronously - tracked for ensureReady()
|
|
417
320
|
this.initPromises.push(this.rules.loadRules().catch(err => {
|
|
418
|
-
|
|
321
|
+
log.warn('Failed to load rules', { error: String(err) });
|
|
419
322
|
}));
|
|
420
323
|
}
|
|
421
324
|
// Economics System (Token Budget) - always enabled
|
|
@@ -426,7 +329,24 @@ export class ProductionAgent {
|
|
|
426
329
|
// Use maxIterations from config as absolute safety cap
|
|
427
330
|
maxIterations: this.config.maxIterations,
|
|
428
331
|
targetIterations: Math.min(baseBudget.targetIterations ?? 20, this.config.maxIterations),
|
|
332
|
+
}, this._sharedEconomicsState ?? undefined, this.agentId);
|
|
333
|
+
// Phase 2.2: Agent State Machine - formalizes phase tracking
|
|
334
|
+
// Always enabled - provides structured phase transitions with metrics
|
|
335
|
+
this.stateMachine = createAgentStateMachine();
|
|
336
|
+
// Forward state machine phase transitions as subagent.phase events
|
|
337
|
+
const phaseMap = {
|
|
338
|
+
exploring: 'exploring', planning: 'planning', acting: 'executing', verifying: 'completing',
|
|
339
|
+
};
|
|
340
|
+
const unsubStateMachine = this.stateMachine.subscribe(event => {
|
|
341
|
+
if (event.type === 'phase.changed') {
|
|
342
|
+
this.emit({
|
|
343
|
+
type: 'subagent.phase',
|
|
344
|
+
agentId: this.agentId,
|
|
345
|
+
phase: phaseMap[event.transition.to] ?? 'exploring',
|
|
346
|
+
});
|
|
347
|
+
}
|
|
429
348
|
});
|
|
349
|
+
this.unsubscribers.push(unsubStateMachine);
|
|
430
350
|
// Work Log - compaction-resilient summary of agent work
|
|
431
351
|
// Always enabled - minimal overhead and critical for long-running tasks
|
|
432
352
|
this.workLog = createWorkLog();
|
|
@@ -444,7 +364,7 @@ export class ProductionAgent {
|
|
|
444
364
|
this.agentRegistry = new AgentRegistry();
|
|
445
365
|
// Load user agents asynchronously - tracked for ensureReady()
|
|
446
366
|
this.initPromises.push(this.agentRegistry.loadUserAgents().catch(err => {
|
|
447
|
-
|
|
367
|
+
log.warn('Failed to load user agents', { error: String(err) });
|
|
448
368
|
}));
|
|
449
369
|
// Register spawn_agent tool so LLM can delegate to subagents
|
|
450
370
|
const boundSpawnTool = createBoundSpawnAgentTool((name, task, constraints) => this.spawnAgent(name, task, constraints));
|
|
@@ -493,11 +413,16 @@ export class ProductionAgent {
|
|
|
493
413
|
: swarmConfig.throttle;
|
|
494
414
|
this.provider = createThrottledProvider(this.provider, throttleConfig);
|
|
495
415
|
}
|
|
416
|
+
// Pass codebaseContext so the decomposer can ground tasks in actual project files
|
|
417
|
+
swarmConfig.codebaseContext = this.codebaseContext ?? undefined;
|
|
496
418
|
this.swarmOrchestrator = createSwarmOrchestrator(swarmConfig, this.provider, this.agentRegistry, (name, task) => this.spawnAgent(name, task), this.blackboard ?? undefined);
|
|
497
419
|
// Override parent budget pool with swarm's much larger pool so spawnAgent()
|
|
498
420
|
// allocates from the swarm budget (e.g. 10M tokens) instead of the parent's
|
|
499
421
|
// generic pool (200K tokens). Without this, workers get 5K emergency budget.
|
|
500
422
|
this.budgetPool = this.swarmOrchestrator.getBudgetPool().pool;
|
|
423
|
+
// Phase 3.1+3.2: Set shared state so workers inherit it via buildContext()
|
|
424
|
+
this._sharedContextState = this.swarmOrchestrator.getSharedContextState();
|
|
425
|
+
this._sharedEconomicsState = this.swarmOrchestrator.getSharedEconomicsState();
|
|
501
426
|
}
|
|
502
427
|
// Cancellation Support
|
|
503
428
|
if (isFeatureEnabled(this.config.cancellation)) {
|
|
@@ -565,7 +490,7 @@ export class ProductionAgent {
|
|
|
565
490
|
this.initPromises.push(this.skillManager.loadSkills()
|
|
566
491
|
.then(() => { }) // Convert to void
|
|
567
492
|
.catch(err => {
|
|
568
|
-
|
|
493
|
+
log.warn('Failed to load skills', { error: String(err) });
|
|
569
494
|
}));
|
|
570
495
|
}
|
|
571
496
|
// Context Engineering (Manus-inspired tricks P, Q, R, S, T)
|
|
@@ -581,6 +506,10 @@ export class ProductionAgent {
|
|
|
581
506
|
maxFailures: 30,
|
|
582
507
|
maxReferences: 50,
|
|
583
508
|
});
|
|
509
|
+
// Bind shared context state for cross-worker failure learning (swarm workers only)
|
|
510
|
+
if (this._sharedContextState) {
|
|
511
|
+
this.contextEngineering.setSharedState(this._sharedContextState);
|
|
512
|
+
}
|
|
584
513
|
// Codebase Context - intelligent code selection for context management
|
|
585
514
|
// Analyzes repo structure and selects relevant code within token budgets
|
|
586
515
|
if (this.config.codebaseContext !== false) {
|
|
@@ -597,6 +526,10 @@ export class ProductionAgent {
|
|
|
597
526
|
cacheResults: true,
|
|
598
527
|
cacheTTL: 5 * 60 * 1000, // 5 minutes
|
|
599
528
|
});
|
|
529
|
+
// Forward trace collector so codebase analysis can emit codebase.map entries.
|
|
530
|
+
if (this.traceCollector) {
|
|
531
|
+
this.codebaseContext.traceCollector = this.traceCollector;
|
|
532
|
+
}
|
|
600
533
|
// Connect LSP manager to codebase context for enhanced code selection
|
|
601
534
|
// This enables LSP-based relevance boosting (Phase 4.1)
|
|
602
535
|
if (this.lspManager) {
|
|
@@ -951,6 +884,7 @@ export class ProductionAgent {
|
|
|
951
884
|
async run(task) {
|
|
952
885
|
// Ensure all integrations are ready before running
|
|
953
886
|
await this.ensureReady();
|
|
887
|
+
this.reconcileStaleTasks('run_start');
|
|
954
888
|
const startTime = Date.now();
|
|
955
889
|
// Create cancellation context if enabled
|
|
956
890
|
const cancellationConfig = isFeatureEnabled(this.config.cancellation) ? this.config.cancellation : null;
|
|
@@ -958,6 +892,7 @@ export class ProductionAgent {
|
|
|
958
892
|
// Start tracing
|
|
959
893
|
const traceId = this.observability?.tracer?.startTrace('agent.run') || `trace-${Date.now()}`;
|
|
960
894
|
this.emit({ type: 'start', task, traceId });
|
|
895
|
+
this.emit({ type: 'run.before', task });
|
|
961
896
|
this.observability?.logger?.info('Agent started', { task });
|
|
962
897
|
// Lesson 26: Start trace capture
|
|
963
898
|
// If session is already active (managed by REPL), start a task within it.
|
|
@@ -977,6 +912,12 @@ export class ProductionAgent {
|
|
|
977
912
|
await this.traceCollector?.startSession(traceSessionId, task, this.config.model || 'default', sessionMetadata);
|
|
978
913
|
}
|
|
979
914
|
try {
|
|
915
|
+
let runSuccess = true;
|
|
916
|
+
let runFailureReason;
|
|
917
|
+
let completion = {
|
|
918
|
+
success: true,
|
|
919
|
+
reason: 'completed',
|
|
920
|
+
};
|
|
980
921
|
// Check for cancellation before starting
|
|
981
922
|
cancellationToken?.throwIfCancellationRequested();
|
|
982
923
|
// Classify task complexity for scaling guidance
|
|
@@ -986,6 +927,27 @@ export class ProductionAgent {
|
|
|
986
927
|
// Check if swarm mode should handle this task
|
|
987
928
|
if (this.swarmOrchestrator) {
|
|
988
929
|
const swarmResult = await this.runSwarm(task);
|
|
930
|
+
if (!swarmResult.success) {
|
|
931
|
+
runSuccess = false;
|
|
932
|
+
runFailureReason = swarmResult.summary || 'Swarm reported unsuccessful execution';
|
|
933
|
+
completion = {
|
|
934
|
+
success: false,
|
|
935
|
+
reason: 'swarm_failure',
|
|
936
|
+
details: runFailureReason,
|
|
937
|
+
};
|
|
938
|
+
}
|
|
939
|
+
// Guard against summaries that still indicate pending work.
|
|
940
|
+
if (detectIncompleteActionResponse(swarmResult.summary || '')) {
|
|
941
|
+
this.emit({ type: 'completion.before', reason: 'future_intent' });
|
|
942
|
+
runSuccess = false;
|
|
943
|
+
runFailureReason = 'Swarm summary indicates pending, unexecuted work';
|
|
944
|
+
completion = {
|
|
945
|
+
success: false,
|
|
946
|
+
reason: 'future_intent',
|
|
947
|
+
details: runFailureReason,
|
|
948
|
+
futureIntentDetected: true,
|
|
949
|
+
};
|
|
950
|
+
}
|
|
989
951
|
// Store swarm summary as an assistant message for the response
|
|
990
952
|
this.state.messages.push({ role: 'assistant', content: swarmResult.summary });
|
|
991
953
|
}
|
|
@@ -994,7 +956,17 @@ export class ProductionAgent {
|
|
|
994
956
|
await this.createAndExecutePlan(task);
|
|
995
957
|
}
|
|
996
958
|
else {
|
|
997
|
-
await this.executeDirectly(task);
|
|
959
|
+
const directResult = await this.executeDirectly(task);
|
|
960
|
+
if (!directResult.success) {
|
|
961
|
+
runSuccess = false;
|
|
962
|
+
runFailureReason = directResult.failureReason || directResult.terminationReason;
|
|
963
|
+
}
|
|
964
|
+
completion = {
|
|
965
|
+
success: directResult.success,
|
|
966
|
+
reason: directResult.terminationReason,
|
|
967
|
+
...(directResult.failureReason ? { details: directResult.failureReason } : {}),
|
|
968
|
+
...(directResult.openTasks ? { openTasks: directResult.openTasks } : {}),
|
|
969
|
+
};
|
|
998
970
|
}
|
|
999
971
|
// Get final response - find the LAST assistant message (not just check if last message is assistant)
|
|
1000
972
|
const assistantMessages = this.state.messages.filter(m => m.role === 'assistant');
|
|
@@ -1002,28 +974,101 @@ export class ProductionAgent {
|
|
|
1002
974
|
const response = typeof lastAssistantMessage?.content === 'string'
|
|
1003
975
|
? lastAssistantMessage.content
|
|
1004
976
|
: '';
|
|
977
|
+
// Final guardrail: never mark a run successful if the final answer is "I'll do X".
|
|
978
|
+
if (runSuccess && detectIncompleteActionResponse(response)) {
|
|
979
|
+
this.emit({ type: 'completion.before', reason: 'future_intent' });
|
|
980
|
+
runSuccess = false;
|
|
981
|
+
runFailureReason = 'Final response indicates pending, unexecuted work';
|
|
982
|
+
completion = {
|
|
983
|
+
success: false,
|
|
984
|
+
reason: 'future_intent',
|
|
985
|
+
details: runFailureReason,
|
|
986
|
+
futureIntentDetected: true,
|
|
987
|
+
};
|
|
988
|
+
}
|
|
989
|
+
if (runSuccess && completion.reason === 'completed') {
|
|
990
|
+
this.reconcileStaleTasks('run_end');
|
|
991
|
+
const openTasks = this.getOpenTasksSummary();
|
|
992
|
+
if (openTasks && (openTasks.inProgress > 0 || openTasks.pending > 0)) {
|
|
993
|
+
this.emit({ type: 'completion.before', reason: 'open_tasks' });
|
|
994
|
+
runSuccess = false;
|
|
995
|
+
runFailureReason = `Open tasks remain: ${openTasks.pending} pending, ${openTasks.inProgress} in_progress`;
|
|
996
|
+
completion = {
|
|
997
|
+
success: false,
|
|
998
|
+
reason: 'open_tasks',
|
|
999
|
+
details: runFailureReason,
|
|
1000
|
+
openTasks,
|
|
1001
|
+
};
|
|
1002
|
+
this.emit({
|
|
1003
|
+
type: 'completion.blocked',
|
|
1004
|
+
reasons: [
|
|
1005
|
+
runFailureReason,
|
|
1006
|
+
openTasks.blocked > 0 ? `${openTasks.blocked} pending tasks are blocked` : '',
|
|
1007
|
+
].filter(Boolean),
|
|
1008
|
+
openTasks,
|
|
1009
|
+
diagnostics: {
|
|
1010
|
+
forceTextOnly: false,
|
|
1011
|
+
availableTasks: this.taskManager?.getAvailableTasks().length ?? 0,
|
|
1012
|
+
pendingWithOwner: 0,
|
|
1013
|
+
},
|
|
1014
|
+
});
|
|
1015
|
+
}
|
|
1016
|
+
}
|
|
1005
1017
|
// Finalize
|
|
1006
1018
|
const duration = Date.now() - startTime;
|
|
1007
1019
|
this.state.metrics.duration = duration;
|
|
1008
|
-
|
|
1020
|
+
if (runSuccess) {
|
|
1021
|
+
this.state.metrics.successCount = (this.state.metrics.successCount ?? 0) + 1;
|
|
1022
|
+
}
|
|
1023
|
+
else {
|
|
1024
|
+
this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
|
|
1025
|
+
}
|
|
1009
1026
|
await this.observability?.tracer?.endTrace();
|
|
1010
1027
|
const result = {
|
|
1011
|
-
success:
|
|
1028
|
+
success: runSuccess,
|
|
1012
1029
|
response,
|
|
1030
|
+
...(runSuccess ? {} : { error: runFailureReason ?? 'Task failed' }),
|
|
1013
1031
|
metrics: this.getMetrics(),
|
|
1014
1032
|
messages: this.state.messages,
|
|
1015
1033
|
traceId,
|
|
1016
1034
|
plan: this.state.plan,
|
|
1035
|
+
completion,
|
|
1036
|
+
};
|
|
1037
|
+
result.completion.recovery = {
|
|
1038
|
+
intraRunRetries: this.state.metrics.retryCount ?? 0,
|
|
1039
|
+
autoLoopRuns: 0,
|
|
1040
|
+
terminal: !runSuccess,
|
|
1041
|
+
reasonChain: [completion.reason],
|
|
1017
1042
|
};
|
|
1018
1043
|
this.emit({ type: 'complete', result });
|
|
1019
|
-
this.
|
|
1044
|
+
this.emit({
|
|
1045
|
+
type: 'completion.after',
|
|
1046
|
+
success: runSuccess,
|
|
1047
|
+
reason: completion.reason,
|
|
1048
|
+
...(completion.details ? { details: completion.details } : {}),
|
|
1049
|
+
});
|
|
1050
|
+
this.emit({
|
|
1051
|
+
type: 'run.after',
|
|
1052
|
+
success: runSuccess,
|
|
1053
|
+
reason: completion.reason,
|
|
1054
|
+
...(completion.details ? { details: completion.details } : {}),
|
|
1055
|
+
});
|
|
1056
|
+
this.observability?.logger?.info('Agent completed', {
|
|
1057
|
+
duration,
|
|
1058
|
+
success: runSuccess,
|
|
1059
|
+
...(runFailureReason ? { failureReason: runFailureReason } : {}),
|
|
1060
|
+
});
|
|
1020
1061
|
// Lesson 26: End trace capture
|
|
1021
1062
|
// If task is active (REPL mode), end the task. Otherwise end the session (single-task mode).
|
|
1022
1063
|
if (this.traceCollector?.isTaskActive()) {
|
|
1023
|
-
await this.traceCollector.endTask(
|
|
1064
|
+
await this.traceCollector.endTask(runSuccess
|
|
1065
|
+
? { success: true, output: response }
|
|
1066
|
+
: { success: false, failureReason: runFailureReason ?? 'Task failed', output: response });
|
|
1024
1067
|
}
|
|
1025
1068
|
else if (this.traceCollector?.isSessionActive()) {
|
|
1026
|
-
await this.traceCollector.endSession(
|
|
1069
|
+
await this.traceCollector.endSession(runSuccess
|
|
1070
|
+
? { success: true, output: response }
|
|
1071
|
+
: { success: false, failureReason: runFailureReason ?? 'Task failed', output: response });
|
|
1027
1072
|
}
|
|
1028
1073
|
return result;
|
|
1029
1074
|
}
|
|
@@ -1044,6 +1089,18 @@ export class ProductionAgent {
|
|
|
1044
1089
|
else if (this.traceCollector?.isSessionActive()) {
|
|
1045
1090
|
await this.traceCollector.endSession({ success: false, failureReason: `Cancelled: ${error.message}` });
|
|
1046
1091
|
}
|
|
1092
|
+
this.emit({
|
|
1093
|
+
type: 'completion.after',
|
|
1094
|
+
success: false,
|
|
1095
|
+
reason: 'cancelled',
|
|
1096
|
+
details: `Cancelled: ${error.message}`,
|
|
1097
|
+
});
|
|
1098
|
+
this.emit({
|
|
1099
|
+
type: 'run.after',
|
|
1100
|
+
success: false,
|
|
1101
|
+
reason: 'cancelled',
|
|
1102
|
+
details: `Cancelled: ${error.message}`,
|
|
1103
|
+
});
|
|
1047
1104
|
return {
|
|
1048
1105
|
success: false,
|
|
1049
1106
|
response: '',
|
|
@@ -1051,6 +1108,11 @@ export class ProductionAgent {
|
|
|
1051
1108
|
metrics: this.getMetrics(),
|
|
1052
1109
|
messages: this.state.messages,
|
|
1053
1110
|
traceId,
|
|
1111
|
+
completion: {
|
|
1112
|
+
success: false,
|
|
1113
|
+
reason: 'cancelled',
|
|
1114
|
+
details: `Cancelled: ${error.message}`,
|
|
1115
|
+
},
|
|
1054
1116
|
};
|
|
1055
1117
|
}
|
|
1056
1118
|
this.observability?.tracer?.recordError(error);
|
|
@@ -1058,6 +1120,9 @@ export class ProductionAgent {
|
|
|
1058
1120
|
this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
|
|
1059
1121
|
this.emit({ type: 'error', error: error.message });
|
|
1060
1122
|
this.observability?.logger?.error('Agent failed', { error: error.message });
|
|
1123
|
+
const completionReason = error.message.includes('failed to complete requested action')
|
|
1124
|
+
? 'incomplete_action'
|
|
1125
|
+
: 'error';
|
|
1061
1126
|
// Lesson 26: End trace capture on error
|
|
1062
1127
|
if (this.traceCollector?.isTaskActive()) {
|
|
1063
1128
|
await this.traceCollector.endTask({ success: false, failureReason: error.message });
|
|
@@ -1065,14 +1130,26 @@ export class ProductionAgent {
|
|
|
1065
1130
|
else if (this.traceCollector?.isSessionActive()) {
|
|
1066
1131
|
await this.traceCollector.endSession({ success: false, failureReason: error.message });
|
|
1067
1132
|
}
|
|
1068
|
-
|
|
1133
|
+
const errorResult = {
|
|
1069
1134
|
success: false,
|
|
1070
1135
|
response: '',
|
|
1071
1136
|
error: error.message,
|
|
1072
1137
|
metrics: this.getMetrics(),
|
|
1073
1138
|
messages: this.state.messages,
|
|
1074
1139
|
traceId,
|
|
1140
|
+
completion: {
|
|
1141
|
+
success: false,
|
|
1142
|
+
reason: completionReason,
|
|
1143
|
+
details: error.message,
|
|
1144
|
+
},
|
|
1075
1145
|
};
|
|
1146
|
+
this.emit({
|
|
1147
|
+
type: 'run.after',
|
|
1148
|
+
success: false,
|
|
1149
|
+
reason: completionReason,
|
|
1150
|
+
details: error.message,
|
|
1151
|
+
});
|
|
1152
|
+
return errorResult;
|
|
1076
1153
|
}
|
|
1077
1154
|
finally {
|
|
1078
1155
|
// Dispose cancellation context on completion
|
|
@@ -1099,7 +1176,7 @@ export class ProductionAgent {
|
|
|
1099
1176
|
this.planning.completeTask(currentTask.id);
|
|
1100
1177
|
this.emit({ type: 'task.complete', task: currentTask });
|
|
1101
1178
|
}
|
|
1102
|
-
catch (
|
|
1179
|
+
catch (_err) {
|
|
1103
1180
|
this.planning.failTask(currentTask.id);
|
|
1104
1181
|
this.observability?.logger?.warn('Plan task failed', { taskId: currentTask.id });
|
|
1105
1182
|
// Continue with other tasks if possible
|
|
@@ -1133,6 +1210,133 @@ export class ProductionAgent {
|
|
|
1133
1210
|
const { SwarmEventBridge } = await import('./integrations/swarm/swarm-event-bridge.js');
|
|
1134
1211
|
const bridge = new SwarmEventBridge({ outputDir: '.agent/swarm-live' });
|
|
1135
1212
|
const unsubBridge = bridge.attach(this.swarmOrchestrator);
|
|
1213
|
+
const writeCodeMapSnapshot = () => {
|
|
1214
|
+
if (!this.codebaseContext) {
|
|
1215
|
+
return;
|
|
1216
|
+
}
|
|
1217
|
+
const repoMap = this.codebaseContext.getRepoMap();
|
|
1218
|
+
if (!repoMap) {
|
|
1219
|
+
return;
|
|
1220
|
+
}
|
|
1221
|
+
// Build dependency edges from the dependency graph
|
|
1222
|
+
const depEdges = [];
|
|
1223
|
+
for (const [file, deps] of repoMap.dependencyGraph) {
|
|
1224
|
+
depEdges.push({ file, imports: Array.from(deps) });
|
|
1225
|
+
}
|
|
1226
|
+
// Build top chunks sorted by importance
|
|
1227
|
+
const chunks = Array.from(repoMap.chunks.values());
|
|
1228
|
+
const topChunks = chunks
|
|
1229
|
+
.sort((a, b) => b.importance - a.importance)
|
|
1230
|
+
.slice(0, 100)
|
|
1231
|
+
.map(c => ({
|
|
1232
|
+
filePath: c.filePath,
|
|
1233
|
+
tokenCount: c.tokenCount,
|
|
1234
|
+
importance: c.importance,
|
|
1235
|
+
type: c.type,
|
|
1236
|
+
symbols: c.symbolDetails,
|
|
1237
|
+
}));
|
|
1238
|
+
const files = chunks.map((chunk) => ({
|
|
1239
|
+
filePath: chunk.filePath,
|
|
1240
|
+
directory: path.dirname(chunk.filePath) === '.' ? '' : path.dirname(chunk.filePath),
|
|
1241
|
+
fileName: path.basename(chunk.filePath),
|
|
1242
|
+
tokenCount: chunk.tokenCount,
|
|
1243
|
+
importance: chunk.importance,
|
|
1244
|
+
type: chunk.type,
|
|
1245
|
+
symbols: chunk.symbolDetails,
|
|
1246
|
+
inDegree: repoMap.reverseDependencyGraph.get(chunk.filePath)?.size ?? 0,
|
|
1247
|
+
outDegree: repoMap.dependencyGraph.get(chunk.filePath)?.size ?? 0,
|
|
1248
|
+
}));
|
|
1249
|
+
bridge.writeCodeMapSnapshot({
|
|
1250
|
+
totalFiles: repoMap.chunks.size,
|
|
1251
|
+
totalTokens: repoMap.totalTokens,
|
|
1252
|
+
entryPoints: repoMap.entryPoints,
|
|
1253
|
+
coreModules: repoMap.coreModules,
|
|
1254
|
+
dependencyEdges: depEdges,
|
|
1255
|
+
files,
|
|
1256
|
+
topChunks,
|
|
1257
|
+
});
|
|
1258
|
+
};
|
|
1259
|
+
let codeMapRefreshInFlight = false;
|
|
1260
|
+
let codeMapRefreshTimer = null;
|
|
1261
|
+
const refreshAndWriteCodeMapSnapshot = async () => {
|
|
1262
|
+
if (!this.codebaseContext || codeMapRefreshInFlight) {
|
|
1263
|
+
return;
|
|
1264
|
+
}
|
|
1265
|
+
codeMapRefreshInFlight = true;
|
|
1266
|
+
try {
|
|
1267
|
+
// Re-analyze from disk so snapshots include newly created files during swarm execution.
|
|
1268
|
+
this.codebaseContext.clearCache();
|
|
1269
|
+
await this.codebaseContext.analyze();
|
|
1270
|
+
writeCodeMapSnapshot();
|
|
1271
|
+
}
|
|
1272
|
+
catch {
|
|
1273
|
+
// Best effort
|
|
1274
|
+
}
|
|
1275
|
+
finally {
|
|
1276
|
+
codeMapRefreshInFlight = false;
|
|
1277
|
+
}
|
|
1278
|
+
};
|
|
1279
|
+
// Write observability snapshots to swarm-live/ on relevant events
|
|
1280
|
+
const unsubSnapshots = this.swarmOrchestrator.subscribe(event => {
|
|
1281
|
+
// Write codemap snapshot when tasks are loaded.
|
|
1282
|
+
if (event.type === 'swarm.tasks.loaded' && this.codebaseContext) {
|
|
1283
|
+
try {
|
|
1284
|
+
writeCodeMapSnapshot();
|
|
1285
|
+
}
|
|
1286
|
+
catch {
|
|
1287
|
+
// Best effort — don't crash the swarm
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
// Refresh codemap after each completed wave to avoid stale 0-file snapshots.
|
|
1291
|
+
if (event.type === 'swarm.wave.complete' && this.codebaseContext) {
|
|
1292
|
+
void refreshAndWriteCodeMapSnapshot();
|
|
1293
|
+
}
|
|
1294
|
+
if (event.type === 'swarm.task.completed' && this.codebaseContext) {
|
|
1295
|
+
if (codeMapRefreshTimer) {
|
|
1296
|
+
clearTimeout(codeMapRefreshTimer);
|
|
1297
|
+
}
|
|
1298
|
+
codeMapRefreshTimer = setTimeout(() => {
|
|
1299
|
+
void refreshAndWriteCodeMapSnapshot();
|
|
1300
|
+
}, 1200);
|
|
1301
|
+
}
|
|
1302
|
+
// Write blackboard.json on wave completion or task completion
|
|
1303
|
+
if ((event.type === 'swarm.wave.complete' || event.type === 'swarm.task.completed') && this.blackboard) {
|
|
1304
|
+
try {
|
|
1305
|
+
const findings = this.blackboard.getAllFindings();
|
|
1306
|
+
bridge.writeBlackboardSnapshot({
|
|
1307
|
+
findings: findings.map(f => ({
|
|
1308
|
+
id: f.id ?? '',
|
|
1309
|
+
topic: f.topic ?? '',
|
|
1310
|
+
type: f.type ?? '',
|
|
1311
|
+
agentId: f.agentId ?? '',
|
|
1312
|
+
confidence: f.confidence ?? 0,
|
|
1313
|
+
content: (f.content ?? '').slice(0, 500),
|
|
1314
|
+
})),
|
|
1315
|
+
claims: [],
|
|
1316
|
+
updatedAt: new Date().toISOString(),
|
|
1317
|
+
});
|
|
1318
|
+
}
|
|
1319
|
+
catch {
|
|
1320
|
+
// Best effort
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1323
|
+
// Write budget-pool.json on budget updates
|
|
1324
|
+
if (event.type === 'swarm.budget.update' && this.budgetPool) {
|
|
1325
|
+
try {
|
|
1326
|
+
const stats = this.budgetPool.getStats();
|
|
1327
|
+
bridge.writeBudgetPoolSnapshot({
|
|
1328
|
+
poolTotal: stats.totalTokens,
|
|
1329
|
+
poolUsed: stats.tokensUsed,
|
|
1330
|
+
poolRemaining: stats.tokensRemaining,
|
|
1331
|
+
allocations: [],
|
|
1332
|
+
updatedAt: new Date().toISOString(),
|
|
1333
|
+
});
|
|
1334
|
+
}
|
|
1335
|
+
catch {
|
|
1336
|
+
// Best effort
|
|
1337
|
+
}
|
|
1338
|
+
}
|
|
1339
|
+
});
|
|
1136
1340
|
// Bridge swarm events into JSONL trace pipeline
|
|
1137
1341
|
const traceCollector = this.traceCollector;
|
|
1138
1342
|
let unsubTrace;
|
|
@@ -1289,6 +1493,22 @@ export class ProductionAgent {
|
|
|
1289
1493
|
});
|
|
1290
1494
|
}
|
|
1291
1495
|
try {
|
|
1496
|
+
// Ensure codebase context is analyzed before decomposition so repo map is available
|
|
1497
|
+
if (this.codebaseContext && !this.codebaseContext.getRepoMap()) {
|
|
1498
|
+
try {
|
|
1499
|
+
await this.codebaseContext.analyze();
|
|
1500
|
+
}
|
|
1501
|
+
catch {
|
|
1502
|
+
// non-fatal — decomposer will work without codebase context
|
|
1503
|
+
}
|
|
1504
|
+
}
|
|
1505
|
+
// Write codemap snapshot immediately so dashboard can render even if decomposition fails.
|
|
1506
|
+
try {
|
|
1507
|
+
writeCodeMapSnapshot();
|
|
1508
|
+
}
|
|
1509
|
+
catch {
|
|
1510
|
+
// Best effort
|
|
1511
|
+
}
|
|
1292
1512
|
const result = await this.swarmOrchestrator.execute(task);
|
|
1293
1513
|
// Populate task DAG for dashboard after execution
|
|
1294
1514
|
bridge.setTasks(result.tasks);
|
|
@@ -1302,967 +1522,24 @@ export class ProductionAgent {
|
|
|
1302
1522
|
return result;
|
|
1303
1523
|
}
|
|
1304
1524
|
finally {
|
|
1525
|
+
if (codeMapRefreshTimer) {
|
|
1526
|
+
clearTimeout(codeMapRefreshTimer);
|
|
1527
|
+
}
|
|
1305
1528
|
unsubTrace?.();
|
|
1529
|
+
unsubSnapshots();
|
|
1306
1530
|
unsubBridge();
|
|
1307
1531
|
bridge.close();
|
|
1308
1532
|
unsubSwarm();
|
|
1309
1533
|
}
|
|
1310
1534
|
}
|
|
1311
1535
|
/**
|
|
1312
|
-
* Execute a task directly without planning.
|
|
1536
|
+
* Execute a task directly without planning (delegates to core/execution-loop).
|
|
1313
1537
|
*/
|
|
1314
1538
|
async executeDirectly(task) {
|
|
1315
|
-
|
|
1316
|
-
const
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
// Reflection configuration
|
|
1320
|
-
const reflectionConfig = this.config.reflection;
|
|
1321
|
-
const reflectionEnabled = isFeatureEnabled(reflectionConfig);
|
|
1322
|
-
const autoReflect = reflectionEnabled && reflectionConfig.autoReflect;
|
|
1323
|
-
const maxReflectionAttempts = reflectionEnabled
|
|
1324
|
-
? (reflectionConfig.maxAttempts || 3)
|
|
1325
|
-
: 1;
|
|
1326
|
-
const confidenceThreshold = reflectionEnabled
|
|
1327
|
-
? (reflectionConfig.confidenceThreshold || 0.8)
|
|
1328
|
-
: 0.8;
|
|
1329
|
-
let reflectionAttempt = 0;
|
|
1330
|
-
let lastResponse = '';
|
|
1331
|
-
let incompleteActionRetries = 0;
|
|
1332
|
-
const requestedArtifact = this.extractRequestedArtifact(task);
|
|
1333
|
-
const executedToolNames = new Set();
|
|
1334
|
-
// Outer loop for reflection (if enabled)
|
|
1335
|
-
while (reflectionAttempt < maxReflectionAttempts) {
|
|
1336
|
-
reflectionAttempt++;
|
|
1337
|
-
// Agent loop - now uses economics-based budget checking
|
|
1338
|
-
while (true) {
|
|
1339
|
-
this.state.iteration++;
|
|
1340
|
-
// Record iteration start for tracing
|
|
1341
|
-
this.traceCollector?.record({
|
|
1342
|
-
type: 'iteration.start',
|
|
1343
|
-
data: { iterationNumber: this.state.iteration },
|
|
1344
|
-
});
|
|
1345
|
-
// =======================================================================
|
|
1346
|
-
// CANCELLATION CHECK
|
|
1347
|
-
// Checks internal cancellation (ESC key) — always immediate.
|
|
1348
|
-
// External cancellation (parent timeout) is checked after economics
|
|
1349
|
-
// to allow graceful wrapup when wrapup has been requested.
|
|
1350
|
-
// =======================================================================
|
|
1351
|
-
if (this.cancellation?.isCancelled) {
|
|
1352
|
-
this.cancellation.token.throwIfCancellationRequested();
|
|
1353
|
-
}
|
|
1354
|
-
// =======================================================================
|
|
1355
|
-
// RESOURCE CHECK - system resource limits
|
|
1356
|
-
// =======================================================================
|
|
1357
|
-
if (this.resourceManager) {
|
|
1358
|
-
const resourceCheck = this.resourceManager.check();
|
|
1359
|
-
if (!resourceCheck.canContinue) {
|
|
1360
|
-
this.observability?.logger?.warn('Resource limit reached', {
|
|
1361
|
-
status: resourceCheck.status,
|
|
1362
|
-
message: resourceCheck.message,
|
|
1363
|
-
});
|
|
1364
|
-
this.emit({ type: 'error', error: resourceCheck.message || 'Resource limit exceeded' });
|
|
1365
|
-
break;
|
|
1366
|
-
}
|
|
1367
|
-
// Log warnings for elevated usage
|
|
1368
|
-
if (resourceCheck.status === 'warning' || resourceCheck.status === 'critical') {
|
|
1369
|
-
this.observability?.logger?.info(`Resource status: ${resourceCheck.status}`, {
|
|
1370
|
-
message: resourceCheck.message,
|
|
1371
|
-
});
|
|
1372
|
-
}
|
|
1373
|
-
}
|
|
1374
|
-
// =======================================================================
|
|
1375
|
-
// ECONOMICS CHECK (Token Budget) - replaces hard iteration limit
|
|
1376
|
-
// With recovery: try compaction before giving up on token limits
|
|
1377
|
-
// =======================================================================
|
|
1378
|
-
let forceTextOnly = false; // Track if we should skip tool execution
|
|
1379
|
-
let budgetInjectedPrompt;
|
|
1380
|
-
if (this.economics) {
|
|
1381
|
-
const budgetCheck = this.economics.checkBudget();
|
|
1382
|
-
// Capture forceTextOnly and injectedPrompt for later use
|
|
1383
|
-
forceTextOnly = budgetCheck.forceTextOnly ?? false;
|
|
1384
|
-
budgetInjectedPrompt = budgetCheck.injectedPrompt;
|
|
1385
|
-
if (!budgetCheck.canContinue) {
|
|
1386
|
-
// ===================================================================
|
|
1387
|
-
// RECOVERY ATTEMPT: Try emergency context reduction before giving up
|
|
1388
|
-
// Only for token-based limits, not iteration limits
|
|
1389
|
-
// ===================================================================
|
|
1390
|
-
const isTokenLimit = budgetCheck.budgetType === 'tokens' || budgetCheck.budgetType === 'cost';
|
|
1391
|
-
const alreadyTriedRecovery = this.state._recoveryAttempted === true;
|
|
1392
|
-
if (isTokenLimit && !alreadyTriedRecovery) {
|
|
1393
|
-
this.observability?.logger?.info('Budget limit reached, attempting recovery via context reduction', {
|
|
1394
|
-
reason: budgetCheck.reason,
|
|
1395
|
-
percentUsed: budgetCheck.percentUsed,
|
|
1396
|
-
});
|
|
1397
|
-
this.emit({
|
|
1398
|
-
type: 'resilience.retry',
|
|
1399
|
-
reason: 'budget_limit_compaction',
|
|
1400
|
-
attempt: 1,
|
|
1401
|
-
maxAttempts: 1,
|
|
1402
|
-
});
|
|
1403
|
-
this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
|
|
1404
|
-
// Mark that we've attempted recovery to prevent infinite loops
|
|
1405
|
-
this.state._recoveryAttempted = true;
|
|
1406
|
-
const tokensBefore = this.estimateContextTokens(messages);
|
|
1407
|
-
// Step 1: Compact tool outputs aggressively
|
|
1408
|
-
this.compactToolOutputs();
|
|
1409
|
-
// Step 2: Emergency truncation - keep system + last N messages
|
|
1410
|
-
const PRESERVE_RECENT = 10;
|
|
1411
|
-
if (messages.length > PRESERVE_RECENT + 2) {
|
|
1412
|
-
const systemMessage = messages.find(m => m.role === 'system');
|
|
1413
|
-
const recentMessages = messages.slice(-(PRESERVE_RECENT));
|
|
1414
|
-
// Rebuild message array
|
|
1415
|
-
messages.length = 0;
|
|
1416
|
-
if (systemMessage) {
|
|
1417
|
-
messages.push(systemMessage);
|
|
1418
|
-
}
|
|
1419
|
-
messages.push({
|
|
1420
|
-
role: 'system',
|
|
1421
|
-
content: `[CONTEXT REDUCED: Earlier messages were removed to stay within budget. Conversation continues from recent context.]`,
|
|
1422
|
-
});
|
|
1423
|
-
messages.push(...recentMessages);
|
|
1424
|
-
// Inject work log after emergency truncation to prevent amnesia
|
|
1425
|
-
if (this.workLog?.hasContent()) {
|
|
1426
|
-
const workLogMessage = {
|
|
1427
|
-
role: 'user',
|
|
1428
|
-
content: this.workLog.toCompactString(),
|
|
1429
|
-
};
|
|
1430
|
-
messages.push(workLogMessage);
|
|
1431
|
-
}
|
|
1432
|
-
// Update state messages too
|
|
1433
|
-
this.state.messages.length = 0;
|
|
1434
|
-
this.state.messages.push(...messages);
|
|
1435
|
-
}
|
|
1436
|
-
const tokensAfter = this.estimateContextTokens(messages);
|
|
1437
|
-
const reduction = Math.round((1 - tokensAfter / tokensBefore) * 100);
|
|
1438
|
-
if (tokensAfter < tokensBefore * 0.8) {
|
|
1439
|
-
// Significant reduction achieved
|
|
1440
|
-
this.observability?.logger?.info('Context reduction successful, continuing execution', {
|
|
1441
|
-
tokensBefore,
|
|
1442
|
-
tokensAfter,
|
|
1443
|
-
reduction,
|
|
1444
|
-
});
|
|
1445
|
-
this.emit({
|
|
1446
|
-
type: 'resilience.recovered',
|
|
1447
|
-
reason: 'budget_limit_compaction',
|
|
1448
|
-
attempts: 1,
|
|
1449
|
-
});
|
|
1450
|
-
this.emit({
|
|
1451
|
-
type: 'compaction.auto',
|
|
1452
|
-
tokensBefore,
|
|
1453
|
-
tokensAfter,
|
|
1454
|
-
messagesCompacted: tokensBefore - tokensAfter,
|
|
1455
|
-
});
|
|
1456
|
-
// Continue execution instead of breaking
|
|
1457
|
-
continue;
|
|
1458
|
-
}
|
|
1459
|
-
this.observability?.logger?.warn('Context reduction insufficient', {
|
|
1460
|
-
tokensBefore,
|
|
1461
|
-
tokensAfter,
|
|
1462
|
-
reduction,
|
|
1463
|
-
});
|
|
1464
|
-
}
|
|
1465
|
-
// Hard limit reached and recovery failed (or not applicable)
|
|
1466
|
-
this.observability?.logger?.warn('Budget limit reached', {
|
|
1467
|
-
reason: budgetCheck.reason,
|
|
1468
|
-
budgetType: budgetCheck.budgetType,
|
|
1469
|
-
});
|
|
1470
|
-
// Emit appropriate event
|
|
1471
|
-
if (budgetCheck.budgetType === 'iterations') {
|
|
1472
|
-
const totalIter = this.getTotalIterations();
|
|
1473
|
-
const iterMsg = this.parentIterations > 0
|
|
1474
|
-
? `${this.state.iteration} + ${this.parentIterations} parent = ${totalIter}`
|
|
1475
|
-
: `${this.state.iteration}`;
|
|
1476
|
-
this.emit({ type: 'error', error: `Max iterations reached (${iterMsg})` });
|
|
1477
|
-
}
|
|
1478
|
-
else {
|
|
1479
|
-
this.emit({ type: 'error', error: budgetCheck.reason || 'Budget exceeded' });
|
|
1480
|
-
}
|
|
1481
|
-
break;
|
|
1482
|
-
}
|
|
1483
|
-
// Check for soft limits and potential extension
|
|
1484
|
-
if (budgetCheck.isSoftLimit && budgetCheck.suggestedAction === 'request_extension') {
|
|
1485
|
-
this.observability?.logger?.info('Approaching budget limit', {
|
|
1486
|
-
reason: budgetCheck.reason,
|
|
1487
|
-
percentUsed: budgetCheck.percentUsed,
|
|
1488
|
-
});
|
|
1489
|
-
// Could request extension here if handler is set
|
|
1490
|
-
}
|
|
1491
|
-
}
|
|
1492
|
-
else {
|
|
1493
|
-
// Fallback to simple iteration check if economics not available
|
|
1494
|
-
// Use getTotalIterations() to account for parent iterations (subagent hierarchy)
|
|
1495
|
-
if (this.getTotalIterations() >= this.config.maxIterations) {
|
|
1496
|
-
this.observability?.logger?.warn('Max iterations reached', {
|
|
1497
|
-
iteration: this.state.iteration,
|
|
1498
|
-
parentIterations: this.parentIterations,
|
|
1499
|
-
total: this.getTotalIterations(),
|
|
1500
|
-
});
|
|
1501
|
-
break;
|
|
1502
|
-
}
|
|
1503
|
-
}
|
|
1504
|
-
// =======================================================================
|
|
1505
|
-
// GRACEFUL WRAPUP CHECK
|
|
1506
|
-
// If a wrapup has been requested (e.g., timeout approaching), convert
|
|
1507
|
-
// to forceTextOnly + inject wrapup prompt for structured summary.
|
|
1508
|
-
// Must come after economics check (which may also set forceTextOnly).
|
|
1509
|
-
// =======================================================================
|
|
1510
|
-
if (this.wrapupRequested && !forceTextOnly) {
|
|
1511
|
-
forceTextOnly = true;
|
|
1512
|
-
budgetInjectedPrompt = TIMEOUT_WRAPUP_PROMPT;
|
|
1513
|
-
this.wrapupRequested = false;
|
|
1514
|
-
}
|
|
1515
|
-
// =======================================================================
|
|
1516
|
-
// EXTERNAL CANCELLATION CHECK (deferred from above)
|
|
1517
|
-
// Checked after wrapup so that graceful wrapup can intercept the timeout.
|
|
1518
|
-
// If wrapup was already requested and converted to forceTextOnly above,
|
|
1519
|
-
// we skip throwing here to allow one more text-only turn for the summary.
|
|
1520
|
-
// =======================================================================
|
|
1521
|
-
if (this.externalCancellationToken?.isCancellationRequested && !forceTextOnly) {
|
|
1522
|
-
this.externalCancellationToken.throwIfCancellationRequested();
|
|
1523
|
-
}
|
|
1524
|
-
// =======================================================================
|
|
1525
|
-
// INTELLIGENT LOOP DETECTION & NUDGE INJECTION
|
|
1526
|
-
// Uses economics system for doom loops, exploration saturation, etc.
|
|
1527
|
-
// =======================================================================
|
|
1528
|
-
if (this.economics && budgetInjectedPrompt) {
|
|
1529
|
-
// Inject contextual guidance from economics system
|
|
1530
|
-
messages.push({
|
|
1531
|
-
role: 'user',
|
|
1532
|
-
content: budgetInjectedPrompt,
|
|
1533
|
-
});
|
|
1534
|
-
const loopState = this.economics.getLoopState();
|
|
1535
|
-
const phaseState = this.economics.getPhaseState();
|
|
1536
|
-
this.observability?.logger?.info('Loop detection - injecting guidance', {
|
|
1537
|
-
iteration: this.state.iteration,
|
|
1538
|
-
doomLoop: loopState.doomLoopDetected,
|
|
1539
|
-
phase: phaseState.phase,
|
|
1540
|
-
filesRead: phaseState.uniqueFilesRead,
|
|
1541
|
-
filesModified: phaseState.filesModified,
|
|
1542
|
-
shouldTransition: phaseState.shouldTransition,
|
|
1543
|
-
forceTextOnly,
|
|
1544
|
-
});
|
|
1545
|
-
}
|
|
1546
|
-
// =======================================================================
|
|
1547
|
-
// RECITATION INJECTION (Trick Q) - Combat "lost in middle" attention
|
|
1548
|
-
// =======================================================================
|
|
1549
|
-
if (this.contextEngineering) {
|
|
1550
|
-
if (process.env.DEBUG_LLM) {
|
|
1551
|
-
if (process.env.DEBUG)
|
|
1552
|
-
console.log(`[recitation] Before: ${messages.length} messages`);
|
|
1553
|
-
}
|
|
1554
|
-
const enrichedMessages = this.contextEngineering.injectRecitation(messages, {
|
|
1555
|
-
goal: task,
|
|
1556
|
-
plan: this.state.plan ? {
|
|
1557
|
-
description: this.state.plan.goal || task,
|
|
1558
|
-
tasks: this.state.plan.tasks.map(t => ({
|
|
1559
|
-
id: t.id,
|
|
1560
|
-
description: t.description,
|
|
1561
|
-
status: t.status,
|
|
1562
|
-
})),
|
|
1563
|
-
currentTaskIndex: this.state.plan.tasks.findIndex(t => t.status === 'in_progress'),
|
|
1564
|
-
} : undefined,
|
|
1565
|
-
activeFiles: this.economics?.getProgress().filesModified
|
|
1566
|
-
? [`${this.economics.getProgress().filesModified} files modified`]
|
|
1567
|
-
: undefined,
|
|
1568
|
-
recentErrors: this.contextEngineering.getFailureInsights().slice(0, 2),
|
|
1569
|
-
});
|
|
1570
|
-
if (process.env.DEBUG_LLM) {
|
|
1571
|
-
if (process.env.DEBUG)
|
|
1572
|
-
console.log(`[recitation] After: ${enrichedMessages?.length ?? 'null/undefined'} messages`);
|
|
1573
|
-
}
|
|
1574
|
-
// Only replace if we got a DIFFERENT array back (avoid clearing same reference)
|
|
1575
|
-
// When no injection needed, injectRecitation returns the same array reference
|
|
1576
|
-
if (enrichedMessages && enrichedMessages !== messages && enrichedMessages.length > 0) {
|
|
1577
|
-
messages.length = 0;
|
|
1578
|
-
messages.push(...enrichedMessages);
|
|
1579
|
-
}
|
|
1580
|
-
else if (!enrichedMessages || enrichedMessages.length === 0) {
|
|
1581
|
-
console.warn('[executeDirectly] Recitation returned empty/null messages, keeping original');
|
|
1582
|
-
}
|
|
1583
|
-
// If enrichedMessages === messages, we don't need to do anything (same reference)
|
|
1584
|
-
// Update recitation frequency based on context size
|
|
1585
|
-
const contextTokens = messages.reduce((sum, m) => sum + (m.content?.length || 0) / 4, 0);
|
|
1586
|
-
this.contextEngineering.updateRecitationFrequency(contextTokens);
|
|
1587
|
-
}
|
|
1588
|
-
// =======================================================================
|
|
1589
|
-
// FAILURE CONTEXT INJECTION (Trick S) - Learn from mistakes
|
|
1590
|
-
// =======================================================================
|
|
1591
|
-
if (this.contextEngineering) {
|
|
1592
|
-
const failureContext = this.contextEngineering.getFailureContext(5);
|
|
1593
|
-
if (failureContext) {
|
|
1594
|
-
// Insert failure context before the last user message (skipped when there is no user message or it is the first message)
|
|
1595
|
-
// (Manual reverse loop instead of Array.prototype.findLastIndex, which is ES2023, to stay ES2022-compatible)
|
|
1596
|
-
let lastUserIdx = -1;
|
|
1597
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1598
|
-
if (messages[i].role === 'user') {
|
|
1599
|
-
lastUserIdx = i;
|
|
1600
|
-
break;
|
|
1601
|
-
}
|
|
1602
|
-
}
|
|
1603
|
-
if (lastUserIdx > 0) {
|
|
1604
|
-
messages.splice(lastUserIdx, 0, {
|
|
1605
|
-
role: 'system',
|
|
1606
|
-
content: failureContext,
|
|
1607
|
-
});
|
|
1608
|
-
}
|
|
1609
|
-
}
|
|
1610
|
-
}
|
|
1611
|
-
// =====================================================================
|
|
1612
|
-
// INJECTION BUDGET ANALYSIS (Phase 2 - monitoring mode)
|
|
1613
|
-
// Collects stats on context injections without gating; logs when
|
|
1614
|
-
// budget would have dropped items. Validates system before enabling gating.
|
|
1615
|
-
// =====================================================================
|
|
1616
|
-
if (this.injectionBudget) {
|
|
1617
|
-
const proposals = [];
|
|
1618
|
-
if (budgetInjectedPrompt) {
|
|
1619
|
-
proposals.push({ name: 'budget_warning', priority: 0, maxTokens: 500, content: budgetInjectedPrompt });
|
|
1620
|
-
}
|
|
1621
|
-
// Approximate failure-context content (actual injection handled above)
|
|
1622
|
-
if (this.contextEngineering) {
|
|
1623
|
-
const failureCtx = this.contextEngineering.getFailureContext(5);
|
|
1624
|
-
if (failureCtx) {
|
|
1625
|
-
proposals.push({ name: 'failure_context', priority: 2, maxTokens: 300, content: failureCtx });
|
|
1626
|
-
}
|
|
1627
|
-
}
|
|
1628
|
-
if (proposals.length > 0) {
|
|
1629
|
-
const accepted = this.injectionBudget.allocate(proposals);
|
|
1630
|
-
const stats = this.injectionBudget.getLastStats();
|
|
1631
|
-
if (stats && stats.droppedNames.length > 0 && process.env.DEBUG) {
|
|
1632
|
-
console.log(`[injection-budget] Would drop: ${stats.droppedNames.join(', ')} (${stats.proposedTokens} proposed, ${stats.acceptedTokens} accepted)`);
|
|
1633
|
-
}
|
|
1634
|
-
// Log total injection overhead for observability
|
|
1635
|
-
if (stats && process.env.DEBUG_LLM) {
|
|
1636
|
-
console.log(`[injection-budget] Iteration ${this.state.iteration}: ${accepted.length}/${proposals.length} injections, ~${stats.acceptedTokens} tokens`);
|
|
1637
|
-
}
|
|
1638
|
-
}
|
|
1639
|
-
}
|
|
1640
|
-
// =====================================================================
|
|
1641
|
-
// RESILIENT LLM CALL: Empty response retries + max_tokens continuation
|
|
1642
|
-
// =====================================================================
|
|
1643
|
-
// Get resilience config
|
|
1644
|
-
const resilienceConfig = typeof this.config.resilience === 'object'
|
|
1645
|
-
? this.config.resilience
|
|
1646
|
-
: {};
|
|
1647
|
-
const resilienceEnabled = isFeatureEnabled(this.config.resilience);
|
|
1648
|
-
const MAX_EMPTY_RETRIES = resilienceConfig.maxEmptyRetries ?? 2;
|
|
1649
|
-
const MAX_CONTINUATIONS = resilienceConfig.maxContinuations ?? 3;
|
|
1650
|
-
const AUTO_CONTINUE = resilienceConfig.autoContinue ?? true;
|
|
1651
|
-
const MIN_CONTENT_LENGTH = resilienceConfig.minContentLength ?? 1;
|
|
1652
|
-
const INCOMPLETE_ACTION_RECOVERY = resilienceConfig.incompleteActionRecovery ?? true;
|
|
1653
|
-
const MAX_INCOMPLETE_ACTION_RETRIES = resilienceConfig.maxIncompleteActionRetries ?? 2;
|
|
1654
|
-
const ENFORCE_REQUESTED_ARTIFACTS = resilienceConfig.enforceRequestedArtifacts ?? true;
|
|
1655
|
-
// =================================================================
|
|
1656
|
-
// PRE-FLIGHT BUDGET CHECK: Estimate if LLM call would exceed budget
|
|
1657
|
-
// Catches cases where we're at e.g. 120k and next call adds ~35k
|
|
1658
|
-
// =================================================================
|
|
1659
|
-
if (this.economics && !forceTextOnly) {
|
|
1660
|
-
const estimatedInputTokens = this.estimateContextTokens(messages);
|
|
1661
|
-
const estimatedOutputTokens = 4096; // Conservative output estimate
|
|
1662
|
-
const currentUsage = this.economics.getUsage();
|
|
1663
|
-
const budget = this.economics.getBudget();
|
|
1664
|
-
const projectedTotal = currentUsage.tokens + estimatedInputTokens + estimatedOutputTokens;
|
|
1665
|
-
if (projectedTotal > budget.maxTokens) {
|
|
1666
|
-
this.observability?.logger?.warn('Pre-flight budget check: projected overshoot', {
|
|
1667
|
-
currentTokens: currentUsage.tokens,
|
|
1668
|
-
estimatedInput: estimatedInputTokens,
|
|
1669
|
-
projectedTotal,
|
|
1670
|
-
maxTokens: budget.maxTokens,
|
|
1671
|
-
});
|
|
1672
|
-
// Inject wrap-up prompt if not already injected
|
|
1673
|
-
if (!budgetInjectedPrompt) {
|
|
1674
|
-
messages.push({
|
|
1675
|
-
role: 'user',
|
|
1676
|
-
content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
|
|
1677
|
-
});
|
|
1678
|
-
this.state.messages.push({
|
|
1679
|
-
role: 'user',
|
|
1680
|
-
content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
|
|
1681
|
-
});
|
|
1682
|
-
}
|
|
1683
|
-
forceTextOnly = true;
|
|
1684
|
-
}
|
|
1685
|
-
}
|
|
1686
|
-
let response = await this.callLLM(messages);
|
|
1687
|
-
let emptyRetries = 0;
|
|
1688
|
-
let continuations = 0;
|
|
1689
|
-
// Phase 1: Handle empty responses with retry (if resilience enabled)
|
|
1690
|
-
while (resilienceEnabled && emptyRetries < MAX_EMPTY_RETRIES) {
|
|
1691
|
-
const hasContent = response.content && response.content.length >= MIN_CONTENT_LENGTH;
|
|
1692
|
-
const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
|
|
1693
|
-
const hasThinking = response.thinking && response.thinking.length > 0;
|
|
1694
|
-
if (hasContent || hasToolCalls) {
|
|
1695
|
-
// Valid visible response
|
|
1696
|
-
if (emptyRetries > 0) {
|
|
1697
|
-
this.emit({
|
|
1698
|
-
type: 'resilience.recovered',
|
|
1699
|
-
reason: 'empty_response',
|
|
1700
|
-
attempts: emptyRetries,
|
|
1701
|
-
});
|
|
1702
|
-
this.observability?.logger?.info('Recovered from empty response', {
|
|
1703
|
-
retries: emptyRetries,
|
|
1704
|
-
});
|
|
1705
|
-
}
|
|
1706
|
-
break;
|
|
1707
|
-
}
|
|
1708
|
-
if (hasThinking && !hasContent && !hasToolCalls) {
|
|
1709
|
-
// Model produced reasoning but no visible output (e.g., DeepSeek-R1, GLM-4, QwQ).
|
|
1710
|
-
// Give ONE targeted nudge, then accept thinking as content.
|
|
1711
|
-
if (emptyRetries === 0) {
|
|
1712
|
-
emptyRetries++;
|
|
1713
|
-
this.emit({
|
|
1714
|
-
type: 'resilience.retry',
|
|
1715
|
-
reason: 'thinking_only_response',
|
|
1716
|
-
attempt: emptyRetries,
|
|
1717
|
-
maxAttempts: MAX_EMPTY_RETRIES,
|
|
1718
|
-
});
|
|
1719
|
-
this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
|
|
1720
|
-
this.observability?.logger?.warn('Thinking-only response (no visible content), nudging', {
|
|
1721
|
-
thinkingLength: response.thinking.length,
|
|
1722
|
-
});
|
|
1723
|
-
const thinkingNudge = {
|
|
1724
|
-
role: 'user',
|
|
1725
|
-
content: '[System: You produced reasoning but no visible response. Please provide your answer based on your analysis.]',
|
|
1726
|
-
};
|
|
1727
|
-
messages.push(thinkingNudge);
|
|
1728
|
-
this.state.messages.push(thinkingNudge);
|
|
1729
|
-
response = await this.callLLM(messages);
|
|
1730
|
-
continue;
|
|
1731
|
-
}
|
|
1732
|
-
// Second attempt also thinking-only → accept thinking as content
|
|
1733
|
-
this.observability?.logger?.info('Accepting thinking as content after nudge failed', {
|
|
1734
|
-
thinkingLength: response.thinking.length,
|
|
1735
|
-
});
|
|
1736
|
-
response = { ...response, content: response.thinking };
|
|
1737
|
-
break;
|
|
1738
|
-
}
|
|
1739
|
-
// Truly empty (no content, no tools, no thinking) — existing retry logic
|
|
1740
|
-
emptyRetries++;
|
|
1741
|
-
this.emit({
|
|
1742
|
-
type: 'resilience.retry',
|
|
1743
|
-
reason: 'empty_response',
|
|
1744
|
-
attempt: emptyRetries,
|
|
1745
|
-
maxAttempts: MAX_EMPTY_RETRIES,
|
|
1746
|
-
});
|
|
1747
|
-
this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
|
|
1748
|
-
this.observability?.logger?.warn('Empty LLM response, retrying', {
|
|
1749
|
-
attempt: emptyRetries,
|
|
1750
|
-
maxAttempts: MAX_EMPTY_RETRIES,
|
|
1751
|
-
});
|
|
1752
|
-
// Add gentle nudge and retry
|
|
1753
|
-
const nudgeMessage = {
|
|
1754
|
-
role: 'user',
|
|
1755
|
-
content: '[System: Your previous response was empty. Please provide a response or use a tool.]',
|
|
1756
|
-
};
|
|
1757
|
-
messages.push(nudgeMessage);
|
|
1758
|
-
this.state.messages.push(nudgeMessage);
|
|
1759
|
-
response = await this.callLLM(messages);
|
|
1760
|
-
}
|
|
1761
|
-
// Phase 2: Handle max_tokens truncation with continuation (if enabled)
|
|
1762
|
-
if (resilienceEnabled && AUTO_CONTINUE && response.stopReason === 'max_tokens' && !response.toolCalls?.length) {
|
|
1763
|
-
let accumulatedContent = response.content || '';
|
|
1764
|
-
while (continuations < MAX_CONTINUATIONS && response.stopReason === 'max_tokens') {
|
|
1765
|
-
continuations++;
|
|
1766
|
-
this.emit({
|
|
1767
|
-
type: 'resilience.continue',
|
|
1768
|
-
reason: 'max_tokens',
|
|
1769
|
-
continuation: continuations,
|
|
1770
|
-
maxContinuations: MAX_CONTINUATIONS,
|
|
1771
|
-
accumulatedLength: accumulatedContent.length,
|
|
1772
|
-
});
|
|
1773
|
-
this.observability?.logger?.info('Response truncated at max_tokens, continuing', {
|
|
1774
|
-
continuation: continuations,
|
|
1775
|
-
accumulatedLength: accumulatedContent.length,
|
|
1776
|
-
});
|
|
1777
|
-
// Add continuation request
|
|
1778
|
-
const continuationMessage = {
|
|
1779
|
-
role: 'assistant',
|
|
1780
|
-
content: accumulatedContent,
|
|
1781
|
-
};
|
|
1782
|
-
const continueRequest = {
|
|
1783
|
-
role: 'user',
|
|
1784
|
-
content: '[System: Please continue from where you left off. Do not repeat what you already said.]',
|
|
1785
|
-
};
|
|
1786
|
-
messages.push(continuationMessage, continueRequest);
|
|
1787
|
-
this.state.messages.push(continuationMessage, continueRequest);
|
|
1788
|
-
response = await this.callLLM(messages);
|
|
1789
|
-
// Accumulate content
|
|
1790
|
-
if (response.content) {
|
|
1791
|
-
accumulatedContent += response.content;
|
|
1792
|
-
}
|
|
1793
|
-
}
|
|
1794
|
-
// Update response with accumulated content
|
|
1795
|
-
if (continuations > 0) {
|
|
1796
|
-
response = { ...response, content: accumulatedContent };
|
|
1797
|
-
this.emit({
|
|
1798
|
-
type: 'resilience.completed',
|
|
1799
|
-
reason: 'max_tokens_continuation',
|
|
1800
|
-
continuations,
|
|
1801
|
-
finalLength: accumulatedContent.length,
|
|
1802
|
-
});
|
|
1803
|
-
}
|
|
1804
|
-
}
|
|
1805
|
-
// Phase 2b: Handle truncated tool calls (stopReason=max_tokens with tool calls present)
|
|
1806
|
-
// When a model hits max_tokens mid-tool-call, the JSON arguments are truncated and unparseable.
|
|
1807
|
-
// Instead of executing broken tool calls, strip them and ask the LLM to retry smaller.
|
|
1808
|
-
if (resilienceEnabled && response.stopReason === 'max_tokens' && response.toolCalls?.length) {
|
|
1809
|
-
this.emit({
|
|
1810
|
-
type: 'resilience.truncated_tool_call',
|
|
1811
|
-
toolNames: response.toolCalls.map(tc => tc.name),
|
|
1812
|
-
});
|
|
1813
|
-
this.observability?.logger?.warn('Tool call truncated at max_tokens', {
|
|
1814
|
-
toolNames: response.toolCalls.map(tc => tc.name),
|
|
1815
|
-
outputTokens: response.usage?.outputTokens,
|
|
1816
|
-
});
|
|
1817
|
-
// Strip truncated tool calls, inject recovery message
|
|
1818
|
-
const truncatedResponse = response;
|
|
1819
|
-
response = { ...response, toolCalls: undefined };
|
|
1820
|
-
const recoveryMessage = {
|
|
1821
|
-
role: 'user',
|
|
1822
|
-
content: '[System: Your previous tool call was truncated because the output exceeded the token limit. ' +
|
|
1823
|
-
'The tool call arguments were cut off and could not be parsed. ' +
|
|
1824
|
-
'Please retry with a smaller approach: for write_file, break the content into smaller chunks ' +
|
|
1825
|
-
'or use edit_file for targeted changes instead of rewriting entire files.]',
|
|
1826
|
-
};
|
|
1827
|
-
messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
|
|
1828
|
-
messages.push(recoveryMessage);
|
|
1829
|
-
this.state.messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
|
|
1830
|
-
this.state.messages.push(recoveryMessage);
|
|
1831
|
-
response = await this.callLLM(messages);
|
|
1832
|
-
}
|
|
1833
|
-
// Record LLM usage for economics
|
|
1834
|
-
if (this.economics && response.usage) {
|
|
1835
|
-
this.economics.recordLLMUsage(response.usage.inputTokens, response.usage.outputTokens, this.config.model, response.usage.cost // Use actual cost from provider when available
|
|
1836
|
-
);
|
|
1837
|
-
// =================================================================
|
|
1838
|
-
// POST-LLM BUDGET CHECK: Prevent tool execution if over budget
|
|
1839
|
-
// A single LLM call can push us over - catch it before running tools
|
|
1840
|
-
// =================================================================
|
|
1841
|
-
if (!forceTextOnly) {
|
|
1842
|
-
const postCheck = this.economics.checkBudget();
|
|
1843
|
-
if (!postCheck.canContinue) {
|
|
1844
|
-
this.observability?.logger?.warn('Budget exceeded after LLM call, skipping tool execution', {
|
|
1845
|
-
reason: postCheck.reason,
|
|
1846
|
-
});
|
|
1847
|
-
forceTextOnly = true;
|
|
1848
|
-
}
|
|
1849
|
-
}
|
|
1850
|
-
}
|
|
1851
|
-
// Add assistant message
|
|
1852
|
-
const assistantMessage = {
|
|
1853
|
-
role: 'assistant',
|
|
1854
|
-
content: response.content,
|
|
1855
|
-
toolCalls: response.toolCalls,
|
|
1856
|
-
...(response.thinking ? { metadata: { thinking: response.thinking } } : {}),
|
|
1857
|
-
};
|
|
1858
|
-
messages.push(assistantMessage);
|
|
1859
|
-
this.state.messages.push(assistantMessage);
|
|
1860
|
-
lastResponse = response.content || (response.thinking ? response.thinking : '');
|
|
1861
|
-
// In plan mode: capture exploration findings as we go (not just at the end)
|
|
1862
|
-
// This ensures we collect context from exploration iterations before writes are queued
|
|
1863
|
-
if (this.modeManager.getMode() === 'plan' && response.content && response.content.length > 50) {
|
|
1864
|
-
const hasReadOnlyTools = response.toolCalls?.every(tc => ['read_file', 'list_files', 'glob', 'grep', 'search', 'mcp_'].some(prefix => tc.name.startsWith(prefix) || tc.name === prefix));
|
|
1865
|
-
// Capture substantive exploration content (not just "let me read..." responses)
|
|
1866
|
-
if (hasReadOnlyTools && !response.content.match(/^(Let me|I'll|I will|I need to|First,)/i)) {
|
|
1867
|
-
this.pendingPlanManager.appendExplorationFinding(response.content.slice(0, 1000));
|
|
1868
|
-
}
|
|
1869
|
-
}
|
|
1870
|
-
// Check for tool calls
|
|
1871
|
-
// When forceTextOnly is set (e.g., max steps reached, graceful wrapup, or budget exceeded), ignore any tool calls
|
|
1872
|
-
const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
|
|
1873
|
-
if (!hasToolCalls || forceTextOnly) {
|
|
1874
|
-
// Log if we're ignoring tool calls due to forceTextOnly
|
|
1875
|
-
if (forceTextOnly && hasToolCalls) {
|
|
1876
|
-
this.observability?.logger?.info('Ignoring tool calls due to forceTextOnly (max steps reached)', {
|
|
1877
|
-
toolCallCount: response.toolCalls?.length,
|
|
1878
|
-
iteration: this.state.iteration,
|
|
1879
|
-
});
|
|
1880
|
-
}
|
|
1881
|
-
const incompleteAction = this.detectIncompleteActionResponse(response.content || '');
|
|
1882
|
-
const missingRequiredArtifact = ENFORCE_REQUESTED_ARTIFACTS
|
|
1883
|
-
? this.isRequestedArtifactMissing(requestedArtifact, executedToolNames)
|
|
1884
|
-
: false;
|
|
1885
|
-
const shouldRecoverIncompleteAction = resilienceEnabled
|
|
1886
|
-
&& INCOMPLETE_ACTION_RECOVERY
|
|
1887
|
-
&& !forceTextOnly
|
|
1888
|
-
&& (incompleteAction || missingRequiredArtifact);
|
|
1889
|
-
if (shouldRecoverIncompleteAction) {
|
|
1890
|
-
if (incompleteActionRetries < MAX_INCOMPLETE_ACTION_RETRIES) {
|
|
1891
|
-
incompleteActionRetries++;
|
|
1892
|
-
const reason = missingRequiredArtifact && requestedArtifact
|
|
1893
|
-
? `missing_requested_artifact:${requestedArtifact}`
|
|
1894
|
-
: 'future_intent_without_action';
|
|
1895
|
-
this.emit({
|
|
1896
|
-
type: 'resilience.incomplete_action_detected',
|
|
1897
|
-
reason,
|
|
1898
|
-
attempt: incompleteActionRetries,
|
|
1899
|
-
maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
|
|
1900
|
-
requiresArtifact: missingRequiredArtifact,
|
|
1901
|
-
});
|
|
1902
|
-
this.observability?.logger?.warn('Incomplete action detected, retrying with nudge', {
|
|
1903
|
-
reason,
|
|
1904
|
-
attempt: incompleteActionRetries,
|
|
1905
|
-
maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
|
|
1906
|
-
});
|
|
1907
|
-
const nudgeMessage = {
|
|
1908
|
-
role: 'user',
|
|
1909
|
-
content: missingRequiredArtifact && requestedArtifact
|
|
1910
|
-
? `[System: You said you would complete the next action, but no tool call was made. The task requires creating or updating "${requestedArtifact}". Execute the required tool now, or explicitly explain why it cannot be produced.]`
|
|
1911
|
-
: '[System: You described a next action but did not execute it. If work remains, call the required tool now. If the task is complete, provide a final answer with no pending action language.]',
|
|
1912
|
-
};
|
|
1913
|
-
messages.push(nudgeMessage);
|
|
1914
|
-
this.state.messages.push(nudgeMessage);
|
|
1915
|
-
continue;
|
|
1916
|
-
}
|
|
1917
|
-
const failureReason = missingRequiredArtifact && requestedArtifact
|
|
1918
|
-
? `incomplete_action_missing_artifact:${requestedArtifact}`
|
|
1919
|
-
: 'incomplete_action_unresolved';
|
|
1920
|
-
this.emit({
|
|
1921
|
-
type: 'resilience.incomplete_action_failed',
|
|
1922
|
-
reason: failureReason,
|
|
1923
|
-
attempts: incompleteActionRetries,
|
|
1924
|
-
maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
|
|
1925
|
-
});
|
|
1926
|
-
throw new Error(`LLM failed to complete requested action after ${incompleteActionRetries} retries (${failureReason})`);
|
|
1927
|
-
}
|
|
1928
|
-
if (incompleteActionRetries > 0) {
|
|
1929
|
-
this.emit({
|
|
1930
|
-
type: 'resilience.incomplete_action_recovered',
|
|
1931
|
-
reason: 'incomplete_action',
|
|
1932
|
-
attempts: incompleteActionRetries,
|
|
1933
|
-
});
|
|
1934
|
-
incompleteActionRetries = 0;
|
|
1935
|
-
}
|
|
1936
|
-
// Verification gate: if criteria not met, nudge agent to verify before completing
|
|
1937
|
-
if (this.verificationGate && !forceTextOnly) {
|
|
1938
|
-
const vResult = this.verificationGate.check();
|
|
1939
|
-
if (!vResult.satisfied && !vResult.forceAllow && vResult.nudge) {
|
|
1940
|
-
// Inject nudge and continue the loop
|
|
1941
|
-
const nudgeMessage = {
|
|
1942
|
-
role: 'user',
|
|
1943
|
-
content: vResult.nudge,
|
|
1944
|
-
};
|
|
1945
|
-
messages.push(nudgeMessage);
|
|
1946
|
-
this.state.messages.push(nudgeMessage);
|
|
1947
|
-
this.observability?.logger?.info('Verification gate nudge', {
|
|
1948
|
-
missing: vResult.missing,
|
|
1949
|
-
nudgeCount: this.verificationGate.getState().nudgeCount,
|
|
1950
|
-
});
|
|
1951
|
-
continue;
|
|
1952
|
-
}
|
|
1953
|
-
}
|
|
1954
|
-
// No tool calls (or forced to ignore), agent is done - compact tool outputs to save context
|
|
1955
|
-
// The model has "consumed" the tool outputs and produced a response,
|
|
1956
|
-
// so we can replace verbose outputs with compact summaries
|
|
1957
|
-
this.compactToolOutputs();
|
|
1958
|
-
// In plan mode: capture exploration summary from the final response
|
|
1959
|
-
// This provides context for what was learned during exploration before proposing changes
|
|
1960
|
-
if (this.modeManager.getMode() === 'plan' && this.pendingPlanManager.hasPendingPlan()) {
|
|
1961
|
-
const explorationContent = response.content || '';
|
|
1962
|
-
if (explorationContent.length > 0) {
|
|
1963
|
-
this.pendingPlanManager.setExplorationSummary(explorationContent);
|
|
1964
|
-
}
|
|
1965
|
-
}
|
|
1966
|
-
// Final validation: warn if response is still empty after all retries
|
|
1967
|
-
if (!response.content || response.content.length === 0) {
|
|
1968
|
-
this.observability?.logger?.error('Agent finished with empty response after all retries', {
|
|
1969
|
-
emptyRetries,
|
|
1970
|
-
continuations,
|
|
1971
|
-
iteration: this.state.iteration,
|
|
1972
|
-
});
|
|
1973
|
-
this.emit({
|
|
1974
|
-
type: 'resilience.failed',
|
|
1975
|
-
reason: 'empty_final_response',
|
|
1976
|
-
emptyRetries,
|
|
1977
|
-
continuations,
|
|
1978
|
-
});
|
|
1979
|
-
}
|
|
1980
|
-
// Record iteration end for tracing (no tool calls case)
|
|
1981
|
-
this.traceCollector?.record({
|
|
1982
|
-
type: 'iteration.end',
|
|
1983
|
-
data: { iterationNumber: this.state.iteration },
|
|
1984
|
-
});
|
|
1985
|
-
break;
|
|
1986
|
-
}
|
|
1987
|
-
// Execute tool calls (we know toolCalls is defined here due to the check above)
|
|
1988
|
-
const toolCalls = response.toolCalls;
|
|
1989
|
-
const toolResults = await this.executeToolCalls(toolCalls);
|
|
1990
|
-
// Record tool calls for economics/progress tracking + work log
|
|
1991
|
-
for (let i = 0; i < toolCalls.length; i++) {
|
|
1992
|
-
const toolCall = toolCalls[i];
|
|
1993
|
-
const result = toolResults[i];
|
|
1994
|
-
executedToolNames.add(toolCall.name);
|
|
1995
|
-
this.economics?.recordToolCall(toolCall.name, toolCall.arguments, result?.result);
|
|
1996
|
-
// Record in work log for compaction resilience
|
|
1997
|
-
const toolOutput = result?.result && typeof result.result === 'object' && 'output' in result.result
|
|
1998
|
-
? String(result.result.output)
|
|
1999
|
-
: typeof result?.result === 'string' ? result.result : undefined;
|
|
2000
|
-
this.workLog?.recordToolExecution(toolCall.name, toolCall.arguments, toolOutput);
|
|
2001
|
-
// Record in verification gate
|
|
2002
|
-
if (this.verificationGate) {
|
|
2003
|
-
if (toolCall.name === 'bash') {
|
|
2004
|
-
const toolRes = result?.result;
|
|
2005
|
-
const output = toolRes && typeof toolRes === 'object' && 'output' in toolRes
|
|
2006
|
-
? String(toolRes.output)
|
|
2007
|
-
: typeof toolRes === 'string' ? toolRes : '';
|
|
2008
|
-
const exitCode = toolRes && typeof toolRes === 'object' && toolRes.metadata
|
|
2009
|
-
? toolRes.metadata.exitCode ?? null
|
|
2010
|
-
: null;
|
|
2011
|
-
this.verificationGate.recordBashExecution(String(toolCall.arguments.command || ''), output, exitCode);
|
|
2012
|
-
}
|
|
2013
|
-
if (['write_file', 'edit_file'].includes(toolCall.name)) {
|
|
2014
|
-
this.verificationGate.recordFileChange();
|
|
2015
|
-
}
|
|
2016
|
-
}
|
|
2017
|
-
}
|
|
2018
|
-
// Add tool results to messages (with truncation and proactive budget management)
|
|
2019
|
-
const MAX_TOOL_OUTPUT_CHARS = 8000; // ~2000 tokens max per tool output
|
|
2020
|
-
// =======================================================================
|
|
2021
|
-
// PROACTIVE BUDGET CHECK - compact BEFORE we overflow, not after
|
|
2022
|
-
// Uses AutoCompactionManager if available for sophisticated compaction
|
|
2023
|
-
// =======================================================================
|
|
2024
|
-
const currentContextTokens = this.estimateContextTokens(messages);
|
|
2025
|
-
if (this.autoCompactionManager) {
|
|
2026
|
-
// Use the AutoCompactionManager for threshold-based compaction
|
|
2027
|
-
const compactionResult = await this.autoCompactionManager.checkAndMaybeCompact({
|
|
2028
|
-
currentTokens: currentContextTokens,
|
|
2029
|
-
messages: messages,
|
|
2030
|
-
});
|
|
2031
|
-
// Handle compaction result
|
|
2032
|
-
if (compactionResult.status === 'compacted' && compactionResult.compactedMessages) {
|
|
2033
|
-
// ─── Pre-compaction agentic turn ───────────────────────────────
|
|
2034
|
-
// Give the agent one LLM turn to summarize critical state before
|
|
2035
|
-
// compaction clears the context. On the first trigger we inject a
|
|
2036
|
-
// system message and skip compaction; on the next trigger (the
|
|
2037
|
-
// agent has already responded) we proceed with actual compaction.
|
|
2038
|
-
if (!this.compactionPending) {
|
|
2039
|
-
this.compactionPending = true;
|
|
2040
|
-
const preCompactionMsg = {
|
|
2041
|
-
role: 'user',
|
|
2042
|
-
content: '[SYSTEM] Context compaction is imminent. Summarize your current progress, key findings, and next steps into a single concise message. This will be preserved after compaction.',
|
|
2043
|
-
};
|
|
2044
|
-
messages.push(preCompactionMsg);
|
|
2045
|
-
this.state.messages.push(preCompactionMsg);
|
|
2046
|
-
this.observability?.logger?.info('Pre-compaction agentic turn: injected summary request');
|
|
2047
|
-
// Skip compaction this iteration — let the agent respond first
|
|
2048
|
-
// (continue to tool result processing below)
|
|
2049
|
-
}
|
|
2050
|
-
else {
|
|
2051
|
-
// Agent has had its chance to summarize — now compact for real
|
|
2052
|
-
this.compactionPending = false;
|
|
2053
|
-
// Pre-compaction checkpoint: save full state before discarding
|
|
2054
|
-
try {
|
|
2055
|
-
this.autoCheckpoint(true); // force=true bypasses frequency check
|
|
2056
|
-
}
|
|
2057
|
-
catch {
|
|
2058
|
-
// Non-critical — don't block compaction
|
|
2059
|
-
}
|
|
2060
|
-
// Replace messages with compacted version
|
|
2061
|
-
messages.length = 0;
|
|
2062
|
-
messages.push(...compactionResult.compactedMessages);
|
|
2063
|
-
this.state.messages.length = 0;
|
|
2064
|
-
this.state.messages.push(...compactionResult.compactedMessages);
|
|
2065
|
-
// Inject work log after compaction to prevent amnesia
|
|
2066
|
-
if (this.workLog?.hasContent()) {
|
|
2067
|
-
const workLogMessage = {
|
|
2068
|
-
role: 'user',
|
|
2069
|
-
content: this.workLog.toCompactString(),
|
|
2070
|
-
};
|
|
2071
|
-
messages.push(workLogMessage);
|
|
2072
|
-
this.state.messages.push(workLogMessage);
|
|
2073
|
-
}
|
|
2074
|
-
// Context recovery: re-inject critical state after compaction
|
|
2075
|
-
const recoveryParts = [];
|
|
2076
|
-
// Goals
|
|
2077
|
-
if (this.store) {
|
|
2078
|
-
const goalsSummary = this.store.getGoalsSummary();
|
|
2079
|
-
if (goalsSummary && goalsSummary !== 'No active goals.' && goalsSummary !== 'Goals feature not available.') {
|
|
2080
|
-
recoveryParts.push(goalsSummary);
|
|
2081
|
-
}
|
|
2082
|
-
}
|
|
2083
|
-
// Junctures (last 5 key moments)
|
|
2084
|
-
if (this.store) {
|
|
2085
|
-
const juncturesSummary = this.store.getJuncturesSummary(undefined, 5);
|
|
2086
|
-
if (juncturesSummary) {
|
|
2087
|
-
recoveryParts.push(juncturesSummary);
|
|
2088
|
-
}
|
|
2089
|
-
}
|
|
2090
|
-
// Learnings from past patterns
|
|
2091
|
-
if (this.learningStore) {
|
|
2092
|
-
const learnings = this.learningStore.getLearningContext({ maxLearnings: 3 });
|
|
2093
|
-
if (learnings) {
|
|
2094
|
-
recoveryParts.push(learnings);
|
|
2095
|
-
}
|
|
2096
|
-
}
|
|
2097
|
-
if (recoveryParts.length > 0) {
|
|
2098
|
-
const recoveryMessage = {
|
|
2099
|
-
role: 'user',
|
|
2100
|
-
content: `[CONTEXT RECOVERY — Re-injected after compaction]\n\n${recoveryParts.join('\n\n')}`,
|
|
2101
|
-
};
|
|
2102
|
-
messages.push(recoveryMessage);
|
|
2103
|
-
this.state.messages.push(recoveryMessage);
|
|
2104
|
-
}
|
|
2105
|
-
// Emit compaction event for observability
|
|
2106
|
-
const compactionTokensAfter = this.estimateContextTokens(messages);
|
|
2107
|
-
const compactionRecoveryInjected = recoveryParts.length > 0;
|
|
2108
|
-
const compactionEvent = {
|
|
2109
|
-
type: 'context.compacted',
|
|
2110
|
-
tokensBefore: currentContextTokens,
|
|
2111
|
-
tokensAfter: compactionTokensAfter,
|
|
2112
|
-
recoveryInjected: compactionRecoveryInjected,
|
|
2113
|
-
};
|
|
2114
|
-
this.emit(compactionEvent);
|
|
2115
|
-
// Record to trace collector for JSONL output
|
|
2116
|
-
if (this.traceCollector) {
|
|
2117
|
-
this.traceCollector.record({
|
|
2118
|
-
type: 'context.compacted',
|
|
2119
|
-
data: {
|
|
2120
|
-
tokensBefore: currentContextTokens,
|
|
2121
|
-
tokensAfter: compactionTokensAfter,
|
|
2122
|
-
recoveryInjected: compactionRecoveryInjected,
|
|
2123
|
-
},
|
|
2124
|
-
});
|
|
2125
|
-
}
|
|
2126
|
-
}
|
|
2127
|
-
}
|
|
2128
|
-
else if (compactionResult.status === 'hard_limit') {
|
|
2129
|
-
// Hard limit reached - this is serious, emit error
|
|
2130
|
-
this.emit({
|
|
2131
|
-
type: 'error',
|
|
2132
|
-
error: `Context hard limit reached (${Math.round(compactionResult.ratio * 100)}% of max tokens)`,
|
|
2133
|
-
});
|
|
2134
|
-
break;
|
|
2135
|
-
}
|
|
2136
|
-
}
|
|
2137
|
-
else if (this.economics) {
|
|
2138
|
-
// Fallback to simple compaction
|
|
2139
|
-
const currentUsage = this.economics.getUsage();
|
|
2140
|
-
const budget = this.economics.getBudget();
|
|
2141
|
-
const percentUsed = (currentUsage.tokens / budget.maxTokens) * 100;
|
|
2142
|
-
// If we're at 70%+ of budget, proactively compact to make room
|
|
2143
|
-
if (percentUsed >= 70) {
|
|
2144
|
-
this.observability?.logger?.info('Proactive compaction triggered', {
|
|
2145
|
-
percentUsed: Math.round(percentUsed),
|
|
2146
|
-
currentTokens: currentUsage.tokens,
|
|
2147
|
-
maxTokens: budget.maxTokens,
|
|
2148
|
-
});
|
|
2149
|
-
// Also checkpoint before fallback compaction
|
|
2150
|
-
try {
|
|
2151
|
-
this.autoCheckpoint(true);
|
|
2152
|
-
}
|
|
2153
|
-
catch {
|
|
2154
|
-
// Non-critical
|
|
2155
|
-
}
|
|
2156
|
-
this.compactToolOutputs();
|
|
2157
|
-
}
|
|
2158
|
-
}
|
|
2159
|
-
const toolCallNameById = new Map(toolCalls.map(tc => [tc.id, tc.name]));
|
|
2160
|
-
for (const result of toolResults) {
|
|
2161
|
-
let content = typeof result.result === 'string' ? result.result : stableStringify(result.result);
|
|
2162
|
-
const sourceToolName = toolCallNameById.get(result.callId);
|
|
2163
|
-
const isExpensiveResult = sourceToolName === 'spawn_agent' || sourceToolName === 'spawn_agents_parallel';
|
|
2164
|
-
// Truncate long outputs to save context
|
|
2165
|
-
// Use larger limit for subagent results to preserve critical context
|
|
2166
|
-
const effectiveMaxChars = isExpensiveResult ? MAX_TOOL_OUTPUT_CHARS * 2 : MAX_TOOL_OUTPUT_CHARS;
|
|
2167
|
-
if (content.length > effectiveMaxChars) {
|
|
2168
|
-
content = content.slice(0, effectiveMaxChars) + `\n\n... [truncated ${content.length - effectiveMaxChars} chars]`;
|
|
2169
|
-
}
|
|
2170
|
-
// =======================================================================
|
|
2171
|
-
// ESTIMATE if adding this result would exceed budget
|
|
2172
|
-
// =======================================================================
|
|
2173
|
-
if (this.economics) {
|
|
2174
|
-
const estimatedNewTokens = Math.ceil(content.length / 4); // ~4 chars per token
|
|
2175
|
-
const currentContextTokens = this.estimateContextTokens(messages);
|
|
2176
|
-
const budget = this.economics.getBudget();
|
|
2177
|
-
// Check if adding this would push us over the hard limit
|
|
2178
|
-
if (currentContextTokens + estimatedNewTokens > budget.maxTokens * 0.95) {
|
|
2179
|
-
this.observability?.logger?.warn('Skipping tool result to stay within budget', {
|
|
2180
|
-
toolCallId: result.callId,
|
|
2181
|
-
estimatedTokens: estimatedNewTokens,
|
|
2182
|
-
currentContext: currentContextTokens,
|
|
2183
|
-
limit: budget.maxTokens,
|
|
2184
|
-
});
|
|
2185
|
-
// Add a truncated placeholder instead
|
|
2186
|
-
const toolMessage = {
|
|
2187
|
-
role: 'tool',
|
|
2188
|
-
content: `[Result omitted to stay within token budget. Original size: ${content.length} chars]`,
|
|
2189
|
-
toolCallId: result.callId,
|
|
2190
|
-
};
|
|
2191
|
-
messages.push(toolMessage);
|
|
2192
|
-
this.state.messages.push(toolMessage);
|
|
2193
|
-
continue;
|
|
2194
|
-
}
|
|
2195
|
-
}
|
|
2196
|
-
const toolMessage = {
|
|
2197
|
-
role: 'tool',
|
|
2198
|
-
content,
|
|
2199
|
-
toolCallId: result.callId,
|
|
2200
|
-
...(isExpensiveResult
|
|
2201
|
-
? {
|
|
2202
|
-
metadata: {
|
|
2203
|
-
preserveFromCompaction: true,
|
|
2204
|
-
costToRegenerate: 'high',
|
|
2205
|
-
source: sourceToolName,
|
|
2206
|
-
},
|
|
2207
|
-
}
|
|
2208
|
-
: {}),
|
|
2209
|
-
};
|
|
2210
|
-
messages.push(toolMessage);
|
|
2211
|
-
this.state.messages.push(toolMessage);
|
|
2212
|
-
}
|
|
2213
|
-
// Emit context health after adding tool results
|
|
2214
|
-
const currentTokenEstimate = this.estimateContextTokens(messages);
|
|
2215
|
-
const contextLimit = this.getMaxContextTokens();
|
|
2216
|
-
const percentUsed = Math.round((currentTokenEstimate / contextLimit) * 100);
|
|
2217
|
-
const avgTokensPerExchange = currentTokenEstimate / Math.max(1, this.state.iteration);
|
|
2218
|
-
const remainingTokens = contextLimit - currentTokenEstimate;
|
|
2219
|
-
const estimatedExchanges = Math.floor(remainingTokens / Math.max(1, avgTokensPerExchange));
|
|
2220
|
-
this.emit({
|
|
2221
|
-
type: 'context.health',
|
|
2222
|
-
currentTokens: currentTokenEstimate,
|
|
2223
|
-
maxTokens: contextLimit,
|
|
2224
|
-
estimatedExchanges,
|
|
2225
|
-
percentUsed,
|
|
2226
|
-
});
|
|
2227
|
-
// Record iteration end for tracing (after tool execution)
|
|
2228
|
-
this.traceCollector?.record({
|
|
2229
|
-
type: 'iteration.end',
|
|
2230
|
-
data: { iterationNumber: this.state.iteration },
|
|
2231
|
-
});
|
|
2232
|
-
}
|
|
2233
|
-
// =======================================================================
|
|
2234
|
-
// REFLECTION (Lesson 16)
|
|
2235
|
-
// =======================================================================
|
|
2236
|
-
if (autoReflect && this.planning && reflectionAttempt < maxReflectionAttempts) {
|
|
2237
|
-
this.emit({ type: 'reflection', attempt: reflectionAttempt, satisfied: false });
|
|
2238
|
-
const reflectionResult = await this.planning.reflect(task, lastResponse, this.provider);
|
|
2239
|
-
this.state.metrics.reflectionAttempts = reflectionAttempt;
|
|
2240
|
-
if (reflectionResult.satisfied && reflectionResult.confidence >= confidenceThreshold) {
|
|
2241
|
-
// Output is satisfactory
|
|
2242
|
-
this.emit({ type: 'reflection', attempt: reflectionAttempt, satisfied: true });
|
|
2243
|
-
break;
|
|
2244
|
-
}
|
|
2245
|
-
// Not satisfied - add feedback and continue
|
|
2246
|
-
const feedbackMessage = {
|
|
2247
|
-
role: 'user',
|
|
2248
|
-
content: `[Reflection feedback]\nThe previous output needs improvement:\n- Critique: ${reflectionResult.critique}\n- Suggestions: ${reflectionResult.suggestions.join(', ')}\n\nPlease improve the output.`,
|
|
2249
|
-
};
|
|
2250
|
-
messages.push(feedbackMessage);
|
|
2251
|
-
this.state.messages.push(feedbackMessage);
|
|
2252
|
-
this.observability?.logger?.info('Reflection not satisfied, retrying', {
|
|
2253
|
-
attempt: reflectionAttempt,
|
|
2254
|
-
confidence: reflectionResult.confidence,
|
|
2255
|
-
critique: reflectionResult.critique,
|
|
2256
|
-
});
|
|
2257
|
-
}
|
|
2258
|
-
else {
|
|
2259
|
-
// No reflection or already satisfied
|
|
2260
|
-
break;
|
|
2261
|
-
}
|
|
2262
|
-
}
|
|
2263
|
-
// Store conversation in memory
|
|
2264
|
-
this.memory?.storeConversation(this.state.messages);
|
|
2265
|
-
this.updateMemoryStats();
|
|
1539
|
+
const messages = await this.buildMessages(task);
|
|
1540
|
+
const ctx = this.buildContext();
|
|
1541
|
+
const mutators = this.buildMutators();
|
|
1542
|
+
return coreExecuteDirectly(task, messages, ctx, mutators);
|
|
2266
1543
|
}
|
|
2267
1544
|
/**
|
|
2268
1545
|
* Build messages for LLM call.
|
|
@@ -2270,7 +1547,7 @@ export class ProductionAgent {
|
|
|
2270
1547
|
* Uses cache-aware system prompt building (Trick P) when contextEngineering
|
|
2271
1548
|
* is available, ensuring static content is ordered for optimal KV-cache reuse.
|
|
2272
1549
|
*/
|
|
2273
|
-
buildMessages(task) {
|
|
1550
|
+
async buildMessages(task) {
|
|
2274
1551
|
const messages = [];
|
|
2275
1552
|
// Gather all context components
|
|
2276
1553
|
const rulesContent = this.rules?.getRulesContent() ?? '';
|
|
@@ -2289,12 +1566,18 @@ export class ProductionAgent {
|
|
|
2289
1566
|
const reservedTokens = 10500;
|
|
2290
1567
|
const maxContextTokens = (this.config.maxContextTokens ?? 80000) - reservedTokens;
|
|
2291
1568
|
const codebaseBudget = Math.min(maxContextTokens * 0.3, 15000); // Up to 30% or 15K tokens
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
if (!repoMap && !this.codebaseAnalysisTriggered) {
|
|
1569
|
+
// Synchronous analysis on first system prompt build so context is available immediately
|
|
1570
|
+
if (!this.codebaseContext.getRepoMap() && !this.codebaseAnalysisTriggered) {
|
|
2295
1571
|
this.codebaseAnalysisTriggered = true;
|
|
2296
|
-
|
|
1572
|
+
try {
|
|
1573
|
+
await this.codebaseContext.analyze();
|
|
1574
|
+
}
|
|
1575
|
+
catch {
|
|
1576
|
+
// non-fatal — agent can still work without codebase context
|
|
1577
|
+
}
|
|
2297
1578
|
}
|
|
1579
|
+
// Get repo map AFTER analysis so we have fresh data on first prompt
|
|
1580
|
+
const repoMap = this.codebaseContext.getRepoMap();
|
|
2298
1581
|
if (repoMap) {
|
|
2299
1582
|
try {
|
|
2300
1583
|
const selection = this.selectRelevantCodeSync(task, codebaseBudget);
|
|
@@ -2396,7 +1679,7 @@ export class ProductionAgent {
|
|
|
2396
1679
|
}
|
|
2397
1680
|
// Safety check: ensure system prompt is not empty
|
|
2398
1681
|
if (!systemPrompt || systemPrompt.trim().length === 0) {
|
|
2399
|
-
|
|
1682
|
+
log.warn('Empty system prompt detected, using fallback');
|
|
2400
1683
|
systemPrompt = this.config.systemPrompt || 'You are a helpful AI assistant.';
|
|
2401
1684
|
}
|
|
2402
1685
|
messages.push({ role: 'system', content: systemPrompt });
|
|
@@ -2409,625 +1692,79 @@ export class ProductionAgent {
|
|
|
2409
1692
|
}
|
|
2410
1693
|
// Add current task
|
|
2411
1694
|
messages.push({ role: 'user', content: task });
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2415
|
-
|
|
2416
|
-
|
|
2417
|
-
async callLLM(messages) {
|
|
2418
|
-
const spanId = this.observability?.tracer?.startSpan('llm.call');
|
|
2419
|
-
this.emit({ type: 'llm.start', model: this.config.model || 'default' });
|
|
2420
|
-
// Prompt caching (Improvement P1): Replace the system message with structured content
|
|
2421
|
-
// that includes cache_control markers, enabling 60-70% cache hit rates.
|
|
2422
|
-
// Only use structured cache_control markers for Anthropic models — other providers
|
|
2423
|
-
// (DeepSeek, Grok, etc.) use automatic prefix-based caching and don't understand these markers.
|
|
2424
|
-
const configModel = this.config.model || 'default';
|
|
2425
|
-
const isAnthropicModel = configModel.startsWith('anthropic/') || configModel.startsWith('claude-');
|
|
2426
|
-
let providerMessages = messages;
|
|
2427
|
-
if (isAnthropicModel && this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
|
|
2428
|
-
providerMessages = messages.map((m, i) => {
|
|
2429
|
-
if (i === 0 && m.role === 'system') {
|
|
2430
|
-
// Replace system message with structured cacheable content
|
|
2431
|
-
return {
|
|
2432
|
-
role: 'system',
|
|
2433
|
-
content: this.cacheableSystemBlocks,
|
|
2434
|
-
};
|
|
2435
|
-
}
|
|
2436
|
-
return m;
|
|
2437
|
-
});
|
|
2438
|
-
}
|
|
2439
|
-
// Emit context insight for verbose feedback
|
|
2440
|
-
const estimatedTokens = messages.reduce((sum, m) => {
|
|
2441
|
-
const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
|
|
2442
|
-
return sum + Math.ceil(content.length / 3.5); // ~3.5 chars per token estimate
|
|
2443
|
-
}, 0);
|
|
2444
|
-
// Use context window size, not output token limit
|
|
2445
|
-
const contextLimit = this.getMaxContextTokens();
|
|
2446
|
-
this.emit({
|
|
2447
|
-
type: 'insight.context',
|
|
2448
|
-
currentTokens: estimatedTokens,
|
|
2449
|
-
maxTokens: contextLimit,
|
|
2450
|
-
messageCount: messages.length,
|
|
2451
|
-
percentUsed: Math.round((estimatedTokens / contextLimit) * 100),
|
|
2452
|
-
});
|
|
2453
|
-
const startTime = Date.now();
|
|
2454
|
-
const requestId = `req-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
2455
|
-
// Debug: Log message count and structure (helps diagnose API errors)
|
|
2456
|
-
if (process.env.DEBUG_LLM) {
|
|
2457
|
-
console.log(`[callLLM] Sending ${messages.length} messages:`);
|
|
2458
|
-
messages.forEach((m, i) => {
|
|
2459
|
-
console.log(` [${i}] ${m.role}: ${m.content?.slice(0, 50)}...`);
|
|
2460
|
-
});
|
|
2461
|
-
}
|
|
2462
|
-
// Validate messages are not empty
|
|
2463
|
-
if (!messages || messages.length === 0) {
|
|
2464
|
-
throw new Error('No messages to send to LLM');
|
|
2465
|
-
}
|
|
2466
|
-
// Lesson 26: Record LLM request for tracing
|
|
2467
|
-
const model = this.config.model || 'default';
|
|
2468
|
-
const provider = this.config.provider?.name || 'unknown';
|
|
2469
|
-
this.traceCollector?.record({
|
|
2470
|
-
type: 'llm.request',
|
|
2471
|
-
data: {
|
|
2472
|
-
requestId,
|
|
2473
|
-
model,
|
|
2474
|
-
provider,
|
|
2475
|
-
messages: messages.map(m => ({
|
|
2476
|
-
role: m.role,
|
|
2477
|
-
content: m.content,
|
|
2478
|
-
toolCallId: m.toolCallId,
|
|
2479
|
-
toolCalls: m.toolCalls?.map(tc => ({
|
|
2480
|
-
id: tc.id,
|
|
2481
|
-
name: tc.name,
|
|
2482
|
-
arguments: tc.arguments,
|
|
2483
|
-
})),
|
|
2484
|
-
})),
|
|
2485
|
-
tools: Array.from(this.tools.values()).map(t => ({
|
|
2486
|
-
name: t.name,
|
|
2487
|
-
description: t.description,
|
|
2488
|
-
parametersSchema: t.parameters,
|
|
2489
|
-
})),
|
|
2490
|
-
parameters: {
|
|
2491
|
-
maxTokens: this.config.maxTokens,
|
|
2492
|
-
temperature: this.config.temperature,
|
|
2493
|
-
},
|
|
2494
|
-
},
|
|
2495
|
-
});
|
|
2496
|
-
// Pause duration budget during LLM call - network time shouldn't count against agent
|
|
2497
|
-
this.economics?.pauseDuration();
|
|
2498
|
-
try {
|
|
2499
|
-
let response;
|
|
2500
|
-
let actualModel = model;
|
|
2501
|
-
// Use routing if enabled
|
|
2502
|
-
if (this.routing) {
|
|
2503
|
-
const complexity = this.routing.estimateComplexity(messages[messages.length - 1]?.content || '');
|
|
2504
|
-
const context = {
|
|
2505
|
-
task: messages[messages.length - 1]?.content || '',
|
|
2506
|
-
complexity,
|
|
2507
|
-
hasTools: this.tools.size > 0,
|
|
2508
|
-
hasImages: false,
|
|
2509
|
-
taskType: 'general',
|
|
2510
|
-
estimatedTokens: messages.reduce((sum, m) => sum + m.content.length / 4, 0),
|
|
2511
|
-
};
|
|
2512
|
-
const result = await this.routing.executeWithFallback(providerMessages, context);
|
|
2513
|
-
response = result.response;
|
|
2514
|
-
actualModel = result.model;
|
|
2515
|
-
// Emit routing insight
|
|
2516
|
-
this.emit({
|
|
2517
|
-
type: 'insight.routing',
|
|
2518
|
-
model: actualModel,
|
|
2519
|
-
reason: actualModel !== model ? 'Routed based on complexity' : 'Default model',
|
|
2520
|
-
complexity: complexity <= 0.3 ? 'low' : complexity <= 0.7 ? 'medium' : 'high',
|
|
2521
|
-
});
|
|
2522
|
-
// Emit decision transparency event
|
|
2523
|
-
this.emit({
|
|
2524
|
-
type: 'decision.routing',
|
|
2525
|
-
model: actualModel,
|
|
2526
|
-
reason: actualModel !== model
|
|
2527
|
-
? `Complexity ${(complexity * 100).toFixed(0)}% - using ${actualModel}`
|
|
2528
|
-
: 'Default model for current task',
|
|
2529
|
-
alternatives: actualModel !== model
|
|
2530
|
-
? [{ model, rejected: 'complexity threshold exceeded' }]
|
|
2531
|
-
: undefined,
|
|
2532
|
-
});
|
|
2533
|
-
// Enhanced tracing: Record routing decision
|
|
2534
|
-
this.traceCollector?.record({
|
|
2535
|
-
type: 'decision',
|
|
2536
|
-
data: {
|
|
2537
|
-
type: 'routing',
|
|
2538
|
-
decision: `Selected model: ${actualModel}`,
|
|
2539
|
-
outcome: 'allowed',
|
|
2540
|
-
reasoning: actualModel !== model
|
|
2541
|
-
? `Task complexity ${(complexity * 100).toFixed(0)}% exceeded threshold - routed to ${actualModel}`
|
|
2542
|
-
: `Default model ${model} suitable for task complexity ${(complexity * 100).toFixed(0)}%`,
|
|
2543
|
-
factors: [
|
|
2544
|
-
{ name: 'complexity', value: complexity, weight: 0.8 },
|
|
2545
|
-
{ name: 'hasTools', value: context.hasTools, weight: 0.1 },
|
|
2546
|
-
{ name: 'taskType', value: context.taskType, weight: 0.1 },
|
|
2547
|
-
],
|
|
2548
|
-
alternatives: actualModel !== model
|
|
2549
|
-
? [{ option: model, reason: 'complexity threshold exceeded', rejected: true }]
|
|
2550
|
-
: undefined,
|
|
2551
|
-
confidence: 0.9,
|
|
2552
|
-
},
|
|
2553
|
-
});
|
|
2554
|
-
}
|
|
2555
|
-
else {
|
|
2556
|
-
response = await this.provider.chat(providerMessages, {
|
|
2557
|
-
model: this.config.model,
|
|
2558
|
-
tools: Array.from(this.tools.values()),
|
|
2559
|
-
});
|
|
2560
|
-
}
|
|
2561
|
-
const duration = Date.now() - startTime;
|
|
2562
|
-
// Debug cache stats when DEBUG_CACHE is set
|
|
2563
|
-
if (process.env.DEBUG_CACHE) {
|
|
2564
|
-
const cr = response.usage?.cacheReadTokens ?? 0;
|
|
2565
|
-
const cw = response.usage?.cacheWriteTokens ?? 0;
|
|
2566
|
-
const inp = response.usage?.inputTokens ?? 0;
|
|
2567
|
-
const hitRate = inp > 0 ? ((cr / inp) * 100).toFixed(1) : '0.0';
|
|
2568
|
-
console.log(`[Cache] model=${actualModel} read=${cr} write=${cw} input=${inp} hit=${hitRate}%`);
|
|
2569
|
-
}
|
|
2570
|
-
// Lesson 26: Record LLM response for tracing
|
|
2571
|
-
this.traceCollector?.record({
|
|
2572
|
-
type: 'llm.response',
|
|
2573
|
-
data: {
|
|
2574
|
-
requestId,
|
|
2575
|
-
content: response.content || '',
|
|
2576
|
-
toolCalls: response.toolCalls?.map(tc => ({
|
|
2577
|
-
id: tc.id,
|
|
2578
|
-
name: tc.name,
|
|
2579
|
-
arguments: tc.arguments,
|
|
2580
|
-
})),
|
|
2581
|
-
stopReason: response.stopReason === 'end_turn' ? 'end_turn'
|
|
2582
|
-
: response.stopReason === 'tool_use' ? 'tool_use'
|
|
2583
|
-
: response.stopReason === 'max_tokens' ? 'max_tokens'
|
|
2584
|
-
: 'stop_sequence',
|
|
2585
|
-
usage: {
|
|
2586
|
-
inputTokens: response.usage?.inputTokens || 0,
|
|
2587
|
-
outputTokens: response.usage?.outputTokens || 0,
|
|
2588
|
-
cacheReadTokens: response.usage?.cacheReadTokens,
|
|
2589
|
-
cacheWriteTokens: response.usage?.cacheWriteTokens,
|
|
2590
|
-
cost: response.usage?.cost, // Actual cost from provider (e.g., OpenRouter)
|
|
2591
|
-
},
|
|
2592
|
-
durationMs: duration,
|
|
2593
|
-
},
|
|
2594
|
-
});
|
|
2595
|
-
// Enhanced tracing: Record thinking/reasoning blocks if present
|
|
2596
|
-
if (response.thinking) {
|
|
2597
|
-
this.traceCollector?.record({
|
|
2598
|
-
type: 'llm.thinking',
|
|
2599
|
-
data: {
|
|
2600
|
-
requestId,
|
|
2601
|
-
content: response.thinking,
|
|
2602
|
-
summarized: response.thinking.length > 10000, // Summarize if very long
|
|
2603
|
-
originalLength: response.thinking.length,
|
|
2604
|
-
durationMs: duration,
|
|
2605
|
-
},
|
|
2606
|
-
});
|
|
2607
|
-
}
|
|
2608
|
-
// Record metrics
|
|
2609
|
-
this.observability?.metrics?.recordLLMCall(response.usage?.inputTokens || 0, response.usage?.outputTokens || 0, duration, actualModel, response.usage?.cost // Actual cost from provider (e.g., OpenRouter)
|
|
2610
|
-
);
|
|
2611
|
-
this.state.metrics.llmCalls++;
|
|
2612
|
-
this.state.metrics.inputTokens += response.usage?.inputTokens || 0;
|
|
2613
|
-
this.state.metrics.outputTokens += response.usage?.outputTokens || 0;
|
|
2614
|
-
this.state.metrics.totalTokens = this.state.metrics.inputTokens + this.state.metrics.outputTokens;
|
|
2615
|
-
this.emit({ type: 'llm.complete', response });
|
|
2616
|
-
// Emit token usage insight for verbose feedback
|
|
2617
|
-
if (response.usage) {
|
|
2618
|
-
this.emit({
|
|
2619
|
-
type: 'insight.tokens',
|
|
2620
|
-
inputTokens: response.usage.inputTokens,
|
|
2621
|
-
outputTokens: response.usage.outputTokens,
|
|
2622
|
-
cacheReadTokens: response.usage.cacheReadTokens,
|
|
2623
|
-
cacheWriteTokens: response.usage.cacheWriteTokens,
|
|
2624
|
-
cost: response.usage.cost,
|
|
2625
|
-
model: actualModel,
|
|
2626
|
-
});
|
|
2627
|
-
}
|
|
2628
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
2629
|
-
return response;
|
|
2630
|
-
}
|
|
2631
|
-
catch (err) {
|
|
2632
|
-
const error = err instanceof Error ? err : new Error(String(err));
|
|
2633
|
-
this.observability?.tracer?.recordError(error);
|
|
2634
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
2635
|
-
throw error;
|
|
2636
|
-
}
|
|
2637
|
-
finally {
|
|
2638
|
-
// Resume duration budget after LLM call completes (success or failure)
|
|
2639
|
-
this.economics?.resumeDuration();
|
|
2640
|
-
}
|
|
2641
|
-
}
|
|
2642
|
-
/**
|
|
2643
|
-
* Execute an async callback while excluding wall-clock wait time from duration budgeting.
|
|
2644
|
-
* Used for external waits such as approval dialogs and delegation confirmation.
|
|
2645
|
-
*/
|
|
2646
|
-
async withPausedDuration(fn) {
|
|
2647
|
-
this.economics?.pauseDuration();
|
|
2648
|
-
try {
|
|
2649
|
-
return await fn();
|
|
2650
|
-
}
|
|
2651
|
-
finally {
|
|
2652
|
-
this.economics?.resumeDuration();
|
|
1695
|
+
// Track system prompt length for context % estimation
|
|
1696
|
+
const sysMsg = messages.find(m => m.role === 'system');
|
|
1697
|
+
if (sysMsg) {
|
|
1698
|
+
const content = typeof sysMsg.content === 'string' ? sysMsg.content : JSON.stringify(sysMsg.content);
|
|
1699
|
+
this.lastSystemPromptLength = content.length;
|
|
2653
1700
|
}
|
|
1701
|
+
return messages;
|
|
2654
1702
|
}
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
1703
|
+
// ===========================================================================
|
|
1704
|
+
// CONTEXT BUILDERS — Bridge private fields to extracted core modules
|
|
1705
|
+
// ===========================================================================
|
|
1706
|
+
buildContext() {
|
|
1707
|
+
return {
|
|
1708
|
+
config: this.config, agentId: this.agentId, provider: this.provider,
|
|
1709
|
+
tools: this.tools, state: this.state,
|
|
1710
|
+
modeManager: this.modeManager, pendingPlanManager: this.pendingPlanManager,
|
|
1711
|
+
hooks: this.hooks, economics: this.economics, cancellation: this.cancellation,
|
|
1712
|
+
resourceManager: this.resourceManager, safety: this.safety,
|
|
1713
|
+
observability: this.observability, contextEngineering: this.contextEngineering,
|
|
1714
|
+
traceCollector: this.traceCollector, executionPolicy: this.executionPolicy,
|
|
1715
|
+
routing: this.routing, planning: this.planning, memory: this.memory,
|
|
1716
|
+
react: this.react, blackboard: this.blackboard, fileCache: this.fileCache,
|
|
1717
|
+
budgetPool: this.budgetPool, taskManager: this.taskManager, store: this.store,
|
|
1718
|
+
codebaseContext: this.codebaseContext, learningStore: this.learningStore,
|
|
1719
|
+
compactor: this.compactor, autoCompactionManager: this.autoCompactionManager,
|
|
1720
|
+
workLog: this.workLog, verificationGate: this.verificationGate,
|
|
1721
|
+
agentRegistry: this.agentRegistry, toolRecommendation: this.toolRecommendation,
|
|
1722
|
+
selfImprovement: this.selfImprovement, subagentOutputStore: this.subagentOutputStore,
|
|
1723
|
+
autoCheckpointManager: this.autoCheckpointManager, injectionBudget: this.injectionBudget,
|
|
1724
|
+
skillManager: this.skillManager, semanticCache: this.semanticCache,
|
|
1725
|
+
lspManager: this.lspManager, threadManager: this.threadManager,
|
|
1726
|
+
interactivePlanner: this.interactivePlanner, recursiveContext: this.recursiveContext,
|
|
1727
|
+
fileChangeTracker: this.fileChangeTracker, capabilitiesRegistry: this.capabilitiesRegistry,
|
|
1728
|
+
rules: this.rules, stateMachine: this.stateMachine,
|
|
1729
|
+
lastComplexityAssessment: this.lastComplexityAssessment,
|
|
1730
|
+
cacheableSystemBlocks: this.cacheableSystemBlocks,
|
|
1731
|
+
parentIterations: this.parentIterations,
|
|
1732
|
+
externalCancellationToken: this.externalCancellationToken,
|
|
1733
|
+
wrapupRequested: this.wrapupRequested, wrapupReason: this.wrapupReason,
|
|
1734
|
+
compactionPending: this.compactionPending,
|
|
1735
|
+
sharedContextState: this._sharedContextState,
|
|
1736
|
+
sharedEconomicsState: this._sharedEconomicsState,
|
|
1737
|
+
spawnedTasks: this.spawnedTasks, toolResolver: this.toolResolver,
|
|
1738
|
+
emit: (event) => this.emit(event),
|
|
1739
|
+
addTool: (tool) => this.addTool(tool),
|
|
1740
|
+
getMaxContextTokens: () => this.getMaxContextTokens(),
|
|
1741
|
+
getTotalIterations: () => this.getTotalIterations(),
|
|
1742
|
+
};
|
|
2687
1743
|
}
|
|
2688
|
-
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
type: 'tool.start',
|
|
2710
|
-
data: {
|
|
2711
|
-
executionId,
|
|
2712
|
-
toolName: toolCall.name,
|
|
2713
|
-
arguments: toolCall.arguments,
|
|
2714
|
-
},
|
|
2715
|
-
});
|
|
2716
|
-
try {
|
|
2717
|
-
// =====================================================================
|
|
2718
|
-
// PLAN MODE WRITE INTERCEPTION
|
|
2719
|
-
// =====================================================================
|
|
2720
|
-
// In plan mode, intercept write operations and queue them as proposed changes
|
|
2721
|
-
if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
|
|
2722
|
-
// Extract contextual reasoning instead of simple truncation
|
|
2723
|
-
const reason = this.extractChangeReasoning(toolCall, this.state.messages);
|
|
2724
|
-
// Start a new plan if needed
|
|
2725
|
-
if (!this.pendingPlanManager.hasPendingPlan()) {
|
|
2726
|
-
const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
|
|
2727
|
-
const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
|
|
2728
|
-
this.pendingPlanManager.startPlan(task);
|
|
2729
|
-
}
|
|
2730
|
-
// Queue the write operation
|
|
2731
|
-
const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
|
|
2732
|
-
// Emit event for UI
|
|
2733
|
-
this.emit({
|
|
2734
|
-
type: 'plan.change.queued',
|
|
2735
|
-
tool: toolCall.name,
|
|
2736
|
-
changeId: change?.id,
|
|
2737
|
-
summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
|
|
2738
|
-
});
|
|
2739
|
-
// Return a message indicating the change was queued
|
|
2740
|
-
const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
|
|
2741
|
-
`Tool: ${toolCall.name}\n` +
|
|
2742
|
-
`${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
|
|
2743
|
-
`Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
|
|
2744
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
2745
|
-
return { callId: toolCall.id, result: queueMessage };
|
|
2746
|
-
}
|
|
2747
|
-
// =====================================================================
|
|
2748
|
-
// EXECUTION POLICY ENFORCEMENT (Lesson 23)
|
|
2749
|
-
// =====================================================================
|
|
2750
|
-
let policyApprovedByUser = false;
|
|
2751
|
-
if (this.executionPolicy) {
|
|
2752
|
-
const policyContext = {
|
|
2753
|
-
messages: this.state.messages,
|
|
2754
|
-
currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
|
|
2755
|
-
previousToolCalls: [],
|
|
2756
|
-
};
|
|
2757
|
-
const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
|
|
2758
|
-
// Emit policy event
|
|
2759
|
-
this.emit({
|
|
2760
|
-
type: 'policy.evaluated',
|
|
2761
|
-
tool: toolCall.name,
|
|
2762
|
-
policy: evaluation.policy,
|
|
2763
|
-
reason: evaluation.reason,
|
|
2764
|
-
});
|
|
2765
|
-
// Emit decision transparency event
|
|
2766
|
-
this.emit({
|
|
2767
|
-
type: 'decision.tool',
|
|
2768
|
-
tool: toolCall.name,
|
|
2769
|
-
decision: evaluation.policy === 'forbidden' ? 'blocked'
|
|
2770
|
-
: evaluation.policy === 'prompt' ? 'prompted'
|
|
2771
|
-
: 'allowed',
|
|
2772
|
-
policyMatch: evaluation.reason,
|
|
2773
|
-
});
|
|
2774
|
-
// Enhanced tracing: Record policy decision
|
|
2775
|
-
this.traceCollector?.record({
|
|
2776
|
-
type: 'decision',
|
|
2777
|
-
data: {
|
|
2778
|
-
type: 'policy',
|
|
2779
|
-
decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
|
|
2780
|
-
outcome: evaluation.policy === 'forbidden' ? 'blocked'
|
|
2781
|
-
: evaluation.policy === 'prompt' ? 'deferred'
|
|
2782
|
-
: 'allowed',
|
|
2783
|
-
reasoning: evaluation.reason,
|
|
2784
|
-
factors: [
|
|
2785
|
-
{ name: 'policy', value: evaluation.policy },
|
|
2786
|
-
{ name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
|
|
2787
|
-
],
|
|
2788
|
-
confidence: evaluation.intent?.confidence ?? 0.8,
|
|
2789
|
-
},
|
|
2790
|
-
});
|
|
2791
|
-
// Handle forbidden policy - always block
|
|
2792
|
-
if (evaluation.policy === 'forbidden') {
|
|
2793
|
-
this.emit({
|
|
2794
|
-
type: 'policy.tool.blocked',
|
|
2795
|
-
tool: toolCall.name,
|
|
2796
|
-
phase: 'enforced',
|
|
2797
|
-
reason: `Forbidden by execution policy: ${evaluation.reason}`,
|
|
2798
|
-
});
|
|
2799
|
-
throw new Error(`Forbidden by policy: ${evaluation.reason}`);
|
|
2800
|
-
}
|
|
2801
|
-
// Handle prompt policy - requires approval
|
|
2802
|
-
if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
|
|
2803
|
-
// Try to get approval through safety manager's human-in-loop
|
|
2804
|
-
const humanInLoop = this.safety?.humanInLoop;
|
|
2805
|
-
if (humanInLoop) {
|
|
2806
|
-
const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
|
|
2807
|
-
if (!approval.approved) {
|
|
2808
|
-
throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
|
|
2809
|
-
}
|
|
2810
|
-
policyApprovedByUser = true;
|
|
2811
|
-
// Create a grant for future similar calls if approved
|
|
2812
|
-
this.executionPolicy.createGrant({
|
|
2813
|
-
toolName: toolCall.name,
|
|
2814
|
-
grantedBy: 'user',
|
|
2815
|
-
reason: 'Approved during execution',
|
|
2816
|
-
maxUsages: 5, // Allow 5 more similar calls
|
|
2817
|
-
});
|
|
2818
|
-
}
|
|
2819
|
-
else {
|
|
2820
|
-
// No approval handler - block by default for safety
|
|
2821
|
-
throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
|
|
2822
|
-
}
|
|
2823
|
-
}
|
|
2824
|
-
// Log intent classification if available
|
|
2825
|
-
if (evaluation.intent) {
|
|
2826
|
-
this.emit({
|
|
2827
|
-
type: 'intent.classified',
|
|
2828
|
-
tool: toolCall.name,
|
|
2829
|
-
intent: evaluation.intent.type,
|
|
2830
|
-
confidence: evaluation.intent.confidence,
|
|
2831
|
-
});
|
|
2832
|
-
}
|
|
2833
|
-
}
|
|
2834
|
-
// =====================================================================
|
|
2835
|
-
// SAFETY VALIDATION (Lesson 20-21)
|
|
2836
|
-
// =====================================================================
|
|
2837
|
-
if (this.safety) {
|
|
2838
|
-
const safety = this.safety;
|
|
2839
|
-
const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
|
|
2840
|
-
if (!validation.allowed) {
|
|
2841
|
-
this.emit({
|
|
2842
|
-
type: 'policy.tool.blocked',
|
|
2843
|
-
tool: toolCall.name,
|
|
2844
|
-
phase: 'enforced',
|
|
2845
|
-
reason: validation.reason || 'Blocked by safety manager',
|
|
2846
|
-
});
|
|
2847
|
-
if (toolCall.name === 'bash') {
|
|
2848
|
-
const args = toolCall.arguments;
|
|
2849
|
-
this.emit({
|
|
2850
|
-
type: 'policy.bash.blocked',
|
|
2851
|
-
phase: 'enforced',
|
|
2852
|
-
command: String(args.command || args.cmd || ''),
|
|
2853
|
-
reason: validation.reason || 'Blocked by safety manager',
|
|
2854
|
-
});
|
|
2855
|
-
}
|
|
2856
|
-
throw new Error(`Tool call blocked: ${validation.reason}`);
|
|
2857
|
-
}
|
|
2858
|
-
}
|
|
2859
|
-
// Get tool definition (with lazy-loading support for MCP tools)
|
|
2860
|
-
let tool = this.tools.get(toolCall.name);
|
|
2861
|
-
const wasPreloaded = !!tool;
|
|
2862
|
-
if (!tool && this.toolResolver) {
|
|
2863
|
-
// Try to resolve and load the tool on-demand
|
|
2864
|
-
const resolved = this.toolResolver(toolCall.name);
|
|
2865
|
-
if (resolved) {
|
|
2866
|
-
this.addTool(resolved);
|
|
2867
|
-
tool = resolved;
|
|
2868
|
-
if (process.env.DEBUG)
|
|
2869
|
-
console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
|
|
2870
|
-
this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
|
|
2871
|
-
}
|
|
2872
|
-
}
|
|
2873
|
-
if (!tool) {
|
|
2874
|
-
throw new Error(`Unknown tool: ${toolCall.name}`);
|
|
2875
|
-
}
|
|
2876
|
-
// Log whether tool was pre-loaded or auto-loaded (for MCP tools)
|
|
2877
|
-
if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
|
|
2878
|
-
console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
|
|
2879
|
-
}
|
|
2880
|
-
// =====================================================================
|
|
2881
|
-
// BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
|
|
2882
|
-
// =====================================================================
|
|
2883
|
-
// Claim file resources before write operations to prevent conflicts
|
|
2884
|
-
if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
|
|
2885
|
-
const args = toolCall.arguments;
|
|
2886
|
-
const filePath = String(args.path || args.file_path || '');
|
|
2887
|
-
if (filePath) {
|
|
2888
|
-
const agentId = this.agentId;
|
|
2889
|
-
const claimed = this.blackboard.claim(filePath, agentId, 'write', {
|
|
2890
|
-
ttl: 60000, // 1 minute claim
|
|
2891
|
-
intent: `${toolCall.name}: ${filePath}`,
|
|
2892
|
-
});
|
|
2893
|
-
if (!claimed) {
|
|
2894
|
-
const existingClaim = this.blackboard.getClaim(filePath);
|
|
2895
|
-
throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
|
|
2896
|
-
`Wait for the other agent to complete or choose a different file.`);
|
|
2897
|
-
}
|
|
2898
|
-
}
|
|
2899
|
-
}
|
|
2900
|
-
// FILE CACHE: Check cache for read_file operations before executing
|
|
2901
|
-
if (this.fileCache && toolCall.name === 'read_file') {
|
|
2902
|
-
const args = toolCall.arguments;
|
|
2903
|
-
const readPath = String(args.path || '');
|
|
2904
|
-
if (readPath) {
|
|
2905
|
-
const cached = this.fileCache.get(readPath);
|
|
2906
|
-
if (cached !== undefined) {
|
|
2907
|
-
const lines = cached.split('\n').length;
|
|
2908
|
-
const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
|
|
2909
|
-
const duration = Date.now() - startTime;
|
|
2910
|
-
this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
|
|
2911
|
-
this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
|
|
2912
|
-
this.state.metrics.toolCalls++;
|
|
2913
|
-
this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
|
|
2914
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
2915
|
-
return {
|
|
2916
|
-
callId: toolCall.id,
|
|
2917
|
-
result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
|
|
2918
|
-
};
|
|
2919
|
-
}
|
|
2920
|
-
}
|
|
2921
|
-
}
|
|
2922
|
-
// Execute tool (with sandbox if available)
|
|
2923
|
-
let result;
|
|
2924
|
-
if (this.safety?.sandbox) {
|
|
2925
|
-
// CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
|
|
2926
|
-
// The default 60s sandbox timeout would kill subagents prematurely
|
|
2927
|
-
// Subagents may run for minutes (per their own timeout config)
|
|
2928
|
-
const isSpawnAgent = toolCall.name === 'spawn_agent';
|
|
2929
|
-
const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
|
|
2930
|
-
const isSubagentTool = isSpawnAgent || isSpawnParallel;
|
|
2931
|
-
const subagentConfig = this.config.subagent;
|
|
2932
|
-
const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
|
|
2933
|
-
const subagentTimeout = hasSubagentConfig
|
|
2934
|
-
? subagentConfig.defaultTimeout ?? 600000 // 10 min default
|
|
2935
|
-
: 600000;
|
|
2936
|
-
// Use subagent timeout + buffer for spawn tools, default for others
|
|
2937
|
-
// For spawn_agents_parallel, multiply by number of agents (they run in parallel,
|
|
2938
|
-
// but the total wall-clock time should still allow the slowest agent to complete)
|
|
2939
|
-
const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
|
|
2940
|
-
result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
|
|
2941
|
-
}
|
|
2942
|
-
else {
|
|
2943
|
-
result = await tool.execute(toolCall.arguments);
|
|
2944
|
-
}
|
|
2945
|
-
const duration = Date.now() - startTime;
|
|
2946
|
-
// Lesson 26: Record tool completion for tracing
|
|
2947
|
-
this.traceCollector?.record({
|
|
2948
|
-
type: 'tool.end',
|
|
2949
|
-
data: {
|
|
2950
|
-
executionId,
|
|
2951
|
-
status: 'success',
|
|
2952
|
-
result,
|
|
2953
|
-
durationMs: duration,
|
|
2954
|
-
},
|
|
2955
|
-
});
|
|
2956
|
-
// Record metrics
|
|
2957
|
-
this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
|
|
2958
|
-
this.state.metrics.toolCalls++;
|
|
2959
|
-
this.emit({ type: 'tool.complete', tool: toolCall.name, result });
|
|
2960
|
-
// FILE CACHE: Store read results and invalidate on writes
|
|
2961
|
-
if (this.fileCache) {
|
|
2962
|
-
const args = toolCall.arguments;
|
|
2963
|
-
const filePath = String(args.path || args.file_path || '');
|
|
2964
|
-
if (toolCall.name === 'read_file' && filePath) {
|
|
2965
|
-
// Cache successful read results
|
|
2966
|
-
const resultObj = result;
|
|
2967
|
-
if (resultObj?.success && typeof resultObj.output === 'string') {
|
|
2968
|
-
this.fileCache.set(filePath, resultObj.output);
|
|
2969
|
-
}
|
|
2970
|
-
}
|
|
2971
|
-
else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
|
|
2972
|
-
// Invalidate cache when files are modified (including undo operations)
|
|
2973
|
-
this.fileCache.invalidate(filePath);
|
|
2974
|
-
}
|
|
2975
|
-
}
|
|
2976
|
-
// Emit tool insight with result summary
|
|
2977
|
-
const summary = this.summarizeToolResult(toolCall.name, result);
|
|
2978
|
-
this.emit({
|
|
2979
|
-
type: 'insight.tool',
|
|
2980
|
-
tool: toolCall.name,
|
|
2981
|
-
summary,
|
|
2982
|
-
durationMs: duration,
|
|
2983
|
-
success: true,
|
|
2984
|
-
});
|
|
2985
|
-
// Release blackboard claim after successful file write
|
|
2986
|
-
if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
|
|
2987
|
-
const args = toolCall.arguments;
|
|
2988
|
-
const filePath = String(args.path || args.file_path || '');
|
|
2989
|
-
if (filePath) {
|
|
2990
|
-
const agentId = this.agentId;
|
|
2991
|
-
this.blackboard.release(filePath, agentId);
|
|
2992
|
-
}
|
|
2993
|
-
}
|
|
2994
|
-
// Self-improvement: record success pattern
|
|
2995
|
-
this.selfImprovement?.recordSuccess(toolCall.name, toolCall.arguments, typeof result === 'string' ? result.slice(0, 200) : JSON.stringify(result).slice(0, 200));
|
|
2996
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
2997
|
-
return { callId: toolCall.id, result };
|
|
1744
|
+
buildMutators() {
|
|
1745
|
+
return {
|
|
1746
|
+
setBudgetPool: (pool) => { this.budgetPool = pool; },
|
|
1747
|
+
setCacheableSystemBlocks: (blocks) => { this.cacheableSystemBlocks = blocks; },
|
|
1748
|
+
setCompactionPending: (pending) => { this.compactionPending = pending; },
|
|
1749
|
+
setWrapupRequested: (requested) => { this.wrapupRequested = requested; },
|
|
1750
|
+
setLastComplexityAssessment: (a) => { this.lastComplexityAssessment = a; },
|
|
1751
|
+
setExternalCancellationToken: (t) => { this.externalCancellationToken = t; },
|
|
1752
|
+
};
|
|
1753
|
+
}
|
|
1754
|
+
createSubAgentFactory() {
|
|
1755
|
+
return (config) => new ProductionAgent(config);
|
|
1756
|
+
}
|
|
1757
|
+
/**
|
|
1758
|
+
* Execute an async callback while excluding wall-clock wait time from duration budgeting.
|
|
1759
|
+
* Used for external waits such as approval dialogs and delegation confirmation.
|
|
1760
|
+
*/
|
|
1761
|
+
async withPausedDuration(fn) {
|
|
1762
|
+
this.economics?.pauseDuration();
|
|
1763
|
+
try {
|
|
1764
|
+
return await fn();
|
|
2998
1765
|
}
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
const duration = Date.now() - startTime;
|
|
3002
|
-
// Lesson 26: Record tool error for tracing
|
|
3003
|
-
this.traceCollector?.record({
|
|
3004
|
-
type: 'tool.end',
|
|
3005
|
-
data: {
|
|
3006
|
-
executionId,
|
|
3007
|
-
status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
|
|
3008
|
-
error,
|
|
3009
|
-
durationMs: duration,
|
|
3010
|
-
},
|
|
3011
|
-
});
|
|
3012
|
-
this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
|
|
3013
|
-
this.observability?.tracer?.recordError(error);
|
|
3014
|
-
this.observability?.tracer?.endSpan(spanId);
|
|
3015
|
-
// FAILURE EVIDENCE RECORDING (Trick S)
|
|
3016
|
-
// Track failed tool calls to prevent loops and provide context
|
|
3017
|
-
this.contextEngineering?.recordFailure({
|
|
3018
|
-
action: toolCall.name,
|
|
3019
|
-
args: toolCall.arguments,
|
|
3020
|
-
error,
|
|
3021
|
-
intent: `Execute tool ${toolCall.name}`,
|
|
3022
|
-
});
|
|
3023
|
-
// Self-improvement: enhance error message with diagnosis for better LLM recovery
|
|
3024
|
-
if (this.selfImprovement) {
|
|
3025
|
-
const enhanced = this.selfImprovement.enhanceErrorMessage(toolCall.name, error.message, toolCall.arguments);
|
|
3026
|
-
this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: enhanced });
|
|
3027
|
-
return { callId: toolCall.id, result: `Error: ${enhanced}`, error: enhanced };
|
|
3028
|
-
}
|
|
3029
|
-
this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
|
|
3030
|
-
return { callId: toolCall.id, result: `Error: ${error.message}`, error: error.message };
|
|
1766
|
+
finally {
|
|
1767
|
+
this.economics?.resumeDuration();
|
|
3031
1768
|
}
|
|
3032
1769
|
}
|
|
3033
1770
|
/**
|
|
@@ -3166,123 +1903,6 @@ export class ProductionAgent {
|
|
|
3166
1903
|
emit(event) {
|
|
3167
1904
|
this.hooks?.emit(event);
|
|
3168
1905
|
}
|
|
3169
|
-
/**
|
|
3170
|
-
* Create a brief summary of a tool result for insight display.
|
|
3171
|
-
*/
|
|
3172
|
-
summarizeToolResult(toolName, result) {
|
|
3173
|
-
if (result === null || result === undefined) {
|
|
3174
|
-
return 'No output';
|
|
3175
|
-
}
|
|
3176
|
-
const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
|
|
3177
|
-
// Tool-specific summaries
|
|
3178
|
-
if (toolName === 'list_files' || toolName === 'glob') {
|
|
3179
|
-
const lines = resultStr.split('\n').filter(l => l.trim());
|
|
3180
|
-
return `Found ${lines.length} file${lines.length !== 1 ? 's' : ''}`;
|
|
3181
|
-
}
|
|
3182
|
-
if (toolName === 'bash' || toolName === 'execute_command') {
|
|
3183
|
-
const lines = resultStr.split('\n').filter(l => l.trim());
|
|
3184
|
-
if (resultStr.includes('exit code: 0') || !resultStr.includes('exit code:')) {
|
|
3185
|
-
return lines.length > 1 ? `Success (${lines.length} lines)` : 'Success';
|
|
3186
|
-
}
|
|
3187
|
-
return `Failed - ${lines[0]?.slice(0, 50) || 'see output'}`;
|
|
3188
|
-
}
|
|
3189
|
-
if (toolName === 'read_file') {
|
|
3190
|
-
const lines = resultStr.split('\n').length;
|
|
3191
|
-
return `Read ${lines} line${lines !== 1 ? 's' : ''}`;
|
|
3192
|
-
}
|
|
3193
|
-
if (toolName === 'write_file' || toolName === 'edit_file') {
|
|
3194
|
-
return 'File updated';
|
|
3195
|
-
}
|
|
3196
|
-
if (toolName === 'search' || toolName === 'grep') {
|
|
3197
|
-
const matches = (resultStr.match(/\n/g) || []).length;
|
|
3198
|
-
return `${matches} match${matches !== 1 ? 'es' : ''}`;
|
|
3199
|
-
}
|
|
3200
|
-
// Generic summary
|
|
3201
|
-
if (resultStr.length <= 50) {
|
|
3202
|
-
return resultStr;
|
|
3203
|
-
}
|
|
3204
|
-
return `${resultStr.slice(0, 47)}...`;
|
|
3205
|
-
}
|
|
3206
|
-
/**
|
|
3207
|
-
* Format tool arguments for plan display.
|
|
3208
|
-
*/
|
|
3209
|
-
formatToolArgsForPlan(toolName, args) {
|
|
3210
|
-
if (toolName === 'write_file') {
|
|
3211
|
-
const path = args.path || args.file_path;
|
|
3212
|
-
const content = String(args.content || '');
|
|
3213
|
-
const preview = content.slice(0, 100).replace(/\n/g, '\\n');
|
|
3214
|
-
return `File: ${path}\nContent preview: ${preview}${content.length > 100 ? '...' : ''}`;
|
|
3215
|
-
}
|
|
3216
|
-
if (toolName === 'edit_file') {
|
|
3217
|
-
const path = args.path || args.file_path;
|
|
3218
|
-
return `File: ${path}\nOld: ${String(args.old_string || args.search || '').slice(0, 50)}...\nNew: ${String(args.new_string || args.replace || '').slice(0, 50)}...`;
|
|
3219
|
-
}
|
|
3220
|
-
if (toolName === 'bash') {
|
|
3221
|
-
return `Command: ${String(args.command || '').slice(0, 100)}`;
|
|
3222
|
-
}
|
|
3223
|
-
if (toolName === 'delete_file') {
|
|
3224
|
-
return `Delete: ${args.path || args.file_path}`;
|
|
3225
|
-
}
|
|
3226
|
-
if (toolName === 'spawn_agent' || toolName === 'researcher') {
|
|
3227
|
-
const task = String(args.task || args.prompt || args.goal || '');
|
|
3228
|
-
const model = args.model ? ` (${args.model})` : '';
|
|
3229
|
-
const firstLine = task.split('\n')[0].slice(0, 100);
|
|
3230
|
-
return `${firstLine}${task.length > 100 ? '...' : ''}${model}`;
|
|
3231
|
-
}
|
|
3232
|
-
// Generic
|
|
3233
|
-
return `Args: ${JSON.stringify(args).slice(0, 100)}...`;
|
|
3234
|
-
}
|
|
3235
|
-
/**
|
|
3236
|
-
* Extract contextual reasoning for a proposed change in plan mode.
|
|
3237
|
-
* Looks at recent assistant messages to find relevant explanation.
|
|
3238
|
-
* Returns a more complete reason than simple truncation.
|
|
3239
|
-
*/
|
|
3240
|
-
extractChangeReasoning(toolCall, messages) {
|
|
3241
|
-
// Get last few assistant messages (most recent first)
|
|
3242
|
-
const assistantMsgs = messages
|
|
3243
|
-
.filter(m => m.role === 'assistant' && typeof m.content === 'string')
|
|
3244
|
-
.slice(-3)
|
|
3245
|
-
.reverse();
|
|
3246
|
-
if (assistantMsgs.length === 0) {
|
|
3247
|
-
return `Proposed change: ${toolCall.name}`;
|
|
3248
|
-
}
|
|
3249
|
-
// Use the most recent assistant message
|
|
3250
|
-
const lastMsg = assistantMsgs[0];
|
|
3251
|
-
const content = lastMsg.content;
|
|
3252
|
-
// For spawn_agent, the task itself is usually the reason
|
|
3253
|
-
if (toolCall.name === 'spawn_agent') {
|
|
3254
|
-
const args = toolCall.arguments;
|
|
3255
|
-
const task = String(args.task || args.prompt || args.goal || '');
|
|
3256
|
-
if (task.length > 0) {
|
|
3257
|
-
// Use first paragraph or 500 chars of task as reason
|
|
3258
|
-
const firstPara = task.split(/\n\n/)[0];
|
|
3259
|
-
return firstPara.length > 500 ? firstPara.slice(0, 500) + '...' : firstPara;
|
|
3260
|
-
}
|
|
3261
|
-
}
|
|
3262
|
-
// For file operations, look for context about the file
|
|
3263
|
-
if (['write_file', 'edit_file'].includes(toolCall.name)) {
|
|
3264
|
-
const args = toolCall.arguments;
|
|
3265
|
-
const path = String(args.path || args.file_path || '');
|
|
3266
|
-
// Look for mentions of this file in the assistant's explanation
|
|
3267
|
-
if (path && content.toLowerCase().includes(path.toLowerCase().split('/').pop() || '')) {
|
|
3268
|
-
// Extract the sentence(s) mentioning this file
|
|
3269
|
-
const sentences = content.split(/[.!?\n]+/).filter(s => s.toLowerCase().includes(path.toLowerCase().split('/').pop() || ''));
|
|
3270
|
-
if (sentences.length > 0) {
|
|
3271
|
-
const relevant = sentences.slice(0, 2).join('. ').trim();
|
|
3272
|
-
return relevant.length > 500 ? relevant.slice(0, 500) + '...' : relevant;
|
|
3273
|
-
}
|
|
3274
|
-
}
|
|
3275
|
-
}
|
|
3276
|
-
// Fallback: use first 500 chars instead of 200
|
|
3277
|
-
// Look for the first meaningful paragraph/section
|
|
3278
|
-
const paragraphs = content.split(/\n\n+/).filter(p => p.trim().length > 20);
|
|
3279
|
-
if (paragraphs.length > 0) {
|
|
3280
|
-
const firstPara = paragraphs[0].trim();
|
|
3281
|
-
return firstPara.length > 500 ? firstPara.slice(0, 500) + '...' : firstPara;
|
|
3282
|
-
}
|
|
3283
|
-
// Ultimate fallback
|
|
3284
|
-
return content.length > 500 ? content.slice(0, 500) + '...' : content;
|
|
3285
|
-
}
|
|
3286
1906
|
/**
|
|
3287
1907
|
* Update memory statistics.
|
|
3288
1908
|
* Memory stats are retrieved via memory manager, not stored in state.
|
|
@@ -3308,12 +1928,27 @@ export class ProductionAgent {
|
|
|
3308
1928
|
}
|
|
3309
1929
|
return this.state.metrics;
|
|
3310
1930
|
}
|
|
1931
|
+
getResilienceConfig() {
|
|
1932
|
+
return this.config.resilience;
|
|
1933
|
+
}
|
|
3311
1934
|
/**
|
|
3312
1935
|
* Get current state.
|
|
3313
1936
|
*/
|
|
3314
1937
|
getState() {
|
|
3315
1938
|
return { ...this.state };
|
|
3316
1939
|
}
|
|
1940
|
+
/**
|
|
1941
|
+
* Get shared state stats for TUI visibility.
|
|
1942
|
+
* Returns null when not in a swarm context.
|
|
1943
|
+
*/
|
|
1944
|
+
getSharedStats() {
|
|
1945
|
+
if (!this._sharedContextState)
|
|
1946
|
+
return null;
|
|
1947
|
+
return {
|
|
1948
|
+
context: this._sharedContextState.getStats(),
|
|
1949
|
+
economics: this._sharedEconomicsState?.getStats() ?? { fingerprints: 0, globalLoops: [] },
|
|
1950
|
+
};
|
|
1951
|
+
}
|
|
3317
1952
|
/**
|
|
3318
1953
|
* Get the maximum context tokens for this agent's model.
|
|
3319
1954
|
* Priority: user config > OpenRouter API > hardcoded ModelRegistry > 200K default
|
|
@@ -3335,6 +1970,16 @@ export class ProductionAgent {
|
|
|
3335
1970
|
// Default
|
|
3336
1971
|
return 200000;
|
|
3337
1972
|
}
|
|
1973
|
+
/**
|
|
1974
|
+
* Estimate tokens used by the system prompt (codebase context, tools, rules).
|
|
1975
|
+
* Used by TUI to display accurate context % that includes system overhead.
|
|
1976
|
+
*/
|
|
1977
|
+
getSystemPromptTokenEstimate() {
|
|
1978
|
+
if (this.lastSystemPromptLength > 0) {
|
|
1979
|
+
return Math.ceil(this.lastSystemPromptLength / 3.2);
|
|
1980
|
+
}
|
|
1981
|
+
return 0;
|
|
1982
|
+
}
|
|
3338
1983
|
/**
|
|
3339
1984
|
* Get the trace collector (Lesson 26).
|
|
3340
1985
|
* Returns null if trace capture is not enabled.
|
|
@@ -3348,6 +1993,9 @@ export class ProductionAgent {
|
|
|
3348
1993
|
*/
|
|
3349
1994
|
setTraceCollector(collector) {
|
|
3350
1995
|
this.traceCollector = collector;
|
|
1996
|
+
if (this.codebaseContext) {
|
|
1997
|
+
this.codebaseContext.traceCollector = collector;
|
|
1998
|
+
}
|
|
3351
1999
|
}
|
|
3352
2000
|
/**
|
|
3353
2001
|
* Get the learning store for cross-session learning.
|
|
@@ -3559,7 +2207,7 @@ export class ProductionAgent {
|
|
|
3559
2207
|
const validation = this.validateCheckpoint(savedState);
|
|
3560
2208
|
// Log warnings
|
|
3561
2209
|
for (const warning of validation.warnings) {
|
|
3562
|
-
|
|
2210
|
+
log.warn('Checkpoint validation warning', { warning });
|
|
3563
2211
|
this.observability?.logger?.warn('Checkpoint validation warning', { warning });
|
|
3564
2212
|
}
|
|
3565
2213
|
// Fail on validation errors
|
|
@@ -3658,7 +2306,7 @@ export class ProductionAgent {
|
|
|
3658
2306
|
}
|
|
3659
2307
|
}
|
|
3660
2308
|
if (compactedCount > 0 && process.env.DEBUG) {
|
|
3661
|
-
|
|
2309
|
+
log.debug('Compacted tool outputs', { compactedCount, savedTokens: Math.round(savedChars / 4) });
|
|
3662
2310
|
}
|
|
3663
2311
|
}
|
|
3664
2312
|
/**
|
|
@@ -3699,23 +2347,32 @@ export class ProductionAgent {
|
|
|
3699
2347
|
const artifactWriteTools = ['write_file', 'edit_file', 'apply_patch', 'append_file'];
|
|
3700
2348
|
return !artifactWriteTools.some(toolName => executedToolNames.has(toolName));
|
|
3701
2349
|
}
|
|
3702
|
-
|
|
3703
|
-
|
|
3704
|
-
|
|
3705
|
-
|
|
3706
|
-
const
|
|
3707
|
-
|
|
3708
|
-
|
|
2350
|
+
getOpenTasksSummary() {
|
|
2351
|
+
if (!this.taskManager) {
|
|
2352
|
+
return undefined;
|
|
2353
|
+
}
|
|
2354
|
+
const tasks = this.taskManager.list();
|
|
2355
|
+
const pending = tasks.filter(t => t.status === 'pending').length;
|
|
2356
|
+
const inProgress = tasks.filter(t => t.status === 'in_progress').length;
|
|
2357
|
+
const blocked = tasks.filter(t => t.status === 'pending' && this.taskManager?.isBlocked(t.id)).length;
|
|
2358
|
+
return { pending, inProgress, blocked };
|
|
2359
|
+
}
|
|
2360
|
+
reconcileStaleTasks(reason) {
|
|
2361
|
+
if (!this.taskManager)
|
|
2362
|
+
return;
|
|
2363
|
+
const staleAfterMs = typeof this.config.resilience === 'object'
|
|
2364
|
+
? (this.config.resilience.taskLeaseStaleMs ?? 5 * 60 * 1000)
|
|
2365
|
+
: 5 * 60 * 1000;
|
|
2366
|
+
const recovered = this.taskManager.reconcileStaleInProgress({
|
|
2367
|
+
staleAfterMs,
|
|
2368
|
+
reason,
|
|
2369
|
+
});
|
|
2370
|
+
if (recovered.reconciled > 0) {
|
|
2371
|
+
this.observability?.logger?.info('Recovered stale task leases', {
|
|
2372
|
+
reason,
|
|
2373
|
+
recovered: recovered.reconciled,
|
|
2374
|
+
});
|
|
3709
2375
|
}
|
|
3710
|
-
const lower = trimmed.toLowerCase();
|
|
3711
|
-
const futureIntentPatterns = [
|
|
3712
|
-
/^(now|next|then)\s+(i\s+will|i'll|let me)\b/,
|
|
3713
|
-
/^i\s+(will|am going to|can)\b/,
|
|
3714
|
-
/^(let me|i'll|i will)\s+(create|write|save|do|make|generate|start)\b/,
|
|
3715
|
-
/^(now|next|then)\s+i(?:'ll| will)\b/,
|
|
3716
|
-
];
|
|
3717
|
-
const completionSignals = /\b(done|completed|finished|here is|created|saved|wrote)\b/;
|
|
3718
|
-
return futureIntentPatterns.some(pattern => pattern.test(lower)) && !completionSignals.test(lower);
|
|
3719
2376
|
}
|
|
3720
2377
|
/**
|
|
3721
2378
|
* Get audit log (if human-in-loop is enabled).
|
|
@@ -4128,1003 +2785,16 @@ export class ProductionAgent {
|
|
|
4128
2785
|
return success;
|
|
4129
2786
|
}
|
|
4130
2787
|
/**
|
|
4131
|
-
* Spawn
|
|
4132
|
-
* Returns the result when the agent completes.
|
|
4133
|
-
*
|
|
4134
|
-
* @param agentName - Name of the agent to spawn (researcher, coder, etc.)
|
|
4135
|
-
* @param task - The task description for the agent
|
|
4136
|
-
* @param constraints - Optional constraints to keep the subagent focused
|
|
2788
|
+
* Spawn a subagent (delegates to core/subagent-spawner).
|
|
4137
2789
|
*/
|
|
4138
2790
|
async spawnAgent(agentName, task, constraints) {
|
|
4139
|
-
|
|
4140
|
-
return {
|
|
4141
|
-
success: false,
|
|
4142
|
-
output: 'Agent registry not initialized',
|
|
4143
|
-
metrics: { tokens: 0, duration: 0, toolCalls: 0 },
|
|
4144
|
-
};
|
|
4145
|
-
}
|
|
4146
|
-
const agentDef = this.agentRegistry.getAgent(agentName);
|
|
4147
|
-
if (!agentDef) {
|
|
4148
|
-
return {
|
|
4149
|
-
success: false,
|
|
4150
|
-
output: `Agent not found: ${agentName}`,
|
|
4151
|
-
metrics: { tokens: 0, duration: 0, toolCalls: 0 },
|
|
4152
|
-
};
|
|
4153
|
-
}
|
|
4154
|
-
// DUPLICATE SPAWN PREVENTION with SEMANTIC SIMILARITY
|
|
4155
|
-
// Skip for swarm workers — the orchestrator handles retry logic and deduplication
|
|
4156
|
-
// at the task level. Without this bypass, retried swarm tasks return stale results.
|
|
4157
|
-
const isSwarmWorker = agentName.startsWith('swarm-');
|
|
4158
|
-
const SEMANTIC_SIMILARITY_THRESHOLD = 0.75; // 75% similarity = duplicate
|
|
4159
|
-
const taskKey = `${agentName}:${task.slice(0, 150).toLowerCase().replace(/\s+/g, ' ').trim()}`;
|
|
4160
|
-
const now = Date.now();
|
|
4161
|
-
// Clean up old entries (older than dedup window)
|
|
4162
|
-
for (const [key, entry] of this.spawnedTasks.entries()) {
|
|
4163
|
-
if (now - entry.timestamp > ProductionAgent.SPAWN_DEDUP_WINDOW_MS) {
|
|
4164
|
-
this.spawnedTasks.delete(key);
|
|
4165
|
-
}
|
|
4166
|
-
}
|
|
4167
|
-
let existingMatch;
|
|
4168
|
-
let matchType = 'exact';
|
|
4169
|
-
if (!isSwarmWorker) {
|
|
4170
|
-
// Check for exact match first
|
|
4171
|
-
existingMatch = this.spawnedTasks.get(taskKey);
|
|
4172
|
-
// If no exact match, check for semantic similarity among same agent's tasks
|
|
4173
|
-
if (!existingMatch) {
|
|
4174
|
-
for (const [key, entry] of this.spawnedTasks.entries()) {
|
|
4175
|
-
// Only compare tasks from the same agent type
|
|
4176
|
-
if (!key.startsWith(`${agentName}:`))
|
|
4177
|
-
continue;
|
|
4178
|
-
if (now - entry.timestamp >= ProductionAgent.SPAWN_DEDUP_WINDOW_MS)
|
|
4179
|
-
continue;
|
|
4180
|
-
// Extract the task portion from the key
|
|
4181
|
-
const existingTask = key.slice(agentName.length + 1);
|
|
4182
|
-
const similarity = calculateTaskSimilarity(task, existingTask);
|
|
4183
|
-
if (similarity >= SEMANTIC_SIMILARITY_THRESHOLD) {
|
|
4184
|
-
existingMatch = entry;
|
|
4185
|
-
matchType = 'semantic';
|
|
4186
|
-
this.observability?.logger?.debug('Semantic duplicate detected', {
|
|
4187
|
-
agent: agentName,
|
|
4188
|
-
newTask: task.slice(0, 80),
|
|
4189
|
-
existingTask: existingTask.slice(0, 80),
|
|
4190
|
-
similarity: (similarity * 100).toFixed(1) + '%',
|
|
4191
|
-
});
|
|
4192
|
-
break;
|
|
4193
|
-
}
|
|
4194
|
-
}
|
|
4195
|
-
}
|
|
4196
|
-
}
|
|
4197
|
-
if (existingMatch && now - existingMatch.timestamp < ProductionAgent.SPAWN_DEDUP_WINDOW_MS) {
|
|
4198
|
-
// Same or semantically similar task spawned within the dedup window
|
|
4199
|
-
this.observability?.logger?.warn('Duplicate spawn prevented', {
|
|
4200
|
-
agent: agentName,
|
|
4201
|
-
task: task.slice(0, 100),
|
|
4202
|
-
matchType,
|
|
4203
|
-
originalTimestamp: existingMatch.timestamp,
|
|
4204
|
-
elapsedMs: now - existingMatch.timestamp,
|
|
4205
|
-
});
|
|
4206
|
-
const duplicateMessage = `[DUPLICATE SPAWN PREVENTED${matchType === 'semantic' ? ' - SEMANTIC MATCH' : ''}]\n` +
|
|
4207
|
-
`This task was already spawned ${Math.round((now - existingMatch.timestamp) / 1000)}s ago.\n` +
|
|
4208
|
-
`${existingMatch.queuedChanges > 0
|
|
4209
|
-
? `The previous spawn queued ${existingMatch.queuedChanges} change(s) to the pending plan.\n` +
|
|
4210
|
-
`These changes are already in your plan - do NOT spawn again.\n`
|
|
4211
|
-
: ''}Previous result summary:\n${existingMatch.result.slice(0, 500)}`;
|
|
4212
|
-
return {
|
|
4213
|
-
success: true, // Mark as success since original task completed
|
|
4214
|
-
output: duplicateMessage,
|
|
4215
|
-
metrics: { tokens: 0, duration: 0, toolCalls: 0 },
|
|
4216
|
-
};
|
|
4217
|
-
}
|
|
4218
|
-
// Generate a unique ID for this agent instance that will be used consistently
|
|
4219
|
-
// throughout the agent's lifecycle (spawn event, token events, completion events)
|
|
4220
|
-
const agentId = `spawn-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
4221
|
-
this.emit({ type: 'agent.spawn', agentId, name: agentName, task });
|
|
4222
|
-
this.observability?.logger?.info('Spawning agent', { name: agentName, task });
|
|
4223
|
-
const startTime = Date.now();
|
|
4224
|
-
const childSessionId = `subagent-${agentName}-${Date.now()}`;
|
|
4225
|
-
const childTraceId = `trace-${childSessionId}`;
|
|
4226
|
-
let workerResultId;
|
|
4227
|
-
try {
|
|
4228
|
-
// Filter tools for this agent
|
|
4229
|
-
let agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
|
|
4230
|
-
// Resolve policy profile FIRST so we know which tools the policy allows.
|
|
4231
|
-
// This must happen before the recommendation filter so policy-allowed tools
|
|
4232
|
-
// are preserved through the recommendation pruning step.
|
|
4233
|
-
const inferredTaskType = agentDef.taskType ?? ToolRecommendationEngine.inferTaskType(agentName);
|
|
4234
|
-
const policyResolution = resolvePolicyProfile({
|
|
4235
|
-
policyEngine: this.config.policyEngine,
|
|
4236
|
-
requestedProfile: agentDef.policyProfile,
|
|
4237
|
-
swarmConfig: isSwarmWorker && this.config.swarm && typeof this.config.swarm === 'object'
|
|
4238
|
-
? this.config.swarm
|
|
4239
|
-
: undefined,
|
|
4240
|
-
taskType: inferredTaskType,
|
|
4241
|
-
isSwarmWorker,
|
|
4242
|
-
sandboxConfig: this.config.sandbox && typeof this.config.sandbox === 'object'
|
|
4243
|
-
? this.config.sandbox
|
|
4244
|
-
: undefined,
|
|
4245
|
-
});
|
|
4246
|
-
this.emit({
|
|
4247
|
-
type: 'policy.profile.resolved',
|
|
4248
|
-
profile: policyResolution.profileName,
|
|
4249
|
-
context: isSwarmWorker ? 'swarm' : 'subagent',
|
|
4250
|
-
selectionSource: policyResolution.metadata.selectionSource,
|
|
4251
|
-
usedLegacyMappings: policyResolution.metadata.usedLegacyMappings,
|
|
4252
|
-
legacySources: policyResolution.metadata.legacyMappingSources,
|
|
4253
|
-
});
|
|
4254
|
-
if (policyResolution.metadata.usedLegacyMappings) {
|
|
4255
|
-
this.emit({
|
|
4256
|
-
type: 'policy.legacy.fallback.used',
|
|
4257
|
-
profile: policyResolution.profileName,
|
|
4258
|
-
sources: policyResolution.metadata.legacyMappingSources,
|
|
4259
|
-
warnings: policyResolution.metadata.warnings,
|
|
4260
|
-
});
|
|
4261
|
-
this.observability?.logger?.warn('Policy legacy mappings used', {
|
|
4262
|
-
agent: agentName,
|
|
4263
|
-
profile: policyResolution.profileName,
|
|
4264
|
-
sources: policyResolution.metadata.legacyMappingSources,
|
|
4265
|
-
});
|
|
4266
|
-
}
|
|
4267
|
-
// Apply tool recommendations to improve subagent focus (only for large tool sets)
|
|
4268
|
-
if (this.toolRecommendation && agentTools.length > 15) {
|
|
4269
|
-
const taskType = ToolRecommendationEngine.inferTaskType(agentName);
|
|
4270
|
-
const recommendations = this.toolRecommendation.recommendTools(task, taskType, agentTools.map(t => t.name));
|
|
4271
|
-
if (recommendations.length > 0) {
|
|
4272
|
-
const recommendedNames = new Set(recommendations.map(r => r.toolName));
|
|
4273
|
-
// Always keep spawn tools even if not recommended
|
|
4274
|
-
const alwaysKeep = new Set(['spawn_agent', 'spawn_agents_parallel']);
|
|
4275
|
-
// Also keep tools that the resolved policy profile explicitly allows.
|
|
4276
|
-
// This prevents the recommendation engine from stripping tools that the
|
|
4277
|
-
// security policy says the worker should have.
|
|
4278
|
-
if (policyResolution.profile.allowedTools) {
|
|
4279
|
-
for (const t of policyResolution.profile.allowedTools)
|
|
4280
|
-
alwaysKeep.add(t);
|
|
4281
|
-
}
|
|
4282
|
-
agentTools = agentTools.filter(t => recommendedNames.has(t.name) || alwaysKeep.has(t.name));
|
|
4283
|
-
}
|
|
4284
|
-
}
|
|
4285
|
-
// Enforce unified tool policy at spawn-time so denied tools are never exposed.
|
|
4286
|
-
if (policyResolution.profile.toolAccessMode === 'whitelist' && policyResolution.profile.allowedTools) {
|
|
4287
|
-
const allowed = new Set(policyResolution.profile.allowedTools);
|
|
4288
|
-
agentTools = agentTools.filter(t => allowed.has(t.name));
|
|
4289
|
-
}
|
|
4290
|
-
else if (policyResolution.profile.deniedTools && policyResolution.profile.deniedTools.length > 0) {
|
|
4291
|
-
const denied = new Set(policyResolution.profile.deniedTools);
|
|
4292
|
-
agentTools = agentTools.filter(t => !denied.has(t.name));
|
|
4293
|
-
}
|
|
4294
|
-
// Fail fast if tool filtering resulted in zero tools — the worker can't do anything
|
|
4295
|
-
if (agentTools.length === 0) {
|
|
4296
|
-
throw new Error(`Worker '${agentName}' has zero available tools after filtering. Check toolAccessMode and policy profile '${policyResolution.profileName}'.`);
|
|
4297
|
-
}
|
|
4298
|
-
// Resolve model - abstract tiers (fast/balanced/quality) should use parent's model
|
|
4299
|
-
// Only use agentDef.model if it's an actual model ID (contains '/')
|
|
4300
|
-
const resolvedModel = (agentDef.model && agentDef.model.includes('/'))
|
|
4301
|
-
? agentDef.model
|
|
4302
|
-
: this.config.model;
|
|
4303
|
-
// Persist subagent task lifecycle in durable storage when available
|
|
4304
|
-
if (this.store?.hasWorkerResultsFeature()) {
|
|
4305
|
-
try {
|
|
4306
|
-
workerResultId = this.store.createWorkerResult(agentId, task.slice(0, 500), resolvedModel || 'default');
|
|
4307
|
-
}
|
|
4308
|
-
catch (storeErr) {
|
|
4309
|
-
this.observability?.logger?.warn('Failed to create worker result record', {
|
|
4310
|
-
agentId,
|
|
4311
|
-
error: storeErr.message,
|
|
4312
|
-
});
|
|
4313
|
-
}
|
|
4314
|
-
}
|
|
4315
|
-
// Get subagent config with agent-type-specific timeouts and iteration limits
|
|
4316
|
-
// Uses dynamic configuration based on agent type (researcher needs more time than reviewer)
|
|
4317
|
-
// Precedence: per-type config > per-type default > global config > hardcoded fallback
|
|
4318
|
-
const subagentConfig = this.config.subagent;
|
|
4319
|
-
const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
|
|
4320
|
-
// Timeout precedence: agentDef.timeout > per-type config > agent-type default > global config default
|
|
4321
|
-
// agentDef.timeout is set by worker-pool for swarm workers, giving them precise timeout control
|
|
4322
|
-
const agentTypeTimeout = getSubagentTimeout(agentName);
|
|
4323
|
-
const rawPerTypeTimeout = hasSubagentConfig
|
|
4324
|
-
? subagentConfig.timeouts?.[agentName]
|
|
4325
|
-
: undefined;
|
|
4326
|
-
const rawGlobalTimeout = hasSubagentConfig
|
|
4327
|
-
? subagentConfig.defaultTimeout
|
|
4328
|
-
: undefined;
|
|
4329
|
-
// Validate: reject negative, NaN, or non-finite timeout values
|
|
4330
|
-
// A timeout is usable only when it is a finite number strictly greater than zero.
const isValidTimeout = (candidate) => Number.isFinite(candidate) && candidate > 0;
|
|
4331
|
-
const agentDefTimeout = isValidTimeout(agentDef.timeout) ? agentDef.timeout : undefined;
|
|
4332
|
-
const perTypeConfigTimeout = isValidTimeout(rawPerTypeTimeout) ? rawPerTypeTimeout : undefined;
|
|
4333
|
-
const globalConfigTimeout = isValidTimeout(rawGlobalTimeout) ? rawGlobalTimeout : undefined;
|
|
4334
|
-
const subagentTimeout = agentDefTimeout ?? perTypeConfigTimeout ?? agentTypeTimeout ?? globalConfigTimeout ?? 300000;
|
|
4335
|
-
// Iteration precedence: per-type config override > agent-type default > global config default
|
|
4336
|
-
const agentTypeMaxIter = getSubagentMaxIterations(agentName);
|
|
4337
|
-
const rawPerTypeMaxIter = hasSubagentConfig
|
|
4338
|
-
? subagentConfig.maxIterations?.[agentName]
|
|
4339
|
-
: undefined;
|
|
4340
|
-
const rawGlobalMaxIter = hasSubagentConfig
|
|
4341
|
-
? subagentConfig.defaultMaxIterations
|
|
4342
|
-
: undefined;
|
|
4343
|
-
// An iteration limit is usable only when it is a positive integer.
const isValidIter = (candidate) => Number.isInteger(candidate) && candidate > 0;
|
|
4344
|
-
const perTypeConfigMaxIter = isValidIter(rawPerTypeMaxIter) ? rawPerTypeMaxIter : undefined;
|
|
4345
|
-
const globalConfigMaxIter = isValidIter(rawGlobalMaxIter) ? rawGlobalMaxIter : undefined;
|
|
4346
|
-
const defaultMaxIterations = agentDef.maxIterations ?? perTypeConfigMaxIter ?? agentTypeMaxIter ?? globalConfigMaxIter ?? 15;
|
|
4347
|
-
// BLACKBOARD CONTEXT INJECTION
|
|
4348
|
-
// Gather relevant context from the blackboard for the subagent
|
|
4349
|
-
let blackboardContext = '';
|
|
4350
|
-
const parentAgentId = `parent-${Date.now()}`;
|
|
4351
|
-
if (this.blackboard) {
|
|
4352
|
-
// Post parent's exploration context before spawning
|
|
4353
|
-
this.blackboard.post(parentAgentId, {
|
|
4354
|
-
topic: 'spawn.parent_context',
|
|
4355
|
-
content: `Parent spawning ${agentName} for task: ${task.slice(0, 200)}`,
|
|
4356
|
-
type: 'progress',
|
|
4357
|
-
confidence: 1,
|
|
4358
|
-
metadata: { agentName, taskPreview: task.slice(0, 100) },
|
|
4359
|
-
});
|
|
4360
|
-
// Gather recent findings that might help the subagent
|
|
4361
|
-
const recentFindings = this.blackboard.query({
|
|
4362
|
-
limit: 5,
|
|
4363
|
-
types: ['discovery', 'analysis', 'progress'],
|
|
4364
|
-
minConfidence: 0.7,
|
|
4365
|
-
});
|
|
4366
|
-
if (recentFindings.length > 0) {
|
|
4367
|
-
const findingsSummary = recentFindings
|
|
4368
|
-
.map(f => `- [${f.agentId}] ${f.topic}: ${f.content.slice(0, 150)}${f.content.length > 150 ? '...' : ''}`)
|
|
4369
|
-
.join('\n');
|
|
4370
|
-
blackboardContext = `\n\n**BLACKBOARD CONTEXT (from parent/sibling agents):**\n${findingsSummary}\n`;
|
|
4371
|
-
}
|
|
4372
|
-
}
|
|
4373
|
-
// Check for files already being modified in parent's pending plan
|
|
4374
|
-
const currentPlan = this.pendingPlanManager.getPendingPlan();
|
|
4375
|
-
if (currentPlan && currentPlan.proposedChanges.length > 0) {
|
|
4376
|
-
const pendingFiles = currentPlan.proposedChanges
|
|
4377
|
-
.filter((c) => c.tool === 'write_file' || c.tool === 'edit_file')
|
|
4378
|
-
.map((c) => c.args.path || c.args.file_path)
|
|
4379
|
-
.filter(Boolean);
|
|
4380
|
-
if (pendingFiles.length > 0) {
|
|
4381
|
-
blackboardContext += `\n**FILES ALREADY IN PENDING PLAN (do not duplicate):**\n${pendingFiles.slice(0, 10).join('\n')}\n`;
|
|
4382
|
-
}
|
|
4383
|
-
}
|
|
4384
|
-
// CONSTRAINT INJECTION
|
|
4385
|
-
// Add constraints to the subagent's context if provided
|
|
4386
|
-
// Also always include budget awareness so subagents know their limits
|
|
4387
|
-
const constraintParts = [];
|
|
4388
|
-
// BUDGET AWARENESS: Always inject so subagent understands its limits
|
|
4389
|
-
const subagentBudgetTokens = constraints?.maxTokens ?? SUBAGENT_BUDGET.maxTokens ?? 100000;
|
|
4390
|
-
const subagentBudgetMinutes = Math.round((SUBAGENT_BUDGET.maxDuration ?? 240000) / 60000);
|
|
4391
|
-
if (isSwarmWorker) {
|
|
4392
|
-
// V8: Minimal resource awareness for swarm workers — removes budget/time
|
|
4393
|
-
// messaging entirely to prevent cheap models from bail-out anxiety.
|
|
4394
|
-
// The economics system handles budget warnings via system messages when needed.
|
|
4395
|
-
// Wrapup JSON format is ONLY injected when requestWrapup() is called.
|
|
4396
|
-
constraintParts.push(`**Execution Mode:** You are a focused worker agent.\n` +
|
|
4397
|
-
`- Complete your assigned task using tool calls.\n` +
|
|
4398
|
-
`- Your FIRST action must be a tool call (read_file, write_file, edit_file, grep, glob, etc.).\n` +
|
|
4399
|
-
`- To create files use write_file. To modify files use edit_file. Do NOT use bash for file operations.\n` +
|
|
4400
|
-
`- You will receive a system message if you need to wrap up. Until then, work normally.\n` +
|
|
4401
|
-
`- Do NOT produce summaries or reports — produce CODE and FILE CHANGES.`);
|
|
4402
|
-
}
|
|
4403
|
-
else {
|
|
4404
|
-
// Original RESOURCE AWARENESS text for regular subagents
|
|
4405
|
-
constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
|
|
4406
|
-
`- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
|
|
4407
|
-
`- Time limit: ~${subagentBudgetMinutes} minutes\n` +
|
|
4408
|
-
`- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
|
|
4409
|
-
`- Do not explore indefinitely - be focused and efficient.\n` +
|
|
4410
|
-
`- If approaching limits, summarize findings and return.\n` +
|
|
4411
|
-
`- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
|
|
4412
|
-
` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
|
|
4413
|
-
}
|
|
4414
|
-
if (constraints) {
|
|
4415
|
-
if (constraints.focusAreas && constraints.focusAreas.length > 0) {
|
|
4416
|
-
constraintParts.push(`**FOCUS AREAS (limit exploration to these paths):**\n${constraints.focusAreas.map(a => ` - ${a}`).join('\n')}`);
|
|
4417
|
-
}
|
|
4418
|
-
if (constraints.excludeAreas && constraints.excludeAreas.length > 0) {
|
|
4419
|
-
constraintParts.push(`**EXCLUDED AREAS (do NOT explore these):**\n${constraints.excludeAreas.map(a => ` - ${a}`).join('\n')}`);
|
|
4420
|
-
}
|
|
4421
|
-
if (constraints.requiredDeliverables && constraints.requiredDeliverables.length > 0) {
|
|
4422
|
-
constraintParts.push(`**REQUIRED DELIVERABLES (you must produce these):**\n${constraints.requiredDeliverables.map(d => ` - ${d}`).join('\n')}`);
|
|
4423
|
-
}
|
|
4424
|
-
if (constraints.timeboxMinutes) {
|
|
4425
|
-
constraintParts.push(`**TIME LIMIT:** ${constraints.timeboxMinutes} minutes (soft limit - wrap up if approaching)`);
|
|
4426
|
-
}
|
|
4427
|
-
}
|
|
4428
|
-
const constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
|
|
4429
|
-
// Build delegation-enhanced system prompt
|
|
4430
|
-
let delegationContext = '';
|
|
4431
|
-
if (this.lastComplexityAssessment && this.lastComplexityAssessment.tier !== 'simple') {
|
|
4432
|
-
const spec = createMinimalDelegationSpec(task, agentName);
|
|
4433
|
-
delegationContext = '\n\n' + buildDelegationPrompt(spec);
|
|
4434
|
-
}
|
|
4435
|
-
// Quality self-assessment prompt for subagent
|
|
4436
|
-
const qualityPrompt = '\n\n' + getSubagentQualityPrompt();
|
|
4437
|
-
// Build subagent system prompt with subagent-specific plan mode addition
|
|
4438
|
-
const parentMode = this.getMode();
|
|
4439
|
-
const subagentSystemPrompt = parentMode === 'plan'
|
|
4440
|
-
? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`
|
|
4441
|
-
: `${agentDef.systemPrompt}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`;
|
|
4442
|
-
// Allocate budget from pool (or use default) — track allocation ID for release later
|
|
4443
|
-
const pooledBudget = this.getSubagentBudget(agentName, constraints);
|
|
4444
|
-
const poolAllocationId = pooledBudget.allocationId;
|
|
4445
|
-
const deniedByProfile = new Set(policyResolution.profile.deniedTools ?? []);
|
|
4446
|
-
const policyToolPolicies = {};
|
|
4447
|
-
for (const toolName of deniedByProfile) {
|
|
4448
|
-
policyToolPolicies[toolName] = {
|
|
4449
|
-
policy: 'forbidden',
|
|
4450
|
-
reason: `Denied by policy profile '${policyResolution.profileName}'`,
|
|
4451
|
-
};
|
|
4452
|
-
}
|
|
4453
|
-
if ((policyResolution.profile.bashMode ?? 'full') === 'disabled') {
|
|
4454
|
-
policyToolPolicies.bash = {
|
|
4455
|
-
policy: 'forbidden',
|
|
4456
|
-
reason: `Bash is disabled by policy profile '${policyResolution.profileName}'`,
|
|
4457
|
-
};
|
|
4458
|
-
}
|
|
4459
|
-
// Create a sub-agent with the agent's config
|
|
4460
|
-
// Use SUBAGENT_BUDGET to constrain resource usage (prevents runaway token consumption)
|
|
4461
|
-
const subAgent = new ProductionAgent({
|
|
4462
|
-
provider: this.provider,
|
|
4463
|
-
tools: agentTools,
|
|
4464
|
-
// Pass toolResolver so subagent can lazy-load MCP tools
|
|
4465
|
-
toolResolver: this.toolResolver || undefined,
|
|
4466
|
-
// Pass MCP tool summaries so subagent knows what tools are available
|
|
4467
|
-
mcpToolSummaries: this.config.mcpToolSummaries,
|
|
4468
|
-
systemPrompt: subagentSystemPrompt,
|
|
4469
|
-
model: resolvedModel,
|
|
4470
|
-
maxIterations: agentDef.maxIterations || defaultMaxIterations,
|
|
4471
|
-
// Inherit some features but keep subagent simpler
|
|
4472
|
-
memory: false,
|
|
4473
|
-
planning: false,
|
|
4474
|
-
reflection: false,
|
|
4475
|
-
// Enable lightweight compaction for subagents (Improvement P5)
|
|
4476
|
-
// tokenThreshold configures the Compactor's per-pass size limit
|
|
4477
|
-
// maxContextTokens constrains AutoCompactionManager's percentage thresholds
|
|
4478
|
-
// With maxContextTokens=80000 and default 80% threshold, compaction triggers at ~64K
|
|
4479
|
-
compaction: {
|
|
4480
|
-
enabled: true,
|
|
4481
|
-
mode: 'auto',
|
|
4482
|
-
tokenThreshold: 40000, // Compactor summarization size limit per pass
|
|
4483
|
-
preserveRecentCount: 4, // Preserve fewer messages (splits to 2 user + 2 assistant)
|
|
4484
|
-
preserveToolResults: false, // More aggressive — subagents can re-read files
|
|
4485
|
-
summaryMaxTokens: 500,
|
|
4486
|
-
},
|
|
4487
|
-
// Lower context window for subagents so percentage-based compaction triggers earlier
|
|
4488
|
-
maxContextTokens: 80000,
|
|
4489
|
-
observability: this.config.observability,
|
|
4490
|
-
sandbox: (() => {
|
|
4491
|
-
const swarm = this.config.swarm;
|
|
4492
|
-
const extraCmds = swarm && typeof swarm === 'object' && swarm.permissions?.additionalAllowedCommands;
|
|
4493
|
-
const baseSbx = this.config.sandbox;
|
|
4494
|
-
if (baseSbx && typeof baseSbx === 'object') {
|
|
4495
|
-
const sbx = baseSbx;
|
|
4496
|
-
const allowedCommands = extraCmds
|
|
4497
|
-
? [...(sbx.allowedCommands || []), ...extraCmds]
|
|
4498
|
-
: sbx.allowedCommands;
|
|
4499
|
-
return {
|
|
4500
|
-
...sbx,
|
|
4501
|
-
allowedCommands,
|
|
4502
|
-
bashMode: policyResolution.profile.bashMode ?? sbx.bashMode,
|
|
4503
|
-
bashWriteProtection: policyResolution.profile.bashWriteProtection ?? sbx.bashWriteProtection,
|
|
4504
|
-
blockFileCreationViaBash: (policyResolution.profile.bashWriteProtection ?? 'off') === 'block_file_mutation'
|
|
4505
|
-
? true
|
|
4506
|
-
: sbx.blockFileCreationViaBash,
|
|
4507
|
-
};
|
|
4508
|
-
}
|
|
4509
|
-
return baseSbx;
|
|
4510
|
-
})(),
|
|
4511
|
-
humanInLoop: this.config.humanInLoop,
|
|
4512
|
-
// Subagents get 'allow' as default policy since they're already
|
|
4513
|
-
// constrained to their registered tool set. The parent's 'prompt'
|
|
4514
|
-
// policy can't work without humanInLoop.
|
|
4515
|
-
executionPolicy: (() => {
|
|
4516
|
-
const hasPolicyOverrides = Object.keys(policyToolPolicies).length > 0;
|
|
4517
|
-
if (this.config.executionPolicy) {
|
|
4518
|
-
return {
|
|
4519
|
-
...this.config.executionPolicy,
|
|
4520
|
-
defaultPolicy: 'allow',
|
|
4521
|
-
toolPolicies: {
|
|
4522
|
-
...(this.config.executionPolicy.toolPolicies ?? {}),
|
|
4523
|
-
...policyToolPolicies,
|
|
4524
|
-
},
|
|
4525
|
-
};
|
|
4526
|
-
}
|
|
4527
|
-
if (hasPolicyOverrides) {
|
|
4528
|
-
return {
|
|
4529
|
-
enabled: true,
|
|
4530
|
-
defaultPolicy: 'allow',
|
|
4531
|
-
toolPolicies: policyToolPolicies,
|
|
4532
|
-
intentAware: false,
|
|
4533
|
-
};
|
|
4534
|
-
}
|
|
4535
|
-
return this.config.executionPolicy;
|
|
4536
|
-
})(),
|
|
4537
|
-
policyEngine: this.config.policyEngine
|
|
4538
|
-
? { ...this.config.policyEngine, defaultProfile: policyResolution.profileName }
|
|
4539
|
-
: this.config.policyEngine,
|
|
4540
|
-
threads: false,
|
|
4541
|
-
// Disable hooks console output in subagents - parent handles event display
|
|
4542
|
-
hooks: this.config.hooks === false ? false : {
|
|
4543
|
-
enabled: true,
|
|
4544
|
-
builtIn: { logging: false, timing: false, metrics: false },
|
|
4545
|
-
custom: [],
|
|
4546
|
-
},
|
|
4547
|
-
// Pass unique agentId for blackboard coordination and tracing
|
|
4548
|
-
agentId,
|
|
4549
|
-
// Share parent's blackboard for coordination between parallel subagents
|
|
4550
|
-
blackboard: this.blackboard || undefined,
|
|
4551
|
-
// Share parent's file cache to eliminate redundant reads across agents
|
|
4552
|
-
fileCache: this.fileCache || undefined,
|
|
4553
|
-
// CONSTRAINED BUDGET: Use pooled budget when available, falling back to SUBAGENT_BUDGET
|
|
4554
|
-
// Pooled budget ensures total tree cost stays bounded by parent's budget
|
|
4555
|
-
// Merge economicsTuning from agent definition so swarm workers get custom thresholds
|
|
4556
|
-
budget: agentDef.economicsTuning
|
|
4557
|
-
? { ...pooledBudget.budget, tuning: agentDef.economicsTuning }
|
|
4558
|
-
: pooledBudget.budget,
|
|
4559
|
-
});
|
|
4560
|
-
// CRITICAL: Subagent inherits parent's mode
|
|
4561
|
-
// This ensures that if parent is in plan mode:
|
|
4562
|
-
// - Subagent's read operations execute immediately (visible exploration)
|
|
4563
|
-
// - Subagent's write operations get queued in the subagent's pending plan
|
|
4564
|
-
// - User maintains control over what actually gets written
|
|
4565
|
-
if (parentMode !== 'build') {
|
|
4566
|
-
subAgent.setMode(parentMode);
|
|
4567
|
-
}
|
|
4568
|
-
// APPROVAL BATCHING (Improvement P6): Set approval scope for subagents
|
|
4569
|
-
// Read-only tools are auto-approved; write tools get scoped approval
|
|
4570
|
-
// This reduces interruptions from ~8 per session to ~1-2
|
|
4571
|
-
// Swarm permissions from config override defaults when present
|
|
4572
|
-
const swarmPerms = this.config.swarm && typeof this.config.swarm === 'object'
|
|
4573
|
-
? this.config.swarm.permissions : undefined;
|
|
4574
|
-
const baseAutoApprove = ['read_file', 'list_files', 'glob', 'grep', 'show_file_history', 'show_session_changes'];
|
|
4575
|
-
const baseScopedApprove = isSwarmWorker
|
|
4576
|
-
? {
|
|
4577
|
-
write_file: { paths: ['src/', 'tests/', 'tools/'] },
|
|
4578
|
-
edit_file: { paths: ['src/', 'tests/', 'tools/'] },
|
|
4579
|
-
bash: { paths: ['src/', 'tests/', 'tools/'] },
|
|
4580
|
-
}
|
|
4581
|
-
: {
|
|
4582
|
-
write_file: { paths: ['src/', 'tests/', 'tools/'] },
|
|
4583
|
-
edit_file: { paths: ['src/', 'tests/', 'tools/'] },
|
|
4584
|
-
};
|
|
4585
|
-
const baseRequireApproval = isSwarmWorker ? ['delete_file'] : ['bash', 'delete_file'];
|
|
4586
|
-
const mergedScope = mergeApprovalScopeWithProfile({
|
|
4587
|
-
autoApprove: swarmPerms?.autoApprove
|
|
4588
|
-
? [...new Set([...baseAutoApprove, ...swarmPerms.autoApprove])]
|
|
4589
|
-
: baseAutoApprove,
|
|
4590
|
-
scopedApprove: swarmPerms?.scopedApprove
|
|
4591
|
-
? { ...baseScopedApprove, ...swarmPerms.scopedApprove }
|
|
4592
|
-
: baseScopedApprove,
|
|
4593
|
-
// requireApproval: full replacement (not merge) — user may want to REMOVE
|
|
4594
|
-
// tools like 'bash' to let workers run freely
|
|
4595
|
-
requireApproval: swarmPerms?.requireApproval
|
|
4596
|
-
? swarmPerms.requireApproval
|
|
4597
|
-
: baseRequireApproval,
|
|
4598
|
-
}, policyResolution.profile);
|
|
4599
|
-
subAgent.setApprovalScope(mergedScope);
|
|
4600
|
-
// Pass parent's iteration count to subagent for accurate budget tracking
|
|
4601
|
-
// This prevents subagents from consuming excessive iterations when parent already used many
|
|
4602
|
-
subAgent.setParentIterations(this.getTotalIterations());
|
|
4603
|
-
// UNIFIED TRACING: Share parent's trace collector with subagent context
|
|
4604
|
-
// This ensures all subagent events are written to the same trace file as the parent,
|
|
4605
|
-
// tagged with subagent context for proper aggregation in /trace output
|
|
4606
|
-
if (this.traceCollector) {
|
|
4607
|
-
const subagentTraceView = this.traceCollector.createSubagentView({
|
|
4608
|
-
parentSessionId: this.traceCollector.getSessionId() || 'unknown',
|
|
4609
|
-
agentType: agentName,
|
|
4610
|
-
spawnedAtIteration: this.state.iteration,
|
|
4611
|
-
});
|
|
4612
|
-
subAgent.setTraceCollector(subagentTraceView);
|
|
4613
|
-
}
|
|
4614
|
-
// GRACEFUL TIMEOUT with WRAPUP PHASE
|
|
4615
|
-
// Instead of instant death on timeout, the subagent gets a wrapup window
|
|
4616
|
-
// to produce a structured summary before being killed:
|
|
4617
|
-
// 1. Normal operation: progress extends idle timer
|
|
4618
|
-
// 2. Wrapup phase: 30s before hard kill, wrapup callback fires → forceTextOnly
|
|
4619
|
-
// 3. Hard kill: race() throws CancellationError after wrapup window
|
|
4620
|
-
const IDLE_TIMEOUT = agentDef.idleTimeout ?? 120000; // Configurable idle timeout (default: 2 min)
|
|
4621
|
-
let WRAPUP_WINDOW = 30000;
|
|
4622
|
-
let IDLE_CHECK_INTERVAL = 5000;
|
|
4623
|
-
if (this.config.subagent) {
|
|
4624
|
-
WRAPUP_WINDOW = this.config.subagent.wrapupWindowMs ?? WRAPUP_WINDOW;
|
|
4625
|
-
IDLE_CHECK_INTERVAL = this.config.subagent.idleCheckIntervalMs ?? IDLE_CHECK_INTERVAL;
|
|
4626
|
-
}
|
|
4627
|
-
const progressAwareTimeout = createGracefulTimeout(subagentTimeout, // Max total time (hard limit from agent type config)
|
|
4628
|
-
IDLE_TIMEOUT, // Idle timeout (soft limit - no progress triggers this)
|
|
4629
|
-
WRAPUP_WINDOW, // Wrapup window before hard kill
|
|
4630
|
-
IDLE_CHECK_INTERVAL);
|
|
4631
|
-
// Register wrapup callback — fires 30s before hard kill
|
|
4632
|
-
// This triggers the subagent's forceTextOnly path for a structured summary
|
|
4633
|
-
progressAwareTimeout.onWrapupWarning(() => {
|
|
4634
|
-
this.emit({
|
|
4635
|
-
type: 'subagent.wrapup.started',
|
|
4636
|
-
agentId,
|
|
4637
|
-
agentType: agentName,
|
|
4638
|
-
reason: 'Timeout approaching - graceful wrapup window opened',
|
|
4639
|
-
elapsedMs: Date.now() - startTime,
|
|
4640
|
-
});
|
|
4641
|
-
subAgent.requestWrapup('Timeout approaching — produce structured summary');
|
|
4642
|
-
});
|
|
4643
|
-
// Forward events from subagent with context (track for cleanup)
|
|
4644
|
-
// Also report progress to the timeout tracker
|
|
4645
|
-
const unsubSubAgent = subAgent.subscribe(event => {
|
|
4646
|
-
// Tag event with subagent source AND unique ID so TUI can properly attribute
|
|
4647
|
-
// events to the specific agent instance (critical for multiple same-type agents)
|
|
4648
|
-
const taggedEvent = { ...event, subagent: agentName, subagentId: agentId };
|
|
4649
|
-
this.emit(taggedEvent);
|
|
4650
|
-
// Report progress for timeout extension
|
|
4651
|
-
// Progress events: tool calls, LLM responses, token updates
|
|
4652
|
-
const progressEvents = ['tool.start', 'tool.complete', 'llm.start', 'llm.complete'];
|
|
4653
|
-
if (progressEvents.includes(event.type)) {
|
|
4654
|
-
progressAwareTimeout.reportProgress();
|
|
4655
|
-
}
|
|
4656
|
-
});
|
|
4657
|
-
// Link parent's cancellation with progress-aware timeout so ESC propagates to subagents
|
|
4658
|
-
const parentSource = this.cancellation?.getSource();
|
|
4659
|
-
const effectiveSource = parentSource
|
|
4660
|
-
? createLinkedToken(parentSource, progressAwareTimeout)
|
|
4661
|
-
: progressAwareTimeout;
|
|
4662
|
-
// CRITICAL: Pass the cancellation token to the subagent so it can check and stop
|
|
4663
|
-
// gracefully when timeout fires. Without this, the subagent continues running as
|
|
4664
|
-
// a "zombie" even after race() returns with a timeout error.
|
|
4665
|
-
subAgent.setExternalCancellation(effectiveSource.token);
|
|
4666
|
-
// Pause parent's duration timer while subagent runs to prevent
|
|
4667
|
-
// the parent from timing out on wall-clock while waiting for subagent
|
|
4668
|
-
this.economics?.pauseDuration();
|
|
4669
|
-
try {
|
|
4670
|
-
// Run the task with cancellation propagation from parent
|
|
4671
|
-
const result = await race(subAgent.run(task), effectiveSource.token);
|
|
4672
|
-
const duration = Date.now() - startTime;
|
|
4673
|
-
// BEFORE cleanup - extract subagent's pending plan and merge into parent's plan
|
|
4674
|
-
// This ensures that when a subagent in plan mode queues writes, they bubble up to the parent
|
|
4675
|
-
let queuedChangeSummary = '';
|
|
4676
|
-
let queuedChangesCount = 0;
|
|
4677
|
-
if (subAgent.hasPendingPlan()) {
|
|
4678
|
-
const subPlan = subAgent.getPendingPlan();
|
|
4679
|
-
if (subPlan && subPlan.proposedChanges.length > 0) {
|
|
4680
|
-
queuedChangesCount = subPlan.proposedChanges.length;
|
|
4681
|
-
// Emit event for TUI to display
|
|
4682
|
-
this.emit({
|
|
4683
|
-
type: 'agent.pending_plan',
|
|
4684
|
-
agentId: agentName,
|
|
4685
|
-
changes: subPlan.proposedChanges,
|
|
4686
|
-
});
|
|
4687
|
-
// Build detailed summary of what was queued for the return message
|
|
4688
|
-
// This prevents the "doom loop" where parent doesn't know what subagent did
|
|
4689
|
-
const changeSummaries = subPlan.proposedChanges.map(c => {
|
|
4690
|
-
if (c.tool === 'write_file' || c.tool === 'edit_file') {
|
|
4691
|
-
const path = c.args.path || c.args.file_path || '(unknown file)';
|
|
4692
|
-
return ` - [${c.tool}] ${path}: ${c.reason}`;
|
|
4693
|
-
}
|
|
4694
|
-
else if (c.tool === 'bash') {
|
|
4695
|
-
const cmd = String(c.args.command || '').slice(0, 60);
|
|
4696
|
-
return ` - [bash] ${cmd}${String(c.args.command || '').length > 60 ? '...' : ''}: ${c.reason}`;
|
|
4697
|
-
}
|
|
4698
|
-
return ` - [${c.tool}]: ${c.reason}`;
|
|
4699
|
-
});
|
|
4700
|
-
queuedChangeSummary = `\n\n[PLAN MODE - CHANGES QUEUED TO PARENT]\n` +
|
|
4701
|
-
`The following ${subPlan.proposedChanges.length} change(s) have been queued in the parent's pending plan:\n` +
|
|
4702
|
-
changeSummaries.join('\n') + '\n' +
|
|
4703
|
-
`\nThese changes are now in YOUR pending plan. The task for this subagent is COMPLETE.\n` +
|
|
4704
|
-
`Do NOT spawn another agent for the same task - the changes are already queued.\n` +
|
|
4705
|
-
`Use /show-plan to see all pending changes, /approve to execute them.`;
|
|
4706
|
-
// Merge into parent's pending plan with subagent context
|
|
4707
|
-
for (const change of subPlan.proposedChanges) {
|
|
4708
|
-
this.pendingPlanManager.addProposedChange(change.tool, { ...change.args, _fromSubagent: agentName }, `[${agentName}] ${change.reason}`, change.toolCallId);
|
|
4709
|
-
}
|
|
4710
|
-
}
|
|
4711
|
-
// Also merge exploration summary if available
|
|
4712
|
-
if (subPlan?.explorationSummary) {
|
|
4713
|
-
this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
|
|
4714
|
-
}
|
|
4715
|
-
}
|
|
4716
|
-
// If subagent queued changes, override output with informative message
|
|
4717
|
-
// This is critical to prevent doom loops where parent doesn't understand what happened
|
|
4718
|
-
const finalOutput = queuedChangeSummary
|
|
4719
|
-
? (result.response || '') + queuedChangeSummary
|
|
4720
|
-
: (result.response || result.error || '');
|
|
4721
|
-
// Parse structured closure report from agent's response (if it produced one)
|
|
4722
|
-
const structured = parseStructuredClosureReport(result.response || '', 'completed');
|
|
4723
|
-
// Extract real file paths from subagent's economics tracker (before cleanup)
|
|
4724
|
-
const subagentFilePaths = subAgent.getModifiedFilePaths();
|
|
4725
|
-
const spawnResultFinal = {
|
|
4726
|
-
success: result.success,
|
|
4727
|
-
output: finalOutput,
|
|
4728
|
-
metrics: {
|
|
4729
|
-
tokens: result.metrics.totalTokens,
|
|
4730
|
-
duration,
|
|
4731
|
-
toolCalls: result.metrics.toolCalls,
|
|
4732
|
-
},
|
|
4733
|
-
structured,
|
|
4734
|
-
filesModified: subagentFilePaths,
|
|
4735
|
-
};
|
|
4736
|
-
// Save full output to subagent output store (avoids telephone problem)
|
|
4737
|
-
if (this.subagentOutputStore) {
|
|
4738
|
-
const outputEntry = {
|
|
4739
|
-
id: agentId,
|
|
4740
|
-
agentId,
|
|
4741
|
-
agentName,
|
|
4742
|
-
task,
|
|
4743
|
-
fullOutput: finalOutput,
|
|
4744
|
-
structured,
|
|
4745
|
-
filesModified: subagentFilePaths,
|
|
4746
|
-
filesCreated: [],
|
|
4747
|
-
timestamp: new Date(),
|
|
4748
|
-
tokensUsed: result.metrics.totalTokens,
|
|
4749
|
-
durationMs: duration,
|
|
4750
|
-
};
|
|
4751
|
-
const storeId = this.subagentOutputStore.save(outputEntry);
|
|
4752
|
-
// Attach reference so downstream consumers can retrieve full output
|
|
4753
|
-
spawnResultFinal.outputStoreId = storeId;
|
|
4754
|
-
}
|
|
4755
|
-
if (workerResultId && this.store?.hasWorkerResultsFeature()) {
|
|
4756
|
-
try {
|
|
4757
|
-
this.store.completeWorkerResult(workerResultId, {
|
|
4758
|
-
fullOutput: finalOutput,
|
|
4759
|
-
summary: finalOutput.slice(0, 500),
|
|
4760
|
-
artifacts: structured ? [{ type: 'structured_report', data: structured }] : undefined,
|
|
4761
|
-
metrics: {
|
|
4762
|
-
tokens: result.metrics.totalTokens,
|
|
4763
|
-
duration,
|
|
4764
|
-
toolCalls: result.metrics.toolCalls,
|
|
4765
|
-
},
|
|
4766
|
-
});
|
|
4767
|
-
}
|
|
4768
|
-
catch (storeErr) {
|
|
4769
|
-
this.observability?.logger?.warn('Failed to persist worker result', {
|
|
4770
|
-
agentId,
|
|
4771
|
-
error: storeErr.message,
|
|
4772
|
-
});
|
|
4773
|
-
}
|
|
4774
|
-
}
|
|
4775
|
-
this.emit({
|
|
4776
|
-
type: 'agent.complete',
|
|
4777
|
-
agentId, // Use unique spawn ID for precise tracking
|
|
4778
|
-
agentType: agentName, // Keep type for display purposes
|
|
4779
|
-
success: result.success,
|
|
4780
|
-
output: finalOutput.slice(0, 500), // Include output preview
|
|
4781
|
-
});
|
|
4782
|
-
if (progressAwareTimeout.isInWrapupPhase()) {
|
|
4783
|
-
this.emit({
|
|
4784
|
-
type: 'subagent.wrapup.completed',
|
|
4785
|
-
agentId,
|
|
4786
|
-
agentType: agentName,
|
|
4787
|
-
elapsedMs: Date.now() - startTime,
|
|
4788
|
-
});
|
|
4789
|
-
}
|
|
4790
|
-
// Enhanced tracing: Record subagent completion
|
|
4791
|
-
this.traceCollector?.record({
|
|
4792
|
-
type: 'subagent.link',
|
|
4793
|
-
data: {
|
|
4794
|
-
parentSessionId: this.traceCollector.getSessionId() || 'unknown',
|
|
4795
|
-
childSessionId,
|
|
4796
|
-
childTraceId,
|
|
4797
|
-
childConfig: {
|
|
4798
|
-
agentType: agentName,
|
|
4799
|
-
model: resolvedModel || 'default',
|
|
4800
|
-
task,
|
|
4801
|
-
tools: agentTools.map(t => t.name),
|
|
4802
|
-
},
|
|
4803
|
-
spawnContext: {
|
|
4804
|
-
reason: `Delegated task: ${task.slice(0, 100)}`,
|
|
4805
|
-
expectedOutcome: agentDef.description,
|
|
4806
|
-
parentIteration: this.state.iteration,
|
|
4807
|
-
},
|
|
4808
|
-
result: {
|
|
4809
|
-
success: result.success,
|
|
4810
|
-
summary: (result.response || result.error || '').slice(0, 500),
|
|
4811
|
-
tokensUsed: result.metrics.totalTokens,
|
|
4812
|
-
durationMs: duration,
|
|
4813
|
-
},
|
|
4814
|
-
},
|
|
4815
|
-
});
|
|
4816
|
-
// Unsubscribe from subagent events before cleanup
|
|
4817
|
-
unsubSubAgent();
|
|
4818
|
-
await subAgent.cleanup();
|
|
4819
|
-
// Cache result for duplicate spawn prevention
|
|
4820
|
-
// Use the same taskKey from the dedup check above
|
|
4821
|
-
this.spawnedTasks.set(taskKey, {
|
|
4822
|
-
timestamp: Date.now(),
|
|
4823
|
-
result: finalOutput,
|
|
4824
|
-
queuedChanges: queuedChangesCount,
|
|
4825
|
-
});
|
|
4826
|
-
return spawnResultFinal;
|
|
4827
|
-
}
|
|
4828
|
-
catch (err) {
|
|
4829
|
-
// Handle cancellation (user ESC or timeout) for cleaner error messages
|
|
4830
|
-
if (isCancellationError(err)) {
|
|
4831
|
-
const duration = Date.now() - startTime;
|
|
4832
|
-
const isUserCancellation = parentSource?.isCancellationRequested;
|
|
4833
|
-
const reason = isUserCancellation
|
|
4834
|
-
? 'User cancelled'
|
|
4835
|
-
: err.reason || `Timed out after ${subagentTimeout}ms`;
|
|
4836
|
-
this.emit({ type: 'agent.error', agentId, agentType: agentName, error: reason });
|
|
4837
|
-
if (!isUserCancellation) {
|
|
4838
|
-
this.emit({
|
|
4839
|
-
type: 'subagent.timeout.hard_kill',
|
|
4840
|
-
agentId,
|
|
4841
|
-
agentType: agentName,
|
|
4842
|
-
reason,
|
|
4843
|
-
elapsedMs: Date.now() - startTime,
|
|
4844
|
-
});
|
|
4845
|
-
}
|
|
4846
|
-
// =======================================================================
|
|
4847
|
-
// PRESERVE PARTIAL RESULTS
|
|
4848
|
-
// Instead of discarding all work, capture whatever the subagent produced
|
|
4849
|
-
// before timeout. This prevents the "zombie agent" problem where tokens
|
|
4850
|
-
// are consumed but results are lost.
|
|
4851
|
-
// =======================================================================
|
|
4852
|
-
const subagentState = subAgent.getState();
|
|
4853
|
-
const subagentMetrics = subAgent.getMetrics();
|
|
4854
|
-
// Extract partial response from the last assistant message
|
|
4855
|
-
const assistantMessages = subagentState.messages.filter(m => m.role === 'assistant');
|
|
4856
|
-
const lastAssistantMsg = assistantMessages[assistantMessages.length - 1];
|
|
4857
|
-
const partialResponse = typeof lastAssistantMsg?.content === 'string'
|
|
4858
|
-
? lastAssistantMsg.content
|
|
4859
|
-
: '';
|
|
4860
|
-
// Extract pending plan before cleanup (even on cancellation, preserve any queued work)
|
|
4861
|
-
let cancelledQueuedSummary = '';
|
|
4862
|
-
if (subAgent.hasPendingPlan()) {
|
|
4863
|
-
const subPlan = subAgent.getPendingPlan();
|
|
4864
|
-
if (subPlan && subPlan.proposedChanges.length > 0) {
|
|
4865
|
-
this.emit({
|
|
4866
|
-
type: 'agent.pending_plan',
|
|
4867
|
-
agentId: agentName,
|
|
4868
|
-
changes: subPlan.proposedChanges,
|
|
4869
|
-
});
|
|
4870
|
-
// Build summary of changes that were queued before cancellation
|
|
4871
|
-
const changeSummaries = subPlan.proposedChanges.map(c => {
|
|
4872
|
-
if (c.tool === 'write_file' || c.tool === 'edit_file') {
|
|
4873
|
-
const path = c.args.path || c.args.file_path || '(unknown file)';
|
|
4874
|
-
return ` - [${c.tool}] ${path}: ${c.reason}`;
|
|
4875
|
-
}
|
|
4876
|
-
else if (c.tool === 'bash') {
|
|
4877
|
-
const cmd = String(c.args.command || '').slice(0, 60);
|
|
4878
|
-
return ` - [bash] ${cmd}...: ${c.reason}`;
|
|
4879
|
-
}
|
|
4880
|
-
return ` - [${c.tool}]: ${c.reason}`;
|
|
4881
|
-
});
|
|
4882
|
-
cancelledQueuedSummary = `\n\n[PLAN MODE - CHANGES QUEUED BEFORE CANCELLATION]\n` +
|
|
4883
|
-
`${subPlan.proposedChanges.length} change(s) were queued to the parent plan:\n` +
|
|
4884
|
-
changeSummaries.join('\n') + '\n' +
|
|
4885
|
-
`These changes are preserved in your pending plan.`;
|
|
4886
|
-
for (const change of subPlan.proposedChanges) {
|
|
4887
|
-
this.pendingPlanManager.addProposedChange(change.tool, { ...change.args, _fromSubagent: agentName }, `[${agentName}] ${change.reason}`, change.toolCallId);
|
|
4888
|
-
}
|
|
4889
|
-
}
|
|
4890
|
-
// Also preserve exploration summary
|
|
4891
|
-
if (subPlan?.explorationSummary) {
|
|
4892
|
-
this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
|
|
4893
|
-
}
|
|
4894
|
-
}
|
|
4895
|
-
// Extract real file paths from subagent's economics tracker (before cleanup)
|
|
4896
|
-
const subagentFilePaths = subAgent.getModifiedFilePaths();
|
|
4897
|
-
// Unsubscribe from subagent events and cleanup gracefully
|
|
4898
|
-
unsubSubAgent();
|
|
4899
|
-
try {
|
|
4900
|
-
await subAgent.cleanup();
|
|
4901
|
-
}
|
|
4902
|
-
catch {
|
|
4903
|
-
// Ignore cleanup errors on cancellation
|
|
4904
|
-
}
|
|
4905
|
-
// Build output message with partial results
|
|
4906
|
-
const baseOutput = isUserCancellation
|
|
4907
|
-
? `Subagent '${agentName}' was cancelled by user.`
|
|
4908
|
-
: `Subagent '${agentName}' timed out after ${Math.round(subagentTimeout / 1000)}s.`;
|
|
4909
|
-
// Include partial response if we have one
|
|
4910
|
-
const partialResultSection = partialResponse
|
|
4911
|
-
? `\n\n[PARTIAL RESULTS BEFORE TIMEOUT]\n${partialResponse.slice(0, 2000)}${partialResponse.length > 2000 ? '...(truncated)' : ''}`
|
|
4912
|
-
: '';
|
|
4913
|
-
// Enhanced tracing: Record subagent timeout with partial results
|
|
4914
|
-
this.traceCollector?.record({
|
|
4915
|
-
type: 'subagent.link',
|
|
4916
|
-
data: {
|
|
4917
|
-
parentSessionId: this.traceCollector.getSessionId() || 'unknown',
|
|
4918
|
-
childSessionId,
|
|
4919
|
-
childTraceId,
|
|
4920
|
-
childConfig: {
|
|
4921
|
-
agentType: agentName,
|
|
4922
|
-
model: resolvedModel || 'default',
|
|
4923
|
-
task,
|
|
4924
|
-
tools: agentTools.map(t => t.name),
|
|
4925
|
-
},
|
|
4926
|
-
spawnContext: {
|
|
4927
|
-
reason: `Delegated task: ${task.slice(0, 100)}`,
|
|
4928
|
-
expectedOutcome: agentDef.description,
|
|
4929
|
-
parentIteration: this.state.iteration,
|
|
4930
|
-
},
|
|
4931
|
-
result: {
|
|
4932
|
-
success: false,
|
|
4933
|
-
summary: `[TIMEOUT] ${baseOutput}\n${partialResponse.slice(0, 200)}`,
|
|
4934
|
-
tokensUsed: subagentMetrics.totalTokens,
|
|
4935
|
-
durationMs: duration,
|
|
4936
|
-
},
|
|
4937
|
-
},
|
|
4938
|
-
});
|
|
4939
|
-
// Parse structured closure report from partial response
|
|
4940
|
-
const exitReason = isUserCancellation ? 'cancelled' : 'timeout_graceful';
|
|
4941
|
-
const structured = parseStructuredClosureReport(partialResponse, exitReason, task);
|
|
4942
|
-
if (workerResultId && this.store?.hasWorkerResultsFeature()) {
|
|
4943
|
-
try {
|
|
4944
|
-
this.store.failWorkerResult(workerResultId, reason);
|
|
4945
|
-
}
|
|
4946
|
-
catch (storeErr) {
|
|
4947
|
-
this.observability?.logger?.warn('Failed to mark cancelled worker result as failed', {
|
|
4948
|
-
agentId,
|
|
4949
|
-
error: storeErr.message,
|
|
4950
|
-
});
|
|
4951
|
-
}
|
|
4952
|
-
}
|
|
4953
|
-
return {
|
|
4954
|
-
success: false,
|
|
4955
|
-
output: baseOutput + partialResultSection + cancelledQueuedSummary,
|
|
4956
|
-
// IMPORTANT: Use actual metrics instead of zeros
|
|
4957
|
-
// This ensures accurate token tracking in /trace output
|
|
4958
|
-
metrics: {
|
|
4959
|
-
tokens: subagentMetrics.totalTokens,
|
|
4960
|
-
duration,
|
|
4961
|
-
toolCalls: subagentMetrics.toolCalls,
|
|
4962
|
-
},
|
|
4963
|
-
structured,
|
|
4964
|
-
filesModified: subagentFilePaths,
|
|
4965
|
-
};
|
|
4966
|
-
}
|
|
4967
|
-
throw err; // Re-throw non-cancellation errors
|
|
4968
|
-
}
|
|
4969
|
-
finally {
|
|
4970
|
-
// Resume parent's duration timer now that subagent is done
|
|
4971
|
-
this.economics?.resumeDuration();
|
|
4972
|
-
// Dispose both sources (linked source disposes its internal state, timeout source handles its timer)
|
|
4973
|
-
effectiveSource.dispose();
|
|
4974
|
-
progressAwareTimeout.dispose();
|
|
4975
|
-
// BUDGET POOL: Record actual usage and release the allocation
|
|
4976
|
-
// This must happen in finally to ensure cleanup on both success and error paths
|
|
4977
|
-
if (this.budgetPool && poolAllocationId) {
|
|
4978
|
-
const subMetrics = subAgent.getMetrics();
|
|
4979
|
-
this.budgetPool.recordUsage(poolAllocationId, subMetrics.totalTokens, subMetrics.estimatedCost);
|
|
4980
|
-
this.budgetPool.release(poolAllocationId);
|
|
4981
|
-
}
|
|
4982
|
-
}
|
|
4983
|
-
}
|
|
4984
|
-
catch (err) {
|
|
4985
|
-
const error = err instanceof Error ? err.message : String(err);
|
|
4986
|
-
this.emit({ type: 'agent.error', agentId, agentType: agentName, error });
|
|
4987
|
-
if (workerResultId && this.store?.hasWorkerResultsFeature()) {
|
|
4988
|
-
try {
|
|
4989
|
-
this.store.failWorkerResult(workerResultId, error);
|
|
4990
|
-
}
|
|
4991
|
-
catch (storeErr) {
|
|
4992
|
-
this.observability?.logger?.warn('Failed to mark worker result as failed', {
|
|
4993
|
-
agentId,
|
|
4994
|
-
error: storeErr.message,
|
|
4995
|
-
});
|
|
4996
|
-
}
|
|
4997
|
-
}
|
|
4998
|
-
return {
|
|
4999
|
-
success: false,
|
|
5000
|
-
output: `Agent error: ${error}`,
|
|
5001
|
-
metrics: { tokens: 0, duration: Date.now() - startTime, toolCalls: 0 },
|
|
5002
|
-
};
|
|
5003
|
-
}
|
|
5004
|
-
}
|
|
5005
|
-
/**
|
|
5006
|
-
* Spawn multiple agents in parallel to work on independent tasks.
|
|
5007
|
-
* Uses the shared blackboard for coordination and conflict prevention.
|
|
5008
|
-
*
|
|
5009
|
-
* Get budget for a subagent, using the pooled budget when available.
|
|
5010
|
-
* Falls back to the static SUBAGENT_BUDGET if no pool is configured.
|
|
5011
|
-
* Returns both the budget and the pool allocation ID (if any) for tracking.
|
|
5012
|
-
*/
|
|
5013
|
-
getSubagentBudget(agentName, constraints) {
|
|
5014
|
-
// If explicit maxTokens constraint, use that
|
|
5015
|
-
if (constraints?.maxTokens) {
|
|
5016
|
-
return {
|
|
5017
|
-
budget: { ...SUBAGENT_BUDGET, maxTokens: constraints.maxTokens },
|
|
5018
|
-
allocationId: null,
|
|
5019
|
-
};
|
|
5020
|
-
}
|
|
5021
|
-
// Try to allocate from the shared budget pool
|
|
5022
|
-
if (this.budgetPool) {
|
|
5023
|
-
const allocationId = `${agentName}-${Date.now()}`;
|
|
5024
|
-
const allocation = this.budgetPool.reserve(allocationId);
|
|
5025
|
-
if (allocation) {
|
|
5026
|
-
return {
|
|
5027
|
-
budget: {
|
|
5028
|
-
...SUBAGENT_BUDGET,
|
|
5029
|
-
maxTokens: allocation.tokenBudget,
|
|
5030
|
-
softTokenLimit: Math.floor(allocation.tokenBudget * 0.7),
|
|
5031
|
-
maxCost: allocation.costBudget,
|
|
5032
|
-
},
|
|
5033
|
-
allocationId,
|
|
5034
|
-
};
|
|
5035
|
-
}
|
|
5036
|
-
// Pool exhausted — give a tiny emergency budget (just enough to report failure)
|
|
5037
|
-
// This does NOT bypass the pool — it's a fixed small cost for error messaging
|
|
5038
|
-
return {
|
|
5039
|
-
budget: {
|
|
5040
|
-
...SUBAGENT_BUDGET,
|
|
5041
|
-
maxTokens: 5000,
|
|
5042
|
-
softTokenLimit: 3000,
|
|
5043
|
-
maxCost: 0.01,
|
|
5044
|
-
},
|
|
5045
|
-
allocationId: null,
|
|
5046
|
-
};
|
|
5047
|
-
}
|
|
5048
|
-
// No pool — use default subagent budget
|
|
5049
|
-
return { budget: SUBAGENT_BUDGET, allocationId: null };
|
|
2791
|
+
return coreSpawnAgent(agentName, task, this.buildContext(), this.createSubAgentFactory(), constraints);
|
|
5050
2792
|
}
|
|
5051
2793
|
/**
|
|
5052
|
-
*
|
|
5053
|
-
* agent fails or times out, others can still complete successfully.
|
|
2794
|
+
* Spawn multiple subagents in parallel (delegates to core/subagent-spawner).
|
|
5054
2795
|
*/
|
|
5055
2796
|
async spawnAgentsParallel(tasks) {
|
|
5056
|
-
|
|
5057
|
-
this.emit({
|
|
5058
|
-
type: 'parallel.spawn.start',
|
|
5059
|
-
count: tasks.length,
|
|
5060
|
-
agents: tasks.map(t => t.agent),
|
|
5061
|
-
});
|
|
5062
|
-
// Use DynamicBudgetPool for parallel spawns (prevents child starvation,
|
|
5063
|
-
// enables priority-based allocation). Falls back to regular pool for single tasks.
|
|
5064
|
-
let settled;
|
|
5065
|
-
const originalPool = this.budgetPool;
|
|
5066
|
-
// SubagentSupervisor for unified monitoring of concurrent subagents
|
|
5067
|
-
const supervisor = tasks.length > 1 ? createSubagentSupervisor() : null;
|
|
5068
|
-
if (this.budgetPool && tasks.length > 1) {
|
|
5069
|
-
// Swap to DynamicBudgetPool for this parallel batch
|
|
5070
|
-
const poolStats = this.budgetPool.getStats();
|
|
5071
|
-
const dynamicPool = createDynamicBudgetPool(poolStats.tokensRemaining, 0.1);
|
|
5072
|
-
dynamicPool.setExpectedChildren(tasks.length);
|
|
5073
|
-
// Temporarily replace the budget pool so spawnAgent's reserve() uses the dynamic one
|
|
5074
|
-
this.budgetPool = dynamicPool;
|
|
5075
|
-
try {
|
|
5076
|
-
const promises = tasks.map(({ agent, task }) => {
|
|
5077
|
-
const spawnPromise = this.spawnAgent(agent, task);
|
|
5078
|
-
// Register with supervisor for monitoring
|
|
5079
|
-
if (supervisor) {
|
|
5080
|
-
const handle = createSubagentHandle(`parallel-${agent}-${Date.now()}`, agent, task, spawnPromise, {});
|
|
5081
|
-
supervisor.add(handle);
|
|
5082
|
-
}
|
|
5083
|
-
return spawnPromise;
|
|
5084
|
-
});
|
|
5085
|
-
settled = await Promise.allSettled(promises);
|
|
5086
|
-
}
|
|
5087
|
-
finally {
|
|
5088
|
-
this.budgetPool = originalPool;
|
|
5089
|
-
supervisor?.stop();
|
|
5090
|
-
}
|
|
5091
|
-
}
|
|
5092
|
-
else {
|
|
5093
|
-
// Single task or no pool - use standard sequential allocation
|
|
5094
|
-
const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
|
|
5095
|
-
settled = await Promise.allSettled(promises);
|
|
5096
|
-
}
|
|
5097
|
-
// Convert settled results to SpawnResult array
|
|
5098
|
-
const results = settled.map((result, i) => {
|
|
5099
|
-
if (result.status === 'fulfilled') {
|
|
5100
|
-
return result.value;
|
|
5101
|
-
}
|
|
5102
|
-
// Handle rejected promises (shouldn't happen since spawnAgent catches errors internally,
|
|
5103
|
-
// but this is a safety net for unexpected failures)
|
|
5104
|
-
const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
|
|
5105
|
-
this.emit({
|
|
5106
|
-
type: 'agent.error',
|
|
5107
|
-
agentId: tasks[i].agent,
|
|
5108
|
-
error: `Unexpected parallel spawn error: ${error}`,
|
|
5109
|
-
});
|
|
5110
|
-
return {
|
|
5111
|
-
success: false,
|
|
5112
|
-
output: `Parallel spawn error: ${error}`,
|
|
5113
|
-
metrics: { tokens: 0, duration: 0, toolCalls: 0 },
|
|
5114
|
-
};
|
|
5115
|
-
});
|
|
5116
|
-
// Emit completion event
|
|
5117
|
-
this.emit({
|
|
5118
|
-
type: 'parallel.spawn.complete',
|
|
5119
|
-
count: tasks.length,
|
|
5120
|
-
successCount: results.filter(r => r.success).length,
|
|
5121
|
-
results: results.map((r, i) => ({
|
|
5122
|
-
agent: tasks[i].agent,
|
|
5123
|
-
success: r.success,
|
|
5124
|
-
tokens: r.metrics?.tokens || 0,
|
|
5125
|
-
})),
|
|
5126
|
-
});
|
|
5127
|
-
return results;
|
|
2797
|
+
return coreSpawnAgentsParallel(tasks, this.buildContext(), this.buildMutators(), this.createSubAgentFactory());
|
|
5128
2798
|
}
|
|
5129
2799
|
/**
|
|
5130
2800
|
* Get a formatted list of available agents.
|
|
@@ -5290,7 +2960,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
|
|
|
5290
2960
|
*/
|
|
5291
2961
|
cancel(reason) {
|
|
5292
2962
|
if (!this.cancellation) {
|
|
5293
|
-
|
|
2963
|
+
log.warn('Cancellation not enabled');
|
|
5294
2964
|
return;
|
|
5295
2965
|
}
|
|
5296
2966
|
this.cancellation.cancel(reason);
|
|
@@ -5401,7 +3071,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
|
|
|
5401
3071
|
*/
|
|
5402
3072
|
enableLSPFileTools(options) {
|
|
5403
3073
|
if (!this.lspManager) {
|
|
5404
|
-
|
|
3074
|
+
log.warn('LSP not enabled, cannot enable LSP file tools');
|
|
5405
3075
|
return;
|
|
5406
3076
|
}
|
|
5407
3077
|
const lspTools = this.getLSPFileTools(options);
|
|
@@ -6031,63 +3701,6 @@ export function buildAgent() {
|
|
|
6031
3701
|
return new ProductionAgentBuilder();
|
|
6032
3702
|
}
|
|
6033
3703
|
// =============================================================================
|
|
6034
|
-
//
|
|
6035
|
-
|
|
6036
|
-
/**
|
|
6037
|
-
* Parse a structured closure report from a subagent's text response.
|
|
6038
|
-
* The subagent may have produced JSON in response to a TIMEOUT_WRAPUP_PROMPT.
|
|
6039
|
-
*
|
|
6040
|
-
* @param text - The subagent's last response text
|
|
6041
|
-
* @param defaultExitReason - Exit reason to use (completed, timeout_graceful, cancelled, etc.)
|
|
6042
|
-
* @param fallbackTask - Original task description for fallback remainingWork
|
|
6043
|
-
* @returns Parsed StructuredClosureReport, or undefined if no JSON found and no fallback needed
|
|
6044
|
-
*/
|
|
6045
|
-
export function parseStructuredClosureReport(text, defaultExitReason, fallbackTask) {
|
|
6046
|
-
if (!text) {
|
|
6047
|
-
// No text at all — create a hard timeout fallback if we have a task
|
|
6048
|
-
if (fallbackTask) {
|
|
6049
|
-
return {
|
|
6050
|
-
findings: [],
|
|
6051
|
-
actionsTaken: [],
|
|
6052
|
-
failures: ['Timeout before producing structured summary'],
|
|
6053
|
-
remainingWork: [fallbackTask],
|
|
6054
|
-
exitReason: 'timeout_hard',
|
|
6055
|
-
};
|
|
6056
|
-
}
|
|
6057
|
-
return undefined;
|
|
6058
|
-
}
|
|
6059
|
-
try {
|
|
6060
|
-
// Try to extract JSON from the response
|
|
6061
|
-
const jsonMatch = text.match(/\{[\s\S]*\}/);
|
|
6062
|
-
if (jsonMatch) {
|
|
6063
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
6064
|
-
// Validate that it looks like a closure report (has at least one expected field)
|
|
6065
|
-
if (parsed.findings || parsed.actionsTaken || parsed.failures || parsed.remainingWork) {
|
|
6066
|
-
return {
|
|
6067
|
-
findings: Array.isArray(parsed.findings) ? parsed.findings : [],
|
|
6068
|
-
actionsTaken: Array.isArray(parsed.actionsTaken) ? parsed.actionsTaken : [],
|
|
6069
|
-
failures: Array.isArray(parsed.failures) ? parsed.failures : [],
|
|
6070
|
-
remainingWork: Array.isArray(parsed.remainingWork) ? parsed.remainingWork : [],
|
|
6071
|
-
exitReason: defaultExitReason,
|
|
6072
|
-
suggestedNextSteps: Array.isArray(parsed.suggestedNextSteps) ? parsed.suggestedNextSteps : undefined,
|
|
6073
|
-
};
|
|
6074
|
-
}
|
|
6075
|
-
}
|
|
6076
|
-
}
|
|
6077
|
-
catch {
|
|
6078
|
-
// JSON parse failed — fall through to fallback
|
|
6079
|
-
}
|
|
6080
|
-
// Fallback: LLM didn't produce valid JSON but we have text
|
|
6081
|
-
if (defaultExitReason !== 'completed') {
|
|
6082
|
-
return {
|
|
6083
|
-
findings: [text.slice(0, 500)],
|
|
6084
|
-
actionsTaken: [],
|
|
6085
|
-
failures: ['Did not produce structured JSON summary'],
|
|
6086
|
-
remainingWork: fallbackTask ? [fallbackTask] : [],
|
|
6087
|
-
exitReason: defaultExitReason === 'timeout_graceful' ? 'timeout_hard' : defaultExitReason,
|
|
6088
|
-
};
|
|
6089
|
-
}
|
|
6090
|
-
// For completed agents, don't force a structured report if they didn't produce one
|
|
6091
|
-
return undefined;
|
|
6092
|
-
}
|
|
3704
|
+
// Re-export from core for backward compatibility
|
|
3705
|
+
export { parseStructuredClosureReport } from './core/index.js';
|
|
6093
3706
|
//# sourceMappingURL=agent.js.map
|