attocode 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +191 -1
- package/README.md +7 -0
- package/dist/src/adapters.d.ts +6 -1
- package/dist/src/adapters.d.ts.map +1 -1
- package/dist/src/adapters.js +8 -1
- package/dist/src/adapters.js.map +1 -1
- package/dist/src/agent.d.ts +41 -4
- package/dist/src/agent.d.ts.map +1 -1
- package/dist/src/agent.js +846 -75
- package/dist/src/agent.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +23 -2
- package/dist/src/cli.js.map +1 -1
- package/dist/src/core/protocol/types.d.ts +8 -8
- package/dist/src/defaults.d.ts +7 -2
- package/dist/src/defaults.d.ts.map +1 -1
- package/dist/src/defaults.js +38 -2
- package/dist/src/defaults.js.map +1 -1
- package/dist/src/integrations/agent-registry.d.ts +13 -0
- package/dist/src/integrations/agent-registry.d.ts.map +1 -1
- package/dist/src/integrations/agent-registry.js.map +1 -1
- package/dist/src/integrations/async-subagent.d.ts +135 -0
- package/dist/src/integrations/async-subagent.d.ts.map +1 -0
- package/dist/src/integrations/async-subagent.js +213 -0
- package/dist/src/integrations/async-subagent.js.map +1 -0
- package/dist/src/integrations/auto-checkpoint.d.ts +98 -0
- package/dist/src/integrations/auto-checkpoint.d.ts.map +1 -0
- package/dist/src/integrations/auto-checkpoint.js +252 -0
- package/dist/src/integrations/auto-checkpoint.js.map +1 -0
- package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
- package/dist/src/integrations/auto-compaction.js +5 -1
- package/dist/src/integrations/auto-compaction.js.map +1 -1
- package/dist/src/integrations/bash-policy.d.ts +33 -0
- package/dist/src/integrations/bash-policy.d.ts.map +1 -0
- package/dist/src/integrations/bash-policy.js +142 -0
- package/dist/src/integrations/bash-policy.js.map +1 -0
- package/dist/src/integrations/codebase-context.d.ts +5 -0
- package/dist/src/integrations/codebase-context.d.ts.map +1 -1
- package/dist/src/integrations/codebase-context.js +33 -0
- package/dist/src/integrations/codebase-context.js.map +1 -1
- package/dist/src/integrations/complexity-classifier.d.ts +86 -0
- package/dist/src/integrations/complexity-classifier.d.ts.map +1 -0
- package/dist/src/integrations/complexity-classifier.js +233 -0
- package/dist/src/integrations/complexity-classifier.js.map +1 -0
- package/dist/src/integrations/delegation-protocol.d.ts +86 -0
- package/dist/src/integrations/delegation-protocol.d.ts.map +1 -0
- package/dist/src/integrations/delegation-protocol.js +127 -0
- package/dist/src/integrations/delegation-protocol.js.map +1 -0
- package/dist/src/integrations/dynamic-budget.d.ts +81 -0
- package/dist/src/integrations/dynamic-budget.d.ts.map +1 -0
- package/dist/src/integrations/dynamic-budget.js +151 -0
- package/dist/src/integrations/dynamic-budget.js.map +1 -0
- package/dist/src/integrations/economics.d.ts +86 -1
- package/dist/src/integrations/economics.d.ts.map +1 -1
- package/dist/src/integrations/economics.js +306 -11
- package/dist/src/integrations/economics.js.map +1 -1
- package/dist/src/integrations/environment-facts.d.ts +52 -0
- package/dist/src/integrations/environment-facts.d.ts.map +1 -0
- package/dist/src/integrations/environment-facts.js +84 -0
- package/dist/src/integrations/environment-facts.js.map +1 -0
- package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
- package/dist/src/integrations/hierarchical-config.js +17 -0
- package/dist/src/integrations/hierarchical-config.js.map +1 -1
- package/dist/src/integrations/index.d.ts +19 -2
- package/dist/src/integrations/index.d.ts.map +1 -1
- package/dist/src/integrations/index.js +34 -2
- package/dist/src/integrations/index.js.map +1 -1
- package/dist/src/integrations/injection-budget.d.ts +71 -0
- package/dist/src/integrations/injection-budget.d.ts.map +1 -0
- package/dist/src/integrations/injection-budget.js +136 -0
- package/dist/src/integrations/injection-budget.js.map +1 -0
- package/dist/src/integrations/mcp-client.d.ts.map +1 -1
- package/dist/src/integrations/mcp-client.js +14 -0
- package/dist/src/integrations/mcp-client.js.map +1 -1
- package/dist/src/integrations/mcp-custom-tools.d.ts +102 -0
- package/dist/src/integrations/mcp-custom-tools.d.ts.map +1 -0
- package/dist/src/integrations/mcp-custom-tools.js +232 -0
- package/dist/src/integrations/mcp-custom-tools.js.map +1 -0
- package/dist/src/integrations/mcp-tool-validator.d.ts +60 -0
- package/dist/src/integrations/mcp-tool-validator.d.ts.map +1 -0
- package/dist/src/integrations/mcp-tool-validator.js +141 -0
- package/dist/src/integrations/mcp-tool-validator.js.map +1 -0
- package/dist/src/integrations/policy-engine.d.ts +55 -0
- package/dist/src/integrations/policy-engine.d.ts.map +1 -0
- package/dist/src/integrations/policy-engine.js +247 -0
- package/dist/src/integrations/policy-engine.js.map +1 -0
- package/dist/src/integrations/safety.d.ts +5 -4
- package/dist/src/integrations/safety.d.ts.map +1 -1
- package/dist/src/integrations/safety.js +32 -7
- package/dist/src/integrations/safety.js.map +1 -1
- package/dist/src/integrations/sandbox/basic.d.ts +7 -0
- package/dist/src/integrations/sandbox/basic.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/basic.js +27 -2
- package/dist/src/integrations/sandbox/basic.js.map +1 -1
- package/dist/src/integrations/sandbox/index.d.ts +6 -0
- package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/index.js +3 -0
- package/dist/src/integrations/sandbox/index.js.map +1 -1
- package/dist/src/integrations/sandbox/landlock.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/landlock.js +3 -0
- package/dist/src/integrations/sandbox/landlock.js.map +1 -1
- package/dist/src/integrations/self-improvement.d.ts +90 -0
- package/dist/src/integrations/self-improvement.d.ts.map +1 -0
- package/dist/src/integrations/self-improvement.js +229 -0
- package/dist/src/integrations/self-improvement.js.map +1 -0
- package/dist/src/integrations/smart-decomposer.d.ts +22 -1
- package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
- package/dist/src/integrations/smart-decomposer.js +127 -28
- package/dist/src/integrations/smart-decomposer.js.map +1 -1
- package/dist/src/integrations/subagent-output-store.d.ts +91 -0
- package/dist/src/integrations/subagent-output-store.d.ts.map +1 -0
- package/dist/src/integrations/subagent-output-store.js +257 -0
- package/dist/src/integrations/subagent-output-store.js.map +1 -0
- package/dist/src/integrations/swarm/index.d.ts +2 -2
- package/dist/src/integrations/swarm/index.d.ts.map +1 -1
- package/dist/src/integrations/swarm/index.js +1 -1
- package/dist/src/integrations/swarm/index.js.map +1 -1
- package/dist/src/integrations/swarm/model-selector.d.ts +16 -0
- package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
- package/dist/src/integrations/swarm/model-selector.js +123 -10
- package/dist/src/integrations/swarm/model-selector.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-budget.d.ts +4 -0
- package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-budget.js +6 -0
- package/dist/src/integrations/swarm/swarm-budget.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts +10 -1
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.js +226 -13
- package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +12 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.js +178 -9
- package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.d.ts +66 -1
- package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.js +26 -5
- package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +127 -0
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.js +1842 -47
- package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +91 -3
- package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-quality-gate.js +395 -19
- package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
- package/dist/src/integrations/swarm/task-queue.d.ts +55 -1
- package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
- package/dist/src/integrations/swarm/task-queue.js +389 -16
- package/dist/src/integrations/swarm/task-queue.js.map +1 -1
- package/dist/src/integrations/swarm/types.d.ts +247 -11
- package/dist/src/integrations/swarm/types.d.ts.map +1 -1
- package/dist/src/integrations/swarm/types.js +67 -9
- package/dist/src/integrations/swarm/types.js.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.d.ts +18 -5
- package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.js +236 -34
- package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
- package/dist/src/integrations/thinking-strategy.d.ts +52 -0
- package/dist/src/integrations/thinking-strategy.d.ts.map +1 -0
- package/dist/src/integrations/thinking-strategy.js +129 -0
- package/dist/src/integrations/thinking-strategy.js.map +1 -0
- package/dist/src/integrations/tool-recommendation.d.ts +61 -0
- package/dist/src/integrations/tool-recommendation.d.ts.map +1 -0
- package/dist/src/integrations/tool-recommendation.js +268 -0
- package/dist/src/integrations/tool-recommendation.js.map +1 -0
- package/dist/src/integrations/verification-gate.d.ts +80 -0
- package/dist/src/integrations/verification-gate.d.ts.map +1 -0
- package/dist/src/integrations/verification-gate.js +146 -0
- package/dist/src/integrations/verification-gate.js.map +1 -0
- package/dist/src/integrations/work-log.d.ts +87 -0
- package/dist/src/integrations/work-log.d.ts.map +1 -0
- package/dist/src/integrations/work-log.js +275 -0
- package/dist/src/integrations/work-log.js.map +1 -0
- package/dist/src/main.js +31 -5
- package/dist/src/main.js.map +1 -1
- package/dist/src/modes/repl.d.ts.map +1 -1
- package/dist/src/modes/repl.js +10 -4
- package/dist/src/modes/repl.js.map +1 -1
- package/dist/src/modes/tui.d.ts.map +1 -1
- package/dist/src/modes/tui.js +5 -0
- package/dist/src/modes/tui.js.map +1 -1
- package/dist/src/modes.d.ts +6 -0
- package/dist/src/modes.d.ts.map +1 -1
- package/dist/src/modes.js +69 -21
- package/dist/src/modes.js.map +1 -1
- package/dist/src/tools/agent.d.ts.map +1 -1
- package/dist/src/tools/agent.js +11 -2
- package/dist/src/tools/agent.js.map +1 -1
- package/dist/src/tools/bash.d.ts +9 -3
- package/dist/src/tools/bash.d.ts.map +1 -1
- package/dist/src/tools/bash.js +12 -0
- package/dist/src/tools/bash.js.map +1 -1
- package/dist/src/tools/coercion.d.ts +6 -0
- package/dist/src/tools/coercion.d.ts.map +1 -1
- package/dist/src/tools/coercion.js +13 -0
- package/dist/src/tools/coercion.js.map +1 -1
- package/dist/src/tools/file.d.ts +2 -2
- package/dist/src/tools/file.js +2 -2
- package/dist/src/tools/file.js.map +1 -1
- package/dist/src/tools/permission.d.ts.map +1 -1
- package/dist/src/tools/permission.js +4 -111
- package/dist/src/tools/permission.js.map +1 -1
- package/dist/src/tools/standard.d.ts +17 -1
- package/dist/src/tools/standard.d.ts.map +1 -1
- package/dist/src/tools/standard.js +64 -11
- package/dist/src/tools/standard.js.map +1 -1
- package/dist/src/tracing/trace-collector.d.ts +167 -0
- package/dist/src/tracing/trace-collector.d.ts.map +1 -1
- package/dist/src/tracing/trace-collector.js +137 -0
- package/dist/src/tracing/trace-collector.js.map +1 -1
- package/dist/src/tracing/types.d.ts +105 -1
- package/dist/src/tracing/types.d.ts.map +1 -1
- package/dist/src/tracing/types.js.map +1 -1
- package/dist/src/tui/app.d.ts.map +1 -1
- package/dist/src/tui/app.js +34 -5
- package/dist/src/tui/app.js.map +1 -1
- package/dist/src/types.d.ts +89 -0
- package/dist/src/types.d.ts.map +1 -1
- package/package.json +6 -2
|
@@ -16,15 +16,71 @@
|
|
|
16
16
|
* - State persistence and resume
|
|
17
17
|
* - Orchestrator decision logging
|
|
18
18
|
*/
|
|
19
|
-
import
|
|
19
|
+
import * as fs from 'node:fs';
|
|
20
|
+
import * as path from 'node:path';
|
|
21
|
+
import { createSmartDecomposer, parseDecompositionResponse, validateDecomposition } from '../smart-decomposer.js';
|
|
20
22
|
import { createResultSynthesizer } from '../result-synthesizer.js';
|
|
21
|
-
import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG,
|
|
23
|
+
import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, getTaskTypeConfig } from './types.js';
|
|
22
24
|
import { createSwarmTaskQueue } from './task-queue.js';
|
|
23
25
|
import { createSwarmBudgetPool } from './swarm-budget.js';
|
|
24
26
|
import { createSwarmWorkerPool } from './worker-pool.js';
|
|
25
|
-
import { evaluateWorkerOutput } from './swarm-quality-gate.js';
|
|
27
|
+
import { evaluateWorkerOutput, runPreFlightChecks, checkArtifacts, checkArtifactsEnhanced, runConcreteChecks } from './swarm-quality-gate.js';
|
|
26
28
|
import { ModelHealthTracker, selectAlternativeModel } from './model-selector.js';
|
|
27
29
|
import { SwarmStateStore } from './swarm-state-store.js';
|
|
30
|
+
// ─── Hollow Completion Detection ──────────────────────────────────────────
|
|
31
|
+
/**
 * Phrases that signal a worker is admitting failure. They are matched as
 * case-insensitive substrings of the worker output, and only consulted when
 * the worker reported `success`.
 */
const FAILURE_INDICATORS = [
    'budget exhausted', 'unable to complete', 'could not complete',
    'ran out of budget', 'no changes were made', 'no files were modified',
    'no files were created', 'failed to complete', 'before research could begin',
    'i was unable to', 'i could not', 'unfortunately i',
];
/**
 * Phrases treated as content-free boilerplate. They are matched as
 * case-insensitive substrings (not whole words) of the worker output, and only
 * consulted for zero-tool-call results whose trimmed output is under 300 chars.
 */
const BOILERPLATE_INDICATORS = [
    'task completed successfully', 'i have completed the task',
    'the task has been completed', 'done', 'completed', 'finished',
    'no issues found', 'everything looks good', 'all tasks completed',
];
/**
 * V11: Hollow completion detection — catches empty completions AND "success"
 * with failure language.
 *
 * A result is reported as hollow when any of the following holds:
 *  1. Zero tool calls and trimmed output shorter than the hollow threshold
 *     (`swarmConfig.hollowOutputThreshold`, default 120 chars).
 *  2. Zero tool calls, trimmed output shorter than 300 chars, and the output
 *     contains a boilerplate phrase.
 *  3. `success` is truthy but the output contains a failure admission.
 *  4. A task type is given whose config has `requiresToolCalls` set, and the
 *     worker made zero tool calls.
 *
 * A result with `toolCalls === -1` (the timeout marker) is never hollow.
 * A missing `metrics` object is treated the same as a missing `toolCalls`
 * value (zero tool calls) instead of throwing.
 *
 * @param {object} spawnResult - Worker result; reads `metrics.toolCalls`, `output`, `success`.
 * @param {string} [taskType] - Task type forwarded to `getTaskTypeConfig`.
 * @param {object} [swarmConfig] - Swarm config; reads `hollowOutputThreshold`.
 * @returns {boolean} `true` when the completion is considered hollow.
 */
export function isHollowCompletion(spawnResult, taskType, swarmConfig) {
    const toolCalls = spawnResult.metrics?.toolCalls ?? 0;
    // Timeout uses toolCalls === -1, not hollow
    if (toolCalls === -1) {
        return false;
    }
    const output = spawnResult.output ?? '';
    const trimmedLength = output.trim().length;
    const outputLower = output.toLowerCase();
    if (toolCalls === 0) {
        // Truly empty completions: zero tools AND trivial output
        // P4: Higher threshold (120 chars) + configurable via SwarmConfig
        const hollowThreshold = swarmConfig?.hollowOutputThreshold ?? 120;
        if (trimmedLength < hollowThreshold) {
            return true;
        }
        // P4: Boilerplate detection — zero tools AND short output that's just boilerplate
        if (trimmedLength < 300
            && BOILERPLATE_INDICATORS.some((phrase) => outputLower.includes(phrase))) {
            return true;
        }
    }
    // "Success" that admits failure: worker claims success but output contains failure language
    if (spawnResult.success
        && FAILURE_INDICATORS.some((phrase) => outputLower.includes(phrase))) {
        return true;
    }
    // V7: Use configurable requiresToolCalls from TaskTypeConfig.
    // For action-oriented tasks (implement/test/refactor/etc), zero tool calls is ALWAYS hollow.
    if (taskType) {
        const typeConfig = getTaskTypeConfig(taskType, swarmConfig);
        if (typeConfig.requiresToolCalls && toolCalls === 0) {
            return true;
        }
    }
    return false;
}
|
|
28
84
|
// ─── Orchestrator ──────────────────────────────────────────────────────────
|
|
29
85
|
export class SwarmOrchestrator {
|
|
30
86
|
config;
|
|
@@ -47,10 +103,15 @@ export class SwarmOrchestrator {
|
|
|
47
103
|
retries = 0;
|
|
48
104
|
startTime = 0;
|
|
49
105
|
modelUsage = new Map();
|
|
106
|
+
// Orchestrator's own LLM usage (separate from worker usage)
|
|
107
|
+
orchestratorTokens = 0;
|
|
108
|
+
orchestratorCost = 0;
|
|
109
|
+
orchestratorCalls = 0;
|
|
50
110
|
// V2: Planning, review, verification, health, persistence
|
|
51
111
|
plan;
|
|
52
112
|
waveReviews = [];
|
|
53
113
|
verificationResult;
|
|
114
|
+
artifactInventory;
|
|
54
115
|
orchestratorDecisions = [];
|
|
55
116
|
healthTracker;
|
|
56
117
|
stateStore;
|
|
@@ -61,43 +122,111 @@ export class SwarmOrchestrator {
|
|
|
61
122
|
static CIRCUIT_BREAKER_WINDOW_MS = 30_000;
|
|
62
123
|
static CIRCUIT_BREAKER_THRESHOLD = 3;
|
|
63
124
|
static CIRCUIT_BREAKER_PAUSE_MS = 15_000;
|
|
125
|
+
// P3: Per-model quality gate circuit breaker (replaces global circuit breaker)
|
|
126
|
+
perModelQualityRejections = new Map();
|
|
127
|
+
qualityGateDisabledModels = new Set();
|
|
128
|
+
static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 5;
|
|
129
|
+
// Hollow completion streak: early termination when single-model swarm produces only hollows
|
|
130
|
+
hollowStreak = 0;
|
|
131
|
+
static HOLLOW_STREAK_THRESHOLD = 3;
|
|
132
|
+
// V7: Global dispatch + hollow ratio tracking for multi-model termination
|
|
133
|
+
totalDispatches = 0;
|
|
134
|
+
totalHollows = 0;
|
|
135
|
+
// Hollow ratio warning (fired once, then suppressed to avoid log spam)
|
|
136
|
+
hollowRatioWarned = false;
|
|
137
|
+
// P7: Adaptive dispatch stagger — increases on rate limits, decreases on success
|
|
138
|
+
adaptiveStaggerMs = 0; // Initialized from config in constructor
|
|
139
|
+
// F25: Consecutive timeout tracking per task — early-fail after limit
|
|
140
|
+
taskTimeoutCounts = new Map();
|
|
141
|
+
// Original prompt for re-planning on resume
|
|
142
|
+
originalPrompt = '';
|
|
143
|
+
// Mid-swarm re-planning: only once per swarm execution
|
|
144
|
+
hasReplanned = false;
|
|
64
145
|
constructor(config, provider, agentRegistry, spawnAgentFn, blackboard) {
|
|
65
146
|
this.config = { ...DEFAULT_SWARM_CONFIG, ...config };
|
|
66
147
|
this.provider = provider;
|
|
67
148
|
this.blackboard = blackboard;
|
|
68
149
|
this.spawnAgentFn = spawnAgentFn;
|
|
69
150
|
this.healthTracker = new ModelHealthTracker();
|
|
151
|
+
this.adaptiveStaggerMs = this.getStaggerMs();
|
|
70
152
|
this.taskQueue = createSwarmTaskQueue();
|
|
71
153
|
this.budgetPool = createSwarmBudgetPool(this.config);
|
|
72
|
-
this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool);
|
|
154
|
+
this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool, this.healthTracker);
|
|
73
155
|
// Initialize state store if persistence enabled
|
|
74
156
|
if (this.config.enablePersistence) {
|
|
75
157
|
this.stateStore = new SwarmStateStore(this.config.stateDir ?? '.agent/swarm-state', this.config.resumeSessionId);
|
|
76
158
|
}
|
|
77
159
|
// C1: Build LLM decompose function with explicit JSON schema
|
|
78
160
|
const llmDecompose = async (task, _context) => {
|
|
161
|
+
// V7: Dynamically build the allowed type list from built-in + user-defined types
|
|
162
|
+
const builtinTypes = ['research', 'analysis', 'design', 'implement', 'test', 'refactor', 'review', 'document', 'integrate', 'deploy', 'merge'];
|
|
163
|
+
const customTypes = Object.keys(this.config.taskTypes ?? {}).filter(t => !builtinTypes.includes(t));
|
|
164
|
+
const allTypes = [...builtinTypes, ...customTypes];
|
|
165
|
+
const typeListStr = allTypes.map(t => `"${t}"`).join(' | ');
|
|
166
|
+
// Build custom type descriptions so the LLM knows when to use them
|
|
167
|
+
let customTypeSection = '';
|
|
168
|
+
if (customTypes.length > 0) {
|
|
169
|
+
const descriptions = customTypes.map(t => {
|
|
170
|
+
const cfg = this.config.taskTypes[t];
|
|
171
|
+
const parts = [` - "${t}"`];
|
|
172
|
+
if (cfg.capability)
|
|
173
|
+
parts.push(`(capability: ${cfg.capability})`);
|
|
174
|
+
if (cfg.promptTemplate)
|
|
175
|
+
parts.push(`— uses ${cfg.promptTemplate} workflow`);
|
|
176
|
+
if (cfg.timeout)
|
|
177
|
+
parts.push(`— timeout: ${Math.round(cfg.timeout / 60000)}min`);
|
|
178
|
+
return parts.join(' ');
|
|
179
|
+
}).join('\n');
|
|
180
|
+
customTypeSection = `\n\nCustom task types available:\n${descriptions}\nUse these when their description matches the subtask's purpose.`;
|
|
181
|
+
}
|
|
79
182
|
const systemPrompt = `You are a task decomposition expert. Break down the given task into well-defined subtasks with clear dependencies.
|
|
80
183
|
|
|
184
|
+
CRITICAL: Dependencies MUST use zero-based integer indices referring to other subtasks in the array.
|
|
185
|
+
|
|
81
186
|
Respond with valid JSON matching this exact schema:
|
|
82
187
|
{
|
|
83
188
|
"subtasks": [
|
|
84
189
|
{
|
|
85
190
|
"description": "Clear description of what this subtask does",
|
|
86
|
-
"type":
|
|
191
|
+
"type": ${typeListStr},
|
|
87
192
|
"complexity": 1-10,
|
|
88
|
-
"dependencies": [
|
|
193
|
+
"dependencies": [0, 1],
|
|
89
194
|
"parallelizable": true | false,
|
|
90
195
|
"relevantFiles": ["src/path/to/file.ts"]
|
|
91
196
|
}
|
|
92
197
|
],
|
|
93
198
|
"strategy": "sequential" | "parallel" | "hierarchical" | "adaptive" | "pipeline",
|
|
94
199
|
"reasoning": "Brief explanation of why this decomposition was chosen"
|
|
200
|
+
}${customTypeSection}
|
|
201
|
+
|
|
202
|
+
EXAMPLE 1 — Research task (3 parallel research + 1 merge):
|
|
203
|
+
{
|
|
204
|
+
"subtasks": [
|
|
205
|
+
{ "description": "Research React state management", "type": "research", "complexity": 3, "dependencies": [], "parallelizable": true },
|
|
206
|
+
{ "description": "Research routing options", "type": "research", "complexity": 3, "dependencies": [], "parallelizable": true },
|
|
207
|
+
{ "description": "Research testing frameworks", "type": "research", "complexity": 2, "dependencies": [], "parallelizable": true },
|
|
208
|
+
{ "description": "Synthesize findings into recommendation", "type": "merge", "complexity": 4, "dependencies": [0, 1, 2], "parallelizable": false }
|
|
209
|
+
],
|
|
210
|
+
"strategy": "parallel",
|
|
211
|
+
"reasoning": "Independent research tasks feed into a single merge"
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
EXAMPLE 2 — Implementation task (sequential chain):
|
|
215
|
+
{
|
|
216
|
+
"subtasks": [
|
|
217
|
+
{ "description": "Design API schema", "type": "design", "complexity": 4, "dependencies": [], "parallelizable": false },
|
|
218
|
+
{ "description": "Implement API endpoints", "type": "implement", "complexity": 6, "dependencies": [0], "parallelizable": false },
|
|
219
|
+
{ "description": "Write integration tests", "type": "test", "complexity": 3, "dependencies": [1], "parallelizable": false }
|
|
220
|
+
],
|
|
221
|
+
"strategy": "sequential",
|
|
222
|
+
"reasoning": "Each step depends on the previous"
|
|
95
223
|
}
|
|
96
224
|
|
|
97
225
|
Rules:
|
|
226
|
+
- Dependencies MUST be integer indices (e.g., [0, 1]), NOT descriptions or strings
|
|
98
227
|
- Each subtask must have a clear, actionable description
|
|
99
|
-
- Dependencies reference other subtask descriptions or zero-based indices
|
|
100
228
|
- Mark subtasks as parallelizable: true if they don't depend on each other
|
|
229
|
+
- If there are multiple independent subtasks, ALWAYS create a final merge task that depends on ALL of them
|
|
101
230
|
- Complexity 1-3: simple, 4-6: moderate, 7-10: complex
|
|
102
231
|
- Return at least 2 subtasks for non-trivial tasks`;
|
|
103
232
|
const response = await this.provider.chat([
|
|
@@ -108,6 +237,7 @@ Rules:
|
|
|
108
237
|
maxTokens: 4000,
|
|
109
238
|
temperature: 0.3,
|
|
110
239
|
});
|
|
240
|
+
this.trackOrchestratorUsage(response, 'decompose');
|
|
111
241
|
// Use parseDecompositionResponse which handles markdown code blocks and edge cases
|
|
112
242
|
return parseDecompositionResponse(response.content);
|
|
113
243
|
};
|
|
@@ -151,6 +281,25 @@ Rules:
|
|
|
151
281
|
}
|
|
152
282
|
}
|
|
153
283
|
}
|
|
284
|
+
/**
|
|
285
|
+
* Track token usage from an orchestrator LLM call.
|
|
286
|
+
*/
|
|
287
|
+
trackOrchestratorUsage(response, purpose) {
|
|
288
|
+
if (!response.usage)
|
|
289
|
+
return;
|
|
290
|
+
const tokens = response.usage.total_tokens ?? ((response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0));
|
|
291
|
+
const cost = tokens * 0.000015; // ~$15/M tokens average for orchestrator models
|
|
292
|
+
this.orchestratorTokens += tokens;
|
|
293
|
+
this.orchestratorCost += cost;
|
|
294
|
+
this.orchestratorCalls++;
|
|
295
|
+
this.emit({
|
|
296
|
+
type: 'swarm.orchestrator.llm',
|
|
297
|
+
model: this.config.orchestratorModel,
|
|
298
|
+
purpose,
|
|
299
|
+
tokens,
|
|
300
|
+
cost,
|
|
301
|
+
});
|
|
302
|
+
}
|
|
154
303
|
/**
|
|
155
304
|
* Execute the full swarm pipeline for a task.
|
|
156
305
|
*
|
|
@@ -167,6 +316,7 @@ Rules:
|
|
|
167
316
|
*/
|
|
168
317
|
async execute(task) {
|
|
169
318
|
this.startTime = Date.now();
|
|
319
|
+
this.originalPrompt = task;
|
|
170
320
|
try {
|
|
171
321
|
// V2: Check for resume
|
|
172
322
|
if (this.config.resumeSessionId && this.stateStore) {
|
|
@@ -174,19 +324,100 @@ Rules:
|
|
|
174
324
|
}
|
|
175
325
|
// Phase 1: Decompose
|
|
176
326
|
this.currentPhase = 'decomposing';
|
|
177
|
-
|
|
327
|
+
this.emit({ type: 'swarm.phase.progress', phase: 'decomposing', message: 'Decomposing task into subtasks...' });
|
|
328
|
+
let decomposition = await this.decompose(task);
|
|
178
329
|
if (!decomposition) {
|
|
179
330
|
this.currentPhase = 'failed';
|
|
180
331
|
return this.buildErrorResult('Decomposition failed — task may be too simple for swarm mode');
|
|
181
332
|
}
|
|
333
|
+
// F5: Validate decomposition — check for cycles, invalid deps, granularity
|
|
334
|
+
const validation = validateDecomposition(decomposition);
|
|
335
|
+
if (validation.warnings.length > 0) {
|
|
336
|
+
this.logDecision('decomposition-validation', `Warnings: ${validation.warnings.join('; ')}`, '');
|
|
337
|
+
}
|
|
338
|
+
if (!validation.valid) {
|
|
339
|
+
this.logDecision('decomposition-validation', `Invalid decomposition: ${validation.issues.join('; ')}`, 'Retrying...');
|
|
340
|
+
// Retry decomposition once with feedback
|
|
341
|
+
decomposition = await this.decompose(`${task}\n\nIMPORTANT: Previous decomposition was invalid: ${validation.issues.join('. ')}. Fix these issues.`);
|
|
342
|
+
if (!decomposition) {
|
|
343
|
+
this.currentPhase = 'failed';
|
|
344
|
+
return this.buildErrorResult(`Decomposition validation failed: ${validation.issues.join('; ')}`);
|
|
345
|
+
}
|
|
346
|
+
const retryValidation = validateDecomposition(decomposition);
|
|
347
|
+
if (!retryValidation.valid) {
|
|
348
|
+
this.logDecision('decomposition-validation', `Retry still invalid: ${retryValidation.issues.join('; ')}`, 'Proceeding anyway');
|
|
349
|
+
}
|
|
350
|
+
}
|
|
182
351
|
// Phase 2: Schedule into waves
|
|
183
352
|
this.currentPhase = 'scheduling';
|
|
353
|
+
this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduling ${decomposition.subtasks.length} subtasks into waves...` });
|
|
184
354
|
this.taskQueue.loadFromDecomposition(decomposition, this.config);
|
|
355
|
+
// F3: Dynamic orchestrator reserve scaling based on subtask count.
|
|
356
|
+
// More subtasks = more quality gate calls, synthesis work, and review overhead.
|
|
357
|
+
// Formula: max(configured ratio, 5% per subtask), capped at 40%.
|
|
358
|
+
const subtaskCount = decomposition.subtasks.length;
|
|
359
|
+
const dynamicReserveRatio = Math.min(0.40, Math.max(this.config.orchestratorReserveRatio, subtaskCount * 0.05));
|
|
360
|
+
if (dynamicReserveRatio > this.config.orchestratorReserveRatio) {
|
|
361
|
+
this.logDecision('budget-scaling', `Scaled orchestrator reserve from ${(this.config.orchestratorReserveRatio * 100).toFixed(0)}% to ${(dynamicReserveRatio * 100).toFixed(0)}% for ${subtaskCount} subtasks`, '');
|
|
362
|
+
}
|
|
363
|
+
// Foundation task detection: tasks that are the sole dependency of 3+ downstream
|
|
364
|
+
// tasks are critical — if they fail, the entire swarm cascade-skips.
|
|
365
|
+
// Give them extra retries and timeout scaling.
|
|
366
|
+
this.detectFoundationTasks();
|
|
367
|
+
// D3/F1: Probe model capability before dispatch (default: true)
|
|
368
|
+
if (this.config.probeModels !== false) {
|
|
369
|
+
await this.probeModelCapability();
|
|
370
|
+
// F15/F23: Handle all-models-failed probe scenario
|
|
371
|
+
// Resolve strategy: explicit probeFailureStrategy > legacy ignoreProbeFailures > default 'warn-and-try'
|
|
372
|
+
const probeStrategy = this.config.probeFailureStrategy
|
|
373
|
+
?? (this.config.ignoreProbeFailures ? 'warn-and-try' : 'warn-and-try');
|
|
374
|
+
const uniqueModels = [...new Set(this.config.workers.map(w => w.model))];
|
|
375
|
+
const healthyModels = this.healthTracker.getHealthy(uniqueModels);
|
|
376
|
+
if (healthyModels.length === 0 && uniqueModels.length > 0) {
|
|
377
|
+
if (probeStrategy === 'abort') {
|
|
378
|
+
// Hard abort — no tasks dispatched
|
|
379
|
+
const reason = `All ${uniqueModels.length} worker model(s) failed capability probes — no model can make tool calls. Aborting swarm to prevent budget waste. Fix model configuration and retry.`;
|
|
380
|
+
this.logDecision('probe-abort', reason, `Models tested: ${uniqueModels.join(', ')}`);
|
|
381
|
+
this.emit({ type: 'swarm.abort', reason });
|
|
382
|
+
this.skipRemainingTasks(reason);
|
|
383
|
+
const totalTasks = this.taskQueue.getStats().total;
|
|
384
|
+
const abortStats = {
|
|
385
|
+
completedTasks: 0, failedTasks: 0, skippedTasks: totalTasks,
|
|
386
|
+
totalTasks, totalWaves: 0, totalTokens: 0, totalCost: 0,
|
|
387
|
+
totalDurationMs: Date.now() - this.startTime,
|
|
388
|
+
qualityRejections: 0, retries: 0,
|
|
389
|
+
modelUsage: new Map(),
|
|
390
|
+
};
|
|
391
|
+
this.emit({ type: 'swarm.complete', stats: abortStats, errors: this.errors });
|
|
392
|
+
return {
|
|
393
|
+
success: false, summary: reason,
|
|
394
|
+
tasks: this.taskQueue.getAllTasks(), stats: abortStats, errors: this.errors,
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
else {
|
|
398
|
+
// F23: warn-and-try — log warning, reset health, let real tasks prove capability
|
|
399
|
+
this.logDecision('probe-warning', `All ${uniqueModels.length} model(s) failed probe — continuing anyway (strategy: warn-and-try)`, 'Will abort after first real task failure if model cannot use tools');
|
|
400
|
+
// Reset health so dispatch doesn't skip all models
|
|
401
|
+
for (const model of uniqueModels) {
|
|
402
|
+
this.healthTracker.recordSuccess(model, 0);
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
// Emit skip events when tasks are cascade-skipped due to dependency failures
|
|
408
|
+
this.taskQueue.setOnCascadeSkip((skippedTaskId, reason) => {
|
|
409
|
+
this.emit({ type: 'swarm.task.skipped', taskId: skippedTaskId, reason });
|
|
410
|
+
});
|
|
185
411
|
const stats = this.taskQueue.getStats();
|
|
186
|
-
|
|
412
|
+
this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduled ${stats.total} tasks in ${this.taskQueue.getTotalWaves()} waves` });
|
|
413
|
+
// V2: Phase 2.5: Plan execution — fire in background, don't block waves
|
|
414
|
+
let planPromise;
|
|
187
415
|
if (this.config.enablePlanning) {
|
|
188
416
|
this.currentPhase = 'planning';
|
|
189
|
-
|
|
417
|
+
this.emit({ type: 'swarm.phase.progress', phase: 'planning', message: 'Creating acceptance criteria...' });
|
|
418
|
+
planPromise = this.planExecution(task, decomposition).catch(err => {
|
|
419
|
+
this.logDecision('planning', 'Planning failed (non-fatal)', err.message);
|
|
420
|
+
});
|
|
190
421
|
}
|
|
191
422
|
this.emit({
|
|
192
423
|
type: 'swarm.start',
|
|
@@ -204,9 +435,17 @@ Rules:
|
|
|
204
435
|
type: 'swarm.tasks.loaded',
|
|
205
436
|
tasks: this.taskQueue.getAllTasks(),
|
|
206
437
|
});
|
|
207
|
-
// Phase 3: Execute waves (
|
|
438
|
+
// Phase 3: Execute waves (planning runs concurrently)
|
|
208
439
|
this.currentPhase = 'executing';
|
|
209
440
|
await this.executeWaves();
|
|
441
|
+
// V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
|
|
442
|
+
if (!this.cancelled)
|
|
443
|
+
await this.finalRescuePass();
|
|
444
|
+
// Ensure planning completed before verification/synthesis
|
|
445
|
+
if (planPromise)
|
|
446
|
+
await planPromise;
|
|
447
|
+
// Post-wave artifact audit: scan filesystem for files created by workers
|
|
448
|
+
this.artifactInventory = this.buildArtifactInventory();
|
|
210
449
|
// V2: Phase 3.5: Verify integration
|
|
211
450
|
if (this.config.enableVerification && this.plan?.integrationTestPlan) {
|
|
212
451
|
this.currentPhase = 'verifying';
|
|
@@ -222,10 +461,14 @@ Rules:
|
|
|
222
461
|
const executionStats = this.buildStats();
|
|
223
462
|
// V2: Final checkpoint
|
|
224
463
|
this.checkpoint('final');
|
|
225
|
-
|
|
464
|
+
const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
|
|
465
|
+
this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
|
|
226
466
|
return {
|
|
227
467
|
success: executionStats.completedTasks > 0,
|
|
468
|
+
partialSuccess: !executionStats.completedTasks && hasArtifacts,
|
|
469
|
+
partialFailure: executionStats.failedTasks > 0,
|
|
228
470
|
synthesisResult: synthesisResult ?? undefined,
|
|
471
|
+
artifactInventory: this.artifactInventory,
|
|
229
472
|
summary: this.buildSummary(executionStats),
|
|
230
473
|
tasks: this.taskQueue.getAllTasks(),
|
|
231
474
|
stats: executionStats,
|
|
@@ -257,6 +500,16 @@ Rules:
|
|
|
257
500
|
// Too simple for swarm mode
|
|
258
501
|
return null;
|
|
259
502
|
}
|
|
503
|
+
// Reject heuristic fallback — the generic 3-task chain is worse than aborting
|
|
504
|
+
if (!result.metadata.llmAssisted) {
|
|
505
|
+
this.logDecision('decomposition', 'Rejected heuristic fallback DAG', 'LLM decomposition failed after retries. Heuristic DAG is not useful.');
|
|
506
|
+
return null;
|
|
507
|
+
}
|
|
508
|
+
// Flat-DAG detection: warn when all tasks land in wave 0 with no dependencies
|
|
509
|
+
const hasAnyDependency = result.subtasks.some(s => s.dependencies.length > 0);
|
|
510
|
+
if (!hasAnyDependency && result.subtasks.length >= 3) {
|
|
511
|
+
this.logDecision('decomposition', `Flat DAG: ${result.subtasks.length} tasks, zero dependencies`, 'All tasks will execute in wave 0 without ordering');
|
|
512
|
+
}
|
|
260
513
|
return result;
|
|
261
514
|
}
|
|
262
515
|
catch (error) {
|
|
@@ -315,6 +568,7 @@ Respond with valid JSON:
|
|
|
315
568
|
maxTokens: 3000,
|
|
316
569
|
temperature: 0.3,
|
|
317
570
|
});
|
|
571
|
+
this.trackOrchestratorUsage(response, 'plan');
|
|
318
572
|
const parsed = this.parseJSON(response.content);
|
|
319
573
|
if (parsed) {
|
|
320
574
|
this.plan = {
|
|
@@ -386,6 +640,7 @@ Respond with valid JSON:
|
|
|
386
640
|
},
|
|
387
641
|
{ role: 'user', content: `Review these wave ${waveIndex + 1} outputs:\n\n${taskSummaries}` },
|
|
388
642
|
], { model: reviewModel, maxTokens: 2000, temperature: 0.3 });
|
|
643
|
+
this.trackOrchestratorUsage(response, 'review');
|
|
389
644
|
const parsed = this.parseJSON(response.content);
|
|
390
645
|
if (!parsed)
|
|
391
646
|
return null;
|
|
@@ -412,6 +667,11 @@ Respond with valid JSON:
|
|
|
412
667
|
}
|
|
413
668
|
if (fixupTasks.length > 0) {
|
|
414
669
|
this.taskQueue.addFixupTasks(fixupTasks);
|
|
670
|
+
// V5: Re-emit full task list so dashboard picks up fixup tasks + edges
|
|
671
|
+
this.emit({
|
|
672
|
+
type: 'swarm.tasks.loaded',
|
|
673
|
+
tasks: this.taskQueue.getAllTasks(),
|
|
674
|
+
});
|
|
415
675
|
}
|
|
416
676
|
}
|
|
417
677
|
const result = {
|
|
@@ -505,6 +765,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
505
765
|
},
|
|
506
766
|
{ role: 'user', content: `Original task: ${task}\n\nFailed verifications:\n${failedSteps}` },
|
|
507
767
|
], { model: this.config.plannerModel ?? this.config.orchestratorModel, maxTokens: 1500, temperature: 0.3 });
|
|
768
|
+
this.trackOrchestratorUsage(response, 'verification-fixup');
|
|
508
769
|
const parsed = this.parseJSON(response.content);
|
|
509
770
|
if (parsed?.fixups && parsed.fixups.length > 0) {
|
|
510
771
|
const fixupTasks = parsed.fixups.map((f, i) => ({
|
|
@@ -520,6 +781,11 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
520
781
|
fixInstructions: f.description,
|
|
521
782
|
}));
|
|
522
783
|
this.taskQueue.addFixupTasks(fixupTasks);
|
|
784
|
+
// V5: Re-emit full task list so dashboard picks up verification fixup tasks
|
|
785
|
+
this.emit({
|
|
786
|
+
type: 'swarm.tasks.loaded',
|
|
787
|
+
tasks: this.taskQueue.getAllTasks(),
|
|
788
|
+
});
|
|
523
789
|
// Execute fix-up wave
|
|
524
790
|
this.currentPhase = 'executing';
|
|
525
791
|
await this.executeWave(fixupTasks);
|
|
@@ -550,6 +816,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
550
816
|
this.logDecision('resume', `Resuming from wave ${checkpoint.currentWave}`, `Session: ${checkpoint.sessionId}`);
|
|
551
817
|
this.emit({ type: 'swarm.state.resume', sessionId: checkpoint.sessionId, fromWave: checkpoint.currentWave });
|
|
552
818
|
// Restore state
|
|
819
|
+
if (checkpoint.originalPrompt)
|
|
820
|
+
this.originalPrompt = checkpoint.originalPrompt;
|
|
553
821
|
if (checkpoint.plan)
|
|
554
822
|
this.plan = checkpoint.plan;
|
|
555
823
|
if (checkpoint.modelHealth.length > 0)
|
|
@@ -566,9 +834,61 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
566
834
|
waves: checkpoint.waves,
|
|
567
835
|
currentWave: checkpoint.currentWave,
|
|
568
836
|
});
|
|
837
|
+
// Reset orphaned dispatched tasks — their workers died with the previous process
|
|
838
|
+
let resetCount = 0;
|
|
839
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
840
|
+
if (task.status === 'dispatched') {
|
|
841
|
+
task.status = 'ready';
|
|
842
|
+
// Preserve at least 1 retry attempt
|
|
843
|
+
task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
|
|
844
|
+
resetCount++;
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
if (resetCount > 0) {
|
|
848
|
+
this.logDecision('resume', `Reset ${resetCount} orphaned dispatched tasks to ready`, 'Workers died with previous process');
|
|
849
|
+
}
|
|
850
|
+
// Reset skipped tasks whose dependencies are now satisfied
|
|
851
|
+
let unskippedCount = 0;
|
|
852
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
853
|
+
if (task.status === 'skipped') {
|
|
854
|
+
const deps = task.dependencies.map(id => this.taskQueue.getTask(id));
|
|
855
|
+
const allDepsSatisfied = deps.every(d => d && (d.status === 'completed' || d.status === 'decomposed'));
|
|
856
|
+
if (allDepsSatisfied) {
|
|
857
|
+
task.status = 'ready';
|
|
858
|
+
task.attempts = 0;
|
|
859
|
+
task.rescueContext = 'Recovered on resume — dependencies now satisfied';
|
|
860
|
+
unskippedCount++;
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
// Also reset failed tasks that have retry budget
|
|
865
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
866
|
+
if (task.status === 'failed') {
|
|
867
|
+
task.status = 'ready';
|
|
868
|
+
task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
|
|
869
|
+
unskippedCount++;
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
if (unskippedCount > 0) {
|
|
873
|
+
this.logDecision('resume', `Recovered ${unskippedCount} skipped/failed tasks`, 'Fresh retry on resume');
|
|
874
|
+
}
|
|
875
|
+
// If many tasks are still stuck after un-skip, trigger re-plan
|
|
876
|
+
const resumeStats = this.taskQueue.getStats();
|
|
877
|
+
const stuckCount = resumeStats.failed + resumeStats.skipped;
|
|
878
|
+
const totalAttempted = resumeStats.completed + stuckCount;
|
|
879
|
+
if (totalAttempted > 0 && stuckCount / totalAttempted > 0.4) {
|
|
880
|
+
this.logDecision('resume-replan', `${stuckCount}/${totalAttempted} tasks still stuck after resume — triggering re-plan`, '');
|
|
881
|
+
this.hasReplanned = false; // Allow re-plan on resume
|
|
882
|
+
await this.midSwarmReplan();
|
|
883
|
+
}
|
|
569
884
|
// Continue from where we left off
|
|
570
885
|
this.currentPhase = 'executing';
|
|
571
886
|
await this.executeWaves();
|
|
887
|
+
// V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
|
|
888
|
+
if (!this.cancelled)
|
|
889
|
+
await this.finalRescuePass();
|
|
890
|
+
// Post-wave artifact audit
|
|
891
|
+
this.artifactInventory = this.buildArtifactInventory();
|
|
572
892
|
// Continue with verification and synthesis as normal
|
|
573
893
|
if (this.config.enableVerification && this.plan?.integrationTestPlan) {
|
|
574
894
|
this.currentPhase = 'verifying';
|
|
@@ -582,10 +902,14 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
582
902
|
this.currentPhase = 'completed';
|
|
583
903
|
const executionStats = this.buildStats();
|
|
584
904
|
this.checkpoint('final');
|
|
585
|
-
|
|
905
|
+
const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
|
|
906
|
+
this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
|
|
586
907
|
return {
|
|
587
908
|
success: executionStats.completedTasks > 0,
|
|
909
|
+
partialSuccess: !executionStats.completedTasks && hasArtifacts,
|
|
910
|
+
partialFailure: executionStats.failedTasks > 0,
|
|
588
911
|
synthesisResult: synthesisResult ?? undefined,
|
|
912
|
+
artifactInventory: this.artifactInventory,
|
|
589
913
|
summary: this.buildSummary(executionStats),
|
|
590
914
|
tasks: this.taskQueue.getAllTasks(),
|
|
591
915
|
stats: executionStats,
|
|
@@ -602,6 +926,13 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
602
926
|
while (waveIndex < totalWaves && !this.cancelled) {
|
|
603
927
|
const readyTasks = this.taskQueue.getReadyTasks();
|
|
604
928
|
const queueStats = this.taskQueue.getStats();
|
|
929
|
+
// F18: Skip empty waves — if no tasks are ready and none are running,
|
|
930
|
+
// remaining tasks are all blocked/failed/skipped. Break instead of
|
|
931
|
+
// running useless review cycles.
|
|
932
|
+
if (readyTasks.length === 0 && queueStats.running === 0 && queueStats.ready === 0) {
|
|
933
|
+
this.logDecision('wave-skip', `Skipping waves ${waveIndex + 1}-${totalWaves}: no dispatchable tasks remain`, `Stats: ${queueStats.completed} completed, ${queueStats.failed} failed, ${queueStats.skipped} skipped`);
|
|
934
|
+
break;
|
|
935
|
+
}
|
|
605
936
|
this.emit({
|
|
606
937
|
type: 'swarm.wave.start',
|
|
607
938
|
wave: waveIndex + 1,
|
|
@@ -623,12 +954,75 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
623
954
|
failed: waveFailed,
|
|
624
955
|
skipped: waveSkipped,
|
|
625
956
|
});
|
|
957
|
+
// Wave failure recovery: if ALL tasks in a wave failed, retry with adapted context
|
|
958
|
+
if (waveCompleted === 0 && waveFailed > 0 && readyTasks.length > 0) {
|
|
959
|
+
this.emit({ type: 'swarm.wave.allFailed', wave: waveIndex + 1 });
|
|
960
|
+
this.logDecision('wave-recovery', `Entire wave ${waveIndex + 1} failed (${waveFailed} tasks)`, 'Checking if budget allows retry with adapted strategy');
|
|
961
|
+
// Re-queue failed tasks with retry context if budget allows
|
|
962
|
+
const budgetRemaining = this.budgetPool.hasCapacity();
|
|
963
|
+
const failedWaveTasks = readyTasks.filter(t => {
|
|
964
|
+
const task = this.taskQueue.getTask(t.id);
|
|
965
|
+
return task && task.status === 'failed' && task.attempts < (this.config.workerRetries + 1);
|
|
966
|
+
});
|
|
967
|
+
if (budgetRemaining && failedWaveTasks.length > 0) {
|
|
968
|
+
for (const t of failedWaveTasks) {
|
|
969
|
+
const task = this.taskQueue.getTask(t.id);
|
|
970
|
+
if (!task)
|
|
971
|
+
continue;
|
|
972
|
+
task.status = 'ready';
|
|
973
|
+
task.retryContext = {
|
|
974
|
+
previousFeedback: 'All tasks in this batch failed. Try a fundamentally different approach — the previous strategy did not work.',
|
|
975
|
+
previousScore: 0,
|
|
976
|
+
attempt: task.attempts,
|
|
977
|
+
previousModel: task.assignedModel,
|
|
978
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
979
|
+
};
|
|
980
|
+
}
|
|
981
|
+
this.logDecision('wave-recovery', `Re-queued ${failedWaveTasks.length} tasks with adapted retry context`, 'Budget allows retry');
|
|
982
|
+
// Re-execute the wave with adapted tasks
|
|
983
|
+
await this.executeWave(failedWaveTasks.map(t => this.taskQueue.getTask(t.id)).filter(t => t.status === 'ready'));
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
// F5: Adaptive re-decomposition — if < 50% of wave tasks succeeded,
|
|
987
|
+
// the decomposition may be structurally flawed. Log for observability.
|
|
988
|
+
// (Full re-decomposition of remaining work would require re-architecting the queue,
|
|
989
|
+
// so we log the signal and let wave retry + fixup handle recovery.)
|
|
990
|
+
const waveTotal = waveCompleted + waveFailed + waveSkipped;
|
|
991
|
+
const waveSuccessRate = waveTotal > 0 ? waveCompleted / waveTotal : 0;
|
|
992
|
+
if (waveSuccessRate < 0.5 && waveTotal >= 2) {
|
|
993
|
+
this.logDecision('decomposition-quality', `Wave ${waveIndex + 1} success rate ${(waveSuccessRate * 100).toFixed(0)}% (${waveCompleted}/${waveTotal})`, 'Low success rate may indicate decomposition quality issues');
|
|
994
|
+
}
|
|
626
995
|
// V2: Review wave outputs
|
|
627
996
|
const review = await this.reviewWave(waveIndex);
|
|
628
997
|
if (review && review.fixupTasks.length > 0) {
|
|
629
998
|
// Execute fix-up tasks immediately
|
|
630
999
|
await this.executeWave(review.fixupTasks);
|
|
631
1000
|
}
|
|
1001
|
+
// Rescue cascade-skipped tasks that can still run
|
|
1002
|
+
// (after wave review + fixup, some skipped tasks may now be viable)
|
|
1003
|
+
const rescued = this.rescueCascadeSkipped();
|
|
1004
|
+
if (rescued.length > 0) {
|
|
1005
|
+
this.logDecision('cascade-rescue', `Rescued ${rescued.length} cascade-skipped tasks after wave ${waveIndex + 1}`, rescued.map(t => t.id).join(', '));
|
|
1006
|
+
await this.executeWave(rescued);
|
|
1007
|
+
}
|
|
1008
|
+
// Reset quality circuit breaker at wave boundary — each wave gets a fresh chance.
|
|
1009
|
+
// Within a wave, rejections accumulate properly so the breaker can trip.
|
|
1010
|
+
// Between waves, we reset so each wave gets a fresh quality evaluation window.
|
|
1011
|
+
// (The within-wave reset at quality-gate-passed is kept — that's correct.)
|
|
1012
|
+
if (this.qualityGateDisabledModels.size > 0) {
|
|
1013
|
+
this.qualityGateDisabledModels.clear();
|
|
1014
|
+
this.perModelQualityRejections.clear();
|
|
1015
|
+
this.logDecision('quality-circuit-breaker', `Re-enabled quality gates for all models at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
|
|
1016
|
+
}
|
|
1017
|
+
// F3: Log budget reallocation after wave completion.
|
|
1018
|
+
// SharedBudgetPool already returns unused tokens via release(), but we log it
|
|
1019
|
+
// for observability so operators can see how budget flows between waves.
|
|
1020
|
+
const budgetStats = this.budgetPool.getStats();
|
|
1021
|
+
this.logDecision('budget-reallocation', `After wave ${waveIndex + 1}: ${budgetStats.tokensRemaining} tokens remaining (${(budgetStats.utilization * 100).toFixed(0)}% utilized)`, '');
|
|
1022
|
+
this.budgetPool.reallocateUnused(budgetStats.tokensRemaining);
|
|
1023
|
+
// F21: Mid-swarm situational assessment — evaluate success rate and budget health,
|
|
1024
|
+
// optionally triage low-priority tasks to conserve budget for critical path.
|
|
1025
|
+
await this.assessAndAdapt(waveIndex);
|
|
632
1026
|
// V2: Checkpoint after each wave
|
|
633
1027
|
this.checkpoint(`wave-${waveIndex}`);
|
|
634
1028
|
// Advance to next wave
|
|
@@ -656,7 +1050,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
656
1050
|
taskIndex++;
|
|
657
1051
|
// Stagger dispatches to avoid rate limit storms
|
|
658
1052
|
if (taskIndex < tasks.length && this.workerPool.availableSlots > 0) {
|
|
659
|
-
await new Promise(resolve => setTimeout(resolve, this.
|
|
1053
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
660
1054
|
}
|
|
661
1055
|
}
|
|
662
1056
|
// Process completions and dispatch more tasks as slots open
|
|
@@ -677,7 +1071,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
677
1071
|
await this.dispatchTask(task);
|
|
678
1072
|
// Stagger dispatches to avoid rate limit storms
|
|
679
1073
|
if (taskIndex + 1 < tasks.length && this.workerPool.availableSlots > 0) {
|
|
680
|
-
await new Promise(resolve => setTimeout(resolve, this.
|
|
1074
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
681
1075
|
}
|
|
682
1076
|
}
|
|
683
1077
|
taskIndex++;
|
|
@@ -692,57 +1086,152 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
692
1086
|
await this.dispatchTask(moreReady[i]);
|
|
693
1087
|
// Stagger dispatches to avoid rate limit storms
|
|
694
1088
|
if (i + 1 < moreReady.length && this.workerPool.availableSlots > 0) {
|
|
695
|
-
await new Promise(resolve => setTimeout(resolve, this.
|
|
1089
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
}
|
|
1094
|
+
// F20: Re-dispatch pass — after all workers finish, budget may have been freed
|
|
1095
|
+
// by completed tasks. Try to dispatch any still-ready tasks (e.g., those paused
|
|
1096
|
+
// by budget exhaustion earlier).
|
|
1097
|
+
if (!this.cancelled && this.budgetPool.hasCapacity()) {
|
|
1098
|
+
const stillReady = this.taskQueue.getAllReadyTasks()
|
|
1099
|
+
.filter(t => !this.workerPool.getActiveWorkerStatus().some(w => w.taskId === t.id));
|
|
1100
|
+
if (stillReady.length > 0) {
|
|
1101
|
+
this.logDecision('budget-redispatch', `Budget freed after wave — re-dispatching ${stillReady.length} ready task(s)`, `Budget: ${JSON.stringify(this.budgetPool.getStats())}`);
|
|
1102
|
+
for (const task of stillReady) {
|
|
1103
|
+
if (this.workerPool.availableSlots <= 0 || !this.budgetPool.hasCapacity())
|
|
1104
|
+
break;
|
|
1105
|
+
await this.dispatchTask(task);
|
|
1106
|
+
if (this.workerPool.availableSlots > 0) {
|
|
1107
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
696
1108
|
}
|
|
697
1109
|
}
|
|
1110
|
+
// Wait for these re-dispatched tasks to complete
|
|
1111
|
+
while (this.workerPool.activeCount > 0 && !this.cancelled) {
|
|
1112
|
+
const completed = await this.workerPool.waitForAny();
|
|
1113
|
+
if (!completed)
|
|
1114
|
+
break;
|
|
1115
|
+
await this.handleTaskCompletion(completed.taskId, completed.result, completed.startedAt);
|
|
1116
|
+
this.emitBudgetUpdate();
|
|
1117
|
+
this.emitStatusUpdate();
|
|
1118
|
+
}
|
|
698
1119
|
}
|
|
699
1120
|
}
|
|
700
1121
|
}
|
|
701
1122
|
/**
|
|
702
1123
|
* Dispatch a single task to a worker.
|
|
1124
|
+
* Selects the worker once and passes it through to avoid double-selection.
|
|
703
1125
|
*/
|
|
704
1126
|
async dispatchTask(task) {
|
|
705
1127
|
const worker = this.workerPool.selectWorker(task);
|
|
706
1128
|
if (!worker) {
|
|
707
1129
|
// M2: Emit error and mark task failed instead of silently returning
|
|
708
|
-
|
|
1130
|
+
// V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
|
|
1131
|
+
this.logDecision('no-worker', `${task.id}: no worker for type ${task.type}`, '');
|
|
1132
|
+
if (task.attempts > 0) {
|
|
1133
|
+
const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
|
|
1134
|
+
const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
|
|
1135
|
+
if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
|
|
1136
|
+
return;
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
this.taskQueue.markFailedWithoutCascade(task.id, 0);
|
|
1140
|
+
this.taskQueue.triggerCascadeSkip(task.id);
|
|
709
1141
|
this.emit({
|
|
710
1142
|
type: 'swarm.task.failed',
|
|
711
1143
|
taskId: task.id,
|
|
712
1144
|
error: `No worker available for task type: ${task.type}`,
|
|
713
|
-
attempt:
|
|
1145
|
+
attempt: task.attempts,
|
|
714
1146
|
maxAttempts: 0,
|
|
715
1147
|
willRetry: false,
|
|
1148
|
+
failureMode: 'error',
|
|
716
1149
|
});
|
|
717
1150
|
return;
|
|
718
1151
|
}
|
|
719
1152
|
try {
|
|
720
|
-
|
|
721
|
-
|
|
1153
|
+
// Pre-dispatch auto-split for critical-path bottlenecks
|
|
1154
|
+
if (this.shouldAutoSplit(task)) {
|
|
1155
|
+
try {
|
|
1156
|
+
const splitResult = await this.judgeSplit(task);
|
|
1157
|
+
if (splitResult.shouldSplit && splitResult.subtasks) {
|
|
1158
|
+
task.status = 'dispatched'; // Required for replaceWithSubtasks
|
|
1159
|
+
this.taskQueue.replaceWithSubtasks(task.id, splitResult.subtasks);
|
|
1160
|
+
this.emit({
|
|
1161
|
+
type: 'swarm.task.resilience',
|
|
1162
|
+
taskId: task.id,
|
|
1163
|
+
strategy: 'auto-split',
|
|
1164
|
+
succeeded: true,
|
|
1165
|
+
reason: `Pre-dispatch split into ${splitResult.subtasks.length} parallel subtasks`,
|
|
1166
|
+
artifactsFound: 0,
|
|
1167
|
+
toolCalls: 0,
|
|
1168
|
+
});
|
|
1169
|
+
return; // Subtasks now in queue, will be dispatched this wave
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
catch (err) {
|
|
1173
|
+
this.logDecision('auto-split', `${task.id}: split judge failed — ${err.message}`, '');
|
|
1174
|
+
// Fall through to normal dispatch
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
this.totalDispatches++;
|
|
1178
|
+
const dispatchedModel = task.assignedModel ?? worker.model;
|
|
1179
|
+
this.taskQueue.markDispatched(task.id, dispatchedModel);
|
|
1180
|
+
if (task.assignedModel && task.assignedModel !== worker.model) {
|
|
1181
|
+
this.logDecision('failover', `Dispatching ${task.id} with failover model ${task.assignedModel} (worker default: ${worker.model})`, 'Retry model override is active');
|
|
1182
|
+
}
|
|
1183
|
+
// Pass the pre-selected worker to avoid double-selection in dispatch()
|
|
1184
|
+
await this.workerPool.dispatch(task, worker);
|
|
722
1185
|
this.emit({
|
|
723
1186
|
type: 'swarm.task.dispatched',
|
|
724
1187
|
taskId: task.id,
|
|
725
1188
|
description: task.description,
|
|
726
|
-
model:
|
|
1189
|
+
model: dispatchedModel,
|
|
727
1190
|
workerName: worker.name,
|
|
1191
|
+
toolCount: worker.allowedTools?.length ?? -1, // -1 = all tools
|
|
1192
|
+
tools: worker.allowedTools,
|
|
1193
|
+
retryContext: task.retryContext,
|
|
1194
|
+
fromModel: task.retryContext ? task.retryContext.previousModel : undefined,
|
|
1195
|
+
attempts: task.attempts,
|
|
728
1196
|
});
|
|
729
1197
|
}
|
|
730
1198
|
catch (error) {
|
|
1199
|
+
const errorMsg = error.message;
|
|
1200
|
+
// F20: Budget exhaustion is NOT a task failure — the task is fine, we just ran out of money.
|
|
1201
|
+
// Reset status to ready so it can be picked up if budget becomes available
|
|
1202
|
+
// (e.g., after tokens are released from completing tasks).
|
|
1203
|
+
if (errorMsg.includes('Budget pool exhausted')) {
|
|
1204
|
+
task.status = 'ready';
|
|
1205
|
+
this.logDecision('budget-pause', `Cannot dispatch ${task.id}: budget exhausted — task kept ready for potential re-dispatch`, `Budget stats: ${JSON.stringify(this.budgetPool.getStats())}`);
|
|
1206
|
+
return;
|
|
1207
|
+
}
|
|
731
1208
|
this.errors.push({
|
|
732
1209
|
taskId: task.id,
|
|
733
1210
|
phase: 'dispatch',
|
|
734
|
-
message:
|
|
1211
|
+
message: errorMsg,
|
|
735
1212
|
recovered: false,
|
|
736
1213
|
});
|
|
1214
|
+
this.logDecision('dispatch-error', `${task.id}: dispatch failed: ${errorMsg.slice(0, 100)}`, `attempts: ${task.attempts}`);
|
|
1215
|
+
// V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
|
|
1216
|
+
if (task.attempts > 0) {
|
|
1217
|
+
const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
|
|
1218
|
+
const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
|
|
1219
|
+
if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
|
|
1220
|
+
this.errors[this.errors.length - 1].recovered = true;
|
|
1221
|
+
return;
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
this.taskQueue.markFailedWithoutCascade(task.id, 0);
|
|
1225
|
+
this.taskQueue.triggerCascadeSkip(task.id);
|
|
737
1226
|
this.emit({
|
|
738
1227
|
type: 'swarm.task.failed',
|
|
739
1228
|
taskId: task.id,
|
|
740
|
-
error:
|
|
1229
|
+
error: errorMsg,
|
|
741
1230
|
attempt: task.attempts,
|
|
742
1231
|
maxAttempts: 1 + this.config.workerRetries,
|
|
743
1232
|
willRetry: false,
|
|
1233
|
+
failureMode: 'error',
|
|
744
1234
|
});
|
|
745
|
-
this.taskQueue.markFailed(task.id, 0);
|
|
746
1235
|
}
|
|
747
1236
|
}
|
|
748
1237
|
/**
|
|
@@ -752,6 +1241,36 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
752
1241
|
const task = this.taskQueue.getTask(taskId);
|
|
753
1242
|
if (!task)
|
|
754
1243
|
return;
|
|
1244
|
+
// Guard: task was terminally resolved while its worker was running — ignore the result
|
|
1245
|
+
// F4: But NOT if pendingCascadeSkip — those results are evaluated below
|
|
1246
|
+
if ((task.status === 'skipped' || task.status === 'failed') && !task.pendingCascadeSkip)
|
|
1247
|
+
return;
|
|
1248
|
+
// V7: Global dispatch cap — prevent any single task from burning budget.
|
|
1249
|
+
// Try resilience recovery (micro-decompose, degraded acceptance) before hard-failing.
|
|
1250
|
+
const maxDispatches = this.config.maxDispatchesPerTask ?? 5;
|
|
1251
|
+
if (task.attempts >= maxDispatches) {
|
|
1252
|
+
const durationMs = Date.now() - startedAt;
|
|
1253
|
+
const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
|
|
1254
|
+
this.totalTokens += taskResult.tokensUsed;
|
|
1255
|
+
this.totalCost += taskResult.costUsed;
|
|
1256
|
+
// Try resilience recovery before hard fail
|
|
1257
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1258
|
+
return;
|
|
1259
|
+
}
|
|
1260
|
+
this.taskQueue.markFailedWithoutCascade(taskId, 0);
|
|
1261
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1262
|
+
this.emit({
|
|
1263
|
+
type: 'swarm.task.failed',
|
|
1264
|
+
taskId,
|
|
1265
|
+
error: `Dispatch cap reached (${maxDispatches} attempts)`,
|
|
1266
|
+
attempt: task.attempts,
|
|
1267
|
+
maxAttempts: maxDispatches,
|
|
1268
|
+
willRetry: false,
|
|
1269
|
+
failureMode: task.failureMode,
|
|
1270
|
+
});
|
|
1271
|
+
this.logDecision('dispatch-cap', `${taskId}: hard cap reached (${task.attempts}/${maxDispatches})`, 'No more retries — resilience recovery also failed');
|
|
1272
|
+
return;
|
|
1273
|
+
}
|
|
755
1274
|
const durationMs = Date.now() - startedAt;
|
|
756
1275
|
const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
|
|
757
1276
|
// Track model usage
|
|
@@ -763,21 +1282,94 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
763
1282
|
this.modelUsage.set(model, usage);
|
|
764
1283
|
this.totalTokens += taskResult.tokensUsed;
|
|
765
1284
|
this.totalCost += taskResult.costUsed;
|
|
1285
|
+
// V10: Emit per-attempt event for full decision traceability
|
|
1286
|
+
this.emit({
|
|
1287
|
+
type: 'swarm.task.attempt',
|
|
1288
|
+
taskId,
|
|
1289
|
+
attempt: task.attempts,
|
|
1290
|
+
model,
|
|
1291
|
+
success: spawnResult.success,
|
|
1292
|
+
durationMs,
|
|
1293
|
+
toolCalls: spawnResult.metrics.toolCalls ?? 0,
|
|
1294
|
+
failureMode: !spawnResult.success ? task.failureMode : undefined,
|
|
1295
|
+
qualityScore: taskResult.qualityScore,
|
|
1296
|
+
output: taskResult.output.slice(0, 500),
|
|
1297
|
+
});
|
|
766
1298
|
if (!spawnResult.success) {
|
|
767
1299
|
// V2: Record model health
|
|
768
1300
|
const errorMsg = spawnResult.output.toLowerCase();
|
|
769
1301
|
const is429 = errorMsg.includes('429') || errorMsg.includes('rate');
|
|
770
1302
|
const is402 = errorMsg.includes('402') || errorMsg.includes('spend limit');
|
|
771
|
-
const
|
|
1303
|
+
const isTimeout = spawnResult.metrics.toolCalls === -1;
|
|
1304
|
+
// F25: Use 'timeout' errorType for timeouts (was 'error')
|
|
1305
|
+
const errorType = is429 ? '429' : is402 ? '402' : isTimeout ? 'timeout' : 'error';
|
|
772
1306
|
this.healthTracker.recordFailure(model, errorType);
|
|
773
1307
|
this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
|
|
1308
|
+
// P6: Tag failure mode for cascade threshold awareness
|
|
1309
|
+
task.failureMode = (is429 || is402) ? 'rate-limit' : (spawnResult.metrics.toolCalls === -1 ? 'timeout' : 'error');
|
|
774
1310
|
// Feed circuit breaker
|
|
775
1311
|
if (is429 || is402) {
|
|
776
1312
|
this.recordRateLimit();
|
|
777
1313
|
}
|
|
1314
|
+
// F25a: Consecutive timeout tracking — early-fail after N consecutive timeouts
|
|
1315
|
+
if (isTimeout) {
|
|
1316
|
+
const count = (this.taskTimeoutCounts.get(taskId) ?? 0) + 1;
|
|
1317
|
+
this.taskTimeoutCounts.set(taskId, count);
|
|
1318
|
+
const timeoutLimit = this.config.consecutiveTimeoutLimit ?? 3;
|
|
1319
|
+
this.logDecision('timeout-tracking', `${taskId}: consecutive timeout ${count}/${timeoutLimit}`, '');
|
|
1320
|
+
if (count >= timeoutLimit) {
|
|
1321
|
+
// F25b: Try model failover before giving up
|
|
1322
|
+
let failoverSucceeded = false;
|
|
1323
|
+
if (this.config.enableModelFailover) {
|
|
1324
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
1325
|
+
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
1326
|
+
if (alternative) {
|
|
1327
|
+
this.emit({
|
|
1328
|
+
type: 'swarm.model.failover',
|
|
1329
|
+
taskId,
|
|
1330
|
+
fromModel: model,
|
|
1331
|
+
toModel: alternative.model,
|
|
1332
|
+
reason: 'consecutive-timeouts',
|
|
1333
|
+
});
|
|
1334
|
+
task.assignedModel = alternative.model;
|
|
1335
|
+
this.taskTimeoutCounts.set(taskId, 0); // Reset counter for new model
|
|
1336
|
+
this.logDecision('failover', `Timeout failover ${taskId}: ${model} → ${alternative.model}`, `${count} consecutive timeouts`);
|
|
1337
|
+
failoverSucceeded = true;
|
|
1338
|
+
}
|
|
1339
|
+
}
|
|
1340
|
+
if (!failoverSucceeded) {
|
|
1341
|
+
// No alternative model — try resilience recovery before hard fail.
|
|
1342
|
+
// Timeouts often produce artifacts (worker WAS working, just ran out of time).
|
|
1343
|
+
task.failureMode = 'timeout';
|
|
1344
|
+
const taskResult = this.workerPool.toTaskResult(spawnResult, task, Date.now() - startedAt);
|
|
1345
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1346
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1347
|
+
return;
|
|
1348
|
+
}
|
|
1349
|
+
this.taskQueue.markFailedWithoutCascade(taskId, 0);
|
|
1350
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1351
|
+
this.emit({
|
|
1352
|
+
type: 'swarm.task.failed',
|
|
1353
|
+
taskId,
|
|
1354
|
+
error: `${count} consecutive timeouts — no alternative model available`,
|
|
1355
|
+
attempt: task.attempts,
|
|
1356
|
+
maxAttempts: maxDispatches,
|
|
1357
|
+
willRetry: false,
|
|
1358
|
+
failureMode: 'timeout',
|
|
1359
|
+
});
|
|
1360
|
+
this.logDecision('timeout-early-fail', `${taskId}: ${count} consecutive timeouts, no alt model — resilience recovery also failed`, '');
|
|
1361
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1362
|
+
return;
|
|
1363
|
+
}
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
else {
|
|
1367
|
+
// Non-timeout failure — reset the counter
|
|
1368
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1369
|
+
}
|
|
778
1370
|
// V2: Model failover on rate limits
|
|
779
1371
|
if ((is429 || is402) && this.config.enableModelFailover) {
|
|
780
|
-
const capability =
|
|
1372
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
781
1373
|
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
782
1374
|
if (alternative) {
|
|
783
1375
|
this.emit({
|
|
@@ -791,11 +1383,30 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
791
1383
|
this.logDecision('failover', `Switched ${taskId} from ${model} to ${alternative.model}`, `${errorType} error`);
|
|
792
1384
|
}
|
|
793
1385
|
}
|
|
794
|
-
//
|
|
1386
|
+
// V5/V7: Store error context so retry gets different prompt
|
|
1387
|
+
if (!(is429 || is402)) {
|
|
1388
|
+
// V7: Timeout-specific feedback — the worker WAS working, just ran out of time
|
|
1389
|
+
const timeoutSeconds = isTimeout ? Math.round(durationMs / 1000) : 0;
|
|
1390
|
+
task.retryContext = {
|
|
1391
|
+
previousFeedback: isTimeout
|
|
1392
|
+
? `Previous attempt timed out after ${timeoutSeconds}s. You must complete this task more efficiently — work faster, use fewer tool calls, and produce your result sooner.`
|
|
1393
|
+
: spawnResult.output.slice(0, 2000),
|
|
1394
|
+
previousScore: 0,
|
|
1395
|
+
attempt: task.attempts,
|
|
1396
|
+
previousModel: model,
|
|
1397
|
+
previousFiles: taskResult.filesModified,
|
|
1398
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1399
|
+
};
|
|
1400
|
+
}
|
|
1401
|
+
// V7: Reset hollow streak on non-hollow failure (error is not a hollow completion)
|
|
1402
|
+
this.hollowStreak = 0;
|
|
1403
|
+
// Worker failed — use higher retry limit for rate limit errors.
|
|
1404
|
+
// V7: Fixup tasks get capped retries, foundation tasks get +1.
|
|
1405
|
+
const baseRetries = this.getEffectiveRetries(task);
|
|
795
1406
|
const retryLimit = (is429 || is402)
|
|
796
|
-
? (this.config.rateLimitRetries ?? 3)
|
|
797
|
-
:
|
|
798
|
-
const canRetry = this.taskQueue.
|
|
1407
|
+
? Math.min(this.config.rateLimitRetries ?? 3, baseRetries + 1)
|
|
1408
|
+
: baseRetries;
|
|
1409
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, retryLimit);
|
|
799
1410
|
if (canRetry) {
|
|
800
1411
|
this.retries++;
|
|
801
1412
|
// Non-blocking cooldown: set retryAfter timestamp instead of blocking
|
|
@@ -803,8 +1414,21 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
803
1414
|
const baseDelay = this.config.retryBaseDelayMs ?? 5000;
|
|
804
1415
|
const cooldownMs = Math.min(baseDelay * Math.pow(2, task.attempts - 1), 30000);
|
|
805
1416
|
this.taskQueue.setRetryAfter(taskId, cooldownMs);
|
|
1417
|
+
this.logDecision('rate-limit-cooldown', `${taskId}: ${errorType} cooldown ${cooldownMs}ms, model ${model}`, '');
|
|
806
1418
|
}
|
|
807
1419
|
}
|
|
1420
|
+
else if (!(is429 || is402)) {
|
|
1421
|
+
// Resilience recovery for non-rate-limit errors (micro-decompose + degraded acceptance)
|
|
1422
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1423
|
+
return;
|
|
1424
|
+
}
|
|
1425
|
+
// Recovery failed — NOW trigger cascade
|
|
1426
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1427
|
+
}
|
|
1428
|
+
else {
|
|
1429
|
+
// Rate-limit exhaustion — trigger cascade
|
|
1430
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1431
|
+
}
|
|
808
1432
|
this.emit({
|
|
809
1433
|
type: 'swarm.task.failed',
|
|
810
1434
|
taskId,
|
|
@@ -812,17 +1436,153 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
812
1436
|
attempt: task.attempts,
|
|
813
1437
|
maxAttempts: 1 + this.config.workerRetries,
|
|
814
1438
|
willRetry: canRetry,
|
|
1439
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
1440
|
+
failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
|
|
1441
|
+
failureMode: task.failureMode,
|
|
1442
|
+
});
|
|
1443
|
+
return;
|
|
1444
|
+
}
|
|
1445
|
+
// V6: Hollow completion detection — workers that "succeed" without doing any work
|
|
1446
|
+
// Must check BEFORE recording success, otherwise hollow completions inflate health scores
|
|
1447
|
+
if (isHollowCompletion(spawnResult, task.type, this.config)) {
|
|
1448
|
+
// F4: Hollow result + pendingCascadeSkip — honor the skip immediately, no retry
|
|
1449
|
+
if (task.pendingCascadeSkip) {
|
|
1450
|
+
task.pendingCascadeSkip = undefined;
|
|
1451
|
+
task.status = 'skipped';
|
|
1452
|
+
this.totalHollows++;
|
|
1453
|
+
this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (hollow completion)`, '');
|
|
1454
|
+
this.emit({ type: 'swarm.task.skipped', taskId, reason: 'cascade skip honored — hollow completion' });
|
|
1455
|
+
return;
|
|
1456
|
+
}
|
|
1457
|
+
// P6: Tag failure mode for cascade threshold awareness
|
|
1458
|
+
task.failureMode = 'hollow';
|
|
1459
|
+
// Record hollow completion so hollow-prone models accumulate hollow-specific records
|
|
1460
|
+
// and get deprioritized by the model selector (also records generic failure internally)
|
|
1461
|
+
this.healthTracker.recordHollow(model);
|
|
1462
|
+
const admitsFailure = spawnResult.success && FAILURE_INDICATORS.some(f => (spawnResult.output ?? '').toLowerCase().includes(f));
|
|
1463
|
+
task.retryContext = {
|
|
1464
|
+
previousFeedback: admitsFailure
|
|
1465
|
+
? 'Previous attempt reported success but admitted failure (e.g., "budget exhausted", "unable to complete"). You MUST execute tool calls and produce concrete output this time.'
|
|
1466
|
+
: 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
|
|
1467
|
+
previousScore: 1,
|
|
1468
|
+
attempt: task.attempts,
|
|
1469
|
+
previousModel: model,
|
|
1470
|
+
previousFiles: taskResult.filesModified,
|
|
1471
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1472
|
+
};
|
|
1473
|
+
// Model failover for hollow completions — same pattern as quality failover
|
|
1474
|
+
if (this.config.enableModelFailover) {
|
|
1475
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
1476
|
+
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
1477
|
+
if (alternative) {
|
|
1478
|
+
this.emit({
|
|
1479
|
+
type: 'swarm.model.failover',
|
|
1480
|
+
taskId,
|
|
1481
|
+
fromModel: model,
|
|
1482
|
+
toModel: alternative.model,
|
|
1483
|
+
reason: 'hollow-completion',
|
|
1484
|
+
});
|
|
1485
|
+
task.assignedModel = alternative.model;
|
|
1486
|
+
this.logDecision('failover', `Hollow failover ${taskId}: ${model} → ${alternative.model}`, 'Model produced hollow completion');
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
const hollowRetries = this.getEffectiveRetries(task);
|
|
1490
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, hollowRetries);
|
|
1491
|
+
if (canRetry) {
|
|
1492
|
+
this.retries++;
|
|
1493
|
+
}
|
|
1494
|
+
else {
|
|
1495
|
+
// Retries exhausted — try shared resilience recovery (micro-decompose, degraded acceptance)
|
|
1496
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1497
|
+
return;
|
|
1498
|
+
}
|
|
1499
|
+
// Recovery failed — NOW trigger cascade
|
|
1500
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1501
|
+
}
|
|
1502
|
+
this.emit({
|
|
1503
|
+
type: 'swarm.task.failed',
|
|
1504
|
+
taskId,
|
|
1505
|
+
error: 'Hollow completion: worker used no tools',
|
|
1506
|
+
attempt: task.attempts,
|
|
1507
|
+
maxAttempts: 1 + this.config.workerRetries,
|
|
1508
|
+
willRetry: canRetry,
|
|
1509
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
1510
|
+
failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
|
|
1511
|
+
failureMode: 'hollow',
|
|
815
1512
|
});
|
|
1513
|
+
this.hollowStreak++;
|
|
1514
|
+
this.totalHollows++;
|
|
1515
|
+
this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls (streak: ${this.hollowStreak}, total hollows: ${this.totalHollows}/${this.totalDispatches})`, canRetry ? 'Marking as failed for retry' : 'Retries exhausted — hard fail');
|
|
1516
|
+
// B2: Hollow streak handling — only terminate if enableHollowTermination is explicitly on
|
|
1517
|
+
if (this.hollowStreak >= SwarmOrchestrator.HOLLOW_STREAK_THRESHOLD) {
|
|
1518
|
+
const uniqueModels = new Set(this.config.workers.map(w => w.model));
|
|
1519
|
+
const singleModel = uniqueModels.size === 1;
|
|
1520
|
+
const onlyModel = [...uniqueModels][0];
|
|
1521
|
+
const modelUnhealthy = singleModel && !this.healthTracker.getAllRecords().find(r => r.model === onlyModel)?.healthy;
|
|
1522
|
+
if (singleModel && modelUnhealthy) {
|
|
1523
|
+
if (this.config.enableHollowTermination) {
|
|
1524
|
+
this.logDecision('early-termination', `Terminating swarm: ${this.hollowStreak} consecutive hollow completions on sole model ${onlyModel}`, 'Single-model swarm with unhealthy model — enableHollowTermination is on');
|
|
1525
|
+
this.skipRemainingTasks(`Single-model hollow streak (${this.hollowStreak}x on ${onlyModel})`);
|
|
1526
|
+
}
|
|
1527
|
+
else {
|
|
1528
|
+
this.logDecision('stall-mode', `${this.hollowStreak} consecutive hollows on sole model ${onlyModel} — entering stall mode`, 'Will attempt model failover or simplified retry on next dispatch');
|
|
1529
|
+
// Reset streak to allow more attempts with adjusted strategy
|
|
1530
|
+
this.hollowStreak = 0;
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
// V7: Multi-model hollow ratio — warn but don't terminate unless opt-in
|
|
1535
|
+
const minDispatches = this.config.hollowTerminationMinDispatches ?? 8;
|
|
1536
|
+
const threshold = this.config.hollowTerminationRatio ?? 0.55;
|
|
1537
|
+
if (this.totalDispatches >= minDispatches) {
|
|
1538
|
+
const ratio = this.totalHollows / this.totalDispatches;
|
|
1539
|
+
if (ratio > threshold) {
|
|
1540
|
+
if (this.config.enableHollowTermination) {
|
|
1541
|
+
this.logDecision('early-termination', `Terminating swarm: hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, `Exceeds threshold ${(threshold * 100).toFixed(0)}% after ${minDispatches}+ dispatches — enableHollowTermination is on`);
|
|
1542
|
+
this.skipRemainingTasks(`Hollow ratio ${(ratio * 100).toFixed(0)}% — models cannot execute tasks`);
|
|
1543
|
+
}
|
|
1544
|
+
else if (!this.hollowRatioWarned) {
|
|
1545
|
+
this.hollowRatioWarned = true;
|
|
1546
|
+
this.logDecision('stall-warning', `Hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, 'High hollow rate but continuing — tasks may still recover via resilience');
|
|
1547
|
+
}
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
816
1550
|
return;
|
|
817
1551
|
}
|
|
818
|
-
//
|
|
1552
|
+
// F4: Task had pendingCascadeSkip but produced non-hollow results.
|
|
1553
|
+
// Run pre-flight checks — if the output is good, accept it instead of skipping.
|
|
1554
|
+
if (task.pendingCascadeSkip) {
|
|
1555
|
+
const cachedReport = checkArtifacts(task);
|
|
1556
|
+
const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedReport);
|
|
1557
|
+
if (preFlight && !preFlight.passed) {
|
|
1558
|
+
// Output is garbage — honor the cascade skip
|
|
1559
|
+
task.pendingCascadeSkip = undefined;
|
|
1560
|
+
task.status = 'skipped';
|
|
1561
|
+
this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (pre-flight failed: ${preFlight.feedback})`, '');
|
|
1562
|
+
this.emit({ type: 'swarm.task.skipped', taskId, reason: `cascade skip honored — output failed pre-flight: ${preFlight.feedback}` });
|
|
1563
|
+
return;
|
|
1564
|
+
}
|
|
1565
|
+
// Output is good — clear the flag and accept the result
|
|
1566
|
+
task.pendingCascadeSkip = undefined;
|
|
1567
|
+
task.status = 'dispatched'; // Reset so markCompleted works
|
|
1568
|
+
this.logDecision('cascade-skip', `${taskId}: pending cascade skip overridden — worker produced valid output`, '');
|
|
1569
|
+
}
|
|
1570
|
+
// Record model health on success (only for non-hollow completions)
|
|
819
1571
|
this.healthTracker.recordSuccess(model, durationMs);
|
|
820
|
-
//
|
|
1572
|
+
this.decreaseStagger(); // P7: Speed up on success
|
|
1573
|
+
// Run quality gate if enabled — skip under API pressure, skip if circuit breaker tripped,
|
|
1574
|
+
// and let the final attempt through without quality gate (so tasks produce *something*)
|
|
1575
|
+
// Foundation tasks get +1 retry to reduce cascade failure risk.
|
|
1576
|
+
const effectiveRetries = this.getEffectiveRetries(task);
|
|
821
1577
|
const recentRLCount = this.recentRateLimits.filter(t => t > Date.now() - 30_000).length;
|
|
1578
|
+
const isLastAttempt = task.attempts >= (effectiveRetries + 1);
|
|
822
1579
|
const shouldRunQualityGate = this.config.qualityGates
|
|
823
|
-
&&
|
|
1580
|
+
&& !this.qualityGateDisabledModels.has(model)
|
|
1581
|
+
&& !isLastAttempt
|
|
824
1582
|
&& Date.now() >= this.circuitBreakerUntil
|
|
825
1583
|
&& recentRLCount < 2;
|
|
1584
|
+
// C1: Pre-compute artifact report once — shared by quality gate and pre-flight checks
|
|
1585
|
+
const cachedArtifactReport = checkArtifacts(task);
|
|
826
1586
|
if (shouldRunQualityGate) {
|
|
827
1587
|
// V3: Judge role handles quality gates
|
|
828
1588
|
const judgeModel = this.config.hierarchy?.judge?.model
|
|
@@ -832,27 +1592,272 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
832
1592
|
persona: this.config.hierarchy?.judge?.persona,
|
|
833
1593
|
};
|
|
834
1594
|
this.emit({ type: 'swarm.role.action', role: 'judge', action: 'quality-gate', model: judgeModel, taskId });
|
|
835
|
-
|
|
1595
|
+
// Extract file artifacts from worker output for quality gate visibility.
|
|
1596
|
+
// When workers create files via write_file/edit_file, the judge needs to see
|
|
1597
|
+
// the actual content — not just the worker's text claims about what was created.
|
|
1598
|
+
const fileArtifacts = this.extractFileArtifacts(task, taskResult);
|
|
1599
|
+
// Foundation tasks get a relaxed quality threshold (threshold - 1, min 2)
|
|
1600
|
+
// to reduce the chance of cascade-skipping the entire swarm.
|
|
1601
|
+
const baseThreshold = this.config.qualityThreshold ?? 3;
|
|
1602
|
+
const qualityThreshold = task.isFoundation ? Math.max(2, baseThreshold - 1) : baseThreshold;
|
|
1603
|
+
const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, qualityThreshold, (resp, purpose) => this.trackOrchestratorUsage(resp, purpose), fileArtifacts, this.config, cachedArtifactReport);
|
|
836
1604
|
taskResult.qualityScore = quality.score;
|
|
837
1605
|
taskResult.qualityFeedback = quality.feedback;
|
|
1606
|
+
// F11: Foundation tasks that barely pass the relaxed threshold get concrete validation.
|
|
1607
|
+
// A 2/5 foundation task with truncated output will cascade-poison all dependents.
|
|
1608
|
+
if (quality.passed && task.isFoundation && quality.score <= baseThreshold - 1) {
|
|
1609
|
+
const concreteResult = runConcreteChecks(task, taskResult);
|
|
1610
|
+
if (!concreteResult.passed) {
|
|
1611
|
+
quality.passed = false;
|
|
1612
|
+
quality.feedback += ` [F11: foundation task barely passed (${quality.score}/${baseThreshold}) but concrete validation failed: ${concreteResult.issues.join('; ')}]`;
|
|
1613
|
+
this.logDecision('foundation-concrete-gate', `${taskId}: foundation task scored ${quality.score} (relaxed threshold ${qualityThreshold}) but concrete checks failed — rejecting`, concreteResult.issues.join('; '));
|
|
1614
|
+
}
|
|
1615
|
+
}
|
|
838
1616
|
if (!quality.passed) {
|
|
1617
|
+
// F7: Gate error fallback — when LLM judge fails, use concrete validation
|
|
1618
|
+
// If concrete checks pass, tentatively accept the result instead of rejecting.
|
|
1619
|
+
if (quality.gateError && (this.config.enableConcreteValidation !== false)) {
|
|
1620
|
+
const concreteResult = runConcreteChecks(task, taskResult);
|
|
1621
|
+
if (concreteResult.passed) {
|
|
1622
|
+
// Concrete validation passed — tentatively accept despite gate error
|
|
1623
|
+
this.logDecision('gate-error-fallback', `${taskId}: gate error but concrete checks passed — tentatively accepting`, quality.gateErrorMessage ?? 'unknown');
|
|
1624
|
+
taskResult.qualityScore = quality.score;
|
|
1625
|
+
taskResult.qualityFeedback = `${quality.feedback} [concrete validation passed — tentative accept]`;
|
|
1626
|
+
// Fall through to success path (don't return)
|
|
1627
|
+
}
|
|
1628
|
+
else {
|
|
1629
|
+
// Both gate and concrete failed — reject
|
|
1630
|
+
this.logDecision('gate-error-fallback', `${taskId}: gate error AND concrete checks failed — rejecting`, `Concrete issues: ${concreteResult.issues.join('; ')}`);
|
|
1631
|
+
// Fall through to normal rejection below
|
|
1632
|
+
}
|
|
1633
|
+
// If concrete passed, skip the rejection path
|
|
1634
|
+
if (concreteResult.passed) {
|
|
1635
|
+
this.perModelQualityRejections.delete(model);
|
|
1636
|
+
// Jump to success path below
|
|
1637
|
+
}
|
|
1638
|
+
else {
|
|
1639
|
+
// Proceed with normal rejection
|
|
1640
|
+
this.qualityRejections++;
|
|
1641
|
+
task.failureMode = 'quality';
|
|
1642
|
+
this.healthTracker.recordQualityRejection(model, quality.score);
|
|
1643
|
+
this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
|
|
1644
|
+
this.hollowStreak = 0;
|
|
1645
|
+
task.retryContext = {
|
|
1646
|
+
previousFeedback: `Gate error + concrete validation failed: ${concreteResult.issues.join('; ')}`,
|
|
1647
|
+
previousScore: quality.score,
|
|
1648
|
+
attempt: task.attempts,
|
|
1649
|
+
previousModel: model,
|
|
1650
|
+
previousFiles: taskResult.filesModified,
|
|
1651
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1652
|
+
};
|
|
1653
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1654
|
+
if (canRetry) {
|
|
1655
|
+
this.retries++;
|
|
1656
|
+
}
|
|
1657
|
+
else {
|
|
1658
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1659
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1660
|
+
return;
|
|
1661
|
+
}
|
|
1662
|
+
// Recovery failed — NOW trigger cascade
|
|
1663
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1664
|
+
}
|
|
1665
|
+
this.emit({
|
|
1666
|
+
type: 'swarm.quality.rejected',
|
|
1667
|
+
taskId,
|
|
1668
|
+
score: quality.score,
|
|
1669
|
+
feedback: quality.feedback,
|
|
1670
|
+
artifactCount: fileArtifacts.length,
|
|
1671
|
+
outputLength: taskResult.output.length,
|
|
1672
|
+
preFlightReject: false,
|
|
1673
|
+
filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
|
|
1674
|
+
});
|
|
1675
|
+
return;
|
|
1676
|
+
}
|
|
1677
|
+
}
|
|
1678
|
+
else if (!quality.gateError) {
|
|
1679
|
+
// Normal quality rejection (LLM judge rejected, no gate error)
|
|
1680
|
+
this.qualityRejections++;
|
|
1681
|
+
// P6: Tag failure mode for cascade threshold awareness
|
|
1682
|
+
task.failureMode = 'quality';
|
|
1683
|
+
// P1: Quality rejections update model health — undo premature recordSuccess
|
|
1684
|
+
this.healthTracker.recordQualityRejection(model, quality.score);
|
|
1685
|
+
this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
|
|
1686
|
+
// V7: Quality rejection is NOT hollow — worker did work, just poorly
|
|
1687
|
+
this.hollowStreak = 0;
|
|
1688
|
+
// F7: Per-model circuit breaker → "pre-flight only mode" instead of fully disabling gates.
|
|
1689
|
+
// After threshold rejections, skip LLM judge but keep pre-flight mandatory.
|
|
1690
|
+
if (!quality.preFlightReject) {
|
|
1691
|
+
const modelRejections = (this.perModelQualityRejections.get(model) ?? 0) + 1;
|
|
1692
|
+
this.perModelQualityRejections.set(model, modelRejections);
|
|
1693
|
+
if (modelRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
|
|
1694
|
+
this.qualityGateDisabledModels.add(model);
|
|
1695
|
+
this.logDecision('quality-circuit-breaker', `Switched model ${model} to pre-flight-only mode after ${modelRejections} rejections`, 'Skipping LLM judge but keeping pre-flight checks mandatory');
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
// V5: Attach feedback so retry prompt includes it
|
|
1699
|
+
task.retryContext = {
|
|
1700
|
+
previousFeedback: quality.feedback,
|
|
1701
|
+
previousScore: quality.score,
|
|
1702
|
+
attempt: task.attempts,
|
|
1703
|
+
previousModel: model,
|
|
1704
|
+
previousFiles: taskResult.filesModified,
|
|
1705
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1706
|
+
};
|
|
1707
|
+
// V5: Model failover on quality rejection — but NOT on artifact auto-fails
|
|
1708
|
+
// P1: Widened from score<=1 to score<threshold so failover triggers on any rejection
|
|
1709
|
+
if (quality.score < qualityThreshold && this.config.enableModelFailover && !quality.artifactAutoFail) {
|
|
1710
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
1711
|
+
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
1712
|
+
if (alternative) {
|
|
1713
|
+
this.emit({
|
|
1714
|
+
type: 'swarm.model.failover',
|
|
1715
|
+
taskId,
|
|
1716
|
+
fromModel: model,
|
|
1717
|
+
toModel: alternative.model,
|
|
1718
|
+
reason: `quality-score-${quality.score}`,
|
|
1719
|
+
});
|
|
1720
|
+
task.assignedModel = alternative.model;
|
|
1721
|
+
this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
|
|
1722
|
+
}
|
|
1723
|
+
}
|
|
1724
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1725
|
+
if (canRetry) {
|
|
1726
|
+
this.retries++;
|
|
1727
|
+
}
|
|
1728
|
+
else {
|
|
1729
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1730
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1731
|
+
return;
|
|
1732
|
+
}
|
|
1733
|
+
// Recovery failed — NOW trigger cascade
|
|
1734
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1735
|
+
}
|
|
1736
|
+
// M1: Only emit quality.rejected (not duplicate task.failed)
|
|
1737
|
+
this.emit({
|
|
1738
|
+
type: 'swarm.quality.rejected',
|
|
1739
|
+
taskId,
|
|
1740
|
+
score: quality.score,
|
|
1741
|
+
feedback: quality.feedback,
|
|
1742
|
+
artifactCount: fileArtifacts.length,
|
|
1743
|
+
outputLength: taskResult.output.length,
|
|
1744
|
+
preFlightReject: quality.preFlightReject,
|
|
1745
|
+
filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
|
|
1746
|
+
});
|
|
1747
|
+
return;
|
|
1748
|
+
}
|
|
1749
|
+
else {
|
|
1750
|
+
// gateError=true but concrete validation disabled — reject
|
|
1751
|
+
this.qualityRejections++;
|
|
1752
|
+
task.failureMode = 'quality';
|
|
1753
|
+
this.hollowStreak = 0;
|
|
1754
|
+
task.retryContext = {
|
|
1755
|
+
previousFeedback: quality.feedback,
|
|
1756
|
+
previousScore: quality.score,
|
|
1757
|
+
attempt: task.attempts,
|
|
1758
|
+
previousModel: model,
|
|
1759
|
+
previousFiles: taskResult.filesModified,
|
|
1760
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1761
|
+
};
|
|
1762
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1763
|
+
if (canRetry) {
|
|
1764
|
+
this.retries++;
|
|
1765
|
+
}
|
|
1766
|
+
else {
|
|
1767
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1768
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1769
|
+
return;
|
|
1770
|
+
}
|
|
1771
|
+
// Recovery failed — NOW trigger cascade
|
|
1772
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1773
|
+
}
|
|
1774
|
+
this.emit({
|
|
1775
|
+
type: 'swarm.quality.rejected',
|
|
1776
|
+
taskId,
|
|
1777
|
+
score: quality.score,
|
|
1778
|
+
feedback: quality.feedback,
|
|
1779
|
+
artifactCount: fileArtifacts.length,
|
|
1780
|
+
outputLength: taskResult.output.length,
|
|
1781
|
+
preFlightReject: false,
|
|
1782
|
+
filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
|
|
1783
|
+
});
|
|
1784
|
+
return;
|
|
1785
|
+
}
|
|
1786
|
+
}
|
|
1787
|
+
// Quality passed — reset per-model rejection counter
|
|
1788
|
+
this.perModelQualityRejections.delete(model);
|
|
1789
|
+
}
|
|
1790
|
+
// F7: When quality gate was skipped (last attempt, pre-flight-only mode, API pressure),
|
|
1791
|
+
// still run pre-flight + concrete checks so obviously broken outputs don't slip through.
|
|
1792
|
+
// C1: Use cached artifact report to avoid double filesystem scan.
|
|
1793
|
+
if (!shouldRunQualityGate && this.config.qualityGates) {
|
|
1794
|
+
const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedArtifactReport);
|
|
1795
|
+
if (preFlight && !preFlight.passed) {
|
|
1796
|
+
taskResult.qualityScore = preFlight.score;
|
|
1797
|
+
taskResult.qualityFeedback = preFlight.feedback;
|
|
839
1798
|
this.qualityRejections++;
|
|
840
|
-
const canRetry = this.taskQueue.
|
|
1799
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
841
1800
|
if (canRetry) {
|
|
842
1801
|
this.retries++;
|
|
843
1802
|
}
|
|
844
|
-
|
|
1803
|
+
else {
|
|
1804
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1805
|
+
this.logDecision('preflight-reject', `${taskId}: pre-flight failed: ${preFlight.feedback}`, '');
|
|
1806
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1807
|
+
return;
|
|
1808
|
+
}
|
|
1809
|
+
// Recovery failed — NOW trigger cascade
|
|
1810
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1811
|
+
}
|
|
845
1812
|
this.emit({
|
|
846
1813
|
type: 'swarm.quality.rejected',
|
|
847
1814
|
taskId,
|
|
848
|
-
score:
|
|
849
|
-
feedback:
|
|
1815
|
+
score: preFlight.score,
|
|
1816
|
+
feedback: preFlight.feedback,
|
|
1817
|
+
artifactCount: 0,
|
|
1818
|
+
outputLength: taskResult.output.length,
|
|
1819
|
+
preFlightReject: true,
|
|
850
1820
|
});
|
|
851
1821
|
return;
|
|
852
1822
|
}
|
|
1823
|
+
// F2: Run concrete validation when pre-flight passes but gate was skipped
|
|
1824
|
+
if (this.config.enableConcreteValidation !== false) {
|
|
1825
|
+
const concreteResult = runConcreteChecks(task, taskResult);
|
|
1826
|
+
if (!concreteResult.passed) {
|
|
1827
|
+
taskResult.qualityScore = 2;
|
|
1828
|
+
taskResult.qualityFeedback = `Concrete validation failed: ${concreteResult.issues.join('; ')}`;
|
|
1829
|
+
this.qualityRejections++;
|
|
1830
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1831
|
+
if (canRetry) {
|
|
1832
|
+
this.retries++;
|
|
1833
|
+
}
|
|
1834
|
+
else {
|
|
1835
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1836
|
+
this.logDecision('concrete-reject', `${taskId}: concrete validation failed: ${concreteResult.issues.join('; ')}`, '');
|
|
1837
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1838
|
+
return;
|
|
1839
|
+
}
|
|
1840
|
+
// Recovery failed — NOW trigger cascade
|
|
1841
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1842
|
+
}
|
|
1843
|
+
this.emit({
|
|
1844
|
+
type: 'swarm.quality.rejected',
|
|
1845
|
+
taskId,
|
|
1846
|
+
score: 2,
|
|
1847
|
+
feedback: taskResult.qualityFeedback,
|
|
1848
|
+
artifactCount: 0,
|
|
1849
|
+
outputLength: taskResult.output.length,
|
|
1850
|
+
preFlightReject: false,
|
|
1851
|
+
});
|
|
1852
|
+
return;
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
853
1855
|
}
|
|
854
1856
|
// Task passed — mark completed
|
|
855
1857
|
this.taskQueue.markCompleted(taskId, taskResult);
|
|
1858
|
+
this.hollowStreak = 0;
|
|
1859
|
+
// F25: Clear timeout counter on success
|
|
1860
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
856
1861
|
// H6: Post findings to blackboard with error handling
|
|
857
1862
|
if (this.blackboard && taskResult.findings) {
|
|
858
1863
|
try {
|
|
@@ -885,6 +1890,10 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
885
1890
|
costUsed: taskResult.costUsed,
|
|
886
1891
|
durationMs: taskResult.durationMs,
|
|
887
1892
|
qualityScore: taskResult.qualityScore,
|
|
1893
|
+
qualityFeedback: taskResult.qualityFeedback,
|
|
1894
|
+
output: taskResult.output,
|
|
1895
|
+
closureReport: taskResult.closureReport,
|
|
1896
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
888
1897
|
});
|
|
889
1898
|
}
|
|
890
1899
|
/**
|
|
@@ -894,7 +1903,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
894
1903
|
const tasks = this.taskQueue.getAllTasks();
|
|
895
1904
|
const outputs = tasks
|
|
896
1905
|
.filter(t => t.status === 'completed')
|
|
897
|
-
.map(t => taskResultToAgentOutput(t))
|
|
1906
|
+
.map(t => taskResultToAgentOutput(t, this.config))
|
|
898
1907
|
.filter((o) => o !== null);
|
|
899
1908
|
if (outputs.length === 0)
|
|
900
1909
|
return null;
|
|
@@ -924,11 +1933,17 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
924
1933
|
activeWorkers: this.workerPool.getActiveWorkerStatus(),
|
|
925
1934
|
queue: stats,
|
|
926
1935
|
budget: {
|
|
927
|
-
tokensUsed: this.totalTokens,
|
|
1936
|
+
tokensUsed: this.totalTokens + this.orchestratorTokens,
|
|
928
1937
|
tokensTotal: this.config.totalBudget,
|
|
929
|
-
costUsed: this.totalCost,
|
|
1938
|
+
costUsed: this.totalCost + this.orchestratorCost,
|
|
930
1939
|
costTotal: this.config.maxCost,
|
|
931
1940
|
},
|
|
1941
|
+
orchestrator: {
|
|
1942
|
+
tokens: this.orchestratorTokens,
|
|
1943
|
+
cost: this.orchestratorCost,
|
|
1944
|
+
calls: this.orchestratorCalls,
|
|
1945
|
+
model: this.config.orchestratorModel,
|
|
1946
|
+
},
|
|
932
1947
|
};
|
|
933
1948
|
}
|
|
934
1949
|
/**
|
|
@@ -940,6 +1955,69 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
940
1955
|
this.currentPhase = 'failed';
|
|
941
1956
|
await this.workerPool.cancelAll();
|
|
942
1957
|
}
|
|
1958
|
+
// ─── D3: Model Capability Probing ─────────────────────────────────────
|
|
1959
|
+
/**
|
|
1960
|
+
* D3/F23: Probe each unique model to verify it can make tool calls.
|
|
1961
|
+
* Models that fail the probe are marked unhealthy so they're skipped in dispatch.
|
|
1962
|
+
*
|
|
1963
|
+
* F23 fix: Uses chatWithTools() with actual tool definitions instead of
|
|
1964
|
+
* plain chat() which never included tools in the API request.
|
|
1965
|
+
*/
|
|
1966
|
+
async probeModelCapability() {
|
|
1967
|
+
const uniqueModels = new Set(this.config.workers.map(w => w.model));
|
|
1968
|
+
this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Probing ${uniqueModels.size} model(s) for tool-calling capability...` });
|
|
1969
|
+
// F23: Check if provider supports native tool calling
|
|
1970
|
+
const supportsTools = 'chatWithTools' in this.provider
|
|
1971
|
+
&& typeof this.provider.chatWithTools === 'function';
|
|
1972
|
+
if (!supportsTools) {
|
|
1973
|
+
// Provider doesn't support chatWithTools — skip probe entirely.
|
|
1974
|
+
// Workers will rely on text-based tool parsing fallback.
|
|
1975
|
+
this.logDecision('model-probe', 'Provider does not support chatWithTools — skipping probe', '');
|
|
1976
|
+
return;
|
|
1977
|
+
}
|
|
1978
|
+
const providerWithTools = this.provider;
|
|
1979
|
+
const probeTools = [{
|
|
1980
|
+
type: 'function',
|
|
1981
|
+
function: {
|
|
1982
|
+
name: 'read_file',
|
|
1983
|
+
description: 'Read a file from disk',
|
|
1984
|
+
parameters: {
|
|
1985
|
+
type: 'object',
|
|
1986
|
+
properties: { path: { type: 'string', description: 'File path' } },
|
|
1987
|
+
required: ['path'],
|
|
1988
|
+
},
|
|
1989
|
+
},
|
|
1990
|
+
}];
|
|
1991
|
+
// F24: Configurable probe timeout — generous default for slow models/connections
|
|
1992
|
+
const probeTimeout = this.config.probeTimeoutMs ?? 60_000;
|
|
1993
|
+
for (const model of uniqueModels) {
|
|
1994
|
+
try {
|
|
1995
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Probe timeout (${probeTimeout}ms)`)), probeTimeout));
|
|
1996
|
+
const response = await Promise.race([
|
|
1997
|
+
providerWithTools.chatWithTools([
|
|
1998
|
+
{ role: 'system', content: 'You are a test probe. Call the read_file tool with path "package.json".' },
|
|
1999
|
+
{ role: 'user', content: 'Read package.json.' },
|
|
2000
|
+
], { model, maxTokens: 200, temperature: 0, tools: probeTools, tool_choice: 'required' }),
|
|
2001
|
+
timeoutPromise,
|
|
2002
|
+
]);
|
|
2003
|
+
const hasToolCall = (response.toolCalls?.length ?? 0) > 0;
|
|
2004
|
+
if (!hasToolCall) {
|
|
2005
|
+
// F19: Directly mark unhealthy — probe failure is definitive evidence
|
|
2006
|
+
this.healthTracker.markUnhealthy(model);
|
|
2007
|
+
this.logDecision('model-probe', `Model ${model} failed probe (no tool calls)`, 'Marked unhealthy');
|
|
2008
|
+
}
|
|
2009
|
+
else {
|
|
2010
|
+
this.healthTracker.recordSuccess(model, 0);
|
|
2011
|
+
this.logDecision('model-probe', `Model ${model} passed probe`, '');
|
|
2012
|
+
}
|
|
2013
|
+
}
|
|
2014
|
+
catch {
|
|
2015
|
+
// F19: Directly mark unhealthy on probe error (includes timeout)
|
|
2016
|
+
this.healthTracker.markUnhealthy(model);
|
|
2017
|
+
this.logDecision('model-probe', `Model ${model} probe errored`, 'Marked unhealthy');
|
|
2018
|
+
}
|
|
2019
|
+
}
|
|
2020
|
+
}
|
|
943
2021
|
// ─── Circuit Breaker ────────────────────────────────────────────────
|
|
944
2022
|
/**
|
|
945
2023
|
* Record a rate limit hit and check if the circuit breaker should trip.
|
|
@@ -947,6 +2025,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
947
2025
|
recordRateLimit() {
|
|
948
2026
|
const now = Date.now();
|
|
949
2027
|
this.recentRateLimits.push(now);
|
|
2028
|
+
this.increaseStagger(); // P7: Back off on rate limits
|
|
950
2029
|
// Prune entries older than the window
|
|
951
2030
|
const cutoff = now - SwarmOrchestrator.CIRCUIT_BREAKER_WINDOW_MS;
|
|
952
2031
|
this.recentRateLimits = this.recentRateLimits.filter(t => t > cutoff);
|
|
@@ -974,6 +2053,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
974
2053
|
}
|
|
975
2054
|
return false;
|
|
976
2055
|
}
|
|
2056
|
+
// ─── P7: Adaptive Stagger ────────────────────────────────────────────
|
|
2057
|
+
/** P7: Get current stagger delay (adapts based on rate limit / success signals). */
|
|
2058
|
+
getStaggerMs() {
|
|
2059
|
+
return this.adaptiveStaggerMs;
|
|
2060
|
+
}
|
|
2061
|
+
/** P7: Increase stagger on rate limit (×1.5, capped at 10s). */
|
|
2062
|
+
increaseStagger() {
|
|
2063
|
+
this.adaptiveStaggerMs = Math.min(this.adaptiveStaggerMs * 1.5, 10_000);
|
|
2064
|
+
}
|
|
2065
|
+
/** P7: Decrease stagger on success (×0.9, floor at 200ms). */
|
|
2066
|
+
decreaseStagger() {
|
|
2067
|
+
this.adaptiveStaggerMs = Math.max(this.adaptiveStaggerMs * 0.9, 200);
|
|
2068
|
+
}
|
|
977
2069
|
// ─── V2: Decision Logging ─────────────────────────────────────────────
|
|
978
2070
|
logDecision(phase, decision, reasoning) {
|
|
979
2071
|
const entry = {
|
|
@@ -1000,14 +2092,15 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1000
2092
|
waves: queueState.waves,
|
|
1001
2093
|
currentWave: queueState.currentWave,
|
|
1002
2094
|
stats: {
|
|
1003
|
-
totalTokens: this.totalTokens,
|
|
1004
|
-
totalCost: this.totalCost,
|
|
2095
|
+
totalTokens: this.totalTokens + this.orchestratorTokens,
|
|
2096
|
+
totalCost: this.totalCost + this.orchestratorCost,
|
|
1005
2097
|
qualityRejections: this.qualityRejections,
|
|
1006
2098
|
retries: this.retries,
|
|
1007
2099
|
},
|
|
1008
2100
|
modelHealth: this.healthTracker.getAllRecords(),
|
|
1009
2101
|
decisions: this.orchestratorDecisions,
|
|
1010
2102
|
errors: this.errors,
|
|
2103
|
+
originalPrompt: this.originalPrompt,
|
|
1011
2104
|
});
|
|
1012
2105
|
this.emit({
|
|
1013
2106
|
type: 'swarm.state.checkpoint',
|
|
@@ -1027,9 +2120,9 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1027
2120
|
emitBudgetUpdate() {
|
|
1028
2121
|
this.emit({
|
|
1029
2122
|
type: 'swarm.budget.update',
|
|
1030
|
-
tokensUsed: this.totalTokens,
|
|
2123
|
+
tokensUsed: this.totalTokens + this.orchestratorTokens,
|
|
1031
2124
|
tokensTotal: this.config.totalBudget,
|
|
1032
|
-
costUsed: this.totalCost,
|
|
2125
|
+
costUsed: this.totalCost + this.orchestratorCost,
|
|
1033
2126
|
costTotal: this.config.maxCost,
|
|
1034
2127
|
});
|
|
1035
2128
|
}
|
|
@@ -1044,8 +2137,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1044
2137
|
failedTasks: queueStats.failed,
|
|
1045
2138
|
skippedTasks: queueStats.skipped,
|
|
1046
2139
|
totalWaves: this.taskQueue.getTotalWaves(),
|
|
1047
|
-
totalTokens: this.totalTokens,
|
|
1048
|
-
totalCost: this.totalCost,
|
|
2140
|
+
totalTokens: this.totalTokens + this.orchestratorTokens,
|
|
2141
|
+
totalCost: this.totalCost + this.orchestratorCost,
|
|
1049
2142
|
totalDurationMs: Date.now() - this.startTime,
|
|
1050
2143
|
qualityRejections: this.qualityRejections,
|
|
1051
2144
|
retries: this.retries,
|
|
@@ -1070,6 +2163,16 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1070
2163
|
if (this.verificationResult) {
|
|
1071
2164
|
parts.push(` Verification: ${this.verificationResult.passed ? 'PASSED' : 'FAILED'}`);
|
|
1072
2165
|
}
|
|
2166
|
+
// Artifact inventory: show what files actually exist on disk regardless of task status
|
|
2167
|
+
if (this.artifactInventory && this.artifactInventory.totalFiles > 0) {
|
|
2168
|
+
parts.push(` Files on disk: ${this.artifactInventory.totalFiles} files (${(this.artifactInventory.totalBytes / 1024).toFixed(1)}KB)`);
|
|
2169
|
+
for (const f of this.artifactInventory.files.slice(0, 15)) {
|
|
2170
|
+
parts.push(` ${f.path}: ${f.sizeBytes}B`);
|
|
2171
|
+
}
|
|
2172
|
+
if (this.artifactInventory.files.length > 15) {
|
|
2173
|
+
parts.push(` ... and ${this.artifactInventory.files.length - 15} more`);
|
|
2174
|
+
}
|
|
2175
|
+
}
|
|
1073
2176
|
return parts.join('\n');
|
|
1074
2177
|
}
|
|
1075
2178
|
buildErrorResult(message) {
|
|
@@ -1096,6 +2199,698 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1096
2199
|
return null;
|
|
1097
2200
|
}
|
|
1098
2201
|
}
|
|
2202
|
+
/**
|
|
2203
|
+
* Detect foundation tasks: tasks that are a dependency of 2+ downstream tasks.
|
|
2204
|
+
* These are critical single-points-of-failure — mark them for extra resilience.
|
|
2205
|
+
*/
|
|
2206
|
+
detectFoundationTasks() {
|
|
2207
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2208
|
+
const dependentCounts = new Map();
|
|
2209
|
+
for (const task of allTasks) {
|
|
2210
|
+
for (const depId of task.dependencies) {
|
|
2211
|
+
dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
|
|
2212
|
+
}
|
|
2213
|
+
}
|
|
2214
|
+
for (const task of allTasks) {
|
|
2215
|
+
const dependentCount = dependentCounts.get(task.id) ?? 0;
|
|
2216
|
+
if (dependentCount >= 2) {
|
|
2217
|
+
task.isFoundation = true;
|
|
2218
|
+
this.logDecision('scheduling', `Foundation task: ${task.id} (${dependentCount} dependents)`, 'Extra retries and relaxed quality threshold applied');
|
|
2219
|
+
}
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2222
|
+
/**
|
|
2223
|
+
* Extract file artifacts from a worker's output for quality gate visibility.
|
|
2224
|
+
* Reads actual file content from disk so the judge can verify real work,
|
|
2225
|
+
* not just text claims about what was created.
|
|
2226
|
+
*/
|
|
2227
|
+
extractFileArtifacts(task, taskResult) {
|
|
2228
|
+
const artifacts = [];
|
|
2229
|
+
const seen = new Set();
|
|
2230
|
+
// Collect file paths from multiple sources
|
|
2231
|
+
const candidatePaths = [];
|
|
2232
|
+
// 1. filesModified from structured closure report
|
|
2233
|
+
if (taskResult.filesModified) {
|
|
2234
|
+
candidatePaths.push(...taskResult.filesModified);
|
|
2235
|
+
}
|
|
2236
|
+
// 2. targetFiles from task definition
|
|
2237
|
+
if (task.targetFiles) {
|
|
2238
|
+
candidatePaths.push(...task.targetFiles);
|
|
2239
|
+
}
|
|
2240
|
+
// 3. Extract file paths mentioned in worker output (e.g., "Created src/foo.ts")
|
|
2241
|
+
const filePathPattern = /(?:created|wrote|modified|edited|updated)\s+["`']?([^\s"`',]+\.\w+)/gi;
|
|
2242
|
+
let match;
|
|
2243
|
+
while ((match = filePathPattern.exec(taskResult.output)) !== null) {
|
|
2244
|
+
candidatePaths.push(match[1]);
|
|
2245
|
+
}
|
|
2246
|
+
// Resolve against the target project directory, not CWD
|
|
2247
|
+
const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
|
|
2248
|
+
// Read previews from disk
|
|
2249
|
+
for (const filePath of candidatePaths) {
|
|
2250
|
+
if (seen.has(filePath))
|
|
2251
|
+
continue;
|
|
2252
|
+
seen.add(filePath);
|
|
2253
|
+
try {
|
|
2254
|
+
const resolved = path.resolve(baseDir, filePath);
|
|
2255
|
+
if (fs.existsSync(resolved)) {
|
|
2256
|
+
const content = fs.readFileSync(resolved, 'utf-8');
|
|
2257
|
+
if (content.length > 0) {
|
|
2258
|
+
artifacts.push({ path: filePath, preview: content.slice(0, 2000) });
|
|
2259
|
+
}
|
|
2260
|
+
}
|
|
2261
|
+
}
|
|
2262
|
+
catch {
|
|
2263
|
+
// Skip unreadable files
|
|
2264
|
+
}
|
|
2265
|
+
// Limit to 10 files to keep prompt size reasonable
|
|
2266
|
+
if (artifacts.length >= 10)
|
|
2267
|
+
break;
|
|
2268
|
+
}
|
|
2269
|
+
return artifacts;
|
|
2270
|
+
}
|
|
2271
|
+
/**
|
|
2272
|
+
* Build an inventory of filesystem artifacts produced during swarm execution.
|
|
2273
|
+
* Scans all tasks' targetFiles and readFiles to check what actually exists on disk.
|
|
2274
|
+
* This reveals work done by workers even when tasks "failed" (timeout, quality gate, etc.).
|
|
2275
|
+
*/
|
|
2276
|
+
buildArtifactInventory() {
|
|
2277
|
+
const allFiles = new Set();
|
|
2278
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
2279
|
+
for (const f of (task.targetFiles ?? []))
|
|
2280
|
+
allFiles.add(f);
|
|
2281
|
+
for (const f of (task.readFiles ?? []))
|
|
2282
|
+
allFiles.add(f);
|
|
2283
|
+
}
|
|
2284
|
+
const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
|
|
2285
|
+
const artifacts = [];
|
|
2286
|
+
for (const filePath of allFiles) {
|
|
2287
|
+
try {
|
|
2288
|
+
const resolved = path.resolve(baseDir, filePath);
|
|
2289
|
+
if (fs.existsSync(resolved)) {
|
|
2290
|
+
const stats = fs.statSync(resolved);
|
|
2291
|
+
if (stats.isFile() && stats.size > 0) {
|
|
2292
|
+
artifacts.push({ path: filePath, sizeBytes: stats.size, exists: true });
|
|
2293
|
+
}
|
|
2294
|
+
}
|
|
2295
|
+
}
|
|
2296
|
+
catch { /* skip unreadable files */ }
|
|
2297
|
+
}
|
|
2298
|
+
return {
|
|
2299
|
+
files: artifacts,
|
|
2300
|
+
totalFiles: artifacts.length,
|
|
2301
|
+
totalBytes: artifacts.reduce((s, a) => s + a.sizeBytes, 0),
|
|
2302
|
+
};
|
|
2303
|
+
}
|
|
2304
|
+
/**
|
|
2305
|
+
* Skip all remaining pending/ready tasks (used for early termination).
|
|
2306
|
+
*/
|
|
2307
|
+
skipRemainingTasks(reason) {
|
|
2308
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
2309
|
+
if (task.status === 'pending' || task.status === 'ready') {
|
|
2310
|
+
task.status = 'skipped';
|
|
2311
|
+
this.emit({ type: 'swarm.task.skipped', taskId: task.id, reason });
|
|
2312
|
+
}
|
|
2313
|
+
}
|
|
2314
|
+
}
|
|
2315
|
+
/**
|
|
2316
|
+
* F21: Mid-swarm situational assessment after each wave.
|
|
2317
|
+
* Evaluates success rate and budget health, triages low-priority tasks when budget is tight.
|
|
2318
|
+
* Also detects stalled progress and triggers mid-swarm re-planning.
|
|
2319
|
+
*/
|
|
2320
|
+
async assessAndAdapt(waveIndex) {
|
|
2321
|
+
const stats = this.taskQueue.getStats();
|
|
2322
|
+
const budgetStats = this.budgetPool.getStats();
|
|
2323
|
+
// 1. Calculate success rate for this swarm run
|
|
2324
|
+
const successRate = stats.completed / Math.max(1, stats.completed + stats.failed + stats.skipped);
|
|
2325
|
+
// 2. Budget efficiency: tokens spent per completed task
|
|
2326
|
+
const tokensPerTask = stats.completed > 0
|
|
2327
|
+
? (this.totalTokens / stats.completed)
|
|
2328
|
+
: Infinity;
|
|
2329
|
+
// 3. Remaining budget vs remaining tasks
|
|
2330
|
+
const remainingTasks = stats.total - stats.completed - stats.failed - stats.skipped;
|
|
2331
|
+
const estimatedTokensNeeded = remainingTasks * tokensPerTask;
|
|
2332
|
+
const budgetSufficient = budgetStats.tokensRemaining > estimatedTokensNeeded * 0.5;
|
|
2333
|
+
// Log the assessment for observability
|
|
2334
|
+
this.logDecision('mid-swarm-assessment', `After wave ${waveIndex + 1}: ${stats.completed}/${stats.total} completed (${(successRate * 100).toFixed(0)}%), ` +
|
|
2335
|
+
`${remainingTasks} remaining, ${budgetStats.tokensRemaining} tokens left`, budgetSufficient ? 'Budget looks sufficient' : 'Budget may be insufficient for remaining tasks');
|
|
2336
|
+
// 4. If budget is tight, prioritize: skip low-value remaining tasks
|
|
2337
|
+
// Only triage if we have actual data (at least one completion to estimate from)
|
|
2338
|
+
if (!budgetSufficient && remainingTasks > 1 && stats.completed > 0) {
|
|
2339
|
+
// Prefer pausing over skipping: if workers are still running, wait for budget release
|
|
2340
|
+
const runningCount = stats.running ?? 0;
|
|
2341
|
+
if (runningCount > 0) {
|
|
2342
|
+
this.logDecision('budget-wait', 'Budget tight but workers still running — waiting for budget release', `${runningCount} workers active, ${budgetStats.tokensRemaining} tokens remaining`);
|
|
2343
|
+
return;
|
|
2344
|
+
}
|
|
2345
|
+
const expendableTasks = this.findExpendableTasks();
|
|
2346
|
+
// Hard cap: never skip more than 20% of remaining tasks in one triage pass
|
|
2347
|
+
const maxSkips = Math.max(1, Math.floor(remainingTasks * 0.2));
|
|
2348
|
+
if (expendableTasks.length > 0) {
|
|
2349
|
+
let currentEstimate = estimatedTokensNeeded;
|
|
2350
|
+
let skipped = 0;
|
|
2351
|
+
for (const task of expendableTasks) {
|
|
2352
|
+
if (skipped >= maxSkips)
|
|
2353
|
+
break;
|
|
2354
|
+
// Stop trimming once we're within budget
|
|
2355
|
+
if (currentEstimate * 0.7 <= budgetStats.tokensRemaining)
|
|
2356
|
+
break;
|
|
2357
|
+
task.status = 'skipped';
|
|
2358
|
+
skipped++;
|
|
2359
|
+
this.emit({ type: 'swarm.task.skipped', taskId: task.id,
|
|
2360
|
+
reason: 'Budget conservation: skipping low-priority task to protect critical path' });
|
|
2361
|
+
this.logDecision('budget-triage', `Skipping ${task.id} (${task.type}, complexity ${task.complexity}) to conserve budget`, `${remainingTasks} tasks remain, ${budgetStats.tokensRemaining} tokens`);
|
|
2362
|
+
currentEstimate -= tokensPerTask;
|
|
2363
|
+
}
|
|
2364
|
+
}
|
|
2365
|
+
}
|
|
2366
|
+
// 5. Stall detection: if progress ratio is too low, trigger re-plan
|
|
2367
|
+
const attemptedTasks = stats.completed + stats.failed + stats.skipped;
|
|
2368
|
+
if (attemptedTasks >= 5) {
|
|
2369
|
+
const progressRatio = stats.completed / Math.max(1, attemptedTasks);
|
|
2370
|
+
if (progressRatio < 0.4) {
|
|
2371
|
+
this.logDecision('stall-detected', `Progress stalled: ${stats.completed}/${attemptedTasks} tasks succeeded (${(progressRatio * 100).toFixed(0)}%)`, 'Triggering mid-swarm re-plan');
|
|
2372
|
+
this.emit({
|
|
2373
|
+
type: 'swarm.stall',
|
|
2374
|
+
progressRatio,
|
|
2375
|
+
attempted: attemptedTasks,
|
|
2376
|
+
completed: stats.completed,
|
|
2377
|
+
});
|
|
2378
|
+
await this.midSwarmReplan();
|
|
2379
|
+
}
|
|
2380
|
+
}
|
|
2381
|
+
}
|
|
2382
|
+
/**
|
|
2383
|
+
* F21: Find expendable tasks — leaf tasks (no dependents) with lowest complexity.
|
|
2384
|
+
* These are the safest to skip when budget is tight.
|
|
2385
|
+
* Only tasks with complexity <= 2 are considered expendable.
|
|
2386
|
+
*/
|
|
2387
|
+
findExpendableTasks() {
|
|
2388
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2389
|
+
// Build reverse dependency map: which tasks depend on each task?
|
|
2390
|
+
const dependentCounts = new Map();
|
|
2391
|
+
for (const task of allTasks) {
|
|
2392
|
+
for (const depId of task.dependencies) {
|
|
2393
|
+
dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
|
|
2394
|
+
}
|
|
2395
|
+
}
|
|
2396
|
+
// Expendable = pending/ready, never attempted, no dependents, not foundation,
|
|
2397
|
+
// complexity <= 2 (simple leaf tasks only), lowest complexity first
|
|
2398
|
+
return allTasks
|
|
2399
|
+
.filter(t => (t.status === 'pending' || t.status === 'ready') &&
|
|
2400
|
+
t.attempts === 0 &&
|
|
2401
|
+
!t.isFoundation &&
|
|
2402
|
+
(t.complexity ?? 5) <= 2 &&
|
|
2403
|
+
(dependentCounts.get(t.id) ?? 0) === 0)
|
|
2404
|
+
.sort((a, b) => (a.complexity ?? 5) - (b.complexity ?? 5));
|
|
2405
|
+
}
|
|
2406
|
+
/**
|
|
2407
|
+
* Mid-swarm re-planning: when progress stalls, ask LLM to re-plan remaining work.
|
|
2408
|
+
* Creates simpler replacement tasks for stuck/failed work, building on what's already done.
|
|
2409
|
+
* Only triggers once per swarm execution to avoid infinite re-planning loops.
|
|
2410
|
+
*/
|
|
2411
|
+
async midSwarmReplan() {
|
|
2412
|
+
if (this.hasReplanned)
|
|
2413
|
+
return;
|
|
2414
|
+
this.hasReplanned = true;
|
|
2415
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2416
|
+
const completed = allTasks.filter(t => t.status === 'completed' || t.status === 'decomposed');
|
|
2417
|
+
const stuck = allTasks.filter(t => t.status === 'failed' || t.status === 'skipped');
|
|
2418
|
+
if (stuck.length === 0)
|
|
2419
|
+
return;
|
|
2420
|
+
const completedSummary = completed.map(t => `- ${t.description} [${t.type}] → completed${t.degraded ? ' (degraded)' : ''}`).join('\n') || '(none)';
|
|
2421
|
+
const stuckSummary = stuck.map(t => `- ${t.description} [${t.type}] → ${t.status} (${t.failureMode ?? 'unknown'})`).join('\n');
|
|
2422
|
+
const artifactInventory = this.buildArtifactInventory();
|
|
2423
|
+
const artifactSummary = artifactInventory.files.map(f => `- ${f.path} (${f.sizeBytes}B)`).join('\n') || '(none)';
|
|
2424
|
+
const replanPrompt = `The swarm is stalled. Here's the situation:
|
|
2425
|
+
|
|
2426
|
+
COMPLETED WORK:
|
|
2427
|
+
${completedSummary}
|
|
2428
|
+
|
|
2429
|
+
FILES ON DISK:
|
|
2430
|
+
${artifactSummary}
|
|
2431
|
+
|
|
2432
|
+
STUCK TASKS (failed or skipped):
|
|
2433
|
+
${stuckSummary}
|
|
2434
|
+
|
|
2435
|
+
Re-plan the remaining work. Create new subtasks that:
|
|
2436
|
+
1. Build on what's already completed (don't redo work)
|
|
2437
|
+
2. Are more focused in scope (but assign realistic complexity for the work involved — don't underestimate)
|
|
2438
|
+
3. Can succeed independently (minimize dependencies)
|
|
2439
|
+
|
|
2440
|
+
Return JSON: { "subtasks": [{ "description": "...", "type": "implement|test|research|review|document|refactor", "complexity": 1-5, "dependencies": [], "relevantFiles": [] }] }
|
|
2441
|
+
Return ONLY the JSON object, no other text.`;
|
|
2442
|
+
try {
|
|
2443
|
+
const response = await this.provider.chat([{ role: 'user', content: replanPrompt }]);
|
|
2444
|
+
this.trackOrchestratorUsage(response, 'mid-swarm-replan');
|
|
2445
|
+
const content = response.content ?? '';
|
|
2446
|
+
const jsonMatch = content.match(/\{[\s\S]*"subtasks"[\s\S]*\}/);
|
|
2447
|
+
if (!jsonMatch) {
|
|
2448
|
+
this.logDecision('replan-failed', 'LLM produced no parseable re-plan JSON', content.slice(0, 200));
|
|
2449
|
+
return;
|
|
2450
|
+
}
|
|
2451
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
2452
|
+
if (!parsed.subtasks || parsed.subtasks.length === 0) {
|
|
2453
|
+
this.logDecision('replan-failed', 'LLM produced empty subtask list', '');
|
|
2454
|
+
return;
|
|
2455
|
+
}
|
|
2456
|
+
// Add new tasks from re-plan into current wave
|
|
2457
|
+
const newTasks = this.taskQueue.addReplanTasks(parsed.subtasks, this.taskQueue.getCurrentWave());
|
|
2458
|
+
this.logDecision('replan-success', `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`, newTasks.map(t => t.description).join('; '));
|
|
2459
|
+
this.emit({
|
|
2460
|
+
type: 'swarm.replan',
|
|
2461
|
+
stuckCount: stuck.length,
|
|
2462
|
+
newTaskCount: newTasks.length,
|
|
2463
|
+
});
|
|
2464
|
+
this.emit({
|
|
2465
|
+
type: 'swarm.orchestrator.decision',
|
|
2466
|
+
decision: {
|
|
2467
|
+
timestamp: Date.now(),
|
|
2468
|
+
phase: 'replan',
|
|
2469
|
+
decision: `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`,
|
|
2470
|
+
reasoning: newTasks.map(t => `${t.id}: ${t.description}`).join('; '),
|
|
2471
|
+
},
|
|
2472
|
+
});
|
|
2473
|
+
}
|
|
2474
|
+
catch (error) {
|
|
2475
|
+
this.logDecision('replan-failed', `Re-plan LLM call failed: ${error.message}`, '');
|
|
2476
|
+
}
|
|
2477
|
+
}
|
|
2478
|
+
/**
|
|
2479
|
+
* Rescue cascade-skipped tasks that can still run.
|
|
2480
|
+
* After cascade-skip fires, assess whether skipped tasks can still be attempted:
|
|
2481
|
+
* - If all OTHER dependencies completed and the failed dep's artifacts exist on disk → un-skip
|
|
2482
|
+
* - If the task has no strict data dependency on the failed task (different file targets) → un-skip with warning
|
|
2483
|
+
*/
|
|
2484
|
+
rescueCascadeSkipped(lenient = false) {
|
|
2485
|
+
const skippedTasks = this.taskQueue.getSkippedTasks();
|
|
2486
|
+
const rescued = [];
|
|
2487
|
+
for (const task of skippedTasks) {
|
|
2488
|
+
if (task.dependencies.length === 0)
|
|
2489
|
+
continue;
|
|
2490
|
+
let completedDeps = 0;
|
|
2491
|
+
let failedDepsWithArtifacts = 0;
|
|
2492
|
+
let failedDepsWithoutArtifacts = 0;
|
|
2493
|
+
let skippedDepsBlockedBySkipped = 0;
|
|
2494
|
+
let totalDeps = 0;
|
|
2495
|
+
const failedDepDescriptions = [];
|
|
2496
|
+
for (const depId of task.dependencies) {
|
|
2497
|
+
const dep = this.taskQueue.getTask(depId);
|
|
2498
|
+
if (!dep)
|
|
2499
|
+
continue;
|
|
2500
|
+
totalDeps++;
|
|
2501
|
+
if (dep.status === 'completed' || dep.status === 'decomposed') {
|
|
2502
|
+
completedDeps++;
|
|
2503
|
+
}
|
|
2504
|
+
else if (dep.status === 'failed' || dep.status === 'skipped') {
|
|
2505
|
+
// V10: In lenient mode, use checkArtifactsEnhanced for broader detection
|
|
2506
|
+
const artifactReport = lenient ? checkArtifactsEnhanced(dep) : checkArtifacts(dep);
|
|
2507
|
+
if (artifactReport && artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length > 0) {
|
|
2508
|
+
failedDepsWithArtifacts++;
|
|
2509
|
+
failedDepDescriptions.push(`${dep.description} (failed but ${artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length} artifacts exist)`);
|
|
2510
|
+
}
|
|
2511
|
+
else {
|
|
2512
|
+
// Check if this dep's target files exist on disk (may have been created by earlier attempt)
|
|
2513
|
+
const targetFiles = dep.targetFiles ?? [];
|
|
2514
|
+
const existingFiles = targetFiles.filter(f => {
|
|
2515
|
+
try {
|
|
2516
|
+
const resolved = path.resolve(this.config.facts?.workingDirectory ?? process.cwd(), f);
|
|
2517
|
+
return fs.statSync(resolved).size > 0;
|
|
2518
|
+
}
|
|
2519
|
+
catch {
|
|
2520
|
+
return false;
|
|
2521
|
+
}
|
|
2522
|
+
});
|
|
2523
|
+
if (existingFiles.length > 0) {
|
|
2524
|
+
failedDepsWithArtifacts++;
|
|
2525
|
+
failedDepDescriptions.push(`${dep.description} (failed but ${existingFiles.length}/${targetFiles.length} target files exist)`);
|
|
2526
|
+
}
|
|
2527
|
+
else {
|
|
2528
|
+
// Check if skipped task's targets don't overlap with the failed dep's targets
|
|
2529
|
+
const taskTargets = new Set(task.targetFiles ?? []);
|
|
2530
|
+
const depTargets = new Set(dep.targetFiles ?? []);
|
|
2531
|
+
const hasOverlap = [...taskTargets].some(f => depTargets.has(f));
|
|
2532
|
+
if (!hasOverlap && taskTargets.size > 0) {
|
|
2533
|
+
// Different file targets — task probably doesn't need the failed dep's output
|
|
2534
|
+
failedDepsWithArtifacts++;
|
|
2535
|
+
failedDepDescriptions.push(`${dep.description} (failed, no file overlap — likely independent)`);
|
|
2536
|
+
}
|
|
2537
|
+
else if (lenient && dep.status === 'skipped') {
|
|
2538
|
+
// V10: In lenient mode, count skipped-by-skipped deps separately
|
|
2539
|
+
// (transitive cascade — the dep itself was a victim, not truly broken)
|
|
2540
|
+
skippedDepsBlockedBySkipped++;
|
|
2541
|
+
failedDepDescriptions.push(`${dep.description} (skipped — transitive cascade victim)`);
|
|
2542
|
+
}
|
|
2543
|
+
else {
|
|
2544
|
+
failedDepsWithoutArtifacts++;
|
|
2545
|
+
}
|
|
2546
|
+
}
|
|
2547
|
+
}
|
|
2548
|
+
}
|
|
2549
|
+
}
|
|
2550
|
+
// Rescue condition:
|
|
2551
|
+
// Normal: all failed deps have artifacts or are independent, AND at least some deps completed
|
|
2552
|
+
// Lenient: tolerate up to 1 truly-missing dep, and count transitive cascade victims as recoverable
|
|
2553
|
+
const effectiveWithout = failedDepsWithoutArtifacts;
|
|
2554
|
+
const maxMissing = lenient ? 1 : 0;
|
|
2555
|
+
const hasEnoughContext = lenient ? (completedDeps + failedDepsWithArtifacts + skippedDepsBlockedBySkipped > 0) : (completedDeps > 0);
|
|
2556
|
+
if (totalDeps > 0 && effectiveWithout <= maxMissing && hasEnoughContext) {
|
|
2557
|
+
const rescueContext = `Rescued from cascade-skip${lenient ? ' (lenient)' : ''}: ${completedDeps}/${totalDeps} deps completed, ` +
|
|
2558
|
+
`${failedDepsWithArtifacts} failed deps have artifacts${skippedDepsBlockedBySkipped > 0 ? `, ${skippedDepsBlockedBySkipped} transitive cascade victims` : ''}. ${failedDepDescriptions.join('; ')}`;
|
|
2559
|
+
this.taskQueue.rescueTask(task.id, rescueContext);
|
|
2560
|
+
rescued.push(task);
|
|
2561
|
+
this.logDecision('cascade-rescue', `${task.id}: rescued from cascade-skip${lenient ? ' (lenient)' : ''}`, rescueContext);
|
|
2562
|
+
}
|
|
2563
|
+
}
|
|
2564
|
+
return rescued;
|
|
2565
|
+
}
|
|
2566
|
+
/**
|
|
2567
|
+
* Final rescue pass — runs after executeWaves() finishes.
|
|
2568
|
+
* Uses lenient mode to rescue cascade-skipped tasks that have partial context.
|
|
2569
|
+
* Re-dispatches rescued tasks in a final wave.
|
|
2570
|
+
*/
|
|
2571
|
+
async finalRescuePass() {
|
|
2572
|
+
const skipped = this.taskQueue.getSkippedTasks();
|
|
2573
|
+
if (skipped.length === 0)
|
|
2574
|
+
return;
|
|
2575
|
+
this.logDecision('final-rescue', `${skipped.length} skipped tasks — running final rescue pass`, '');
|
|
2576
|
+
const rescued = this.rescueCascadeSkipped(true); // lenient=true
|
|
2577
|
+
if (rescued.length > 0) {
|
|
2578
|
+
this.logDecision('final-rescue', `Rescued ${rescued.length} tasks`, rescued.map(t => t.id).join(', '));
|
|
2579
|
+
await this.executeWave(rescued);
|
|
2580
|
+
}
|
|
2581
|
+
}
|
|
2582
|
+
/**
|
|
2583
|
+
* Try resilience recovery strategies before hard-failing a task.
|
|
2584
|
+
* Called from dispatch-cap, timeout, hollow, and error paths to avoid bypassing resilience.
|
|
2585
|
+
*
|
|
2586
|
+
* Strategies (in order):
|
|
2587
|
+
* 1. Micro-decomposition — break complex failing tasks into subtasks
|
|
2588
|
+
* 2. Degraded acceptance — accept partial work if artifacts exist on disk
|
|
2589
|
+
*
|
|
2590
|
+
* Returns true if recovery succeeded (caller should return), false if hard-fail should proceed.
|
|
2591
|
+
*/
|
|
2592
|
+
async tryResilienceRecovery(task, taskId, taskResult, spawnResult) {
|
|
2593
|
+
// Strategy 1: Micro-decompose complex tasks into smaller subtasks
|
|
2594
|
+
// V10: Lowered threshold from >= 6 to >= 4 so moderately complex tasks can be recovered
|
|
2595
|
+
if ((task.complexity ?? 0) >= 4 && task.attempts >= 2 && this.budgetPool.hasCapacity()) {
|
|
2596
|
+
const subtasks = await this.microDecompose(task);
|
|
2597
|
+
if (subtasks && subtasks.length >= 2) {
|
|
2598
|
+
// Reset task status so replaceWithSubtasks can mark it as decomposed
|
|
2599
|
+
task.status = 'dispatched';
|
|
2600
|
+
this.taskQueue.replaceWithSubtasks(taskId, subtasks);
|
|
2601
|
+
this.logDecision('micro-decompose', `${taskId}: decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
|
|
2602
|
+
this.emit({
|
|
2603
|
+
type: 'swarm.task.failed',
|
|
2604
|
+
taskId,
|
|
2605
|
+
error: `Micro-decomposed into ${subtasks.length} subtasks`,
|
|
2606
|
+
attempt: task.attempts,
|
|
2607
|
+
maxAttempts: this.config.maxDispatchesPerTask ?? 5,
|
|
2608
|
+
willRetry: false,
|
|
2609
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
2610
|
+
failureMode: task.failureMode,
|
|
2611
|
+
});
|
|
2612
|
+
this.emit({
|
|
2613
|
+
type: 'swarm.task.resilience',
|
|
2614
|
+
taskId,
|
|
2615
|
+
strategy: 'micro-decompose',
|
|
2616
|
+
succeeded: true,
|
|
2617
|
+
reason: `Decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`,
|
|
2618
|
+
artifactsFound: 0,
|
|
2619
|
+
toolCalls: spawnResult.metrics.toolCalls ?? 0,
|
|
2620
|
+
});
|
|
2621
|
+
return true;
|
|
2622
|
+
}
|
|
2623
|
+
// Micro-decompose was attempted but didn't produce usable subtasks
|
|
2624
|
+
if ((task.complexity ?? 0) < 4) {
|
|
2625
|
+
this.logDecision('resilience-skip', `${taskId}: skipped micro-decompose — complexity ${task.complexity} < 4`, '');
|
|
2626
|
+
}
|
|
2627
|
+
}
|
|
2628
|
+
// Strategy 2: Degraded acceptance — check if any attempt produced files on disk.
|
|
2629
|
+
// V10: Use checkArtifactsEnhanced for broader detection (filesModified, closureReport, output)
|
|
2630
|
+
const artifactReport = checkArtifactsEnhanced(task, taskResult);
|
|
2631
|
+
const existingArtifacts = artifactReport.files.filter(f => f.exists && f.sizeBytes > 0);
|
|
2632
|
+
const hasArtifacts = existingArtifacts.length > 0;
|
|
2633
|
+
// V10: Fix timeout detection — toolCalls=-1 means timeout (worker WAS working)
|
|
2634
|
+
const toolCalls = spawnResult.metrics.toolCalls ?? 0;
|
|
2635
|
+
const hadToolCalls = toolCalls > 0 || toolCalls === -1
|
|
2636
|
+
|| (taskResult.filesModified && taskResult.filesModified.length > 0);
|
|
2637
|
+
if (hasArtifacts || hadToolCalls) {
|
|
2638
|
+
// Accept with degraded flag — prevents cascade-skip of dependents
|
|
2639
|
+
taskResult.success = true;
|
|
2640
|
+
taskResult.degraded = true;
|
|
2641
|
+
taskResult.qualityScore = 2; // Capped at low quality
|
|
2642
|
+
taskResult.qualityFeedback = 'Degraded acceptance: retries exhausted but filesystem artifacts exist';
|
|
2643
|
+
task.degraded = true;
|
|
2644
|
+
// Reset status so markCompleted works (markFailed may have set it to 'failed')
|
|
2645
|
+
task.status = 'dispatched';
|
|
2646
|
+
this.taskQueue.markCompleted(taskId, taskResult);
|
|
2647
|
+
this.hollowStreak = 0;
|
|
2648
|
+
this.logDecision('degraded-acceptance', `${taskId}: accepted as degraded — ${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`, 'Prevents cascade-skip of dependent tasks');
|
|
2649
|
+
this.emit({
|
|
2650
|
+
type: 'swarm.task.completed',
|
|
2651
|
+
taskId,
|
|
2652
|
+
success: true,
|
|
2653
|
+
tokensUsed: taskResult.tokensUsed,
|
|
2654
|
+
costUsed: taskResult.costUsed,
|
|
2655
|
+
durationMs: taskResult.durationMs,
|
|
2656
|
+
qualityScore: 2,
|
|
2657
|
+
qualityFeedback: 'Degraded acceptance',
|
|
2658
|
+
output: taskResult.output,
|
|
2659
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
2660
|
+
});
|
|
2661
|
+
this.emit({
|
|
2662
|
+
type: 'swarm.task.resilience',
|
|
2663
|
+
taskId,
|
|
2664
|
+
strategy: 'degraded-acceptance',
|
|
2665
|
+
succeeded: true,
|
|
2666
|
+
reason: `${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`,
|
|
2667
|
+
artifactsFound: existingArtifacts.length,
|
|
2668
|
+
toolCalls,
|
|
2669
|
+
});
|
|
2670
|
+
return true;
|
|
2671
|
+
}
|
|
2672
|
+
// Both strategies failed — log exhaustion for traceability
|
|
2673
|
+
this.logDecision('resilience-exhausted', `${taskId}: no recovery — artifacts: ${existingArtifacts.length}, toolCalls: ${toolCalls}, filesModified: ${taskResult.filesModified?.length ?? 0}`, '');
|
|
2674
|
+
this.emit({
|
|
2675
|
+
type: 'swarm.task.resilience',
|
|
2676
|
+
taskId,
|
|
2677
|
+
strategy: 'none',
|
|
2678
|
+
succeeded: false,
|
|
2679
|
+
reason: `No artifacts found, toolCalls=${toolCalls}, filesModified=${taskResult.filesModified?.length ?? 0}`,
|
|
2680
|
+
artifactsFound: existingArtifacts.length,
|
|
2681
|
+
toolCalls,
|
|
2682
|
+
});
|
|
2683
|
+
return false;
|
|
2684
|
+
}
|
|
2685
|
+
/**
|
|
2686
|
+
* Micro-decompose a complex task into 2-3 smaller subtasks using the LLM.
|
|
2687
|
+
* Called when a complex task (complexity >= 6) fails 2+ times with the same failure mode.
|
|
2688
|
+
* Returns null if decomposition doesn't make sense or LLM can't produce valid subtasks.
|
|
2689
|
+
*/
|
|
2690
|
+
async microDecompose(task) {
|
|
2691
|
+
if ((task.complexity ?? 0) < 4)
|
|
2692
|
+
return null;
|
|
2693
|
+
try {
|
|
2694
|
+
const prompt = `Task "${task.description}" failed ${task.attempts} times on model ${task.assignedModel ?? 'unknown'}.
|
|
2695
|
+
The task has complexity ${task.complexity}/10 and type "${task.type}".
|
|
2696
|
+
${task.targetFiles?.length ? `Target files: ${task.targetFiles.join(', ')}` : ''}
|
|
2697
|
+
|
|
2698
|
+
Break this task into 2-3 smaller, independent subtasks that each handle a portion of the work.
|
|
2699
|
+
Each subtask MUST be simpler (complexity <= ${Math.ceil(task.complexity / 2)}).
|
|
2700
|
+
Each subtask should be self-contained and produce concrete file changes.
|
|
2701
|
+
|
|
2702
|
+
Return JSON ONLY (no markdown, no explanation):
|
|
2703
|
+
{
|
|
2704
|
+
"subtasks": [
|
|
2705
|
+
{ "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number> }
|
|
2706
|
+
]
|
|
2707
|
+
}`;
|
|
2708
|
+
const response = await this.provider.chat([
|
|
2709
|
+
{ role: 'system', content: 'You are a task decomposition assistant. Return only valid JSON.' },
|
|
2710
|
+
{ role: 'user', content: prompt },
|
|
2711
|
+
], {
|
|
2712
|
+
model: this.config.orchestratorModel,
|
|
2713
|
+
maxTokens: 2000,
|
|
2714
|
+
temperature: 0.3,
|
|
2715
|
+
});
|
|
2716
|
+
this.trackOrchestratorUsage(response, 'micro-decompose');
|
|
2717
|
+
// Parse response — handle markdown code blocks
|
|
2718
|
+
let jsonStr = response.content.trim();
|
|
2719
|
+
const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
2720
|
+
if (codeBlockMatch)
|
|
2721
|
+
jsonStr = codeBlockMatch[1].trim();
|
|
2722
|
+
const parsed = JSON.parse(jsonStr);
|
|
2723
|
+
if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
|
|
2724
|
+
return null;
|
|
2725
|
+
}
|
|
2726
|
+
const subtasks = parsed.subtasks.map((sub, idx) => ({
|
|
2727
|
+
id: `${task.id}-sub${idx + 1}`,
|
|
2728
|
+
description: sub.description,
|
|
2729
|
+
type: sub.type ?? task.type,
|
|
2730
|
+
dependencies: [], // Will be set by replaceWithSubtasks
|
|
2731
|
+
status: 'ready',
|
|
2732
|
+
complexity: Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1),
|
|
2733
|
+
wave: task.wave,
|
|
2734
|
+
targetFiles: sub.targetFiles ?? [],
|
|
2735
|
+
readFiles: task.readFiles,
|
|
2736
|
+
attempts: 0,
|
|
2737
|
+
}));
|
|
2738
|
+
return subtasks;
|
|
2739
|
+
}
|
|
2740
|
+
catch (error) {
|
|
2741
|
+
this.logDecision('micro-decompose', `${task.id}: micro-decomposition failed — ${error.message}`, 'Falling through to normal failure path');
|
|
2742
|
+
return null;
|
|
2743
|
+
}
|
|
2744
|
+
}
|
|
2745
|
+
// ─── Pre-Dispatch Auto-Split ──────────────────────────────────────────────
|
|
2746
|
+
/**
|
|
2747
|
+
* Heuristic pre-filter: should this task be considered for auto-split?
|
|
2748
|
+
* Cheap check — no LLM call. Returns true if all conditions are met.
|
|
2749
|
+
*/
|
|
2750
|
+
shouldAutoSplit(task) {
|
|
2751
|
+
const cfg = this.config.autoSplit;
|
|
2752
|
+
if (cfg?.enabled === false)
|
|
2753
|
+
return false;
|
|
2754
|
+
const floor = cfg?.complexityFloor ?? 6;
|
|
2755
|
+
const splittable = cfg?.splittableTypes ?? ['implement', 'refactor', 'test'];
|
|
2756
|
+
// Only first attempts — retries use micro-decompose
|
|
2757
|
+
if (task.attempts > 0)
|
|
2758
|
+
return false;
|
|
2759
|
+
// Complexity check
|
|
2760
|
+
if ((task.complexity ?? 0) < floor)
|
|
2761
|
+
return false;
|
|
2762
|
+
// Type check
|
|
2763
|
+
if (!splittable.includes(task.type))
|
|
2764
|
+
return false;
|
|
2765
|
+
// Must be on critical path (foundation task)
|
|
2766
|
+
if (!task.isFoundation)
|
|
2767
|
+
return false;
|
|
2768
|
+
// Budget capacity check
|
|
2769
|
+
if (!this.budgetPool.hasCapacity())
|
|
2770
|
+
return false;
|
|
2771
|
+
return true;
|
|
2772
|
+
}
|
|
2773
|
+
/**
|
|
2774
|
+
* LLM judge call: ask the orchestrator model whether and how to split a task.
|
|
2775
|
+
* Returns { shouldSplit: false } or { shouldSplit: true, subtasks: [...] }.
|
|
2776
|
+
*/
|
|
2777
|
+
async judgeSplit(task) {
|
|
2778
|
+
const maxSubs = this.config.autoSplit?.maxSubtasks ?? 4;
|
|
2779
|
+
const prompt = `You are evaluating whether a task should be split into parallel subtasks before dispatch.
|
|
2780
|
+
|
|
2781
|
+
TASK: "${task.description}"
|
|
2782
|
+
TYPE: ${task.type}
|
|
2783
|
+
COMPLEXITY: ${task.complexity}/10
|
|
2784
|
+
TARGET FILES: ${task.targetFiles?.join(', ') || 'none specified'}
|
|
2785
|
+
DOWNSTREAM DEPENDENTS: This is a foundation task — other tasks are waiting on it.
|
|
2786
|
+
|
|
2787
|
+
Should this task be split into 2-${maxSubs} parallel subtasks that different workers can execute simultaneously?
|
|
2788
|
+
|
|
2789
|
+
SPLIT if:
|
|
2790
|
+
- The task involves multiple independent pieces of work (e.g., different files, different functions, different concerns)
|
|
2791
|
+
- Parallel execution would meaningfully reduce wall-clock time
|
|
2792
|
+
- The subtasks can produce useful output independently
|
|
2793
|
+
|
|
2794
|
+
DO NOT SPLIT if:
|
|
2795
|
+
- The work is conceptually atomic (one function, one algorithm, tightly coupled logic)
|
|
2796
|
+
- The subtasks would need to coordinate on the same files/functions
|
|
2797
|
+
- Splitting would add more overhead than it saves
|
|
2798
|
+
|
|
2799
|
+
Return JSON ONLY:
|
|
2800
|
+
{
|
|
2801
|
+
"shouldSplit": true/false,
|
|
2802
|
+
"reason": "brief explanation",
|
|
2803
|
+
"subtasks": [
|
|
2804
|
+
{ "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number 1-10> }
|
|
2805
|
+
]
|
|
2806
|
+
}
|
|
2807
|
+
If shouldSplit is false, omit subtasks.`;
|
|
2808
|
+
const response = await this.provider.chat([
|
|
2809
|
+
{ role: 'system', content: 'You are a task planning judge. Return only valid JSON.' },
|
|
2810
|
+
{ role: 'user', content: prompt },
|
|
2811
|
+
], {
|
|
2812
|
+
model: this.config.orchestratorModel,
|
|
2813
|
+
maxTokens: 1500,
|
|
2814
|
+
temperature: 0.2,
|
|
2815
|
+
});
|
|
2816
|
+
this.trackOrchestratorUsage(response, 'auto-split-judge');
|
|
2817
|
+
// Parse response — reuse markdown code block stripping from microDecompose
|
|
2818
|
+
let jsonStr = response.content.trim();
|
|
2819
|
+
const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
2820
|
+
if (codeBlockMatch)
|
|
2821
|
+
jsonStr = codeBlockMatch[1].trim();
|
|
2822
|
+
const parsed = JSON.parse(jsonStr);
|
|
2823
|
+
if (!parsed.shouldSplit) {
|
|
2824
|
+
this.logDecision('auto-split', `${task.id}: judge says no split — ${parsed.reason}`, '');
|
|
2825
|
+
return { shouldSplit: false };
|
|
2826
|
+
}
|
|
2827
|
+
if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
|
|
2828
|
+
return { shouldSplit: false };
|
|
2829
|
+
}
|
|
2830
|
+
// Build SwarmTask[] from judge output (same pattern as microDecompose)
|
|
2831
|
+
const subtasks = parsed.subtasks.slice(0, maxSubs).map((sub, idx) => ({
|
|
2832
|
+
id: `${task.id}-split${idx + 1}`,
|
|
2833
|
+
description: sub.description,
|
|
2834
|
+
type: sub.type ?? task.type,
|
|
2835
|
+
dependencies: [],
|
|
2836
|
+
status: 'ready',
|
|
2837
|
+
complexity: Math.max(3, Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1)),
|
|
2838
|
+
wave: task.wave,
|
|
2839
|
+
targetFiles: sub.targetFiles ?? [],
|
|
2840
|
+
readFiles: task.readFiles,
|
|
2841
|
+
attempts: 0,
|
|
2842
|
+
rescueContext: `Auto-split from ${task.id} (original complexity ${task.complexity})`,
|
|
2843
|
+
}));
|
|
2844
|
+
this.logDecision('auto-split', `${task.id}: split into ${subtasks.length} subtasks — ${parsed.reason}`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
|
|
2845
|
+
return { shouldSplit: true, subtasks };
|
|
2846
|
+
}
|
|
2847
|
+
/**
|
|
2848
|
+
* V7: Compute effective retry limit for a task.
|
|
2849
|
+
* F10: Fixup tasks get max 2 retries (3 attempts total) — one full model-failover cycle.
|
|
2850
|
+
* Foundation tasks get +1 retry to reduce cascade failure risk.
|
|
2851
|
+
*/
|
|
2852
|
+
getEffectiveRetries(task) {
|
|
2853
|
+
const isFixup = 'fixesTaskId' in task;
|
|
2854
|
+
if (isFixup)
|
|
2855
|
+
return 2; // Fixup tasks: 2 retries max (3 attempts total)
|
|
2856
|
+
return task.isFoundation ? this.config.workerRetries + 1 : this.config.workerRetries;
|
|
2857
|
+
}
|
|
2858
|
+
/**
|
|
2859
|
+
* F22: Build a brief summary of swarm progress for retry context.
|
|
2860
|
+
* Helps retrying workers understand what the swarm has already accomplished.
|
|
2861
|
+
*/
|
|
2862
|
+
getSwarmProgressSummary() {
|
|
2863
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2864
|
+
const completed = allTasks.filter(t => t.status === 'completed');
|
|
2865
|
+
if (completed.length === 0)
|
|
2866
|
+
return '';
|
|
2867
|
+
const lines = [];
|
|
2868
|
+
for (const task of completed) {
|
|
2869
|
+
const score = task.result?.qualityScore ? ` (${task.result.qualityScore}/5)` : '';
|
|
2870
|
+
lines.push(`- ${task.id}: ${task.description.slice(0, 80)}${score}`);
|
|
2871
|
+
}
|
|
2872
|
+
// Collect files created by completed tasks
|
|
2873
|
+
const files = new Set();
|
|
2874
|
+
const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
|
|
2875
|
+
for (const task of completed) {
|
|
2876
|
+
for (const f of (task.result?.filesModified ?? []))
|
|
2877
|
+
files.add(f);
|
|
2878
|
+
for (const f of (task.targetFiles ?? [])) {
|
|
2879
|
+
try {
|
|
2880
|
+
const resolved = path.resolve(baseDir, f);
|
|
2881
|
+
if (fs.existsSync(resolved))
|
|
2882
|
+
files.add(f);
|
|
2883
|
+
}
|
|
2884
|
+
catch { /* skip */ }
|
|
2885
|
+
}
|
|
2886
|
+
}
|
|
2887
|
+
const parts = [`The following tasks have completed successfully:\n${lines.join('\n')}`];
|
|
2888
|
+
if (files.size > 0) {
|
|
2889
|
+
parts.push(`Files already created/modified: ${[...files].slice(0, 20).join(', ')}`);
|
|
2890
|
+
parts.push('You can build on these existing files.');
|
|
2891
|
+
}
|
|
2892
|
+
return parts.join('\n');
|
|
2893
|
+
}
|
|
1099
2894
|
/** Get a model health summary for emitting events. */
|
|
1100
2895
|
getModelHealthSummary(model) {
|
|
1101
2896
|
const records = this.healthTracker.getAllRecords();
|