attocode 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +103 -3
- package/dist/src/agent.d.ts +6 -0
- package/dist/src/agent.d.ts.map +1 -1
- package/dist/src/agent.js +504 -49
- package/dist/src/agent.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +23 -2
- package/dist/src/cli.js.map +1 -1
- package/dist/src/core/protocol/types.d.ts +8 -8
- package/dist/src/defaults.d.ts +6 -1
- package/dist/src/defaults.d.ts.map +1 -1
- package/dist/src/defaults.js +36 -2
- package/dist/src/defaults.js.map +1 -1
- package/dist/src/integrations/agent-registry.d.ts +11 -0
- package/dist/src/integrations/agent-registry.d.ts.map +1 -1
- package/dist/src/integrations/agent-registry.js.map +1 -1
- package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
- package/dist/src/integrations/auto-compaction.js +5 -1
- package/dist/src/integrations/auto-compaction.js.map +1 -1
- package/dist/src/integrations/bash-policy.d.ts +33 -0
- package/dist/src/integrations/bash-policy.d.ts.map +1 -0
- package/dist/src/integrations/bash-policy.js +142 -0
- package/dist/src/integrations/bash-policy.js.map +1 -0
- package/dist/src/integrations/codebase-context.d.ts +5 -0
- package/dist/src/integrations/codebase-context.d.ts.map +1 -1
- package/dist/src/integrations/codebase-context.js +33 -0
- package/dist/src/integrations/codebase-context.js.map +1 -1
- package/dist/src/integrations/delegation-protocol.js +2 -2
- package/dist/src/integrations/delegation-protocol.js.map +1 -1
- package/dist/src/integrations/economics.d.ts +42 -0
- package/dist/src/integrations/economics.d.ts.map +1 -1
- package/dist/src/integrations/economics.js +130 -14
- package/dist/src/integrations/economics.js.map +1 -1
- package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
- package/dist/src/integrations/hierarchical-config.js +17 -0
- package/dist/src/integrations/hierarchical-config.js.map +1 -1
- package/dist/src/integrations/index.d.ts +3 -1
- package/dist/src/integrations/index.d.ts.map +1 -1
- package/dist/src/integrations/index.js +3 -1
- package/dist/src/integrations/index.js.map +1 -1
- package/dist/src/integrations/policy-engine.d.ts +55 -0
- package/dist/src/integrations/policy-engine.d.ts.map +1 -0
- package/dist/src/integrations/policy-engine.js +247 -0
- package/dist/src/integrations/policy-engine.js.map +1 -0
- package/dist/src/integrations/safety.d.ts +5 -4
- package/dist/src/integrations/safety.d.ts.map +1 -1
- package/dist/src/integrations/safety.js +32 -7
- package/dist/src/integrations/safety.js.map +1 -1
- package/dist/src/integrations/sandbox/basic.d.ts +7 -0
- package/dist/src/integrations/sandbox/basic.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/basic.js +27 -2
- package/dist/src/integrations/sandbox/basic.js.map +1 -1
- package/dist/src/integrations/sandbox/index.d.ts +6 -0
- package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/index.js +3 -0
- package/dist/src/integrations/sandbox/index.js.map +1 -1
- package/dist/src/integrations/sandbox/landlock.d.ts.map +1 -1
- package/dist/src/integrations/sandbox/landlock.js +3 -0
- package/dist/src/integrations/sandbox/landlock.js.map +1 -1
- package/dist/src/integrations/self-improvement.d.ts.map +1 -1
- package/dist/src/integrations/self-improvement.js +12 -0
- package/dist/src/integrations/self-improvement.js.map +1 -1
- package/dist/src/integrations/smart-decomposer.d.ts +18 -1
- package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
- package/dist/src/integrations/smart-decomposer.js +72 -0
- package/dist/src/integrations/smart-decomposer.js.map +1 -1
- package/dist/src/integrations/swarm/index.d.ts +1 -1
- package/dist/src/integrations/swarm/index.d.ts.map +1 -1
- package/dist/src/integrations/swarm/index.js.map +1 -1
- package/dist/src/integrations/swarm/model-selector.d.ts +15 -0
- package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
- package/dist/src/integrations/swarm/model-selector.js +99 -20
- package/dist/src/integrations/swarm/model-selector.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-budget.d.ts +4 -0
- package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-budget.js +6 -0
- package/dist/src/integrations/swarm/swarm-budget.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-config-loader.js +154 -7
- package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +12 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-event-bridge.js +170 -23
- package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.d.ts +55 -1
- package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-events.js +22 -5
- package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +124 -8
- package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-orchestrator.js +1668 -96
- package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
- package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +83 -2
- package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
- package/dist/src/integrations/swarm/swarm-quality-gate.js +278 -19
- package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
- package/dist/src/integrations/swarm/task-queue.d.ts +44 -0
- package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
- package/dist/src/integrations/swarm/task-queue.js +274 -11
- package/dist/src/integrations/swarm/task-queue.js.map +1 -1
- package/dist/src/integrations/swarm/types.d.ts +210 -13
- package/dist/src/integrations/swarm/types.d.ts.map +1 -1
- package/dist/src/integrations/swarm/types.js +61 -8
- package/dist/src/integrations/swarm/types.js.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.d.ts +11 -4
- package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
- package/dist/src/integrations/swarm/worker-pool.js +173 -43
- package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
- package/dist/src/integrations/tool-recommendation.d.ts +7 -4
- package/dist/src/integrations/tool-recommendation.d.ts.map +1 -1
- package/dist/src/integrations/tool-recommendation.js +58 -5
- package/dist/src/integrations/tool-recommendation.js.map +1 -1
- package/dist/src/integrations/work-log.js +4 -4
- package/dist/src/integrations/work-log.js.map +1 -1
- package/dist/src/main.js +26 -1
- package/dist/src/main.js.map +1 -1
- package/dist/src/modes/repl.d.ts.map +1 -1
- package/dist/src/modes/repl.js +10 -4
- package/dist/src/modes/repl.js.map +1 -1
- package/dist/src/modes/tui.d.ts.map +1 -1
- package/dist/src/modes/tui.js +5 -0
- package/dist/src/modes/tui.js.map +1 -1
- package/dist/src/modes.d.ts.map +1 -1
- package/dist/src/modes.js +4 -27
- package/dist/src/modes.js.map +1 -1
- package/dist/src/tools/agent.d.ts.map +1 -1
- package/dist/src/tools/agent.js +11 -2
- package/dist/src/tools/agent.js.map +1 -1
- package/dist/src/tools/bash.d.ts +3 -3
- package/dist/src/tools/coercion.d.ts +6 -0
- package/dist/src/tools/coercion.d.ts.map +1 -1
- package/dist/src/tools/coercion.js +13 -0
- package/dist/src/tools/coercion.js.map +1 -1
- package/dist/src/tools/file.d.ts +2 -2
- package/dist/src/tools/file.js +2 -2
- package/dist/src/tools/file.js.map +1 -1
- package/dist/src/tools/permission.d.ts.map +1 -1
- package/dist/src/tools/permission.js +4 -111
- package/dist/src/tools/permission.js.map +1 -1
- package/dist/src/tracing/trace-collector.d.ts +167 -0
- package/dist/src/tracing/trace-collector.d.ts.map +1 -1
- package/dist/src/tracing/trace-collector.js +137 -0
- package/dist/src/tracing/trace-collector.js.map +1 -1
- package/dist/src/tracing/types.d.ts +105 -1
- package/dist/src/tracing/types.d.ts.map +1 -1
- package/dist/src/tracing/types.js.map +1 -1
- package/dist/src/tui/app.d.ts.map +1 -1
- package/dist/src/tui/app.js +34 -5
- package/dist/src/tui/app.js.map +1 -1
- package/dist/src/types.d.ts +71 -0
- package/dist/src/types.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -16,28 +16,70 @@
|
|
|
16
16
|
* - State persistence and resume
|
|
17
17
|
* - Orchestrator decision logging
|
|
18
18
|
*/
|
|
19
|
-
import
|
|
19
|
+
import * as fs from 'node:fs';
|
|
20
|
+
import * as path from 'node:path';
|
|
21
|
+
import { createSmartDecomposer, parseDecompositionResponse, validateDecomposition } from '../smart-decomposer.js';
|
|
20
22
|
import { createResultSynthesizer } from '../result-synthesizer.js';
|
|
21
|
-
import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG,
|
|
23
|
+
import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, getTaskTypeConfig } from './types.js';
|
|
22
24
|
import { createSwarmTaskQueue } from './task-queue.js';
|
|
23
25
|
import { createSwarmBudgetPool } from './swarm-budget.js';
|
|
24
26
|
import { createSwarmWorkerPool } from './worker-pool.js';
|
|
25
|
-
import { evaluateWorkerOutput } from './swarm-quality-gate.js';
|
|
27
|
+
import { evaluateWorkerOutput, runPreFlightChecks, checkArtifacts, checkArtifactsEnhanced, runConcreteChecks } from './swarm-quality-gate.js';
|
|
26
28
|
import { ModelHealthTracker, selectAlternativeModel } from './model-selector.js';
|
|
27
29
|
import { SwarmStateStore } from './swarm-state-store.js';
|
|
28
30
|
// ─── Hollow Completion Detection ──────────────────────────────────────────
|
|
29
31
|
/**
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
32
|
+
* V11: Hollow completion detection — catches empty completions AND "success" with failure language.
|
|
33
|
+
* Zero tool calls AND trivial output is always hollow.
|
|
34
|
+
* Additionally, success=true but output containing failure admissions is also hollow —
|
|
35
|
+
* this catches workers that report success but actually did no useful work.
|
|
33
36
|
*/
|
|
34
|
-
|
|
37
|
+
const FAILURE_INDICATORS = [
|
|
38
|
+
'budget exhausted', 'unable to complete', 'could not complete',
|
|
39
|
+
'ran out of budget', 'no changes were made', 'no files were modified',
|
|
40
|
+
'no files were created', 'failed to complete', 'before research could begin',
|
|
41
|
+
'i was unable to', 'i could not', 'unfortunately i',
|
|
42
|
+
];
|
|
43
|
+
const BOILERPLATE_INDICATORS = [
|
|
44
|
+
'task completed successfully', 'i have completed the task',
|
|
45
|
+
'the task has been completed', 'done', 'completed', 'finished',
|
|
46
|
+
'no issues found', 'everything looks good', 'all tasks completed',
|
|
47
|
+
];
|
|
48
|
+
export function isHollowCompletion(spawnResult, taskType, swarmConfig) {
|
|
35
49
|
// Timeout uses toolCalls === -1, not hollow
|
|
36
|
-
if (spawnResult.metrics.toolCalls === -1)
|
|
50
|
+
if ((spawnResult.metrics.toolCalls ?? 0) === -1)
|
|
37
51
|
return false;
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
52
|
+
const toolCalls = spawnResult.metrics.toolCalls ?? 0;
|
|
53
|
+
// Truly empty completions: zero tools AND trivial output
|
|
54
|
+
// P4: Higher threshold (120 chars) + configurable via SwarmConfig
|
|
55
|
+
const hollowThreshold = swarmConfig?.hollowOutputThreshold ?? 120;
|
|
56
|
+
if (toolCalls === 0
|
|
57
|
+
&& (spawnResult.output?.trim().length ?? 0) < hollowThreshold) {
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
// P4: Boilerplate detection — zero tools AND short output that's just boilerplate
|
|
61
|
+
if (toolCalls === 0 && (spawnResult.output?.trim().length ?? 0) < 300) {
|
|
62
|
+
const outputLower = (spawnResult.output ?? '').toLowerCase().trim();
|
|
63
|
+
if (BOILERPLATE_INDICATORS.some(b => outputLower.includes(b))) {
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
// "Success" that admits failure: worker claims success but output contains failure language
|
|
68
|
+
if (spawnResult.success) {
|
|
69
|
+
const outputLower = (spawnResult.output ?? '').toLowerCase();
|
|
70
|
+
if (FAILURE_INDICATORS.some(f => outputLower.includes(f))) {
|
|
71
|
+
return true;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
// V7: Use configurable requiresToolCalls from TaskTypeConfig.
|
|
75
|
+
// For action-oriented tasks (implement/test/refactor/etc), zero tool calls is ALWAYS hollow.
|
|
76
|
+
if (taskType) {
|
|
77
|
+
const typeConfig = getTaskTypeConfig(taskType, swarmConfig);
|
|
78
|
+
if (typeConfig.requiresToolCalls && toolCalls === 0) {
|
|
79
|
+
return true;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return false;
|
|
41
83
|
}
|
|
42
84
|
// ─── Orchestrator ──────────────────────────────────────────────────────────
|
|
43
85
|
export class SwarmOrchestrator {
|
|
@@ -61,10 +103,15 @@ export class SwarmOrchestrator {
|
|
|
61
103
|
retries = 0;
|
|
62
104
|
startTime = 0;
|
|
63
105
|
modelUsage = new Map();
|
|
106
|
+
// Orchestrator's own LLM usage (separate from worker usage)
|
|
107
|
+
orchestratorTokens = 0;
|
|
108
|
+
orchestratorCost = 0;
|
|
109
|
+
orchestratorCalls = 0;
|
|
64
110
|
// V2: Planning, review, verification, health, persistence
|
|
65
111
|
plan;
|
|
66
112
|
waveReviews = [];
|
|
67
113
|
verificationResult;
|
|
114
|
+
artifactInventory;
|
|
68
115
|
orchestratorDecisions = [];
|
|
69
116
|
healthTracker;
|
|
70
117
|
stateStore;
|
|
@@ -75,25 +122,63 @@ export class SwarmOrchestrator {
|
|
|
75
122
|
static CIRCUIT_BREAKER_WINDOW_MS = 30_000;
|
|
76
123
|
static CIRCUIT_BREAKER_THRESHOLD = 3;
|
|
77
124
|
static CIRCUIT_BREAKER_PAUSE_MS = 15_000;
|
|
78
|
-
//
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
static QUALITY_CIRCUIT_BREAKER_THRESHOLD =
|
|
125
|
+
// P3: Per-model quality gate circuit breaker (replaces global circuit breaker)
|
|
126
|
+
perModelQualityRejections = new Map();
|
|
127
|
+
qualityGateDisabledModels = new Set();
|
|
128
|
+
static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 5;
|
|
129
|
+
// Hollow completion streak: early termination when single-model swarm produces only hollows
|
|
130
|
+
hollowStreak = 0;
|
|
131
|
+
static HOLLOW_STREAK_THRESHOLD = 3;
|
|
132
|
+
// V7: Global dispatch + hollow ratio tracking for multi-model termination
|
|
133
|
+
totalDispatches = 0;
|
|
134
|
+
totalHollows = 0;
|
|
135
|
+
// Hollow ratio warning (fired once, then suppressed to avoid log spam)
|
|
136
|
+
hollowRatioWarned = false;
|
|
137
|
+
// P7: Adaptive dispatch stagger — increases on rate limits, decreases on success
|
|
138
|
+
adaptiveStaggerMs = 0; // Initialized from config in constructor
|
|
139
|
+
// F25: Consecutive timeout tracking per task — early-fail after limit
|
|
140
|
+
taskTimeoutCounts = new Map();
|
|
141
|
+
// Original prompt for re-planning on resume
|
|
142
|
+
originalPrompt = '';
|
|
143
|
+
// Mid-swarm re-planning: only once per swarm execution
|
|
144
|
+
hasReplanned = false;
|
|
82
145
|
constructor(config, provider, agentRegistry, spawnAgentFn, blackboard) {
|
|
83
146
|
this.config = { ...DEFAULT_SWARM_CONFIG, ...config };
|
|
84
147
|
this.provider = provider;
|
|
85
148
|
this.blackboard = blackboard;
|
|
86
149
|
this.spawnAgentFn = spawnAgentFn;
|
|
87
150
|
this.healthTracker = new ModelHealthTracker();
|
|
151
|
+
this.adaptiveStaggerMs = this.getStaggerMs();
|
|
88
152
|
this.taskQueue = createSwarmTaskQueue();
|
|
89
153
|
this.budgetPool = createSwarmBudgetPool(this.config);
|
|
90
|
-
this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool);
|
|
154
|
+
this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool, this.healthTracker);
|
|
91
155
|
// Initialize state store if persistence enabled
|
|
92
156
|
if (this.config.enablePersistence) {
|
|
93
157
|
this.stateStore = new SwarmStateStore(this.config.stateDir ?? '.agent/swarm-state', this.config.resumeSessionId);
|
|
94
158
|
}
|
|
95
159
|
// C1: Build LLM decompose function with explicit JSON schema
|
|
96
160
|
const llmDecompose = async (task, _context) => {
|
|
161
|
+
// V7: Dynamically build the allowed type list from built-in + user-defined types
|
|
162
|
+
const builtinTypes = ['research', 'analysis', 'design', 'implement', 'test', 'refactor', 'review', 'document', 'integrate', 'deploy', 'merge'];
|
|
163
|
+
const customTypes = Object.keys(this.config.taskTypes ?? {}).filter(t => !builtinTypes.includes(t));
|
|
164
|
+
const allTypes = [...builtinTypes, ...customTypes];
|
|
165
|
+
const typeListStr = allTypes.map(t => `"${t}"`).join(' | ');
|
|
166
|
+
// Build custom type descriptions so the LLM knows when to use them
|
|
167
|
+
let customTypeSection = '';
|
|
168
|
+
if (customTypes.length > 0) {
|
|
169
|
+
const descriptions = customTypes.map(t => {
|
|
170
|
+
const cfg = this.config.taskTypes[t];
|
|
171
|
+
const parts = [` - "${t}"`];
|
|
172
|
+
if (cfg.capability)
|
|
173
|
+
parts.push(`(capability: ${cfg.capability})`);
|
|
174
|
+
if (cfg.promptTemplate)
|
|
175
|
+
parts.push(`— uses ${cfg.promptTemplate} workflow`);
|
|
176
|
+
if (cfg.timeout)
|
|
177
|
+
parts.push(`— timeout: ${Math.round(cfg.timeout / 60000)}min`);
|
|
178
|
+
return parts.join(' ');
|
|
179
|
+
}).join('\n');
|
|
180
|
+
customTypeSection = `\n\nCustom task types available:\n${descriptions}\nUse these when their description matches the subtask's purpose.`;
|
|
181
|
+
}
|
|
97
182
|
const systemPrompt = `You are a task decomposition expert. Break down the given task into well-defined subtasks with clear dependencies.
|
|
98
183
|
|
|
99
184
|
CRITICAL: Dependencies MUST use zero-based integer indices referring to other subtasks in the array.
|
|
@@ -103,7 +188,7 @@ Respond with valid JSON matching this exact schema:
|
|
|
103
188
|
"subtasks": [
|
|
104
189
|
{
|
|
105
190
|
"description": "Clear description of what this subtask does",
|
|
106
|
-
"type":
|
|
191
|
+
"type": ${typeListStr},
|
|
107
192
|
"complexity": 1-10,
|
|
108
193
|
"dependencies": [0, 1],
|
|
109
194
|
"parallelizable": true | false,
|
|
@@ -112,7 +197,7 @@ Respond with valid JSON matching this exact schema:
|
|
|
112
197
|
],
|
|
113
198
|
"strategy": "sequential" | "parallel" | "hierarchical" | "adaptive" | "pipeline",
|
|
114
199
|
"reasoning": "Brief explanation of why this decomposition was chosen"
|
|
115
|
-
}
|
|
200
|
+
}${customTypeSection}
|
|
116
201
|
|
|
117
202
|
EXAMPLE 1 — Research task (3 parallel research + 1 merge):
|
|
118
203
|
{
|
|
@@ -152,6 +237,7 @@ Rules:
|
|
|
152
237
|
maxTokens: 4000,
|
|
153
238
|
temperature: 0.3,
|
|
154
239
|
});
|
|
240
|
+
this.trackOrchestratorUsage(response, 'decompose');
|
|
155
241
|
// Use parseDecompositionResponse which handles markdown code blocks and edge cases
|
|
156
242
|
return parseDecompositionResponse(response.content);
|
|
157
243
|
};
|
|
@@ -195,6 +281,25 @@ Rules:
|
|
|
195
281
|
}
|
|
196
282
|
}
|
|
197
283
|
}
|
|
284
|
+
/**
|
|
285
|
+
* Track token usage from an orchestrator LLM call.
|
|
286
|
+
*/
|
|
287
|
+
trackOrchestratorUsage(response, purpose) {
|
|
288
|
+
if (!response.usage)
|
|
289
|
+
return;
|
|
290
|
+
const tokens = response.usage.total_tokens ?? ((response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0));
|
|
291
|
+
const cost = tokens * 0.000015; // ~$15/M tokens average for orchestrator models
|
|
292
|
+
this.orchestratorTokens += tokens;
|
|
293
|
+
this.orchestratorCost += cost;
|
|
294
|
+
this.orchestratorCalls++;
|
|
295
|
+
this.emit({
|
|
296
|
+
type: 'swarm.orchestrator.llm',
|
|
297
|
+
model: this.config.orchestratorModel,
|
|
298
|
+
purpose,
|
|
299
|
+
tokens,
|
|
300
|
+
cost,
|
|
301
|
+
});
|
|
302
|
+
}
|
|
198
303
|
/**
|
|
199
304
|
* Execute the full swarm pipeline for a task.
|
|
200
305
|
*
|
|
@@ -211,6 +316,7 @@ Rules:
|
|
|
211
316
|
*/
|
|
212
317
|
async execute(task) {
|
|
213
318
|
this.startTime = Date.now();
|
|
319
|
+
this.originalPrompt = task;
|
|
214
320
|
try {
|
|
215
321
|
// V2: Check for resume
|
|
216
322
|
if (this.config.resumeSessionId && this.stateStore) {
|
|
@@ -219,15 +325,85 @@ Rules:
|
|
|
219
325
|
// Phase 1: Decompose
|
|
220
326
|
this.currentPhase = 'decomposing';
|
|
221
327
|
this.emit({ type: 'swarm.phase.progress', phase: 'decomposing', message: 'Decomposing task into subtasks...' });
|
|
222
|
-
|
|
328
|
+
let decomposition = await this.decompose(task);
|
|
223
329
|
if (!decomposition) {
|
|
224
330
|
this.currentPhase = 'failed';
|
|
225
331
|
return this.buildErrorResult('Decomposition failed — task may be too simple for swarm mode');
|
|
226
332
|
}
|
|
333
|
+
// F5: Validate decomposition — check for cycles, invalid deps, granularity
|
|
334
|
+
const validation = validateDecomposition(decomposition);
|
|
335
|
+
if (validation.warnings.length > 0) {
|
|
336
|
+
this.logDecision('decomposition-validation', `Warnings: ${validation.warnings.join('; ')}`, '');
|
|
337
|
+
}
|
|
338
|
+
if (!validation.valid) {
|
|
339
|
+
this.logDecision('decomposition-validation', `Invalid decomposition: ${validation.issues.join('; ')}`, 'Retrying...');
|
|
340
|
+
// Retry decomposition once with feedback
|
|
341
|
+
decomposition = await this.decompose(`${task}\n\nIMPORTANT: Previous decomposition was invalid: ${validation.issues.join('. ')}. Fix these issues.`);
|
|
342
|
+
if (!decomposition) {
|
|
343
|
+
this.currentPhase = 'failed';
|
|
344
|
+
return this.buildErrorResult(`Decomposition validation failed: ${validation.issues.join('; ')}`);
|
|
345
|
+
}
|
|
346
|
+
const retryValidation = validateDecomposition(decomposition);
|
|
347
|
+
if (!retryValidation.valid) {
|
|
348
|
+
this.logDecision('decomposition-validation', `Retry still invalid: ${retryValidation.issues.join('; ')}`, 'Proceeding anyway');
|
|
349
|
+
}
|
|
350
|
+
}
|
|
227
351
|
// Phase 2: Schedule into waves
|
|
228
352
|
this.currentPhase = 'scheduling';
|
|
229
353
|
this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduling ${decomposition.subtasks.length} subtasks into waves...` });
|
|
230
354
|
this.taskQueue.loadFromDecomposition(decomposition, this.config);
|
|
355
|
+
// F3: Dynamic orchestrator reserve scaling based on subtask count.
|
|
356
|
+
// More subtasks = more quality gate calls, synthesis work, and review overhead.
|
|
357
|
+
// Formula: max(configured ratio, 5% per subtask), capped at 40%.
|
|
358
|
+
const subtaskCount = decomposition.subtasks.length;
|
|
359
|
+
const dynamicReserveRatio = Math.min(0.40, Math.max(this.config.orchestratorReserveRatio, subtaskCount * 0.05));
|
|
360
|
+
if (dynamicReserveRatio > this.config.orchestratorReserveRatio) {
|
|
361
|
+
this.logDecision('budget-scaling', `Scaled orchestrator reserve from ${(this.config.orchestratorReserveRatio * 100).toFixed(0)}% to ${(dynamicReserveRatio * 100).toFixed(0)}% for ${subtaskCount} subtasks`, '');
|
|
362
|
+
}
|
|
363
|
+
// Foundation task detection: tasks that are the sole dependency of 3+ downstream
|
|
364
|
+
// tasks are critical — if they fail, the entire swarm cascade-skips.
|
|
365
|
+
// Give them extra retries and timeout scaling.
|
|
366
|
+
this.detectFoundationTasks();
|
|
367
|
+
// D3/F1: Probe model capability before dispatch (default: true)
|
|
368
|
+
if (this.config.probeModels !== false) {
|
|
369
|
+
await this.probeModelCapability();
|
|
370
|
+
// F15/F23: Handle all-models-failed probe scenario
|
|
371
|
+
// Resolve strategy: explicit probeFailureStrategy > legacy ignoreProbeFailures > default 'warn-and-try'
|
|
372
|
+
const probeStrategy = this.config.probeFailureStrategy
|
|
373
|
+
?? (this.config.ignoreProbeFailures ? 'warn-and-try' : 'warn-and-try');
|
|
374
|
+
const uniqueModels = [...new Set(this.config.workers.map(w => w.model))];
|
|
375
|
+
const healthyModels = this.healthTracker.getHealthy(uniqueModels);
|
|
376
|
+
if (healthyModels.length === 0 && uniqueModels.length > 0) {
|
|
377
|
+
if (probeStrategy === 'abort') {
|
|
378
|
+
// Hard abort — no tasks dispatched
|
|
379
|
+
const reason = `All ${uniqueModels.length} worker model(s) failed capability probes — no model can make tool calls. Aborting swarm to prevent budget waste. Fix model configuration and retry.`;
|
|
380
|
+
this.logDecision('probe-abort', reason, `Models tested: ${uniqueModels.join(', ')}`);
|
|
381
|
+
this.emit({ type: 'swarm.abort', reason });
|
|
382
|
+
this.skipRemainingTasks(reason);
|
|
383
|
+
const totalTasks = this.taskQueue.getStats().total;
|
|
384
|
+
const abortStats = {
|
|
385
|
+
completedTasks: 0, failedTasks: 0, skippedTasks: totalTasks,
|
|
386
|
+
totalTasks, totalWaves: 0, totalTokens: 0, totalCost: 0,
|
|
387
|
+
totalDurationMs: Date.now() - this.startTime,
|
|
388
|
+
qualityRejections: 0, retries: 0,
|
|
389
|
+
modelUsage: new Map(),
|
|
390
|
+
};
|
|
391
|
+
this.emit({ type: 'swarm.complete', stats: abortStats, errors: this.errors });
|
|
392
|
+
return {
|
|
393
|
+
success: false, summary: reason,
|
|
394
|
+
tasks: this.taskQueue.getAllTasks(), stats: abortStats, errors: this.errors,
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
else {
|
|
398
|
+
// F23: warn-and-try — log warning, reset health, let real tasks prove capability
|
|
399
|
+
this.logDecision('probe-warning', `All ${uniqueModels.length} model(s) failed probe — continuing anyway (strategy: warn-and-try)`, 'Will abort after first real task failure if model cannot use tools');
|
|
400
|
+
// Reset health so dispatch doesn't skip all models
|
|
401
|
+
for (const model of uniqueModels) {
|
|
402
|
+
this.healthTracker.recordSuccess(model, 0);
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
}
|
|
231
407
|
// Emit skip events when tasks are cascade-skipped due to dependency failures
|
|
232
408
|
this.taskQueue.setOnCascadeSkip((skippedTaskId, reason) => {
|
|
233
409
|
this.emit({ type: 'swarm.task.skipped', taskId: skippedTaskId, reason });
|
|
@@ -262,9 +438,14 @@ Rules:
|
|
|
262
438
|
// Phase 3: Execute waves (planning runs concurrently)
|
|
263
439
|
this.currentPhase = 'executing';
|
|
264
440
|
await this.executeWaves();
|
|
441
|
+
// V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
|
|
442
|
+
if (!this.cancelled)
|
|
443
|
+
await this.finalRescuePass();
|
|
265
444
|
// Ensure planning completed before verification/synthesis
|
|
266
445
|
if (planPromise)
|
|
267
446
|
await planPromise;
|
|
447
|
+
// Post-wave artifact audit: scan filesystem for files created by workers
|
|
448
|
+
this.artifactInventory = this.buildArtifactInventory();
|
|
268
449
|
// V2: Phase 3.5: Verify integration
|
|
269
450
|
if (this.config.enableVerification && this.plan?.integrationTestPlan) {
|
|
270
451
|
this.currentPhase = 'verifying';
|
|
@@ -280,10 +461,14 @@ Rules:
|
|
|
280
461
|
const executionStats = this.buildStats();
|
|
281
462
|
// V2: Final checkpoint
|
|
282
463
|
this.checkpoint('final');
|
|
283
|
-
|
|
464
|
+
const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
|
|
465
|
+
this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
|
|
284
466
|
return {
|
|
285
467
|
success: executionStats.completedTasks > 0,
|
|
468
|
+
partialSuccess: !executionStats.completedTasks && hasArtifacts,
|
|
469
|
+
partialFailure: executionStats.failedTasks > 0,
|
|
286
470
|
synthesisResult: synthesisResult ?? undefined,
|
|
471
|
+
artifactInventory: this.artifactInventory,
|
|
287
472
|
summary: this.buildSummary(executionStats),
|
|
288
473
|
tasks: this.taskQueue.getAllTasks(),
|
|
289
474
|
stats: executionStats,
|
|
@@ -383,6 +568,7 @@ Respond with valid JSON:
|
|
|
383
568
|
maxTokens: 3000,
|
|
384
569
|
temperature: 0.3,
|
|
385
570
|
});
|
|
571
|
+
this.trackOrchestratorUsage(response, 'plan');
|
|
386
572
|
const parsed = this.parseJSON(response.content);
|
|
387
573
|
if (parsed) {
|
|
388
574
|
this.plan = {
|
|
@@ -454,6 +640,7 @@ Respond with valid JSON:
|
|
|
454
640
|
},
|
|
455
641
|
{ role: 'user', content: `Review these wave ${waveIndex + 1} outputs:\n\n${taskSummaries}` },
|
|
456
642
|
], { model: reviewModel, maxTokens: 2000, temperature: 0.3 });
|
|
643
|
+
this.trackOrchestratorUsage(response, 'review');
|
|
457
644
|
const parsed = this.parseJSON(response.content);
|
|
458
645
|
if (!parsed)
|
|
459
646
|
return null;
|
|
@@ -578,6 +765,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
578
765
|
},
|
|
579
766
|
{ role: 'user', content: `Original task: ${task}\n\nFailed verifications:\n${failedSteps}` },
|
|
580
767
|
], { model: this.config.plannerModel ?? this.config.orchestratorModel, maxTokens: 1500, temperature: 0.3 });
|
|
768
|
+
this.trackOrchestratorUsage(response, 'verification-fixup');
|
|
581
769
|
const parsed = this.parseJSON(response.content);
|
|
582
770
|
if (parsed?.fixups && parsed.fixups.length > 0) {
|
|
583
771
|
const fixupTasks = parsed.fixups.map((f, i) => ({
|
|
@@ -628,6 +816,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
628
816
|
this.logDecision('resume', `Resuming from wave ${checkpoint.currentWave}`, `Session: ${checkpoint.sessionId}`);
|
|
629
817
|
this.emit({ type: 'swarm.state.resume', sessionId: checkpoint.sessionId, fromWave: checkpoint.currentWave });
|
|
630
818
|
// Restore state
|
|
819
|
+
if (checkpoint.originalPrompt)
|
|
820
|
+
this.originalPrompt = checkpoint.originalPrompt;
|
|
631
821
|
if (checkpoint.plan)
|
|
632
822
|
this.plan = checkpoint.plan;
|
|
633
823
|
if (checkpoint.modelHealth.length > 0)
|
|
@@ -657,9 +847,48 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
657
847
|
if (resetCount > 0) {
|
|
658
848
|
this.logDecision('resume', `Reset ${resetCount} orphaned dispatched tasks to ready`, 'Workers died with previous process');
|
|
659
849
|
}
|
|
850
|
+
// Reset skipped tasks whose dependencies are now satisfied
|
|
851
|
+
let unskippedCount = 0;
|
|
852
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
853
|
+
if (task.status === 'skipped') {
|
|
854
|
+
const deps = task.dependencies.map(id => this.taskQueue.getTask(id));
|
|
855
|
+
const allDepsSatisfied = deps.every(d => d && (d.status === 'completed' || d.status === 'decomposed'));
|
|
856
|
+
if (allDepsSatisfied) {
|
|
857
|
+
task.status = 'ready';
|
|
858
|
+
task.attempts = 0;
|
|
859
|
+
task.rescueContext = 'Recovered on resume — dependencies now satisfied';
|
|
860
|
+
unskippedCount++;
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
// Also reset failed tasks that have retry budget
|
|
865
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
866
|
+
if (task.status === 'failed') {
|
|
867
|
+
task.status = 'ready';
|
|
868
|
+
task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
|
|
869
|
+
unskippedCount++;
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
if (unskippedCount > 0) {
|
|
873
|
+
this.logDecision('resume', `Recovered ${unskippedCount} skipped/failed tasks`, 'Fresh retry on resume');
|
|
874
|
+
}
|
|
875
|
+
// If many tasks are still stuck after un-skip, trigger re-plan
|
|
876
|
+
const resumeStats = this.taskQueue.getStats();
|
|
877
|
+
const stuckCount = resumeStats.failed + resumeStats.skipped;
|
|
878
|
+
const totalAttempted = resumeStats.completed + stuckCount;
|
|
879
|
+
if (totalAttempted > 0 && stuckCount / totalAttempted > 0.4) {
|
|
880
|
+
this.logDecision('resume-replan', `${stuckCount}/${totalAttempted} tasks still stuck after resume — triggering re-plan`, '');
|
|
881
|
+
this.hasReplanned = false; // Allow re-plan on resume
|
|
882
|
+
await this.midSwarmReplan();
|
|
883
|
+
}
|
|
660
884
|
// Continue from where we left off
|
|
661
885
|
this.currentPhase = 'executing';
|
|
662
886
|
await this.executeWaves();
|
|
887
|
+
// V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
|
|
888
|
+
if (!this.cancelled)
|
|
889
|
+
await this.finalRescuePass();
|
|
890
|
+
// Post-wave artifact audit
|
|
891
|
+
this.artifactInventory = this.buildArtifactInventory();
|
|
663
892
|
// Continue with verification and synthesis as normal
|
|
664
893
|
if (this.config.enableVerification && this.plan?.integrationTestPlan) {
|
|
665
894
|
this.currentPhase = 'verifying';
|
|
@@ -673,10 +902,14 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
673
902
|
this.currentPhase = 'completed';
|
|
674
903
|
const executionStats = this.buildStats();
|
|
675
904
|
this.checkpoint('final');
|
|
676
|
-
|
|
905
|
+
const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
|
|
906
|
+
this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
|
|
677
907
|
return {
|
|
678
908
|
success: executionStats.completedTasks > 0,
|
|
909
|
+
partialSuccess: !executionStats.completedTasks && hasArtifacts,
|
|
910
|
+
partialFailure: executionStats.failedTasks > 0,
|
|
679
911
|
synthesisResult: synthesisResult ?? undefined,
|
|
912
|
+
artifactInventory: this.artifactInventory,
|
|
680
913
|
summary: this.buildSummary(executionStats),
|
|
681
914
|
tasks: this.taskQueue.getAllTasks(),
|
|
682
915
|
stats: executionStats,
|
|
@@ -693,6 +926,13 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
693
926
|
while (waveIndex < totalWaves && !this.cancelled) {
|
|
694
927
|
const readyTasks = this.taskQueue.getReadyTasks();
|
|
695
928
|
const queueStats = this.taskQueue.getStats();
|
|
929
|
+
// F18: Skip empty waves — if no tasks are ready and none are running,
|
|
930
|
+
// remaining tasks are all blocked/failed/skipped. Break instead of
|
|
931
|
+
// running useless review cycles.
|
|
932
|
+
if (readyTasks.length === 0 && queueStats.running === 0 && queueStats.ready === 0) {
|
|
933
|
+
this.logDecision('wave-skip', `Skipping waves ${waveIndex + 1}-${totalWaves}: no dispatchable tasks remain`, `Stats: ${queueStats.completed} completed, ${queueStats.failed} failed, ${queueStats.skipped} skipped`);
|
|
934
|
+
break;
|
|
935
|
+
}
|
|
696
936
|
this.emit({
|
|
697
937
|
type: 'swarm.wave.start',
|
|
698
938
|
wave: waveIndex + 1,
|
|
@@ -734,6 +974,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
734
974
|
previousFeedback: 'All tasks in this batch failed. Try a fundamentally different approach — the previous strategy did not work.',
|
|
735
975
|
previousScore: 0,
|
|
736
976
|
attempt: task.attempts,
|
|
977
|
+
previousModel: task.assignedModel,
|
|
978
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
737
979
|
};
|
|
738
980
|
}
|
|
739
981
|
this.logDecision('wave-recovery', `Re-queued ${failedWaveTasks.length} tasks with adapted retry context`, 'Budget allows retry');
|
|
@@ -741,21 +983,46 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
741
983
|
await this.executeWave(failedWaveTasks.map(t => this.taskQueue.getTask(t.id)).filter(t => t.status === 'ready'));
|
|
742
984
|
}
|
|
743
985
|
}
|
|
986
|
+
// F5: Adaptive re-decomposition — if < 50% of wave tasks succeeded,
|
|
987
|
+
// the decomposition may be structurally flawed. Log for observability.
|
|
988
|
+
// (Full re-decomposition of remaining work would require re-architecting the queue,
|
|
989
|
+
// so we log the signal and let wave retry + fixup handle recovery.)
|
|
990
|
+
const waveTotal = waveCompleted + waveFailed + waveSkipped;
|
|
991
|
+
const waveSuccessRate = waveTotal > 0 ? waveCompleted / waveTotal : 0;
|
|
992
|
+
if (waveSuccessRate < 0.5 && waveTotal >= 2) {
|
|
993
|
+
this.logDecision('decomposition-quality', `Wave ${waveIndex + 1} success rate ${(waveSuccessRate * 100).toFixed(0)}% (${waveCompleted}/${waveTotal})`, 'Low success rate may indicate decomposition quality issues');
|
|
994
|
+
}
|
|
744
995
|
// V2: Review wave outputs
|
|
745
996
|
const review = await this.reviewWave(waveIndex);
|
|
746
997
|
if (review && review.fixupTasks.length > 0) {
|
|
747
998
|
// Execute fix-up tasks immediately
|
|
748
999
|
await this.executeWave(review.fixupTasks);
|
|
749
1000
|
}
|
|
1001
|
+
// Rescue cascade-skipped tasks that can still run
|
|
1002
|
+
// (after wave review + fixup, some skipped tasks may now be viable)
|
|
1003
|
+
const rescued = this.rescueCascadeSkipped();
|
|
1004
|
+
if (rescued.length > 0) {
|
|
1005
|
+
this.logDecision('cascade-rescue', `Rescued ${rescued.length} cascade-skipped tasks after wave ${waveIndex + 1}`, rescued.map(t => t.id).join(', '));
|
|
1006
|
+
await this.executeWave(rescued);
|
|
1007
|
+
}
|
|
750
1008
|
// Reset quality circuit breaker at wave boundary — each wave gets a fresh chance.
|
|
751
1009
|
// Within a wave, rejections accumulate properly so the breaker can trip.
|
|
752
1010
|
// Between waves, we reset so each wave gets a fresh quality evaluation window.
|
|
753
1011
|
// (The within-wave reset at quality-gate-passed is kept — that's correct.)
|
|
754
|
-
if (this.
|
|
755
|
-
this.
|
|
756
|
-
this.
|
|
757
|
-
this.logDecision('quality-circuit-breaker', `Re-enabled quality gates at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
|
|
1012
|
+
if (this.qualityGateDisabledModels.size > 0) {
|
|
1013
|
+
this.qualityGateDisabledModels.clear();
|
|
1014
|
+
this.perModelQualityRejections.clear();
|
|
1015
|
+
this.logDecision('quality-circuit-breaker', `Re-enabled quality gates for all models at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
|
|
758
1016
|
}
|
|
1017
|
+
// F3: Log budget reallocation after wave completion.
|
|
1018
|
+
// SharedBudgetPool already returns unused tokens via release(), but we log it
|
|
1019
|
+
// for observability so operators can see how budget flows between waves.
|
|
1020
|
+
const budgetStats = this.budgetPool.getStats();
|
|
1021
|
+
this.logDecision('budget-reallocation', `After wave ${waveIndex + 1}: ${budgetStats.tokensRemaining} tokens remaining (${(budgetStats.utilization * 100).toFixed(0)}% utilized)`, '');
|
|
1022
|
+
this.budgetPool.reallocateUnused(budgetStats.tokensRemaining);
|
|
1023
|
+
// F21: Mid-swarm situational assessment — evaluate success rate and budget health,
|
|
1024
|
+
// optionally triage low-priority tasks to conserve budget for critical path.
|
|
1025
|
+
await this.assessAndAdapt(waveIndex);
|
|
759
1026
|
// V2: Checkpoint after each wave
|
|
760
1027
|
this.checkpoint(`wave-${waveIndex}`);
|
|
761
1028
|
// Advance to next wave
|
|
@@ -783,7 +1050,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
783
1050
|
taskIndex++;
|
|
784
1051
|
// Stagger dispatches to avoid rate limit storms
|
|
785
1052
|
if (taskIndex < tasks.length && this.workerPool.availableSlots > 0) {
|
|
786
|
-
await new Promise(resolve => setTimeout(resolve, this.
|
|
1053
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
787
1054
|
}
|
|
788
1055
|
}
|
|
789
1056
|
// Process completions and dispatch more tasks as slots open
|
|
@@ -804,7 +1071,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
804
1071
|
await this.dispatchTask(task);
|
|
805
1072
|
// Stagger dispatches to avoid rate limit storms
|
|
806
1073
|
if (taskIndex + 1 < tasks.length && this.workerPool.availableSlots > 0) {
|
|
807
|
-
await new Promise(resolve => setTimeout(resolve, this.
|
|
1074
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
808
1075
|
}
|
|
809
1076
|
}
|
|
810
1077
|
taskIndex++;
|
|
@@ -819,11 +1086,38 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
819
1086
|
await this.dispatchTask(moreReady[i]);
|
|
820
1087
|
// Stagger dispatches to avoid rate limit storms
|
|
821
1088
|
if (i + 1 < moreReady.length && this.workerPool.availableSlots > 0) {
|
|
822
|
-
await new Promise(resolve => setTimeout(resolve, this.
|
|
1089
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
823
1090
|
}
|
|
824
1091
|
}
|
|
825
1092
|
}
|
|
826
1093
|
}
|
|
1094
|
+
// F20: Re-dispatch pass — after all workers finish, budget may have been freed
|
|
1095
|
+
// by completed tasks. Try to dispatch any still-ready tasks (e.g., those paused
|
|
1096
|
+
// by budget exhaustion earlier).
|
|
1097
|
+
if (!this.cancelled && this.budgetPool.hasCapacity()) {
|
|
1098
|
+
const stillReady = this.taskQueue.getAllReadyTasks()
|
|
1099
|
+
.filter(t => !this.workerPool.getActiveWorkerStatus().some(w => w.taskId === t.id));
|
|
1100
|
+
if (stillReady.length > 0) {
|
|
1101
|
+
this.logDecision('budget-redispatch', `Budget freed after wave — re-dispatching ${stillReady.length} ready task(s)`, `Budget: ${JSON.stringify(this.budgetPool.getStats())}`);
|
|
1102
|
+
for (const task of stillReady) {
|
|
1103
|
+
if (this.workerPool.availableSlots <= 0 || !this.budgetPool.hasCapacity())
|
|
1104
|
+
break;
|
|
1105
|
+
await this.dispatchTask(task);
|
|
1106
|
+
if (this.workerPool.availableSlots > 0) {
|
|
1107
|
+
await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
// Wait for these re-dispatched tasks to complete
|
|
1111
|
+
while (this.workerPool.activeCount > 0 && !this.cancelled) {
|
|
1112
|
+
const completed = await this.workerPool.waitForAny();
|
|
1113
|
+
if (!completed)
|
|
1114
|
+
break;
|
|
1115
|
+
await this.handleTaskCompletion(completed.taskId, completed.result, completed.startedAt);
|
|
1116
|
+
this.emitBudgetUpdate();
|
|
1117
|
+
this.emitStatusUpdate();
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
827
1121
|
}
|
|
828
1122
|
/**
|
|
829
1123
|
* Dispatch a single task to a worker.
|
|
@@ -833,45 +1127,111 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
833
1127
|
const worker = this.workerPool.selectWorker(task);
|
|
834
1128
|
if (!worker) {
|
|
835
1129
|
// M2: Emit error and mark task failed instead of silently returning
|
|
836
|
-
|
|
1130
|
+
// V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
|
|
1131
|
+
this.logDecision('no-worker', `${task.id}: no worker for type ${task.type}`, '');
|
|
1132
|
+
if (task.attempts > 0) {
|
|
1133
|
+
const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
|
|
1134
|
+
const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
|
|
1135
|
+
if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
|
|
1136
|
+
return;
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
this.taskQueue.markFailedWithoutCascade(task.id, 0);
|
|
1140
|
+
this.taskQueue.triggerCascadeSkip(task.id);
|
|
837
1141
|
this.emit({
|
|
838
1142
|
type: 'swarm.task.failed',
|
|
839
1143
|
taskId: task.id,
|
|
840
1144
|
error: `No worker available for task type: ${task.type}`,
|
|
841
|
-
attempt:
|
|
1145
|
+
attempt: task.attempts,
|
|
842
1146
|
maxAttempts: 0,
|
|
843
1147
|
willRetry: false,
|
|
1148
|
+
failureMode: 'error',
|
|
844
1149
|
});
|
|
845
1150
|
return;
|
|
846
1151
|
}
|
|
847
1152
|
try {
|
|
848
|
-
|
|
1153
|
+
// Pre-dispatch auto-split for critical-path bottlenecks
|
|
1154
|
+
if (this.shouldAutoSplit(task)) {
|
|
1155
|
+
try {
|
|
1156
|
+
const splitResult = await this.judgeSplit(task);
|
|
1157
|
+
if (splitResult.shouldSplit && splitResult.subtasks) {
|
|
1158
|
+
task.status = 'dispatched'; // Required for replaceWithSubtasks
|
|
1159
|
+
this.taskQueue.replaceWithSubtasks(task.id, splitResult.subtasks);
|
|
1160
|
+
this.emit({
|
|
1161
|
+
type: 'swarm.task.resilience',
|
|
1162
|
+
taskId: task.id,
|
|
1163
|
+
strategy: 'auto-split',
|
|
1164
|
+
succeeded: true,
|
|
1165
|
+
reason: `Pre-dispatch split into ${splitResult.subtasks.length} parallel subtasks`,
|
|
1166
|
+
artifactsFound: 0,
|
|
1167
|
+
toolCalls: 0,
|
|
1168
|
+
});
|
|
1169
|
+
return; // Subtasks now in queue, will be dispatched this wave
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
catch (err) {
|
|
1173
|
+
this.logDecision('auto-split', `${task.id}: split judge failed — ${err.message}`, '');
|
|
1174
|
+
// Fall through to normal dispatch
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
this.totalDispatches++;
|
|
1178
|
+
const dispatchedModel = task.assignedModel ?? worker.model;
|
|
1179
|
+
this.taskQueue.markDispatched(task.id, dispatchedModel);
|
|
1180
|
+
if (task.assignedModel && task.assignedModel !== worker.model) {
|
|
1181
|
+
this.logDecision('failover', `Dispatching ${task.id} with failover model ${task.assignedModel} (worker default: ${worker.model})`, 'Retry model override is active');
|
|
1182
|
+
}
|
|
849
1183
|
// Pass the pre-selected worker to avoid double-selection in dispatch()
|
|
850
1184
|
await this.workerPool.dispatch(task, worker);
|
|
851
1185
|
this.emit({
|
|
852
1186
|
type: 'swarm.task.dispatched',
|
|
853
1187
|
taskId: task.id,
|
|
854
1188
|
description: task.description,
|
|
855
|
-
model:
|
|
1189
|
+
model: dispatchedModel,
|
|
856
1190
|
workerName: worker.name,
|
|
1191
|
+
toolCount: worker.allowedTools?.length ?? -1, // -1 = all tools
|
|
1192
|
+
tools: worker.allowedTools,
|
|
1193
|
+
retryContext: task.retryContext,
|
|
1194
|
+
fromModel: task.retryContext ? task.retryContext.previousModel : undefined,
|
|
1195
|
+
attempts: task.attempts,
|
|
857
1196
|
});
|
|
858
1197
|
}
|
|
859
1198
|
catch (error) {
|
|
1199
|
+
const errorMsg = error.message;
|
|
1200
|
+
// F20: Budget exhaustion is NOT a task failure — the task is fine, we just ran out of money.
|
|
1201
|
+
// Reset status to ready so it can be picked up if budget becomes available
|
|
1202
|
+
// (e.g., after tokens are released from completing tasks).
|
|
1203
|
+
if (errorMsg.includes('Budget pool exhausted')) {
|
|
1204
|
+
task.status = 'ready';
|
|
1205
|
+
this.logDecision('budget-pause', `Cannot dispatch ${task.id}: budget exhausted — task kept ready for potential re-dispatch`, `Budget stats: ${JSON.stringify(this.budgetPool.getStats())}`);
|
|
1206
|
+
return;
|
|
1207
|
+
}
|
|
860
1208
|
this.errors.push({
|
|
861
1209
|
taskId: task.id,
|
|
862
1210
|
phase: 'dispatch',
|
|
863
|
-
message:
|
|
1211
|
+
message: errorMsg,
|
|
864
1212
|
recovered: false,
|
|
865
1213
|
});
|
|
1214
|
+
this.logDecision('dispatch-error', `${task.id}: dispatch failed: ${errorMsg.slice(0, 100)}`, `attempts: ${task.attempts}`);
|
|
1215
|
+
// V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
|
|
1216
|
+
if (task.attempts > 0) {
|
|
1217
|
+
const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
|
|
1218
|
+
const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
|
|
1219
|
+
if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
|
|
1220
|
+
this.errors[this.errors.length - 1].recovered = true;
|
|
1221
|
+
return;
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
this.taskQueue.markFailedWithoutCascade(task.id, 0);
|
|
1225
|
+
this.taskQueue.triggerCascadeSkip(task.id);
|
|
866
1226
|
this.emit({
|
|
867
1227
|
type: 'swarm.task.failed',
|
|
868
1228
|
taskId: task.id,
|
|
869
|
-
error:
|
|
1229
|
+
error: errorMsg,
|
|
870
1230
|
attempt: task.attempts,
|
|
871
1231
|
maxAttempts: 1 + this.config.workerRetries,
|
|
872
1232
|
willRetry: false,
|
|
1233
|
+
failureMode: 'error',
|
|
873
1234
|
});
|
|
874
|
-
this.taskQueue.markFailed(task.id, 0);
|
|
875
1235
|
}
|
|
876
1236
|
}
|
|
877
1237
|
/**
|
|
@@ -881,9 +1241,36 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
881
1241
|
const task = this.taskQueue.getTask(taskId);
|
|
882
1242
|
if (!task)
|
|
883
1243
|
return;
|
|
884
|
-
// Guard: task was
|
|
885
|
-
if
|
|
1244
|
+
// Guard: task was terminally resolved while its worker was running — ignore the result
|
|
1245
|
+
// F4: But NOT if pendingCascadeSkip — those results are evaluated below
|
|
1246
|
+
if ((task.status === 'skipped' || task.status === 'failed') && !task.pendingCascadeSkip)
|
|
886
1247
|
return;
|
|
1248
|
+
// V7: Global dispatch cap — prevent any single task from burning budget.
|
|
1249
|
+
// Try resilience recovery (micro-decompose, degraded acceptance) before hard-failing.
|
|
1250
|
+
const maxDispatches = this.config.maxDispatchesPerTask ?? 5;
|
|
1251
|
+
if (task.attempts >= maxDispatches) {
|
|
1252
|
+
const durationMs = Date.now() - startedAt;
|
|
1253
|
+
const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
|
|
1254
|
+
this.totalTokens += taskResult.tokensUsed;
|
|
1255
|
+
this.totalCost += taskResult.costUsed;
|
|
1256
|
+
// Try resilience recovery before hard fail
|
|
1257
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1258
|
+
return;
|
|
1259
|
+
}
|
|
1260
|
+
this.taskQueue.markFailedWithoutCascade(taskId, 0);
|
|
1261
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1262
|
+
this.emit({
|
|
1263
|
+
type: 'swarm.task.failed',
|
|
1264
|
+
taskId,
|
|
1265
|
+
error: `Dispatch cap reached (${maxDispatches} attempts)`,
|
|
1266
|
+
attempt: task.attempts,
|
|
1267
|
+
maxAttempts: maxDispatches,
|
|
1268
|
+
willRetry: false,
|
|
1269
|
+
failureMode: task.failureMode,
|
|
1270
|
+
});
|
|
1271
|
+
this.logDecision('dispatch-cap', `${taskId}: hard cap reached (${task.attempts}/${maxDispatches})`, 'No more retries — resilience recovery also failed');
|
|
1272
|
+
return;
|
|
1273
|
+
}
|
|
887
1274
|
const durationMs = Date.now() - startedAt;
|
|
888
1275
|
const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
|
|
889
1276
|
// Track model usage
|
|
@@ -895,21 +1282,94 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
895
1282
|
this.modelUsage.set(model, usage);
|
|
896
1283
|
this.totalTokens += taskResult.tokensUsed;
|
|
897
1284
|
this.totalCost += taskResult.costUsed;
|
|
1285
|
+
// V10: Emit per-attempt event for full decision traceability
|
|
1286
|
+
this.emit({
|
|
1287
|
+
type: 'swarm.task.attempt',
|
|
1288
|
+
taskId,
|
|
1289
|
+
attempt: task.attempts,
|
|
1290
|
+
model,
|
|
1291
|
+
success: spawnResult.success,
|
|
1292
|
+
durationMs,
|
|
1293
|
+
toolCalls: spawnResult.metrics.toolCalls ?? 0,
|
|
1294
|
+
failureMode: !spawnResult.success ? task.failureMode : undefined,
|
|
1295
|
+
qualityScore: taskResult.qualityScore,
|
|
1296
|
+
output: taskResult.output.slice(0, 500),
|
|
1297
|
+
});
|
|
898
1298
|
if (!spawnResult.success) {
|
|
899
1299
|
// V2: Record model health
|
|
900
1300
|
const errorMsg = spawnResult.output.toLowerCase();
|
|
901
1301
|
const is429 = errorMsg.includes('429') || errorMsg.includes('rate');
|
|
902
1302
|
const is402 = errorMsg.includes('402') || errorMsg.includes('spend limit');
|
|
903
|
-
const
|
|
1303
|
+
const isTimeout = spawnResult.metrics.toolCalls === -1;
|
|
1304
|
+
// F25: Use 'timeout' errorType for timeouts (was 'error')
|
|
1305
|
+
const errorType = is429 ? '429' : is402 ? '402' : isTimeout ? 'timeout' : 'error';
|
|
904
1306
|
this.healthTracker.recordFailure(model, errorType);
|
|
905
1307
|
this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
|
|
1308
|
+
// P6: Tag failure mode for cascade threshold awareness
|
|
1309
|
+
task.failureMode = (is429 || is402) ? 'rate-limit' : (spawnResult.metrics.toolCalls === -1 ? 'timeout' : 'error');
|
|
906
1310
|
// Feed circuit breaker
|
|
907
1311
|
if (is429 || is402) {
|
|
908
1312
|
this.recordRateLimit();
|
|
909
1313
|
}
|
|
1314
|
+
// F25a: Consecutive timeout tracking — early-fail after N consecutive timeouts
|
|
1315
|
+
if (isTimeout) {
|
|
1316
|
+
const count = (this.taskTimeoutCounts.get(taskId) ?? 0) + 1;
|
|
1317
|
+
this.taskTimeoutCounts.set(taskId, count);
|
|
1318
|
+
const timeoutLimit = this.config.consecutiveTimeoutLimit ?? 3;
|
|
1319
|
+
this.logDecision('timeout-tracking', `${taskId}: consecutive timeout ${count}/${timeoutLimit}`, '');
|
|
1320
|
+
if (count >= timeoutLimit) {
|
|
1321
|
+
// F25b: Try model failover before giving up
|
|
1322
|
+
let failoverSucceeded = false;
|
|
1323
|
+
if (this.config.enableModelFailover) {
|
|
1324
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
1325
|
+
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
1326
|
+
if (alternative) {
|
|
1327
|
+
this.emit({
|
|
1328
|
+
type: 'swarm.model.failover',
|
|
1329
|
+
taskId,
|
|
1330
|
+
fromModel: model,
|
|
1331
|
+
toModel: alternative.model,
|
|
1332
|
+
reason: 'consecutive-timeouts',
|
|
1333
|
+
});
|
|
1334
|
+
task.assignedModel = alternative.model;
|
|
1335
|
+
this.taskTimeoutCounts.set(taskId, 0); // Reset counter for new model
|
|
1336
|
+
this.logDecision('failover', `Timeout failover ${taskId}: ${model} → ${alternative.model}`, `${count} consecutive timeouts`);
|
|
1337
|
+
failoverSucceeded = true;
|
|
1338
|
+
}
|
|
1339
|
+
}
|
|
1340
|
+
if (!failoverSucceeded) {
|
|
1341
|
+
// No alternative model — try resilience recovery before hard fail.
|
|
1342
|
+
// Timeouts often produce artifacts (worker WAS working, just ran out of time).
|
|
1343
|
+
task.failureMode = 'timeout';
|
|
1344
|
+
const taskResult = this.workerPool.toTaskResult(spawnResult, task, Date.now() - startedAt);
|
|
1345
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1346
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1347
|
+
return;
|
|
1348
|
+
}
|
|
1349
|
+
this.taskQueue.markFailedWithoutCascade(taskId, 0);
|
|
1350
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1351
|
+
this.emit({
|
|
1352
|
+
type: 'swarm.task.failed',
|
|
1353
|
+
taskId,
|
|
1354
|
+
error: `${count} consecutive timeouts — no alternative model available`,
|
|
1355
|
+
attempt: task.attempts,
|
|
1356
|
+
maxAttempts: maxDispatches,
|
|
1357
|
+
willRetry: false,
|
|
1358
|
+
failureMode: 'timeout',
|
|
1359
|
+
});
|
|
1360
|
+
this.logDecision('timeout-early-fail', `${taskId}: ${count} consecutive timeouts, no alt model — resilience recovery also failed`, '');
|
|
1361
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1362
|
+
return;
|
|
1363
|
+
}
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
else {
|
|
1367
|
+
// Non-timeout failure — reset the counter
|
|
1368
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1369
|
+
}
|
|
910
1370
|
// V2: Model failover on rate limits
|
|
911
1371
|
if ((is429 || is402) && this.config.enableModelFailover) {
|
|
912
|
-
const capability =
|
|
1372
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
913
1373
|
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
914
1374
|
if (alternative) {
|
|
915
1375
|
this.emit({
|
|
@@ -926,21 +1386,27 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
926
1386
|
// V5/V7: Store error context so retry gets different prompt
|
|
927
1387
|
if (!(is429 || is402)) {
|
|
928
1388
|
// V7: Timeout-specific feedback — the worker WAS working, just ran out of time
|
|
929
|
-
const isTimeout = spawnResult.metrics.toolCalls === -1;
|
|
930
1389
|
const timeoutSeconds = isTimeout ? Math.round(durationMs / 1000) : 0;
|
|
931
1390
|
task.retryContext = {
|
|
932
1391
|
previousFeedback: isTimeout
|
|
933
1392
|
? `Previous attempt timed out after ${timeoutSeconds}s. You must complete this task more efficiently — work faster, use fewer tool calls, and produce your result sooner.`
|
|
934
|
-
: spawnResult.output.slice(0,
|
|
1393
|
+
: spawnResult.output.slice(0, 2000),
|
|
935
1394
|
previousScore: 0,
|
|
936
1395
|
attempt: task.attempts,
|
|
1396
|
+
previousModel: model,
|
|
1397
|
+
previousFiles: taskResult.filesModified,
|
|
1398
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
937
1399
|
};
|
|
938
1400
|
}
|
|
939
|
-
//
|
|
1401
|
+
// V7: Reset hollow streak on non-hollow failure (error is not a hollow completion)
|
|
1402
|
+
this.hollowStreak = 0;
|
|
1403
|
+
// Worker failed — use higher retry limit for rate limit errors.
|
|
1404
|
+
// V7: Fixup tasks get capped retries, foundation tasks get +1.
|
|
1405
|
+
const baseRetries = this.getEffectiveRetries(task);
|
|
940
1406
|
const retryLimit = (is429 || is402)
|
|
941
|
-
? (this.config.rateLimitRetries ?? 3)
|
|
942
|
-
:
|
|
943
|
-
const canRetry = this.taskQueue.
|
|
1407
|
+
? Math.min(this.config.rateLimitRetries ?? 3, baseRetries + 1)
|
|
1408
|
+
: baseRetries;
|
|
1409
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, retryLimit);
|
|
944
1410
|
if (canRetry) {
|
|
945
1411
|
this.retries++;
|
|
946
1412
|
// Non-blocking cooldown: set retryAfter timestamp instead of blocking
|
|
@@ -948,8 +1414,21 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
948
1414
|
const baseDelay = this.config.retryBaseDelayMs ?? 5000;
|
|
949
1415
|
const cooldownMs = Math.min(baseDelay * Math.pow(2, task.attempts - 1), 30000);
|
|
950
1416
|
this.taskQueue.setRetryAfter(taskId, cooldownMs);
|
|
1417
|
+
this.logDecision('rate-limit-cooldown', `${taskId}: ${errorType} cooldown ${cooldownMs}ms, model ${model}`, '');
|
|
951
1418
|
}
|
|
952
1419
|
}
|
|
1420
|
+
else if (!(is429 || is402)) {
|
|
1421
|
+
// Resilience recovery for non-rate-limit errors (micro-decompose + degraded acceptance)
|
|
1422
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1423
|
+
return;
|
|
1424
|
+
}
|
|
1425
|
+
// Recovery failed — NOW trigger cascade
|
|
1426
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1427
|
+
}
|
|
1428
|
+
else {
|
|
1429
|
+
// Rate-limit exhaustion — trigger cascade
|
|
1430
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1431
|
+
}
|
|
953
1432
|
this.emit({
|
|
954
1433
|
type: 'swarm.task.failed',
|
|
955
1434
|
taskId,
|
|
@@ -957,23 +1436,43 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
957
1436
|
attempt: task.attempts,
|
|
958
1437
|
maxAttempts: 1 + this.config.workerRetries,
|
|
959
1438
|
willRetry: canRetry,
|
|
1439
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
1440
|
+
failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
|
|
1441
|
+
failureMode: task.failureMode,
|
|
960
1442
|
});
|
|
961
1443
|
return;
|
|
962
1444
|
}
|
|
963
1445
|
// V6: Hollow completion detection — workers that "succeed" without doing any work
|
|
964
1446
|
// Must check BEFORE recording success, otherwise hollow completions inflate health scores
|
|
965
|
-
if (isHollowCompletion(spawnResult)) {
|
|
966
|
-
//
|
|
967
|
-
|
|
968
|
-
|
|
1447
|
+
if (isHollowCompletion(spawnResult, task.type, this.config)) {
|
|
1448
|
+
// F4: Hollow result + pendingCascadeSkip — honor the skip immediately, no retry
|
|
1449
|
+
if (task.pendingCascadeSkip) {
|
|
1450
|
+
task.pendingCascadeSkip = undefined;
|
|
1451
|
+
task.status = 'skipped';
|
|
1452
|
+
this.totalHollows++;
|
|
1453
|
+
this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (hollow completion)`, '');
|
|
1454
|
+
this.emit({ type: 'swarm.task.skipped', taskId, reason: 'cascade skip honored — hollow completion' });
|
|
1455
|
+
return;
|
|
1456
|
+
}
|
|
1457
|
+
// P6: Tag failure mode for cascade threshold awareness
|
|
1458
|
+
task.failureMode = 'hollow';
|
|
1459
|
+
// Record hollow completion so hollow-prone models accumulate hollow-specific records
|
|
1460
|
+
// and get deprioritized by the model selector (also records generic failure internally)
|
|
1461
|
+
this.healthTracker.recordHollow(model);
|
|
1462
|
+
const admitsFailure = spawnResult.success && FAILURE_INDICATORS.some(f => (spawnResult.output ?? '').toLowerCase().includes(f));
|
|
969
1463
|
task.retryContext = {
|
|
970
|
-
previousFeedback:
|
|
1464
|
+
previousFeedback: admitsFailure
|
|
1465
|
+
? 'Previous attempt reported success but admitted failure (e.g., "budget exhausted", "unable to complete"). You MUST execute tool calls and produce concrete output this time.'
|
|
1466
|
+
: 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
|
|
971
1467
|
previousScore: 1,
|
|
972
1468
|
attempt: task.attempts,
|
|
1469
|
+
previousModel: model,
|
|
1470
|
+
previousFiles: taskResult.filesModified,
|
|
1471
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
973
1472
|
};
|
|
974
1473
|
// Model failover for hollow completions — same pattern as quality failover
|
|
975
1474
|
if (this.config.enableModelFailover) {
|
|
976
|
-
const capability =
|
|
1475
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
977
1476
|
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
978
1477
|
if (alternative) {
|
|
979
1478
|
this.emit({
|
|
@@ -987,9 +1486,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
987
1486
|
this.logDecision('failover', `Hollow failover ${taskId}: ${model} → ${alternative.model}`, 'Model produced hollow completion');
|
|
988
1487
|
}
|
|
989
1488
|
}
|
|
990
|
-
const
|
|
991
|
-
|
|
1489
|
+
const hollowRetries = this.getEffectiveRetries(task);
|
|
1490
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, hollowRetries);
|
|
1491
|
+
if (canRetry) {
|
|
992
1492
|
this.retries++;
|
|
1493
|
+
}
|
|
1494
|
+
else {
|
|
1495
|
+
// Retries exhausted — try shared resilience recovery (micro-decompose, degraded acceptance)
|
|
1496
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1497
|
+
return;
|
|
1498
|
+
}
|
|
1499
|
+
// Recovery failed — NOW trigger cascade
|
|
1500
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1501
|
+
}
|
|
993
1502
|
this.emit({
|
|
994
1503
|
type: 'swarm.task.failed',
|
|
995
1504
|
taskId,
|
|
@@ -997,21 +1506,83 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
997
1506
|
attempt: task.attempts,
|
|
998
1507
|
maxAttempts: 1 + this.config.workerRetries,
|
|
999
1508
|
willRetry: canRetry,
|
|
1509
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
1510
|
+
failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
|
|
1511
|
+
failureMode: 'hollow',
|
|
1000
1512
|
});
|
|
1001
|
-
this.
|
|
1513
|
+
this.hollowStreak++;
|
|
1514
|
+
this.totalHollows++;
|
|
1515
|
+
this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls (streak: ${this.hollowStreak}, total hollows: ${this.totalHollows}/${this.totalDispatches})`, canRetry ? 'Marking as failed for retry' : 'Retries exhausted — hard fail');
|
|
1516
|
+
// B2: Hollow streak handling — only terminate if enableHollowTermination is explicitly on
|
|
1517
|
+
if (this.hollowStreak >= SwarmOrchestrator.HOLLOW_STREAK_THRESHOLD) {
|
|
1518
|
+
const uniqueModels = new Set(this.config.workers.map(w => w.model));
|
|
1519
|
+
const singleModel = uniqueModels.size === 1;
|
|
1520
|
+
const onlyModel = [...uniqueModels][0];
|
|
1521
|
+
const modelUnhealthy = singleModel && !this.healthTracker.getAllRecords().find(r => r.model === onlyModel)?.healthy;
|
|
1522
|
+
if (singleModel && modelUnhealthy) {
|
|
1523
|
+
if (this.config.enableHollowTermination) {
|
|
1524
|
+
this.logDecision('early-termination', `Terminating swarm: ${this.hollowStreak} consecutive hollow completions on sole model ${onlyModel}`, 'Single-model swarm with unhealthy model — enableHollowTermination is on');
|
|
1525
|
+
this.skipRemainingTasks(`Single-model hollow streak (${this.hollowStreak}x on ${onlyModel})`);
|
|
1526
|
+
}
|
|
1527
|
+
else {
|
|
1528
|
+
this.logDecision('stall-mode', `${this.hollowStreak} consecutive hollows on sole model ${onlyModel} — entering stall mode`, 'Will attempt model failover or simplified retry on next dispatch');
|
|
1529
|
+
// Reset streak to allow more attempts with adjusted strategy
|
|
1530
|
+
this.hollowStreak = 0;
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
// V7: Multi-model hollow ratio — warn but don't terminate unless opt-in
|
|
1535
|
+
const minDispatches = this.config.hollowTerminationMinDispatches ?? 8;
|
|
1536
|
+
const threshold = this.config.hollowTerminationRatio ?? 0.55;
|
|
1537
|
+
if (this.totalDispatches >= minDispatches) {
|
|
1538
|
+
const ratio = this.totalHollows / this.totalDispatches;
|
|
1539
|
+
if (ratio > threshold) {
|
|
1540
|
+
if (this.config.enableHollowTermination) {
|
|
1541
|
+
this.logDecision('early-termination', `Terminating swarm: hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, `Exceeds threshold ${(threshold * 100).toFixed(0)}% after ${minDispatches}+ dispatches — enableHollowTermination is on`);
|
|
1542
|
+
this.skipRemainingTasks(`Hollow ratio ${(ratio * 100).toFixed(0)}% — models cannot execute tasks`);
|
|
1543
|
+
}
|
|
1544
|
+
else if (!this.hollowRatioWarned) {
|
|
1545
|
+
this.hollowRatioWarned = true;
|
|
1546
|
+
this.logDecision('stall-warning', `Hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, 'High hollow rate but continuing — tasks may still recover via resilience');
|
|
1547
|
+
}
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1002
1550
|
return;
|
|
1003
1551
|
}
|
|
1552
|
+
// F4: Task had pendingCascadeSkip but produced non-hollow results.
|
|
1553
|
+
// Run pre-flight checks — if the output is good, accept it instead of skipping.
|
|
1554
|
+
if (task.pendingCascadeSkip) {
|
|
1555
|
+
const cachedReport = checkArtifacts(task);
|
|
1556
|
+
const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedReport);
|
|
1557
|
+
if (preFlight && !preFlight.passed) {
|
|
1558
|
+
// Output is garbage — honor the cascade skip
|
|
1559
|
+
task.pendingCascadeSkip = undefined;
|
|
1560
|
+
task.status = 'skipped';
|
|
1561
|
+
this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (pre-flight failed: ${preFlight.feedback})`, '');
|
|
1562
|
+
this.emit({ type: 'swarm.task.skipped', taskId, reason: `cascade skip honored — output failed pre-flight: ${preFlight.feedback}` });
|
|
1563
|
+
return;
|
|
1564
|
+
}
|
|
1565
|
+
// Output is good — clear the flag and accept the result
|
|
1566
|
+
task.pendingCascadeSkip = undefined;
|
|
1567
|
+
task.status = 'dispatched'; // Reset so markCompleted works
|
|
1568
|
+
this.logDecision('cascade-skip', `${taskId}: pending cascade skip overridden — worker produced valid output`, '');
|
|
1569
|
+
}
|
|
1004
1570
|
// Record model health on success (only for non-hollow completions)
|
|
1005
1571
|
this.healthTracker.recordSuccess(model, durationMs);
|
|
1572
|
+
this.decreaseStagger(); // P7: Speed up on success
|
|
1006
1573
|
// Run quality gate if enabled — skip under API pressure, skip if circuit breaker tripped,
|
|
1007
1574
|
// and let the final attempt through without quality gate (so tasks produce *something*)
|
|
1575
|
+
// Foundation tasks get +1 retry to reduce cascade failure risk.
|
|
1576
|
+
const effectiveRetries = this.getEffectiveRetries(task);
|
|
1008
1577
|
const recentRLCount = this.recentRateLimits.filter(t => t > Date.now() - 30_000).length;
|
|
1009
|
-
const isLastAttempt = task.attempts >= (
|
|
1578
|
+
const isLastAttempt = task.attempts >= (effectiveRetries + 1);
|
|
1010
1579
|
const shouldRunQualityGate = this.config.qualityGates
|
|
1011
|
-
&& !this.
|
|
1580
|
+
&& !this.qualityGateDisabledModels.has(model)
|
|
1012
1581
|
&& !isLastAttempt
|
|
1013
1582
|
&& Date.now() >= this.circuitBreakerUntil
|
|
1014
1583
|
&& recentRLCount < 2;
|
|
1584
|
+
// C1: Pre-compute artifact report once — shared by quality gate and pre-flight checks
|
|
1585
|
+
const cachedArtifactReport = checkArtifacts(task);
|
|
1015
1586
|
if (shouldRunQualityGate) {
|
|
1016
1587
|
// V3: Judge role handles quality gates
|
|
1017
1588
|
const judgeModel = this.config.hierarchy?.judge?.model
|
|
@@ -1021,57 +1592,272 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1021
1592
|
persona: this.config.hierarchy?.judge?.persona,
|
|
1022
1593
|
};
|
|
1023
1594
|
this.emit({ type: 'swarm.role.action', role: 'judge', action: 'quality-gate', model: judgeModel, taskId });
|
|
1024
|
-
|
|
1595
|
+
// Extract file artifacts from worker output for quality gate visibility.
|
|
1596
|
+
// When workers create files via write_file/edit_file, the judge needs to see
|
|
1597
|
+
// the actual content — not just the worker's text claims about what was created.
|
|
1598
|
+
const fileArtifacts = this.extractFileArtifacts(task, taskResult);
|
|
1599
|
+
// Foundation tasks get a relaxed quality threshold (threshold - 1, min 2)
|
|
1600
|
+
// to reduce the chance of cascade-skipping the entire swarm.
|
|
1601
|
+
const baseThreshold = this.config.qualityThreshold ?? 3;
|
|
1602
|
+
const qualityThreshold = task.isFoundation ? Math.max(2, baseThreshold - 1) : baseThreshold;
|
|
1603
|
+
const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, qualityThreshold, (resp, purpose) => this.trackOrchestratorUsage(resp, purpose), fileArtifacts, this.config, cachedArtifactReport);
|
|
1025
1604
|
taskResult.qualityScore = quality.score;
|
|
1026
1605
|
taskResult.qualityFeedback = quality.feedback;
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
if (
|
|
1032
|
-
|
|
1033
|
-
|
|
1606
|
+
// F11: Foundation tasks that barely pass the relaxed threshold get concrete validation.
|
|
1607
|
+
// A 2/5 foundation task with truncated output will cascade-poison all dependents.
|
|
1608
|
+
if (quality.passed && task.isFoundation && quality.score <= baseThreshold - 1) {
|
|
1609
|
+
const concreteResult = runConcreteChecks(task, taskResult);
|
|
1610
|
+
if (!concreteResult.passed) {
|
|
1611
|
+
quality.passed = false;
|
|
1612
|
+
quality.feedback += ` [F11: foundation task barely passed (${quality.score}/${baseThreshold}) but concrete validation failed: ${concreteResult.issues.join('; ')}]`;
|
|
1613
|
+
this.logDecision('foundation-concrete-gate', `${taskId}: foundation task scored ${quality.score} (relaxed threshold ${qualityThreshold}) but concrete checks failed — rejecting`, concreteResult.issues.join('; '));
|
|
1034
1614
|
}
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1615
|
+
}
|
|
1616
|
+
if (!quality.passed) {
|
|
1617
|
+
// F7: Gate error fallback — when LLM judge fails, use concrete validation
|
|
1618
|
+
// If concrete checks pass, tentatively accept the result instead of rejecting.
|
|
1619
|
+
if (quality.gateError && (this.config.enableConcreteValidation !== false)) {
|
|
1620
|
+
const concreteResult = runConcreteChecks(task, taskResult);
|
|
1621
|
+
if (concreteResult.passed) {
|
|
1622
|
+
// Concrete validation passed — tentatively accept despite gate error
|
|
1623
|
+
this.logDecision('gate-error-fallback', `${taskId}: gate error but concrete checks passed — tentatively accepting`, quality.gateErrorMessage ?? 'unknown');
|
|
1624
|
+
taskResult.qualityScore = quality.score;
|
|
1625
|
+
taskResult.qualityFeedback = `${quality.feedback} [concrete validation passed — tentative accept]`;
|
|
1626
|
+
// Fall through to success path (don't return)
|
|
1627
|
+
}
|
|
1628
|
+
else {
|
|
1629
|
+
// Both gate and concrete failed — reject
|
|
1630
|
+
this.logDecision('gate-error-fallback', `${taskId}: gate error AND concrete checks failed — rejecting`, `Concrete issues: ${concreteResult.issues.join('; ')}`);
|
|
1631
|
+
// Fall through to normal rejection below
|
|
1632
|
+
}
|
|
1633
|
+
// If concrete passed, skip the rejection path
|
|
1634
|
+
if (concreteResult.passed) {
|
|
1635
|
+
this.perModelQualityRejections.delete(model);
|
|
1636
|
+
// Jump to success path below
|
|
1637
|
+
}
|
|
1638
|
+
else {
|
|
1639
|
+
// Proceed with normal rejection
|
|
1640
|
+
this.qualityRejections++;
|
|
1641
|
+
task.failureMode = 'quality';
|
|
1642
|
+
this.healthTracker.recordQualityRejection(model, quality.score);
|
|
1643
|
+
this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
|
|
1644
|
+
this.hollowStreak = 0;
|
|
1645
|
+
task.retryContext = {
|
|
1646
|
+
previousFeedback: `Gate error + concrete validation failed: ${concreteResult.issues.join('; ')}`,
|
|
1647
|
+
previousScore: quality.score,
|
|
1648
|
+
attempt: task.attempts,
|
|
1649
|
+
previousModel: model,
|
|
1650
|
+
previousFiles: taskResult.filesModified,
|
|
1651
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1652
|
+
};
|
|
1653
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1654
|
+
if (canRetry) {
|
|
1655
|
+
this.retries++;
|
|
1656
|
+
}
|
|
1657
|
+
else {
|
|
1658
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1659
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1660
|
+
return;
|
|
1661
|
+
}
|
|
1662
|
+
// Recovery failed — NOW trigger cascade
|
|
1663
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1664
|
+
}
|
|
1046
1665
|
this.emit({
|
|
1047
|
-
type: 'swarm.
|
|
1666
|
+
type: 'swarm.quality.rejected',
|
|
1048
1667
|
taskId,
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1668
|
+
score: quality.score,
|
|
1669
|
+
feedback: quality.feedback,
|
|
1670
|
+
artifactCount: fileArtifacts.length,
|
|
1671
|
+
outputLength: taskResult.output.length,
|
|
1672
|
+
preFlightReject: false,
|
|
1673
|
+
filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
|
|
1052
1674
|
});
|
|
1053
|
-
|
|
1054
|
-
|
|
1675
|
+
return;
|
|
1676
|
+
}
|
|
1677
|
+
}
|
|
1678
|
+
else if (!quality.gateError) {
|
|
1679
|
+
// Normal quality rejection (LLM judge rejected, no gate error)
|
|
1680
|
+
this.qualityRejections++;
|
|
1681
|
+
// P6: Tag failure mode for cascade threshold awareness
|
|
1682
|
+
task.failureMode = 'quality';
|
|
1683
|
+
// P1: Quality rejections update model health — undo premature recordSuccess
|
|
1684
|
+
this.healthTracker.recordQualityRejection(model, quality.score);
|
|
1685
|
+
this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
|
|
1686
|
+
// V7: Quality rejection is NOT hollow — worker did work, just poorly
|
|
1687
|
+
this.hollowStreak = 0;
|
|
1688
|
+
// F7: Per-model circuit breaker → "pre-flight only mode" instead of fully disabling gates.
|
|
1689
|
+
// After threshold rejections, skip LLM judge but keep pre-flight mandatory.
|
|
1690
|
+
if (!quality.preFlightReject) {
|
|
1691
|
+
const modelRejections = (this.perModelQualityRejections.get(model) ?? 0) + 1;
|
|
1692
|
+
this.perModelQualityRejections.set(model, modelRejections);
|
|
1693
|
+
if (modelRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
|
|
1694
|
+
this.qualityGateDisabledModels.add(model);
|
|
1695
|
+
this.logDecision('quality-circuit-breaker', `Switched model ${model} to pre-flight-only mode after ${modelRejections} rejections`, 'Skipping LLM judge but keeping pre-flight checks mandatory');
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
// V5: Attach feedback so retry prompt includes it
|
|
1699
|
+
task.retryContext = {
|
|
1700
|
+
previousFeedback: quality.feedback,
|
|
1701
|
+
previousScore: quality.score,
|
|
1702
|
+
attempt: task.attempts,
|
|
1703
|
+
previousModel: model,
|
|
1704
|
+
previousFiles: taskResult.filesModified,
|
|
1705
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1706
|
+
};
|
|
1707
|
+
// V5: Model failover on quality rejection — but NOT on artifact auto-fails
|
|
1708
|
+
// P1: Widened from score<=1 to score<threshold so failover triggers on any rejection
|
|
1709
|
+
if (quality.score < qualityThreshold && this.config.enableModelFailover && !quality.artifactAutoFail) {
|
|
1710
|
+
const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
|
|
1711
|
+
const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
|
|
1712
|
+
if (alternative) {
|
|
1713
|
+
this.emit({
|
|
1714
|
+
type: 'swarm.model.failover',
|
|
1715
|
+
taskId,
|
|
1716
|
+
fromModel: model,
|
|
1717
|
+
toModel: alternative.model,
|
|
1718
|
+
reason: `quality-score-${quality.score}`,
|
|
1719
|
+
});
|
|
1720
|
+
task.assignedModel = alternative.model;
|
|
1721
|
+
this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
|
|
1722
|
+
}
|
|
1723
|
+
}
|
|
1724
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1725
|
+
if (canRetry) {
|
|
1726
|
+
this.retries++;
|
|
1727
|
+
}
|
|
1728
|
+
else {
|
|
1729
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1730
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1731
|
+
return;
|
|
1732
|
+
}
|
|
1733
|
+
// Recovery failed — NOW trigger cascade
|
|
1734
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1735
|
+
}
|
|
1736
|
+
// M1: Only emit quality.rejected (not duplicate task.failed)
|
|
1737
|
+
this.emit({
|
|
1738
|
+
type: 'swarm.quality.rejected',
|
|
1739
|
+
taskId,
|
|
1740
|
+
score: quality.score,
|
|
1741
|
+
feedback: quality.feedback,
|
|
1742
|
+
artifactCount: fileArtifacts.length,
|
|
1743
|
+
outputLength: taskResult.output.length,
|
|
1744
|
+
preFlightReject: quality.preFlightReject,
|
|
1745
|
+
filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
|
|
1746
|
+
});
|
|
1747
|
+
return;
|
|
1748
|
+
}
|
|
1749
|
+
else {
|
|
1750
|
+
// gateError=true but concrete validation disabled — reject
|
|
1751
|
+
this.qualityRejections++;
|
|
1752
|
+
task.failureMode = 'quality';
|
|
1753
|
+
this.hollowStreak = 0;
|
|
1754
|
+
task.retryContext = {
|
|
1755
|
+
previousFeedback: quality.feedback,
|
|
1756
|
+
previousScore: quality.score,
|
|
1757
|
+
attempt: task.attempts,
|
|
1758
|
+
previousModel: model,
|
|
1759
|
+
previousFiles: taskResult.filesModified,
|
|
1760
|
+
swarmProgress: this.getSwarmProgressSummary(),
|
|
1761
|
+
};
|
|
1762
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1763
|
+
if (canRetry) {
|
|
1764
|
+
this.retries++;
|
|
1765
|
+
}
|
|
1766
|
+
else {
|
|
1767
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1768
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1769
|
+
return;
|
|
1770
|
+
}
|
|
1771
|
+
// Recovery failed — NOW trigger cascade
|
|
1772
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1055
1773
|
}
|
|
1774
|
+
this.emit({
|
|
1775
|
+
type: 'swarm.quality.rejected',
|
|
1776
|
+
taskId,
|
|
1777
|
+
score: quality.score,
|
|
1778
|
+
feedback: quality.feedback,
|
|
1779
|
+
artifactCount: fileArtifacts.length,
|
|
1780
|
+
outputLength: taskResult.output.length,
|
|
1781
|
+
preFlightReject: false,
|
|
1782
|
+
filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
|
|
1783
|
+
});
|
|
1784
|
+
return;
|
|
1056
1785
|
}
|
|
1057
|
-
|
|
1786
|
+
}
|
|
1787
|
+
// Quality passed — reset per-model rejection counter
|
|
1788
|
+
this.perModelQualityRejections.delete(model);
|
|
1789
|
+
}
|
|
1790
|
+
// F7: When quality gate was skipped (last attempt, pre-flight-only mode, API pressure),
|
|
1791
|
+
// still run pre-flight + concrete checks so obviously broken outputs don't slip through.
|
|
1792
|
+
// C1: Use cached artifact report to avoid double filesystem scan.
|
|
1793
|
+
if (!shouldRunQualityGate && this.config.qualityGates) {
|
|
1794
|
+
const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedArtifactReport);
|
|
1795
|
+
if (preFlight && !preFlight.passed) {
|
|
1796
|
+
taskResult.qualityScore = preFlight.score;
|
|
1797
|
+
taskResult.qualityFeedback = preFlight.feedback;
|
|
1798
|
+
this.qualityRejections++;
|
|
1799
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1058
1800
|
if (canRetry) {
|
|
1059
1801
|
this.retries++;
|
|
1060
1802
|
}
|
|
1061
|
-
|
|
1803
|
+
else {
|
|
1804
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1805
|
+
this.logDecision('preflight-reject', `${taskId}: pre-flight failed: ${preFlight.feedback}`, '');
|
|
1806
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1807
|
+
return;
|
|
1808
|
+
}
|
|
1809
|
+
// Recovery failed — NOW trigger cascade
|
|
1810
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1811
|
+
}
|
|
1062
1812
|
this.emit({
|
|
1063
1813
|
type: 'swarm.quality.rejected',
|
|
1064
1814
|
taskId,
|
|
1065
|
-
score:
|
|
1066
|
-
feedback:
|
|
1815
|
+
score: preFlight.score,
|
|
1816
|
+
feedback: preFlight.feedback,
|
|
1817
|
+
artifactCount: 0,
|
|
1818
|
+
outputLength: taskResult.output.length,
|
|
1819
|
+
preFlightReject: true,
|
|
1067
1820
|
});
|
|
1068
1821
|
return;
|
|
1069
1822
|
}
|
|
1070
|
-
//
|
|
1071
|
-
this.
|
|
1823
|
+
// F2: Run concrete validation when pre-flight passes but gate was skipped
|
|
1824
|
+
if (this.config.enableConcreteValidation !== false) {
|
|
1825
|
+
const concreteResult = runConcreteChecks(task, taskResult);
|
|
1826
|
+
if (!concreteResult.passed) {
|
|
1827
|
+
taskResult.qualityScore = 2;
|
|
1828
|
+
taskResult.qualityFeedback = `Concrete validation failed: ${concreteResult.issues.join('; ')}`;
|
|
1829
|
+
this.qualityRejections++;
|
|
1830
|
+
const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
|
|
1831
|
+
if (canRetry) {
|
|
1832
|
+
this.retries++;
|
|
1833
|
+
}
|
|
1834
|
+
else {
|
|
1835
|
+
// Retries exhausted — try resilience recovery before cascade-skip
|
|
1836
|
+
this.logDecision('concrete-reject', `${taskId}: concrete validation failed: ${concreteResult.issues.join('; ')}`, '');
|
|
1837
|
+
if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
|
|
1838
|
+
return;
|
|
1839
|
+
}
|
|
1840
|
+
// Recovery failed — NOW trigger cascade
|
|
1841
|
+
this.taskQueue.triggerCascadeSkip(taskId);
|
|
1842
|
+
}
|
|
1843
|
+
this.emit({
|
|
1844
|
+
type: 'swarm.quality.rejected',
|
|
1845
|
+
taskId,
|
|
1846
|
+
score: 2,
|
|
1847
|
+
feedback: taskResult.qualityFeedback,
|
|
1848
|
+
artifactCount: 0,
|
|
1849
|
+
outputLength: taskResult.output.length,
|
|
1850
|
+
preFlightReject: false,
|
|
1851
|
+
});
|
|
1852
|
+
return;
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1072
1855
|
}
|
|
1073
1856
|
// Task passed — mark completed
|
|
1074
1857
|
this.taskQueue.markCompleted(taskId, taskResult);
|
|
1858
|
+
this.hollowStreak = 0;
|
|
1859
|
+
// F25: Clear timeout counter on success
|
|
1860
|
+
this.taskTimeoutCounts.delete(taskId);
|
|
1075
1861
|
// H6: Post findings to blackboard with error handling
|
|
1076
1862
|
if (this.blackboard && taskResult.findings) {
|
|
1077
1863
|
try {
|
|
@@ -1117,7 +1903,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1117
1903
|
const tasks = this.taskQueue.getAllTasks();
|
|
1118
1904
|
const outputs = tasks
|
|
1119
1905
|
.filter(t => t.status === 'completed')
|
|
1120
|
-
.map(t => taskResultToAgentOutput(t))
|
|
1906
|
+
.map(t => taskResultToAgentOutput(t, this.config))
|
|
1121
1907
|
.filter((o) => o !== null);
|
|
1122
1908
|
if (outputs.length === 0)
|
|
1123
1909
|
return null;
|
|
@@ -1147,11 +1933,17 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1147
1933
|
activeWorkers: this.workerPool.getActiveWorkerStatus(),
|
|
1148
1934
|
queue: stats,
|
|
1149
1935
|
budget: {
|
|
1150
|
-
tokensUsed: this.totalTokens,
|
|
1936
|
+
tokensUsed: this.totalTokens + this.orchestratorTokens,
|
|
1151
1937
|
tokensTotal: this.config.totalBudget,
|
|
1152
|
-
costUsed: this.totalCost,
|
|
1938
|
+
costUsed: this.totalCost + this.orchestratorCost,
|
|
1153
1939
|
costTotal: this.config.maxCost,
|
|
1154
1940
|
},
|
|
1941
|
+
orchestrator: {
|
|
1942
|
+
tokens: this.orchestratorTokens,
|
|
1943
|
+
cost: this.orchestratorCost,
|
|
1944
|
+
calls: this.orchestratorCalls,
|
|
1945
|
+
model: this.config.orchestratorModel,
|
|
1946
|
+
},
|
|
1155
1947
|
};
|
|
1156
1948
|
}
|
|
1157
1949
|
/**
|
|
@@ -1163,6 +1955,69 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1163
1955
|
this.currentPhase = 'failed';
|
|
1164
1956
|
await this.workerPool.cancelAll();
|
|
1165
1957
|
}
|
|
1958
|
+
// ─── D3: Model Capability Probing ─────────────────────────────────────
|
|
1959
|
+
/**
|
|
1960
|
+
* D3/F23: Probe each unique model to verify it can make tool calls.
|
|
1961
|
+
* Models that fail the probe are marked unhealthy so they're skipped in dispatch.
|
|
1962
|
+
*
|
|
1963
|
+
* F23 fix: Uses chatWithTools() with actual tool definitions instead of
|
|
1964
|
+
* plain chat() which never included tools in the API request.
|
|
1965
|
+
*/
|
|
1966
|
+
async probeModelCapability() {
|
|
1967
|
+
const uniqueModels = new Set(this.config.workers.map(w => w.model));
|
|
1968
|
+
this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Probing ${uniqueModels.size} model(s) for tool-calling capability...` });
|
|
1969
|
+
// F23: Check if provider supports native tool calling
|
|
1970
|
+
const supportsTools = 'chatWithTools' in this.provider
|
|
1971
|
+
&& typeof this.provider.chatWithTools === 'function';
|
|
1972
|
+
if (!supportsTools) {
|
|
1973
|
+
// Provider doesn't support chatWithTools — skip probe entirely.
|
|
1974
|
+
// Workers will rely on text-based tool parsing fallback.
|
|
1975
|
+
this.logDecision('model-probe', 'Provider does not support chatWithTools — skipping probe', '');
|
|
1976
|
+
return;
|
|
1977
|
+
}
|
|
1978
|
+
const providerWithTools = this.provider;
|
|
1979
|
+
const probeTools = [{
|
|
1980
|
+
type: 'function',
|
|
1981
|
+
function: {
|
|
1982
|
+
name: 'read_file',
|
|
1983
|
+
description: 'Read a file from disk',
|
|
1984
|
+
parameters: {
|
|
1985
|
+
type: 'object',
|
|
1986
|
+
properties: { path: { type: 'string', description: 'File path' } },
|
|
1987
|
+
required: ['path'],
|
|
1988
|
+
},
|
|
1989
|
+
},
|
|
1990
|
+
}];
|
|
1991
|
+
// F24: Configurable probe timeout — generous default for slow models/connections
|
|
1992
|
+
const probeTimeout = this.config.probeTimeoutMs ?? 60_000;
|
|
1993
|
+
for (const model of uniqueModels) {
|
|
1994
|
+
try {
|
|
1995
|
+
const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Probe timeout (${probeTimeout}ms)`)), probeTimeout));
|
|
1996
|
+
const response = await Promise.race([
|
|
1997
|
+
providerWithTools.chatWithTools([
|
|
1998
|
+
{ role: 'system', content: 'You are a test probe. Call the read_file tool with path "package.json".' },
|
|
1999
|
+
{ role: 'user', content: 'Read package.json.' },
|
|
2000
|
+
], { model, maxTokens: 200, temperature: 0, tools: probeTools, tool_choice: 'required' }),
|
|
2001
|
+
timeoutPromise,
|
|
2002
|
+
]);
|
|
2003
|
+
const hasToolCall = (response.toolCalls?.length ?? 0) > 0;
|
|
2004
|
+
if (!hasToolCall) {
|
|
2005
|
+
// F19: Directly mark unhealthy — probe failure is definitive evidence
|
|
2006
|
+
this.healthTracker.markUnhealthy(model);
|
|
2007
|
+
this.logDecision('model-probe', `Model ${model} failed probe (no tool calls)`, 'Marked unhealthy');
|
|
2008
|
+
}
|
|
2009
|
+
else {
|
|
2010
|
+
this.healthTracker.recordSuccess(model, 0);
|
|
2011
|
+
this.logDecision('model-probe', `Model ${model} passed probe`, '');
|
|
2012
|
+
}
|
|
2013
|
+
}
|
|
2014
|
+
catch {
|
|
2015
|
+
// F19: Directly mark unhealthy on probe error (includes timeout)
|
|
2016
|
+
this.healthTracker.markUnhealthy(model);
|
|
2017
|
+
this.logDecision('model-probe', `Model ${model} probe errored`, 'Marked unhealthy');
|
|
2018
|
+
}
|
|
2019
|
+
}
|
|
2020
|
+
}
|
|
1166
2021
|
// ─── Circuit Breaker ────────────────────────────────────────────────
|
|
1167
2022
|
/**
|
|
1168
2023
|
* Record a rate limit hit and check if the circuit breaker should trip.
|
|
@@ -1170,6 +2025,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1170
2025
|
recordRateLimit() {
|
|
1171
2026
|
const now = Date.now();
|
|
1172
2027
|
this.recentRateLimits.push(now);
|
|
2028
|
+
this.increaseStagger(); // P7: Back off on rate limits
|
|
1173
2029
|
// Prune entries older than the window
|
|
1174
2030
|
const cutoff = now - SwarmOrchestrator.CIRCUIT_BREAKER_WINDOW_MS;
|
|
1175
2031
|
this.recentRateLimits = this.recentRateLimits.filter(t => t > cutoff);
|
|
@@ -1197,6 +2053,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1197
2053
|
}
|
|
1198
2054
|
return false;
|
|
1199
2055
|
}
|
|
2056
|
+
// ─── P7: Adaptive Stagger ────────────────────────────────────────────
|
|
2057
|
+
/** P7: Get current stagger delay (adapts based on rate limit / success signals). */
|
|
2058
|
+
getStaggerMs() {
|
|
2059
|
+
return this.adaptiveStaggerMs;
|
|
2060
|
+
}
|
|
2061
|
+
/** P7: Increase stagger on rate limit (×1.5, capped at 10s). */
|
|
2062
|
+
increaseStagger() {
|
|
2063
|
+
this.adaptiveStaggerMs = Math.min(this.adaptiveStaggerMs * 1.5, 10_000);
|
|
2064
|
+
}
|
|
2065
|
+
/** P7: Decrease stagger on success (×0.9, floor at 200ms). */
|
|
2066
|
+
decreaseStagger() {
|
|
2067
|
+
this.adaptiveStaggerMs = Math.max(this.adaptiveStaggerMs * 0.9, 200);
|
|
2068
|
+
}
|
|
1200
2069
|
// ─── V2: Decision Logging ─────────────────────────────────────────────
|
|
1201
2070
|
logDecision(phase, decision, reasoning) {
|
|
1202
2071
|
const entry = {
|
|
@@ -1223,14 +2092,15 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1223
2092
|
waves: queueState.waves,
|
|
1224
2093
|
currentWave: queueState.currentWave,
|
|
1225
2094
|
stats: {
|
|
1226
|
-
totalTokens: this.totalTokens,
|
|
1227
|
-
totalCost: this.totalCost,
|
|
2095
|
+
totalTokens: this.totalTokens + this.orchestratorTokens,
|
|
2096
|
+
totalCost: this.totalCost + this.orchestratorCost,
|
|
1228
2097
|
qualityRejections: this.qualityRejections,
|
|
1229
2098
|
retries: this.retries,
|
|
1230
2099
|
},
|
|
1231
2100
|
modelHealth: this.healthTracker.getAllRecords(),
|
|
1232
2101
|
decisions: this.orchestratorDecisions,
|
|
1233
2102
|
errors: this.errors,
|
|
2103
|
+
originalPrompt: this.originalPrompt,
|
|
1234
2104
|
});
|
|
1235
2105
|
this.emit({
|
|
1236
2106
|
type: 'swarm.state.checkpoint',
|
|
@@ -1250,9 +2120,9 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1250
2120
|
emitBudgetUpdate() {
|
|
1251
2121
|
this.emit({
|
|
1252
2122
|
type: 'swarm.budget.update',
|
|
1253
|
-
tokensUsed: this.totalTokens,
|
|
2123
|
+
tokensUsed: this.totalTokens + this.orchestratorTokens,
|
|
1254
2124
|
tokensTotal: this.config.totalBudget,
|
|
1255
|
-
costUsed: this.totalCost,
|
|
2125
|
+
costUsed: this.totalCost + this.orchestratorCost,
|
|
1256
2126
|
costTotal: this.config.maxCost,
|
|
1257
2127
|
});
|
|
1258
2128
|
}
|
|
@@ -1267,8 +2137,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1267
2137
|
failedTasks: queueStats.failed,
|
|
1268
2138
|
skippedTasks: queueStats.skipped,
|
|
1269
2139
|
totalWaves: this.taskQueue.getTotalWaves(),
|
|
1270
|
-
totalTokens: this.totalTokens,
|
|
1271
|
-
totalCost: this.totalCost,
|
|
2140
|
+
totalTokens: this.totalTokens + this.orchestratorTokens,
|
|
2141
|
+
totalCost: this.totalCost + this.orchestratorCost,
|
|
1272
2142
|
totalDurationMs: Date.now() - this.startTime,
|
|
1273
2143
|
qualityRejections: this.qualityRejections,
|
|
1274
2144
|
retries: this.retries,
|
|
@@ -1293,6 +2163,16 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1293
2163
|
if (this.verificationResult) {
|
|
1294
2164
|
parts.push(` Verification: ${this.verificationResult.passed ? 'PASSED' : 'FAILED'}`);
|
|
1295
2165
|
}
|
|
2166
|
+
// Artifact inventory: show what files actually exist on disk regardless of task status
|
|
2167
|
+
if (this.artifactInventory && this.artifactInventory.totalFiles > 0) {
|
|
2168
|
+
parts.push(` Files on disk: ${this.artifactInventory.totalFiles} files (${(this.artifactInventory.totalBytes / 1024).toFixed(1)}KB)`);
|
|
2169
|
+
for (const f of this.artifactInventory.files.slice(0, 15)) {
|
|
2170
|
+
parts.push(` ${f.path}: ${f.sizeBytes}B`);
|
|
2171
|
+
}
|
|
2172
|
+
if (this.artifactInventory.files.length > 15) {
|
|
2173
|
+
parts.push(` ... and ${this.artifactInventory.files.length - 15} more`);
|
|
2174
|
+
}
|
|
2175
|
+
}
|
|
1296
2176
|
return parts.join('\n');
|
|
1297
2177
|
}
|
|
1298
2178
|
buildErrorResult(message) {
|
|
@@ -1319,6 +2199,698 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
|
|
|
1319
2199
|
return null;
|
|
1320
2200
|
}
|
|
1321
2201
|
}
|
|
2202
|
+
/**
|
|
2203
|
+
* Detect foundation tasks: tasks that are a dependency of 2+ downstream tasks.
|
|
2204
|
+
* These are critical single-points-of-failure — mark them for extra resilience.
|
|
2205
|
+
*/
|
|
2206
|
+
detectFoundationTasks() {
|
|
2207
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2208
|
+
const dependentCounts = new Map();
|
|
2209
|
+
for (const task of allTasks) {
|
|
2210
|
+
for (const depId of task.dependencies) {
|
|
2211
|
+
dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
|
|
2212
|
+
}
|
|
2213
|
+
}
|
|
2214
|
+
for (const task of allTasks) {
|
|
2215
|
+
const dependentCount = dependentCounts.get(task.id) ?? 0;
|
|
2216
|
+
if (dependentCount >= 2) {
|
|
2217
|
+
task.isFoundation = true;
|
|
2218
|
+
this.logDecision('scheduling', `Foundation task: ${task.id} (${dependentCount} dependents)`, 'Extra retries and relaxed quality threshold applied');
|
|
2219
|
+
}
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2222
|
+
/**
|
|
2223
|
+
* Extract file artifacts from a worker's output for quality gate visibility.
|
|
2224
|
+
* Reads actual file content from disk so the judge can verify real work,
|
|
2225
|
+
* not just text claims about what was created.
|
|
2226
|
+
*/
|
|
2227
|
+
extractFileArtifacts(task, taskResult) {
|
|
2228
|
+
const artifacts = [];
|
|
2229
|
+
const seen = new Set();
|
|
2230
|
+
// Collect file paths from multiple sources
|
|
2231
|
+
const candidatePaths = [];
|
|
2232
|
+
// 1. filesModified from structured closure report
|
|
2233
|
+
if (taskResult.filesModified) {
|
|
2234
|
+
candidatePaths.push(...taskResult.filesModified);
|
|
2235
|
+
}
|
|
2236
|
+
// 2. targetFiles from task definition
|
|
2237
|
+
if (task.targetFiles) {
|
|
2238
|
+
candidatePaths.push(...task.targetFiles);
|
|
2239
|
+
}
|
|
2240
|
+
// 3. Extract file paths mentioned in worker output (e.g., "Created src/foo.ts")
|
|
2241
|
+
const filePathPattern = /(?:created|wrote|modified|edited|updated)\s+["`']?([^\s"`',]+\.\w+)/gi;
|
|
2242
|
+
let match;
|
|
2243
|
+
while ((match = filePathPattern.exec(taskResult.output)) !== null) {
|
|
2244
|
+
candidatePaths.push(match[1]);
|
|
2245
|
+
}
|
|
2246
|
+
// Resolve against the target project directory, not CWD
|
|
2247
|
+
const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
|
|
2248
|
+
// Read previews from disk
|
|
2249
|
+
for (const filePath of candidatePaths) {
|
|
2250
|
+
if (seen.has(filePath))
|
|
2251
|
+
continue;
|
|
2252
|
+
seen.add(filePath);
|
|
2253
|
+
try {
|
|
2254
|
+
const resolved = path.resolve(baseDir, filePath);
|
|
2255
|
+
if (fs.existsSync(resolved)) {
|
|
2256
|
+
const content = fs.readFileSync(resolved, 'utf-8');
|
|
2257
|
+
if (content.length > 0) {
|
|
2258
|
+
artifacts.push({ path: filePath, preview: content.slice(0, 2000) });
|
|
2259
|
+
}
|
|
2260
|
+
}
|
|
2261
|
+
}
|
|
2262
|
+
catch {
|
|
2263
|
+
// Skip unreadable files
|
|
2264
|
+
}
|
|
2265
|
+
// Limit to 10 files to keep prompt size reasonable
|
|
2266
|
+
if (artifacts.length >= 10)
|
|
2267
|
+
break;
|
|
2268
|
+
}
|
|
2269
|
+
return artifacts;
|
|
2270
|
+
}
|
|
2271
|
+
/**
|
|
2272
|
+
* Build an inventory of filesystem artifacts produced during swarm execution.
|
|
2273
|
+
* Scans all tasks' targetFiles and readFiles to check what actually exists on disk.
|
|
2274
|
+
* This reveals work done by workers even when tasks "failed" (timeout, quality gate, etc.).
|
|
2275
|
+
*/
|
|
2276
|
+
buildArtifactInventory() {
|
|
2277
|
+
const allFiles = new Set();
|
|
2278
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
2279
|
+
for (const f of (task.targetFiles ?? []))
|
|
2280
|
+
allFiles.add(f);
|
|
2281
|
+
for (const f of (task.readFiles ?? []))
|
|
2282
|
+
allFiles.add(f);
|
|
2283
|
+
}
|
|
2284
|
+
const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
|
|
2285
|
+
const artifacts = [];
|
|
2286
|
+
for (const filePath of allFiles) {
|
|
2287
|
+
try {
|
|
2288
|
+
const resolved = path.resolve(baseDir, filePath);
|
|
2289
|
+
if (fs.existsSync(resolved)) {
|
|
2290
|
+
const stats = fs.statSync(resolved);
|
|
2291
|
+
if (stats.isFile() && stats.size > 0) {
|
|
2292
|
+
artifacts.push({ path: filePath, sizeBytes: stats.size, exists: true });
|
|
2293
|
+
}
|
|
2294
|
+
}
|
|
2295
|
+
}
|
|
2296
|
+
catch { /* skip unreadable files */ }
|
|
2297
|
+
}
|
|
2298
|
+
return {
|
|
2299
|
+
files: artifacts,
|
|
2300
|
+
totalFiles: artifacts.length,
|
|
2301
|
+
totalBytes: artifacts.reduce((s, a) => s + a.sizeBytes, 0),
|
|
2302
|
+
};
|
|
2303
|
+
}
|
|
2304
|
+
/**
|
|
2305
|
+
* Skip all remaining pending/ready tasks (used for early termination).
|
|
2306
|
+
*/
|
|
2307
|
+
skipRemainingTasks(reason) {
|
|
2308
|
+
for (const task of this.taskQueue.getAllTasks()) {
|
|
2309
|
+
if (task.status === 'pending' || task.status === 'ready') {
|
|
2310
|
+
task.status = 'skipped';
|
|
2311
|
+
this.emit({ type: 'swarm.task.skipped', taskId: task.id, reason });
|
|
2312
|
+
}
|
|
2313
|
+
}
|
|
2314
|
+
}
|
|
2315
|
+
/**
 * F21: Mid-swarm situational assessment after each wave.
 * Evaluates success rate and budget health, triages low-priority tasks when budget is tight.
 * Also detects stalled progress and triggers mid-swarm re-planning.
 *
 * @param {number} waveIndex - Zero-based index of the wave that just finished
 *   (logged as waveIndex + 1).
 * @returns {Promise<void>}
 */
async assessAndAdapt(waveIndex) {
    const stats = this.taskQueue.getStats();
    const budgetStats = this.budgetPool.getStats();
    // 1. Calculate success rate for this swarm run
    //    (attempted = completed + failed + skipped; Math.max avoids divide-by-zero).
    const successRate = stats.completed / Math.max(1, stats.completed + stats.failed + stats.skipped);
    // 2. Budget efficiency: tokens spent per completed task.
    //    Infinity when nothing has completed yet — the triage branch below is
    //    gated on stats.completed > 0, so Infinity never drives a skip.
    const tokensPerTask = stats.completed > 0
        ? (this.totalTokens / stats.completed)
        : Infinity;
    // 3. Remaining budget vs remaining tasks.
    //    The 0.5 factor means the budget is considered sufficient as long as it
    //    covers at least half the naive estimate.
    const remainingTasks = stats.total - stats.completed - stats.failed - stats.skipped;
    const estimatedTokensNeeded = remainingTasks * tokensPerTask;
    const budgetSufficient = budgetStats.tokensRemaining > estimatedTokensNeeded * 0.5;
    // Log the assessment for observability
    this.logDecision('mid-swarm-assessment', `After wave ${waveIndex + 1}: ${stats.completed}/${stats.total} completed (${(successRate * 100).toFixed(0)}%), ` +
        `${remainingTasks} remaining, ${budgetStats.tokensRemaining} tokens left`, budgetSufficient ? 'Budget looks sufficient' : 'Budget may be insufficient for remaining tasks');
    // 4. If budget is tight, prioritize: skip low-value remaining tasks.
    // Only triage if we have actual data (at least one completion to estimate from).
    if (!budgetSufficient && remainingTasks > 1 && stats.completed > 0) {
        // Prefer pausing over skipping: if workers are still running, wait for budget release
        const runningCount = stats.running ?? 0;
        if (runningCount > 0) {
            this.logDecision('budget-wait', 'Budget tight but workers still running — waiting for budget release', `${runningCount} workers active, ${budgetStats.tokensRemaining} tokens remaining`);
            return;
        }
        const expendableTasks = this.findExpendableTasks();
        // Hard cap: never skip more than 20% of remaining tasks in one triage pass
        const maxSkips = Math.max(1, Math.floor(remainingTasks * 0.2));
        if (expendableTasks.length > 0) {
            let currentEstimate = estimatedTokensNeeded;
            let skipped = 0;
            for (const task of expendableTasks) {
                if (skipped >= maxSkips)
                    break;
                // Stop trimming once we're within budget
                // (0.7 factor: accept once 70% of the estimate fits the remaining tokens).
                if (currentEstimate * 0.7 <= budgetStats.tokensRemaining)
                    break;
                task.status = 'skipped';
                skipped++;
                this.emit({ type: 'swarm.task.skipped', taskId: task.id,
                    reason: 'Budget conservation: skipping low-priority task to protect critical path' });
                this.logDecision('budget-triage', `Skipping ${task.id} (${task.type}, complexity ${task.complexity}) to conserve budget`, `${remainingTasks} tasks remain, ${budgetStats.tokensRemaining} tokens`);
                // Each skip removes one average task's worth of projected spend.
                currentEstimate -= tokensPerTask;
            }
        }
    }
    // 5. Stall detection: if progress ratio is too low, trigger re-plan.
    //    Requires at least 5 attempted tasks so a single early failure cannot trip it.
    const attemptedTasks = stats.completed + stats.failed + stats.skipped;
    if (attemptedTasks >= 5) {
        const progressRatio = stats.completed / Math.max(1, attemptedTasks);
        if (progressRatio < 0.4) {
            this.logDecision('stall-detected', `Progress stalled: ${stats.completed}/${attemptedTasks} tasks succeeded (${(progressRatio * 100).toFixed(0)}%)`, 'Triggering mid-swarm re-plan');
            this.emit({
                type: 'swarm.stall',
                progressRatio,
                attempted: attemptedTasks,
                completed: stats.completed,
            });
            await this.midSwarmReplan();
        }
    }
}
|
|
2382
|
+
/**
|
|
2383
|
+
* F21: Find expendable tasks — leaf tasks (no dependents) with lowest complexity.
|
|
2384
|
+
* These are the safest to skip when budget is tight.
|
|
2385
|
+
* Only tasks with complexity <= 2 are considered expendable.
|
|
2386
|
+
*/
|
|
2387
|
+
findExpendableTasks() {
|
|
2388
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2389
|
+
// Build reverse dependency map: which tasks depend on each task?
|
|
2390
|
+
const dependentCounts = new Map();
|
|
2391
|
+
for (const task of allTasks) {
|
|
2392
|
+
for (const depId of task.dependencies) {
|
|
2393
|
+
dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
|
|
2394
|
+
}
|
|
2395
|
+
}
|
|
2396
|
+
// Expendable = pending/ready, never attempted, no dependents, not foundation,
|
|
2397
|
+
// complexity <= 2 (simple leaf tasks only), lowest complexity first
|
|
2398
|
+
return allTasks
|
|
2399
|
+
.filter(t => (t.status === 'pending' || t.status === 'ready') &&
|
|
2400
|
+
t.attempts === 0 &&
|
|
2401
|
+
!t.isFoundation &&
|
|
2402
|
+
(t.complexity ?? 5) <= 2 &&
|
|
2403
|
+
(dependentCounts.get(t.id) ?? 0) === 0)
|
|
2404
|
+
.sort((a, b) => (a.complexity ?? 5) - (b.complexity ?? 5));
|
|
2405
|
+
}
|
|
2406
|
+
/**
 * Mid-swarm re-planning: when progress stalls, ask LLM to re-plan remaining work.
 * Creates simpler replacement tasks for stuck/failed work, building on what's already done.
 * Only triggers once per swarm execution (guarded by this.hasReplanned) to avoid
 * infinite re-planning loops.
 *
 * @returns {Promise<void>} Resolves after new tasks are queued (or the re-plan
 *   is abandoned); LLM/parse failures are logged, never thrown.
 */
async midSwarmReplan() {
    // One-shot guard: the flag is set before the LLM call so even a failed
    // attempt counts as the single allowed re-plan.
    if (this.hasReplanned)
        return;
    this.hasReplanned = true;
    const allTasks = this.taskQueue.getAllTasks();
    const completed = allTasks.filter(t => t.status === 'completed' || t.status === 'decomposed');
    const stuck = allTasks.filter(t => t.status === 'failed' || t.status === 'skipped');
    // Nothing to re-plan if no task is stuck.
    if (stuck.length === 0)
        return;
    // Build human-readable summaries for the prompt.
    const completedSummary = completed.map(t => `- ${t.description} [${t.type}] → completed${t.degraded ? ' (degraded)' : ''}`).join('\n') || '(none)';
    const stuckSummary = stuck.map(t => `- ${t.description} [${t.type}] → ${t.status} (${t.failureMode ?? 'unknown'})`).join('\n');
    // On-disk artifacts reveal partial work even from failed tasks.
    const artifactInventory = this.buildArtifactInventory();
    const artifactSummary = artifactInventory.files.map(f => `- ${f.path} (${f.sizeBytes}B)`).join('\n') || '(none)';
    const replanPrompt = `The swarm is stalled. Here's the situation:

COMPLETED WORK:
${completedSummary}

FILES ON DISK:
${artifactSummary}

STUCK TASKS (failed or skipped):
${stuckSummary}

Re-plan the remaining work. Create new subtasks that:
1. Build on what's already completed (don't redo work)
2. Are more focused in scope (but assign realistic complexity for the work involved — don't underestimate)
3. Can succeed independently (minimize dependencies)

Return JSON: { "subtasks": [{ "description": "...", "type": "implement|test|research|review|document|refactor", "complexity": 1-5, "dependencies": [], "relevantFiles": [] }] }
Return ONLY the JSON object, no other text.`;
    try {
        const response = await this.provider.chat([{ role: 'user', content: replanPrompt }]);
        this.trackOrchestratorUsage(response, 'mid-swarm-replan');
        const content = response.content ?? '';
        // Loose extraction: grab the outermost JSON object containing "subtasks".
        const jsonMatch = content.match(/\{[\s\S]*"subtasks"[\s\S]*\}/);
        if (!jsonMatch) {
            this.logDecision('replan-failed', 'LLM produced no parseable re-plan JSON', content.slice(0, 200));
            return;
        }
        const parsed = JSON.parse(jsonMatch[0]);
        if (!parsed.subtasks || parsed.subtasks.length === 0) {
            this.logDecision('replan-failed', 'LLM produced empty subtask list', '');
            return;
        }
        // Add new tasks from re-plan into current wave
        const newTasks = this.taskQueue.addReplanTasks(parsed.subtasks, this.taskQueue.getCurrentWave());
        this.logDecision('replan-success', `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`, newTasks.map(t => t.description).join('; '));
        this.emit({
            type: 'swarm.replan',
            stuckCount: stuck.length,
            newTaskCount: newTasks.length,
        });
        this.emit({
            type: 'swarm.orchestrator.decision',
            decision: {
                timestamp: Date.now(),
                phase: 'replan',
                decision: `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`,
                reasoning: newTasks.map(t => `${t.id}: ${t.description}`).join('; '),
            },
        });
    }
    catch (error) {
        // Re-plan is best-effort: log and continue rather than abort the swarm.
        this.logDecision('replan-failed', `Re-plan LLM call failed: ${error.message}`, '');
    }
}
|
|
2478
|
+
/**
 * Rescue cascade-skipped tasks that can still run.
 * After cascade-skip fires, assess whether skipped tasks can still be attempted:
 * - If all OTHER dependencies completed and the failed dep's artifacts exist on disk → un-skip
 * - If the task has no strict data dependency on the failed task (different file targets) → un-skip with warning
 *
 * @param {boolean} [lenient=false] - When true (final rescue pass): uses
 *   checkArtifactsEnhanced, tolerates one truly-missing dep, and counts
 *   transitive cascade victims (skipped deps) as recoverable context.
 * @returns {Array<object>} The tasks that were un-skipped.
 */
rescueCascadeSkipped(lenient = false) {
    const skippedTasks = this.taskQueue.getSkippedTasks();
    const rescued = [];
    for (const task of skippedTasks) {
        // No dependencies → the skip was not a cascade; nothing to rescue.
        if (task.dependencies.length === 0)
            continue;
        // Per-task classification counters for this task's dependencies.
        let completedDeps = 0;
        let failedDepsWithArtifacts = 0;
        let failedDepsWithoutArtifacts = 0;
        let skippedDepsBlockedBySkipped = 0;
        let totalDeps = 0;
        const failedDepDescriptions = [];
        for (const depId of task.dependencies) {
            const dep = this.taskQueue.getTask(depId);
            if (!dep)
                continue;
            totalDeps++;
            if (dep.status === 'completed' || dep.status === 'decomposed') {
                completedDeps++;
            }
            else if (dep.status === 'failed' || dep.status === 'skipped') {
                // V10: In lenient mode, use checkArtifactsEnhanced for broader detection
                const artifactReport = lenient ? checkArtifactsEnhanced(dep) : checkArtifacts(dep);
                if (artifactReport && artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length > 0) {
                    // The failed dep left non-empty files behind — usable context.
                    failedDepsWithArtifacts++;
                    failedDepDescriptions.push(`${dep.description} (failed but ${artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length} artifacts exist)`);
                }
                else {
                    // Check if this dep's target files exist on disk (may have been created by earlier attempt)
                    const targetFiles = dep.targetFiles ?? [];
                    const existingFiles = targetFiles.filter(f => {
                        try {
                            const resolved = path.resolve(this.config.facts?.workingDirectory ?? process.cwd(), f);
                            return fs.statSync(resolved).size > 0;
                        }
                        catch {
                            // Missing/unreadable file counts as not existing.
                            return false;
                        }
                    });
                    if (existingFiles.length > 0) {
                        failedDepsWithArtifacts++;
                        failedDepDescriptions.push(`${dep.description} (failed but ${existingFiles.length}/${targetFiles.length} target files exist)`);
                    }
                    else {
                        // Check if skipped task's targets don't overlap with the failed dep's targets
                        const taskTargets = new Set(task.targetFiles ?? []);
                        const depTargets = new Set(dep.targetFiles ?? []);
                        const hasOverlap = [...taskTargets].some(f => depTargets.has(f));
                        if (!hasOverlap && taskTargets.size > 0) {
                            // Different file targets — task probably doesn't need the failed dep's output
                            failedDepsWithArtifacts++;
                            failedDepDescriptions.push(`${dep.description} (failed, no file overlap — likely independent)`);
                        }
                        else if (lenient && dep.status === 'skipped') {
                            // V10: In lenient mode, count skipped-by-skipped deps separately
                            // (transitive cascade — the dep itself was a victim, not truly broken)
                            skippedDepsBlockedBySkipped++;
                            failedDepDescriptions.push(`${dep.description} (skipped — transitive cascade victim)`);
                        }
                        else {
                            // Truly missing: failed with no artifacts and overlapping targets.
                            failedDepsWithoutArtifacts++;
                        }
                    }
                }
            }
        }
        // Rescue condition:
        // Normal: all failed deps have artifacts or are independent, AND at least some deps completed
        // Lenient: tolerate up to 1 truly-missing dep, and count transitive cascade victims as recoverable
        const effectiveWithout = failedDepsWithoutArtifacts;
        const maxMissing = lenient ? 1 : 0;
        const hasEnoughContext = lenient ? (completedDeps + failedDepsWithArtifacts + skippedDepsBlockedBySkipped > 0) : (completedDeps > 0);
        if (totalDeps > 0 && effectiveWithout <= maxMissing && hasEnoughContext) {
            const rescueContext = `Rescued from cascade-skip${lenient ? ' (lenient)' : ''}: ${completedDeps}/${totalDeps} deps completed, ` +
                `${failedDepsWithArtifacts} failed deps have artifacts${skippedDepsBlockedBySkipped > 0 ? `, ${skippedDepsBlockedBySkipped} transitive cascade victims` : ''}. ${failedDepDescriptions.join('; ')}`;
            this.taskQueue.rescueTask(task.id, rescueContext);
            rescued.push(task);
            this.logDecision('cascade-rescue', `${task.id}: rescued from cascade-skip${lenient ? ' (lenient)' : ''}`, rescueContext);
        }
    }
    return rescued;
}
|
|
2566
|
+
/**
|
|
2567
|
+
* Final rescue pass — runs after executeWaves() finishes.
|
|
2568
|
+
* Uses lenient mode to rescue cascade-skipped tasks that have partial context.
|
|
2569
|
+
* Re-dispatches rescued tasks in a final wave.
|
|
2570
|
+
*/
|
|
2571
|
+
async finalRescuePass() {
|
|
2572
|
+
const skipped = this.taskQueue.getSkippedTasks();
|
|
2573
|
+
if (skipped.length === 0)
|
|
2574
|
+
return;
|
|
2575
|
+
this.logDecision('final-rescue', `${skipped.length} skipped tasks — running final rescue pass`, '');
|
|
2576
|
+
const rescued = this.rescueCascadeSkipped(true); // lenient=true
|
|
2577
|
+
if (rescued.length > 0) {
|
|
2578
|
+
this.logDecision('final-rescue', `Rescued ${rescued.length} tasks`, rescued.map(t => t.id).join(', '));
|
|
2579
|
+
await this.executeWave(rescued);
|
|
2580
|
+
}
|
|
2581
|
+
}
|
|
2582
|
+
/**
|
|
2583
|
+
* Try resilience recovery strategies before hard-failing a task.
|
|
2584
|
+
* Called from dispatch-cap, timeout, hollow, and error paths to avoid bypassing resilience.
|
|
2585
|
+
*
|
|
2586
|
+
* Strategies (in order):
|
|
2587
|
+
* 1. Micro-decomposition — break complex failing tasks into subtasks
|
|
2588
|
+
* 2. Degraded acceptance — accept partial work if artifacts exist on disk
|
|
2589
|
+
*
|
|
2590
|
+
* Returns true if recovery succeeded (caller should return), false if hard-fail should proceed.
|
|
2591
|
+
*/
|
|
2592
|
+
async tryResilienceRecovery(task, taskId, taskResult, spawnResult) {
|
|
2593
|
+
// Strategy 1: Micro-decompose complex tasks into smaller subtasks
|
|
2594
|
+
// V10: Lowered threshold from >= 6 to >= 4 so moderately complex tasks can be recovered
|
|
2595
|
+
if ((task.complexity ?? 0) >= 4 && task.attempts >= 2 && this.budgetPool.hasCapacity()) {
|
|
2596
|
+
const subtasks = await this.microDecompose(task);
|
|
2597
|
+
if (subtasks && subtasks.length >= 2) {
|
|
2598
|
+
// Reset task status so replaceWithSubtasks can mark it as decomposed
|
|
2599
|
+
task.status = 'dispatched';
|
|
2600
|
+
this.taskQueue.replaceWithSubtasks(taskId, subtasks);
|
|
2601
|
+
this.logDecision('micro-decompose', `${taskId}: decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
|
|
2602
|
+
this.emit({
|
|
2603
|
+
type: 'swarm.task.failed',
|
|
2604
|
+
taskId,
|
|
2605
|
+
error: `Micro-decomposed into ${subtasks.length} subtasks`,
|
|
2606
|
+
attempt: task.attempts,
|
|
2607
|
+
maxAttempts: this.config.maxDispatchesPerTask ?? 5,
|
|
2608
|
+
willRetry: false,
|
|
2609
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
2610
|
+
failureMode: task.failureMode,
|
|
2611
|
+
});
|
|
2612
|
+
this.emit({
|
|
2613
|
+
type: 'swarm.task.resilience',
|
|
2614
|
+
taskId,
|
|
2615
|
+
strategy: 'micro-decompose',
|
|
2616
|
+
succeeded: true,
|
|
2617
|
+
reason: `Decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`,
|
|
2618
|
+
artifactsFound: 0,
|
|
2619
|
+
toolCalls: spawnResult.metrics.toolCalls ?? 0,
|
|
2620
|
+
});
|
|
2621
|
+
return true;
|
|
2622
|
+
}
|
|
2623
|
+
// Micro-decompose was attempted but didn't produce usable subtasks
|
|
2624
|
+
if ((task.complexity ?? 0) < 4) {
|
|
2625
|
+
this.logDecision('resilience-skip', `${taskId}: skipped micro-decompose — complexity ${task.complexity} < 4`, '');
|
|
2626
|
+
}
|
|
2627
|
+
}
|
|
2628
|
+
// Strategy 2: Degraded acceptance — check if any attempt produced files on disk.
|
|
2629
|
+
// V10: Use checkArtifactsEnhanced for broader detection (filesModified, closureReport, output)
|
|
2630
|
+
const artifactReport = checkArtifactsEnhanced(task, taskResult);
|
|
2631
|
+
const existingArtifacts = artifactReport.files.filter(f => f.exists && f.sizeBytes > 0);
|
|
2632
|
+
const hasArtifacts = existingArtifacts.length > 0;
|
|
2633
|
+
// V10: Fix timeout detection — toolCalls=-1 means timeout (worker WAS working)
|
|
2634
|
+
const toolCalls = spawnResult.metrics.toolCalls ?? 0;
|
|
2635
|
+
const hadToolCalls = toolCalls > 0 || toolCalls === -1
|
|
2636
|
+
|| (taskResult.filesModified && taskResult.filesModified.length > 0);
|
|
2637
|
+
if (hasArtifacts || hadToolCalls) {
|
|
2638
|
+
// Accept with degraded flag — prevents cascade-skip of dependents
|
|
2639
|
+
taskResult.success = true;
|
|
2640
|
+
taskResult.degraded = true;
|
|
2641
|
+
taskResult.qualityScore = 2; // Capped at low quality
|
|
2642
|
+
taskResult.qualityFeedback = 'Degraded acceptance: retries exhausted but filesystem artifacts exist';
|
|
2643
|
+
task.degraded = true;
|
|
2644
|
+
// Reset status so markCompleted works (markFailed may have set it to 'failed')
|
|
2645
|
+
task.status = 'dispatched';
|
|
2646
|
+
this.taskQueue.markCompleted(taskId, taskResult);
|
|
2647
|
+
this.hollowStreak = 0;
|
|
2648
|
+
this.logDecision('degraded-acceptance', `${taskId}: accepted as degraded — ${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`, 'Prevents cascade-skip of dependent tasks');
|
|
2649
|
+
this.emit({
|
|
2650
|
+
type: 'swarm.task.completed',
|
|
2651
|
+
taskId,
|
|
2652
|
+
success: true,
|
|
2653
|
+
tokensUsed: taskResult.tokensUsed,
|
|
2654
|
+
costUsed: taskResult.costUsed,
|
|
2655
|
+
durationMs: taskResult.durationMs,
|
|
2656
|
+
qualityScore: 2,
|
|
2657
|
+
qualityFeedback: 'Degraded acceptance',
|
|
2658
|
+
output: taskResult.output,
|
|
2659
|
+
toolCalls: spawnResult.metrics.toolCalls,
|
|
2660
|
+
});
|
|
2661
|
+
this.emit({
|
|
2662
|
+
type: 'swarm.task.resilience',
|
|
2663
|
+
taskId,
|
|
2664
|
+
strategy: 'degraded-acceptance',
|
|
2665
|
+
succeeded: true,
|
|
2666
|
+
reason: `${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`,
|
|
2667
|
+
artifactsFound: existingArtifacts.length,
|
|
2668
|
+
toolCalls,
|
|
2669
|
+
});
|
|
2670
|
+
return true;
|
|
2671
|
+
}
|
|
2672
|
+
// Both strategies failed — log exhaustion for traceability
|
|
2673
|
+
this.logDecision('resilience-exhausted', `${taskId}: no recovery — artifacts: ${existingArtifacts.length}, toolCalls: ${toolCalls}, filesModified: ${taskResult.filesModified?.length ?? 0}`, '');
|
|
2674
|
+
this.emit({
|
|
2675
|
+
type: 'swarm.task.resilience',
|
|
2676
|
+
taskId,
|
|
2677
|
+
strategy: 'none',
|
|
2678
|
+
succeeded: false,
|
|
2679
|
+
reason: `No artifacts found, toolCalls=${toolCalls}, filesModified=${taskResult.filesModified?.length ?? 0}`,
|
|
2680
|
+
artifactsFound: existingArtifacts.length,
|
|
2681
|
+
toolCalls,
|
|
2682
|
+
});
|
|
2683
|
+
return false;
|
|
2684
|
+
}
|
|
2685
|
+
/**
|
|
2686
|
+
* Micro-decompose a complex task into 2-3 smaller subtasks using the LLM.
|
|
2687
|
+
* Called when a complex task (complexity >= 6) fails 2+ times with the same failure mode.
|
|
2688
|
+
* Returns null if decomposition doesn't make sense or LLM can't produce valid subtasks.
|
|
2689
|
+
*/
|
|
2690
|
+
async microDecompose(task) {
|
|
2691
|
+
if ((task.complexity ?? 0) < 4)
|
|
2692
|
+
return null;
|
|
2693
|
+
try {
|
|
2694
|
+
const prompt = `Task "${task.description}" failed ${task.attempts} times on model ${task.assignedModel ?? 'unknown'}.
|
|
2695
|
+
The task has complexity ${task.complexity}/10 and type "${task.type}".
|
|
2696
|
+
${task.targetFiles?.length ? `Target files: ${task.targetFiles.join(', ')}` : ''}
|
|
2697
|
+
|
|
2698
|
+
Break this task into 2-3 smaller, independent subtasks that each handle a portion of the work.
|
|
2699
|
+
Each subtask MUST be simpler (complexity <= ${Math.ceil(task.complexity / 2)}).
|
|
2700
|
+
Each subtask should be self-contained and produce concrete file changes.
|
|
2701
|
+
|
|
2702
|
+
Return JSON ONLY (no markdown, no explanation):
|
|
2703
|
+
{
|
|
2704
|
+
"subtasks": [
|
|
2705
|
+
{ "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number> }
|
|
2706
|
+
]
|
|
2707
|
+
}`;
|
|
2708
|
+
const response = await this.provider.chat([
|
|
2709
|
+
{ role: 'system', content: 'You are a task decomposition assistant. Return only valid JSON.' },
|
|
2710
|
+
{ role: 'user', content: prompt },
|
|
2711
|
+
], {
|
|
2712
|
+
model: this.config.orchestratorModel,
|
|
2713
|
+
maxTokens: 2000,
|
|
2714
|
+
temperature: 0.3,
|
|
2715
|
+
});
|
|
2716
|
+
this.trackOrchestratorUsage(response, 'micro-decompose');
|
|
2717
|
+
// Parse response — handle markdown code blocks
|
|
2718
|
+
let jsonStr = response.content.trim();
|
|
2719
|
+
const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
2720
|
+
if (codeBlockMatch)
|
|
2721
|
+
jsonStr = codeBlockMatch[1].trim();
|
|
2722
|
+
const parsed = JSON.parse(jsonStr);
|
|
2723
|
+
if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
|
|
2724
|
+
return null;
|
|
2725
|
+
}
|
|
2726
|
+
const subtasks = parsed.subtasks.map((sub, idx) => ({
|
|
2727
|
+
id: `${task.id}-sub${idx + 1}`,
|
|
2728
|
+
description: sub.description,
|
|
2729
|
+
type: sub.type ?? task.type,
|
|
2730
|
+
dependencies: [], // Will be set by replaceWithSubtasks
|
|
2731
|
+
status: 'ready',
|
|
2732
|
+
complexity: Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1),
|
|
2733
|
+
wave: task.wave,
|
|
2734
|
+
targetFiles: sub.targetFiles ?? [],
|
|
2735
|
+
readFiles: task.readFiles,
|
|
2736
|
+
attempts: 0,
|
|
2737
|
+
}));
|
|
2738
|
+
return subtasks;
|
|
2739
|
+
}
|
|
2740
|
+
catch (error) {
|
|
2741
|
+
this.logDecision('micro-decompose', `${task.id}: micro-decomposition failed — ${error.message}`, 'Falling through to normal failure path');
|
|
2742
|
+
return null;
|
|
2743
|
+
}
|
|
2744
|
+
}
|
|
2745
|
+
// ─── Pre-Dispatch Auto-Split ──────────────────────────────────────────────
|
|
2746
|
+
/**
|
|
2747
|
+
* Heuristic pre-filter: should this task be considered for auto-split?
|
|
2748
|
+
* Cheap check — no LLM call. Returns true if all conditions are met.
|
|
2749
|
+
*/
|
|
2750
|
+
shouldAutoSplit(task) {
|
|
2751
|
+
const cfg = this.config.autoSplit;
|
|
2752
|
+
if (cfg?.enabled === false)
|
|
2753
|
+
return false;
|
|
2754
|
+
const floor = cfg?.complexityFloor ?? 6;
|
|
2755
|
+
const splittable = cfg?.splittableTypes ?? ['implement', 'refactor', 'test'];
|
|
2756
|
+
// Only first attempts — retries use micro-decompose
|
|
2757
|
+
if (task.attempts > 0)
|
|
2758
|
+
return false;
|
|
2759
|
+
// Complexity check
|
|
2760
|
+
if ((task.complexity ?? 0) < floor)
|
|
2761
|
+
return false;
|
|
2762
|
+
// Type check
|
|
2763
|
+
if (!splittable.includes(task.type))
|
|
2764
|
+
return false;
|
|
2765
|
+
// Must be on critical path (foundation task)
|
|
2766
|
+
if (!task.isFoundation)
|
|
2767
|
+
return false;
|
|
2768
|
+
// Budget capacity check
|
|
2769
|
+
if (!this.budgetPool.hasCapacity())
|
|
2770
|
+
return false;
|
|
2771
|
+
return true;
|
|
2772
|
+
}
|
|
2773
|
+
/**
|
|
2774
|
+
* LLM judge call: ask the orchestrator model whether and how to split a task.
|
|
2775
|
+
* Returns { shouldSplit: false } or { shouldSplit: true, subtasks: [...] }.
|
|
2776
|
+
*/
|
|
2777
|
+
async judgeSplit(task) {
|
|
2778
|
+
const maxSubs = this.config.autoSplit?.maxSubtasks ?? 4;
|
|
2779
|
+
const prompt = `You are evaluating whether a task should be split into parallel subtasks before dispatch.
|
|
2780
|
+
|
|
2781
|
+
TASK: "${task.description}"
|
|
2782
|
+
TYPE: ${task.type}
|
|
2783
|
+
COMPLEXITY: ${task.complexity}/10
|
|
2784
|
+
TARGET FILES: ${task.targetFiles?.join(', ') || 'none specified'}
|
|
2785
|
+
DOWNSTREAM DEPENDENTS: This is a foundation task — other tasks are waiting on it.
|
|
2786
|
+
|
|
2787
|
+
Should this task be split into 2-${maxSubs} parallel subtasks that different workers can execute simultaneously?
|
|
2788
|
+
|
|
2789
|
+
SPLIT if:
|
|
2790
|
+
- The task involves multiple independent pieces of work (e.g., different files, different functions, different concerns)
|
|
2791
|
+
- Parallel execution would meaningfully reduce wall-clock time
|
|
2792
|
+
- The subtasks can produce useful output independently
|
|
2793
|
+
|
|
2794
|
+
DO NOT SPLIT if:
|
|
2795
|
+
- The work is conceptually atomic (one function, one algorithm, tightly coupled logic)
|
|
2796
|
+
- The subtasks would need to coordinate on the same files/functions
|
|
2797
|
+
- Splitting would add more overhead than it saves
|
|
2798
|
+
|
|
2799
|
+
Return JSON ONLY:
|
|
2800
|
+
{
|
|
2801
|
+
"shouldSplit": true/false,
|
|
2802
|
+
"reason": "brief explanation",
|
|
2803
|
+
"subtasks": [
|
|
2804
|
+
{ "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number 1-10> }
|
|
2805
|
+
]
|
|
2806
|
+
}
|
|
2807
|
+
If shouldSplit is false, omit subtasks.`;
|
|
2808
|
+
const response = await this.provider.chat([
|
|
2809
|
+
{ role: 'system', content: 'You are a task planning judge. Return only valid JSON.' },
|
|
2810
|
+
{ role: 'user', content: prompt },
|
|
2811
|
+
], {
|
|
2812
|
+
model: this.config.orchestratorModel,
|
|
2813
|
+
maxTokens: 1500,
|
|
2814
|
+
temperature: 0.2,
|
|
2815
|
+
});
|
|
2816
|
+
this.trackOrchestratorUsage(response, 'auto-split-judge');
|
|
2817
|
+
// Parse response — reuse markdown code block stripping from microDecompose
|
|
2818
|
+
let jsonStr = response.content.trim();
|
|
2819
|
+
const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
|
|
2820
|
+
if (codeBlockMatch)
|
|
2821
|
+
jsonStr = codeBlockMatch[1].trim();
|
|
2822
|
+
const parsed = JSON.parse(jsonStr);
|
|
2823
|
+
if (!parsed.shouldSplit) {
|
|
2824
|
+
this.logDecision('auto-split', `${task.id}: judge says no split — ${parsed.reason}`, '');
|
|
2825
|
+
return { shouldSplit: false };
|
|
2826
|
+
}
|
|
2827
|
+
if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
|
|
2828
|
+
return { shouldSplit: false };
|
|
2829
|
+
}
|
|
2830
|
+
// Build SwarmTask[] from judge output (same pattern as microDecompose)
|
|
2831
|
+
const subtasks = parsed.subtasks.slice(0, maxSubs).map((sub, idx) => ({
|
|
2832
|
+
id: `${task.id}-split${idx + 1}`,
|
|
2833
|
+
description: sub.description,
|
|
2834
|
+
type: sub.type ?? task.type,
|
|
2835
|
+
dependencies: [],
|
|
2836
|
+
status: 'ready',
|
|
2837
|
+
complexity: Math.max(3, Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1)),
|
|
2838
|
+
wave: task.wave,
|
|
2839
|
+
targetFiles: sub.targetFiles ?? [],
|
|
2840
|
+
readFiles: task.readFiles,
|
|
2841
|
+
attempts: 0,
|
|
2842
|
+
rescueContext: `Auto-split from ${task.id} (original complexity ${task.complexity})`,
|
|
2843
|
+
}));
|
|
2844
|
+
this.logDecision('auto-split', `${task.id}: split into ${subtasks.length} subtasks — ${parsed.reason}`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
|
|
2845
|
+
return { shouldSplit: true, subtasks };
|
|
2846
|
+
}
|
|
2847
|
+
/**
|
|
2848
|
+
* V7: Compute effective retry limit for a task.
|
|
2849
|
+
* F10: Fixup tasks get max 2 retries (3 attempts total) — one full model-failover cycle.
|
|
2850
|
+
* Foundation tasks get +1 retry to reduce cascade failure risk.
|
|
2851
|
+
*/
|
|
2852
|
+
getEffectiveRetries(task) {
|
|
2853
|
+
const isFixup = 'fixesTaskId' in task;
|
|
2854
|
+
if (isFixup)
|
|
2855
|
+
return 2; // Fixup tasks: 2 retries max (3 attempts total)
|
|
2856
|
+
return task.isFoundation ? this.config.workerRetries + 1 : this.config.workerRetries;
|
|
2857
|
+
}
|
|
2858
|
+
/**
|
|
2859
|
+
* F22: Build a brief summary of swarm progress for retry context.
|
|
2860
|
+
* Helps retrying workers understand what the swarm has already accomplished.
|
|
2861
|
+
*/
|
|
2862
|
+
getSwarmProgressSummary() {
|
|
2863
|
+
const allTasks = this.taskQueue.getAllTasks();
|
|
2864
|
+
const completed = allTasks.filter(t => t.status === 'completed');
|
|
2865
|
+
if (completed.length === 0)
|
|
2866
|
+
return '';
|
|
2867
|
+
const lines = [];
|
|
2868
|
+
for (const task of completed) {
|
|
2869
|
+
const score = task.result?.qualityScore ? ` (${task.result.qualityScore}/5)` : '';
|
|
2870
|
+
lines.push(`- ${task.id}: ${task.description.slice(0, 80)}${score}`);
|
|
2871
|
+
}
|
|
2872
|
+
// Collect files created by completed tasks
|
|
2873
|
+
const files = new Set();
|
|
2874
|
+
const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
|
|
2875
|
+
for (const task of completed) {
|
|
2876
|
+
for (const f of (task.result?.filesModified ?? []))
|
|
2877
|
+
files.add(f);
|
|
2878
|
+
for (const f of (task.targetFiles ?? [])) {
|
|
2879
|
+
try {
|
|
2880
|
+
const resolved = path.resolve(baseDir, f);
|
|
2881
|
+
if (fs.existsSync(resolved))
|
|
2882
|
+
files.add(f);
|
|
2883
|
+
}
|
|
2884
|
+
catch { /* skip */ }
|
|
2885
|
+
}
|
|
2886
|
+
}
|
|
2887
|
+
const parts = [`The following tasks have completed successfully:\n${lines.join('\n')}`];
|
|
2888
|
+
if (files.size > 0) {
|
|
2889
|
+
parts.push(`Files already created/modified: ${[...files].slice(0, 20).join(', ')}`);
|
|
2890
|
+
parts.push('You can build on these existing files.');
|
|
2891
|
+
}
|
|
2892
|
+
return parts.join('\n');
|
|
2893
|
+
}
|
|
1322
2894
|
/** Get a model health summary for emitting events. */
|
|
1323
2895
|
getModelHealthSummary(model) {
|
|
1324
2896
|
const records = this.healthTracker.getAllRecords();
|