attocode 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. package/CHANGELOG.md +103 -3
  2. package/dist/src/agent.d.ts +6 -0
  3. package/dist/src/agent.d.ts.map +1 -1
  4. package/dist/src/agent.js +504 -49
  5. package/dist/src/agent.js.map +1 -1
  6. package/dist/src/cli.d.ts.map +1 -1
  7. package/dist/src/cli.js +23 -2
  8. package/dist/src/cli.js.map +1 -1
  9. package/dist/src/core/protocol/types.d.ts +8 -8
  10. package/dist/src/defaults.d.ts +6 -1
  11. package/dist/src/defaults.d.ts.map +1 -1
  12. package/dist/src/defaults.js +36 -2
  13. package/dist/src/defaults.js.map +1 -1
  14. package/dist/src/integrations/agent-registry.d.ts +11 -0
  15. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  16. package/dist/src/integrations/agent-registry.js.map +1 -1
  17. package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
  18. package/dist/src/integrations/auto-compaction.js +5 -1
  19. package/dist/src/integrations/auto-compaction.js.map +1 -1
  20. package/dist/src/integrations/bash-policy.d.ts +33 -0
  21. package/dist/src/integrations/bash-policy.d.ts.map +1 -0
  22. package/dist/src/integrations/bash-policy.js +142 -0
  23. package/dist/src/integrations/bash-policy.js.map +1 -0
  24. package/dist/src/integrations/codebase-context.d.ts +5 -0
  25. package/dist/src/integrations/codebase-context.d.ts.map +1 -1
  26. package/dist/src/integrations/codebase-context.js +33 -0
  27. package/dist/src/integrations/codebase-context.js.map +1 -1
  28. package/dist/src/integrations/delegation-protocol.js +2 -2
  29. package/dist/src/integrations/delegation-protocol.js.map +1 -1
  30. package/dist/src/integrations/economics.d.ts +42 -0
  31. package/dist/src/integrations/economics.d.ts.map +1 -1
  32. package/dist/src/integrations/economics.js +130 -14
  33. package/dist/src/integrations/economics.js.map +1 -1
  34. package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
  35. package/dist/src/integrations/hierarchical-config.js +17 -0
  36. package/dist/src/integrations/hierarchical-config.js.map +1 -1
  37. package/dist/src/integrations/index.d.ts +3 -1
  38. package/dist/src/integrations/index.d.ts.map +1 -1
  39. package/dist/src/integrations/index.js +3 -1
  40. package/dist/src/integrations/index.js.map +1 -1
  41. package/dist/src/integrations/policy-engine.d.ts +55 -0
  42. package/dist/src/integrations/policy-engine.d.ts.map +1 -0
  43. package/dist/src/integrations/policy-engine.js +247 -0
  44. package/dist/src/integrations/policy-engine.js.map +1 -0
  45. package/dist/src/integrations/safety.d.ts +5 -4
  46. package/dist/src/integrations/safety.d.ts.map +1 -1
  47. package/dist/src/integrations/safety.js +32 -7
  48. package/dist/src/integrations/safety.js.map +1 -1
  49. package/dist/src/integrations/sandbox/basic.d.ts +7 -0
  50. package/dist/src/integrations/sandbox/basic.d.ts.map +1 -1
  51. package/dist/src/integrations/sandbox/basic.js +27 -2
  52. package/dist/src/integrations/sandbox/basic.js.map +1 -1
  53. package/dist/src/integrations/sandbox/index.d.ts +6 -0
  54. package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
  55. package/dist/src/integrations/sandbox/index.js +3 -0
  56. package/dist/src/integrations/sandbox/index.js.map +1 -1
  57. package/dist/src/integrations/sandbox/landlock.d.ts.map +1 -1
  58. package/dist/src/integrations/sandbox/landlock.js +3 -0
  59. package/dist/src/integrations/sandbox/landlock.js.map +1 -1
  60. package/dist/src/integrations/self-improvement.d.ts.map +1 -1
  61. package/dist/src/integrations/self-improvement.js +12 -0
  62. package/dist/src/integrations/self-improvement.js.map +1 -1
  63. package/dist/src/integrations/smart-decomposer.d.ts +18 -1
  64. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  65. package/dist/src/integrations/smart-decomposer.js +72 -0
  66. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  67. package/dist/src/integrations/swarm/index.d.ts +1 -1
  68. package/dist/src/integrations/swarm/index.d.ts.map +1 -1
  69. package/dist/src/integrations/swarm/index.js.map +1 -1
  70. package/dist/src/integrations/swarm/model-selector.d.ts +15 -0
  71. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
  72. package/dist/src/integrations/swarm/model-selector.js +99 -20
  73. package/dist/src/integrations/swarm/model-selector.js.map +1 -1
  74. package/dist/src/integrations/swarm/swarm-budget.d.ts +4 -0
  75. package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -1
  76. package/dist/src/integrations/swarm/swarm-budget.js +6 -0
  77. package/dist/src/integrations/swarm/swarm-budget.js.map +1 -1
  78. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
  79. package/dist/src/integrations/swarm/swarm-config-loader.js +154 -7
  80. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
  81. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +12 -1
  82. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
  83. package/dist/src/integrations/swarm/swarm-event-bridge.js +170 -23
  84. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
  85. package/dist/src/integrations/swarm/swarm-events.d.ts +55 -1
  86. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
  87. package/dist/src/integrations/swarm/swarm-events.js +22 -5
  88. package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
  89. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +124 -8
  90. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
  91. package/dist/src/integrations/swarm/swarm-orchestrator.js +1668 -96
  92. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
  93. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +83 -2
  94. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
  95. package/dist/src/integrations/swarm/swarm-quality-gate.js +278 -19
  96. package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
  97. package/dist/src/integrations/swarm/task-queue.d.ts +44 -0
  98. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
  99. package/dist/src/integrations/swarm/task-queue.js +274 -11
  100. package/dist/src/integrations/swarm/task-queue.js.map +1 -1
  101. package/dist/src/integrations/swarm/types.d.ts +210 -13
  102. package/dist/src/integrations/swarm/types.d.ts.map +1 -1
  103. package/dist/src/integrations/swarm/types.js +61 -8
  104. package/dist/src/integrations/swarm/types.js.map +1 -1
  105. package/dist/src/integrations/swarm/worker-pool.d.ts +11 -4
  106. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
  107. package/dist/src/integrations/swarm/worker-pool.js +173 -43
  108. package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
  109. package/dist/src/integrations/tool-recommendation.d.ts +7 -4
  110. package/dist/src/integrations/tool-recommendation.d.ts.map +1 -1
  111. package/dist/src/integrations/tool-recommendation.js +58 -5
  112. package/dist/src/integrations/tool-recommendation.js.map +1 -1
  113. package/dist/src/integrations/work-log.js +4 -4
  114. package/dist/src/integrations/work-log.js.map +1 -1
  115. package/dist/src/main.js +26 -1
  116. package/dist/src/main.js.map +1 -1
  117. package/dist/src/modes/repl.d.ts.map +1 -1
  118. package/dist/src/modes/repl.js +10 -4
  119. package/dist/src/modes/repl.js.map +1 -1
  120. package/dist/src/modes/tui.d.ts.map +1 -1
  121. package/dist/src/modes/tui.js +5 -0
  122. package/dist/src/modes/tui.js.map +1 -1
  123. package/dist/src/modes.d.ts.map +1 -1
  124. package/dist/src/modes.js +4 -27
  125. package/dist/src/modes.js.map +1 -1
  126. package/dist/src/tools/agent.d.ts.map +1 -1
  127. package/dist/src/tools/agent.js +11 -2
  128. package/dist/src/tools/agent.js.map +1 -1
  129. package/dist/src/tools/bash.d.ts +3 -3
  130. package/dist/src/tools/coercion.d.ts +6 -0
  131. package/dist/src/tools/coercion.d.ts.map +1 -1
  132. package/dist/src/tools/coercion.js +13 -0
  133. package/dist/src/tools/coercion.js.map +1 -1
  134. package/dist/src/tools/file.d.ts +2 -2
  135. package/dist/src/tools/file.js +2 -2
  136. package/dist/src/tools/file.js.map +1 -1
  137. package/dist/src/tools/permission.d.ts.map +1 -1
  138. package/dist/src/tools/permission.js +4 -111
  139. package/dist/src/tools/permission.js.map +1 -1
  140. package/dist/src/tracing/trace-collector.d.ts +167 -0
  141. package/dist/src/tracing/trace-collector.d.ts.map +1 -1
  142. package/dist/src/tracing/trace-collector.js +137 -0
  143. package/dist/src/tracing/trace-collector.js.map +1 -1
  144. package/dist/src/tracing/types.d.ts +105 -1
  145. package/dist/src/tracing/types.d.ts.map +1 -1
  146. package/dist/src/tracing/types.js.map +1 -1
  147. package/dist/src/tui/app.d.ts.map +1 -1
  148. package/dist/src/tui/app.js +34 -5
  149. package/dist/src/tui/app.js.map +1 -1
  150. package/dist/src/types.d.ts +71 -0
  151. package/dist/src/types.d.ts.map +1 -1
  152. package/package.json +1 -1
@@ -16,28 +16,70 @@
16
16
  * - State persistence and resume
17
17
  * - Orchestrator decision logging
18
18
  */
19
- import { createSmartDecomposer, parseDecompositionResponse } from '../smart-decomposer.js';
19
+ import * as fs from 'node:fs';
20
+ import * as path from 'node:path';
21
+ import { createSmartDecomposer, parseDecompositionResponse, validateDecomposition } from '../smart-decomposer.js';
20
22
  import { createResultSynthesizer } from '../result-synthesizer.js';
21
- import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, SUBTASK_TO_CAPABILITY } from './types.js';
23
+ import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, getTaskTypeConfig } from './types.js';
22
24
  import { createSwarmTaskQueue } from './task-queue.js';
23
25
  import { createSwarmBudgetPool } from './swarm-budget.js';
24
26
  import { createSwarmWorkerPool } from './worker-pool.js';
25
- import { evaluateWorkerOutput } from './swarm-quality-gate.js';
27
+ import { evaluateWorkerOutput, runPreFlightChecks, checkArtifacts, checkArtifactsEnhanced, runConcreteChecks } from './swarm-quality-gate.js';
26
28
  import { ModelHealthTracker, selectAlternativeModel } from './model-selector.js';
27
29
  import { SwarmStateStore } from './swarm-state-store.js';
28
30
// ─── Hollow Completion Detection ──────────────────────────────────────────
/**
 * V11: Hollow completion detection — catches empty completions AND "success" with
 * failure language. Zero tool calls AND trivial output is always hollow. Additionally,
 * success=true with output containing failure admissions is also hollow — this catches
 * workers that report success but actually did no useful work.
 */
const FAILURE_INDICATORS = [
    'budget exhausted', 'unable to complete', 'could not complete',
    'ran out of budget', 'no changes were made', 'no files were modified',
    'no files were created', 'failed to complete', 'before research could begin',
    'i was unable to', 'i could not', 'unfortunately i',
];
const BOILERPLATE_INDICATORS = [
    'task completed successfully', 'i have completed the task',
    'the task has been completed', 'done', 'completed', 'finished',
    'no issues found', 'everything looks good', 'all tasks completed',
];
// Pre-built word-boundary matchers for boilerplate phrases. A bare substring test
// would flag outputs like "abandoned" (which contains "done") as boilerplate;
// \b anchors each phrase to word edges. Special regex chars are escaped defensively.
const BOILERPLATE_PATTERNS = BOILERPLATE_INDICATORS.map((phrase) => new RegExp(`\\b${phrase.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`));
/**
 * Decide whether a worker's completion is "hollow" (claimed done, did nothing useful).
 *
 * @param {object} spawnResult - Worker spawn result: { success, output, metrics: { toolCalls } }.
 *   `metrics` may be absent on malformed results; treated as zero tool calls.
 * @param {string} [taskType] - Optional task type; action-oriented types with
 *   `requiresToolCalls` treat zero tool calls as always hollow.
 * @param {object} [swarmConfig] - Optional swarm config; `hollowOutputThreshold`
 *   overrides the default 120-char trivial-output cutoff.
 * @returns {boolean} true when the completion should be treated as hollow.
 */
export function isHollowCompletion(spawnResult, taskType, swarmConfig) {
    // Timeout sentinel: toolCalls === -1 means the worker timed out — not hollow.
    // Optional chaining guards against spawn results that lack `metrics` entirely.
    const toolCalls = spawnResult.metrics?.toolCalls ?? 0;
    if (toolCalls === -1)
        return false;
    const trimmedLen = spawnResult.output?.trim().length ?? 0;
    // Truly empty completions: zero tools AND trivial output.
    // P4: Higher threshold (120 chars) + configurable via SwarmConfig.
    const hollowThreshold = swarmConfig?.hollowOutputThreshold ?? 120;
    if (toolCalls === 0 && trimmedLen < hollowThreshold) {
        return true;
    }
    // P4: Boilerplate detection — zero tools AND short output that's just boilerplate.
    if (toolCalls === 0 && trimmedLen < 300) {
        const outputLower = (spawnResult.output ?? '').toLowerCase().trim();
        if (BOILERPLATE_PATTERNS.some((re) => re.test(outputLower))) {
            return true;
        }
    }
    // "Success" that admits failure: worker claims success but output contains failure language.
    if (spawnResult.success) {
        const outputLower = (spawnResult.output ?? '').toLowerCase();
        if (FAILURE_INDICATORS.some((f) => outputLower.includes(f))) {
            return true;
        }
    }
    // V7: Use configurable requiresToolCalls from TaskTypeConfig.
    // For action-oriented tasks (implement/test/refactor/etc), zero tool calls is ALWAYS hollow.
    if (taskType) {
        const typeConfig = getTaskTypeConfig(taskType, swarmConfig);
        if (typeConfig.requiresToolCalls && toolCalls === 0) {
            return true;
        }
    }
    return false;
}
42
84
  // ─── Orchestrator ──────────────────────────────────────────────────────────
43
85
  export class SwarmOrchestrator {
@@ -61,10 +103,15 @@ export class SwarmOrchestrator {
61
103
  retries = 0;
62
104
  startTime = 0;
63
105
  modelUsage = new Map();
106
+ // Orchestrator's own LLM usage (separate from worker usage)
107
+ orchestratorTokens = 0;
108
+ orchestratorCost = 0;
109
+ orchestratorCalls = 0;
64
110
  // V2: Planning, review, verification, health, persistence
65
111
  plan;
66
112
  waveReviews = [];
67
113
  verificationResult;
114
+ artifactInventory;
68
115
  orchestratorDecisions = [];
69
116
  healthTracker;
70
117
  stateStore;
@@ -75,25 +122,63 @@ export class SwarmOrchestrator {
75
122
  static CIRCUIT_BREAKER_WINDOW_MS = 30_000;
76
123
  static CIRCUIT_BREAKER_THRESHOLD = 3;
77
124
  static CIRCUIT_BREAKER_PAUSE_MS = 15_000;
78
- // Quality gate circuit breaker: disable quality gates after too many consecutive rejections
79
- consecutiveQualityRejections = 0;
80
- qualityGateDisabled = false;
81
- static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 8;
125
+ // P3: Per-model quality gate circuit breaker (replaces global circuit breaker)
126
+ perModelQualityRejections = new Map();
127
+ qualityGateDisabledModels = new Set();
128
+ static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 5;
129
+ // Hollow completion streak: early termination when single-model swarm produces only hollows
130
+ hollowStreak = 0;
131
+ static HOLLOW_STREAK_THRESHOLD = 3;
132
+ // V7: Global dispatch + hollow ratio tracking for multi-model termination
133
+ totalDispatches = 0;
134
+ totalHollows = 0;
135
+ // Hollow ratio warning (fired once, then suppressed to avoid log spam)
136
+ hollowRatioWarned = false;
137
+ // P7: Adaptive dispatch stagger — increases on rate limits, decreases on success
138
+ adaptiveStaggerMs = 0; // Initialized from config in constructor
139
+ // F25: Consecutive timeout tracking per task — early-fail after limit
140
+ taskTimeoutCounts = new Map();
141
+ // Original prompt for re-planning on resume
142
+ originalPrompt = '';
143
+ // Mid-swarm re-planning: only once per swarm execution
144
+ hasReplanned = false;
82
145
  constructor(config, provider, agentRegistry, spawnAgentFn, blackboard) {
83
146
  this.config = { ...DEFAULT_SWARM_CONFIG, ...config };
84
147
  this.provider = provider;
85
148
  this.blackboard = blackboard;
86
149
  this.spawnAgentFn = spawnAgentFn;
87
150
  this.healthTracker = new ModelHealthTracker();
151
+ this.adaptiveStaggerMs = this.getStaggerMs();
88
152
  this.taskQueue = createSwarmTaskQueue();
89
153
  this.budgetPool = createSwarmBudgetPool(this.config);
90
- this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool);
154
+ this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool, this.healthTracker);
91
155
  // Initialize state store if persistence enabled
92
156
  if (this.config.enablePersistence) {
93
157
  this.stateStore = new SwarmStateStore(this.config.stateDir ?? '.agent/swarm-state', this.config.resumeSessionId);
94
158
  }
95
159
  // C1: Build LLM decompose function with explicit JSON schema
96
160
  const llmDecompose = async (task, _context) => {
161
+ // V7: Dynamically build the allowed type list from built-in + user-defined types
162
+ const builtinTypes = ['research', 'analysis', 'design', 'implement', 'test', 'refactor', 'review', 'document', 'integrate', 'deploy', 'merge'];
163
+ const customTypes = Object.keys(this.config.taskTypes ?? {}).filter(t => !builtinTypes.includes(t));
164
+ const allTypes = [...builtinTypes, ...customTypes];
165
+ const typeListStr = allTypes.map(t => `"${t}"`).join(' | ');
166
+ // Build custom type descriptions so the LLM knows when to use them
167
+ let customTypeSection = '';
168
+ if (customTypes.length > 0) {
169
+ const descriptions = customTypes.map(t => {
170
+ const cfg = this.config.taskTypes[t];
171
+ const parts = [` - "${t}"`];
172
+ if (cfg.capability)
173
+ parts.push(`(capability: ${cfg.capability})`);
174
+ if (cfg.promptTemplate)
175
+ parts.push(`— uses ${cfg.promptTemplate} workflow`);
176
+ if (cfg.timeout)
177
+ parts.push(`— timeout: ${Math.round(cfg.timeout / 60000)}min`);
178
+ return parts.join(' ');
179
+ }).join('\n');
180
+ customTypeSection = `\n\nCustom task types available:\n${descriptions}\nUse these when their description matches the subtask's purpose.`;
181
+ }
97
182
  const systemPrompt = `You are a task decomposition expert. Break down the given task into well-defined subtasks with clear dependencies.
98
183
 
99
184
  CRITICAL: Dependencies MUST use zero-based integer indices referring to other subtasks in the array.
@@ -103,7 +188,7 @@ Respond with valid JSON matching this exact schema:
103
188
  "subtasks": [
104
189
  {
105
190
  "description": "Clear description of what this subtask does",
106
- "type": "implement" | "research" | "analysis" | "design" | "test" | "refactor" | "review" | "document" | "integrate" | "deploy" | "merge",
191
+ "type": ${typeListStr},
107
192
  "complexity": 1-10,
108
193
  "dependencies": [0, 1],
109
194
  "parallelizable": true | false,
@@ -112,7 +197,7 @@ Respond with valid JSON matching this exact schema:
112
197
  ],
113
198
  "strategy": "sequential" | "parallel" | "hierarchical" | "adaptive" | "pipeline",
114
199
  "reasoning": "Brief explanation of why this decomposition was chosen"
115
- }
200
+ }${customTypeSection}
116
201
 
117
202
  EXAMPLE 1 — Research task (3 parallel research + 1 merge):
118
203
  {
@@ -152,6 +237,7 @@ Rules:
152
237
  maxTokens: 4000,
153
238
  temperature: 0.3,
154
239
  });
240
+ this.trackOrchestratorUsage(response, 'decompose');
155
241
  // Use parseDecompositionResponse which handles markdown code blocks and edge cases
156
242
  return parseDecompositionResponse(response.content);
157
243
  };
@@ -195,6 +281,25 @@ Rules:
195
281
  }
196
282
  }
197
283
  }
284
+ /**
285
+ * Track token usage from an orchestrator LLM call.
286
+ */
287
+ trackOrchestratorUsage(response, purpose) {
288
+ if (!response.usage)
289
+ return;
290
+ const tokens = response.usage.total_tokens ?? ((response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0));
291
+ const cost = tokens * 0.000015; // ~$15/M tokens average for orchestrator models
292
+ this.orchestratorTokens += tokens;
293
+ this.orchestratorCost += cost;
294
+ this.orchestratorCalls++;
295
+ this.emit({
296
+ type: 'swarm.orchestrator.llm',
297
+ model: this.config.orchestratorModel,
298
+ purpose,
299
+ tokens,
300
+ cost,
301
+ });
302
+ }
198
303
  /**
199
304
  * Execute the full swarm pipeline for a task.
200
305
  *
@@ -211,6 +316,7 @@ Rules:
211
316
  */
212
317
  async execute(task) {
213
318
  this.startTime = Date.now();
319
+ this.originalPrompt = task;
214
320
  try {
215
321
  // V2: Check for resume
216
322
  if (this.config.resumeSessionId && this.stateStore) {
@@ -219,15 +325,85 @@ Rules:
219
325
  // Phase 1: Decompose
220
326
  this.currentPhase = 'decomposing';
221
327
  this.emit({ type: 'swarm.phase.progress', phase: 'decomposing', message: 'Decomposing task into subtasks...' });
222
- const decomposition = await this.decompose(task);
328
+ let decomposition = await this.decompose(task);
223
329
  if (!decomposition) {
224
330
  this.currentPhase = 'failed';
225
331
  return this.buildErrorResult('Decomposition failed — task may be too simple for swarm mode');
226
332
  }
333
+ // F5: Validate decomposition — check for cycles, invalid deps, granularity
334
+ const validation = validateDecomposition(decomposition);
335
+ if (validation.warnings.length > 0) {
336
+ this.logDecision('decomposition-validation', `Warnings: ${validation.warnings.join('; ')}`, '');
337
+ }
338
+ if (!validation.valid) {
339
+ this.logDecision('decomposition-validation', `Invalid decomposition: ${validation.issues.join('; ')}`, 'Retrying...');
340
+ // Retry decomposition once with feedback
341
+ decomposition = await this.decompose(`${task}\n\nIMPORTANT: Previous decomposition was invalid: ${validation.issues.join('. ')}. Fix these issues.`);
342
+ if (!decomposition) {
343
+ this.currentPhase = 'failed';
344
+ return this.buildErrorResult(`Decomposition validation failed: ${validation.issues.join('; ')}`);
345
+ }
346
+ const retryValidation = validateDecomposition(decomposition);
347
+ if (!retryValidation.valid) {
348
+ this.logDecision('decomposition-validation', `Retry still invalid: ${retryValidation.issues.join('; ')}`, 'Proceeding anyway');
349
+ }
350
+ }
227
351
  // Phase 2: Schedule into waves
228
352
  this.currentPhase = 'scheduling';
229
353
  this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduling ${decomposition.subtasks.length} subtasks into waves...` });
230
354
  this.taskQueue.loadFromDecomposition(decomposition, this.config);
355
+ // F3: Dynamic orchestrator reserve scaling based on subtask count.
356
+ // More subtasks = more quality gate calls, synthesis work, and review overhead.
357
+ // Formula: max(configured ratio, 5% per subtask), capped at 40%.
358
+ const subtaskCount = decomposition.subtasks.length;
359
+ const dynamicReserveRatio = Math.min(0.40, Math.max(this.config.orchestratorReserveRatio, subtaskCount * 0.05));
360
+ if (dynamicReserveRatio > this.config.orchestratorReserveRatio) {
361
+ this.logDecision('budget-scaling', `Scaled orchestrator reserve from ${(this.config.orchestratorReserveRatio * 100).toFixed(0)}% to ${(dynamicReserveRatio * 100).toFixed(0)}% for ${subtaskCount} subtasks`, '');
362
+ }
363
+ // Foundation task detection: tasks that are the sole dependency of 3+ downstream
364
+ // tasks are critical — if they fail, the entire swarm cascade-skips.
365
+ // Give them extra retries and timeout scaling.
366
+ this.detectFoundationTasks();
367
+ // D3/F1: Probe model capability before dispatch (default: true)
368
+ if (this.config.probeModels !== false) {
369
+ await this.probeModelCapability();
370
+ // F15/F23: Handle all-models-failed probe scenario
371
+ // Resolve strategy: explicit probeFailureStrategy > legacy ignoreProbeFailures > default 'warn-and-try'
372
+ const probeStrategy = this.config.probeFailureStrategy
373
+ ?? (this.config.ignoreProbeFailures ? 'warn-and-try' : 'warn-and-try');
374
+ const uniqueModels = [...new Set(this.config.workers.map(w => w.model))];
375
+ const healthyModels = this.healthTracker.getHealthy(uniqueModels);
376
+ if (healthyModels.length === 0 && uniqueModels.length > 0) {
377
+ if (probeStrategy === 'abort') {
378
+ // Hard abort — no tasks dispatched
379
+ const reason = `All ${uniqueModels.length} worker model(s) failed capability probes — no model can make tool calls. Aborting swarm to prevent budget waste. Fix model configuration and retry.`;
380
+ this.logDecision('probe-abort', reason, `Models tested: ${uniqueModels.join(', ')}`);
381
+ this.emit({ type: 'swarm.abort', reason });
382
+ this.skipRemainingTasks(reason);
383
+ const totalTasks = this.taskQueue.getStats().total;
384
+ const abortStats = {
385
+ completedTasks: 0, failedTasks: 0, skippedTasks: totalTasks,
386
+ totalTasks, totalWaves: 0, totalTokens: 0, totalCost: 0,
387
+ totalDurationMs: Date.now() - this.startTime,
388
+ qualityRejections: 0, retries: 0,
389
+ modelUsage: new Map(),
390
+ };
391
+ this.emit({ type: 'swarm.complete', stats: abortStats, errors: this.errors });
392
+ return {
393
+ success: false, summary: reason,
394
+ tasks: this.taskQueue.getAllTasks(), stats: abortStats, errors: this.errors,
395
+ };
396
+ }
397
+ else {
398
+ // F23: warn-and-try — log warning, reset health, let real tasks prove capability
399
+ this.logDecision('probe-warning', `All ${uniqueModels.length} model(s) failed probe — continuing anyway (strategy: warn-and-try)`, 'Will abort after first real task failure if model cannot use tools');
400
+ // Reset health so dispatch doesn't skip all models
401
+ for (const model of uniqueModels) {
402
+ this.healthTracker.recordSuccess(model, 0);
403
+ }
404
+ }
405
+ }
406
+ }
231
407
  // Emit skip events when tasks are cascade-skipped due to dependency failures
232
408
  this.taskQueue.setOnCascadeSkip((skippedTaskId, reason) => {
233
409
  this.emit({ type: 'swarm.task.skipped', taskId: skippedTaskId, reason });
@@ -262,9 +438,14 @@ Rules:
262
438
  // Phase 3: Execute waves (planning runs concurrently)
263
439
  this.currentPhase = 'executing';
264
440
  await this.executeWaves();
441
+ // V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
442
+ if (!this.cancelled)
443
+ await this.finalRescuePass();
265
444
  // Ensure planning completed before verification/synthesis
266
445
  if (planPromise)
267
446
  await planPromise;
447
+ // Post-wave artifact audit: scan filesystem for files created by workers
448
+ this.artifactInventory = this.buildArtifactInventory();
268
449
  // V2: Phase 3.5: Verify integration
269
450
  if (this.config.enableVerification && this.plan?.integrationTestPlan) {
270
451
  this.currentPhase = 'verifying';
@@ -280,10 +461,14 @@ Rules:
280
461
  const executionStats = this.buildStats();
281
462
  // V2: Final checkpoint
282
463
  this.checkpoint('final');
283
- this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors });
464
+ const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
465
+ this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
284
466
  return {
285
467
  success: executionStats.completedTasks > 0,
468
+ partialSuccess: !executionStats.completedTasks && hasArtifacts,
469
+ partialFailure: executionStats.failedTasks > 0,
286
470
  synthesisResult: synthesisResult ?? undefined,
471
+ artifactInventory: this.artifactInventory,
287
472
  summary: this.buildSummary(executionStats),
288
473
  tasks: this.taskQueue.getAllTasks(),
289
474
  stats: executionStats,
@@ -383,6 +568,7 @@ Respond with valid JSON:
383
568
  maxTokens: 3000,
384
569
  temperature: 0.3,
385
570
  });
571
+ this.trackOrchestratorUsage(response, 'plan');
386
572
  const parsed = this.parseJSON(response.content);
387
573
  if (parsed) {
388
574
  this.plan = {
@@ -454,6 +640,7 @@ Respond with valid JSON:
454
640
  },
455
641
  { role: 'user', content: `Review these wave ${waveIndex + 1} outputs:\n\n${taskSummaries}` },
456
642
  ], { model: reviewModel, maxTokens: 2000, temperature: 0.3 });
643
+ this.trackOrchestratorUsage(response, 'review');
457
644
  const parsed = this.parseJSON(response.content);
458
645
  if (!parsed)
459
646
  return null;
@@ -578,6 +765,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
578
765
  },
579
766
  { role: 'user', content: `Original task: ${task}\n\nFailed verifications:\n${failedSteps}` },
580
767
  ], { model: this.config.plannerModel ?? this.config.orchestratorModel, maxTokens: 1500, temperature: 0.3 });
768
+ this.trackOrchestratorUsage(response, 'verification-fixup');
581
769
  const parsed = this.parseJSON(response.content);
582
770
  if (parsed?.fixups && parsed.fixups.length > 0) {
583
771
  const fixupTasks = parsed.fixups.map((f, i) => ({
@@ -628,6 +816,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
628
816
  this.logDecision('resume', `Resuming from wave ${checkpoint.currentWave}`, `Session: ${checkpoint.sessionId}`);
629
817
  this.emit({ type: 'swarm.state.resume', sessionId: checkpoint.sessionId, fromWave: checkpoint.currentWave });
630
818
  // Restore state
819
+ if (checkpoint.originalPrompt)
820
+ this.originalPrompt = checkpoint.originalPrompt;
631
821
  if (checkpoint.plan)
632
822
  this.plan = checkpoint.plan;
633
823
  if (checkpoint.modelHealth.length > 0)
@@ -657,9 +847,48 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
657
847
  if (resetCount > 0) {
658
848
  this.logDecision('resume', `Reset ${resetCount} orphaned dispatched tasks to ready`, 'Workers died with previous process');
659
849
  }
850
+ // Reset skipped tasks whose dependencies are now satisfied
851
+ let unskippedCount = 0;
852
+ for (const task of this.taskQueue.getAllTasks()) {
853
+ if (task.status === 'skipped') {
854
+ const deps = task.dependencies.map(id => this.taskQueue.getTask(id));
855
+ const allDepsSatisfied = deps.every(d => d && (d.status === 'completed' || d.status === 'decomposed'));
856
+ if (allDepsSatisfied) {
857
+ task.status = 'ready';
858
+ task.attempts = 0;
859
+ task.rescueContext = 'Recovered on resume — dependencies now satisfied';
860
+ unskippedCount++;
861
+ }
862
+ }
863
+ }
864
+ // Also reset failed tasks that have retry budget
865
+ for (const task of this.taskQueue.getAllTasks()) {
866
+ if (task.status === 'failed') {
867
+ task.status = 'ready';
868
+ task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
869
+ unskippedCount++;
870
+ }
871
+ }
872
+ if (unskippedCount > 0) {
873
+ this.logDecision('resume', `Recovered ${unskippedCount} skipped/failed tasks`, 'Fresh retry on resume');
874
+ }
875
+ // If many tasks are still stuck after un-skip, trigger re-plan
876
+ const resumeStats = this.taskQueue.getStats();
877
+ const stuckCount = resumeStats.failed + resumeStats.skipped;
878
+ const totalAttempted = resumeStats.completed + stuckCount;
879
+ if (totalAttempted > 0 && stuckCount / totalAttempted > 0.4) {
880
+ this.logDecision('resume-replan', `${stuckCount}/${totalAttempted} tasks still stuck after resume — triggering re-plan`, '');
881
+ this.hasReplanned = false; // Allow re-plan on resume
882
+ await this.midSwarmReplan();
883
+ }
660
884
  // Continue from where we left off
661
885
  this.currentPhase = 'executing';
662
886
  await this.executeWaves();
887
+ // V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
888
+ if (!this.cancelled)
889
+ await this.finalRescuePass();
890
+ // Post-wave artifact audit
891
+ this.artifactInventory = this.buildArtifactInventory();
663
892
  // Continue with verification and synthesis as normal
664
893
  if (this.config.enableVerification && this.plan?.integrationTestPlan) {
665
894
  this.currentPhase = 'verifying';
@@ -673,10 +902,14 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
673
902
  this.currentPhase = 'completed';
674
903
  const executionStats = this.buildStats();
675
904
  this.checkpoint('final');
676
- this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors });
905
+ const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
906
+ this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
677
907
  return {
678
908
  success: executionStats.completedTasks > 0,
909
+ partialSuccess: !executionStats.completedTasks && hasArtifacts,
910
+ partialFailure: executionStats.failedTasks > 0,
679
911
  synthesisResult: synthesisResult ?? undefined,
912
+ artifactInventory: this.artifactInventory,
680
913
  summary: this.buildSummary(executionStats),
681
914
  tasks: this.taskQueue.getAllTasks(),
682
915
  stats: executionStats,
@@ -693,6 +926,13 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
693
926
  while (waveIndex < totalWaves && !this.cancelled) {
694
927
  const readyTasks = this.taskQueue.getReadyTasks();
695
928
  const queueStats = this.taskQueue.getStats();
929
+ // F18: Skip empty waves — if no tasks are ready and none are running,
930
+ // remaining tasks are all blocked/failed/skipped. Break instead of
931
+ // running useless review cycles.
932
+ if (readyTasks.length === 0 && queueStats.running === 0 && queueStats.ready === 0) {
933
+ this.logDecision('wave-skip', `Skipping waves ${waveIndex + 1}-${totalWaves}: no dispatchable tasks remain`, `Stats: ${queueStats.completed} completed, ${queueStats.failed} failed, ${queueStats.skipped} skipped`);
934
+ break;
935
+ }
696
936
  this.emit({
697
937
  type: 'swarm.wave.start',
698
938
  wave: waveIndex + 1,
@@ -734,6 +974,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
734
974
  previousFeedback: 'All tasks in this batch failed. Try a fundamentally different approach — the previous strategy did not work.',
735
975
  previousScore: 0,
736
976
  attempt: task.attempts,
977
+ previousModel: task.assignedModel,
978
+ swarmProgress: this.getSwarmProgressSummary(),
737
979
  };
738
980
  }
739
981
  this.logDecision('wave-recovery', `Re-queued ${failedWaveTasks.length} tasks with adapted retry context`, 'Budget allows retry');
@@ -741,21 +983,46 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
741
983
  await this.executeWave(failedWaveTasks.map(t => this.taskQueue.getTask(t.id)).filter(t => t.status === 'ready'));
742
984
  }
743
985
  }
986
+ // F5: Adaptive re-decomposition — if < 50% of wave tasks succeeded,
987
+ // the decomposition may be structurally flawed. Log for observability.
988
+ // (Full re-decomposition of remaining work would require re-architecting the queue,
989
+ // so we log the signal and let wave retry + fixup handle recovery.)
990
+ const waveTotal = waveCompleted + waveFailed + waveSkipped;
991
+ const waveSuccessRate = waveTotal > 0 ? waveCompleted / waveTotal : 0;
992
+ if (waveSuccessRate < 0.5 && waveTotal >= 2) {
993
+ this.logDecision('decomposition-quality', `Wave ${waveIndex + 1} success rate ${(waveSuccessRate * 100).toFixed(0)}% (${waveCompleted}/${waveTotal})`, 'Low success rate may indicate decomposition quality issues');
994
+ }
744
995
  // V2: Review wave outputs
745
996
  const review = await this.reviewWave(waveIndex);
746
997
  if (review && review.fixupTasks.length > 0) {
747
998
  // Execute fix-up tasks immediately
748
999
  await this.executeWave(review.fixupTasks);
749
1000
  }
1001
+ // Rescue cascade-skipped tasks that can still run
1002
+ // (after wave review + fixup, some skipped tasks may now be viable)
1003
+ const rescued = this.rescueCascadeSkipped();
1004
+ if (rescued.length > 0) {
1005
+ this.logDecision('cascade-rescue', `Rescued ${rescued.length} cascade-skipped tasks after wave ${waveIndex + 1}`, rescued.map(t => t.id).join(', '));
1006
+ await this.executeWave(rescued);
1007
+ }
750
1008
  // Reset quality circuit breaker at wave boundary — each wave gets a fresh chance.
751
1009
  // Within a wave, rejections accumulate properly so the breaker can trip.
752
1010
  // Between waves, we reset so each wave gets a fresh quality evaluation window.
753
1011
  // (The within-wave reset at quality-gate-passed is kept — that's correct.)
754
- if (this.qualityGateDisabled) {
755
- this.qualityGateDisabled = false;
756
- this.consecutiveQualityRejections = 0;
757
- this.logDecision('quality-circuit-breaker', `Re-enabled quality gates at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
1012
+ if (this.qualityGateDisabledModels.size > 0) {
1013
+ this.qualityGateDisabledModels.clear();
1014
+ this.perModelQualityRejections.clear();
1015
+ this.logDecision('quality-circuit-breaker', `Re-enabled quality gates for all models at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
758
1016
  }
1017
+ // F3: Log budget reallocation after wave completion.
1018
+ // SharedBudgetPool already returns unused tokens via release(), but we log it
1019
+ // for observability so operators can see how budget flows between waves.
1020
+ const budgetStats = this.budgetPool.getStats();
1021
+ this.logDecision('budget-reallocation', `After wave ${waveIndex + 1}: ${budgetStats.tokensRemaining} tokens remaining (${(budgetStats.utilization * 100).toFixed(0)}% utilized)`, '');
1022
+ this.budgetPool.reallocateUnused(budgetStats.tokensRemaining);
1023
+ // F21: Mid-swarm situational assessment — evaluate success rate and budget health,
1024
+ // optionally triage low-priority tasks to conserve budget for critical path.
1025
+ await this.assessAndAdapt(waveIndex);
759
1026
  // V2: Checkpoint after each wave
760
1027
  this.checkpoint(`wave-${waveIndex}`);
761
1028
  // Advance to next wave
@@ -783,7 +1050,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
783
1050
  taskIndex++;
784
1051
  // Stagger dispatches to avoid rate limit storms
785
1052
  if (taskIndex < tasks.length && this.workerPool.availableSlots > 0) {
786
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1053
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
787
1054
  }
788
1055
  }
789
1056
  // Process completions and dispatch more tasks as slots open
@@ -804,7 +1071,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
804
1071
  await this.dispatchTask(task);
805
1072
  // Stagger dispatches to avoid rate limit storms
806
1073
  if (taskIndex + 1 < tasks.length && this.workerPool.availableSlots > 0) {
807
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1074
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
808
1075
  }
809
1076
  }
810
1077
  taskIndex++;
@@ -819,11 +1086,38 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
819
1086
  await this.dispatchTask(moreReady[i]);
820
1087
  // Stagger dispatches to avoid rate limit storms
821
1088
  if (i + 1 < moreReady.length && this.workerPool.availableSlots > 0) {
822
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1089
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
823
1090
  }
824
1091
  }
825
1092
  }
826
1093
  }
1094
+ // F20: Re-dispatch pass — after all workers finish, budget may have been freed
1095
+ // by completed tasks. Try to dispatch any still-ready tasks (e.g., those paused
1096
+ // by budget exhaustion earlier).
1097
+ if (!this.cancelled && this.budgetPool.hasCapacity()) {
1098
+ const stillReady = this.taskQueue.getAllReadyTasks()
1099
+ .filter(t => !this.workerPool.getActiveWorkerStatus().some(w => w.taskId === t.id));
1100
+ if (stillReady.length > 0) {
1101
+ this.logDecision('budget-redispatch', `Budget freed after wave — re-dispatching ${stillReady.length} ready task(s)`, `Budget: ${JSON.stringify(this.budgetPool.getStats())}`);
1102
+ for (const task of stillReady) {
1103
+ if (this.workerPool.availableSlots <= 0 || !this.budgetPool.hasCapacity())
1104
+ break;
1105
+ await this.dispatchTask(task);
1106
+ if (this.workerPool.availableSlots > 0) {
1107
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
1108
+ }
1109
+ }
1110
+ // Wait for these re-dispatched tasks to complete
1111
+ while (this.workerPool.activeCount > 0 && !this.cancelled) {
1112
+ const completed = await this.workerPool.waitForAny();
1113
+ if (!completed)
1114
+ break;
1115
+ await this.handleTaskCompletion(completed.taskId, completed.result, completed.startedAt);
1116
+ this.emitBudgetUpdate();
1117
+ this.emitStatusUpdate();
1118
+ }
1119
+ }
1120
+ }
827
1121
  }
828
1122
  /**
829
1123
  * Dispatch a single task to a worker.
@@ -833,45 +1127,111 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
833
1127
  const worker = this.workerPool.selectWorker(task);
834
1128
  if (!worker) {
835
1129
  // M2: Emit error and mark task failed instead of silently returning
836
- this.taskQueue.markFailed(task.id, 0);
1130
+ // V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
1131
+ this.logDecision('no-worker', `${task.id}: no worker for type ${task.type}`, '');
1132
+ if (task.attempts > 0) {
1133
+ const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
1134
+ const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
1135
+ if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
1136
+ return;
1137
+ }
1138
+ }
1139
+ this.taskQueue.markFailedWithoutCascade(task.id, 0);
1140
+ this.taskQueue.triggerCascadeSkip(task.id);
837
1141
  this.emit({
838
1142
  type: 'swarm.task.failed',
839
1143
  taskId: task.id,
840
1144
  error: `No worker available for task type: ${task.type}`,
841
- attempt: 0,
1145
+ attempt: task.attempts,
842
1146
  maxAttempts: 0,
843
1147
  willRetry: false,
1148
+ failureMode: 'error',
844
1149
  });
845
1150
  return;
846
1151
  }
847
1152
  try {
848
- this.taskQueue.markDispatched(task.id, worker.model);
1153
+ // Pre-dispatch auto-split for critical-path bottlenecks
1154
+ if (this.shouldAutoSplit(task)) {
1155
+ try {
1156
+ const splitResult = await this.judgeSplit(task);
1157
+ if (splitResult.shouldSplit && splitResult.subtasks) {
1158
+ task.status = 'dispatched'; // Required for replaceWithSubtasks
1159
+ this.taskQueue.replaceWithSubtasks(task.id, splitResult.subtasks);
1160
+ this.emit({
1161
+ type: 'swarm.task.resilience',
1162
+ taskId: task.id,
1163
+ strategy: 'auto-split',
1164
+ succeeded: true,
1165
+ reason: `Pre-dispatch split into ${splitResult.subtasks.length} parallel subtasks`,
1166
+ artifactsFound: 0,
1167
+ toolCalls: 0,
1168
+ });
1169
+ return; // Subtasks now in queue, will be dispatched this wave
1170
+ }
1171
+ }
1172
+ catch (err) {
1173
+ this.logDecision('auto-split', `${task.id}: split judge failed — ${err.message}`, '');
1174
+ // Fall through to normal dispatch
1175
+ }
1176
+ }
1177
+ this.totalDispatches++;
1178
+ const dispatchedModel = task.assignedModel ?? worker.model;
1179
+ this.taskQueue.markDispatched(task.id, dispatchedModel);
1180
+ if (task.assignedModel && task.assignedModel !== worker.model) {
1181
+ this.logDecision('failover', `Dispatching ${task.id} with failover model ${task.assignedModel} (worker default: ${worker.model})`, 'Retry model override is active');
1182
+ }
849
1183
  // Pass the pre-selected worker to avoid double-selection in dispatch()
850
1184
  await this.workerPool.dispatch(task, worker);
851
1185
  this.emit({
852
1186
  type: 'swarm.task.dispatched',
853
1187
  taskId: task.id,
854
1188
  description: task.description,
855
- model: worker.model,
1189
+ model: dispatchedModel,
856
1190
  workerName: worker.name,
1191
+ toolCount: worker.allowedTools?.length ?? -1, // -1 = all tools
1192
+ tools: worker.allowedTools,
1193
+ retryContext: task.retryContext,
1194
+ fromModel: task.retryContext ? task.retryContext.previousModel : undefined,
1195
+ attempts: task.attempts,
857
1196
  });
858
1197
  }
859
1198
  catch (error) {
1199
+ const errorMsg = error.message;
1200
+ // F20: Budget exhaustion is NOT a task failure — the task is fine, we just ran out of money.
1201
+ // Reset status to ready so it can be picked up if budget becomes available
1202
+ // (e.g., after tokens are released from completing tasks).
1203
+ if (errorMsg.includes('Budget pool exhausted')) {
1204
+ task.status = 'ready';
1205
+ this.logDecision('budget-pause', `Cannot dispatch ${task.id}: budget exhausted — task kept ready for potential re-dispatch`, `Budget stats: ${JSON.stringify(this.budgetPool.getStats())}`);
1206
+ return;
1207
+ }
860
1208
  this.errors.push({
861
1209
  taskId: task.id,
862
1210
  phase: 'dispatch',
863
- message: error.message,
1211
+ message: errorMsg,
864
1212
  recovered: false,
865
1213
  });
1214
+ this.logDecision('dispatch-error', `${task.id}: dispatch failed: ${errorMsg.slice(0, 100)}`, `attempts: ${task.attempts}`);
1215
+ // V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
1216
+ if (task.attempts > 0) {
1217
+ const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
1218
+ const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
1219
+ if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
1220
+ this.errors[this.errors.length - 1].recovered = true;
1221
+ return;
1222
+ }
1223
+ }
1224
+ this.taskQueue.markFailedWithoutCascade(task.id, 0);
1225
+ this.taskQueue.triggerCascadeSkip(task.id);
866
1226
  this.emit({
867
1227
  type: 'swarm.task.failed',
868
1228
  taskId: task.id,
869
- error: error.message,
1229
+ error: errorMsg,
870
1230
  attempt: task.attempts,
871
1231
  maxAttempts: 1 + this.config.workerRetries,
872
1232
  willRetry: false,
1233
+ failureMode: 'error',
873
1234
  });
874
- this.taskQueue.markFailed(task.id, 0);
875
1235
  }
876
1236
  }
877
1237
  /**
@@ -881,9 +1241,36 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
881
1241
  const task = this.taskQueue.getTask(taskId);
882
1242
  if (!task)
883
1243
  return;
884
- // Guard: task was cascade-skipped while its worker was running — ignore the result
885
- if (task.status === 'skipped' || task.status === 'failed')
1244
+ // Guard: task was terminally resolved while its worker was running — ignore the result
1245
+ // F4: But NOT if pendingCascadeSkip those results are evaluated below
1246
+ if ((task.status === 'skipped' || task.status === 'failed') && !task.pendingCascadeSkip)
886
1247
  return;
1248
+ // V7: Global dispatch cap — prevent any single task from burning budget.
1249
+ // Try resilience recovery (micro-decompose, degraded acceptance) before hard-failing.
1250
+ const maxDispatches = this.config.maxDispatchesPerTask ?? 5;
1251
+ if (task.attempts >= maxDispatches) {
1252
+ const durationMs = Date.now() - startedAt;
1253
+ const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
1254
+ this.totalTokens += taskResult.tokensUsed;
1255
+ this.totalCost += taskResult.costUsed;
1256
+ // Try resilience recovery before hard fail
1257
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1258
+ return;
1259
+ }
1260
+ this.taskQueue.markFailedWithoutCascade(taskId, 0);
1261
+ this.taskQueue.triggerCascadeSkip(taskId);
1262
+ this.emit({
1263
+ type: 'swarm.task.failed',
1264
+ taskId,
1265
+ error: `Dispatch cap reached (${maxDispatches} attempts)`,
1266
+ attempt: task.attempts,
1267
+ maxAttempts: maxDispatches,
1268
+ willRetry: false,
1269
+ failureMode: task.failureMode,
1270
+ });
1271
+ this.logDecision('dispatch-cap', `${taskId}: hard cap reached (${task.attempts}/${maxDispatches})`, 'No more retries — resilience recovery also failed');
1272
+ return;
1273
+ }
887
1274
  const durationMs = Date.now() - startedAt;
888
1275
  const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
889
1276
  // Track model usage
@@ -895,21 +1282,94 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
895
1282
  this.modelUsage.set(model, usage);
896
1283
  this.totalTokens += taskResult.tokensUsed;
897
1284
  this.totalCost += taskResult.costUsed;
1285
+ // V10: Emit per-attempt event for full decision traceability
1286
+ this.emit({
1287
+ type: 'swarm.task.attempt',
1288
+ taskId,
1289
+ attempt: task.attempts,
1290
+ model,
1291
+ success: spawnResult.success,
1292
+ durationMs,
1293
+ toolCalls: spawnResult.metrics.toolCalls ?? 0,
1294
+ failureMode: !spawnResult.success ? task.failureMode : undefined,
1295
+ qualityScore: taskResult.qualityScore,
1296
+ output: taskResult.output.slice(0, 500),
1297
+ });
898
1298
  if (!spawnResult.success) {
899
1299
  // V2: Record model health
900
1300
  const errorMsg = spawnResult.output.toLowerCase();
901
1301
  const is429 = errorMsg.includes('429') || errorMsg.includes('rate');
902
1302
  const is402 = errorMsg.includes('402') || errorMsg.includes('spend limit');
903
- const errorType = is429 ? '429' : is402 ? '402' : 'error';
1303
+ const isTimeout = spawnResult.metrics.toolCalls === -1;
1304
+ // F25: Use 'timeout' errorType for timeouts (was 'error')
1305
+ const errorType = is429 ? '429' : is402 ? '402' : isTimeout ? 'timeout' : 'error';
904
1306
  this.healthTracker.recordFailure(model, errorType);
905
1307
  this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
1308
+ // P6: Tag failure mode for cascade threshold awareness
1309
+ task.failureMode = (is429 || is402) ? 'rate-limit' : (spawnResult.metrics.toolCalls === -1 ? 'timeout' : 'error');
906
1310
  // Feed circuit breaker
907
1311
  if (is429 || is402) {
908
1312
  this.recordRateLimit();
909
1313
  }
1314
+ // F25a: Consecutive timeout tracking — early-fail after N consecutive timeouts
1315
+ if (isTimeout) {
1316
+ const count = (this.taskTimeoutCounts.get(taskId) ?? 0) + 1;
1317
+ this.taskTimeoutCounts.set(taskId, count);
1318
+ const timeoutLimit = this.config.consecutiveTimeoutLimit ?? 3;
1319
+ this.logDecision('timeout-tracking', `${taskId}: consecutive timeout ${count}/${timeoutLimit}`, '');
1320
+ if (count >= timeoutLimit) {
1321
+ // F25b: Try model failover before giving up
1322
+ let failoverSucceeded = false;
1323
+ if (this.config.enableModelFailover) {
1324
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
1325
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1326
+ if (alternative) {
1327
+ this.emit({
1328
+ type: 'swarm.model.failover',
1329
+ taskId,
1330
+ fromModel: model,
1331
+ toModel: alternative.model,
1332
+ reason: 'consecutive-timeouts',
1333
+ });
1334
+ task.assignedModel = alternative.model;
1335
+ this.taskTimeoutCounts.set(taskId, 0); // Reset counter for new model
1336
+ this.logDecision('failover', `Timeout failover ${taskId}: ${model} → ${alternative.model}`, `${count} consecutive timeouts`);
1337
+ failoverSucceeded = true;
1338
+ }
1339
+ }
1340
+ if (!failoverSucceeded) {
1341
+ // No alternative model — try resilience recovery before hard fail.
1342
+ // Timeouts often produce artifacts (worker WAS working, just ran out of time).
1343
+ task.failureMode = 'timeout';
1344
+ const taskResult = this.workerPool.toTaskResult(spawnResult, task, Date.now() - startedAt);
1345
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1346
+ this.taskTimeoutCounts.delete(taskId);
1347
+ return;
1348
+ }
1349
+ this.taskQueue.markFailedWithoutCascade(taskId, 0);
1350
+ this.taskQueue.triggerCascadeSkip(taskId);
1351
+ this.emit({
1352
+ type: 'swarm.task.failed',
1353
+ taskId,
1354
+ error: `${count} consecutive timeouts — no alternative model available`,
1355
+ attempt: task.attempts,
1356
+ maxAttempts: maxDispatches,
1357
+ willRetry: false,
1358
+ failureMode: 'timeout',
1359
+ });
1360
+ this.logDecision('timeout-early-fail', `${taskId}: ${count} consecutive timeouts, no alt model — resilience recovery also failed`, '');
1361
+ this.taskTimeoutCounts.delete(taskId);
1362
+ return;
1363
+ }
1364
+ }
1365
+ }
1366
+ else {
1367
+ // Non-timeout failure — reset the counter
1368
+ this.taskTimeoutCounts.delete(taskId);
1369
+ }
910
1370
  // V2: Model failover on rate limits
911
1371
  if ((is429 || is402) && this.config.enableModelFailover) {
912
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1372
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
913
1373
  const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
914
1374
  if (alternative) {
915
1375
  this.emit({
@@ -926,21 +1386,27 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
926
1386
  // V5/V7: Store error context so retry gets different prompt
927
1387
  if (!(is429 || is402)) {
928
1388
  // V7: Timeout-specific feedback — the worker WAS working, just ran out of time
929
- const isTimeout = spawnResult.metrics.toolCalls === -1;
930
1389
  const timeoutSeconds = isTimeout ? Math.round(durationMs / 1000) : 0;
931
1390
  task.retryContext = {
932
1391
  previousFeedback: isTimeout
933
1392
  ? `Previous attempt timed out after ${timeoutSeconds}s. You must complete this task more efficiently — work faster, use fewer tool calls, and produce your result sooner.`
934
- : spawnResult.output.slice(0, 500),
1393
+ : spawnResult.output.slice(0, 2000),
935
1394
  previousScore: 0,
936
1395
  attempt: task.attempts,
1396
+ previousModel: model,
1397
+ previousFiles: taskResult.filesModified,
1398
+ swarmProgress: this.getSwarmProgressSummary(),
937
1399
  };
938
1400
  }
939
- // Worker failed use higher retry limit for rate limit errors
1401
+ // V7: Reset hollow streak on non-hollow failure (error is not a hollow completion)
1402
+ this.hollowStreak = 0;
1403
+ // Worker failed — use higher retry limit for rate limit errors.
1404
+ // V7: Fixup tasks get capped retries, foundation tasks get +1.
1405
+ const baseRetries = this.getEffectiveRetries(task);
940
1406
  const retryLimit = (is429 || is402)
941
- ? (this.config.rateLimitRetries ?? 3)
942
- : this.config.workerRetries;
943
- const canRetry = this.taskQueue.markFailed(taskId, retryLimit);
1407
+ ? Math.min(this.config.rateLimitRetries ?? 3, baseRetries + 1)
1408
+ : baseRetries;
1409
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, retryLimit);
944
1410
  if (canRetry) {
945
1411
  this.retries++;
946
1412
  // Non-blocking cooldown: set retryAfter timestamp instead of blocking
@@ -948,8 +1414,21 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
948
1414
  const baseDelay = this.config.retryBaseDelayMs ?? 5000;
949
1415
  const cooldownMs = Math.min(baseDelay * Math.pow(2, task.attempts - 1), 30000);
950
1416
  this.taskQueue.setRetryAfter(taskId, cooldownMs);
1417
+ this.logDecision('rate-limit-cooldown', `${taskId}: ${errorType} cooldown ${cooldownMs}ms, model ${model}`, '');
951
1418
  }
952
1419
  }
1420
+ else if (!(is429 || is402)) {
1421
+ // Resilience recovery for non-rate-limit errors (micro-decompose + degraded acceptance)
1422
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1423
+ return;
1424
+ }
1425
+ // Recovery failed — NOW trigger cascade
1426
+ this.taskQueue.triggerCascadeSkip(taskId);
1427
+ }
1428
+ else {
1429
+ // Rate-limit exhaustion — trigger cascade
1430
+ this.taskQueue.triggerCascadeSkip(taskId);
1431
+ }
953
1432
  this.emit({
954
1433
  type: 'swarm.task.failed',
955
1434
  taskId,
@@ -957,23 +1436,43 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
957
1436
  attempt: task.attempts,
958
1437
  maxAttempts: 1 + this.config.workerRetries,
959
1438
  willRetry: canRetry,
1439
+ toolCalls: spawnResult.metrics.toolCalls,
1440
+ failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
1441
+ failureMode: task.failureMode,
960
1442
  });
961
1443
  return;
962
1444
  }
963
1445
  // V6: Hollow completion detection — workers that "succeed" without doing any work
964
1446
  // Must check BEFORE recording success, otherwise hollow completions inflate health scores
965
- if (isHollowCompletion(spawnResult)) {
966
- // Record health failure so hollow-prone models accumulate failure records
967
- // and eventually trigger failover via selectAlternativeModel
968
- this.healthTracker.recordFailure(model, 'error');
1447
+ if (isHollowCompletion(spawnResult, task.type, this.config)) {
1448
+ // F4: Hollow result + pendingCascadeSkip honor the skip immediately, no retry
1449
+ if (task.pendingCascadeSkip) {
1450
+ task.pendingCascadeSkip = undefined;
1451
+ task.status = 'skipped';
1452
+ this.totalHollows++;
1453
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (hollow completion)`, '');
1454
+ this.emit({ type: 'swarm.task.skipped', taskId, reason: 'cascade skip honored — hollow completion' });
1455
+ return;
1456
+ }
1457
+ // P6: Tag failure mode for cascade threshold awareness
1458
+ task.failureMode = 'hollow';
1459
+ // Record hollow completion so hollow-prone models accumulate hollow-specific records
1460
+ // and get deprioritized by the model selector (also records generic failure internally)
1461
+ this.healthTracker.recordHollow(model);
1462
+ const admitsFailure = spawnResult.success && FAILURE_INDICATORS.some(f => (spawnResult.output ?? '').toLowerCase().includes(f));
969
1463
  task.retryContext = {
970
- previousFeedback: 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
1464
+ previousFeedback: admitsFailure
1465
+ ? 'Previous attempt reported success but admitted failure (e.g., "budget exhausted", "unable to complete"). You MUST execute tool calls and produce concrete output this time.'
1466
+ : 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
971
1467
  previousScore: 1,
972
1468
  attempt: task.attempts,
1469
+ previousModel: model,
1470
+ previousFiles: taskResult.filesModified,
1471
+ swarmProgress: this.getSwarmProgressSummary(),
973
1472
  };
974
1473
  // Model failover for hollow completions — same pattern as quality failover
975
1474
  if (this.config.enableModelFailover) {
976
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1475
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
977
1476
  const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
978
1477
  if (alternative) {
979
1478
  this.emit({
@@ -987,9 +1486,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
987
1486
  this.logDecision('failover', `Hollow failover ${taskId}: ${model} → ${alternative.model}`, 'Model produced hollow completion');
988
1487
  }
989
1488
  }
990
- const canRetry = this.taskQueue.markFailed(taskId, this.config.workerRetries);
991
- if (canRetry)
1489
+ const hollowRetries = this.getEffectiveRetries(task);
1490
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, hollowRetries);
1491
+ if (canRetry) {
992
1492
  this.retries++;
1493
+ }
1494
+ else {
1495
+ // Retries exhausted — try shared resilience recovery (micro-decompose, degraded acceptance)
1496
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1497
+ return;
1498
+ }
1499
+ // Recovery failed — NOW trigger cascade
1500
+ this.taskQueue.triggerCascadeSkip(taskId);
1501
+ }
993
1502
  this.emit({
994
1503
  type: 'swarm.task.failed',
995
1504
  taskId,
@@ -997,21 +1506,83 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
997
1506
  attempt: task.attempts,
998
1507
  maxAttempts: 1 + this.config.workerRetries,
999
1508
  willRetry: canRetry,
1509
+ toolCalls: spawnResult.metrics.toolCalls,
1510
+ failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
1511
+ failureMode: 'hollow',
1000
1512
  });
1001
- this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls`, 'Marking as failed for retry');
1513
+ this.hollowStreak++;
1514
+ this.totalHollows++;
1515
+ this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls (streak: ${this.hollowStreak}, total hollows: ${this.totalHollows}/${this.totalDispatches})`, canRetry ? 'Marking as failed for retry' : 'Retries exhausted — hard fail');
1516
+ // B2: Hollow streak handling — only terminate if enableHollowTermination is explicitly on
1517
+ if (this.hollowStreak >= SwarmOrchestrator.HOLLOW_STREAK_THRESHOLD) {
1518
+ const uniqueModels = new Set(this.config.workers.map(w => w.model));
1519
+ const singleModel = uniqueModels.size === 1;
1520
+ const onlyModel = [...uniqueModels][0];
1521
+ const modelUnhealthy = singleModel && !this.healthTracker.getAllRecords().find(r => r.model === onlyModel)?.healthy;
1522
+ if (singleModel && modelUnhealthy) {
1523
+ if (this.config.enableHollowTermination) {
1524
+ this.logDecision('early-termination', `Terminating swarm: ${this.hollowStreak} consecutive hollow completions on sole model ${onlyModel}`, 'Single-model swarm with unhealthy model — enableHollowTermination is on');
1525
+ this.skipRemainingTasks(`Single-model hollow streak (${this.hollowStreak}x on ${onlyModel})`);
1526
+ }
1527
+ else {
1528
+ this.logDecision('stall-mode', `${this.hollowStreak} consecutive hollows on sole model ${onlyModel} — entering stall mode`, 'Will attempt model failover or simplified retry on next dispatch');
1529
+ // Reset streak to allow more attempts with adjusted strategy
1530
+ this.hollowStreak = 0;
1531
+ }
1532
+ }
1533
+ }
1534
+ // V7: Multi-model hollow ratio — warn but don't terminate unless opt-in
1535
+ const minDispatches = this.config.hollowTerminationMinDispatches ?? 8;
1536
+ const threshold = this.config.hollowTerminationRatio ?? 0.55;
1537
+ if (this.totalDispatches >= minDispatches) {
1538
+ const ratio = this.totalHollows / this.totalDispatches;
1539
+ if (ratio > threshold) {
1540
+ if (this.config.enableHollowTermination) {
1541
+ this.logDecision('early-termination', `Terminating swarm: hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, `Exceeds threshold ${(threshold * 100).toFixed(0)}% after ${minDispatches}+ dispatches — enableHollowTermination is on`);
1542
+ this.skipRemainingTasks(`Hollow ratio ${(ratio * 100).toFixed(0)}% — models cannot execute tasks`);
1543
+ }
1544
+ else if (!this.hollowRatioWarned) {
1545
+ this.hollowRatioWarned = true;
1546
+ this.logDecision('stall-warning', `Hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, 'High hollow rate but continuing — tasks may still recover via resilience');
1547
+ }
1548
+ }
1549
+ }
1002
1550
  return;
1003
1551
  }
1552
+ // F4: Task had pendingCascadeSkip but produced non-hollow results.
1553
+ // Run pre-flight checks — if the output is good, accept it instead of skipping.
1554
+ if (task.pendingCascadeSkip) {
1555
+ const cachedReport = checkArtifacts(task);
1556
+ const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedReport);
1557
+ if (preFlight && !preFlight.passed) {
1558
+ // Output is garbage — honor the cascade skip
1559
+ task.pendingCascadeSkip = undefined;
1560
+ task.status = 'skipped';
1561
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (pre-flight failed: ${preFlight.feedback})`, '');
1562
+ this.emit({ type: 'swarm.task.skipped', taskId, reason: `cascade skip honored — output failed pre-flight: ${preFlight.feedback}` });
1563
+ return;
1564
+ }
1565
+ // Output is good — clear the flag and accept the result
1566
+ task.pendingCascadeSkip = undefined;
1567
+ task.status = 'dispatched'; // Reset so markCompleted works
1568
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip overridden — worker produced valid output`, '');
1569
+ }
1004
1570
  // Record model health on success (only for non-hollow completions)
1005
1571
  this.healthTracker.recordSuccess(model, durationMs);
1572
+ this.decreaseStagger(); // P7: Speed up on success
1006
1573
  // Run quality gate if enabled — skip under API pressure, skip if circuit breaker tripped,
1007
1574
  // and let the final attempt through without quality gate (so tasks produce *something*)
1575
+ // Foundation tasks get +1 retry to reduce cascade failure risk.
1576
+ const effectiveRetries = this.getEffectiveRetries(task);
1008
1577
  const recentRLCount = this.recentRateLimits.filter(t => t > Date.now() - 30_000).length;
1009
- const isLastAttempt = task.attempts >= (this.config.workerRetries + 1);
1578
+ const isLastAttempt = task.attempts >= (effectiveRetries + 1);
1010
1579
  const shouldRunQualityGate = this.config.qualityGates
1011
- && !this.qualityGateDisabled
1580
+ && !this.qualityGateDisabledModels.has(model)
1012
1581
  && !isLastAttempt
1013
1582
  && Date.now() >= this.circuitBreakerUntil
1014
1583
  && recentRLCount < 2;
1584
+ // C1: Pre-compute artifact report once — shared by quality gate and pre-flight checks
1585
+ const cachedArtifactReport = checkArtifacts(task);
1015
1586
  if (shouldRunQualityGate) {
1016
1587
  // V3: Judge role handles quality gates
1017
1588
  const judgeModel = this.config.hierarchy?.judge?.model
@@ -1021,57 +1592,272 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1021
1592
  persona: this.config.hierarchy?.judge?.persona,
1022
1593
  };
1023
1594
  this.emit({ type: 'swarm.role.action', role: 'judge', action: 'quality-gate', model: judgeModel, taskId });
1024
- const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, this.config.qualityThreshold ?? 3);
1595
+ // Extract file artifacts from worker output for quality gate visibility.
1596
+ // When workers create files via write_file/edit_file, the judge needs to see
1597
+ // the actual content — not just the worker's text claims about what was created.
1598
+ const fileArtifacts = this.extractFileArtifacts(task, taskResult);
1599
+ // Foundation tasks get a relaxed quality threshold (threshold - 1, min 2)
1600
+ // to reduce the chance of cascade-skipping the entire swarm.
1601
+ const baseThreshold = this.config.qualityThreshold ?? 3;
1602
+ const qualityThreshold = task.isFoundation ? Math.max(2, baseThreshold - 1) : baseThreshold;
1603
+ const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, qualityThreshold, (resp, purpose) => this.trackOrchestratorUsage(resp, purpose), fileArtifacts, this.config, cachedArtifactReport);
1025
1604
  taskResult.qualityScore = quality.score;
1026
1605
  taskResult.qualityFeedback = quality.feedback;
1027
- if (!quality.passed) {
1028
- this.qualityRejections++;
1029
- this.consecutiveQualityRejections++;
1030
- // Quality circuit breaker: disable gates after too many consecutive rejections
1031
- if (this.consecutiveQualityRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
1032
- this.qualityGateDisabled = true;
1033
- this.logDecision('quality-circuit-breaker', `Disabled quality gates after ${this.consecutiveQualityRejections} consecutive rejections`, 'Workers cannot meet quality threshold — letting remaining tasks through');
1606
+ // F11: Foundation tasks that barely pass the relaxed threshold get concrete validation.
1607
+ // A 2/5 foundation task with truncated output will cascade-poison all dependents.
1608
+ if (quality.passed && task.isFoundation && quality.score <= baseThreshold - 1) {
1609
+ const concreteResult = runConcreteChecks(task, taskResult);
1610
+ if (!concreteResult.passed) {
1611
+ quality.passed = false;
1612
+ quality.feedback += ` [F11: foundation task barely passed (${quality.score}/${baseThreshold}) but concrete validation failed: ${concreteResult.issues.join('; ')}]`;
1613
+ this.logDecision('foundation-concrete-gate', `${taskId}: foundation task scored ${quality.score} (relaxed threshold ${qualityThreshold}) but concrete checks failed — rejecting`, concreteResult.issues.join('; '));
1034
1614
  }
1035
- // V5: Attach feedback so retry prompt includes it
1036
- task.retryContext = {
1037
- previousFeedback: quality.feedback,
1038
- previousScore: quality.score,
1039
- attempt: task.attempts,
1040
- };
1041
- // V5: Model failover on severe quality rejection — but NOT on artifact auto-fails
1042
- if (quality.score <= 1 && this.config.enableModelFailover && !quality.artifactAutoFail) {
1043
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1044
- const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1045
- if (alternative) {
1615
+ }
1616
+ if (!quality.passed) {
1617
+ // F7: Gate error fallback — when LLM judge fails, use concrete validation
1618
+ // If concrete checks pass, tentatively accept the result instead of rejecting.
1619
+ if (quality.gateError && (this.config.enableConcreteValidation !== false)) {
1620
+ const concreteResult = runConcreteChecks(task, taskResult);
1621
+ if (concreteResult.passed) {
1622
+ // Concrete validation passed — tentatively accept despite gate error
1623
+ this.logDecision('gate-error-fallback', `${taskId}: gate error but concrete checks passed — tentatively accepting`, quality.gateErrorMessage ?? 'unknown');
1624
+ taskResult.qualityScore = quality.score;
1625
+ taskResult.qualityFeedback = `${quality.feedback} [concrete validation passed — tentative accept]`;
1626
+ // Fall through to success path (don't return)
1627
+ }
1628
+ else {
1629
+ // Both gate and concrete failed — reject
1630
+ this.logDecision('gate-error-fallback', `${taskId}: gate error AND concrete checks failed — rejecting`, `Concrete issues: ${concreteResult.issues.join('; ')}`);
1631
+ // Fall through to normal rejection below
1632
+ }
1633
+ // If concrete passed, skip the rejection path
1634
+ if (concreteResult.passed) {
1635
+ this.perModelQualityRejections.delete(model);
1636
+ // Jump to success path below
1637
+ }
1638
+ else {
1639
+ // Proceed with normal rejection
1640
+ this.qualityRejections++;
1641
+ task.failureMode = 'quality';
1642
+ this.healthTracker.recordQualityRejection(model, quality.score);
1643
+ this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
1644
+ this.hollowStreak = 0;
1645
+ task.retryContext = {
1646
+ previousFeedback: `Gate error + concrete validation failed: ${concreteResult.issues.join('; ')}`,
1647
+ previousScore: quality.score,
1648
+ attempt: task.attempts,
1649
+ previousModel: model,
1650
+ previousFiles: taskResult.filesModified,
1651
+ swarmProgress: this.getSwarmProgressSummary(),
1652
+ };
1653
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1654
+ if (canRetry) {
1655
+ this.retries++;
1656
+ }
1657
+ else {
1658
+ // Retries exhausted — try resilience recovery before cascade-skip
1659
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1660
+ return;
1661
+ }
1662
+ // Recovery failed — NOW trigger cascade
1663
+ this.taskQueue.triggerCascadeSkip(taskId);
1664
+ }
1046
1665
  this.emit({
1047
- type: 'swarm.model.failover',
1666
+ type: 'swarm.quality.rejected',
1048
1667
  taskId,
1049
- fromModel: model,
1050
- toModel: alternative.model,
1051
- reason: `quality-score-${quality.score}`,
1668
+ score: quality.score,
1669
+ feedback: quality.feedback,
1670
+ artifactCount: fileArtifacts.length,
1671
+ outputLength: taskResult.output.length,
1672
+ preFlightReject: false,
1673
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1052
1674
  });
1053
- task.assignedModel = alternative.model;
1054
- this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
1675
+ return;
1676
+ }
1677
+ }
1678
+ else if (!quality.gateError) {
1679
+ // Normal quality rejection (LLM judge rejected, no gate error)
1680
+ this.qualityRejections++;
1681
+ // P6: Tag failure mode for cascade threshold awareness
1682
+ task.failureMode = 'quality';
1683
+ // P1: Quality rejections update model health — undo premature recordSuccess
1684
+ this.healthTracker.recordQualityRejection(model, quality.score);
1685
+ this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
1686
+ // V7: Quality rejection is NOT hollow — worker did work, just poorly
1687
+ this.hollowStreak = 0;
1688
+ // F7: Per-model circuit breaker → "pre-flight only mode" instead of fully disabling gates.
1689
+ // After threshold rejections, skip LLM judge but keep pre-flight mandatory.
1690
+ if (!quality.preFlightReject) {
1691
+ const modelRejections = (this.perModelQualityRejections.get(model) ?? 0) + 1;
1692
+ this.perModelQualityRejections.set(model, modelRejections);
1693
+ if (modelRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
1694
+ this.qualityGateDisabledModels.add(model);
1695
+ this.logDecision('quality-circuit-breaker', `Switched model ${model} to pre-flight-only mode after ${modelRejections} rejections`, 'Skipping LLM judge but keeping pre-flight checks mandatory');
1696
+ }
1697
+ }
1698
+ // V5: Attach feedback so retry prompt includes it
1699
+ task.retryContext = {
1700
+ previousFeedback: quality.feedback,
1701
+ previousScore: quality.score,
1702
+ attempt: task.attempts,
1703
+ previousModel: model,
1704
+ previousFiles: taskResult.filesModified,
1705
+ swarmProgress: this.getSwarmProgressSummary(),
1706
+ };
1707
+ // V5: Model failover on quality rejection — but NOT on artifact auto-fails
1708
+ // P1: Widened from score<=1 to score<threshold so failover triggers on any rejection
1709
+ if (quality.score < qualityThreshold && this.config.enableModelFailover && !quality.artifactAutoFail) {
1710
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
1711
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1712
+ if (alternative) {
1713
+ this.emit({
1714
+ type: 'swarm.model.failover',
1715
+ taskId,
1716
+ fromModel: model,
1717
+ toModel: alternative.model,
1718
+ reason: `quality-score-${quality.score}`,
1719
+ });
1720
+ task.assignedModel = alternative.model;
1721
+ this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
1722
+ }
1723
+ }
1724
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1725
+ if (canRetry) {
1726
+ this.retries++;
1727
+ }
1728
+ else {
1729
+ // Retries exhausted — try resilience recovery before cascade-skip
1730
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1731
+ return;
1732
+ }
1733
+ // Recovery failed — NOW trigger cascade
1734
+ this.taskQueue.triggerCascadeSkip(taskId);
1735
+ }
1736
+ // M1: Only emit quality.rejected (not duplicate task.failed)
1737
+ this.emit({
1738
+ type: 'swarm.quality.rejected',
1739
+ taskId,
1740
+ score: quality.score,
1741
+ feedback: quality.feedback,
1742
+ artifactCount: fileArtifacts.length,
1743
+ outputLength: taskResult.output.length,
1744
+ preFlightReject: quality.preFlightReject,
1745
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1746
+ });
1747
+ return;
1748
+ }
1749
+ else {
1750
+ // gateError=true but concrete validation disabled — reject
1751
+ this.qualityRejections++;
1752
+ task.failureMode = 'quality';
1753
+ this.hollowStreak = 0;
1754
+ task.retryContext = {
1755
+ previousFeedback: quality.feedback,
1756
+ previousScore: quality.score,
1757
+ attempt: task.attempts,
1758
+ previousModel: model,
1759
+ previousFiles: taskResult.filesModified,
1760
+ swarmProgress: this.getSwarmProgressSummary(),
1761
+ };
1762
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1763
+ if (canRetry) {
1764
+ this.retries++;
1765
+ }
1766
+ else {
1767
+ // Retries exhausted — try resilience recovery before cascade-skip
1768
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1769
+ return;
1770
+ }
1771
+ // Recovery failed — NOW trigger cascade
1772
+ this.taskQueue.triggerCascadeSkip(taskId);
1055
1773
  }
1774
+ this.emit({
1775
+ type: 'swarm.quality.rejected',
1776
+ taskId,
1777
+ score: quality.score,
1778
+ feedback: quality.feedback,
1779
+ artifactCount: fileArtifacts.length,
1780
+ outputLength: taskResult.output.length,
1781
+ preFlightReject: false,
1782
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1783
+ });
1784
+ return;
1056
1785
  }
1057
- const canRetry = this.taskQueue.markFailed(taskId, this.config.workerRetries);
1786
+ }
1787
+ // Quality passed — reset per-model rejection counter
1788
+ this.perModelQualityRejections.delete(model);
1789
+ }
1790
+ // F7: When quality gate was skipped (last attempt, pre-flight-only mode, API pressure),
1791
+ // still run pre-flight + concrete checks so obviously broken outputs don't slip through.
1792
+ // C1: Use cached artifact report to avoid double filesystem scan.
1793
+ if (!shouldRunQualityGate && this.config.qualityGates) {
1794
+ const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedArtifactReport);
1795
+ if (preFlight && !preFlight.passed) {
1796
+ taskResult.qualityScore = preFlight.score;
1797
+ taskResult.qualityFeedback = preFlight.feedback;
1798
+ this.qualityRejections++;
1799
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1058
1800
  if (canRetry) {
1059
1801
  this.retries++;
1060
1802
  }
1061
- // M1: Only emit quality.rejected (not duplicate task.failed)
1803
+ else {
1804
+ // Retries exhausted — try resilience recovery before cascade-skip
1805
+ this.logDecision('preflight-reject', `${taskId}: pre-flight failed: ${preFlight.feedback}`, '');
1806
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1807
+ return;
1808
+ }
1809
+ // Recovery failed — NOW trigger cascade
1810
+ this.taskQueue.triggerCascadeSkip(taskId);
1811
+ }
1062
1812
  this.emit({
1063
1813
  type: 'swarm.quality.rejected',
1064
1814
  taskId,
1065
- score: quality.score,
1066
- feedback: quality.feedback,
1815
+ score: preFlight.score,
1816
+ feedback: preFlight.feedback,
1817
+ artifactCount: 0,
1818
+ outputLength: taskResult.output.length,
1819
+ preFlightReject: true,
1067
1820
  });
1068
1821
  return;
1069
1822
  }
1070
- // Quality passed reset consecutive rejection counter
1071
- this.consecutiveQualityRejections = 0;
1823
+ // F2: Run concrete validation when pre-flight passes but gate was skipped
1824
+ if (this.config.enableConcreteValidation !== false) {
1825
+ const concreteResult = runConcreteChecks(task, taskResult);
1826
+ if (!concreteResult.passed) {
1827
+ taskResult.qualityScore = 2;
1828
+ taskResult.qualityFeedback = `Concrete validation failed: ${concreteResult.issues.join('; ')}`;
1829
+ this.qualityRejections++;
1830
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1831
+ if (canRetry) {
1832
+ this.retries++;
1833
+ }
1834
+ else {
1835
+ // Retries exhausted — try resilience recovery before cascade-skip
1836
+ this.logDecision('concrete-reject', `${taskId}: concrete validation failed: ${concreteResult.issues.join('; ')}`, '');
1837
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1838
+ return;
1839
+ }
1840
+ // Recovery failed — NOW trigger cascade
1841
+ this.taskQueue.triggerCascadeSkip(taskId);
1842
+ }
1843
+ this.emit({
1844
+ type: 'swarm.quality.rejected',
1845
+ taskId,
1846
+ score: 2,
1847
+ feedback: taskResult.qualityFeedback,
1848
+ artifactCount: 0,
1849
+ outputLength: taskResult.output.length,
1850
+ preFlightReject: false,
1851
+ });
1852
+ return;
1853
+ }
1854
+ }
1072
1855
  }
1073
1856
  // Task passed — mark completed
1074
1857
  this.taskQueue.markCompleted(taskId, taskResult);
1858
+ this.hollowStreak = 0;
1859
+ // F25: Clear timeout counter on success
1860
+ this.taskTimeoutCounts.delete(taskId);
1075
1861
  // H6: Post findings to blackboard with error handling
1076
1862
  if (this.blackboard && taskResult.findings) {
1077
1863
  try {
@@ -1117,7 +1903,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1117
1903
  const tasks = this.taskQueue.getAllTasks();
1118
1904
  const outputs = tasks
1119
1905
  .filter(t => t.status === 'completed')
1120
- .map(t => taskResultToAgentOutput(t))
1906
+ .map(t => taskResultToAgentOutput(t, this.config))
1121
1907
  .filter((o) => o !== null);
1122
1908
  if (outputs.length === 0)
1123
1909
  return null;
@@ -1147,11 +1933,17 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1147
1933
  activeWorkers: this.workerPool.getActiveWorkerStatus(),
1148
1934
  queue: stats,
1149
1935
  budget: {
1150
- tokensUsed: this.totalTokens,
1936
+ tokensUsed: this.totalTokens + this.orchestratorTokens,
1151
1937
  tokensTotal: this.config.totalBudget,
1152
- costUsed: this.totalCost,
1938
+ costUsed: this.totalCost + this.orchestratorCost,
1153
1939
  costTotal: this.config.maxCost,
1154
1940
  },
1941
+ orchestrator: {
1942
+ tokens: this.orchestratorTokens,
1943
+ cost: this.orchestratorCost,
1944
+ calls: this.orchestratorCalls,
1945
+ model: this.config.orchestratorModel,
1946
+ },
1155
1947
  };
1156
1948
  }
1157
1949
  /**
@@ -1163,6 +1955,69 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1163
1955
  this.currentPhase = 'failed';
1164
1956
  await this.workerPool.cancelAll();
1165
1957
  }
1958
+ // ─── D3: Model Capability Probing ─────────────────────────────────────
1959
+ /**
1960
+ * D3/F23: Probe each unique model to verify it can make tool calls.
1961
+ * Models that fail the probe are marked unhealthy so they're skipped in dispatch.
1962
+ *
1963
+ * F23 fix: Uses chatWithTools() with actual tool definitions instead of
1964
+ * plain chat() which never included tools in the API request.
1965
+ */
1966
+ async probeModelCapability() {
1967
+ const uniqueModels = new Set(this.config.workers.map(w => w.model));
1968
+ this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Probing ${uniqueModels.size} model(s) for tool-calling capability...` });
1969
+ // F23: Check if provider supports native tool calling
1970
+ const supportsTools = 'chatWithTools' in this.provider
1971
+ && typeof this.provider.chatWithTools === 'function';
1972
+ if (!supportsTools) {
1973
+ // Provider doesn't support chatWithTools — skip probe entirely.
1974
+ // Workers will rely on text-based tool parsing fallback.
1975
+ this.logDecision('model-probe', 'Provider does not support chatWithTools — skipping probe', '');
1976
+ return;
1977
+ }
1978
+ const providerWithTools = this.provider;
1979
+ const probeTools = [{
1980
+ type: 'function',
1981
+ function: {
1982
+ name: 'read_file',
1983
+ description: 'Read a file from disk',
1984
+ parameters: {
1985
+ type: 'object',
1986
+ properties: { path: { type: 'string', description: 'File path' } },
1987
+ required: ['path'],
1988
+ },
1989
+ },
1990
+ }];
1991
+ // F24: Configurable probe timeout — generous default for slow models/connections
1992
+ const probeTimeout = this.config.probeTimeoutMs ?? 60_000;
1993
+ for (const model of uniqueModels) {
1994
+ try {
1995
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Probe timeout (${probeTimeout}ms)`)), probeTimeout));
1996
+ const response = await Promise.race([
1997
+ providerWithTools.chatWithTools([
1998
+ { role: 'system', content: 'You are a test probe. Call the read_file tool with path "package.json".' },
1999
+ { role: 'user', content: 'Read package.json.' },
2000
+ ], { model, maxTokens: 200, temperature: 0, tools: probeTools, tool_choice: 'required' }),
2001
+ timeoutPromise,
2002
+ ]);
2003
+ const hasToolCall = (response.toolCalls?.length ?? 0) > 0;
2004
+ if (!hasToolCall) {
2005
+ // F19: Directly mark unhealthy — probe failure is definitive evidence
2006
+ this.healthTracker.markUnhealthy(model);
2007
+ this.logDecision('model-probe', `Model ${model} failed probe (no tool calls)`, 'Marked unhealthy');
2008
+ }
2009
+ else {
2010
+ this.healthTracker.recordSuccess(model, 0);
2011
+ this.logDecision('model-probe', `Model ${model} passed probe`, '');
2012
+ }
2013
+ }
2014
+ catch {
2015
+ // F19: Directly mark unhealthy on probe error (includes timeout)
2016
+ this.healthTracker.markUnhealthy(model);
2017
+ this.logDecision('model-probe', `Model ${model} probe errored`, 'Marked unhealthy');
2018
+ }
2019
+ }
2020
+ }
1166
2021
  // ─── Circuit Breaker ────────────────────────────────────────────────
1167
2022
  /**
1168
2023
  * Record a rate limit hit and check if the circuit breaker should trip.
@@ -1170,6 +2025,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1170
2025
  recordRateLimit() {
1171
2026
  const now = Date.now();
1172
2027
  this.recentRateLimits.push(now);
2028
+ this.increaseStagger(); // P7: Back off on rate limits
1173
2029
  // Prune entries older than the window
1174
2030
  const cutoff = now - SwarmOrchestrator.CIRCUIT_BREAKER_WINDOW_MS;
1175
2031
  this.recentRateLimits = this.recentRateLimits.filter(t => t > cutoff);
@@ -1197,6 +2053,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1197
2053
  }
1198
2054
  return false;
1199
2055
  }
2056
+ // ─── P7: Adaptive Stagger ────────────────────────────────────────────
2057
+ /** P7: Get current stagger delay (adapts based on rate limit / success signals). */
2058
+ getStaggerMs() {
2059
+ return this.adaptiveStaggerMs;
2060
+ }
2061
+ /** P7: Increase stagger on rate limit (×1.5, capped at 10s). */
2062
+ increaseStagger() {
2063
+ this.adaptiveStaggerMs = Math.min(this.adaptiveStaggerMs * 1.5, 10_000);
2064
+ }
2065
+ /** P7: Decrease stagger on success (×0.9, floor at 200ms). */
2066
+ decreaseStagger() {
2067
+ this.adaptiveStaggerMs = Math.max(this.adaptiveStaggerMs * 0.9, 200);
2068
+ }
1200
2069
  // ─── V2: Decision Logging ─────────────────────────────────────────────
1201
2070
  logDecision(phase, decision, reasoning) {
1202
2071
  const entry = {
@@ -1223,14 +2092,15 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1223
2092
  waves: queueState.waves,
1224
2093
  currentWave: queueState.currentWave,
1225
2094
  stats: {
1226
- totalTokens: this.totalTokens,
1227
- totalCost: this.totalCost,
2095
+ totalTokens: this.totalTokens + this.orchestratorTokens,
2096
+ totalCost: this.totalCost + this.orchestratorCost,
1228
2097
  qualityRejections: this.qualityRejections,
1229
2098
  retries: this.retries,
1230
2099
  },
1231
2100
  modelHealth: this.healthTracker.getAllRecords(),
1232
2101
  decisions: this.orchestratorDecisions,
1233
2102
  errors: this.errors,
2103
+ originalPrompt: this.originalPrompt,
1234
2104
  });
1235
2105
  this.emit({
1236
2106
  type: 'swarm.state.checkpoint',
@@ -1250,9 +2120,9 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1250
2120
  emitBudgetUpdate() {
1251
2121
  this.emit({
1252
2122
  type: 'swarm.budget.update',
1253
- tokensUsed: this.totalTokens,
2123
+ tokensUsed: this.totalTokens + this.orchestratorTokens,
1254
2124
  tokensTotal: this.config.totalBudget,
1255
- costUsed: this.totalCost,
2125
+ costUsed: this.totalCost + this.orchestratorCost,
1256
2126
  costTotal: this.config.maxCost,
1257
2127
  });
1258
2128
  }
@@ -1267,8 +2137,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1267
2137
  failedTasks: queueStats.failed,
1268
2138
  skippedTasks: queueStats.skipped,
1269
2139
  totalWaves: this.taskQueue.getTotalWaves(),
1270
- totalTokens: this.totalTokens,
1271
- totalCost: this.totalCost,
2140
+ totalTokens: this.totalTokens + this.orchestratorTokens,
2141
+ totalCost: this.totalCost + this.orchestratorCost,
1272
2142
  totalDurationMs: Date.now() - this.startTime,
1273
2143
  qualityRejections: this.qualityRejections,
1274
2144
  retries: this.retries,
@@ -1293,6 +2163,16 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1293
2163
  if (this.verificationResult) {
1294
2164
  parts.push(` Verification: ${this.verificationResult.passed ? 'PASSED' : 'FAILED'}`);
1295
2165
  }
2166
+ // Artifact inventory: show what files actually exist on disk regardless of task status
2167
+ if (this.artifactInventory && this.artifactInventory.totalFiles > 0) {
2168
+ parts.push(` Files on disk: ${this.artifactInventory.totalFiles} files (${(this.artifactInventory.totalBytes / 1024).toFixed(1)}KB)`);
2169
+ for (const f of this.artifactInventory.files.slice(0, 15)) {
2170
+ parts.push(` ${f.path}: ${f.sizeBytes}B`);
2171
+ }
2172
+ if (this.artifactInventory.files.length > 15) {
2173
+ parts.push(` ... and ${this.artifactInventory.files.length - 15} more`);
2174
+ }
2175
+ }
1296
2176
  return parts.join('\n');
1297
2177
  }
1298
2178
  buildErrorResult(message) {
@@ -1319,6 +2199,698 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1319
2199
  return null;
1320
2200
  }
1321
2201
  }
2202
+ /**
2203
+ * Detect foundation tasks: tasks that are a dependency of 2+ downstream tasks.
2204
+ * These are critical single-points-of-failure — mark them for extra resilience.
2205
+ */
2206
+ detectFoundationTasks() {
2207
+ const allTasks = this.taskQueue.getAllTasks();
2208
+ const dependentCounts = new Map();
2209
+ for (const task of allTasks) {
2210
+ for (const depId of task.dependencies) {
2211
+ dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
2212
+ }
2213
+ }
2214
+ for (const task of allTasks) {
2215
+ const dependentCount = dependentCounts.get(task.id) ?? 0;
2216
+ if (dependentCount >= 2) {
2217
+ task.isFoundation = true;
2218
+ this.logDecision('scheduling', `Foundation task: ${task.id} (${dependentCount} dependents)`, 'Extra retries and relaxed quality threshold applied');
2219
+ }
2220
+ }
2221
+ }
2222
+ /**
2223
+ * Extract file artifacts from a worker's output for quality gate visibility.
2224
+ * Reads actual file content from disk so the judge can verify real work,
2225
+ * not just text claims about what was created.
2226
+ */
2227
+ extractFileArtifacts(task, taskResult) {
2228
+ const artifacts = [];
2229
+ const seen = new Set();
2230
+ // Collect file paths from multiple sources
2231
+ const candidatePaths = [];
2232
+ // 1. filesModified from structured closure report
2233
+ if (taskResult.filesModified) {
2234
+ candidatePaths.push(...taskResult.filesModified);
2235
+ }
2236
+ // 2. targetFiles from task definition
2237
+ if (task.targetFiles) {
2238
+ candidatePaths.push(...task.targetFiles);
2239
+ }
2240
+ // 3. Extract file paths mentioned in worker output (e.g., "Created src/foo.ts")
2241
+ const filePathPattern = /(?:created|wrote|modified|edited|updated)\s+["`']?([^\s"`',]+\.\w+)/gi;
2242
+ let match;
2243
+ while ((match = filePathPattern.exec(taskResult.output)) !== null) {
2244
+ candidatePaths.push(match[1]);
2245
+ }
2246
+ // Resolve against the target project directory, not CWD
2247
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2248
+ // Read previews from disk
2249
+ for (const filePath of candidatePaths) {
2250
+ if (seen.has(filePath))
2251
+ continue;
2252
+ seen.add(filePath);
2253
+ try {
2254
+ const resolved = path.resolve(baseDir, filePath);
2255
+ if (fs.existsSync(resolved)) {
2256
+ const content = fs.readFileSync(resolved, 'utf-8');
2257
+ if (content.length > 0) {
2258
+ artifacts.push({ path: filePath, preview: content.slice(0, 2000) });
2259
+ }
2260
+ }
2261
+ }
2262
+ catch {
2263
+ // Skip unreadable files
2264
+ }
2265
+ // Limit to 10 files to keep prompt size reasonable
2266
+ if (artifacts.length >= 10)
2267
+ break;
2268
+ }
2269
+ return artifacts;
2270
+ }
2271
+ /**
2272
+ * Build an inventory of filesystem artifacts produced during swarm execution.
2273
+ * Scans all tasks' targetFiles and readFiles to check what actually exists on disk.
2274
+ * This reveals work done by workers even when tasks "failed" (timeout, quality gate, etc.).
2275
+ */
2276
+ buildArtifactInventory() {
2277
+ const allFiles = new Set();
2278
+ for (const task of this.taskQueue.getAllTasks()) {
2279
+ for (const f of (task.targetFiles ?? []))
2280
+ allFiles.add(f);
2281
+ for (const f of (task.readFiles ?? []))
2282
+ allFiles.add(f);
2283
+ }
2284
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2285
+ const artifacts = [];
2286
+ for (const filePath of allFiles) {
2287
+ try {
2288
+ const resolved = path.resolve(baseDir, filePath);
2289
+ if (fs.existsSync(resolved)) {
2290
+ const stats = fs.statSync(resolved);
2291
+ if (stats.isFile() && stats.size > 0) {
2292
+ artifacts.push({ path: filePath, sizeBytes: stats.size, exists: true });
2293
+ }
2294
+ }
2295
+ }
2296
+ catch { /* skip unreadable files */ }
2297
+ }
2298
+ return {
2299
+ files: artifacts,
2300
+ totalFiles: artifacts.length,
2301
+ totalBytes: artifacts.reduce((s, a) => s + a.sizeBytes, 0),
2302
+ };
2303
+ }
2304
+ /**
2305
+ * Skip all remaining pending/ready tasks (used for early termination).
2306
+ */
2307
+ skipRemainingTasks(reason) {
2308
+ for (const task of this.taskQueue.getAllTasks()) {
2309
+ if (task.status === 'pending' || task.status === 'ready') {
2310
+ task.status = 'skipped';
2311
+ this.emit({ type: 'swarm.task.skipped', taskId: task.id, reason });
2312
+ }
2313
+ }
2314
+ }
2315
+ /**
2316
+ * F21: Mid-swarm situational assessment after each wave.
2317
+ * Evaluates success rate and budget health, triages low-priority tasks when budget is tight.
2318
+ * Also detects stalled progress and triggers mid-swarm re-planning.
2319
+ */
2320
+ async assessAndAdapt(waveIndex) {
2321
+ const stats = this.taskQueue.getStats();
2322
+ const budgetStats = this.budgetPool.getStats();
2323
+ // 1. Calculate success rate for this swarm run
2324
+ const successRate = stats.completed / Math.max(1, stats.completed + stats.failed + stats.skipped);
2325
+ // 2. Budget efficiency: tokens spent per completed task
2326
+ const tokensPerTask = stats.completed > 0
2327
+ ? (this.totalTokens / stats.completed)
2328
+ : Infinity;
2329
+ // 3. Remaining budget vs remaining tasks
2330
+ const remainingTasks = stats.total - stats.completed - stats.failed - stats.skipped;
2331
+ const estimatedTokensNeeded = remainingTasks * tokensPerTask;
2332
+ const budgetSufficient = budgetStats.tokensRemaining > estimatedTokensNeeded * 0.5;
2333
+ // Log the assessment for observability
2334
+ this.logDecision('mid-swarm-assessment', `After wave ${waveIndex + 1}: ${stats.completed}/${stats.total} completed (${(successRate * 100).toFixed(0)}%), ` +
2335
+ `${remainingTasks} remaining, ${budgetStats.tokensRemaining} tokens left`, budgetSufficient ? 'Budget looks sufficient' : 'Budget may be insufficient for remaining tasks');
2336
+ // 4. If budget is tight, prioritize: skip low-value remaining tasks
2337
+ // Only triage if we have actual data (at least one completion to estimate from)
2338
+ if (!budgetSufficient && remainingTasks > 1 && stats.completed > 0) {
2339
+ // Prefer pausing over skipping: if workers are still running, wait for budget release
2340
+ const runningCount = stats.running ?? 0;
2341
+ if (runningCount > 0) {
2342
+ this.logDecision('budget-wait', 'Budget tight but workers still running — waiting for budget release', `${runningCount} workers active, ${budgetStats.tokensRemaining} tokens remaining`);
2343
+ return;
2344
+ }
2345
+ const expendableTasks = this.findExpendableTasks();
2346
+ // Hard cap: never skip more than 20% of remaining tasks in one triage pass
2347
+ const maxSkips = Math.max(1, Math.floor(remainingTasks * 0.2));
2348
+ if (expendableTasks.length > 0) {
2349
+ let currentEstimate = estimatedTokensNeeded;
2350
+ let skipped = 0;
2351
+ for (const task of expendableTasks) {
2352
+ if (skipped >= maxSkips)
2353
+ break;
2354
+ // Stop trimming once we're within budget
2355
+ if (currentEstimate * 0.7 <= budgetStats.tokensRemaining)
2356
+ break;
2357
+ task.status = 'skipped';
2358
+ skipped++;
2359
+ this.emit({ type: 'swarm.task.skipped', taskId: task.id,
2360
+ reason: 'Budget conservation: skipping low-priority task to protect critical path' });
2361
+ this.logDecision('budget-triage', `Skipping ${task.id} (${task.type}, complexity ${task.complexity}) to conserve budget`, `${remainingTasks} tasks remain, ${budgetStats.tokensRemaining} tokens`);
2362
+ currentEstimate -= tokensPerTask;
2363
+ }
2364
+ }
2365
+ }
2366
+ // 5. Stall detection: if progress ratio is too low, trigger re-plan
2367
+ const attemptedTasks = stats.completed + stats.failed + stats.skipped;
2368
+ if (attemptedTasks >= 5) {
2369
+ const progressRatio = stats.completed / Math.max(1, attemptedTasks);
2370
+ if (progressRatio < 0.4) {
2371
+ this.logDecision('stall-detected', `Progress stalled: ${stats.completed}/${attemptedTasks} tasks succeeded (${(progressRatio * 100).toFixed(0)}%)`, 'Triggering mid-swarm re-plan');
2372
+ this.emit({
2373
+ type: 'swarm.stall',
2374
+ progressRatio,
2375
+ attempted: attemptedTasks,
2376
+ completed: stats.completed,
2377
+ });
2378
+ await this.midSwarmReplan();
2379
+ }
2380
+ }
2381
+ }
2382
+ /**
2383
+ * F21: Find expendable tasks — leaf tasks (no dependents) with lowest complexity.
2384
+ * These are the safest to skip when budget is tight.
2385
+ * Only tasks with complexity <= 2 are considered expendable.
2386
+ */
2387
+ findExpendableTasks() {
2388
+ const allTasks = this.taskQueue.getAllTasks();
2389
+ // Build reverse dependency map: which tasks depend on each task?
2390
+ const dependentCounts = new Map();
2391
+ for (const task of allTasks) {
2392
+ for (const depId of task.dependencies) {
2393
+ dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
2394
+ }
2395
+ }
2396
+ // Expendable = pending/ready, never attempted, no dependents, not foundation,
2397
+ // complexity <= 2 (simple leaf tasks only), lowest complexity first
2398
+ return allTasks
2399
+ .filter(t => (t.status === 'pending' || t.status === 'ready') &&
2400
+ t.attempts === 0 &&
2401
+ !t.isFoundation &&
2402
+ (t.complexity ?? 5) <= 2 &&
2403
+ (dependentCounts.get(t.id) ?? 0) === 0)
2404
+ .sort((a, b) => (a.complexity ?? 5) - (b.complexity ?? 5));
2405
+ }
2406
+ /**
2407
+ * Mid-swarm re-planning: when progress stalls, ask LLM to re-plan remaining work.
2408
+ * Creates simpler replacement tasks for stuck/failed work, building on what's already done.
2409
+ * Only triggers once per swarm execution to avoid infinite re-planning loops.
2410
+ */
2411
/**
 * Mid-swarm re-planning: when progress stalls, ask the LLM to re-plan remaining work.
 * Builds a prompt from completed work, on-disk artifacts, and stuck (failed/skipped)
 * tasks, then injects the LLM's replacement subtasks into the current wave.
 * Guarded by `this.hasReplanned` so it only ever runs once per swarm execution.
 * Any LLM/parse failure is logged and swallowed — re-planning is best-effort.
 */
async midSwarmReplan() {
    // One-shot guard: never re-plan twice (avoids infinite re-planning loops).
    if (this.hasReplanned)
        return;
    this.hasReplanned = true;
    const allTasks = this.taskQueue.getAllTasks();
    // 'decomposed' counts as done — its subtasks carry the work forward.
    const completed = allTasks.filter(t => t.status === 'completed' || t.status === 'decomposed');
    const stuck = allTasks.filter(t => t.status === 'failed' || t.status === 'skipped');
    // Nothing stuck → nothing to re-plan.
    if (stuck.length === 0)
        return;
    const completedSummary = completed.map(t => `- ${t.description} [${t.type}] → completed${t.degraded ? ' (degraded)' : ''}`).join('\n') || '(none)';
    const stuckSummary = stuck.map(t => `- ${t.description} [${t.type}] → ${t.status} (${t.failureMode ?? 'unknown'})`).join('\n');
    const artifactInventory = this.buildArtifactInventory();
    const artifactSummary = artifactInventory.files.map(f => `- ${f.path} (${f.sizeBytes}B)`).join('\n') || '(none)';
    const replanPrompt = `The swarm is stalled. Here's the situation:

COMPLETED WORK:
${completedSummary}

FILES ON DISK:
${artifactSummary}

STUCK TASKS (failed or skipped):
${stuckSummary}

Re-plan the remaining work. Create new subtasks that:
1. Build on what's already completed (don't redo work)
2. Are more focused in scope (but assign realistic complexity for the work involved — don't underestimate)
3. Can succeed independently (minimize dependencies)

Return JSON: { "subtasks": [{ "description": "...", "type": "implement|test|research|review|document|refactor", "complexity": 1-5, "dependencies": [], "relevantFiles": [] }] }
Return ONLY the JSON object, no other text.`;
    try {
        const response = await this.provider.chat([{ role: 'user', content: replanPrompt }]);
        // Attribute token spend to the orchestrator, not a worker.
        this.trackOrchestratorUsage(response, 'mid-swarm-replan');
        const content = response.content ?? '';
        // Loose extraction: grab the widest {...} span containing "subtasks"
        // so surrounding prose from the LLM doesn't break parsing.
        const jsonMatch = content.match(/\{[\s\S]*"subtasks"[\s\S]*\}/);
        if (!jsonMatch) {
            this.logDecision('replan-failed', 'LLM produced no parseable re-plan JSON', content.slice(0, 200));
            return;
        }
        const parsed = JSON.parse(jsonMatch[0]);
        if (!parsed.subtasks || parsed.subtasks.length === 0) {
            this.logDecision('replan-failed', 'LLM produced empty subtask list', '');
            return;
        }
        // Add new tasks from re-plan into current wave
        const newTasks = this.taskQueue.addReplanTasks(parsed.subtasks, this.taskQueue.getCurrentWave());
        this.logDecision('replan-success', `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`, newTasks.map(t => t.description).join('; '));
        this.emit({
            type: 'swarm.replan',
            stuckCount: stuck.length,
            newTaskCount: newTasks.length,
        });
        this.emit({
            type: 'swarm.orchestrator.decision',
            decision: {
                timestamp: Date.now(),
                phase: 'replan',
                decision: `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`,
                reasoning: newTasks.map(t => `${t.id}: ${t.description}`).join('; '),
            },
        });
    }
    catch (error) {
        // Best-effort: a failed re-plan leaves the swarm in its prior state.
        this.logDecision('replan-failed', `Re-plan LLM call failed: ${error.message}`, '');
    }
}
2478
+ /**
2479
+ * Rescue cascade-skipped tasks that can still run.
2480
+ * After cascade-skip fires, assess whether skipped tasks can still be attempted:
2481
+ * - If all OTHER dependencies completed and the failed dep's artifacts exist on disk → un-skip
2482
+ * - If the task has no strict data dependency on the failed task (different file targets) → un-skip with warning
2483
+ */
2484
/**
 * Rescue cascade-skipped tasks that can still run.
 * After cascade-skip fires, assess whether skipped tasks can still be attempted:
 * - If all OTHER dependencies completed and the failed dep's artifacts exist on disk → un-skip
 * - If the task has no strict data dependency on the failed task (different file targets) → un-skip with warning
 * @param {boolean} lenient - V10: when true, uses broader artifact detection,
 *   tolerates one truly-missing dep, and treats deps that were themselves
 *   cascade-skipped as recoverable context.
 * @returns {Array} the tasks that were un-skipped (also re-queued via rescueTask).
 */
rescueCascadeSkipped(lenient = false) {
    const skippedTasks = this.taskQueue.getSkippedTasks();
    const rescued = [];
    for (const task of skippedTasks) {
        // A skipped task with no dependencies wasn't a cascade victim — ignore.
        if (task.dependencies.length === 0)
            continue;
        // Per-task tally of dependency outcomes, built up over the loop below.
        let completedDeps = 0;
        let failedDepsWithArtifacts = 0;
        let failedDepsWithoutArtifacts = 0;
        let skippedDepsBlockedBySkipped = 0;
        let totalDeps = 0;
        const failedDepDescriptions = [];
        for (const depId of task.dependencies) {
            const dep = this.taskQueue.getTask(depId);
            // Unknown dep id: not counted toward totalDeps at all.
            if (!dep)
                continue;
            totalDeps++;
            if (dep.status === 'completed' || dep.status === 'decomposed') {
                completedDeps++;
            }
            else if (dep.status === 'failed' || dep.status === 'skipped') {
                // V10: In lenient mode, use checkArtifactsEnhanced for broader detection
                const artifactReport = lenient ? checkArtifactsEnhanced(dep) : checkArtifacts(dep);
                if (artifactReport && artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length > 0) {
                    failedDepsWithArtifacts++;
                    failedDepDescriptions.push(`${dep.description} (failed but ${artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length} artifacts exist)`);
                }
                else {
                    // Check if this dep's target files exist on disk (may have been created by earlier attempt)
                    const targetFiles = dep.targetFiles ?? [];
                    const existingFiles = targetFiles.filter(f => {
                        try {
                            const resolved = path.resolve(this.config.facts?.workingDirectory ?? process.cwd(), f);
                            // Non-empty file counts as a usable artifact.
                            return fs.statSync(resolved).size > 0;
                        }
                        catch {
                            // statSync throws when the file doesn't exist — treat as absent.
                            return false;
                        }
                    });
                    if (existingFiles.length > 0) {
                        failedDepsWithArtifacts++;
                        failedDepDescriptions.push(`${dep.description} (failed but ${existingFiles.length}/${targetFiles.length} target files exist)`);
                    }
                    else {
                        // Check if skipped task's targets don't overlap with the failed dep's targets
                        const taskTargets = new Set(task.targetFiles ?? []);
                        const depTargets = new Set(dep.targetFiles ?? []);
                        const hasOverlap = [...taskTargets].some(f => depTargets.has(f));
                        if (!hasOverlap && taskTargets.size > 0) {
                            // Different file targets — task probably doesn't need the failed dep's output
                            failedDepsWithArtifacts++;
                            failedDepDescriptions.push(`${dep.description} (failed, no file overlap — likely independent)`);
                        }
                        else if (lenient && dep.status === 'skipped') {
                            // V10: In lenient mode, count skipped-by-skipped deps separately
                            // (transitive cascade — the dep itself was a victim, not truly broken)
                            skippedDepsBlockedBySkipped++;
                            failedDepDescriptions.push(`${dep.description} (skipped — transitive cascade victim)`);
                        }
                        else {
                            // Truly missing: failed with no artifacts and overlapping targets.
                            failedDepsWithoutArtifacts++;
                        }
                    }
                }
            }
        }
        // Rescue condition:
        // Normal: all failed deps have artifacts or are independent, AND at least some deps completed
        // Lenient: tolerate up to 1 truly-missing dep, and count transitive cascade victims as recoverable
        const effectiveWithout = failedDepsWithoutArtifacts;
        const maxMissing = lenient ? 1 : 0;
        const hasEnoughContext = lenient ? (completedDeps + failedDepsWithArtifacts + skippedDepsBlockedBySkipped > 0) : (completedDeps > 0);
        if (totalDeps > 0 && effectiveWithout <= maxMissing && hasEnoughContext) {
            // Human-readable justification stored with the task for downstream workers.
            const rescueContext = `Rescued from cascade-skip${lenient ? ' (lenient)' : ''}: ${completedDeps}/${totalDeps} deps completed, ` +
                `${failedDepsWithArtifacts} failed deps have artifacts${skippedDepsBlockedBySkipped > 0 ? `, ${skippedDepsBlockedBySkipped} transitive cascade victims` : ''}. ${failedDepDescriptions.join('; ')}`;
            this.taskQueue.rescueTask(task.id, rescueContext);
            rescued.push(task);
            this.logDecision('cascade-rescue', `${task.id}: rescued from cascade-skip${lenient ? ' (lenient)' : ''}`, rescueContext);
        }
    }
    return rescued;
}
2566
+ /**
2567
+ * Final rescue pass — runs after executeWaves() finishes.
2568
+ * Uses lenient mode to rescue cascade-skipped tasks that have partial context.
2569
+ * Re-dispatches rescued tasks in a final wave.
2570
+ */
2571
+ async finalRescuePass() {
2572
+ const skipped = this.taskQueue.getSkippedTasks();
2573
+ if (skipped.length === 0)
2574
+ return;
2575
+ this.logDecision('final-rescue', `${skipped.length} skipped tasks — running final rescue pass`, '');
2576
+ const rescued = this.rescueCascadeSkipped(true); // lenient=true
2577
+ if (rescued.length > 0) {
2578
+ this.logDecision('final-rescue', `Rescued ${rescued.length} tasks`, rescued.map(t => t.id).join(', '));
2579
+ await this.executeWave(rescued);
2580
+ }
2581
+ }
2582
+ /**
2583
+ * Try resilience recovery strategies before hard-failing a task.
2584
+ * Called from dispatch-cap, timeout, hollow, and error paths to avoid bypassing resilience.
2585
+ *
2586
+ * Strategies (in order):
2587
+ * 1. Micro-decomposition — break complex failing tasks into subtasks
2588
+ * 2. Degraded acceptance — accept partial work if artifacts exist on disk
2589
+ *
2590
+ * Returns true if recovery succeeded (caller should return), false if hard-fail should proceed.
2591
+ */
2592
+ async tryResilienceRecovery(task, taskId, taskResult, spawnResult) {
2593
+ // Strategy 1: Micro-decompose complex tasks into smaller subtasks
2594
+ // V10: Lowered threshold from >= 6 to >= 4 so moderately complex tasks can be recovered
2595
+ if ((task.complexity ?? 0) >= 4 && task.attempts >= 2 && this.budgetPool.hasCapacity()) {
2596
+ const subtasks = await this.microDecompose(task);
2597
+ if (subtasks && subtasks.length >= 2) {
2598
+ // Reset task status so replaceWithSubtasks can mark it as decomposed
2599
+ task.status = 'dispatched';
2600
+ this.taskQueue.replaceWithSubtasks(taskId, subtasks);
2601
+ this.logDecision('micro-decompose', `${taskId}: decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
2602
+ this.emit({
2603
+ type: 'swarm.task.failed',
2604
+ taskId,
2605
+ error: `Micro-decomposed into ${subtasks.length} subtasks`,
2606
+ attempt: task.attempts,
2607
+ maxAttempts: this.config.maxDispatchesPerTask ?? 5,
2608
+ willRetry: false,
2609
+ toolCalls: spawnResult.metrics.toolCalls,
2610
+ failureMode: task.failureMode,
2611
+ });
2612
+ this.emit({
2613
+ type: 'swarm.task.resilience',
2614
+ taskId,
2615
+ strategy: 'micro-decompose',
2616
+ succeeded: true,
2617
+ reason: `Decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`,
2618
+ artifactsFound: 0,
2619
+ toolCalls: spawnResult.metrics.toolCalls ?? 0,
2620
+ });
2621
+ return true;
2622
+ }
2623
+ // Micro-decompose was attempted but didn't produce usable subtasks
2624
+ if ((task.complexity ?? 0) < 4) {
2625
+ this.logDecision('resilience-skip', `${taskId}: skipped micro-decompose — complexity ${task.complexity} < 4`, '');
2626
+ }
2627
+ }
2628
+ // Strategy 2: Degraded acceptance — check if any attempt produced files on disk.
2629
+ // V10: Use checkArtifactsEnhanced for broader detection (filesModified, closureReport, output)
2630
+ const artifactReport = checkArtifactsEnhanced(task, taskResult);
2631
+ const existingArtifacts = artifactReport.files.filter(f => f.exists && f.sizeBytes > 0);
2632
+ const hasArtifacts = existingArtifacts.length > 0;
2633
+ // V10: Fix timeout detection — toolCalls=-1 means timeout (worker WAS working)
2634
+ const toolCalls = spawnResult.metrics.toolCalls ?? 0;
2635
+ const hadToolCalls = toolCalls > 0 || toolCalls === -1
2636
+ || (taskResult.filesModified && taskResult.filesModified.length > 0);
2637
+ if (hasArtifacts || hadToolCalls) {
2638
+ // Accept with degraded flag — prevents cascade-skip of dependents
2639
+ taskResult.success = true;
2640
+ taskResult.degraded = true;
2641
+ taskResult.qualityScore = 2; // Capped at low quality
2642
+ taskResult.qualityFeedback = 'Degraded acceptance: retries exhausted but filesystem artifacts exist';
2643
+ task.degraded = true;
2644
+ // Reset status so markCompleted works (markFailed may have set it to 'failed')
2645
+ task.status = 'dispatched';
2646
+ this.taskQueue.markCompleted(taskId, taskResult);
2647
+ this.hollowStreak = 0;
2648
+ this.logDecision('degraded-acceptance', `${taskId}: accepted as degraded — ${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`, 'Prevents cascade-skip of dependent tasks');
2649
+ this.emit({
2650
+ type: 'swarm.task.completed',
2651
+ taskId,
2652
+ success: true,
2653
+ tokensUsed: taskResult.tokensUsed,
2654
+ costUsed: taskResult.costUsed,
2655
+ durationMs: taskResult.durationMs,
2656
+ qualityScore: 2,
2657
+ qualityFeedback: 'Degraded acceptance',
2658
+ output: taskResult.output,
2659
+ toolCalls: spawnResult.metrics.toolCalls,
2660
+ });
2661
+ this.emit({
2662
+ type: 'swarm.task.resilience',
2663
+ taskId,
2664
+ strategy: 'degraded-acceptance',
2665
+ succeeded: true,
2666
+ reason: `${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`,
2667
+ artifactsFound: existingArtifacts.length,
2668
+ toolCalls,
2669
+ });
2670
+ return true;
2671
+ }
2672
+ // Both strategies failed — log exhaustion for traceability
2673
+ this.logDecision('resilience-exhausted', `${taskId}: no recovery — artifacts: ${existingArtifacts.length}, toolCalls: ${toolCalls}, filesModified: ${taskResult.filesModified?.length ?? 0}`, '');
2674
+ this.emit({
2675
+ type: 'swarm.task.resilience',
2676
+ taskId,
2677
+ strategy: 'none',
2678
+ succeeded: false,
2679
+ reason: `No artifacts found, toolCalls=${toolCalls}, filesModified=${taskResult.filesModified?.length ?? 0}`,
2680
+ artifactsFound: existingArtifacts.length,
2681
+ toolCalls,
2682
+ });
2683
+ return false;
2684
+ }
2685
+ /**
2686
+ * Micro-decompose a complex task into 2-3 smaller subtasks using the LLM.
2687
 * Called when a complex task (complexity >= 4) fails 2+ times with the same failure mode.
2688
+ * Returns null if decomposition doesn't make sense or LLM can't produce valid subtasks.
2689
+ */
2690
/**
 * Micro-decompose a complex task into 2-3 smaller subtasks using the LLM.
 * Called when a complex task (complexity >= 4) fails 2+ times.
 * @param {object} task - the repeatedly-failing task to split.
 * @returns {Promise<Array|null>} new subtask records, or null if decomposition
 *   doesn't apply (complexity < 4) or the LLM can't produce >= 2 valid subtasks.
 *   All LLM/parse failures are caught and converted to null.
 */
async microDecompose(task) {
    // Simple tasks are never decomposed — failure likely isn't about scope.
    if ((task.complexity ?? 0) < 4)
        return null;
    try {
        const prompt = `Task "${task.description}" failed ${task.attempts} times on model ${task.assignedModel ?? 'unknown'}.
The task has complexity ${task.complexity}/10 and type "${task.type}".
${task.targetFiles?.length ? `Target files: ${task.targetFiles.join(', ')}` : ''}

Break this task into 2-3 smaller, independent subtasks that each handle a portion of the work.
Each subtask MUST be simpler (complexity <= ${Math.ceil(task.complexity / 2)}).
Each subtask should be self-contained and produce concrete file changes.

Return JSON ONLY (no markdown, no explanation):
{
"subtasks": [
{ "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number> }
]
}`;
        const response = await this.provider.chat([
            { role: 'system', content: 'You are a task decomposition assistant. Return only valid JSON.' },
            { role: 'user', content: prompt },
        ], {
            // Decomposition is an orchestrator decision — use the orchestrator model.
            model: this.config.orchestratorModel,
            maxTokens: 2000,
            temperature: 0.3,
        });
        this.trackOrchestratorUsage(response, 'micro-decompose');
        // Parse response — handle markdown code blocks
        let jsonStr = response.content.trim();
        const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (codeBlockMatch)
            jsonStr = codeBlockMatch[1].trim();
        const parsed = JSON.parse(jsonStr);
        // A single-subtask "decomposition" is pointless — require at least 2.
        if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
            return null;
        }
        const subtasks = parsed.subtasks.map((sub, idx) => ({
            id: `${task.id}-sub${idx + 1}`,
            description: sub.description,
            type: sub.type ?? task.type,
            dependencies: [], // Will be set by replaceWithSubtasks
            status: 'ready',
            // Clamp: each subtask must be strictly simpler than the parent.
            complexity: Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1),
            wave: task.wave,
            targetFiles: sub.targetFiles ?? [],
            readFiles: task.readFiles,
            attempts: 0,
        }));
        return subtasks;
    }
    catch (error) {
        // LLM call or JSON.parse failed — caller falls back to hard-fail handling.
        this.logDecision('micro-decompose', `${task.id}: micro-decomposition failed — ${error.message}`, 'Falling through to normal failure path');
        return null;
    }
}
2745
+ // ─── Pre-Dispatch Auto-Split ──────────────────────────────────────────────
2746
+ /**
2747
+ * Heuristic pre-filter: should this task be considered for auto-split?
2748
+ * Cheap check — no LLM call. Returns true if all conditions are met.
2749
+ */
2750
+ shouldAutoSplit(task) {
2751
+ const cfg = this.config.autoSplit;
2752
+ if (cfg?.enabled === false)
2753
+ return false;
2754
+ const floor = cfg?.complexityFloor ?? 6;
2755
+ const splittable = cfg?.splittableTypes ?? ['implement', 'refactor', 'test'];
2756
+ // Only first attempts — retries use micro-decompose
2757
+ if (task.attempts > 0)
2758
+ return false;
2759
+ // Complexity check
2760
+ if ((task.complexity ?? 0) < floor)
2761
+ return false;
2762
+ // Type check
2763
+ if (!splittable.includes(task.type))
2764
+ return false;
2765
+ // Must be on critical path (foundation task)
2766
+ if (!task.isFoundation)
2767
+ return false;
2768
+ // Budget capacity check
2769
+ if (!this.budgetPool.hasCapacity())
2770
+ return false;
2771
+ return true;
2772
+ }
2773
+ /**
2774
+ * LLM judge call: ask the orchestrator model whether and how to split a task.
2775
+ * Returns { shouldSplit: false } or { shouldSplit: true, subtasks: [...] }.
2776
+ */
2777
+ async judgeSplit(task) {
2778
+ const maxSubs = this.config.autoSplit?.maxSubtasks ?? 4;
2779
+ const prompt = `You are evaluating whether a task should be split into parallel subtasks before dispatch.
2780
+
2781
+ TASK: "${task.description}"
2782
+ TYPE: ${task.type}
2783
+ COMPLEXITY: ${task.complexity}/10
2784
+ TARGET FILES: ${task.targetFiles?.join(', ') || 'none specified'}
2785
+ DOWNSTREAM DEPENDENTS: This is a foundation task — other tasks are waiting on it.
2786
+
2787
+ Should this task be split into 2-${maxSubs} parallel subtasks that different workers can execute simultaneously?
2788
+
2789
+ SPLIT if:
2790
+ - The task involves multiple independent pieces of work (e.g., different files, different functions, different concerns)
2791
+ - Parallel execution would meaningfully reduce wall-clock time
2792
+ - The subtasks can produce useful output independently
2793
+
2794
+ DO NOT SPLIT if:
2795
+ - The work is conceptually atomic (one function, one algorithm, tightly coupled logic)
2796
+ - The subtasks would need to coordinate on the same files/functions
2797
+ - Splitting would add more overhead than it saves
2798
+
2799
+ Return JSON ONLY:
2800
+ {
2801
+ "shouldSplit": true/false,
2802
+ "reason": "brief explanation",
2803
+ "subtasks": [
2804
+ { "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number 1-10> }
2805
+ ]
2806
+ }
2807
+ If shouldSplit is false, omit subtasks.`;
2808
+ const response = await this.provider.chat([
2809
+ { role: 'system', content: 'You are a task planning judge. Return only valid JSON.' },
2810
+ { role: 'user', content: prompt },
2811
+ ], {
2812
+ model: this.config.orchestratorModel,
2813
+ maxTokens: 1500,
2814
+ temperature: 0.2,
2815
+ });
2816
+ this.trackOrchestratorUsage(response, 'auto-split-judge');
2817
+ // Parse response — reuse markdown code block stripping from microDecompose
2818
+ let jsonStr = response.content.trim();
2819
+ const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
2820
+ if (codeBlockMatch)
2821
+ jsonStr = codeBlockMatch[1].trim();
2822
+ const parsed = JSON.parse(jsonStr);
2823
+ if (!parsed.shouldSplit) {
2824
+ this.logDecision('auto-split', `${task.id}: judge says no split — ${parsed.reason}`, '');
2825
+ return { shouldSplit: false };
2826
+ }
2827
+ if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
2828
+ return { shouldSplit: false };
2829
+ }
2830
+ // Build SwarmTask[] from judge output (same pattern as microDecompose)
2831
+ const subtasks = parsed.subtasks.slice(0, maxSubs).map((sub, idx) => ({
2832
+ id: `${task.id}-split${idx + 1}`,
2833
+ description: sub.description,
2834
+ type: sub.type ?? task.type,
2835
+ dependencies: [],
2836
+ status: 'ready',
2837
+ complexity: Math.max(3, Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1)),
2838
+ wave: task.wave,
2839
+ targetFiles: sub.targetFiles ?? [],
2840
+ readFiles: task.readFiles,
2841
+ attempts: 0,
2842
+ rescueContext: `Auto-split from ${task.id} (original complexity ${task.complexity})`,
2843
+ }));
2844
+ this.logDecision('auto-split', `${task.id}: split into ${subtasks.length} subtasks — ${parsed.reason}`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
2845
+ return { shouldSplit: true, subtasks };
2846
+ }
2847
+ /**
2848
+ * V7: Compute effective retry limit for a task.
2849
+ * F10: Fixup tasks get max 2 retries (3 attempts total) — one full model-failover cycle.
2850
+ * Foundation tasks get +1 retry to reduce cascade failure risk.
2851
+ */
2852
+ getEffectiveRetries(task) {
2853
+ const isFixup = 'fixesTaskId' in task;
2854
+ if (isFixup)
2855
+ return 2; // Fixup tasks: 2 retries max (3 attempts total)
2856
+ return task.isFoundation ? this.config.workerRetries + 1 : this.config.workerRetries;
2857
+ }
2858
+ /**
2859
+ * F22: Build a brief summary of swarm progress for retry context.
2860
+ * Helps retrying workers understand what the swarm has already accomplished.
2861
+ */
2862
+ getSwarmProgressSummary() {
2863
+ const allTasks = this.taskQueue.getAllTasks();
2864
+ const completed = allTasks.filter(t => t.status === 'completed');
2865
+ if (completed.length === 0)
2866
+ return '';
2867
+ const lines = [];
2868
+ for (const task of completed) {
2869
+ const score = task.result?.qualityScore ? ` (${task.result.qualityScore}/5)` : '';
2870
+ lines.push(`- ${task.id}: ${task.description.slice(0, 80)}${score}`);
2871
+ }
2872
+ // Collect files created by completed tasks
2873
+ const files = new Set();
2874
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2875
+ for (const task of completed) {
2876
+ for (const f of (task.result?.filesModified ?? []))
2877
+ files.add(f);
2878
+ for (const f of (task.targetFiles ?? [])) {
2879
+ try {
2880
+ const resolved = path.resolve(baseDir, f);
2881
+ if (fs.existsSync(resolved))
2882
+ files.add(f);
2883
+ }
2884
+ catch { /* skip */ }
2885
+ }
2886
+ }
2887
+ const parts = [`The following tasks have completed successfully:\n${lines.join('\n')}`];
2888
+ if (files.size > 0) {
2889
+ parts.push(`Files already created/modified: ${[...files].slice(0, 20).join(', ')}`);
2890
+ parts.push('You can build on these existing files.');
2891
+ }
2892
+ return parts.join('\n');
2893
+ }
1322
2894
  /** Get a model health summary for emitting events. */
1323
2895
  getModelHealthSummary(model) {
1324
2896
  const records = this.healthTracker.getAllRecords();