attocode 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/CHANGELOG.md +191 -1
  2. package/README.md +7 -0
  3. package/dist/src/adapters.d.ts +6 -1
  4. package/dist/src/adapters.d.ts.map +1 -1
  5. package/dist/src/adapters.js +8 -1
  6. package/dist/src/adapters.js.map +1 -1
  7. package/dist/src/agent.d.ts +41 -4
  8. package/dist/src/agent.d.ts.map +1 -1
  9. package/dist/src/agent.js +846 -75
  10. package/dist/src/agent.js.map +1 -1
  11. package/dist/src/cli.d.ts.map +1 -1
  12. package/dist/src/cli.js +23 -2
  13. package/dist/src/cli.js.map +1 -1
  14. package/dist/src/core/protocol/types.d.ts +8 -8
  15. package/dist/src/defaults.d.ts +7 -2
  16. package/dist/src/defaults.d.ts.map +1 -1
  17. package/dist/src/defaults.js +38 -2
  18. package/dist/src/defaults.js.map +1 -1
  19. package/dist/src/integrations/agent-registry.d.ts +13 -0
  20. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  21. package/dist/src/integrations/agent-registry.js.map +1 -1
  22. package/dist/src/integrations/async-subagent.d.ts +135 -0
  23. package/dist/src/integrations/async-subagent.d.ts.map +1 -0
  24. package/dist/src/integrations/async-subagent.js +213 -0
  25. package/dist/src/integrations/async-subagent.js.map +1 -0
  26. package/dist/src/integrations/auto-checkpoint.d.ts +98 -0
  27. package/dist/src/integrations/auto-checkpoint.d.ts.map +1 -0
  28. package/dist/src/integrations/auto-checkpoint.js +252 -0
  29. package/dist/src/integrations/auto-checkpoint.js.map +1 -0
  30. package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
  31. package/dist/src/integrations/auto-compaction.js +5 -1
  32. package/dist/src/integrations/auto-compaction.js.map +1 -1
  33. package/dist/src/integrations/bash-policy.d.ts +33 -0
  34. package/dist/src/integrations/bash-policy.d.ts.map +1 -0
  35. package/dist/src/integrations/bash-policy.js +142 -0
  36. package/dist/src/integrations/bash-policy.js.map +1 -0
  37. package/dist/src/integrations/codebase-context.d.ts +5 -0
  38. package/dist/src/integrations/codebase-context.d.ts.map +1 -1
  39. package/dist/src/integrations/codebase-context.js +33 -0
  40. package/dist/src/integrations/codebase-context.js.map +1 -1
  41. package/dist/src/integrations/complexity-classifier.d.ts +86 -0
  42. package/dist/src/integrations/complexity-classifier.d.ts.map +1 -0
  43. package/dist/src/integrations/complexity-classifier.js +233 -0
  44. package/dist/src/integrations/complexity-classifier.js.map +1 -0
  45. package/dist/src/integrations/delegation-protocol.d.ts +86 -0
  46. package/dist/src/integrations/delegation-protocol.d.ts.map +1 -0
  47. package/dist/src/integrations/delegation-protocol.js +127 -0
  48. package/dist/src/integrations/delegation-protocol.js.map +1 -0
  49. package/dist/src/integrations/dynamic-budget.d.ts +81 -0
  50. package/dist/src/integrations/dynamic-budget.d.ts.map +1 -0
  51. package/dist/src/integrations/dynamic-budget.js +151 -0
  52. package/dist/src/integrations/dynamic-budget.js.map +1 -0
  53. package/dist/src/integrations/economics.d.ts +86 -1
  54. package/dist/src/integrations/economics.d.ts.map +1 -1
  55. package/dist/src/integrations/economics.js +306 -11
  56. package/dist/src/integrations/economics.js.map +1 -1
  57. package/dist/src/integrations/environment-facts.d.ts +52 -0
  58. package/dist/src/integrations/environment-facts.d.ts.map +1 -0
  59. package/dist/src/integrations/environment-facts.js +84 -0
  60. package/dist/src/integrations/environment-facts.js.map +1 -0
  61. package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
  62. package/dist/src/integrations/hierarchical-config.js +17 -0
  63. package/dist/src/integrations/hierarchical-config.js.map +1 -1
  64. package/dist/src/integrations/index.d.ts +19 -2
  65. package/dist/src/integrations/index.d.ts.map +1 -1
  66. package/dist/src/integrations/index.js +34 -2
  67. package/dist/src/integrations/index.js.map +1 -1
  68. package/dist/src/integrations/injection-budget.d.ts +71 -0
  69. package/dist/src/integrations/injection-budget.d.ts.map +1 -0
  70. package/dist/src/integrations/injection-budget.js +136 -0
  71. package/dist/src/integrations/injection-budget.js.map +1 -0
  72. package/dist/src/integrations/mcp-client.d.ts.map +1 -1
  73. package/dist/src/integrations/mcp-client.js +14 -0
  74. package/dist/src/integrations/mcp-client.js.map +1 -1
  75. package/dist/src/integrations/mcp-custom-tools.d.ts +102 -0
  76. package/dist/src/integrations/mcp-custom-tools.d.ts.map +1 -0
  77. package/dist/src/integrations/mcp-custom-tools.js +232 -0
  78. package/dist/src/integrations/mcp-custom-tools.js.map +1 -0
  79. package/dist/src/integrations/mcp-tool-validator.d.ts +60 -0
  80. package/dist/src/integrations/mcp-tool-validator.d.ts.map +1 -0
  81. package/dist/src/integrations/mcp-tool-validator.js +141 -0
  82. package/dist/src/integrations/mcp-tool-validator.js.map +1 -0
  83. package/dist/src/integrations/policy-engine.d.ts +55 -0
  84. package/dist/src/integrations/policy-engine.d.ts.map +1 -0
  85. package/dist/src/integrations/policy-engine.js +247 -0
  86. package/dist/src/integrations/policy-engine.js.map +1 -0
  87. package/dist/src/integrations/safety.d.ts +5 -4
  88. package/dist/src/integrations/safety.d.ts.map +1 -1
  89. package/dist/src/integrations/safety.js +32 -7
  90. package/dist/src/integrations/safety.js.map +1 -1
  91. package/dist/src/integrations/sandbox/basic.d.ts +7 -0
  92. package/dist/src/integrations/sandbox/basic.d.ts.map +1 -1
  93. package/dist/src/integrations/sandbox/basic.js +27 -2
  94. package/dist/src/integrations/sandbox/basic.js.map +1 -1
  95. package/dist/src/integrations/sandbox/index.d.ts +6 -0
  96. package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
  97. package/dist/src/integrations/sandbox/index.js +3 -0
  98. package/dist/src/integrations/sandbox/index.js.map +1 -1
  99. package/dist/src/integrations/sandbox/landlock.d.ts.map +1 -1
  100. package/dist/src/integrations/sandbox/landlock.js +3 -0
  101. package/dist/src/integrations/sandbox/landlock.js.map +1 -1
  102. package/dist/src/integrations/self-improvement.d.ts +90 -0
  103. package/dist/src/integrations/self-improvement.d.ts.map +1 -0
  104. package/dist/src/integrations/self-improvement.js +229 -0
  105. package/dist/src/integrations/self-improvement.js.map +1 -0
  106. package/dist/src/integrations/smart-decomposer.d.ts +22 -1
  107. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  108. package/dist/src/integrations/smart-decomposer.js +127 -28
  109. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  110. package/dist/src/integrations/subagent-output-store.d.ts +91 -0
  111. package/dist/src/integrations/subagent-output-store.d.ts.map +1 -0
  112. package/dist/src/integrations/subagent-output-store.js +257 -0
  113. package/dist/src/integrations/subagent-output-store.js.map +1 -0
  114. package/dist/src/integrations/swarm/index.d.ts +2 -2
  115. package/dist/src/integrations/swarm/index.d.ts.map +1 -1
  116. package/dist/src/integrations/swarm/index.js +1 -1
  117. package/dist/src/integrations/swarm/index.js.map +1 -1
  118. package/dist/src/integrations/swarm/model-selector.d.ts +16 -0
  119. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
  120. package/dist/src/integrations/swarm/model-selector.js +123 -10
  121. package/dist/src/integrations/swarm/model-selector.js.map +1 -1
  122. package/dist/src/integrations/swarm/swarm-budget.d.ts +4 -0
  123. package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -1
  124. package/dist/src/integrations/swarm/swarm-budget.js +6 -0
  125. package/dist/src/integrations/swarm/swarm-budget.js.map +1 -1
  126. package/dist/src/integrations/swarm/swarm-config-loader.d.ts +10 -1
  127. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
  128. package/dist/src/integrations/swarm/swarm-config-loader.js +226 -13
  129. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
  130. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +12 -1
  131. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
  132. package/dist/src/integrations/swarm/swarm-event-bridge.js +178 -9
  133. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
  134. package/dist/src/integrations/swarm/swarm-events.d.ts +66 -1
  135. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
  136. package/dist/src/integrations/swarm/swarm-events.js +26 -5
  137. package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
  138. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +127 -0
  139. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
  140. package/dist/src/integrations/swarm/swarm-orchestrator.js +1842 -47
  141. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
  142. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +91 -3
  143. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
  144. package/dist/src/integrations/swarm/swarm-quality-gate.js +395 -19
  145. package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
  146. package/dist/src/integrations/swarm/task-queue.d.ts +55 -1
  147. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
  148. package/dist/src/integrations/swarm/task-queue.js +389 -16
  149. package/dist/src/integrations/swarm/task-queue.js.map +1 -1
  150. package/dist/src/integrations/swarm/types.d.ts +247 -11
  151. package/dist/src/integrations/swarm/types.d.ts.map +1 -1
  152. package/dist/src/integrations/swarm/types.js +67 -9
  153. package/dist/src/integrations/swarm/types.js.map +1 -1
  154. package/dist/src/integrations/swarm/worker-pool.d.ts +18 -5
  155. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
  156. package/dist/src/integrations/swarm/worker-pool.js +236 -34
  157. package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
  158. package/dist/src/integrations/thinking-strategy.d.ts +52 -0
  159. package/dist/src/integrations/thinking-strategy.d.ts.map +1 -0
  160. package/dist/src/integrations/thinking-strategy.js +129 -0
  161. package/dist/src/integrations/thinking-strategy.js.map +1 -0
  162. package/dist/src/integrations/tool-recommendation.d.ts +61 -0
  163. package/dist/src/integrations/tool-recommendation.d.ts.map +1 -0
  164. package/dist/src/integrations/tool-recommendation.js +268 -0
  165. package/dist/src/integrations/tool-recommendation.js.map +1 -0
  166. package/dist/src/integrations/verification-gate.d.ts +80 -0
  167. package/dist/src/integrations/verification-gate.d.ts.map +1 -0
  168. package/dist/src/integrations/verification-gate.js +146 -0
  169. package/dist/src/integrations/verification-gate.js.map +1 -0
  170. package/dist/src/integrations/work-log.d.ts +87 -0
  171. package/dist/src/integrations/work-log.d.ts.map +1 -0
  172. package/dist/src/integrations/work-log.js +275 -0
  173. package/dist/src/integrations/work-log.js.map +1 -0
  174. package/dist/src/main.js +31 -5
  175. package/dist/src/main.js.map +1 -1
  176. package/dist/src/modes/repl.d.ts.map +1 -1
  177. package/dist/src/modes/repl.js +10 -4
  178. package/dist/src/modes/repl.js.map +1 -1
  179. package/dist/src/modes/tui.d.ts.map +1 -1
  180. package/dist/src/modes/tui.js +5 -0
  181. package/dist/src/modes/tui.js.map +1 -1
  182. package/dist/src/modes.d.ts +6 -0
  183. package/dist/src/modes.d.ts.map +1 -1
  184. package/dist/src/modes.js +69 -21
  185. package/dist/src/modes.js.map +1 -1
  186. package/dist/src/tools/agent.d.ts.map +1 -1
  187. package/dist/src/tools/agent.js +11 -2
  188. package/dist/src/tools/agent.js.map +1 -1
  189. package/dist/src/tools/bash.d.ts +9 -3
  190. package/dist/src/tools/bash.d.ts.map +1 -1
  191. package/dist/src/tools/bash.js +12 -0
  192. package/dist/src/tools/bash.js.map +1 -1
  193. package/dist/src/tools/coercion.d.ts +6 -0
  194. package/dist/src/tools/coercion.d.ts.map +1 -1
  195. package/dist/src/tools/coercion.js +13 -0
  196. package/dist/src/tools/coercion.js.map +1 -1
  197. package/dist/src/tools/file.d.ts +2 -2
  198. package/dist/src/tools/file.js +2 -2
  199. package/dist/src/tools/file.js.map +1 -1
  200. package/dist/src/tools/permission.d.ts.map +1 -1
  201. package/dist/src/tools/permission.js +4 -111
  202. package/dist/src/tools/permission.js.map +1 -1
  203. package/dist/src/tools/standard.d.ts +17 -1
  204. package/dist/src/tools/standard.d.ts.map +1 -1
  205. package/dist/src/tools/standard.js +64 -11
  206. package/dist/src/tools/standard.js.map +1 -1
  207. package/dist/src/tracing/trace-collector.d.ts +167 -0
  208. package/dist/src/tracing/trace-collector.d.ts.map +1 -1
  209. package/dist/src/tracing/trace-collector.js +137 -0
  210. package/dist/src/tracing/trace-collector.js.map +1 -1
  211. package/dist/src/tracing/types.d.ts +105 -1
  212. package/dist/src/tracing/types.d.ts.map +1 -1
  213. package/dist/src/tracing/types.js.map +1 -1
  214. package/dist/src/tui/app.d.ts.map +1 -1
  215. package/dist/src/tui/app.js +34 -5
  216. package/dist/src/tui/app.js.map +1 -1
  217. package/dist/src/types.d.ts +89 -0
  218. package/dist/src/types.d.ts.map +1 -1
  219. package/package.json +6 -2
@@ -16,15 +16,71 @@
16
16
  * - State persistence and resume
17
17
  * - Orchestrator decision logging
18
18
  */
19
- import { createSmartDecomposer, parseDecompositionResponse } from '../smart-decomposer.js';
19
+ import * as fs from 'node:fs';
20
+ import * as path from 'node:path';
21
+ import { createSmartDecomposer, parseDecompositionResponse, validateDecomposition } from '../smart-decomposer.js';
20
22
  import { createResultSynthesizer } from '../result-synthesizer.js';
21
- import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, SUBTASK_TO_CAPABILITY } from './types.js';
23
+ import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, getTaskTypeConfig } from './types.js';
22
24
  import { createSwarmTaskQueue } from './task-queue.js';
23
25
  import { createSwarmBudgetPool } from './swarm-budget.js';
24
26
  import { createSwarmWorkerPool } from './worker-pool.js';
25
- import { evaluateWorkerOutput } from './swarm-quality-gate.js';
27
+ import { evaluateWorkerOutput, runPreFlightChecks, checkArtifacts, checkArtifactsEnhanced, runConcreteChecks } from './swarm-quality-gate.js';
26
28
  import { ModelHealthTracker, selectAlternativeModel } from './model-selector.js';
27
29
  import { SwarmStateStore } from './swarm-state-store.js';
// ─── Hollow Completion Detection ──────────────────────────────────────────
/**
 * Phrases that mark a "successful" worker output as an admission of failure.
 * Compared as plain substrings against the lower-cased output.
 */
const FAILURE_INDICATORS = [
    'budget exhausted', 'unable to complete', 'could not complete',
    'ran out of budget', 'no changes were made', 'no files were modified',
    'no files were created', 'failed to complete', 'before research could begin',
    'i was unable to', 'i could not', 'unfortunately i',
];
/**
 * Stock phrases that, in a short output produced without any tool call,
 * mark the completion as boilerplate rather than real work.
 * Compared as plain substrings against the lower-cased, trimmed output.
 */
const BOILERPLATE_INDICATORS = [
    'task completed successfully', 'i have completed the task',
    'the task has been completed', 'done', 'completed', 'finished',
    'no issues found', 'everything looks good', 'all tasks completed',
];
/**
 * V11: Hollow completion detection — catches empty completions AND "success" with failure language.
 *
 * A completion is reported as hollow when any of the following holds:
 *  1. zero tool calls and a trimmed output shorter than the hollow threshold
 *     (`swarmConfig.hollowOutputThreshold`, 120 characters when not configured);
 *  2. zero tool calls and a trimmed output shorter than 300 characters that contains
 *     one of BOILERPLATE_INDICATORS;
 *  3. `spawnResult.success` is truthy but the output contains one of FAILURE_INDICATORS;
 *  4. the task type's config has `requiresToolCalls` set and no tool call was made.
 *
 * A tool-call count of -1 is the timeout sentinel and is never treated as hollow.
 *
 * @param {object} spawnResult - Worker result; reads `metrics.toolCalls`, `output` and `success`.
 *   A missing `metrics` object is treated as zero tool calls instead of throwing.
 * @param {string} [taskType] - Task type looked up through getTaskTypeConfig; skipped when falsy.
 * @param {object} [swarmConfig] - Swarm configuration; only `hollowOutputThreshold` is read here,
 *   the object is also forwarded to getTaskTypeConfig.
 * @returns {boolean} true when the completion is considered hollow.
 */
export function isHollowCompletion(spawnResult, taskType, swarmConfig) {
    // Read the tool-call count once; tolerate results that carry no metrics object.
    const toolCalls = spawnResult.metrics?.toolCalls ?? 0;
    // Timeout uses toolCalls === -1, not hollow
    if (toolCalls === -1) {
        return false;
    }
    const madeNoToolCalls = toolCalls === 0;
    const output = spawnResult.output ?? '';
    const trimmedLength = output.trim().length;
    const outputLower = output.toLowerCase();
    // Truly empty completions: zero tools AND trivial output
    // P4: Higher threshold (120 chars) + configurable via SwarmConfig
    const hollowThreshold = swarmConfig?.hollowOutputThreshold ?? 120;
    if (madeNoToolCalls && trimmedLength < hollowThreshold) {
        return true;
    }
    // P4: Boilerplate detection — zero tools AND short output that's just boilerplate
    if (madeNoToolCalls && trimmedLength < 300) {
        const trimmedLower = outputLower.trim();
        if (BOILERPLATE_INDICATORS.some((phrase) => trimmedLower.includes(phrase))) {
            return true;
        }
    }
    // "Success" that admits failure: worker claims success but output contains failure language
    if (spawnResult.success && FAILURE_INDICATORS.some((phrase) => outputLower.includes(phrase))) {
        return true;
    }
    // V7: Use configurable requiresToolCalls from TaskTypeConfig.
    // For action-oriented tasks (implement/test/refactor/etc), zero tool calls is ALWAYS hollow.
    if (taskType) {
        const typeConfig = getTaskTypeConfig(taskType, swarmConfig);
        if (typeConfig.requiresToolCalls && madeNoToolCalls) {
            return true;
        }
    }
    return false;
}
28
84
  // ─── Orchestrator ──────────────────────────────────────────────────────────
29
85
  export class SwarmOrchestrator {
30
86
  config;
@@ -47,10 +103,15 @@ export class SwarmOrchestrator {
47
103
  retries = 0;
48
104
  startTime = 0;
49
105
  modelUsage = new Map();
106
+ // Orchestrator's own LLM usage (separate from worker usage)
107
+ orchestratorTokens = 0;
108
+ orchestratorCost = 0;
109
+ orchestratorCalls = 0;
50
110
  // V2: Planning, review, verification, health, persistence
51
111
  plan;
52
112
  waveReviews = [];
53
113
  verificationResult;
114
+ artifactInventory;
54
115
  orchestratorDecisions = [];
55
116
  healthTracker;
56
117
  stateStore;
@@ -61,43 +122,111 @@ export class SwarmOrchestrator {
61
122
  static CIRCUIT_BREAKER_WINDOW_MS = 30_000;
62
123
  static CIRCUIT_BREAKER_THRESHOLD = 3;
63
124
  static CIRCUIT_BREAKER_PAUSE_MS = 15_000;
125
+ // P3: Per-model quality gate circuit breaker (replaces global circuit breaker)
126
+ perModelQualityRejections = new Map();
127
+ qualityGateDisabledModels = new Set();
128
+ static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 5;
129
+ // Hollow completion streak: early termination when single-model swarm produces only hollows
130
+ hollowStreak = 0;
131
+ static HOLLOW_STREAK_THRESHOLD = 3;
132
+ // V7: Global dispatch + hollow ratio tracking for multi-model termination
133
+ totalDispatches = 0;
134
+ totalHollows = 0;
135
+ // Hollow ratio warning (fired once, then suppressed to avoid log spam)
136
+ hollowRatioWarned = false;
137
+ // P7: Adaptive dispatch stagger — increases on rate limits, decreases on success
138
+ adaptiveStaggerMs = 0; // Initialized from config in constructor
139
+ // F25: Consecutive timeout tracking per task — early-fail after limit
140
+ taskTimeoutCounts = new Map();
141
+ // Original prompt for re-planning on resume
142
+ originalPrompt = '';
143
+ // Mid-swarm re-planning: only once per swarm execution
144
+ hasReplanned = false;
64
145
  constructor(config, provider, agentRegistry, spawnAgentFn, blackboard) {
65
146
  this.config = { ...DEFAULT_SWARM_CONFIG, ...config };
66
147
  this.provider = provider;
67
148
  this.blackboard = blackboard;
68
149
  this.spawnAgentFn = spawnAgentFn;
69
150
  this.healthTracker = new ModelHealthTracker();
151
+ this.adaptiveStaggerMs = this.getStaggerMs();
70
152
  this.taskQueue = createSwarmTaskQueue();
71
153
  this.budgetPool = createSwarmBudgetPool(this.config);
72
- this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool);
154
+ this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool, this.healthTracker);
73
155
  // Initialize state store if persistence enabled
74
156
  if (this.config.enablePersistence) {
75
157
  this.stateStore = new SwarmStateStore(this.config.stateDir ?? '.agent/swarm-state', this.config.resumeSessionId);
76
158
  }
77
159
  // C1: Build LLM decompose function with explicit JSON schema
78
160
  const llmDecompose = async (task, _context) => {
161
+ // V7: Dynamically build the allowed type list from built-in + user-defined types
162
+ const builtinTypes = ['research', 'analysis', 'design', 'implement', 'test', 'refactor', 'review', 'document', 'integrate', 'deploy', 'merge'];
163
+ const customTypes = Object.keys(this.config.taskTypes ?? {}).filter(t => !builtinTypes.includes(t));
164
+ const allTypes = [...builtinTypes, ...customTypes];
165
+ const typeListStr = allTypes.map(t => `"${t}"`).join(' | ');
166
+ // Build custom type descriptions so the LLM knows when to use them
167
+ let customTypeSection = '';
168
+ if (customTypes.length > 0) {
169
+ const descriptions = customTypes.map(t => {
170
+ const cfg = this.config.taskTypes[t];
171
+ const parts = [` - "${t}"`];
172
+ if (cfg.capability)
173
+ parts.push(`(capability: ${cfg.capability})`);
174
+ if (cfg.promptTemplate)
175
+ parts.push(`— uses ${cfg.promptTemplate} workflow`);
176
+ if (cfg.timeout)
177
+ parts.push(`— timeout: ${Math.round(cfg.timeout / 60000)}min`);
178
+ return parts.join(' ');
179
+ }).join('\n');
180
+ customTypeSection = `\n\nCustom task types available:\n${descriptions}\nUse these when their description matches the subtask's purpose.`;
181
+ }
79
182
  const systemPrompt = `You are a task decomposition expert. Break down the given task into well-defined subtasks with clear dependencies.
80
183
 
184
+ CRITICAL: Dependencies MUST use zero-based integer indices referring to other subtasks in the array.
185
+
81
186
  Respond with valid JSON matching this exact schema:
82
187
  {
83
188
  "subtasks": [
84
189
  {
85
190
  "description": "Clear description of what this subtask does",
86
- "type": "implement" | "research" | "analysis" | "design" | "test" | "refactor" | "review" | "document" | "integrate" | "deploy" | "merge",
191
+ "type": ${typeListStr},
87
192
  "complexity": 1-10,
88
- "dependencies": ["description of dependency task or index like '0'"],
193
+ "dependencies": [0, 1],
89
194
  "parallelizable": true | false,
90
195
  "relevantFiles": ["src/path/to/file.ts"]
91
196
  }
92
197
  ],
93
198
  "strategy": "sequential" | "parallel" | "hierarchical" | "adaptive" | "pipeline",
94
199
  "reasoning": "Brief explanation of why this decomposition was chosen"
200
+ }${customTypeSection}
201
+
202
+ EXAMPLE 1 — Research task (3 parallel research + 1 merge):
203
+ {
204
+ "subtasks": [
205
+ { "description": "Research React state management", "type": "research", "complexity": 3, "dependencies": [], "parallelizable": true },
206
+ { "description": "Research routing options", "type": "research", "complexity": 3, "dependencies": [], "parallelizable": true },
207
+ { "description": "Research testing frameworks", "type": "research", "complexity": 2, "dependencies": [], "parallelizable": true },
208
+ { "description": "Synthesize findings into recommendation", "type": "merge", "complexity": 4, "dependencies": [0, 1, 2], "parallelizable": false }
209
+ ],
210
+ "strategy": "parallel",
211
+ "reasoning": "Independent research tasks feed into a single merge"
212
+ }
213
+
214
+ EXAMPLE 2 — Implementation task (sequential chain):
215
+ {
216
+ "subtasks": [
217
+ { "description": "Design API schema", "type": "design", "complexity": 4, "dependencies": [], "parallelizable": false },
218
+ { "description": "Implement API endpoints", "type": "implement", "complexity": 6, "dependencies": [0], "parallelizable": false },
219
+ { "description": "Write integration tests", "type": "test", "complexity": 3, "dependencies": [1], "parallelizable": false }
220
+ ],
221
+ "strategy": "sequential",
222
+ "reasoning": "Each step depends on the previous"
95
223
  }
96
224
 
97
225
  Rules:
226
+ - Dependencies MUST be integer indices (e.g., [0, 1]), NOT descriptions or strings
98
227
  - Each subtask must have a clear, actionable description
99
- - Dependencies reference other subtask descriptions or zero-based indices
100
228
  - Mark subtasks as parallelizable: true if they don't depend on each other
229
+ - If there are multiple independent subtasks, ALWAYS create a final merge task that depends on ALL of them
101
230
  - Complexity 1-3: simple, 4-6: moderate, 7-10: complex
102
231
  - Return at least 2 subtasks for non-trivial tasks`;
103
232
  const response = await this.provider.chat([
@@ -108,6 +237,7 @@ Rules:
108
237
  maxTokens: 4000,
109
238
  temperature: 0.3,
110
239
  });
240
+ this.trackOrchestratorUsage(response, 'decompose');
111
241
  // Use parseDecompositionResponse which handles markdown code blocks and edge cases
112
242
  return parseDecompositionResponse(response.content);
113
243
  };
@@ -151,6 +281,25 @@ Rules:
151
281
  }
152
282
  }
153
283
  }
284
+ /**
285
+ * Track token usage from an orchestrator LLM call.
286
+ */
287
+ trackOrchestratorUsage(response, purpose) {
288
+ if (!response.usage)
289
+ return;
290
+ const tokens = response.usage.total_tokens ?? ((response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0));
291
+ const cost = tokens * 0.000015; // ~$15/M tokens average for orchestrator models
292
+ this.orchestratorTokens += tokens;
293
+ this.orchestratorCost += cost;
294
+ this.orchestratorCalls++;
295
+ this.emit({
296
+ type: 'swarm.orchestrator.llm',
297
+ model: this.config.orchestratorModel,
298
+ purpose,
299
+ tokens,
300
+ cost,
301
+ });
302
+ }
154
303
  /**
155
304
  * Execute the full swarm pipeline for a task.
156
305
  *
@@ -167,6 +316,7 @@ Rules:
167
316
  */
168
317
  async execute(task) {
169
318
  this.startTime = Date.now();
319
+ this.originalPrompt = task;
170
320
  try {
171
321
  // V2: Check for resume
172
322
  if (this.config.resumeSessionId && this.stateStore) {
@@ -174,19 +324,100 @@ Rules:
174
324
  }
175
325
  // Phase 1: Decompose
176
326
  this.currentPhase = 'decomposing';
177
- const decomposition = await this.decompose(task);
327
+ this.emit({ type: 'swarm.phase.progress', phase: 'decomposing', message: 'Decomposing task into subtasks...' });
328
+ let decomposition = await this.decompose(task);
178
329
  if (!decomposition) {
179
330
  this.currentPhase = 'failed';
180
331
  return this.buildErrorResult('Decomposition failed — task may be too simple for swarm mode');
181
332
  }
333
+ // F5: Validate decomposition — check for cycles, invalid deps, granularity
334
+ const validation = validateDecomposition(decomposition);
335
+ if (validation.warnings.length > 0) {
336
+ this.logDecision('decomposition-validation', `Warnings: ${validation.warnings.join('; ')}`, '');
337
+ }
338
+ if (!validation.valid) {
339
+ this.logDecision('decomposition-validation', `Invalid decomposition: ${validation.issues.join('; ')}`, 'Retrying...');
340
+ // Retry decomposition once with feedback
341
+ decomposition = await this.decompose(`${task}\n\nIMPORTANT: Previous decomposition was invalid: ${validation.issues.join('. ')}. Fix these issues.`);
342
+ if (!decomposition) {
343
+ this.currentPhase = 'failed';
344
+ return this.buildErrorResult(`Decomposition validation failed: ${validation.issues.join('; ')}`);
345
+ }
346
+ const retryValidation = validateDecomposition(decomposition);
347
+ if (!retryValidation.valid) {
348
+ this.logDecision('decomposition-validation', `Retry still invalid: ${retryValidation.issues.join('; ')}`, 'Proceeding anyway');
349
+ }
350
+ }
182
351
  // Phase 2: Schedule into waves
183
352
  this.currentPhase = 'scheduling';
353
+ this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduling ${decomposition.subtasks.length} subtasks into waves...` });
184
354
  this.taskQueue.loadFromDecomposition(decomposition, this.config);
355
+ // F3: Dynamic orchestrator reserve scaling based on subtask count.
356
+ // More subtasks = more quality gate calls, synthesis work, and review overhead.
357
+ // Formula: max(configured ratio, 5% per subtask), capped at 40%.
358
+ const subtaskCount = decomposition.subtasks.length;
359
+ const dynamicReserveRatio = Math.min(0.40, Math.max(this.config.orchestratorReserveRatio, subtaskCount * 0.05));
360
+ if (dynamicReserveRatio > this.config.orchestratorReserveRatio) {
361
+ this.logDecision('budget-scaling', `Scaled orchestrator reserve from ${(this.config.orchestratorReserveRatio * 100).toFixed(0)}% to ${(dynamicReserveRatio * 100).toFixed(0)}% for ${subtaskCount} subtasks`, '');
362
+ }
363
+ // Foundation task detection: tasks that are the sole dependency of 3+ downstream
364
+ // tasks are critical — if they fail, the entire swarm cascade-skips.
365
+ // Give them extra retries and timeout scaling.
366
+ this.detectFoundationTasks();
367
+ // D3/F1: Probe model capability before dispatch (default: true)
368
+ if (this.config.probeModels !== false) {
369
+ await this.probeModelCapability();
370
+ // F15/F23: Handle all-models-failed probe scenario
371
+ // Resolve strategy: explicit probeFailureStrategy > legacy ignoreProbeFailures > default 'warn-and-try'
372
+ const probeStrategy = this.config.probeFailureStrategy
373
+ ?? (this.config.ignoreProbeFailures ? 'warn-and-try' : 'warn-and-try');
374
+ const uniqueModels = [...new Set(this.config.workers.map(w => w.model))];
375
+ const healthyModels = this.healthTracker.getHealthy(uniqueModels);
376
+ if (healthyModels.length === 0 && uniqueModels.length > 0) {
377
+ if (probeStrategy === 'abort') {
378
+ // Hard abort — no tasks dispatched
379
+ const reason = `All ${uniqueModels.length} worker model(s) failed capability probes — no model can make tool calls. Aborting swarm to prevent budget waste. Fix model configuration and retry.`;
380
+ this.logDecision('probe-abort', reason, `Models tested: ${uniqueModels.join(', ')}`);
381
+ this.emit({ type: 'swarm.abort', reason });
382
+ this.skipRemainingTasks(reason);
383
+ const totalTasks = this.taskQueue.getStats().total;
384
+ const abortStats = {
385
+ completedTasks: 0, failedTasks: 0, skippedTasks: totalTasks,
386
+ totalTasks, totalWaves: 0, totalTokens: 0, totalCost: 0,
387
+ totalDurationMs: Date.now() - this.startTime,
388
+ qualityRejections: 0, retries: 0,
389
+ modelUsage: new Map(),
390
+ };
391
+ this.emit({ type: 'swarm.complete', stats: abortStats, errors: this.errors });
392
+ return {
393
+ success: false, summary: reason,
394
+ tasks: this.taskQueue.getAllTasks(), stats: abortStats, errors: this.errors,
395
+ };
396
+ }
397
+ else {
398
+ // F23: warn-and-try — log warning, reset health, let real tasks prove capability
399
+ this.logDecision('probe-warning', `All ${uniqueModels.length} model(s) failed probe — continuing anyway (strategy: warn-and-try)`, 'Will abort after first real task failure if model cannot use tools');
400
+ // Reset health so dispatch doesn't skip all models
401
+ for (const model of uniqueModels) {
402
+ this.healthTracker.recordSuccess(model, 0);
403
+ }
404
+ }
405
+ }
406
+ }
407
+ // Emit skip events when tasks are cascade-skipped due to dependency failures
408
+ this.taskQueue.setOnCascadeSkip((skippedTaskId, reason) => {
409
+ this.emit({ type: 'swarm.task.skipped', taskId: skippedTaskId, reason });
410
+ });
185
411
  const stats = this.taskQueue.getStats();
186
- // V2: Phase 2.5: Plan execution (acceptance criteria)
412
+ this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduled ${stats.total} tasks in ${this.taskQueue.getTotalWaves()} waves` });
413
+ // V2: Phase 2.5: Plan execution — fire in background, don't block waves
414
+ let planPromise;
187
415
  if (this.config.enablePlanning) {
188
416
  this.currentPhase = 'planning';
189
- await this.planExecution(task, decomposition);
417
+ this.emit({ type: 'swarm.phase.progress', phase: 'planning', message: 'Creating acceptance criteria...' });
418
+ planPromise = this.planExecution(task, decomposition).catch(err => {
419
+ this.logDecision('planning', 'Planning failed (non-fatal)', err.message);
420
+ });
190
421
  }
191
422
  this.emit({
192
423
  type: 'swarm.start',
@@ -204,9 +435,17 @@ Rules:
204
435
  type: 'swarm.tasks.loaded',
205
436
  tasks: this.taskQueue.getAllTasks(),
206
437
  });
207
- // Phase 3: Execute waves (V2: with review after each wave)
438
+ // Phase 3: Execute waves (planning runs concurrently)
208
439
  this.currentPhase = 'executing';
209
440
  await this.executeWaves();
441
+ // V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
442
+ if (!this.cancelled)
443
+ await this.finalRescuePass();
444
+ // Ensure planning completed before verification/synthesis
445
+ if (planPromise)
446
+ await planPromise;
447
+ // Post-wave artifact audit: scan filesystem for files created by workers
448
+ this.artifactInventory = this.buildArtifactInventory();
210
449
  // V2: Phase 3.5: Verify integration
211
450
  if (this.config.enableVerification && this.plan?.integrationTestPlan) {
212
451
  this.currentPhase = 'verifying';
@@ -222,10 +461,14 @@ Rules:
222
461
  const executionStats = this.buildStats();
223
462
  // V2: Final checkpoint
224
463
  this.checkpoint('final');
225
- this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors });
464
+ const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
465
+ this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
226
466
  return {
227
467
  success: executionStats.completedTasks > 0,
468
+ partialSuccess: !executionStats.completedTasks && hasArtifacts,
469
+ partialFailure: executionStats.failedTasks > 0,
228
470
  synthesisResult: synthesisResult ?? undefined,
471
+ artifactInventory: this.artifactInventory,
229
472
  summary: this.buildSummary(executionStats),
230
473
  tasks: this.taskQueue.getAllTasks(),
231
474
  stats: executionStats,
@@ -257,6 +500,16 @@ Rules:
257
500
  // Too simple for swarm mode
258
501
  return null;
259
502
  }
503
+ // Reject heuristic fallback — the generic 3-task chain is worse than aborting
504
+ if (!result.metadata.llmAssisted) {
505
+ this.logDecision('decomposition', 'Rejected heuristic fallback DAG', 'LLM decomposition failed after retries. Heuristic DAG is not useful.');
506
+ return null;
507
+ }
508
+ // Flat-DAG detection: warn when all tasks land in wave 0 with no dependencies
509
+ const hasAnyDependency = result.subtasks.some(s => s.dependencies.length > 0);
510
+ if (!hasAnyDependency && result.subtasks.length >= 3) {
511
+ this.logDecision('decomposition', `Flat DAG: ${result.subtasks.length} tasks, zero dependencies`, 'All tasks will execute in wave 0 without ordering');
512
+ }
260
513
  return result;
261
514
  }
262
515
  catch (error) {
@@ -315,6 +568,7 @@ Respond with valid JSON:
315
568
  maxTokens: 3000,
316
569
  temperature: 0.3,
317
570
  });
571
+ this.trackOrchestratorUsage(response, 'plan');
318
572
  const parsed = this.parseJSON(response.content);
319
573
  if (parsed) {
320
574
  this.plan = {
@@ -386,6 +640,7 @@ Respond with valid JSON:
386
640
  },
387
641
  { role: 'user', content: `Review these wave ${waveIndex + 1} outputs:\n\n${taskSummaries}` },
388
642
  ], { model: reviewModel, maxTokens: 2000, temperature: 0.3 });
643
+ this.trackOrchestratorUsage(response, 'review');
389
644
  const parsed = this.parseJSON(response.content);
390
645
  if (!parsed)
391
646
  return null;
@@ -412,6 +667,11 @@ Respond with valid JSON:
412
667
  }
413
668
  if (fixupTasks.length > 0) {
414
669
  this.taskQueue.addFixupTasks(fixupTasks);
670
+ // V5: Re-emit full task list so dashboard picks up fixup tasks + edges
671
+ this.emit({
672
+ type: 'swarm.tasks.loaded',
673
+ tasks: this.taskQueue.getAllTasks(),
674
+ });
415
675
  }
416
676
  }
417
677
  const result = {
@@ -505,6 +765,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
505
765
  },
506
766
  { role: 'user', content: `Original task: ${task}\n\nFailed verifications:\n${failedSteps}` },
507
767
  ], { model: this.config.plannerModel ?? this.config.orchestratorModel, maxTokens: 1500, temperature: 0.3 });
768
+ this.trackOrchestratorUsage(response, 'verification-fixup');
508
769
  const parsed = this.parseJSON(response.content);
509
770
  if (parsed?.fixups && parsed.fixups.length > 0) {
510
771
  const fixupTasks = parsed.fixups.map((f, i) => ({
@@ -520,6 +781,11 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
520
781
  fixInstructions: f.description,
521
782
  }));
522
783
  this.taskQueue.addFixupTasks(fixupTasks);
784
+ // V5: Re-emit full task list so dashboard picks up verification fixup tasks
785
+ this.emit({
786
+ type: 'swarm.tasks.loaded',
787
+ tasks: this.taskQueue.getAllTasks(),
788
+ });
523
789
  // Execute fix-up wave
524
790
  this.currentPhase = 'executing';
525
791
  await this.executeWave(fixupTasks);
@@ -550,6 +816,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
550
816
  this.logDecision('resume', `Resuming from wave ${checkpoint.currentWave}`, `Session: ${checkpoint.sessionId}`);
551
817
  this.emit({ type: 'swarm.state.resume', sessionId: checkpoint.sessionId, fromWave: checkpoint.currentWave });
552
818
  // Restore state
819
+ if (checkpoint.originalPrompt)
820
+ this.originalPrompt = checkpoint.originalPrompt;
553
821
  if (checkpoint.plan)
554
822
  this.plan = checkpoint.plan;
555
823
  if (checkpoint.modelHealth.length > 0)
@@ -566,9 +834,61 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
566
834
  waves: checkpoint.waves,
567
835
  currentWave: checkpoint.currentWave,
568
836
  });
837
+ // Reset orphaned dispatched tasks — their workers died with the previous process
838
+ let resetCount = 0;
839
+ for (const task of this.taskQueue.getAllTasks()) {
840
+ if (task.status === 'dispatched') {
841
+ task.status = 'ready';
842
+ // Preserve at least 1 retry attempt
843
+ task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
844
+ resetCount++;
845
+ }
846
+ }
847
+ if (resetCount > 0) {
848
+ this.logDecision('resume', `Reset ${resetCount} orphaned dispatched tasks to ready`, 'Workers died with previous process');
849
+ }
850
+ // Reset skipped tasks whose dependencies are now satisfied
851
+ let unskippedCount = 0;
852
+ for (const task of this.taskQueue.getAllTasks()) {
853
+ if (task.status === 'skipped') {
854
+ const deps = task.dependencies.map(id => this.taskQueue.getTask(id));
855
+ const allDepsSatisfied = deps.every(d => d && (d.status === 'completed' || d.status === 'decomposed'));
856
+ if (allDepsSatisfied) {
857
+ task.status = 'ready';
858
+ task.attempts = 0;
859
+ task.rescueContext = 'Recovered on resume — dependencies now satisfied';
860
+ unskippedCount++;
861
+ }
862
+ }
863
+ }
864
+ // Also reset failed tasks that have retry budget
865
+ for (const task of this.taskQueue.getAllTasks()) {
866
+ if (task.status === 'failed') {
867
+ task.status = 'ready';
868
+ task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
869
+ unskippedCount++;
870
+ }
871
+ }
872
+ if (unskippedCount > 0) {
873
+ this.logDecision('resume', `Recovered ${unskippedCount} skipped/failed tasks`, 'Fresh retry on resume');
874
+ }
875
+ // If many tasks are still stuck after un-skip, trigger re-plan
876
+ const resumeStats = this.taskQueue.getStats();
877
+ const stuckCount = resumeStats.failed + resumeStats.skipped;
878
+ const totalAttempted = resumeStats.completed + stuckCount;
879
+ if (totalAttempted > 0 && stuckCount / totalAttempted > 0.4) {
880
+ this.logDecision('resume-replan', `${stuckCount}/${totalAttempted} tasks still stuck after resume — triggering re-plan`, '');
881
+ this.hasReplanned = false; // Allow re-plan on resume
882
+ await this.midSwarmReplan();
883
+ }
569
884
  // Continue from where we left off
570
885
  this.currentPhase = 'executing';
571
886
  await this.executeWaves();
887
+ // V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
888
+ if (!this.cancelled)
889
+ await this.finalRescuePass();
890
+ // Post-wave artifact audit
891
+ this.artifactInventory = this.buildArtifactInventory();
572
892
  // Continue with verification and synthesis as normal
573
893
  if (this.config.enableVerification && this.plan?.integrationTestPlan) {
574
894
  this.currentPhase = 'verifying';
@@ -582,10 +902,14 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
582
902
  this.currentPhase = 'completed';
583
903
  const executionStats = this.buildStats();
584
904
  this.checkpoint('final');
585
- this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors });
905
+ const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
906
+ this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
586
907
  return {
587
908
  success: executionStats.completedTasks > 0,
909
+ partialSuccess: !executionStats.completedTasks && hasArtifacts,
910
+ partialFailure: executionStats.failedTasks > 0,
588
911
  synthesisResult: synthesisResult ?? undefined,
912
+ artifactInventory: this.artifactInventory,
589
913
  summary: this.buildSummary(executionStats),
590
914
  tasks: this.taskQueue.getAllTasks(),
591
915
  stats: executionStats,
@@ -602,6 +926,13 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
602
926
  while (waveIndex < totalWaves && !this.cancelled) {
603
927
  const readyTasks = this.taskQueue.getReadyTasks();
604
928
  const queueStats = this.taskQueue.getStats();
929
+ // F18: Skip empty waves — if no tasks are ready and none are running,
930
+ // remaining tasks are all blocked/failed/skipped. Break instead of
931
+ // running useless review cycles.
932
+ if (readyTasks.length === 0 && queueStats.running === 0 && queueStats.ready === 0) {
933
+ this.logDecision('wave-skip', `Skipping waves ${waveIndex + 1}-${totalWaves}: no dispatchable tasks remain`, `Stats: ${queueStats.completed} completed, ${queueStats.failed} failed, ${queueStats.skipped} skipped`);
934
+ break;
935
+ }
605
936
  this.emit({
606
937
  type: 'swarm.wave.start',
607
938
  wave: waveIndex + 1,
@@ -623,12 +954,75 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
623
954
  failed: waveFailed,
624
955
  skipped: waveSkipped,
625
956
  });
957
+ // Wave failure recovery: if ALL tasks in a wave failed, retry with adapted context
958
+ if (waveCompleted === 0 && waveFailed > 0 && readyTasks.length > 0) {
959
+ this.emit({ type: 'swarm.wave.allFailed', wave: waveIndex + 1 });
960
+ this.logDecision('wave-recovery', `Entire wave ${waveIndex + 1} failed (${waveFailed} tasks)`, 'Checking if budget allows retry with adapted strategy');
961
+ // Re-queue failed tasks with retry context if budget allows
962
+ const budgetRemaining = this.budgetPool.hasCapacity();
963
+ const failedWaveTasks = readyTasks.filter(t => {
964
+ const task = this.taskQueue.getTask(t.id);
965
+ return task && task.status === 'failed' && task.attempts < (this.config.workerRetries + 1);
966
+ });
967
+ if (budgetRemaining && failedWaveTasks.length > 0) {
968
+ for (const t of failedWaveTasks) {
969
+ const task = this.taskQueue.getTask(t.id);
970
+ if (!task)
971
+ continue;
972
+ task.status = 'ready';
973
+ task.retryContext = {
974
+ previousFeedback: 'All tasks in this batch failed. Try a fundamentally different approach — the previous strategy did not work.',
975
+ previousScore: 0,
976
+ attempt: task.attempts,
977
+ previousModel: task.assignedModel,
978
+ swarmProgress: this.getSwarmProgressSummary(),
979
+ };
980
+ }
981
+ this.logDecision('wave-recovery', `Re-queued ${failedWaveTasks.length} tasks with adapted retry context`, 'Budget allows retry');
982
+ // Re-execute the wave with adapted tasks
983
+ await this.executeWave(failedWaveTasks.map(t => this.taskQueue.getTask(t.id)).filter(t => t.status === 'ready'));
984
+ }
985
+ }
986
+ // F5: Adaptive re-decomposition — if < 50% of wave tasks succeeded,
987
+ // the decomposition may be structurally flawed. Log for observability.
988
+ // (Full re-decomposition of remaining work would require re-architecting the queue,
989
+ // so we log the signal and let wave retry + fixup handle recovery.)
990
+ const waveTotal = waveCompleted + waveFailed + waveSkipped;
991
+ const waveSuccessRate = waveTotal > 0 ? waveCompleted / waveTotal : 0;
992
+ if (waveSuccessRate < 0.5 && waveTotal >= 2) {
993
+ this.logDecision('decomposition-quality', `Wave ${waveIndex + 1} success rate ${(waveSuccessRate * 100).toFixed(0)}% (${waveCompleted}/${waveTotal})`, 'Low success rate may indicate decomposition quality issues');
994
+ }
626
995
  // V2: Review wave outputs
627
996
  const review = await this.reviewWave(waveIndex);
628
997
  if (review && review.fixupTasks.length > 0) {
629
998
  // Execute fix-up tasks immediately
630
999
  await this.executeWave(review.fixupTasks);
631
1000
  }
1001
+ // Rescue cascade-skipped tasks that can still run
1002
+ // (after wave review + fixup, some skipped tasks may now be viable)
1003
+ const rescued = this.rescueCascadeSkipped();
1004
+ if (rescued.length > 0) {
1005
+ this.logDecision('cascade-rescue', `Rescued ${rescued.length} cascade-skipped tasks after wave ${waveIndex + 1}`, rescued.map(t => t.id).join(', '));
1006
+ await this.executeWave(rescued);
1007
+ }
1008
+ // Reset quality circuit breaker at wave boundary — each wave gets a fresh chance.
1009
+ // Within a wave, rejections accumulate properly so the breaker can trip.
1010
+ // Between waves, we reset so each wave gets a fresh quality evaluation window.
1011
+ // (The within-wave reset at quality-gate-passed is kept — that's correct.)
1012
+ if (this.qualityGateDisabledModels.size > 0) {
1013
+ this.qualityGateDisabledModels.clear();
1014
+ this.perModelQualityRejections.clear();
1015
+ this.logDecision('quality-circuit-breaker', `Re-enabled quality gates for all models at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
1016
+ }
1017
+ // F3: Log budget reallocation after wave completion.
1018
+ // SharedBudgetPool already returns unused tokens via release(), but we log it
1019
+ // for observability so operators can see how budget flows between waves.
1020
+ const budgetStats = this.budgetPool.getStats();
1021
+ this.logDecision('budget-reallocation', `After wave ${waveIndex + 1}: ${budgetStats.tokensRemaining} tokens remaining (${(budgetStats.utilization * 100).toFixed(0)}% utilized)`, '');
1022
+ this.budgetPool.reallocateUnused(budgetStats.tokensRemaining);
1023
+ // F21: Mid-swarm situational assessment — evaluate success rate and budget health,
1024
+ // optionally triage low-priority tasks to conserve budget for critical path.
1025
+ await this.assessAndAdapt(waveIndex);
632
1026
  // V2: Checkpoint after each wave
633
1027
  this.checkpoint(`wave-${waveIndex}`);
634
1028
  // Advance to next wave
@@ -656,7 +1050,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
656
1050
  taskIndex++;
657
1051
  // Stagger dispatches to avoid rate limit storms
658
1052
  if (taskIndex < tasks.length && this.workerPool.availableSlots > 0) {
659
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1053
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
660
1054
  }
661
1055
  }
662
1056
  // Process completions and dispatch more tasks as slots open
@@ -677,7 +1071,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
677
1071
  await this.dispatchTask(task);
678
1072
  // Stagger dispatches to avoid rate limit storms
679
1073
  if (taskIndex + 1 < tasks.length && this.workerPool.availableSlots > 0) {
680
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1074
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
681
1075
  }
682
1076
  }
683
1077
  taskIndex++;
@@ -692,57 +1086,152 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
692
1086
  await this.dispatchTask(moreReady[i]);
693
1087
  // Stagger dispatches to avoid rate limit storms
694
1088
  if (i + 1 < moreReady.length && this.workerPool.availableSlots > 0) {
695
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1089
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
1090
+ }
1091
+ }
1092
+ }
1093
+ }
1094
+ // F20: Re-dispatch pass — after all workers finish, budget may have been freed
1095
+ // by completed tasks. Try to dispatch any still-ready tasks (e.g., those paused
1096
+ // by budget exhaustion earlier).
1097
+ if (!this.cancelled && this.budgetPool.hasCapacity()) {
1098
+ const stillReady = this.taskQueue.getAllReadyTasks()
1099
+ .filter(t => !this.workerPool.getActiveWorkerStatus().some(w => w.taskId === t.id));
1100
+ if (stillReady.length > 0) {
1101
+ this.logDecision('budget-redispatch', `Budget freed after wave — re-dispatching ${stillReady.length} ready task(s)`, `Budget: ${JSON.stringify(this.budgetPool.getStats())}`);
1102
+ for (const task of stillReady) {
1103
+ if (this.workerPool.availableSlots <= 0 || !this.budgetPool.hasCapacity())
1104
+ break;
1105
+ await this.dispatchTask(task);
1106
+ if (this.workerPool.availableSlots > 0) {
1107
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
696
1108
  }
697
1109
  }
1110
+ // Wait for these re-dispatched tasks to complete
1111
+ while (this.workerPool.activeCount > 0 && !this.cancelled) {
1112
+ const completed = await this.workerPool.waitForAny();
1113
+ if (!completed)
1114
+ break;
1115
+ await this.handleTaskCompletion(completed.taskId, completed.result, completed.startedAt);
1116
+ this.emitBudgetUpdate();
1117
+ this.emitStatusUpdate();
1118
+ }
698
1119
  }
699
1120
  }
700
1121
  }
701
1122
  /**
702
1123
  * Dispatch a single task to a worker.
1124
+ * Selects the worker once and passes it through to avoid double-selection.
703
1125
  */
704
1126
  async dispatchTask(task) {
705
1127
  const worker = this.workerPool.selectWorker(task);
706
1128
  if (!worker) {
707
1129
  // M2: Emit error and mark task failed instead of silently returning
708
- this.taskQueue.markFailed(task.id, 0);
1130
+ // V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
1131
+ this.logDecision('no-worker', `${task.id}: no worker for type ${task.type}`, '');
1132
+ if (task.attempts > 0) {
1133
+ const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
1134
+ const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
1135
+ if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
1136
+ return;
1137
+ }
1138
+ }
1139
+ this.taskQueue.markFailedWithoutCascade(task.id, 0);
1140
+ this.taskQueue.triggerCascadeSkip(task.id);
709
1141
  this.emit({
710
1142
  type: 'swarm.task.failed',
711
1143
  taskId: task.id,
712
1144
  error: `No worker available for task type: ${task.type}`,
713
- attempt: 0,
1145
+ attempt: task.attempts,
714
1146
  maxAttempts: 0,
715
1147
  willRetry: false,
1148
+ failureMode: 'error',
716
1149
  });
717
1150
  return;
718
1151
  }
719
1152
  try {
720
- this.taskQueue.markDispatched(task.id, worker.model);
721
- await this.workerPool.dispatch(task);
1153
+ // Pre-dispatch auto-split for critical-path bottlenecks
1154
+ if (this.shouldAutoSplit(task)) {
1155
+ try {
1156
+ const splitResult = await this.judgeSplit(task);
1157
+ if (splitResult.shouldSplit && splitResult.subtasks) {
1158
+ task.status = 'dispatched'; // Required for replaceWithSubtasks
1159
+ this.taskQueue.replaceWithSubtasks(task.id, splitResult.subtasks);
1160
+ this.emit({
1161
+ type: 'swarm.task.resilience',
1162
+ taskId: task.id,
1163
+ strategy: 'auto-split',
1164
+ succeeded: true,
1165
+ reason: `Pre-dispatch split into ${splitResult.subtasks.length} parallel subtasks`,
1166
+ artifactsFound: 0,
1167
+ toolCalls: 0,
1168
+ });
1169
+ return; // Subtasks now in queue, will be dispatched this wave
1170
+ }
1171
+ }
1172
+ catch (err) {
1173
+ this.logDecision('auto-split', `${task.id}: split judge failed — ${err.message}`, '');
1174
+ // Fall through to normal dispatch
1175
+ }
1176
+ }
1177
+ this.totalDispatches++;
1178
+ const dispatchedModel = task.assignedModel ?? worker.model;
1179
+ this.taskQueue.markDispatched(task.id, dispatchedModel);
1180
+ if (task.assignedModel && task.assignedModel !== worker.model) {
1181
+ this.logDecision('failover', `Dispatching ${task.id} with failover model ${task.assignedModel} (worker default: ${worker.model})`, 'Retry model override is active');
1182
+ }
1183
+ // Pass the pre-selected worker to avoid double-selection in dispatch()
1184
+ await this.workerPool.dispatch(task, worker);
722
1185
  this.emit({
723
1186
  type: 'swarm.task.dispatched',
724
1187
  taskId: task.id,
725
1188
  description: task.description,
726
- model: worker.model,
1189
+ model: dispatchedModel,
727
1190
  workerName: worker.name,
1191
+ toolCount: worker.allowedTools?.length ?? -1, // -1 = all tools
1192
+ tools: worker.allowedTools,
1193
+ retryContext: task.retryContext,
1194
+ fromModel: task.retryContext ? task.retryContext.previousModel : undefined,
1195
+ attempts: task.attempts,
728
1196
  });
729
1197
  }
730
1198
  catch (error) {
1199
+ const errorMsg = error.message;
1200
+ // F20: Budget exhaustion is NOT a task failure — the task is fine, we just ran out of money.
1201
+ // Reset status to ready so it can be picked up if budget becomes available
1202
+ // (e.g., after tokens are released from completing tasks).
1203
+ if (errorMsg.includes('Budget pool exhausted')) {
1204
+ task.status = 'ready';
1205
+ this.logDecision('budget-pause', `Cannot dispatch ${task.id}: budget exhausted — task kept ready for potential re-dispatch`, `Budget stats: ${JSON.stringify(this.budgetPool.getStats())}`);
1206
+ return;
1207
+ }
731
1208
  this.errors.push({
732
1209
  taskId: task.id,
733
1210
  phase: 'dispatch',
734
- message: error.message,
1211
+ message: errorMsg,
735
1212
  recovered: false,
736
1213
  });
1214
+ this.logDecision('dispatch-error', `${task.id}: dispatch failed: ${errorMsg.slice(0, 100)}`, `attempts: ${task.attempts}`);
1215
+ // V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
1216
+ if (task.attempts > 0) {
1217
+ const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
1218
+ const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
1219
+ if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
1220
+ this.errors[this.errors.length - 1].recovered = true;
1221
+ return;
1222
+ }
1223
+ }
1224
+ this.taskQueue.markFailedWithoutCascade(task.id, 0);
1225
+ this.taskQueue.triggerCascadeSkip(task.id);
737
1226
  this.emit({
738
1227
  type: 'swarm.task.failed',
739
1228
  taskId: task.id,
740
- error: error.message,
1229
+ error: errorMsg,
741
1230
  attempt: task.attempts,
742
1231
  maxAttempts: 1 + this.config.workerRetries,
743
1232
  willRetry: false,
1233
+ failureMode: 'error',
744
1234
  });
745
- this.taskQueue.markFailed(task.id, 0);
746
1235
  }
747
1236
  }
748
1237
  /**
@@ -752,6 +1241,36 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
752
1241
  const task = this.taskQueue.getTask(taskId);
753
1242
  if (!task)
754
1243
  return;
1244
+ // Guard: task was terminally resolved while its worker was running — ignore the result
1245
+ // F4: But NOT if pendingCascadeSkip — those results are evaluated below
1246
+ if ((task.status === 'skipped' || task.status === 'failed') && !task.pendingCascadeSkip)
1247
+ return;
1248
+ // V7: Global dispatch cap — prevent any single task from burning budget.
1249
+ // Try resilience recovery (micro-decompose, degraded acceptance) before hard-failing.
1250
+ const maxDispatches = this.config.maxDispatchesPerTask ?? 5;
1251
+ if (task.attempts >= maxDispatches) {
1252
+ const durationMs = Date.now() - startedAt;
1253
+ const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
1254
+ this.totalTokens += taskResult.tokensUsed;
1255
+ this.totalCost += taskResult.costUsed;
1256
+ // Try resilience recovery before hard fail
1257
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1258
+ return;
1259
+ }
1260
+ this.taskQueue.markFailedWithoutCascade(taskId, 0);
1261
+ this.taskQueue.triggerCascadeSkip(taskId);
1262
+ this.emit({
1263
+ type: 'swarm.task.failed',
1264
+ taskId,
1265
+ error: `Dispatch cap reached (${maxDispatches} attempts)`,
1266
+ attempt: task.attempts,
1267
+ maxAttempts: maxDispatches,
1268
+ willRetry: false,
1269
+ failureMode: task.failureMode,
1270
+ });
1271
+ this.logDecision('dispatch-cap', `${taskId}: hard cap reached (${task.attempts}/${maxDispatches})`, 'No more retries — resilience recovery also failed');
1272
+ return;
1273
+ }
755
1274
  const durationMs = Date.now() - startedAt;
756
1275
  const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
757
1276
  // Track model usage
@@ -763,21 +1282,94 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
763
1282
  this.modelUsage.set(model, usage);
764
1283
  this.totalTokens += taskResult.tokensUsed;
765
1284
  this.totalCost += taskResult.costUsed;
1285
+ // V10: Emit per-attempt event for full decision traceability
1286
+ this.emit({
1287
+ type: 'swarm.task.attempt',
1288
+ taskId,
1289
+ attempt: task.attempts,
1290
+ model,
1291
+ success: spawnResult.success,
1292
+ durationMs,
1293
+ toolCalls: spawnResult.metrics.toolCalls ?? 0,
1294
+ failureMode: !spawnResult.success ? task.failureMode : undefined,
1295
+ qualityScore: taskResult.qualityScore,
1296
+ output: taskResult.output.slice(0, 500),
1297
+ });
766
1298
  if (!spawnResult.success) {
767
1299
  // V2: Record model health
768
1300
  const errorMsg = spawnResult.output.toLowerCase();
769
1301
  const is429 = errorMsg.includes('429') || errorMsg.includes('rate');
770
1302
  const is402 = errorMsg.includes('402') || errorMsg.includes('spend limit');
771
- const errorType = is429 ? '429' : is402 ? '402' : 'error';
1303
+ const isTimeout = spawnResult.metrics.toolCalls === -1;
1304
+ // F25: Use 'timeout' errorType for timeouts (was 'error')
1305
+ const errorType = is429 ? '429' : is402 ? '402' : isTimeout ? 'timeout' : 'error';
772
1306
  this.healthTracker.recordFailure(model, errorType);
773
1307
  this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
1308
+ // P6: Tag failure mode for cascade threshold awareness
1309
+ task.failureMode = (is429 || is402) ? 'rate-limit' : (spawnResult.metrics.toolCalls === -1 ? 'timeout' : 'error');
774
1310
  // Feed circuit breaker
775
1311
  if (is429 || is402) {
776
1312
  this.recordRateLimit();
777
1313
  }
1314
+ // F25a: Consecutive timeout tracking — early-fail after N consecutive timeouts
1315
+ if (isTimeout) {
1316
+ const count = (this.taskTimeoutCounts.get(taskId) ?? 0) + 1;
1317
+ this.taskTimeoutCounts.set(taskId, count);
1318
+ const timeoutLimit = this.config.consecutiveTimeoutLimit ?? 3;
1319
+ this.logDecision('timeout-tracking', `${taskId}: consecutive timeout ${count}/${timeoutLimit}`, '');
1320
+ if (count >= timeoutLimit) {
1321
+ // F25b: Try model failover before giving up
1322
+ let failoverSucceeded = false;
1323
+ if (this.config.enableModelFailover) {
1324
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
1325
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1326
+ if (alternative) {
1327
+ this.emit({
1328
+ type: 'swarm.model.failover',
1329
+ taskId,
1330
+ fromModel: model,
1331
+ toModel: alternative.model,
1332
+ reason: 'consecutive-timeouts',
1333
+ });
1334
+ task.assignedModel = alternative.model;
1335
+ this.taskTimeoutCounts.set(taskId, 0); // Reset counter for new model
1336
+ this.logDecision('failover', `Timeout failover ${taskId}: ${model} → ${alternative.model}`, `${count} consecutive timeouts`);
1337
+ failoverSucceeded = true;
1338
+ }
1339
+ }
1340
+ if (!failoverSucceeded) {
1341
+ // No alternative model — try resilience recovery before hard fail.
1342
+ // Timeouts often produce artifacts (worker WAS working, just ran out of time).
1343
+ task.failureMode = 'timeout';
1344
+ const taskResult = this.workerPool.toTaskResult(spawnResult, task, Date.now() - startedAt);
1345
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1346
+ this.taskTimeoutCounts.delete(taskId);
1347
+ return;
1348
+ }
1349
+ this.taskQueue.markFailedWithoutCascade(taskId, 0);
1350
+ this.taskQueue.triggerCascadeSkip(taskId);
1351
+ this.emit({
1352
+ type: 'swarm.task.failed',
1353
+ taskId,
1354
+ error: `${count} consecutive timeouts — no alternative model available`,
1355
+ attempt: task.attempts,
1356
+ maxAttempts: maxDispatches,
1357
+ willRetry: false,
1358
+ failureMode: 'timeout',
1359
+ });
1360
+ this.logDecision('timeout-early-fail', `${taskId}: ${count} consecutive timeouts, no alt model — resilience recovery also failed`, '');
1361
+ this.taskTimeoutCounts.delete(taskId);
1362
+ return;
1363
+ }
1364
+ }
1365
+ }
1366
+ else {
1367
+ // Non-timeout failure — reset the counter
1368
+ this.taskTimeoutCounts.delete(taskId);
1369
+ }
778
1370
  // V2: Model failover on rate limits
779
1371
  if ((is429 || is402) && this.config.enableModelFailover) {
780
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1372
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
781
1373
  const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
782
1374
  if (alternative) {
783
1375
  this.emit({
@@ -791,11 +1383,30 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
791
1383
  this.logDecision('failover', `Switched ${taskId} from ${model} to ${alternative.model}`, `${errorType} error`);
792
1384
  }
793
1385
  }
794
- // Worker failed use higher retry limit for rate limit errors
1386
+ // V5/V7: Store error context so retry gets different prompt
1387
+ if (!(is429 || is402)) {
1388
+ // V7: Timeout-specific feedback — the worker WAS working, just ran out of time
1389
+ const timeoutSeconds = isTimeout ? Math.round(durationMs / 1000) : 0;
1390
+ task.retryContext = {
1391
+ previousFeedback: isTimeout
1392
+ ? `Previous attempt timed out after ${timeoutSeconds}s. You must complete this task more efficiently — work faster, use fewer tool calls, and produce your result sooner.`
1393
+ : spawnResult.output.slice(0, 2000),
1394
+ previousScore: 0,
1395
+ attempt: task.attempts,
1396
+ previousModel: model,
1397
+ previousFiles: taskResult.filesModified,
1398
+ swarmProgress: this.getSwarmProgressSummary(),
1399
+ };
1400
+ }
1401
+ // V7: Reset hollow streak on non-hollow failure (error is not a hollow completion)
1402
+ this.hollowStreak = 0;
1403
+ // Worker failed — use higher retry limit for rate limit errors.
1404
+ // V7: Fixup tasks get capped retries, foundation tasks get +1.
1405
+ const baseRetries = this.getEffectiveRetries(task);
795
1406
  const retryLimit = (is429 || is402)
796
- ? (this.config.rateLimitRetries ?? 3)
797
- : this.config.workerRetries;
798
- const canRetry = this.taskQueue.markFailed(taskId, retryLimit);
1407
+ ? Math.min(this.config.rateLimitRetries ?? 3, baseRetries + 1)
1408
+ : baseRetries;
1409
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, retryLimit);
799
1410
  if (canRetry) {
800
1411
  this.retries++;
801
1412
  // Non-blocking cooldown: set retryAfter timestamp instead of blocking
@@ -803,8 +1414,21 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
803
1414
  const baseDelay = this.config.retryBaseDelayMs ?? 5000;
804
1415
  const cooldownMs = Math.min(baseDelay * Math.pow(2, task.attempts - 1), 30000);
805
1416
  this.taskQueue.setRetryAfter(taskId, cooldownMs);
1417
+ this.logDecision('rate-limit-cooldown', `${taskId}: ${errorType} cooldown ${cooldownMs}ms, model ${model}`, '');
806
1418
  }
807
1419
  }
1420
+ else if (!(is429 || is402)) {
1421
+ // Resilience recovery for non-rate-limit errors (micro-decompose + degraded acceptance)
1422
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1423
+ return;
1424
+ }
1425
+ // Recovery failed — NOW trigger cascade
1426
+ this.taskQueue.triggerCascadeSkip(taskId);
1427
+ }
1428
+ else {
1429
+ // Rate-limit exhaustion — trigger cascade
1430
+ this.taskQueue.triggerCascadeSkip(taskId);
1431
+ }
808
1432
  this.emit({
809
1433
  type: 'swarm.task.failed',
810
1434
  taskId,
@@ -812,17 +1436,153 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
812
1436
  attempt: task.attempts,
813
1437
  maxAttempts: 1 + this.config.workerRetries,
814
1438
  willRetry: canRetry,
1439
+ toolCalls: spawnResult.metrics.toolCalls,
1440
+ failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
1441
+ failureMode: task.failureMode,
1442
+ });
1443
+ return;
1444
+ }
1445
+ // V6: Hollow completion detection — workers that "succeed" without doing any work
1446
+ // Must check BEFORE recording success, otherwise hollow completions inflate health scores
1447
+ if (isHollowCompletion(spawnResult, task.type, this.config)) {
1448
+ // F4: Hollow result + pendingCascadeSkip — honor the skip immediately, no retry
1449
+ if (task.pendingCascadeSkip) {
1450
+ task.pendingCascadeSkip = undefined;
1451
+ task.status = 'skipped';
1452
+ this.totalHollows++;
1453
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (hollow completion)`, '');
1454
+ this.emit({ type: 'swarm.task.skipped', taskId, reason: 'cascade skip honored — hollow completion' });
1455
+ return;
1456
+ }
1457
+ // P6: Tag failure mode for cascade threshold awareness
1458
+ task.failureMode = 'hollow';
1459
+ // Record hollow completion so hollow-prone models accumulate hollow-specific records
1460
+ // and get deprioritized by the model selector (also records generic failure internally)
1461
+ this.healthTracker.recordHollow(model);
1462
+ const admitsFailure = spawnResult.success && FAILURE_INDICATORS.some(f => (spawnResult.output ?? '').toLowerCase().includes(f));
1463
+ task.retryContext = {
1464
+ previousFeedback: admitsFailure
1465
+ ? 'Previous attempt reported success but admitted failure (e.g., "budget exhausted", "unable to complete"). You MUST execute tool calls and produce concrete output this time.'
1466
+ : 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
1467
+ previousScore: 1,
1468
+ attempt: task.attempts,
1469
+ previousModel: model,
1470
+ previousFiles: taskResult.filesModified,
1471
+ swarmProgress: this.getSwarmProgressSummary(),
1472
+ };
1473
+ // Model failover for hollow completions — same pattern as quality failover
1474
+ if (this.config.enableModelFailover) {
1475
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
1476
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1477
+ if (alternative) {
1478
+ this.emit({
1479
+ type: 'swarm.model.failover',
1480
+ taskId,
1481
+ fromModel: model,
1482
+ toModel: alternative.model,
1483
+ reason: 'hollow-completion',
1484
+ });
1485
+ task.assignedModel = alternative.model;
1486
+ this.logDecision('failover', `Hollow failover ${taskId}: ${model} → ${alternative.model}`, 'Model produced hollow completion');
1487
+ }
1488
+ }
1489
+ const hollowRetries = this.getEffectiveRetries(task);
1490
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, hollowRetries);
1491
+ if (canRetry) {
1492
+ this.retries++;
1493
+ }
1494
+ else {
1495
+ // Retries exhausted — try shared resilience recovery (micro-decompose, degraded acceptance)
1496
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1497
+ return;
1498
+ }
1499
+ // Recovery failed — NOW trigger cascade
1500
+ this.taskQueue.triggerCascadeSkip(taskId);
1501
+ }
1502
+ this.emit({
1503
+ type: 'swarm.task.failed',
1504
+ taskId,
1505
+ error: 'Hollow completion: worker used no tools',
1506
+ attempt: task.attempts,
1507
+ maxAttempts: 1 + this.config.workerRetries,
1508
+ willRetry: canRetry,
1509
+ toolCalls: spawnResult.metrics.toolCalls,
1510
+ failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
1511
+ failureMode: 'hollow',
815
1512
  });
1513
+ this.hollowStreak++;
1514
+ this.totalHollows++;
1515
+ this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls (streak: ${this.hollowStreak}, total hollows: ${this.totalHollows}/${this.totalDispatches})`, canRetry ? 'Marking as failed for retry' : 'Retries exhausted — hard fail');
1516
+ // B2: Hollow streak handling — only terminate if enableHollowTermination is explicitly on
1517
+ if (this.hollowStreak >= SwarmOrchestrator.HOLLOW_STREAK_THRESHOLD) {
1518
+ const uniqueModels = new Set(this.config.workers.map(w => w.model));
1519
+ const singleModel = uniqueModels.size === 1;
1520
+ const onlyModel = [...uniqueModels][0];
1521
+ const modelUnhealthy = singleModel && !this.healthTracker.getAllRecords().find(r => r.model === onlyModel)?.healthy;
1522
+ if (singleModel && modelUnhealthy) {
1523
+ if (this.config.enableHollowTermination) {
1524
+ this.logDecision('early-termination', `Terminating swarm: ${this.hollowStreak} consecutive hollow completions on sole model ${onlyModel}`, 'Single-model swarm with unhealthy model — enableHollowTermination is on');
1525
+ this.skipRemainingTasks(`Single-model hollow streak (${this.hollowStreak}x on ${onlyModel})`);
1526
+ }
1527
+ else {
1528
+ this.logDecision('stall-mode', `${this.hollowStreak} consecutive hollows on sole model ${onlyModel} — entering stall mode`, 'Will attempt model failover or simplified retry on next dispatch');
1529
+ // Reset streak to allow more attempts with adjusted strategy
1530
+ this.hollowStreak = 0;
1531
+ }
1532
+ }
1533
+ }
1534
+ // V7: Multi-model hollow ratio — warn but don't terminate unless opt-in
1535
+ const minDispatches = this.config.hollowTerminationMinDispatches ?? 8;
1536
+ const threshold = this.config.hollowTerminationRatio ?? 0.55;
1537
+ if (this.totalDispatches >= minDispatches) {
1538
+ const ratio = this.totalHollows / this.totalDispatches;
1539
+ if (ratio > threshold) {
1540
+ if (this.config.enableHollowTermination) {
1541
+ this.logDecision('early-termination', `Terminating swarm: hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, `Exceeds threshold ${(threshold * 100).toFixed(0)}% after ${minDispatches}+ dispatches — enableHollowTermination is on`);
1542
+ this.skipRemainingTasks(`Hollow ratio ${(ratio * 100).toFixed(0)}% — models cannot execute tasks`);
1543
+ }
1544
+ else if (!this.hollowRatioWarned) {
1545
+ this.hollowRatioWarned = true;
1546
+ this.logDecision('stall-warning', `Hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, 'High hollow rate but continuing — tasks may still recover via resilience');
1547
+ }
1548
+ }
1549
+ }
816
1550
  return;
817
1551
  }
818
- // V2: Record model health on success
1552
+ // F4: Task had pendingCascadeSkip but produced non-hollow results.
1553
+ // Run pre-flight checks — if the output is good, accept it instead of skipping.
1554
+ if (task.pendingCascadeSkip) {
1555
+ const cachedReport = checkArtifacts(task);
1556
+ const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedReport);
1557
+ if (preFlight && !preFlight.passed) {
1558
+ // Output is garbage — honor the cascade skip
1559
+ task.pendingCascadeSkip = undefined;
1560
+ task.status = 'skipped';
1561
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (pre-flight failed: ${preFlight.feedback})`, '');
1562
+ this.emit({ type: 'swarm.task.skipped', taskId, reason: `cascade skip honored — output failed pre-flight: ${preFlight.feedback}` });
1563
+ return;
1564
+ }
1565
+ // Output is good — clear the flag and accept the result
1566
+ task.pendingCascadeSkip = undefined;
1567
+ task.status = 'dispatched'; // Reset so markCompleted works
1568
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip overridden — worker produced valid output`, '');
1569
+ }
1570
+ // Record model health on success (only for non-hollow completions)
819
1571
  this.healthTracker.recordSuccess(model, durationMs);
820
- // Run quality gate if enabled — skip under API pressure or on retried tasks
1572
+ this.decreaseStagger(); // P7: Speed up on success
1573
+ // Run quality gate if enabled — skip under API pressure, skip if circuit breaker tripped,
1574
+ // and let the final attempt through without quality gate (so tasks produce *something*)
1575
+ // Foundation tasks get +1 retry to reduce cascade failure risk.
1576
+ const effectiveRetries = this.getEffectiveRetries(task);
821
1577
  const recentRLCount = this.recentRateLimits.filter(t => t > Date.now() - 30_000).length;
1578
+ const isLastAttempt = task.attempts >= (effectiveRetries + 1);
822
1579
  const shouldRunQualityGate = this.config.qualityGates
823
- && task.attempts <= 1
1580
+ && !this.qualityGateDisabledModels.has(model)
1581
+ && !isLastAttempt
824
1582
  && Date.now() >= this.circuitBreakerUntil
825
1583
  && recentRLCount < 2;
1584
+ // C1: Pre-compute artifact report once — shared by quality gate and pre-flight checks
1585
+ const cachedArtifactReport = checkArtifacts(task);
826
1586
  if (shouldRunQualityGate) {
827
1587
  // V3: Judge role handles quality gates
828
1588
  const judgeModel = this.config.hierarchy?.judge?.model
@@ -832,27 +1592,272 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
832
1592
  persona: this.config.hierarchy?.judge?.persona,
833
1593
  };
834
1594
  this.emit({ type: 'swarm.role.action', role: 'judge', action: 'quality-gate', model: judgeModel, taskId });
835
- const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig);
1595
+ // Extract file artifacts from worker output for quality gate visibility.
1596
+ // When workers create files via write_file/edit_file, the judge needs to see
1597
+ // the actual content — not just the worker's text claims about what was created.
1598
+ const fileArtifacts = this.extractFileArtifacts(task, taskResult);
1599
+ // Foundation tasks get a relaxed quality threshold (threshold - 1, min 2)
1600
+ // to reduce the chance of cascade-skipping the entire swarm.
1601
+ const baseThreshold = this.config.qualityThreshold ?? 3;
1602
+ const qualityThreshold = task.isFoundation ? Math.max(2, baseThreshold - 1) : baseThreshold;
1603
+ const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, qualityThreshold, (resp, purpose) => this.trackOrchestratorUsage(resp, purpose), fileArtifacts, this.config, cachedArtifactReport);
836
1604
  taskResult.qualityScore = quality.score;
837
1605
  taskResult.qualityFeedback = quality.feedback;
1606
+ // F11: Foundation tasks that barely pass the relaxed threshold get concrete validation.
1607
+ // A 2/5 foundation task with truncated output will cascade-poison all dependents.
1608
+ if (quality.passed && task.isFoundation && quality.score <= baseThreshold - 1) {
1609
+ const concreteResult = runConcreteChecks(task, taskResult);
1610
+ if (!concreteResult.passed) {
1611
+ quality.passed = false;
1612
+ quality.feedback += ` [F11: foundation task barely passed (${quality.score}/${baseThreshold}) but concrete validation failed: ${concreteResult.issues.join('; ')}]`;
1613
+ this.logDecision('foundation-concrete-gate', `${taskId}: foundation task scored ${quality.score} (relaxed threshold ${qualityThreshold}) but concrete checks failed — rejecting`, concreteResult.issues.join('; '));
1614
+ }
1615
+ }
838
1616
  if (!quality.passed) {
1617
+ // F7: Gate error fallback — when LLM judge fails, use concrete validation
1618
+ // If concrete checks pass, tentatively accept the result instead of rejecting.
1619
+ if (quality.gateError && (this.config.enableConcreteValidation !== false)) {
1620
+ const concreteResult = runConcreteChecks(task, taskResult);
1621
+ if (concreteResult.passed) {
1622
+ // Concrete validation passed — tentatively accept despite gate error
1623
+ this.logDecision('gate-error-fallback', `${taskId}: gate error but concrete checks passed — tentatively accepting`, quality.gateErrorMessage ?? 'unknown');
1624
+ taskResult.qualityScore = quality.score;
1625
+ taskResult.qualityFeedback = `${quality.feedback} [concrete validation passed — tentative accept]`;
1626
+ // Fall through to success path (don't return)
1627
+ }
1628
+ else {
1629
+ // Both gate and concrete failed — reject
1630
+ this.logDecision('gate-error-fallback', `${taskId}: gate error AND concrete checks failed — rejecting`, `Concrete issues: ${concreteResult.issues.join('; ')}`);
1631
+ // Fall through to normal rejection below
1632
+ }
1633
+ // If concrete passed, skip the rejection path
1634
+ if (concreteResult.passed) {
1635
+ this.perModelQualityRejections.delete(model);
1636
+ // Jump to success path below
1637
+ }
1638
+ else {
1639
+ // Proceed with normal rejection
1640
+ this.qualityRejections++;
1641
+ task.failureMode = 'quality';
1642
+ this.healthTracker.recordQualityRejection(model, quality.score);
1643
+ this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
1644
+ this.hollowStreak = 0;
1645
+ task.retryContext = {
1646
+ previousFeedback: `Gate error + concrete validation failed: ${concreteResult.issues.join('; ')}`,
1647
+ previousScore: quality.score,
1648
+ attempt: task.attempts,
1649
+ previousModel: model,
1650
+ previousFiles: taskResult.filesModified,
1651
+ swarmProgress: this.getSwarmProgressSummary(),
1652
+ };
1653
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1654
+ if (canRetry) {
1655
+ this.retries++;
1656
+ }
1657
+ else {
1658
+ // Retries exhausted — try resilience recovery before cascade-skip
1659
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1660
+ return;
1661
+ }
1662
+ // Recovery failed — NOW trigger cascade
1663
+ this.taskQueue.triggerCascadeSkip(taskId);
1664
+ }
1665
+ this.emit({
1666
+ type: 'swarm.quality.rejected',
1667
+ taskId,
1668
+ score: quality.score,
1669
+ feedback: quality.feedback,
1670
+ artifactCount: fileArtifacts.length,
1671
+ outputLength: taskResult.output.length,
1672
+ preFlightReject: false,
1673
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1674
+ });
1675
+ return;
1676
+ }
1677
+ }
1678
+ else if (!quality.gateError) {
1679
+ // Normal quality rejection (LLM judge rejected, no gate error)
1680
+ this.qualityRejections++;
1681
+ // P6: Tag failure mode for cascade threshold awareness
1682
+ task.failureMode = 'quality';
1683
+ // P1: Quality rejections update model health — undo premature recordSuccess
1684
+ this.healthTracker.recordQualityRejection(model, quality.score);
1685
+ this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
1686
+ // V7: Quality rejection is NOT hollow — worker did work, just poorly
1687
+ this.hollowStreak = 0;
1688
+ // F7: Per-model circuit breaker → "pre-flight only mode" instead of fully disabling gates.
1689
+ // After threshold rejections, skip LLM judge but keep pre-flight mandatory.
1690
+ if (!quality.preFlightReject) {
1691
+ const modelRejections = (this.perModelQualityRejections.get(model) ?? 0) + 1;
1692
+ this.perModelQualityRejections.set(model, modelRejections);
1693
+ if (modelRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
1694
+ this.qualityGateDisabledModels.add(model);
1695
+ this.logDecision('quality-circuit-breaker', `Switched model ${model} to pre-flight-only mode after ${modelRejections} rejections`, 'Skipping LLM judge but keeping pre-flight checks mandatory');
1696
+ }
1697
+ }
1698
+ // V5: Attach feedback so retry prompt includes it
1699
+ task.retryContext = {
1700
+ previousFeedback: quality.feedback,
1701
+ previousScore: quality.score,
1702
+ attempt: task.attempts,
1703
+ previousModel: model,
1704
+ previousFiles: taskResult.filesModified,
1705
+ swarmProgress: this.getSwarmProgressSummary(),
1706
+ };
1707
+ // V5: Model failover on quality rejection — but NOT on artifact auto-fails
1708
+ // P1: Widened from score<=1 to score<threshold so failover triggers on any rejection
1709
+ if (quality.score < qualityThreshold && this.config.enableModelFailover && !quality.artifactAutoFail) {
1710
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
1711
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1712
+ if (alternative) {
1713
+ this.emit({
1714
+ type: 'swarm.model.failover',
1715
+ taskId,
1716
+ fromModel: model,
1717
+ toModel: alternative.model,
1718
+ reason: `quality-score-${quality.score}`,
1719
+ });
1720
+ task.assignedModel = alternative.model;
1721
+ this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
1722
+ }
1723
+ }
1724
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1725
+ if (canRetry) {
1726
+ this.retries++;
1727
+ }
1728
+ else {
1729
+ // Retries exhausted — try resilience recovery before cascade-skip
1730
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1731
+ return;
1732
+ }
1733
+ // Recovery failed — NOW trigger cascade
1734
+ this.taskQueue.triggerCascadeSkip(taskId);
1735
+ }
1736
+ // M1: Only emit quality.rejected (not duplicate task.failed)
1737
+ this.emit({
1738
+ type: 'swarm.quality.rejected',
1739
+ taskId,
1740
+ score: quality.score,
1741
+ feedback: quality.feedback,
1742
+ artifactCount: fileArtifacts.length,
1743
+ outputLength: taskResult.output.length,
1744
+ preFlightReject: quality.preFlightReject,
1745
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1746
+ });
1747
+ return;
1748
+ }
1749
+ else {
1750
+ // gateError=true but concrete validation disabled — reject
1751
+ this.qualityRejections++;
1752
+ task.failureMode = 'quality';
1753
+ this.hollowStreak = 0;
1754
+ task.retryContext = {
1755
+ previousFeedback: quality.feedback,
1756
+ previousScore: quality.score,
1757
+ attempt: task.attempts,
1758
+ previousModel: model,
1759
+ previousFiles: taskResult.filesModified,
1760
+ swarmProgress: this.getSwarmProgressSummary(),
1761
+ };
1762
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1763
+ if (canRetry) {
1764
+ this.retries++;
1765
+ }
1766
+ else {
1767
+ // Retries exhausted — try resilience recovery before cascade-skip
1768
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1769
+ return;
1770
+ }
1771
+ // Recovery failed — NOW trigger cascade
1772
+ this.taskQueue.triggerCascadeSkip(taskId);
1773
+ }
1774
+ this.emit({
1775
+ type: 'swarm.quality.rejected',
1776
+ taskId,
1777
+ score: quality.score,
1778
+ feedback: quality.feedback,
1779
+ artifactCount: fileArtifacts.length,
1780
+ outputLength: taskResult.output.length,
1781
+ preFlightReject: false,
1782
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1783
+ });
1784
+ return;
1785
+ }
1786
+ }
1787
+ // Quality passed — reset per-model rejection counter
1788
+ this.perModelQualityRejections.delete(model);
1789
+ }
1790
+ // F7: When quality gate was skipped (last attempt, pre-flight-only mode, API pressure),
1791
+ // still run pre-flight + concrete checks so obviously broken outputs don't slip through.
1792
+ // C1: Use cached artifact report to avoid double filesystem scan.
1793
+ if (!shouldRunQualityGate && this.config.qualityGates) {
1794
+ const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedArtifactReport);
1795
+ if (preFlight && !preFlight.passed) {
1796
+ taskResult.qualityScore = preFlight.score;
1797
+ taskResult.qualityFeedback = preFlight.feedback;
839
1798
  this.qualityRejections++;
840
- const canRetry = this.taskQueue.markFailed(taskId, this.config.workerRetries);
1799
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
841
1800
  if (canRetry) {
842
1801
  this.retries++;
843
1802
  }
844
- // M1: Only emit quality.rejected (not duplicate task.failed)
1803
+ else {
1804
+ // Retries exhausted — try resilience recovery before cascade-skip
1805
+ this.logDecision('preflight-reject', `${taskId}: pre-flight failed: ${preFlight.feedback}`, '');
1806
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1807
+ return;
1808
+ }
1809
+ // Recovery failed — NOW trigger cascade
1810
+ this.taskQueue.triggerCascadeSkip(taskId);
1811
+ }
845
1812
  this.emit({
846
1813
  type: 'swarm.quality.rejected',
847
1814
  taskId,
848
- score: quality.score,
849
- feedback: quality.feedback,
1815
+ score: preFlight.score,
1816
+ feedback: preFlight.feedback,
1817
+ artifactCount: 0,
1818
+ outputLength: taskResult.output.length,
1819
+ preFlightReject: true,
850
1820
  });
851
1821
  return;
852
1822
  }
1823
+ // F2: Run concrete validation when pre-flight passes but gate was skipped
1824
+ if (this.config.enableConcreteValidation !== false) {
1825
+ const concreteResult = runConcreteChecks(task, taskResult);
1826
+ if (!concreteResult.passed) {
1827
+ taskResult.qualityScore = 2;
1828
+ taskResult.qualityFeedback = `Concrete validation failed: ${concreteResult.issues.join('; ')}`;
1829
+ this.qualityRejections++;
1830
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1831
+ if (canRetry) {
1832
+ this.retries++;
1833
+ }
1834
+ else {
1835
+ // Retries exhausted — try resilience recovery before cascade-skip
1836
+ this.logDecision('concrete-reject', `${taskId}: concrete validation failed: ${concreteResult.issues.join('; ')}`, '');
1837
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1838
+ return;
1839
+ }
1840
+ // Recovery failed — NOW trigger cascade
1841
+ this.taskQueue.triggerCascadeSkip(taskId);
1842
+ }
1843
+ this.emit({
1844
+ type: 'swarm.quality.rejected',
1845
+ taskId,
1846
+ score: 2,
1847
+ feedback: taskResult.qualityFeedback,
1848
+ artifactCount: 0,
1849
+ outputLength: taskResult.output.length,
1850
+ preFlightReject: false,
1851
+ });
1852
+ return;
1853
+ }
1854
+ }
853
1855
  }
854
1856
  // Task passed — mark completed
855
1857
  this.taskQueue.markCompleted(taskId, taskResult);
1858
+ this.hollowStreak = 0;
1859
+ // F25: Clear timeout counter on success
1860
+ this.taskTimeoutCounts.delete(taskId);
856
1861
  // H6: Post findings to blackboard with error handling
857
1862
  if (this.blackboard && taskResult.findings) {
858
1863
  try {
@@ -885,6 +1890,10 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
885
1890
  costUsed: taskResult.costUsed,
886
1891
  durationMs: taskResult.durationMs,
887
1892
  qualityScore: taskResult.qualityScore,
1893
+ qualityFeedback: taskResult.qualityFeedback,
1894
+ output: taskResult.output,
1895
+ closureReport: taskResult.closureReport,
1896
+ toolCalls: spawnResult.metrics.toolCalls,
888
1897
  });
889
1898
  }
890
1899
  /**
@@ -894,7 +1903,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
894
1903
  const tasks = this.taskQueue.getAllTasks();
895
1904
  const outputs = tasks
896
1905
  .filter(t => t.status === 'completed')
897
- .map(t => taskResultToAgentOutput(t))
1906
+ .map(t => taskResultToAgentOutput(t, this.config))
898
1907
  .filter((o) => o !== null);
899
1908
  if (outputs.length === 0)
900
1909
  return null;
@@ -924,11 +1933,17 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
924
1933
  activeWorkers: this.workerPool.getActiveWorkerStatus(),
925
1934
  queue: stats,
926
1935
  budget: {
927
- tokensUsed: this.totalTokens,
1936
+ tokensUsed: this.totalTokens + this.orchestratorTokens,
928
1937
  tokensTotal: this.config.totalBudget,
929
- costUsed: this.totalCost,
1938
+ costUsed: this.totalCost + this.orchestratorCost,
930
1939
  costTotal: this.config.maxCost,
931
1940
  },
1941
+ orchestrator: {
1942
+ tokens: this.orchestratorTokens,
1943
+ cost: this.orchestratorCost,
1944
+ calls: this.orchestratorCalls,
1945
+ model: this.config.orchestratorModel,
1946
+ },
932
1947
  };
933
1948
  }
934
1949
  /**
@@ -940,6 +1955,69 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
940
1955
  this.currentPhase = 'failed';
941
1956
  await this.workerPool.cancelAll();
942
1957
  }
1958
+ // ─── D3: Model Capability Probing ─────────────────────────────────────
1959
+ /**
1960
+ * D3/F23: Probe each unique model to verify it can make tool calls.
1961
+ * Models that fail the probe are marked unhealthy so they're skipped in dispatch.
1962
+ *
1963
+ * F23 fix: Uses chatWithTools() with actual tool definitions instead of
1964
+ * plain chat() which never included tools in the API request.
1965
+ */
1966
+ async probeModelCapability() {
1967
+ const uniqueModels = new Set(this.config.workers.map(w => w.model));
1968
+ this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Probing ${uniqueModels.size} model(s) for tool-calling capability...` });
1969
+ // F23: Check if provider supports native tool calling
1970
+ const supportsTools = 'chatWithTools' in this.provider
1971
+ && typeof this.provider.chatWithTools === 'function';
1972
+ if (!supportsTools) {
1973
+ // Provider doesn't support chatWithTools — skip probe entirely.
1974
+ // Workers will rely on text-based tool parsing fallback.
1975
+ this.logDecision('model-probe', 'Provider does not support chatWithTools — skipping probe', '');
1976
+ return;
1977
+ }
1978
+ const providerWithTools = this.provider;
1979
+ const probeTools = [{
1980
+ type: 'function',
1981
+ function: {
1982
+ name: 'read_file',
1983
+ description: 'Read a file from disk',
1984
+ parameters: {
1985
+ type: 'object',
1986
+ properties: { path: { type: 'string', description: 'File path' } },
1987
+ required: ['path'],
1988
+ },
1989
+ },
1990
+ }];
1991
+ // F24: Configurable probe timeout — generous default for slow models/connections
1992
+ const probeTimeout = this.config.probeTimeoutMs ?? 60_000;
1993
+ for (const model of uniqueModels) {
1994
+ try {
1995
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Probe timeout (${probeTimeout}ms)`)), probeTimeout));
1996
+ const response = await Promise.race([
1997
+ providerWithTools.chatWithTools([
1998
+ { role: 'system', content: 'You are a test probe. Call the read_file tool with path "package.json".' },
1999
+ { role: 'user', content: 'Read package.json.' },
2000
+ ], { model, maxTokens: 200, temperature: 0, tools: probeTools, tool_choice: 'required' }),
2001
+ timeoutPromise,
2002
+ ]);
2003
+ const hasToolCall = (response.toolCalls?.length ?? 0) > 0;
2004
+ if (!hasToolCall) {
2005
+ // F19: Directly mark unhealthy — probe failure is definitive evidence
2006
+ this.healthTracker.markUnhealthy(model);
2007
+ this.logDecision('model-probe', `Model ${model} failed probe (no tool calls)`, 'Marked unhealthy');
2008
+ }
2009
+ else {
2010
+ this.healthTracker.recordSuccess(model, 0);
2011
+ this.logDecision('model-probe', `Model ${model} passed probe`, '');
2012
+ }
2013
+ }
2014
+ catch {
2015
+ // F19: Directly mark unhealthy on probe error (includes timeout)
2016
+ this.healthTracker.markUnhealthy(model);
2017
+ this.logDecision('model-probe', `Model ${model} probe errored`, 'Marked unhealthy');
2018
+ }
2019
+ }
2020
+ }
943
2021
  // ─── Circuit Breaker ────────────────────────────────────────────────
944
2022
  /**
945
2023
  * Record a rate limit hit and check if the circuit breaker should trip.
@@ -947,6 +2025,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
947
2025
  recordRateLimit() {
948
2026
  const now = Date.now();
949
2027
  this.recentRateLimits.push(now);
2028
+ this.increaseStagger(); // P7: Back off on rate limits
950
2029
  // Prune entries older than the window
951
2030
  const cutoff = now - SwarmOrchestrator.CIRCUIT_BREAKER_WINDOW_MS;
952
2031
  this.recentRateLimits = this.recentRateLimits.filter(t => t > cutoff);
@@ -974,6 +2053,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
974
2053
  }
975
2054
  return false;
976
2055
  }
2056
+ // ─── P7: Adaptive Stagger ────────────────────────────────────────────
2057
+ /** P7: Get current stagger delay (adapts based on rate limit / success signals). */
2058
+ getStaggerMs() {
2059
+ return this.adaptiveStaggerMs;
2060
+ }
2061
+ /** P7: Increase stagger on rate limit (×1.5, capped at 10s). */
2062
+ increaseStagger() {
2063
+ this.adaptiveStaggerMs = Math.min(this.adaptiveStaggerMs * 1.5, 10_000);
2064
+ }
2065
+ /** P7: Decrease stagger on success (×0.9, floor at 200ms). */
2066
+ decreaseStagger() {
2067
+ this.adaptiveStaggerMs = Math.max(this.adaptiveStaggerMs * 0.9, 200);
2068
+ }
977
2069
  // ─── V2: Decision Logging ─────────────────────────────────────────────
978
2070
  logDecision(phase, decision, reasoning) {
979
2071
  const entry = {
@@ -1000,14 +2092,15 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1000
2092
  waves: queueState.waves,
1001
2093
  currentWave: queueState.currentWave,
1002
2094
  stats: {
1003
- totalTokens: this.totalTokens,
1004
- totalCost: this.totalCost,
2095
+ totalTokens: this.totalTokens + this.orchestratorTokens,
2096
+ totalCost: this.totalCost + this.orchestratorCost,
1005
2097
  qualityRejections: this.qualityRejections,
1006
2098
  retries: this.retries,
1007
2099
  },
1008
2100
  modelHealth: this.healthTracker.getAllRecords(),
1009
2101
  decisions: this.orchestratorDecisions,
1010
2102
  errors: this.errors,
2103
+ originalPrompt: this.originalPrompt,
1011
2104
  });
1012
2105
  this.emit({
1013
2106
  type: 'swarm.state.checkpoint',
@@ -1027,9 +2120,9 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1027
2120
  emitBudgetUpdate() {
1028
2121
  this.emit({
1029
2122
  type: 'swarm.budget.update',
1030
- tokensUsed: this.totalTokens,
2123
+ tokensUsed: this.totalTokens + this.orchestratorTokens,
1031
2124
  tokensTotal: this.config.totalBudget,
1032
- costUsed: this.totalCost,
2125
+ costUsed: this.totalCost + this.orchestratorCost,
1033
2126
  costTotal: this.config.maxCost,
1034
2127
  });
1035
2128
  }
@@ -1044,8 +2137,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1044
2137
  failedTasks: queueStats.failed,
1045
2138
  skippedTasks: queueStats.skipped,
1046
2139
  totalWaves: this.taskQueue.getTotalWaves(),
1047
- totalTokens: this.totalTokens,
1048
- totalCost: this.totalCost,
2140
+ totalTokens: this.totalTokens + this.orchestratorTokens,
2141
+ totalCost: this.totalCost + this.orchestratorCost,
1049
2142
  totalDurationMs: Date.now() - this.startTime,
1050
2143
  qualityRejections: this.qualityRejections,
1051
2144
  retries: this.retries,
@@ -1070,6 +2163,16 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1070
2163
  if (this.verificationResult) {
1071
2164
  parts.push(` Verification: ${this.verificationResult.passed ? 'PASSED' : 'FAILED'}`);
1072
2165
  }
2166
+ // Artifact inventory: show what files actually exist on disk regardless of task status
2167
+ if (this.artifactInventory && this.artifactInventory.totalFiles > 0) {
2168
+ parts.push(` Files on disk: ${this.artifactInventory.totalFiles} files (${(this.artifactInventory.totalBytes / 1024).toFixed(1)}KB)`);
2169
+ for (const f of this.artifactInventory.files.slice(0, 15)) {
2170
+ parts.push(` ${f.path}: ${f.sizeBytes}B`);
2171
+ }
2172
+ if (this.artifactInventory.files.length > 15) {
2173
+ parts.push(` ... and ${this.artifactInventory.files.length - 15} more`);
2174
+ }
2175
+ }
1073
2176
  return parts.join('\n');
1074
2177
  }
1075
2178
  buildErrorResult(message) {
@@ -1096,6 +2199,698 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1096
2199
  return null;
1097
2200
  }
1098
2201
  }
2202
+ /**
2203
+ * Detect foundation tasks: tasks that are a dependency of 2+ downstream tasks.
2204
+ * These are critical single-points-of-failure — mark them for extra resilience.
2205
+ */
2206
+ detectFoundationTasks() {
2207
+ const allTasks = this.taskQueue.getAllTasks();
2208
+ const dependentCounts = new Map();
2209
+ for (const task of allTasks) {
2210
+ for (const depId of task.dependencies) {
2211
+ dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
2212
+ }
2213
+ }
2214
+ for (const task of allTasks) {
2215
+ const dependentCount = dependentCounts.get(task.id) ?? 0;
2216
+ if (dependentCount >= 2) {
2217
+ task.isFoundation = true;
2218
+ this.logDecision('scheduling', `Foundation task: ${task.id} (${dependentCount} dependents)`, 'Extra retries and relaxed quality threshold applied');
2219
+ }
2220
+ }
2221
+ }
2222
+ /**
2223
+ * Extract file artifacts from a worker's output for quality gate visibility.
2224
+ * Reads actual file content from disk so the judge can verify real work,
2225
+ * not just text claims about what was created.
2226
+ */
2227
+ extractFileArtifacts(task, taskResult) {
2228
+ const artifacts = [];
2229
+ const seen = new Set();
2230
+ // Collect file paths from multiple sources
2231
+ const candidatePaths = [];
2232
+ // 1. filesModified from structured closure report
2233
+ if (taskResult.filesModified) {
2234
+ candidatePaths.push(...taskResult.filesModified);
2235
+ }
2236
+ // 2. targetFiles from task definition
2237
+ if (task.targetFiles) {
2238
+ candidatePaths.push(...task.targetFiles);
2239
+ }
2240
+ // 3. Extract file paths mentioned in worker output (e.g., "Created src/foo.ts")
2241
+ const filePathPattern = /(?:created|wrote|modified|edited|updated)\s+["`']?([^\s"`',]+\.\w+)/gi;
2242
+ let match;
2243
+ while ((match = filePathPattern.exec(taskResult.output)) !== null) {
2244
+ candidatePaths.push(match[1]);
2245
+ }
2246
+ // Resolve against the target project directory, not CWD
2247
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2248
+ // Read previews from disk
2249
+ for (const filePath of candidatePaths) {
2250
+ if (seen.has(filePath))
2251
+ continue;
2252
+ seen.add(filePath);
2253
+ try {
2254
+ const resolved = path.resolve(baseDir, filePath);
2255
+ if (fs.existsSync(resolved)) {
2256
+ const content = fs.readFileSync(resolved, 'utf-8');
2257
+ if (content.length > 0) {
2258
+ artifacts.push({ path: filePath, preview: content.slice(0, 2000) });
2259
+ }
2260
+ }
2261
+ }
2262
+ catch {
2263
+ // Skip unreadable files
2264
+ }
2265
+ // Limit to 10 files to keep prompt size reasonable
2266
+ if (artifacts.length >= 10)
2267
+ break;
2268
+ }
2269
+ return artifacts;
2270
+ }
2271
+ /**
2272
+ * Build an inventory of filesystem artifacts produced during swarm execution.
2273
+ * Scans all tasks' targetFiles and readFiles to check what actually exists on disk.
2274
+ * This reveals work done by workers even when tasks "failed" (timeout, quality gate, etc.).
2275
+ */
2276
+ buildArtifactInventory() {
2277
+ const allFiles = new Set();
2278
+ for (const task of this.taskQueue.getAllTasks()) {
2279
+ for (const f of (task.targetFiles ?? []))
2280
+ allFiles.add(f);
2281
+ for (const f of (task.readFiles ?? []))
2282
+ allFiles.add(f);
2283
+ }
2284
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2285
+ const artifacts = [];
2286
+ for (const filePath of allFiles) {
2287
+ try {
2288
+ const resolved = path.resolve(baseDir, filePath);
2289
+ if (fs.existsSync(resolved)) {
2290
+ const stats = fs.statSync(resolved);
2291
+ if (stats.isFile() && stats.size > 0) {
2292
+ artifacts.push({ path: filePath, sizeBytes: stats.size, exists: true });
2293
+ }
2294
+ }
2295
+ }
2296
+ catch { /* skip unreadable files */ }
2297
+ }
2298
+ return {
2299
+ files: artifacts,
2300
+ totalFiles: artifacts.length,
2301
+ totalBytes: artifacts.reduce((s, a) => s + a.sizeBytes, 0),
2302
+ };
2303
+ }
2304
+ /**
2305
+ * Skip all remaining pending/ready tasks (used for early termination).
2306
+ */
2307
+ skipRemainingTasks(reason) {
2308
+ for (const task of this.taskQueue.getAllTasks()) {
2309
+ if (task.status === 'pending' || task.status === 'ready') {
2310
+ task.status = 'skipped';
2311
+ this.emit({ type: 'swarm.task.skipped', taskId: task.id, reason });
2312
+ }
2313
+ }
2314
+ }
2315
+ /**
2316
+ * F21: Mid-swarm situational assessment after each wave.
2317
+ * Evaluates success rate and budget health, triages low-priority tasks when budget is tight.
2318
+ * Also detects stalled progress and triggers mid-swarm re-planning.
2319
+ */
2320
+ async assessAndAdapt(waveIndex) {
2321
+ const stats = this.taskQueue.getStats();
2322
+ const budgetStats = this.budgetPool.getStats();
2323
+ // 1. Calculate success rate for this swarm run
2324
+ const successRate = stats.completed / Math.max(1, stats.completed + stats.failed + stats.skipped);
2325
+ // 2. Budget efficiency: tokens spent per completed task
2326
+ const tokensPerTask = stats.completed > 0
2327
+ ? (this.totalTokens / stats.completed)
2328
+ : Infinity;
2329
+ // 3. Remaining budget vs remaining tasks
2330
+ const remainingTasks = stats.total - stats.completed - stats.failed - stats.skipped;
2331
+ const estimatedTokensNeeded = remainingTasks * tokensPerTask;
2332
+ const budgetSufficient = budgetStats.tokensRemaining > estimatedTokensNeeded * 0.5;
2333
+ // Log the assessment for observability
2334
+ this.logDecision('mid-swarm-assessment', `After wave ${waveIndex + 1}: ${stats.completed}/${stats.total} completed (${(successRate * 100).toFixed(0)}%), ` +
2335
+ `${remainingTasks} remaining, ${budgetStats.tokensRemaining} tokens left`, budgetSufficient ? 'Budget looks sufficient' : 'Budget may be insufficient for remaining tasks');
2336
+ // 4. If budget is tight, prioritize: skip low-value remaining tasks
2337
+ // Only triage if we have actual data (at least one completion to estimate from)
2338
+ if (!budgetSufficient && remainingTasks > 1 && stats.completed > 0) {
2339
+ // Prefer pausing over skipping: if workers are still running, wait for budget release
2340
+ const runningCount = stats.running ?? 0;
2341
+ if (runningCount > 0) {
2342
+ this.logDecision('budget-wait', 'Budget tight but workers still running — waiting for budget release', `${runningCount} workers active, ${budgetStats.tokensRemaining} tokens remaining`);
2343
+ return;
2344
+ }
2345
+ const expendableTasks = this.findExpendableTasks();
2346
+ // Hard cap: never skip more than 20% of remaining tasks in one triage pass
2347
+ const maxSkips = Math.max(1, Math.floor(remainingTasks * 0.2));
2348
+ if (expendableTasks.length > 0) {
2349
+ let currentEstimate = estimatedTokensNeeded;
2350
+ let skipped = 0;
2351
+ for (const task of expendableTasks) {
2352
+ if (skipped >= maxSkips)
2353
+ break;
2354
+ // Stop trimming once we're within budget
2355
+ if (currentEstimate * 0.7 <= budgetStats.tokensRemaining)
2356
+ break;
2357
+ task.status = 'skipped';
2358
+ skipped++;
2359
+ this.emit({ type: 'swarm.task.skipped', taskId: task.id,
2360
+ reason: 'Budget conservation: skipping low-priority task to protect critical path' });
2361
+ this.logDecision('budget-triage', `Skipping ${task.id} (${task.type}, complexity ${task.complexity}) to conserve budget`, `${remainingTasks} tasks remain, ${budgetStats.tokensRemaining} tokens`);
2362
+ currentEstimate -= tokensPerTask;
2363
+ }
2364
+ }
2365
+ }
2366
+ // 5. Stall detection: if progress ratio is too low, trigger re-plan
2367
+ const attemptedTasks = stats.completed + stats.failed + stats.skipped;
2368
+ if (attemptedTasks >= 5) {
2369
+ const progressRatio = stats.completed / Math.max(1, attemptedTasks);
2370
+ if (progressRatio < 0.4) {
2371
+ this.logDecision('stall-detected', `Progress stalled: ${stats.completed}/${attemptedTasks} tasks succeeded (${(progressRatio * 100).toFixed(0)}%)`, 'Triggering mid-swarm re-plan');
2372
+ this.emit({
2373
+ type: 'swarm.stall',
2374
+ progressRatio,
2375
+ attempted: attemptedTasks,
2376
+ completed: stats.completed,
2377
+ });
2378
+ await this.midSwarmReplan();
2379
+ }
2380
+ }
2381
+ }
2382
+ /**
2383
+ * F21: Find expendable tasks — leaf tasks (no dependents) with lowest complexity.
2384
+ * These are the safest to skip when budget is tight.
2385
+ * Only tasks with complexity <= 2 are considered expendable.
2386
+ */
2387
+ findExpendableTasks() {
2388
+ const allTasks = this.taskQueue.getAllTasks();
2389
+ // Build reverse dependency map: which tasks depend on each task?
2390
+ const dependentCounts = new Map();
2391
+ for (const task of allTasks) {
2392
+ for (const depId of task.dependencies) {
2393
+ dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
2394
+ }
2395
+ }
2396
+ // Expendable = pending/ready, never attempted, no dependents, not foundation,
2397
+ // complexity <= 2 (simple leaf tasks only), lowest complexity first
2398
+ return allTasks
2399
+ .filter(t => (t.status === 'pending' || t.status === 'ready') &&
2400
+ t.attempts === 0 &&
2401
+ !t.isFoundation &&
2402
+ (t.complexity ?? 5) <= 2 &&
2403
+ (dependentCounts.get(t.id) ?? 0) === 0)
2404
+ .sort((a, b) => (a.complexity ?? 5) - (b.complexity ?? 5));
2405
+ }
2406
+ /**
2407
+ * Mid-swarm re-planning: when progress stalls, ask LLM to re-plan remaining work.
2408
+ * Creates simpler replacement tasks for stuck/failed work, building on what's already done.
2409
+ * Only triggers once per swarm execution to avoid infinite re-planning loops.
2410
+ */
2411
+ async midSwarmReplan() {
2412
+ if (this.hasReplanned)
2413
+ return;
2414
+ this.hasReplanned = true;
2415
+ const allTasks = this.taskQueue.getAllTasks();
2416
+ const completed = allTasks.filter(t => t.status === 'completed' || t.status === 'decomposed');
2417
+ const stuck = allTasks.filter(t => t.status === 'failed' || t.status === 'skipped');
2418
+ if (stuck.length === 0)
2419
+ return;
2420
+ const completedSummary = completed.map(t => `- ${t.description} [${t.type}] → completed${t.degraded ? ' (degraded)' : ''}`).join('\n') || '(none)';
2421
+ const stuckSummary = stuck.map(t => `- ${t.description} [${t.type}] → ${t.status} (${t.failureMode ?? 'unknown'})`).join('\n');
2422
+ const artifactInventory = this.buildArtifactInventory();
2423
+ const artifactSummary = artifactInventory.files.map(f => `- ${f.path} (${f.sizeBytes}B)`).join('\n') || '(none)';
2424
+ const replanPrompt = `The swarm is stalled. Here's the situation:
2425
+
2426
+ COMPLETED WORK:
2427
+ ${completedSummary}
2428
+
2429
+ FILES ON DISK:
2430
+ ${artifactSummary}
2431
+
2432
+ STUCK TASKS (failed or skipped):
2433
+ ${stuckSummary}
2434
+
2435
+ Re-plan the remaining work. Create new subtasks that:
2436
+ 1. Build on what's already completed (don't redo work)
2437
+ 2. Are more focused in scope (but assign realistic complexity for the work involved — don't underestimate)
2438
+ 3. Can succeed independently (minimize dependencies)
2439
+
2440
+ Return JSON: { "subtasks": [{ "description": "...", "type": "implement|test|research|review|document|refactor", "complexity": 1-5, "dependencies": [], "relevantFiles": [] }] }
2441
+ Return ONLY the JSON object, no other text.`;
2442
+ try {
2443
+ const response = await this.provider.chat([{ role: 'user', content: replanPrompt }]);
2444
+ this.trackOrchestratorUsage(response, 'mid-swarm-replan');
2445
+ const content = response.content ?? '';
2446
+ const jsonMatch = content.match(/\{[\s\S]*"subtasks"[\s\S]*\}/);
2447
+ if (!jsonMatch) {
2448
+ this.logDecision('replan-failed', 'LLM produced no parseable re-plan JSON', content.slice(0, 200));
2449
+ return;
2450
+ }
2451
+ const parsed = JSON.parse(jsonMatch[0]);
2452
+ if (!parsed.subtasks || parsed.subtasks.length === 0) {
2453
+ this.logDecision('replan-failed', 'LLM produced empty subtask list', '');
2454
+ return;
2455
+ }
2456
+ // Add new tasks from re-plan into current wave
2457
+ const newTasks = this.taskQueue.addReplanTasks(parsed.subtasks, this.taskQueue.getCurrentWave());
2458
+ this.logDecision('replan-success', `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`, newTasks.map(t => t.description).join('; '));
2459
+ this.emit({
2460
+ type: 'swarm.replan',
2461
+ stuckCount: stuck.length,
2462
+ newTaskCount: newTasks.length,
2463
+ });
2464
+ this.emit({
2465
+ type: 'swarm.orchestrator.decision',
2466
+ decision: {
2467
+ timestamp: Date.now(),
2468
+ phase: 'replan',
2469
+ decision: `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`,
2470
+ reasoning: newTasks.map(t => `${t.id}: ${t.description}`).join('; '),
2471
+ },
2472
+ });
2473
+ }
2474
+ catch (error) {
2475
+ this.logDecision('replan-failed', `Re-plan LLM call failed: ${error.message}`, '');
2476
+ }
2477
+ }
2478
+ /**
2479
+ * Rescue cascade-skipped tasks that can still run.
2480
+ * After cascade-skip fires, assess whether skipped tasks can still be attempted:
2481
+ * - If all OTHER dependencies completed and the failed dep's artifacts exist on disk → un-skip
2482
+ * - If the task has no strict data dependency on the failed task (different file targets) → un-skip with warning
2483
+ */
2484
+ rescueCascadeSkipped(lenient = false) {
2485
+ const skippedTasks = this.taskQueue.getSkippedTasks();
2486
+ const rescued = [];
2487
+ for (const task of skippedTasks) {
2488
+ if (task.dependencies.length === 0)
2489
+ continue;
2490
+ let completedDeps = 0;
2491
+ let failedDepsWithArtifacts = 0;
2492
+ let failedDepsWithoutArtifacts = 0;
2493
+ let skippedDepsBlockedBySkipped = 0;
2494
+ let totalDeps = 0;
2495
+ const failedDepDescriptions = [];
2496
+ for (const depId of task.dependencies) {
2497
+ const dep = this.taskQueue.getTask(depId);
2498
+ if (!dep)
2499
+ continue;
2500
+ totalDeps++;
2501
+ if (dep.status === 'completed' || dep.status === 'decomposed') {
2502
+ completedDeps++;
2503
+ }
2504
+ else if (dep.status === 'failed' || dep.status === 'skipped') {
2505
+ // V10: In lenient mode, use checkArtifactsEnhanced for broader detection
2506
+ const artifactReport = lenient ? checkArtifactsEnhanced(dep) : checkArtifacts(dep);
2507
+ if (artifactReport && artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length > 0) {
2508
+ failedDepsWithArtifacts++;
2509
+ failedDepDescriptions.push(`${dep.description} (failed but ${artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length} artifacts exist)`);
2510
+ }
2511
+ else {
2512
+ // Check if this dep's target files exist on disk (may have been created by earlier attempt)
2513
+ const targetFiles = dep.targetFiles ?? [];
2514
+ const existingFiles = targetFiles.filter(f => {
2515
+ try {
2516
+ const resolved = path.resolve(this.config.facts?.workingDirectory ?? process.cwd(), f);
2517
+ return fs.statSync(resolved).size > 0;
2518
+ }
2519
+ catch {
2520
+ return false;
2521
+ }
2522
+ });
2523
+ if (existingFiles.length > 0) {
2524
+ failedDepsWithArtifacts++;
2525
+ failedDepDescriptions.push(`${dep.description} (failed but ${existingFiles.length}/${targetFiles.length} target files exist)`);
2526
+ }
2527
+ else {
2528
+ // Check if skipped task's targets don't overlap with the failed dep's targets
2529
+ const taskTargets = new Set(task.targetFiles ?? []);
2530
+ const depTargets = new Set(dep.targetFiles ?? []);
2531
+ const hasOverlap = [...taskTargets].some(f => depTargets.has(f));
2532
+ if (!hasOverlap && taskTargets.size > 0) {
2533
+ // Different file targets — task probably doesn't need the failed dep's output
2534
+ failedDepsWithArtifacts++;
2535
+ failedDepDescriptions.push(`${dep.description} (failed, no file overlap — likely independent)`);
2536
+ }
2537
+ else if (lenient && dep.status === 'skipped') {
2538
+ // V10: In lenient mode, count skipped-by-skipped deps separately
2539
+ // (transitive cascade — the dep itself was a victim, not truly broken)
2540
+ skippedDepsBlockedBySkipped++;
2541
+ failedDepDescriptions.push(`${dep.description} (skipped — transitive cascade victim)`);
2542
+ }
2543
+ else {
2544
+ failedDepsWithoutArtifacts++;
2545
+ }
2546
+ }
2547
+ }
2548
+ }
2549
+ }
2550
+ // Rescue condition:
2551
+ // Normal: all failed deps have artifacts or are independent, AND at least some deps completed
2552
+ // Lenient: tolerate up to 1 truly-missing dep, and count transitive cascade victims as recoverable
2553
+ const effectiveWithout = failedDepsWithoutArtifacts;
2554
+ const maxMissing = lenient ? 1 : 0;
2555
+ const hasEnoughContext = lenient ? (completedDeps + failedDepsWithArtifacts + skippedDepsBlockedBySkipped > 0) : (completedDeps > 0);
2556
+ if (totalDeps > 0 && effectiveWithout <= maxMissing && hasEnoughContext) {
2557
+ const rescueContext = `Rescued from cascade-skip${lenient ? ' (lenient)' : ''}: ${completedDeps}/${totalDeps} deps completed, ` +
2558
+ `${failedDepsWithArtifacts} failed deps have artifacts${skippedDepsBlockedBySkipped > 0 ? `, ${skippedDepsBlockedBySkipped} transitive cascade victims` : ''}. ${failedDepDescriptions.join('; ')}`;
2559
+ this.taskQueue.rescueTask(task.id, rescueContext);
2560
+ rescued.push(task);
2561
+ this.logDecision('cascade-rescue', `${task.id}: rescued from cascade-skip${lenient ? ' (lenient)' : ''}`, rescueContext);
2562
+ }
2563
+ }
2564
+ return rescued;
2565
+ }
2566
+ /**
2567
+ * Final rescue pass — runs after executeWaves() finishes.
2568
+ * Uses lenient mode to rescue cascade-skipped tasks that have partial context.
2569
+ * Re-dispatches rescued tasks in a final wave.
2570
+ */
2571
+ async finalRescuePass() {
2572
+ const skipped = this.taskQueue.getSkippedTasks();
2573
+ if (skipped.length === 0)
2574
+ return;
2575
+ this.logDecision('final-rescue', `${skipped.length} skipped tasks — running final rescue pass`, '');
2576
+ const rescued = this.rescueCascadeSkipped(true); // lenient=true
2577
+ if (rescued.length > 0) {
2578
+ this.logDecision('final-rescue', `Rescued ${rescued.length} tasks`, rescued.map(t => t.id).join(', '));
2579
+ await this.executeWave(rescued);
2580
+ }
2581
+ }
2582
+ /**
2583
+ * Try resilience recovery strategies before hard-failing a task.
2584
+ * Called from dispatch-cap, timeout, hollow, and error paths to avoid bypassing resilience.
2585
+ *
2586
+ * Strategies (in order):
2587
+ * 1. Micro-decomposition — break complex failing tasks into subtasks
2588
+ * 2. Degraded acceptance — accept partial work if artifacts exist on disk
2589
+ *
2590
+ * Returns true if recovery succeeded (caller should return), false if hard-fail should proceed.
2591
+ */
2592
+ async tryResilienceRecovery(task, taskId, taskResult, spawnResult) {
2593
+ // Strategy 1: Micro-decompose complex tasks into smaller subtasks
2594
+ // V10: Lowered threshold from >= 6 to >= 4 so moderately complex tasks can be recovered
2595
+ if ((task.complexity ?? 0) >= 4 && task.attempts >= 2 && this.budgetPool.hasCapacity()) {
2596
+ const subtasks = await this.microDecompose(task);
2597
+ if (subtasks && subtasks.length >= 2) {
2598
+ // Reset task status so replaceWithSubtasks can mark it as decomposed
2599
+ task.status = 'dispatched';
2600
+ this.taskQueue.replaceWithSubtasks(taskId, subtasks);
2601
+ this.logDecision('micro-decompose', `${taskId}: decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
2602
+ this.emit({
2603
+ type: 'swarm.task.failed',
2604
+ taskId,
2605
+ error: `Micro-decomposed into ${subtasks.length} subtasks`,
2606
+ attempt: task.attempts,
2607
+ maxAttempts: this.config.maxDispatchesPerTask ?? 5,
2608
+ willRetry: false,
2609
+ toolCalls: spawnResult.metrics.toolCalls,
2610
+ failureMode: task.failureMode,
2611
+ });
2612
+ this.emit({
2613
+ type: 'swarm.task.resilience',
2614
+ taskId,
2615
+ strategy: 'micro-decompose',
2616
+ succeeded: true,
2617
+ reason: `Decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`,
2618
+ artifactsFound: 0,
2619
+ toolCalls: spawnResult.metrics.toolCalls ?? 0,
2620
+ });
2621
+ return true;
2622
+ }
2623
+ // Micro-decompose was attempted but didn't produce usable subtasks
2624
+ if ((task.complexity ?? 0) < 4) {
2625
+ this.logDecision('resilience-skip', `${taskId}: skipped micro-decompose — complexity ${task.complexity} < 4`, '');
2626
+ }
2627
+ }
2628
+ // Strategy 2: Degraded acceptance — check if any attempt produced files on disk.
2629
+ // V10: Use checkArtifactsEnhanced for broader detection (filesModified, closureReport, output)
2630
+ const artifactReport = checkArtifactsEnhanced(task, taskResult);
2631
+ const existingArtifacts = artifactReport.files.filter(f => f.exists && f.sizeBytes > 0);
2632
+ const hasArtifacts = existingArtifacts.length > 0;
2633
+ // V10: Fix timeout detection — toolCalls=-1 means timeout (worker WAS working)
2634
+ const toolCalls = spawnResult.metrics.toolCalls ?? 0;
2635
+ const hadToolCalls = toolCalls > 0 || toolCalls === -1
2636
+ || (taskResult.filesModified && taskResult.filesModified.length > 0);
2637
+ if (hasArtifacts || hadToolCalls) {
2638
+ // Accept with degraded flag — prevents cascade-skip of dependents
2639
+ taskResult.success = true;
2640
+ taskResult.degraded = true;
2641
+ taskResult.qualityScore = 2; // Capped at low quality
2642
+ taskResult.qualityFeedback = 'Degraded acceptance: retries exhausted but filesystem artifacts exist';
2643
+ task.degraded = true;
2644
+ // Reset status so markCompleted works (markFailed may have set it to 'failed')
2645
+ task.status = 'dispatched';
2646
+ this.taskQueue.markCompleted(taskId, taskResult);
2647
+ this.hollowStreak = 0;
2648
+ this.logDecision('degraded-acceptance', `${taskId}: accepted as degraded — ${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`, 'Prevents cascade-skip of dependent tasks');
2649
+ this.emit({
2650
+ type: 'swarm.task.completed',
2651
+ taskId,
2652
+ success: true,
2653
+ tokensUsed: taskResult.tokensUsed,
2654
+ costUsed: taskResult.costUsed,
2655
+ durationMs: taskResult.durationMs,
2656
+ qualityScore: 2,
2657
+ qualityFeedback: 'Degraded acceptance',
2658
+ output: taskResult.output,
2659
+ toolCalls: spawnResult.metrics.toolCalls,
2660
+ });
2661
+ this.emit({
2662
+ type: 'swarm.task.resilience',
2663
+ taskId,
2664
+ strategy: 'degraded-acceptance',
2665
+ succeeded: true,
2666
+ reason: `${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`,
2667
+ artifactsFound: existingArtifacts.length,
2668
+ toolCalls,
2669
+ });
2670
+ return true;
2671
+ }
2672
+ // Both strategies failed — log exhaustion for traceability
2673
+ this.logDecision('resilience-exhausted', `${taskId}: no recovery — artifacts: ${existingArtifacts.length}, toolCalls: ${toolCalls}, filesModified: ${taskResult.filesModified?.length ?? 0}`, '');
2674
+ this.emit({
2675
+ type: 'swarm.task.resilience',
2676
+ taskId,
2677
+ strategy: 'none',
2678
+ succeeded: false,
2679
+ reason: `No artifacts found, toolCalls=${toolCalls}, filesModified=${taskResult.filesModified?.length ?? 0}`,
2680
+ artifactsFound: existingArtifacts.length,
2681
+ toolCalls,
2682
+ });
2683
+ return false;
2684
+ }
2685
+ /**
2686
+ * Micro-decompose a complex task into 2-3 smaller subtasks using the LLM.
2687
+ * Called when a complex task (complexity >= 6) fails 2+ times with the same failure mode.
2688
+ * Returns null if decomposition doesn't make sense or LLM can't produce valid subtasks.
2689
+ */
2690
+ async microDecompose(task) {
2691
+ if ((task.complexity ?? 0) < 4)
2692
+ return null;
2693
+ try {
2694
+ const prompt = `Task "${task.description}" failed ${task.attempts} times on model ${task.assignedModel ?? 'unknown'}.
2695
+ The task has complexity ${task.complexity}/10 and type "${task.type}".
2696
+ ${task.targetFiles?.length ? `Target files: ${task.targetFiles.join(', ')}` : ''}
2697
+
2698
+ Break this task into 2-3 smaller, independent subtasks that each handle a portion of the work.
2699
+ Each subtask MUST be simpler (complexity <= ${Math.ceil(task.complexity / 2)}).
2700
+ Each subtask should be self-contained and produce concrete file changes.
2701
+
2702
+ Return JSON ONLY (no markdown, no explanation):
2703
+ {
2704
+ "subtasks": [
2705
+ { "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number> }
2706
+ ]
2707
+ }`;
2708
+ const response = await this.provider.chat([
2709
+ { role: 'system', content: 'You are a task decomposition assistant. Return only valid JSON.' },
2710
+ { role: 'user', content: prompt },
2711
+ ], {
2712
+ model: this.config.orchestratorModel,
2713
+ maxTokens: 2000,
2714
+ temperature: 0.3,
2715
+ });
2716
+ this.trackOrchestratorUsage(response, 'micro-decompose');
2717
+ // Parse response — handle markdown code blocks
2718
+ let jsonStr = response.content.trim();
2719
+ const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
2720
+ if (codeBlockMatch)
2721
+ jsonStr = codeBlockMatch[1].trim();
2722
+ const parsed = JSON.parse(jsonStr);
2723
+ if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
2724
+ return null;
2725
+ }
2726
+ const subtasks = parsed.subtasks.map((sub, idx) => ({
2727
+ id: `${task.id}-sub${idx + 1}`,
2728
+ description: sub.description,
2729
+ type: sub.type ?? task.type,
2730
+ dependencies: [], // Will be set by replaceWithSubtasks
2731
+ status: 'ready',
2732
+ complexity: Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1),
2733
+ wave: task.wave,
2734
+ targetFiles: sub.targetFiles ?? [],
2735
+ readFiles: task.readFiles,
2736
+ attempts: 0,
2737
+ }));
2738
+ return subtasks;
2739
+ }
2740
+ catch (error) {
2741
+ this.logDecision('micro-decompose', `${task.id}: micro-decomposition failed — ${error.message}`, 'Falling through to normal failure path');
2742
+ return null;
2743
+ }
2744
+ }
2745
+ // ─── Pre-Dispatch Auto-Split ──────────────────────────────────────────────
2746
+ /**
2747
+ * Heuristic pre-filter: should this task be considered for auto-split?
2748
+ * Cheap check — no LLM call. Returns true if all conditions are met.
2749
+ */
2750
+ shouldAutoSplit(task) {
2751
+ const cfg = this.config.autoSplit;
2752
+ if (cfg?.enabled === false)
2753
+ return false;
2754
+ const floor = cfg?.complexityFloor ?? 6;
2755
+ const splittable = cfg?.splittableTypes ?? ['implement', 'refactor', 'test'];
2756
+ // Only first attempts — retries use micro-decompose
2757
+ if (task.attempts > 0)
2758
+ return false;
2759
+ // Complexity check
2760
+ if ((task.complexity ?? 0) < floor)
2761
+ return false;
2762
+ // Type check
2763
+ if (!splittable.includes(task.type))
2764
+ return false;
2765
+ // Must be on critical path (foundation task)
2766
+ if (!task.isFoundation)
2767
+ return false;
2768
+ // Budget capacity check
2769
+ if (!this.budgetPool.hasCapacity())
2770
+ return false;
2771
+ return true;
2772
+ }
2773
+ /**
2774
+ * LLM judge call: ask the orchestrator model whether and how to split a task.
2775
+ * Returns { shouldSplit: false } or { shouldSplit: true, subtasks: [...] }.
2776
+ */
2777
+ async judgeSplit(task) {
2778
+ const maxSubs = this.config.autoSplit?.maxSubtasks ?? 4;
2779
+ const prompt = `You are evaluating whether a task should be split into parallel subtasks before dispatch.
2780
+
2781
+ TASK: "${task.description}"
2782
+ TYPE: ${task.type}
2783
+ COMPLEXITY: ${task.complexity}/10
2784
+ TARGET FILES: ${task.targetFiles?.join(', ') || 'none specified'}
2785
+ DOWNSTREAM DEPENDENTS: This is a foundation task — other tasks are waiting on it.
2786
+
2787
+ Should this task be split into 2-${maxSubs} parallel subtasks that different workers can execute simultaneously?
2788
+
2789
+ SPLIT if:
2790
+ - The task involves multiple independent pieces of work (e.g., different files, different functions, different concerns)
2791
+ - Parallel execution would meaningfully reduce wall-clock time
2792
+ - The subtasks can produce useful output independently
2793
+
2794
+ DO NOT SPLIT if:
2795
+ - The work is conceptually atomic (one function, one algorithm, tightly coupled logic)
2796
+ - The subtasks would need to coordinate on the same files/functions
2797
+ - Splitting would add more overhead than it saves
2798
+
2799
+ Return JSON ONLY:
2800
+ {
2801
+ "shouldSplit": true/false,
2802
+ "reason": "brief explanation",
2803
+ "subtasks": [
2804
+ { "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number 1-10> }
2805
+ ]
2806
+ }
2807
+ If shouldSplit is false, omit subtasks.`;
2808
+ const response = await this.provider.chat([
2809
+ { role: 'system', content: 'You are a task planning judge. Return only valid JSON.' },
2810
+ { role: 'user', content: prompt },
2811
+ ], {
2812
+ model: this.config.orchestratorModel,
2813
+ maxTokens: 1500,
2814
+ temperature: 0.2,
2815
+ });
2816
+ this.trackOrchestratorUsage(response, 'auto-split-judge');
2817
+ // Parse response — reuse markdown code block stripping from microDecompose
2818
+ let jsonStr = response.content.trim();
2819
+ const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
2820
+ if (codeBlockMatch)
2821
+ jsonStr = codeBlockMatch[1].trim();
2822
+ const parsed = JSON.parse(jsonStr);
2823
+ if (!parsed.shouldSplit) {
2824
+ this.logDecision('auto-split', `${task.id}: judge says no split — ${parsed.reason}`, '');
2825
+ return { shouldSplit: false };
2826
+ }
2827
+ if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
2828
+ return { shouldSplit: false };
2829
+ }
2830
+ // Build SwarmTask[] from judge output (same pattern as microDecompose)
2831
+ const subtasks = parsed.subtasks.slice(0, maxSubs).map((sub, idx) => ({
2832
+ id: `${task.id}-split${idx + 1}`,
2833
+ description: sub.description,
2834
+ type: sub.type ?? task.type,
2835
+ dependencies: [],
2836
+ status: 'ready',
2837
+ complexity: Math.max(3, Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1)),
2838
+ wave: task.wave,
2839
+ targetFiles: sub.targetFiles ?? [],
2840
+ readFiles: task.readFiles,
2841
+ attempts: 0,
2842
+ rescueContext: `Auto-split from ${task.id} (original complexity ${task.complexity})`,
2843
+ }));
2844
+ this.logDecision('auto-split', `${task.id}: split into ${subtasks.length} subtasks — ${parsed.reason}`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
2845
+ return { shouldSplit: true, subtasks };
2846
+ }
2847
+ /**
2848
+ * V7: Compute effective retry limit for a task.
2849
+ * F10: Fixup tasks get max 2 retries (3 attempts total) — one full model-failover cycle.
2850
+ * Foundation tasks get +1 retry to reduce cascade failure risk.
2851
+ */
2852
+ getEffectiveRetries(task) {
2853
+ const isFixup = 'fixesTaskId' in task;
2854
+ if (isFixup)
2855
+ return 2; // Fixup tasks: 2 retries max (3 attempts total)
2856
+ return task.isFoundation ? this.config.workerRetries + 1 : this.config.workerRetries;
2857
+ }
2858
+ /**
2859
+ * F22: Build a brief summary of swarm progress for retry context.
2860
+ * Helps retrying workers understand what the swarm has already accomplished.
2861
+ */
2862
+ getSwarmProgressSummary() {
2863
+ const allTasks = this.taskQueue.getAllTasks();
2864
+ const completed = allTasks.filter(t => t.status === 'completed');
2865
+ if (completed.length === 0)
2866
+ return '';
2867
+ const lines = [];
2868
+ for (const task of completed) {
2869
+ const score = task.result?.qualityScore ? ` (${task.result.qualityScore}/5)` : '';
2870
+ lines.push(`- ${task.id}: ${task.description.slice(0, 80)}${score}`);
2871
+ }
2872
+ // Collect files created by completed tasks
2873
+ const files = new Set();
2874
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2875
+ for (const task of completed) {
2876
+ for (const f of (task.result?.filesModified ?? []))
2877
+ files.add(f);
2878
+ for (const f of (task.targetFiles ?? [])) {
2879
+ try {
2880
+ const resolved = path.resolve(baseDir, f);
2881
+ if (fs.existsSync(resolved))
2882
+ files.add(f);
2883
+ }
2884
+ catch { /* skip */ }
2885
+ }
2886
+ }
2887
+ const parts = [`The following tasks have completed successfully:\n${lines.join('\n')}`];
2888
+ if (files.size > 0) {
2889
+ parts.push(`Files already created/modified: ${[...files].slice(0, 20).join(', ')}`);
2890
+ parts.push('You can build on these existing files.');
2891
+ }
2892
+ return parts.join('\n');
2893
+ }
1099
2894
  /** Get a model health summary for emitting events. */
1100
2895
  getModelHealthSummary(model) {
1101
2896
  const records = this.healthTracker.getAllRecords();