attocode 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (382)
  1. package/CHANGELOG.md +169 -3
  2. package/README.md +65 -5
  3. package/dist/src/adapters.d.ts.map +1 -1
  4. package/dist/src/adapters.js +15 -11
  5. package/dist/src/adapters.js.map +1 -1
  6. package/dist/src/agent.d.ts +44 -98
  7. package/dist/src/agent.d.ts.map +1 -1
  8. package/dist/src/agent.js +716 -2648
  9. package/dist/src/agent.js.map +1 -1
  10. package/dist/src/cli.d.ts.map +1 -1
  11. package/dist/src/cli.js +25 -3
  12. package/dist/src/cli.js.map +1 -1
  13. package/dist/src/commands/handler.d.ts.map +1 -1
  14. package/dist/src/commands/handler.js +11 -3
  15. package/dist/src/commands/handler.js.map +1 -1
  16. package/dist/src/commands/init-commands.d.ts.map +1 -1
  17. package/dist/src/commands/init-commands.js +16 -1
  18. package/dist/src/commands/init-commands.js.map +1 -1
  19. package/dist/src/commands/init.d.ts.map +1 -1
  20. package/dist/src/commands/init.js +31 -0
  21. package/dist/src/commands/init.js.map +1 -1
  22. package/dist/src/config/base-types.d.ts +45 -0
  23. package/dist/src/config/base-types.d.ts.map +1 -0
  24. package/dist/src/config/base-types.js +9 -0
  25. package/dist/src/config/base-types.js.map +1 -0
  26. package/dist/src/config/config-manager.d.ts +35 -0
  27. package/dist/src/config/config-manager.d.ts.map +1 -0
  28. package/dist/src/config/config-manager.js +108 -0
  29. package/dist/src/config/config-manager.js.map +1 -0
  30. package/dist/src/config/index.d.ts +4 -0
  31. package/dist/src/config/index.d.ts.map +1 -0
  32. package/dist/src/config/index.js +3 -0
  33. package/dist/src/config/index.js.map +1 -0
  34. package/dist/src/config/schema.d.ts +1546 -0
  35. package/dist/src/config/schema.d.ts.map +1 -0
  36. package/dist/src/config/schema.js +268 -0
  37. package/dist/src/config/schema.js.map +1 -0
  38. package/dist/src/config.d.ts +4 -1
  39. package/dist/src/config.d.ts.map +1 -1
  40. package/dist/src/config.js +8 -12
  41. package/dist/src/config.js.map +1 -1
  42. package/dist/src/core/agent-state-machine.d.ts +131 -0
  43. package/dist/src/core/agent-state-machine.d.ts.map +1 -0
  44. package/dist/src/core/agent-state-machine.js +302 -0
  45. package/dist/src/core/agent-state-machine.js.map +1 -0
  46. package/dist/src/core/base-manager.d.ts +79 -0
  47. package/dist/src/core/base-manager.d.ts.map +1 -0
  48. package/dist/src/core/base-manager.js +170 -0
  49. package/dist/src/core/base-manager.js.map +1 -0
  50. package/dist/src/core/completion-analyzer.d.ts +15 -0
  51. package/dist/src/core/completion-analyzer.d.ts.map +1 -0
  52. package/dist/src/core/completion-analyzer.js +53 -0
  53. package/dist/src/core/completion-analyzer.js.map +1 -0
  54. package/dist/src/core/execution-loop.d.ts +46 -0
  55. package/dist/src/core/execution-loop.d.ts.map +1 -0
  56. package/dist/src/core/execution-loop.js +1258 -0
  57. package/dist/src/core/execution-loop.js.map +1 -0
  58. package/dist/src/core/index.d.ts +7 -0
  59. package/dist/src/core/index.d.ts.map +1 -1
  60. package/dist/src/core/index.js +9 -0
  61. package/dist/src/core/index.js.map +1 -1
  62. package/dist/src/core/process-handlers.d.ts.map +1 -1
  63. package/dist/src/core/process-handlers.js +14 -0
  64. package/dist/src/core/process-handlers.js.map +1 -1
  65. package/dist/src/core/protocol/types.d.ts +4 -4
  66. package/dist/src/core/response-handler.d.ts +16 -0
  67. package/dist/src/core/response-handler.d.ts.map +1 -0
  68. package/dist/src/core/response-handler.js +234 -0
  69. package/dist/src/core/response-handler.js.map +1 -0
  70. package/dist/src/core/subagent-spawner.d.ts +43 -0
  71. package/dist/src/core/subagent-spawner.d.ts.map +1 -0
  72. package/dist/src/core/subagent-spawner.js +966 -0
  73. package/dist/src/core/subagent-spawner.js.map +1 -0
  74. package/dist/src/core/tool-executor.d.ts +59 -0
  75. package/dist/src/core/tool-executor.d.ts.map +1 -0
  76. package/dist/src/core/tool-executor.js +677 -0
  77. package/dist/src/core/tool-executor.js.map +1 -0
  78. package/dist/src/core/types.d.ts +133 -0
  79. package/dist/src/core/types.d.ts.map +1 -0
  80. package/dist/src/core/types.js +12 -0
  81. package/dist/src/core/types.js.map +1 -0
  82. package/dist/src/defaults.d.ts +8 -3
  83. package/dist/src/defaults.d.ts.map +1 -1
  84. package/dist/src/defaults.js +65 -3
  85. package/dist/src/defaults.js.map +1 -1
  86. package/dist/src/integrations/agent-registry.d.ts +11 -0
  87. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  88. package/dist/src/integrations/agent-registry.js.map +1 -1
  89. package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
  90. package/dist/src/integrations/auto-compaction.js +8 -3
  91. package/dist/src/integrations/auto-compaction.js.map +1 -1
  92. package/dist/src/integrations/bash-policy.d.ts +33 -0
  93. package/dist/src/integrations/bash-policy.d.ts.map +1 -0
  94. package/dist/src/integrations/bash-policy.js +142 -0
  95. package/dist/src/integrations/bash-policy.js.map +1 -0
  96. package/dist/src/integrations/budget-pool.d.ts +7 -0
  97. package/dist/src/integrations/budget-pool.d.ts.map +1 -1
  98. package/dist/src/integrations/budget-pool.js +43 -0
  99. package/dist/src/integrations/budget-pool.js.map +1 -1
  100. package/dist/src/integrations/codebase-ast.d.ts +52 -0
  101. package/dist/src/integrations/codebase-ast.d.ts.map +1 -0
  102. package/dist/src/integrations/codebase-ast.js +457 -0
  103. package/dist/src/integrations/codebase-ast.js.map +1 -0
  104. package/dist/src/integrations/codebase-context.d.ts +23 -0
  105. package/dist/src/integrations/codebase-context.d.ts.map +1 -1
  106. package/dist/src/integrations/codebase-context.js +230 -17
  107. package/dist/src/integrations/codebase-context.js.map +1 -1
  108. package/dist/src/integrations/compaction.d.ts.map +1 -1
  109. package/dist/src/integrations/compaction.js +14 -6
  110. package/dist/src/integrations/compaction.js.map +1 -1
  111. package/dist/src/integrations/context-engineering.d.ts +8 -0
  112. package/dist/src/integrations/context-engineering.d.ts.map +1 -1
  113. package/dist/src/integrations/context-engineering.js +19 -0
  114. package/dist/src/integrations/context-engineering.js.map +1 -1
  115. package/dist/src/integrations/delegation-protocol.js +2 -2
  116. package/dist/src/integrations/delegation-protocol.js.map +1 -1
  117. package/dist/src/integrations/economics.d.ts +67 -1
  118. package/dist/src/integrations/economics.d.ts.map +1 -1
  119. package/dist/src/integrations/economics.js +328 -33
  120. package/dist/src/integrations/economics.js.map +1 -1
  121. package/dist/src/integrations/edit-validator.d.ts +30 -0
  122. package/dist/src/integrations/edit-validator.d.ts.map +1 -0
  123. package/dist/src/integrations/edit-validator.js +85 -0
  124. package/dist/src/integrations/edit-validator.js.map +1 -0
  125. package/dist/src/integrations/file-cache.d.ts +7 -0
  126. package/dist/src/integrations/file-cache.d.ts.map +1 -1
  127. package/dist/src/integrations/file-cache.js +54 -0
  128. package/dist/src/integrations/file-cache.js.map +1 -1
  129. package/dist/src/integrations/health-check.d.ts.map +1 -1
  130. package/dist/src/integrations/health-check.js +3 -2
  131. package/dist/src/integrations/health-check.js.map +1 -1
  132. package/dist/src/integrations/hierarchical-config.d.ts +3 -0
  133. package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
  134. package/dist/src/integrations/hierarchical-config.js +20 -0
  135. package/dist/src/integrations/hierarchical-config.js.map +1 -1
  136. package/dist/src/integrations/hooks.d.ts +2 -0
  137. package/dist/src/integrations/hooks.d.ts.map +1 -1
  138. package/dist/src/integrations/hooks.js +99 -15
  139. package/dist/src/integrations/hooks.js.map +1 -1
  140. package/dist/src/integrations/index.d.ts +10 -1
  141. package/dist/src/integrations/index.d.ts.map +1 -1
  142. package/dist/src/integrations/index.js +12 -2
  143. package/dist/src/integrations/index.js.map +1 -1
  144. package/dist/src/integrations/logger.d.ts +104 -0
  145. package/dist/src/integrations/logger.d.ts.map +1 -0
  146. package/dist/src/integrations/logger.js +219 -0
  147. package/dist/src/integrations/logger.js.map +1 -0
  148. package/dist/src/integrations/lsp.d.ts.map +1 -1
  149. package/dist/src/integrations/lsp.js +5 -4
  150. package/dist/src/integrations/lsp.js.map +1 -1
  151. package/dist/src/integrations/mcp-client.d.ts.map +1 -1
  152. package/dist/src/integrations/mcp-client.js +8 -7
  153. package/dist/src/integrations/mcp-client.js.map +1 -1
  154. package/dist/src/integrations/observability.d.ts.map +1 -1
  155. package/dist/src/integrations/observability.js +5 -4
  156. package/dist/src/integrations/observability.js.map +1 -1
  157. package/dist/src/integrations/openrouter-pricing.d.ts.map +1 -1
  158. package/dist/src/integrations/openrouter-pricing.js +4 -3
  159. package/dist/src/integrations/openrouter-pricing.js.map +1 -1
  160. package/dist/src/integrations/persistence.d.ts.map +1 -1
  161. package/dist/src/integrations/persistence.js +5 -4
  162. package/dist/src/integrations/persistence.js.map +1 -1
  163. package/dist/src/integrations/planning.d.ts.map +1 -1
  164. package/dist/src/integrations/planning.js +5 -4
  165. package/dist/src/integrations/planning.js.map +1 -1
  166. package/dist/src/integrations/policy-engine.d.ts +55 -0
  167. package/dist/src/integrations/policy-engine.d.ts.map +1 -0
  168. package/dist/src/integrations/policy-engine.js +247 -0
  169. package/dist/src/integrations/policy-engine.js.map +1 -0
  170. package/dist/src/integrations/retry.d.ts +1 -0
  171. package/dist/src/integrations/retry.d.ts.map +1 -1
  172. package/dist/src/integrations/retry.js.map +1 -1
  173. package/dist/src/integrations/routing.d.ts.map +1 -1
  174. package/dist/src/integrations/routing.js +2 -1
  175. package/dist/src/integrations/routing.js.map +1 -1
  176. package/dist/src/integrations/safety.d.ts +5 -4
  177. package/dist/src/integrations/safety.d.ts.map +1 -1
  178. package/dist/src/integrations/safety.js +45 -20
  179. package/dist/src/integrations/safety.js.map +1 -1
  180. package/dist/src/integrations/sandbox/basic.d.ts +7 -0
  181. package/dist/src/integrations/sandbox/basic.d.ts.map +1 -1
  182. package/dist/src/integrations/sandbox/basic.js +27 -2
  183. package/dist/src/integrations/sandbox/basic.js.map +1 -1
  184. package/dist/src/integrations/sandbox/docker.d.ts.map +1 -1
  185. package/dist/src/integrations/sandbox/docker.js +2 -1
  186. package/dist/src/integrations/sandbox/docker.js.map +1 -1
  187. package/dist/src/integrations/sandbox/index.d.ts +6 -0
  188. package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
  189. package/dist/src/integrations/sandbox/index.js +8 -4
  190. package/dist/src/integrations/sandbox/index.js.map +1 -1
  191. package/dist/src/integrations/sandbox/landlock.d.ts.map +1 -1
  192. package/dist/src/integrations/sandbox/landlock.js +3 -0
  193. package/dist/src/integrations/sandbox/landlock.js.map +1 -1
  194. package/dist/src/integrations/self-improvement.d.ts.map +1 -1
  195. package/dist/src/integrations/self-improvement.js +12 -0
  196. package/dist/src/integrations/self-improvement.js.map +1 -1
  197. package/dist/src/integrations/session-store.d.ts +1 -0
  198. package/dist/src/integrations/session-store.d.ts.map +1 -1
  199. package/dist/src/integrations/session-store.js +1 -0
  200. package/dist/src/integrations/session-store.js.map +1 -1
  201. package/dist/src/integrations/shared-blackboard.d.ts +3 -0
  202. package/dist/src/integrations/shared-blackboard.d.ts.map +1 -1
  203. package/dist/src/integrations/shared-blackboard.js +47 -0
  204. package/dist/src/integrations/shared-blackboard.js.map +1 -1
  205. package/dist/src/integrations/smart-decomposer.d.ts +45 -1
  206. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  207. package/dist/src/integrations/smart-decomposer.js +486 -30
  208. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  209. package/dist/src/integrations/sqlite-store.d.ts +2 -0
  210. package/dist/src/integrations/sqlite-store.d.ts.map +1 -1
  211. package/dist/src/integrations/sqlite-store.js +18 -6
  212. package/dist/src/integrations/sqlite-store.js.map +1 -1
  213. package/dist/src/integrations/swarm/failure-classifier.d.ts +11 -0
  214. package/dist/src/integrations/swarm/failure-classifier.d.ts.map +1 -0
  215. package/dist/src/integrations/swarm/failure-classifier.js +95 -0
  216. package/dist/src/integrations/swarm/failure-classifier.js.map +1 -0
  217. package/dist/src/integrations/swarm/index.d.ts +1 -1
  218. package/dist/src/integrations/swarm/index.d.ts.map +1 -1
  219. package/dist/src/integrations/swarm/index.js.map +1 -1
  220. package/dist/src/integrations/swarm/model-selector.d.ts +15 -0
  221. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
  222. package/dist/src/integrations/swarm/model-selector.js +100 -20
  223. package/dist/src/integrations/swarm/model-selector.js.map +1 -1
  224. package/dist/src/integrations/swarm/swarm-budget.d.ts +4 -0
  225. package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -1
  226. package/dist/src/integrations/swarm/swarm-budget.js +6 -0
  227. package/dist/src/integrations/swarm/swarm-budget.js.map +1 -1
  228. package/dist/src/integrations/swarm/swarm-config-loader.d.ts +8 -0
  229. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
  230. package/dist/src/integrations/swarm/swarm-config-loader.js +249 -7
  231. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
  232. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +86 -1
  233. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
  234. package/dist/src/integrations/swarm/swarm-event-bridge.js +207 -23
  235. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
  236. package/dist/src/integrations/swarm/swarm-events.d.ts +58 -1
  237. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
  238. package/dist/src/integrations/swarm/swarm-events.js +22 -5
  239. package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
  240. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +147 -8
  241. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
  242. package/dist/src/integrations/swarm/swarm-orchestrator.js +2179 -132
  243. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
  244. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +83 -2
  245. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
  246. package/dist/src/integrations/swarm/swarm-quality-gate.js +278 -19
  247. package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
  248. package/dist/src/integrations/swarm/swarm-state-store.d.ts +4 -1
  249. package/dist/src/integrations/swarm/swarm-state-store.d.ts.map +1 -1
  250. package/dist/src/integrations/swarm/swarm-state-store.js +8 -1
  251. package/dist/src/integrations/swarm/swarm-state-store.js.map +1 -1
  252. package/dist/src/integrations/swarm/task-queue.d.ts +54 -0
  253. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
  254. package/dist/src/integrations/swarm/task-queue.js +310 -12
  255. package/dist/src/integrations/swarm/task-queue.js.map +1 -1
  256. package/dist/src/integrations/swarm/types.d.ts +251 -13
  257. package/dist/src/integrations/swarm/types.d.ts.map +1 -1
  258. package/dist/src/integrations/swarm/types.js +70 -8
  259. package/dist/src/integrations/swarm/types.js.map +1 -1
  260. package/dist/src/integrations/swarm/worker-pool.d.ts +21 -4
  261. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
  262. package/dist/src/integrations/swarm/worker-pool.js +223 -44
  263. package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
  264. package/dist/src/integrations/task-manager.d.ts +33 -1
  265. package/dist/src/integrations/task-manager.d.ts.map +1 -1
  266. package/dist/src/integrations/task-manager.js +78 -4
  267. package/dist/src/integrations/task-manager.js.map +1 -1
  268. package/dist/src/integrations/tool-recommendation.d.ts +7 -4
  269. package/dist/src/integrations/tool-recommendation.d.ts.map +1 -1
  270. package/dist/src/integrations/tool-recommendation.js +58 -5
  271. package/dist/src/integrations/tool-recommendation.js.map +1 -1
  272. package/dist/src/integrations/work-log.js +4 -4
  273. package/dist/src/integrations/work-log.js.map +1 -1
  274. package/dist/src/main.js +106 -30
  275. package/dist/src/main.js.map +1 -1
  276. package/dist/src/modes/repl.d.ts.map +1 -1
  277. package/dist/src/modes/repl.js +50 -12
  278. package/dist/src/modes/repl.js.map +1 -1
  279. package/dist/src/modes/tui.d.ts.map +1 -1
  280. package/dist/src/modes/tui.js +41 -6
  281. package/dist/src/modes/tui.js.map +1 -1
  282. package/dist/src/modes.d.ts.map +1 -1
  283. package/dist/src/modes.js +4 -27
  284. package/dist/src/modes.js.map +1 -1
  285. package/dist/src/observability/tracer.d.ts.map +1 -1
  286. package/dist/src/observability/tracer.js +2 -1
  287. package/dist/src/observability/tracer.js.map +1 -1
  288. package/dist/src/persistence/schema.d.ts.map +1 -1
  289. package/dist/src/persistence/schema.js +11 -0
  290. package/dist/src/persistence/schema.js.map +1 -1
  291. package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
  292. package/dist/src/providers/adapters/anthropic.js +3 -2
  293. package/dist/src/providers/adapters/anthropic.js.map +1 -1
  294. package/dist/src/providers/adapters/openai.d.ts.map +1 -1
  295. package/dist/src/providers/adapters/openai.js +3 -2
  296. package/dist/src/providers/adapters/openai.js.map +1 -1
  297. package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
  298. package/dist/src/providers/adapters/openrouter.js +11 -11
  299. package/dist/src/providers/adapters/openrouter.js.map +1 -1
  300. package/dist/src/providers/circuit-breaker.d.ts +1 -0
  301. package/dist/src/providers/circuit-breaker.d.ts.map +1 -1
  302. package/dist/src/providers/circuit-breaker.js.map +1 -1
  303. package/dist/src/providers/provider.d.ts.map +1 -1
  304. package/dist/src/providers/provider.js +2 -1
  305. package/dist/src/providers/provider.js.map +1 -1
  306. package/dist/src/providers/resilient-provider.d.ts.map +1 -1
  307. package/dist/src/providers/resilient-provider.js +2 -1
  308. package/dist/src/providers/resilient-provider.js.map +1 -1
  309. package/dist/src/session-picker.d.ts.map +1 -1
  310. package/dist/src/session-picker.js +40 -5
  311. package/dist/src/session-picker.js.map +1 -1
  312. package/dist/src/shared/budget-tracker.d.ts +65 -0
  313. package/dist/src/shared/budget-tracker.d.ts.map +1 -0
  314. package/dist/src/shared/budget-tracker.js +128 -0
  315. package/dist/src/shared/budget-tracker.js.map +1 -0
  316. package/dist/src/shared/context-engine.d.ts +64 -0
  317. package/dist/src/shared/context-engine.d.ts.map +1 -0
  318. package/dist/src/shared/context-engine.js +117 -0
  319. package/dist/src/shared/context-engine.js.map +1 -0
  320. package/dist/src/shared/index.d.ts +12 -0
  321. package/dist/src/shared/index.d.ts.map +1 -0
  322. package/dist/src/shared/index.js +12 -0
  323. package/dist/src/shared/index.js.map +1 -0
  324. package/dist/src/shared/persistence.d.ts +57 -0
  325. package/dist/src/shared/persistence.d.ts.map +1 -0
  326. package/dist/src/shared/persistence.js +168 -0
  327. package/dist/src/shared/persistence.js.map +1 -0
  328. package/dist/src/shared/shared-context-state.d.ts +89 -0
  329. package/dist/src/shared/shared-context-state.d.ts.map +1 -0
  330. package/dist/src/shared/shared-context-state.js +175 -0
  331. package/dist/src/shared/shared-context-state.js.map +1 -0
  332. package/dist/src/shared/shared-economics-state.d.ts +61 -0
  333. package/dist/src/shared/shared-economics-state.d.ts.map +1 -0
  334. package/dist/src/shared/shared-economics-state.js +100 -0
  335. package/dist/src/shared/shared-economics-state.js.map +1 -0
  336. package/dist/src/tools/agent.d.ts.map +1 -1
  337. package/dist/src/tools/agent.js +11 -2
  338. package/dist/src/tools/agent.js.map +1 -1
  339. package/dist/src/tools/bash.d.ts +1 -1
  340. package/dist/src/tools/bash.d.ts.map +1 -1
  341. package/dist/src/tools/bash.js +2 -1
  342. package/dist/src/tools/bash.js.map +1 -1
  343. package/dist/src/tools/coercion.d.ts +6 -0
  344. package/dist/src/tools/coercion.d.ts.map +1 -1
  345. package/dist/src/tools/coercion.js +13 -0
  346. package/dist/src/tools/coercion.js.map +1 -1
  347. package/dist/src/tools/file.d.ts +5 -5
  348. package/dist/src/tools/file.js +2 -2
  349. package/dist/src/tools/file.js.map +1 -1
  350. package/dist/src/tools/permission.d.ts.map +1 -1
  351. package/dist/src/tools/permission.js +10 -116
  352. package/dist/src/tools/permission.js.map +1 -1
  353. package/dist/src/tools/types.d.ts +1 -0
  354. package/dist/src/tools/types.d.ts.map +1 -1
  355. package/dist/src/tools/types.js.map +1 -1
  356. package/dist/src/tracing/trace-collector.d.ts +292 -0
  357. package/dist/src/tracing/trace-collector.d.ts.map +1 -1
  358. package/dist/src/tracing/trace-collector.js +249 -5
  359. package/dist/src/tracing/trace-collector.js.map +1 -1
  360. package/dist/src/tracing/types.d.ts +200 -1
  361. package/dist/src/tracing/types.d.ts.map +1 -1
  362. package/dist/src/tracing/types.js.map +1 -1
  363. package/dist/src/tricks/failure-evidence.d.ts.map +1 -1
  364. package/dist/src/tricks/failure-evidence.js +2 -1
  365. package/dist/src/tricks/failure-evidence.js.map +1 -1
  366. package/dist/src/tui/app.d.ts +13 -0
  367. package/dist/src/tui/app.d.ts.map +1 -1
  368. package/dist/src/tui/app.js +162 -19
  369. package/dist/src/tui/app.js.map +1 -1
  370. package/dist/src/tui/components/ErrorBoundary.d.ts.map +1 -1
  371. package/dist/src/tui/components/ErrorBoundary.js +3 -2
  372. package/dist/src/tui/components/ErrorBoundary.js.map +1 -1
  373. package/dist/src/tui/event-display.d.ts.map +1 -1
  374. package/dist/src/tui/event-display.js +36 -62
  375. package/dist/src/tui/event-display.js.map +1 -1
  376. package/dist/src/tui/index.d.ts +4 -0
  377. package/dist/src/tui/index.d.ts.map +1 -1
  378. package/dist/src/tui/index.js +17 -0
  379. package/dist/src/tui/index.js.map +1 -1
  380. package/dist/src/types.d.ts +214 -1
  381. package/dist/src/types.d.ts.map +1 -1
  382. package/package.json +18 -3
@@ -16,34 +16,113 @@
16
16
  * - State persistence and resume
17
17
  * - Orchestrator decision logging
18
18
  */
19
- import { createSmartDecomposer, parseDecompositionResponse } from '../smart-decomposer.js';
19
+ import * as fs from 'node:fs';
20
+ import * as path from 'node:path';
21
+ import { createSmartDecomposer, parseDecompositionResponse, validateDecomposition } from '../smart-decomposer.js';
20
22
  import { createResultSynthesizer } from '../result-synthesizer.js';
21
- import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, SUBTASK_TO_CAPABILITY } from './types.js';
23
+ import { taskResultToAgentOutput, DEFAULT_SWARM_CONFIG, getTaskTypeConfig } from './types.js';
22
24
  import { createSwarmTaskQueue } from './task-queue.js';
23
25
  import { createSwarmBudgetPool } from './swarm-budget.js';
24
26
  import { createSwarmWorkerPool } from './worker-pool.js';
25
- import { evaluateWorkerOutput } from './swarm-quality-gate.js';
27
+ import { evaluateWorkerOutput, runPreFlightChecks, checkArtifacts, checkArtifactsEnhanced, runConcreteChecks } from './swarm-quality-gate.js';
26
28
  import { ModelHealthTracker, selectAlternativeModel } from './model-selector.js';
27
29
  import { SwarmStateStore } from './swarm-state-store.js';
30
+ import { createSharedContextState } from '../../shared/shared-context-state.js';
31
+ import { createSharedEconomicsState } from '../../shared/shared-economics-state.js';
32
+ import { createSharedContextEngine } from '../../shared/context-engine.js';
33
+ import { classifySwarmFailure } from './failure-classifier.js';
28
34
  // ─── Hollow Completion Detection ──────────────────────────────────────────
29
35
  /**
30
- * V10: Minimal hollow completion detection — let the quality gate judge everything else.
31
- * Only catches truly empty completions: zero tool calls AND trivial output (<50 chars).
32
- * No task-type lists, no closure report checks, no hardcoded thresholds beyond the bare minimum.
36
+ * V11: Hollow completion detection — catches empty completions AND "success" with failure language.
37
+ * Zero tool calls AND trivial output is always hollow.
38
+ * Additionally, success=true but output containing failure admissions is also hollow
39
+ * this catches workers that report success but actually did no useful work.
33
40
  */
34
- export function isHollowCompletion(spawnResult) {
41
+ const FAILURE_INDICATORS = [
42
+ 'budget exhausted', 'unable to complete', 'could not complete',
43
+ 'ran out of budget', 'no changes were made', 'no files were modified',
44
+ 'no files were created', 'failed to complete', 'before research could begin',
45
+ 'i was unable to', 'i could not', 'unfortunately i',
46
+ ];
47
+ const BOILERPLATE_INDICATORS = [
48
+ 'task completed successfully', 'i have completed the task',
49
+ 'the task has been completed', 'done', 'completed', 'finished',
50
+ 'no issues found', 'everything looks good', 'all tasks completed',
51
+ ];
52
+ function hasFutureIntentLanguage(content) {
53
+ const trimmed = content.trim();
54
+ if (!trimmed)
55
+ return false;
56
+ const lower = trimmed.toLowerCase();
57
+ const completionSignals = /\b(done|completed|finished|created|saved|wrote|implemented|fixed|updated|added)\b/;
58
+ if (completionSignals.test(lower))
59
+ return false;
60
+ const futureIntentPatterns = [
61
+ /\b(i\s+will|i'll|let me)\s+(create|write|save|update|modify|fix|add|edit|implement|change|run|execute|build|continue)\b/,
62
+ /\b(i\s+need to|i\s+should|i\s+can)\s+(create|write|update|modify|fix|add|edit|implement|continue)\b/,
63
+ /\b(next step|remaining work|still need|to be done)\b/,
64
+ /\b(i am going to|i'm going to)\b/,
65
+ ];
66
+ return futureIntentPatterns.some(p => p.test(lower));
67
+ }
68
+ function repoLooksUnscaffolded(baseDir) {
69
+ try {
70
+ const packageJson = path.join(baseDir, 'package.json');
71
+ const srcDir = path.join(baseDir, 'src');
72
+ if (!fs.existsSync(packageJson) && !fs.existsSync(srcDir)) {
73
+ return true;
74
+ }
75
+ }
76
+ catch {
77
+ return false;
78
+ }
79
+ return false;
80
+ }
81
+ export function isHollowCompletion(spawnResult, taskType, swarmConfig) {
35
82
  // Timeout uses toolCalls === -1, not hollow
36
- if (spawnResult.metrics.toolCalls === -1)
83
+ if ((spawnResult.metrics.toolCalls ?? 0) === -1)
37
84
  return false;
38
- // Only catch truly empty completions: zero tools AND trivial output
39
- return spawnResult.metrics.toolCalls === 0
40
- && (spawnResult.output?.trim().length ?? 0) < 50;
85
+ const toolCalls = spawnResult.metrics.toolCalls ?? 0;
86
+ // Truly empty completions: zero tools AND trivial output
87
+ // P4: Higher threshold (120 chars) + configurable via SwarmConfig
88
+ const hollowThreshold = swarmConfig?.hollowOutputThreshold ?? 120;
89
+ if (toolCalls === 0
90
+ && (spawnResult.output?.trim().length ?? 0) < hollowThreshold) {
91
+ return true;
92
+ }
93
+ // P4: Boilerplate detection — zero tools AND short output that's just boilerplate
94
+ if (toolCalls === 0 && (spawnResult.output?.trim().length ?? 0) < 300) {
95
+ const outputLower = (spawnResult.output ?? '').toLowerCase().trim();
96
+ if (BOILERPLATE_INDICATORS.some(b => outputLower.includes(b))) {
97
+ return true;
98
+ }
99
+ }
100
+ // "Success" that admits failure: worker claims success but output contains failure language
101
+ if (spawnResult.success) {
102
+ const outputLower = (spawnResult.output ?? '').toLowerCase();
103
+ if (FAILURE_INDICATORS.some(f => outputLower.includes(f))) {
104
+ return true;
105
+ }
106
+ }
107
+ // V7: Use configurable requiresToolCalls from TaskTypeConfig.
108
+ // For action-oriented tasks (implement/test/refactor/etc), zero tool calls is ALWAYS hollow.
109
+ if (taskType) {
110
+ const typeConfig = getTaskTypeConfig(taskType, swarmConfig);
111
+ if (typeConfig.requiresToolCalls && toolCalls === 0) {
112
+ return true;
113
+ }
114
+ }
115
+ return false;
41
116
  }
42
117
  // ─── Orchestrator ──────────────────────────────────────────────────────────
43
118
  export class SwarmOrchestrator {
44
119
  config;
45
120
  provider;
46
121
  blackboard;
122
+ // Phase 3.1+3.2: Shared state for cross-worker learning
123
+ sharedContextState;
124
+ sharedEconomicsState;
125
+ sharedContextEngine;
47
126
  taskQueue;
48
127
  budgetPool;
49
128
  workerPool;
@@ -61,10 +140,15 @@ export class SwarmOrchestrator {
61
140
  retries = 0;
62
141
  startTime = 0;
63
142
  modelUsage = new Map();
143
+ // Orchestrator's own LLM usage (separate from worker usage)
144
+ orchestratorTokens = 0;
145
+ orchestratorCost = 0;
146
+ orchestratorCalls = 0;
64
147
  // V2: Planning, review, verification, health, persistence
65
148
  plan;
66
149
  waveReviews = [];
67
150
  verificationResult;
151
+ artifactInventory;
68
152
  orchestratorDecisions = [];
69
153
  healthTracker;
70
154
  stateStore;
@@ -75,25 +159,96 @@ export class SwarmOrchestrator {
75
159
  static CIRCUIT_BREAKER_WINDOW_MS = 30_000;
76
160
  static CIRCUIT_BREAKER_THRESHOLD = 3;
77
161
  static CIRCUIT_BREAKER_PAUSE_MS = 15_000;
78
- // Quality gate circuit breaker: disable quality gates after too many consecutive rejections
79
- consecutiveQualityRejections = 0;
80
- qualityGateDisabled = false;
81
- static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 8;
162
+ // P3: Per-model quality gate circuit breaker (replaces global circuit breaker)
163
+ perModelQualityRejections = new Map();
164
+ qualityGateDisabledModels = new Set();
165
+ static QUALITY_CIRCUIT_BREAKER_THRESHOLD = 5;
166
+ // Hollow completion streak: early termination when single-model swarm produces only hollows
167
+ hollowStreak = 0;
168
+ static HOLLOW_STREAK_THRESHOLD = 3;
169
+ // V7: Global dispatch + hollow ratio tracking for multi-model termination
170
+ totalDispatches = 0;
171
+ totalHollows = 0;
172
+ // Hollow ratio warning (fired once, then suppressed to avoid log spam)
173
+ hollowRatioWarned = false;
174
+ // P7: Adaptive dispatch stagger — increases on rate limits, decreases on success
175
+ adaptiveStaggerMs = 0; // Initialized from config in constructor
176
+ // F25: Consecutive timeout tracking per task — early-fail after limit
177
+ taskTimeoutCounts = new Map();
178
+ // Original prompt for re-planning on resume
179
+ originalPrompt = '';
180
+ // Mid-swarm re-planning: only once per swarm execution
181
+ hasReplanned = false;
82
182
  constructor(config, provider, agentRegistry, spawnAgentFn, blackboard) {
83
183
  this.config = { ...DEFAULT_SWARM_CONFIG, ...config };
84
184
  this.provider = provider;
85
185
  this.blackboard = blackboard;
86
186
  this.spawnAgentFn = spawnAgentFn;
87
187
  this.healthTracker = new ModelHealthTracker();
188
+ this.adaptiveStaggerMs = this.getStaggerMs();
189
+ // Phase 3.1+3.2: Shared context & economics for cross-worker learning
190
+ this.sharedContextState = createSharedContextState({
191
+ staticPrefix: 'You are a swarm worker agent.',
192
+ maxFailures: 100,
193
+ maxReferences: 200,
194
+ });
195
+ this.sharedEconomicsState = createSharedEconomicsState({
196
+ globalDoomLoopThreshold: 10,
197
+ });
198
+ this.sharedContextEngine = createSharedContextEngine(this.sharedContextState, {
199
+ maxFailuresInPrompt: 5,
200
+ includeInsights: true,
201
+ });
88
202
  this.taskQueue = createSwarmTaskQueue();
89
203
  this.budgetPool = createSwarmBudgetPool(this.config);
90
- this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool);
204
+ this.workerPool = createSwarmWorkerPool(this.config, agentRegistry, spawnAgentFn, this.budgetPool, this.healthTracker, this.sharedContextEngine);
91
205
  // Initialize state store if persistence enabled
92
206
  if (this.config.enablePersistence) {
93
207
  this.stateStore = new SwarmStateStore(this.config.stateDir ?? '.agent/swarm-state', this.config.resumeSessionId);
94
208
  }
95
209
  // C1: Build LLM decompose function with explicit JSON schema
96
- const llmDecompose = async (task, _context) => {
210
+ const llmDecompose = async (task, context) => {
211
+ // V7: Dynamically build the allowed type list from built-in + user-defined types
212
+ const builtinTypes = ['research', 'analysis', 'design', 'implement', 'test', 'refactor', 'review', 'document', 'integrate', 'deploy', 'merge'];
213
+ const customTypes = Object.keys(this.config.taskTypes ?? {}).filter(t => !builtinTypes.includes(t));
214
+ const allTypes = [...builtinTypes, ...customTypes];
215
+ const typeListStr = allTypes.map(t => `"${t}"`).join(' | ');
216
+ // Build custom type descriptions so the LLM knows when to use them
217
+ let customTypeSection = '';
218
+ if (customTypes.length > 0) {
219
+ const descriptions = customTypes.map(t => {
220
+ const cfg = this.config.taskTypes[t];
221
+ const parts = [` - "${t}"`];
222
+ if (cfg.capability)
223
+ parts.push(`(capability: ${cfg.capability})`);
224
+ if (cfg.promptTemplate)
225
+ parts.push(`— uses ${cfg.promptTemplate} workflow`);
226
+ if (cfg.timeout)
227
+ parts.push(`— timeout: ${Math.round(cfg.timeout / 60000)}min`);
228
+ return parts.join(' ');
229
+ }).join('\n');
230
+ customTypeSection = `\n\nCustom task types available:\n${descriptions}\nUse these when their description matches the subtask's purpose.`;
231
+ }
232
+ // Build codebase context section from repo map if available
233
+ let codebaseSection = '';
234
+ if (context.repoMap) {
235
+ const map = context.repoMap;
236
+ const topFiles = Array.from(map.chunks.values())
237
+ .sort((a, b) => b.importance - a.importance)
238
+ .slice(0, 30)
239
+ .map(c => ` - ${c.filePath} (${c.type}, ${c.tokenCount} tokens, importance: ${c.importance.toFixed(2)})`);
240
+ codebaseSection = `
241
+
242
+ CODEBASE STRUCTURE (${map.chunks.size} files, ${map.totalTokens} total tokens):
243
+ Entry points: ${map.entryPoints.slice(0, 5).join(', ')}
244
+ Core modules: ${map.coreModules.slice(0, 5).join(', ')}
245
+ Key files:
246
+ ${topFiles.join('\n')}
247
+
248
+ CRITICAL: Your subtasks MUST reference actual files from this codebase.
249
+ Do NOT invent new project scaffolding or create files that don't relate to the existing codebase.
250
+ Decompose the work based on what ALREADY EXISTS in the project.`;
251
+ }
97
252
  const systemPrompt = `You are a task decomposition expert. Break down the given task into well-defined subtasks with clear dependencies.
98
253
 
99
254
  CRITICAL: Dependencies MUST use zero-based integer indices referring to other subtasks in the array.
@@ -103,7 +258,7 @@ Respond with valid JSON matching this exact schema:
103
258
  "subtasks": [
104
259
  {
105
260
  "description": "Clear description of what this subtask does",
106
- "type": "implement" | "research" | "analysis" | "design" | "test" | "refactor" | "review" | "document" | "integrate" | "deploy" | "merge",
261
+ "type": ${typeListStr},
107
262
  "complexity": 1-10,
108
263
  "dependencies": [0, 1],
109
264
  "parallelizable": true | false,
@@ -112,7 +267,7 @@ Respond with valid JSON matching this exact schema:
112
267
  ],
113
268
  "strategy": "sequential" | "parallel" | "hierarchical" | "adaptive" | "pipeline",
114
269
  "reasoning": "Brief explanation of why this decomposition was chosen"
115
- }
270
+ }${customTypeSection}${codebaseSection}
116
271
 
117
272
  EXAMPLE 1 — Research task (3 parallel research + 1 merge):
118
273
  {
@@ -149,11 +304,52 @@ Rules:
149
304
  { role: 'user', content: task },
150
305
  ], {
151
306
  model: this.config.orchestratorModel,
152
- maxTokens: 4000,
307
+ maxTokens: 16000,
153
308
  temperature: 0.3,
154
309
  });
310
+ this.trackOrchestratorUsage(response, 'decompose');
155
311
  // Use parseDecompositionResponse which handles markdown code blocks and edge cases
156
- return parseDecompositionResponse(response.content);
312
+ const result = parseDecompositionResponse(response.content);
313
+ // If decomposition returned 0 subtasks, log diagnostics and retry with explicit JSON instruction
314
+ if (result.subtasks.length === 0) {
315
+ const snippet = response.content?.slice(0, 500) ?? '(empty response)';
316
+ const parseError = result.parseError ?? 'unknown';
317
+ this.errors.push({
318
+ phase: 'decomposition',
319
+ message: `LLM returned no subtasks. Parse error: ${parseError}. Response preview: ${snippet}`,
320
+ recovered: true,
321
+ });
322
+ this.emit({
323
+ type: 'swarm.orchestrator.decision',
324
+ decision: {
325
+ timestamp: Date.now(),
326
+ phase: 'decomposition',
327
+ decision: `Empty decomposition — retrying with explicit JSON instruction`,
328
+ reasoning: `Parse error: ${parseError}. Response preview (first 500 chars): ${snippet}`,
329
+ },
330
+ });
331
+ // Retry with explicit JSON instruction — don't include previous truncated response (wastes input tokens)
332
+ const retryResponse = await this.provider.chat([
333
+ { role: 'system', content: systemPrompt },
334
+ { role: 'user', content: `${task}\n\nIMPORTANT: Your previous attempt was truncated or could not be parsed (${parseError}). Return ONLY a raw JSON object with NO markdown formatting, NO explanation text, NO code fences. The JSON must have a "subtasks" array with at least 2 entries matching the schema above. Keep subtask descriptions concise to avoid truncation.` },
335
+ ], {
336
+ model: this.config.orchestratorModel,
337
+ maxTokens: 16000,
338
+ temperature: 0.2,
339
+ });
340
+ this.trackOrchestratorUsage(retryResponse, 'decompose-retry');
341
+ const retryResult = parseDecompositionResponse(retryResponse.content);
342
+ if (retryResult.subtasks.length === 0) {
343
+ const retrySnippet = retryResponse.content?.slice(0, 500) ?? '(empty response)';
344
+ this.errors.push({
345
+ phase: 'decomposition',
346
+ message: `Retry also returned no subtasks. Response preview: ${retrySnippet}`,
347
+ recovered: false,
348
+ });
349
+ }
350
+ return retryResult;
351
+ }
352
+ return result;
157
353
  };
158
354
  // Configure decomposer for swarm use
159
355
  const decomposer = createSmartDecomposer({
@@ -171,6 +367,18 @@ Rules:
171
367
  getBudgetPool() {
172
368
  return this.budgetPool;
173
369
  }
370
+ /** Get shared context state for cross-worker failure learning. */
371
+ getSharedContextState() {
372
+ return this.sharedContextState;
373
+ }
374
+ /** Get shared economics state for cross-worker doom loop aggregation. */
375
+ getSharedEconomicsState() {
376
+ return this.sharedEconomicsState;
377
+ }
378
+ /** Get shared context engine for cross-worker failure learning. */
379
+ getSharedContextEngine() {
380
+ return this.sharedContextEngine;
381
+ }
174
382
  /**
175
383
  * Subscribe to swarm events.
176
384
  */
@@ -190,11 +398,38 @@ Rules:
190
398
  try {
191
399
  listener(event);
192
400
  }
193
- catch {
194
- // Don't let listener errors break the orchestrator
401
+ catch (err) {
402
+ // Don't let listener errors break the orchestrator, but log for debugging
403
+ const msg = err instanceof Error ? err.message : String(err);
404
+ if (process.env.DEBUG) {
405
+ console.error(`[SwarmOrchestrator] Listener error on ${event.type}: ${msg}`);
406
+ }
195
407
  }
196
408
  }
197
409
  }
410
+ /**
411
+ * Track token usage from an orchestrator LLM call.
412
+ */
413
+ trackOrchestratorUsage(response, purpose) {
414
+ if (!response.usage)
415
+ return;
416
+ // Handle both raw API fields (total_tokens, prompt_tokens, completion_tokens)
417
+ // and ChatResponse fields (inputTokens, outputTokens)
418
+ const input = response.usage.prompt_tokens ?? response.usage.inputTokens ?? 0;
419
+ const output = response.usage.completion_tokens ?? response.usage.outputTokens ?? 0;
420
+ const tokens = response.usage.total_tokens ?? (input + output);
421
+ const cost = response.usage.cost ?? tokens * 0.000015; // ~$15/M tokens average for orchestrator models
422
+ this.orchestratorTokens += tokens;
423
+ this.orchestratorCost += cost;
424
+ this.orchestratorCalls++;
425
+ this.emit({
426
+ type: 'swarm.orchestrator.llm',
427
+ model: this.config.orchestratorModel,
428
+ purpose,
429
+ tokens,
430
+ cost,
431
+ });
432
+ }
198
433
  /**
199
434
  * Execute the full swarm pipeline for a task.
200
435
  *
@@ -211,6 +446,7 @@ Rules:
211
446
  */
212
447
  async execute(task) {
213
448
  this.startTime = Date.now();
449
+ this.originalPrompt = task;
214
450
  try {
215
451
  // V2: Check for resume
216
452
  if (this.config.resumeSessionId && this.stateStore) {
@@ -219,15 +455,102 @@ Rules:
219
455
  // Phase 1: Decompose
220
456
  this.currentPhase = 'decomposing';
221
457
  this.emit({ type: 'swarm.phase.progress', phase: 'decomposing', message: 'Decomposing task into subtasks...' });
222
- const decomposition = await this.decompose(task);
223
- if (!decomposition) {
458
+ const decomposeOutcome = await this.decompose(task);
459
+ if (!decomposeOutcome.result) {
224
460
  this.currentPhase = 'failed';
225
- return this.buildErrorResult('Decomposition failed — task may be too simple for swarm mode');
461
+ return this.buildErrorResult(`Decomposition failed: ${decomposeOutcome.failureReason}`);
462
+ }
463
+ let decomposition = decomposeOutcome.result;
464
+ // If repository is mostly empty, force a scaffold-first dependency chain
465
+ // so implementation tasks don't immediately fail on missing files.
466
+ if (repoLooksUnscaffolded(this.config.facts?.workingDirectory ?? process.cwd())) {
467
+ const scaffoldTask = decomposition.subtasks.find(st => /\b(scaffold|bootstrap|initialize|setup|set up|project scaffold)\b/i.test(st.description));
468
+ if (scaffoldTask) {
469
+ for (const subtask of decomposition.subtasks) {
470
+ if (subtask.id === scaffoldTask.id)
471
+ continue;
472
+ if (!subtask.dependencies.includes(scaffoldTask.id)) {
473
+ subtask.dependencies.push(scaffoldTask.id);
474
+ }
475
+ }
476
+ this.logDecision('scaffold-first', `Repo appears unscaffolded; enforcing scaffold task ${scaffoldTask.id} as prerequisite`, '');
477
+ }
478
+ }
479
+ // F5: Validate decomposition — check for cycles, invalid deps, granularity
480
+ const validation = validateDecomposition(decomposition);
481
+ if (validation.warnings.length > 0) {
482
+ this.logDecision('decomposition-validation', `Warnings: ${validation.warnings.join('; ')}`, '');
483
+ }
484
+ if (!validation.valid) {
485
+ this.logDecision('decomposition-validation', `Invalid decomposition: ${validation.issues.join('; ')}`, 'Retrying...');
486
+ // Retry decomposition once with feedback
487
+ const retryOutcome = await this.decompose(`${task}\n\nIMPORTANT: Previous decomposition was invalid: ${validation.issues.join('. ')}. Fix these issues.`);
488
+ if (!retryOutcome.result) {
489
+ this.currentPhase = 'failed';
490
+ return this.buildErrorResult(`Decomposition validation failed: ${validation.issues.join('; ')}`);
491
+ }
492
+ decomposition = retryOutcome.result;
493
+ const retryValidation = validateDecomposition(decomposition);
494
+ if (!retryValidation.valid) {
495
+ this.logDecision('decomposition-validation', `Retry still invalid: ${retryValidation.issues.join('; ')}`, 'Proceeding anyway');
496
+ }
226
497
  }
227
498
  // Phase 2: Schedule into waves
228
499
  this.currentPhase = 'scheduling';
229
500
  this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Scheduling ${decomposition.subtasks.length} subtasks into waves...` });
230
501
  this.taskQueue.loadFromDecomposition(decomposition, this.config);
502
+ // F3: Dynamic orchestrator reserve scaling based on subtask count.
503
+ // More subtasks = more quality gate calls, synthesis work, and review overhead.
504
+ // Formula: max(configured ratio, 5% per subtask), capped at 40%.
505
+ const subtaskCount = decomposition.subtasks.length;
506
+ const dynamicReserveRatio = Math.min(0.40, Math.max(this.config.orchestratorReserveRatio, subtaskCount * 0.05));
507
+ if (dynamicReserveRatio > this.config.orchestratorReserveRatio) {
508
+ this.logDecision('budget-scaling', `Scaled orchestrator reserve from ${(this.config.orchestratorReserveRatio * 100).toFixed(0)}% to ${(dynamicReserveRatio * 100).toFixed(0)}% for ${subtaskCount} subtasks`, '');
509
+ }
510
+ // Foundation task detection: tasks that are the sole dependency of 3+ downstream
511
+ // tasks are critical — if they fail, the entire swarm cascade-skips.
512
+ // Give them extra retries and timeout scaling.
513
+ this.detectFoundationTasks();
514
+ // D3/F1: Probe model capability before dispatch (default: true)
515
+ if (this.config.probeModels !== false) {
516
+ await this.probeModelCapability();
517
+ // F15/F23: Handle all-models-failed probe scenario
518
+ // Resolve strategy: explicit probeFailureStrategy > legacy ignoreProbeFailures > default 'warn-and-try'
519
+ const probeStrategy = this.config.probeFailureStrategy
520
+ ?? (this.config.ignoreProbeFailures ? 'warn-and-try' : 'warn-and-try');
521
+ const uniqueModels = [...new Set(this.config.workers.map(w => w.model))];
522
+ const healthyModels = this.healthTracker.getHealthy(uniqueModels);
523
+ if (healthyModels.length === 0 && uniqueModels.length > 0) {
524
+ if (probeStrategy === 'abort') {
525
+ // Hard abort — no tasks dispatched
526
+ const reason = `All ${uniqueModels.length} worker model(s) failed capability probes — no model can make tool calls. Aborting swarm to prevent budget waste. Fix model configuration and retry.`;
527
+ this.logDecision('probe-abort', reason, `Models tested: ${uniqueModels.join(', ')}`);
528
+ this.emit({ type: 'swarm.abort', reason });
529
+ this.skipRemainingTasks(reason);
530
+ const totalTasks = this.taskQueue.getStats().total;
531
+ const abortStats = {
532
+ completedTasks: 0, failedTasks: 0, skippedTasks: totalTasks,
533
+ totalTasks, totalWaves: 0, totalTokens: 0, totalCost: 0,
534
+ totalDurationMs: Date.now() - this.startTime,
535
+ qualityRejections: 0, retries: 0,
536
+ modelUsage: new Map(),
537
+ };
538
+ this.emit({ type: 'swarm.complete', stats: abortStats, errors: this.errors });
539
+ return {
540
+ success: false, summary: reason,
541
+ tasks: this.taskQueue.getAllTasks(), stats: abortStats, errors: this.errors,
542
+ };
543
+ }
544
+ else {
545
+ // F23: warn-and-try — log warning, reset health, let real tasks prove capability
546
+ this.logDecision('probe-warning', `All ${uniqueModels.length} model(s) failed probe — continuing anyway (strategy: warn-and-try)`, 'Will abort after first real task failure if model cannot use tools');
547
+ // Reset health so dispatch doesn't skip all models
548
+ for (const model of uniqueModels) {
549
+ this.healthTracker.recordSuccess(model, 0);
550
+ }
551
+ }
552
+ }
553
+ }
231
554
  // Emit skip events when tasks are cascade-skipped due to dependency failures
232
555
  this.taskQueue.setOnCascadeSkip((skippedTaskId, reason) => {
233
556
  this.emit({ type: 'swarm.task.skipped', taskId: skippedTaskId, reason });
@@ -262,9 +585,14 @@ Rules:
262
585
  // Phase 3: Execute waves (planning runs concurrently)
263
586
  this.currentPhase = 'executing';
264
587
  await this.executeWaves();
588
+ // V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
589
+ if (!this.cancelled)
590
+ await this.finalRescuePass();
265
591
  // Ensure planning completed before verification/synthesis
266
592
  if (planPromise)
267
593
  await planPromise;
594
+ // Post-wave artifact audit: scan filesystem for files created by workers
595
+ this.artifactInventory = this.buildArtifactInventory();
268
596
  // V2: Phase 3.5: Verify integration
269
597
  if (this.config.enableVerification && this.plan?.integrationTestPlan) {
270
598
  this.currentPhase = 'verifying';
@@ -280,10 +608,20 @@ Rules:
280
608
  const executionStats = this.buildStats();
281
609
  // V2: Final checkpoint
282
610
  this.checkpoint('final');
283
- this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors });
611
+ const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
612
+ this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
613
+ // Success requires completing at least 70% of tasks (not just > 0)
614
+ const completionRatio = executionStats.totalTasks > 0
615
+ ? executionStats.completedTasks / executionStats.totalTasks
616
+ : 0;
617
+ const isSuccess = completionRatio >= 0.7;
618
+ const isPartialSuccess = !isSuccess && executionStats.completedTasks > 0;
284
619
  return {
285
- success: executionStats.completedTasks > 0,
620
+ success: isSuccess,
621
+ partialSuccess: isPartialSuccess || (!executionStats.completedTasks && hasArtifacts),
622
+ partialFailure: executionStats.failedTasks > 0,
286
623
  synthesisResult: synthesisResult ?? undefined,
624
+ artifactInventory: this.artifactInventory,
287
625
  summary: this.buildSummary(executionStats),
288
626
  tasks: this.taskQueue.getAllTasks(),
289
627
  stats: executionStats,
@@ -310,33 +648,234 @@ Rules:
310
648
  */
311
649
  async decompose(task) {
312
650
  try {
313
- const result = await this._decomposer.decompose(task);
651
+ const repoMap = this.config.codebaseContext?.getRepoMap() ?? undefined;
652
+ const result = await this._decomposer.decompose(task, {
653
+ repoMap,
654
+ });
314
655
  if (result.subtasks.length < 2) {
315
- // Too simple for swarm mode
316
- return null;
656
+ const reason = result.subtasks.length === 0
657
+ ? `Decomposition produced 0 subtasks (model: ${this.config.orchestratorModel}).`
658
+ : `Decomposition produced only ${result.subtasks.length} subtask — too few for swarm mode.`;
659
+ this.logDecision('decomposition', `Insufficient subtasks: ${result.subtasks.length}`, reason);
660
+ try {
661
+ const lastResortResult = await this.lastResortDecompose(task);
662
+ if (lastResortResult && lastResortResult.subtasks.length >= 2) {
663
+ this.logDecision('decomposition', `Last-resort decomposition succeeded: ${lastResortResult.subtasks.length} subtasks`, 'Recovered from insufficient primary decomposition');
664
+ return { result: lastResortResult };
665
+ }
666
+ }
667
+ catch (error) {
668
+ this.logDecision('decomposition', 'Last-resort decomposition failed after insufficient primary decomposition', error.message);
669
+ }
670
+ const fallback = this.buildEmergencyDecomposition(task, reason);
671
+ this.emit({
672
+ type: 'swarm.phase.progress',
673
+ phase: 'decomposing',
674
+ message: `Using emergency decomposition fallback (${this.classifyDecompositionFailure(reason)})`,
675
+ });
676
+ this.logDecision('decomposition', `Using emergency scaffold decomposition: ${fallback.subtasks.length} subtasks`, 'Swarm will continue with deterministic fallback tasks');
677
+ return { result: fallback };
317
678
  }
318
- // Reject heuristic fallback the generic 3-task chain is worse than aborting
679
+ // Non-LLM result means decomposer fell back to heuristic mode.
680
+ // Prefer a simplified LLM decomposition, but continue with heuristic fallback when needed.
319
681
  if (!result.metadata.llmAssisted) {
320
- this.logDecision('decomposition', 'Rejected heuristic fallback DAG', 'LLM decomposition failed after retries. Heuristic DAG is not useful.');
321
- return null;
682
+ this.logDecision('decomposition', 'Heuristic decomposition detected attempting last-resort simplified LLM decomposition', `Model: ${this.config.orchestratorModel}`);
683
+ try {
684
+ const lastResortResult = await this.lastResortDecompose(task);
685
+ if (lastResortResult && lastResortResult.subtasks.length >= 2) {
686
+ this.logDecision('decomposition', `Last-resort decomposition succeeded: ${lastResortResult.subtasks.length} subtasks`, 'Simplified prompt worked');
687
+ return { result: lastResortResult };
688
+ }
689
+ }
690
+ catch (error) {
691
+ this.logDecision('decomposition', 'Last-resort decomposition also failed', error.message);
692
+ }
693
+ this.logDecision('decomposition', `Continuing with heuristic decomposition: ${result.subtasks.length} subtasks`, 'Fallback is acceptable; do not abort swarm');
694
+ this.emit({
695
+ type: 'swarm.phase.progress',
696
+ phase: 'decomposing',
697
+ message: `Continuing with heuristic decomposition (${this.classifyDecompositionFailure('heuristic fallback')})`,
698
+ });
699
+ return { result };
322
700
  }
323
701
  // Flat-DAG detection: warn when all tasks land in wave 0 with no dependencies
324
702
  const hasAnyDependency = result.subtasks.some(s => s.dependencies.length > 0);
325
703
  if (!hasAnyDependency && result.subtasks.length >= 3) {
326
704
  this.logDecision('decomposition', `Flat DAG: ${result.subtasks.length} tasks, zero dependencies`, 'All tasks will execute in wave 0 without ordering');
327
705
  }
328
- return result;
706
+ return { result };
329
707
  }
330
708
  catch (error) {
709
+ const message = error.message;
331
710
  this.errors.push({
332
711
  phase: 'decomposition',
333
- message: error.message,
334
- recovered: false,
712
+ message,
713
+ recovered: true,
335
714
  });
336
- this.emit({ type: 'swarm.error', error: error.message, phase: 'decomposition' });
337
- return null;
715
+ const fallback = this.buildEmergencyDecomposition(task, `Decomposition threw an error: ${message}`);
716
+ this.emit({
717
+ type: 'swarm.phase.progress',
718
+ phase: 'decomposing',
719
+ message: `Decomposition fallback due to ${this.classifyDecompositionFailure(message)}`,
720
+ });
721
+ this.logDecision('decomposition', `Decomposition threw error; using emergency scaffold decomposition (${fallback.subtasks.length} subtasks)`, message);
722
+ return { result: fallback };
338
723
  }
339
724
  }
725
+ classifyDecompositionFailure(message) {
726
+ const m = message.toLowerCase();
727
+ if (m.includes('429') || m.includes('too many requests') || m.includes('rate limit')) {
728
+ return 'rate_limit';
729
+ }
730
+ if (m.includes('402') || m.includes('spend limit') || m.includes('key limit exceeded') || m.includes('insufficient credits')) {
731
+ return 'provider_budget_limit';
732
+ }
733
+ if (m.includes('parse') || m.includes('json') || m.includes('subtasks')) {
734
+ return 'parse_failure';
735
+ }
736
+ if (m.includes('invalid') || m.includes('validation')) {
737
+ return 'validation_failure';
738
+ }
739
+ return 'other';
740
+ }
741
+ /**
742
+ * Deterministic decomposition fallback when all LLM decomposition paths fail.
743
+ * Keeps swarm mode alive with visible scaffolding tasks instead of aborting.
744
+ */
745
+ buildEmergencyDecomposition(task, reason) {
746
+ const normalizer = createSmartDecomposer({ detectConflicts: true });
747
+ const taskLabel = task.trim().slice(0, 140) || 'requested task';
748
+ const repoMap = this.config.codebaseContext?.getRepoMap();
749
+ const topFiles = repoMap
750
+ ? Array.from(repoMap.chunks.values())
751
+ .sort((a, b) => b.importance - a.importance)
752
+ .slice(0, 10)
753
+ .map(c => c.filePath)
754
+ : [];
755
+ const subtasks = [
756
+ {
757
+ id: 'task-fb-0',
758
+ description: `Scaffold implementation plan and identify target files for: ${taskLabel}`,
759
+ status: 'ready',
760
+ dependencies: [],
761
+ complexity: 2,
762
+ type: 'design',
763
+ parallelizable: true,
764
+ relevantFiles: topFiles.slice(0, 5),
765
+ },
766
+ {
767
+ id: 'task-fb-1',
768
+ description: `Implement core code changes for: ${taskLabel}`,
769
+ status: 'blocked',
770
+ dependencies: ['task-fb-0'],
771
+ complexity: 5,
772
+ type: 'implement',
773
+ parallelizable: false,
774
+ relevantFiles: topFiles.slice(0, 8),
775
+ },
776
+ {
777
+ id: 'task-fb-2',
778
+ description: `Add or update tests and run validation for: ${taskLabel}`,
779
+ status: 'blocked',
780
+ dependencies: ['task-fb-1'],
781
+ complexity: 3,
782
+ type: 'test',
783
+ parallelizable: false,
784
+ relevantFiles: topFiles.slice(0, 8),
785
+ },
786
+ {
787
+ id: 'task-fb-3',
788
+ description: `Integrate results and produce final summary for: ${taskLabel}`,
789
+ status: 'blocked',
790
+ dependencies: ['task-fb-1', 'task-fb-2'],
791
+ complexity: 2,
792
+ type: 'integrate',
793
+ parallelizable: false,
794
+ relevantFiles: topFiles.slice(0, 5),
795
+ },
796
+ ];
797
+ const dependencyGraph = normalizer.buildDependencyGraph(subtasks);
798
+ const conflicts = normalizer.detectConflicts(subtasks);
799
+ return {
800
+ originalTask: task,
801
+ subtasks,
802
+ dependencyGraph,
803
+ conflicts,
804
+ strategy: 'adaptive',
805
+ totalComplexity: subtasks.reduce((sum, s) => sum + s.complexity, 0),
806
+ totalEstimatedTokens: subtasks.length * 4000,
807
+ metadata: {
808
+ decomposedAt: new Date(),
809
+ codebaseAware: !!repoMap,
810
+ llmAssisted: false,
811
+ },
812
+ };
813
+ }
814
+ /**
815
+ * Last-resort decomposition: radically simplified prompt that even weak models can handle.
816
+ * Uses shorter context, no examples, minimal schema, and lower maxTokens to avoid truncation.
817
+ */
818
+ async lastResortDecompose(task) {
819
+ // Include codebase grounding if repo map is available
820
+ let codebaseHint = '';
821
+ const repoMap = this.config.codebaseContext?.getRepoMap();
822
+ if (repoMap) {
823
+ const topFiles = Array.from(repoMap.chunks.values())
824
+ .sort((a, b) => b.importance - a.importance)
825
+ .slice(0, 10)
826
+ .map(c => c.filePath);
827
+ codebaseHint = `\nKey project files: ${topFiles.join(', ')}\nReference actual files in subtask descriptions.`;
828
+ }
829
+ const simplifiedPrompt = `Break this task into 2-6 subtasks. Return ONLY raw JSON, no markdown.
830
+
831
+ {"subtasks":[{"description":"...","type":"implement","complexity":3,"dependencies":[],"parallelizable":true,"relevantFiles":["src/..."]}],"strategy":"adaptive","reasoning":"..."}
832
+
833
+ Rules:
834
+ - dependencies: integer indices (e.g. [0] means depends on first subtask)
835
+ - type: one of research/implement/test/design/refactor/integrate/merge
836
+ - At least 2 subtasks${codebaseHint}`;
837
+ const response = await this.provider.chat([
838
+ { role: 'system', content: simplifiedPrompt },
839
+ { role: 'user', content: task },
840
+ ], {
841
+ model: this.config.orchestratorModel,
842
+ maxTokens: 4096, // Short — avoids truncation
843
+ temperature: 0.1, // Very deterministic
844
+ });
845
+ this.trackOrchestratorUsage(response, 'decompose-last-resort');
846
+ const parsed = parseDecompositionResponse(response.content);
847
+ if (parsed.subtasks.length < 2)
848
+ return null;
849
+ // Build a proper SmartDecompositionResult from the parsed LLM output
850
+ const decomposer = createSmartDecomposer({ detectConflicts: true });
851
+ const subtasks = parsed.subtasks.map((s, index) => ({
852
+ id: `task-lr-${index}`,
853
+ description: s.description,
854
+ status: (s.dependencies.length > 0 ? 'blocked' : 'ready'),
855
+ dependencies: s.dependencies.map((d) => `task-lr-${d}`),
856
+ complexity: s.complexity,
857
+ type: s.type,
858
+ parallelizable: s.parallelizable,
859
+ relevantFiles: s.relevantFiles,
860
+ suggestedRole: s.suggestedRole,
861
+ }));
862
+ const dependencyGraph = decomposer.buildDependencyGraph(subtasks);
863
+ const conflicts = decomposer.detectConflicts(subtasks);
864
+ return {
865
+ originalTask: task,
866
+ subtasks,
867
+ dependencyGraph,
868
+ conflicts,
869
+ strategy: parsed.strategy,
870
+ totalComplexity: subtasks.reduce((sum, t) => sum + t.complexity, 0),
871
+ totalEstimatedTokens: subtasks.length * 5000,
872
+ metadata: {
873
+ decomposedAt: new Date(),
874
+ codebaseAware: false,
875
+ llmAssisted: true, // This IS LLM-assisted, just simplified
876
+ },
877
+ };
878
+ }
340
879
  // ─── V2: Planning Phase ───────────────────────────────────────────────
341
880
  /**
342
881
  * Create acceptance criteria and integration test plan.
@@ -383,6 +922,7 @@ Respond with valid JSON:
383
922
  maxTokens: 3000,
384
923
  temperature: 0.3,
385
924
  });
925
+ this.trackOrchestratorUsage(response, 'plan');
386
926
  const parsed = this.parseJSON(response.content);
387
927
  if (parsed) {
388
928
  this.plan = {
@@ -454,6 +994,7 @@ Respond with valid JSON:
454
994
  },
455
995
  { role: 'user', content: `Review these wave ${waveIndex + 1} outputs:\n\n${taskSummaries}` },
456
996
  ], { model: reviewModel, maxTokens: 2000, temperature: 0.3 });
997
+ this.trackOrchestratorUsage(response, 'review');
457
998
  const parsed = this.parseJSON(response.content);
458
999
  if (!parsed)
459
1000
  return null;
@@ -578,6 +1119,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
578
1119
  },
579
1120
  { role: 'user', content: `Original task: ${task}\n\nFailed verifications:\n${failedSteps}` },
580
1121
  ], { model: this.config.plannerModel ?? this.config.orchestratorModel, maxTokens: 1500, temperature: 0.3 });
1122
+ this.trackOrchestratorUsage(response, 'verification-fixup');
581
1123
  const parsed = this.parseJSON(response.content);
582
1124
  if (parsed?.fixups && parsed.fixups.length > 0) {
583
1125
  const fixupTasks = parsed.fixups.map((f, i) => ({
@@ -628,6 +1170,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
628
1170
  this.logDecision('resume', `Resuming from wave ${checkpoint.currentWave}`, `Session: ${checkpoint.sessionId}`);
629
1171
  this.emit({ type: 'swarm.state.resume', sessionId: checkpoint.sessionId, fromWave: checkpoint.currentWave });
630
1172
  // Restore state
1173
+ if (checkpoint.originalPrompt)
1174
+ this.originalPrompt = checkpoint.originalPrompt;
631
1175
  if (checkpoint.plan)
632
1176
  this.plan = checkpoint.plan;
633
1177
  if (checkpoint.modelHealth.length > 0)
@@ -638,6 +1182,13 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
638
1182
  this.totalCost = checkpoint.stats.totalCost;
639
1183
  this.qualityRejections = checkpoint.stats.qualityRejections;
640
1184
  this.retries = checkpoint.stats.retries;
1185
+ // Restore shared context & economics state from checkpoint
1186
+ if (checkpoint.sharedContext) {
1187
+ this.sharedContextState.restoreFrom(checkpoint.sharedContext);
1188
+ }
1189
+ if (checkpoint.sharedEconomics) {
1190
+ this.sharedEconomicsState.restoreFrom(checkpoint.sharedEconomics);
1191
+ }
641
1192
  // Restore task queue
642
1193
  this.taskQueue.restoreFromCheckpoint({
643
1194
  taskStates: checkpoint.taskStates,
@@ -645,21 +1196,63 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
645
1196
  currentWave: checkpoint.currentWave,
646
1197
  });
647
1198
  // Reset orphaned dispatched tasks — their workers died with the previous process
648
- let resetCount = 0;
1199
+ const resetIds = this.taskQueue.reconcileStaleDispatched({
1200
+ staleAfterMs: 0,
1201
+ activeTaskIds: new Set(),
1202
+ });
1203
+ const resetCount = resetIds.length;
1204
+ for (const taskId of resetIds) {
1205
+ const task = this.taskQueue.getTask(taskId);
1206
+ if (!task)
1207
+ continue;
1208
+ // Preserve at least 1 retry attempt
1209
+ task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
1210
+ }
1211
+ if (resetCount > 0) {
1212
+ this.logDecision('resume', `Reset ${resetCount} orphaned dispatched tasks to ready`, 'Workers died with previous process');
1213
+ }
1214
+ // Reset skipped tasks whose dependencies are now satisfied
1215
+ let unskippedCount = 0;
1216
+ for (const task of this.taskQueue.getAllTasks()) {
1217
+ if (task.status === 'skipped') {
1218
+ const deps = task.dependencies.map(id => this.taskQueue.getTask(id));
1219
+ const allDepsSatisfied = deps.every(d => d && (d.status === 'completed' || d.status === 'decomposed'));
1220
+ if (allDepsSatisfied) {
1221
+ task.status = 'ready';
1222
+ task.attempts = 0;
1223
+ task.rescueContext = 'Recovered on resume — dependencies now satisfied';
1224
+ unskippedCount++;
1225
+ }
1226
+ }
1227
+ }
1228
+ // Also reset failed tasks that have retry budget
649
1229
  for (const task of this.taskQueue.getAllTasks()) {
650
- if (task.status === 'dispatched') {
1230
+ if (task.status === 'failed') {
651
1231
  task.status = 'ready';
652
- // Preserve at least 1 retry attempt
653
1232
  task.attempts = Math.min(task.attempts, Math.max(0, this.config.workerRetries - 1));
654
- resetCount++;
1233
+ unskippedCount++;
655
1234
  }
656
1235
  }
657
- if (resetCount > 0) {
658
- this.logDecision('resume', `Reset ${resetCount} orphaned dispatched tasks to ready`, 'Workers died with previous process');
1236
+ if (unskippedCount > 0) {
1237
+ this.logDecision('resume', `Recovered ${unskippedCount} skipped/failed tasks`, 'Fresh retry on resume');
1238
+ }
1239
+ // If many tasks are still stuck after un-skip, trigger re-plan
1240
+ const resumeStats = this.taskQueue.getStats();
1241
+ const stuckCount = resumeStats.failed + resumeStats.skipped;
1242
+ const totalAttempted = resumeStats.completed + stuckCount;
1243
+ if (totalAttempted > 0 && stuckCount / totalAttempted > 0.4) {
1244
+ this.logDecision('resume-replan', `${stuckCount}/${totalAttempted} tasks still stuck after resume — triggering re-plan`, '');
1245
+ this.hasReplanned = false; // Allow re-plan on resume
1246
+ await this.midSwarmReplan();
659
1247
  }
660
1248
  // Continue from where we left off
661
1249
  this.currentPhase = 'executing';
662
1250
  await this.executeWaves();
1251
+ // V10: Final rescue pass — attempt to recover cascade-skipped tasks with lenient mode
1252
+ if (!this.cancelled)
1253
+ await this.finalRescuePass();
1254
+ // Post-wave artifact audit
1255
+ this.artifactInventory = this.buildArtifactInventory();
663
1256
  // Continue with verification and synthesis as normal
664
1257
  if (this.config.enableVerification && this.plan?.integrationTestPlan) {
665
1258
  this.currentPhase = 'verifying';
@@ -673,10 +1266,20 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
673
1266
  this.currentPhase = 'completed';
674
1267
  const executionStats = this.buildStats();
675
1268
  this.checkpoint('final');
676
- this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors });
1269
+ const hasArtifacts = (this.artifactInventory?.totalFiles ?? 0) > 0;
1270
+ this.emit({ type: 'swarm.complete', stats: executionStats, errors: this.errors, artifactInventory: this.artifactInventory });
1271
+ // Success requires completing at least 70% of tasks (not just > 0)
1272
+ const completionRatio = executionStats.totalTasks > 0
1273
+ ? executionStats.completedTasks / executionStats.totalTasks
1274
+ : 0;
1275
+ const isSuccess = completionRatio >= 0.7;
1276
+ const isPartialSuccess = !isSuccess && executionStats.completedTasks > 0;
677
1277
  return {
678
- success: executionStats.completedTasks > 0,
1278
+ success: isSuccess,
1279
+ partialSuccess: isPartialSuccess || (!executionStats.completedTasks && hasArtifacts),
1280
+ partialFailure: executionStats.failedTasks > 0,
679
1281
  synthesisResult: synthesisResult ?? undefined,
1282
+ artifactInventory: this.artifactInventory,
680
1283
  summary: this.buildSummary(executionStats),
681
1284
  tasks: this.taskQueue.getAllTasks(),
682
1285
  stats: executionStats,
@@ -690,9 +1293,25 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
690
1293
  async executeWaves() {
691
1294
  let waveIndex = this.taskQueue.getCurrentWave();
692
1295
  const totalWaves = this.taskQueue.getTotalWaves();
1296
+ const dispatchLeaseStaleMs = this.config.dispatchLeaseStaleMs ?? 5 * 60 * 1000;
693
1297
  while (waveIndex < totalWaves && !this.cancelled) {
1298
+ const activeTaskIds = new Set(this.workerPool.getActiveWorkerStatus().map(w => w.taskId));
1299
+ const recovered = this.taskQueue.reconcileStaleDispatched({
1300
+ staleAfterMs: dispatchLeaseStaleMs,
1301
+ activeTaskIds,
1302
+ });
1303
+ if (recovered.length > 0) {
1304
+ this.logDecision('lease-recovery', `Recovered ${recovered.length} stale dispatched task(s)`, recovered.join(', '));
1305
+ }
694
1306
  const readyTasks = this.taskQueue.getReadyTasks();
695
1307
  const queueStats = this.taskQueue.getStats();
1308
+ // F18: Skip empty waves — if no tasks are ready and none are running,
1309
+ // remaining tasks are all blocked/failed/skipped. Break instead of
1310
+ // running useless review cycles.
1311
+ if (readyTasks.length === 0 && queueStats.running === 0 && queueStats.ready === 0) {
1312
+ this.logDecision('wave-skip', `Skipping waves ${waveIndex + 1}-${totalWaves}: no dispatchable tasks remain`, `Stats: ${queueStats.completed} completed, ${queueStats.failed} failed, ${queueStats.skipped} skipped`);
1313
+ break;
1314
+ }
696
1315
  this.emit({
697
1316
  type: 'swarm.wave.start',
698
1317
  wave: waveIndex + 1,
@@ -734,6 +1353,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
734
1353
  previousFeedback: 'All tasks in this batch failed. Try a fundamentally different approach — the previous strategy did not work.',
735
1354
  previousScore: 0,
736
1355
  attempt: task.attempts,
1356
+ previousModel: task.assignedModel,
1357
+ swarmProgress: this.getSwarmProgressSummary(),
737
1358
  };
738
1359
  }
739
1360
  this.logDecision('wave-recovery', `Re-queued ${failedWaveTasks.length} tasks with adapted retry context`, 'Budget allows retry');
@@ -741,21 +1362,46 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
741
1362
  await this.executeWave(failedWaveTasks.map(t => this.taskQueue.getTask(t.id)).filter(t => t.status === 'ready'));
742
1363
  }
743
1364
  }
1365
+ // F5: Adaptive re-decomposition — if < 50% of wave tasks succeeded,
1366
+ // the decomposition may be structurally flawed. Log for observability.
1367
+ // (Full re-decomposition of remaining work would require re-architecting the queue,
1368
+ // so we log the signal and let wave retry + fixup handle recovery.)
1369
+ const waveTotal = waveCompleted + waveFailed + waveSkipped;
1370
+ const waveSuccessRate = waveTotal > 0 ? waveCompleted / waveTotal : 0;
1371
+ if (waveSuccessRate < 0.5 && waveTotal >= 2) {
1372
+ this.logDecision('decomposition-quality', `Wave ${waveIndex + 1} success rate ${(waveSuccessRate * 100).toFixed(0)}% (${waveCompleted}/${waveTotal})`, 'Low success rate may indicate decomposition quality issues');
1373
+ }
744
1374
  // V2: Review wave outputs
745
1375
  const review = await this.reviewWave(waveIndex);
746
1376
  if (review && review.fixupTasks.length > 0) {
747
1377
  // Execute fix-up tasks immediately
748
1378
  await this.executeWave(review.fixupTasks);
749
1379
  }
1380
+ // Rescue cascade-skipped tasks that can still run
1381
+ // (after wave review + fixup, some skipped tasks may now be viable)
1382
+ const rescued = this.rescueCascadeSkipped();
1383
+ if (rescued.length > 0) {
1384
+ this.logDecision('cascade-rescue', `Rescued ${rescued.length} cascade-skipped tasks after wave ${waveIndex + 1}`, rescued.map(t => t.id).join(', '));
1385
+ await this.executeWave(rescued);
1386
+ }
750
1387
  // Reset quality circuit breaker at wave boundary — each wave gets a fresh chance.
751
1388
  // Within a wave, rejections accumulate properly so the breaker can trip.
752
1389
  // Between waves, we reset so each wave gets a fresh quality evaluation window.
753
1390
  // (The within-wave reset at quality-gate-passed is kept — that's correct.)
754
- if (this.qualityGateDisabled) {
755
- this.qualityGateDisabled = false;
756
- this.consecutiveQualityRejections = 0;
757
- this.logDecision('quality-circuit-breaker', `Re-enabled quality gates at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
1391
+ if (this.qualityGateDisabledModels.size > 0) {
1392
+ this.qualityGateDisabledModels.clear();
1393
+ this.perModelQualityRejections.clear();
1394
+ this.logDecision('quality-circuit-breaker', `Re-enabled quality gates for all models at wave ${waveIndex + 1} boundary`, 'Each wave gets a fresh quality evaluation window');
758
1395
  }
1396
+ // F3: Log budget reallocation after wave completion.
1397
+ // SharedBudgetPool already returns unused tokens via release(), but we log it
1398
+ // for observability so operators can see how budget flows between waves.
1399
+ const budgetStats = this.budgetPool.getStats();
1400
+ this.logDecision('budget-reallocation', `After wave ${waveIndex + 1}: ${budgetStats.tokensRemaining} tokens remaining (${(budgetStats.utilization * 100).toFixed(0)}% utilized)`, '');
1401
+ this.budgetPool.reallocateUnused(budgetStats.tokensRemaining);
1402
+ // F21: Mid-swarm situational assessment — evaluate success rate and budget health,
1403
+ // optionally triage low-priority tasks to conserve budget for critical path.
1404
+ await this.assessAndAdapt(waveIndex);
759
1405
  // V2: Checkpoint after each wave
760
1406
  this.checkpoint(`wave-${waveIndex}`);
761
1407
  // Advance to next wave
@@ -783,7 +1429,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
783
1429
  taskIndex++;
784
1430
  // Stagger dispatches to avoid rate limit storms
785
1431
  if (taskIndex < tasks.length && this.workerPool.availableSlots > 0) {
786
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1432
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
787
1433
  }
788
1434
  }
789
1435
  // Process completions and dispatch more tasks as slots open
@@ -804,7 +1450,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
804
1450
  await this.dispatchTask(task);
805
1451
  // Stagger dispatches to avoid rate limit storms
806
1452
  if (taskIndex + 1 < tasks.length && this.workerPool.availableSlots > 0) {
807
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1453
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
808
1454
  }
809
1455
  }
810
1456
  taskIndex++;
@@ -819,11 +1465,38 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
819
1465
  await this.dispatchTask(moreReady[i]);
820
1466
  // Stagger dispatches to avoid rate limit storms
821
1467
  if (i + 1 < moreReady.length && this.workerPool.availableSlots > 0) {
822
- await new Promise(resolve => setTimeout(resolve, this.config.dispatchStaggerMs ?? 500));
1468
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
823
1469
  }
824
1470
  }
825
1471
  }
826
1472
  }
1473
+ // F20: Re-dispatch pass — after all workers finish, budget may have been freed
1474
+ // by completed tasks. Try to dispatch any still-ready tasks (e.g., those paused
1475
+ // by budget exhaustion earlier).
1476
+ if (!this.cancelled && this.budgetPool.hasCapacity()) {
1477
+ const stillReady = this.taskQueue.getAllReadyTasks()
1478
+ .filter(t => !this.workerPool.getActiveWorkerStatus().some(w => w.taskId === t.id));
1479
+ if (stillReady.length > 0) {
1480
+ this.logDecision('budget-redispatch', `Budget freed after wave — re-dispatching ${stillReady.length} ready task(s)`, `Budget: ${JSON.stringify(this.budgetPool.getStats())}`);
1481
+ for (const task of stillReady) {
1482
+ if (this.workerPool.availableSlots <= 0 || !this.budgetPool.hasCapacity())
1483
+ break;
1484
+ await this.dispatchTask(task);
1485
+ if (this.workerPool.availableSlots > 0) {
1486
+ await new Promise(resolve => setTimeout(resolve, this.getStaggerMs()));
1487
+ }
1488
+ }
1489
+ // Wait for these re-dispatched tasks to complete
1490
+ while (this.workerPool.activeCount > 0 && !this.cancelled) {
1491
+ const completed = await this.workerPool.waitForAny();
1492
+ if (!completed)
1493
+ break;
1494
+ await this.handleTaskCompletion(completed.taskId, completed.result, completed.startedAt);
1495
+ this.emitBudgetUpdate();
1496
+ this.emitStatusUpdate();
1497
+ }
1498
+ }
1499
+ }
827
1500
  }
828
1501
  /**
829
1502
  * Dispatch a single task to a worker.
@@ -833,45 +1506,111 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
833
1506
  const worker = this.workerPool.selectWorker(task);
834
1507
  if (!worker) {
835
1508
  // M2: Emit error and mark task failed instead of silently returning
836
- this.taskQueue.markFailed(task.id, 0);
1509
+ // V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
1510
+ this.logDecision('no-worker', `${task.id}: no worker for type ${task.type}`, '');
1511
+ if (task.attempts > 0) {
1512
+ const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
1513
+ const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
1514
+ if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
1515
+ return;
1516
+ }
1517
+ }
1518
+ this.taskQueue.markFailedWithoutCascade(task.id, 0);
1519
+ this.taskQueue.triggerCascadeSkip(task.id);
837
1520
  this.emit({
838
1521
  type: 'swarm.task.failed',
839
1522
  taskId: task.id,
840
1523
  error: `No worker available for task type: ${task.type}`,
841
- attempt: 0,
1524
+ attempt: task.attempts,
842
1525
  maxAttempts: 0,
843
1526
  willRetry: false,
1527
+ failureMode: 'error',
844
1528
  });
845
1529
  return;
846
1530
  }
847
1531
  try {
848
- this.taskQueue.markDispatched(task.id, worker.model);
1532
+ // Pre-dispatch auto-split for critical-path bottlenecks
1533
+ if (this.shouldAutoSplit(task)) {
1534
+ try {
1535
+ const splitResult = await this.judgeSplit(task);
1536
+ if (splitResult.shouldSplit && splitResult.subtasks) {
1537
+ task.status = 'dispatched'; // Required for replaceWithSubtasks
1538
+ this.taskQueue.replaceWithSubtasks(task.id, splitResult.subtasks);
1539
+ this.emit({
1540
+ type: 'swarm.task.resilience',
1541
+ taskId: task.id,
1542
+ strategy: 'auto-split',
1543
+ succeeded: true,
1544
+ reason: `Pre-dispatch split into ${splitResult.subtasks.length} parallel subtasks`,
1545
+ artifactsFound: 0,
1546
+ toolCalls: 0,
1547
+ });
1548
+ return; // Subtasks now in queue, will be dispatched this wave
1549
+ }
1550
+ }
1551
+ catch (err) {
1552
+ this.logDecision('auto-split', `${task.id}: split judge failed — ${err.message}`, '');
1553
+ // Fall through to normal dispatch
1554
+ }
1555
+ }
1556
+ this.totalDispatches++;
1557
+ const dispatchedModel = task.assignedModel ?? worker.model;
1558
+ this.taskQueue.markDispatched(task.id, dispatchedModel);
1559
+ if (task.assignedModel && task.assignedModel !== worker.model) {
1560
+ this.logDecision('failover', `Dispatching ${task.id} with failover model ${task.assignedModel} (worker default: ${worker.model})`, 'Retry model override is active');
1561
+ }
849
1562
  // Pass the pre-selected worker to avoid double-selection in dispatch()
850
1563
  await this.workerPool.dispatch(task, worker);
851
1564
  this.emit({
852
1565
  type: 'swarm.task.dispatched',
853
1566
  taskId: task.id,
854
1567
  description: task.description,
855
- model: worker.model,
1568
+ model: dispatchedModel,
856
1569
  workerName: worker.name,
1570
+ toolCount: worker.allowedTools?.length ?? -1, // -1 = all tools
1571
+ tools: worker.allowedTools,
1572
+ retryContext: task.retryContext,
1573
+ fromModel: task.retryContext ? task.retryContext.previousModel : undefined,
1574
+ attempts: task.attempts,
857
1575
  });
858
1576
  }
859
1577
  catch (error) {
1578
+ const errorMsg = error.message;
1579
+ // F20: Budget exhaustion is NOT a task failure — the task is fine, we just ran out of money.
1580
+ // Reset status to ready so it can be picked up if budget becomes available
1581
+ // (e.g., after tokens are released from completing tasks).
1582
+ if (errorMsg.includes('Budget pool exhausted')) {
1583
+ task.status = 'ready';
1584
+ this.logDecision('budget-pause', `Cannot dispatch ${task.id}: budget exhausted — task kept ready for potential re-dispatch`, `Budget stats: ${JSON.stringify(this.budgetPool.getStats())}`);
1585
+ return;
1586
+ }
860
1587
  this.errors.push({
861
1588
  taskId: task.id,
862
1589
  phase: 'dispatch',
863
- message: error.message,
1590
+ message: errorMsg,
864
1591
  recovered: false,
865
1592
  });
1593
+ this.logDecision('dispatch-error', `${task.id}: dispatch failed: ${errorMsg.slice(0, 100)}`, `attempts: ${task.attempts}`);
1594
+ // V10: Try resilience recovery if task had previous attempts (prior worker may have produced artifacts)
1595
+ if (task.attempts > 0) {
1596
+ const syntheticTaskResult = { success: false, output: '', tokensUsed: 0, costUsed: 0, durationMs: 0, model: 'none' };
1597
+ const syntheticSpawn = { success: false, output: '', metrics: { tokens: 0, duration: 0, toolCalls: 0 } };
1598
+ if (await this.tryResilienceRecovery(task, task.id, syntheticTaskResult, syntheticSpawn)) {
1599
+ this.errors[this.errors.length - 1].recovered = true;
1600
+ return;
1601
+ }
1602
+ }
1603
+ this.taskQueue.markFailedWithoutCascade(task.id, 0);
1604
+ this.taskQueue.triggerCascadeSkip(task.id);
866
1605
  this.emit({
867
1606
  type: 'swarm.task.failed',
868
1607
  taskId: task.id,
869
- error: error.message,
1608
+ error: errorMsg,
870
1609
  attempt: task.attempts,
871
1610
  maxAttempts: 1 + this.config.workerRetries,
872
1611
  willRetry: false,
1612
+ failureMode: 'error',
873
1613
  });
874
- this.taskQueue.markFailed(task.id, 0);
875
1614
  }
876
1615
  }
877
1616
  /**
@@ -881,9 +1620,36 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
881
1620
  const task = this.taskQueue.getTask(taskId);
882
1621
  if (!task)
883
1622
  return;
884
- // Guard: task was cascade-skipped while its worker was running — ignore the result
885
- if (task.status === 'skipped' || task.status === 'failed')
1623
+ // Guard: task was terminally resolved while its worker was running — ignore the result
1624
+ // F4: But NOT if pendingCascadeSkip is set — those results are evaluated below
1625
+ if ((task.status === 'skipped' || task.status === 'failed') && !task.pendingCascadeSkip)
886
1626
  return;
1627
+ // V7: Global dispatch cap — prevent any single task from burning budget.
1628
+ // Try resilience recovery (micro-decompose, degraded acceptance) before hard-failing.
1629
+ const maxDispatches = this.config.maxDispatchesPerTask ?? 5;
1630
+ if (task.attempts >= maxDispatches) {
1631
+ const durationMs = Date.now() - startedAt;
1632
+ const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
1633
+ this.totalTokens += taskResult.tokensUsed;
1634
+ this.totalCost += taskResult.costUsed;
1635
+ // Try resilience recovery before hard fail
1636
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1637
+ return;
1638
+ }
1639
+ this.taskQueue.markFailedWithoutCascade(taskId, 0);
1640
+ this.taskQueue.triggerCascadeSkip(taskId);
1641
+ this.emit({
1642
+ type: 'swarm.task.failed',
1643
+ taskId,
1644
+ error: `Dispatch cap reached (${maxDispatches} attempts)`,
1645
+ attempt: task.attempts,
1646
+ maxAttempts: maxDispatches,
1647
+ willRetry: false,
1648
+ failureMode: task.failureMode,
1649
+ });
1650
+ this.logDecision('dispatch-cap', `${taskId}: hard cap reached (${task.attempts}/${maxDispatches})`, 'No more retries — resilience recovery also failed');
1651
+ return;
1652
+ }
887
1653
  const durationMs = Date.now() - startedAt;
888
1654
  const taskResult = this.workerPool.toTaskResult(spawnResult, task, durationMs);
889
1655
  // Track model usage
@@ -895,21 +1661,101 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
895
1661
  this.modelUsage.set(model, usage);
896
1662
  this.totalTokens += taskResult.tokensUsed;
897
1663
  this.totalCost += taskResult.costUsed;
1664
+ // Log per-worker budget utilization for orchestrator visibility
1665
+ if (taskResult.budgetUtilization) {
1666
+ this.logDecision('budget-utilization', `${taskId}: token ${taskResult.budgetUtilization.tokenPercent}%, iter ${taskResult.budgetUtilization.iterationPercent}%`, `model=${model}, tokens=${taskResult.tokensUsed}, duration=${durationMs}ms`);
1667
+ }
1668
+ // V10: Emit per-attempt event for full decision traceability
1669
+ this.emit({
1670
+ type: 'swarm.task.attempt',
1671
+ taskId,
1672
+ attempt: task.attempts,
1673
+ model,
1674
+ success: spawnResult.success,
1675
+ durationMs,
1676
+ toolCalls: spawnResult.metrics.toolCalls ?? 0,
1677
+ failureMode: !spawnResult.success ? task.failureMode : undefined,
1678
+ qualityScore: taskResult.qualityScore,
1679
+ output: taskResult.output.slice(0, 500),
1680
+ });
898
1681
  if (!spawnResult.success) {
899
1682
  // V2: Record model health
900
- const errorMsg = spawnResult.output.toLowerCase();
901
- const is429 = errorMsg.includes('429') || errorMsg.includes('rate');
902
- const is402 = errorMsg.includes('402') || errorMsg.includes('spend limit');
903
- const errorType = is429 ? '429' : is402 ? '402' : 'error';
1683
+ const failure = classifySwarmFailure(spawnResult.output, spawnResult.metrics.toolCalls);
1684
+ const { failureClass, retryable, errorType, failureMode, reason } = failure;
1685
+ const isTimeout = failureMode === 'timeout';
1686
+ const isRateLimited = failureClass === 'rate_limited';
1687
+ const isSpendLimit = failureClass === 'provider_spend_limit';
1688
+ const isNonRetryable = !retryable;
904
1689
  this.healthTracker.recordFailure(model, errorType);
905
1690
  this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
906
- // Feed circuit breaker
907
- if (is429 || is402) {
1691
+ // P6: Tag failure mode for cascade threshold awareness
1692
+ task.failureMode = failureMode;
1693
+ // Feed circuit breaker only for retryable rate limiting
1694
+ if (isRateLimited) {
908
1695
  this.recordRateLimit();
909
1696
  }
910
- // V2: Model failover on rate limits
911
- if ((is429 || is402) && this.config.enableModelFailover) {
912
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1697
+ // F25a: Consecutive timeout tracking — early-fail after N consecutive timeouts
1698
+ if (isTimeout) {
1699
+ const count = (this.taskTimeoutCounts.get(taskId) ?? 0) + 1;
1700
+ this.taskTimeoutCounts.set(taskId, count);
1701
+ const timeoutLimit = this.config.consecutiveTimeoutLimit ?? 3;
1702
+ this.logDecision('timeout-tracking', `${taskId}: consecutive timeout ${count}/${timeoutLimit}`, '');
1703
+ if (count >= timeoutLimit) {
1704
+ // F25b: Try model failover before giving up
1705
+ let failoverSucceeded = false;
1706
+ if (this.config.enableModelFailover) {
1707
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
1708
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1709
+ if (alternative) {
1710
+ this.emit({
1711
+ type: 'swarm.model.failover',
1712
+ taskId,
1713
+ fromModel: model,
1714
+ toModel: alternative.model,
1715
+ reason: 'consecutive-timeouts',
1716
+ });
1717
+ task.assignedModel = alternative.model;
1718
+ this.taskTimeoutCounts.set(taskId, 0); // Reset counter for new model
1719
+ this.logDecision('failover', `Timeout failover ${taskId}: ${model} → ${alternative.model}`, `${count} consecutive timeouts`);
1720
+ failoverSucceeded = true;
1721
+ }
1722
+ }
1723
+ if (!failoverSucceeded) {
1724
+ // No alternative model — try resilience recovery before hard fail.
1725
+ // Timeouts often produce artifacts (worker WAS working, just ran out of time).
1726
+ task.failureMode = 'timeout';
1727
+ const taskResult = this.workerPool.toTaskResult(spawnResult, task, Date.now() - startedAt);
1728
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1729
+ this.taskTimeoutCounts.delete(taskId);
1730
+ return;
1731
+ }
1732
+ this.taskQueue.markFailedWithoutCascade(taskId, 0);
1733
+ this.taskQueue.triggerCascadeSkip(taskId);
1734
+ this.emit({
1735
+ type: 'swarm.task.failed',
1736
+ taskId,
1737
+ error: `${count} consecutive timeouts — no alternative model available`,
1738
+ attempt: task.attempts,
1739
+ maxAttempts: maxDispatches,
1740
+ willRetry: false,
1741
+ failureMode: 'timeout',
1742
+ failureClass: 'timeout',
1743
+ retrySuppressed: true,
1744
+ retryReason: 'Consecutive timeout limit reached with no alternative model',
1745
+ });
1746
+ this.logDecision('timeout-early-fail', `${taskId}: ${count} consecutive timeouts, no alt model — resilience recovery also failed`, '');
1747
+ this.taskTimeoutCounts.delete(taskId);
1748
+ return;
1749
+ }
1750
+ }
1751
+ }
1752
+ else {
1753
+ // Non-timeout failure — reset the counter
1754
+ this.taskTimeoutCounts.delete(taskId);
1755
+ }
1756
+ // V2: Model failover on retryable rate limits
1757
+ if (isRateLimited && this.config.enableModelFailover) {
1758
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
913
1759
  const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
914
1760
  if (alternative) {
915
1761
  this.emit({
@@ -924,32 +1770,61 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
924
1770
  }
925
1771
  }
926
1772
  // V5/V7: Store error context so retry gets different prompt
927
- if (!(is429 || is402)) {
1773
+ if (!(isRateLimited || isSpendLimit)) {
928
1774
  // V7: Timeout-specific feedback — the worker WAS working, just ran out of time
929
- const isTimeout = spawnResult.metrics.toolCalls === -1;
930
1775
  const timeoutSeconds = isTimeout ? Math.round(durationMs / 1000) : 0;
931
1776
  task.retryContext = {
932
1777
  previousFeedback: isTimeout
933
1778
  ? `Previous attempt timed out after ${timeoutSeconds}s. You must complete this task more efficiently — work faster, use fewer tool calls, and produce your result sooner.`
934
- : spawnResult.output.slice(0, 500),
1779
+ : spawnResult.output.slice(0, 2000),
935
1780
  previousScore: 0,
936
1781
  attempt: task.attempts,
1782
+ previousModel: model,
1783
+ previousFiles: taskResult.filesModified,
1784
+ swarmProgress: this.getSwarmProgressSummary(),
937
1785
  };
1786
+ // Phase 3.1: Report failure to shared context engine for cross-worker learning
1787
+ this.sharedContextEngine.reportFailure(taskId, {
1788
+ action: task.description.slice(0, 200),
1789
+ error: spawnResult.output.slice(0, 500),
1790
+ });
1791
+ }
1792
+ // V7: Reset hollow streak on non-hollow failure (error is not a hollow completion)
1793
+ this.hollowStreak = 0;
1794
+ // Worker failed — use higher retry limit for rate limit errors.
1795
+ // V7: Fixup tasks get capped retries, foundation tasks get +1.
1796
+ const baseRetries = this.getEffectiveRetries(task);
1797
+ const retryLimit = isNonRetryable
1798
+ ? 0
1799
+ : isRateLimited
1800
+ ? Math.min(this.config.rateLimitRetries ?? 3, baseRetries + 1)
1801
+ : baseRetries;
1802
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, retryLimit);
1803
+ if (isNonRetryable) {
1804
+ this.logDecision('retry-suppressed', `${taskId}: ${failureClass}`, reason);
938
1805
  }
939
- // Worker failed — use higher retry limit for rate limit errors
940
- const retryLimit = (is429 || is402)
941
- ? (this.config.rateLimitRetries ?? 3)
942
- : this.config.workerRetries;
943
- const canRetry = this.taskQueue.markFailed(taskId, retryLimit);
944
1806
  if (canRetry) {
945
1807
  this.retries++;
946
1808
  // Non-blocking cooldown: set retryAfter timestamp instead of blocking
947
- if (is429 || is402) {
1809
+ if (isRateLimited) {
948
1810
  const baseDelay = this.config.retryBaseDelayMs ?? 5000;
949
1811
  const cooldownMs = Math.min(baseDelay * Math.pow(2, task.attempts - 1), 30000);
950
1812
  this.taskQueue.setRetryAfter(taskId, cooldownMs);
1813
+ this.logDecision('rate-limit-cooldown', `${taskId}: ${errorType} cooldown ${cooldownMs}ms, model ${model}`, '');
951
1814
  }
952
1815
  }
1816
+ else if (!isRateLimited) {
1817
+ // Resilience recovery for non-rate-limit errors (micro-decompose + degraded acceptance)
1818
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1819
+ return;
1820
+ }
1821
+ // Recovery failed — NOW trigger cascade
1822
+ this.taskQueue.triggerCascadeSkip(taskId);
1823
+ }
1824
+ else {
1825
+ // Rate-limit exhaustion — trigger cascade
1826
+ this.taskQueue.triggerCascadeSkip(taskId);
1827
+ }
953
1828
  this.emit({
954
1829
  type: 'swarm.task.failed',
955
1830
  taskId,
@@ -957,23 +1832,51 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
957
1832
  attempt: task.attempts,
958
1833
  maxAttempts: 1 + this.config.workerRetries,
959
1834
  willRetry: canRetry,
1835
+ toolCalls: spawnResult.metrics.toolCalls,
1836
+ failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
1837
+ failureMode: task.failureMode,
1838
+ failureClass,
1839
+ retrySuppressed: isNonRetryable,
1840
+ retryReason: reason,
960
1841
  });
961
1842
  return;
962
1843
  }
963
1844
  // V6: Hollow completion detection — workers that "succeed" without doing any work
964
1845
  // Must check BEFORE recording success, otherwise hollow completions inflate health scores
965
- if (isHollowCompletion(spawnResult)) {
966
- // Record health failure so hollow-prone models accumulate failure records
967
- // and eventually trigger failover via selectAlternativeModel
968
- this.healthTracker.recordFailure(model, 'error');
1846
+ if (isHollowCompletion(spawnResult, task.type, this.config)) {
1847
+ // F4: Hollow result + pendingCascadeSkip — honor the skip immediately, no retry
1848
+ if (task.pendingCascadeSkip) {
1849
+ task.pendingCascadeSkip = undefined;
1850
+ task.status = 'skipped';
1851
+ this.totalHollows++;
1852
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (hollow completion)`, '');
1853
+ this.emit({ type: 'swarm.task.skipped', taskId, reason: 'cascade skip honored — hollow completion' });
1854
+ return;
1855
+ }
1856
+ // P6: Tag failure mode for cascade threshold awareness
1857
+ task.failureMode = 'hollow';
1858
+ // Record hollow completion so hollow-prone models accumulate hollow-specific records
1859
+ // and get deprioritized by the model selector (also records generic failure internally)
1860
+ this.healthTracker.recordHollow(model);
1861
+ const admitsFailure = spawnResult.success && FAILURE_INDICATORS.some(f => (spawnResult.output ?? '').toLowerCase().includes(f));
969
1862
  task.retryContext = {
970
- previousFeedback: 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
1863
+ previousFeedback: admitsFailure
1864
+ ? 'Previous attempt reported success but admitted failure (e.g., "budget exhausted", "unable to complete"). You MUST execute tool calls and produce concrete output this time.'
1865
+ : 'Previous attempt produced no meaningful output. Try again with a concrete approach.',
971
1866
  previousScore: 1,
972
1867
  attempt: task.attempts,
1868
+ previousModel: model,
1869
+ previousFiles: taskResult.filesModified,
1870
+ swarmProgress: this.getSwarmProgressSummary(),
973
1871
  };
1872
+ // Phase 3.1: Report hollow completion to shared context engine
1873
+ this.sharedContextEngine.reportFailure(taskId, {
1874
+ action: task.description.slice(0, 200),
1875
+ error: 'Hollow completion: worker produced no meaningful output',
1876
+ });
974
1877
  // Model failover for hollow completions — same pattern as quality failover
975
1878
  if (this.config.enableModelFailover) {
976
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1879
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
977
1880
  const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
978
1881
  if (alternative) {
979
1882
  this.emit({
@@ -987,9 +1890,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
987
1890
  this.logDecision('failover', `Hollow failover ${taskId}: ${model} → ${alternative.model}`, 'Model produced hollow completion');
988
1891
  }
989
1892
  }
990
- const canRetry = this.taskQueue.markFailed(taskId, this.config.workerRetries);
991
- if (canRetry)
1893
+ const hollowRetries = this.getEffectiveRetries(task);
1894
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, hollowRetries);
1895
+ if (canRetry) {
992
1896
  this.retries++;
1897
+ }
1898
+ else {
1899
+ // Retries exhausted — try shared resilience recovery (micro-decompose, degraded acceptance)
1900
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
1901
+ return;
1902
+ }
1903
+ // Recovery failed — NOW trigger cascade
1904
+ this.taskQueue.triggerCascadeSkip(taskId);
1905
+ }
993
1906
  this.emit({
994
1907
  type: 'swarm.task.failed',
995
1908
  taskId,
@@ -997,21 +1910,83 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
997
1910
  attempt: task.attempts,
998
1911
  maxAttempts: 1 + this.config.workerRetries,
999
1912
  willRetry: canRetry,
1913
+ toolCalls: spawnResult.metrics.toolCalls,
1914
+ failoverModel: task.assignedModel !== model ? task.assignedModel : undefined,
1915
+ failureMode: 'hollow',
1000
1916
  });
1001
- this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls`, 'Marking as failed for retry');
1917
+ this.hollowStreak++;
1918
+ this.totalHollows++;
1919
+ this.logDecision('hollow-completion', `${taskId}: worker completed with 0 tool calls (streak: ${this.hollowStreak}, total hollows: ${this.totalHollows}/${this.totalDispatches})`, canRetry ? 'Marking as failed for retry' : 'Retries exhausted — hard fail');
1920
+ // B2: Hollow streak handling — only terminate if enableHollowTermination is explicitly on
1921
+ if (this.hollowStreak >= SwarmOrchestrator.HOLLOW_STREAK_THRESHOLD) {
1922
+ const uniqueModels = new Set(this.config.workers.map(w => w.model));
1923
+ const singleModel = uniqueModels.size === 1;
1924
+ const onlyModel = [...uniqueModels][0];
1925
+ const modelUnhealthy = singleModel && !this.healthTracker.getAllRecords().find(r => r.model === onlyModel)?.healthy;
1926
+ if (singleModel && modelUnhealthy) {
1927
+ if (this.config.enableHollowTermination) {
1928
+ this.logDecision('early-termination', `Terminating swarm: ${this.hollowStreak} consecutive hollow completions on sole model ${onlyModel}`, 'Single-model swarm with unhealthy model — enableHollowTermination is on');
1929
+ this.skipRemainingTasks(`Single-model hollow streak (${this.hollowStreak}x on ${onlyModel})`);
1930
+ }
1931
+ else {
1932
+ this.logDecision('stall-mode', `${this.hollowStreak} consecutive hollows on sole model ${onlyModel} — entering stall mode`, 'Will attempt model failover or simplified retry on next dispatch');
1933
+ // Reset streak to allow more attempts with adjusted strategy
1934
+ this.hollowStreak = 0;
1935
+ }
1936
+ }
1937
+ }
1938
+ // V7: Multi-model hollow ratio — warn but don't terminate unless opt-in
1939
+ const minDispatches = this.config.hollowTerminationMinDispatches ?? 8;
1940
+ const threshold = this.config.hollowTerminationRatio ?? 0.55;
1941
+ if (this.totalDispatches >= minDispatches) {
1942
+ const ratio = this.totalHollows / this.totalDispatches;
1943
+ if (ratio > threshold) {
1944
+ if (this.config.enableHollowTermination) {
1945
+ this.logDecision('early-termination', `Terminating swarm: hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, `Exceeds threshold ${(threshold * 100).toFixed(0)}% after ${minDispatches}+ dispatches — enableHollowTermination is on`);
1946
+ this.skipRemainingTasks(`Hollow ratio ${(ratio * 100).toFixed(0)}% — models cannot execute tasks`);
1947
+ }
1948
+ else if (!this.hollowRatioWarned) {
1949
+ this.hollowRatioWarned = true;
1950
+ this.logDecision('stall-warning', `Hollow ratio ${(ratio * 100).toFixed(0)}% (${this.totalHollows}/${this.totalDispatches})`, 'High hollow rate but continuing — tasks may still recover via resilience');
1951
+ }
1952
+ }
1953
+ }
1002
1954
  return;
1003
1955
  }
1956
+ // F4: Task had pendingCascadeSkip but produced non-hollow results.
1957
+ // Run pre-flight checks — if the output is good, accept it instead of skipping.
1958
+ if (task.pendingCascadeSkip) {
1959
+ const cachedReport = checkArtifacts(task);
1960
+ const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedReport);
1961
+ if (preFlight && !preFlight.passed) {
1962
+ // Output is garbage — honor the cascade skip
1963
+ task.pendingCascadeSkip = undefined;
1964
+ task.status = 'skipped';
1965
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip honored (pre-flight failed: ${preFlight.feedback})`, '');
1966
+ this.emit({ type: 'swarm.task.skipped', taskId, reason: `cascade skip honored — output failed pre-flight: ${preFlight.feedback}` });
1967
+ return;
1968
+ }
1969
+ // Output is good — clear the flag and accept the result
1970
+ task.pendingCascadeSkip = undefined;
1971
+ task.status = 'dispatched'; // Reset so markCompleted works
1972
+ this.logDecision('cascade-skip', `${taskId}: pending cascade skip overridden — worker produced valid output`, '');
1973
+ }
1004
1974
  // Record model health on success (only for non-hollow completions)
1005
1975
  this.healthTracker.recordSuccess(model, durationMs);
1976
+ this.decreaseStagger(); // P7: Speed up on success
1006
1977
  // Run quality gate if enabled — skip under API pressure, skip if circuit breaker tripped,
1007
1978
  // and let the final attempt through without quality gate (so tasks produce *something*)
1979
+ // Foundation tasks get +1 retry to reduce cascade failure risk.
1980
+ const effectiveRetries = this.getEffectiveRetries(task);
1008
1981
  const recentRLCount = this.recentRateLimits.filter(t => t > Date.now() - 30_000).length;
1009
- const isLastAttempt = task.attempts >= (this.config.workerRetries + 1);
1982
+ const isLastAttempt = task.attempts >= (effectiveRetries + 1);
1010
1983
  const shouldRunQualityGate = this.config.qualityGates
1011
- && !this.qualityGateDisabled
1984
+ && !this.qualityGateDisabledModels.has(model)
1012
1985
  && !isLastAttempt
1013
1986
  && Date.now() >= this.circuitBreakerUntil
1014
1987
  && recentRLCount < 2;
1988
+ // C1: Pre-compute artifact report once — shared by quality gate and pre-flight checks
1989
+ const cachedArtifactReport = checkArtifacts(task);
1015
1990
  if (shouldRunQualityGate) {
1016
1991
  // V3: Judge role handles quality gates
1017
1992
  const judgeModel = this.config.hierarchy?.judge?.model
@@ -1021,57 +1996,336 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1021
1996
  persona: this.config.hierarchy?.judge?.persona,
1022
1997
  };
1023
1998
  this.emit({ type: 'swarm.role.action', role: 'judge', action: 'quality-gate', model: judgeModel, taskId });
1024
- const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, this.config.qualityThreshold ?? 3);
1999
+ // Extract file artifacts from worker output for quality gate visibility.
2000
+ // When workers create files via write_file/edit_file, the judge needs to see
2001
+ // the actual content — not just the worker's text claims about what was created.
2002
+ const fileArtifacts = this.extractFileArtifacts(task, taskResult);
2003
+ // Foundation tasks get a relaxed quality threshold (threshold - 1, min 2)
2004
+ // to reduce the chance of cascade-skipping the entire swarm.
2005
+ const baseThreshold = this.config.qualityThreshold ?? 3;
2006
+ const qualityThreshold = task.isFoundation ? Math.max(2, baseThreshold - 1) : baseThreshold;
2007
+ const quality = await evaluateWorkerOutput(this.provider, judgeModel, task, taskResult, judgeConfig, qualityThreshold, (resp, purpose) => this.trackOrchestratorUsage(resp, purpose), fileArtifacts, this.config, cachedArtifactReport);
1025
2008
  taskResult.qualityScore = quality.score;
1026
2009
  taskResult.qualityFeedback = quality.feedback;
1027
- if (!quality.passed) {
1028
- this.qualityRejections++;
1029
- this.consecutiveQualityRejections++;
1030
- // Quality circuit breaker: disable gates after too many consecutive rejections
1031
- if (this.consecutiveQualityRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
1032
- this.qualityGateDisabled = true;
1033
- this.logDecision('quality-circuit-breaker', `Disabled quality gates after ${this.consecutiveQualityRejections} consecutive rejections`, 'Workers cannot meet quality threshold — letting remaining tasks through');
2010
+ // F11: Foundation tasks that barely pass the relaxed threshold get concrete validation.
2011
+ // A 2/5 foundation task with truncated output will cascade-poison all dependents.
2012
+ if (quality.passed && task.isFoundation && quality.score <= baseThreshold - 1) {
2013
+ const concreteResult = runConcreteChecks(task, taskResult);
2014
+ if (!concreteResult.passed) {
2015
+ quality.passed = false;
2016
+ quality.feedback += ` [F11: foundation task barely passed (${quality.score}/${baseThreshold}) but concrete validation failed: ${concreteResult.issues.join('; ')}]`;
2017
+ this.logDecision('foundation-concrete-gate', `${taskId}: foundation task scored ${quality.score} (relaxed threshold ${qualityThreshold}) but concrete checks failed — rejecting`, concreteResult.issues.join('; '));
1034
2018
  }
1035
- // V5: Attach feedback so retry prompt includes it
1036
- task.retryContext = {
1037
- previousFeedback: quality.feedback,
1038
- previousScore: quality.score,
1039
- attempt: task.attempts,
1040
- };
1041
- // V5: Model failover on severe quality rejection — but NOT on artifact auto-fails
1042
- if (quality.score <= 1 && this.config.enableModelFailover && !quality.artifactAutoFail) {
1043
- const capability = SUBTASK_TO_CAPABILITY[task.type] ?? 'code';
1044
- const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
1045
- if (alternative) {
2019
+ }
2020
+ if (!quality.passed) {
2021
+ // F7: Gate error fallback — when LLM judge fails, use concrete validation
2022
+ // If concrete checks pass, tentatively accept the result instead of rejecting.
2023
+ if (quality.gateError && (this.config.enableConcreteValidation !== false)) {
2024
+ const concreteResult = runConcreteChecks(task, taskResult);
2025
+ if (concreteResult.passed) {
2026
+ // Concrete validation passed — tentatively accept despite gate error
2027
+ this.logDecision('gate-error-fallback', `${taskId}: gate error but concrete checks passed — tentatively accepting`, quality.gateErrorMessage ?? 'unknown');
2028
+ taskResult.qualityScore = quality.score;
2029
+ taskResult.qualityFeedback = `${quality.feedback} [concrete validation passed — tentative accept]`;
2030
+ // Fall through to success path (don't return)
2031
+ }
2032
+ else {
2033
+ // Both gate and concrete failed — reject
2034
+ this.logDecision('gate-error-fallback', `${taskId}: gate error AND concrete checks failed — rejecting`, `Concrete issues: ${concreteResult.issues.join('; ')}`);
2035
+ // Fall through to normal rejection below
2036
+ }
2037
+ // If concrete passed, skip the rejection path
2038
+ if (concreteResult.passed) {
2039
+ this.perModelQualityRejections.delete(model);
2040
+ // Jump to success path below
2041
+ }
2042
+ else {
2043
+ // Proceed with normal rejection
2044
+ this.qualityRejections++;
2045
+ task.failureMode = 'quality';
2046
+ this.healthTracker.recordQualityRejection(model, quality.score);
2047
+ this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
2048
+ this.hollowStreak = 0;
2049
+ task.retryContext = {
2050
+ previousFeedback: `Gate error + concrete validation failed: ${concreteResult.issues.join('; ')}`,
2051
+ previousScore: quality.score,
2052
+ attempt: task.attempts,
2053
+ previousModel: model,
2054
+ previousFiles: taskResult.filesModified,
2055
+ swarmProgress: this.getSwarmProgressSummary(),
2056
+ };
2057
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
2058
+ if (canRetry) {
2059
+ this.retries++;
2060
+ }
2061
+ else {
2062
+ // Retries exhausted — try resilience recovery before cascade-skip
2063
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2064
+ return;
2065
+ }
2066
+ // Recovery failed — NOW trigger cascade
2067
+ this.taskQueue.triggerCascadeSkip(taskId);
2068
+ }
1046
2069
  this.emit({
1047
- type: 'swarm.model.failover',
2070
+ type: 'swarm.quality.rejected',
1048
2071
  taskId,
1049
- fromModel: model,
1050
- toModel: alternative.model,
1051
- reason: `quality-score-${quality.score}`,
2072
+ score: quality.score,
2073
+ feedback: quality.feedback,
2074
+ artifactCount: fileArtifacts.length,
2075
+ outputLength: taskResult.output.length,
2076
+ preFlightReject: false,
2077
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
1052
2078
  });
1053
- task.assignedModel = alternative.model;
1054
- this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
2079
+ return;
1055
2080
  }
1056
2081
  }
1057
- const canRetry = this.taskQueue.markFailed(taskId, this.config.workerRetries);
2082
+ else if (!quality.gateError) {
2083
+ // Normal quality rejection (LLM judge rejected, no gate error)
2084
+ this.qualityRejections++;
2085
+ // P6: Tag failure mode for cascade threshold awareness
2086
+ task.failureMode = 'quality';
2087
+ // P1: Quality rejections update model health — undo premature recordSuccess
2088
+ this.healthTracker.recordQualityRejection(model, quality.score);
2089
+ this.emit({ type: 'swarm.model.health', record: { model, ...this.getModelHealthSummary(model) } });
2090
+ // V7: Quality rejection is NOT hollow — worker did work, just poorly
2091
+ this.hollowStreak = 0;
2092
+ // F7: Per-model circuit breaker → "pre-flight only mode" instead of fully disabling gates.
2093
+ // After threshold rejections, skip LLM judge but keep pre-flight mandatory.
2094
+ if (!quality.preFlightReject) {
2095
+ const modelRejections = (this.perModelQualityRejections.get(model) ?? 0) + 1;
2096
+ this.perModelQualityRejections.set(model, modelRejections);
2097
+ if (modelRejections >= SwarmOrchestrator.QUALITY_CIRCUIT_BREAKER_THRESHOLD) {
2098
+ this.qualityGateDisabledModels.add(model);
2099
+ this.logDecision('quality-circuit-breaker', `Switched model ${model} to pre-flight-only mode after ${modelRejections} rejections`, 'Skipping LLM judge but keeping pre-flight checks mandatory');
2100
+ }
2101
+ }
2102
+ // V5: Attach feedback so retry prompt includes it
2103
+ task.retryContext = {
2104
+ previousFeedback: quality.feedback,
2105
+ previousScore: quality.score,
2106
+ attempt: task.attempts,
2107
+ previousModel: model,
2108
+ previousFiles: taskResult.filesModified,
2109
+ swarmProgress: this.getSwarmProgressSummary(),
2110
+ };
2111
+ // Phase 3.1: Report quality rejection to shared context engine
2112
+ this.sharedContextEngine.reportFailure(taskId, {
2113
+ action: task.description.slice(0, 200),
2114
+ error: `Quality gate rejection (score ${quality.score}): ${quality.feedback.slice(0, 300)}`,
2115
+ });
2116
+ // V5: Model failover on quality rejection — but NOT on artifact auto-fails
2117
+ // P1: Widened from score<=1 to score<threshold so failover triggers on any rejection
2118
+ if (quality.score < qualityThreshold && this.config.enableModelFailover && !quality.artifactAutoFail) {
2119
+ const capability = getTaskTypeConfig(task.type, this.config).capability ?? 'code';
2120
+ const alternative = selectAlternativeModel(this.config.workers, model, capability, this.healthTracker);
2121
+ if (alternative) {
2122
+ this.emit({
2123
+ type: 'swarm.model.failover',
2124
+ taskId,
2125
+ fromModel: model,
2126
+ toModel: alternative.model,
2127
+ reason: `quality-score-${quality.score}`,
2128
+ });
2129
+ task.assignedModel = alternative.model;
2130
+ this.logDecision('failover', `Quality failover ${taskId}: ${model} → ${alternative.model}`, `Score ${quality.score}/5`);
2131
+ }
2132
+ }
2133
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
2134
+ if (canRetry) {
2135
+ this.retries++;
2136
+ }
2137
+ else {
2138
+ // Retries exhausted — try resilience recovery before cascade-skip
2139
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2140
+ return;
2141
+ }
2142
+ // Recovery failed — NOW trigger cascade
2143
+ this.taskQueue.triggerCascadeSkip(taskId);
2144
+ }
2145
+ // M1: Only emit quality.rejected (not duplicate task.failed)
2146
+ this.emit({
2147
+ type: 'swarm.quality.rejected',
2148
+ taskId,
2149
+ score: quality.score,
2150
+ feedback: quality.feedback,
2151
+ artifactCount: fileArtifacts.length,
2152
+ outputLength: taskResult.output.length,
2153
+ preFlightReject: quality.preFlightReject,
2154
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
2155
+ });
2156
+ return;
2157
+ }
2158
+ else {
2159
+ // gateError=true but concrete validation disabled — reject
2160
+ this.qualityRejections++;
2161
+ task.failureMode = 'quality';
2162
+ this.hollowStreak = 0;
2163
+ task.retryContext = {
2164
+ previousFeedback: quality.feedback,
2165
+ previousScore: quality.score,
2166
+ attempt: task.attempts,
2167
+ previousModel: model,
2168
+ previousFiles: taskResult.filesModified,
2169
+ swarmProgress: this.getSwarmProgressSummary(),
2170
+ };
2171
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
2172
+ if (canRetry) {
2173
+ this.retries++;
2174
+ }
2175
+ else {
2176
+ // Retries exhausted — try resilience recovery before cascade-skip
2177
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2178
+ return;
2179
+ }
2180
+ // Recovery failed — NOW trigger cascade
2181
+ this.taskQueue.triggerCascadeSkip(taskId);
2182
+ }
2183
+ this.emit({
2184
+ type: 'swarm.quality.rejected',
2185
+ taskId,
2186
+ score: quality.score,
2187
+ feedback: quality.feedback,
2188
+ artifactCount: fileArtifacts.length,
2189
+ outputLength: taskResult.output.length,
2190
+ preFlightReject: false,
2191
+ filesOnDisk: checkArtifactsEnhanced(task, taskResult).files.filter(f => f.exists && f.sizeBytes > 0).length,
2192
+ });
2193
+ return;
2194
+ }
2195
+ }
2196
+ // Quality passed — reset per-model rejection counter
2197
+ this.perModelQualityRejections.delete(model);
2198
+ }
2199
+ // F7: When quality gate was skipped (last attempt, pre-flight-only mode, API pressure),
2200
+ // still run pre-flight + concrete checks so obviously broken outputs don't slip through.
2201
+ // C1: Use cached artifact report to avoid double filesystem scan.
2202
+ if (!shouldRunQualityGate && this.config.qualityGates) {
2203
+ const preFlight = runPreFlightChecks(task, taskResult, this.config, cachedArtifactReport);
2204
+ if (preFlight && !preFlight.passed) {
2205
+ taskResult.qualityScore = preFlight.score;
2206
+ taskResult.qualityFeedback = preFlight.feedback;
2207
+ this.qualityRejections++;
2208
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
1058
2209
  if (canRetry) {
1059
2210
  this.retries++;
1060
2211
  }
1061
- // M1: Only emit quality.rejected (not duplicate task.failed)
2212
+ else {
2213
+ // Retries exhausted — try resilience recovery before cascade-skip
2214
+ this.logDecision('preflight-reject', `${taskId}: pre-flight failed: ${preFlight.feedback}`, '');
2215
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2216
+ return;
2217
+ }
2218
+ // Recovery failed — NOW trigger cascade
2219
+ this.taskQueue.triggerCascadeSkip(taskId);
2220
+ }
1062
2221
  this.emit({
1063
2222
  type: 'swarm.quality.rejected',
1064
2223
  taskId,
1065
- score: quality.score,
1066
- feedback: quality.feedback,
2224
+ score: preFlight.score,
2225
+ feedback: preFlight.feedback,
2226
+ artifactCount: 0,
2227
+ outputLength: taskResult.output.length,
2228
+ preFlightReject: true,
1067
2229
  });
1068
2230
  return;
1069
2231
  }
1070
- // Quality passed — reset consecutive rejection counter
1071
- this.consecutiveQualityRejections = 0;
2232
+ // F2: Run concrete validation when pre-flight passes but gate was skipped
2233
+ if (this.config.enableConcreteValidation !== false) {
2234
+ const concreteResult = runConcreteChecks(task, taskResult);
2235
+ if (!concreteResult.passed) {
2236
+ taskResult.qualityScore = 2;
2237
+ taskResult.qualityFeedback = `Concrete validation failed: ${concreteResult.issues.join('; ')}`;
2238
+ this.qualityRejections++;
2239
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
2240
+ if (canRetry) {
2241
+ this.retries++;
2242
+ }
2243
+ else {
2244
+ // Retries exhausted — try resilience recovery before cascade-skip
2245
+ this.logDecision('concrete-reject', `${taskId}: concrete validation failed: ${concreteResult.issues.join('; ')}`, '');
2246
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2247
+ return;
2248
+ }
2249
+ // Recovery failed — NOW trigger cascade
2250
+ this.taskQueue.triggerCascadeSkip(taskId);
2251
+ }
2252
+ this.emit({
2253
+ type: 'swarm.quality.rejected',
2254
+ taskId,
2255
+ score: 2,
2256
+ feedback: taskResult.qualityFeedback,
2257
+ artifactCount: 0,
2258
+ outputLength: taskResult.output.length,
2259
+ preFlightReject: false,
2260
+ });
2261
+ return;
2262
+ }
2263
+ }
2264
+ }
2265
+ // Final completion guard: block "narrative success" for action tasks.
2266
+ const completionGuard = this.config.completionGuard ?? {};
2267
+ const rejectFutureIntentOutputs = completionGuard.rejectFutureIntentOutputs ?? true;
2268
+ const requireConcreteArtifactsForActionTasks = completionGuard.requireConcreteArtifactsForActionTasks ?? true;
2269
+ const typeConfig = getTaskTypeConfig(task.type, this.config);
2270
+ const artifactReport = checkArtifactsEnhanced(task, taskResult);
2271
+ const filesOnDisk = artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length;
2272
+ const hasConcreteArtifacts = filesOnDisk > 0 || (taskResult.filesModified?.length ?? 0) > 0;
2273
+ const isActionTask = !!typeConfig.requiresToolCalls;
2274
+ if (rejectFutureIntentOutputs && hasFutureIntentLanguage(taskResult.output ?? '')) {
2275
+ taskResult.qualityScore = 1;
2276
+ taskResult.qualityFeedback = 'Completion rejected: output indicates pending, unexecuted work';
2277
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
2278
+ if (canRetry) {
2279
+ this.retries++;
2280
+ }
2281
+ else {
2282
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2283
+ return;
2284
+ }
2285
+ this.taskQueue.triggerCascadeSkip(taskId);
2286
+ }
2287
+ this.emit({
2288
+ type: 'swarm.quality.rejected',
2289
+ taskId,
2290
+ score: 1,
2291
+ feedback: taskResult.qualityFeedback,
2292
+ artifactCount: filesOnDisk,
2293
+ outputLength: taskResult.output.length,
2294
+ preFlightReject: true,
2295
+ filesOnDisk,
2296
+ });
2297
+ return;
2298
+ }
2299
+ if (requireConcreteArtifactsForActionTasks && isActionTask && !hasConcreteArtifacts) {
2300
+ taskResult.qualityScore = 1;
2301
+ taskResult.qualityFeedback = 'Completion rejected: action task produced no concrete artifacts';
2302
+ const canRetry = this.taskQueue.markFailedWithoutCascade(taskId, effectiveRetries);
2303
+ if (canRetry) {
2304
+ this.retries++;
2305
+ }
2306
+ else {
2307
+ if (await this.tryResilienceRecovery(task, taskId, taskResult, spawnResult)) {
2308
+ return;
2309
+ }
2310
+ this.taskQueue.triggerCascadeSkip(taskId);
2311
+ }
2312
+ this.emit({
2313
+ type: 'swarm.quality.rejected',
2314
+ taskId,
2315
+ score: 1,
2316
+ feedback: taskResult.qualityFeedback,
2317
+ artifactCount: filesOnDisk,
2318
+ outputLength: taskResult.output.length,
2319
+ preFlightReject: true,
2320
+ filesOnDisk,
2321
+ });
2322
+ return;
1072
2323
  }
1073
2324
  // Task passed — mark completed
1074
2325
  this.taskQueue.markCompleted(taskId, taskResult);
2326
+ this.hollowStreak = 0;
2327
+ // F25: Clear timeout counter on success
2328
+ this.taskTimeoutCounts.delete(taskId);
1075
2329
  // H6: Post findings to blackboard with error handling
1076
2330
  if (this.blackboard && taskResult.findings) {
1077
2331
  try {
@@ -1117,7 +2371,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1117
2371
  const tasks = this.taskQueue.getAllTasks();
1118
2372
  const outputs = tasks
1119
2373
  .filter(t => t.status === 'completed')
1120
- .map(t => taskResultToAgentOutput(t))
2374
+ .map(t => taskResultToAgentOutput(t, this.config))
1121
2375
  .filter((o) => o !== null);
1122
2376
  if (outputs.length === 0)
1123
2377
  return null;
@@ -1147,11 +2401,17 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1147
2401
  activeWorkers: this.workerPool.getActiveWorkerStatus(),
1148
2402
  queue: stats,
1149
2403
  budget: {
1150
- tokensUsed: this.totalTokens,
2404
+ tokensUsed: this.totalTokens + this.orchestratorTokens,
1151
2405
  tokensTotal: this.config.totalBudget,
1152
- costUsed: this.totalCost,
2406
+ costUsed: this.totalCost + this.orchestratorCost,
1153
2407
  costTotal: this.config.maxCost,
1154
2408
  },
2409
+ orchestrator: {
2410
+ tokens: this.orchestratorTokens,
2411
+ cost: this.orchestratorCost,
2412
+ calls: this.orchestratorCalls,
2413
+ model: this.config.orchestratorModel,
2414
+ },
1155
2415
  };
1156
2416
  }
1157
2417
  /**
@@ -1163,6 +2423,69 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1163
2423
  this.currentPhase = 'failed';
1164
2424
  await this.workerPool.cancelAll();
1165
2425
  }
2426
+ // ─── D3: Model Capability Probing ─────────────────────────────────────
2427
+ /**
2428
+ * D3/F23: Probe each unique model to verify it can make tool calls.
2429
+ * Models that fail the probe are marked unhealthy so they're skipped in dispatch.
2430
+ *
2431
+ * F23 fix: Uses chatWithTools() with actual tool definitions instead of
2432
+ * plain chat() which never included tools in the API request.
2433
+ */
2434
+ async probeModelCapability() {
2435
+ const uniqueModels = new Set(this.config.workers.map(w => w.model));
2436
+ this.emit({ type: 'swarm.phase.progress', phase: 'scheduling', message: `Probing ${uniqueModels.size} model(s) for tool-calling capability...` });
2437
+ // F23: Check if provider supports native tool calling
2438
+ const supportsTools = 'chatWithTools' in this.provider
2439
+ && typeof this.provider.chatWithTools === 'function';
2440
+ if (!supportsTools) {
2441
+ // Provider doesn't support chatWithTools — skip probe entirely.
2442
+ // Workers will rely on text-based tool parsing fallback.
2443
+ this.logDecision('model-probe', 'Provider does not support chatWithTools — skipping probe', '');
2444
+ return;
2445
+ }
2446
+ const providerWithTools = this.provider;
2447
+ const probeTools = [{
2448
+ type: 'function',
2449
+ function: {
2450
+ name: 'read_file',
2451
+ description: 'Read a file from disk',
2452
+ parameters: {
2453
+ type: 'object',
2454
+ properties: { path: { type: 'string', description: 'File path' } },
2455
+ required: ['path'],
2456
+ },
2457
+ },
2458
+ }];
2459
+ // F24: Configurable probe timeout — generous default for slow models/connections
2460
+ const probeTimeout = this.config.probeTimeoutMs ?? 60_000;
2461
+ for (const model of uniqueModels) {
2462
+ try {
2463
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Probe timeout (${probeTimeout}ms)`)), probeTimeout));
2464
+ const response = await Promise.race([
2465
+ providerWithTools.chatWithTools([
2466
+ { role: 'system', content: 'You are a test probe. Call the read_file tool with path "package.json".' },
2467
+ { role: 'user', content: 'Read package.json.' },
2468
+ ], { model, maxTokens: 200, temperature: 0, tools: probeTools, tool_choice: 'required' }),
2469
+ timeoutPromise,
2470
+ ]);
2471
+ const hasToolCall = (response.toolCalls?.length ?? 0) > 0;
2472
+ if (!hasToolCall) {
2473
+ // F19: Directly mark unhealthy — probe failure is definitive evidence
2474
+ this.healthTracker.markUnhealthy(model);
2475
+ this.logDecision('model-probe', `Model ${model} failed probe (no tool calls)`, 'Marked unhealthy');
2476
+ }
2477
+ else {
2478
+ this.healthTracker.recordSuccess(model, 0);
2479
+ this.logDecision('model-probe', `Model ${model} passed probe`, '');
2480
+ }
2481
+ }
2482
+ catch {
2483
+ // F19: Directly mark unhealthy on probe error (includes timeout)
2484
+ this.healthTracker.markUnhealthy(model);
2485
+ this.logDecision('model-probe', `Model ${model} probe errored`, 'Marked unhealthy');
2486
+ }
2487
+ }
2488
+ }
1166
2489
  // ─── Circuit Breaker ────────────────────────────────────────────────
1167
2490
  /**
1168
2491
  * Record a rate limit hit and check if the circuit breaker should trip.
@@ -1170,6 +2493,7 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1170
2493
  recordRateLimit() {
1171
2494
  const now = Date.now();
1172
2495
  this.recentRateLimits.push(now);
2496
+ this.increaseStagger(); // P7: Back off on rate limits
1173
2497
  // Prune entries older than the window
1174
2498
  const cutoff = now - SwarmOrchestrator.CIRCUIT_BREAKER_WINDOW_MS;
1175
2499
  this.recentRateLimits = this.recentRateLimits.filter(t => t > cutoff);
@@ -1197,6 +2521,19 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1197
2521
  }
1198
2522
  return false;
1199
2523
  }
2524
+ // ─── P7: Adaptive Stagger ────────────────────────────────────────────
2525
+ /** P7: Get current stagger delay (adapts based on rate limit / success signals). */
2526
+ getStaggerMs() {
2527
+ return this.adaptiveStaggerMs;
2528
+ }
2529
+ /** P7: Increase stagger on rate limit (×1.5, capped at 10s). */
2530
+ increaseStagger() {
2531
+ this.adaptiveStaggerMs = Math.min(this.adaptiveStaggerMs * 1.5, 10_000);
2532
+ }
2533
+ /** P7: Decrease stagger on success (×0.9, floor at 200ms). */
2534
+ decreaseStagger() {
2535
+ this.adaptiveStaggerMs = Math.max(this.adaptiveStaggerMs * 0.9, 200);
2536
+ }
1200
2537
  // ─── V2: Decision Logging ─────────────────────────────────────────────
1201
2538
  logDecision(phase, decision, reasoning) {
1202
2539
  const entry = {
@@ -1223,14 +2560,17 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1223
2560
  waves: queueState.waves,
1224
2561
  currentWave: queueState.currentWave,
1225
2562
  stats: {
1226
- totalTokens: this.totalTokens,
1227
- totalCost: this.totalCost,
2563
+ totalTokens: this.totalTokens + this.orchestratorTokens,
2564
+ totalCost: this.totalCost + this.orchestratorCost,
1228
2565
  qualityRejections: this.qualityRejections,
1229
2566
  retries: this.retries,
1230
2567
  },
1231
2568
  modelHealth: this.healthTracker.getAllRecords(),
1232
2569
  decisions: this.orchestratorDecisions,
1233
2570
  errors: this.errors,
2571
+ originalPrompt: this.originalPrompt,
2572
+ sharedContext: this.sharedContextState.toJSON(),
2573
+ sharedEconomics: this.sharedEconomicsState.toJSON(),
1234
2574
  });
1235
2575
  this.emit({
1236
2576
  type: 'swarm.state.checkpoint',
@@ -1250,9 +2590,9 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1250
2590
  emitBudgetUpdate() {
1251
2591
  this.emit({
1252
2592
  type: 'swarm.budget.update',
1253
- tokensUsed: this.totalTokens,
2593
+ tokensUsed: this.totalTokens + this.orchestratorTokens,
1254
2594
  tokensTotal: this.config.totalBudget,
1255
- costUsed: this.totalCost,
2595
+ costUsed: this.totalCost + this.orchestratorCost,
1256
2596
  costTotal: this.config.maxCost,
1257
2597
  });
1258
2598
  }
@@ -1267,8 +2607,8 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1267
2607
  failedTasks: queueStats.failed,
1268
2608
  skippedTasks: queueStats.skipped,
1269
2609
  totalWaves: this.taskQueue.getTotalWaves(),
1270
- totalTokens: this.totalTokens,
1271
- totalCost: this.totalCost,
2610
+ totalTokens: this.totalTokens + this.orchestratorTokens,
2611
+ totalCost: this.totalCost + this.orchestratorCost,
1272
2612
  totalDurationMs: Date.now() - this.startTime,
1273
2613
  qualityRejections: this.qualityRejections,
1274
2614
  retries: this.retries,
@@ -1293,6 +2633,16 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1293
2633
  if (this.verificationResult) {
1294
2634
  parts.push(` Verification: ${this.verificationResult.passed ? 'PASSED' : 'FAILED'}`);
1295
2635
  }
2636
+ // Artifact inventory: show what files actually exist on disk regardless of task status
2637
+ if (this.artifactInventory && this.artifactInventory.totalFiles > 0) {
2638
+ parts.push(` Files on disk: ${this.artifactInventory.totalFiles} files (${(this.artifactInventory.totalBytes / 1024).toFixed(1)}KB)`);
2639
+ for (const f of this.artifactInventory.files.slice(0, 15)) {
2640
+ parts.push(` ${f.path}: ${f.sizeBytes}B`);
2641
+ }
2642
+ if (this.artifactInventory.files.length > 15) {
2643
+ parts.push(` ... and ${this.artifactInventory.files.length - 15} more`);
2644
+ }
2645
+ }
1296
2646
  return parts.join('\n');
1297
2647
  }
1298
2648
  buildErrorResult(message) {
@@ -1319,6 +2669,703 @@ Respond with JSON: { "fixups": [{ "description": "what to fix", "type": "impleme
1319
2669
  return null;
1320
2670
  }
1321
2671
  }
2672
+ /**
2673
+ * Detect foundation tasks: tasks that are a dependency of 2+ downstream tasks.
2674
+ * These are critical single-points-of-failure — mark them for extra resilience.
2675
+ */
2676
+ detectFoundationTasks() {
2677
+ const allTasks = this.taskQueue.getAllTasks();
2678
+ const dependentCounts = new Map();
2679
+ for (const task of allTasks) {
2680
+ for (const depId of task.dependencies) {
2681
+ dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
2682
+ }
2683
+ }
2684
+ for (const task of allTasks) {
2685
+ const dependentCount = dependentCounts.get(task.id) ?? 0;
2686
+ if (dependentCount >= 2) {
2687
+ task.isFoundation = true;
2688
+ this.logDecision('scheduling', `Foundation task: ${task.id} (${dependentCount} dependents)`, 'Extra retries and relaxed quality threshold applied');
2689
+ }
2690
+ }
2691
+ }
2692
+ /**
2693
+ * Extract file artifacts from a worker's output for quality gate visibility.
2694
+ * Reads actual file content from disk so the judge can verify real work,
2695
+ * not just text claims about what was created.
2696
+ */
2697
+ extractFileArtifacts(task, taskResult) {
2698
+ const artifacts = [];
2699
+ const seen = new Set();
2700
+ // Collect file paths from multiple sources
2701
+ const candidatePaths = [];
2702
+ // 1. filesModified from structured closure report
2703
+ if (taskResult.filesModified) {
2704
+ candidatePaths.push(...taskResult.filesModified);
2705
+ }
2706
+ // 2. targetFiles from task definition
2707
+ if (task.targetFiles) {
2708
+ candidatePaths.push(...task.targetFiles);
2709
+ }
2710
+ // 3. Extract file paths mentioned in worker output (e.g., "Created src/foo.ts")
2711
+ const filePathPattern = /(?:created|wrote|modified|edited|updated)\s+["`']?([^\s"`',]+\.\w+)/gi;
2712
+ let match;
2713
+ while ((match = filePathPattern.exec(taskResult.output)) !== null) {
2714
+ candidatePaths.push(match[1]);
2715
+ }
2716
+ // Resolve against the target project directory, not CWD
2717
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2718
+ // Read previews from disk
2719
+ for (const filePath of candidatePaths) {
2720
+ if (seen.has(filePath))
2721
+ continue;
2722
+ seen.add(filePath);
2723
+ try {
2724
+ const resolved = path.resolve(baseDir, filePath);
2725
+ if (fs.existsSync(resolved)) {
2726
+ const content = fs.readFileSync(resolved, 'utf-8');
2727
+ if (content.length > 0) {
2728
+ artifacts.push({ path: filePath, preview: content.slice(0, 2000) });
2729
+ }
2730
+ }
2731
+ }
2732
+ catch {
2733
+ // Skip unreadable files
2734
+ }
2735
+ // Limit to 10 files to keep prompt size reasonable
2736
+ if (artifacts.length >= 10)
2737
+ break;
2738
+ }
2739
+ return artifacts;
2740
+ }
2741
+ /**
2742
+ * Build an inventory of filesystem artifacts produced during swarm execution.
2743
+ * Scans all tasks' targetFiles and readFiles to check what actually exists on disk.
2744
+ * This reveals work done by workers even when tasks "failed" (timeout, quality gate, etc.).
2745
+ */
2746
+ buildArtifactInventory() {
2747
+ const allFiles = new Set();
2748
+ for (const task of this.taskQueue.getAllTasks()) {
2749
+ for (const f of (task.targetFiles ?? []))
2750
+ allFiles.add(f);
2751
+ for (const f of (task.readFiles ?? []))
2752
+ allFiles.add(f);
2753
+ }
2754
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
2755
+ const artifacts = [];
2756
+ for (const filePath of allFiles) {
2757
+ try {
2758
+ const resolved = path.resolve(baseDir, filePath);
2759
+ if (fs.existsSync(resolved)) {
2760
+ const stats = fs.statSync(resolved);
2761
+ if (stats.isFile() && stats.size > 0) {
2762
+ artifacts.push({ path: filePath, sizeBytes: stats.size, exists: true });
2763
+ }
2764
+ }
2765
+ }
2766
+ catch { /* skip unreadable files */ }
2767
+ }
2768
+ return {
2769
+ files: artifacts,
2770
+ totalFiles: artifacts.length,
2771
+ totalBytes: artifacts.reduce((s, a) => s + a.sizeBytes, 0),
2772
+ };
2773
+ }
2774
+ /**
2775
+ * Skip all remaining pending/ready tasks (used for early termination).
2776
+ */
2777
+ skipRemainingTasks(reason) {
2778
+ for (const task of this.taskQueue.getAllTasks()) {
2779
+ if (task.status === 'pending' || task.status === 'ready') {
2780
+ task.status = 'skipped';
2781
+ this.emit({ type: 'swarm.task.skipped', taskId: task.id, reason });
2782
+ }
2783
+ }
2784
+ }
2785
+ /**
2786
+ * F21: Mid-swarm situational assessment after each wave.
2787
+ * Evaluates success rate and budget health, triages low-priority tasks when budget is tight.
2788
+ * Also detects stalled progress and triggers mid-swarm re-planning.
2789
+ */
2790
+ async assessAndAdapt(waveIndex) {
2791
+ const stats = this.taskQueue.getStats();
2792
+ const budgetStats = this.budgetPool.getStats();
2793
+ // 1. Calculate success rate for this swarm run
2794
+ const successRate = stats.completed / Math.max(1, stats.completed + stats.failed + stats.skipped);
2795
+ // 2. Budget efficiency: tokens spent per completed task
2796
+ const tokensPerTask = stats.completed > 0
2797
+ ? (this.totalTokens / stats.completed)
2798
+ : Infinity;
2799
+ // 3. Remaining budget vs remaining tasks
2800
+ const remainingTasks = stats.total - stats.completed - stats.failed - stats.skipped;
2801
+ const estimatedTokensNeeded = remainingTasks * tokensPerTask;
2802
+ const budgetSufficient = budgetStats.tokensRemaining > estimatedTokensNeeded * 0.5;
2803
+ // Log the assessment for observability
2804
+ this.logDecision('mid-swarm-assessment', `After wave ${waveIndex + 1}: ${stats.completed}/${stats.total} completed (${(successRate * 100).toFixed(0)}%), ` +
2805
+ `${remainingTasks} remaining, ${budgetStats.tokensRemaining} tokens left`, budgetSufficient ? 'Budget looks sufficient' : 'Budget may be insufficient for remaining tasks');
2806
+ // 4. If budget is tight, prioritize: skip low-value remaining tasks
2807
+ // Only triage if we have actual data (at least one completion to estimate from)
2808
+ if (!budgetSufficient && remainingTasks > 1 && stats.completed > 0) {
2809
+ // Prefer pausing over skipping: if workers are still running, wait for budget release
2810
+ const runningCount = stats.running ?? 0;
2811
+ if (runningCount > 0) {
2812
+ this.logDecision('budget-wait', 'Budget tight but workers still running — waiting for budget release', `${runningCount} workers active, ${budgetStats.tokensRemaining} tokens remaining`);
2813
+ return;
2814
+ }
2815
+ const expendableTasks = this.findExpendableTasks();
2816
+ // Hard cap: never skip more than 20% of remaining tasks in one triage pass
2817
+ const maxSkips = Math.max(1, Math.floor(remainingTasks * 0.2));
2818
+ if (expendableTasks.length > 0) {
2819
+ let currentEstimate = estimatedTokensNeeded;
2820
+ let skipped = 0;
2821
+ for (const task of expendableTasks) {
2822
+ if (skipped >= maxSkips)
2823
+ break;
2824
+ // Stop trimming once we're within budget
2825
+ if (currentEstimate * 0.7 <= budgetStats.tokensRemaining)
2826
+ break;
2827
+ task.status = 'skipped';
2828
+ skipped++;
2829
+ this.emit({ type: 'swarm.task.skipped', taskId: task.id,
2830
+ reason: 'Budget conservation: skipping low-priority task to protect critical path' });
2831
+ this.logDecision('budget-triage', `Skipping ${task.id} (${task.type}, complexity ${task.complexity}) to conserve budget`, `${remainingTasks} tasks remain, ${budgetStats.tokensRemaining} tokens`);
2832
+ currentEstimate -= tokensPerTask;
2833
+ }
2834
+ }
2835
+ }
2836
+ // 5. Stall detection: if progress ratio is too low, trigger re-plan
2837
+ const attemptedTasks = stats.completed + stats.failed + stats.skipped;
2838
+ if (attemptedTasks >= 5) {
2839
+ const progressRatio = stats.completed / Math.max(1, attemptedTasks);
2840
+ if (progressRatio < 0.4) {
2841
+ this.logDecision('stall-detected', `Progress stalled: ${stats.completed}/${attemptedTasks} tasks succeeded (${(progressRatio * 100).toFixed(0)}%)`, 'Triggering mid-swarm re-plan');
2842
+ this.emit({
2843
+ type: 'swarm.stall',
2844
+ progressRatio,
2845
+ attempted: attemptedTasks,
2846
+ completed: stats.completed,
2847
+ });
2848
+ await this.midSwarmReplan();
2849
+ }
2850
+ }
2851
+ }
2852
+ /**
2853
+ * F21: Find expendable tasks — leaf tasks (no dependents) with lowest complexity.
2854
+ * These are the safest to skip when budget is tight.
2855
+ * Only tasks with complexity <= 2 are considered expendable.
2856
+ */
2857
+ findExpendableTasks() {
2858
+ const allTasks = this.taskQueue.getAllTasks();
2859
+ // Build reverse dependency map: which tasks depend on each task?
2860
+ const dependentCounts = new Map();
2861
+ for (const task of allTasks) {
2862
+ for (const depId of task.dependencies) {
2863
+ dependentCounts.set(depId, (dependentCounts.get(depId) ?? 0) + 1);
2864
+ }
2865
+ }
2866
+ // Expendable = pending/ready, never attempted, no dependents, not foundation,
2867
+ // complexity <= 2 (simple leaf tasks only), lowest complexity first
2868
+ return allTasks
2869
+ .filter(t => (t.status === 'pending' || t.status === 'ready') &&
2870
+ t.attempts === 0 &&
2871
+ !t.isFoundation &&
2872
+ (t.complexity ?? 5) <= 2 &&
2873
+ (dependentCounts.get(t.id) ?? 0) === 0)
2874
+ .sort((a, b) => (a.complexity ?? 5) - (b.complexity ?? 5));
2875
+ }
2876
+ /**
2877
+ * Mid-swarm re-planning: when progress stalls, ask LLM to re-plan remaining work.
2878
+ * Creates simpler replacement tasks for stuck/failed work, building on what's already done.
2879
+ * Only triggers once per swarm execution to avoid infinite re-planning loops.
2880
+ */
2881
+ async midSwarmReplan() {
2882
+ if (this.hasReplanned)
2883
+ return;
2884
+ this.hasReplanned = true;
2885
+ const allTasks = this.taskQueue.getAllTasks();
2886
+ const completed = allTasks.filter(t => t.status === 'completed' || t.status === 'decomposed');
2887
+ const stuck = allTasks.filter(t => t.status === 'failed' || t.status === 'skipped');
2888
+ if (stuck.length === 0)
2889
+ return;
2890
+ const completedSummary = completed.map(t => `- ${t.description} [${t.type}] → completed${t.degraded ? ' (degraded)' : ''}`).join('\n') || '(none)';
2891
+ const stuckSummary = stuck.map(t => `- ${t.description} [${t.type}] → ${t.status} (${t.failureMode ?? 'unknown'})`).join('\n');
2892
+ const artifactInventory = this.buildArtifactInventory();
2893
+ const artifactSummary = artifactInventory.files.map(f => `- ${f.path} (${f.sizeBytes}B)`).join('\n') || '(none)';
2894
+ const replanPrompt = `The swarm is stalled. Here's the situation:
2895
+
2896
+ COMPLETED WORK:
2897
+ ${completedSummary}
2898
+
2899
+ FILES ON DISK:
2900
+ ${artifactSummary}
2901
+
2902
+ STUCK TASKS (failed or skipped):
2903
+ ${stuckSummary}
2904
+
2905
+ Re-plan the remaining work. Create new subtasks that:
2906
+ 1. Build on what's already completed (don't redo work)
2907
+ 2. Are more focused in scope (but assign realistic complexity for the work involved — don't underestimate)
2908
+ 3. Can succeed independently (minimize dependencies)
2909
+
2910
+ Return JSON: { "subtasks": [{ "description": "...", "type": "implement|test|research|review|document|refactor", "complexity": 1-5, "dependencies": [], "relevantFiles": [] }] }
2911
+ Return ONLY the JSON object, no other text.`;
2912
+ try {
2913
+ const response = await this.provider.chat([{ role: 'user', content: replanPrompt }]);
2914
+ this.trackOrchestratorUsage(response, 'mid-swarm-replan');
2915
+ const content = response.content ?? '';
2916
+ const jsonMatch = content.match(/\{[\s\S]*"subtasks"[\s\S]*\}/);
2917
+ if (!jsonMatch) {
2918
+ this.logDecision('replan-failed', 'LLM produced no parseable re-plan JSON', content.slice(0, 200));
2919
+ return;
2920
+ }
2921
+ const parsed = JSON.parse(jsonMatch[0]);
2922
+ if (!parsed.subtasks || parsed.subtasks.length === 0) {
2923
+ this.logDecision('replan-failed', 'LLM produced empty subtask list', '');
2924
+ return;
2925
+ }
2926
+ // Add new tasks from re-plan into current wave
2927
+ const newTasks = this.taskQueue.addReplanTasks(parsed.subtasks, this.taskQueue.getCurrentWave());
2928
+ this.logDecision('replan-success', `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`, newTasks.map(t => t.description).join('; '));
2929
+ this.emit({
2930
+ type: 'swarm.replan',
2931
+ stuckCount: stuck.length,
2932
+ newTaskCount: newTasks.length,
2933
+ });
2934
+ this.emit({
2935
+ type: 'swarm.orchestrator.decision',
2936
+ decision: {
2937
+ timestamp: Date.now(),
2938
+ phase: 'replan',
2939
+ decision: `Re-planned ${stuck.length} stuck tasks into ${newTasks.length} new tasks`,
2940
+ reasoning: newTasks.map(t => `${t.id}: ${t.description}`).join('; '),
2941
+ },
2942
+ });
2943
+ }
2944
+ catch (error) {
2945
+ this.logDecision('replan-failed', `Re-plan LLM call failed: ${error.message}`, '');
2946
+ }
2947
+ }
2948
+ /**
2949
+ * Rescue cascade-skipped tasks that can still run.
2950
+ * After cascade-skip fires, assess whether skipped tasks can still be attempted:
2951
+ * - If all OTHER dependencies completed and the failed dep's artifacts exist on disk → un-skip
2952
+ * - If the task has no strict data dependency on the failed task (different file targets) → un-skip with warning
2953
+ */
2954
+ rescueCascadeSkipped(lenient = false) {
2955
+ const skippedTasks = this.taskQueue.getSkippedTasks();
2956
+ const rescued = [];
2957
+ for (const task of skippedTasks) {
2958
+ if (task.dependencies.length === 0)
2959
+ continue;
2960
+ let completedDeps = 0;
2961
+ let failedDepsWithArtifacts = 0;
2962
+ let failedDepsWithoutArtifacts = 0;
2963
+ let skippedDepsBlockedBySkipped = 0;
2964
+ let totalDeps = 0;
2965
+ const failedDepDescriptions = [];
2966
+ for (const depId of task.dependencies) {
2967
+ const dep = this.taskQueue.getTask(depId);
2968
+ if (!dep)
2969
+ continue;
2970
+ totalDeps++;
2971
+ if (dep.status === 'completed' || dep.status === 'decomposed') {
2972
+ completedDeps++;
2973
+ }
2974
+ else if (dep.status === 'failed' || dep.status === 'skipped') {
2975
+ // V10: In lenient mode, use checkArtifactsEnhanced for broader detection
2976
+ const artifactReport = lenient ? checkArtifactsEnhanced(dep) : checkArtifacts(dep);
2977
+ if (artifactReport && artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length > 0) {
2978
+ failedDepsWithArtifacts++;
2979
+ failedDepDescriptions.push(`${dep.description} (failed but ${artifactReport.files.filter(f => f.exists && f.sizeBytes > 0).length} artifacts exist)`);
2980
+ }
2981
+ else {
2982
+ // Check if this dep's target files exist on disk (may have been created by earlier attempt)
2983
+ const targetFiles = dep.targetFiles ?? [];
2984
+ const existingFiles = targetFiles.filter(f => {
2985
+ try {
2986
+ const resolved = path.resolve(this.config.facts?.workingDirectory ?? process.cwd(), f);
2987
+ return fs.statSync(resolved).size > 0;
2988
+ }
2989
+ catch {
2990
+ return false;
2991
+ }
2992
+ });
2993
+ if (existingFiles.length > 0) {
2994
+ failedDepsWithArtifacts++;
2995
+ failedDepDescriptions.push(`${dep.description} (failed but ${existingFiles.length}/${targetFiles.length} target files exist)`);
2996
+ }
2997
+ else {
2998
+ // Check if skipped task's targets don't overlap with the failed dep's targets
2999
+ const taskTargets = new Set(task.targetFiles ?? []);
3000
+ const depTargets = new Set(dep.targetFiles ?? []);
3001
+ const hasOverlap = [...taskTargets].some(f => depTargets.has(f));
3002
+ if (!hasOverlap && taskTargets.size > 0) {
3003
+ // Different file targets — task probably doesn't need the failed dep's output
3004
+ failedDepsWithArtifacts++;
3005
+ failedDepDescriptions.push(`${dep.description} (failed, no file overlap — likely independent)`);
3006
+ }
3007
+ else if (lenient && dep.status === 'skipped') {
3008
+ // V10: In lenient mode, count skipped-by-skipped deps separately
3009
+ // (transitive cascade — the dep itself was a victim, not truly broken)
3010
+ skippedDepsBlockedBySkipped++;
3011
+ failedDepDescriptions.push(`${dep.description} (skipped — transitive cascade victim)`);
3012
+ }
3013
+ else {
3014
+ failedDepsWithoutArtifacts++;
3015
+ }
3016
+ }
3017
+ }
3018
+ }
3019
+ }
3020
+ // Rescue condition:
3021
+ // Normal: all failed deps have artifacts or are independent, AND at least some deps completed
3022
+ // Lenient: tolerate up to 1 truly-missing dep, and count transitive cascade victims as recoverable
3023
+ const effectiveWithout = failedDepsWithoutArtifacts;
3024
+ const maxMissing = lenient ? 1 : 0;
3025
+ const hasEnoughContext = lenient ? (completedDeps + failedDepsWithArtifacts + skippedDepsBlockedBySkipped > 0) : (completedDeps > 0);
3026
+ if (totalDeps > 0 && effectiveWithout <= maxMissing && hasEnoughContext) {
3027
+ const rescueContext = `Rescued from cascade-skip${lenient ? ' (lenient)' : ''}: ${completedDeps}/${totalDeps} deps completed, ` +
3028
+ `${failedDepsWithArtifacts} failed deps have artifacts${skippedDepsBlockedBySkipped > 0 ? `, ${skippedDepsBlockedBySkipped} transitive cascade victims` : ''}. ${failedDepDescriptions.join('; ')}`;
3029
+ this.taskQueue.rescueTask(task.id, rescueContext);
3030
+ rescued.push(task);
3031
+ this.logDecision('cascade-rescue', `${task.id}: rescued from cascade-skip${lenient ? ' (lenient)' : ''}`, rescueContext);
3032
+ }
3033
+ }
3034
+ return rescued;
3035
+ }
3036
+ /**
3037
+ * Final rescue pass — runs after executeWaves() finishes.
3038
+ * Uses lenient mode to rescue cascade-skipped tasks that have partial context.
3039
+ * Re-dispatches rescued tasks in a final wave.
3040
+ */
3041
+ async finalRescuePass() {
3042
+ const skipped = this.taskQueue.getSkippedTasks();
3043
+ if (skipped.length === 0)
3044
+ return;
3045
+ this.logDecision('final-rescue', `${skipped.length} skipped tasks — running final rescue pass`, '');
3046
+ const rescued = this.rescueCascadeSkipped(true); // lenient=true
3047
+ if (rescued.length > 0) {
3048
+ this.logDecision('final-rescue', `Rescued ${rescued.length} tasks`, rescued.map(t => t.id).join(', '));
3049
+ await this.executeWave(rescued);
3050
+ }
3051
+ }
3052
+ /**
3053
+ * Try resilience recovery strategies before hard-failing a task.
3054
+ * Called from dispatch-cap, timeout, hollow, and error paths to avoid bypassing resilience.
3055
+ *
3056
+ * Strategies (in order):
3057
+ * 1. Micro-decomposition — break complex failing tasks into subtasks
3058
+ * 2. Degraded acceptance — accept partial work if artifacts exist on disk
3059
+ *
3060
+ * Returns true if recovery succeeded (caller should return), false if hard-fail should proceed.
3061
+ */
3062
+ async tryResilienceRecovery(task, taskId, taskResult, spawnResult) {
3063
+ // Strategy 1: Micro-decompose complex tasks into smaller subtasks
3064
+ // V10: Lowered threshold from >= 6 to >= 4 so moderately complex tasks can be recovered
3065
+ if ((task.complexity ?? 0) >= 4 && task.attempts >= 2 && this.budgetPool.hasCapacity()) {
3066
+ const subtasks = await this.microDecompose(task);
3067
+ if (subtasks && subtasks.length >= 2) {
3068
+ // Reset task status so replaceWithSubtasks can mark it as decomposed
3069
+ task.status = 'dispatched';
3070
+ this.taskQueue.replaceWithSubtasks(taskId, subtasks);
3071
+ this.logDecision('micro-decompose', `${taskId}: decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
3072
+ this.emit({
3073
+ type: 'swarm.task.failed',
3074
+ taskId,
3075
+ error: `Micro-decomposed into ${subtasks.length} subtasks`,
3076
+ attempt: task.attempts,
3077
+ maxAttempts: this.config.maxDispatchesPerTask ?? 5,
3078
+ willRetry: false,
3079
+ toolCalls: spawnResult.metrics.toolCalls,
3080
+ failureMode: task.failureMode,
3081
+ });
3082
+ this.emit({
3083
+ type: 'swarm.task.resilience',
3084
+ taskId,
3085
+ strategy: 'micro-decompose',
3086
+ succeeded: true,
3087
+ reason: `Decomposed into ${subtasks.length} subtasks after ${task.attempts} failures`,
3088
+ artifactsFound: 0,
3089
+ toolCalls: spawnResult.metrics.toolCalls ?? 0,
3090
+ });
3091
+ return true;
3092
+ }
3093
+ // Micro-decompose was attempted but didn't produce usable subtasks
3094
+ if ((task.complexity ?? 0) < 4) {
3095
+ this.logDecision('resilience-skip', `${taskId}: skipped micro-decompose — complexity ${task.complexity} < 4`, '');
3096
+ }
3097
+ }
3098
+ // Strategy 2: Degraded acceptance — check if any attempt produced files on disk.
3099
+ // V10: Use checkArtifactsEnhanced for broader detection (filesModified, closureReport, output)
3100
+ const artifactReport = checkArtifactsEnhanced(task, taskResult);
3101
+ const existingArtifacts = artifactReport.files.filter(f => f.exists && f.sizeBytes > 0);
3102
+ const hasArtifacts = existingArtifacts.length > 0;
3103
+ // V10: Fix timeout detection — toolCalls=-1 means timeout (worker WAS working)
3104
+ const toolCalls = spawnResult.metrics.toolCalls ?? 0;
3105
+ const hadToolCalls = toolCalls > 0 || toolCalls === -1
3106
+ || (taskResult.filesModified && taskResult.filesModified.length > 0);
3107
+ const isNarrativeOnly = hasFutureIntentLanguage(taskResult.output ?? '');
3108
+ const typeConfig = getTaskTypeConfig(task.type, this.config);
3109
+ const actionTaskNeedsArtifacts = (this.config.completionGuard?.requireConcreteArtifactsForActionTasks ?? true)
3110
+ && !!typeConfig.requiresToolCalls;
3111
+ const allowDegradedWithoutArtifacts = !actionTaskNeedsArtifacts && hadToolCalls && !isNarrativeOnly;
3112
+ if (hasArtifacts || allowDegradedWithoutArtifacts) {
3113
+ // Accept with degraded flag — prevents cascade-skip of dependents
3114
+ taskResult.success = true;
3115
+ taskResult.degraded = true;
3116
+ taskResult.qualityScore = 2; // Capped at low quality
3117
+ taskResult.qualityFeedback = 'Degraded acceptance: retries exhausted but filesystem artifacts exist';
3118
+ task.degraded = true;
3119
+ // Reset status so markCompleted works (markFailed may have set it to 'failed')
3120
+ task.status = 'dispatched';
3121
+ this.taskQueue.markCompleted(taskId, taskResult);
3122
+ this.hollowStreak = 0;
3123
+ this.logDecision('degraded-acceptance', `${taskId}: accepted as degraded — ${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`, 'Prevents cascade-skip of dependent tasks');
3124
+ this.emit({
3125
+ type: 'swarm.task.completed',
3126
+ taskId,
3127
+ success: true,
3128
+ tokensUsed: taskResult.tokensUsed,
3129
+ costUsed: taskResult.costUsed,
3130
+ durationMs: taskResult.durationMs,
3131
+ qualityScore: 2,
3132
+ qualityFeedback: 'Degraded acceptance',
3133
+ output: taskResult.output,
3134
+ toolCalls: spawnResult.metrics.toolCalls,
3135
+ });
3136
+ this.emit({
3137
+ type: 'swarm.task.resilience',
3138
+ taskId,
3139
+ strategy: 'degraded-acceptance',
3140
+ succeeded: true,
3141
+ reason: `${existingArtifacts.length} artifacts on disk, ${toolCalls} tool calls`,
3142
+ artifactsFound: existingArtifacts.length,
3143
+ toolCalls,
3144
+ });
3145
+ return true;
3146
+ }
3147
+ // Both strategies failed — log exhaustion for traceability
3148
+ this.logDecision('resilience-exhausted', `${taskId}: no recovery — artifacts: ${existingArtifacts.length}, toolCalls: ${toolCalls}, filesModified: ${taskResult.filesModified?.length ?? 0}`, '');
3149
+ this.emit({
3150
+ type: 'swarm.task.resilience',
3151
+ taskId,
3152
+ strategy: 'none',
3153
+ succeeded: false,
3154
+ reason: `No artifacts found, toolCalls=${toolCalls}, filesModified=${taskResult.filesModified?.length ?? 0}`,
3155
+ artifactsFound: existingArtifacts.length,
3156
+ toolCalls,
3157
+ });
3158
+ return false;
3159
+ }
3160
+ /**
3161
+ * Micro-decompose a complex task into 2-3 smaller subtasks using the LLM.
3162
+ * Called when a complex task (complexity >= 6) fails 2+ times with the same failure mode.
3163
+ * Returns null if decomposition doesn't make sense or LLM can't produce valid subtasks.
3164
+ */
3165
+ async microDecompose(task) {
3166
+ if ((task.complexity ?? 0) < 4)
3167
+ return null;
3168
+ try {
3169
+ const prompt = `Task "${task.description}" failed ${task.attempts} times on model ${task.assignedModel ?? 'unknown'}.
3170
+ The task has complexity ${task.complexity}/10 and type "${task.type}".
3171
+ ${task.targetFiles?.length ? `Target files: ${task.targetFiles.join(', ')}` : ''}
3172
+
3173
+ Break this task into 2-3 smaller, independent subtasks that each handle a portion of the work.
3174
+ Each subtask MUST be simpler (complexity <= ${Math.ceil(task.complexity / 2)}).
3175
+ Each subtask should be self-contained and produce concrete file changes.
3176
+
3177
+ Return JSON ONLY (no markdown, no explanation):
3178
+ {
3179
+ "subtasks": [
3180
+ { "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number> }
3181
+ ]
3182
+ }`;
3183
+ const response = await this.provider.chat([
3184
+ { role: 'system', content: 'You are a task decomposition assistant. Return only valid JSON.' },
3185
+ { role: 'user', content: prompt },
3186
+ ], {
3187
+ model: this.config.orchestratorModel,
3188
+ maxTokens: 2000,
3189
+ temperature: 0.3,
3190
+ });
3191
+ this.trackOrchestratorUsage(response, 'micro-decompose');
3192
+ // Parse response — handle markdown code blocks
3193
+ let jsonStr = response.content.trim();
3194
+ const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
3195
+ if (codeBlockMatch)
3196
+ jsonStr = codeBlockMatch[1].trim();
3197
+ const parsed = JSON.parse(jsonStr);
3198
+ if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
3199
+ return null;
3200
+ }
3201
+ const subtasks = parsed.subtasks.map((sub, idx) => ({
3202
+ id: `${task.id}-sub${idx + 1}`,
3203
+ description: sub.description,
3204
+ type: sub.type ?? task.type,
3205
+ dependencies: [], // Will be set by replaceWithSubtasks
3206
+ status: 'ready',
3207
+ complexity: Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1),
3208
+ wave: task.wave,
3209
+ targetFiles: sub.targetFiles ?? [],
3210
+ readFiles: task.readFiles,
3211
+ attempts: 0,
3212
+ }));
3213
+ return subtasks;
3214
+ }
3215
+ catch (error) {
3216
+ this.logDecision('micro-decompose', `${task.id}: micro-decomposition failed — ${error.message}`, 'Falling through to normal failure path');
3217
+ return null;
3218
+ }
3219
+ }
3220
+ // ─── Pre-Dispatch Auto-Split ──────────────────────────────────────────────
3221
+ /**
3222
+ * Heuristic pre-filter: should this task be considered for auto-split?
3223
+ * Cheap check — no LLM call. Returns true if all conditions are met.
3224
+ */
3225
+ shouldAutoSplit(task) {
3226
+ const cfg = this.config.autoSplit;
3227
+ if (cfg?.enabled === false)
3228
+ return false;
3229
+ const floor = cfg?.complexityFloor ?? 6;
3230
+ const splittable = cfg?.splittableTypes ?? ['implement', 'refactor', 'test'];
3231
+ // Only first attempts — retries use micro-decompose
3232
+ if (task.attempts > 0)
3233
+ return false;
3234
+ // Complexity check
3235
+ if ((task.complexity ?? 0) < floor)
3236
+ return false;
3237
+ // Type check
3238
+ if (!splittable.includes(task.type))
3239
+ return false;
3240
+ // Must be on critical path (foundation task)
3241
+ if (!task.isFoundation)
3242
+ return false;
3243
+ // Budget capacity check
3244
+ if (!this.budgetPool.hasCapacity())
3245
+ return false;
3246
+ return true;
3247
+ }
3248
+ /**
3249
+ * LLM judge call: ask the orchestrator model whether and how to split a task.
3250
+ * Returns { shouldSplit: false } or { shouldSplit: true, subtasks: [...] }.
3251
+ */
3252
+ async judgeSplit(task) {
3253
+ const maxSubs = this.config.autoSplit?.maxSubtasks ?? 4;
3254
+ const prompt = `You are evaluating whether a task should be split into parallel subtasks before dispatch.
3255
+
3256
+ TASK: "${task.description}"
3257
+ TYPE: ${task.type}
3258
+ COMPLEXITY: ${task.complexity}/10
3259
+ TARGET FILES: ${task.targetFiles?.join(', ') || 'none specified'}
3260
+ DOWNSTREAM DEPENDENTS: This is a foundation task — other tasks are waiting on it.
3261
+
3262
+ Should this task be split into 2-${maxSubs} parallel subtasks that different workers can execute simultaneously?
3263
+
3264
+ SPLIT if:
3265
+ - The task involves multiple independent pieces of work (e.g., different files, different functions, different concerns)
3266
+ - Parallel execution would meaningfully reduce wall-clock time
3267
+ - The subtasks can produce useful output independently
3268
+
3269
+ DO NOT SPLIT if:
3270
+ - The work is conceptually atomic (one function, one algorithm, tightly coupled logic)
3271
+ - The subtasks would need to coordinate on the same files/functions
3272
+ - Splitting would add more overhead than it saves
3273
+
3274
+ Return JSON ONLY:
3275
+ {
3276
+ "shouldSplit": true/false,
3277
+ "reason": "brief explanation",
3278
+ "subtasks": [
3279
+ { "description": "...", "type": "${task.type}", "targetFiles": ["..."], "complexity": <number 1-10> }
3280
+ ]
3281
+ }
3282
+ If shouldSplit is false, omit subtasks.`;
3283
+ const response = await this.provider.chat([
3284
+ { role: 'system', content: 'You are a task planning judge. Return only valid JSON.' },
3285
+ { role: 'user', content: prompt },
3286
+ ], {
3287
+ model: this.config.orchestratorModel,
3288
+ maxTokens: 1500,
3289
+ temperature: 0.2,
3290
+ });
3291
+ this.trackOrchestratorUsage(response, 'auto-split-judge');
3292
+ // Parse response — reuse markdown code block stripping from microDecompose
3293
+ let jsonStr = response.content.trim();
3294
+ const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
3295
+ if (codeBlockMatch)
3296
+ jsonStr = codeBlockMatch[1].trim();
3297
+ const parsed = JSON.parse(jsonStr);
3298
+ if (!parsed.shouldSplit) {
3299
+ this.logDecision('auto-split', `${task.id}: judge says no split — ${parsed.reason}`, '');
3300
+ return { shouldSplit: false };
3301
+ }
3302
+ if (!parsed.subtasks || !Array.isArray(parsed.subtasks) || parsed.subtasks.length < 2) {
3303
+ return { shouldSplit: false };
3304
+ }
3305
+ // Build SwarmTask[] from judge output (same pattern as microDecompose)
3306
+ const subtasks = parsed.subtasks.slice(0, maxSubs).map((sub, idx) => ({
3307
+ id: `${task.id}-split${idx + 1}`,
3308
+ description: sub.description,
3309
+ type: sub.type ?? task.type,
3310
+ dependencies: [],
3311
+ status: 'ready',
3312
+ complexity: Math.max(3, Math.min(sub.complexity ?? Math.ceil(task.complexity / 2), task.complexity - 1)),
3313
+ wave: task.wave,
3314
+ targetFiles: sub.targetFiles ?? [],
3315
+ readFiles: task.readFiles,
3316
+ attempts: 0,
3317
+ rescueContext: `Auto-split from ${task.id} (original complexity ${task.complexity})`,
3318
+ }));
3319
+ this.logDecision('auto-split', `${task.id}: split into ${subtasks.length} subtasks — ${parsed.reason}`, subtasks.map(s => `${s.id}: ${s.description.slice(0, 60)}`).join('; '));
3320
+ return { shouldSplit: true, subtasks };
3321
+ }
3322
+ /**
3323
+ * V7: Compute effective retry limit for a task.
3324
+ * F10: Fixup tasks get max 2 retries (3 attempts total) — one full model-failover cycle.
3325
+ * Foundation tasks get +1 retry to reduce cascade failure risk.
3326
+ */
3327
+ getEffectiveRetries(task) {
3328
+ const isFixup = 'fixesTaskId' in task;
3329
+ if (isFixup)
3330
+ return 2; // Fixup tasks: 2 retries max (3 attempts total)
3331
+ return task.isFoundation ? this.config.workerRetries + 1 : this.config.workerRetries;
3332
+ }
3333
+ /**
3334
+ * F22: Build a brief summary of swarm progress for retry context.
3335
+ * Helps retrying workers understand what the swarm has already accomplished.
3336
+ */
3337
+ getSwarmProgressSummary() {
3338
+ const allTasks = this.taskQueue.getAllTasks();
3339
+ const completed = allTasks.filter(t => t.status === 'completed');
3340
+ if (completed.length === 0)
3341
+ return '';
3342
+ const lines = [];
3343
+ for (const task of completed) {
3344
+ const score = task.result?.qualityScore ? ` (${task.result.qualityScore}/5)` : '';
3345
+ lines.push(`- ${task.id}: ${task.description.slice(0, 80)}${score}`);
3346
+ }
3347
+ // Collect files created by completed tasks
3348
+ const files = new Set();
3349
+ const baseDir = this.config.facts?.workingDirectory ?? process.cwd();
3350
+ for (const task of completed) {
3351
+ for (const f of (task.result?.filesModified ?? []))
3352
+ files.add(f);
3353
+ for (const f of (task.targetFiles ?? [])) {
3354
+ try {
3355
+ const resolved = path.resolve(baseDir, f);
3356
+ if (fs.existsSync(resolved))
3357
+ files.add(f);
3358
+ }
3359
+ catch { /* skip */ }
3360
+ }
3361
+ }
3362
+ const parts = [`The following tasks have completed successfully:\n${lines.join('\n')}`];
3363
+ if (files.size > 0) {
3364
+ parts.push(`Files already created/modified: ${[...files].slice(0, 20).join(', ')}`);
3365
+ parts.push('You can build on these existing files.');
3366
+ }
3367
+ return parts.join('\n');
3368
+ }
1322
3369
  /** Get a model health summary for emitting events. */
1323
3370
  getModelHealthSummary(model) {
1324
3371
  const records = this.healthTracker.getAllRecords();