attocode 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (382)
  1. package/CHANGELOG.md +169 -3
  2. package/README.md +65 -5
  3. package/dist/src/adapters.d.ts.map +1 -1
  4. package/dist/src/adapters.js +15 -11
  5. package/dist/src/adapters.js.map +1 -1
  6. package/dist/src/agent.d.ts +44 -98
  7. package/dist/src/agent.d.ts.map +1 -1
  8. package/dist/src/agent.js +716 -2648
  9. package/dist/src/agent.js.map +1 -1
  10. package/dist/src/cli.d.ts.map +1 -1
  11. package/dist/src/cli.js +25 -3
  12. package/dist/src/cli.js.map +1 -1
  13. package/dist/src/commands/handler.d.ts.map +1 -1
  14. package/dist/src/commands/handler.js +11 -3
  15. package/dist/src/commands/handler.js.map +1 -1
  16. package/dist/src/commands/init-commands.d.ts.map +1 -1
  17. package/dist/src/commands/init-commands.js +16 -1
  18. package/dist/src/commands/init-commands.js.map +1 -1
  19. package/dist/src/commands/init.d.ts.map +1 -1
  20. package/dist/src/commands/init.js +31 -0
  21. package/dist/src/commands/init.js.map +1 -1
  22. package/dist/src/config/base-types.d.ts +45 -0
  23. package/dist/src/config/base-types.d.ts.map +1 -0
  24. package/dist/src/config/base-types.js +9 -0
  25. package/dist/src/config/base-types.js.map +1 -0
  26. package/dist/src/config/config-manager.d.ts +35 -0
  27. package/dist/src/config/config-manager.d.ts.map +1 -0
  28. package/dist/src/config/config-manager.js +108 -0
  29. package/dist/src/config/config-manager.js.map +1 -0
  30. package/dist/src/config/index.d.ts +4 -0
  31. package/dist/src/config/index.d.ts.map +1 -0
  32. package/dist/src/config/index.js +3 -0
  33. package/dist/src/config/index.js.map +1 -0
  34. package/dist/src/config/schema.d.ts +1546 -0
  35. package/dist/src/config/schema.d.ts.map +1 -0
  36. package/dist/src/config/schema.js +268 -0
  37. package/dist/src/config/schema.js.map +1 -0
  38. package/dist/src/config.d.ts +4 -1
  39. package/dist/src/config.d.ts.map +1 -1
  40. package/dist/src/config.js +8 -12
  41. package/dist/src/config.js.map +1 -1
  42. package/dist/src/core/agent-state-machine.d.ts +131 -0
  43. package/dist/src/core/agent-state-machine.d.ts.map +1 -0
  44. package/dist/src/core/agent-state-machine.js +302 -0
  45. package/dist/src/core/agent-state-machine.js.map +1 -0
  46. package/dist/src/core/base-manager.d.ts +79 -0
  47. package/dist/src/core/base-manager.d.ts.map +1 -0
  48. package/dist/src/core/base-manager.js +170 -0
  49. package/dist/src/core/base-manager.js.map +1 -0
  50. package/dist/src/core/completion-analyzer.d.ts +15 -0
  51. package/dist/src/core/completion-analyzer.d.ts.map +1 -0
  52. package/dist/src/core/completion-analyzer.js +53 -0
  53. package/dist/src/core/completion-analyzer.js.map +1 -0
  54. package/dist/src/core/execution-loop.d.ts +46 -0
  55. package/dist/src/core/execution-loop.d.ts.map +1 -0
  56. package/dist/src/core/execution-loop.js +1258 -0
  57. package/dist/src/core/execution-loop.js.map +1 -0
  58. package/dist/src/core/index.d.ts +7 -0
  59. package/dist/src/core/index.d.ts.map +1 -1
  60. package/dist/src/core/index.js +9 -0
  61. package/dist/src/core/index.js.map +1 -1
  62. package/dist/src/core/process-handlers.d.ts.map +1 -1
  63. package/dist/src/core/process-handlers.js +14 -0
  64. package/dist/src/core/process-handlers.js.map +1 -1
  65. package/dist/src/core/protocol/types.d.ts +4 -4
  66. package/dist/src/core/response-handler.d.ts +16 -0
  67. package/dist/src/core/response-handler.d.ts.map +1 -0
  68. package/dist/src/core/response-handler.js +234 -0
  69. package/dist/src/core/response-handler.js.map +1 -0
  70. package/dist/src/core/subagent-spawner.d.ts +43 -0
  71. package/dist/src/core/subagent-spawner.d.ts.map +1 -0
  72. package/dist/src/core/subagent-spawner.js +966 -0
  73. package/dist/src/core/subagent-spawner.js.map +1 -0
  74. package/dist/src/core/tool-executor.d.ts +59 -0
  75. package/dist/src/core/tool-executor.d.ts.map +1 -0
  76. package/dist/src/core/tool-executor.js +677 -0
  77. package/dist/src/core/tool-executor.js.map +1 -0
  78. package/dist/src/core/types.d.ts +133 -0
  79. package/dist/src/core/types.d.ts.map +1 -0
  80. package/dist/src/core/types.js +12 -0
  81. package/dist/src/core/types.js.map +1 -0
  82. package/dist/src/defaults.d.ts +8 -3
  83. package/dist/src/defaults.d.ts.map +1 -1
  84. package/dist/src/defaults.js +65 -3
  85. package/dist/src/defaults.js.map +1 -1
  86. package/dist/src/integrations/agent-registry.d.ts +11 -0
  87. package/dist/src/integrations/agent-registry.d.ts.map +1 -1
  88. package/dist/src/integrations/agent-registry.js.map +1 -1
  89. package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
  90. package/dist/src/integrations/auto-compaction.js +8 -3
  91. package/dist/src/integrations/auto-compaction.js.map +1 -1
  92. package/dist/src/integrations/bash-policy.d.ts +33 -0
  93. package/dist/src/integrations/bash-policy.d.ts.map +1 -0
  94. package/dist/src/integrations/bash-policy.js +142 -0
  95. package/dist/src/integrations/bash-policy.js.map +1 -0
  96. package/dist/src/integrations/budget-pool.d.ts +7 -0
  97. package/dist/src/integrations/budget-pool.d.ts.map +1 -1
  98. package/dist/src/integrations/budget-pool.js +43 -0
  99. package/dist/src/integrations/budget-pool.js.map +1 -1
  100. package/dist/src/integrations/codebase-ast.d.ts +52 -0
  101. package/dist/src/integrations/codebase-ast.d.ts.map +1 -0
  102. package/dist/src/integrations/codebase-ast.js +457 -0
  103. package/dist/src/integrations/codebase-ast.js.map +1 -0
  104. package/dist/src/integrations/codebase-context.d.ts +23 -0
  105. package/dist/src/integrations/codebase-context.d.ts.map +1 -1
  106. package/dist/src/integrations/codebase-context.js +230 -17
  107. package/dist/src/integrations/codebase-context.js.map +1 -1
  108. package/dist/src/integrations/compaction.d.ts.map +1 -1
  109. package/dist/src/integrations/compaction.js +14 -6
  110. package/dist/src/integrations/compaction.js.map +1 -1
  111. package/dist/src/integrations/context-engineering.d.ts +8 -0
  112. package/dist/src/integrations/context-engineering.d.ts.map +1 -1
  113. package/dist/src/integrations/context-engineering.js +19 -0
  114. package/dist/src/integrations/context-engineering.js.map +1 -1
  115. package/dist/src/integrations/delegation-protocol.js +2 -2
  116. package/dist/src/integrations/delegation-protocol.js.map +1 -1
  117. package/dist/src/integrations/economics.d.ts +67 -1
  118. package/dist/src/integrations/economics.d.ts.map +1 -1
  119. package/dist/src/integrations/economics.js +328 -33
  120. package/dist/src/integrations/economics.js.map +1 -1
  121. package/dist/src/integrations/edit-validator.d.ts +30 -0
  122. package/dist/src/integrations/edit-validator.d.ts.map +1 -0
  123. package/dist/src/integrations/edit-validator.js +85 -0
  124. package/dist/src/integrations/edit-validator.js.map +1 -0
  125. package/dist/src/integrations/file-cache.d.ts +7 -0
  126. package/dist/src/integrations/file-cache.d.ts.map +1 -1
  127. package/dist/src/integrations/file-cache.js +54 -0
  128. package/dist/src/integrations/file-cache.js.map +1 -1
  129. package/dist/src/integrations/health-check.d.ts.map +1 -1
  130. package/dist/src/integrations/health-check.js +3 -2
  131. package/dist/src/integrations/health-check.js.map +1 -1
  132. package/dist/src/integrations/hierarchical-config.d.ts +3 -0
  133. package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
  134. package/dist/src/integrations/hierarchical-config.js +20 -0
  135. package/dist/src/integrations/hierarchical-config.js.map +1 -1
  136. package/dist/src/integrations/hooks.d.ts +2 -0
  137. package/dist/src/integrations/hooks.d.ts.map +1 -1
  138. package/dist/src/integrations/hooks.js +99 -15
  139. package/dist/src/integrations/hooks.js.map +1 -1
  140. package/dist/src/integrations/index.d.ts +10 -1
  141. package/dist/src/integrations/index.d.ts.map +1 -1
  142. package/dist/src/integrations/index.js +12 -2
  143. package/dist/src/integrations/index.js.map +1 -1
  144. package/dist/src/integrations/logger.d.ts +104 -0
  145. package/dist/src/integrations/logger.d.ts.map +1 -0
  146. package/dist/src/integrations/logger.js +219 -0
  147. package/dist/src/integrations/logger.js.map +1 -0
  148. package/dist/src/integrations/lsp.d.ts.map +1 -1
  149. package/dist/src/integrations/lsp.js +5 -4
  150. package/dist/src/integrations/lsp.js.map +1 -1
  151. package/dist/src/integrations/mcp-client.d.ts.map +1 -1
  152. package/dist/src/integrations/mcp-client.js +8 -7
  153. package/dist/src/integrations/mcp-client.js.map +1 -1
  154. package/dist/src/integrations/observability.d.ts.map +1 -1
  155. package/dist/src/integrations/observability.js +5 -4
  156. package/dist/src/integrations/observability.js.map +1 -1
  157. package/dist/src/integrations/openrouter-pricing.d.ts.map +1 -1
  158. package/dist/src/integrations/openrouter-pricing.js +4 -3
  159. package/dist/src/integrations/openrouter-pricing.js.map +1 -1
  160. package/dist/src/integrations/persistence.d.ts.map +1 -1
  161. package/dist/src/integrations/persistence.js +5 -4
  162. package/dist/src/integrations/persistence.js.map +1 -1
  163. package/dist/src/integrations/planning.d.ts.map +1 -1
  164. package/dist/src/integrations/planning.js +5 -4
  165. package/dist/src/integrations/planning.js.map +1 -1
  166. package/dist/src/integrations/policy-engine.d.ts +55 -0
  167. package/dist/src/integrations/policy-engine.d.ts.map +1 -0
  168. package/dist/src/integrations/policy-engine.js +247 -0
  169. package/dist/src/integrations/policy-engine.js.map +1 -0
  170. package/dist/src/integrations/retry.d.ts +1 -0
  171. package/dist/src/integrations/retry.d.ts.map +1 -1
  172. package/dist/src/integrations/retry.js.map +1 -1
  173. package/dist/src/integrations/routing.d.ts.map +1 -1
  174. package/dist/src/integrations/routing.js +2 -1
  175. package/dist/src/integrations/routing.js.map +1 -1
  176. package/dist/src/integrations/safety.d.ts +5 -4
  177. package/dist/src/integrations/safety.d.ts.map +1 -1
  178. package/dist/src/integrations/safety.js +45 -20
  179. package/dist/src/integrations/safety.js.map +1 -1
  180. package/dist/src/integrations/sandbox/basic.d.ts +7 -0
  181. package/dist/src/integrations/sandbox/basic.d.ts.map +1 -1
  182. package/dist/src/integrations/sandbox/basic.js +27 -2
  183. package/dist/src/integrations/sandbox/basic.js.map +1 -1
  184. package/dist/src/integrations/sandbox/docker.d.ts.map +1 -1
  185. package/dist/src/integrations/sandbox/docker.js +2 -1
  186. package/dist/src/integrations/sandbox/docker.js.map +1 -1
  187. package/dist/src/integrations/sandbox/index.d.ts +6 -0
  188. package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
  189. package/dist/src/integrations/sandbox/index.js +8 -4
  190. package/dist/src/integrations/sandbox/index.js.map +1 -1
  191. package/dist/src/integrations/sandbox/landlock.d.ts.map +1 -1
  192. package/dist/src/integrations/sandbox/landlock.js +3 -0
  193. package/dist/src/integrations/sandbox/landlock.js.map +1 -1
  194. package/dist/src/integrations/self-improvement.d.ts.map +1 -1
  195. package/dist/src/integrations/self-improvement.js +12 -0
  196. package/dist/src/integrations/self-improvement.js.map +1 -1
  197. package/dist/src/integrations/session-store.d.ts +1 -0
  198. package/dist/src/integrations/session-store.d.ts.map +1 -1
  199. package/dist/src/integrations/session-store.js +1 -0
  200. package/dist/src/integrations/session-store.js.map +1 -1
  201. package/dist/src/integrations/shared-blackboard.d.ts +3 -0
  202. package/dist/src/integrations/shared-blackboard.d.ts.map +1 -1
  203. package/dist/src/integrations/shared-blackboard.js +47 -0
  204. package/dist/src/integrations/shared-blackboard.js.map +1 -1
  205. package/dist/src/integrations/smart-decomposer.d.ts +45 -1
  206. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  207. package/dist/src/integrations/smart-decomposer.js +486 -30
  208. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  209. package/dist/src/integrations/sqlite-store.d.ts +2 -0
  210. package/dist/src/integrations/sqlite-store.d.ts.map +1 -1
  211. package/dist/src/integrations/sqlite-store.js +18 -6
  212. package/dist/src/integrations/sqlite-store.js.map +1 -1
  213. package/dist/src/integrations/swarm/failure-classifier.d.ts +11 -0
  214. package/dist/src/integrations/swarm/failure-classifier.d.ts.map +1 -0
  215. package/dist/src/integrations/swarm/failure-classifier.js +95 -0
  216. package/dist/src/integrations/swarm/failure-classifier.js.map +1 -0
  217. package/dist/src/integrations/swarm/index.d.ts +1 -1
  218. package/dist/src/integrations/swarm/index.d.ts.map +1 -1
  219. package/dist/src/integrations/swarm/index.js.map +1 -1
  220. package/dist/src/integrations/swarm/model-selector.d.ts +15 -0
  221. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
  222. package/dist/src/integrations/swarm/model-selector.js +100 -20
  223. package/dist/src/integrations/swarm/model-selector.js.map +1 -1
  224. package/dist/src/integrations/swarm/swarm-budget.d.ts +4 -0
  225. package/dist/src/integrations/swarm/swarm-budget.d.ts.map +1 -1
  226. package/dist/src/integrations/swarm/swarm-budget.js +6 -0
  227. package/dist/src/integrations/swarm/swarm-budget.js.map +1 -1
  228. package/dist/src/integrations/swarm/swarm-config-loader.d.ts +8 -0
  229. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
  230. package/dist/src/integrations/swarm/swarm-config-loader.js +249 -7
  231. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
  232. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +86 -1
  233. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
  234. package/dist/src/integrations/swarm/swarm-event-bridge.js +207 -23
  235. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
  236. package/dist/src/integrations/swarm/swarm-events.d.ts +58 -1
  237. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
  238. package/dist/src/integrations/swarm/swarm-events.js +22 -5
  239. package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
  240. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +147 -8
  241. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
  242. package/dist/src/integrations/swarm/swarm-orchestrator.js +2179 -132
  243. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
  244. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts +83 -2
  245. package/dist/src/integrations/swarm/swarm-quality-gate.d.ts.map +1 -1
  246. package/dist/src/integrations/swarm/swarm-quality-gate.js +278 -19
  247. package/dist/src/integrations/swarm/swarm-quality-gate.js.map +1 -1
  248. package/dist/src/integrations/swarm/swarm-state-store.d.ts +4 -1
  249. package/dist/src/integrations/swarm/swarm-state-store.d.ts.map +1 -1
  250. package/dist/src/integrations/swarm/swarm-state-store.js +8 -1
  251. package/dist/src/integrations/swarm/swarm-state-store.js.map +1 -1
  252. package/dist/src/integrations/swarm/task-queue.d.ts +54 -0
  253. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
  254. package/dist/src/integrations/swarm/task-queue.js +310 -12
  255. package/dist/src/integrations/swarm/task-queue.js.map +1 -1
  256. package/dist/src/integrations/swarm/types.d.ts +251 -13
  257. package/dist/src/integrations/swarm/types.d.ts.map +1 -1
  258. package/dist/src/integrations/swarm/types.js +70 -8
  259. package/dist/src/integrations/swarm/types.js.map +1 -1
  260. package/dist/src/integrations/swarm/worker-pool.d.ts +21 -4
  261. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
  262. package/dist/src/integrations/swarm/worker-pool.js +223 -44
  263. package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
  264. package/dist/src/integrations/task-manager.d.ts +33 -1
  265. package/dist/src/integrations/task-manager.d.ts.map +1 -1
  266. package/dist/src/integrations/task-manager.js +78 -4
  267. package/dist/src/integrations/task-manager.js.map +1 -1
  268. package/dist/src/integrations/tool-recommendation.d.ts +7 -4
  269. package/dist/src/integrations/tool-recommendation.d.ts.map +1 -1
  270. package/dist/src/integrations/tool-recommendation.js +58 -5
  271. package/dist/src/integrations/tool-recommendation.js.map +1 -1
  272. package/dist/src/integrations/work-log.js +4 -4
  273. package/dist/src/integrations/work-log.js.map +1 -1
  274. package/dist/src/main.js +106 -30
  275. package/dist/src/main.js.map +1 -1
  276. package/dist/src/modes/repl.d.ts.map +1 -1
  277. package/dist/src/modes/repl.js +50 -12
  278. package/dist/src/modes/repl.js.map +1 -1
  279. package/dist/src/modes/tui.d.ts.map +1 -1
  280. package/dist/src/modes/tui.js +41 -6
  281. package/dist/src/modes/tui.js.map +1 -1
  282. package/dist/src/modes.d.ts.map +1 -1
  283. package/dist/src/modes.js +4 -27
  284. package/dist/src/modes.js.map +1 -1
  285. package/dist/src/observability/tracer.d.ts.map +1 -1
  286. package/dist/src/observability/tracer.js +2 -1
  287. package/dist/src/observability/tracer.js.map +1 -1
  288. package/dist/src/persistence/schema.d.ts.map +1 -1
  289. package/dist/src/persistence/schema.js +11 -0
  290. package/dist/src/persistence/schema.js.map +1 -1
  291. package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
  292. package/dist/src/providers/adapters/anthropic.js +3 -2
  293. package/dist/src/providers/adapters/anthropic.js.map +1 -1
  294. package/dist/src/providers/adapters/openai.d.ts.map +1 -1
  295. package/dist/src/providers/adapters/openai.js +3 -2
  296. package/dist/src/providers/adapters/openai.js.map +1 -1
  297. package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
  298. package/dist/src/providers/adapters/openrouter.js +11 -11
  299. package/dist/src/providers/adapters/openrouter.js.map +1 -1
  300. package/dist/src/providers/circuit-breaker.d.ts +1 -0
  301. package/dist/src/providers/circuit-breaker.d.ts.map +1 -1
  302. package/dist/src/providers/circuit-breaker.js.map +1 -1
  303. package/dist/src/providers/provider.d.ts.map +1 -1
  304. package/dist/src/providers/provider.js +2 -1
  305. package/dist/src/providers/provider.js.map +1 -1
  306. package/dist/src/providers/resilient-provider.d.ts.map +1 -1
  307. package/dist/src/providers/resilient-provider.js +2 -1
  308. package/dist/src/providers/resilient-provider.js.map +1 -1
  309. package/dist/src/session-picker.d.ts.map +1 -1
  310. package/dist/src/session-picker.js +40 -5
  311. package/dist/src/session-picker.js.map +1 -1
  312. package/dist/src/shared/budget-tracker.d.ts +65 -0
  313. package/dist/src/shared/budget-tracker.d.ts.map +1 -0
  314. package/dist/src/shared/budget-tracker.js +128 -0
  315. package/dist/src/shared/budget-tracker.js.map +1 -0
  316. package/dist/src/shared/context-engine.d.ts +64 -0
  317. package/dist/src/shared/context-engine.d.ts.map +1 -0
  318. package/dist/src/shared/context-engine.js +117 -0
  319. package/dist/src/shared/context-engine.js.map +1 -0
  320. package/dist/src/shared/index.d.ts +12 -0
  321. package/dist/src/shared/index.d.ts.map +1 -0
  322. package/dist/src/shared/index.js +12 -0
  323. package/dist/src/shared/index.js.map +1 -0
  324. package/dist/src/shared/persistence.d.ts +57 -0
  325. package/dist/src/shared/persistence.d.ts.map +1 -0
  326. package/dist/src/shared/persistence.js +168 -0
  327. package/dist/src/shared/persistence.js.map +1 -0
  328. package/dist/src/shared/shared-context-state.d.ts +89 -0
  329. package/dist/src/shared/shared-context-state.d.ts.map +1 -0
  330. package/dist/src/shared/shared-context-state.js +175 -0
  331. package/dist/src/shared/shared-context-state.js.map +1 -0
  332. package/dist/src/shared/shared-economics-state.d.ts +61 -0
  333. package/dist/src/shared/shared-economics-state.d.ts.map +1 -0
  334. package/dist/src/shared/shared-economics-state.js +100 -0
  335. package/dist/src/shared/shared-economics-state.js.map +1 -0
  336. package/dist/src/tools/agent.d.ts.map +1 -1
  337. package/dist/src/tools/agent.js +11 -2
  338. package/dist/src/tools/agent.js.map +1 -1
  339. package/dist/src/tools/bash.d.ts +1 -1
  340. package/dist/src/tools/bash.d.ts.map +1 -1
  341. package/dist/src/tools/bash.js +2 -1
  342. package/dist/src/tools/bash.js.map +1 -1
  343. package/dist/src/tools/coercion.d.ts +6 -0
  344. package/dist/src/tools/coercion.d.ts.map +1 -1
  345. package/dist/src/tools/coercion.js +13 -0
  346. package/dist/src/tools/coercion.js.map +1 -1
  347. package/dist/src/tools/file.d.ts +5 -5
  348. package/dist/src/tools/file.js +2 -2
  349. package/dist/src/tools/file.js.map +1 -1
  350. package/dist/src/tools/permission.d.ts.map +1 -1
  351. package/dist/src/tools/permission.js +10 -116
  352. package/dist/src/tools/permission.js.map +1 -1
  353. package/dist/src/tools/types.d.ts +1 -0
  354. package/dist/src/tools/types.d.ts.map +1 -1
  355. package/dist/src/tools/types.js.map +1 -1
  356. package/dist/src/tracing/trace-collector.d.ts +292 -0
  357. package/dist/src/tracing/trace-collector.d.ts.map +1 -1
  358. package/dist/src/tracing/trace-collector.js +249 -5
  359. package/dist/src/tracing/trace-collector.js.map +1 -1
  360. package/dist/src/tracing/types.d.ts +200 -1
  361. package/dist/src/tracing/types.d.ts.map +1 -1
  362. package/dist/src/tracing/types.js.map +1 -1
  363. package/dist/src/tricks/failure-evidence.d.ts.map +1 -1
  364. package/dist/src/tricks/failure-evidence.js +2 -1
  365. package/dist/src/tricks/failure-evidence.js.map +1 -1
  366. package/dist/src/tui/app.d.ts +13 -0
  367. package/dist/src/tui/app.d.ts.map +1 -1
  368. package/dist/src/tui/app.js +162 -19
  369. package/dist/src/tui/app.js.map +1 -1
  370. package/dist/src/tui/components/ErrorBoundary.d.ts.map +1 -1
  371. package/dist/src/tui/components/ErrorBoundary.js +3 -2
  372. package/dist/src/tui/components/ErrorBoundary.js.map +1 -1
  373. package/dist/src/tui/event-display.d.ts.map +1 -1
  374. package/dist/src/tui/event-display.js +36 -62
  375. package/dist/src/tui/event-display.js.map +1 -1
  376. package/dist/src/tui/index.d.ts +4 -0
  377. package/dist/src/tui/index.d.ts.map +1 -1
  378. package/dist/src/tui/index.js +17 -0
  379. package/dist/src/tui/index.js.map +1 -1
  380. package/dist/src/types.d.ts +214 -1
  381. package/dist/src/types.d.ts.map +1 -1
  382. package/package.json +18 -3
package/dist/src/agent.js CHANGED
@@ -18,19 +18,16 @@
18
18
  * - Execution Policies (Lesson 23)
19
19
  * - Thread Management (Lesson 24)
20
20
  */
21
- import { buildConfig, isFeatureEnabled, getEnabledFeatures, getSubagentTimeout, getSubagentMaxIterations, } from './defaults.js';
22
- import { createModeManager, formatModeList, parseMode, calculateTaskSimilarity, SUBAGENT_PLAN_MODE_ADDITION, } from './modes.js';
21
+ import * as path from 'node:path';
22
+ import { buildConfig, isFeatureEnabled, getEnabledFeatures, } from './defaults.js';
23
+ import { createModeManager, formatModeList, parseMode, } from './modes.js';
23
24
  import { createLSPFileTools, } from './agent-tools/index.js';
24
- import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, createSharedFileCache, createBudgetPool, createDynamicBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate,
25
- // Phase 2: Orchestration
26
- classifyComplexity, getScalingGuidance, buildDelegationPrompt, createMinimalDelegationSpec, getSubagentQualityPrompt, ToolRecommendationEngine, createToolRecommendationEngine, createInjectionBudgetManager,
27
- // Phase 3: Advanced
28
- getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, createSubagentSupervisor, createSubagentHandle, } from './integrations/index.js';
29
- // Lesson 26: Tracing & Evaluation integration
25
+ import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, AgentRegistry, formatAgentList, createCancellationManager, isCancellationError, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, createCodebaseContext, buildContextFromChunks, generateLightweightRepoMap, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate, classifyComplexity, getScalingGuidance, createToolRecommendationEngine, createInjectionBudgetManager, getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, } from './integrations/index.js';
26
+ import { resolvePolicyProfile, } from './integrations/policy-engine.js';
30
27
  import { createTraceCollector } from './tracing/trace-collector.js';
31
- // Model registry for context window limits
32
28
  import { modelRegistry } from './costs/index.js';
33
29
  import { getModelContextLength } from './integrations/openrouter-pricing.js';
30
+ import { createComponentLogger } from './integrations/logger.js';
34
31
  // Spawn agent tools for LLM-driven subagent delegation
35
32
  import { createBoundSpawnAgentTool, createBoundSpawnAgentsParallelTool, } from './tools/agent.js';
36
33
  // Task tools for Claude Code-style task management
@@ -42,115 +39,15 @@ import { createTaskTools, } from './tools/tasks.js';
42
39
  * Tools that are safe to execute in parallel (read-only, no side effects).
43
40
  * These tools don't modify state, so running them concurrently is safe.
44
41
  */
45
- export const PARALLELIZABLE_TOOLS = new Set([
46
- 'read_file', 'glob', 'grep', 'list_files', 'search_files',
47
- 'search_code', 'get_file_info',
48
- ]);
49
- /**
50
- * Tools that can run in parallel IF they target different files.
51
- * write_file and edit_file on different paths are safe to parallelize.
52
- */
53
- export const CONDITIONALLY_PARALLEL_TOOLS = new Set([
54
- 'write_file', 'edit_file',
55
- ]);
56
- /**
57
- * Extract the target file path from a tool call's arguments.
58
- * Returns null if no file path can be determined.
59
- */
60
- export function extractToolFilePath(toolCall) {
61
- // Check common argument patterns
62
- const args = toolCall;
63
- for (const key of ['path', 'file_path', 'filename', 'file']) {
64
- if (typeof args[key] === 'string')
65
- return args[key];
66
- }
67
- // Check nested args object
68
- if (args.args && typeof args.args === 'object') {
69
- const nested = args.args;
70
- for (const key of ['path', 'file_path', 'filename', 'file']) {
71
- if (typeof nested[key] === 'string')
72
- return nested[key];
73
- }
74
- }
75
- // Check input object (common in structured tool calls)
76
- if (args.input && typeof args.input === 'object') {
77
- const input = args.input;
78
- for (const key of ['path', 'file_path', 'filename', 'file']) {
79
- if (typeof input[key] === 'string')
80
- return input[key];
81
- }
82
- }
83
- return null;
84
- }
85
- /**
86
- * Check if a conditionally-parallel tool call conflicts with any tool
87
- * in the current accumulator (same file path).
88
- */
89
- function hasFileConflict(toolCall, accumulator) {
90
- const path = extractToolFilePath(toolCall);
91
- if (!path)
92
- return true; // Can't determine path → assume conflict
93
- for (const existing of accumulator) {
94
- const existingPath = extractToolFilePath(existing);
95
- if (existingPath === path)
96
- return true; // Same file → conflict
97
- }
98
- return false;
99
- }
100
- /**
101
- * Groups tool calls into batches for parallel/sequential execution.
102
- * Uses accumulate-and-flush: parallelizable tools accumulate until a
103
- * non-parallelizable tool flushes them as a batch. This produces optimal
104
- * batching even for non-consecutive parallelizable tools.
105
- *
106
- * Enhanced with conditional parallelism: write_file/edit_file on
107
- * DIFFERENT files can be batched together for parallel execution.
108
- *
109
- * Example: [read1, read2, write, read3, grep] → [[read1, read2], [write], [read3, grep]]
110
- * (Previous algorithm produced 4 batches; this produces 3)
111
- *
112
- * Enhanced: [write_a, write_b, write_a] → [[write_a, write_b], [write_a]]
113
- * (Different files parallelized, same file sequential)
114
- */
115
- export function groupToolCallsIntoBatches(toolCalls, isParallelizable = (tc) => PARALLELIZABLE_TOOLS.has(tc.name), isConditionallyParallel = (tc) => CONDITIONALLY_PARALLEL_TOOLS.has(tc.name)) {
116
- if (toolCalls.length === 0)
117
- return [];
118
- const batches = [];
119
- let parallelAccum = [];
120
- for (const toolCall of toolCalls) {
121
- if (isParallelizable(toolCall)) {
122
- parallelAccum.push(toolCall);
123
- }
124
- else if (isConditionallyParallel(toolCall)) {
125
- // Can parallelize if no file conflict with existing accumulator
126
- if (!hasFileConflict(toolCall, parallelAccum)) {
127
- parallelAccum.push(toolCall);
128
- }
129
- else {
130
- // Conflict: flush current batch, start new one with this tool
131
- if (parallelAccum.length > 0) {
132
- batches.push(parallelAccum);
133
- parallelAccum = [];
134
- }
135
- parallelAccum.push(toolCall);
136
- }
137
- }
138
- else {
139
- // Flush any accumulated parallel tools as a single batch
140
- if (parallelAccum.length > 0) {
141
- batches.push(parallelAccum);
142
- parallelAccum = [];
143
- }
144
- // Non-parallelizable tool gets its own batch
145
- batches.push([toolCall]);
146
- }
147
- }
148
- // Flush remaining parallel tools
149
- if (parallelAccum.length > 0) {
150
- batches.push(parallelAccum);
151
- }
152
- return batches;
153
- }
42
+ const log = createComponentLogger('ProductionAgent');
43
+ // Tool-batching constants (canonical home: core/tool-executor.ts)
44
+ import { PARALLELIZABLE_TOOLS, CONDITIONALLY_PARALLEL_TOOLS, extractToolFilePath, groupToolCallsIntoBatches, } from './core/index.js';
45
+ export { PARALLELIZABLE_TOOLS, CONDITIONALLY_PARALLEL_TOOLS, extractToolFilePath, groupToolCallsIntoBatches };
46
+ // Extracted core modules (Phase 2.1 — thin orchestrator delegates)
47
+ import { executeDirectly as coreExecuteDirectly, spawnAgent as coreSpawnAgent, spawnAgentsParallel as coreSpawnAgentsParallel, } from './core/index.js';
48
+ // Phase 2.2: Agent State Machine
49
+ import { createAgentStateMachine } from './core/agent-state-machine.js';
50
+ import { detectIncompleteActionResponse } from './core/completion-analyzer.js';
154
51
  /**
155
52
  * Production-ready agent that composes all features.
156
53
  */
@@ -179,6 +76,7 @@ export class ProductionAgent {
179
76
  skillManager = null;
180
77
  contextEngineering = null;
181
78
  codebaseContext = null;
79
+ codebaseAnalysisTriggered = false;
182
80
  traceCollector = null;
183
81
  modeManager;
184
82
  pendingPlanManager;
@@ -193,6 +91,8 @@ export class ProductionAgent {
193
91
  agentId;
194
92
  blackboard = null;
195
93
  fileCache = null;
94
+ _sharedContextState = null;
95
+ _sharedEconomicsState = null;
196
96
  budgetPool = null;
197
97
  taskManager = null;
198
98
  store = null;
@@ -205,11 +105,13 @@ export class ProductionAgent {
205
105
  subagentOutputStore = null;
206
106
  autoCheckpointManager = null;
207
107
  toolRecommendation = null;
108
+ stateMachine = null;
208
109
  lastComplexityAssessment = null;
110
+ lastSystemPromptLength = 0;
209
111
  // Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
210
112
  // Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
211
113
  spawnedTasks = new Map();
212
- static SPAWN_DEDUP_WINDOW_MS = 60000; // 60 seconds
114
+ // SPAWN_DEDUP_WINDOW_MS moved to core/subagent-spawner.ts
213
115
  // Parent iteration tracking for total budget calculation
214
116
  parentIterations = 0;
215
117
  // External cancellation token (for subagent timeout propagation)
@@ -221,6 +123,9 @@ export class ProductionAgent {
221
123
  // Cacheable system prompt blocks for prompt caching (Improvement P1)
222
124
  // When set, callLLM() will inject these as structured content with cache_control markers
223
125
  cacheableSystemBlocks = null;
126
+ // Pre-compaction agentic turn: when true, the agent gets one more LLM turn
127
+ // to summarize its state before compaction clears the context.
128
+ compactionPending = false;
224
129
  // Initialization tracking
225
130
  initPromises = [];
226
131
  initComplete = false;
@@ -296,6 +201,9 @@ export class ProductionAgent {
296
201
  const parentBudgetTokens = baseBudget.maxTokens ?? STANDARD_BUDGET.maxTokens ?? 200000;
297
202
  this.budgetPool = createBudgetPool(parentBudgetTokens, 0.25, 100000);
298
203
  }
204
+ // Shared state for swarm workers (passed from orchestrator via config)
205
+ this._sharedContextState = userConfig.sharedContextState ?? null;
206
+ this._sharedEconomicsState = userConfig.sharedEconomicsState ?? null;
299
207
  // Initialize enabled features
300
208
  this.initializeFeatures();
301
209
  }
@@ -306,7 +214,7 @@ export class ProductionAgent {
306
214
  // Debug output only when DEBUG env var is set
307
215
  if (process.env.DEBUG) {
308
216
  const features = getEnabledFeatures(this.config);
309
- console.log(`[ProductionAgent] Initializing with features: ${features.join(', ')}`);
217
+ log.debug('Initializing with features', { features: features.join(', ') });
310
218
  }
311
219
  // Hooks & Plugins
312
220
  if (isFeatureEnabled(this.config.hooks) && isFeatureEnabled(this.config.plugins)) {
@@ -339,7 +247,29 @@ export class ProductionAgent {
339
247
  }
340
248
  // Safety (Sandbox + Human-in-Loop)
341
249
  if (isFeatureEnabled(this.config.sandbox) || isFeatureEnabled(this.config.humanInLoop)) {
342
- this.safety = new SafetyManager(isFeatureEnabled(this.config.sandbox) ? this.config.sandbox : false, isFeatureEnabled(this.config.humanInLoop) ? this.config.humanInLoop : false);
250
+ this.safety = new SafetyManager(isFeatureEnabled(this.config.sandbox) ? this.config.sandbox : false, isFeatureEnabled(this.config.humanInLoop) ? this.config.humanInLoop : false, isFeatureEnabled(this.config.policyEngine) ? this.config.policyEngine : false);
251
+ }
252
+ if (isFeatureEnabled(this.config.policyEngine)) {
253
+ const rootPolicy = resolvePolicyProfile({
254
+ policyEngine: this.config.policyEngine,
255
+ sandboxConfig: isFeatureEnabled(this.config.sandbox) ? this.config.sandbox : undefined,
256
+ });
257
+ this.emit({
258
+ type: 'policy.profile.resolved',
259
+ profile: rootPolicy.profileName,
260
+ context: 'root',
261
+ selectionSource: rootPolicy.metadata.selectionSource,
262
+ usedLegacyMappings: rootPolicy.metadata.usedLegacyMappings,
263
+ legacySources: rootPolicy.metadata.legacyMappingSources,
264
+ });
265
+ if (rootPolicy.metadata.usedLegacyMappings) {
266
+ this.emit({
267
+ type: 'policy.legacy.fallback.used',
268
+ profile: rootPolicy.profileName,
269
+ sources: rootPolicy.metadata.legacyMappingSources,
270
+ warnings: rootPolicy.metadata.warnings,
271
+ });
272
+ }
343
273
  }
344
274
  // Routing
345
275
  if (isFeatureEnabled(this.config.routing)) {
@@ -388,7 +318,7 @@ export class ProductionAgent {
388
318
  });
389
319
  // Load rules asynchronously - tracked for ensureReady()
390
320
  this.initPromises.push(this.rules.loadRules().catch(err => {
391
- console.warn('[ProductionAgent] Failed to load rules:', err);
321
+ log.warn('Failed to load rules', { error: String(err) });
392
322
  }));
393
323
  }
394
324
  // Economics System (Token Budget) - always enabled
@@ -399,7 +329,24 @@ export class ProductionAgent {
399
329
  // Use maxIterations from config as absolute safety cap
400
330
  maxIterations: this.config.maxIterations,
401
331
  targetIterations: Math.min(baseBudget.targetIterations ?? 20, this.config.maxIterations),
332
+ }, this._sharedEconomicsState ?? undefined, this.agentId);
333
+ // Phase 2.2: Agent State Machine - formalizes phase tracking
334
+ // Always enabled - provides structured phase transitions with metrics
335
+ this.stateMachine = createAgentStateMachine();
336
+ // Forward state machine phase transitions as subagent.phase events
337
+ const phaseMap = {
338
+ exploring: 'exploring', planning: 'planning', acting: 'executing', verifying: 'completing',
339
+ };
340
+ const unsubStateMachine = this.stateMachine.subscribe(event => {
341
+ if (event.type === 'phase.changed') {
342
+ this.emit({
343
+ type: 'subagent.phase',
344
+ agentId: this.agentId,
345
+ phase: phaseMap[event.transition.to] ?? 'exploring',
346
+ });
347
+ }
402
348
  });
349
+ this.unsubscribers.push(unsubStateMachine);
403
350
  // Work Log - compaction-resilient summary of agent work
404
351
  // Always enabled - minimal overhead and critical for long-running tasks
405
352
  this.workLog = createWorkLog();
@@ -417,7 +364,7 @@ export class ProductionAgent {
417
364
  this.agentRegistry = new AgentRegistry();
418
365
  // Load user agents asynchronously - tracked for ensureReady()
419
366
  this.initPromises.push(this.agentRegistry.loadUserAgents().catch(err => {
420
- console.warn('[ProductionAgent] Failed to load user agents:', err);
367
+ log.warn('Failed to load user agents', { error: String(err) });
421
368
  }));
422
369
  // Register spawn_agent tool so LLM can delegate to subagents
423
370
  const boundSpawnTool = createBoundSpawnAgentTool((name, task, constraints) => this.spawnAgent(name, task, constraints));
@@ -466,11 +413,16 @@ export class ProductionAgent {
466
413
  : swarmConfig.throttle;
467
414
  this.provider = createThrottledProvider(this.provider, throttleConfig);
468
415
  }
416
+ // Pass codebaseContext so the decomposer can ground tasks in actual project files
417
+ swarmConfig.codebaseContext = this.codebaseContext ?? undefined;
469
418
  this.swarmOrchestrator = createSwarmOrchestrator(swarmConfig, this.provider, this.agentRegistry, (name, task) => this.spawnAgent(name, task), this.blackboard ?? undefined);
470
419
  // Override parent budget pool with swarm's much larger pool so spawnAgent()
471
420
  // allocates from the swarm budget (e.g. 10M tokens) instead of the parent's
472
421
  // generic pool (200K tokens). Without this, workers get 5K emergency budget.
473
422
  this.budgetPool = this.swarmOrchestrator.getBudgetPool().pool;
423
+ // Phase 3.1+3.2: Set shared state so workers inherit it via buildContext()
424
+ this._sharedContextState = this.swarmOrchestrator.getSharedContextState();
425
+ this._sharedEconomicsState = this.swarmOrchestrator.getSharedEconomicsState();
474
426
  }
475
427
  // Cancellation Support
476
428
  if (isFeatureEnabled(this.config.cancellation)) {
@@ -538,7 +490,7 @@ export class ProductionAgent {
538
490
  this.initPromises.push(this.skillManager.loadSkills()
539
491
  .then(() => { }) // Convert to void
540
492
  .catch(err => {
541
- console.warn('[ProductionAgent] Failed to load skills:', err);
493
+ log.warn('Failed to load skills', { error: String(err) });
542
494
  }));
543
495
  }
544
496
  // Context Engineering (Manus-inspired tricks P, Q, R, S, T)
@@ -554,6 +506,10 @@ export class ProductionAgent {
554
506
  maxFailures: 30,
555
507
  maxReferences: 50,
556
508
  });
509
+ // Bind shared context state for cross-worker failure learning (swarm workers only)
510
+ if (this._sharedContextState) {
511
+ this.contextEngineering.setSharedState(this._sharedContextState);
512
+ }
557
513
  // Codebase Context - intelligent code selection for context management
558
514
  // Analyzes repo structure and selects relevant code within token budgets
559
515
  if (this.config.codebaseContext !== false) {
@@ -570,6 +526,10 @@ export class ProductionAgent {
570
526
  cacheResults: true,
571
527
  cacheTTL: 5 * 60 * 1000, // 5 minutes
572
528
  });
529
+ // Forward trace collector so codebase analysis can emit codebase.map entries.
530
+ if (this.traceCollector) {
531
+ this.codebaseContext.traceCollector = this.traceCollector;
532
+ }
573
533
  // Connect LSP manager to codebase context for enhanced code selection
574
534
  // This enables LSP-based relevance boosting (Phase 4.1)
575
535
  if (this.lspManager) {
@@ -924,6 +884,7 @@ export class ProductionAgent {
924
884
  async run(task) {
925
885
  // Ensure all integrations are ready before running
926
886
  await this.ensureReady();
887
+ this.reconcileStaleTasks('run_start');
927
888
  const startTime = Date.now();
928
889
  // Create cancellation context if enabled
929
890
  const cancellationConfig = isFeatureEnabled(this.config.cancellation) ? this.config.cancellation : null;
@@ -931,6 +892,7 @@ export class ProductionAgent {
931
892
  // Start tracing
932
893
  const traceId = this.observability?.tracer?.startTrace('agent.run') || `trace-${Date.now()}`;
933
894
  this.emit({ type: 'start', task, traceId });
895
+ this.emit({ type: 'run.before', task });
934
896
  this.observability?.logger?.info('Agent started', { task });
935
897
  // Lesson 26: Start trace capture
936
898
  // If session is already active (managed by REPL), start a task within it.
@@ -943,9 +905,19 @@ export class ProductionAgent {
943
905
  else {
944
906
  // Single-task mode (backward compatibility) - start session with task
945
907
  const traceSessionId = `session-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
946
- await this.traceCollector?.startSession(traceSessionId, task, this.config.model || 'default', {});
908
+ const sessionMetadata = {};
909
+ if (this.swarmOrchestrator) {
910
+ sessionMetadata.swarm = true;
911
+ }
912
+ await this.traceCollector?.startSession(traceSessionId, task, this.config.model || 'default', sessionMetadata);
947
913
  }
948
914
  try {
915
+ let runSuccess = true;
916
+ let runFailureReason;
917
+ let completion = {
918
+ success: true,
919
+ reason: 'completed',
920
+ };
949
921
  // Check for cancellation before starting
950
922
  cancellationToken?.throwIfCancellationRequested();
951
923
  // Classify task complexity for scaling guidance
@@ -955,6 +927,27 @@ export class ProductionAgent {
955
927
  // Check if swarm mode should handle this task
956
928
  if (this.swarmOrchestrator) {
957
929
  const swarmResult = await this.runSwarm(task);
930
+ if (!swarmResult.success) {
931
+ runSuccess = false;
932
+ runFailureReason = swarmResult.summary || 'Swarm reported unsuccessful execution';
933
+ completion = {
934
+ success: false,
935
+ reason: 'swarm_failure',
936
+ details: runFailureReason,
937
+ };
938
+ }
939
+ // Guard against summaries that still indicate pending work.
940
+ if (detectIncompleteActionResponse(swarmResult.summary || '')) {
941
+ this.emit({ type: 'completion.before', reason: 'future_intent' });
942
+ runSuccess = false;
943
+ runFailureReason = 'Swarm summary indicates pending, unexecuted work';
944
+ completion = {
945
+ success: false,
946
+ reason: 'future_intent',
947
+ details: runFailureReason,
948
+ futureIntentDetected: true,
949
+ };
950
+ }
958
951
  // Store swarm summary as an assistant message for the response
959
952
  this.state.messages.push({ role: 'assistant', content: swarmResult.summary });
960
953
  }
@@ -963,7 +956,17 @@ export class ProductionAgent {
963
956
  await this.createAndExecutePlan(task);
964
957
  }
965
958
  else {
966
- await this.executeDirectly(task);
959
+ const directResult = await this.executeDirectly(task);
960
+ if (!directResult.success) {
961
+ runSuccess = false;
962
+ runFailureReason = directResult.failureReason || directResult.terminationReason;
963
+ }
964
+ completion = {
965
+ success: directResult.success,
966
+ reason: directResult.terminationReason,
967
+ ...(directResult.failureReason ? { details: directResult.failureReason } : {}),
968
+ ...(directResult.openTasks ? { openTasks: directResult.openTasks } : {}),
969
+ };
967
970
  }
968
971
  // Get final response - find the LAST assistant message (not just check if last message is assistant)
969
972
  const assistantMessages = this.state.messages.filter(m => m.role === 'assistant');
@@ -971,28 +974,101 @@ export class ProductionAgent {
971
974
  const response = typeof lastAssistantMessage?.content === 'string'
972
975
  ? lastAssistantMessage.content
973
976
  : '';
977
+ // Final guardrail: never mark a run successful if the final answer is "I'll do X".
978
+ if (runSuccess && detectIncompleteActionResponse(response)) {
979
+ this.emit({ type: 'completion.before', reason: 'future_intent' });
980
+ runSuccess = false;
981
+ runFailureReason = 'Final response indicates pending, unexecuted work';
982
+ completion = {
983
+ success: false,
984
+ reason: 'future_intent',
985
+ details: runFailureReason,
986
+ futureIntentDetected: true,
987
+ };
988
+ }
989
+ if (runSuccess && completion.reason === 'completed') {
990
+ this.reconcileStaleTasks('run_end');
991
+ const openTasks = this.getOpenTasksSummary();
992
+ if (openTasks && (openTasks.inProgress > 0 || openTasks.pending > 0)) {
993
+ this.emit({ type: 'completion.before', reason: 'open_tasks' });
994
+ runSuccess = false;
995
+ runFailureReason = `Open tasks remain: ${openTasks.pending} pending, ${openTasks.inProgress} in_progress`;
996
+ completion = {
997
+ success: false,
998
+ reason: 'open_tasks',
999
+ details: runFailureReason,
1000
+ openTasks,
1001
+ };
1002
+ this.emit({
1003
+ type: 'completion.blocked',
1004
+ reasons: [
1005
+ runFailureReason,
1006
+ openTasks.blocked > 0 ? `${openTasks.blocked} pending tasks are blocked` : '',
1007
+ ].filter(Boolean),
1008
+ openTasks,
1009
+ diagnostics: {
1010
+ forceTextOnly: false,
1011
+ availableTasks: this.taskManager?.getAvailableTasks().length ?? 0,
1012
+ pendingWithOwner: 0,
1013
+ },
1014
+ });
1015
+ }
1016
+ }
974
1017
  // Finalize
975
1018
  const duration = Date.now() - startTime;
976
1019
  this.state.metrics.duration = duration;
977
- this.state.metrics.successCount = (this.state.metrics.successCount ?? 0) + 1;
1020
+ if (runSuccess) {
1021
+ this.state.metrics.successCount = (this.state.metrics.successCount ?? 0) + 1;
1022
+ }
1023
+ else {
1024
+ this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
1025
+ }
978
1026
  await this.observability?.tracer?.endTrace();
979
1027
  const result = {
980
- success: true,
1028
+ success: runSuccess,
981
1029
  response,
1030
+ ...(runSuccess ? {} : { error: runFailureReason ?? 'Task failed' }),
982
1031
  metrics: this.getMetrics(),
983
1032
  messages: this.state.messages,
984
1033
  traceId,
985
1034
  plan: this.state.plan,
1035
+ completion,
1036
+ };
1037
+ result.completion.recovery = {
1038
+ intraRunRetries: this.state.metrics.retryCount ?? 0,
1039
+ autoLoopRuns: 0,
1040
+ terminal: !runSuccess,
1041
+ reasonChain: [completion.reason],
986
1042
  };
987
1043
  this.emit({ type: 'complete', result });
988
- this.observability?.logger?.info('Agent completed', { duration, success: true });
1044
+ this.emit({
1045
+ type: 'completion.after',
1046
+ success: runSuccess,
1047
+ reason: completion.reason,
1048
+ ...(completion.details ? { details: completion.details } : {}),
1049
+ });
1050
+ this.emit({
1051
+ type: 'run.after',
1052
+ success: runSuccess,
1053
+ reason: completion.reason,
1054
+ ...(completion.details ? { details: completion.details } : {}),
1055
+ });
1056
+ this.observability?.logger?.info('Agent completed', {
1057
+ duration,
1058
+ success: runSuccess,
1059
+ ...(runFailureReason ? { failureReason: runFailureReason } : {}),
1060
+ });
989
1061
  // Lesson 26: End trace capture
990
1062
  // If task is active (REPL mode), end the task. Otherwise end the session (single-task mode).
991
1063
  if (this.traceCollector?.isTaskActive()) {
992
- await this.traceCollector.endTask({ success: true, output: response });
1064
+ await this.traceCollector.endTask(runSuccess
1065
+ ? { success: true, output: response }
1066
+ : { success: false, failureReason: runFailureReason ?? 'Task failed', output: response });
993
1067
  }
994
1068
  else if (this.traceCollector?.isSessionActive()) {
995
- await this.traceCollector.endSession({ success: true, output: response });
1069
+ await this.traceCollector.endSession(runSuccess
1070
+ ? { success: true, output: response }
1071
+ : { success: false, failureReason: runFailureReason ?? 'Task failed', output: response });
996
1072
  }
997
1073
  return result;
998
1074
  }
@@ -1013,6 +1089,18 @@ export class ProductionAgent {
1013
1089
  else if (this.traceCollector?.isSessionActive()) {
1014
1090
  await this.traceCollector.endSession({ success: false, failureReason: `Cancelled: ${error.message}` });
1015
1091
  }
1092
+ this.emit({
1093
+ type: 'completion.after',
1094
+ success: false,
1095
+ reason: 'cancelled',
1096
+ details: `Cancelled: ${error.message}`,
1097
+ });
1098
+ this.emit({
1099
+ type: 'run.after',
1100
+ success: false,
1101
+ reason: 'cancelled',
1102
+ details: `Cancelled: ${error.message}`,
1103
+ });
1016
1104
  return {
1017
1105
  success: false,
1018
1106
  response: '',
@@ -1020,6 +1108,11 @@ export class ProductionAgent {
1020
1108
  metrics: this.getMetrics(),
1021
1109
  messages: this.state.messages,
1022
1110
  traceId,
1111
+ completion: {
1112
+ success: false,
1113
+ reason: 'cancelled',
1114
+ details: `Cancelled: ${error.message}`,
1115
+ },
1023
1116
  };
1024
1117
  }
1025
1118
  this.observability?.tracer?.recordError(error);
@@ -1027,6 +1120,9 @@ export class ProductionAgent {
1027
1120
  this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
1028
1121
  this.emit({ type: 'error', error: error.message });
1029
1122
  this.observability?.logger?.error('Agent failed', { error: error.message });
1123
+ const completionReason = error.message.includes('failed to complete requested action')
1124
+ ? 'incomplete_action'
1125
+ : 'error';
1030
1126
  // Lesson 26: End trace capture on error
1031
1127
  if (this.traceCollector?.isTaskActive()) {
1032
1128
  await this.traceCollector.endTask({ success: false, failureReason: error.message });
@@ -1034,14 +1130,26 @@ export class ProductionAgent {
1034
1130
  else if (this.traceCollector?.isSessionActive()) {
1035
1131
  await this.traceCollector.endSession({ success: false, failureReason: error.message });
1036
1132
  }
1037
- return {
1133
+ const errorResult = {
1038
1134
  success: false,
1039
1135
  response: '',
1040
1136
  error: error.message,
1041
1137
  metrics: this.getMetrics(),
1042
1138
  messages: this.state.messages,
1043
1139
  traceId,
1140
+ completion: {
1141
+ success: false,
1142
+ reason: completionReason,
1143
+ details: error.message,
1144
+ },
1044
1145
  };
1146
+ this.emit({
1147
+ type: 'run.after',
1148
+ success: false,
1149
+ reason: completionReason,
1150
+ details: error.message,
1151
+ });
1152
+ return errorResult;
1045
1153
  }
1046
1154
  finally {
1047
1155
  // Dispose cancellation context on completion
@@ -1068,7 +1176,7 @@ export class ProductionAgent {
1068
1176
  this.planning.completeTask(currentTask.id);
1069
1177
  this.emit({ type: 'task.complete', task: currentTask });
1070
1178
  }
1071
- catch (err) {
1179
+ catch (_err) {
1072
1180
  this.planning.failTask(currentTask.id);
1073
1181
  this.observability?.logger?.warn('Plan task failed', { taskId: currentTask.id });
1074
1182
  // Continue with other tasks if possible
@@ -1102,891 +1210,336 @@ export class ProductionAgent {
1102
1210
  const { SwarmEventBridge } = await import('./integrations/swarm/swarm-event-bridge.js');
1103
1211
  const bridge = new SwarmEventBridge({ outputDir: '.agent/swarm-live' });
1104
1212
  const unsubBridge = bridge.attach(this.swarmOrchestrator);
1105
- try {
1106
- const result = await this.swarmOrchestrator.execute(task);
1107
- // Populate task DAG for dashboard after execution
1108
- bridge.setTasks(result.tasks);
1109
- this.observability?.logger?.info('Swarm execution complete', {
1110
- success: result.success,
1111
- tasks: result.stats.totalTasks,
1112
- completed: result.stats.completedTasks,
1113
- tokens: result.stats.totalTokens,
1114
- cost: result.stats.totalCost,
1213
+ const writeCodeMapSnapshot = () => {
1214
+ if (!this.codebaseContext) {
1215
+ return;
1216
+ }
1217
+ const repoMap = this.codebaseContext.getRepoMap();
1218
+ if (!repoMap) {
1219
+ return;
1220
+ }
1221
+ // Build dependency edges from the dependency graph
1222
+ const depEdges = [];
1223
+ for (const [file, deps] of repoMap.dependencyGraph) {
1224
+ depEdges.push({ file, imports: Array.from(deps) });
1225
+ }
1226
+ // Build top chunks sorted by importance
1227
+ const chunks = Array.from(repoMap.chunks.values());
1228
+ const topChunks = chunks
1229
+ .sort((a, b) => b.importance - a.importance)
1230
+ .slice(0, 100)
1231
+ .map(c => ({
1232
+ filePath: c.filePath,
1233
+ tokenCount: c.tokenCount,
1234
+ importance: c.importance,
1235
+ type: c.type,
1236
+ symbols: c.symbolDetails,
1237
+ }));
1238
+ const files = chunks.map((chunk) => ({
1239
+ filePath: chunk.filePath,
1240
+ directory: path.dirname(chunk.filePath) === '.' ? '' : path.dirname(chunk.filePath),
1241
+ fileName: path.basename(chunk.filePath),
1242
+ tokenCount: chunk.tokenCount,
1243
+ importance: chunk.importance,
1244
+ type: chunk.type,
1245
+ symbols: chunk.symbolDetails,
1246
+ inDegree: repoMap.reverseDependencyGraph.get(chunk.filePath)?.size ?? 0,
1247
+ outDegree: repoMap.dependencyGraph.get(chunk.filePath)?.size ?? 0,
1248
+ }));
1249
+ bridge.writeCodeMapSnapshot({
1250
+ totalFiles: repoMap.chunks.size,
1251
+ totalTokens: repoMap.totalTokens,
1252
+ entryPoints: repoMap.entryPoints,
1253
+ coreModules: repoMap.coreModules,
1254
+ dependencyEdges: depEdges,
1255
+ files,
1256
+ topChunks,
1115
1257
  });
1116
- return result;
1117
- }
1118
- finally {
1119
- unsubBridge();
1120
- bridge.close();
1121
- unsubSwarm();
1122
- }
1123
- }
1124
- /**
1125
- * Execute a task directly without planning.
1126
- */
1127
- async executeDirectly(task) {
1128
- // Build messages
1129
- const messages = this.buildMessages(task);
1130
- // Reset economics for new task
1131
- this.economics?.reset();
1132
- // Reflection configuration
1133
- const reflectionConfig = this.config.reflection;
1134
- const reflectionEnabled = isFeatureEnabled(reflectionConfig);
1135
- const autoReflect = reflectionEnabled && reflectionConfig.autoReflect;
1136
- const maxReflectionAttempts = reflectionEnabled
1137
- ? (reflectionConfig.maxAttempts || 3)
1138
- : 1;
1139
- const confidenceThreshold = reflectionEnabled
1140
- ? (reflectionConfig.confidenceThreshold || 0.8)
1141
- : 0.8;
1142
- let reflectionAttempt = 0;
1143
- let lastResponse = '';
1144
- let incompleteActionRetries = 0;
1145
- const requestedArtifact = this.extractRequestedArtifact(task);
1146
- const executedToolNames = new Set();
1147
- // Outer loop for reflection (if enabled)
1148
- while (reflectionAttempt < maxReflectionAttempts) {
1149
- reflectionAttempt++;
1150
- // Agent loop - now uses economics-based budget checking
1151
- while (true) {
1152
- this.state.iteration++;
1153
- // Record iteration start for tracing
1154
- this.traceCollector?.record({
1155
- type: 'iteration.start',
1156
- data: { iterationNumber: this.state.iteration },
1157
- });
1158
- // =======================================================================
1159
- // CANCELLATION CHECK
1160
- // Checks internal cancellation (ESC key) always immediate.
1161
- // External cancellation (parent timeout) is checked after economics
1162
- // to allow graceful wrapup when wrapup has been requested.
1163
- // =======================================================================
1164
- if (this.cancellation?.isCancelled) {
1165
- this.cancellation.token.throwIfCancellationRequested();
1258
+ };
1259
+ let codeMapRefreshInFlight = false;
1260
+ let codeMapRefreshTimer = null;
1261
+ const refreshAndWriteCodeMapSnapshot = async () => {
1262
+ if (!this.codebaseContext || codeMapRefreshInFlight) {
1263
+ return;
1264
+ }
1265
+ codeMapRefreshInFlight = true;
1266
+ try {
1267
+ // Re-analyze from disk so snapshots include newly created files during swarm execution.
1268
+ this.codebaseContext.clearCache();
1269
+ await this.codebaseContext.analyze();
1270
+ writeCodeMapSnapshot();
1271
+ }
1272
+ catch {
1273
+ // Best effort
1274
+ }
1275
+ finally {
1276
+ codeMapRefreshInFlight = false;
1277
+ }
1278
+ };
1279
+ // Write observability snapshots to swarm-live/ on relevant events
1280
+ const unsubSnapshots = this.swarmOrchestrator.subscribe(event => {
1281
+ // Write codemap snapshot when tasks are loaded.
1282
+ if (event.type === 'swarm.tasks.loaded' && this.codebaseContext) {
1283
+ try {
1284
+ writeCodeMapSnapshot();
1285
+ }
1286
+ catch {
1287
+ // Best effort — don't crash the swarm
1288
+ }
1289
+ }
1290
+ // Refresh codemap after each completed wave to avoid stale 0-file snapshots.
1291
+ if (event.type === 'swarm.wave.complete' && this.codebaseContext) {
1292
+ void refreshAndWriteCodeMapSnapshot();
1293
+ }
1294
+ if (event.type === 'swarm.task.completed' && this.codebaseContext) {
1295
+ if (codeMapRefreshTimer) {
1296
+ clearTimeout(codeMapRefreshTimer);
1297
+ }
1298
+ codeMapRefreshTimer = setTimeout(() => {
1299
+ void refreshAndWriteCodeMapSnapshot();
1300
+ }, 1200);
1301
+ }
1302
+ // Write blackboard.json on wave completion or task completion
1303
+ if ((event.type === 'swarm.wave.complete' || event.type === 'swarm.task.completed') && this.blackboard) {
1304
+ try {
1305
+ const findings = this.blackboard.getAllFindings();
1306
+ bridge.writeBlackboardSnapshot({
1307
+ findings: findings.map(f => ({
1308
+ id: f.id ?? '',
1309
+ topic: f.topic ?? '',
1310
+ type: f.type ?? '',
1311
+ agentId: f.agentId ?? '',
1312
+ confidence: f.confidence ?? 0,
1313
+ content: (f.content ?? '').slice(0, 500),
1314
+ })),
1315
+ claims: [],
1316
+ updatedAt: new Date().toISOString(),
1317
+ });
1318
+ }
1319
+ catch {
1320
+ // Best effort
1321
+ }
1322
+ }
1323
+ // Write budget-pool.json on budget updates
1324
+ if (event.type === 'swarm.budget.update' && this.budgetPool) {
1325
+ try {
1326
+ const stats = this.budgetPool.getStats();
1327
+ bridge.writeBudgetPoolSnapshot({
1328
+ poolTotal: stats.totalTokens,
1329
+ poolUsed: stats.tokensUsed,
1330
+ poolRemaining: stats.tokensRemaining,
1331
+ allocations: [],
1332
+ updatedAt: new Date().toISOString(),
1333
+ });
1334
+ }
1335
+ catch {
1336
+ // Best effort
1166
1337
  }
1167
- // =======================================================================
1168
- // RESOURCE CHECK - system resource limits
1169
- // =======================================================================
1170
- if (this.resourceManager) {
1171
- const resourceCheck = this.resourceManager.check();
1172
- if (!resourceCheck.canContinue) {
1173
- this.observability?.logger?.warn('Resource limit reached', {
1174
- status: resourceCheck.status,
1175
- message: resourceCheck.message,
1338
+ }
1339
+ });
1340
+ // Bridge swarm events into JSONL trace pipeline
1341
+ const traceCollector = this.traceCollector;
1342
+ let unsubTrace;
1343
+ if (traceCollector) {
1344
+ unsubTrace = this.swarmOrchestrator.subscribe(event => {
1345
+ switch (event.type) {
1346
+ case 'swarm.start':
1347
+ traceCollector.record({
1348
+ type: 'swarm.start',
1349
+ data: { taskCount: event.taskCount, config: event.config },
1176
1350
  });
1177
- this.emit({ type: 'error', error: resourceCheck.message || 'Resource limit exceeded' });
1178
1351
  break;
1179
- }
1180
- // Log warnings for elevated usage
1181
- if (resourceCheck.status === 'warning' || resourceCheck.status === 'critical') {
1182
- this.observability?.logger?.info(`Resource status: ${resourceCheck.status}`, {
1183
- message: resourceCheck.message,
1352
+ case 'swarm.tasks.loaded':
1353
+ traceCollector.record({
1354
+ type: 'swarm.decomposition',
1355
+ data: {
1356
+ tasks: event.tasks.map(t => ({
1357
+ id: t.id,
1358
+ description: t.description.slice(0, 200),
1359
+ type: t.type,
1360
+ wave: t.wave,
1361
+ deps: t.dependencies,
1362
+ })),
1363
+ totalWaves: Math.max(...event.tasks.map(t => t.wave), 0) + 1,
1364
+ },
1184
1365
  });
1185
- }
1186
- }
1187
- // =======================================================================
1188
- // ECONOMICS CHECK (Token Budget) - replaces hard iteration limit
1189
- // With recovery: try compaction before giving up on token limits
1190
- // =======================================================================
1191
- let forceTextOnly = false; // Track if we should skip tool execution
1192
- let budgetInjectedPrompt;
1193
- if (this.economics) {
1194
- const budgetCheck = this.economics.checkBudget();
1195
- // Capture forceTextOnly and injectedPrompt for later use
1196
- forceTextOnly = budgetCheck.forceTextOnly ?? false;
1197
- budgetInjectedPrompt = budgetCheck.injectedPrompt;
1198
- if (!budgetCheck.canContinue) {
1199
- // ===================================================================
1200
- // RECOVERY ATTEMPT: Try emergency context reduction before giving up
1201
- // Only for token-based limits, not iteration limits
1202
- // ===================================================================
1203
- const isTokenLimit = budgetCheck.budgetType === 'tokens' || budgetCheck.budgetType === 'cost';
1204
- const alreadyTriedRecovery = this.state._recoveryAttempted === true;
1205
- if (isTokenLimit && !alreadyTriedRecovery) {
1206
- this.observability?.logger?.info('Budget limit reached, attempting recovery via context reduction', {
1207
- reason: budgetCheck.reason,
1208
- percentUsed: budgetCheck.percentUsed,
1209
- });
1210
- this.emit({
1211
- type: 'resilience.retry',
1212
- reason: 'budget_limit_compaction',
1213
- attempt: 1,
1214
- maxAttempts: 1,
1215
- });
1216
- this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1217
- // Mark that we've attempted recovery to prevent infinite loops
1218
- this.state._recoveryAttempted = true;
1219
- const tokensBefore = this.estimateContextTokens(messages);
1220
- // Step 1: Compact tool outputs aggressively
1221
- this.compactToolOutputs();
1222
- // Step 2: Emergency truncation - keep system + last N messages
1223
- const PRESERVE_RECENT = 10;
1224
- if (messages.length > PRESERVE_RECENT + 2) {
1225
- const systemMessage = messages.find(m => m.role === 'system');
1226
- const recentMessages = messages.slice(-(PRESERVE_RECENT));
1227
- // Rebuild message array
1228
- messages.length = 0;
1229
- if (systemMessage) {
1230
- messages.push(systemMessage);
1231
- }
1232
- messages.push({
1233
- role: 'system',
1234
- content: `[CONTEXT REDUCED: Earlier messages were removed to stay within budget. Conversation continues from recent context.]`,
1235
- });
1236
- messages.push(...recentMessages);
1237
- // Inject work log after emergency truncation to prevent amnesia
1238
- if (this.workLog?.hasContent()) {
1239
- const workLogMessage = {
1240
- role: 'user',
1241
- content: this.workLog.toCompactString(),
1242
- };
1243
- messages.push(workLogMessage);
1244
- }
1245
- // Update state messages too
1246
- this.state.messages.length = 0;
1247
- this.state.messages.push(...messages);
1248
- }
1249
- const tokensAfter = this.estimateContextTokens(messages);
1250
- const reduction = Math.round((1 - tokensAfter / tokensBefore) * 100);
1251
- if (tokensAfter < tokensBefore * 0.8) {
1252
- // Significant reduction achieved
1253
- this.observability?.logger?.info('Context reduction successful, continuing execution', {
1254
- tokensBefore,
1255
- tokensAfter,
1256
- reduction,
1257
- });
1258
- this.emit({
1259
- type: 'resilience.recovered',
1260
- reason: 'budget_limit_compaction',
1261
- attempts: 1,
1262
- });
1263
- this.emit({
1264
- type: 'compaction.auto',
1265
- tokensBefore,
1266
- tokensAfter,
1267
- messagesCompacted: tokensBefore - tokensAfter,
1268
- });
1269
- // Continue execution instead of breaking
1270
- continue;
1271
- }
1272
- this.observability?.logger?.warn('Context reduction insufficient', {
1273
- tokensBefore,
1274
- tokensAfter,
1275
- reduction,
1276
- });
1277
- }
1278
- // Hard limit reached and recovery failed (or not applicable)
1279
- this.observability?.logger?.warn('Budget limit reached', {
1280
- reason: budgetCheck.reason,
1281
- budgetType: budgetCheck.budgetType,
1366
+ break;
1367
+ case 'swarm.wave.start':
1368
+ traceCollector.record({
1369
+ type: 'swarm.wave',
1370
+ data: { phase: 'start', wave: event.wave, taskCount: event.taskCount },
1282
1371
  });
1283
- // Emit appropriate event
1284
- if (budgetCheck.budgetType === 'iterations') {
1285
- const totalIter = this.getTotalIterations();
1286
- const iterMsg = this.parentIterations > 0
1287
- ? `${this.state.iteration} + ${this.parentIterations} parent = ${totalIter}`
1288
- : `${this.state.iteration}`;
1289
- this.emit({ type: 'error', error: `Max iterations reached (${iterMsg})` });
1290
- }
1291
- else {
1292
- this.emit({ type: 'error', error: budgetCheck.reason || 'Budget exceeded' });
1293
- }
1294
1372
  break;
1295
- }
1296
- // Check for soft limits and potential extension
1297
- if (budgetCheck.isSoftLimit && budgetCheck.suggestedAction === 'request_extension') {
1298
- this.observability?.logger?.info('Approaching budget limit', {
1299
- reason: budgetCheck.reason,
1300
- percentUsed: budgetCheck.percentUsed,
1373
+ case 'swarm.wave.complete':
1374
+ traceCollector.record({
1375
+ type: 'swarm.wave',
1376
+ data: {
1377
+ phase: 'complete',
1378
+ wave: event.wave,
1379
+ taskCount: event.completed + event.failed + (event.skipped ?? 0),
1380
+ completed: event.completed,
1381
+ failed: event.failed,
1382
+ },
1301
1383
  });
1302
- // Could request extension here if handler is set
1303
- }
1304
- }
1305
- else {
1306
- // Fallback to simple iteration check if economics not available
1307
- // Use getTotalIterations() to account for parent iterations (subagent hierarchy)
1308
- if (this.getTotalIterations() >= this.config.maxIterations) {
1309
- this.observability?.logger?.warn('Max iterations reached', {
1310
- iteration: this.state.iteration,
1311
- parentIterations: this.parentIterations,
1312
- total: this.getTotalIterations(),
1384
+ break;
1385
+ case 'swarm.task.dispatched':
1386
+ traceCollector.record({
1387
+ type: 'swarm.task',
1388
+ data: { phase: 'dispatched', taskId: event.taskId, model: event.model },
1313
1389
  });
1314
1390
  break;
1315
- }
1316
- }
1317
- // =======================================================================
1318
- // GRACEFUL WRAPUP CHECK
1319
- // If a wrapup has been requested (e.g., timeout approaching), convert
1320
- // to forceTextOnly + inject wrapup prompt for structured summary.
1321
- // Must come after economics check (which may also set forceTextOnly).
1322
- // =======================================================================
1323
- if (this.wrapupRequested && !forceTextOnly) {
1324
- forceTextOnly = true;
1325
- budgetInjectedPrompt = TIMEOUT_WRAPUP_PROMPT;
1326
- this.wrapupRequested = false;
1327
- }
1328
- // =======================================================================
1329
- // EXTERNAL CANCELLATION CHECK (deferred from above)
1330
- // Checked after wrapup so that graceful wrapup can intercept the timeout.
1331
- // If wrapup was already requested and converted to forceTextOnly above,
1332
- // we skip throwing here to allow one more text-only turn for the summary.
1333
- // =======================================================================
1334
- if (this.externalCancellationToken?.isCancellationRequested && !forceTextOnly) {
1335
- this.externalCancellationToken.throwIfCancellationRequested();
1336
- }
1337
- // =======================================================================
1338
- // INTELLIGENT LOOP DETECTION & NUDGE INJECTION
1339
- // Uses economics system for doom loops, exploration saturation, etc.
1340
- // =======================================================================
1341
- if (this.economics && budgetInjectedPrompt) {
1342
- // Inject contextual guidance from economics system
1343
- messages.push({
1344
- role: 'user',
1345
- content: budgetInjectedPrompt,
1346
- });
1347
- const loopState = this.economics.getLoopState();
1348
- const phaseState = this.economics.getPhaseState();
1349
- this.observability?.logger?.info('Loop detection - injecting guidance', {
1350
- iteration: this.state.iteration,
1351
- doomLoop: loopState.doomLoopDetected,
1352
- phase: phaseState.phase,
1353
- filesRead: phaseState.uniqueFilesRead,
1354
- filesModified: phaseState.filesModified,
1355
- shouldTransition: phaseState.shouldTransition,
1356
- forceTextOnly,
1357
- });
1358
- }
1359
- // =======================================================================
1360
- // RECITATION INJECTION (Trick Q) - Combat "lost in middle" attention
1361
- // =======================================================================
1362
- if (this.contextEngineering) {
1363
- if (process.env.DEBUG_LLM) {
1364
- if (process.env.DEBUG)
1365
- console.log(`[recitation] Before: ${messages.length} messages`);
1366
- }
1367
- const enrichedMessages = this.contextEngineering.injectRecitation(messages, {
1368
- goal: task,
1369
- plan: this.state.plan ? {
1370
- description: this.state.plan.goal || task,
1371
- tasks: this.state.plan.tasks.map(t => ({
1372
- id: t.id,
1373
- description: t.description,
1374
- status: t.status,
1375
- })),
1376
- currentTaskIndex: this.state.plan.tasks.findIndex(t => t.status === 'in_progress'),
1377
- } : undefined,
1378
- activeFiles: this.economics?.getProgress().filesModified
1379
- ? [`${this.economics.getProgress().filesModified} files modified`]
1380
- : undefined,
1381
- recentErrors: this.contextEngineering.getFailureInsights().slice(0, 2),
1382
- });
1383
- if (process.env.DEBUG_LLM) {
1384
- if (process.env.DEBUG)
1385
- console.log(`[recitation] After: ${enrichedMessages?.length ?? 'null/undefined'} messages`);
1386
- }
1387
- // Only replace if we got a DIFFERENT array back (avoid clearing same reference)
1388
- // When no injection needed, injectRecitation returns the same array reference
1389
- if (enrichedMessages && enrichedMessages !== messages && enrichedMessages.length > 0) {
1390
- messages.length = 0;
1391
- messages.push(...enrichedMessages);
1392
- }
1393
- else if (!enrichedMessages || enrichedMessages.length === 0) {
1394
- console.warn('[executeDirectly] Recitation returned empty/null messages, keeping original');
1395
- }
1396
- // If enrichedMessages === messages, we don't need to do anything (same reference)
1397
- // Update recitation frequency based on context size
1398
- const contextTokens = messages.reduce((sum, m) => sum + (m.content?.length || 0) / 4, 0);
1399
- this.contextEngineering.updateRecitationFrequency(contextTokens);
1400
- }
1401
- // =======================================================================
1402
- // FAILURE CONTEXT INJECTION (Trick S) - Learn from mistakes
1403
- // =======================================================================
1404
- if (this.contextEngineering) {
1405
- const failureContext = this.contextEngineering.getFailureContext(5);
1406
- if (failureContext) {
1407
- // Insert failure context before the last user message
1408
- // (Using reverse iteration for ES2022 compatibility)
1409
- let lastUserIdx = -1;
1410
- for (let i = messages.length - 1; i >= 0; i--) {
1411
- if (messages[i].role === 'user') {
1412
- lastUserIdx = i;
1413
- break;
1414
- }
1415
- }
1416
- if (lastUserIdx > 0) {
1417
- messages.splice(lastUserIdx, 0, {
1418
- role: 'system',
1419
- content: failureContext,
1420
- });
1421
- }
1422
- }
1423
- }
1424
- // =====================================================================
1425
- // INJECTION BUDGET ANALYSIS (Phase 2 - monitoring mode)
1426
- // Collects stats on context injections without gating; logs when
1427
- // budget would have dropped items. Validates system before enabling gating.
1428
- // =====================================================================
1429
- if (this.injectionBudget) {
1430
- const proposals = [];
1431
- if (budgetInjectedPrompt) {
1432
- proposals.push({ name: 'budget_warning', priority: 0, maxTokens: 500, content: budgetInjectedPrompt });
1433
- }
1434
- // Approximate recitation content (actual injection handled above)
1435
- if (this.contextEngineering) {
1436
- const failureCtx = this.contextEngineering.getFailureContext(5);
1437
- if (failureCtx) {
1438
- proposals.push({ name: 'failure_context', priority: 2, maxTokens: 300, content: failureCtx });
1439
- }
1440
- }
1441
- if (proposals.length > 0) {
1442
- const accepted = this.injectionBudget.allocate(proposals);
1443
- const stats = this.injectionBudget.getLastStats();
1444
- if (stats && stats.droppedNames.length > 0 && process.env.DEBUG) {
1445
- console.log(`[injection-budget] Would drop: ${stats.droppedNames.join(', ')} (${stats.proposedTokens} proposed, ${stats.acceptedTokens} accepted)`);
1446
- }
1447
- // Log total injection overhead for observability
1448
- if (stats && process.env.DEBUG_LLM) {
1449
- console.log(`[injection-budget] Iteration ${this.state.iteration}: ${accepted.length}/${proposals.length} injections, ~${stats.acceptedTokens} tokens`);
1450
- }
1451
- }
1452
- }
1453
- // =====================================================================
1454
- // RESILIENT LLM CALL: Empty response retries + max_tokens continuation
1455
- // =====================================================================
1456
- // Get resilience config
1457
- const resilienceConfig = typeof this.config.resilience === 'object'
1458
- ? this.config.resilience
1459
- : {};
1460
- const resilienceEnabled = isFeatureEnabled(this.config.resilience);
1461
- const MAX_EMPTY_RETRIES = resilienceConfig.maxEmptyRetries ?? 2;
1462
- const MAX_CONTINUATIONS = resilienceConfig.maxContinuations ?? 3;
1463
- const AUTO_CONTINUE = resilienceConfig.autoContinue ?? true;
1464
- const MIN_CONTENT_LENGTH = resilienceConfig.minContentLength ?? 1;
1465
- const INCOMPLETE_ACTION_RECOVERY = resilienceConfig.incompleteActionRecovery ?? true;
1466
- const MAX_INCOMPLETE_ACTION_RETRIES = resilienceConfig.maxIncompleteActionRetries ?? 2;
1467
- const ENFORCE_REQUESTED_ARTIFACTS = resilienceConfig.enforceRequestedArtifacts ?? true;
1468
- // =================================================================
1469
- // PRE-FLIGHT BUDGET CHECK: Estimate if LLM call would exceed budget
1470
- // Catches cases where we're at e.g. 120k and next call adds ~35k
1471
- // =================================================================
1472
- if (this.economics && !forceTextOnly) {
1473
- const estimatedInputTokens = this.estimateContextTokens(messages);
1474
- const estimatedOutputTokens = 4096; // Conservative output estimate
1475
- const currentUsage = this.economics.getUsage();
1476
- const budget = this.economics.getBudget();
1477
- const projectedTotal = currentUsage.tokens + estimatedInputTokens + estimatedOutputTokens;
1478
- if (projectedTotal > budget.maxTokens) {
1479
- this.observability?.logger?.warn('Pre-flight budget check: projected overshoot', {
1480
- currentTokens: currentUsage.tokens,
1481
- estimatedInput: estimatedInputTokens,
1482
- projectedTotal,
1483
- maxTokens: budget.maxTokens,
1391
+ case 'swarm.task.completed':
1392
+ traceCollector.record({
1393
+ type: 'swarm.task',
1394
+ data: {
1395
+ phase: 'completed',
1396
+ taskId: event.taskId,
1397
+ tokensUsed: event.tokensUsed,
1398
+ costUsed: event.costUsed,
1399
+ qualityScore: event.qualityScore,
1400
+ },
1484
1401
  });
1485
- // Inject wrap-up prompt if not already injected
1486
- if (!budgetInjectedPrompt) {
1487
- messages.push({
1488
- role: 'user',
1489
- content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
1490
- });
1491
- this.state.messages.push({
1492
- role: 'user',
1493
- content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
1494
- });
1495
- }
1496
- forceTextOnly = true;
1497
- }
1498
- }
1499
- let response = await this.callLLM(messages);
1500
- let emptyRetries = 0;
1501
- let continuations = 0;
1502
- // Phase 1: Handle empty responses with retry (if resilience enabled)
1503
- while (resilienceEnabled && emptyRetries < MAX_EMPTY_RETRIES) {
1504
- const hasContent = response.content && response.content.length >= MIN_CONTENT_LENGTH;
1505
- const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
1506
- const hasThinking = response.thinking && response.thinking.length > 0;
1507
- if (hasContent || hasToolCalls) {
1508
- // Valid visible response
1509
- if (emptyRetries > 0) {
1510
- this.emit({
1511
- type: 'resilience.recovered',
1512
- reason: 'empty_response',
1513
- attempts: emptyRetries,
1514
- });
1515
- this.observability?.logger?.info('Recovered from empty response', {
1516
- retries: emptyRetries,
1517
- });
1518
- }
1519
1402
  break;
1520
- }
1521
- if (hasThinking && !hasContent && !hasToolCalls) {
1522
- // Model produced reasoning but no visible output (e.g., DeepSeek-R1, GLM-4, QwQ).
1523
- // Give ONE targeted nudge, then accept thinking as content.
1524
- if (emptyRetries === 0) {
1525
- emptyRetries++;
1526
- this.emit({
1527
- type: 'resilience.retry',
1528
- reason: 'thinking_only_response',
1529
- attempt: emptyRetries,
1530
- maxAttempts: MAX_EMPTY_RETRIES,
1531
- });
1532
- this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1533
- this.observability?.logger?.warn('Thinking-only response (no visible content), nudging', {
1534
- thinkingLength: response.thinking.length,
1535
- });
1536
- const thinkingNudge = {
1537
- role: 'user',
1538
- content: '[System: You produced reasoning but no visible response. Please provide your answer based on your analysis.]',
1539
- };
1540
- messages.push(thinkingNudge);
1541
- this.state.messages.push(thinkingNudge);
1542
- response = await this.callLLM(messages);
1543
- continue;
1544
- }
1545
- // Second attempt also thinking-only → accept thinking as content
1546
- this.observability?.logger?.info('Accepting thinking as content after nudge failed', {
1547
- thinkingLength: response.thinking.length,
1403
+ case 'swarm.task.failed':
1404
+ traceCollector.record({
1405
+ type: 'swarm.task',
1406
+ data: { phase: 'failed', taskId: event.taskId, error: event.error },
1548
1407
  });
1549
- response = { ...response, content: response.thinking };
1550
1408
  break;
1551
- }
1552
- // Truly empty (no content, no tools, no thinking) — existing retry logic
1553
- emptyRetries++;
1554
- this.emit({
1555
- type: 'resilience.retry',
1556
- reason: 'empty_response',
1557
- attempt: emptyRetries,
1558
- maxAttempts: MAX_EMPTY_RETRIES,
1559
- });
1560
- this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1561
- this.observability?.logger?.warn('Empty LLM response, retrying', {
1562
- attempt: emptyRetries,
1563
- maxAttempts: MAX_EMPTY_RETRIES,
1564
- });
1565
- // Add gentle nudge and retry
1566
- const nudgeMessage = {
1567
- role: 'user',
1568
- content: '[System: Your previous response was empty. Please provide a response or use a tool.]',
1569
- };
1570
- messages.push(nudgeMessage);
1571
- this.state.messages.push(nudgeMessage);
1572
- response = await this.callLLM(messages);
1573
- }
1574
- // Phase 2: Handle max_tokens truncation with continuation (if enabled)
1575
- if (resilienceEnabled && AUTO_CONTINUE && response.stopReason === 'max_tokens' && !response.toolCalls?.length) {
1576
- let accumulatedContent = response.content || '';
1577
- while (continuations < MAX_CONTINUATIONS && response.stopReason === 'max_tokens') {
1578
- continuations++;
1579
- this.emit({
1580
- type: 'resilience.continue',
1581
- reason: 'max_tokens',
1582
- continuation: continuations,
1583
- maxContinuations: MAX_CONTINUATIONS,
1584
- accumulatedLength: accumulatedContent.length,
1409
+ case 'swarm.task.skipped':
1410
+ traceCollector.record({
1411
+ type: 'swarm.task',
1412
+ data: { phase: 'skipped', taskId: event.taskId, reason: event.reason },
1585
1413
  });
1586
- this.observability?.logger?.info('Response truncated at max_tokens, continuing', {
1587
- continuation: continuations,
1588
- accumulatedLength: accumulatedContent.length,
1414
+ break;
1415
+ case 'swarm.quality.rejected':
1416
+ traceCollector.record({
1417
+ type: 'swarm.quality',
1418
+ data: { taskId: event.taskId, score: event.score, feedback: event.feedback },
1589
1419
  });
1590
- // Add continuation request
1591
- const continuationMessage = {
1592
- role: 'assistant',
1593
- content: accumulatedContent,
1594
- };
1595
- const continueRequest = {
1596
- role: 'user',
1597
- content: '[System: Please continue from where you left off. Do not repeat what you already said.]',
1598
- };
1599
- messages.push(continuationMessage, continueRequest);
1600
- this.state.messages.push(continuationMessage, continueRequest);
1601
- response = await this.callLLM(messages);
1602
- // Accumulate content
1603
- if (response.content) {
1604
- accumulatedContent += response.content;
1605
- }
1606
- }
1607
- // Update response with accumulated content
1608
- if (continuations > 0) {
1609
- response = { ...response, content: accumulatedContent };
1610
- this.emit({
1611
- type: 'resilience.completed',
1612
- reason: 'max_tokens_continuation',
1613
- continuations,
1614
- finalLength: accumulatedContent.length,
1420
+ break;
1421
+ case 'swarm.budget.update':
1422
+ traceCollector.record({
1423
+ type: 'swarm.budget',
1424
+ data: {
1425
+ tokensUsed: event.tokensUsed,
1426
+ tokensTotal: event.tokensTotal,
1427
+ costUsed: event.costUsed,
1428
+ costTotal: event.costTotal,
1429
+ },
1615
1430
  });
1616
- }
1617
- }
1618
- // Phase 2b: Handle truncated tool calls (stopReason=max_tokens with tool calls present)
1619
- // When a model hits max_tokens mid-tool-call, the JSON arguments are truncated and unparseable.
1620
- // Instead of executing broken tool calls, strip them and ask the LLM to retry smaller.
1621
- if (resilienceEnabled && response.stopReason === 'max_tokens' && response.toolCalls?.length) {
1622
- this.emit({
1623
- type: 'resilience.truncated_tool_call',
1624
- toolNames: response.toolCalls.map(tc => tc.name),
1625
- });
1626
- this.observability?.logger?.warn('Tool call truncated at max_tokens', {
1627
- toolNames: response.toolCalls.map(tc => tc.name),
1628
- outputTokens: response.usage?.outputTokens,
1629
- });
1630
- // Strip truncated tool calls, inject recovery message
1631
- const truncatedResponse = response;
1632
- response = { ...response, toolCalls: undefined };
1633
- const recoveryMessage = {
1634
- role: 'user',
1635
- content: '[System: Your previous tool call was truncated because the output exceeded the token limit. ' +
1636
- 'The tool call arguments were cut off and could not be parsed. ' +
1637
- 'Please retry with a smaller approach: for write_file, break the content into smaller chunks ' +
1638
- 'or use edit_file for targeted changes instead of rewriting entire files.]',
1639
- };
1640
- messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
1641
- messages.push(recoveryMessage);
1642
- this.state.messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
1643
- this.state.messages.push(recoveryMessage);
1644
- response = await this.callLLM(messages);
1645
- }
1646
- // Record LLM usage for economics
1647
- if (this.economics && response.usage) {
1648
- this.economics.recordLLMUsage(response.usage.inputTokens, response.usage.outputTokens, this.config.model, response.usage.cost // Use actual cost from provider when available
1649
- );
1650
- // =================================================================
1651
- // POST-LLM BUDGET CHECK: Prevent tool execution if over budget
1652
- // A single LLM call can push us over - catch it before running tools
1653
- // =================================================================
1654
- if (!forceTextOnly) {
1655
- const postCheck = this.economics.checkBudget();
1656
- if (!postCheck.canContinue) {
1657
- this.observability?.logger?.warn('Budget exceeded after LLM call, skipping tool execution', {
1658
- reason: postCheck.reason,
1659
- });
1660
- forceTextOnly = true;
1661
- }
1662
- }
1663
- }
1664
- // Add assistant message
1665
- const assistantMessage = {
1666
- role: 'assistant',
1667
- content: response.content,
1668
- toolCalls: response.toolCalls,
1669
- ...(response.thinking ? { metadata: { thinking: response.thinking } } : {}),
1670
- };
1671
- messages.push(assistantMessage);
1672
- this.state.messages.push(assistantMessage);
1673
- lastResponse = response.content || (response.thinking ? response.thinking : '');
1674
- // In plan mode: capture exploration findings as we go (not just at the end)
1675
- // This ensures we collect context from exploration iterations before writes are queued
1676
- if (this.modeManager.getMode() === 'plan' && response.content && response.content.length > 50) {
1677
- const hasReadOnlyTools = response.toolCalls?.every(tc => ['read_file', 'list_files', 'glob', 'grep', 'search', 'mcp_'].some(prefix => tc.name.startsWith(prefix) || tc.name === prefix));
1678
- // Capture substantive exploration content (not just "let me read..." responses)
1679
- if (hasReadOnlyTools && !response.content.match(/^(Let me|I'll|I will|I need to|First,)/i)) {
1680
- this.pendingPlanManager.appendExplorationFinding(response.content.slice(0, 1000));
1681
- }
1682
- }
1683
- // Check for tool calls
1684
- // When forceTextOnly is set (max iterations reached), ignore any tool calls
1685
- const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
1686
- if (!hasToolCalls || forceTextOnly) {
1687
- // Log if we're ignoring tool calls due to forceTextOnly
1688
- if (forceTextOnly && hasToolCalls) {
1689
- this.observability?.logger?.info('Ignoring tool calls due to forceTextOnly (max steps reached)', {
1690
- toolCallCount: response.toolCalls?.length,
1691
- iteration: this.state.iteration,
1431
+ break;
1432
+ case 'swarm.verify.start':
1433
+ traceCollector.record({
1434
+ type: 'swarm.verification',
1435
+ data: { phase: 'start', description: `${event.stepCount} verification steps` },
1692
1436
  });
1693
- }
1694
- const incompleteAction = this.detectIncompleteActionResponse(response.content || '');
1695
- const missingRequiredArtifact = ENFORCE_REQUESTED_ARTIFACTS
1696
- ? this.isRequestedArtifactMissing(requestedArtifact, executedToolNames)
1697
- : false;
1698
- const shouldRecoverIncompleteAction = resilienceEnabled
1699
- && INCOMPLETE_ACTION_RECOVERY
1700
- && !forceTextOnly
1701
- && (incompleteAction || missingRequiredArtifact);
1702
- if (shouldRecoverIncompleteAction) {
1703
- if (incompleteActionRetries < MAX_INCOMPLETE_ACTION_RETRIES) {
1704
- incompleteActionRetries++;
1705
- const reason = missingRequiredArtifact && requestedArtifact
1706
- ? `missing_requested_artifact:${requestedArtifact}`
1707
- : 'future_intent_without_action';
1708
- this.emit({
1709
- type: 'resilience.incomplete_action_detected',
1710
- reason,
1711
- attempt: incompleteActionRetries,
1712
- maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1713
- requiresArtifact: missingRequiredArtifact,
1714
- });
1715
- this.observability?.logger?.warn('Incomplete action detected, retrying with nudge', {
1716
- reason,
1717
- attempt: incompleteActionRetries,
1718
- maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1719
- });
1720
- const nudgeMessage = {
1721
- role: 'user',
1722
- content: missingRequiredArtifact && requestedArtifact
1723
- ? `[System: You said you would complete the next action, but no tool call was made. The task requires creating or updating "${requestedArtifact}". Execute the required tool now, or explicitly explain why it cannot be produced.]`
1724
- : '[System: You described a next action but did not execute it. If work remains, call the required tool now. If the task is complete, provide a final answer with no pending action language.]',
1725
- };
1726
- messages.push(nudgeMessage);
1727
- this.state.messages.push(nudgeMessage);
1728
- continue;
1729
- }
1730
- const failureReason = missingRequiredArtifact && requestedArtifact
1731
- ? `incomplete_action_missing_artifact:${requestedArtifact}`
1732
- : 'incomplete_action_unresolved';
1733
- this.emit({
1734
- type: 'resilience.incomplete_action_failed',
1735
- reason: failureReason,
1736
- attempts: incompleteActionRetries,
1737
- maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1437
+ break;
1438
+ case 'swarm.verify.step':
1439
+ traceCollector.record({
1440
+ type: 'swarm.verification',
1441
+ data: {
1442
+ phase: 'step',
1443
+ stepIndex: event.stepIndex,
1444
+ description: event.description,
1445
+ passed: event.passed,
1446
+ },
1738
1447
  });
1739
- throw new Error(`LLM failed to complete requested action after ${incompleteActionRetries} retries (${failureReason})`);
1740
- }
1741
- if (incompleteActionRetries > 0) {
1742
- this.emit({
1743
- type: 'resilience.incomplete_action_recovered',
1744
- reason: 'incomplete_action',
1745
- attempts: incompleteActionRetries,
1448
+ break;
1449
+ case 'swarm.verify.complete':
1450
+ traceCollector.record({
1451
+ type: 'swarm.verification',
1452
+ data: {
1453
+ phase: 'complete',
1454
+ passed: event.result.passed,
1455
+ summary: event.result.summary,
1456
+ },
1746
1457
  });
1747
- incompleteActionRetries = 0;
1748
- }
1749
- // Verification gate: if criteria not met, nudge agent to verify before completing
1750
- if (this.verificationGate && !forceTextOnly) {
1751
- const vResult = this.verificationGate.check();
1752
- if (!vResult.satisfied && !vResult.forceAllow && vResult.nudge) {
1753
- // Inject nudge and continue the loop
1754
- const nudgeMessage = {
1755
- role: 'user',
1756
- content: vResult.nudge,
1757
- };
1758
- messages.push(nudgeMessage);
1759
- this.state.messages.push(nudgeMessage);
1760
- this.observability?.logger?.info('Verification gate nudge', {
1761
- missing: vResult.missing,
1762
- nudgeCount: this.verificationGate.getState().nudgeCount,
1763
- });
1764
- continue;
1765
- }
1766
- }
1767
- // No tool calls (or forced to ignore), agent is done - compact tool outputs to save context
1768
- // The model has "consumed" the tool outputs and produced a response,
1769
- // so we can replace verbose outputs with compact summaries
1770
- this.compactToolOutputs();
1771
- // In plan mode: capture exploration summary from the final response
1772
- // This provides context for what was learned during exploration before proposing changes
1773
- if (this.modeManager.getMode() === 'plan' && this.pendingPlanManager.hasPendingPlan()) {
1774
- const explorationContent = response.content || '';
1775
- if (explorationContent.length > 0) {
1776
- this.pendingPlanManager.setExplorationSummary(explorationContent);
1777
- }
1778
- }
1779
- // Final validation: warn if response is still empty after all retries
1780
- if (!response.content || response.content.length === 0) {
1781
- this.observability?.logger?.error('Agent finished with empty response after all retries', {
1782
- emptyRetries,
1783
- continuations,
1784
- iteration: this.state.iteration,
1458
+ break;
1459
+ case 'swarm.orchestrator.llm':
1460
+ traceCollector.record({
1461
+ type: 'swarm.orchestrator.llm',
1462
+ data: { model: event.model, purpose: event.purpose, tokens: event.tokens, cost: event.cost },
1785
1463
  });
1786
- this.emit({
1787
- type: 'resilience.failed',
1788
- reason: 'empty_final_response',
1789
- emptyRetries,
1790
- continuations,
1464
+ break;
1465
+ case 'swarm.wave.allFailed':
1466
+ traceCollector.record({
1467
+ type: 'swarm.wave.allFailed',
1468
+ data: { wave: event.wave },
1791
1469
  });
1792
- }
1793
- // Record iteration end for tracing (no tool calls case)
1794
- this.traceCollector?.record({
1795
- type: 'iteration.end',
1796
- data: { iterationNumber: this.state.iteration },
1797
- });
1798
- break;
1799
- }
1800
- // Execute tool calls (we know toolCalls is defined here due to the check above)
1801
- const toolCalls = response.toolCalls;
1802
- const toolResults = await this.executeToolCalls(toolCalls);
1803
- // Record tool calls for economics/progress tracking + work log
1804
- for (let i = 0; i < toolCalls.length; i++) {
1805
- const toolCall = toolCalls[i];
1806
- const result = toolResults[i];
1807
- executedToolNames.add(toolCall.name);
1808
- this.economics?.recordToolCall(toolCall.name, toolCall.arguments, result?.result);
1809
- // Record in work log for compaction resilience
1810
- const toolOutput = result?.result && typeof result.result === 'object' && 'output' in result.result
1811
- ? String(result.result.output)
1812
- : typeof result?.result === 'string' ? result.result : undefined;
1813
- this.workLog?.recordToolExecution(toolCall.name, toolCall.arguments, toolOutput);
1814
- // Record in verification gate
1815
- if (this.verificationGate) {
1816
- if (toolCall.name === 'bash') {
1817
- const toolRes = result?.result;
1818
- const output = toolRes && typeof toolRes === 'object' && 'output' in toolRes
1819
- ? String(toolRes.output)
1820
- : typeof toolRes === 'string' ? toolRes : '';
1821
- const exitCode = toolRes && typeof toolRes === 'object' && toolRes.metadata
1822
- ? toolRes.metadata.exitCode ?? null
1823
- : null;
1824
- this.verificationGate.recordBashExecution(String(toolCall.arguments.command || ''), output, exitCode);
1825
- }
1826
- if (['write_file', 'edit_file'].includes(toolCall.name)) {
1827
- this.verificationGate.recordFileChange();
1828
- }
1829
- }
1830
- }
1831
- // Add tool results to messages (with truncation and proactive budget management)
1832
- const MAX_TOOL_OUTPUT_CHARS = 8000; // ~2000 tokens max per tool output
1833
- // =======================================================================
1834
- // PROACTIVE BUDGET CHECK - compact BEFORE we overflow, not after
1835
- // Uses AutoCompactionManager if available for sophisticated compaction
1836
- // =======================================================================
1837
- const currentContextTokens = this.estimateContextTokens(messages);
1838
- if (this.autoCompactionManager) {
1839
- // Use the AutoCompactionManager for threshold-based compaction
1840
- const compactionResult = await this.autoCompactionManager.checkAndMaybeCompact({
1841
- currentTokens: currentContextTokens,
1842
- messages: messages,
1843
- });
1844
- // Handle compaction result
1845
- if (compactionResult.status === 'compacted' && compactionResult.compactedMessages) {
1846
- // Replace messages with compacted version
1847
- messages.length = 0;
1848
- messages.push(...compactionResult.compactedMessages);
1849
- this.state.messages.length = 0;
1850
- this.state.messages.push(...compactionResult.compactedMessages);
1851
- // Inject work log after compaction to prevent amnesia
1852
- if (this.workLog?.hasContent()) {
1853
- const workLogMessage = {
1854
- role: 'user',
1855
- content: this.workLog.toCompactString(),
1856
- };
1857
- messages.push(workLogMessage);
1858
- this.state.messages.push(workLogMessage);
1859
- }
1860
- }
1861
- else if (compactionResult.status === 'hard_limit') {
1862
- // Hard limit reached - this is serious, emit error
1863
- this.emit({
1864
- type: 'error',
1865
- error: `Context hard limit reached (${Math.round(compactionResult.ratio * 100)}% of max tokens)`,
1470
+ break;
1471
+ case 'swarm.phase.progress':
1472
+ traceCollector.record({
1473
+ type: 'swarm.phase.progress',
1474
+ data: { phase: event.phase, message: event.message },
1866
1475
  });
1867
1476
  break;
1868
- }
1869
- }
1870
- else if (this.economics) {
1871
- // Fallback to simple compaction
1872
- const currentUsage = this.economics.getUsage();
1873
- const budget = this.economics.getBudget();
1874
- const percentUsed = (currentUsage.tokens / budget.maxTokens) * 100;
1875
- // If we're at 70%+ of budget, proactively compact to make room
1876
- if (percentUsed >= 70) {
1877
- this.observability?.logger?.info('Proactive compaction triggered', {
1878
- percentUsed: Math.round(percentUsed),
1879
- currentTokens: currentUsage.tokens,
1880
- maxTokens: budget.maxTokens,
1477
+ case 'swarm.complete':
1478
+ traceCollector.record({
1479
+ type: 'swarm.complete',
1480
+ data: {
1481
+ stats: {
1482
+ totalTasks: event.stats.totalTasks,
1483
+ completedTasks: event.stats.completedTasks,
1484
+ failedTasks: event.stats.failedTasks,
1485
+ totalTokens: event.stats.totalTokens,
1486
+ totalCost: event.stats.totalCost,
1487
+ totalDuration: event.stats.totalDurationMs,
1488
+ },
1489
+ },
1881
1490
  });
1882
- this.compactToolOutputs();
1883
- }
1491
+ break;
1884
1492
  }
1885
- const toolCallNameById = new Map(toolCalls.map(tc => [tc.id, tc.name]));
1886
- for (const result of toolResults) {
1887
- let content = typeof result.result === 'string' ? result.result : stableStringify(result.result);
1888
- const sourceToolName = toolCallNameById.get(result.callId);
1889
- const isExpensiveResult = sourceToolName === 'spawn_agent' || sourceToolName === 'spawn_agents_parallel';
1890
- // Truncate long outputs to save context
1891
- if (content.length > MAX_TOOL_OUTPUT_CHARS) {
1892
- content = content.slice(0, MAX_TOOL_OUTPUT_CHARS) + `\n\n... [truncated ${content.length - MAX_TOOL_OUTPUT_CHARS} chars]`;
1893
- }
1894
- // =======================================================================
1895
- // ESTIMATE if adding this result would exceed budget
1896
- // =======================================================================
1897
- if (this.economics) {
1898
- const estimatedNewTokens = Math.ceil(content.length / 4); // ~4 chars per token
1899
- const currentContextTokens = this.estimateContextTokens(messages);
1900
- const budget = this.economics.getBudget();
1901
- // Check if adding this would push us over the hard limit
1902
- if (currentContextTokens + estimatedNewTokens > budget.maxTokens * 0.95) {
1903
- this.observability?.logger?.warn('Skipping tool result to stay within budget', {
1904
- toolCallId: result.callId,
1905
- estimatedTokens: estimatedNewTokens,
1906
- currentContext: currentContextTokens,
1907
- limit: budget.maxTokens,
1908
- });
1909
- // Add a truncated placeholder instead
1910
- const toolMessage = {
1911
- role: 'tool',
1912
- content: `[Result omitted to stay within token budget. Original size: ${content.length} chars]`,
1913
- toolCallId: result.callId,
1914
- };
1915
- messages.push(toolMessage);
1916
- this.state.messages.push(toolMessage);
1917
- continue;
1918
- }
1919
- }
1920
- const toolMessage = {
1921
- role: 'tool',
1922
- content,
1923
- toolCallId: result.callId,
1924
- ...(isExpensiveResult
1925
- ? {
1926
- metadata: {
1927
- preserveFromCompaction: true,
1928
- costToRegenerate: 'high',
1929
- source: sourceToolName,
1930
- },
1931
- }
1932
- : {}),
1933
- };
1934
- messages.push(toolMessage);
1935
- this.state.messages.push(toolMessage);
1493
+ });
1494
+ }
1495
+ try {
1496
+ // Ensure codebase context is analyzed before decomposition so repo map is available
1497
+ if (this.codebaseContext && !this.codebaseContext.getRepoMap()) {
1498
+ try {
1499
+ await this.codebaseContext.analyze();
1936
1500
  }
1937
- // Emit context health after adding tool results
1938
- const currentTokenEstimate = this.estimateContextTokens(messages);
1939
- const contextLimit = this.getMaxContextTokens();
1940
- const percentUsed = Math.round((currentTokenEstimate / contextLimit) * 100);
1941
- const avgTokensPerExchange = currentTokenEstimate / Math.max(1, this.state.iteration);
1942
- const remainingTokens = contextLimit - currentTokenEstimate;
1943
- const estimatedExchanges = Math.floor(remainingTokens / Math.max(1, avgTokensPerExchange));
1944
- this.emit({
1945
- type: 'context.health',
1946
- currentTokens: currentTokenEstimate,
1947
- maxTokens: contextLimit,
1948
- estimatedExchanges,
1949
- percentUsed,
1950
- });
1951
- // Record iteration end for tracing (after tool execution)
1952
- this.traceCollector?.record({
1953
- type: 'iteration.end',
1954
- data: { iterationNumber: this.state.iteration },
1955
- });
1956
- }
1957
- // =======================================================================
1958
- // REFLECTION (Lesson 16)
1959
- // =======================================================================
1960
- if (autoReflect && this.planning && reflectionAttempt < maxReflectionAttempts) {
1961
- this.emit({ type: 'reflection', attempt: reflectionAttempt, satisfied: false });
1962
- const reflectionResult = await this.planning.reflect(task, lastResponse, this.provider);
1963
- this.state.metrics.reflectionAttempts = reflectionAttempt;
1964
- if (reflectionResult.satisfied && reflectionResult.confidence >= confidenceThreshold) {
1965
- // Output is satisfactory
1966
- this.emit({ type: 'reflection', attempt: reflectionAttempt, satisfied: true });
1967
- break;
1501
+ catch {
1502
+ // non-fatal decomposer will work without codebase context
1968
1503
  }
1969
- // Not satisfied - add feedback and continue
1970
- const feedbackMessage = {
1971
- role: 'user',
1972
- content: `[Reflection feedback]\nThe previous output needs improvement:\n- Critique: ${reflectionResult.critique}\n- Suggestions: ${reflectionResult.suggestions.join(', ')}\n\nPlease improve the output.`,
1973
- };
1974
- messages.push(feedbackMessage);
1975
- this.state.messages.push(feedbackMessage);
1976
- this.observability?.logger?.info('Reflection not satisfied, retrying', {
1977
- attempt: reflectionAttempt,
1978
- confidence: reflectionResult.confidence,
1979
- critique: reflectionResult.critique,
1980
- });
1981
1504
  }
1982
- else {
1983
- // No reflection or already satisfied
1984
- break;
1505
+ // Write codemap snapshot immediately so dashboard can render even if decomposition fails.
1506
+ try {
1507
+ writeCodeMapSnapshot();
1508
+ }
1509
+ catch {
1510
+ // Best effort
1511
+ }
1512
+ const result = await this.swarmOrchestrator.execute(task);
1513
+ // Populate task DAG for dashboard after execution
1514
+ bridge.setTasks(result.tasks);
1515
+ this.observability?.logger?.info('Swarm execution complete', {
1516
+ success: result.success,
1517
+ tasks: result.stats.totalTasks,
1518
+ completed: result.stats.completedTasks,
1519
+ tokens: result.stats.totalTokens,
1520
+ cost: result.stats.totalCost,
1521
+ });
1522
+ return result;
1523
+ }
1524
+ finally {
1525
+ if (codeMapRefreshTimer) {
1526
+ clearTimeout(codeMapRefreshTimer);
1985
1527
  }
1528
+ unsubTrace?.();
1529
+ unsubSnapshots();
1530
+ unsubBridge();
1531
+ bridge.close();
1532
+ unsubSwarm();
1986
1533
  }
1987
- // Store conversation in memory
1988
- this.memory?.storeConversation(this.state.messages);
1989
- this.updateMemoryStats();
1534
+ }
1535
+ /**
1536
+ * Execute a task directly without planning (delegates to core/execution-loop).
1537
+ */
1538
+ async executeDirectly(task) {
1539
+ const messages = await this.buildMessages(task);
1540
+ const ctx = this.buildContext();
1541
+ const mutators = this.buildMutators();
1542
+ return coreExecuteDirectly(task, messages, ctx, mutators);
1990
1543
  }
1991
1544
  /**
1992
1545
  * Build messages for LLM call.
@@ -1994,7 +1547,7 @@ export class ProductionAgent {
1994
1547
  * Uses cache-aware system prompt building (Trick P) when contextEngineering
1995
1548
  * is available, ensuring static content is ordered for optimal KV-cache reuse.
1996
1549
  */
1997
- buildMessages(task) {
1550
+ async buildMessages(task) {
1998
1551
  const messages = [];
1999
1552
  // Gather all context components
2000
1553
  const rulesContent = this.rules?.getRulesContent() ?? '';
@@ -2013,10 +1566,20 @@ export class ProductionAgent {
2013
1566
  const reservedTokens = 10500;
2014
1567
  const maxContextTokens = (this.config.maxContextTokens ?? 80000) - reservedTokens;
2015
1568
  const codebaseBudget = Math.min(maxContextTokens * 0.3, 15000); // Up to 30% or 15K tokens
2016
- try {
2017
- // Use synchronous cache if available, otherwise skip
2018
- const repoMap = this.codebaseContext.getRepoMap();
2019
- if (repoMap) {
1569
+ // Synchronous analysis on first system prompt build so context is available immediately
1570
+ if (!this.codebaseContext.getRepoMap() && !this.codebaseAnalysisTriggered) {
1571
+ this.codebaseAnalysisTriggered = true;
1572
+ try {
1573
+ await this.codebaseContext.analyze();
1574
+ }
1575
+ catch {
1576
+ // non-fatal — agent can still work without codebase context
1577
+ }
1578
+ }
1579
+ // Get repo map AFTER analysis so we have fresh data on first prompt
1580
+ const repoMap = this.codebaseContext.getRepoMap();
1581
+ if (repoMap) {
1582
+ try {
2020
1583
  const selection = this.selectRelevantCodeSync(task, codebaseBudget);
2021
1584
  if (selection.chunks.length > 0) {
2022
1585
  codebaseContextStr = buildContextFromChunks(selection.chunks, {
@@ -2025,10 +1588,14 @@ export class ProductionAgent {
2025
1588
  maxTotalTokens: codebaseBudget,
2026
1589
  });
2027
1590
  }
1591
+ else {
1592
+ // Fallback: lightweight repo map when task-specific selection finds nothing
1593
+ codebaseContextStr = generateLightweightRepoMap(repoMap, codebaseBudget);
1594
+ }
1595
+ }
1596
+ catch {
1597
+ // Selection error — skip
2028
1598
  }
2029
- }
2030
- catch {
2031
- // Codebase analysis not ready yet - skip for this call
2032
1599
  }
2033
1600
  }
2034
1601
  // Build tool descriptions
@@ -2110,619 +1677,94 @@ export class ProductionAgent {
2110
1677
  if (toolDescriptions) {
2111
1678
  systemPrompt += '\n\nAvailable tools:\n' + toolDescriptions;
2112
1679
  }
2113
- // Safety check: ensure system prompt is not empty
2114
- if (!systemPrompt || systemPrompt.trim().length === 0) {
2115
- console.warn('[buildMessages] Warning: Empty system prompt detected, using fallback');
2116
- systemPrompt = this.config.systemPrompt || 'You are a helpful AI assistant.';
2117
- }
2118
- messages.push({ role: 'system', content: systemPrompt });
2119
- }
2120
- // Add existing conversation
2121
- for (const msg of this.state.messages) {
2122
- if (msg.role !== 'system') {
2123
- messages.push(msg);
2124
- }
2125
- }
2126
- // Add current task
2127
- messages.push({ role: 'user', content: task });
2128
- return messages;
2129
- }
2130
- /**
2131
- * Call the LLM with routing and observability.
2132
- */
2133
- async callLLM(messages) {
2134
- const spanId = this.observability?.tracer?.startSpan('llm.call');
2135
- this.emit({ type: 'llm.start', model: this.config.model || 'default' });
2136
- // Prompt caching (Improvement P1): Replace the system message with structured content
2137
- // that includes cache_control markers, enabling 60-70% cache hit rates.
2138
- // Only use structured cache_control markers for Anthropic models — other providers
2139
- // (DeepSeek, Grok, etc.) use automatic prefix-based caching and don't understand these markers.
2140
- const configModel = this.config.model || 'default';
2141
- const isAnthropicModel = configModel.startsWith('anthropic/') || configModel.startsWith('claude-');
2142
- let providerMessages = messages;
2143
- if (isAnthropicModel && this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
2144
- providerMessages = messages.map((m, i) => {
2145
- if (i === 0 && m.role === 'system') {
2146
- // Replace system message with structured cacheable content
2147
- return {
2148
- role: 'system',
2149
- content: this.cacheableSystemBlocks,
2150
- };
2151
- }
2152
- return m;
2153
- });
2154
- }
2155
- // Emit context insight for verbose feedback
2156
- const estimatedTokens = messages.reduce((sum, m) => {
2157
- const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
2158
- return sum + Math.ceil(content.length / 3.5); // ~3.5 chars per token estimate
2159
- }, 0);
2160
- // Use context window size, not output token limit
2161
- const contextLimit = this.getMaxContextTokens();
2162
- this.emit({
2163
- type: 'insight.context',
2164
- currentTokens: estimatedTokens,
2165
- maxTokens: contextLimit,
2166
- messageCount: messages.length,
2167
- percentUsed: Math.round((estimatedTokens / contextLimit) * 100),
2168
- });
2169
- const startTime = Date.now();
2170
- const requestId = `req-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2171
- // Debug: Log message count and structure (helps diagnose API errors)
2172
- if (process.env.DEBUG_LLM) {
2173
- console.log(`[callLLM] Sending ${messages.length} messages:`);
2174
- messages.forEach((m, i) => {
2175
- console.log(` [${i}] ${m.role}: ${m.content?.slice(0, 50)}...`);
2176
- });
2177
- }
2178
- // Validate messages are not empty
2179
- if (!messages || messages.length === 0) {
2180
- throw new Error('No messages to send to LLM');
2181
- }
2182
- // Lesson 26: Record LLM request for tracing
2183
- const model = this.config.model || 'default';
2184
- const provider = this.config.provider?.name || 'unknown';
2185
- this.traceCollector?.record({
2186
- type: 'llm.request',
2187
- data: {
2188
- requestId,
2189
- model,
2190
- provider,
2191
- messages: messages.map(m => ({
2192
- role: m.role,
2193
- content: m.content,
2194
- toolCallId: m.toolCallId,
2195
- toolCalls: m.toolCalls?.map(tc => ({
2196
- id: tc.id,
2197
- name: tc.name,
2198
- arguments: tc.arguments,
2199
- })),
2200
- })),
2201
- tools: Array.from(this.tools.values()).map(t => ({
2202
- name: t.name,
2203
- description: t.description,
2204
- parametersSchema: t.parameters,
2205
- })),
2206
- parameters: {
2207
- maxTokens: this.config.maxTokens,
2208
- temperature: this.config.temperature,
2209
- },
2210
- },
2211
- });
2212
- // Pause duration budget during LLM call - network time shouldn't count against agent
2213
- this.economics?.pauseDuration();
2214
- try {
2215
- let response;
2216
- let actualModel = model;
2217
- // Use routing if enabled
2218
- if (this.routing) {
2219
- const complexity = this.routing.estimateComplexity(messages[messages.length - 1]?.content || '');
2220
- const context = {
2221
- task: messages[messages.length - 1]?.content || '',
2222
- complexity,
2223
- hasTools: this.tools.size > 0,
2224
- hasImages: false,
2225
- taskType: 'general',
2226
- estimatedTokens: messages.reduce((sum, m) => sum + m.content.length / 4, 0),
2227
- };
2228
- const result = await this.routing.executeWithFallback(providerMessages, context);
2229
- response = result.response;
2230
- actualModel = result.model;
2231
- // Emit routing insight
2232
- this.emit({
2233
- type: 'insight.routing',
2234
- model: actualModel,
2235
- reason: actualModel !== model ? 'Routed based on complexity' : 'Default model',
2236
- complexity: complexity <= 0.3 ? 'low' : complexity <= 0.7 ? 'medium' : 'high',
2237
- });
2238
- // Emit decision transparency event
2239
- this.emit({
2240
- type: 'decision.routing',
2241
- model: actualModel,
2242
- reason: actualModel !== model
2243
- ? `Complexity ${(complexity * 100).toFixed(0)}% - using ${actualModel}`
2244
- : 'Default model for current task',
2245
- alternatives: actualModel !== model
2246
- ? [{ model, rejected: 'complexity threshold exceeded' }]
2247
- : undefined,
2248
- });
2249
- // Enhanced tracing: Record routing decision
2250
- this.traceCollector?.record({
2251
- type: 'decision',
2252
- data: {
2253
- type: 'routing',
2254
- decision: `Selected model: ${actualModel}`,
2255
- outcome: 'allowed',
2256
- reasoning: actualModel !== model
2257
- ? `Task complexity ${(complexity * 100).toFixed(0)}% exceeded threshold - routed to ${actualModel}`
2258
- : `Default model ${model} suitable for task complexity ${(complexity * 100).toFixed(0)}%`,
2259
- factors: [
2260
- { name: 'complexity', value: complexity, weight: 0.8 },
2261
- { name: 'hasTools', value: context.hasTools, weight: 0.1 },
2262
- { name: 'taskType', value: context.taskType, weight: 0.1 },
2263
- ],
2264
- alternatives: actualModel !== model
2265
- ? [{ option: model, reason: 'complexity threshold exceeded', rejected: true }]
2266
- : undefined,
2267
- confidence: 0.9,
2268
- },
2269
- });
2270
- }
2271
- else {
2272
- response = await this.provider.chat(providerMessages, {
2273
- model: this.config.model,
2274
- tools: Array.from(this.tools.values()),
2275
- });
2276
- }
2277
- const duration = Date.now() - startTime;
2278
- // Debug cache stats when DEBUG_CACHE is set
2279
- if (process.env.DEBUG_CACHE) {
2280
- const cr = response.usage?.cacheReadTokens ?? 0;
2281
- const cw = response.usage?.cacheWriteTokens ?? 0;
2282
- const inp = response.usage?.inputTokens ?? 0;
2283
- const hitRate = inp > 0 ? ((cr / inp) * 100).toFixed(1) : '0.0';
2284
- console.log(`[Cache] model=${actualModel} read=${cr} write=${cw} input=${inp} hit=${hitRate}%`);
2285
- }
2286
- // Lesson 26: Record LLM response for tracing
2287
- this.traceCollector?.record({
2288
- type: 'llm.response',
2289
- data: {
2290
- requestId,
2291
- content: response.content || '',
2292
- toolCalls: response.toolCalls?.map(tc => ({
2293
- id: tc.id,
2294
- name: tc.name,
2295
- arguments: tc.arguments,
2296
- })),
2297
- stopReason: response.stopReason === 'end_turn' ? 'end_turn'
2298
- : response.stopReason === 'tool_use' ? 'tool_use'
2299
- : response.stopReason === 'max_tokens' ? 'max_tokens'
2300
- : 'stop_sequence',
2301
- usage: {
2302
- inputTokens: response.usage?.inputTokens || 0,
2303
- outputTokens: response.usage?.outputTokens || 0,
2304
- cacheReadTokens: response.usage?.cacheReadTokens,
2305
- cacheWriteTokens: response.usage?.cacheWriteTokens,
2306
- cost: response.usage?.cost, // Actual cost from provider (e.g., OpenRouter)
2307
- },
2308
- durationMs: duration,
2309
- },
2310
- });
2311
- // Enhanced tracing: Record thinking/reasoning blocks if present
2312
- if (response.thinking) {
2313
- this.traceCollector?.record({
2314
- type: 'llm.thinking',
2315
- data: {
2316
- requestId,
2317
- content: response.thinking,
2318
- summarized: response.thinking.length > 10000, // Summarize if very long
2319
- originalLength: response.thinking.length,
2320
- durationMs: duration,
2321
- },
2322
- });
2323
- }
2324
- // Record metrics
2325
- this.observability?.metrics?.recordLLMCall(response.usage?.inputTokens || 0, response.usage?.outputTokens || 0, duration, actualModel, response.usage?.cost // Actual cost from provider (e.g., OpenRouter)
2326
- );
2327
- this.state.metrics.llmCalls++;
2328
- this.state.metrics.inputTokens += response.usage?.inputTokens || 0;
2329
- this.state.metrics.outputTokens += response.usage?.outputTokens || 0;
2330
- this.state.metrics.totalTokens = this.state.metrics.inputTokens + this.state.metrics.outputTokens;
2331
- this.emit({ type: 'llm.complete', response });
2332
- // Emit token usage insight for verbose feedback
2333
- if (response.usage) {
2334
- this.emit({
2335
- type: 'insight.tokens',
2336
- inputTokens: response.usage.inputTokens,
2337
- outputTokens: response.usage.outputTokens,
2338
- cacheReadTokens: response.usage.cacheReadTokens,
2339
- cacheWriteTokens: response.usage.cacheWriteTokens,
2340
- cost: response.usage.cost,
2341
- model: actualModel,
2342
- });
2343
- }
2344
- this.observability?.tracer?.endSpan(spanId);
2345
- return response;
2346
- }
2347
- catch (err) {
2348
- const error = err instanceof Error ? err : new Error(String(err));
2349
- this.observability?.tracer?.recordError(error);
2350
- this.observability?.tracer?.endSpan(spanId);
2351
- throw error;
2352
- }
2353
- finally {
2354
- // Resume duration budget after LLM call completes (success or failure)
2355
- this.economics?.resumeDuration();
2356
- }
2357
- }
2358
- /**
2359
- * Execute an async callback while excluding wall-clock wait time from duration budgeting.
2360
- * Used for external waits such as approval dialogs and delegation confirmation.
2361
- */
2362
- async withPausedDuration(fn) {
2363
- this.economics?.pauseDuration();
2364
- try {
2365
- return await fn();
2366
- }
2367
- finally {
2368
- this.economics?.resumeDuration();
2369
- }
2370
- }
2371
- /**
2372
- * Execute tool calls with safety checks and execution policy enforcement.
2373
- * Parallelizable read-only tools are batched and executed concurrently.
2374
- */
2375
- async executeToolCalls(toolCalls) {
2376
- const results = [];
2377
- // Group consecutive parallelizable tool calls into batches
2378
- const batches = groupToolCallsIntoBatches(toolCalls);
2379
- // Execute batches: parallel batches use Promise.allSettled, sequential execute one-by-one
2380
- for (const batch of batches) {
2381
- if (batch.length > 1 && PARALLELIZABLE_TOOLS.has(batch[0].name)) {
2382
- // Execute parallelizable batch concurrently
2383
- const batchResults = await Promise.allSettled(batch.map(tc => this.executeSingleToolCall(tc)));
2384
- for (const result of batchResults) {
2385
- if (result.status === 'fulfilled') {
2386
- results.push(result.value);
2387
- }
2388
- else {
2389
- // Should not happen since executeSingleToolCall catches errors internally
2390
- const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
2391
- results.push({ callId: 'unknown', result: `Error: ${error}`, error });
2392
- }
2393
- }
2394
- }
2395
- else {
2396
- // Execute sequentially
2397
- for (const tc of batch) {
2398
- results.push(await this.executeSingleToolCall(tc));
2399
- }
2400
- }
2401
- }
2402
- return results;
2403
- }
2404
- /**
2405
- * Execute a single tool call with all safety checks, tracing, and error handling.
2406
- */
2407
- async executeSingleToolCall(toolCall) {
2408
- const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
2409
- const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2410
- this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
2411
- const startTime = Date.now();
2412
- // Short-circuit if tool call arguments failed to parse
2413
- if (toolCall.parseError) {
2414
- const errorMsg = `Tool arguments could not be parsed: ${toolCall.parseError}. Please retry with complete, valid JSON.`;
2415
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: errorMsg });
2416
- this.traceCollector?.record({
2417
- type: 'tool.end',
2418
- data: { executionId, status: 'error', error: new Error(errorMsg), durationMs: Date.now() - startTime },
2419
- });
2420
- this.observability?.tracer?.endSpan(spanId);
2421
- return { callId: toolCall.id, result: `Error: ${errorMsg}`, error: errorMsg };
2422
- }
2423
- // Lesson 26: Record tool start for tracing
2424
- this.traceCollector?.record({
2425
- type: 'tool.start',
2426
- data: {
2427
- executionId,
2428
- toolName: toolCall.name,
2429
- arguments: toolCall.arguments,
2430
- },
2431
- });
2432
- try {
2433
- // =====================================================================
2434
- // PLAN MODE WRITE INTERCEPTION
2435
- // =====================================================================
2436
- // In plan mode, intercept write operations and queue them as proposed changes
2437
- if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
2438
- // Extract contextual reasoning instead of simple truncation
2439
- const reason = this.extractChangeReasoning(toolCall, this.state.messages);
2440
- // Start a new plan if needed
2441
- if (!this.pendingPlanManager.hasPendingPlan()) {
2442
- const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
2443
- const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
2444
- this.pendingPlanManager.startPlan(task);
2445
- }
2446
- // Queue the write operation
2447
- const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
2448
- // Emit event for UI
2449
- this.emit({
2450
- type: 'plan.change.queued',
2451
- tool: toolCall.name,
2452
- changeId: change?.id,
2453
- summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
2454
- });
2455
- // Return a message indicating the change was queued
2456
- const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
2457
- `Tool: ${toolCall.name}\n` +
2458
- `${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
2459
- `Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
2460
- this.observability?.tracer?.endSpan(spanId);
2461
- return { callId: toolCall.id, result: queueMessage };
2462
- }
2463
- // =====================================================================
2464
- // EXECUTION POLICY ENFORCEMENT (Lesson 23)
2465
- // =====================================================================
2466
- let policyApprovedByUser = false;
2467
- if (this.executionPolicy) {
2468
- const policyContext = {
2469
- messages: this.state.messages,
2470
- currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
2471
- previousToolCalls: [],
2472
- };
2473
- const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
2474
- // Emit policy event
2475
- this.emit({
2476
- type: 'policy.evaluated',
2477
- tool: toolCall.name,
2478
- policy: evaluation.policy,
2479
- reason: evaluation.reason,
2480
- });
2481
- // Emit decision transparency event
2482
- this.emit({
2483
- type: 'decision.tool',
2484
- tool: toolCall.name,
2485
- decision: evaluation.policy === 'forbidden' ? 'blocked'
2486
- : evaluation.policy === 'prompt' ? 'prompted'
2487
- : 'allowed',
2488
- policyMatch: evaluation.reason,
2489
- });
2490
- // Enhanced tracing: Record policy decision
2491
- this.traceCollector?.record({
2492
- type: 'decision',
2493
- data: {
2494
- type: 'policy',
2495
- decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
2496
- outcome: evaluation.policy === 'forbidden' ? 'blocked'
2497
- : evaluation.policy === 'prompt' ? 'deferred'
2498
- : 'allowed',
2499
- reasoning: evaluation.reason,
2500
- factors: [
2501
- { name: 'policy', value: evaluation.policy },
2502
- { name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
2503
- ],
2504
- confidence: evaluation.intent?.confidence ?? 0.8,
2505
- },
2506
- });
2507
- // Handle forbidden policy - always block
2508
- if (evaluation.policy === 'forbidden') {
2509
- throw new Error(`Forbidden by policy: ${evaluation.reason}`);
2510
- }
2511
- // Handle prompt policy - requires approval
2512
- if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
2513
- // Try to get approval through safety manager's human-in-loop
2514
- const humanInLoop = this.safety?.humanInLoop;
2515
- if (humanInLoop) {
2516
- const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
2517
- if (!approval.approved) {
2518
- throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
2519
- }
2520
- policyApprovedByUser = true;
2521
- // Create a grant for future similar calls if approved
2522
- this.executionPolicy.createGrant({
2523
- toolName: toolCall.name,
2524
- grantedBy: 'user',
2525
- reason: 'Approved during execution',
2526
- maxUsages: 5, // Allow 5 more similar calls
2527
- });
2528
- }
2529
- else {
2530
- // No approval handler - block by default for safety
2531
- throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
2532
- }
2533
- }
2534
- // Log intent classification if available
2535
- if (evaluation.intent) {
2536
- this.emit({
2537
- type: 'intent.classified',
2538
- tool: toolCall.name,
2539
- intent: evaluation.intent.type,
2540
- confidence: evaluation.intent.confidence,
2541
- });
2542
- }
2543
- }
2544
- // =====================================================================
2545
- // SAFETY VALIDATION (Lesson 20-21)
2546
- // =====================================================================
2547
- if (this.safety) {
2548
- const safety = this.safety;
2549
- const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
2550
- if (!validation.allowed) {
2551
- throw new Error(`Tool call blocked: ${validation.reason}`);
2552
- }
2553
- }
2554
- // Get tool definition (with lazy-loading support for MCP tools)
2555
- let tool = this.tools.get(toolCall.name);
2556
- const wasPreloaded = !!tool;
2557
- if (!tool && this.toolResolver) {
2558
- // Try to resolve and load the tool on-demand
2559
- const resolved = this.toolResolver(toolCall.name);
2560
- if (resolved) {
2561
- this.addTool(resolved);
2562
- tool = resolved;
2563
- if (process.env.DEBUG)
2564
- console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
2565
- this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
2566
- }
2567
- }
2568
- if (!tool) {
2569
- throw new Error(`Unknown tool: ${toolCall.name}`);
2570
- }
2571
- // Log whether tool was pre-loaded or auto-loaded (for MCP tools)
2572
- if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
2573
- console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
2574
- }
2575
- // =====================================================================
2576
- // BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
2577
- // =====================================================================
2578
- // Claim file resources before write operations to prevent conflicts
2579
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2580
- const args = toolCall.arguments;
2581
- const filePath = String(args.path || args.file_path || '');
2582
- if (filePath) {
2583
- const agentId = this.agentId;
2584
- const claimed = this.blackboard.claim(filePath, agentId, 'write', {
2585
- ttl: 60000, // 1 minute claim
2586
- intent: `${toolCall.name}: ${filePath}`,
2587
- });
2588
- if (!claimed) {
2589
- const existingClaim = this.blackboard.getClaim(filePath);
2590
- throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
2591
- `Wait for the other agent to complete or choose a different file.`);
2592
- }
2593
- }
2594
- }
2595
- // FILE CACHE: Check cache for read_file operations before executing
2596
- if (this.fileCache && toolCall.name === 'read_file') {
2597
- const args = toolCall.arguments;
2598
- const readPath = String(args.path || '');
2599
- if (readPath) {
2600
- const cached = this.fileCache.get(readPath);
2601
- if (cached !== undefined) {
2602
- const lines = cached.split('\n').length;
2603
- const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2604
- const duration = Date.now() - startTime;
2605
- this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2606
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2607
- this.state.metrics.toolCalls++;
2608
- this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2609
- this.observability?.tracer?.endSpan(spanId);
2610
- return {
2611
- callId: toolCall.id,
2612
- result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2613
- };
2614
- }
2615
- }
2616
- }
2617
- // Execute tool (with sandbox if available)
2618
- let result;
2619
- if (this.safety?.sandbox) {
2620
- // CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
2621
- // The default 60s sandbox timeout would kill subagents prematurely
2622
- // Subagents may run for minutes (per their own timeout config)
2623
- const isSpawnAgent = toolCall.name === 'spawn_agent';
2624
- const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
2625
- const isSubagentTool = isSpawnAgent || isSpawnParallel;
2626
- const subagentConfig = this.config.subagent;
2627
- const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
2628
- const subagentTimeout = hasSubagentConfig
2629
- ? subagentConfig.defaultTimeout ?? 600000 // 10 min default
2630
- : 600000;
2631
- // Use subagent timeout + buffer for spawn tools, default for others
2632
- // For spawn_agents_parallel, multiply by number of agents (they run in parallel,
2633
- // but the total wall-clock time should still allow the slowest agent to complete)
2634
- const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
2635
- result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
2636
- }
2637
- else {
2638
- result = await tool.execute(toolCall.arguments);
2639
- }
2640
- const duration = Date.now() - startTime;
2641
- // Lesson 26: Record tool completion for tracing
2642
- this.traceCollector?.record({
2643
- type: 'tool.end',
2644
- data: {
2645
- executionId,
2646
- status: 'success',
2647
- result,
2648
- durationMs: duration,
2649
- },
2650
- });
2651
- // Record metrics
2652
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2653
- this.state.metrics.toolCalls++;
2654
- this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2655
- // FILE CACHE: Store read results and invalidate on writes
2656
- if (this.fileCache) {
2657
- const args = toolCall.arguments;
2658
- const filePath = String(args.path || args.file_path || '');
2659
- if (toolCall.name === 'read_file' && filePath) {
2660
- // Cache successful read results
2661
- const resultObj = result;
2662
- if (resultObj?.success && typeof resultObj.output === 'string') {
2663
- this.fileCache.set(filePath, resultObj.output);
2664
- }
2665
- }
2666
- else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2667
- // Invalidate cache when files are modified (including undo operations)
2668
- this.fileCache.invalidate(filePath);
2669
- }
2670
- }
2671
- // Emit tool insight with result summary
2672
- const summary = this.summarizeToolResult(toolCall.name, result);
2673
- this.emit({
2674
- type: 'insight.tool',
2675
- tool: toolCall.name,
2676
- summary,
2677
- durationMs: duration,
2678
- success: true,
2679
- });
2680
- // Release blackboard claim after successful file write
2681
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2682
- const args = toolCall.arguments;
2683
- const filePath = String(args.path || args.file_path || '');
2684
- if (filePath) {
2685
- const agentId = this.agentId;
2686
- this.blackboard.release(filePath, agentId);
2687
- }
1680
+ // Safety check: ensure system prompt is not empty
1681
+ if (!systemPrompt || systemPrompt.trim().length === 0) {
1682
+ log.warn('Empty system prompt detected, using fallback');
1683
+ systemPrompt = this.config.systemPrompt || 'You are a helpful AI assistant.';
2688
1684
  }
2689
- // Self-improvement: record success pattern
2690
- this.selfImprovement?.recordSuccess(toolCall.name, toolCall.arguments, typeof result === 'string' ? result.slice(0, 200) : JSON.stringify(result).slice(0, 200));
2691
- this.observability?.tracer?.endSpan(spanId);
2692
- return { callId: toolCall.id, result };
1685
+ messages.push({ role: 'system', content: systemPrompt });
2693
1686
  }
2694
- catch (err) {
2695
- const error = err instanceof Error ? err : new Error(String(err));
2696
- const duration = Date.now() - startTime;
2697
- // Lesson 26: Record tool error for tracing
2698
- this.traceCollector?.record({
2699
- type: 'tool.end',
2700
- data: {
2701
- executionId,
2702
- status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
2703
- error,
2704
- durationMs: duration,
2705
- },
2706
- });
2707
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
2708
- this.observability?.tracer?.recordError(error);
2709
- this.observability?.tracer?.endSpan(spanId);
2710
- // FAILURE EVIDENCE RECORDING (Trick S)
2711
- // Track failed tool calls to prevent loops and provide context
2712
- this.contextEngineering?.recordFailure({
2713
- action: toolCall.name,
2714
- args: toolCall.arguments,
2715
- error,
2716
- intent: `Execute tool ${toolCall.name}`,
2717
- });
2718
- // Self-improvement: enhance error message with diagnosis for better LLM recovery
2719
- if (this.selfImprovement) {
2720
- const enhanced = this.selfImprovement.enhanceErrorMessage(toolCall.name, error.message, toolCall.arguments);
2721
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: enhanced });
2722
- return { callId: toolCall.id, result: `Error: ${enhanced}`, error: enhanced };
1687
+ // Add existing conversation
1688
+ for (const msg of this.state.messages) {
1689
+ if (msg.role !== 'system') {
1690
+ messages.push(msg);
2723
1691
  }
2724
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
2725
- return { callId: toolCall.id, result: `Error: ${error.message}`, error: error.message };
1692
+ }
1693
+ // Add current task
1694
+ messages.push({ role: 'user', content: task });
1695
+ // Track system prompt length for context % estimation
1696
+ const sysMsg = messages.find(m => m.role === 'system');
1697
+ if (sysMsg) {
1698
+ const content = typeof sysMsg.content === 'string' ? sysMsg.content : JSON.stringify(sysMsg.content);
1699
+ this.lastSystemPromptLength = content.length;
1700
+ }
1701
+ return messages;
1702
+ }
1703
+ // ===========================================================================
1704
+ // CONTEXT BUILDERS — Bridge private fields to extracted core modules
1705
+ // ===========================================================================
1706
+ buildContext() {
1707
+ return {
1708
+ config: this.config, agentId: this.agentId, provider: this.provider,
1709
+ tools: this.tools, state: this.state,
1710
+ modeManager: this.modeManager, pendingPlanManager: this.pendingPlanManager,
1711
+ hooks: this.hooks, economics: this.economics, cancellation: this.cancellation,
1712
+ resourceManager: this.resourceManager, safety: this.safety,
1713
+ observability: this.observability, contextEngineering: this.contextEngineering,
1714
+ traceCollector: this.traceCollector, executionPolicy: this.executionPolicy,
1715
+ routing: this.routing, planning: this.planning, memory: this.memory,
1716
+ react: this.react, blackboard: this.blackboard, fileCache: this.fileCache,
1717
+ budgetPool: this.budgetPool, taskManager: this.taskManager, store: this.store,
1718
+ codebaseContext: this.codebaseContext, learningStore: this.learningStore,
1719
+ compactor: this.compactor, autoCompactionManager: this.autoCompactionManager,
1720
+ workLog: this.workLog, verificationGate: this.verificationGate,
1721
+ agentRegistry: this.agentRegistry, toolRecommendation: this.toolRecommendation,
1722
+ selfImprovement: this.selfImprovement, subagentOutputStore: this.subagentOutputStore,
1723
+ autoCheckpointManager: this.autoCheckpointManager, injectionBudget: this.injectionBudget,
1724
+ skillManager: this.skillManager, semanticCache: this.semanticCache,
1725
+ lspManager: this.lspManager, threadManager: this.threadManager,
1726
+ interactivePlanner: this.interactivePlanner, recursiveContext: this.recursiveContext,
1727
+ fileChangeTracker: this.fileChangeTracker, capabilitiesRegistry: this.capabilitiesRegistry,
1728
+ rules: this.rules, stateMachine: this.stateMachine,
1729
+ lastComplexityAssessment: this.lastComplexityAssessment,
1730
+ cacheableSystemBlocks: this.cacheableSystemBlocks,
1731
+ parentIterations: this.parentIterations,
1732
+ externalCancellationToken: this.externalCancellationToken,
1733
+ wrapupRequested: this.wrapupRequested, wrapupReason: this.wrapupReason,
1734
+ compactionPending: this.compactionPending,
1735
+ sharedContextState: this._sharedContextState,
1736
+ sharedEconomicsState: this._sharedEconomicsState,
1737
+ spawnedTasks: this.spawnedTasks, toolResolver: this.toolResolver,
1738
+ emit: (event) => this.emit(event),
1739
+ addTool: (tool) => this.addTool(tool),
1740
+ getMaxContextTokens: () => this.getMaxContextTokens(),
1741
+ getTotalIterations: () => this.getTotalIterations(),
1742
+ };
1743
+ }
1744
+ buildMutators() {
1745
+ return {
1746
+ setBudgetPool: (pool) => { this.budgetPool = pool; },
1747
+ setCacheableSystemBlocks: (blocks) => { this.cacheableSystemBlocks = blocks; },
1748
+ setCompactionPending: (pending) => { this.compactionPending = pending; },
1749
+ setWrapupRequested: (requested) => { this.wrapupRequested = requested; },
1750
+ setLastComplexityAssessment: (a) => { this.lastComplexityAssessment = a; },
1751
+ setExternalCancellationToken: (t) => { this.externalCancellationToken = t; },
1752
+ };
1753
+ }
1754
+ createSubAgentFactory() {
1755
+ return (config) => new ProductionAgent(config);
1756
+ }
1757
+ /**
1758
+ * Execute an async callback while excluding wall-clock wait time from duration budgeting.
1759
+ * Used for external waits such as approval dialogs and delegation confirmation.
1760
+ */
1761
+ async withPausedDuration(fn) {
1762
+ this.economics?.pauseDuration();
1763
+ try {
1764
+ return await fn();
1765
+ }
1766
+ finally {
1767
+ this.economics?.resumeDuration();
2726
1768
  }
2727
1769
  }
2728
1770
  /**
@@ -2861,123 +1903,6 @@ export class ProductionAgent {
2861
1903
  emit(event) {
2862
1904
  this.hooks?.emit(event);
2863
1905
  }
2864
- /**
2865
- * Create a brief summary of a tool result for insight display.
2866
- */
2867
- summarizeToolResult(toolName, result) {
2868
- if (result === null || result === undefined) {
2869
- return 'No output';
2870
- }
2871
- const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
2872
- // Tool-specific summaries
2873
- if (toolName === 'list_files' || toolName === 'glob') {
2874
- const lines = resultStr.split('\n').filter(l => l.trim());
2875
- return `Found ${lines.length} file${lines.length !== 1 ? 's' : ''}`;
2876
- }
2877
- if (toolName === 'bash' || toolName === 'execute_command') {
2878
- const lines = resultStr.split('\n').filter(l => l.trim());
2879
- if (resultStr.includes('exit code: 0') || !resultStr.includes('exit code:')) {
2880
- return lines.length > 1 ? `Success (${lines.length} lines)` : 'Success';
2881
- }
2882
- return `Failed - ${lines[0]?.slice(0, 50) || 'see output'}`;
2883
- }
2884
- if (toolName === 'read_file') {
2885
- const lines = resultStr.split('\n').length;
2886
- return `Read ${lines} line${lines !== 1 ? 's' : ''}`;
2887
- }
2888
- if (toolName === 'write_file' || toolName === 'edit_file') {
2889
- return 'File updated';
2890
- }
2891
- if (toolName === 'search' || toolName === 'grep') {
2892
- const matches = (resultStr.match(/\n/g) || []).length;
2893
- return `${matches} match${matches !== 1 ? 'es' : ''}`;
2894
- }
2895
- // Generic summary
2896
- if (resultStr.length <= 50) {
2897
- return resultStr;
2898
- }
2899
- return `${resultStr.slice(0, 47)}...`;
2900
- }
2901
- /**
2902
- * Format tool arguments for plan display.
2903
- */
2904
- formatToolArgsForPlan(toolName, args) {
2905
- if (toolName === 'write_file') {
2906
- const path = args.path || args.file_path;
2907
- const content = String(args.content || '');
2908
- const preview = content.slice(0, 100).replace(/\n/g, '\\n');
2909
- return `File: ${path}\nContent preview: ${preview}${content.length > 100 ? '...' : ''}`;
2910
- }
2911
- if (toolName === 'edit_file') {
2912
- const path = args.path || args.file_path;
2913
- return `File: ${path}\nOld: ${String(args.old_string || args.search || '').slice(0, 50)}...\nNew: ${String(args.new_string || args.replace || '').slice(0, 50)}...`;
2914
- }
2915
- if (toolName === 'bash') {
2916
- return `Command: ${String(args.command || '').slice(0, 100)}`;
2917
- }
2918
- if (toolName === 'delete_file') {
2919
- return `Delete: ${args.path || args.file_path}`;
2920
- }
2921
- if (toolName === 'spawn_agent' || toolName === 'researcher') {
2922
- const task = String(args.task || args.prompt || args.goal || '');
2923
- const model = args.model ? ` (${args.model})` : '';
2924
- const firstLine = task.split('\n')[0].slice(0, 100);
2925
- return `${firstLine}${task.length > 100 ? '...' : ''}${model}`;
2926
- }
2927
- // Generic
2928
- return `Args: ${JSON.stringify(args).slice(0, 100)}...`;
2929
- }
2930
- /**
2931
- * Extract contextual reasoning for a proposed change in plan mode.
2932
- * Looks at recent assistant messages to find relevant explanation.
2933
- * Returns a more complete reason than simple truncation.
2934
- */
2935
- extractChangeReasoning(toolCall, messages) {
2936
- // Get last few assistant messages (most recent first)
2937
- const assistantMsgs = messages
2938
- .filter(m => m.role === 'assistant' && typeof m.content === 'string')
2939
- .slice(-3)
2940
- .reverse();
2941
- if (assistantMsgs.length === 0) {
2942
- return `Proposed change: ${toolCall.name}`;
2943
- }
2944
- // Use the most recent assistant message
2945
- const lastMsg = assistantMsgs[0];
2946
- const content = lastMsg.content;
2947
- // For spawn_agent, the task itself is usually the reason
2948
- if (toolCall.name === 'spawn_agent') {
2949
- const args = toolCall.arguments;
2950
- const task = String(args.task || args.prompt || args.goal || '');
2951
- if (task.length > 0) {
2952
- // Use first paragraph or 500 chars of task as reason
2953
- const firstPara = task.split(/\n\n/)[0];
2954
- return firstPara.length > 500 ? firstPara.slice(0, 500) + '...' : firstPara;
2955
- }
2956
- }
2957
- // For file operations, look for context about the file
2958
- if (['write_file', 'edit_file'].includes(toolCall.name)) {
2959
- const args = toolCall.arguments;
2960
- const path = String(args.path || args.file_path || '');
2961
- // Look for mentions of this file in the assistant's explanation
2962
- if (path && content.toLowerCase().includes(path.toLowerCase().split('/').pop() || '')) {
2963
- // Extract the sentence(s) mentioning this file
2964
- const sentences = content.split(/[.!?\n]+/).filter(s => s.toLowerCase().includes(path.toLowerCase().split('/').pop() || ''));
2965
- if (sentences.length > 0) {
2966
- const relevant = sentences.slice(0, 2).join('. ').trim();
2967
- return relevant.length > 500 ? relevant.slice(0, 500) + '...' : relevant;
2968
- }
2969
- }
2970
- }
2971
- // Fallback: use first 500 chars instead of 200
2972
- // Look for the first meaningful paragraph/section
2973
- const paragraphs = content.split(/\n\n+/).filter(p => p.trim().length > 20);
2974
- if (paragraphs.length > 0) {
2975
- const firstPara = paragraphs[0].trim();
2976
- return firstPara.length > 500 ? firstPara.slice(0, 500) + '...' : firstPara;
2977
- }
2978
- // Ultimate fallback
2979
- return content.length > 500 ? content.slice(0, 500) + '...' : content;
2980
- }
2981
1906
  /**
2982
1907
  * Update memory statistics.
2983
1908
  * Memory stats are retrieved via memory manager, not stored in state.
@@ -3003,12 +1928,27 @@ export class ProductionAgent {
3003
1928
  }
3004
1929
  return this.state.metrics;
3005
1930
  }
1931
+ getResilienceConfig() {
1932
+ return this.config.resilience;
1933
+ }
3006
1934
  /**
3007
1935
  * Get current state.
3008
1936
  */
3009
1937
  getState() {
3010
1938
  return { ...this.state };
3011
1939
  }
1940
+ /**
1941
+ * Get shared state stats for TUI visibility.
1942
+ * Returns null when not in a swarm context.
1943
+ */
1944
+ getSharedStats() {
1945
+ if (!this._sharedContextState)
1946
+ return null;
1947
+ return {
1948
+ context: this._sharedContextState.getStats(),
1949
+ economics: this._sharedEconomicsState?.getStats() ?? { fingerprints: 0, globalLoops: [] },
1950
+ };
1951
+ }
3012
1952
  /**
3013
1953
  * Get the maximum context tokens for this agent's model.
3014
1954
  * Priority: user config > OpenRouter API > hardcoded ModelRegistry > 200K default
@@ -3030,6 +1970,16 @@ export class ProductionAgent {
3030
1970
  // Default
3031
1971
  return 200000;
3032
1972
  }
1973
+ /**
1974
+ * Estimate tokens used by the system prompt (codebase context, tools, rules).
1975
+ * Used by TUI to display accurate context % that includes system overhead.
1976
+ */
1977
+ getSystemPromptTokenEstimate() {
1978
+ if (this.lastSystemPromptLength > 0) {
1979
+ return Math.ceil(this.lastSystemPromptLength / 3.2);
1980
+ }
1981
+ return 0;
1982
+ }
3033
1983
  /**
3034
1984
  * Get the trace collector (Lesson 26).
3035
1985
  * Returns null if trace capture is not enabled.
@@ -3043,6 +1993,9 @@ export class ProductionAgent {
3043
1993
  */
3044
1994
  setTraceCollector(collector) {
3045
1995
  this.traceCollector = collector;
1996
+ if (this.codebaseContext) {
1997
+ this.codebaseContext.traceCollector = collector;
1998
+ }
3046
1999
  }
3047
2000
  /**
3048
2001
  * Get the learning store for cross-session learning.
@@ -3254,7 +2207,7 @@ export class ProductionAgent {
3254
2207
  const validation = this.validateCheckpoint(savedState);
3255
2208
  // Log warnings
3256
2209
  for (const warning of validation.warnings) {
3257
- console.warn(`[Checkpoint] Warning: ${warning}`);
2210
+ log.warn('Checkpoint validation warning', { warning });
3258
2211
  this.observability?.logger?.warn('Checkpoint validation warning', { warning });
3259
2212
  }
3260
2213
  // Fail on validation errors
@@ -3353,7 +2306,7 @@ export class ProductionAgent {
3353
2306
  }
3354
2307
  }
3355
2308
  if (compactedCount > 0 && process.env.DEBUG) {
3356
- console.log(` 📦 Compacted ${compactedCount} tool outputs (saved ~${Math.round(savedChars / 4)} tokens)`);
2309
+ log.debug('Compacted tool outputs', { compactedCount, savedTokens: Math.round(savedChars / 4) });
3357
2310
  }
3358
2311
  }
3359
2312
  /**
@@ -3394,23 +2347,32 @@ export class ProductionAgent {
3394
2347
  const artifactWriteTools = ['write_file', 'edit_file', 'apply_patch', 'append_file'];
3395
2348
  return !artifactWriteTools.some(toolName => executedToolNames.has(toolName));
3396
2349
  }
3397
- /**
3398
- * Detect "future-intent" responses that imply the model has not completed work.
3399
- */
3400
- detectIncompleteActionResponse(content) {
3401
- const trimmed = content.trim();
3402
- if (!trimmed) {
3403
- return false;
2350
+ getOpenTasksSummary() {
2351
+ if (!this.taskManager) {
2352
+ return undefined;
2353
+ }
2354
+ const tasks = this.taskManager.list();
2355
+ const pending = tasks.filter(t => t.status === 'pending').length;
2356
+ const inProgress = tasks.filter(t => t.status === 'in_progress').length;
2357
+ const blocked = tasks.filter(t => t.status === 'pending' && this.taskManager?.isBlocked(t.id)).length;
2358
+ return { pending, inProgress, blocked };
2359
+ }
2360
+ reconcileStaleTasks(reason) {
2361
+ if (!this.taskManager)
2362
+ return;
2363
+ const staleAfterMs = typeof this.config.resilience === 'object'
2364
+ ? (this.config.resilience.taskLeaseStaleMs ?? 5 * 60 * 1000)
2365
+ : 5 * 60 * 1000;
2366
+ const recovered = this.taskManager.reconcileStaleInProgress({
2367
+ staleAfterMs,
2368
+ reason,
2369
+ });
2370
+ if (recovered.reconciled > 0) {
2371
+ this.observability?.logger?.info('Recovered stale task leases', {
2372
+ reason,
2373
+ recovered: recovered.reconciled,
2374
+ });
3404
2375
  }
3405
- const lower = trimmed.toLowerCase();
3406
- const futureIntentPatterns = [
3407
- /^(now|next|then)\s+(i\s+will|i'll|let me)\b/,
3408
- /^i\s+(will|am going to|can)\b/,
3409
- /^(let me|i'll|i will)\s+(create|write|save|do|make|generate|start)\b/,
3410
- /^(now|next|then)\s+i(?:'ll| will)\b/,
3411
- ];
3412
- const completionSignals = /\b(done|completed|finished|here is|created|saved|wrote)\b/;
3413
- return futureIntentPatterns.some(pattern => pattern.test(lower)) && !completionSignals.test(lower);
3414
2376
  }
3415
2377
  /**
3416
2378
  * Get audit log (if human-in-loop is enabled).
@@ -3608,6 +2570,12 @@ export class ProductionAgent {
3608
2570
  return null;
3609
2571
  return this.economics.getProgress();
3610
2572
  }
2573
+ /**
2574
+ * Get actual file paths modified during this agent's session.
2575
+ */
2576
+ getModifiedFilePaths() {
2577
+ return this.economics?.getModifiedFilePaths() ?? [];
2578
+ }
3611
2579
  /**
3612
2580
  * Extend the budget limits.
3613
2581
  */
@@ -3817,859 +2785,16 @@ export class ProductionAgent {
3817
2785
  return success;
3818
2786
  }
3819
2787
  /**
3820
- * Spawn an agent to execute a task.
3821
- * Returns the result when the agent completes.
3822
- *
3823
- * @param agentName - Name of the agent to spawn (researcher, coder, etc.)
3824
- * @param task - The task description for the agent
3825
- * @param constraints - Optional constraints to keep the subagent focused
2788
+ * Spawn a subagent (delegates to core/subagent-spawner).
3826
2789
  */
3827
2790
  async spawnAgent(agentName, task, constraints) {
3828
- if (!this.agentRegistry) {
3829
- return {
3830
- success: false,
3831
- output: 'Agent registry not initialized',
3832
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
3833
- };
3834
- }
3835
- const agentDef = this.agentRegistry.getAgent(agentName);
3836
- if (!agentDef) {
3837
- return {
3838
- success: false,
3839
- output: `Agent not found: ${agentName}`,
3840
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
3841
- };
3842
- }
3843
- // DUPLICATE SPAWN PREVENTION with SEMANTIC SIMILARITY
3844
- // Skip for swarm workers — the orchestrator handles retry logic and deduplication
3845
- // at the task level. Without this bypass, retried swarm tasks return stale results.
3846
- const isSwarmWorker = agentName.startsWith('swarm-');
3847
- const SEMANTIC_SIMILARITY_THRESHOLD = 0.75; // 75% similarity = duplicate
3848
- const taskKey = `${agentName}:${task.slice(0, 150).toLowerCase().replace(/\s+/g, ' ').trim()}`;
3849
- const now = Date.now();
3850
- // Clean up old entries (older than dedup window)
3851
- for (const [key, entry] of this.spawnedTasks.entries()) {
3852
- if (now - entry.timestamp > ProductionAgent.SPAWN_DEDUP_WINDOW_MS) {
3853
- this.spawnedTasks.delete(key);
3854
- }
3855
- }
3856
- let existingMatch;
3857
- let matchType = 'exact';
3858
- if (!isSwarmWorker) {
3859
- // Check for exact match first
3860
- existingMatch = this.spawnedTasks.get(taskKey);
3861
- // If no exact match, check for semantic similarity among same agent's tasks
3862
- if (!existingMatch) {
3863
- for (const [key, entry] of this.spawnedTasks.entries()) {
3864
- // Only compare tasks from the same agent type
3865
- if (!key.startsWith(`${agentName}:`))
3866
- continue;
3867
- if (now - entry.timestamp >= ProductionAgent.SPAWN_DEDUP_WINDOW_MS)
3868
- continue;
3869
- // Extract the task portion from the key
3870
- const existingTask = key.slice(agentName.length + 1);
3871
- const similarity = calculateTaskSimilarity(task, existingTask);
3872
- if (similarity >= SEMANTIC_SIMILARITY_THRESHOLD) {
3873
- existingMatch = entry;
3874
- matchType = 'semantic';
3875
- this.observability?.logger?.debug('Semantic duplicate detected', {
3876
- agent: agentName,
3877
- newTask: task.slice(0, 80),
3878
- existingTask: existingTask.slice(0, 80),
3879
- similarity: (similarity * 100).toFixed(1) + '%',
3880
- });
3881
- break;
3882
- }
3883
- }
3884
- }
3885
- }
3886
- if (existingMatch && now - existingMatch.timestamp < ProductionAgent.SPAWN_DEDUP_WINDOW_MS) {
3887
- // Same or semantically similar task spawned within the dedup window
3888
- this.observability?.logger?.warn('Duplicate spawn prevented', {
3889
- agent: agentName,
3890
- task: task.slice(0, 100),
3891
- matchType,
3892
- originalTimestamp: existingMatch.timestamp,
3893
- elapsedMs: now - existingMatch.timestamp,
3894
- });
3895
- const duplicateMessage = `[DUPLICATE SPAWN PREVENTED${matchType === 'semantic' ? ' - SEMANTIC MATCH' : ''}]\n` +
3896
- `This task was already spawned ${Math.round((now - existingMatch.timestamp) / 1000)}s ago.\n` +
3897
- `${existingMatch.queuedChanges > 0
3898
- ? `The previous spawn queued ${existingMatch.queuedChanges} change(s) to the pending plan.\n` +
3899
- `These changes are already in your plan - do NOT spawn again.\n`
3900
- : ''}Previous result summary:\n${existingMatch.result.slice(0, 500)}`;
3901
- return {
3902
- success: true, // Mark as success since original task completed
3903
- output: duplicateMessage,
3904
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
3905
- };
3906
- }
3907
- // Generate a unique ID for this agent instance that will be used consistently
3908
- // throughout the agent's lifecycle (spawn event, token events, completion events)
3909
- const agentId = `spawn-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
3910
- this.emit({ type: 'agent.spawn', agentId, name: agentName, task });
3911
- this.observability?.logger?.info('Spawning agent', { name: agentName, task });
3912
- const startTime = Date.now();
3913
- const childSessionId = `subagent-${agentName}-${Date.now()}`;
3914
- const childTraceId = `trace-${childSessionId}`;
3915
- let workerResultId;
3916
- try {
3917
- // Filter tools for this agent
3918
- let agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
3919
- // Apply tool recommendations to improve subagent focus (only for large tool sets)
3920
- if (this.toolRecommendation && agentTools.length > 15) {
3921
- const taskType = ToolRecommendationEngine.inferTaskType(agentName);
3922
- const recommendations = this.toolRecommendation.recommendTools(task, taskType, agentTools.map(t => t.name));
3923
- if (recommendations.length > 0) {
3924
- const recommendedNames = new Set(recommendations.map(r => r.toolName));
3925
- // Always keep spawn tools even if not recommended
3926
- const alwaysKeep = new Set(['spawn_agent', 'spawn_agents_parallel']);
3927
- agentTools = agentTools.filter(t => recommendedNames.has(t.name) || alwaysKeep.has(t.name));
3928
- }
3929
- }
3930
- // Resolve model - abstract tiers (fast/balanced/quality) should use parent's model
3931
- // Only use agentDef.model if it's an actual model ID (contains '/')
3932
- const resolvedModel = (agentDef.model && agentDef.model.includes('/'))
3933
- ? agentDef.model
3934
- : this.config.model;
3935
- // Persist subagent task lifecycle in durable storage when available
3936
- if (this.store?.hasWorkerResultsFeature()) {
3937
- try {
3938
- workerResultId = this.store.createWorkerResult(agentId, task.slice(0, 500), resolvedModel || 'default');
3939
- }
3940
- catch (storeErr) {
3941
- this.observability?.logger?.warn('Failed to create worker result record', {
3942
- agentId,
3943
- error: storeErr.message,
3944
- });
3945
- }
3946
- }
3947
- // Get subagent config with agent-type-specific timeouts and iteration limits
3948
- // Uses dynamic configuration based on agent type (researcher needs more time than reviewer)
3949
- // Precedence: per-type config > per-type default > global config > hardcoded fallback
3950
- const subagentConfig = this.config.subagent;
3951
- const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
3952
- // Timeout precedence: per-type config override > agent-type default > global config default
3953
- const agentTypeTimeout = getSubagentTimeout(agentName);
3954
- const rawPerTypeTimeout = hasSubagentConfig
3955
- ? subagentConfig.timeouts?.[agentName]
3956
- : undefined;
3957
- const rawGlobalTimeout = hasSubagentConfig
3958
- ? subagentConfig.defaultTimeout
3959
- : undefined;
3960
- // Validate: reject negative, NaN, or non-finite timeout values
3961
- const isValidTimeout = (v) => v !== undefined && Number.isFinite(v) && v > 0;
3962
- const perTypeConfigTimeout = isValidTimeout(rawPerTypeTimeout) ? rawPerTypeTimeout : undefined;
3963
- const globalConfigTimeout = isValidTimeout(rawGlobalTimeout) ? rawGlobalTimeout : undefined;
3964
- const subagentTimeout = perTypeConfigTimeout ?? agentTypeTimeout ?? globalConfigTimeout ?? 300000;
3965
- // Iteration precedence: per-type config override > agent-type default > global config default
3966
- const agentTypeMaxIter = getSubagentMaxIterations(agentName);
3967
- const rawPerTypeMaxIter = hasSubagentConfig
3968
- ? subagentConfig.maxIterations?.[agentName]
3969
- : undefined;
3970
- const rawGlobalMaxIter = hasSubagentConfig
3971
- ? subagentConfig.defaultMaxIterations
3972
- : undefined;
3973
- const isValidIter = (v) => v !== undefined && Number.isFinite(v) && v > 0 && Number.isInteger(v);
3974
- const perTypeConfigMaxIter = isValidIter(rawPerTypeMaxIter) ? rawPerTypeMaxIter : undefined;
3975
- const globalConfigMaxIter = isValidIter(rawGlobalMaxIter) ? rawGlobalMaxIter : undefined;
3976
- const defaultMaxIterations = agentDef.maxIterations ?? perTypeConfigMaxIter ?? agentTypeMaxIter ?? globalConfigMaxIter ?? 15;
3977
- // BLACKBOARD CONTEXT INJECTION
3978
- // Gather relevant context from the blackboard for the subagent
3979
- let blackboardContext = '';
3980
- const parentAgentId = `parent-${Date.now()}`;
3981
- if (this.blackboard) {
3982
- // Post parent's exploration context before spawning
3983
- this.blackboard.post(parentAgentId, {
3984
- topic: 'spawn.parent_context',
3985
- content: `Parent spawning ${agentName} for task: ${task.slice(0, 200)}`,
3986
- type: 'progress',
3987
- confidence: 1,
3988
- metadata: { agentName, taskPreview: task.slice(0, 100) },
3989
- });
3990
- // Gather recent findings that might help the subagent
3991
- const recentFindings = this.blackboard.query({
3992
- limit: 5,
3993
- types: ['discovery', 'analysis', 'progress'],
3994
- minConfidence: 0.7,
3995
- });
3996
- if (recentFindings.length > 0) {
3997
- const findingsSummary = recentFindings
3998
- .map(f => `- [${f.agentId}] ${f.topic}: ${f.content.slice(0, 150)}${f.content.length > 150 ? '...' : ''}`)
3999
- .join('\n');
4000
- blackboardContext = `\n\n**BLACKBOARD CONTEXT (from parent/sibling agents):**\n${findingsSummary}\n`;
4001
- }
4002
- }
4003
- // Check for files already being modified in parent's pending plan
4004
- const currentPlan = this.pendingPlanManager.getPendingPlan();
4005
- if (currentPlan && currentPlan.proposedChanges.length > 0) {
4006
- const pendingFiles = currentPlan.proposedChanges
4007
- .filter((c) => c.tool === 'write_file' || c.tool === 'edit_file')
4008
- .map((c) => c.args.path || c.args.file_path)
4009
- .filter(Boolean);
4010
- if (pendingFiles.length > 0) {
4011
- blackboardContext += `\n**FILES ALREADY IN PENDING PLAN (do not duplicate):**\n${pendingFiles.slice(0, 10).join('\n')}\n`;
4012
- }
4013
- }
4014
- // CONSTRAINT INJECTION
4015
- // Add constraints to the subagent's context if provided
4016
- // Also always include budget awareness so subagents know their limits
4017
- const constraintParts = [];
4018
- // BUDGET AWARENESS: Always inject so subagent understands its limits
4019
- const subagentBudgetTokens = constraints?.maxTokens ?? SUBAGENT_BUDGET.maxTokens ?? 100000;
4020
- const subagentBudgetMinutes = Math.round((SUBAGENT_BUDGET.maxDuration ?? 240000) / 60000);
4021
- if (isSwarmWorker) {
4022
- // V6: Calmer resource awareness for swarm workers — prevents weaker models
4023
- // from confabulating budget warnings and wrapping up without doing work
4024
- constraintParts.push(`**Resource Info:**\n` +
4025
- `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens (you have plenty)\n` +
4026
- `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
4027
- `- Focus on completing your task. Do NOT wrap up prematurely.\n` +
4028
- `- You will receive a system warning IF you approach budget limits. Until then, work normally.\n` +
4029
- `- **IMPORTANT:** Budget warnings come from the SYSTEM, not from your own assessment. ` +
4030
- `Do not preemptively claim budget issues.\n` +
4031
- `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
4032
- ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
4033
- }
4034
- else {
4035
- // Original RESOURCE AWARENESS text for regular subagents
4036
- constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
4037
- `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
4038
- `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
4039
- `- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
4040
- `- Do not explore indefinitely - be focused and efficient.\n` +
4041
- `- If approaching limits, summarize findings and return.\n` +
4042
- `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
4043
- ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
4044
- }
4045
- if (constraints) {
4046
- if (constraints.focusAreas && constraints.focusAreas.length > 0) {
4047
- constraintParts.push(`**FOCUS AREAS (limit exploration to these paths):**\n${constraints.focusAreas.map(a => ` - ${a}`).join('\n')}`);
4048
- }
4049
- if (constraints.excludeAreas && constraints.excludeAreas.length > 0) {
4050
- constraintParts.push(`**EXCLUDED AREAS (do NOT explore these):**\n${constraints.excludeAreas.map(a => ` - ${a}`).join('\n')}`);
4051
- }
4052
- if (constraints.requiredDeliverables && constraints.requiredDeliverables.length > 0) {
4053
- constraintParts.push(`**REQUIRED DELIVERABLES (you must produce these):**\n${constraints.requiredDeliverables.map(d => ` - ${d}`).join('\n')}`);
4054
- }
4055
- if (constraints.timeboxMinutes) {
4056
- constraintParts.push(`**TIME LIMIT:** ${constraints.timeboxMinutes} minutes (soft limit - wrap up if approaching)`);
4057
- }
4058
- }
4059
- const constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
4060
- // Build delegation-enhanced system prompt
4061
- let delegationContext = '';
4062
- if (this.lastComplexityAssessment && this.lastComplexityAssessment.tier !== 'simple') {
4063
- const spec = createMinimalDelegationSpec(task, agentName);
4064
- delegationContext = '\n\n' + buildDelegationPrompt(spec);
4065
- }
4066
- // Quality self-assessment prompt for subagent
4067
- const qualityPrompt = '\n\n' + getSubagentQualityPrompt();
4068
- // Build subagent system prompt with subagent-specific plan mode addition
4069
- const parentMode = this.getMode();
4070
- const subagentSystemPrompt = parentMode === 'plan'
4071
- ? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`
4072
- : `${agentDef.systemPrompt}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`;
4073
- // Allocate budget from pool (or use default) — track allocation ID for release later
4074
- const pooledBudget = this.getSubagentBudget(agentName, constraints);
4075
- const poolAllocationId = pooledBudget.allocationId;
4076
- // Create a sub-agent with the agent's config
4077
- // Use SUBAGENT_BUDGET to constrain resource usage (prevents runaway token consumption)
4078
- const subAgent = new ProductionAgent({
4079
- provider: this.provider,
4080
- tools: agentTools,
4081
- // Pass toolResolver so subagent can lazy-load MCP tools
4082
- toolResolver: this.toolResolver || undefined,
4083
- // Pass MCP tool summaries so subagent knows what tools are available
4084
- mcpToolSummaries: this.config.mcpToolSummaries,
4085
- systemPrompt: subagentSystemPrompt,
4086
- model: resolvedModel,
4087
- maxIterations: agentDef.maxIterations || defaultMaxIterations,
4088
- // Inherit some features but keep subagent simpler
4089
- memory: false,
4090
- planning: false,
4091
- reflection: false,
4092
- // Enable lightweight compaction for subagents (Improvement P5)
4093
- // tokenThreshold configures the Compactor's per-pass size limit
4094
- // maxContextTokens constrains AutoCompactionManager's percentage thresholds
4095
- // With maxContextTokens=80000 and default 80% threshold, compaction triggers at ~64K
4096
- compaction: {
4097
- enabled: true,
4098
- mode: 'auto',
4099
- tokenThreshold: 40000, // Compactor summarization size limit per pass
4100
- preserveRecentCount: 4, // Preserve fewer messages (splits to 2 user + 2 assistant)
4101
- preserveToolResults: false, // More aggressive — subagents can re-read files
4102
- summaryMaxTokens: 500,
4103
- },
4104
- // Lower context window for subagents so percentage-based compaction triggers earlier
4105
- maxContextTokens: 80000,
4106
- observability: this.config.observability,
4107
- sandbox: this.config.sandbox,
4108
- humanInLoop: this.config.humanInLoop,
4109
- // Subagents get 'allow' as default policy since they're already
4110
- // constrained to their registered tool set. The parent's 'prompt'
4111
- // policy can't work without humanInLoop.
4112
- executionPolicy: this.config.executionPolicy
4113
- ? { ...this.config.executionPolicy, defaultPolicy: 'allow' }
4114
- : this.config.executionPolicy,
4115
- threads: false,
4116
- // Disable hooks console output in subagents - parent handles event display
4117
- hooks: this.config.hooks === false ? false : {
4118
- enabled: true,
4119
- builtIn: { logging: false, timing: false, metrics: false },
4120
- custom: [],
4121
- },
4122
- // Pass unique agentId for blackboard coordination and tracing
4123
- agentId,
4124
- // Share parent's blackboard for coordination between parallel subagents
4125
- blackboard: this.blackboard || undefined,
4126
- // Share parent's file cache to eliminate redundant reads across agents
4127
- fileCache: this.fileCache || undefined,
4128
- // CONSTRAINED BUDGET: Use pooled budget when available, falling back to SUBAGENT_BUDGET
4129
- // Pooled budget ensures total tree cost stays bounded by parent's budget
4130
- budget: pooledBudget.budget,
4131
- });
4132
- // CRITICAL: Subagent inherits parent's mode
4133
- // This ensures that if parent is in plan mode:
4134
- // - Subagent's read operations execute immediately (visible exploration)
4135
- // - Subagent's write operations get queued in the subagent's pending plan
4136
- // - User maintains control over what actually gets written
4137
- if (parentMode !== 'build') {
4138
- subAgent.setMode(parentMode);
4139
- }
4140
- // APPROVAL BATCHING (Improvement P6): Set approval scope for subagents
4141
- // Read-only tools are auto-approved; write tools get scoped approval
4142
- // This reduces interruptions from ~8 per session to ~1-2
4143
- subAgent.setApprovalScope({
4144
- autoApprove: ['read_file', 'list_files', 'glob', 'grep', 'show_file_history', 'show_session_changes'],
4145
- scopedApprove: {
4146
- write_file: { paths: ['src/', 'tests/', 'tools/'] },
4147
- edit_file: { paths: ['src/', 'tests/', 'tools/'] },
4148
- },
4149
- requireApproval: ['bash', 'delete_file'],
4150
- });
4151
- // Pass parent's iteration count to subagent for accurate budget tracking
4152
- // This prevents subagents from consuming excessive iterations when parent already used many
4153
- subAgent.setParentIterations(this.getTotalIterations());
4154
- // UNIFIED TRACING: Share parent's trace collector with subagent context
4155
- // This ensures all subagent events are written to the same trace file as the parent,
4156
- // tagged with subagent context for proper aggregation in /trace output
4157
- if (this.traceCollector) {
4158
- const subagentTraceView = this.traceCollector.createSubagentView({
4159
- parentSessionId: this.traceCollector.getSessionId() || 'unknown',
4160
- agentType: agentName,
4161
- spawnedAtIteration: this.state.iteration,
4162
- });
4163
- subAgent.setTraceCollector(subagentTraceView);
4164
- }
4165
- // GRACEFUL TIMEOUT with WRAPUP PHASE
4166
- // Instead of instant death on timeout, the subagent gets a wrapup window
4167
- // to produce a structured summary before being killed:
4168
- // 1. Normal operation: progress extends idle timer
4169
- // 2. Wrapup phase: 30s before hard kill, wrapup callback fires → forceTextOnly
4170
- // 3. Hard kill: race() throws CancellationError after wrapup window
4171
- const IDLE_TIMEOUT = 120000; // 2 minutes without progress = timeout
4172
- let WRAPUP_WINDOW = 30000;
4173
- let IDLE_CHECK_INTERVAL = 5000;
4174
- if (this.config.subagent) {
4175
- WRAPUP_WINDOW = this.config.subagent.wrapupWindowMs ?? WRAPUP_WINDOW;
4176
- IDLE_CHECK_INTERVAL = this.config.subagent.idleCheckIntervalMs ?? IDLE_CHECK_INTERVAL;
4177
- }
4178
- const progressAwareTimeout = createGracefulTimeout(subagentTimeout, // Max total time (hard limit from agent type config)
4179
- IDLE_TIMEOUT, // Idle timeout (soft limit - no progress triggers this)
4180
- WRAPUP_WINDOW, // Wrapup window before hard kill
4181
- IDLE_CHECK_INTERVAL);
4182
- // Register wrapup callback — fires 30s before hard kill
4183
- // This triggers the subagent's forceTextOnly path for a structured summary
4184
- progressAwareTimeout.onWrapupWarning(() => {
4185
- this.emit({
4186
- type: 'subagent.wrapup.started',
4187
- agentId,
4188
- agentType: agentName,
4189
- reason: 'Timeout approaching - graceful wrapup window opened',
4190
- elapsedMs: Date.now() - startTime,
4191
- });
4192
- subAgent.requestWrapup('Timeout approaching — produce structured summary');
4193
- });
4194
- // Forward events from subagent with context (track for cleanup)
4195
- // Also report progress to the timeout tracker
4196
- const unsubSubAgent = subAgent.subscribe(event => {
4197
- // Tag event with subagent source AND unique ID so TUI can properly attribute
4198
- // events to the specific agent instance (critical for multiple same-type agents)
4199
- const taggedEvent = { ...event, subagent: agentName, subagentId: agentId };
4200
- this.emit(taggedEvent);
4201
- // Report progress for timeout extension
4202
- // Progress events: tool calls, LLM responses, token updates
4203
- const progressEvents = ['tool.start', 'tool.complete', 'llm.start', 'llm.complete'];
4204
- if (progressEvents.includes(event.type)) {
4205
- progressAwareTimeout.reportProgress();
4206
- }
4207
- });
4208
- // Link parent's cancellation with progress-aware timeout so ESC propagates to subagents
4209
- const parentSource = this.cancellation?.getSource();
4210
- const effectiveSource = parentSource
4211
- ? createLinkedToken(parentSource, progressAwareTimeout)
4212
- : progressAwareTimeout;
4213
- // CRITICAL: Pass the cancellation token to the subagent so it can check and stop
4214
- // gracefully when timeout fires. Without this, the subagent continues running as
4215
- // a "zombie" even after race() returns with a timeout error.
4216
- subAgent.setExternalCancellation(effectiveSource.token);
4217
- // Pause parent's duration timer while subagent runs to prevent
4218
- // the parent from timing out on wall-clock while waiting for subagent
4219
- this.economics?.pauseDuration();
4220
- try {
4221
- // Run the task with cancellation propagation from parent
4222
- const result = await race(subAgent.run(task), effectiveSource.token);
4223
- const duration = Date.now() - startTime;
4224
- // BEFORE cleanup - extract subagent's pending plan and merge into parent's plan
4225
- // This ensures that when a subagent in plan mode queues writes, they bubble up to the parent
4226
- let queuedChangeSummary = '';
4227
- let queuedChangesCount = 0;
4228
- if (subAgent.hasPendingPlan()) {
4229
- const subPlan = subAgent.getPendingPlan();
4230
- if (subPlan && subPlan.proposedChanges.length > 0) {
4231
- queuedChangesCount = subPlan.proposedChanges.length;
4232
- // Emit event for TUI to display
4233
- this.emit({
4234
- type: 'agent.pending_plan',
4235
- agentId: agentName,
4236
- changes: subPlan.proposedChanges,
4237
- });
4238
- // Build detailed summary of what was queued for the return message
4239
- // This prevents the "doom loop" where parent doesn't know what subagent did
4240
- const changeSummaries = subPlan.proposedChanges.map(c => {
4241
- if (c.tool === 'write_file' || c.tool === 'edit_file') {
4242
- const path = c.args.path || c.args.file_path || '(unknown file)';
4243
- return ` - [${c.tool}] ${path}: ${c.reason}`;
4244
- }
4245
- else if (c.tool === 'bash') {
4246
- const cmd = String(c.args.command || '').slice(0, 60);
4247
- return ` - [bash] ${cmd}${String(c.args.command || '').length > 60 ? '...' : ''}: ${c.reason}`;
4248
- }
4249
- return ` - [${c.tool}]: ${c.reason}`;
4250
- });
4251
- queuedChangeSummary = `\n\n[PLAN MODE - CHANGES QUEUED TO PARENT]\n` +
4252
- `The following ${subPlan.proposedChanges.length} change(s) have been queued in the parent's pending plan:\n` +
4253
- changeSummaries.join('\n') + '\n' +
4254
- `\nThese changes are now in YOUR pending plan. The task for this subagent is COMPLETE.\n` +
4255
- `Do NOT spawn another agent for the same task - the changes are already queued.\n` +
4256
- `Use /show-plan to see all pending changes, /approve to execute them.`;
4257
- // Merge into parent's pending plan with subagent context
4258
- for (const change of subPlan.proposedChanges) {
4259
- this.pendingPlanManager.addProposedChange(change.tool, { ...change.args, _fromSubagent: agentName }, `[${agentName}] ${change.reason}`, change.toolCallId);
4260
- }
4261
- }
4262
- // Also merge exploration summary if available
4263
- if (subPlan?.explorationSummary) {
4264
- this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
4265
- }
4266
- }
4267
- // If subagent queued changes, override output with informative message
4268
- // This is critical to prevent doom loops where parent doesn't understand what happened
4269
- const finalOutput = queuedChangeSummary
4270
- ? (result.response || '') + queuedChangeSummary
4271
- : (result.response || result.error || '');
4272
- // Parse structured closure report from agent's response (if it produced one)
4273
- const structured = parseStructuredClosureReport(result.response || '', 'completed');
4274
- const spawnResultFinal = {
4275
- success: result.success,
4276
- output: finalOutput,
4277
- metrics: {
4278
- tokens: result.metrics.totalTokens,
4279
- duration,
4280
- toolCalls: result.metrics.toolCalls,
4281
- },
4282
- structured,
4283
- };
4284
- // Save full output to subagent output store (avoids telephone problem)
4285
- if (this.subagentOutputStore) {
4286
- const outputEntry = {
4287
- id: agentId,
4288
- agentId,
4289
- agentName,
4290
- task,
4291
- fullOutput: finalOutput,
4292
- structured,
4293
- filesModified: [],
4294
- filesCreated: [],
4295
- timestamp: new Date(),
4296
- tokensUsed: result.metrics.totalTokens,
4297
- durationMs: duration,
4298
- };
4299
- const storeId = this.subagentOutputStore.save(outputEntry);
4300
- // Attach reference so downstream consumers can retrieve full output
4301
- spawnResultFinal.outputStoreId = storeId;
4302
- }
4303
- if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4304
- try {
4305
- this.store.completeWorkerResult(workerResultId, {
4306
- fullOutput: finalOutput,
4307
- summary: finalOutput.slice(0, 500),
4308
- artifacts: structured ? [{ type: 'structured_report', data: structured }] : undefined,
4309
- metrics: {
4310
- tokens: result.metrics.totalTokens,
4311
- duration,
4312
- toolCalls: result.metrics.toolCalls,
4313
- },
4314
- });
4315
- }
4316
- catch (storeErr) {
4317
- this.observability?.logger?.warn('Failed to persist worker result', {
4318
- agentId,
4319
- error: storeErr.message,
4320
- });
4321
- }
4322
- }
4323
- this.emit({
4324
- type: 'agent.complete',
4325
- agentId, // Use unique spawn ID for precise tracking
4326
- agentType: agentName, // Keep type for display purposes
4327
- success: result.success,
4328
- output: finalOutput.slice(0, 500), // Include output preview
4329
- });
4330
- if (progressAwareTimeout.isInWrapupPhase()) {
4331
- this.emit({
4332
- type: 'subagent.wrapup.completed',
4333
- agentId,
4334
- agentType: agentName,
4335
- elapsedMs: Date.now() - startTime,
4336
- });
4337
- }
4338
- // Enhanced tracing: Record subagent completion
4339
- this.traceCollector?.record({
4340
- type: 'subagent.link',
4341
- data: {
4342
- parentSessionId: this.traceCollector.getSessionId() || 'unknown',
4343
- childSessionId,
4344
- childTraceId,
4345
- childConfig: {
4346
- agentType: agentName,
4347
- model: resolvedModel || 'default',
4348
- task,
4349
- tools: agentTools.map(t => t.name),
4350
- },
4351
- spawnContext: {
4352
- reason: `Delegated task: ${task.slice(0, 100)}`,
4353
- expectedOutcome: agentDef.description,
4354
- parentIteration: this.state.iteration,
4355
- },
4356
- result: {
4357
- success: result.success,
4358
- summary: (result.response || result.error || '').slice(0, 500),
4359
- tokensUsed: result.metrics.totalTokens,
4360
- durationMs: duration,
4361
- },
4362
- },
4363
- });
4364
- // Unsubscribe from subagent events before cleanup
4365
- unsubSubAgent();
4366
- await subAgent.cleanup();
4367
- // Cache result for duplicate spawn prevention
4368
- // Use the same taskKey from the dedup check above
4369
- this.spawnedTasks.set(taskKey, {
4370
- timestamp: Date.now(),
4371
- result: finalOutput,
4372
- queuedChanges: queuedChangesCount,
4373
- });
4374
- return spawnResultFinal;
4375
- }
4376
- catch (err) {
4377
- // Handle cancellation (user ESC or timeout) for cleaner error messages
4378
- if (isCancellationError(err)) {
4379
- const duration = Date.now() - startTime;
4380
- const isUserCancellation = parentSource?.isCancellationRequested;
4381
- const reason = isUserCancellation
4382
- ? 'User cancelled'
4383
- : err.reason || `Timed out after ${subagentTimeout}ms`;
4384
- this.emit({ type: 'agent.error', agentId, agentType: agentName, error: reason });
4385
- if (!isUserCancellation) {
4386
- this.emit({
4387
- type: 'subagent.timeout.hard_kill',
4388
- agentId,
4389
- agentType: agentName,
4390
- reason,
4391
- elapsedMs: Date.now() - startTime,
4392
- });
4393
- }
4394
- // =======================================================================
4395
- // PRESERVE PARTIAL RESULTS
4396
- // Instead of discarding all work, capture whatever the subagent produced
4397
- // before timeout. This prevents the "zombie agent" problem where tokens
4398
- // are consumed but results are lost.
4399
- // =======================================================================
4400
- const subagentState = subAgent.getState();
4401
- const subagentMetrics = subAgent.getMetrics();
4402
- // Extract partial response from the last assistant message
4403
- const assistantMessages = subagentState.messages.filter(m => m.role === 'assistant');
4404
- const lastAssistantMsg = assistantMessages[assistantMessages.length - 1];
4405
- const partialResponse = typeof lastAssistantMsg?.content === 'string'
4406
- ? lastAssistantMsg.content
4407
- : '';
4408
- // Extract pending plan before cleanup (even on cancellation, preserve any queued work)
4409
- let cancelledQueuedSummary = '';
4410
- if (subAgent.hasPendingPlan()) {
4411
- const subPlan = subAgent.getPendingPlan();
4412
- if (subPlan && subPlan.proposedChanges.length > 0) {
4413
- this.emit({
4414
- type: 'agent.pending_plan',
4415
- agentId: agentName,
4416
- changes: subPlan.proposedChanges,
4417
- });
4418
- // Build summary of changes that were queued before cancellation
4419
- const changeSummaries = subPlan.proposedChanges.map(c => {
4420
- if (c.tool === 'write_file' || c.tool === 'edit_file') {
4421
- const path = c.args.path || c.args.file_path || '(unknown file)';
4422
- return ` - [${c.tool}] ${path}: ${c.reason}`;
4423
- }
4424
- else if (c.tool === 'bash') {
4425
- const cmd = String(c.args.command || '').slice(0, 60);
4426
- return ` - [bash] ${cmd}...: ${c.reason}`;
4427
- }
4428
- return ` - [${c.tool}]: ${c.reason}`;
4429
- });
4430
- cancelledQueuedSummary = `\n\n[PLAN MODE - CHANGES QUEUED BEFORE CANCELLATION]\n` +
4431
- `${subPlan.proposedChanges.length} change(s) were queued to the parent plan:\n` +
4432
- changeSummaries.join('\n') + '\n' +
4433
- `These changes are preserved in your pending plan.`;
4434
- for (const change of subPlan.proposedChanges) {
4435
- this.pendingPlanManager.addProposedChange(change.tool, { ...change.args, _fromSubagent: agentName }, `[${agentName}] ${change.reason}`, change.toolCallId);
4436
- }
4437
- }
4438
- // Also preserve exploration summary
4439
- if (subPlan?.explorationSummary) {
4440
- this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
4441
- }
4442
- }
4443
- // Unsubscribe from subagent events and cleanup gracefully
4444
- unsubSubAgent();
4445
- try {
4446
- await subAgent.cleanup();
4447
- }
4448
- catch {
4449
- // Ignore cleanup errors on cancellation
4450
- }
4451
- // Build output message with partial results
4452
- const baseOutput = isUserCancellation
4453
- ? `Subagent '${agentName}' was cancelled by user.`
4454
- : `Subagent '${agentName}' timed out after ${Math.round(subagentTimeout / 1000)}s.`;
4455
- // Include partial response if we have one
4456
- const partialResultSection = partialResponse
4457
- ? `\n\n[PARTIAL RESULTS BEFORE TIMEOUT]\n${partialResponse.slice(0, 2000)}${partialResponse.length > 2000 ? '...(truncated)' : ''}`
4458
- : '';
4459
- // Enhanced tracing: Record subagent timeout with partial results
4460
- this.traceCollector?.record({
4461
- type: 'subagent.link',
4462
- data: {
4463
- parentSessionId: this.traceCollector.getSessionId() || 'unknown',
4464
- childSessionId,
4465
- childTraceId,
4466
- childConfig: {
4467
- agentType: agentName,
4468
- model: resolvedModel || 'default',
4469
- task,
4470
- tools: agentTools.map(t => t.name),
4471
- },
4472
- spawnContext: {
4473
- reason: `Delegated task: ${task.slice(0, 100)}`,
4474
- expectedOutcome: agentDef.description,
4475
- parentIteration: this.state.iteration,
4476
- },
4477
- result: {
4478
- success: false,
4479
- summary: `[TIMEOUT] ${baseOutput}\n${partialResponse.slice(0, 200)}`,
4480
- tokensUsed: subagentMetrics.totalTokens,
4481
- durationMs: duration,
4482
- },
4483
- },
4484
- });
4485
- // Parse structured closure report from partial response
4486
- const exitReason = isUserCancellation ? 'cancelled' : 'timeout_graceful';
4487
- const structured = parseStructuredClosureReport(partialResponse, exitReason, task);
4488
- if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4489
- try {
4490
- this.store.failWorkerResult(workerResultId, reason);
4491
- }
4492
- catch (storeErr) {
4493
- this.observability?.logger?.warn('Failed to mark cancelled worker result as failed', {
4494
- agentId,
4495
- error: storeErr.message,
4496
- });
4497
- }
4498
- }
4499
- return {
4500
- success: false,
4501
- output: baseOutput + partialResultSection + cancelledQueuedSummary,
4502
- // IMPORTANT: Use actual metrics instead of zeros
4503
- // This ensures accurate token tracking in /trace output
4504
- metrics: {
4505
- tokens: subagentMetrics.totalTokens,
4506
- duration,
4507
- toolCalls: subagentMetrics.toolCalls,
4508
- },
4509
- structured,
4510
- };
4511
- }
4512
- throw err; // Re-throw non-cancellation errors
4513
- }
4514
- finally {
4515
- // Resume parent's duration timer now that subagent is done
4516
- this.economics?.resumeDuration();
4517
- // Dispose both sources (linked source disposes its internal state, timeout source handles its timer)
4518
- effectiveSource.dispose();
4519
- progressAwareTimeout.dispose();
4520
- // BUDGET POOL: Record actual usage and release the allocation
4521
- // This must happen in finally to ensure cleanup on both success and error paths
4522
- if (this.budgetPool && poolAllocationId) {
4523
- const subMetrics = subAgent.getMetrics();
4524
- this.budgetPool.recordUsage(poolAllocationId, subMetrics.totalTokens, subMetrics.estimatedCost);
4525
- this.budgetPool.release(poolAllocationId);
4526
- }
4527
- }
4528
- }
4529
- catch (err) {
4530
- const error = err instanceof Error ? err.message : String(err);
4531
- this.emit({ type: 'agent.error', agentId, agentType: agentName, error });
4532
- if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4533
- try {
4534
- this.store.failWorkerResult(workerResultId, error);
4535
- }
4536
- catch (storeErr) {
4537
- this.observability?.logger?.warn('Failed to mark worker result as failed', {
4538
- agentId,
4539
- error: storeErr.message,
4540
- });
4541
- }
4542
- }
4543
- return {
4544
- success: false,
4545
- output: `Agent error: ${error}`,
4546
- metrics: { tokens: 0, duration: Date.now() - startTime, toolCalls: 0 },
4547
- };
4548
- }
4549
- }
4550
- /**
4551
- * Spawn multiple agents in parallel to work on independent tasks.
4552
- * Uses the shared blackboard for coordination and conflict prevention.
4553
- *
4554
- * Get budget for a subagent, using the pooled budget when available.
4555
- * Falls back to the static SUBAGENT_BUDGET if no pool is configured.
4556
- * Returns both the budget and the pool allocation ID (if any) for tracking.
4557
- */
4558
- getSubagentBudget(agentName, constraints) {
4559
- // If explicit maxTokens constraint, use that
4560
- if (constraints?.maxTokens) {
4561
- return {
4562
- budget: { ...SUBAGENT_BUDGET, maxTokens: constraints.maxTokens },
4563
- allocationId: null,
4564
- };
4565
- }
4566
- // Try to allocate from the shared budget pool
4567
- if (this.budgetPool) {
4568
- const allocationId = `${agentName}-${Date.now()}`;
4569
- const allocation = this.budgetPool.reserve(allocationId);
4570
- if (allocation) {
4571
- return {
4572
- budget: {
4573
- ...SUBAGENT_BUDGET,
4574
- maxTokens: allocation.tokenBudget,
4575
- softTokenLimit: Math.floor(allocation.tokenBudget * 0.7),
4576
- maxCost: allocation.costBudget,
4577
- },
4578
- allocationId,
4579
- };
4580
- }
4581
- // Pool exhausted — give a tiny emergency budget (just enough to report failure)
4582
- // This does NOT bypass the pool — it's a fixed small cost for error messaging
4583
- return {
4584
- budget: {
4585
- ...SUBAGENT_BUDGET,
4586
- maxTokens: 5000,
4587
- softTokenLimit: 3000,
4588
- maxCost: 0.01,
4589
- },
4590
- allocationId: null,
4591
- };
4592
- }
4593
- // No pool — use default subagent budget
4594
- return { budget: SUBAGENT_BUDGET, allocationId: null };
2791
+ return coreSpawnAgent(agentName, task, this.buildContext(), this.createSubAgentFactory(), constraints);
4595
2792
  }
4596
2793
  /**
4597
- * Uses Promise.allSettled to handle partial failures gracefully - if one
4598
- * agent fails or times out, others can still complete successfully.
2794
+ * Spawn multiple subagents in parallel (delegates to core/subagent-spawner).
4599
2795
  */
4600
2796
  async spawnAgentsParallel(tasks) {
4601
- // Emit start event for TUI visibility
4602
- this.emit({
4603
- type: 'parallel.spawn.start',
4604
- count: tasks.length,
4605
- agents: tasks.map(t => t.agent),
4606
- });
4607
- // Use DynamicBudgetPool for parallel spawns (prevents child starvation,
4608
- // enables priority-based allocation). Falls back to regular pool for single tasks.
4609
- let settled;
4610
- const originalPool = this.budgetPool;
4611
- // SubagentSupervisor for unified monitoring of concurrent subagents
4612
- const supervisor = tasks.length > 1 ? createSubagentSupervisor() : null;
4613
- if (this.budgetPool && tasks.length > 1) {
4614
- // Swap to DynamicBudgetPool for this parallel batch
4615
- const poolStats = this.budgetPool.getStats();
4616
- const dynamicPool = createDynamicBudgetPool(poolStats.tokensRemaining, 0.1);
4617
- dynamicPool.setExpectedChildren(tasks.length);
4618
- // Temporarily replace the budget pool so spawnAgent's reserve() uses the dynamic one
4619
- this.budgetPool = dynamicPool;
4620
- try {
4621
- const promises = tasks.map(({ agent, task }) => {
4622
- const spawnPromise = this.spawnAgent(agent, task);
4623
- // Register with supervisor for monitoring
4624
- if (supervisor) {
4625
- const handle = createSubagentHandle(`parallel-${agent}-${Date.now()}`, agent, task, spawnPromise, {});
4626
- supervisor.add(handle);
4627
- }
4628
- return spawnPromise;
4629
- });
4630
- settled = await Promise.allSettled(promises);
4631
- }
4632
- finally {
4633
- this.budgetPool = originalPool;
4634
- supervisor?.stop();
4635
- }
4636
- }
4637
- else {
4638
- // Single task or no pool - use standard sequential allocation
4639
- const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
4640
- settled = await Promise.allSettled(promises);
4641
- }
4642
- // Convert settled results to SpawnResult array
4643
- const results = settled.map((result, i) => {
4644
- if (result.status === 'fulfilled') {
4645
- return result.value;
4646
- }
4647
- // Handle rejected promises (shouldn't happen since spawnAgent catches errors internally,
4648
- // but this is a safety net for unexpected failures)
4649
- const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
4650
- this.emit({
4651
- type: 'agent.error',
4652
- agentId: tasks[i].agent,
4653
- error: `Unexpected parallel spawn error: ${error}`,
4654
- });
4655
- return {
4656
- success: false,
4657
- output: `Parallel spawn error: ${error}`,
4658
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
4659
- };
4660
- });
4661
- // Emit completion event
4662
- this.emit({
4663
- type: 'parallel.spawn.complete',
4664
- count: tasks.length,
4665
- successCount: results.filter(r => r.success).length,
4666
- results: results.map((r, i) => ({
4667
- agent: tasks[i].agent,
4668
- success: r.success,
4669
- tokens: r.metrics?.tokens || 0,
4670
- })),
4671
- });
4672
- return results;
2797
+ return coreSpawnAgentsParallel(tasks, this.buildContext(), this.buildMutators(), this.createSubAgentFactory());
4673
2798
  }
4674
2799
  /**
4675
2800
  * Get a formatted list of available agents.
@@ -4835,7 +2960,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
4835
2960
  */
4836
2961
  cancel(reason) {
4837
2962
  if (!this.cancellation) {
4838
- console.warn('[ProductionAgent] Cancellation not enabled');
2963
+ log.warn('Cancellation not enabled');
4839
2964
  return;
4840
2965
  }
4841
2966
  this.cancellation.cancel(reason);
@@ -4946,7 +3071,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
4946
3071
  */
4947
3072
  enableLSPFileTools(options) {
4948
3073
  if (!this.lspManager) {
4949
- console.warn('[ProductionAgent] LSP not enabled, cannot enable LSP file tools');
3074
+ log.warn('LSP not enabled, cannot enable LSP file tools');
4950
3075
  return;
4951
3076
  }
4952
3077
  const lspTools = this.getLSPFileTools(options);
@@ -5576,63 +3701,6 @@ export function buildAgent() {
5576
3701
  return new ProductionAgentBuilder();
5577
3702
  }
5578
3703
  // =============================================================================
5579
- // STRUCTURED CLOSURE REPORT PARSER
5580
- // =============================================================================
5581
/**
 * Maximum number of `{` positions examined while searching a response for a
 * closure report. Bounds the work done on pathological brace-heavy input.
 */
const CLOSURE_REPORT_MAX_CANDIDATES = 64;

/**
 * Find the index of the `}` that closes the `{` at `openIndex`.
 * Braces inside double-quoted strings (with backslash escapes) are ignored so
 * that JSON string values containing `{` or `}` do not unbalance the scan.
 *
 * @param {string} text - Text to scan
 * @param {number} openIndex - Index of the opening `{`
 * @returns {number} Index of the matching `}`, or -1 if the brace never closes
 */
function findClosureReportClosingBrace(text, openIndex) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = openIndex; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}

/**
 * Locate the first balanced `{...}` span in `text` that parses as JSON and
 * carries at least one closure-report field.
 *
 * @param {string} text - Raw response text
 * @returns {object|undefined} The parsed object, or undefined if none qualifies
 */
function findClosureReportObject(text) {
    let searchFrom = 0;
    for (let attempt = 0; attempt < CLOSURE_REPORT_MAX_CANDIDATES; attempt++) {
        const start = text.indexOf('{', searchFrom);
        if (start === -1) {
            return undefined;
        }
        // Default: resume right after this brace, so an object nested inside
        // (or following) an unparseable span is still considered.
        searchFrom = start + 1;
        const end = findClosureReportClosingBrace(text, start);
        if (end === -1) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(text.slice(start, end + 1));
        }
        catch {
            // Not JSON (e.g. "{placeholder}" in prose) — try the next brace
            continue;
        }
        // Validate that it looks like a closure report (has at least one expected field)
        if (parsed.findings || parsed.actionsTaken || parsed.failures || parsed.remainingWork) {
            return parsed;
        }
        // Valid JSON but not a report: skip past it entirely
        searchFrom = end + 1;
    }
    return undefined;
}

/**
 * Parse a structured closure report from a subagent's text response.
 * The subagent may have produced JSON in response to a TIMEOUT_WRAPUP_PROMPT.
 *
 * The JSON object may be surrounded by prose (including prose that itself
 * contains braces) or by other JSON objects; the first object that parses and
 * has one of `findings`, `actionsTaken`, `failures` or `remainingWork` is used.
 * Report fields that are not arrays are replaced by empty arrays
 * (`suggestedNextSteps` by `undefined`).
 *
 * @param text - The subagent's last response text
 * @param defaultExitReason - Exit reason to use (completed, timeout_graceful, cancelled, etc.)
 * @param fallbackTask - Original task description for fallback remainingWork
 * @returns Parsed StructuredClosureReport, or undefined if no JSON found and no fallback needed
 */
export function parseStructuredClosureReport(text, defaultExitReason, fallbackTask) {
    if (!text) {
        // No text at all — create a hard timeout fallback if we have a task
        if (fallbackTask) {
            return {
                findings: [],
                actionsTaken: [],
                failures: ['Timeout before producing structured summary'],
                remainingWork: [fallbackTask],
                exitReason: 'timeout_hard',
            };
        }
        return undefined;
    }
    let parsed;
    try {
        parsed = findClosureReportObject(text);
    }
    catch {
        // Extraction failed unexpectedly — fall through to fallback
    }
    if (parsed) {
        return {
            findings: Array.isArray(parsed.findings) ? parsed.findings : [],
            actionsTaken: Array.isArray(parsed.actionsTaken) ? parsed.actionsTaken : [],
            failures: Array.isArray(parsed.failures) ? parsed.failures : [],
            remainingWork: Array.isArray(parsed.remainingWork) ? parsed.remainingWork : [],
            exitReason: defaultExitReason,
            suggestedNextSteps: Array.isArray(parsed.suggestedNextSteps) ? parsed.suggestedNextSteps : undefined,
        };
    }
    // Fallback: LLM didn't produce valid JSON but we have text
    if (defaultExitReason !== 'completed') {
        return {
            findings: [text.slice(0, 500)],
            actionsTaken: [],
            failures: ['Did not produce structured JSON summary'],
            remainingWork: fallbackTask ? [fallbackTask] : [],
            // A graceful timeout that yielded no structured summary is reported as a hard timeout
            exitReason: defaultExitReason === 'timeout_graceful' ? 'timeout_hard' : defaultExitReason,
        };
    }
    // For completed agents, don't force a structured report if they didn't produce one
    return undefined;
}
3704
+ // Re-export from core for backward compatibility
3705
+ export { parseStructuredClosureReport } from './core/index.js';
5638
3706
  //# sourceMappingURL=agent.js.map