attocode 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (327)
  1. package/CHANGELOG.md +67 -1
  2. package/README.md +65 -5
  3. package/dist/src/adapters.d.ts.map +1 -1
  4. package/dist/src/adapters.js +15 -11
  5. package/dist/src/adapters.js.map +1 -1
  6. package/dist/src/agent.d.ts +38 -98
  7. package/dist/src/agent.d.ts.map +1 -1
  8. package/dist/src/agent.js +505 -2892
  9. package/dist/src/agent.js.map +1 -1
  10. package/dist/src/cli.d.ts.map +1 -1
  11. package/dist/src/cli.js +2 -1
  12. package/dist/src/cli.js.map +1 -1
  13. package/dist/src/commands/handler.d.ts.map +1 -1
  14. package/dist/src/commands/handler.js +11 -3
  15. package/dist/src/commands/handler.js.map +1 -1
  16. package/dist/src/commands/init-commands.d.ts.map +1 -1
  17. package/dist/src/commands/init-commands.js +16 -1
  18. package/dist/src/commands/init-commands.js.map +1 -1
  19. package/dist/src/commands/init.d.ts.map +1 -1
  20. package/dist/src/commands/init.js +31 -0
  21. package/dist/src/commands/init.js.map +1 -1
  22. package/dist/src/config/base-types.d.ts +45 -0
  23. package/dist/src/config/base-types.d.ts.map +1 -0
  24. package/dist/src/config/base-types.js +9 -0
  25. package/dist/src/config/base-types.js.map +1 -0
  26. package/dist/src/config/config-manager.d.ts +35 -0
  27. package/dist/src/config/config-manager.d.ts.map +1 -0
  28. package/dist/src/config/config-manager.js +108 -0
  29. package/dist/src/config/config-manager.js.map +1 -0
  30. package/dist/src/config/index.d.ts +4 -0
  31. package/dist/src/config/index.d.ts.map +1 -0
  32. package/dist/src/config/index.js +3 -0
  33. package/dist/src/config/index.js.map +1 -0
  34. package/dist/src/config/schema.d.ts +1546 -0
  35. package/dist/src/config/schema.d.ts.map +1 -0
  36. package/dist/src/config/schema.js +268 -0
  37. package/dist/src/config/schema.js.map +1 -0
  38. package/dist/src/config.d.ts +4 -1
  39. package/dist/src/config.d.ts.map +1 -1
  40. package/dist/src/config.js +8 -12
  41. package/dist/src/config.js.map +1 -1
  42. package/dist/src/core/agent-state-machine.d.ts +131 -0
  43. package/dist/src/core/agent-state-machine.d.ts.map +1 -0
  44. package/dist/src/core/agent-state-machine.js +302 -0
  45. package/dist/src/core/agent-state-machine.js.map +1 -0
  46. package/dist/src/core/base-manager.d.ts +79 -0
  47. package/dist/src/core/base-manager.d.ts.map +1 -0
  48. package/dist/src/core/base-manager.js +170 -0
  49. package/dist/src/core/base-manager.js.map +1 -0
  50. package/dist/src/core/completion-analyzer.d.ts +15 -0
  51. package/dist/src/core/completion-analyzer.d.ts.map +1 -0
  52. package/dist/src/core/completion-analyzer.js +53 -0
  53. package/dist/src/core/completion-analyzer.js.map +1 -0
  54. package/dist/src/core/execution-loop.d.ts +46 -0
  55. package/dist/src/core/execution-loop.d.ts.map +1 -0
  56. package/dist/src/core/execution-loop.js +1258 -0
  57. package/dist/src/core/execution-loop.js.map +1 -0
  58. package/dist/src/core/index.d.ts +7 -0
  59. package/dist/src/core/index.d.ts.map +1 -1
  60. package/dist/src/core/index.js +9 -0
  61. package/dist/src/core/index.js.map +1 -1
  62. package/dist/src/core/process-handlers.d.ts.map +1 -1
  63. package/dist/src/core/process-handlers.js +14 -0
  64. package/dist/src/core/process-handlers.js.map +1 -1
  65. package/dist/src/core/protocol/types.d.ts +12 -12
  66. package/dist/src/core/response-handler.d.ts +16 -0
  67. package/dist/src/core/response-handler.d.ts.map +1 -0
  68. package/dist/src/core/response-handler.js +234 -0
  69. package/dist/src/core/response-handler.js.map +1 -0
  70. package/dist/src/core/subagent-spawner.d.ts +43 -0
  71. package/dist/src/core/subagent-spawner.d.ts.map +1 -0
  72. package/dist/src/core/subagent-spawner.js +966 -0
  73. package/dist/src/core/subagent-spawner.js.map +1 -0
  74. package/dist/src/core/tool-executor.d.ts +59 -0
  75. package/dist/src/core/tool-executor.d.ts.map +1 -0
  76. package/dist/src/core/tool-executor.js +677 -0
  77. package/dist/src/core/tool-executor.js.map +1 -0
  78. package/dist/src/core/types.d.ts +133 -0
  79. package/dist/src/core/types.d.ts.map +1 -0
  80. package/dist/src/core/types.js +12 -0
  81. package/dist/src/core/types.js.map +1 -0
  82. package/dist/src/defaults.d.ts +2 -2
  83. package/dist/src/defaults.d.ts.map +1 -1
  84. package/dist/src/defaults.js +29 -1
  85. package/dist/src/defaults.js.map +1 -1
  86. package/dist/src/integrations/auto-compaction.d.ts.map +1 -1
  87. package/dist/src/integrations/auto-compaction.js +3 -2
  88. package/dist/src/integrations/auto-compaction.js.map +1 -1
  89. package/dist/src/integrations/budget-pool.d.ts +7 -0
  90. package/dist/src/integrations/budget-pool.d.ts.map +1 -1
  91. package/dist/src/integrations/budget-pool.js +43 -0
  92. package/dist/src/integrations/budget-pool.js.map +1 -1
  93. package/dist/src/integrations/codebase-ast.d.ts +52 -0
  94. package/dist/src/integrations/codebase-ast.d.ts.map +1 -0
  95. package/dist/src/integrations/codebase-ast.js +457 -0
  96. package/dist/src/integrations/codebase-ast.js.map +1 -0
  97. package/dist/src/integrations/codebase-context.d.ts +18 -0
  98. package/dist/src/integrations/codebase-context.d.ts.map +1 -1
  99. package/dist/src/integrations/codebase-context.js +197 -17
  100. package/dist/src/integrations/codebase-context.js.map +1 -1
  101. package/dist/src/integrations/compaction.d.ts.map +1 -1
  102. package/dist/src/integrations/compaction.js +14 -6
  103. package/dist/src/integrations/compaction.js.map +1 -1
  104. package/dist/src/integrations/context-engineering.d.ts +8 -0
  105. package/dist/src/integrations/context-engineering.d.ts.map +1 -1
  106. package/dist/src/integrations/context-engineering.js +19 -0
  107. package/dist/src/integrations/context-engineering.js.map +1 -1
  108. package/dist/src/integrations/economics.d.ts +25 -1
  109. package/dist/src/integrations/economics.d.ts.map +1 -1
  110. package/dist/src/integrations/economics.js +217 -38
  111. package/dist/src/integrations/economics.js.map +1 -1
  112. package/dist/src/integrations/edit-validator.d.ts +30 -0
  113. package/dist/src/integrations/edit-validator.d.ts.map +1 -0
  114. package/dist/src/integrations/edit-validator.js +85 -0
  115. package/dist/src/integrations/edit-validator.js.map +1 -0
  116. package/dist/src/integrations/file-cache.d.ts +7 -0
  117. package/dist/src/integrations/file-cache.d.ts.map +1 -1
  118. package/dist/src/integrations/file-cache.js +54 -0
  119. package/dist/src/integrations/file-cache.js.map +1 -1
  120. package/dist/src/integrations/health-check.d.ts.map +1 -1
  121. package/dist/src/integrations/health-check.js +3 -2
  122. package/dist/src/integrations/health-check.js.map +1 -1
  123. package/dist/src/integrations/hierarchical-config.d.ts +3 -0
  124. package/dist/src/integrations/hierarchical-config.d.ts.map +1 -1
  125. package/dist/src/integrations/hierarchical-config.js +3 -0
  126. package/dist/src/integrations/hierarchical-config.js.map +1 -1
  127. package/dist/src/integrations/hooks.d.ts +2 -0
  128. package/dist/src/integrations/hooks.d.ts.map +1 -1
  129. package/dist/src/integrations/hooks.js +99 -15
  130. package/dist/src/integrations/hooks.js.map +1 -1
  131. package/dist/src/integrations/index.d.ts +7 -0
  132. package/dist/src/integrations/index.d.ts.map +1 -1
  133. package/dist/src/integrations/index.js +9 -1
  134. package/dist/src/integrations/index.js.map +1 -1
  135. package/dist/src/integrations/logger.d.ts +104 -0
  136. package/dist/src/integrations/logger.d.ts.map +1 -0
  137. package/dist/src/integrations/logger.js +219 -0
  138. package/dist/src/integrations/logger.js.map +1 -0
  139. package/dist/src/integrations/lsp.d.ts.map +1 -1
  140. package/dist/src/integrations/lsp.js +5 -4
  141. package/dist/src/integrations/lsp.js.map +1 -1
  142. package/dist/src/integrations/mcp-client.d.ts.map +1 -1
  143. package/dist/src/integrations/mcp-client.js +8 -7
  144. package/dist/src/integrations/mcp-client.js.map +1 -1
  145. package/dist/src/integrations/observability.d.ts.map +1 -1
  146. package/dist/src/integrations/observability.js +5 -4
  147. package/dist/src/integrations/observability.js.map +1 -1
  148. package/dist/src/integrations/openrouter-pricing.d.ts.map +1 -1
  149. package/dist/src/integrations/openrouter-pricing.js +4 -3
  150. package/dist/src/integrations/openrouter-pricing.js.map +1 -1
  151. package/dist/src/integrations/persistence.d.ts.map +1 -1
  152. package/dist/src/integrations/persistence.js +5 -4
  153. package/dist/src/integrations/persistence.js.map +1 -1
  154. package/dist/src/integrations/planning.d.ts.map +1 -1
  155. package/dist/src/integrations/planning.js +5 -4
  156. package/dist/src/integrations/planning.js.map +1 -1
  157. package/dist/src/integrations/retry.d.ts +1 -0
  158. package/dist/src/integrations/retry.d.ts.map +1 -1
  159. package/dist/src/integrations/retry.js.map +1 -1
  160. package/dist/src/integrations/routing.d.ts.map +1 -1
  161. package/dist/src/integrations/routing.js +2 -1
  162. package/dist/src/integrations/routing.js.map +1 -1
  163. package/dist/src/integrations/safety.d.ts.map +1 -1
  164. package/dist/src/integrations/safety.js +13 -13
  165. package/dist/src/integrations/safety.js.map +1 -1
  166. package/dist/src/integrations/sandbox/docker.d.ts.map +1 -1
  167. package/dist/src/integrations/sandbox/docker.js +2 -1
  168. package/dist/src/integrations/sandbox/docker.js.map +1 -1
  169. package/dist/src/integrations/sandbox/index.d.ts.map +1 -1
  170. package/dist/src/integrations/sandbox/index.js +5 -4
  171. package/dist/src/integrations/sandbox/index.js.map +1 -1
  172. package/dist/src/integrations/session-store.d.ts +1 -0
  173. package/dist/src/integrations/session-store.d.ts.map +1 -1
  174. package/dist/src/integrations/session-store.js +1 -0
  175. package/dist/src/integrations/session-store.js.map +1 -1
  176. package/dist/src/integrations/shared-blackboard.d.ts +3 -0
  177. package/dist/src/integrations/shared-blackboard.d.ts.map +1 -1
  178. package/dist/src/integrations/shared-blackboard.js +47 -0
  179. package/dist/src/integrations/shared-blackboard.js.map +1 -1
  180. package/dist/src/integrations/smart-decomposer.d.ts +27 -0
  181. package/dist/src/integrations/smart-decomposer.d.ts.map +1 -1
  182. package/dist/src/integrations/smart-decomposer.js +414 -30
  183. package/dist/src/integrations/smart-decomposer.js.map +1 -1
  184. package/dist/src/integrations/sqlite-store.d.ts +2 -0
  185. package/dist/src/integrations/sqlite-store.d.ts.map +1 -1
  186. package/dist/src/integrations/sqlite-store.js +18 -6
  187. package/dist/src/integrations/sqlite-store.js.map +1 -1
  188. package/dist/src/integrations/swarm/failure-classifier.d.ts +11 -0
  189. package/dist/src/integrations/swarm/failure-classifier.d.ts.map +1 -0
  190. package/dist/src/integrations/swarm/failure-classifier.js +95 -0
  191. package/dist/src/integrations/swarm/failure-classifier.js.map +1 -0
  192. package/dist/src/integrations/swarm/model-selector.d.ts.map +1 -1
  193. package/dist/src/integrations/swarm/model-selector.js +2 -1
  194. package/dist/src/integrations/swarm/model-selector.js.map +1 -1
  195. package/dist/src/integrations/swarm/swarm-config-loader.d.ts +8 -0
  196. package/dist/src/integrations/swarm/swarm-config-loader.d.ts.map +1 -1
  197. package/dist/src/integrations/swarm/swarm-config-loader.js +95 -0
  198. package/dist/src/integrations/swarm/swarm-config-loader.js.map +1 -1
  199. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts +74 -0
  200. package/dist/src/integrations/swarm/swarm-event-bridge.d.ts.map +1 -1
  201. package/dist/src/integrations/swarm/swarm-event-bridge.js +37 -0
  202. package/dist/src/integrations/swarm/swarm-event-bridge.js.map +1 -1
  203. package/dist/src/integrations/swarm/swarm-events.d.ts +3 -0
  204. package/dist/src/integrations/swarm/swarm-events.d.ts.map +1 -1
  205. package/dist/src/integrations/swarm/swarm-events.js +1 -1
  206. package/dist/src/integrations/swarm/swarm-events.js.map +1 -1
  207. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts +23 -0
  208. package/dist/src/integrations/swarm/swarm-orchestrator.d.ts.map +1 -1
  209. package/dist/src/integrations/swarm/swarm-orchestrator.js +530 -55
  210. package/dist/src/integrations/swarm/swarm-orchestrator.js.map +1 -1
  211. package/dist/src/integrations/swarm/swarm-state-store.d.ts +4 -1
  212. package/dist/src/integrations/swarm/swarm-state-store.d.ts.map +1 -1
  213. package/dist/src/integrations/swarm/swarm-state-store.js +8 -1
  214. package/dist/src/integrations/swarm/swarm-state-store.js.map +1 -1
  215. package/dist/src/integrations/swarm/task-queue.d.ts +10 -0
  216. package/dist/src/integrations/swarm/task-queue.d.ts.map +1 -1
  217. package/dist/src/integrations/swarm/task-queue.js +36 -1
  218. package/dist/src/integrations/swarm/task-queue.js.map +1 -1
  219. package/dist/src/integrations/swarm/types.d.ts +41 -0
  220. package/dist/src/integrations/swarm/types.d.ts.map +1 -1
  221. package/dist/src/integrations/swarm/types.js +9 -0
  222. package/dist/src/integrations/swarm/types.js.map +1 -1
  223. package/dist/src/integrations/swarm/worker-pool.d.ts +12 -2
  224. package/dist/src/integrations/swarm/worker-pool.d.ts.map +1 -1
  225. package/dist/src/integrations/swarm/worker-pool.js +53 -4
  226. package/dist/src/integrations/swarm/worker-pool.js.map +1 -1
  227. package/dist/src/integrations/task-manager.d.ts +33 -1
  228. package/dist/src/integrations/task-manager.d.ts.map +1 -1
  229. package/dist/src/integrations/task-manager.js +78 -4
  230. package/dist/src/integrations/task-manager.js.map +1 -1
  231. package/dist/src/main.js +83 -32
  232. package/dist/src/main.js.map +1 -1
  233. package/dist/src/modes/repl.d.ts.map +1 -1
  234. package/dist/src/modes/repl.js +40 -8
  235. package/dist/src/modes/repl.js.map +1 -1
  236. package/dist/src/modes/tui.d.ts.map +1 -1
  237. package/dist/src/modes/tui.js +36 -6
  238. package/dist/src/modes/tui.js.map +1 -1
  239. package/dist/src/observability/tracer.d.ts.map +1 -1
  240. package/dist/src/observability/tracer.js +2 -1
  241. package/dist/src/observability/tracer.js.map +1 -1
  242. package/dist/src/persistence/schema.d.ts.map +1 -1
  243. package/dist/src/persistence/schema.js +11 -0
  244. package/dist/src/persistence/schema.js.map +1 -1
  245. package/dist/src/providers/adapters/anthropic.d.ts.map +1 -1
  246. package/dist/src/providers/adapters/anthropic.js +3 -2
  247. package/dist/src/providers/adapters/anthropic.js.map +1 -1
  248. package/dist/src/providers/adapters/openai.d.ts.map +1 -1
  249. package/dist/src/providers/adapters/openai.js +3 -2
  250. package/dist/src/providers/adapters/openai.js.map +1 -1
  251. package/dist/src/providers/adapters/openrouter.d.ts.map +1 -1
  252. package/dist/src/providers/adapters/openrouter.js +11 -11
  253. package/dist/src/providers/adapters/openrouter.js.map +1 -1
  254. package/dist/src/providers/circuit-breaker.d.ts +1 -0
  255. package/dist/src/providers/circuit-breaker.d.ts.map +1 -1
  256. package/dist/src/providers/circuit-breaker.js.map +1 -1
  257. package/dist/src/providers/provider.d.ts.map +1 -1
  258. package/dist/src/providers/provider.js +2 -1
  259. package/dist/src/providers/provider.js.map +1 -1
  260. package/dist/src/providers/resilient-provider.d.ts.map +1 -1
  261. package/dist/src/providers/resilient-provider.js +2 -1
  262. package/dist/src/providers/resilient-provider.js.map +1 -1
  263. package/dist/src/session-picker.d.ts.map +1 -1
  264. package/dist/src/session-picker.js +40 -5
  265. package/dist/src/session-picker.js.map +1 -1
  266. package/dist/src/shared/budget-tracker.d.ts +65 -0
  267. package/dist/src/shared/budget-tracker.d.ts.map +1 -0
  268. package/dist/src/shared/budget-tracker.js +128 -0
  269. package/dist/src/shared/budget-tracker.js.map +1 -0
  270. package/dist/src/shared/context-engine.d.ts +64 -0
  271. package/dist/src/shared/context-engine.d.ts.map +1 -0
  272. package/dist/src/shared/context-engine.js +117 -0
  273. package/dist/src/shared/context-engine.js.map +1 -0
  274. package/dist/src/shared/index.d.ts +12 -0
  275. package/dist/src/shared/index.d.ts.map +1 -0
  276. package/dist/src/shared/index.js +12 -0
  277. package/dist/src/shared/index.js.map +1 -0
  278. package/dist/src/shared/persistence.d.ts +57 -0
  279. package/dist/src/shared/persistence.d.ts.map +1 -0
  280. package/dist/src/shared/persistence.js +168 -0
  281. package/dist/src/shared/persistence.js.map +1 -0
  282. package/dist/src/shared/shared-context-state.d.ts +89 -0
  283. package/dist/src/shared/shared-context-state.d.ts.map +1 -0
  284. package/dist/src/shared/shared-context-state.js +175 -0
  285. package/dist/src/shared/shared-context-state.js.map +1 -0
  286. package/dist/src/shared/shared-economics-state.d.ts +61 -0
  287. package/dist/src/shared/shared-economics-state.d.ts.map +1 -0
  288. package/dist/src/shared/shared-economics-state.js +100 -0
  289. package/dist/src/shared/shared-economics-state.js.map +1 -0
  290. package/dist/src/tools/bash.d.ts +3 -3
  291. package/dist/src/tools/bash.d.ts.map +1 -1
  292. package/dist/src/tools/bash.js +2 -1
  293. package/dist/src/tools/bash.js.map +1 -1
  294. package/dist/src/tools/file.d.ts +3 -3
  295. package/dist/src/tools/permission.d.ts.map +1 -1
  296. package/dist/src/tools/permission.js +6 -5
  297. package/dist/src/tools/permission.js.map +1 -1
  298. package/dist/src/tools/types.d.ts +1 -0
  299. package/dist/src/tools/types.d.ts.map +1 -1
  300. package/dist/src/tools/types.js.map +1 -1
  301. package/dist/src/tracing/trace-collector.d.ts +125 -0
  302. package/dist/src/tracing/trace-collector.d.ts.map +1 -1
  303. package/dist/src/tracing/trace-collector.js +112 -5
  304. package/dist/src/tracing/trace-collector.js.map +1 -1
  305. package/dist/src/tracing/types.d.ts +96 -1
  306. package/dist/src/tracing/types.d.ts.map +1 -1
  307. package/dist/src/tracing/types.js.map +1 -1
  308. package/dist/src/tricks/failure-evidence.d.ts.map +1 -1
  309. package/dist/src/tricks/failure-evidence.js +2 -1
  310. package/dist/src/tricks/failure-evidence.js.map +1 -1
  311. package/dist/src/tui/app.d.ts +13 -0
  312. package/dist/src/tui/app.d.ts.map +1 -1
  313. package/dist/src/tui/app.js +129 -15
  314. package/dist/src/tui/app.js.map +1 -1
  315. package/dist/src/tui/components/ErrorBoundary.d.ts.map +1 -1
  316. package/dist/src/tui/components/ErrorBoundary.js +3 -2
  317. package/dist/src/tui/components/ErrorBoundary.js.map +1 -1
  318. package/dist/src/tui/event-display.d.ts.map +1 -1
  319. package/dist/src/tui/event-display.js +36 -62
  320. package/dist/src/tui/event-display.js.map +1 -1
  321. package/dist/src/tui/index.d.ts +4 -0
  322. package/dist/src/tui/index.d.ts.map +1 -1
  323. package/dist/src/tui/index.js +17 -0
  324. package/dist/src/tui/index.js.map +1 -1
  325. package/dist/src/types.d.ts +143 -1
  326. package/dist/src/types.d.ts.map +1 -1
  327. package/package.json +18 -3
package/dist/src/agent.js CHANGED
@@ -18,20 +18,16 @@
18
18
  * - Execution Policies (Lesson 23)
19
19
  * - Thread Management (Lesson 24)
20
20
  */
21
- import { buildConfig, isFeatureEnabled, getEnabledFeatures, getSubagentTimeout, getSubagentMaxIterations, } from './defaults.js';
22
- import { createModeManager, formatModeList, parseMode, calculateTaskSimilarity, SUBAGENT_PLAN_MODE_ADDITION, } from './modes.js';
21
+ import * as path from 'node:path';
22
+ import { buildConfig, isFeatureEnabled, getEnabledFeatures, } from './defaults.js';
23
+ import { createModeManager, formatModeList, parseMode, } from './modes.js';
23
24
  import { createLSPFileTools, } from './agent-tools/index.js';
24
- import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, SUBAGENT_BUDGET, TIMEOUT_WRAPUP_PROMPT, AgentRegistry, filterToolsForAgent, formatAgentList, createCancellationManager, isCancellationError, createLinkedToken, createGracefulTimeout, race, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, stableStringify, createCodebaseContext, buildContextFromChunks, generateLightweightRepoMap, createSharedFileCache, createBudgetPool, createDynamicBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate,
25
- // Phase 2: Orchestration
26
- classifyComplexity, getScalingGuidance, buildDelegationPrompt, createMinimalDelegationSpec, getSubagentQualityPrompt, ToolRecommendationEngine, createToolRecommendationEngine, createInjectionBudgetManager,
27
- // Phase 3: Advanced
28
- getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, createSubagentSupervisor, createSubagentHandle, } from './integrations/index.js';
29
- import { mergeApprovalScopeWithProfile, resolvePolicyProfile, } from './integrations/policy-engine.js';
30
- // Lesson 26: Tracing & Evaluation integration
25
+ import { HookManager, MemoryManager, PlanningManager, ObservabilityManager, SafetyManager, RoutingManager, MultiAgentManager, ReActManager, ExecutionPolicyManager, ThreadManager, RulesManager, DEFAULT_RULE_SOURCES, ExecutionEconomicsManager, STANDARD_BUDGET, AgentRegistry, formatAgentList, createCancellationManager, isCancellationError, createResourceManager, createLSPManager, createSemanticCacheManager, createSkillManager, formatSkillList, createContextEngineering, createCodebaseContext, buildContextFromChunks, generateLightweightRepoMap, createSharedFileCache, createBudgetPool, createPendingPlanManager, createInteractivePlanner, createRecursiveContext, createLearningStore, createCompactor, createAutoCompactionManager, createFileChangeTracker, createCapabilitiesRegistry, createSharedBlackboard, createTaskManager, createSwarmOrchestrator, createThrottledProvider, FREE_TIER_THROTTLE, PAID_TIER_THROTTLE, createWorkLog, createVerificationGate, classifyComplexity, getScalingGuidance, createToolRecommendationEngine, createInjectionBudgetManager, getThinkingSystemPrompt, createSelfImprovementProtocol, createSubagentOutputStore, createSerperSearchTool, getEnvironmentFacts, formatFactsBlock, createAutoCheckpointManager, } from './integrations/index.js';
26
+ import { resolvePolicyProfile, } from './integrations/policy-engine.js';
31
27
  import { createTraceCollector } from './tracing/trace-collector.js';
32
- // Model registry for context window limits
33
28
  import { modelRegistry } from './costs/index.js';
34
29
  import { getModelContextLength } from './integrations/openrouter-pricing.js';
30
+ import { createComponentLogger } from './integrations/logger.js';
35
31
  // Spawn agent tools for LLM-driven subagent delegation
36
32
  import { createBoundSpawnAgentTool, createBoundSpawnAgentsParallelTool, } from './tools/agent.js';
37
33
  // Task tools for Claude Code-style task management
@@ -43,115 +39,15 @@ import { createTaskTools, } from './tools/tasks.js';
43
39
  * Tools that are safe to execute in parallel (read-only, no side effects).
44
40
  * These tools don't modify state, so running them concurrently is safe.
45
41
  */
46
- export const PARALLELIZABLE_TOOLS = new Set([
47
- 'read_file', 'glob', 'grep', 'list_files', 'search_files',
48
- 'search_code', 'get_file_info',
49
- ]);
50
- /**
51
- * Tools that can run in parallel IF they target different files.
52
- * write_file and edit_file on different paths are safe to parallelize.
53
- */
54
- export const CONDITIONALLY_PARALLEL_TOOLS = new Set([
55
- 'write_file', 'edit_file',
56
- ]);
57
- /**
58
- * Extract the target file path from a tool call's arguments.
59
- * Returns null if no file path can be determined.
60
- */
61
- export function extractToolFilePath(toolCall) {
62
- // Check common argument patterns
63
- const args = toolCall;
64
- for (const key of ['path', 'file_path', 'filename', 'file']) {
65
- if (typeof args[key] === 'string')
66
- return args[key];
67
- }
68
- // Check nested args object
69
- if (args.args && typeof args.args === 'object') {
70
- const nested = args.args;
71
- for (const key of ['path', 'file_path', 'filename', 'file']) {
72
- if (typeof nested[key] === 'string')
73
- return nested[key];
74
- }
75
- }
76
- // Check input object (common in structured tool calls)
77
- if (args.input && typeof args.input === 'object') {
78
- const input = args.input;
79
- for (const key of ['path', 'file_path', 'filename', 'file']) {
80
- if (typeof input[key] === 'string')
81
- return input[key];
82
- }
83
- }
84
- return null;
85
- }
86
- /**
87
- * Check if a conditionally-parallel tool call conflicts with any tool
88
- * in the current accumulator (same file path).
89
- */
90
- function hasFileConflict(toolCall, accumulator) {
91
- const path = extractToolFilePath(toolCall);
92
- if (!path)
93
- return true; // Can't determine path → assume conflict
94
- for (const existing of accumulator) {
95
- const existingPath = extractToolFilePath(existing);
96
- if (existingPath === path)
97
- return true; // Same file → conflict
98
- }
99
- return false;
100
- }
101
- /**
102
- * Groups tool calls into batches for parallel/sequential execution.
103
- * Uses accumulate-and-flush: parallelizable tools accumulate until a
104
- * non-parallelizable tool flushes them as a batch. This produces optimal
105
- * batching even for non-consecutive parallelizable tools.
106
- *
107
- * Enhanced with conditional parallelism: write_file/edit_file on
108
- * DIFFERENT files can be batched together for parallel execution.
109
- *
110
- * Example: [read1, read2, write, read3, grep] → [[read1, read2], [write], [read3, grep]]
111
- * (Previous algorithm produced 4 batches; this produces 3)
112
- *
113
- * Enhanced: [write_a, write_b, write_a] → [[write_a, write_b], [write_a]]
114
- * (Different files parallelized, same file sequential)
115
- */
116
- export function groupToolCallsIntoBatches(toolCalls, isParallelizable = (tc) => PARALLELIZABLE_TOOLS.has(tc.name), isConditionallyParallel = (tc) => CONDITIONALLY_PARALLEL_TOOLS.has(tc.name)) {
117
- if (toolCalls.length === 0)
118
- return [];
119
- const batches = [];
120
- let parallelAccum = [];
121
- for (const toolCall of toolCalls) {
122
- if (isParallelizable(toolCall)) {
123
- parallelAccum.push(toolCall);
124
- }
125
- else if (isConditionallyParallel(toolCall)) {
126
- // Can parallelize if no file conflict with existing accumulator
127
- if (!hasFileConflict(toolCall, parallelAccum)) {
128
- parallelAccum.push(toolCall);
129
- }
130
- else {
131
- // Conflict: flush current batch, start new one with this tool
132
- if (parallelAccum.length > 0) {
133
- batches.push(parallelAccum);
134
- parallelAccum = [];
135
- }
136
- parallelAccum.push(toolCall);
137
- }
138
- }
139
- else {
140
- // Flush any accumulated parallel tools as a single batch
141
- if (parallelAccum.length > 0) {
142
- batches.push(parallelAccum);
143
- parallelAccum = [];
144
- }
145
- // Non-parallelizable tool gets its own batch
146
- batches.push([toolCall]);
147
- }
148
- }
149
- // Flush remaining parallel tools
150
- if (parallelAccum.length > 0) {
151
- batches.push(parallelAccum);
152
- }
153
- return batches;
154
- }
42
+ const log = createComponentLogger('ProductionAgent');
43
+ // Tool-batching constants (canonical home: core/tool-executor.ts)
44
+ import { PARALLELIZABLE_TOOLS, CONDITIONALLY_PARALLEL_TOOLS, extractToolFilePath, groupToolCallsIntoBatches, } from './core/index.js';
45
+ export { PARALLELIZABLE_TOOLS, CONDITIONALLY_PARALLEL_TOOLS, extractToolFilePath, groupToolCallsIntoBatches };
46
+ // Extracted core modules (Phase 2.1 — thin orchestrator delegates)
47
+ import { executeDirectly as coreExecuteDirectly, spawnAgent as coreSpawnAgent, spawnAgentsParallel as coreSpawnAgentsParallel, } from './core/index.js';
48
+ // Phase 2.2: Agent State Machine
49
+ import { createAgentStateMachine } from './core/agent-state-machine.js';
50
+ import { detectIncompleteActionResponse } from './core/completion-analyzer.js';
155
51
  /**
156
52
  * Production-ready agent that composes all features.
157
53
  */
@@ -195,6 +91,8 @@ export class ProductionAgent {
195
91
  agentId;
196
92
  blackboard = null;
197
93
  fileCache = null;
94
+ _sharedContextState = null;
95
+ _sharedEconomicsState = null;
198
96
  budgetPool = null;
199
97
  taskManager = null;
200
98
  store = null;
@@ -207,11 +105,13 @@ export class ProductionAgent {
207
105
  subagentOutputStore = null;
208
106
  autoCheckpointManager = null;
209
107
  toolRecommendation = null;
108
+ stateMachine = null;
210
109
  lastComplexityAssessment = null;
110
+ lastSystemPromptLength = 0;
211
111
  // Duplicate spawn prevention - tracks recently spawned tasks to prevent doom loops
212
112
  // Map<taskKey, { timestamp: number; result: string; queuedChanges: number }>
213
113
  spawnedTasks = new Map();
214
- static SPAWN_DEDUP_WINDOW_MS = 60000; // 60 seconds
114
+ // SPAWN_DEDUP_WINDOW_MS moved to core/subagent-spawner.ts
215
115
  // Parent iteration tracking for total budget calculation
216
116
  parentIterations = 0;
217
117
  // External cancellation token (for subagent timeout propagation)
@@ -301,6 +201,9 @@ export class ProductionAgent {
301
201
  const parentBudgetTokens = baseBudget.maxTokens ?? STANDARD_BUDGET.maxTokens ?? 200000;
302
202
  this.budgetPool = createBudgetPool(parentBudgetTokens, 0.25, 100000);
303
203
  }
204
+ // Shared state for swarm workers (passed from orchestrator via config)
205
+ this._sharedContextState = userConfig.sharedContextState ?? null;
206
+ this._sharedEconomicsState = userConfig.sharedEconomicsState ?? null;
304
207
  // Initialize enabled features
305
208
  this.initializeFeatures();
306
209
  }
@@ -311,7 +214,7 @@ export class ProductionAgent {
311
214
  // Debug output only when DEBUG env var is set
312
215
  if (process.env.DEBUG) {
313
216
  const features = getEnabledFeatures(this.config);
314
- console.log(`[ProductionAgent] Initializing with features: ${features.join(', ')}`);
217
+ log.debug('Initializing with features', { features: features.join(', ') });
315
218
  }
316
219
  // Hooks & Plugins
317
220
  if (isFeatureEnabled(this.config.hooks) && isFeatureEnabled(this.config.plugins)) {
@@ -415,7 +318,7 @@ export class ProductionAgent {
415
318
  });
416
319
  // Load rules asynchronously - tracked for ensureReady()
417
320
  this.initPromises.push(this.rules.loadRules().catch(err => {
418
- console.warn('[ProductionAgent] Failed to load rules:', err);
321
+ log.warn('Failed to load rules', { error: String(err) });
419
322
  }));
420
323
  }
421
324
  // Economics System (Token Budget) - always enabled
@@ -426,7 +329,24 @@ export class ProductionAgent {
426
329
  // Use maxIterations from config as absolute safety cap
427
330
  maxIterations: this.config.maxIterations,
428
331
  targetIterations: Math.min(baseBudget.targetIterations ?? 20, this.config.maxIterations),
332
+ }, this._sharedEconomicsState ?? undefined, this.agentId);
333
+ // Phase 2.2: Agent State Machine - formalizes phase tracking
334
+ // Always enabled - provides structured phase transitions with metrics
335
+ this.stateMachine = createAgentStateMachine();
336
+ // Forward state machine phase transitions as subagent.phase events
337
+ const phaseMap = {
338
+ exploring: 'exploring', planning: 'planning', acting: 'executing', verifying: 'completing',
339
+ };
340
+ const unsubStateMachine = this.stateMachine.subscribe(event => {
341
+ if (event.type === 'phase.changed') {
342
+ this.emit({
343
+ type: 'subagent.phase',
344
+ agentId: this.agentId,
345
+ phase: phaseMap[event.transition.to] ?? 'exploring',
346
+ });
347
+ }
429
348
  });
349
+ this.unsubscribers.push(unsubStateMachine);
430
350
  // Work Log - compaction-resilient summary of agent work
431
351
  // Always enabled - minimal overhead and critical for long-running tasks
432
352
  this.workLog = createWorkLog();
@@ -444,7 +364,7 @@ export class ProductionAgent {
444
364
  this.agentRegistry = new AgentRegistry();
445
365
  // Load user agents asynchronously - tracked for ensureReady()
446
366
  this.initPromises.push(this.agentRegistry.loadUserAgents().catch(err => {
447
- console.warn('[ProductionAgent] Failed to load user agents:', err);
367
+ log.warn('Failed to load user agents', { error: String(err) });
448
368
  }));
449
369
  // Register spawn_agent tool so LLM can delegate to subagents
450
370
  const boundSpawnTool = createBoundSpawnAgentTool((name, task, constraints) => this.spawnAgent(name, task, constraints));
@@ -493,11 +413,16 @@ export class ProductionAgent {
493
413
  : swarmConfig.throttle;
494
414
  this.provider = createThrottledProvider(this.provider, throttleConfig);
495
415
  }
416
+ // Pass codebaseContext so the decomposer can ground tasks in actual project files
417
+ swarmConfig.codebaseContext = this.codebaseContext ?? undefined;
496
418
  this.swarmOrchestrator = createSwarmOrchestrator(swarmConfig, this.provider, this.agentRegistry, (name, task) => this.spawnAgent(name, task), this.blackboard ?? undefined);
497
419
  // Override parent budget pool with swarm's much larger pool so spawnAgent()
498
420
  // allocates from the swarm budget (e.g. 10M tokens) instead of the parent's
499
421
  // generic pool (200K tokens). Without this, workers get 5K emergency budget.
500
422
  this.budgetPool = this.swarmOrchestrator.getBudgetPool().pool;
423
+ // Phase 3.1+3.2: Set shared state so workers inherit it via buildContext()
424
+ this._sharedContextState = this.swarmOrchestrator.getSharedContextState();
425
+ this._sharedEconomicsState = this.swarmOrchestrator.getSharedEconomicsState();
501
426
  }
502
427
  // Cancellation Support
503
428
  if (isFeatureEnabled(this.config.cancellation)) {
@@ -565,7 +490,7 @@ export class ProductionAgent {
565
490
  this.initPromises.push(this.skillManager.loadSkills()
566
491
  .then(() => { }) // Convert to void
567
492
  .catch(err => {
568
- console.warn('[ProductionAgent] Failed to load skills:', err);
493
+ log.warn('Failed to load skills', { error: String(err) });
569
494
  }));
570
495
  }
571
496
  // Context Engineering (Manus-inspired tricks P, Q, R, S, T)
@@ -581,6 +506,10 @@ export class ProductionAgent {
581
506
  maxFailures: 30,
582
507
  maxReferences: 50,
583
508
  });
509
+ // Bind shared context state for cross-worker failure learning (swarm workers only)
510
+ if (this._sharedContextState) {
511
+ this.contextEngineering.setSharedState(this._sharedContextState);
512
+ }
584
513
  // Codebase Context - intelligent code selection for context management
585
514
  // Analyzes repo structure and selects relevant code within token budgets
586
515
  if (this.config.codebaseContext !== false) {
@@ -597,6 +526,10 @@ export class ProductionAgent {
597
526
  cacheResults: true,
598
527
  cacheTTL: 5 * 60 * 1000, // 5 minutes
599
528
  });
529
+ // Forward trace collector so codebase analysis can emit codebase.map entries.
530
+ if (this.traceCollector) {
531
+ this.codebaseContext.traceCollector = this.traceCollector;
532
+ }
600
533
  // Connect LSP manager to codebase context for enhanced code selection
601
534
  // This enables LSP-based relevance boosting (Phase 4.1)
602
535
  if (this.lspManager) {
@@ -951,6 +884,7 @@ export class ProductionAgent {
951
884
  async run(task) {
952
885
  // Ensure all integrations are ready before running
953
886
  await this.ensureReady();
887
+ this.reconcileStaleTasks('run_start');
954
888
  const startTime = Date.now();
955
889
  // Create cancellation context if enabled
956
890
  const cancellationConfig = isFeatureEnabled(this.config.cancellation) ? this.config.cancellation : null;
@@ -958,6 +892,7 @@ export class ProductionAgent {
958
892
  // Start tracing
959
893
  const traceId = this.observability?.tracer?.startTrace('agent.run') || `trace-${Date.now()}`;
960
894
  this.emit({ type: 'start', task, traceId });
895
+ this.emit({ type: 'run.before', task });
961
896
  this.observability?.logger?.info('Agent started', { task });
962
897
  // Lesson 26: Start trace capture
963
898
  // If session is already active (managed by REPL), start a task within it.
@@ -977,6 +912,12 @@ export class ProductionAgent {
977
912
  await this.traceCollector?.startSession(traceSessionId, task, this.config.model || 'default', sessionMetadata);
978
913
  }
979
914
  try {
915
+ let runSuccess = true;
916
+ let runFailureReason;
917
+ let completion = {
918
+ success: true,
919
+ reason: 'completed',
920
+ };
980
921
  // Check for cancellation before starting
981
922
  cancellationToken?.throwIfCancellationRequested();
982
923
  // Classify task complexity for scaling guidance
@@ -986,6 +927,27 @@ export class ProductionAgent {
986
927
  // Check if swarm mode should handle this task
987
928
  if (this.swarmOrchestrator) {
988
929
  const swarmResult = await this.runSwarm(task);
930
+ if (!swarmResult.success) {
931
+ runSuccess = false;
932
+ runFailureReason = swarmResult.summary || 'Swarm reported unsuccessful execution';
933
+ completion = {
934
+ success: false,
935
+ reason: 'swarm_failure',
936
+ details: runFailureReason,
937
+ };
938
+ }
939
+ // Guard against summaries that still indicate pending work.
940
+ if (detectIncompleteActionResponse(swarmResult.summary || '')) {
941
+ this.emit({ type: 'completion.before', reason: 'future_intent' });
942
+ runSuccess = false;
943
+ runFailureReason = 'Swarm summary indicates pending, unexecuted work';
944
+ completion = {
945
+ success: false,
946
+ reason: 'future_intent',
947
+ details: runFailureReason,
948
+ futureIntentDetected: true,
949
+ };
950
+ }
989
951
  // Store swarm summary as an assistant message for the response
990
952
  this.state.messages.push({ role: 'assistant', content: swarmResult.summary });
991
953
  }
@@ -994,7 +956,17 @@ export class ProductionAgent {
994
956
  await this.createAndExecutePlan(task);
995
957
  }
996
958
  else {
997
- await this.executeDirectly(task);
959
+ const directResult = await this.executeDirectly(task);
960
+ if (!directResult.success) {
961
+ runSuccess = false;
962
+ runFailureReason = directResult.failureReason || directResult.terminationReason;
963
+ }
964
+ completion = {
965
+ success: directResult.success,
966
+ reason: directResult.terminationReason,
967
+ ...(directResult.failureReason ? { details: directResult.failureReason } : {}),
968
+ ...(directResult.openTasks ? { openTasks: directResult.openTasks } : {}),
969
+ };
998
970
  }
999
971
  // Get final response - find the LAST assistant message (not just check if last message is assistant)
1000
972
  const assistantMessages = this.state.messages.filter(m => m.role === 'assistant');
@@ -1002,28 +974,101 @@ export class ProductionAgent {
1002
974
  const response = typeof lastAssistantMessage?.content === 'string'
1003
975
  ? lastAssistantMessage.content
1004
976
  : '';
977
+ // Final guardrail: never mark a run successful if the final answer is "I'll do X".
978
+ if (runSuccess && detectIncompleteActionResponse(response)) {
979
+ this.emit({ type: 'completion.before', reason: 'future_intent' });
980
+ runSuccess = false;
981
+ runFailureReason = 'Final response indicates pending, unexecuted work';
982
+ completion = {
983
+ success: false,
984
+ reason: 'future_intent',
985
+ details: runFailureReason,
986
+ futureIntentDetected: true,
987
+ };
988
+ }
989
+ if (runSuccess && completion.reason === 'completed') {
990
+ this.reconcileStaleTasks('run_end');
991
+ const openTasks = this.getOpenTasksSummary();
992
+ if (openTasks && (openTasks.inProgress > 0 || openTasks.pending > 0)) {
993
+ this.emit({ type: 'completion.before', reason: 'open_tasks' });
994
+ runSuccess = false;
995
+ runFailureReason = `Open tasks remain: ${openTasks.pending} pending, ${openTasks.inProgress} in_progress`;
996
+ completion = {
997
+ success: false,
998
+ reason: 'open_tasks',
999
+ details: runFailureReason,
1000
+ openTasks,
1001
+ };
1002
+ this.emit({
1003
+ type: 'completion.blocked',
1004
+ reasons: [
1005
+ runFailureReason,
1006
+ openTasks.blocked > 0 ? `${openTasks.blocked} pending tasks are blocked` : '',
1007
+ ].filter(Boolean),
1008
+ openTasks,
1009
+ diagnostics: {
1010
+ forceTextOnly: false,
1011
+ availableTasks: this.taskManager?.getAvailableTasks().length ?? 0,
1012
+ pendingWithOwner: 0,
1013
+ },
1014
+ });
1015
+ }
1016
+ }
1005
1017
  // Finalize
1006
1018
  const duration = Date.now() - startTime;
1007
1019
  this.state.metrics.duration = duration;
1008
- this.state.metrics.successCount = (this.state.metrics.successCount ?? 0) + 1;
1020
+ if (runSuccess) {
1021
+ this.state.metrics.successCount = (this.state.metrics.successCount ?? 0) + 1;
1022
+ }
1023
+ else {
1024
+ this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
1025
+ }
1009
1026
  await this.observability?.tracer?.endTrace();
1010
1027
  const result = {
1011
- success: true,
1028
+ success: runSuccess,
1012
1029
  response,
1030
+ ...(runSuccess ? {} : { error: runFailureReason ?? 'Task failed' }),
1013
1031
  metrics: this.getMetrics(),
1014
1032
  messages: this.state.messages,
1015
1033
  traceId,
1016
1034
  plan: this.state.plan,
1035
+ completion,
1036
+ };
1037
+ result.completion.recovery = {
1038
+ intraRunRetries: this.state.metrics.retryCount ?? 0,
1039
+ autoLoopRuns: 0,
1040
+ terminal: !runSuccess,
1041
+ reasonChain: [completion.reason],
1017
1042
  };
1018
1043
  this.emit({ type: 'complete', result });
1019
- this.observability?.logger?.info('Agent completed', { duration, success: true });
1044
+ this.emit({
1045
+ type: 'completion.after',
1046
+ success: runSuccess,
1047
+ reason: completion.reason,
1048
+ ...(completion.details ? { details: completion.details } : {}),
1049
+ });
1050
+ this.emit({
1051
+ type: 'run.after',
1052
+ success: runSuccess,
1053
+ reason: completion.reason,
1054
+ ...(completion.details ? { details: completion.details } : {}),
1055
+ });
1056
+ this.observability?.logger?.info('Agent completed', {
1057
+ duration,
1058
+ success: runSuccess,
1059
+ ...(runFailureReason ? { failureReason: runFailureReason } : {}),
1060
+ });
1020
1061
  // Lesson 26: End trace capture
1021
1062
  // If task is active (REPL mode), end the task. Otherwise end the session (single-task mode).
1022
1063
  if (this.traceCollector?.isTaskActive()) {
1023
- await this.traceCollector.endTask({ success: true, output: response });
1064
+ await this.traceCollector.endTask(runSuccess
1065
+ ? { success: true, output: response }
1066
+ : { success: false, failureReason: runFailureReason ?? 'Task failed', output: response });
1024
1067
  }
1025
1068
  else if (this.traceCollector?.isSessionActive()) {
1026
- await this.traceCollector.endSession({ success: true, output: response });
1069
+ await this.traceCollector.endSession(runSuccess
1070
+ ? { success: true, output: response }
1071
+ : { success: false, failureReason: runFailureReason ?? 'Task failed', output: response });
1027
1072
  }
1028
1073
  return result;
1029
1074
  }
@@ -1044,6 +1089,18 @@ export class ProductionAgent {
1044
1089
  else if (this.traceCollector?.isSessionActive()) {
1045
1090
  await this.traceCollector.endSession({ success: false, failureReason: `Cancelled: ${error.message}` });
1046
1091
  }
1092
+ this.emit({
1093
+ type: 'completion.after',
1094
+ success: false,
1095
+ reason: 'cancelled',
1096
+ details: `Cancelled: ${error.message}`,
1097
+ });
1098
+ this.emit({
1099
+ type: 'run.after',
1100
+ success: false,
1101
+ reason: 'cancelled',
1102
+ details: `Cancelled: ${error.message}`,
1103
+ });
1047
1104
  return {
1048
1105
  success: false,
1049
1106
  response: '',
@@ -1051,6 +1108,11 @@ export class ProductionAgent {
1051
1108
  metrics: this.getMetrics(),
1052
1109
  messages: this.state.messages,
1053
1110
  traceId,
1111
+ completion: {
1112
+ success: false,
1113
+ reason: 'cancelled',
1114
+ details: `Cancelled: ${error.message}`,
1115
+ },
1054
1116
  };
1055
1117
  }
1056
1118
  this.observability?.tracer?.recordError(error);
@@ -1058,6 +1120,9 @@ export class ProductionAgent {
1058
1120
  this.state.metrics.failureCount = (this.state.metrics.failureCount ?? 0) + 1;
1059
1121
  this.emit({ type: 'error', error: error.message });
1060
1122
  this.observability?.logger?.error('Agent failed', { error: error.message });
1123
+ const completionReason = error.message.includes('failed to complete requested action')
1124
+ ? 'incomplete_action'
1125
+ : 'error';
1061
1126
  // Lesson 26: End trace capture on error
1062
1127
  if (this.traceCollector?.isTaskActive()) {
1063
1128
  await this.traceCollector.endTask({ success: false, failureReason: error.message });
@@ -1065,14 +1130,26 @@ export class ProductionAgent {
1065
1130
  else if (this.traceCollector?.isSessionActive()) {
1066
1131
  await this.traceCollector.endSession({ success: false, failureReason: error.message });
1067
1132
  }
1068
- return {
1133
+ const errorResult = {
1069
1134
  success: false,
1070
1135
  response: '',
1071
1136
  error: error.message,
1072
1137
  metrics: this.getMetrics(),
1073
1138
  messages: this.state.messages,
1074
1139
  traceId,
1140
+ completion: {
1141
+ success: false,
1142
+ reason: completionReason,
1143
+ details: error.message,
1144
+ },
1075
1145
  };
1146
+ this.emit({
1147
+ type: 'run.after',
1148
+ success: false,
1149
+ reason: completionReason,
1150
+ details: error.message,
1151
+ });
1152
+ return errorResult;
1076
1153
  }
1077
1154
  finally {
1078
1155
  // Dispose cancellation context on completion
@@ -1099,7 +1176,7 @@ export class ProductionAgent {
1099
1176
  this.planning.completeTask(currentTask.id);
1100
1177
  this.emit({ type: 'task.complete', task: currentTask });
1101
1178
  }
1102
- catch (err) {
1179
+ catch (_err) {
1103
1180
  this.planning.failTask(currentTask.id);
1104
1181
  this.observability?.logger?.warn('Plan task failed', { taskId: currentTask.id });
1105
1182
  // Continue with other tasks if possible
@@ -1133,6 +1210,133 @@ export class ProductionAgent {
1133
1210
  const { SwarmEventBridge } = await import('./integrations/swarm/swarm-event-bridge.js');
1134
1211
  const bridge = new SwarmEventBridge({ outputDir: '.agent/swarm-live' });
1135
1212
  const unsubBridge = bridge.attach(this.swarmOrchestrator);
1213
+ const writeCodeMapSnapshot = () => {
1214
+ if (!this.codebaseContext) {
1215
+ return;
1216
+ }
1217
+ const repoMap = this.codebaseContext.getRepoMap();
1218
+ if (!repoMap) {
1219
+ return;
1220
+ }
1221
+ // Build dependency edges from the dependency graph
1222
+ const depEdges = [];
1223
+ for (const [file, deps] of repoMap.dependencyGraph) {
1224
+ depEdges.push({ file, imports: Array.from(deps) });
1225
+ }
1226
+ // Build top chunks sorted by importance
1227
+ const chunks = Array.from(repoMap.chunks.values());
1228
+ const topChunks = chunks
1229
+ .sort((a, b) => b.importance - a.importance)
1230
+ .slice(0, 100)
1231
+ .map(c => ({
1232
+ filePath: c.filePath,
1233
+ tokenCount: c.tokenCount,
1234
+ importance: c.importance,
1235
+ type: c.type,
1236
+ symbols: c.symbolDetails,
1237
+ }));
1238
+ const files = chunks.map((chunk) => ({
1239
+ filePath: chunk.filePath,
1240
+ directory: path.dirname(chunk.filePath) === '.' ? '' : path.dirname(chunk.filePath),
1241
+ fileName: path.basename(chunk.filePath),
1242
+ tokenCount: chunk.tokenCount,
1243
+ importance: chunk.importance,
1244
+ type: chunk.type,
1245
+ symbols: chunk.symbolDetails,
1246
+ inDegree: repoMap.reverseDependencyGraph.get(chunk.filePath)?.size ?? 0,
1247
+ outDegree: repoMap.dependencyGraph.get(chunk.filePath)?.size ?? 0,
1248
+ }));
1249
+ bridge.writeCodeMapSnapshot({
1250
+ totalFiles: repoMap.chunks.size,
1251
+ totalTokens: repoMap.totalTokens,
1252
+ entryPoints: repoMap.entryPoints,
1253
+ coreModules: repoMap.coreModules,
1254
+ dependencyEdges: depEdges,
1255
+ files,
1256
+ topChunks,
1257
+ });
1258
+ };
1259
+ let codeMapRefreshInFlight = false;
1260
+ let codeMapRefreshTimer = null;
1261
+ const refreshAndWriteCodeMapSnapshot = async () => {
1262
+ if (!this.codebaseContext || codeMapRefreshInFlight) {
1263
+ return;
1264
+ }
1265
+ codeMapRefreshInFlight = true;
1266
+ try {
1267
+ // Re-analyze from disk so snapshots include newly created files during swarm execution.
1268
+ this.codebaseContext.clearCache();
1269
+ await this.codebaseContext.analyze();
1270
+ writeCodeMapSnapshot();
1271
+ }
1272
+ catch {
1273
+ // Best effort
1274
+ }
1275
+ finally {
1276
+ codeMapRefreshInFlight = false;
1277
+ }
1278
+ };
1279
+ // Write observability snapshots to swarm-live/ on relevant events
1280
+ const unsubSnapshots = this.swarmOrchestrator.subscribe(event => {
1281
+ // Write codemap snapshot when tasks are loaded.
1282
+ if (event.type === 'swarm.tasks.loaded' && this.codebaseContext) {
1283
+ try {
1284
+ writeCodeMapSnapshot();
1285
+ }
1286
+ catch {
1287
+ // Best effort — don't crash the swarm
1288
+ }
1289
+ }
1290
+ // Refresh codemap after each completed wave to avoid stale 0-file snapshots.
1291
+ if (event.type === 'swarm.wave.complete' && this.codebaseContext) {
1292
+ void refreshAndWriteCodeMapSnapshot();
1293
+ }
1294
+ if (event.type === 'swarm.task.completed' && this.codebaseContext) {
1295
+ if (codeMapRefreshTimer) {
1296
+ clearTimeout(codeMapRefreshTimer);
1297
+ }
1298
+ codeMapRefreshTimer = setTimeout(() => {
1299
+ void refreshAndWriteCodeMapSnapshot();
1300
+ }, 1200);
1301
+ }
1302
+ // Write blackboard.json on wave completion or task completion
1303
+ if ((event.type === 'swarm.wave.complete' || event.type === 'swarm.task.completed') && this.blackboard) {
1304
+ try {
1305
+ const findings = this.blackboard.getAllFindings();
1306
+ bridge.writeBlackboardSnapshot({
1307
+ findings: findings.map(f => ({
1308
+ id: f.id ?? '',
1309
+ topic: f.topic ?? '',
1310
+ type: f.type ?? '',
1311
+ agentId: f.agentId ?? '',
1312
+ confidence: f.confidence ?? 0,
1313
+ content: (f.content ?? '').slice(0, 500),
1314
+ })),
1315
+ claims: [],
1316
+ updatedAt: new Date().toISOString(),
1317
+ });
1318
+ }
1319
+ catch {
1320
+ // Best effort
1321
+ }
1322
+ }
1323
+ // Write budget-pool.json on budget updates
1324
+ if (event.type === 'swarm.budget.update' && this.budgetPool) {
1325
+ try {
1326
+ const stats = this.budgetPool.getStats();
1327
+ bridge.writeBudgetPoolSnapshot({
1328
+ poolTotal: stats.totalTokens,
1329
+ poolUsed: stats.tokensUsed,
1330
+ poolRemaining: stats.tokensRemaining,
1331
+ allocations: [],
1332
+ updatedAt: new Date().toISOString(),
1333
+ });
1334
+ }
1335
+ catch {
1336
+ // Best effort
1337
+ }
1338
+ }
1339
+ });
1136
1340
  // Bridge swarm events into JSONL trace pipeline
1137
1341
  const traceCollector = this.traceCollector;
1138
1342
  let unsubTrace;
@@ -1289,6 +1493,22 @@ export class ProductionAgent {
1289
1493
  });
1290
1494
  }
1291
1495
  try {
1496
+ // Ensure codebase context is analyzed before decomposition so repo map is available
1497
+ if (this.codebaseContext && !this.codebaseContext.getRepoMap()) {
1498
+ try {
1499
+ await this.codebaseContext.analyze();
1500
+ }
1501
+ catch {
1502
+ // non-fatal — decomposer will work without codebase context
1503
+ }
1504
+ }
1505
+ // Write codemap snapshot immediately so dashboard can render even if decomposition fails.
1506
+ try {
1507
+ writeCodeMapSnapshot();
1508
+ }
1509
+ catch {
1510
+ // Best effort
1511
+ }
1292
1512
  const result = await this.swarmOrchestrator.execute(task);
1293
1513
  // Populate task DAG for dashboard after execution
1294
1514
  bridge.setTasks(result.tasks);
@@ -1302,967 +1522,24 @@ export class ProductionAgent {
1302
1522
  return result;
1303
1523
  }
1304
1524
  finally {
1525
+ if (codeMapRefreshTimer) {
1526
+ clearTimeout(codeMapRefreshTimer);
1527
+ }
1305
1528
  unsubTrace?.();
1529
+ unsubSnapshots();
1306
1530
  unsubBridge();
1307
1531
  bridge.close();
1308
1532
  unsubSwarm();
1309
1533
  }
1310
1534
  }
1311
1535
  /**
1312
- * Execute a task directly without planning.
1536
+ * Execute a task directly without planning (delegates to core/execution-loop).
1313
1537
  */
1314
1538
  async executeDirectly(task) {
1315
- // Build messages
1316
- const messages = this.buildMessages(task);
1317
- // Reset economics for new task
1318
- this.economics?.reset();
1319
- // Reflection configuration
1320
- const reflectionConfig = this.config.reflection;
1321
- const reflectionEnabled = isFeatureEnabled(reflectionConfig);
1322
- const autoReflect = reflectionEnabled && reflectionConfig.autoReflect;
1323
- const maxReflectionAttempts = reflectionEnabled
1324
- ? (reflectionConfig.maxAttempts || 3)
1325
- : 1;
1326
- const confidenceThreshold = reflectionEnabled
1327
- ? (reflectionConfig.confidenceThreshold || 0.8)
1328
- : 0.8;
1329
- let reflectionAttempt = 0;
1330
- let lastResponse = '';
1331
- let incompleteActionRetries = 0;
1332
- const requestedArtifact = this.extractRequestedArtifact(task);
1333
- const executedToolNames = new Set();
1334
- // Outer loop for reflection (if enabled)
1335
- while (reflectionAttempt < maxReflectionAttempts) {
1336
- reflectionAttempt++;
1337
- // Agent loop - now uses economics-based budget checking
1338
- while (true) {
1339
- this.state.iteration++;
1340
- // Record iteration start for tracing
1341
- this.traceCollector?.record({
1342
- type: 'iteration.start',
1343
- data: { iterationNumber: this.state.iteration },
1344
- });
1345
- // =======================================================================
1346
- // CANCELLATION CHECK
1347
- // Checks internal cancellation (ESC key) — always immediate.
1348
- // External cancellation (parent timeout) is checked after economics
1349
- // to allow graceful wrapup when wrapup has been requested.
1350
- // =======================================================================
1351
- if (this.cancellation?.isCancelled) {
1352
- this.cancellation.token.throwIfCancellationRequested();
1353
- }
1354
- // =======================================================================
1355
- // RESOURCE CHECK - system resource limits
1356
- // =======================================================================
1357
- if (this.resourceManager) {
1358
- const resourceCheck = this.resourceManager.check();
1359
- if (!resourceCheck.canContinue) {
1360
- this.observability?.logger?.warn('Resource limit reached', {
1361
- status: resourceCheck.status,
1362
- message: resourceCheck.message,
1363
- });
1364
- this.emit({ type: 'error', error: resourceCheck.message || 'Resource limit exceeded' });
1365
- break;
1366
- }
1367
- // Log warnings for elevated usage
1368
- if (resourceCheck.status === 'warning' || resourceCheck.status === 'critical') {
1369
- this.observability?.logger?.info(`Resource status: ${resourceCheck.status}`, {
1370
- message: resourceCheck.message,
1371
- });
1372
- }
1373
- }
1374
- // =======================================================================
1375
- // ECONOMICS CHECK (Token Budget) - replaces hard iteration limit
1376
- // With recovery: try compaction before giving up on token limits
1377
- // =======================================================================
1378
- let forceTextOnly = false; // Track if we should skip tool execution
1379
- let budgetInjectedPrompt;
1380
- if (this.economics) {
1381
- const budgetCheck = this.economics.checkBudget();
1382
- // Capture forceTextOnly and injectedPrompt for later use
1383
- forceTextOnly = budgetCheck.forceTextOnly ?? false;
1384
- budgetInjectedPrompt = budgetCheck.injectedPrompt;
1385
- if (!budgetCheck.canContinue) {
1386
- // ===================================================================
1387
- // RECOVERY ATTEMPT: Try emergency context reduction before giving up
1388
- // Only for token-based limits, not iteration limits
1389
- // ===================================================================
1390
- const isTokenLimit = budgetCheck.budgetType === 'tokens' || budgetCheck.budgetType === 'cost';
1391
- const alreadyTriedRecovery = this.state._recoveryAttempted === true;
1392
- if (isTokenLimit && !alreadyTriedRecovery) {
1393
- this.observability?.logger?.info('Budget limit reached, attempting recovery via context reduction', {
1394
- reason: budgetCheck.reason,
1395
- percentUsed: budgetCheck.percentUsed,
1396
- });
1397
- this.emit({
1398
- type: 'resilience.retry',
1399
- reason: 'budget_limit_compaction',
1400
- attempt: 1,
1401
- maxAttempts: 1,
1402
- });
1403
- this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1404
- // Mark that we've attempted recovery to prevent infinite loops
1405
- this.state._recoveryAttempted = true;
1406
- const tokensBefore = this.estimateContextTokens(messages);
1407
- // Step 1: Compact tool outputs aggressively
1408
- this.compactToolOutputs();
1409
- // Step 2: Emergency truncation - keep system + last N messages
1410
- const PRESERVE_RECENT = 10;
1411
- if (messages.length > PRESERVE_RECENT + 2) {
1412
- const systemMessage = messages.find(m => m.role === 'system');
1413
- const recentMessages = messages.slice(-(PRESERVE_RECENT));
1414
- // Rebuild message array
1415
- messages.length = 0;
1416
- if (systemMessage) {
1417
- messages.push(systemMessage);
1418
- }
1419
- messages.push({
1420
- role: 'system',
1421
- content: `[CONTEXT REDUCED: Earlier messages were removed to stay within budget. Conversation continues from recent context.]`,
1422
- });
1423
- messages.push(...recentMessages);
1424
- // Inject work log after emergency truncation to prevent amnesia
1425
- if (this.workLog?.hasContent()) {
1426
- const workLogMessage = {
1427
- role: 'user',
1428
- content: this.workLog.toCompactString(),
1429
- };
1430
- messages.push(workLogMessage);
1431
- }
1432
- // Update state messages too
1433
- this.state.messages.length = 0;
1434
- this.state.messages.push(...messages);
1435
- }
1436
- const tokensAfter = this.estimateContextTokens(messages);
1437
- const reduction = Math.round((1 - tokensAfter / tokensBefore) * 100);
1438
- if (tokensAfter < tokensBefore * 0.8) {
1439
- // Significant reduction achieved
1440
- this.observability?.logger?.info('Context reduction successful, continuing execution', {
1441
- tokensBefore,
1442
- tokensAfter,
1443
- reduction,
1444
- });
1445
- this.emit({
1446
- type: 'resilience.recovered',
1447
- reason: 'budget_limit_compaction',
1448
- attempts: 1,
1449
- });
1450
- this.emit({
1451
- type: 'compaction.auto',
1452
- tokensBefore,
1453
- tokensAfter,
1454
- messagesCompacted: tokensBefore - tokensAfter,
1455
- });
1456
- // Continue execution instead of breaking
1457
- continue;
1458
- }
1459
- this.observability?.logger?.warn('Context reduction insufficient', {
1460
- tokensBefore,
1461
- tokensAfter,
1462
- reduction,
1463
- });
1464
- }
1465
- // Hard limit reached and recovery failed (or not applicable)
1466
- this.observability?.logger?.warn('Budget limit reached', {
1467
- reason: budgetCheck.reason,
1468
- budgetType: budgetCheck.budgetType,
1469
- });
1470
- // Emit appropriate event
1471
- if (budgetCheck.budgetType === 'iterations') {
1472
- const totalIter = this.getTotalIterations();
1473
- const iterMsg = this.parentIterations > 0
1474
- ? `${this.state.iteration} + ${this.parentIterations} parent = ${totalIter}`
1475
- : `${this.state.iteration}`;
1476
- this.emit({ type: 'error', error: `Max iterations reached (${iterMsg})` });
1477
- }
1478
- else {
1479
- this.emit({ type: 'error', error: budgetCheck.reason || 'Budget exceeded' });
1480
- }
1481
- break;
1482
- }
1483
- // Check for soft limits and potential extension
1484
- if (budgetCheck.isSoftLimit && budgetCheck.suggestedAction === 'request_extension') {
1485
- this.observability?.logger?.info('Approaching budget limit', {
1486
- reason: budgetCheck.reason,
1487
- percentUsed: budgetCheck.percentUsed,
1488
- });
1489
- // Could request extension here if handler is set
1490
- }
1491
- }
1492
- else {
1493
- // Fallback to simple iteration check if economics not available
1494
- // Use getTotalIterations() to account for parent iterations (subagent hierarchy)
1495
- if (this.getTotalIterations() >= this.config.maxIterations) {
1496
- this.observability?.logger?.warn('Max iterations reached', {
1497
- iteration: this.state.iteration,
1498
- parentIterations: this.parentIterations,
1499
- total: this.getTotalIterations(),
1500
- });
1501
- break;
1502
- }
1503
- }
1504
- // =======================================================================
1505
- // GRACEFUL WRAPUP CHECK
1506
- // If a wrapup has been requested (e.g., timeout approaching), convert
1507
- // to forceTextOnly + inject wrapup prompt for structured summary.
1508
- // Must come after economics check (which may also set forceTextOnly).
1509
- // =======================================================================
1510
- if (this.wrapupRequested && !forceTextOnly) {
1511
- forceTextOnly = true;
1512
- budgetInjectedPrompt = TIMEOUT_WRAPUP_PROMPT;
1513
- this.wrapupRequested = false;
1514
- }
1515
- // =======================================================================
1516
- // EXTERNAL CANCELLATION CHECK (deferred from above)
1517
- // Checked after wrapup so that graceful wrapup can intercept the timeout.
1518
- // If wrapup was already requested and converted to forceTextOnly above,
1519
- // we skip throwing here to allow one more text-only turn for the summary.
1520
- // =======================================================================
1521
- if (this.externalCancellationToken?.isCancellationRequested && !forceTextOnly) {
1522
- this.externalCancellationToken.throwIfCancellationRequested();
1523
- }
1524
- // =======================================================================
1525
- // INTELLIGENT LOOP DETECTION & NUDGE INJECTION
1526
- // Uses economics system for doom loops, exploration saturation, etc.
1527
- // =======================================================================
1528
- if (this.economics && budgetInjectedPrompt) {
1529
- // Inject contextual guidance from economics system
1530
- messages.push({
1531
- role: 'user',
1532
- content: budgetInjectedPrompt,
1533
- });
1534
- const loopState = this.economics.getLoopState();
1535
- const phaseState = this.economics.getPhaseState();
1536
- this.observability?.logger?.info('Loop detection - injecting guidance', {
1537
- iteration: this.state.iteration,
1538
- doomLoop: loopState.doomLoopDetected,
1539
- phase: phaseState.phase,
1540
- filesRead: phaseState.uniqueFilesRead,
1541
- filesModified: phaseState.filesModified,
1542
- shouldTransition: phaseState.shouldTransition,
1543
- forceTextOnly,
1544
- });
1545
- }
1546
- // =======================================================================
1547
- // RECITATION INJECTION (Trick Q) - Combat "lost in middle" attention
1548
- // =======================================================================
1549
- if (this.contextEngineering) {
1550
- if (process.env.DEBUG_LLM) {
1551
- if (process.env.DEBUG)
1552
- console.log(`[recitation] Before: ${messages.length} messages`);
1553
- }
1554
- const enrichedMessages = this.contextEngineering.injectRecitation(messages, {
1555
- goal: task,
1556
- plan: this.state.plan ? {
1557
- description: this.state.plan.goal || task,
1558
- tasks: this.state.plan.tasks.map(t => ({
1559
- id: t.id,
1560
- description: t.description,
1561
- status: t.status,
1562
- })),
1563
- currentTaskIndex: this.state.plan.tasks.findIndex(t => t.status === 'in_progress'),
1564
- } : undefined,
1565
- activeFiles: this.economics?.getProgress().filesModified
1566
- ? [`${this.economics.getProgress().filesModified} files modified`]
1567
- : undefined,
1568
- recentErrors: this.contextEngineering.getFailureInsights().slice(0, 2),
1569
- });
1570
- if (process.env.DEBUG_LLM) {
1571
- if (process.env.DEBUG)
1572
- console.log(`[recitation] After: ${enrichedMessages?.length ?? 'null/undefined'} messages`);
1573
- }
1574
- // Only replace if we got a DIFFERENT array back (avoid clearing same reference)
1575
- // When no injection needed, injectRecitation returns the same array reference
1576
- if (enrichedMessages && enrichedMessages !== messages && enrichedMessages.length > 0) {
1577
- messages.length = 0;
1578
- messages.push(...enrichedMessages);
1579
- }
1580
- else if (!enrichedMessages || enrichedMessages.length === 0) {
1581
- console.warn('[executeDirectly] Recitation returned empty/null messages, keeping original');
1582
- }
1583
- // If enrichedMessages === messages, we don't need to do anything (same reference)
1584
- // Update recitation frequency based on context size
1585
- const contextTokens = messages.reduce((sum, m) => sum + (m.content?.length || 0) / 4, 0);
1586
- this.contextEngineering.updateRecitationFrequency(contextTokens);
1587
- }
1588
- // =======================================================================
1589
- // FAILURE CONTEXT INJECTION (Trick S) - Learn from mistakes
1590
- // =======================================================================
1591
- if (this.contextEngineering) {
1592
- const failureContext = this.contextEngineering.getFailureContext(5);
1593
- if (failureContext) {
1594
- // Insert failure context before the last user message
1595
- // (Using reverse iteration for ES2022 compatibility)
1596
- let lastUserIdx = -1;
1597
- for (let i = messages.length - 1; i >= 0; i--) {
1598
- if (messages[i].role === 'user') {
1599
- lastUserIdx = i;
1600
- break;
1601
- }
1602
- }
1603
- if (lastUserIdx > 0) {
1604
- messages.splice(lastUserIdx, 0, {
1605
- role: 'system',
1606
- content: failureContext,
1607
- });
1608
- }
1609
- }
1610
- }
1611
- // =====================================================================
1612
- // INJECTION BUDGET ANALYSIS (Phase 2 - monitoring mode)
1613
- // Collects stats on context injections without gating; logs when
1614
- // budget would have dropped items. Validates system before enabling gating.
1615
- // =====================================================================
1616
- if (this.injectionBudget) {
1617
- const proposals = [];
1618
- if (budgetInjectedPrompt) {
1619
- proposals.push({ name: 'budget_warning', priority: 0, maxTokens: 500, content: budgetInjectedPrompt });
1620
- }
1621
- // Approximate recitation content (actual injection handled above)
1622
- if (this.contextEngineering) {
1623
- const failureCtx = this.contextEngineering.getFailureContext(5);
1624
- if (failureCtx) {
1625
- proposals.push({ name: 'failure_context', priority: 2, maxTokens: 300, content: failureCtx });
1626
- }
1627
- }
1628
- if (proposals.length > 0) {
1629
- const accepted = this.injectionBudget.allocate(proposals);
1630
- const stats = this.injectionBudget.getLastStats();
1631
- if (stats && stats.droppedNames.length > 0 && process.env.DEBUG) {
1632
- console.log(`[injection-budget] Would drop: ${stats.droppedNames.join(', ')} (${stats.proposedTokens} proposed, ${stats.acceptedTokens} accepted)`);
1633
- }
1634
- // Log total injection overhead for observability
1635
- if (stats && process.env.DEBUG_LLM) {
1636
- console.log(`[injection-budget] Iteration ${this.state.iteration}: ${accepted.length}/${proposals.length} injections, ~${stats.acceptedTokens} tokens`);
1637
- }
1638
- }
1639
- }
1640
- // =====================================================================
1641
- // RESILIENT LLM CALL: Empty response retries + max_tokens continuation
1642
- // =====================================================================
1643
- // Get resilience config
1644
- const resilienceConfig = typeof this.config.resilience === 'object'
1645
- ? this.config.resilience
1646
- : {};
1647
- const resilienceEnabled = isFeatureEnabled(this.config.resilience);
1648
- const MAX_EMPTY_RETRIES = resilienceConfig.maxEmptyRetries ?? 2;
1649
- const MAX_CONTINUATIONS = resilienceConfig.maxContinuations ?? 3;
1650
- const AUTO_CONTINUE = resilienceConfig.autoContinue ?? true;
1651
- const MIN_CONTENT_LENGTH = resilienceConfig.minContentLength ?? 1;
1652
- const INCOMPLETE_ACTION_RECOVERY = resilienceConfig.incompleteActionRecovery ?? true;
1653
- const MAX_INCOMPLETE_ACTION_RETRIES = resilienceConfig.maxIncompleteActionRetries ?? 2;
1654
- const ENFORCE_REQUESTED_ARTIFACTS = resilienceConfig.enforceRequestedArtifacts ?? true;
1655
- // =================================================================
1656
- // PRE-FLIGHT BUDGET CHECK: Estimate if LLM call would exceed budget
1657
- // Catches cases where we're at e.g. 120k and next call adds ~35k
1658
- // =================================================================
1659
- if (this.economics && !forceTextOnly) {
1660
- const estimatedInputTokens = this.estimateContextTokens(messages);
1661
- const estimatedOutputTokens = 4096; // Conservative output estimate
1662
- const currentUsage = this.economics.getUsage();
1663
- const budget = this.economics.getBudget();
1664
- const projectedTotal = currentUsage.tokens + estimatedInputTokens + estimatedOutputTokens;
1665
- if (projectedTotal > budget.maxTokens) {
1666
- this.observability?.logger?.warn('Pre-flight budget check: projected overshoot', {
1667
- currentTokens: currentUsage.tokens,
1668
- estimatedInput: estimatedInputTokens,
1669
- projectedTotal,
1670
- maxTokens: budget.maxTokens,
1671
- });
1672
- // Inject wrap-up prompt if not already injected
1673
- if (!budgetInjectedPrompt) {
1674
- messages.push({
1675
- role: 'user',
1676
- content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
1677
- });
1678
- this.state.messages.push({
1679
- role: 'user',
1680
- content: '[System] BUDGET CRITICAL: This is your LAST response. Summarize findings concisely and stop. Do NOT call tools.',
1681
- });
1682
- }
1683
- forceTextOnly = true;
1684
- }
1685
- }
1686
- let response = await this.callLLM(messages);
1687
- let emptyRetries = 0;
1688
- let continuations = 0;
1689
- // Phase 1: Handle empty responses with retry (if resilience enabled)
1690
- while (resilienceEnabled && emptyRetries < MAX_EMPTY_RETRIES) {
1691
- const hasContent = response.content && response.content.length >= MIN_CONTENT_LENGTH;
1692
- const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
1693
- const hasThinking = response.thinking && response.thinking.length > 0;
1694
- if (hasContent || hasToolCalls) {
1695
- // Valid visible response
1696
- if (emptyRetries > 0) {
1697
- this.emit({
1698
- type: 'resilience.recovered',
1699
- reason: 'empty_response',
1700
- attempts: emptyRetries,
1701
- });
1702
- this.observability?.logger?.info('Recovered from empty response', {
1703
- retries: emptyRetries,
1704
- });
1705
- }
1706
- break;
1707
- }
1708
- if (hasThinking && !hasContent && !hasToolCalls) {
1709
- // Model produced reasoning but no visible output (e.g., DeepSeek-R1, GLM-4, QwQ).
1710
- // Give ONE targeted nudge, then accept thinking as content.
1711
- if (emptyRetries === 0) {
1712
- emptyRetries++;
1713
- this.emit({
1714
- type: 'resilience.retry',
1715
- reason: 'thinking_only_response',
1716
- attempt: emptyRetries,
1717
- maxAttempts: MAX_EMPTY_RETRIES,
1718
- });
1719
- this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1720
- this.observability?.logger?.warn('Thinking-only response (no visible content), nudging', {
1721
- thinkingLength: response.thinking.length,
1722
- });
1723
- const thinkingNudge = {
1724
- role: 'user',
1725
- content: '[System: You produced reasoning but no visible response. Please provide your answer based on your analysis.]',
1726
- };
1727
- messages.push(thinkingNudge);
1728
- this.state.messages.push(thinkingNudge);
1729
- response = await this.callLLM(messages);
1730
- continue;
1731
- }
1732
- // Second attempt also thinking-only → accept thinking as content
1733
- this.observability?.logger?.info('Accepting thinking as content after nudge failed', {
1734
- thinkingLength: response.thinking.length,
1735
- });
1736
- response = { ...response, content: response.thinking };
1737
- break;
1738
- }
1739
- // Truly empty (no content, no tools, no thinking) — existing retry logic
1740
- emptyRetries++;
1741
- this.emit({
1742
- type: 'resilience.retry',
1743
- reason: 'empty_response',
1744
- attempt: emptyRetries,
1745
- maxAttempts: MAX_EMPTY_RETRIES,
1746
- });
1747
- this.state.metrics.retryCount = (this.state.metrics.retryCount ?? 0) + 1;
1748
- this.observability?.logger?.warn('Empty LLM response, retrying', {
1749
- attempt: emptyRetries,
1750
- maxAttempts: MAX_EMPTY_RETRIES,
1751
- });
1752
- // Add gentle nudge and retry
1753
- const nudgeMessage = {
1754
- role: 'user',
1755
- content: '[System: Your previous response was empty. Please provide a response or use a tool.]',
1756
- };
1757
- messages.push(nudgeMessage);
1758
- this.state.messages.push(nudgeMessage);
1759
- response = await this.callLLM(messages);
1760
- }
1761
- // Phase 2: Handle max_tokens truncation with continuation (if enabled)
1762
- if (resilienceEnabled && AUTO_CONTINUE && response.stopReason === 'max_tokens' && !response.toolCalls?.length) {
1763
- let accumulatedContent = response.content || '';
1764
- while (continuations < MAX_CONTINUATIONS && response.stopReason === 'max_tokens') {
1765
- continuations++;
1766
- this.emit({
1767
- type: 'resilience.continue',
1768
- reason: 'max_tokens',
1769
- continuation: continuations,
1770
- maxContinuations: MAX_CONTINUATIONS,
1771
- accumulatedLength: accumulatedContent.length,
1772
- });
1773
- this.observability?.logger?.info('Response truncated at max_tokens, continuing', {
1774
- continuation: continuations,
1775
- accumulatedLength: accumulatedContent.length,
1776
- });
1777
- // Add continuation request
1778
- const continuationMessage = {
1779
- role: 'assistant',
1780
- content: accumulatedContent,
1781
- };
1782
- const continueRequest = {
1783
- role: 'user',
1784
- content: '[System: Please continue from where you left off. Do not repeat what you already said.]',
1785
- };
1786
- messages.push(continuationMessage, continueRequest);
1787
- this.state.messages.push(continuationMessage, continueRequest);
1788
- response = await this.callLLM(messages);
1789
- // Accumulate content
1790
- if (response.content) {
1791
- accumulatedContent += response.content;
1792
- }
1793
- }
1794
- // Update response with accumulated content
1795
- if (continuations > 0) {
1796
- response = { ...response, content: accumulatedContent };
1797
- this.emit({
1798
- type: 'resilience.completed',
1799
- reason: 'max_tokens_continuation',
1800
- continuations,
1801
- finalLength: accumulatedContent.length,
1802
- });
1803
- }
1804
- }
1805
- // Phase 2b: Handle truncated tool calls (stopReason=max_tokens with tool calls present)
1806
- // When a model hits max_tokens mid-tool-call, the JSON arguments are truncated and unparseable.
1807
- // Instead of executing broken tool calls, strip them and ask the LLM to retry smaller.
1808
- if (resilienceEnabled && response.stopReason === 'max_tokens' && response.toolCalls?.length) {
1809
- this.emit({
1810
- type: 'resilience.truncated_tool_call',
1811
- toolNames: response.toolCalls.map(tc => tc.name),
1812
- });
1813
- this.observability?.logger?.warn('Tool call truncated at max_tokens', {
1814
- toolNames: response.toolCalls.map(tc => tc.name),
1815
- outputTokens: response.usage?.outputTokens,
1816
- });
1817
- // Strip truncated tool calls, inject recovery message
1818
- const truncatedResponse = response;
1819
- response = { ...response, toolCalls: undefined };
1820
- const recoveryMessage = {
1821
- role: 'user',
1822
- content: '[System: Your previous tool call was truncated because the output exceeded the token limit. ' +
1823
- 'The tool call arguments were cut off and could not be parsed. ' +
1824
- 'Please retry with a smaller approach: for write_file, break the content into smaller chunks ' +
1825
- 'or use edit_file for targeted changes instead of rewriting entire files.]',
1826
- };
1827
- messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
1828
- messages.push(recoveryMessage);
1829
- this.state.messages.push({ role: 'assistant', content: truncatedResponse.content || '' });
1830
- this.state.messages.push(recoveryMessage);
1831
- response = await this.callLLM(messages);
1832
- }
1833
- // Record LLM usage for economics
1834
- if (this.economics && response.usage) {
1835
- this.economics.recordLLMUsage(response.usage.inputTokens, response.usage.outputTokens, this.config.model, response.usage.cost // Use actual cost from provider when available
1836
- );
1837
- // =================================================================
1838
- // POST-LLM BUDGET CHECK: Prevent tool execution if over budget
1839
- // A single LLM call can push us over - catch it before running tools
1840
- // =================================================================
1841
- if (!forceTextOnly) {
1842
- const postCheck = this.economics.checkBudget();
1843
- if (!postCheck.canContinue) {
1844
- this.observability?.logger?.warn('Budget exceeded after LLM call, skipping tool execution', {
1845
- reason: postCheck.reason,
1846
- });
1847
- forceTextOnly = true;
1848
- }
1849
- }
1850
- }
1851
- // Add assistant message
1852
- const assistantMessage = {
1853
- role: 'assistant',
1854
- content: response.content,
1855
- toolCalls: response.toolCalls,
1856
- ...(response.thinking ? { metadata: { thinking: response.thinking } } : {}),
1857
- };
1858
- messages.push(assistantMessage);
1859
- this.state.messages.push(assistantMessage);
1860
- lastResponse = response.content || (response.thinking ? response.thinking : '');
1861
- // In plan mode: capture exploration findings as we go (not just at the end)
1862
- // This ensures we collect context from exploration iterations before writes are queued
1863
- if (this.modeManager.getMode() === 'plan' && response.content && response.content.length > 50) {
1864
- const hasReadOnlyTools = response.toolCalls?.every(tc => ['read_file', 'list_files', 'glob', 'grep', 'search', 'mcp_'].some(prefix => tc.name.startsWith(prefix) || tc.name === prefix));
1865
- // Capture substantive exploration content (not just "let me read..." responses)
1866
- if (hasReadOnlyTools && !response.content.match(/^(Let me|I'll|I will|I need to|First,)/i)) {
1867
- this.pendingPlanManager.appendExplorationFinding(response.content.slice(0, 1000));
1868
- }
1869
- }
1870
- // Check for tool calls
1871
- // When forceTextOnly is set (max iterations reached), ignore any tool calls
1872
- const hasToolCalls = response.toolCalls && response.toolCalls.length > 0;
1873
- if (!hasToolCalls || forceTextOnly) {
1874
- // Log if we're ignoring tool calls due to forceTextOnly
1875
- if (forceTextOnly && hasToolCalls) {
1876
- this.observability?.logger?.info('Ignoring tool calls due to forceTextOnly (max steps reached)', {
1877
- toolCallCount: response.toolCalls?.length,
1878
- iteration: this.state.iteration,
1879
- });
1880
- }
1881
- const incompleteAction = this.detectIncompleteActionResponse(response.content || '');
1882
- const missingRequiredArtifact = ENFORCE_REQUESTED_ARTIFACTS
1883
- ? this.isRequestedArtifactMissing(requestedArtifact, executedToolNames)
1884
- : false;
1885
- const shouldRecoverIncompleteAction = resilienceEnabled
1886
- && INCOMPLETE_ACTION_RECOVERY
1887
- && !forceTextOnly
1888
- && (incompleteAction || missingRequiredArtifact);
1889
- if (shouldRecoverIncompleteAction) {
1890
- if (incompleteActionRetries < MAX_INCOMPLETE_ACTION_RETRIES) {
1891
- incompleteActionRetries++;
1892
- const reason = missingRequiredArtifact && requestedArtifact
1893
- ? `missing_requested_artifact:${requestedArtifact}`
1894
- : 'future_intent_without_action';
1895
- this.emit({
1896
- type: 'resilience.incomplete_action_detected',
1897
- reason,
1898
- attempt: incompleteActionRetries,
1899
- maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1900
- requiresArtifact: missingRequiredArtifact,
1901
- });
1902
- this.observability?.logger?.warn('Incomplete action detected, retrying with nudge', {
1903
- reason,
1904
- attempt: incompleteActionRetries,
1905
- maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1906
- });
1907
- const nudgeMessage = {
1908
- role: 'user',
1909
- content: missingRequiredArtifact && requestedArtifact
1910
- ? `[System: You said you would complete the next action, but no tool call was made. The task requires creating or updating "${requestedArtifact}". Execute the required tool now, or explicitly explain why it cannot be produced.]`
1911
- : '[System: You described a next action but did not execute it. If work remains, call the required tool now. If the task is complete, provide a final answer with no pending action language.]',
1912
- };
1913
- messages.push(nudgeMessage);
1914
- this.state.messages.push(nudgeMessage);
1915
- continue;
1916
- }
1917
- const failureReason = missingRequiredArtifact && requestedArtifact
1918
- ? `incomplete_action_missing_artifact:${requestedArtifact}`
1919
- : 'incomplete_action_unresolved';
1920
- this.emit({
1921
- type: 'resilience.incomplete_action_failed',
1922
- reason: failureReason,
1923
- attempts: incompleteActionRetries,
1924
- maxAttempts: MAX_INCOMPLETE_ACTION_RETRIES,
1925
- });
1926
- throw new Error(`LLM failed to complete requested action after ${incompleteActionRetries} retries (${failureReason})`);
1927
- }
1928
- if (incompleteActionRetries > 0) {
1929
- this.emit({
1930
- type: 'resilience.incomplete_action_recovered',
1931
- reason: 'incomplete_action',
1932
- attempts: incompleteActionRetries,
1933
- });
1934
- incompleteActionRetries = 0;
1935
- }
1936
- // Verification gate: if criteria not met, nudge agent to verify before completing
1937
- if (this.verificationGate && !forceTextOnly) {
1938
- const vResult = this.verificationGate.check();
1939
- if (!vResult.satisfied && !vResult.forceAllow && vResult.nudge) {
1940
- // Inject nudge and continue the loop
1941
- const nudgeMessage = {
1942
- role: 'user',
1943
- content: vResult.nudge,
1944
- };
1945
- messages.push(nudgeMessage);
1946
- this.state.messages.push(nudgeMessage);
1947
- this.observability?.logger?.info('Verification gate nudge', {
1948
- missing: vResult.missing,
1949
- nudgeCount: this.verificationGate.getState().nudgeCount,
1950
- });
1951
- continue;
1952
- }
1953
- }
1954
- // No tool calls (or forced to ignore), agent is done - compact tool outputs to save context
1955
- // The model has "consumed" the tool outputs and produced a response,
1956
- // so we can replace verbose outputs with compact summaries
1957
- this.compactToolOutputs();
1958
- // In plan mode: capture exploration summary from the final response
1959
- // This provides context for what was learned during exploration before proposing changes
1960
- if (this.modeManager.getMode() === 'plan' && this.pendingPlanManager.hasPendingPlan()) {
1961
- const explorationContent = response.content || '';
1962
- if (explorationContent.length > 0) {
1963
- this.pendingPlanManager.setExplorationSummary(explorationContent);
1964
- }
1965
- }
1966
- // Final validation: warn if response is still empty after all retries
1967
- if (!response.content || response.content.length === 0) {
1968
- this.observability?.logger?.error('Agent finished with empty response after all retries', {
1969
- emptyRetries,
1970
- continuations,
1971
- iteration: this.state.iteration,
1972
- });
1973
- this.emit({
1974
- type: 'resilience.failed',
1975
- reason: 'empty_final_response',
1976
- emptyRetries,
1977
- continuations,
1978
- });
1979
- }
1980
- // Record iteration end for tracing (no tool calls case)
1981
- this.traceCollector?.record({
1982
- type: 'iteration.end',
1983
- data: { iterationNumber: this.state.iteration },
1984
- });
1985
- break;
1986
- }
1987
- // Execute tool calls (we know toolCalls is defined here due to the check above)
1988
- const toolCalls = response.toolCalls;
1989
- const toolResults = await this.executeToolCalls(toolCalls);
1990
- // Record tool calls for economics/progress tracking + work log
1991
- for (let i = 0; i < toolCalls.length; i++) {
1992
- const toolCall = toolCalls[i];
1993
- const result = toolResults[i];
1994
- executedToolNames.add(toolCall.name);
1995
- this.economics?.recordToolCall(toolCall.name, toolCall.arguments, result?.result);
1996
- // Record in work log for compaction resilience
1997
- const toolOutput = result?.result && typeof result.result === 'object' && 'output' in result.result
1998
- ? String(result.result.output)
1999
- : typeof result?.result === 'string' ? result.result : undefined;
2000
- this.workLog?.recordToolExecution(toolCall.name, toolCall.arguments, toolOutput);
2001
- // Record in verification gate
2002
- if (this.verificationGate) {
2003
- if (toolCall.name === 'bash') {
2004
- const toolRes = result?.result;
2005
- const output = toolRes && typeof toolRes === 'object' && 'output' in toolRes
2006
- ? String(toolRes.output)
2007
- : typeof toolRes === 'string' ? toolRes : '';
2008
- const exitCode = toolRes && typeof toolRes === 'object' && toolRes.metadata
2009
- ? toolRes.metadata.exitCode ?? null
2010
- : null;
2011
- this.verificationGate.recordBashExecution(String(toolCall.arguments.command || ''), output, exitCode);
2012
- }
2013
- if (['write_file', 'edit_file'].includes(toolCall.name)) {
2014
- this.verificationGate.recordFileChange();
2015
- }
2016
- }
2017
- }
2018
- // Add tool results to messages (with truncation and proactive budget management)
2019
- const MAX_TOOL_OUTPUT_CHARS = 8000; // ~2000 tokens max per tool output
2020
- // =======================================================================
2021
- // PROACTIVE BUDGET CHECK - compact BEFORE we overflow, not after
2022
- // Uses AutoCompactionManager if available for sophisticated compaction
2023
- // =======================================================================
2024
- const currentContextTokens = this.estimateContextTokens(messages);
2025
- if (this.autoCompactionManager) {
2026
- // Use the AutoCompactionManager for threshold-based compaction
2027
- const compactionResult = await this.autoCompactionManager.checkAndMaybeCompact({
2028
- currentTokens: currentContextTokens,
2029
- messages: messages,
2030
- });
2031
- // Handle compaction result
2032
- if (compactionResult.status === 'compacted' && compactionResult.compactedMessages) {
2033
- // ─── Pre-compaction agentic turn ───────────────────────────────
2034
- // Give the agent one LLM turn to summarize critical state before
2035
- // compaction clears the context. On the first trigger we inject a
2036
- // system message and skip compaction; on the next trigger (the
2037
- // agent has already responded) we proceed with actual compaction.
2038
- if (!this.compactionPending) {
2039
- this.compactionPending = true;
2040
- const preCompactionMsg = {
2041
- role: 'user',
2042
- content: '[SYSTEM] Context compaction is imminent. Summarize your current progress, key findings, and next steps into a single concise message. This will be preserved after compaction.',
2043
- };
2044
- messages.push(preCompactionMsg);
2045
- this.state.messages.push(preCompactionMsg);
2046
- this.observability?.logger?.info('Pre-compaction agentic turn: injected summary request');
2047
- // Skip compaction this iteration — let the agent respond first
2048
- // (continue to tool result processing below)
2049
- }
2050
- else {
2051
- // Agent has had its chance to summarize — now compact for real
2052
- this.compactionPending = false;
2053
- // Pre-compaction checkpoint: save full state before discarding
2054
- try {
2055
- this.autoCheckpoint(true); // force=true bypasses frequency check
2056
- }
2057
- catch {
2058
- // Non-critical — don't block compaction
2059
- }
2060
- // Replace messages with compacted version
2061
- messages.length = 0;
2062
- messages.push(...compactionResult.compactedMessages);
2063
- this.state.messages.length = 0;
2064
- this.state.messages.push(...compactionResult.compactedMessages);
2065
- // Inject work log after compaction to prevent amnesia
2066
- if (this.workLog?.hasContent()) {
2067
- const workLogMessage = {
2068
- role: 'user',
2069
- content: this.workLog.toCompactString(),
2070
- };
2071
- messages.push(workLogMessage);
2072
- this.state.messages.push(workLogMessage);
2073
- }
2074
- // Context recovery: re-inject critical state after compaction
2075
- const recoveryParts = [];
2076
- // Goals
2077
- if (this.store) {
2078
- const goalsSummary = this.store.getGoalsSummary();
2079
- if (goalsSummary && goalsSummary !== 'No active goals.' && goalsSummary !== 'Goals feature not available.') {
2080
- recoveryParts.push(goalsSummary);
2081
- }
2082
- }
2083
- // Junctures (last 5 key moments)
2084
- if (this.store) {
2085
- const juncturesSummary = this.store.getJuncturesSummary(undefined, 5);
2086
- if (juncturesSummary) {
2087
- recoveryParts.push(juncturesSummary);
2088
- }
2089
- }
2090
- // Learnings from past patterns
2091
- if (this.learningStore) {
2092
- const learnings = this.learningStore.getLearningContext({ maxLearnings: 3 });
2093
- if (learnings) {
2094
- recoveryParts.push(learnings);
2095
- }
2096
- }
2097
- if (recoveryParts.length > 0) {
2098
- const recoveryMessage = {
2099
- role: 'user',
2100
- content: `[CONTEXT RECOVERY — Re-injected after compaction]\n\n${recoveryParts.join('\n\n')}`,
2101
- };
2102
- messages.push(recoveryMessage);
2103
- this.state.messages.push(recoveryMessage);
2104
- }
2105
- // Emit compaction event for observability
2106
- const compactionTokensAfter = this.estimateContextTokens(messages);
2107
- const compactionRecoveryInjected = recoveryParts.length > 0;
2108
- const compactionEvent = {
2109
- type: 'context.compacted',
2110
- tokensBefore: currentContextTokens,
2111
- tokensAfter: compactionTokensAfter,
2112
- recoveryInjected: compactionRecoveryInjected,
2113
- };
2114
- this.emit(compactionEvent);
2115
- // Record to trace collector for JSONL output
2116
- if (this.traceCollector) {
2117
- this.traceCollector.record({
2118
- type: 'context.compacted',
2119
- data: {
2120
- tokensBefore: currentContextTokens,
2121
- tokensAfter: compactionTokensAfter,
2122
- recoveryInjected: compactionRecoveryInjected,
2123
- },
2124
- });
2125
- }
2126
- }
2127
- }
2128
- else if (compactionResult.status === 'hard_limit') {
2129
- // Hard limit reached - this is serious, emit error
2130
- this.emit({
2131
- type: 'error',
2132
- error: `Context hard limit reached (${Math.round(compactionResult.ratio * 100)}% of max tokens)`,
2133
- });
2134
- break;
2135
- }
2136
- }
2137
- else if (this.economics) {
2138
- // Fallback to simple compaction
2139
- const currentUsage = this.economics.getUsage();
2140
- const budget = this.economics.getBudget();
2141
- const percentUsed = (currentUsage.tokens / budget.maxTokens) * 100;
2142
- // If we're at 70%+ of budget, proactively compact to make room
2143
- if (percentUsed >= 70) {
2144
- this.observability?.logger?.info('Proactive compaction triggered', {
2145
- percentUsed: Math.round(percentUsed),
2146
- currentTokens: currentUsage.tokens,
2147
- maxTokens: budget.maxTokens,
2148
- });
2149
- // Also checkpoint before fallback compaction
2150
- try {
2151
- this.autoCheckpoint(true);
2152
- }
2153
- catch {
2154
- // Non-critical
2155
- }
2156
- this.compactToolOutputs();
2157
- }
2158
- }
2159
- const toolCallNameById = new Map(toolCalls.map(tc => [tc.id, tc.name]));
2160
- for (const result of toolResults) {
2161
- let content = typeof result.result === 'string' ? result.result : stableStringify(result.result);
2162
- const sourceToolName = toolCallNameById.get(result.callId);
2163
- const isExpensiveResult = sourceToolName === 'spawn_agent' || sourceToolName === 'spawn_agents_parallel';
2164
- // Truncate long outputs to save context
2165
- // Use larger limit for subagent results to preserve critical context
2166
- const effectiveMaxChars = isExpensiveResult ? MAX_TOOL_OUTPUT_CHARS * 2 : MAX_TOOL_OUTPUT_CHARS;
2167
- if (content.length > effectiveMaxChars) {
2168
- content = content.slice(0, effectiveMaxChars) + `\n\n... [truncated ${content.length - effectiveMaxChars} chars]`;
2169
- }
2170
- // =======================================================================
2171
- // ESTIMATE if adding this result would exceed budget
2172
- // =======================================================================
2173
- if (this.economics) {
2174
- const estimatedNewTokens = Math.ceil(content.length / 4); // ~4 chars per token
2175
- const currentContextTokens = this.estimateContextTokens(messages);
2176
- const budget = this.economics.getBudget();
2177
- // Check if adding this would push us over the hard limit
2178
- if (currentContextTokens + estimatedNewTokens > budget.maxTokens * 0.95) {
2179
- this.observability?.logger?.warn('Skipping tool result to stay within budget', {
2180
- toolCallId: result.callId,
2181
- estimatedTokens: estimatedNewTokens,
2182
- currentContext: currentContextTokens,
2183
- limit: budget.maxTokens,
2184
- });
2185
- // Add a truncated placeholder instead
2186
- const toolMessage = {
2187
- role: 'tool',
2188
- content: `[Result omitted to stay within token budget. Original size: ${content.length} chars]`,
2189
- toolCallId: result.callId,
2190
- };
2191
- messages.push(toolMessage);
2192
- this.state.messages.push(toolMessage);
2193
- continue;
2194
- }
2195
- }
2196
- const toolMessage = {
2197
- role: 'tool',
2198
- content,
2199
- toolCallId: result.callId,
2200
- ...(isExpensiveResult
2201
- ? {
2202
- metadata: {
2203
- preserveFromCompaction: true,
2204
- costToRegenerate: 'high',
2205
- source: sourceToolName,
2206
- },
2207
- }
2208
- : {}),
2209
- };
2210
- messages.push(toolMessage);
2211
- this.state.messages.push(toolMessage);
2212
- }
2213
- // Emit context health after adding tool results
2214
- const currentTokenEstimate = this.estimateContextTokens(messages);
2215
- const contextLimit = this.getMaxContextTokens();
2216
- const percentUsed = Math.round((currentTokenEstimate / contextLimit) * 100);
2217
- const avgTokensPerExchange = currentTokenEstimate / Math.max(1, this.state.iteration);
2218
- const remainingTokens = contextLimit - currentTokenEstimate;
2219
- const estimatedExchanges = Math.floor(remainingTokens / Math.max(1, avgTokensPerExchange));
2220
- this.emit({
2221
- type: 'context.health',
2222
- currentTokens: currentTokenEstimate,
2223
- maxTokens: contextLimit,
2224
- estimatedExchanges,
2225
- percentUsed,
2226
- });
2227
- // Record iteration end for tracing (after tool execution)
2228
- this.traceCollector?.record({
2229
- type: 'iteration.end',
2230
- data: { iterationNumber: this.state.iteration },
2231
- });
2232
- }
2233
- // =======================================================================
2234
- // REFLECTION (Lesson 16)
2235
- // =======================================================================
2236
- if (autoReflect && this.planning && reflectionAttempt < maxReflectionAttempts) {
2237
- this.emit({ type: 'reflection', attempt: reflectionAttempt, satisfied: false });
2238
- const reflectionResult = await this.planning.reflect(task, lastResponse, this.provider);
2239
- this.state.metrics.reflectionAttempts = reflectionAttempt;
2240
- if (reflectionResult.satisfied && reflectionResult.confidence >= confidenceThreshold) {
2241
- // Output is satisfactory
2242
- this.emit({ type: 'reflection', attempt: reflectionAttempt, satisfied: true });
2243
- break;
2244
- }
2245
- // Not satisfied - add feedback and continue
2246
- const feedbackMessage = {
2247
- role: 'user',
2248
- content: `[Reflection feedback]\nThe previous output needs improvement:\n- Critique: ${reflectionResult.critique}\n- Suggestions: ${reflectionResult.suggestions.join(', ')}\n\nPlease improve the output.`,
2249
- };
2250
- messages.push(feedbackMessage);
2251
- this.state.messages.push(feedbackMessage);
2252
- this.observability?.logger?.info('Reflection not satisfied, retrying', {
2253
- attempt: reflectionAttempt,
2254
- confidence: reflectionResult.confidence,
2255
- critique: reflectionResult.critique,
2256
- });
2257
- }
2258
- else {
2259
- // No reflection or already satisfied
2260
- break;
2261
- }
2262
- }
2263
- // Store conversation in memory
2264
- this.memory?.storeConversation(this.state.messages);
2265
- this.updateMemoryStats();
1539
+ const messages = await this.buildMessages(task);
1540
+ const ctx = this.buildContext();
1541
+ const mutators = this.buildMutators();
1542
+ return coreExecuteDirectly(task, messages, ctx, mutators);
2266
1543
  }
2267
1544
  /**
2268
1545
  * Build messages for LLM call.
@@ -2270,7 +1547,7 @@ export class ProductionAgent {
2270
1547
  * Uses cache-aware system prompt building (Trick P) when contextEngineering
2271
1548
  * is available, ensuring static content is ordered for optimal KV-cache reuse.
2272
1549
  */
2273
- buildMessages(task) {
1550
+ async buildMessages(task) {
2274
1551
  const messages = [];
2275
1552
  // Gather all context components
2276
1553
  const rulesContent = this.rules?.getRulesContent() ?? '';
@@ -2289,12 +1566,18 @@ export class ProductionAgent {
2289
1566
  const reservedTokens = 10500;
2290
1567
  const maxContextTokens = (this.config.maxContextTokens ?? 80000) - reservedTokens;
2291
1568
  const codebaseBudget = Math.min(maxContextTokens * 0.3, 15000); // Up to 30% or 15K tokens
2292
- const repoMap = this.codebaseContext.getRepoMap();
2293
- // Lazy: trigger analysis on first system prompt build, ready by next turn
2294
- if (!repoMap && !this.codebaseAnalysisTriggered) {
1569
+ // Synchronous analysis on first system prompt build so context is available immediately
1570
+ if (!this.codebaseContext.getRepoMap() && !this.codebaseAnalysisTriggered) {
2295
1571
  this.codebaseAnalysisTriggered = true;
2296
- this.codebaseContext.analyze().catch(() => { });
1572
+ try {
1573
+ await this.codebaseContext.analyze();
1574
+ }
1575
+ catch {
1576
+ // non-fatal — agent can still work without codebase context
1577
+ }
2297
1578
  }
1579
+ // Get repo map AFTER analysis so we have fresh data on first prompt
1580
+ const repoMap = this.codebaseContext.getRepoMap();
2298
1581
  if (repoMap) {
2299
1582
  try {
2300
1583
  const selection = this.selectRelevantCodeSync(task, codebaseBudget);
@@ -2396,7 +1679,7 @@ export class ProductionAgent {
2396
1679
  }
2397
1680
  // Safety check: ensure system prompt is not empty
2398
1681
  if (!systemPrompt || systemPrompt.trim().length === 0) {
2399
- console.warn('[buildMessages] Warning: Empty system prompt detected, using fallback');
1682
+ log.warn('Empty system prompt detected, using fallback');
2400
1683
  systemPrompt = this.config.systemPrompt || 'You are a helpful AI assistant.';
2401
1684
  }
2402
1685
  messages.push({ role: 'system', content: systemPrompt });
@@ -2409,625 +1692,79 @@ export class ProductionAgent {
2409
1692
  }
2410
1693
  // Add current task
2411
1694
  messages.push({ role: 'user', content: task });
2412
- return messages;
2413
- }
2414
- /**
2415
- * Call the LLM with routing and observability.
2416
- */
2417
- async callLLM(messages) {
2418
- const spanId = this.observability?.tracer?.startSpan('llm.call');
2419
- this.emit({ type: 'llm.start', model: this.config.model || 'default' });
2420
- // Prompt caching (Improvement P1): Replace the system message with structured content
2421
- // that includes cache_control markers, enabling 60-70% cache hit rates.
2422
- // Only use structured cache_control markers for Anthropic models — other providers
2423
- // (DeepSeek, Grok, etc.) use automatic prefix-based caching and don't understand these markers.
2424
- const configModel = this.config.model || 'default';
2425
- const isAnthropicModel = configModel.startsWith('anthropic/') || configModel.startsWith('claude-');
2426
- let providerMessages = messages;
2427
- if (isAnthropicModel && this.cacheableSystemBlocks && this.cacheableSystemBlocks.length > 0) {
2428
- providerMessages = messages.map((m, i) => {
2429
- if (i === 0 && m.role === 'system') {
2430
- // Replace system message with structured cacheable content
2431
- return {
2432
- role: 'system',
2433
- content: this.cacheableSystemBlocks,
2434
- };
2435
- }
2436
- return m;
2437
- });
2438
- }
2439
- // Emit context insight for verbose feedback
2440
- const estimatedTokens = messages.reduce((sum, m) => {
2441
- const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
2442
- return sum + Math.ceil(content.length / 3.5); // ~3.5 chars per token estimate
2443
- }, 0);
2444
- // Use context window size, not output token limit
2445
- const contextLimit = this.getMaxContextTokens();
2446
- this.emit({
2447
- type: 'insight.context',
2448
- currentTokens: estimatedTokens,
2449
- maxTokens: contextLimit,
2450
- messageCount: messages.length,
2451
- percentUsed: Math.round((estimatedTokens / contextLimit) * 100),
2452
- });
2453
- const startTime = Date.now();
2454
- const requestId = `req-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2455
- // Debug: Log message count and structure (helps diagnose API errors)
2456
- if (process.env.DEBUG_LLM) {
2457
- console.log(`[callLLM] Sending ${messages.length} messages:`);
2458
- messages.forEach((m, i) => {
2459
- console.log(` [${i}] ${m.role}: ${m.content?.slice(0, 50)}...`);
2460
- });
2461
- }
2462
- // Validate messages are not empty
2463
- if (!messages || messages.length === 0) {
2464
- throw new Error('No messages to send to LLM');
2465
- }
2466
- // Lesson 26: Record LLM request for tracing
2467
- const model = this.config.model || 'default';
2468
- const provider = this.config.provider?.name || 'unknown';
2469
- this.traceCollector?.record({
2470
- type: 'llm.request',
2471
- data: {
2472
- requestId,
2473
- model,
2474
- provider,
2475
- messages: messages.map(m => ({
2476
- role: m.role,
2477
- content: m.content,
2478
- toolCallId: m.toolCallId,
2479
- toolCalls: m.toolCalls?.map(tc => ({
2480
- id: tc.id,
2481
- name: tc.name,
2482
- arguments: tc.arguments,
2483
- })),
2484
- })),
2485
- tools: Array.from(this.tools.values()).map(t => ({
2486
- name: t.name,
2487
- description: t.description,
2488
- parametersSchema: t.parameters,
2489
- })),
2490
- parameters: {
2491
- maxTokens: this.config.maxTokens,
2492
- temperature: this.config.temperature,
2493
- },
2494
- },
2495
- });
2496
- // Pause duration budget during LLM call - network time shouldn't count against agent
2497
- this.economics?.pauseDuration();
2498
- try {
2499
- let response;
2500
- let actualModel = model;
2501
- // Use routing if enabled
2502
- if (this.routing) {
2503
- const complexity = this.routing.estimateComplexity(messages[messages.length - 1]?.content || '');
2504
- const context = {
2505
- task: messages[messages.length - 1]?.content || '',
2506
- complexity,
2507
- hasTools: this.tools.size > 0,
2508
- hasImages: false,
2509
- taskType: 'general',
2510
- estimatedTokens: messages.reduce((sum, m) => sum + m.content.length / 4, 0),
2511
- };
2512
- const result = await this.routing.executeWithFallback(providerMessages, context);
2513
- response = result.response;
2514
- actualModel = result.model;
2515
- // Emit routing insight
2516
- this.emit({
2517
- type: 'insight.routing',
2518
- model: actualModel,
2519
- reason: actualModel !== model ? 'Routed based on complexity' : 'Default model',
2520
- complexity: complexity <= 0.3 ? 'low' : complexity <= 0.7 ? 'medium' : 'high',
2521
- });
2522
- // Emit decision transparency event
2523
- this.emit({
2524
- type: 'decision.routing',
2525
- model: actualModel,
2526
- reason: actualModel !== model
2527
- ? `Complexity ${(complexity * 100).toFixed(0)}% - using ${actualModel}`
2528
- : 'Default model for current task',
2529
- alternatives: actualModel !== model
2530
- ? [{ model, rejected: 'complexity threshold exceeded' }]
2531
- : undefined,
2532
- });
2533
- // Enhanced tracing: Record routing decision
2534
- this.traceCollector?.record({
2535
- type: 'decision',
2536
- data: {
2537
- type: 'routing',
2538
- decision: `Selected model: ${actualModel}`,
2539
- outcome: 'allowed',
2540
- reasoning: actualModel !== model
2541
- ? `Task complexity ${(complexity * 100).toFixed(0)}% exceeded threshold - routed to ${actualModel}`
2542
- : `Default model ${model} suitable for task complexity ${(complexity * 100).toFixed(0)}%`,
2543
- factors: [
2544
- { name: 'complexity', value: complexity, weight: 0.8 },
2545
- { name: 'hasTools', value: context.hasTools, weight: 0.1 },
2546
- { name: 'taskType', value: context.taskType, weight: 0.1 },
2547
- ],
2548
- alternatives: actualModel !== model
2549
- ? [{ option: model, reason: 'complexity threshold exceeded', rejected: true }]
2550
- : undefined,
2551
- confidence: 0.9,
2552
- },
2553
- });
2554
- }
2555
- else {
2556
- response = await this.provider.chat(providerMessages, {
2557
- model: this.config.model,
2558
- tools: Array.from(this.tools.values()),
2559
- });
2560
- }
2561
- const duration = Date.now() - startTime;
2562
- // Debug cache stats when DEBUG_CACHE is set
2563
- if (process.env.DEBUG_CACHE) {
2564
- const cr = response.usage?.cacheReadTokens ?? 0;
2565
- const cw = response.usage?.cacheWriteTokens ?? 0;
2566
- const inp = response.usage?.inputTokens ?? 0;
2567
- const hitRate = inp > 0 ? ((cr / inp) * 100).toFixed(1) : '0.0';
2568
- console.log(`[Cache] model=${actualModel} read=${cr} write=${cw} input=${inp} hit=${hitRate}%`);
2569
- }
2570
- // Lesson 26: Record LLM response for tracing
2571
- this.traceCollector?.record({
2572
- type: 'llm.response',
2573
- data: {
2574
- requestId,
2575
- content: response.content || '',
2576
- toolCalls: response.toolCalls?.map(tc => ({
2577
- id: tc.id,
2578
- name: tc.name,
2579
- arguments: tc.arguments,
2580
- })),
2581
- stopReason: response.stopReason === 'end_turn' ? 'end_turn'
2582
- : response.stopReason === 'tool_use' ? 'tool_use'
2583
- : response.stopReason === 'max_tokens' ? 'max_tokens'
2584
- : 'stop_sequence',
2585
- usage: {
2586
- inputTokens: response.usage?.inputTokens || 0,
2587
- outputTokens: response.usage?.outputTokens || 0,
2588
- cacheReadTokens: response.usage?.cacheReadTokens,
2589
- cacheWriteTokens: response.usage?.cacheWriteTokens,
2590
- cost: response.usage?.cost, // Actual cost from provider (e.g., OpenRouter)
2591
- },
2592
- durationMs: duration,
2593
- },
2594
- });
2595
- // Enhanced tracing: Record thinking/reasoning blocks if present
2596
- if (response.thinking) {
2597
- this.traceCollector?.record({
2598
- type: 'llm.thinking',
2599
- data: {
2600
- requestId,
2601
- content: response.thinking,
2602
- summarized: response.thinking.length > 10000, // Summarize if very long
2603
- originalLength: response.thinking.length,
2604
- durationMs: duration,
2605
- },
2606
- });
2607
- }
2608
- // Record metrics
2609
- this.observability?.metrics?.recordLLMCall(response.usage?.inputTokens || 0, response.usage?.outputTokens || 0, duration, actualModel, response.usage?.cost // Actual cost from provider (e.g., OpenRouter)
2610
- );
2611
- this.state.metrics.llmCalls++;
2612
- this.state.metrics.inputTokens += response.usage?.inputTokens || 0;
2613
- this.state.metrics.outputTokens += response.usage?.outputTokens || 0;
2614
- this.state.metrics.totalTokens = this.state.metrics.inputTokens + this.state.metrics.outputTokens;
2615
- this.emit({ type: 'llm.complete', response });
2616
- // Emit token usage insight for verbose feedback
2617
- if (response.usage) {
2618
- this.emit({
2619
- type: 'insight.tokens',
2620
- inputTokens: response.usage.inputTokens,
2621
- outputTokens: response.usage.outputTokens,
2622
- cacheReadTokens: response.usage.cacheReadTokens,
2623
- cacheWriteTokens: response.usage.cacheWriteTokens,
2624
- cost: response.usage.cost,
2625
- model: actualModel,
2626
- });
2627
- }
2628
- this.observability?.tracer?.endSpan(spanId);
2629
- return response;
2630
- }
2631
- catch (err) {
2632
- const error = err instanceof Error ? err : new Error(String(err));
2633
- this.observability?.tracer?.recordError(error);
2634
- this.observability?.tracer?.endSpan(spanId);
2635
- throw error;
2636
- }
2637
- finally {
2638
- // Resume duration budget after LLM call completes (success or failure)
2639
- this.economics?.resumeDuration();
2640
- }
2641
- }
2642
- /**
2643
- * Execute an async callback while excluding wall-clock wait time from duration budgeting.
2644
- * Used for external waits such as approval dialogs and delegation confirmation.
2645
- */
2646
- async withPausedDuration(fn) {
2647
- this.economics?.pauseDuration();
2648
- try {
2649
- return await fn();
2650
- }
2651
- finally {
2652
- this.economics?.resumeDuration();
1695
+ // Track system prompt length for context % estimation
1696
+ const sysMsg = messages.find(m => m.role === 'system');
1697
+ if (sysMsg) {
1698
+ const content = typeof sysMsg.content === 'string' ? sysMsg.content : JSON.stringify(sysMsg.content);
1699
+ this.lastSystemPromptLength = content.length;
2653
1700
  }
1701
+ return messages;
2654
1702
  }
2655
- /**
2656
- * Execute tool calls with safety checks and execution policy enforcement.
2657
- * Parallelizable read-only tools are batched and executed concurrently.
2658
- */
2659
- async executeToolCalls(toolCalls) {
2660
- const results = [];
2661
- // Group consecutive parallelizable tool calls into batches
2662
- const batches = groupToolCallsIntoBatches(toolCalls);
2663
- // Execute batches: parallel batches use Promise.allSettled, sequential execute one-by-one
2664
- for (const batch of batches) {
2665
- if (batch.length > 1 && PARALLELIZABLE_TOOLS.has(batch[0].name)) {
2666
- // Execute parallelizable batch concurrently
2667
- const batchResults = await Promise.allSettled(batch.map(tc => this.executeSingleToolCall(tc)));
2668
- for (const result of batchResults) {
2669
- if (result.status === 'fulfilled') {
2670
- results.push(result.value);
2671
- }
2672
- else {
2673
- // Should not happen since executeSingleToolCall catches errors internally
2674
- const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
2675
- results.push({ callId: 'unknown', result: `Error: ${error}`, error });
2676
- }
2677
- }
2678
- }
2679
- else {
2680
- // Execute sequentially
2681
- for (const tc of batch) {
2682
- results.push(await this.executeSingleToolCall(tc));
2683
- }
2684
- }
2685
- }
2686
- return results;
1703
+ // ===========================================================================
1704
+ // CONTEXT BUILDERS Bridge private fields to extracted core modules
1705
+ // ===========================================================================
1706
+ buildContext() {
1707
+ return {
1708
+ config: this.config, agentId: this.agentId, provider: this.provider,
1709
+ tools: this.tools, state: this.state,
1710
+ modeManager: this.modeManager, pendingPlanManager: this.pendingPlanManager,
1711
+ hooks: this.hooks, economics: this.economics, cancellation: this.cancellation,
1712
+ resourceManager: this.resourceManager, safety: this.safety,
1713
+ observability: this.observability, contextEngineering: this.contextEngineering,
1714
+ traceCollector: this.traceCollector, executionPolicy: this.executionPolicy,
1715
+ routing: this.routing, planning: this.planning, memory: this.memory,
1716
+ react: this.react, blackboard: this.blackboard, fileCache: this.fileCache,
1717
+ budgetPool: this.budgetPool, taskManager: this.taskManager, store: this.store,
1718
+ codebaseContext: this.codebaseContext, learningStore: this.learningStore,
1719
+ compactor: this.compactor, autoCompactionManager: this.autoCompactionManager,
1720
+ workLog: this.workLog, verificationGate: this.verificationGate,
1721
+ agentRegistry: this.agentRegistry, toolRecommendation: this.toolRecommendation,
1722
+ selfImprovement: this.selfImprovement, subagentOutputStore: this.subagentOutputStore,
1723
+ autoCheckpointManager: this.autoCheckpointManager, injectionBudget: this.injectionBudget,
1724
+ skillManager: this.skillManager, semanticCache: this.semanticCache,
1725
+ lspManager: this.lspManager, threadManager: this.threadManager,
1726
+ interactivePlanner: this.interactivePlanner, recursiveContext: this.recursiveContext,
1727
+ fileChangeTracker: this.fileChangeTracker, capabilitiesRegistry: this.capabilitiesRegistry,
1728
+ rules: this.rules, stateMachine: this.stateMachine,
1729
+ lastComplexityAssessment: this.lastComplexityAssessment,
1730
+ cacheableSystemBlocks: this.cacheableSystemBlocks,
1731
+ parentIterations: this.parentIterations,
1732
+ externalCancellationToken: this.externalCancellationToken,
1733
+ wrapupRequested: this.wrapupRequested, wrapupReason: this.wrapupReason,
1734
+ compactionPending: this.compactionPending,
1735
+ sharedContextState: this._sharedContextState,
1736
+ sharedEconomicsState: this._sharedEconomicsState,
1737
+ spawnedTasks: this.spawnedTasks, toolResolver: this.toolResolver,
1738
+ emit: (event) => this.emit(event),
1739
+ addTool: (tool) => this.addTool(tool),
1740
+ getMaxContextTokens: () => this.getMaxContextTokens(),
1741
+ getTotalIterations: () => this.getTotalIterations(),
1742
+ };
2687
1743
  }
2688
- /**
2689
- * Execute a single tool call with all safety checks, tracing, and error handling.
2690
- */
2691
- async executeSingleToolCall(toolCall) {
2692
- const spanId = this.observability?.tracer?.startSpan(`tool.${toolCall.name}`);
2693
- const executionId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
2694
- this.emit({ type: 'tool.start', tool: toolCall.name, args: toolCall.arguments });
2695
- const startTime = Date.now();
2696
- // Short-circuit if tool call arguments failed to parse
2697
- if (toolCall.parseError) {
2698
- const errorMsg = `Tool arguments could not be parsed: ${toolCall.parseError}. Please retry with complete, valid JSON.`;
2699
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: errorMsg });
2700
- this.traceCollector?.record({
2701
- type: 'tool.end',
2702
- data: { executionId, status: 'error', error: new Error(errorMsg), durationMs: Date.now() - startTime },
2703
- });
2704
- this.observability?.tracer?.endSpan(spanId);
2705
- return { callId: toolCall.id, result: `Error: ${errorMsg}`, error: errorMsg };
2706
- }
2707
- // Lesson 26: Record tool start for tracing
2708
- this.traceCollector?.record({
2709
- type: 'tool.start',
2710
- data: {
2711
- executionId,
2712
- toolName: toolCall.name,
2713
- arguments: toolCall.arguments,
2714
- },
2715
- });
2716
- try {
2717
- // =====================================================================
2718
- // PLAN MODE WRITE INTERCEPTION
2719
- // =====================================================================
2720
- // In plan mode, intercept write operations and queue them as proposed changes
2721
- if (this.modeManager.shouldInterceptTool(toolCall.name, toolCall.arguments)) {
2722
- // Extract contextual reasoning instead of simple truncation
2723
- const reason = this.extractChangeReasoning(toolCall, this.state.messages);
2724
- // Start a new plan if needed
2725
- if (!this.pendingPlanManager.hasPendingPlan()) {
2726
- const lastUserMsg = [...this.state.messages].reverse().find(m => m.role === 'user');
2727
- const task = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : 'Plan';
2728
- this.pendingPlanManager.startPlan(task);
2729
- }
2730
- // Queue the write operation
2731
- const change = this.pendingPlanManager.addProposedChange(toolCall.name, toolCall.arguments, reason, toolCall.id);
2732
- // Emit event for UI
2733
- this.emit({
2734
- type: 'plan.change.queued',
2735
- tool: toolCall.name,
2736
- changeId: change?.id,
2737
- summary: this.formatToolArgsForPlan(toolCall.name, toolCall.arguments),
2738
- });
2739
- // Return a message indicating the change was queued
2740
- const queueMessage = `[PLAN MODE] Change queued for approval:\n` +
2741
- `Tool: ${toolCall.name}\n` +
2742
- `${this.formatToolArgsForPlan(toolCall.name, toolCall.arguments)}\n` +
2743
- `Use /show-plan to see all pending changes, /approve to execute, /reject to discard.`;
2744
- this.observability?.tracer?.endSpan(spanId);
2745
- return { callId: toolCall.id, result: queueMessage };
2746
- }
2747
- // =====================================================================
2748
- // EXECUTION POLICY ENFORCEMENT (Lesson 23)
2749
- // =====================================================================
2750
- let policyApprovedByUser = false;
2751
- if (this.executionPolicy) {
2752
- const policyContext = {
2753
- messages: this.state.messages,
2754
- currentMessage: this.state.messages.find(m => m.role === 'user')?.content,
2755
- previousToolCalls: [],
2756
- };
2757
- const evaluation = this.executionPolicy.evaluate(toolCall, policyContext);
2758
- // Emit policy event
2759
- this.emit({
2760
- type: 'policy.evaluated',
2761
- tool: toolCall.name,
2762
- policy: evaluation.policy,
2763
- reason: evaluation.reason,
2764
- });
2765
- // Emit decision transparency event
2766
- this.emit({
2767
- type: 'decision.tool',
2768
- tool: toolCall.name,
2769
- decision: evaluation.policy === 'forbidden' ? 'blocked'
2770
- : evaluation.policy === 'prompt' ? 'prompted'
2771
- : 'allowed',
2772
- policyMatch: evaluation.reason,
2773
- });
2774
- // Enhanced tracing: Record policy decision
2775
- this.traceCollector?.record({
2776
- type: 'decision',
2777
- data: {
2778
- type: 'policy',
2779
- decision: `Tool ${toolCall.name}: ${evaluation.policy}`,
2780
- outcome: evaluation.policy === 'forbidden' ? 'blocked'
2781
- : evaluation.policy === 'prompt' ? 'deferred'
2782
- : 'allowed',
2783
- reasoning: evaluation.reason,
2784
- factors: [
2785
- { name: 'policy', value: evaluation.policy },
2786
- { name: 'requiresApproval', value: evaluation.requiresApproval ?? false },
2787
- ],
2788
- confidence: evaluation.intent?.confidence ?? 0.8,
2789
- },
2790
- });
2791
- // Handle forbidden policy - always block
2792
- if (evaluation.policy === 'forbidden') {
2793
- this.emit({
2794
- type: 'policy.tool.blocked',
2795
- tool: toolCall.name,
2796
- phase: 'enforced',
2797
- reason: `Forbidden by execution policy: ${evaluation.reason}`,
2798
- });
2799
- throw new Error(`Forbidden by policy: ${evaluation.reason}`);
2800
- }
2801
- // Handle prompt policy - requires approval
2802
- if (evaluation.policy === 'prompt' && evaluation.requiresApproval) {
2803
- // Try to get approval through safety manager's human-in-loop
2804
- const humanInLoop = this.safety?.humanInLoop;
2805
- if (humanInLoop) {
2806
- const approval = await this.withPausedDuration(() => humanInLoop.requestApproval(toolCall, `Policy requires approval: ${evaluation.reason}`));
2807
- if (!approval.approved) {
2808
- throw new Error(`Denied by user: ${approval.reason || 'No reason provided'}`);
2809
- }
2810
- policyApprovedByUser = true;
2811
- // Create a grant for future similar calls if approved
2812
- this.executionPolicy.createGrant({
2813
- toolName: toolCall.name,
2814
- grantedBy: 'user',
2815
- reason: 'Approved during execution',
2816
- maxUsages: 5, // Allow 5 more similar calls
2817
- });
2818
- }
2819
- else {
2820
- // No approval handler - block by default for safety
2821
- throw new Error(`Policy requires approval but no approval handler available: ${evaluation.reason}`);
2822
- }
2823
- }
2824
- // Log intent classification if available
2825
- if (evaluation.intent) {
2826
- this.emit({
2827
- type: 'intent.classified',
2828
- tool: toolCall.name,
2829
- intent: evaluation.intent.type,
2830
- confidence: evaluation.intent.confidence,
2831
- });
2832
- }
2833
- }
2834
- // =====================================================================
2835
- // SAFETY VALIDATION (Lesson 20-21)
2836
- // =====================================================================
2837
- if (this.safety) {
2838
- const safety = this.safety;
2839
- const validation = await this.withPausedDuration(() => safety.validateAndApprove(toolCall, `Executing tool: ${toolCall.name}`, { skipHumanApproval: policyApprovedByUser }));
2840
- if (!validation.allowed) {
2841
- this.emit({
2842
- type: 'policy.tool.blocked',
2843
- tool: toolCall.name,
2844
- phase: 'enforced',
2845
- reason: validation.reason || 'Blocked by safety manager',
2846
- });
2847
- if (toolCall.name === 'bash') {
2848
- const args = toolCall.arguments;
2849
- this.emit({
2850
- type: 'policy.bash.blocked',
2851
- phase: 'enforced',
2852
- command: String(args.command || args.cmd || ''),
2853
- reason: validation.reason || 'Blocked by safety manager',
2854
- });
2855
- }
2856
- throw new Error(`Tool call blocked: ${validation.reason}`);
2857
- }
2858
- }
2859
- // Get tool definition (with lazy-loading support for MCP tools)
2860
- let tool = this.tools.get(toolCall.name);
2861
- const wasPreloaded = !!tool;
2862
- if (!tool && this.toolResolver) {
2863
- // Try to resolve and load the tool on-demand
2864
- const resolved = this.toolResolver(toolCall.name);
2865
- if (resolved) {
2866
- this.addTool(resolved);
2867
- tool = resolved;
2868
- if (process.env.DEBUG)
2869
- console.log(` 🔄 Auto-loaded MCP tool: ${toolCall.name}`);
2870
- this.observability?.logger?.info('Tool auto-loaded', { tool: toolCall.name });
2871
- }
2872
- }
2873
- if (!tool) {
2874
- throw new Error(`Unknown tool: ${toolCall.name}`);
2875
- }
2876
- // Log whether tool was pre-loaded or auto-loaded (for MCP tools)
2877
- if (process.env.DEBUG && toolCall.name.startsWith('mcp_') && wasPreloaded) {
2878
- console.log(` ✓ Using pre-loaded MCP tool: ${toolCall.name}`);
2879
- }
2880
- // =====================================================================
2881
- // BLACKBOARD FILE COORDINATION (Parallel Subagent Support)
2882
- // =====================================================================
2883
- // Claim file resources before write operations to prevent conflicts
2884
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2885
- const args = toolCall.arguments;
2886
- const filePath = String(args.path || args.file_path || '');
2887
- if (filePath) {
2888
- const agentId = this.agentId;
2889
- const claimed = this.blackboard.claim(filePath, agentId, 'write', {
2890
- ttl: 60000, // 1 minute claim
2891
- intent: `${toolCall.name}: ${filePath}`,
2892
- });
2893
- if (!claimed) {
2894
- const existingClaim = this.blackboard.getClaim(filePath);
2895
- throw new Error(`File "${filePath}" is being edited by another agent (${existingClaim?.agentId || 'unknown'}). ` +
2896
- `Wait for the other agent to complete or choose a different file.`);
2897
- }
2898
- }
2899
- }
2900
- // FILE CACHE: Check cache for read_file operations before executing
2901
- if (this.fileCache && toolCall.name === 'read_file') {
2902
- const args = toolCall.arguments;
2903
- const readPath = String(args.path || '');
2904
- if (readPath) {
2905
- const cached = this.fileCache.get(readPath);
2906
- if (cached !== undefined) {
2907
- const lines = cached.split('\n').length;
2908
- const cacheResult = { success: true, output: cached, metadata: { lines, bytes: cached.length, cached: true } };
2909
- const duration = Date.now() - startTime;
2910
- this.traceCollector?.record({ type: 'tool.end', data: { executionId, status: 'success', result: cacheResult, durationMs: duration } });
2911
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2912
- this.state.metrics.toolCalls++;
2913
- this.emit({ type: 'tool.complete', tool: toolCall.name, result: cacheResult });
2914
- this.observability?.tracer?.endSpan(spanId);
2915
- return {
2916
- callId: toolCall.id,
2917
- result: typeof cacheResult === 'string' ? cacheResult : JSON.stringify(cacheResult),
2918
- };
2919
- }
2920
- }
2921
- }
2922
- // Execute tool (with sandbox if available)
2923
- let result;
2924
- if (this.safety?.sandbox) {
2925
- // CRITICAL: spawn_agent and spawn_agents_parallel need MUCH longer timeouts
2926
- // The default 60s sandbox timeout would kill subagents prematurely
2927
- // Subagents may run for minutes (per their own timeout config)
2928
- const isSpawnAgent = toolCall.name === 'spawn_agent';
2929
- const isSpawnParallel = toolCall.name === 'spawn_agents_parallel';
2930
- const isSubagentTool = isSpawnAgent || isSpawnParallel;
2931
- const subagentConfig = this.config.subagent;
2932
- const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
2933
- const subagentTimeout = hasSubagentConfig
2934
- ? subagentConfig.defaultTimeout ?? 600000 // 10 min default
2935
- : 600000;
2936
- // Use subagent timeout + buffer for spawn tools, default for others
2937
- // For spawn_agents_parallel, multiply by number of agents (they run in parallel,
2938
- // but the total wall-clock time should still allow the slowest agent to complete)
2939
- const toolTimeout = isSubagentTool ? subagentTimeout + 30000 : undefined;
2940
- result = await this.safety.sandbox.executeWithLimits(() => tool.execute(toolCall.arguments), toolTimeout);
2941
- }
2942
- else {
2943
- result = await tool.execute(toolCall.arguments);
2944
- }
2945
- const duration = Date.now() - startTime;
2946
- // Lesson 26: Record tool completion for tracing
2947
- this.traceCollector?.record({
2948
- type: 'tool.end',
2949
- data: {
2950
- executionId,
2951
- status: 'success',
2952
- result,
2953
- durationMs: duration,
2954
- },
2955
- });
2956
- // Record metrics
2957
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, true);
2958
- this.state.metrics.toolCalls++;
2959
- this.emit({ type: 'tool.complete', tool: toolCall.name, result });
2960
- // FILE CACHE: Store read results and invalidate on writes
2961
- if (this.fileCache) {
2962
- const args = toolCall.arguments;
2963
- const filePath = String(args.path || args.file_path || '');
2964
- if (toolCall.name === 'read_file' && filePath) {
2965
- // Cache successful read results
2966
- const resultObj = result;
2967
- if (resultObj?.success && typeof resultObj.output === 'string') {
2968
- this.fileCache.set(filePath, resultObj.output);
2969
- }
2970
- }
2971
- else if ((toolCall.name === 'write_file' || toolCall.name === 'edit_file' || toolCall.name === 'undo_file_change') && filePath) {
2972
- // Invalidate cache when files are modified (including undo operations)
2973
- this.fileCache.invalidate(filePath);
2974
- }
2975
- }
2976
- // Emit tool insight with result summary
2977
- const summary = this.summarizeToolResult(toolCall.name, result);
2978
- this.emit({
2979
- type: 'insight.tool',
2980
- tool: toolCall.name,
2981
- summary,
2982
- durationMs: duration,
2983
- success: true,
2984
- });
2985
- // Release blackboard claim after successful file write
2986
- if (this.blackboard && (toolCall.name === 'write_file' || toolCall.name === 'edit_file')) {
2987
- const args = toolCall.arguments;
2988
- const filePath = String(args.path || args.file_path || '');
2989
- if (filePath) {
2990
- const agentId = this.agentId;
2991
- this.blackboard.release(filePath, agentId);
2992
- }
2993
- }
2994
- // Self-improvement: record success pattern
2995
- this.selfImprovement?.recordSuccess(toolCall.name, toolCall.arguments, typeof result === 'string' ? result.slice(0, 200) : JSON.stringify(result).slice(0, 200));
2996
- this.observability?.tracer?.endSpan(spanId);
2997
- return { callId: toolCall.id, result };
1744
+ buildMutators() {
1745
+ return {
1746
+ setBudgetPool: (pool) => { this.budgetPool = pool; },
1747
+ setCacheableSystemBlocks: (blocks) => { this.cacheableSystemBlocks = blocks; },
1748
+ setCompactionPending: (pending) => { this.compactionPending = pending; },
1749
+ setWrapupRequested: (requested) => { this.wrapupRequested = requested; },
1750
+ setLastComplexityAssessment: (a) => { this.lastComplexityAssessment = a; },
1751
+ setExternalCancellationToken: (t) => { this.externalCancellationToken = t; },
1752
+ };
1753
+ }
1754
+ createSubAgentFactory() {
1755
+ return (config) => new ProductionAgent(config);
1756
+ }
1757
+ /**
1758
+ * Execute an async callback while excluding wall-clock wait time from duration budgeting.
1759
+ * Used for external waits such as approval dialogs and delegation confirmation.
1760
+ */
1761
+ async withPausedDuration(fn) {
1762
+ this.economics?.pauseDuration();
1763
+ try {
1764
+ return await fn();
2998
1765
  }
2999
- catch (err) {
3000
- const error = err instanceof Error ? err : new Error(String(err));
3001
- const duration = Date.now() - startTime;
3002
- // Lesson 26: Record tool error for tracing
3003
- this.traceCollector?.record({
3004
- type: 'tool.end',
3005
- data: {
3006
- executionId,
3007
- status: error.message.includes('Blocked') || error.message.includes('Policy') ? 'blocked' : 'error',
3008
- error,
3009
- durationMs: duration,
3010
- },
3011
- });
3012
- this.observability?.metrics?.recordToolCall(toolCall.name, duration, false);
3013
- this.observability?.tracer?.recordError(error);
3014
- this.observability?.tracer?.endSpan(spanId);
3015
- // FAILURE EVIDENCE RECORDING (Trick S)
3016
- // Track failed tool calls to prevent loops and provide context
3017
- this.contextEngineering?.recordFailure({
3018
- action: toolCall.name,
3019
- args: toolCall.arguments,
3020
- error,
3021
- intent: `Execute tool ${toolCall.name}`,
3022
- });
3023
- // Self-improvement: enhance error message with diagnosis for better LLM recovery
3024
- if (this.selfImprovement) {
3025
- const enhanced = this.selfImprovement.enhanceErrorMessage(toolCall.name, error.message, toolCall.arguments);
3026
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: enhanced });
3027
- return { callId: toolCall.id, result: `Error: ${enhanced}`, error: enhanced };
3028
- }
3029
- this.emit({ type: 'tool.blocked', tool: toolCall.name, reason: error.message });
3030
- return { callId: toolCall.id, result: `Error: ${error.message}`, error: error.message };
1766
+ finally {
1767
+ this.economics?.resumeDuration();
3031
1768
  }
3032
1769
  }
3033
1770
  /**
@@ -3166,123 +1903,6 @@ export class ProductionAgent {
3166
1903
  emit(event) {
3167
1904
  this.hooks?.emit(event);
3168
1905
  }
3169
- /**
3170
- * Create a brief summary of a tool result for insight display.
3171
- */
3172
- summarizeToolResult(toolName, result) {
3173
- if (result === null || result === undefined) {
3174
- return 'No output';
3175
- }
3176
- const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
3177
- // Tool-specific summaries
3178
- if (toolName === 'list_files' || toolName === 'glob') {
3179
- const lines = resultStr.split('\n').filter(l => l.trim());
3180
- return `Found ${lines.length} file${lines.length !== 1 ? 's' : ''}`;
3181
- }
3182
- if (toolName === 'bash' || toolName === 'execute_command') {
3183
- const lines = resultStr.split('\n').filter(l => l.trim());
3184
- if (resultStr.includes('exit code: 0') || !resultStr.includes('exit code:')) {
3185
- return lines.length > 1 ? `Success (${lines.length} lines)` : 'Success';
3186
- }
3187
- return `Failed - ${lines[0]?.slice(0, 50) || 'see output'}`;
3188
- }
3189
- if (toolName === 'read_file') {
3190
- const lines = resultStr.split('\n').length;
3191
- return `Read ${lines} line${lines !== 1 ? 's' : ''}`;
3192
- }
3193
- if (toolName === 'write_file' || toolName === 'edit_file') {
3194
- return 'File updated';
3195
- }
3196
- if (toolName === 'search' || toolName === 'grep') {
3197
- const matches = (resultStr.match(/\n/g) || []).length;
3198
- return `${matches} match${matches !== 1 ? 'es' : ''}`;
3199
- }
3200
- // Generic summary
3201
- if (resultStr.length <= 50) {
3202
- return resultStr;
3203
- }
3204
- return `${resultStr.slice(0, 47)}...`;
3205
- }
3206
- /**
3207
- * Format tool arguments for plan display.
3208
- */
3209
- formatToolArgsForPlan(toolName, args) {
3210
- if (toolName === 'write_file') {
3211
- const path = args.path || args.file_path;
3212
- const content = String(args.content || '');
3213
- const preview = content.slice(0, 100).replace(/\n/g, '\\n');
3214
- return `File: ${path}\nContent preview: ${preview}${content.length > 100 ? '...' : ''}`;
3215
- }
3216
- if (toolName === 'edit_file') {
3217
- const path = args.path || args.file_path;
3218
- return `File: ${path}\nOld: ${String(args.old_string || args.search || '').slice(0, 50)}...\nNew: ${String(args.new_string || args.replace || '').slice(0, 50)}...`;
3219
- }
3220
- if (toolName === 'bash') {
3221
- return `Command: ${String(args.command || '').slice(0, 100)}`;
3222
- }
3223
- if (toolName === 'delete_file') {
3224
- return `Delete: ${args.path || args.file_path}`;
3225
- }
3226
- if (toolName === 'spawn_agent' || toolName === 'researcher') {
3227
- const task = String(args.task || args.prompt || args.goal || '');
3228
- const model = args.model ? ` (${args.model})` : '';
3229
- const firstLine = task.split('\n')[0].slice(0, 100);
3230
- return `${firstLine}${task.length > 100 ? '...' : ''}${model}`;
3231
- }
3232
- // Generic
3233
- return `Args: ${JSON.stringify(args).slice(0, 100)}...`;
3234
- }
3235
- /**
3236
- * Extract contextual reasoning for a proposed change in plan mode.
3237
- * Looks at recent assistant messages to find relevant explanation.
3238
- * Returns a more complete reason than simple truncation.
3239
- */
3240
- extractChangeReasoning(toolCall, messages) {
3241
- // Get last few assistant messages (most recent first)
3242
- const assistantMsgs = messages
3243
- .filter(m => m.role === 'assistant' && typeof m.content === 'string')
3244
- .slice(-3)
3245
- .reverse();
3246
- if (assistantMsgs.length === 0) {
3247
- return `Proposed change: ${toolCall.name}`;
3248
- }
3249
- // Use the most recent assistant message
3250
- const lastMsg = assistantMsgs[0];
3251
- const content = lastMsg.content;
3252
- // For spawn_agent, the task itself is usually the reason
3253
- if (toolCall.name === 'spawn_agent') {
3254
- const args = toolCall.arguments;
3255
- const task = String(args.task || args.prompt || args.goal || '');
3256
- if (task.length > 0) {
3257
- // Use first paragraph or 500 chars of task as reason
3258
- const firstPara = task.split(/\n\n/)[0];
3259
- return firstPara.length > 500 ? firstPara.slice(0, 500) + '...' : firstPara;
3260
- }
3261
- }
3262
- // For file operations, look for context about the file
3263
- if (['write_file', 'edit_file'].includes(toolCall.name)) {
3264
- const args = toolCall.arguments;
3265
- const path = String(args.path || args.file_path || '');
3266
- // Look for mentions of this file in the assistant's explanation
3267
- if (path && content.toLowerCase().includes(path.toLowerCase().split('/').pop() || '')) {
3268
- // Extract the sentence(s) mentioning this file
3269
- const sentences = content.split(/[.!?\n]+/).filter(s => s.toLowerCase().includes(path.toLowerCase().split('/').pop() || ''));
3270
- if (sentences.length > 0) {
3271
- const relevant = sentences.slice(0, 2).join('. ').trim();
3272
- return relevant.length > 500 ? relevant.slice(0, 500) + '...' : relevant;
3273
- }
3274
- }
3275
- }
3276
- // Fallback: use first 500 chars instead of 200
3277
- // Look for the first meaningful paragraph/section
3278
- const paragraphs = content.split(/\n\n+/).filter(p => p.trim().length > 20);
3279
- if (paragraphs.length > 0) {
3280
- const firstPara = paragraphs[0].trim();
3281
- return firstPara.length > 500 ? firstPara.slice(0, 500) + '...' : firstPara;
3282
- }
3283
- // Ultimate fallback
3284
- return content.length > 500 ? content.slice(0, 500) + '...' : content;
3285
- }
3286
1906
  /**
3287
1907
  * Update memory statistics.
3288
1908
  * Memory stats are retrieved via memory manager, not stored in state.
@@ -3308,12 +1928,27 @@ export class ProductionAgent {
3308
1928
  }
3309
1929
  return this.state.metrics;
3310
1930
  }
1931
+ getResilienceConfig() {
1932
+ return this.config.resilience;
1933
+ }
3311
1934
  /**
3312
1935
  * Get current state.
3313
1936
  */
3314
1937
  getState() {
3315
1938
  return { ...this.state };
3316
1939
  }
1940
+ /**
1941
+ * Get shared state stats for TUI visibility.
1942
+ * Returns null when not in a swarm context.
1943
+ */
1944
+ getSharedStats() {
1945
+ if (!this._sharedContextState)
1946
+ return null;
1947
+ return {
1948
+ context: this._sharedContextState.getStats(),
1949
+ economics: this._sharedEconomicsState?.getStats() ?? { fingerprints: 0, globalLoops: [] },
1950
+ };
1951
+ }
3317
1952
  /**
3318
1953
  * Get the maximum context tokens for this agent's model.
3319
1954
  * Priority: user config > OpenRouter API > hardcoded ModelRegistry > 200K default
@@ -3335,6 +1970,16 @@ export class ProductionAgent {
3335
1970
  // Default
3336
1971
  return 200000;
3337
1972
  }
1973
+ /**
1974
+ * Estimate tokens used by the system prompt (codebase context, tools, rules).
1975
+ * Used by TUI to display accurate context % that includes system overhead.
1976
+ */
1977
+ getSystemPromptTokenEstimate() {
1978
+ if (this.lastSystemPromptLength > 0) {
1979
+ return Math.ceil(this.lastSystemPromptLength / 3.2);
1980
+ }
1981
+ return 0;
1982
+ }
3338
1983
  /**
3339
1984
  * Get the trace collector (Lesson 26).
3340
1985
  * Returns null if trace capture is not enabled.
@@ -3348,6 +1993,9 @@ export class ProductionAgent {
3348
1993
  */
3349
1994
  setTraceCollector(collector) {
3350
1995
  this.traceCollector = collector;
1996
+ if (this.codebaseContext) {
1997
+ this.codebaseContext.traceCollector = collector;
1998
+ }
3351
1999
  }
3352
2000
  /**
3353
2001
  * Get the learning store for cross-session learning.
@@ -3559,7 +2207,7 @@ export class ProductionAgent {
3559
2207
  const validation = this.validateCheckpoint(savedState);
3560
2208
  // Log warnings
3561
2209
  for (const warning of validation.warnings) {
3562
- console.warn(`[Checkpoint] Warning: ${warning}`);
2210
+ log.warn('Checkpoint validation warning', { warning });
3563
2211
  this.observability?.logger?.warn('Checkpoint validation warning', { warning });
3564
2212
  }
3565
2213
  // Fail on validation errors
@@ -3658,7 +2306,7 @@ export class ProductionAgent {
3658
2306
  }
3659
2307
  }
3660
2308
  if (compactedCount > 0 && process.env.DEBUG) {
3661
- console.log(` 📦 Compacted ${compactedCount} tool outputs (saved ~${Math.round(savedChars / 4)} tokens)`);
2309
+ log.debug('Compacted tool outputs', { compactedCount, savedTokens: Math.round(savedChars / 4) });
3662
2310
  }
3663
2311
  }
3664
2312
  /**
@@ -3699,23 +2347,32 @@ export class ProductionAgent {
3699
2347
  const artifactWriteTools = ['write_file', 'edit_file', 'apply_patch', 'append_file'];
3700
2348
  return !artifactWriteTools.some(toolName => executedToolNames.has(toolName));
3701
2349
  }
3702
- /**
3703
- * Detect "future-intent" responses that imply the model has not completed work.
3704
- */
3705
- detectIncompleteActionResponse(content) {
3706
- const trimmed = content.trim();
3707
- if (!trimmed) {
3708
- return false;
2350
+ getOpenTasksSummary() {
2351
+ if (!this.taskManager) {
2352
+ return undefined;
2353
+ }
2354
+ const tasks = this.taskManager.list();
2355
+ const pending = tasks.filter(t => t.status === 'pending').length;
2356
+ const inProgress = tasks.filter(t => t.status === 'in_progress').length;
2357
+ const blocked = tasks.filter(t => t.status === 'pending' && this.taskManager?.isBlocked(t.id)).length;
2358
+ return { pending, inProgress, blocked };
2359
+ }
2360
+ reconcileStaleTasks(reason) {
2361
+ if (!this.taskManager)
2362
+ return;
2363
+ const staleAfterMs = typeof this.config.resilience === 'object'
2364
+ ? (this.config.resilience.taskLeaseStaleMs ?? 5 * 60 * 1000)
2365
+ : 5 * 60 * 1000;
2366
+ const recovered = this.taskManager.reconcileStaleInProgress({
2367
+ staleAfterMs,
2368
+ reason,
2369
+ });
2370
+ if (recovered.reconciled > 0) {
2371
+ this.observability?.logger?.info('Recovered stale task leases', {
2372
+ reason,
2373
+ recovered: recovered.reconciled,
2374
+ });
3709
2375
  }
3710
- const lower = trimmed.toLowerCase();
3711
- const futureIntentPatterns = [
3712
- /^(now|next|then)\s+(i\s+will|i'll|let me)\b/,
3713
- /^i\s+(will|am going to|can)\b/,
3714
- /^(let me|i'll|i will)\s+(create|write|save|do|make|generate|start)\b/,
3715
- /^(now|next|then)\s+i(?:'ll| will)\b/,
3716
- ];
3717
- const completionSignals = /\b(done|completed|finished|here is|created|saved|wrote)\b/;
3718
- return futureIntentPatterns.some(pattern => pattern.test(lower)) && !completionSignals.test(lower);
3719
2376
  }
3720
2377
  /**
3721
2378
  * Get audit log (if human-in-loop is enabled).
@@ -4128,1003 +2785,16 @@ export class ProductionAgent {
4128
2785
  return success;
4129
2786
  }
4130
2787
  /**
4131
- * Spawn an agent to execute a task.
4132
- * Returns the result when the agent completes.
4133
- *
4134
- * @param agentName - Name of the agent to spawn (researcher, coder, etc.)
4135
- * @param task - The task description for the agent
4136
- * @param constraints - Optional constraints to keep the subagent focused
2788
+ * Spawn a subagent (delegates to core/subagent-spawner).
4137
2789
  */
4138
2790
  async spawnAgent(agentName, task, constraints) {
4139
- if (!this.agentRegistry) {
4140
- return {
4141
- success: false,
4142
- output: 'Agent registry not initialized',
4143
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
4144
- };
4145
- }
4146
- const agentDef = this.agentRegistry.getAgent(agentName);
4147
- if (!agentDef) {
4148
- return {
4149
- success: false,
4150
- output: `Agent not found: ${agentName}`,
4151
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
4152
- };
4153
- }
4154
- // DUPLICATE SPAWN PREVENTION with SEMANTIC SIMILARITY
4155
- // Skip for swarm workers — the orchestrator handles retry logic and deduplication
4156
- // at the task level. Without this bypass, retried swarm tasks return stale results.
4157
- const isSwarmWorker = agentName.startsWith('swarm-');
4158
- const SEMANTIC_SIMILARITY_THRESHOLD = 0.75; // 75% similarity = duplicate
4159
- const taskKey = `${agentName}:${task.slice(0, 150).toLowerCase().replace(/\s+/g, ' ').trim()}`;
4160
- const now = Date.now();
4161
- // Clean up old entries (older than dedup window)
4162
- for (const [key, entry] of this.spawnedTasks.entries()) {
4163
- if (now - entry.timestamp > ProductionAgent.SPAWN_DEDUP_WINDOW_MS) {
4164
- this.spawnedTasks.delete(key);
4165
- }
4166
- }
4167
- let existingMatch;
4168
- let matchType = 'exact';
4169
- if (!isSwarmWorker) {
4170
- // Check for exact match first
4171
- existingMatch = this.spawnedTasks.get(taskKey);
4172
- // If no exact match, check for semantic similarity among same agent's tasks
4173
- if (!existingMatch) {
4174
- for (const [key, entry] of this.spawnedTasks.entries()) {
4175
- // Only compare tasks from the same agent type
4176
- if (!key.startsWith(`${agentName}:`))
4177
- continue;
4178
- if (now - entry.timestamp >= ProductionAgent.SPAWN_DEDUP_WINDOW_MS)
4179
- continue;
4180
- // Extract the task portion from the key
4181
- const existingTask = key.slice(agentName.length + 1);
4182
- const similarity = calculateTaskSimilarity(task, existingTask);
4183
- if (similarity >= SEMANTIC_SIMILARITY_THRESHOLD) {
4184
- existingMatch = entry;
4185
- matchType = 'semantic';
4186
- this.observability?.logger?.debug('Semantic duplicate detected', {
4187
- agent: agentName,
4188
- newTask: task.slice(0, 80),
4189
- existingTask: existingTask.slice(0, 80),
4190
- similarity: (similarity * 100).toFixed(1) + '%',
4191
- });
4192
- break;
4193
- }
4194
- }
4195
- }
4196
- }
4197
- if (existingMatch && now - existingMatch.timestamp < ProductionAgent.SPAWN_DEDUP_WINDOW_MS) {
4198
- // Same or semantically similar task spawned within the dedup window
4199
- this.observability?.logger?.warn('Duplicate spawn prevented', {
4200
- agent: agentName,
4201
- task: task.slice(0, 100),
4202
- matchType,
4203
- originalTimestamp: existingMatch.timestamp,
4204
- elapsedMs: now - existingMatch.timestamp,
4205
- });
4206
- const duplicateMessage = `[DUPLICATE SPAWN PREVENTED${matchType === 'semantic' ? ' - SEMANTIC MATCH' : ''}]\n` +
4207
- `This task was already spawned ${Math.round((now - existingMatch.timestamp) / 1000)}s ago.\n` +
4208
- `${existingMatch.queuedChanges > 0
4209
- ? `The previous spawn queued ${existingMatch.queuedChanges} change(s) to the pending plan.\n` +
4210
- `These changes are already in your plan - do NOT spawn again.\n`
4211
- : ''}Previous result summary:\n${existingMatch.result.slice(0, 500)}`;
4212
- return {
4213
- success: true, // Mark as success since original task completed
4214
- output: duplicateMessage,
4215
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
4216
- };
4217
- }
4218
- // Generate a unique ID for this agent instance that will be used consistently
4219
- // throughout the agent's lifecycle (spawn event, token events, completion events)
4220
- const agentId = `spawn-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
4221
- this.emit({ type: 'agent.spawn', agentId, name: agentName, task });
4222
- this.observability?.logger?.info('Spawning agent', { name: agentName, task });
4223
- const startTime = Date.now();
4224
- const childSessionId = `subagent-${agentName}-${Date.now()}`;
4225
- const childTraceId = `trace-${childSessionId}`;
4226
- let workerResultId;
4227
- try {
4228
- // Filter tools for this agent
4229
- let agentTools = filterToolsForAgent(agentDef, Array.from(this.tools.values()));
4230
- // Resolve policy profile FIRST so we know which tools the policy allows.
4231
- // This must happen before the recommendation filter so policy-allowed tools
4232
- // are preserved through the recommendation pruning step.
4233
- const inferredTaskType = agentDef.taskType ?? ToolRecommendationEngine.inferTaskType(agentName);
4234
- const policyResolution = resolvePolicyProfile({
4235
- policyEngine: this.config.policyEngine,
4236
- requestedProfile: agentDef.policyProfile,
4237
- swarmConfig: isSwarmWorker && this.config.swarm && typeof this.config.swarm === 'object'
4238
- ? this.config.swarm
4239
- : undefined,
4240
- taskType: inferredTaskType,
4241
- isSwarmWorker,
4242
- sandboxConfig: this.config.sandbox && typeof this.config.sandbox === 'object'
4243
- ? this.config.sandbox
4244
- : undefined,
4245
- });
4246
- this.emit({
4247
- type: 'policy.profile.resolved',
4248
- profile: policyResolution.profileName,
4249
- context: isSwarmWorker ? 'swarm' : 'subagent',
4250
- selectionSource: policyResolution.metadata.selectionSource,
4251
- usedLegacyMappings: policyResolution.metadata.usedLegacyMappings,
4252
- legacySources: policyResolution.metadata.legacyMappingSources,
4253
- });
4254
- if (policyResolution.metadata.usedLegacyMappings) {
4255
- this.emit({
4256
- type: 'policy.legacy.fallback.used',
4257
- profile: policyResolution.profileName,
4258
- sources: policyResolution.metadata.legacyMappingSources,
4259
- warnings: policyResolution.metadata.warnings,
4260
- });
4261
- this.observability?.logger?.warn('Policy legacy mappings used', {
4262
- agent: agentName,
4263
- profile: policyResolution.profileName,
4264
- sources: policyResolution.metadata.legacyMappingSources,
4265
- });
4266
- }
4267
- // Apply tool recommendations to improve subagent focus (only for large tool sets)
4268
- if (this.toolRecommendation && agentTools.length > 15) {
4269
- const taskType = ToolRecommendationEngine.inferTaskType(agentName);
4270
- const recommendations = this.toolRecommendation.recommendTools(task, taskType, agentTools.map(t => t.name));
4271
- if (recommendations.length > 0) {
4272
- const recommendedNames = new Set(recommendations.map(r => r.toolName));
4273
- // Always keep spawn tools even if not recommended
4274
- const alwaysKeep = new Set(['spawn_agent', 'spawn_agents_parallel']);
4275
- // Also keep tools that the resolved policy profile explicitly allows.
4276
- // This prevents the recommendation engine from stripping tools that the
4277
- // security policy says the worker should have.
4278
- if (policyResolution.profile.allowedTools) {
4279
- for (const t of policyResolution.profile.allowedTools)
4280
- alwaysKeep.add(t);
4281
- }
4282
- agentTools = agentTools.filter(t => recommendedNames.has(t.name) || alwaysKeep.has(t.name));
4283
- }
4284
- }
4285
- // Enforce unified tool policy at spawn-time so denied tools are never exposed.
4286
- if (policyResolution.profile.toolAccessMode === 'whitelist' && policyResolution.profile.allowedTools) {
4287
- const allowed = new Set(policyResolution.profile.allowedTools);
4288
- agentTools = agentTools.filter(t => allowed.has(t.name));
4289
- }
4290
- else if (policyResolution.profile.deniedTools && policyResolution.profile.deniedTools.length > 0) {
4291
- const denied = new Set(policyResolution.profile.deniedTools);
4292
- agentTools = agentTools.filter(t => !denied.has(t.name));
4293
- }
4294
- // Fail fast if tool filtering resulted in zero tools — the worker can't do anything
4295
- if (agentTools.length === 0) {
4296
- throw new Error(`Worker '${agentName}' has zero available tools after filtering. Check toolAccessMode and policy profile '${policyResolution.profileName}'.`);
4297
- }
4298
- // Resolve model - abstract tiers (fast/balanced/quality) should use parent's model
4299
- // Only use agentDef.model if it's an actual model ID (contains '/')
4300
- const resolvedModel = (agentDef.model && agentDef.model.includes('/'))
4301
- ? agentDef.model
4302
- : this.config.model;
4303
- // Persist subagent task lifecycle in durable storage when available
4304
- if (this.store?.hasWorkerResultsFeature()) {
4305
- try {
4306
- workerResultId = this.store.createWorkerResult(agentId, task.slice(0, 500), resolvedModel || 'default');
4307
- }
4308
- catch (storeErr) {
4309
- this.observability?.logger?.warn('Failed to create worker result record', {
4310
- agentId,
4311
- error: storeErr.message,
4312
- });
4313
- }
4314
- }
4315
- // Get subagent config with agent-type-specific timeouts and iteration limits
4316
- // Uses dynamic configuration based on agent type (researcher needs more time than reviewer)
4317
- // Precedence: per-type config > per-type default > global config > hardcoded fallback
4318
- const subagentConfig = this.config.subagent;
4319
- const hasSubagentConfig = subagentConfig !== false && subagentConfig !== undefined;
4320
- // Timeout precedence: agentDef.timeout > per-type config > agent-type default > global config default
4321
- // agentDef.timeout is set by worker-pool for swarm workers, giving them precise timeout control
4322
- const agentTypeTimeout = getSubagentTimeout(agentName);
4323
- const rawPerTypeTimeout = hasSubagentConfig
4324
- ? subagentConfig.timeouts?.[agentName]
4325
- : undefined;
4326
- const rawGlobalTimeout = hasSubagentConfig
4327
- ? subagentConfig.defaultTimeout
4328
- : undefined;
4329
- // Validate: reject negative, NaN, or non-finite timeout values
4330
- const isValidTimeout = (v) => v !== undefined && Number.isFinite(v) && v > 0;
4331
- const agentDefTimeout = isValidTimeout(agentDef.timeout) ? agentDef.timeout : undefined;
4332
- const perTypeConfigTimeout = isValidTimeout(rawPerTypeTimeout) ? rawPerTypeTimeout : undefined;
4333
- const globalConfigTimeout = isValidTimeout(rawGlobalTimeout) ? rawGlobalTimeout : undefined;
4334
- const subagentTimeout = agentDefTimeout ?? perTypeConfigTimeout ?? agentTypeTimeout ?? globalConfigTimeout ?? 300000;
4335
- // Iteration precedence: per-type config override > agent-type default > global config default
4336
- const agentTypeMaxIter = getSubagentMaxIterations(agentName);
4337
- const rawPerTypeMaxIter = hasSubagentConfig
4338
- ? subagentConfig.maxIterations?.[agentName]
4339
- : undefined;
4340
- const rawGlobalMaxIter = hasSubagentConfig
4341
- ? subagentConfig.defaultMaxIterations
4342
- : undefined;
4343
- const isValidIter = (v) => v !== undefined && Number.isFinite(v) && v > 0 && Number.isInteger(v);
4344
- const perTypeConfigMaxIter = isValidIter(rawPerTypeMaxIter) ? rawPerTypeMaxIter : undefined;
4345
- const globalConfigMaxIter = isValidIter(rawGlobalMaxIter) ? rawGlobalMaxIter : undefined;
4346
- const defaultMaxIterations = agentDef.maxIterations ?? perTypeConfigMaxIter ?? agentTypeMaxIter ?? globalConfigMaxIter ?? 15;
4347
- // BLACKBOARD CONTEXT INJECTION
4348
- // Gather relevant context from the blackboard for the subagent
4349
- let blackboardContext = '';
4350
- const parentAgentId = `parent-${Date.now()}`;
4351
- if (this.blackboard) {
4352
- // Post parent's exploration context before spawning
4353
- this.blackboard.post(parentAgentId, {
4354
- topic: 'spawn.parent_context',
4355
- content: `Parent spawning ${agentName} for task: ${task.slice(0, 200)}`,
4356
- type: 'progress',
4357
- confidence: 1,
4358
- metadata: { agentName, taskPreview: task.slice(0, 100) },
4359
- });
4360
- // Gather recent findings that might help the subagent
4361
- const recentFindings = this.blackboard.query({
4362
- limit: 5,
4363
- types: ['discovery', 'analysis', 'progress'],
4364
- minConfidence: 0.7,
4365
- });
4366
- if (recentFindings.length > 0) {
4367
- const findingsSummary = recentFindings
4368
- .map(f => `- [${f.agentId}] ${f.topic}: ${f.content.slice(0, 150)}${f.content.length > 150 ? '...' : ''}`)
4369
- .join('\n');
4370
- blackboardContext = `\n\n**BLACKBOARD CONTEXT (from parent/sibling agents):**\n${findingsSummary}\n`;
4371
- }
4372
- }
4373
- // Check for files already being modified in parent's pending plan
4374
- const currentPlan = this.pendingPlanManager.getPendingPlan();
4375
- if (currentPlan && currentPlan.proposedChanges.length > 0) {
4376
- const pendingFiles = currentPlan.proposedChanges
4377
- .filter((c) => c.tool === 'write_file' || c.tool === 'edit_file')
4378
- .map((c) => c.args.path || c.args.file_path)
4379
- .filter(Boolean);
4380
- if (pendingFiles.length > 0) {
4381
- blackboardContext += `\n**FILES ALREADY IN PENDING PLAN (do not duplicate):**\n${pendingFiles.slice(0, 10).join('\n')}\n`;
4382
- }
4383
- }
4384
- // CONSTRAINT INJECTION
4385
- // Add constraints to the subagent's context if provided
4386
- // Also always include budget awareness so subagents know their limits
4387
- const constraintParts = [];
4388
- // BUDGET AWARENESS: Always inject so subagent understands its limits
4389
- const subagentBudgetTokens = constraints?.maxTokens ?? SUBAGENT_BUDGET.maxTokens ?? 100000;
4390
- const subagentBudgetMinutes = Math.round((SUBAGENT_BUDGET.maxDuration ?? 240000) / 60000);
4391
- if (isSwarmWorker) {
4392
- // V8: Minimal resource awareness for swarm workers — removes budget/time
4393
- // messaging entirely to prevent cheap models from bail-out anxiety.
4394
- // The economics system handles budget warnings via system messages when needed.
4395
- // Wrapup JSON format is ONLY injected when requestWrapup() is called.
4396
- constraintParts.push(`**Execution Mode:** You are a focused worker agent.\n` +
4397
- `- Complete your assigned task using tool calls.\n` +
4398
- `- Your FIRST action must be a tool call (read_file, write_file, edit_file, grep, glob, etc.).\n` +
4399
- `- To create files use write_file. To modify files use edit_file. Do NOT use bash for file operations.\n` +
4400
- `- You will receive a system message if you need to wrap up. Until then, work normally.\n` +
4401
- `- Do NOT produce summaries or reports — produce CODE and FILE CHANGES.`);
4402
- }
4403
- else {
4404
- // Original RESOURCE AWARENESS text for regular subagents
4405
- constraintParts.push(`**RESOURCE AWARENESS (CRITICAL):**\n` +
4406
- `- Token budget: ~${(subagentBudgetTokens / 1000).toFixed(0)}k tokens\n` +
4407
- `- Time limit: ~${subagentBudgetMinutes} minutes\n` +
4408
- `- You will receive warnings at 70% usage. When warned, WRAP UP immediately.\n` +
4409
- `- Do not explore indefinitely - be focused and efficient.\n` +
4410
- `- If approaching limits, summarize findings and return.\n` +
4411
- `- **STRUCTURED WRAPUP:** When told to wrap up, respond with ONLY this JSON (no tool calls):\n` +
4412
- ` {"findings":[...], "actionsTaken":[...], "failures":[...], "remainingWork":[...], "suggestedNextSteps":[...]}`);
4413
- }
4414
- if (constraints) {
4415
- if (constraints.focusAreas && constraints.focusAreas.length > 0) {
4416
- constraintParts.push(`**FOCUS AREAS (limit exploration to these paths):**\n${constraints.focusAreas.map(a => ` - ${a}`).join('\n')}`);
4417
- }
4418
- if (constraints.excludeAreas && constraints.excludeAreas.length > 0) {
4419
- constraintParts.push(`**EXCLUDED AREAS (do NOT explore these):**\n${constraints.excludeAreas.map(a => ` - ${a}`).join('\n')}`);
4420
- }
4421
- if (constraints.requiredDeliverables && constraints.requiredDeliverables.length > 0) {
4422
- constraintParts.push(`**REQUIRED DELIVERABLES (you must produce these):**\n${constraints.requiredDeliverables.map(d => ` - ${d}`).join('\n')}`);
4423
- }
4424
- if (constraints.timeboxMinutes) {
4425
- constraintParts.push(`**TIME LIMIT:** ${constraints.timeboxMinutes} minutes (soft limit - wrap up if approaching)`);
4426
- }
4427
- }
4428
- const constraintContext = `\n\n**EXECUTION CONSTRAINTS:**\n${constraintParts.join('\n\n')}\n`;
4429
- // Build delegation-enhanced system prompt
4430
- let delegationContext = '';
4431
- if (this.lastComplexityAssessment && this.lastComplexityAssessment.tier !== 'simple') {
4432
- const spec = createMinimalDelegationSpec(task, agentName);
4433
- delegationContext = '\n\n' + buildDelegationPrompt(spec);
4434
- }
4435
- // Quality self-assessment prompt for subagent
4436
- const qualityPrompt = '\n\n' + getSubagentQualityPrompt();
4437
- // Build subagent system prompt with subagent-specific plan mode addition
4438
- const parentMode = this.getMode();
4439
- const subagentSystemPrompt = parentMode === 'plan'
4440
- ? `${agentDef.systemPrompt}\n\n${SUBAGENT_PLAN_MODE_ADDITION}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`
4441
- : `${agentDef.systemPrompt}${blackboardContext}${constraintContext}${delegationContext}${qualityPrompt}`;
4442
- // Allocate budget from pool (or use default) — track allocation ID for release later
4443
- const pooledBudget = this.getSubagentBudget(agentName, constraints);
4444
- const poolAllocationId = pooledBudget.allocationId;
4445
- const deniedByProfile = new Set(policyResolution.profile.deniedTools ?? []);
4446
- const policyToolPolicies = {};
4447
- for (const toolName of deniedByProfile) {
4448
- policyToolPolicies[toolName] = {
4449
- policy: 'forbidden',
4450
- reason: `Denied by policy profile '${policyResolution.profileName}'`,
4451
- };
4452
- }
4453
- if ((policyResolution.profile.bashMode ?? 'full') === 'disabled') {
4454
- policyToolPolicies.bash = {
4455
- policy: 'forbidden',
4456
- reason: `Bash is disabled by policy profile '${policyResolution.profileName}'`,
4457
- };
4458
- }
4459
- // Create a sub-agent with the agent's config
4460
- // Use SUBAGENT_BUDGET to constrain resource usage (prevents runaway token consumption)
4461
- const subAgent = new ProductionAgent({
4462
- provider: this.provider,
4463
- tools: agentTools,
4464
- // Pass toolResolver so subagent can lazy-load MCP tools
4465
- toolResolver: this.toolResolver || undefined,
4466
- // Pass MCP tool summaries so subagent knows what tools are available
4467
- mcpToolSummaries: this.config.mcpToolSummaries,
4468
- systemPrompt: subagentSystemPrompt,
4469
- model: resolvedModel,
4470
- maxIterations: agentDef.maxIterations || defaultMaxIterations,
4471
- // Inherit some features but keep subagent simpler
4472
- memory: false,
4473
- planning: false,
4474
- reflection: false,
4475
- // Enable lightweight compaction for subagents (Improvement P5)
4476
- // tokenThreshold configures the Compactor's per-pass size limit
4477
- // maxContextTokens constrains AutoCompactionManager's percentage thresholds
4478
- // With maxContextTokens=80000 and default 80% threshold, compaction triggers at ~64K
4479
- compaction: {
4480
- enabled: true,
4481
- mode: 'auto',
4482
- tokenThreshold: 40000, // Compactor summarization size limit per pass
4483
- preserveRecentCount: 4, // Preserve fewer messages (splits to 2 user + 2 assistant)
4484
- preserveToolResults: false, // More aggressive — subagents can re-read files
4485
- summaryMaxTokens: 500,
4486
- },
4487
- // Lower context window for subagents so percentage-based compaction triggers earlier
4488
- maxContextTokens: 80000,
4489
- observability: this.config.observability,
4490
- sandbox: (() => {
4491
- const swarm = this.config.swarm;
4492
- const extraCmds = swarm && typeof swarm === 'object' && swarm.permissions?.additionalAllowedCommands;
4493
- const baseSbx = this.config.sandbox;
4494
- if (baseSbx && typeof baseSbx === 'object') {
4495
- const sbx = baseSbx;
4496
- const allowedCommands = extraCmds
4497
- ? [...(sbx.allowedCommands || []), ...extraCmds]
4498
- : sbx.allowedCommands;
4499
- return {
4500
- ...sbx,
4501
- allowedCommands,
4502
- bashMode: policyResolution.profile.bashMode ?? sbx.bashMode,
4503
- bashWriteProtection: policyResolution.profile.bashWriteProtection ?? sbx.bashWriteProtection,
4504
- blockFileCreationViaBash: (policyResolution.profile.bashWriteProtection ?? 'off') === 'block_file_mutation'
4505
- ? true
4506
- : sbx.blockFileCreationViaBash,
4507
- };
4508
- }
4509
- return baseSbx;
4510
- })(),
4511
- humanInLoop: this.config.humanInLoop,
4512
- // Subagents get 'allow' as default policy since they're already
4513
- // constrained to their registered tool set. The parent's 'prompt'
4514
- // policy can't work without humanInLoop.
4515
- executionPolicy: (() => {
4516
- const hasPolicyOverrides = Object.keys(policyToolPolicies).length > 0;
4517
- if (this.config.executionPolicy) {
4518
- return {
4519
- ...this.config.executionPolicy,
4520
- defaultPolicy: 'allow',
4521
- toolPolicies: {
4522
- ...(this.config.executionPolicy.toolPolicies ?? {}),
4523
- ...policyToolPolicies,
4524
- },
4525
- };
4526
- }
4527
- if (hasPolicyOverrides) {
4528
- return {
4529
- enabled: true,
4530
- defaultPolicy: 'allow',
4531
- toolPolicies: policyToolPolicies,
4532
- intentAware: false,
4533
- };
4534
- }
4535
- return this.config.executionPolicy;
4536
- })(),
4537
- policyEngine: this.config.policyEngine
4538
- ? { ...this.config.policyEngine, defaultProfile: policyResolution.profileName }
4539
- : this.config.policyEngine,
4540
- threads: false,
4541
- // Disable hooks console output in subagents - parent handles event display
4542
- hooks: this.config.hooks === false ? false : {
4543
- enabled: true,
4544
- builtIn: { logging: false, timing: false, metrics: false },
4545
- custom: [],
4546
- },
4547
- // Pass unique agentId for blackboard coordination and tracing
4548
- agentId,
4549
- // Share parent's blackboard for coordination between parallel subagents
4550
- blackboard: this.blackboard || undefined,
4551
- // Share parent's file cache to eliminate redundant reads across agents
4552
- fileCache: this.fileCache || undefined,
4553
- // CONSTRAINED BUDGET: Use pooled budget when available, falling back to SUBAGENT_BUDGET
4554
- // Pooled budget ensures total tree cost stays bounded by parent's budget
4555
- // Merge economicsTuning from agent definition so swarm workers get custom thresholds
4556
- budget: agentDef.economicsTuning
4557
- ? { ...pooledBudget.budget, tuning: agentDef.economicsTuning }
4558
- : pooledBudget.budget,
4559
- });
4560
- // CRITICAL: Subagent inherits parent's mode
4561
- // This ensures that if parent is in plan mode:
4562
- // - Subagent's read operations execute immediately (visible exploration)
4563
- // - Subagent's write operations get queued in the subagent's pending plan
4564
- // - User maintains control over what actually gets written
4565
- if (parentMode !== 'build') {
4566
- subAgent.setMode(parentMode);
4567
- }
4568
- // APPROVAL BATCHING (Improvement P6): Set approval scope for subagents
4569
- // Read-only tools are auto-approved; write tools get scoped approval
4570
- // This reduces interruptions from ~8 per session to ~1-2
4571
- // Swarm permissions from config override defaults when present
4572
- const swarmPerms = this.config.swarm && typeof this.config.swarm === 'object'
4573
- ? this.config.swarm.permissions : undefined;
4574
- const baseAutoApprove = ['read_file', 'list_files', 'glob', 'grep', 'show_file_history', 'show_session_changes'];
4575
- const baseScopedApprove = isSwarmWorker
4576
- ? {
4577
- write_file: { paths: ['src/', 'tests/', 'tools/'] },
4578
- edit_file: { paths: ['src/', 'tests/', 'tools/'] },
4579
- bash: { paths: ['src/', 'tests/', 'tools/'] },
4580
- }
4581
- : {
4582
- write_file: { paths: ['src/', 'tests/', 'tools/'] },
4583
- edit_file: { paths: ['src/', 'tests/', 'tools/'] },
4584
- };
4585
- const baseRequireApproval = isSwarmWorker ? ['delete_file'] : ['bash', 'delete_file'];
4586
- const mergedScope = mergeApprovalScopeWithProfile({
4587
- autoApprove: swarmPerms?.autoApprove
4588
- ? [...new Set([...baseAutoApprove, ...swarmPerms.autoApprove])]
4589
- : baseAutoApprove,
4590
- scopedApprove: swarmPerms?.scopedApprove
4591
- ? { ...baseScopedApprove, ...swarmPerms.scopedApprove }
4592
- : baseScopedApprove,
4593
- // requireApproval: full replacement (not merge) — user may want to REMOVE
4594
- // tools like 'bash' to let workers run freely
4595
- requireApproval: swarmPerms?.requireApproval
4596
- ? swarmPerms.requireApproval
4597
- : baseRequireApproval,
4598
- }, policyResolution.profile);
4599
- subAgent.setApprovalScope(mergedScope);
4600
- // Pass parent's iteration count to subagent for accurate budget tracking
4601
- // This prevents subagents from consuming excessive iterations when parent already used many
4602
- subAgent.setParentIterations(this.getTotalIterations());
4603
- // UNIFIED TRACING: Share parent's trace collector with subagent context
4604
- // This ensures all subagent events are written to the same trace file as the parent,
4605
- // tagged with subagent context for proper aggregation in /trace output
4606
- if (this.traceCollector) {
4607
- const subagentTraceView = this.traceCollector.createSubagentView({
4608
- parentSessionId: this.traceCollector.getSessionId() || 'unknown',
4609
- agentType: agentName,
4610
- spawnedAtIteration: this.state.iteration,
4611
- });
4612
- subAgent.setTraceCollector(subagentTraceView);
4613
- }
4614
- // GRACEFUL TIMEOUT with WRAPUP PHASE
4615
- // Instead of instant death on timeout, the subagent gets a wrapup window
4616
- // to produce a structured summary before being killed:
4617
- // 1. Normal operation: progress extends idle timer
4618
- // 2. Wrapup phase: 30s before hard kill, wrapup callback fires → forceTextOnly
4619
- // 3. Hard kill: race() throws CancellationError after wrapup window
4620
- const IDLE_TIMEOUT = agentDef.idleTimeout ?? 120000; // Configurable idle timeout (default: 2 min)
4621
- let WRAPUP_WINDOW = 30000;
4622
- let IDLE_CHECK_INTERVAL = 5000;
4623
- if (this.config.subagent) {
4624
- WRAPUP_WINDOW = this.config.subagent.wrapupWindowMs ?? WRAPUP_WINDOW;
4625
- IDLE_CHECK_INTERVAL = this.config.subagent.idleCheckIntervalMs ?? IDLE_CHECK_INTERVAL;
4626
- }
4627
- const progressAwareTimeout = createGracefulTimeout(subagentTimeout, // Max total time (hard limit from agent type config)
4628
- IDLE_TIMEOUT, // Idle timeout (soft limit - no progress triggers this)
4629
- WRAPUP_WINDOW, // Wrapup window before hard kill
4630
- IDLE_CHECK_INTERVAL);
4631
- // Register wrapup callback — fires 30s before hard kill
4632
- // This triggers the subagent's forceTextOnly path for a structured summary
4633
- progressAwareTimeout.onWrapupWarning(() => {
4634
- this.emit({
4635
- type: 'subagent.wrapup.started',
4636
- agentId,
4637
- agentType: agentName,
4638
- reason: 'Timeout approaching - graceful wrapup window opened',
4639
- elapsedMs: Date.now() - startTime,
4640
- });
4641
- subAgent.requestWrapup('Timeout approaching — produce structured summary');
4642
- });
4643
- // Forward events from subagent with context (track for cleanup)
4644
- // Also report progress to the timeout tracker
4645
- const unsubSubAgent = subAgent.subscribe(event => {
4646
- // Tag event with subagent source AND unique ID so TUI can properly attribute
4647
- // events to the specific agent instance (critical for multiple same-type agents)
4648
- const taggedEvent = { ...event, subagent: agentName, subagentId: agentId };
4649
- this.emit(taggedEvent);
4650
- // Report progress for timeout extension
4651
- // Progress events: tool calls, LLM responses, token updates
4652
- const progressEvents = ['tool.start', 'tool.complete', 'llm.start', 'llm.complete'];
4653
- if (progressEvents.includes(event.type)) {
4654
- progressAwareTimeout.reportProgress();
4655
- }
4656
- });
4657
- // Link parent's cancellation with progress-aware timeout so ESC propagates to subagents
4658
- const parentSource = this.cancellation?.getSource();
4659
- const effectiveSource = parentSource
4660
- ? createLinkedToken(parentSource, progressAwareTimeout)
4661
- : progressAwareTimeout;
4662
- // CRITICAL: Pass the cancellation token to the subagent so it can check and stop
4663
- // gracefully when timeout fires. Without this, the subagent continues running as
4664
- // a "zombie" even after race() returns with a timeout error.
4665
- subAgent.setExternalCancellation(effectiveSource.token);
4666
- // Pause parent's duration timer while subagent runs to prevent
4667
- // the parent from timing out on wall-clock while waiting for subagent
4668
- this.economics?.pauseDuration();
4669
- try {
4670
- // Run the task with cancellation propagation from parent
4671
- const result = await race(subAgent.run(task), effectiveSource.token);
4672
- const duration = Date.now() - startTime;
4673
- // BEFORE cleanup - extract subagent's pending plan and merge into parent's plan
4674
- // This ensures that when a subagent in plan mode queues writes, they bubble up to the parent
4675
- let queuedChangeSummary = '';
4676
- let queuedChangesCount = 0;
4677
- if (subAgent.hasPendingPlan()) {
4678
- const subPlan = subAgent.getPendingPlan();
4679
- if (subPlan && subPlan.proposedChanges.length > 0) {
4680
- queuedChangesCount = subPlan.proposedChanges.length;
4681
- // Emit event for TUI to display
4682
- this.emit({
4683
- type: 'agent.pending_plan',
4684
- agentId: agentName,
4685
- changes: subPlan.proposedChanges,
4686
- });
4687
- // Build detailed summary of what was queued for the return message
4688
- // This prevents the "doom loop" where parent doesn't know what subagent did
4689
- const changeSummaries = subPlan.proposedChanges.map(c => {
4690
- if (c.tool === 'write_file' || c.tool === 'edit_file') {
4691
- const path = c.args.path || c.args.file_path || '(unknown file)';
4692
- return ` - [${c.tool}] ${path}: ${c.reason}`;
4693
- }
4694
- else if (c.tool === 'bash') {
4695
- const cmd = String(c.args.command || '').slice(0, 60);
4696
- return ` - [bash] ${cmd}${String(c.args.command || '').length > 60 ? '...' : ''}: ${c.reason}`;
4697
- }
4698
- return ` - [${c.tool}]: ${c.reason}`;
4699
- });
4700
- queuedChangeSummary = `\n\n[PLAN MODE - CHANGES QUEUED TO PARENT]\n` +
4701
- `The following ${subPlan.proposedChanges.length} change(s) have been queued in the parent's pending plan:\n` +
4702
- changeSummaries.join('\n') + '\n' +
4703
- `\nThese changes are now in YOUR pending plan. The task for this subagent is COMPLETE.\n` +
4704
- `Do NOT spawn another agent for the same task - the changes are already queued.\n` +
4705
- `Use /show-plan to see all pending changes, /approve to execute them.`;
4706
- // Merge into parent's pending plan with subagent context
4707
- for (const change of subPlan.proposedChanges) {
4708
- this.pendingPlanManager.addProposedChange(change.tool, { ...change.args, _fromSubagent: agentName }, `[${agentName}] ${change.reason}`, change.toolCallId);
4709
- }
4710
- }
4711
- // Also merge exploration summary if available
4712
- if (subPlan?.explorationSummary) {
4713
- this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
4714
- }
4715
- }
4716
- // If subagent queued changes, override output with informative message
4717
- // This is critical to prevent doom loops where parent doesn't understand what happened
4718
- const finalOutput = queuedChangeSummary
4719
- ? (result.response || '') + queuedChangeSummary
4720
- : (result.response || result.error || '');
4721
- // Parse structured closure report from agent's response (if it produced one)
4722
- const structured = parseStructuredClosureReport(result.response || '', 'completed');
4723
- // Extract real file paths from subagent's economics tracker (before cleanup)
4724
- const subagentFilePaths = subAgent.getModifiedFilePaths();
4725
- const spawnResultFinal = {
4726
- success: result.success,
4727
- output: finalOutput,
4728
- metrics: {
4729
- tokens: result.metrics.totalTokens,
4730
- duration,
4731
- toolCalls: result.metrics.toolCalls,
4732
- },
4733
- structured,
4734
- filesModified: subagentFilePaths,
4735
- };
4736
- // Save full output to subagent output store (avoids telephone problem)
4737
- if (this.subagentOutputStore) {
4738
- const outputEntry = {
4739
- id: agentId,
4740
- agentId,
4741
- agentName,
4742
- task,
4743
- fullOutput: finalOutput,
4744
- structured,
4745
- filesModified: subagentFilePaths,
4746
- filesCreated: [],
4747
- timestamp: new Date(),
4748
- tokensUsed: result.metrics.totalTokens,
4749
- durationMs: duration,
4750
- };
4751
- const storeId = this.subagentOutputStore.save(outputEntry);
4752
- // Attach reference so downstream consumers can retrieve full output
4753
- spawnResultFinal.outputStoreId = storeId;
4754
- }
4755
- if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4756
- try {
4757
- this.store.completeWorkerResult(workerResultId, {
4758
- fullOutput: finalOutput,
4759
- summary: finalOutput.slice(0, 500),
4760
- artifacts: structured ? [{ type: 'structured_report', data: structured }] : undefined,
4761
- metrics: {
4762
- tokens: result.metrics.totalTokens,
4763
- duration,
4764
- toolCalls: result.metrics.toolCalls,
4765
- },
4766
- });
4767
- }
4768
- catch (storeErr) {
4769
- this.observability?.logger?.warn('Failed to persist worker result', {
4770
- agentId,
4771
- error: storeErr.message,
4772
- });
4773
- }
4774
- }
4775
- this.emit({
4776
- type: 'agent.complete',
4777
- agentId, // Use unique spawn ID for precise tracking
4778
- agentType: agentName, // Keep type for display purposes
4779
- success: result.success,
4780
- output: finalOutput.slice(0, 500), // Include output preview
4781
- });
4782
- if (progressAwareTimeout.isInWrapupPhase()) {
4783
- this.emit({
4784
- type: 'subagent.wrapup.completed',
4785
- agentId,
4786
- agentType: agentName,
4787
- elapsedMs: Date.now() - startTime,
4788
- });
4789
- }
4790
- // Enhanced tracing: Record subagent completion
4791
- this.traceCollector?.record({
4792
- type: 'subagent.link',
4793
- data: {
4794
- parentSessionId: this.traceCollector.getSessionId() || 'unknown',
4795
- childSessionId,
4796
- childTraceId,
4797
- childConfig: {
4798
- agentType: agentName,
4799
- model: resolvedModel || 'default',
4800
- task,
4801
- tools: agentTools.map(t => t.name),
4802
- },
4803
- spawnContext: {
4804
- reason: `Delegated task: ${task.slice(0, 100)}`,
4805
- expectedOutcome: agentDef.description,
4806
- parentIteration: this.state.iteration,
4807
- },
4808
- result: {
4809
- success: result.success,
4810
- summary: (result.response || result.error || '').slice(0, 500),
4811
- tokensUsed: result.metrics.totalTokens,
4812
- durationMs: duration,
4813
- },
4814
- },
4815
- });
4816
- // Unsubscribe from subagent events before cleanup
4817
- unsubSubAgent();
4818
- await subAgent.cleanup();
4819
- // Cache result for duplicate spawn prevention
4820
- // Use the same taskKey from the dedup check above
4821
- this.spawnedTasks.set(taskKey, {
4822
- timestamp: Date.now(),
4823
- result: finalOutput,
4824
- queuedChanges: queuedChangesCount,
4825
- });
4826
- return spawnResultFinal;
4827
- }
4828
- catch (err) {
4829
- // Handle cancellation (user ESC or timeout) for cleaner error messages
4830
- if (isCancellationError(err)) {
4831
- const duration = Date.now() - startTime;
4832
- const isUserCancellation = parentSource?.isCancellationRequested;
4833
- const reason = isUserCancellation
4834
- ? 'User cancelled'
4835
- : err.reason || `Timed out after ${subagentTimeout}ms`;
4836
- this.emit({ type: 'agent.error', agentId, agentType: agentName, error: reason });
4837
- if (!isUserCancellation) {
4838
- this.emit({
4839
- type: 'subagent.timeout.hard_kill',
4840
- agentId,
4841
- agentType: agentName,
4842
- reason,
4843
- elapsedMs: Date.now() - startTime,
4844
- });
4845
- }
4846
- // =======================================================================
4847
- // PRESERVE PARTIAL RESULTS
4848
- // Instead of discarding all work, capture whatever the subagent produced
4849
- // before timeout. This prevents the "zombie agent" problem where tokens
4850
- // are consumed but results are lost.
4851
- // =======================================================================
4852
- const subagentState = subAgent.getState();
4853
- const subagentMetrics = subAgent.getMetrics();
4854
- // Extract partial response from the last assistant message
4855
- const assistantMessages = subagentState.messages.filter(m => m.role === 'assistant');
4856
- const lastAssistantMsg = assistantMessages[assistantMessages.length - 1];
4857
- const partialResponse = typeof lastAssistantMsg?.content === 'string'
4858
- ? lastAssistantMsg.content
4859
- : '';
4860
- // Extract pending plan before cleanup (even on cancellation, preserve any queued work)
4861
- let cancelledQueuedSummary = '';
4862
- if (subAgent.hasPendingPlan()) {
4863
- const subPlan = subAgent.getPendingPlan();
4864
- if (subPlan && subPlan.proposedChanges.length > 0) {
4865
- this.emit({
4866
- type: 'agent.pending_plan',
4867
- agentId: agentName,
4868
- changes: subPlan.proposedChanges,
4869
- });
4870
- // Build summary of changes that were queued before cancellation
4871
- const changeSummaries = subPlan.proposedChanges.map(c => {
4872
- if (c.tool === 'write_file' || c.tool === 'edit_file') {
4873
- const path = c.args.path || c.args.file_path || '(unknown file)';
4874
- return ` - [${c.tool}] ${path}: ${c.reason}`;
4875
- }
4876
- else if (c.tool === 'bash') {
4877
- const cmd = String(c.args.command || '').slice(0, 60);
4878
- return ` - [bash] ${cmd}...: ${c.reason}`;
4879
- }
4880
- return ` - [${c.tool}]: ${c.reason}`;
4881
- });
4882
- cancelledQueuedSummary = `\n\n[PLAN MODE - CHANGES QUEUED BEFORE CANCELLATION]\n` +
4883
- `${subPlan.proposedChanges.length} change(s) were queued to the parent plan:\n` +
4884
- changeSummaries.join('\n') + '\n' +
4885
- `These changes are preserved in your pending plan.`;
4886
- for (const change of subPlan.proposedChanges) {
4887
- this.pendingPlanManager.addProposedChange(change.tool, { ...change.args, _fromSubagent: agentName }, `[${agentName}] ${change.reason}`, change.toolCallId);
4888
- }
4889
- }
4890
- // Also preserve exploration summary
4891
- if (subPlan?.explorationSummary) {
4892
- this.pendingPlanManager.appendExplorationFinding(`[${agentName}] ${subPlan.explorationSummary}`);
4893
- }
4894
- }
4895
- // Extract real file paths from subagent's economics tracker (before cleanup)
4896
- const subagentFilePaths = subAgent.getModifiedFilePaths();
4897
- // Unsubscribe from subagent events and cleanup gracefully
4898
- unsubSubAgent();
4899
- try {
4900
- await subAgent.cleanup();
4901
- }
4902
- catch {
4903
- // Ignore cleanup errors on cancellation
4904
- }
4905
- // Build output message with partial results
4906
- const baseOutput = isUserCancellation
4907
- ? `Subagent '${agentName}' was cancelled by user.`
4908
- : `Subagent '${agentName}' timed out after ${Math.round(subagentTimeout / 1000)}s.`;
4909
- // Include partial response if we have one
4910
- const partialResultSection = partialResponse
4911
- ? `\n\n[PARTIAL RESULTS BEFORE TIMEOUT]\n${partialResponse.slice(0, 2000)}${partialResponse.length > 2000 ? '...(truncated)' : ''}`
4912
- : '';
4913
- // Enhanced tracing: Record subagent timeout with partial results
4914
- this.traceCollector?.record({
4915
- type: 'subagent.link',
4916
- data: {
4917
- parentSessionId: this.traceCollector.getSessionId() || 'unknown',
4918
- childSessionId,
4919
- childTraceId,
4920
- childConfig: {
4921
- agentType: agentName,
4922
- model: resolvedModel || 'default',
4923
- task,
4924
- tools: agentTools.map(t => t.name),
4925
- },
4926
- spawnContext: {
4927
- reason: `Delegated task: ${task.slice(0, 100)}`,
4928
- expectedOutcome: agentDef.description,
4929
- parentIteration: this.state.iteration,
4930
- },
4931
- result: {
4932
- success: false,
4933
- summary: `[TIMEOUT] ${baseOutput}\n${partialResponse.slice(0, 200)}`,
4934
- tokensUsed: subagentMetrics.totalTokens,
4935
- durationMs: duration,
4936
- },
4937
- },
4938
- });
4939
- // Parse structured closure report from partial response
4940
- const exitReason = isUserCancellation ? 'cancelled' : 'timeout_graceful';
4941
- const structured = parseStructuredClosureReport(partialResponse, exitReason, task);
4942
- if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4943
- try {
4944
- this.store.failWorkerResult(workerResultId, reason);
4945
- }
4946
- catch (storeErr) {
4947
- this.observability?.logger?.warn('Failed to mark cancelled worker result as failed', {
4948
- agentId,
4949
- error: storeErr.message,
4950
- });
4951
- }
4952
- }
4953
- return {
4954
- success: false,
4955
- output: baseOutput + partialResultSection + cancelledQueuedSummary,
4956
- // IMPORTANT: Use actual metrics instead of zeros
4957
- // This ensures accurate token tracking in /trace output
4958
- metrics: {
4959
- tokens: subagentMetrics.totalTokens,
4960
- duration,
4961
- toolCalls: subagentMetrics.toolCalls,
4962
- },
4963
- structured,
4964
- filesModified: subagentFilePaths,
4965
- };
4966
- }
4967
- throw err; // Re-throw non-cancellation errors
4968
- }
4969
- finally {
4970
- // Resume parent's duration timer now that subagent is done
4971
- this.economics?.resumeDuration();
4972
- // Dispose both sources (linked source disposes its internal state, timeout source handles its timer)
4973
- effectiveSource.dispose();
4974
- progressAwareTimeout.dispose();
4975
- // BUDGET POOL: Record actual usage and release the allocation
4976
- // This must happen in finally to ensure cleanup on both success and error paths
4977
- if (this.budgetPool && poolAllocationId) {
4978
- const subMetrics = subAgent.getMetrics();
4979
- this.budgetPool.recordUsage(poolAllocationId, subMetrics.totalTokens, subMetrics.estimatedCost);
4980
- this.budgetPool.release(poolAllocationId);
4981
- }
4982
- }
4983
- }
4984
- catch (err) {
4985
- const error = err instanceof Error ? err.message : String(err);
4986
- this.emit({ type: 'agent.error', agentId, agentType: agentName, error });
4987
- if (workerResultId && this.store?.hasWorkerResultsFeature()) {
4988
- try {
4989
- this.store.failWorkerResult(workerResultId, error);
4990
- }
4991
- catch (storeErr) {
4992
- this.observability?.logger?.warn('Failed to mark worker result as failed', {
4993
- agentId,
4994
- error: storeErr.message,
4995
- });
4996
- }
4997
- }
4998
- return {
4999
- success: false,
5000
- output: `Agent error: ${error}`,
5001
- metrics: { tokens: 0, duration: Date.now() - startTime, toolCalls: 0 },
5002
- };
5003
- }
5004
- }
5005
- /**
5006
- * Spawn multiple agents in parallel to work on independent tasks.
5007
- * Uses the shared blackboard for coordination and conflict prevention.
5008
- *
5009
- * Get budget for a subagent, using the pooled budget when available.
5010
- * Falls back to the static SUBAGENT_BUDGET if no pool is configured.
5011
- * Returns both the budget and the pool allocation ID (if any) for tracking.
5012
- */
5013
- getSubagentBudget(agentName, constraints) {
5014
- // If explicit maxTokens constraint, use that
5015
- if (constraints?.maxTokens) {
5016
- return {
5017
- budget: { ...SUBAGENT_BUDGET, maxTokens: constraints.maxTokens },
5018
- allocationId: null,
5019
- };
5020
- }
5021
- // Try to allocate from the shared budget pool
5022
- if (this.budgetPool) {
5023
- const allocationId = `${agentName}-${Date.now()}`;
5024
- const allocation = this.budgetPool.reserve(allocationId);
5025
- if (allocation) {
5026
- return {
5027
- budget: {
5028
- ...SUBAGENT_BUDGET,
5029
- maxTokens: allocation.tokenBudget,
5030
- softTokenLimit: Math.floor(allocation.tokenBudget * 0.7),
5031
- maxCost: allocation.costBudget,
5032
- },
5033
- allocationId,
5034
- };
5035
- }
5036
- // Pool exhausted — give a tiny emergency budget (just enough to report failure)
5037
- // This does NOT bypass the pool — it's a fixed small cost for error messaging
5038
- return {
5039
- budget: {
5040
- ...SUBAGENT_BUDGET,
5041
- maxTokens: 5000,
5042
- softTokenLimit: 3000,
5043
- maxCost: 0.01,
5044
- },
5045
- allocationId: null,
5046
- };
5047
- }
5048
- // No pool — use default subagent budget
5049
- return { budget: SUBAGENT_BUDGET, allocationId: null };
2791
+ return coreSpawnAgent(agentName, task, this.buildContext(), this.createSubAgentFactory(), constraints);
5050
2792
  }
5051
2793
  /**
5052
- * Uses Promise.allSettled to handle partial failures gracefully - if one
5053
- * agent fails or times out, others can still complete successfully.
2794
+ * Spawn multiple subagents in parallel (delegates to core/subagent-spawner).
5054
2795
  */
5055
2796
  async spawnAgentsParallel(tasks) {
5056
- // Emit start event for TUI visibility
5057
- this.emit({
5058
- type: 'parallel.spawn.start',
5059
- count: tasks.length,
5060
- agents: tasks.map(t => t.agent),
5061
- });
5062
- // Use DynamicBudgetPool for parallel spawns (prevents child starvation,
5063
- // enables priority-based allocation). Falls back to regular pool for single tasks.
5064
- let settled;
5065
- const originalPool = this.budgetPool;
5066
- // SubagentSupervisor for unified monitoring of concurrent subagents
5067
- const supervisor = tasks.length > 1 ? createSubagentSupervisor() : null;
5068
- if (this.budgetPool && tasks.length > 1) {
5069
- // Swap to DynamicBudgetPool for this parallel batch
5070
- const poolStats = this.budgetPool.getStats();
5071
- const dynamicPool = createDynamicBudgetPool(poolStats.tokensRemaining, 0.1);
5072
- dynamicPool.setExpectedChildren(tasks.length);
5073
- // Temporarily replace the budget pool so spawnAgent's reserve() uses the dynamic one
5074
- this.budgetPool = dynamicPool;
5075
- try {
5076
- const promises = tasks.map(({ agent, task }) => {
5077
- const spawnPromise = this.spawnAgent(agent, task);
5078
- // Register with supervisor for monitoring
5079
- if (supervisor) {
5080
- const handle = createSubagentHandle(`parallel-${agent}-${Date.now()}`, agent, task, spawnPromise, {});
5081
- supervisor.add(handle);
5082
- }
5083
- return spawnPromise;
5084
- });
5085
- settled = await Promise.allSettled(promises);
5086
- }
5087
- finally {
5088
- this.budgetPool = originalPool;
5089
- supervisor?.stop();
5090
- }
5091
- }
5092
- else {
5093
- // Single task or no pool - use standard sequential allocation
5094
- const promises = tasks.map(({ agent, task }) => this.spawnAgent(agent, task));
5095
- settled = await Promise.allSettled(promises);
5096
- }
5097
- // Convert settled results to SpawnResult array
5098
- const results = settled.map((result, i) => {
5099
- if (result.status === 'fulfilled') {
5100
- return result.value;
5101
- }
5102
- // Handle rejected promises (shouldn't happen since spawnAgent catches errors internally,
5103
- // but this is a safety net for unexpected failures)
5104
- const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
5105
- this.emit({
5106
- type: 'agent.error',
5107
- agentId: tasks[i].agent,
5108
- error: `Unexpected parallel spawn error: ${error}`,
5109
- });
5110
- return {
5111
- success: false,
5112
- output: `Parallel spawn error: ${error}`,
5113
- metrics: { tokens: 0, duration: 0, toolCalls: 0 },
5114
- };
5115
- });
5116
- // Emit completion event
5117
- this.emit({
5118
- type: 'parallel.spawn.complete',
5119
- count: tasks.length,
5120
- successCount: results.filter(r => r.success).length,
5121
- results: results.map((r, i) => ({
5122
- agent: tasks[i].agent,
5123
- success: r.success,
5124
- tokens: r.metrics?.tokens || 0,
5125
- })),
5126
- });
5127
- return results;
2797
+ return coreSpawnAgentsParallel(tasks, this.buildContext(), this.buildMutators(), this.createSubAgentFactory());
5128
2798
  }
5129
2799
  /**
5130
2800
  * Get a formatted list of available agents.
@@ -5290,7 +2960,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
5290
2960
  */
5291
2961
  cancel(reason) {
5292
2962
  if (!this.cancellation) {
5293
- console.warn('[ProductionAgent] Cancellation not enabled');
2963
+ log.warn('Cancellation not enabled');
5294
2964
  return;
5295
2965
  }
5296
2966
  this.cancellation.cancel(reason);
@@ -5401,7 +3071,7 @@ If the task is a simple question or doesn't need specialized handling, set bestA
5401
3071
  */
5402
3072
  enableLSPFileTools(options) {
5403
3073
  if (!this.lspManager) {
5404
- console.warn('[ProductionAgent] LSP not enabled, cannot enable LSP file tools');
3074
+ log.warn('LSP not enabled, cannot enable LSP file tools');
5405
3075
  return;
5406
3076
  }
5407
3077
  const lspTools = this.getLSPFileTools(options);
@@ -6031,63 +3701,6 @@ export function buildAgent() {
6031
3701
  return new ProductionAgentBuilder();
6032
3702
  }
6033
3703
  // =============================================================================
6034
- // STRUCTURED CLOSURE REPORT PARSER
6035
- // =============================================================================
6036
/** Fields whose presence marks a parsed JSON object as a closure report. */
const CLOSURE_REPORT_FIELDS = ['findings', 'actionsTaken', 'failures', 'remainingWork'];

/** Upper bound on balanced-brace scan attempts, to keep pathological input cheap. */
const MAX_CLOSURE_JSON_CANDIDATES = 32;

/**
 * Find the index of the `}` that closes the `{` at `start`, ignoring braces
 * that appear inside double-quoted strings. Returns -1 if it never closes.
 */
function findBalancedObjectEnd(text, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}

/**
 * Yield substrings of `text` that might be a JSON object: first the greedy
 * first-`{`-to-last-`}` span, then each balanced top-level `{...}` span in order.
 */
function* closureJsonCandidates(text) {
    const greedy = text.match(/\{[\s\S]*\}/);
    if (!greedy) {
        return;
    }
    yield greedy[0];
    let attempts = 0;
    let start = text.indexOf('{');
    while (start !== -1 && attempts < MAX_CLOSURE_JSON_CANDIDATES) {
        attempts++;
        const end = findBalancedObjectEnd(text, start);
        if (end === -1) {
            // Stray opening brace: retry from the next one.
            start = text.indexOf('{', start + 1);
            continue;
        }
        yield text.slice(start, end + 1);
        start = text.indexOf('{', end + 1);
    }
}

/**
 * Return the first candidate in `text` that parses as JSON and carries at
 * least one closure-report field, or undefined if there is none.
 */
function extractClosureReportObject(text) {
    for (const candidate of closureJsonCandidates(text)) {
        let parsed;
        try {
            parsed = JSON.parse(candidate);
        }
        catch {
            // Not valid JSON (e.g. a code snippet in braces) — try the next candidate.
            continue;
        }
        if (CLOSURE_REPORT_FIELDS.some(field => parsed[field])) {
            return parsed;
        }
    }
    return undefined;
}

/**
 * Parse a structured closure report from a subagent's text response.
 * The subagent may have produced JSON in response to a TIMEOUT_WRAPUP_PROMPT.
 *
 * The JSON object may be surrounded by prose that itself contains braces
 * (code snippets, placeholders); each balanced `{...}` span is tried until one
 * parses and looks like a report.
 *
 * @param text - The subagent's last response text
 * @param defaultExitReason - Exit reason to use (completed, timeout_graceful, cancelled, etc.)
 * @param fallbackTask - Original task description for fallback remainingWork
 * @returns Parsed StructuredClosureReport; a synthesized fallback report when no
 *   report JSON is found and the agent did not complete normally (or produced no
 *   text but a fallbackTask is known); otherwise undefined
 */
export function parseStructuredClosureReport(text, defaultExitReason, fallbackTask) {
    if (!text) {
        // No text at all — nothing to report unless we know what was left undone
        if (!fallbackTask) {
            return undefined;
        }
        // Keep a user cancellation distinguishable from a timeout, matching the
        // non-empty fallback below which also preserves 'cancelled'.
        const wasCancelled = defaultExitReason === 'cancelled';
        return {
            findings: [],
            actionsTaken: [],
            failures: [wasCancelled
                    ? 'Cancelled before producing structured summary'
                    : 'Timeout before producing structured summary'],
            remainingWork: [fallbackTask],
            exitReason: wasCancelled ? 'cancelled' : 'timeout_hard',
        };
    }
    const parsed = extractClosureReportObject(text);
    if (parsed) {
        return {
            findings: Array.isArray(parsed.findings) ? parsed.findings : [],
            actionsTaken: Array.isArray(parsed.actionsTaken) ? parsed.actionsTaken : [],
            failures: Array.isArray(parsed.failures) ? parsed.failures : [],
            remainingWork: Array.isArray(parsed.remainingWork) ? parsed.remainingWork : [],
            exitReason: defaultExitReason,
            suggestedNextSteps: Array.isArray(parsed.suggestedNextSteps) ? parsed.suggestedNextSteps : undefined,
        };
    }
    // Fallback: LLM didn't produce valid JSON but we have text
    if (defaultExitReason !== 'completed') {
        return {
            findings: [text.slice(0, 500)],
            actionsTaken: [],
            failures: ['Did not produce structured JSON summary'],
            remainingWork: fallbackTask ? [fallbackTask] : [],
            // A graceful timeout that still yielded no summary is reported as hard
            exitReason: defaultExitReason === 'timeout_graceful' ? 'timeout_hard' : defaultExitReason,
        };
    }
    // For completed agents, don't force a structured report if they didn't produce one
    return undefined;
}
3704
+ // Re-export from core for backward compatibility
3705
+ export { parseStructuredClosureReport } from './core/index.js';
6093
3706
  //# sourceMappingURL=agent.js.map