@caupulican/pi-adaptative 0.80.86 → 0.80.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353)
  1. package/CHANGELOG.md +178 -0
  2. package/dist/core/agent-session.d.ts +412 -1
  3. package/dist/core/agent-session.d.ts.map +1 -1
  4. package/dist/core/agent-session.js +2053 -41
  5. package/dist/core/agent-session.js.map +1 -1
  6. package/dist/core/autonomy/approval-gate.d.ts +4 -0
  7. package/dist/core/autonomy/approval-gate.d.ts.map +1 -0
  8. package/dist/core/autonomy/approval-gate.js +27 -0
  9. package/dist/core/autonomy/approval-gate.js.map +1 -0
  10. package/dist/core/autonomy/bounded-completion.d.ts +27 -0
  11. package/dist/core/autonomy/bounded-completion.d.ts.map +1 -0
  12. package/dist/core/autonomy/bounded-completion.js +44 -0
  13. package/dist/core/autonomy/bounded-completion.js.map +1 -0
  14. package/dist/core/autonomy/contracts.d.ts +129 -0
  15. package/dist/core/autonomy/contracts.d.ts.map +1 -0
  16. package/dist/core/autonomy/contracts.js +2 -0
  17. package/dist/core/autonomy/contracts.js.map +1 -0
  18. package/dist/core/autonomy/gates.d.ts +15 -0
  19. package/dist/core/autonomy/gates.d.ts.map +1 -0
  20. package/dist/core/autonomy/gates.js +205 -0
  21. package/dist/core/autonomy/gates.js.map +1 -0
  22. package/dist/core/autonomy/lane-tracker.d.ts +48 -0
  23. package/dist/core/autonomy/lane-tracker.d.ts.map +1 -0
  24. package/dist/core/autonomy/lane-tracker.js +125 -0
  25. package/dist/core/autonomy/lane-tracker.js.map +1 -0
  26. package/dist/core/autonomy/path-scope.d.ts +9 -0
  27. package/dist/core/autonomy/path-scope.d.ts.map +1 -0
  28. package/dist/core/autonomy/path-scope.js +122 -0
  29. package/dist/core/autonomy/path-scope.js.map +1 -0
  30. package/dist/core/autonomy/risk-assessment.d.ts +3 -0
  31. package/dist/core/autonomy/risk-assessment.d.ts.map +1 -0
  32. package/dist/core/autonomy/risk-assessment.js +122 -0
  33. package/dist/core/autonomy/risk-assessment.js.map +1 -0
  34. package/dist/core/autonomy/session-lane-record.d.ts +10 -0
  35. package/dist/core/autonomy/session-lane-record.d.ts.map +1 -0
  36. package/dist/core/autonomy/session-lane-record.js +36 -0
  37. package/dist/core/autonomy/session-lane-record.js.map +1 -0
  38. package/dist/core/autonomy/status.d.ts +40 -0
  39. package/dist/core/autonomy/status.d.ts.map +1 -0
  40. package/dist/core/autonomy/status.js +107 -0
  41. package/dist/core/autonomy/status.js.map +1 -0
  42. package/dist/core/autonomy/subagent-prompt.d.ts +21 -0
  43. package/dist/core/autonomy/subagent-prompt.d.ts.map +1 -0
  44. package/dist/core/autonomy/subagent-prompt.js +28 -0
  45. package/dist/core/autonomy/subagent-prompt.js.map +1 -0
  46. package/dist/core/autonomy/telemetry-events.d.ts +18 -0
  47. package/dist/core/autonomy/telemetry-events.d.ts.map +1 -0
  48. package/dist/core/autonomy/telemetry-events.js +60 -0
  49. package/dist/core/autonomy/telemetry-events.js.map +1 -0
  50. package/dist/core/context/artifact-retrieval.d.ts +49 -0
  51. package/dist/core/context/artifact-retrieval.d.ts.map +1 -0
  52. package/dist/core/context/artifact-retrieval.js +49 -0
  53. package/dist/core/context/artifact-retrieval.js.map +1 -0
  54. package/dist/core/context/brain-curator.d.ts +88 -0
  55. package/dist/core/context/brain-curator.d.ts.map +1 -0
  56. package/dist/core/context/brain-curator.js +192 -0
  57. package/dist/core/context/brain-curator.js.map +1 -0
  58. package/dist/core/context/context-artifacts.d.ts +94 -0
  59. package/dist/core/context/context-artifacts.d.ts.map +1 -0
  60. package/dist/core/context/context-artifacts.js +307 -0
  61. package/dist/core/context/context-artifacts.js.map +1 -0
  62. package/dist/core/context/context-audit.d.ts +66 -0
  63. package/dist/core/context/context-audit.d.ts.map +1 -0
  64. package/dist/core/context/context-audit.js +173 -0
  65. package/dist/core/context/context-audit.js.map +1 -0
  66. package/dist/core/context/context-composition.d.ts +122 -0
  67. package/dist/core/context/context-composition.d.ts.map +1 -0
  68. package/dist/core/context/context-composition.js +163 -0
  69. package/dist/core/context/context-composition.js.map +1 -0
  70. package/dist/core/context/context-item.d.ts +117 -0
  71. package/dist/core/context/context-item.d.ts.map +1 -0
  72. package/dist/core/context/context-item.js +36 -0
  73. package/dist/core/context/context-item.js.map +1 -0
  74. package/dist/core/context/context-prompt-enforcement.d.ts +86 -0
  75. package/dist/core/context/context-prompt-enforcement.d.ts.map +1 -0
  76. package/dist/core/context/context-prompt-enforcement.js +168 -0
  77. package/dist/core/context/context-prompt-enforcement.js.map +1 -0
  78. package/dist/core/context/context-prompt-policy.d.ts +90 -0
  79. package/dist/core/context/context-prompt-policy.d.ts.map +1 -0
  80. package/dist/core/context/context-prompt-policy.js +73 -0
  81. package/dist/core/context/context-prompt-policy.js.map +1 -0
  82. package/dist/core/context/context-retention.d.ts +36 -0
  83. package/dist/core/context/context-retention.d.ts.map +1 -0
  84. package/dist/core/context/context-retention.js +108 -0
  85. package/dist/core/context/context-retention.js.map +1 -0
  86. package/dist/core/context/context-store.d.ts +37 -0
  87. package/dist/core/context/context-store.d.ts.map +1 -0
  88. package/dist/core/context/context-store.js +45 -0
  89. package/dist/core/context/context-store.js.map +1 -0
  90. package/dist/core/context/memory-diagnostics.d.ts +50 -0
  91. package/dist/core/context/memory-diagnostics.d.ts.map +1 -0
  92. package/dist/core/context/memory-diagnostics.js +43 -0
  93. package/dist/core/context/memory-diagnostics.js.map +1 -0
  94. package/dist/core/context/memory-index-store.d.ts +28 -0
  95. package/dist/core/context/memory-index-store.d.ts.map +1 -0
  96. package/dist/core/context/memory-index-store.js +38 -0
  97. package/dist/core/context/memory-index-store.js.map +1 -0
  98. package/dist/core/context/memory-prompt-block.d.ts +34 -0
  99. package/dist/core/context/memory-prompt-block.d.ts.map +1 -0
  100. package/dist/core/context/memory-prompt-block.js +58 -0
  101. package/dist/core/context/memory-prompt-block.js.map +1 -0
  102. package/dist/core/context/memory-provider-contract.d.ts +114 -0
  103. package/dist/core/context/memory-provider-contract.d.ts.map +1 -0
  104. package/dist/core/context/memory-provider-contract.js +121 -0
  105. package/dist/core/context/memory-provider-contract.js.map +1 -0
  106. package/dist/core/context/memory-retrieval.d.ts +27 -0
  107. package/dist/core/context/memory-retrieval.d.ts.map +1 -0
  108. package/dist/core/context/memory-retrieval.js +91 -0
  109. package/dist/core/context/memory-retrieval.js.map +1 -0
  110. package/dist/core/context/okf-memory-provider.d.ts +26 -0
  111. package/dist/core/context/okf-memory-provider.d.ts.map +1 -0
  112. package/dist/core/context/okf-memory-provider.js +154 -0
  113. package/dist/core/context/okf-memory-provider.js.map +1 -0
  114. package/dist/core/context/okf-memory.d.ts +42 -0
  115. package/dist/core/context/okf-memory.d.ts.map +1 -0
  116. package/dist/core/context/okf-memory.js +175 -0
  117. package/dist/core/context/okf-memory.js.map +1 -0
  118. package/dist/core/context/policy-engine.d.ts +66 -0
  119. package/dist/core/context/policy-engine.d.ts.map +1 -0
  120. package/dist/core/context/policy-engine.js +171 -0
  121. package/dist/core/context/policy-engine.js.map +1 -0
  122. package/dist/core/context/policy-types.d.ts +102 -0
  123. package/dist/core/context/policy-types.d.ts.map +1 -0
  124. package/dist/core/context/policy-types.js +7 -0
  125. package/dist/core/context/policy-types.js.map +1 -0
  126. package/dist/core/context/sqlite-runtime-index.d.ts +19 -0
  127. package/dist/core/context/sqlite-runtime-index.d.ts.map +1 -0
  128. package/dist/core/context/sqlite-runtime-index.js +344 -0
  129. package/dist/core/context/sqlite-runtime-index.js.map +1 -0
  130. package/dist/core/context/storage-authority.d.ts +20 -0
  131. package/dist/core/context/storage-authority.d.ts.map +1 -0
  132. package/dist/core/context/storage-authority.js +51 -0
  133. package/dist/core/context/storage-authority.js.map +1 -0
  134. package/dist/core/context/tool-output-packer.d.ts +75 -0
  135. package/dist/core/context/tool-output-packer.d.ts.map +1 -0
  136. package/dist/core/context/tool-output-packer.js +77 -0
  137. package/dist/core/context/tool-output-packer.js.map +1 -0
  138. package/dist/core/context-gc.d.ts +13 -0
  139. package/dist/core/context-gc.d.ts.map +1 -1
  140. package/dist/core/context-gc.js +6 -0
  141. package/dist/core/context-gc.js.map +1 -1
  142. package/dist/core/cost/session-usage.d.ts +20 -0
  143. package/dist/core/cost/session-usage.d.ts.map +1 -0
  144. package/dist/core/cost/session-usage.js +164 -0
  145. package/dist/core/cost/session-usage.js.map +1 -0
  146. package/dist/core/delegation/session-worker-result.d.ts +10 -0
  147. package/dist/core/delegation/session-worker-result.d.ts.map +1 -0
  148. package/dist/core/delegation/session-worker-result.js +36 -0
  149. package/dist/core/delegation/session-worker-result.js.map +1 -0
  150. package/dist/core/delegation/worker-result.d.ts +9 -0
  151. package/dist/core/delegation/worker-result.d.ts.map +1 -0
  152. package/dist/core/delegation/worker-result.js +152 -0
  153. package/dist/core/delegation/worker-result.js.map +1 -0
  154. package/dist/core/delegation/worker-runner.d.ts +58 -0
  155. package/dist/core/delegation/worker-runner.d.ts.map +1 -0
  156. package/dist/core/delegation/worker-runner.js +188 -0
  157. package/dist/core/delegation/worker-runner.js.map +1 -0
  158. package/dist/core/extensions/builtin.d.ts +5 -1
  159. package/dist/core/extensions/builtin.d.ts.map +1 -1
  160. package/dist/core/extensions/builtin.js +23 -1
  161. package/dist/core/extensions/builtin.js.map +1 -1
  162. package/dist/core/footer-data-provider.d.ts +5 -1
  163. package/dist/core/footer-data-provider.d.ts.map +1 -1
  164. package/dist/core/footer-data-provider.js +13 -0
  165. package/dist/core/footer-data-provider.js.map +1 -1
  166. package/dist/core/goals/goal-continuation-controller.d.ts +22 -0
  167. package/dist/core/goals/goal-continuation-controller.d.ts.map +1 -0
  168. package/dist/core/goals/goal-continuation-controller.js +88 -0
  169. package/dist/core/goals/goal-continuation-controller.js.map +1 -0
  170. package/dist/core/goals/goal-continuation-defaults.d.ts +10 -0
  171. package/dist/core/goals/goal-continuation-defaults.d.ts.map +1 -0
  172. package/dist/core/goals/goal-continuation-defaults.js +10 -0
  173. package/dist/core/goals/goal-continuation-defaults.js.map +1 -0
  174. package/dist/core/goals/goal-continuation-prompt.d.ts +18 -0
  175. package/dist/core/goals/goal-continuation-prompt.d.ts.map +1 -0
  176. package/dist/core/goals/goal-continuation-prompt.js +141 -0
  177. package/dist/core/goals/goal-continuation-prompt.js.map +1 -0
  178. package/dist/core/goals/goal-runtime-snapshot.d.ts +19 -0
  179. package/dist/core/goals/goal-runtime-snapshot.d.ts.map +1 -0
  180. package/dist/core/goals/goal-runtime-snapshot.js +23 -0
  181. package/dist/core/goals/goal-runtime-snapshot.js.map +1 -0
  182. package/dist/core/goals/goal-state.d.ts +87 -0
  183. package/dist/core/goals/goal-state.d.ts.map +1 -0
  184. package/dist/core/goals/goal-state.js +259 -0
  185. package/dist/core/goals/goal-state.js.map +1 -0
  186. package/dist/core/goals/goal-tool-core.d.ts +66 -0
  187. package/dist/core/goals/goal-tool-core.d.ts.map +1 -0
  188. package/dist/core/goals/goal-tool-core.js +146 -0
  189. package/dist/core/goals/goal-tool-core.js.map +1 -0
  190. package/dist/core/goals/session-goal-state.d.ts +10 -0
  191. package/dist/core/goals/session-goal-state.d.ts.map +1 -0
  192. package/dist/core/goals/session-goal-state.js +35 -0
  193. package/dist/core/goals/session-goal-state.js.map +1 -0
  194. package/dist/core/learning/learning-audit.d.ts +45 -0
  195. package/dist/core/learning/learning-audit.d.ts.map +1 -0
  196. package/dist/core/learning/learning-audit.js +139 -0
  197. package/dist/core/learning/learning-audit.js.map +1 -0
  198. package/dist/core/learning/learning-gate.d.ts +29 -0
  199. package/dist/core/learning/learning-gate.d.ts.map +1 -0
  200. package/dist/core/learning/learning-gate.js +150 -0
  201. package/dist/core/learning/learning-gate.js.map +1 -0
  202. package/dist/core/learning/session-learning-decision.d.ts +10 -0
  203. package/dist/core/learning/session-learning-decision.d.ts.map +1 -0
  204. package/dist/core/learning/session-learning-decision.js +36 -0
  205. package/dist/core/learning/session-learning-decision.js.map +1 -0
  206. package/dist/core/model-capability.d.ts +41 -0
  207. package/dist/core/model-capability.d.ts.map +1 -0
  208. package/dist/core/model-capability.js +101 -0
  209. package/dist/core/model-capability.js.map +1 -0
  210. package/dist/core/model-router/config-diagnostics.d.ts.map +1 -1
  211. package/dist/core/model-router/config-diagnostics.js +1 -0
  212. package/dist/core/model-router/config-diagnostics.js.map +1 -1
  213. package/dist/core/model-router/intent-classifier.d.ts +2 -0
  214. package/dist/core/model-router/intent-classifier.d.ts.map +1 -1
  215. package/dist/core/model-router/intent-classifier.js +154 -9
  216. package/dist/core/model-router/intent-classifier.js.map +1 -1
  217. package/dist/core/model-router/route-judge.d.ts +54 -0
  218. package/dist/core/model-router/route-judge.d.ts.map +1 -0
  219. package/dist/core/model-router/route-judge.js +128 -0
  220. package/dist/core/model-router/route-judge.js.map +1 -0
  221. package/dist/core/model-router/status.d.ts +4 -1
  222. package/dist/core/model-router/status.d.ts.map +1 -1
  223. package/dist/core/model-router/status.js +30 -6
  224. package/dist/core/model-router/status.js.map +1 -1
  225. package/dist/core/model-router/tool-escalation.d.ts +4 -6
  226. package/dist/core/model-router/tool-escalation.d.ts.map +1 -1
  227. package/dist/core/model-router/tool-escalation.js +1 -1
  228. package/dist/core/model-router/tool-escalation.js.map +1 -1
  229. package/dist/core/models/fitness-store.d.ts +40 -0
  230. package/dist/core/models/fitness-store.d.ts.map +1 -0
  231. package/dist/core/models/fitness-store.js +61 -0
  232. package/dist/core/models/fitness-store.js.map +1 -0
  233. package/dist/core/profile-registry.d.ts.map +1 -1
  234. package/dist/core/profile-registry.js +1 -1
  235. package/dist/core/profile-registry.js.map +1 -1
  236. package/dist/core/prompt-templates.d.ts +2 -0
  237. package/dist/core/prompt-templates.d.ts.map +1 -1
  238. package/dist/core/prompt-templates.js +12 -4
  239. package/dist/core/prompt-templates.js.map +1 -1
  240. package/dist/core/research/automata-provider.d.ts +5 -0
  241. package/dist/core/research/automata-provider.d.ts.map +1 -0
  242. package/dist/core/research/automata-provider.js +15 -0
  243. package/dist/core/research/automata-provider.js.map +1 -0
  244. package/dist/core/research/evidence-bundle.d.ts +10 -0
  245. package/dist/core/research/evidence-bundle.d.ts.map +1 -0
  246. package/dist/core/research/evidence-bundle.js +116 -0
  247. package/dist/core/research/evidence-bundle.js.map +1 -0
  248. package/dist/core/research/model-fitness.d.ts +82 -0
  249. package/dist/core/research/model-fitness.d.ts.map +1 -0
  250. package/dist/core/research/model-fitness.js +308 -0
  251. package/dist/core/research/model-fitness.js.map +1 -0
  252. package/dist/core/research/research-gate.d.ts +11 -0
  253. package/dist/core/research/research-gate.d.ts.map +1 -0
  254. package/dist/core/research/research-gate.js +82 -0
  255. package/dist/core/research/research-gate.js.map +1 -0
  256. package/dist/core/research/research-runner.d.ts +59 -0
  257. package/dist/core/research/research-runner.d.ts.map +1 -0
  258. package/dist/core/research/research-runner.js +155 -0
  259. package/dist/core/research/research-runner.js.map +1 -0
  260. package/dist/core/research/session-evidence-bundle.d.ts +11 -0
  261. package/dist/core/research/session-evidence-bundle.d.ts.map +1 -0
  262. package/dist/core/research/session-evidence-bundle.js +55 -0
  263. package/dist/core/research/session-evidence-bundle.js.map +1 -0
  264. package/dist/core/resource-loader.d.ts.map +1 -1
  265. package/dist/core/resource-loader.js +4 -0
  266. package/dist/core/resource-loader.js.map +1 -1
  267. package/dist/core/settings-manager.d.ts +160 -4
  268. package/dist/core/settings-manager.d.ts.map +1 -1
  269. package/dist/core/settings-manager.js +304 -9
  270. package/dist/core/settings-manager.js.map +1 -1
  271. package/dist/core/skills.d.ts +4 -0
  272. package/dist/core/skills.d.ts.map +1 -1
  273. package/dist/core/skills.js +18 -6
  274. package/dist/core/skills.js.map +1 -1
  275. package/dist/core/slash-commands.d.ts.map +1 -1
  276. package/dist/core/slash-commands.js +10 -1
  277. package/dist/core/slash-commands.js.map +1 -1
  278. package/dist/core/toolkit/script-registry.d.ts +34 -0
  279. package/dist/core/toolkit/script-registry.d.ts.map +1 -0
  280. package/dist/core/toolkit/script-registry.js +71 -0
  281. package/dist/core/toolkit/script-registry.js.map +1 -0
  282. package/dist/core/toolkit/script-runner.d.ts +28 -0
  283. package/dist/core/toolkit/script-runner.d.ts.map +1 -0
  284. package/dist/core/toolkit/script-runner.js +48 -0
  285. package/dist/core/toolkit/script-runner.js.map +1 -0
  286. package/dist/core/tools/artifact-retrieve.d.ts +23 -0
  287. package/dist/core/tools/artifact-retrieve.d.ts.map +1 -0
  288. package/dist/core/tools/artifact-retrieve.js +110 -0
  289. package/dist/core/tools/artifact-retrieve.js.map +1 -0
  290. package/dist/core/tools/delegate.d.ts +32 -0
  291. package/dist/core/tools/delegate.d.ts.map +1 -0
  292. package/dist/core/tools/delegate.js +60 -0
  293. package/dist/core/tools/delegate.js.map +1 -0
  294. package/dist/core/tools/fff-search-backend.d.ts +103 -0
  295. package/dist/core/tools/fff-search-backend.d.ts.map +1 -0
  296. package/dist/core/tools/fff-search-backend.js +151 -0
  297. package/dist/core/tools/fff-search-backend.js.map +1 -0
  298. package/dist/core/tools/find.d.ts +21 -1
  299. package/dist/core/tools/find.d.ts.map +1 -1
  300. package/dist/core/tools/find.js +183 -10
  301. package/dist/core/tools/find.js.map +1 -1
  302. package/dist/core/tools/goal.d.ts +35 -0
  303. package/dist/core/tools/goal.d.ts.map +1 -0
  304. package/dist/core/tools/goal.js +122 -0
  305. package/dist/core/tools/goal.js.map +1 -0
  306. package/dist/core/tools/grep.d.ts +21 -1
  307. package/dist/core/tools/grep.d.ts.map +1 -1
  308. package/dist/core/tools/grep.js +272 -27
  309. package/dist/core/tools/grep.js.map +1 -1
  310. package/dist/core/tools/index.d.ts +4 -1
  311. package/dist/core/tools/index.d.ts.map +1 -1
  312. package/dist/core/tools/index.js +9 -0
  313. package/dist/core/tools/index.js.map +1 -1
  314. package/dist/core/tools/model-fitness.d.ts +30 -0
  315. package/dist/core/tools/model-fitness.d.ts.map +1 -0
  316. package/dist/core/tools/model-fitness.js +38 -0
  317. package/dist/core/tools/model-fitness.js.map +1 -0
  318. package/dist/core/tools/run-toolkit-script.d.ts +24 -0
  319. package/dist/core/tools/run-toolkit-script.d.ts.map +1 -0
  320. package/dist/core/tools/run-toolkit-script.js +103 -0
  321. package/dist/core/tools/run-toolkit-script.js.map +1 -0
  322. package/dist/core/tools/search-router.d.ts +75 -0
  323. package/dist/core/tools/search-router.d.ts.map +1 -0
  324. package/dist/core/tools/search-router.js +85 -0
  325. package/dist/core/tools/search-router.js.map +1 -0
  326. package/dist/modes/interactive/components/fitness-role-selector.d.ts +13 -0
  327. package/dist/modes/interactive/components/fitness-role-selector.d.ts.map +1 -0
  328. package/dist/modes/interactive/components/fitness-role-selector.js +65 -0
  329. package/dist/modes/interactive/components/fitness-role-selector.js.map +1 -0
  330. package/dist/modes/interactive/components/footer.d.ts.map +1 -1
  331. package/dist/modes/interactive/components/footer.js +18 -16
  332. package/dist/modes/interactive/components/footer.js.map +1 -1
  333. package/dist/modes/interactive/components/settings-selector.d.ts +16 -1
  334. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  335. package/dist/modes/interactive/components/settings-selector.js +555 -11
  336. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  337. package/dist/modes/interactive/interactive-mode.d.ts +9 -0
  338. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  339. package/dist/modes/interactive/interactive-mode.js +308 -39
  340. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  341. package/dist/utils/tools-manager.d.ts +2 -0
  342. package/dist/utils/tools-manager.d.ts.map +1 -1
  343. package/dist/utils/tools-manager.js +154 -2
  344. package/dist/utils/tools-manager.js.map +1 -1
  345. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  346. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  347. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  348. package/examples/extensions/sandbox/package-lock.json +2 -2
  349. package/examples/extensions/sandbox/package.json +1 -1
  350. package/examples/extensions/with-deps/package-lock.json +2 -2
  351. package/examples/extensions/with-deps/package.json +1 -1
  352. package/npm-shrinkwrap.json +368 -12
  353. package/package.json +5 -4
@@ -21,12 +21,30 @@ import { stripFrontmatter } from "../utils/frontmatter.js";
21
21
  import { resolvePath } from "../utils/paths.js";
22
22
  import { sleep } from "../utils/sleep.js";
23
23
  import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth-guidance.js";
24
+ import { evaluateToolGate } from "./autonomy/gates.js";
25
+ import { LaneTracker } from "./autonomy/lane-tracker.js";
26
+ import { appendLaneRecordSnapshot, getLaneRecordSnapshots } from "./autonomy/session-lane-record.js";
27
+ import { composeSubagentSystemPrompt } from "./autonomy/subagent-prompt.js";
24
28
  import { executeBashWithOperations } from "./bash-executor.js";
25
29
  import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
30
+ // (module-scope helper for curation goal extraction defined below the imports)
31
+ import { BrainCurator } from "./context/brain-curator.js";
32
+ import { createFileArtifactStore } from "./context/context-artifacts.js";
33
+ import { runContextAudit } from "./context/context-audit.js";
34
+ import { buildContextCompositionReport, formatContextCompositionDashboard, } from "./context/context-composition.js";
35
+ import { enforcePromptPolicy } from "./context/context-prompt-enforcement.js";
36
+ import { correlateWithContextGc, planPromptPolicy, } from "./context/context-prompt-policy.js";
37
+ import { defaultMemoryPromptInclusionReport, sanitizeMemoryRetrievalReportForDiagnostics, } from "./context/memory-diagnostics.js";
38
+ import { buildMemoryPromptBlock } from "./context/memory-prompt-block.js";
39
+ import { DEFAULT_LOCAL_MEMORY_EGRESS_POLICY, } from "./context/memory-provider-contract.js";
40
+ import { retrieveMemoryForContext } from "./context/memory-retrieval.js";
41
+ import { createOkfMemoryProvider } from "./context/okf-memory-provider.js";
26
42
  import { applyContextGc } from "./context-gc.js";
27
43
  import { aggregateDailyUsageFromSessionFiles, aggregateDailyUsageFromSessionRoot, formatDailyUsageBreakdown, getLocalDayWindow, } from "./cost/daily-usage.js";
28
44
  import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
29
45
  import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
46
+ import { appendWorkerResultSnapshot, getWorkerResultSnapshots } from "./delegation/session-worker-result.js";
47
+ import { runWorker } from "./delegation/worker-runner.js";
30
48
  import { exportSessionToHtml } from "./export-html/index.js";
31
49
  import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
32
50
  import { createCoreDiagnosticsToolDefinitions } from "./extensions/builtin.js";
@@ -34,7 +52,13 @@ import { ExtensionRunner, wrapRegisteredTools, } from "./extensions/index.js";
34
52
  import { disposeExtensionEventSubscriptions } from "./extensions/loader.js";
35
53
  import { emitSessionShutdownEvent } from "./extensions/runner.js";
36
54
  import { GatewayRegistry } from "./gateways/channel-provider.js";
55
+ import { buildGoalContinuationPrompt, } from "./goals/goal-continuation-prompt.js";
56
+ import { buildGoalRuntimeSnapshot, } from "./goals/goal-runtime-snapshot.js";
57
+ import { appendGoalStateSnapshot, getLatestGoalStateSnapshot } from "./goals/session-goal-state.js";
58
+ import { appendLearningAuditSnapshot, getLearningAuditSnapshots, proposalFromReflectionWrite, rollbackPlanForReflectionWrite, } from "./learning/learning-audit.js";
59
+ import { evaluateLearningDecision } from "./learning/learning-gate.js";
37
60
  import { decideDemand, ReflectionEngine, } from "./learning/reflection-engine.js";
61
+ import { appendLearningDecisionSnapshot, getLearningDecisionSnapshots } from "./learning/session-learning-decision.js";
38
62
  import { isPromotedFrontmatter, SkillCurator } from "./learning/skill-curator.js";
39
63
  import { EffectivenessTracker } from "./memory/effectiveness-tracker.js";
40
64
  import { MemoryManager } from "./memory/memory-manager.js";
@@ -42,21 +66,32 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
42
66
  import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
43
67
  import { compactToolResultDetailsForRetention } from "./message-retention.js";
44
68
  import { createCustomMessage } from "./messages.js";
69
+ import { deriveModelCapabilityProfile, filterToolNamesForCapability, } from "./model-capability.js";
45
70
  import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
46
71
  import { collectModelRouterConfigDiagnostics } from "./model-router/config-diagnostics.js";
47
- import { classifyModelRouterIntent } from "./model-router/intent-classifier.js";
72
+ import { classifyModelRouterRoute } from "./model-router/intent-classifier.js";
73
+ import { ROUTE_JUDGE_MAX_OUTPUT_TOKENS, runRouteJudge } from "./model-router/route-judge.js";
48
74
  import { bufferModelRouterSessionCustomMessage, bufferModelRouterSessionMessage, createModelRouterSessionBuffer, flushModelRouterSessionBuffer, } from "./model-router/session-buffer.js";
49
75
  import { formatModelRouterStatus, getRecentModelRouterDecisions, MODEL_ROUTER_DECISION_CUSTOM_TYPE, } from "./model-router/status.js";
50
76
  import { shouldEscalateModelRouterTool } from "./model-router/tool-escalation.js";
77
+ import { FitnessStore } from "./models/fitness-store.js";
51
78
  import { expandPromptTemplate } from "./prompt-templates.js";
79
+ import { runModelFitnessProbe } from "./research/model-fitness.js";
80
+ import { runResearch } from "./research/research-runner.js";
81
+ import { appendEvidenceBundleSnapshot, getEvidenceBundleSnapshots, getLatestEvidenceBundleSnapshot, } from "./research/session-evidence-bundle.js";
52
82
  import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
53
83
  import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
54
84
  import { CURRENT_SESSION_VERSION, getLatestCompactionEntry } from "./session-manager.js";
55
85
  import { matchesResourceProfilePattern, } from "./settings-manager.js";
56
86
  import { createSyntheticSourceInfo } from "./source-info.js";
57
87
  import { buildSystemPrompt } from "./system-prompt.js";
88
+ import { executeToolkitScript } from "./toolkit/script-runner.js";
58
89
  import { createLocalBashOperations } from "./tools/bash.js";
90
+ import { createDelegateToolDefinition } from "./tools/delegate.js";
91
+ import { createGoalToolDefinition } from "./tools/goal.js";
59
92
  import { createAllToolDefinitions } from "./tools/index.js";
93
+ import { createModelFitnessToolDefinition } from "./tools/model-fitness.js";
94
+ import { createRunToolkitScriptToolDefinition } from "./tools/run-toolkit-script.js";
60
95
  import { createToolDefinitionFromAgentTool } from "./tools/tool-definition-wrapper.js";
61
96
  /**
62
97
  * Parse a skill block from message text.
@@ -75,6 +110,23 @@ export function parseSkillBlock(text) {
75
110
  }
76
111
  /** customType for spawned-usage roll-up entries (Cost Aggregation, Model A). */
77
112
  export const SPAWNED_USAGE_CUSTOM_TYPE = "spawned_usage";
113
+ /** Latest user prompt text in the provider-visible array (curation goal line; bounded by caller). Walks `messages` from the newest entry backwards; string content is returned as-is, otherwise the `type === "text"` parts are joined with "\n". Returns "" when no user message yields text. */
114
+ function latestUserPromptText(messages) {
115
+ for (let index = messages.length - 1; index >= 0; index--) {
116
+ const message = messages[index];
117
+ if (!message || message.role !== "user") // skip missing entries and non-user roles
118
+ continue;
119
+ if (typeof message.content === "string") // string content wins immediately, even when it is ""
120
+ return message.content;
121
+ const text = message.content
122
+ .filter((part) => part.type === "text")
123
+ .map((part) => part.text)
124
+ .join("\n");
125
+ if (text.length > 0) // a user message without any text is skipped; the scan continues with older messages
126
+ return text;
127
+ }
128
+ return "";
129
+ }
78
130
  // ============================================================================
79
131
  // Constants
80
132
  // ============================================================================
@@ -89,10 +141,46 @@ function formatModelRouterModel(model) {
89
141
  function persistModelRouterDecision(sessionManager, decision) {
90
142
  sessionManager.appendCustomEntry(MODEL_ROUTER_DECISION_CUSTOM_TYPE, decision);
91
143
  }
144
+ /** Read a packed grep/find tool result's `details.artifactId`, if present, without `any`. Returns the id only when `message` has role "toolResult", its `details` is a non-null object, and `details.artifactId` is a string; returns undefined in every other case. */
145
+ function extractArtifactId(message) {
146
+ if (!message || message.role !== "toolResult")
147
+ return undefined;
148
+ const details = message.details;
149
+ if (typeof details !== "object" || details === null) // typeof null is "object", so null needs its own check
150
+ return undefined;
151
+ const artifactId = details.artifactId;
152
+ return typeof artifactId === "string" ? artifactId : undefined;
153
+ }
154
+ /**
155
+ * Text of the most recent user message, or "" if there is none (e.g. goal-continuation
156
+ * turns with no new user input). An empty query degrades to zero memory-retrieval results
157
+ * by construction (see memory-provider-contract.ts's score-on-empty-query-tokens rule) --
158
+ * no special-casing needed here beyond returning "". Non-string content is iterated and its `type === "text"` parts are joined with "\n". NOTE(review): unlike latestUserPromptText in this file, entries are not null-checked and the newest user message decides the result even when its joined text is empty -- confirm both differences are intended.
159
+ */
160
+ function latestUserMessageText(messages) {
161
+ for (let index = messages.length - 1; index >= 0; index--) {
162
+ const message = messages[index];
163
+ if (message.role !== "user")
164
+ continue;
165
+ if (typeof message.content === "string")
166
+ return message.content;
167
+ const parts = [];
168
+ for (const part of message.content) {
169
+ if (part.type === "text") // parts of any other type are ignored
170
+ parts.push(part.text);
171
+ }
172
+ return parts.join("\n"); // returned unconditionally: no fallback to older user messages
173
+ }
174
+ return "";
175
+ }
176
+ function emptyMemoryRetrievalReport(maxResults) { // Builds a fresh report object: empty query, the given maxResults, and empty providerReports/results/contextItems arrays.
177
+ return { request: { query: "", maxResults }, providerReports: [], results: [], contextItems: [] };
178
+ }
92
179
  export class AgentSession {
93
180
  agent;
94
181
  sessionManager;
95
182
  settingsManager;
183
+ capabilityEnvelope;
96
184
  _scopedModels;
97
185
  // Event subscription state
98
186
  _unsubscribeAgent;
@@ -108,11 +196,46 @@ export class AgentSession {
108
196
  _pendingNextTurnMessages = [];
109
197
  /** Serializes prompt() submissions made while streaming so queued steering/follow-ups keep user-typed FIFO order. */
110
198
  _streamingPromptSubmissionTail = Promise.resolve();
199
+ /** Pending idle timer that starts bounded goal continuation after the session becomes idle. */
200
+ _goalAutoContinueTimer;
201
+ /** Guards bounded idle autosteer so continuation prompts do not recursively trigger themselves. */
202
+ _isGoalAutoContinuing = false;
203
+ /** Pending idle timer that starts an autonomous research pass after the session becomes idle. */
204
+ _researchLaneTimer;
205
+ /** Single-flight guard: at most one research pass runs at a time per session. */
206
+ _isResearchLaneRunning = false;
207
+ /** Why the last idle research-lane evaluation skipped, for /autonomy diagnostics. */
208
+ _lastResearchLaneSkipReason;
209
+ /** Live lane registry — the real source for AutonomyStatusSnapshot.activeLaneCount. */
210
+ _laneTracker = new LaneTracker();
211
+ /** Session-lifetime abort for in-flight research passes (same pattern as _reflectionAbort). */
212
+ _researchLaneAbort = new AbortController();
213
+ /** Single-flight guard: at most one delegated worker runs at a time per session. */
214
+ _isWorkerDelegationRunning = false;
215
+ /** Session-lifetime abort for in-flight delegated workers. */
216
+ _workerDelegationAbort = new AbortController();
217
+ /**
218
+ * The last tool set requested via setActiveToolsByName BEFORE model-capability filtering, so
219
+ * switching from a small-window model back to a large one restores the full requested set.
220
+ */
221
+ _requestedActiveToolNames;
111
222
  // Compaction/context hygiene state
112
223
  _compactionAbortController = undefined;
113
224
  _autoCompactionAbortController = undefined;
114
225
  _overflowRecoveryAttempted = false;
115
226
  _latestContextGcReport = undefined;
227
+ /** Brain-curation sidecar (design: brain-context-curation-design.md). Inert unless the
228
+ * contextPolicy.curation setting is enabled AND the model passes the digest fitness gate. */
229
+ _brainCurator = new BrainCurator();
230
+ _lastCurationSkipReason = undefined;
231
+ _toolArtifactStore = undefined;
232
+ _latestContextAuditReport = undefined;
233
+ _latestPromptPolicyReport = undefined;
234
+ _latestPromptPolicyGcCorrelation = undefined;
235
+ _latestPromptEnforcementReport = undefined;
236
+ _memoryOkfProvider = undefined;
237
+ _latestMemoryRetrievalReport = undefined;
238
+ _latestMemoryPromptInclusionReport = undefined;
116
239
  // Branch summarization state
117
240
  _branchSummaryAbortController = undefined;
118
241
  // Retry state
@@ -151,10 +274,12 @@ export class AgentSession {
151
274
  _costGuardDowngraded = false;
152
275
  /** Active model-router intent for the current transient routed turn, if any. */
153
276
  _activeModelRouterIntent;
277
+ _activeModelRouterRoute;
154
278
  _modelRouterSessionBuffer;
155
279
  _modelRouterEscalationRequested = false;
156
280
  _isModelRouterRetry = false;
157
281
  _lastModelRouterDecision;
282
+ _lastAutonomyGateOutcome;
158
283
  _lastModelRouterSkipReason;
159
284
  _lastModelRouterIntent;
160
285
  /** Lazily-built skill curator (#32) over `<agentDir>/skills`. */
@@ -317,7 +442,21 @@ export class AgentSession {
317
442
  if (this._extensionRunner.hasHandlers("context")) {
318
443
  finalMessages = await this._extensionRunner.emitContext(currentMessages);
319
444
  }
320
- const gcMessages = this._applyContextGc(finalMessages, true).messages;
445
+ const auditReport = this._runContextAudit(finalMessages);
446
+ const shadowReport = this._runPromptPolicyPlanning(auditReport);
447
+ const memoryReport = await this._runMemoryRetrieval(finalMessages);
448
+ const gcResult = this._applyContextGc(finalMessages, true);
449
+ this._correlatePromptPolicyWithContextGc(gcResult.report);
450
+ const enforcementResult = this._runPromptEnforcement(gcResult.messages, shadowReport);
451
+ this._enqueueRelevanceCuration(gcResult.messages, shadowReport);
452
+ // Fire-and-forget: the local curator overlaps the frontier call; it never blocks a turn.
453
+ this._maybeDrainBrainCuration();
454
+ // Appended LAST, after gc and enforcement, so the bounded evidence block is
455
+ // never packed/stubbed/reshaped by either pass and always reflects this turn's
456
+ // fresh retrieval. Because nothing downstream trims it, memory-prompt-block.ts's
457
+ // character caps are the only budget protection for this block -- load-bearing,
458
+ // not merely defensive.
459
+ const gcMessages = this._maybeAppendMemoryEvidenceBlock(enforcementResult.messages, memoryReport);
321
460
  this._applyCostGuard(gcMessages);
322
461
  return gcMessages;
323
462
  };
@@ -422,15 +561,601 @@ export class AgentSession {
422
561
  _contextGcStorageDir() {
423
562
  return join(this._agentDir, "context-gc", this.sessionManager.getSessionId());
424
563
  }
564
+ _toolArtifactsDir() {
565
+ return join(this._agentDir, "context-artifacts", this.sessionManager.getSessionId());
566
+ }
567
+ /**
568
+ * Session-scoped, filesystem-backed artifact store for first-capture-then-bound tool
569
+ * output (grep/find only, for now -- see tool-output-artifacts.md). Lazily created and
570
+ * cached so every tool construction in this session shares one store instance.
571
+ *
572
+ * `packToolOutput()` registers a reference (the packing tool call's id) at pack time
573
+ * and fails closed, so packed artifacts are never prematurely collected.
574
+ * `_releaseGcPackedArtifactReferences()` (called from `_applyContextGc()`) releases
575
+ * that reference once context-gc packs the result out of live context, and
576
+ * opportunistically reclaims now-unreferenced artifacts via `cleanup()`.
577
+ * Remaining carry-forward gap: cleanup() now also runs at dispose(), but only reclaims
578
+ * already-released (zero-reference) artifacts. A session that ends before context-gc
579
+ * ever evicts a result never releases that reference, so its artifact stays on disk by
580
+ * design (resolvable on resume). Reclaiming those requires an explicit cross-session
581
+ * expiry/liveness policy, not just a sweep.
582
+ */
583
+ _getToolArtifactStore() {
584
+ this._toolArtifactStore ??= createFileArtifactStore({ baseDir: this._toolArtifactsDir() });
585
+ return this._toolArtifactStore;
586
+ }
587
+ /**
588
+ * Fixed path for this slice's local Pi OKF memory documents, shared across sessions
589
+ * under this agentDir (not session-scoped, unlike tool-artifacts/context-gc, since OKF
590
+ * memory represents durable cross-session knowledge, not a per-session capture). Not
591
+ * yet user-configurable -- see the memory-retrieval settings doc comment.
592
+ */
593
+ _memoryOkfDir() {
594
+ return join(this._agentDir, "okf-memory");
595
+ }
596
+ /**
597
+ * Session-scoped, read-only local OKF memory provider. Lazily created ONLY when memory
598
+ * retrieval is enabled (see `_runMemoryRetrieval`) -- never force-created, so a session
599
+ * with the setting off never touches `_memoryOkfDir()` at all (no directory access, no
600
+ * creation; `createOkfMemoryProvider` itself never writes/mkdirs either way).
601
+ */
602
+ _getMemoryOkfProvider() {
603
+ this._memoryOkfProvider ??= createOkfMemoryProvider({ rootDir: this._memoryOkfDir() });
604
+ return this._memoryOkfProvider;
605
+ }
606
+ /**
607
+ * One pass over the current branch, mapping each toolResult's toolCallId to its
608
+ * persisted session-entry id. Rebuilt every audit pass (O(branch) per turn), so this is
609
+ * O(n^2) over a long session. Fine at current scale; after the artifact-read fix this is
610
+ * the next per-turn audit cost to optimize if it ever matters (e.g. cache/incrementally
611
+ * update instead of a full rebuild).
612
+ */
613
+ _buildSessionEntryIdLookup() {
614
+ const map = new Map();
615
+ for (const entry of this.sessionManager.getBranch()) {
616
+ if (entry.type === "message" && entry.message.role === "toolResult") {
617
+ map.set(entry.message.toolCallId, entry.id);
618
+ }
619
+ }
620
+ return (toolCallId) => map.get(toolCallId);
621
+ }
622
+ /**
623
+ * Phase 1 observe-only audit pass (see context/context-audit.ts): converts live
624
+ * toolResult messages into ContextItems and runs the existing retention/hard-constraint
625
+ * evaluators over them, storing the latest deterministic report for tests/debugging.
626
+ * Read-only with respect to messages, the transcript, and artifact references -- uses
627
+ * `_toolArtifactStore` (the field), not `_getToolArtifactStore()` (the getter), so a
628
+ * session that never packed anything doesn't force-create a store/dir just to audit.
629
+ * Never throws into a live turn: any failure degrades to an empty report.
630
+ */
631
+ _runContextAudit(messages) {
632
+ try {
633
+ const report = runContextAudit(messages, {
634
+ turnIndex: this._turnIndex,
635
+ artifactStore: this._toolArtifactStore,
636
+ sessionEntryIdForToolCallId: this._buildSessionEntryIdLookup(),
637
+ });
638
+ this._latestContextAuditReport = report;
639
+ return report;
640
+ }
641
+ catch {
642
+ const report = { turnIndex: this._turnIndex, items: [] };
643
+ this._latestContextAuditReport = report;
644
+ return report;
645
+ }
646
+ }
647
+ /**
648
+ * Read-only inspection of the context audit. With `messages`, recomputes fresh against
649
+ * the given array (still no mutation of messages/transcript/artifact refs); without,
650
+ * returns the last report computed during a real transform pass.
651
+ */
652
+ getContextAuditReport(messages) {
653
+ if (messages)
654
+ return this._runContextAudit(messages);
655
+ return this._latestContextAuditReport ?? { turnIndex: this._turnIndex, items: [] };
656
+ }
657
+ /**
658
+ * Observe-first shadow/planning pass (see context/context-prompt-policy.ts): re-shapes
659
+ * the audit report into a per-item policy plan whose `appliedAction` is always
660
+ * "keep_raw" -- this never enforces anything, it only records what the policy engine
661
+ * would say. Never throws into a live turn: any failure degrades to an empty report.
662
+ */
663
+ _runPromptPolicyPlanning(auditReport) {
664
+ try {
665
+ const report = planPromptPolicy(auditReport);
666
+ this._latestPromptPolicyReport = report;
667
+ return report;
668
+ }
669
+ catch {
670
+ const report = { turnIndex: this._turnIndex, items: [] };
671
+ this._latestPromptPolicyReport = report;
672
+ return report;
673
+ }
674
+ }
675
+ /**
676
+ * Read-only inspection of the shadow policy plan. With `messages`, recomputes fresh
677
+ * (audit + plan) against the given array; without, returns the last plan computed
678
+ * during a real transform pass. Never mutates messages/transcript/artifact refs.
679
+ */
680
+ getPromptPolicyReport(messages) {
681
+ if (messages)
682
+ return this._runPromptPolicyPlanning(this._runContextAudit(messages));
683
+ return this._latestPromptPolicyReport ?? { turnIndex: this._turnIndex, items: [] };
684
+ }
685
+ /**
686
+ * Report-only correlation between the shadow plan just computed this turn and what the
687
+ * legacy context-gc pass actually packed. Runs after `_applyContextGc()` has already
688
+ * produced its report; never influences context-gc itself. Never throws into a live
689
+ * turn: any failure degrades to an empty correlation.
690
+ */
691
+ _correlatePromptPolicyWithContextGc(gcReport) {
692
+ const shadowReport = this._latestPromptPolicyReport ?? { turnIndex: this._turnIndex, items: [] };
693
+ try {
694
+ this._latestPromptPolicyGcCorrelation = correlateWithContextGc(shadowReport, gcReport);
695
+ }
696
+ catch {
697
+ this._latestPromptPolicyGcCorrelation = { turnIndex: this._turnIndex, entries: [] };
698
+ }
699
+ }
700
+ /** Read-only inspection of the latest shadow-plan/legacy-gc correlation, for tests/debugging. */
701
+ getPromptPolicyGcCorrelation() {
702
+ return this._latestPromptPolicyGcCorrelation ?? { turnIndex: this._turnIndex, entries: [] };
703
+ }
704
+ /**
705
+ * First enforcement pilot (see context/context-prompt-enforcement.ts): opt-in,
706
+ * default-disabled stub-in-place of stale artifact-backed tool_output results in the
707
+ * provider-visible message array only. Runs on `messages` AFTER context-gc has already
708
+ * produced its own result, so legacy context-gc's own packing/reporting is completely
709
+ * unaffected by this pass -- it only ever acts on messages gc left untouched this turn.
710
+ * Never throws into a live turn: any failure degrades to returning `messages` unchanged.
711
+ */
712
+ _runPromptEnforcement(messages, shadowReport) {
713
+ try {
714
+ const persistedSettings = this.settingsManager.getContextPromptEnforcementSettings();
715
+ const curationEnabled = this.settingsManager.getContextCurationSettings().enabled;
716
+ const settings = {
717
+ ...persistedSettings,
718
+ // Runtime fact, never assumed: artifact_retrieve is a companion affordance
719
+ // (auto-activated alongside grep/find), not a default/global tool, so active
720
+ // tools can differ turn to turn -- see context-prompt-enforcement.ts's doc
721
+ // comment on why this is checked separately from hasAvailableRetrievalPath.
722
+ retrievalToolAvailable: this.getActiveToolNames().includes("artifact_retrieve"),
723
+ brainRelevance: curationEnabled ? (itemId) => this._brainCurator.getRelevance(itemId) : undefined,
724
+ };
725
+ const result = enforcePromptPolicy(messages, shadowReport, settings);
726
+ this._latestPromptEnforcementReport = result.report;
727
+ return result;
728
+ }
729
+ catch {
730
+ const report = { turnIndex: this._turnIndex, items: [] };
731
+ this._latestPromptEnforcementReport = report;
732
+ return { messages, report };
733
+ }
734
+ }
735
+ /**
736
+ * Enqueue relevance-scoring jobs for stale, artifact-backed tool outputs the enforcement
737
+ * pilot could act on. Pure queueing — the verdicts only ever take effect through the
738
+ * asymmetric advisory lever inside enforcePromptPolicy. Never throws into a turn.
739
+ */
740
+ _enqueueRelevanceCuration(messages, shadowReport) {
741
+ try {
742
+ const settings = this.settingsManager.getContextCurationSettings();
743
+ if (!settings.enabled)
744
+ return;
745
+ const goal = latestUserPromptText(messages).slice(0, 400);
746
+ for (const item of shadowReport.items) {
747
+ if (!item.hasAvailableRetrievalPath)
748
+ continue;
749
+ const message = messages[item.messageIndex];
750
+ if (!message || message.role !== "toolResult" || message.toolCallId !== item.toolCallId)
751
+ continue;
752
+ if (message.isError)
753
+ continue;
754
+ const details = message.details;
755
+ if (details?.contextGc?.packed === true || details?.promptPolicy?.enforced === true)
756
+ continue;
757
+ const text = message.content
758
+ .filter((part) => part.type === "text")
759
+ .map((part) => part.text)
760
+ .join("\n");
761
+ if (text.length === 0)
762
+ continue;
763
+ this._brainCurator.enqueue({ kind: "relevance", key: item.itemId, content: text.slice(0, 4000), goal });
764
+ }
765
+ }
766
+ catch {
767
+ // curation is a sidecar; it must never disrupt a turn
768
+ }
769
+ }
770
+ /**
771
+ * Drain gate: settings on, model configured+authed, and the model has PASSED the digest
772
+ * fitness probe on THIS host (design: unfit or unprobed models are refused with a visible
773
+ * reason, never silently degraded). Fire-and-forget; never throws into a turn.
774
+ */
775
+ _maybeDrainBrainCuration() {
776
+ try {
777
+ const settings = this.settingsManager.getContextCurationSettings();
778
+ if (!settings.enabled) {
779
+ // Never surface a stale refusal reason for a feature the user has since disabled.
780
+ this._lastCurationSkipReason = undefined;
781
+ return;
782
+ }
783
+ if (!this._brainCurator.hasWork() || this._brainCurator.isDraining)
784
+ return;
785
+ if (!settings.model) {
786
+ this._lastCurationSkipReason = "curation_model_unset";
787
+ return;
788
+ }
789
+ const resolved = resolveCliModel({ cliModel: settings.model, modelRegistry: this._modelRegistry });
790
+ if (!resolved.model || !this._modelRegistry.hasConfiguredAuth(resolved.model)) {
791
+ this._lastCurationSkipReason = "curation_model_unresolved";
792
+ return;
793
+ }
794
+ // Match on the CANONICAL "provider/id" ref — runModelFitness stores reports under it,
795
+ // while settings.model may be a bare id or pattern; comparing raw strings would refuse
796
+ // forever with curation_model_unprobed even after a successful probe.
797
+ const canonicalRef = `${resolved.model.provider}/${resolved.model.id}`;
798
+ const fitness = FitnessStore.forAgentDir(this._agentDir)
799
+ .getForHost()
800
+ .find((entry) => entry.model === canonicalRef);
801
+ const digestScore = fitness?.report.digest;
802
+ if (!digestScore) {
803
+ this._lastCurationSkipReason = "curation_model_unprobed";
804
+ return;
805
+ }
806
+ if (digestScore.succeeded < Math.ceil(digestScore.total * (2 / 3))) {
807
+ this._lastCurationSkipReason = "curation_model_digest_unfit";
808
+ return;
809
+ }
810
+ this._lastCurationSkipReason = undefined;
811
+ void this._drainBrainCuration(resolved.model, settings.maxJobsPerTurn);
812
+ }
813
+ catch {
814
+ // curation is a sidecar; it must never disrupt a turn
815
+ }
816
+ }
817
+ async _drainBrainCuration(model, maxJobs) {
818
+ try {
819
+ // ACCUMULATE across all drained jobs (the drain runs the completer once PER job) —
820
+ // keeping only the last job's usage would under-report every multi-job drain.
821
+ let spentUsage;
822
+ const results = await this._brainCurator.drain({
823
+ maxJobs,
824
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
825
+ const completion = await this.runIsolatedCompletion({
826
+ systemPrompt,
827
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
828
+ model,
829
+ thinkingLevel: "off",
830
+ maxTokens: 256,
831
+ signal,
832
+ // Both curation system prompts are static — the provider can cache the prefix.
833
+ cacheRetention: "short",
834
+ });
835
+ const usage = completion.usage;
836
+ if (!spentUsage) {
837
+ spentUsage = structuredClone(usage);
838
+ }
839
+ else {
840
+ spentUsage.input += usage.input;
841
+ spentUsage.output += usage.output;
842
+ spentUsage.cacheRead += usage.cacheRead;
843
+ spentUsage.cacheWrite += usage.cacheWrite;
844
+ spentUsage.totalTokens += usage.totalTokens;
845
+ spentUsage.cost.input += usage.cost.input;
846
+ spentUsage.cost.output += usage.cost.output;
847
+ spentUsage.cost.cacheRead += usage.cost.cacheRead;
848
+ spentUsage.cost.cacheWrite += usage.cost.cacheWrite;
849
+ spentUsage.cost.total += usage.cost.total;
850
+ }
851
+ return {
852
+ text: completion.text,
853
+ costUsd: completion.usage.cost.total,
854
+ stopReason: String(completion.stopReason),
855
+ };
856
+ },
857
+ });
858
+ // Honest accounting even for free local models: token visibility is the contract.
859
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
860
+ this.addSpawnedUsage(spentUsage, { label: "context-curator" });
861
+ }
862
+ if (this._disposed || results.length === 0)
863
+ return;
864
+ this.sessionManager.appendCustomEntry("brain-curation", {
865
+ version: 1,
866
+ results: results.map((result) => ({
867
+ key: result.key,
868
+ kind: result.kind,
869
+ ok: result.ok,
870
+ ms: result.ms,
871
+ ...(result.digest !== undefined ? { digest: result.digest } : {}),
872
+ ...(result.relevant !== undefined ? { relevant: result.relevant, confidence: result.confidence } : {}),
873
+ })),
874
+ telemetry: this._brainCurator.telemetry(),
875
+ });
876
+ }
877
+ catch {
878
+ // curation is a sidecar; it must never disrupt a turn
879
+ }
880
+ }
881
+ /**
882
+ * Context composition dashboard data: decomposes the per-request payload (system prompt, tool
883
+ * schemas, extension contributions, message classes incl. GC/policy stubs and recall pages)
884
+ * plus background spend, so users can see exactly what their integrations cost per request.
885
+ * Read-only: uses the GC report path (writePayloads=false), never mutates anything.
886
+ */
887
+ getContextCompositionReport() {
888
+ const rawMessages = this.agent.state.messages.slice();
889
+ const gcResult = this._applyContextGc(rawMessages, false);
890
+ const activeNames = new Set(this.getActiveToolNames());
891
+ const extensions = this._resourceLoader.getExtensions().extensions;
892
+ const extensionToolNames = new Set(extensions.flatMap((extension) => [...extension.tools.keys()]));
893
+ const usage = this.getContextUsage();
894
+ const enforcementItems = this.getPromptEnforcementReport().items;
895
+ const curationStatus = this.getContextCurationStatus();
896
+ const spawned = this.getSpawnedUsage();
897
+ const promptInclusion = this.getMemoryPromptInclusionReport();
898
+ const memoryEvidenceTokens = promptInclusion.status === "included" ? Math.ceil(promptInclusion.blockChars / 4) : 0;
899
+ // Enforcement stubs are applied at SEND time (not persisted), so the message view here
900
+ // still holds raw text for them; subtract what stubbing reclaims per request.
901
+ const enforcementSavedTokens = enforcementItems
902
+ .filter((item) => item.enforced && typeof item.originalChars === "number")
903
+ .reduce((sum, item) => sum + Math.max(0, Math.ceil((item.originalChars ?? 0) / 4) - 50), 0);
904
+ return buildContextCompositionReport({
905
+ systemPrompt: this.systemPrompt ?? "",
906
+ tools: this.getAllTools()
907
+ .filter((tool) => activeNames.has(tool.name))
908
+ .map((tool) => ({
909
+ name: tool.name,
910
+ description: tool.description,
911
+ parameters: tool.parameters,
912
+ source: extensionToolNames.has(tool.name) ? "extension" : "built-in",
913
+ })),
914
+ extensions: extensions.map((extension) => ({
915
+ name: basename(extension.path),
916
+ path: extension.path,
917
+ toolNames: [...extension.tools.keys()],
918
+ commandCount: extension.commands.size,
919
+ })),
920
+ messages: gcResult.messages,
921
+ providerReportedTokens: usage?.tokens ?? null,
922
+ contextWindow: usage?.contextWindow ?? this.model?.contextWindow ?? null,
923
+ gc: { packedCount: gcResult.report.packedCount, savedTokens: gcResult.report.savedTokens },
924
+ enforcement: {
925
+ enforcedCount: enforcementItems.filter((item) => item.enforced).length,
926
+ advisoryEvictions: enforcementItems.filter((item) => item.advisory === "brain_irrelevant").length,
927
+ },
928
+ curation: {
929
+ enabled: curationStatus.enabled,
930
+ telemetry: curationStatus.telemetry,
931
+ lastSkipReason: curationStatus.lastSkipReason,
932
+ },
933
+ spawned: { cost: spawned.cost, reports: spawned.reports },
934
+ adjustments: { memoryEvidenceTokens, enforcementSavedTokens },
935
+ });
936
+ }
937
+ /** Bounded plain-text rendering of {@link getContextCompositionReport} for the /context command. */
938
+ formatContextCompositionDashboard() {
939
+ return formatContextCompositionDashboard(this.getContextCompositionReport());
940
+ }
941
+ /** Curation status for diagnostics/dashboard: settings, live telemetry, last refusal reason. */
942
+ getContextCurationStatus() {
943
+ const settings = this.settingsManager.getContextCurationSettings();
944
+ return {
945
+ enabled: settings.enabled,
946
+ model: settings.model,
947
+ telemetry: this._brainCurator.telemetry(),
948
+ lastSkipReason: this._lastCurationSkipReason,
949
+ };
950
+ }
951
+ /** Read-only inspection of the latest prompt-enforcement report, for tests/debugging. */
952
+ getPromptEnforcementReport() {
953
+ return this._latestPromptEnforcementReport ?? { turnIndex: this._turnIndex, items: [] };
954
+ }
955
+ /**
956
+ * Observe-only local memory retrieval (see context/memory-retrieval.ts and
957
+ * context/okf-memory-provider.ts): default disabled, opt-in setting. When disabled,
958
+ * never constructs the OKF provider (no directory access under `_memoryOkfDir()` at
959
+ * all) and returns an empty report -- fully fail-closed. When enabled, queries the
960
+ * local, read-only OKF provider with the latest user message text (empty if there is
961
+ * none, e.g. a goal-continuation turn -- degrades to zero results by construction, see
962
+ * `latestUserMessageText`'s doc comment) under `DEFAULT_LOCAL_MEMORY_EGRESS_POLICY`.
963
+ * Retrieved items are only ever stored in the report; nothing here touches `messages`,
964
+ * the transcript, or the provider-visible prompt. Never throws into a live turn: any
965
+ * failure (including a provider search error) degrades to an empty report.
966
+ */
967
+ async _runMemoryRetrieval(messages) {
968
+ try {
969
+ const settings = this.settingsManager.getMemoryRetrievalSettings();
970
+ if (!settings.enabled) {
971
+ const report = emptyMemoryRetrievalReport(settings.maxResults);
972
+ this._latestMemoryRetrievalReport = report;
973
+ return report;
974
+ }
975
+ const report = await retrieveMemoryForContext([this._getMemoryOkfProvider()], { query: latestUserMessageText(messages), maxResults: settings.maxResults }, {
976
+ createdAtTurn: this._turnIndex,
977
+ maxResults: settings.maxResults,
978
+ defaultLocalPolicy: DEFAULT_LOCAL_MEMORY_EGRESS_POLICY,
979
+ });
980
+ this._latestMemoryRetrievalReport = report;
981
+ return report;
982
+ }
983
+ catch {
984
+ const report = emptyMemoryRetrievalReport(0);
985
+ this._latestMemoryRetrievalReport = report;
986
+ return report;
987
+ }
988
+ }
989
+ /** Read-only inspection of the latest memory-retrieval report, for tests/debugging. */
990
+ getMemoryRetrievalReport() {
991
+ return this._latestMemoryRetrievalReport ?? emptyMemoryRetrievalReport(0);
992
+ }
993
+ /**
994
+ * Bounded prompt-surfacing pilot for local memory evidence (see
995
+ * context/memory-prompt-block.ts): opt-in, default disabled, and gated on TWO settings
996
+ * (`enabled` AND `includeInPrompt`) plus a non-empty `report.contextItems` -- the first
997
+ * two are belt-and-suspenders on top of the fact that `_runMemoryRetrieval` already
998
+ * leaves `contextItems` empty whenever `enabled` is false, regardless of
999
+ * `includeInPrompt`. Reuses the `report` this pass's `_runMemoryRetrieval` call already
1000
+ * computed -- never re-queries the provider here.
1001
+ *
1002
+ * Appends exactly one ephemeral `custom`/"memory_evidence" message wrapped by
1003
+ * `wrapUntrustedText` (the same nonce-fenced boundary + always-on system-prompt rule
1004
+ * used for other untrusted content) to the END of `messages`. This is purely additive
1005
+ * (never mutates an existing message) and purely transient: `messages` here is the
1006
+ * array about to be sent to the provider, not `this.agent.state.messages` or anything
1007
+ * persisted via `sessionManager` -- so the injected message can never reach the
1008
+ * transcript, regardless of how many times this pass runs.
1009
+ *
1010
+ * Also records a `MemoryPromptInclusionReport` (context/memory-diagnostics.ts) at each
1011
+ * branch below, for context_audit's diagnostic surface only -- this is pure bookkeeping
1012
+ * alongside the existing branches, not a new branch/condition: the messages returned
1013
+ * are unchanged by this recording.
1014
+ */
1015
+ _maybeAppendMemoryEvidenceBlock(messages, report) {
1016
+ try {
1017
+ const settings = this.settingsManager.getMemoryRetrievalSettings();
1018
+ const base = {
1019
+ enabled: settings.enabled,
1020
+ includeInPrompt: settings.includeInPrompt,
1021
+ selectedItemCount: report.contextItems.length,
1022
+ };
1023
+ if (!settings.enabled) {
1024
+ this._latestMemoryPromptInclusionReport = {
1025
+ ...base,
1026
+ status: "disabled",
1027
+ includedCount: 0,
1028
+ omittedCount: 0,
1029
+ blockChars: 0,
1030
+ };
1031
+ return messages;
1032
+ }
1033
+ if (!settings.includeInPrompt) {
1034
+ this._latestMemoryPromptInclusionReport = {
1035
+ ...base,
1036
+ status: "include_disabled",
1037
+ includedCount: 0,
1038
+ omittedCount: 0,
1039
+ blockChars: 0,
1040
+ };
1041
+ return messages;
1042
+ }
1043
+ if (report.contextItems.length === 0) {
1044
+ this._latestMemoryPromptInclusionReport = {
1045
+ ...base,
1046
+ status: "no_results",
1047
+ includedCount: 0,
1048
+ omittedCount: 0,
1049
+ blockChars: 0,
1050
+ };
1051
+ return messages;
1052
+ }
1053
+ const block = buildMemoryPromptBlock(report.contextItems);
1054
+ if (!block.text) {
1055
+ this._latestMemoryPromptInclusionReport = {
1056
+ ...base,
1057
+ status: "empty_block",
1058
+ includedCount: block.includedCount,
1059
+ omittedCount: block.omittedCount,
1060
+ blockChars: 0,
1061
+ };
1062
+ return messages;
1063
+ }
1064
+ const wrapped = wrapUntrustedText(block.text, "memory:pi-okf");
1065
+ const evidenceMessage = {
1066
+ role: "custom",
1067
+ customType: "memory_evidence",
1068
+ content: [{ type: "text", text: wrapped }],
1069
+ display: false,
1070
+ timestamp: Date.now(),
1071
+ };
1072
+ this._latestMemoryPromptInclusionReport = {
1073
+ ...base,
1074
+ status: "included",
1075
+ includedCount: block.includedCount,
1076
+ omittedCount: block.omittedCount,
1077
+ blockChars: wrapped.length,
1078
+ sourceLabel: "memory:pi-okf",
1079
+ };
1080
+ return [...messages, evidenceMessage];
1081
+ }
1082
+ catch {
1083
+ // `base` may not exist yet if the throw happened before it was computed (e.g.
1084
+ // settings access or `report.contextItems` itself threw), so this branch cannot
1085
+ // rely on it -- fall back to safe, fixed defaults rather than risk referencing
1086
+ // a partially-evaluated value.
1087
+ this._latestMemoryPromptInclusionReport = {
1088
+ enabled: false,
1089
+ includeInPrompt: false,
1090
+ selectedItemCount: 0,
1091
+ status: "failed",
1092
+ includedCount: 0,
1093
+ omittedCount: 0,
1094
+ blockChars: 0,
1095
+ };
1096
+ return messages;
1097
+ }
1098
+ }
1099
+ /** Read-only inspection of the latest memory-prompt-inclusion decision, for tests/debugging and context_audit. */
1100
+ getMemoryPromptInclusionReport() {
1101
+ return this._latestMemoryPromptInclusionReport ?? defaultMemoryPromptInclusionReport();
1102
+ }
1103
+ /**
1104
+ * Combines the already-stored, no-arg latest reports (never re-queries the provider or
1105
+ * touches the OKF directory) into the safe, allow-list-projected shape context_audit
1106
+ * exposes. See context/memory-diagnostics.ts for why this projection is allow-list
1107
+ * based rather than a spread-then-delete of the raw report.
1108
+ */
1109
+ _getMemoryAuditDiagnostics() {
1110
+ const settings = this.settingsManager.getMemoryRetrievalSettings();
1111
+ return {
1112
+ retrieval: sanitizeMemoryRetrievalReportForDiagnostics(this.getMemoryRetrievalReport(), settings),
1113
+ promptInclusion: this.getMemoryPromptInclusionReport(),
1114
+ };
1115
+ }
425
1116
  _applyContextGc(messages, writePayloads) {
426
1117
  try {
1118
+ const settings = this.settingsManager.getContextGcSettings();
1119
+ // Merge the ACTIVE memory providers' own page markers (e.g. transcript-recall's
1120
+ // "<memory_context") into the semantic-memory marker list. The settings default is
1121
+ // provider-agnostic and non-empty, so without this merge the recall pages the bundled
1122
+ // default provider actually emits are never recognized as semantic-memory pages and
1123
+ // accumulate raw for the life of the session — the exact growth Bug #7 GC exists to stop.
1124
+ const providerMarkers = this._memoryManager.getContextMarkers();
1125
+ const curationSettings = this.settingsManager.getContextCurationSettings();
427
1126
  const result = applyContextGc(messages, {
428
- ...this.settingsManager.getContextGcSettings(),
1127
+ ...settings,
1128
+ semanticMemory: {
1129
+ ...settings.semanticMemory,
1130
+ markers: [...new Set([...settings.semanticMemory.markers, ...providerMarkers])],
1131
+ },
429
1132
  cwd: this._cwd,
430
1133
  storageDir: this._contextGcStorageDir(),
431
1134
  writePayloads,
1135
+ curation: curationSettings.enabled
1136
+ ? {
1137
+ resolveDigest: (digestKey) => this._brainCurator.getDigest(digestKey),
1138
+ // Only the real per-turn pass enqueues work; the read-only report path
1139
+ // (writePayloads=false) stays side-effect free.
1140
+ onPacked: writePayloads
1141
+ ? (record, originalText) => {
1142
+ this._brainCurator.enqueue({
1143
+ kind: "stub_digest",
1144
+ key: record.key ?? record.toolCallId,
1145
+ content: originalText,
1146
+ });
1147
+ }
1148
+ : undefined,
1149
+ }
1150
+ : undefined,
432
1151
  });
433
1152
  this._latestContextGcReport = result.report;
1153
+ // Only release/reclaim on the real per-turn pass (writePayloads=true), never on
1154
+ // the read-only status-report path (getContextGcReport with writePayloads=false),
1155
+ // so merely inspecting the report can't have side effects.
1156
+ if (writePayloads && result.report.packedCount > 0) {
1157
+ this._releaseGcPackedArtifactReferences(messages, result.report);
1158
+ }
434
1159
  return result;
435
1160
  }
436
1161
  catch {
@@ -446,6 +1171,39 @@ export class AgentSession {
446
1171
  return { messages, report };
447
1172
  }
448
1173
  }
1174
+ /**
1175
+ * Reference-release + cleanup lifecycle: once context-gc has packed a grep/find tool
1176
+ * result out of the live prompt (the message is no longer current/active working
1177
+ * context -- see contracts-and-retention.md's "ephemeral"/"expired" retention
1178
+ * classes), release the pack-time reference `packToolOutput()` registered for it, and
1179
+ * opportunistically reclaim now-unreferenced artifacts. This is the other half of the
1180
+ * D2b-1 gate: artifacts were being registered but never released, so they accumulated
1181
+ * for the life of the session.
1182
+ *
1183
+ * `record.toolCallId` (from context-gc's packed record) is exactly the holder id
1184
+ * `packToolOutput()` used when it called `addReference()` -- both trace back to the
1185
+ * same tool call's id -- so no separate bookkeeping is needed to find it.
1186
+ */
1187
+ _releaseGcPackedArtifactReferences(messages, report) {
1188
+ const store = this._toolArtifactStore;
1189
+ if (!store)
1190
+ return; // no store was ever constructed, so nothing could have been packed to one
1191
+ let releasedAny = false;
1192
+ for (const record of report.records) {
1193
+ if (record.toolName !== "grep" && record.toolName !== "find")
1194
+ continue;
1195
+ const artifactId = extractArtifactId(messages[record.messageIndex]);
1196
+ if (!artifactId)
1197
+ continue;
1198
+ if (store.removeReference(artifactId, record.toolCallId))
1199
+ releasedAny = true;
1200
+ }
1201
+ // Cleanup only runs immediately after a release actually happened in this pass, so
1202
+ // a long session doesn't re-scan the artifact directory on every turn once nothing
1203
+ // new became eligible for release.
1204
+ if (releasedAny)
1205
+ store.cleanup();
1206
+ }
449
1207
  getContextGcReport(messages) {
450
1208
  if (messages)
451
1209
  return this._applyContextGc(messages, false).report;
@@ -477,8 +1235,8 @@ export class AgentSession {
477
1235
  }
478
1236
  _installAgentToolHooks() {
479
1237
  this.agent.beforeToolCall = async ({ toolCall, args }) => {
480
- if (this._activeModelRouterIntent &&
481
- shouldEscalateModelRouterTool({ intent: this._activeModelRouterIntent, toolName: toolCall.name, args })) {
1238
+ if (this._activeModelRouterRoute &&
1239
+ shouldEscalateModelRouterTool({ tier: this._activeModelRouterRoute.tier, toolName: toolCall.name, args })) {
482
1240
  this._modelRouterEscalationRequested = true;
483
1241
  this.agent.abort();
484
1242
  return {
@@ -486,6 +1244,22 @@ export class AgentSession {
486
1244
  reason: "Model router escalation required: a cheap research turn attempted a mutating tool. Retry the turn on the configured expensive model.",
487
1245
  };
488
1246
  }
1247
+ // Autonomy tool gating
1248
+ const gateResult = evaluateToolGate({
1249
+ toolName: toolCall.name,
1250
+ args,
1251
+ cwd: this._cwd,
1252
+ envelope: this.capabilityEnvelope,
1253
+ });
1254
+ if (this.capabilityEnvelope) {
1255
+ this._lastAutonomyGateOutcome = gateResult;
1256
+ }
1257
+ if (gateResult.outcome === "block" || gateResult.outcome === "ask-user") {
1258
+ return {
1259
+ block: true,
1260
+ reason: `Tool execution blocked by autonomy gate [${gateResult.gate}]: ${gateResult.message} (${gateResult.reasonCode})`,
1261
+ };
1262
+ }
489
1263
  const runner = this._extensionRunner;
490
1264
  if (!runner.hasHandlers("tool_call")) {
491
1265
  return undefined;
@@ -835,6 +1609,8 @@ export class AgentSession {
835
1609
  */
836
1610
  dispose() {
837
1611
  try {
1612
+ this._clearGoalAutoContinueTimer();
1613
+ this._clearResearchLaneTimer();
838
1614
  this.abortRetry();
839
1615
  this.abortCompaction();
840
1616
  this.abortBranchSummary();
@@ -846,6 +1622,10 @@ export class AgentSession {
846
1622
  // write memory/skills against this now-disposed session.
847
1623
  this._disposed = true;
848
1624
  this._reflectionAbort.abort();
1625
+ // Abort any in-flight research pass or delegated worker for the same reason: a disposed
1626
+ // session must not keep spending tokens or persist evidence against dead state.
1627
+ this._researchLaneAbort.abort();
1628
+ this._workerDelegationAbort.abort();
849
1629
  // Bug #20: clear the hooks this session installed on the shared agent so their closures stop
850
1630
  // pinning this (deactivated) session — and all its history/maps — in memory if the agent
851
1631
  // instance outlives the session.
@@ -862,6 +1642,19 @@ export class AgentSession {
862
1642
  // true session-end hook (P3); file-store shutdown is a no-op.
863
1643
  void this._memoryManager.shutdownAll().catch(() => { });
864
1644
  cleanupSessionResources(this.sessionId);
1645
+ // Best-effort final sweep for any grep/find artifact already released (reference
1646
+ // count zero) but not yet reclaimed -- e.g. a release whose cleanup() call failed
1647
+ // transiently. This is conservative: it never releases a still-referenced
1648
+ // artifact, so a session that ends before context-gc ever evicts a result (too
1649
+ // short to cross preserveRecentMessages) correctly leaves that artifact in place,
1650
+ // resolvable if the same session is resumed later. It does not sweep OTHER
1651
+ // sessions' artifact directories.
1652
+ try {
1653
+ this._toolArtifactStore?.cleanup();
1654
+ }
1655
+ catch {
1656
+ // Best-effort; dispose must succeed regardless.
1657
+ }
865
1658
  }
866
1659
  // =========================================================================
867
1660
  // Read-only State Access
@@ -917,16 +1710,41 @@ export class AgentSession {
917
1710
  * Only tools in the registry can be enabled. Unknown tool names are ignored.
918
1711
  * Also rebuilds the system prompt to reflect the new tool set.
919
1712
  * Changes take effect on the next agent turn.
1713
+ *
1714
+ * artifact_retrieve is auto-activated as a companion whenever grep or find ends up
1715
+ * in the resulting active set and artifact_retrieve is registered (i.e. not excluded/
1716
+ * blocked/outside an allowlist -- the registry itself is built with that same filter,
1717
+ * so registry presence already tracks "allowed"). This is enforced here, not just in
1718
+ * the settings/profile refresh flow, because this method is a public, extension-
1719
+ * exposed activation path (`setActiveTools`) on its own: without this, grep/find could
1720
+ * end up active while still being handed an artifact store (gated on "allowed" in
1721
+ * `_buildRuntime`) with no active tool able to resolve the resulting
1722
+ * "Full output: artifact tool-output:<id>" handle.
920
1723
  */
921
1724
  setActiveToolsByName(toolNames) {
1725
+ // Model capability: small-window models get a reduced tool surface derived from the model's
1726
+ // own metadata. The unfiltered request is remembered so a later switch to a larger model
1727
+ // restores it (the filter is re-applied on every model change).
1728
+ this._requestedActiveToolNames = [...toolNames];
1729
+ const capabilityFiltered = filterToolNamesForCapability(toolNames, this.getModelCapabilityProfile());
922
1730
  const tools = [];
923
1731
  const validToolNames = [];
924
- for (const name of toolNames) {
1732
+ const seen = new Set();
1733
+ const addIfRegistered = (name) => {
1734
+ if (seen.has(name))
1735
+ return;
925
1736
  const tool = this._toolRegistry.get(name);
926
- if (tool) {
927
- tools.push(tool);
928
- validToolNames.push(name);
929
- }
1737
+ if (!tool)
1738
+ return;
1739
+ seen.add(name);
1740
+ tools.push(tool);
1741
+ validToolNames.push(name);
1742
+ };
1743
+ for (const name of capabilityFiltered) {
1744
+ addIfRegistered(name);
1745
+ }
1746
+ if (validToolNames.includes("grep") || validToolNames.includes("find")) {
1747
+ addIfRegistered("artifact_retrieve");
930
1748
  }
931
1749
  this.agent.state.tools = tools;
932
1750
  // Rebuild base system prompt with new tool set
@@ -1134,35 +1952,166 @@ export class AgentSession {
1134
1952
  await this._drainQueuedExtensionCommands();
1135
1953
  }
1136
1954
  }
1137
- _resolveModelRouterModelForIntent(intent) {
1955
+ _isModelAvailableAndAuthed(pattern) {
1956
+ const resolved = resolveCliModel({ cliModel: pattern, modelRegistry: this._modelRegistry });
1957
+ if (!resolved.model)
1958
+ return false;
1959
+ return this._modelRegistry.hasConfiguredAuth(resolved.model);
1960
+ }
1961
+ _resolveModelRouterTurnRoute(prompt) {
1138
1962
  const settings = this.settingsManager.getModelRouterSettings();
1139
- const modelLabel = intent === "research" ? "cheap model" : "expensive model";
1140
1963
  if (!settings.enabled) {
1141
1964
  this._lastModelRouterSkipReason = "disabled";
1142
1965
  return undefined;
1143
1966
  }
1144
- const modelPattern = intent === "research" ? settings.cheapModel : settings.expensiveModel;
1967
+ const decision = classifyModelRouterRoute(prompt);
1968
+ this._lastModelRouterIntent = decision.tier === "cheap" ? "research" : "modify";
1969
+ // Learning tier must not be selected for normal user prompts
1970
+ if (decision.tier === "learning") {
1971
+ this._lastModelRouterSkipReason = "learning tier not supported for user prompts";
1972
+ return undefined;
1973
+ }
1974
+ const modelPattern = settings[decision.tier === "cheap" ? "cheapModel" : decision.tier === "medium" ? "mediumModel" : "expensiveModel"];
1975
+ const label = decision.tier === "cheap" ? "cheap model" : decision.tier === "medium" ? "medium model" : "expensive model";
1976
+ if (decision.tier === "medium" && (!modelPattern || !this._isModelAvailableAndAuthed(modelPattern))) {
1977
+ const expensivePattern = settings.expensiveModel;
1978
+ if (expensivePattern && this._isModelAvailableAndAuthed(expensivePattern)) {
1979
+ const resolvedExpensive = resolveCliModel({
1980
+ cliModel: expensivePattern,
1981
+ modelRegistry: this._modelRegistry,
1982
+ });
1983
+ if (resolvedExpensive.model) {
1984
+ decision.fallbackFrom = "medium";
1985
+ decision.tier = "expensive";
1986
+ decision.reasonCode = "medium_unavailable_fallback_expensive";
1987
+ decision.reasons = [...decision.reasons, "Medium model is unavailable, falling back to expensive model"];
1988
+ decision.model = formatModelRouterModel(resolvedExpensive.model);
1989
+ this._lastModelRouterSkipReason = undefined;
1990
+ return { decision, model: resolvedExpensive.model };
1991
+ }
1992
+ }
1993
+ this._lastModelRouterSkipReason = "medium model and expensive fallback are unavailable";
1994
+ return undefined;
1995
+ }
1145
1996
  if (!modelPattern) {
1146
- this._lastModelRouterSkipReason = `${modelLabel} unset`;
1997
+ this._lastModelRouterSkipReason = `${label} unset`;
1147
1998
  return undefined;
1148
1999
  }
1149
2000
  const resolved = resolveCliModel({ cliModel: modelPattern, modelRegistry: this._modelRegistry });
1150
2001
  if (!resolved.model) {
1151
- this._lastModelRouterSkipReason = `${modelLabel} unresolved: ${modelPattern}`;
2002
+ this._lastModelRouterSkipReason = `${label} unresolved: ${modelPattern}`;
1152
2003
  return undefined;
1153
2004
  }
1154
2005
  const resolvedName = formatModelRouterModel(resolved.model);
1155
2006
  if (!this._modelRegistry.hasConfiguredAuth(resolved.model)) {
1156
- this._lastModelRouterSkipReason = `${modelLabel} missing auth: ${resolvedName}`;
2007
+ this._lastModelRouterSkipReason = `${label} missing auth: ${resolvedName}`;
1157
2008
  return undefined;
1158
2009
  }
1159
2010
  this._lastModelRouterSkipReason = undefined;
2011
+ decision.model = resolvedName;
2012
+ return { decision, model: resolved.model };
2013
+ }
2014
+ _resolveModelRouterModelForIntent(intent) {
2015
+ const settings = this.settingsManager.getModelRouterSettings();
2016
+ const modelPattern = intent === "research" ? settings.cheapModel : settings.expensiveModel;
2017
+ if (!modelPattern)
2018
+ return undefined;
2019
+ const resolved = resolveCliModel({ cliModel: modelPattern, modelRegistry: this._modelRegistry });
2020
+ if (!resolved.model)
2021
+ return undefined;
2022
+ if (!this._modelRegistry.hasConfiguredAuth(resolved.model))
2023
+ return undefined;
1160
2024
  return resolved.model;
1161
2025
  }
2026
+ _resolveConfiguredTierModel(tier) {
2027
+ const settings = this.settingsManager.getModelRouterSettings();
2028
+ const pattern = tier === "cheap" ? settings.cheapModel : tier === "medium" ? settings.mediumModel : settings.expensiveModel;
2029
+ if (!pattern)
2030
+ return undefined;
2031
+ const resolved = resolveCliModel({ cliModel: pattern, modelRegistry: this._modelRegistry });
2032
+ if (!resolved.model)
2033
+ return undefined;
2034
+ if (!this._modelRegistry.hasConfiguredAuth(resolved.model))
2035
+ return undefined;
2036
+ return resolved.model;
2037
+ }
2038
+ /**
2039
+ * Router resolution with the routing judge (auto-on with the router): the regex classifier's
2040
+ * decision is the baseline; when a judge model resolves (judgeModel, else mediumModel), one
2041
+ * bounded, tool-less completion may move the tier between cheap/medium/expensive — never to
2042
+ * learning. Core rule encoded in the judge prompt: planning is never cheap unless genuinely
2043
+ * trivial. Every fallback stays visible in the decision reasons, and judge spend reports
2044
+ * through spawned-usage accounting.
2045
+ */
2046
+ async _resolveModelRouterTurnRouteJudged(prompt, options) {
2047
+ const baseline = this._resolveModelRouterTurnRoute(prompt);
2048
+ if (!baseline)
2049
+ return undefined;
2050
+ if (options?.skipJudge)
2051
+ return baseline;
2052
+ const settings = this.settingsManager.getModelRouterSettings();
2053
+ if (!settings.judgeEnabled)
2054
+ return baseline;
2055
+ const judgePattern = settings.judgeModel ?? settings.mediumModel;
2056
+ if (!judgePattern)
2057
+ return baseline;
2058
+ const judgeModel = this._resolveLaneModel(judgePattern);
2059
+ if (!judgeModel)
2060
+ return baseline;
2061
+ let spentUsage;
2062
+ const judged = await runRouteJudge({
2063
+ prompt,
2064
+ baseline: baseline.decision,
2065
+ signal: this._reflectionAbort.signal,
2066
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
2067
+ const completion = await this.runIsolatedCompletion({
2068
+ systemPrompt,
2069
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
2070
+ model: judgeModel,
2071
+ thinkingLevel: "off",
2072
+ maxTokens: ROUTE_JUDGE_MAX_OUTPUT_TOKENS,
2073
+ signal,
2074
+ // The judge system prompt is static — the provider can cache the prefix.
2075
+ cacheRetention: "short",
2076
+ });
2077
+ spentUsage = completion.usage;
2078
+ return {
2079
+ text: completion.text,
2080
+ costUsd: completion.usage.cost.total,
2081
+ stopReason: String(completion.stopReason),
2082
+ };
2083
+ },
2084
+ });
2085
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
2086
+ this.addSpawnedUsage(spentUsage, { label: "router-judge" });
2087
+ }
2088
+ if (!judged.verdict || judged.decision.tier === baseline.decision.tier) {
2089
+ // Same tier (or judge fell back): keep the baseline model, carry the annotated decision.
2090
+ return { decision: judged.decision, model: baseline.model };
2091
+ }
2092
+ const judgedTier = judged.decision.tier;
2093
+ if (judgedTier !== "cheap" && judgedTier !== "medium" && judgedTier !== "expensive") {
2094
+ return { decision: baseline.decision, model: baseline.model };
2095
+ }
2096
+ const judgedModel = this._resolveConfiguredTierModel(judgedTier);
2097
+ if (!judgedModel) {
2098
+ return {
2099
+ decision: {
2100
+ ...baseline.decision,
2101
+ reasons: [
2102
+ ...baseline.decision.reasons,
2103
+ `Route judge chose ${judgedTier} but no model resolves for that tier; baseline kept`,
2104
+ ],
2105
+ },
2106
+ model: baseline.model,
2107
+ };
2108
+ }
2109
+ return { decision: { ...judged.decision, model: formatModelRouterModel(judgedModel) }, model: judgedModel };
2110
+ }
2111
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: test seam
1162
2112
  _resolveModelRouterTurnModel(prompt) {
1163
- const intent = classifyModelRouterIntent(prompt);
1164
- this._lastModelRouterIntent = intent;
1165
- return this._resolveModelRouterModelForIntent(intent);
2113
+ const resolved = this._resolveModelRouterTurnRoute(prompt);
2114
+ return resolved?.model;
1166
2115
  }
1167
2116
  getModelRouterStatus(formatLabel) {
1168
2117
  const recentDecisions = getRecentModelRouterDecisions(this.sessionManager.getEntries());
@@ -1181,7 +2130,7 @@ export class AgentSession {
1181
2130
  }
1182
2131
  return lines.join("\n");
1183
2132
  }
1184
- async _runAgentPromptWithModelRouter(messages, routedModel, routedIntent, persistDecision = true) {
2133
+ async _runAgentPromptWithModelRouter(messages, routedModel, routeDecision, persistDecision = true) {
1185
2134
  if (!routedModel) {
1186
2135
  await this._runAgentPrompt(messages);
1187
2136
  return;
@@ -1189,23 +2138,30 @@ export class AgentSession {
1189
2138
  const previousModel = this.agent.state.model;
1190
2139
  const previousThinkingLevel = this.agent.state.thinkingLevel;
1191
2140
  const previousActiveModelRouterIntent = this._activeModelRouterIntent;
2141
+ const previousActiveModelRouterRoute = this._activeModelRouterRoute;
1192
2142
  const previousModelRouterSessionBuffer = this._modelRouterSessionBuffer;
1193
2143
  const previousModelRouterEscalationRequested = this._modelRouterEscalationRequested;
1194
- const bufferRoutedTurn = routedIntent === "research";
2144
+ const bufferRoutedTurn = routeDecision?.tier === "cheap";
1195
2145
  const originalHistoryLength = this.agent.state.messages.length;
1196
2146
  let retryModel;
1197
- let completedDecision = routedIntent
2147
+ let completedDecision = routeDecision
1198
2148
  ? {
1199
- intent: routedIntent,
2149
+ route: routeDecision,
1200
2150
  routedModel: formatModelRouterModel(routedModel),
1201
2151
  outcome: "routed",
2152
+ intent: routeDecision.tier === "cheap" ? "research" : "modify",
1202
2153
  }
1203
2154
  : undefined;
1204
2155
  let thrownError;
1205
- if (routedIntent) {
2156
+ if (routeDecision) {
1206
2157
  this._lastModelRouterDecision = completedDecision;
1207
2158
  }
1208
- this._activeModelRouterIntent = routedIntent;
2159
+ this._activeModelRouterIntent = routeDecision
2160
+ ? routeDecision.tier === "cheap"
2161
+ ? "research"
2162
+ : "modify"
2163
+ : undefined;
2164
+ this._activeModelRouterRoute = routeDecision;
1209
2165
  if (bufferRoutedTurn) {
1210
2166
  this._modelRouterSessionBuffer = createModelRouterSessionBuffer();
1211
2167
  this._modelRouterEscalationRequested = false;
@@ -1220,10 +2176,11 @@ export class AgentSession {
1220
2176
  this.agent.state.messages.splice(originalHistoryLength);
1221
2177
  retryModel = this._resolveModelRouterModelForIntent("modify") ?? previousModel;
1222
2178
  completedDecision = {
1223
- intent: routedIntent,
2179
+ route: routeDecision,
1224
2180
  routedModel: formatModelRouterModel(routedModel),
1225
2181
  outcome: "escalated",
1226
2182
  retryModel: formatModelRouterModel(retryModel),
2183
+ intent: routeDecision.tier === "cheap" ? "research" : "modify",
1227
2184
  };
1228
2185
  this._lastModelRouterDecision = completedDecision;
1229
2186
  }
@@ -1243,9 +2200,18 @@ export class AgentSession {
1243
2200
  }
1244
2201
  }
1245
2202
  finally {
1246
- this.agent.state.model = previousModel;
1247
- this.agent.state.thinkingLevel = previousThinkingLevel;
2203
+ // Restore the pre-route model ONLY if the routed model is still in place: a command
2204
+ // handler may have legitimately changed the session model mid-turn (setModel or a
2205
+ // provider re-registration), and clobbering that would silently undo the change.
2206
+ if (modelsAreEqual(this.agent.state.model, routedModel)) {
2207
+ this.agent.state.model = previousModel;
2208
+ this.agent.state.thinkingLevel = previousThinkingLevel;
2209
+ // The registry may have changed mid-turn (command-time registerProvider): re-resolve
2210
+ // the restored model so a provider override is not dropped with the routed model.
2211
+ this._refreshCurrentModelFromRegistry();
2212
+ }
1248
2213
  this._activeModelRouterIntent = previousActiveModelRouterIntent;
2214
+ this._activeModelRouterRoute = previousActiveModelRouterRoute;
1249
2215
  this._modelRouterSessionBuffer = previousModelRouterSessionBuffer;
1250
2216
  this._modelRouterEscalationRequested = previousModelRouterEscalationRequested;
1251
2217
  }
@@ -1253,7 +2219,16 @@ export class AgentSession {
1253
2219
  const previousIsModelRouterRetry = this._isModelRouterRetry;
1254
2220
  try {
1255
2221
  this._isModelRouterRetry = true;
1256
- await this._runAgentPromptWithModelRouter(messages, retryModel, "modify", false);
2222
+ const retryDecision = {
2223
+ tier: "expensive",
2224
+ risk: "high-impact",
2225
+ confidence: 1.0,
2226
+ reasonCode: "cheap_mutating_tool_escalation",
2227
+ reasons: ["Cheap research turn attempted a mutating tool and escalated"],
2228
+ fallbackFrom: "cheap",
2229
+ model: formatModelRouterModel(retryModel),
2230
+ };
2231
+ await this._runAgentPromptWithModelRouter(messages, retryModel, retryDecision, false);
1257
2232
  this._lastModelRouterDecision = completedDecision;
1258
2233
  }
1259
2234
  catch (error) {
@@ -1309,6 +2284,9 @@ export class AgentSession {
1309
2284
  * @throws Error if no model selected or no API key available (when not streaming)
1310
2285
  */
1311
2286
  async prompt(text, options) {
2287
+ if (options?.autoContinueGoal !== false) {
2288
+ this._clearGoalAutoContinueTimer();
2289
+ }
1312
2290
  if ((this.isStreaming || this.isRetrying) && options?.streamingBehavior) {
1313
2291
  const run = this._streamingPromptSubmissionTail.then(() => this._promptUnserialized(text, options), () => this._promptUnserialized(text, options));
1314
2292
  this._streamingPromptSubmissionTail = run.catch(() => { });
@@ -1338,7 +2316,7 @@ export class AgentSession {
1338
2316
  const preflightResult = options?.preflightResult;
1339
2317
  let messages;
1340
2318
  let routedTurnModel;
1341
- let routedTurnIntent;
2319
+ let routedTurnRouteDecision;
1342
2320
  // R4 effectiveness feedback: remember the recall page + the query so we can score, after the
1343
2321
  // response, whether the agent actually used the recalled context.
1344
2322
  let injectedRecall = "";
@@ -1401,8 +2379,13 @@ export class AgentSession {
1401
2379
  }
1402
2380
  // Flush any pending bash messages before the new prompt
1403
2381
  this._flushPendingBashMessages();
1404
- routedTurnModel = this._resolveModelRouterTurnModel(expandedText);
1405
- routedTurnIntent = routedTurnModel ? classifyModelRouterIntent(expandedText) : undefined;
2382
+ const resolvedRouteInfo = await this._resolveModelRouterTurnRouteJudged(expandedText, {
2383
+ // Internally generated turns (goal continuation, lane follow-ups) never consult the judge:
2384
+ // the regex floor already classified them, and a 20-turn loop must not buy 20 judge calls.
2385
+ skipJudge: options?.autoContinueGoal === false,
2386
+ });
2387
+ routedTurnModel = resolvedRouteInfo?.model;
2388
+ routedTurnRouteDecision = resolvedRouteInfo?.decision;
1406
2389
  const requestModel = routedTurnModel ?? this.model;
1407
2390
  // Validate model
1408
2391
  if (!requestModel) {
@@ -1495,7 +2478,7 @@ export class AgentSession {
1495
2478
  return;
1496
2479
  }
1497
2480
  preflightResult?.(true);
1498
- await this._runAgentPromptWithModelRouter(messages, routedTurnModel, routedTurnIntent);
2481
+ await this._runAgentPromptWithModelRouter(messages, routedTurnModel, routedTurnRouteDecision);
1499
2482
  // R4: score whether the agent actually used the recalled context, so the recall gate can adapt.
1500
2483
  if (injectedRecall) {
1501
2484
  const response = this._findLastAssistantMessage();
@@ -1509,6 +2492,8 @@ export class AgentSession {
1509
2492
  this._effectivenessTracker.recordRecallOutcome(injectedRecall, recallQuery, responseText);
1510
2493
  }
1511
2494
  }
2495
+ this._scheduleGoalAutoContinueFromIdle(options);
2496
+ this._scheduleResearchLaneFromIdle();
1512
2497
  }
1513
2498
  /**
1514
2499
  * Try to execute an extension command. Returns true if command was found and executed.
@@ -1829,6 +2814,19 @@ export class AgentSession {
1829
2814
  this.setThinkingLevel(thinkingLevel, { persistSettings });
1830
2815
  await this._emitModelSelect(model, previousModel, "set");
1831
2816
  this._checkContextWindowUsageWarning();
2817
+ // Re-derive the model-capability tool surface for the new model (restores the full requested
2818
+ // set when moving small -> large, reduces it when moving large -> small).
2819
+ if (this._requestedActiveToolNames) {
2820
+ const before = this.getActiveToolNames().join(",");
2821
+ this.setActiveToolsByName(this._requestedActiveToolNames);
2822
+ const capability = this.getModelCapabilityProfile();
2823
+ if (capability.class !== "full" && this.getActiveToolNames().join(",") !== before) {
2824
+ this._emit({
2825
+ type: "warning",
2826
+ message: `Small-context model detected (${capability.contextWindow ?? "unknown"} tokens, class '${capability.class}'): active tools reduced to [${this.getActiveToolNames().join(", ")}]; background lanes ${capability.backgroundLanesEnabled ? "enabled" : "disabled"}.`,
2827
+ });
2828
+ }
2829
+ }
1832
2830
  }
1833
2831
  /**
1834
2832
  * Cycle to next/previous model.
@@ -2761,7 +3759,10 @@ export class AgentSession {
2761
3759
  }
2762
3760
  _refreshToolRegistry(options) {
2763
3761
  const previousRegistryNames = new Set(this._toolRegistry.keys());
2764
- const previousActiveToolNames = this.getActiveToolNames();
3762
+ // Re-derive from the pre-filter REQUEST, never from agent.state.tools: the active set is
3763
+ // capability/profile-filtered, so feeding it back through setActiveToolsByName would
3764
+ // permanently shrink what a later switch to a larger model (or permissive profile) restores.
3765
+ const previousActiveToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
2765
3766
  const allowedToolNames = this._allowedToolNames;
2766
3767
  const excludedToolNames = this._excludedToolNames;
2767
3768
  const toolProfileFilter = this._toolProfileFilter;
@@ -2833,27 +3834,40 @@ export class AgentSession {
2833
3834
  toolRegistry.set(tool.name, tool);
2834
3835
  }
2835
3836
  this._toolRegistry = toolRegistry;
2836
- const nextActiveToolNames = (options?.activeToolNames ? [...options.activeToolNames] : [...previousActiveToolNames]).filter((name) => isAllowedTool(name));
3837
+ const requestedBase = options?.activeToolNames ? [...options.activeToolNames] : [...previousActiveToolNames];
3838
+ const nextActiveToolNames = requestedBase.filter((name) => isAllowedTool(name));
3839
+ const autoActivated = [];
2837
3840
  if (allowedToolNames) {
2838
3841
  for (const toolName of this._toolRegistry.keys()) {
2839
3842
  if (allowedToolNames.has(toolName)) {
2840
3843
  nextActiveToolNames.push(toolName);
3844
+ autoActivated.push(toolName);
2841
3845
  }
2842
3846
  }
2843
3847
  }
2844
3848
  else if (options?.includeAllExtensionTools) {
2845
3849
  for (const tool of wrappedExtensionTools) {
2846
3850
  nextActiveToolNames.push(tool.name);
3851
+ autoActivated.push(tool.name);
2847
3852
  }
2848
3853
  }
2849
3854
  else if (!options?.activeToolNames) {
2850
3855
  for (const toolName of this._toolRegistry.keys()) {
2851
3856
  if (!previousRegistryNames.has(toolName)) {
2852
3857
  nextActiveToolNames.push(toolName);
3858
+ autoActivated.push(toolName);
2853
3859
  }
2854
3860
  }
2855
3861
  }
3862
+ // artifact_retrieve companion auto-activation is enforced inside
3863
+ // setActiveToolsByName() itself (not duplicated here), so every activation path --
3864
+ // including the public, extension-exposed setActiveTools() -- gets the same
3865
+ // guarantee, not just this settings/profile refresh flow.
2856
3866
  this.setActiveToolsByName([...new Set(nextActiveToolNames)]);
3867
+ // setActiveToolsByName just stored the profile-filtered ACTIVE set as the request; restore
3868
+ // the true pre-filter request (plus this refresh's auto-activations) so an internal refresh
3869
+ // can never permanently narrow it.
3870
+ this._requestedActiveToolNames = [...new Set([...requestedBase, ...autoActivated])];
2857
3871
  }
2858
3872
  _createReloadRuntimeSnapshot() {
2859
3873
  return {
@@ -2908,6 +3922,14 @@ export class AgentSession {
2908
3922
  const autoResizeImages = this.settingsManager.getImageAutoResize();
2909
3923
  const shellCommandPrefix = this.settingsManager.getShellCommandPrefix();
2910
3924
  const shellPath = this.settingsManager.getShellPath();
3925
+ // grep/find must not emit a "Full output: artifact tool-output:<id>" handle that
3926
+ // nothing can resolve. If artifact_retrieve is explicitly excluded/blocked/outside
3927
+ // an active allowlist, don't hand grep/find an artifact store at all: they fall
3928
+ // back to their pre-existing bounded preview/truncation behavior, with no
3929
+ // payload/meta files ever written and no retrieval promise made.
3930
+ const toolArtifactStore = this._isToolOrCommandAllowedByProfile("artifact_retrieve")
3931
+ ? this._getToolArtifactStore()
3932
+ : undefined;
2911
3933
  const baseToolDefinitions = this._baseToolsOverride
2912
3934
  ? Object.fromEntries(Object.entries(this._baseToolsOverride).map(([name, tool]) => [
2913
3935
  name,
@@ -2916,12 +3938,37 @@ export class AgentSession {
2916
3938
  : createAllToolDefinitions(this._cwd, {
2917
3939
  read: { autoResizeImages },
2918
3940
  bash: { commandPrefix: shellCommandPrefix, shellPath },
3941
+ grep: { artifactStore: toolArtifactStore },
3942
+ find: { artifactStore: toolArtifactStore },
3943
+ artifact_retrieve: { artifactStore: toolArtifactStore },
2919
3944
  });
2920
3945
  this._baseToolDefinitions = new Map(Object.entries(baseToolDefinitions).map(([name, tool]) => [name, tool]));
2921
3946
  if (!this._baseToolsOverride) {
2922
- for (const definition of createCoreDiagnosticsToolDefinitions(() => this.getActiveToolNames(), () => this.getAllTools(), (messages) => this.getContextGcReport(messages))) {
3947
+ for (const definition of createCoreDiagnosticsToolDefinitions(() => this.getActiveToolNames(), () => this.getAllTools(), (messages) => this.getContextGcReport(messages), () => this._getMemoryAuditDiagnostics())) {
2923
3948
  this._baseToolDefinitions.set(definition.name, definition);
2924
3949
  }
3950
+ const goalToolDefinition = createGoalToolDefinition({
3951
+ getGoalState: () => this.getGoalStateSnapshot(),
3952
+ saveGoalState: (state) => {
3953
+ this.saveGoalStateSnapshot(state);
3954
+ },
3955
+ });
3956
+ this._baseToolDefinitions.set(goalToolDefinition.name, goalToolDefinition);
3957
+ const delegateToolDefinition = createDelegateToolDefinition({
3958
+ runWorkerDelegation: (args) => this.runWorkerDelegationOnce(args),
3959
+ });
3960
+ this._baseToolDefinitions.set(delegateToolDefinition.name, delegateToolDefinition);
3961
+ // Registered but not default-active: probes spend tokens on the probed model, so
3962
+ // activation is an explicit choice (settings/profile/setActiveTools or /autonomy fitness).
3963
+ const modelFitnessToolDefinition = createModelFitnessToolDefinition({
3964
+ runProbe: (args) => this.runModelFitness(args),
3965
+ });
3966
+ this._baseToolDefinitions.set(modelFitnessToolDefinition.name, modelFitnessToolDefinition);
3967
+ const runToolkitScriptToolDefinition = createRunToolkitScriptToolDefinition({
3968
+ getScripts: () => this.settingsManager.getToolkitScripts(),
3969
+ execute: (script, scriptArgs) => executeToolkitScript({ script, scriptArgs, cwd: this._cwd }),
3970
+ });
3971
+ this._baseToolDefinitions.set(runToolkitScriptToolDefinition.name, runToolkitScriptToolDefinition);
2925
3972
  }
2926
3973
  const extensionsResult = this._resourceLoader.getExtensions();
2927
3974
  if (options.flagValues) {
@@ -2941,7 +3988,7 @@ export class AgentSession {
2941
3988
  this._applyExtensionBindings(this._extensionRunner);
2942
3989
  const defaultActiveToolNames = this._baseToolsOverride
2943
3990
  ? Object.keys(this._baseToolsOverride)
2944
- : ["read", "bash", "edit", "write", "context_audit"];
3991
+ : ["read", "bash", "edit", "write", "context_audit", "goal", "delegate", "run_toolkit_script"];
2945
3992
  const baseActiveToolNames = options.activeToolNames ?? defaultActiveToolNames;
2946
3993
  this._refreshToolRegistry({
2947
3994
  activeToolNames: baseActiveToolNames,
@@ -2957,7 +4004,9 @@ export class AgentSession {
2957
4004
  }
2958
4005
  const previousRunner = this._extensionRunner;
2959
4006
  const snapshot = this._createReloadRuntimeSnapshot();
2960
- const activeToolNames = this.getActiveToolNames();
4007
+ // Preserve the pre-filter tool REQUEST across the rebuild, not the capability/profile-filtered
4008
+ // active set — otherwise a reload under a small model permanently shrinks the restorable set.
4009
+ const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
2961
4010
  const previousFlagValues = previousRunner.getFlagValues();
2962
4011
  const reloadErrors = [];
2963
4012
  let newRunner;
@@ -3050,7 +4099,7 @@ export class AgentSession {
3050
4099
  // Remove from loaded extensions
3051
4100
  this._resourceLoader.removeLoadedExtension(extensionPath);
3052
4101
  // Rebuild runtime with new extension set
3053
- const activeToolNames = this.getActiveToolNames();
4102
+ const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
3054
4103
  const previousFlagValues = previousRunner.getFlagValues();
3055
4104
  this._buildRuntime({
3056
4105
  activeToolNames,
@@ -3093,7 +4142,7 @@ export class AgentSession {
3093
4142
  throw new Error(error || `Failed to load extension: ${extensionPath}`);
3094
4143
  }
3095
4144
  // Rebuild runtime to aggregate tools/commands/handlers/providers
3096
- const activeToolNames = this.getActiveToolNames();
4145
+ const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
3097
4146
  const previousFlagValues = previousRunner.getFlagValues();
3098
4147
  this._buildRuntime({
3099
4148
  activeToolNames,
@@ -3768,6 +4817,671 @@ export class AgentSession {
3768
4817
  getDailyUsageBreakdown(formatLabel, now = new Date()) {
3769
4818
  return formatDailyUsageBreakdown(this.getDailyUsageTotals(now), formatLabel);
3770
4819
  }
4820
+ /**
4821
+ * Save a snapshot of the goal state to the session log.
4822
+ *
4823
+ * @returns the id of the appended custom entry
4824
+ */
4825
+ saveGoalStateSnapshot(state) {
4826
+ return appendGoalStateSnapshot(this.sessionManager, state);
4827
+ }
4828
+ /**
4829
+ * Retrieve the latest valid goal state snapshot from the session log.
4830
+ */
4831
+ getGoalStateSnapshot() {
4832
+ return getLatestGoalStateSnapshot(this.sessionManager.getEntries());
4833
+ }
4834
+ /**
4835
+ * Save a snapshot of the evidence bundle to the session log.
4836
+ *
4837
+ * @returns the id of the appended custom entry
4838
+ */
4839
+ saveEvidenceBundleSnapshot(bundle) {
4840
+ return appendEvidenceBundleSnapshot(this.sessionManager, bundle);
4841
+ }
4842
+ /**
4843
+ * Retrieve the latest valid evidence bundle snapshot from the session log.
4844
+ */
4845
+ getEvidenceBundleSnapshot() {
4846
+ return getLatestEvidenceBundleSnapshot(this.sessionManager.getEntries());
4847
+ }
4848
+ getEvidenceBundleSnapshots() {
4849
+ return getEvidenceBundleSnapshots(this.sessionManager.getEntries());
4850
+ }
4851
+ /** Live lane records tracked by this process (running and terminal). */
4852
+ getLaneRecords() {
4853
+ return this._laneTracker.getRecords();
4854
+ }
4855
+ saveWorkerResultSnapshot(result) {
4856
+ return appendWorkerResultSnapshot(this.sessionManager, result);
4857
+ }
4858
+ getWorkerResultSnapshots() {
4859
+ return getWorkerResultSnapshots(this.sessionManager.getEntries());
4860
+ }
4861
+ saveLearningDecisionSnapshot(decision) {
4862
+ return appendLearningDecisionSnapshot(this.sessionManager, decision);
4863
+ }
4864
+ getLearningDecisionSnapshots() {
4865
+ return getLearningDecisionSnapshots(this.sessionManager.getEntries());
4866
+ }
4867
+ getGoalRuntimeSnapshot(settings) {
4868
+ return buildGoalRuntimeSnapshot({
4869
+ entries: this.sessionManager.getEntries(),
4870
+ settings,
4871
+ });
4872
+ }
4873
+ _clearGoalAutoContinueTimer() {
4874
+ if (this._goalAutoContinueTimer !== undefined) {
4875
+ clearTimeout(this._goalAutoContinueTimer);
4876
+ this._goalAutoContinueTimer = undefined;
4877
+ }
4878
+ }
4879
+ _scheduleGoalAutoContinueFromIdle(options) {
4880
+ if (options?.autoContinueGoal === false || this._isGoalAutoContinuing || this._disposed)
4881
+ return;
4882
+ // Small-window models cannot afford multi-thousand-token continuation prompts per idle turn.
4883
+ if (!this.getModelCapabilityProfile().backgroundLanesEnabled)
4884
+ return;
4885
+ const { maxStallTurns, goalAutoContinue, goalAutoContinueDelayMs } = this.settingsManager.getAutonomySettings();
4886
+ if (!goalAutoContinue)
4887
+ return;
4888
+ const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
4889
+ if (snapshot.continuation.action !== "continue")
4890
+ return;
4891
+ this._clearGoalAutoContinueTimer();
4892
+ this._goalAutoContinueTimer = setTimeout(() => {
4893
+ this._goalAutoContinueTimer = undefined;
4894
+ void this._runScheduledGoalAutoContinue();
4895
+ }, goalAutoContinueDelayMs);
4896
+ const timer = this._goalAutoContinueTimer;
4897
+ if (typeof timer === "object" && timer && "unref" in timer) {
4898
+ const { unref } = timer;
4899
+ unref?.call(timer);
4900
+ }
4901
+ }
4902
+ async _runScheduledGoalAutoContinue() {
4903
+ if (this._isGoalAutoContinuing || this._disposed)
4904
+ return;
4905
+ const { maxStallTurns, goalContinueTurns, goalContinueMaxWallClockMinutes, goalAutoContinue } = this.settingsManager.getAutonomySettings();
4906
+ if (!goalAutoContinue)
4907
+ return;
4908
+ const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
4909
+ if (snapshot.continuation.action !== "continue")
4910
+ return;
4911
+ this._isGoalAutoContinuing = true;
4912
+ try {
4913
+ await this.continueGoalLoop({
4914
+ maxTurns: goalContinueTurns,
4915
+ maxStallTurns,
4916
+ maxWallClockMinutes: goalContinueMaxWallClockMinutes,
4917
+ });
4918
+ }
4919
+ catch (error) {
4920
+ const message = error instanceof Error ? error.message : String(error);
4921
+ this._emit({ type: "warning", message: `Goal auto-continuation failed: ${message}` });
4922
+ }
4923
+ finally {
4924
+ this._isGoalAutoContinuing = false;
4925
+ }
4926
+ }
4927
+ _clearResearchLaneTimer() {
4928
+ if (this._researchLaneTimer !== undefined) {
4929
+ clearTimeout(this._researchLaneTimer);
4930
+ this._researchLaneTimer = undefined;
4931
+ }
4932
+ }
4933
+ /**
4934
+ * Derive the research demand from durable goal state: an active goal with open requirements,
4935
+ * deduplicated against the latest persisted bundle so the same requirement set is never
4936
+ * researched twice (the query is deterministic, so dedupe survives session reload).
4937
+ */
4938
+ _buildResearchLaneDemand() {
4939
+ const goal = this.getGoalStateSnapshot();
4940
+ if (!goal || goal.status !== "active") {
4941
+ this._lastResearchLaneSkipReason = "no_active_goal";
4942
+ return undefined;
4943
+ }
4944
+ const open = goal.requirements.filter((requirement) => requirement.status === "open");
4945
+ if (open.length === 0) {
4946
+ this._lastResearchLaneSkipReason = "no_open_requirements";
4947
+ return undefined;
4948
+ }
4949
+ const query = `goal:${goal.goalId} requirements:${open
4950
+ .map((requirement) => requirement.id)
4951
+ .sort()
4952
+ .join(",")}`;
4953
+ if (this.getEvidenceBundleSnapshot()?.query === query) {
4954
+ this._lastResearchLaneSkipReason = "recent_evidence_sufficient";
4955
+ return undefined;
4956
+ }
4957
+ const context = [
4958
+ `Goal: ${goal.userGoal}`,
4959
+ "Open requirements:",
4960
+ ...open.slice(0, 20).map((requirement) => `- ${requirement.text}`),
4961
+ ].join("\n");
4962
+ return { query, context, goalId: goal.goalId };
4963
+ }
4964
+ /**
4965
+ * Idle trigger for the autonomous research lane (mirrors {@link _scheduleGoalAutoContinueFromIdle}).
4966
+ * All skips are recorded in `_lastResearchLaneSkipReason` and surfaced via diagnostics — the lane
4967
+ * informs, it never prompts or blocks the foreground.
4968
+ */
4969
+ _scheduleResearchLaneFromIdle() {
4970
+ if (this._isResearchLaneRunning || this._disposed || this._isChildSession)
4971
+ return;
4972
+ if (!this.getModelCapabilityProfile().backgroundLanesEnabled) {
4973
+ this._lastResearchLaneSkipReason = "model_context_too_small";
4974
+ return;
4975
+ }
4976
+ const research = this.settingsManager.getResearchLaneSettings();
4977
+ if (!research.enabled) {
4978
+ this._lastResearchLaneSkipReason = "research_lane_disabled";
4979
+ return;
4980
+ }
4981
+ const { mode } = this.settingsManager.getAutonomySettings();
4982
+ if (mode === "off") {
4983
+ this._lastResearchLaneSkipReason = "autonomy_mode_off";
4984
+ return;
4985
+ }
4986
+ const priorRuns = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "research").length;
4987
+ if (priorRuns >= research.maxRunsPerSession) {
4988
+ this._lastResearchLaneSkipReason = "max_runs_reached";
4989
+ return;
4990
+ }
4991
+ if (!this._buildResearchLaneDemand())
4992
+ return;
4993
+ this._clearResearchLaneTimer();
4994
+ this._researchLaneTimer = setTimeout(() => {
4995
+ this._researchLaneTimer = undefined;
4996
+ void this._runScheduledResearchLane();
4997
+ }, research.idleDelayMs);
4998
+ const timer = this._researchLaneTimer;
4999
+ if (typeof timer === "object" && timer && "unref" in timer) {
5000
+ const { unref } = timer;
5001
+ unref?.call(timer);
5002
+ }
5003
+ }
5004
+ async _runScheduledResearchLane() {
5005
+ if (this._isResearchLaneRunning || this._disposed)
5006
+ return;
5007
+ const research = this.settingsManager.getResearchLaneSettings();
5008
+ const { mode } = this.settingsManager.getAutonomySettings();
5009
+ if (!research.enabled || mode === "off")
5010
+ return;
5011
+ try {
5012
+ await this.runResearchLaneOnce();
5013
+ }
5014
+ catch (error) {
5015
+ const message = error instanceof Error ? error.message : String(error);
5016
+ this._emit({ type: "warning", message: `Research lane failed: ${message}` });
5017
+ }
5018
+ }
5019
+ /**
5020
+ * Capability profile derived from the CURRENT session model's own metadata (context window),
5021
+ * honoring the modelCapability.mode setting ("off" disables, a class name forces).
5022
+ */
5023
+ getModelCapabilityProfile() {
5024
+ return deriveModelCapabilityProfile({
5025
+ contextWindow: this.model?.contextWindow,
5026
+ mode: this.settingsManager.getModelCapabilitySettings().mode,
5027
+ });
5028
+ }
5029
+ /** Capability profile for a specific lane model (lane budgets scale to the lane model's window). */
5030
+ _laneCapabilityProfile(model) {
5031
+ return deriveModelCapabilityProfile({
5032
+ contextWindow: model.contextWindow,
5033
+ mode: this.settingsManager.getModelCapabilitySettings().mode,
5034
+ });
5035
+ }
5036
+ /**
5037
+ * Resolve the model for a background lane. Lanes are shipped BY this session, so they inherit
5038
+ * the session's own model unless a lane-specific model is explicitly configured — a single-model
5039
+ * setup (e.g. one local open model) runs its lanes on that same model. An explicitly configured
5040
+ * pattern that cannot resolve/authenticate is a visible skip, not a silent fallback.
5041
+ */
5042
+ _resolveLaneModel(configuredPattern) {
5043
+ if (configuredPattern) {
5044
+ const resolved = resolveCliModel({ cliModel: configuredPattern, modelRegistry: this._modelRegistry });
5045
+ if (resolved.model && this._modelRegistry.hasConfiguredAuth(resolved.model)) {
5046
+ return resolved.model;
5047
+ }
5048
+ return undefined;
5049
+ }
5050
+ return this.model ?? undefined;
5051
+ }
5052
+ /**
5053
+ * Resolve what a lane ships with. Precedence: explicit lane model setting, then the lane
5054
+ * profile's model (a shipped profile with a model MUST be obeyed — unresolvable is a visible
5055
+ * skip, never a fallback), then generic inheritance of the session model.
5056
+ */
5057
+ _resolveLaneShipment(laneSettings, missingModelReason) {
5058
+ let laneProfile;
5059
+ if (laneSettings.profile) {
5060
+ laneProfile = this.settingsManager.getProfileRegistry().getProfile(laneSettings.profile);
5061
+ if (!laneProfile) {
5062
+ return { ok: false, skipReason: "lane_profile_not_found" };
5063
+ }
5064
+ }
5065
+ let model;
5066
+ if (laneSettings.model) {
5067
+ model = this._resolveLaneModel(laneSettings.model);
5068
+ if (!model)
5069
+ return { ok: false, skipReason: missingModelReason };
5070
+ }
5071
+ else if (laneProfile?.model) {
5072
+ model = this._resolveLaneModel(laneProfile.model);
5073
+ if (!model)
5074
+ return { ok: false, skipReason: "no_lane_profile_model" };
5075
+ }
5076
+ else {
5077
+ model = this.model ?? undefined;
5078
+ if (!model)
5079
+ return { ok: false, skipReason: missingModelReason };
5080
+ }
5081
+ return { ok: true, model, laneProfile };
5082
+ }
5083
+ /** UAC tool grants from a shipped lane profile, recorded on the lane envelope. */
5084
+ _laneProfileToolGrants(laneProfile) {
5085
+ const toolsFilter = laneProfile?.resources.tools;
5086
+ return {
5087
+ ...(toolsFilter?.allow && toolsFilter.allow.length > 0 ? { allowedTools: [...toolsFilter.allow] } : {}),
5088
+ ...(toolsFilter?.block && toolsFilter.block.length > 0 ? { deniedTools: [...toolsFilter.block] } : {}),
5089
+ };
5090
+ }
5091
+ /** Stripped research envelope — never the foreground/architect envelope. */
5092
+ _buildResearchLaneEnvelope(maxUsd, laneProfile) {
5093
+ return {
5094
+ id: `research-${this.sessionId}-${Date.now()}`,
5095
+ profileId: laneProfile?.name,
5096
+ capabilities: ["research", "read_files", "memory_read"],
5097
+ ...this._laneProfileToolGrants(laneProfile),
5098
+ maxEstimatedUsd: Math.min(maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY),
5099
+ createdAt: new Date().toISOString(),
5100
+ };
5101
+ }
5102
+ /**
5103
+ * Run one bounded, read-only research pass and persist its results: evidence bundle snapshot,
5104
+ * terminal lane record, and spawned-usage cost report (single-hop invariant, idempotent on the
5105
+ * lane's reportId). Explicit calls (e.g. `/autonomy research`) express user intent and bypass the
5106
+ * enabled/mode/dedupe gates the idle scheduler enforces; budget and capability gates always apply.
5107
+ */
5108
+ async runResearchLaneOnce(request) {
5109
+ if (this._isResearchLaneRunning) {
5110
+ return { started: false, skipReason: "research_lane_already_running" };
5111
+ }
5112
+ if (this._disposed) {
5113
+ return { started: false, skipReason: "session_disposed" };
5114
+ }
5115
+ const settings = this.settingsManager.getResearchLaneSettings();
5116
+ const demand = request?.query
5117
+ ? { query: request.query, context: request.context ?? "", goalId: request.goalId }
5118
+ : this._buildResearchLaneDemand();
5119
+ if (!demand) {
5120
+ return { started: false, skipReason: this._lastResearchLaneSkipReason ?? "no_research_demand" };
5121
+ }
5122
+ const shipment = this._resolveLaneShipment(settings, "no_research_model");
5123
+ if (!shipment.ok) {
5124
+ this._lastResearchLaneSkipReason = shipment.skipReason;
5125
+ return { started: false, skipReason: shipment.skipReason };
5126
+ }
5127
+ const { model, laneProfile } = shipment;
5128
+ this._isResearchLaneRunning = true;
5129
+ this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
5130
+ const startedRecord = this._laneTracker.start({ type: "research", goalId: demand.goalId });
5131
+ try {
5132
+ let spentUsage;
5133
+ const result = await runResearch({
5134
+ query: demand.query,
5135
+ context: demand.context,
5136
+ envelope: this._buildResearchLaneEnvelope(settings.maxUsd, laneProfile),
5137
+ maxUsd: settings.maxUsd,
5138
+ maxSources: settings.maxSources,
5139
+ maxFindings: settings.maxFindings,
5140
+ maxWallClockMs: settings.maxWallClockMs,
5141
+ signal: this._researchLaneAbort.signal,
5142
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
5143
+ const completion = await this.runIsolatedCompletion({
5144
+ // Level-0 core always survives; profile soul and role prompt are the replaceable
5145
+ // layers; a settings-provided prompt replaces everything above the core.
5146
+ systemPrompt: composeSubagentSystemPrompt({
5147
+ soul: laneProfile?.soul,
5148
+ rolePrompt: systemPrompt,
5149
+ override: settings.systemPrompt,
5150
+ }),
5151
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
5152
+ model,
5153
+ thinkingLevel: laneProfile?.thinking ?? "off",
5154
+ maxTokens: this._laneCapabilityProfile(model).laneMaxOutputTokens,
5155
+ signal,
5156
+ // Core/soul/role are all static per configuration — the provider can cache the prefix.
5157
+ cacheRetention: "short",
5158
+ });
5159
+ spentUsage = completion.usage;
5160
+ return {
5161
+ text: completion.text,
5162
+ costUsd: completion.usage.cost.total,
5163
+ stopReason: String(completion.stopReason),
5164
+ };
5165
+ },
5166
+ });
5167
+ // Bug #21 pattern: if the session was disposed while the completion was in flight, do NOT
5168
+ // persist evidence/records/usage against the dead session.
5169
+ if (this._disposed) {
5170
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5171
+ status: "canceled",
5172
+ reasonCode: "session_disposed",
5173
+ });
5174
+ return { started: true, record, result };
5175
+ }
5176
+ let evidenceEntryId;
5177
+ if (result.bundle) {
5178
+ evidenceEntryId = this.saveEvidenceBundleSnapshot(result.bundle);
5179
+ }
5180
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
5181
+ this.addSpawnedUsage(spentUsage, {
5182
+ label: "research-lane",
5183
+ reportId: `research:${this.sessionId}:${startedRecord.laneId}`,
5184
+ });
5185
+ }
5186
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5187
+ status: result.status,
5188
+ reasonCode: result.reasonCode,
5189
+ costUsd: result.costUsd,
5190
+ evidenceEntryId,
5191
+ });
5192
+ if (record) {
5193
+ appendLaneRecordSnapshot(this.sessionManager, record);
5194
+ }
5195
+ return { started: true, record, result };
5196
+ }
5197
+ catch (error) {
5198
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5199
+ status: "failed",
5200
+ reasonCode: "research_lane_error",
5201
+ });
5202
+ if (record && !this._disposed) {
5203
+ appendLaneRecordSnapshot(this.sessionManager, record);
5204
+ }
5205
+ const message = error instanceof Error ? error.message : String(error);
5206
+ this._emit({ type: "warning", message: `Research lane failed: ${message}` });
5207
+ return { started: true, record };
5208
+ }
5209
+ finally {
5210
+ this._isResearchLaneRunning = false;
5211
+ }
5212
+ }
5213
+ /**
5214
+ * Run one bounded scout-worker delegation: build a WorkerRequest with a stripped read-only
5215
+ * envelope, execute it as an isolated completion on a cheap lane, validate the result via
5216
+ * {@link validateWorkerResult} before acceptance, and persist result + lane record + spawned
5217
+ * usage (idempotent per-lane reportId). Consumed by the `delegate` tool.
5218
+ */
5219
+ async runWorkerDelegationOnce(request) {
5220
+ if (this._isWorkerDelegationRunning) {
5221
+ return { started: false, skipReason: "worker_delegation_already_running" };
5222
+ }
5223
+ if (this._disposed) {
5224
+ return { started: false, skipReason: "session_disposed" };
5225
+ }
5226
+ const instructions = request.instructions.trim();
5227
+ if (instructions.length === 0) {
5228
+ return { started: false, skipReason: "missing_instructions" };
5229
+ }
5230
+ const settings = this.settingsManager.getWorkerDelegationSettings();
5231
+ if (!settings.enabled) {
5232
+ return { started: false, skipReason: "worker_delegation_disabled" };
5233
+ }
5234
+ const shipment = this._resolveLaneShipment(settings, "no_worker_model");
5235
+ if (!shipment.ok) {
5236
+ return { started: false, skipReason: shipment.skipReason };
5237
+ }
5238
+ const { model, laneProfile } = shipment;
5239
+ this._isWorkerDelegationRunning = true;
5240
+ this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
5241
+ const startedRecord = this._laneTracker.start({ type: "worker" });
5242
+ const maxUsd = Math.min(settings.maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY);
5243
+ const workerRequest = {
5244
+ id: startedRecord.laneId,
5245
+ instructions,
5246
+ route: {
5247
+ tier: "cheap",
5248
+ risk: "read-only",
5249
+ confidence: 1,
5250
+ reasonCode: "scout_worker",
5251
+ reasons: ["Read-only scout delegation"],
5252
+ },
5253
+ envelope: {
5254
+ id: `worker-${this.sessionId}-${startedRecord.laneId}`,
5255
+ profileId: laneProfile?.name,
5256
+ capabilities: ["read_files"],
5257
+ ...this._laneProfileToolGrants(laneProfile),
5258
+ maxEstimatedUsd: maxUsd,
5259
+ createdAt: new Date().toISOString(),
5260
+ },
5261
+ maxEstimatedUsd: maxUsd,
5262
+ createdAt: new Date().toISOString(),
5263
+ };
5264
+ const usageReportId = `worker:${this.sessionId}:${startedRecord.laneId}`;
5265
+ try {
5266
+ let spentUsage;
5267
+ const outcome = await runWorker({
5268
+ request: workerRequest,
5269
+ maxUsd,
5270
+ maxWallClockMs: settings.maxWallClockMs,
5271
+ usageReportId,
5272
+ signal: this._workerDelegationAbort.signal,
5273
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
5274
+ const completion = await this.runIsolatedCompletion({
5275
+ // Level-0 core always survives. A model-provided prompt (delegate tool) is the most
5276
+ // specific override, then the settings-level prompt, then profile soul + role prompt.
5277
+ systemPrompt: composeSubagentSystemPrompt({
5278
+ soul: laneProfile?.soul,
5279
+ rolePrompt: systemPrompt,
5280
+ override: request.systemPrompt ?? settings.systemPrompt,
5281
+ }),
5282
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
5283
+ model,
5284
+ thinkingLevel: laneProfile?.thinking ?? "off",
5285
+ maxTokens: this._laneCapabilityProfile(model).laneMaxOutputTokens,
5286
+ signal,
5287
+ // Core/soul/role are all static per configuration — the provider can cache the prefix.
5288
+ cacheRetention: "short",
5289
+ });
5290
+ spentUsage = completion.usage;
5291
+ return {
5292
+ text: completion.text,
5293
+ costUsd: completion.usage.cost.total,
5294
+ stopReason: String(completion.stopReason),
5295
+ };
5296
+ },
5297
+ });
5298
+ // Bug #21 pattern: never persist against a disposed session.
5299
+ if (this._disposed) {
5300
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5301
+ status: "canceled",
5302
+ reasonCode: "session_disposed",
5303
+ });
5304
+ return { started: true, record, outcome };
5305
+ }
5306
+ this.saveWorkerResultSnapshot(outcome.result);
5307
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
5308
+ this.addSpawnedUsage(spentUsage, { label: "worker-delegation", reportId: usageReportId });
5309
+ }
5310
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5311
+ status: outcome.laneStatus,
5312
+ reasonCode: outcome.reasonCode,
5313
+ costUsd: outcome.costUsd,
5314
+ });
5315
+ if (record) {
5316
+ appendLaneRecordSnapshot(this.sessionManager, record);
5317
+ }
5318
+ return { started: true, record, outcome };
5319
+ }
5320
+ catch (error) {
5321
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5322
+ status: "failed",
5323
+ reasonCode: "worker_delegation_error",
5324
+ });
5325
+ if (record && !this._disposed) {
5326
+ appendLaneRecordSnapshot(this.sessionManager, record);
5327
+ }
5328
+ const message = error instanceof Error ? error.message : String(error);
5329
+ this._emit({ type: "warning", message: `Worker delegation failed: ${message}` });
5330
+ return { started: true, record };
5331
+ }
5332
+ finally {
5333
+ this._isWorkerDelegationRunning = false;
5334
+ }
5335
+ }
5336
+ /**
5337
+ * Probe a candidate model against the subagent contracts (research/worker/judge/search/
5338
+ * tool-call surfaces) via {@link runModelFitnessProbe}. The model must resolve and
5339
+ * authenticate; every probe call runs as an isolated completion on that model, and probe
5340
+ * spend is reported through spawned-usage accounting.
5341
+ */
5342
+ async runModelFitness(args) {
5343
+ if (this._disposed)
5344
+ return { started: false, skipReason: "session_disposed" };
5345
+ const resolved = this._resolveLaneModel(args.model.trim() || undefined);
5346
+ if (!resolved)
5347
+ return { started: false, skipReason: "model_unresolved_or_unauthenticated" };
5348
+ const capability = this._laneCapabilityProfile(resolved);
5349
+ const spent = {
5350
+ input: 0,
5351
+ output: 0,
5352
+ cacheRead: 0,
5353
+ cacheWrite: 0,
5354
+ totalTokens: 0,
5355
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
5356
+ };
5357
+ const report = await runModelFitnessProbe({
5358
+ trials: args.trials,
5359
+ signal: this._researchLaneAbort.signal,
5360
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
5361
+ const callStarted = Date.now();
5362
+ const completion = await this.runIsolatedCompletion({
5363
+ systemPrompt,
5364
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
5365
+ model: resolved,
5366
+ thinkingLevel: "off",
5367
+ maxTokens: capability.laneMaxOutputTokens,
5368
+ signal,
5369
+ cacheRetention: "short",
5370
+ });
5371
+ const callMs = Date.now() - callStarted;
5372
+ spent.input += completion.usage.input;
5373
+ spent.output += completion.usage.output;
5374
+ spent.cacheRead += completion.usage.cacheRead;
5375
+ spent.cacheWrite += completion.usage.cacheWrite;
5376
+ spent.totalTokens += completion.usage.totalTokens;
5377
+ spent.cost.input += completion.usage.cost.input;
5378
+ spent.cost.output += completion.usage.cost.output;
5379
+ spent.cost.cacheRead += completion.usage.cost.cacheRead;
5380
+ spent.cost.cacheWrite += completion.usage.cost.cacheWrite;
5381
+ spent.cost.total += completion.usage.cost.total;
5382
+ return {
5383
+ text: completion.text,
5384
+ costUsd: completion.usage.cost.total,
5385
+ stopReason: String(completion.stopReason),
5386
+ // Wall-clock fallback for tok/s: providers don't expose pure eval time, so the
5387
+ // measured call time stands in — slightly conservative (includes network/queue).
5388
+ outputTokens: completion.usage.output,
5389
+ evalMs: callMs,
5390
+ };
5391
+ },
5392
+ });
5393
+ if (!this._disposed && (spent.cost.total > 0 || spent.totalTokens > 0)) {
5394
+ this.addSpawnedUsage(spent, { label: "model-fitness" });
5395
+ }
5396
+ const modelRef = `${resolved.provider}/${resolved.id}`;
5397
+ // Fitness is a property of a model ON a host — persist the report host-keyed so role
5398
+ // assignments stay per-machine (a model can await better hardware without being forgotten).
5399
+ // Best-effort: a disk problem must not fail the probe itself.
5400
+ try {
5401
+ if (!this._disposed) {
5402
+ FitnessStore.forAgentDir(this._agentDir).save(modelRef, report);
5403
+ }
5404
+ }
5405
+ catch {
5406
+ // best-effort persistence
5407
+ }
5408
+ return { started: true, model: modelRef, report };
5409
+ }
5410
+ /** Fitness reports persisted for THIS host (measured evidence for architect/profile decisions). */
5411
+ getStoredFitnessReports() {
5412
+ try {
5413
+ return FitnessStore.forAgentDir(this._agentDir).getForHost();
5414
+ }
5415
+ catch {
5416
+ return [];
5417
+ }
5418
+ }
5419
+ async continueGoalOnce(options) {
5420
+ const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns: options.maxStallTurns });
5421
+ if (snapshot.continuation.action !== "continue") {
5422
+ return { submitted: false, snapshot };
5423
+ }
5424
+ const prompt = buildGoalContinuationPrompt({ snapshot, limits: options.promptLimits });
5425
+ await this.prompt(prompt.text, {
5426
+ expandPromptTemplates: false,
5427
+ processSlashCommands: false,
5428
+ autoContinueGoal: false,
5429
+ });
5430
+ return { submitted: true, snapshot, prompt };
5431
+ }
5432
+ async continueGoalLoop(options) {
5433
+ let turnsSubmitted = 0;
5434
+ const now = options.now ?? Date.now;
5435
+ const maxWallClockMs = typeof options.maxWallClockMinutes === "number" && options.maxWallClockMinutes > 0
5436
+ ? options.maxWallClockMinutes * 60_000
5437
+ : undefined;
5438
+ const startedAt = now();
5439
+ const hasReachedWallClockBudget = () => maxWallClockMs !== undefined && now() - startedAt >= maxWallClockMs;
5440
+ const snapshot = () => this.getGoalRuntimeSnapshot({ maxStallTurns: options.maxStallTurns });
5441
+ if (options.maxTurns <= 0) {
5442
+ return {
5443
+ turnsSubmitted: 0,
5444
+ stopReason: "max_turns_reached",
5445
+ finalSnapshot: snapshot(),
5446
+ };
5447
+ }
5448
+ if (hasReachedWallClockBudget()) {
5449
+ return { turnsSubmitted, stopReason: "wall_clock_budget_reached", finalSnapshot: snapshot() };
5450
+ }
5451
+ while (turnsSubmitted < options.maxTurns) {
5452
+ const beforeSnapshot = snapshot();
5453
+ if (beforeSnapshot.continuation.action !== "continue") {
5454
+ return { turnsSubmitted, stopReason: "continuation_not_allowed", finalSnapshot: beforeSnapshot };
5455
+ }
5456
+ const state = beforeSnapshot.goalState;
5457
+ const beforeKey = state
5458
+ ? `${state.goalId}:${state.updatedAt}:${state.events.length}:${state.stallTurns}:${state.status}`
5459
+ : undefined;
5460
+ const result = await this.continueGoalOnce(options);
5461
+ if (result.submitted) {
5462
+ turnsSubmitted++;
5463
+ }
5464
+ if (hasReachedWallClockBudget()) {
5465
+ return { turnsSubmitted, stopReason: "wall_clock_budget_reached", finalSnapshot: snapshot() };
5466
+ }
5467
+ const afterSnapshot = snapshot();
5468
+ if (afterSnapshot.continuation.action !== "continue") {
5469
+ return { turnsSubmitted, stopReason: "continuation_not_allowed", finalSnapshot: afterSnapshot };
5470
+ }
5471
+ const afterState = afterSnapshot.goalState;
5472
+ const afterKey = afterState
5473
+ ? `${afterState.goalId}:${afterState.updatedAt}:${afterState.events.length}:${afterState.stallTurns}:${afterState.status}`
5474
+ : undefined;
5475
+ if (beforeKey === afterKey) {
5476
+ return { turnsSubmitted, stopReason: "goal_state_not_advanced", finalSnapshot: afterSnapshot };
5477
+ }
5478
+ }
5479
+ return {
5480
+ turnsSubmitted,
5481
+ stopReason: "max_turns_reached",
5482
+ finalSnapshot: snapshot(),
5483
+ };
5484
+ }
3771
5485
  /**
3772
5486
  * Run a one-shot LLM completion fully ISOLATED from the main session — the load-bearing
3773
5487
  * primitive for the native reflection engine (adaptive-agent design §6c/§7).
@@ -3873,8 +5587,60 @@ export class AgentSession {
3873
5587
  // or skills against the dead session.
3874
5588
  if (this._disposed)
3875
5589
  return result;
5590
+ // Learning apply policy: every durable write is converted to a proposal, decided by the
5591
+ // learning gate, and audited with a rollback plan. With the policy disabled (default) the
5592
+ // legacy direct-apply behavior is preserved — but now leaves audit records with rollback info.
5593
+ const policy = this.settingsManager.getLearningPolicySettings();
5594
+ // The audit id sequence counts STORED snapshots only: it reseeds from the stored count on
5595
+ // every pass, so advancing it for a no-op (which stores nothing) would make later passes
5596
+ // reuse ids — and rollback keys on the id, so a collision blocks or misdirects rollback.
5597
+ let auditSequence = getLearningAuditSnapshots(this.sessionManager.getEntries()).length;
5598
+ let writeIndex = 0;
3876
5599
  for (const write of result.writes) {
3877
- await this._applyReflectionWrite(write, signal);
5600
+ writeIndex += 1;
5601
+ const proposalId = `${input.reportId ?? "reflection"}-w${writeIndex}`;
5602
+ const proposal = proposalFromReflectionWrite(write, proposalId);
5603
+ const rollback = rollbackPlanForReflectionWrite(write);
5604
+ const decision = policy.enabled
5605
+ ? evaluateLearningDecision({
5606
+ proposal,
5607
+ confidence: policy.reflectionSourceConfidence,
5608
+ observations: 1,
5609
+ contradictions: 0,
5610
+ settings: {
5611
+ enabled: true,
5612
+ autoApplyEnabled: policy.autoApplyEnabled,
5613
+ confidenceThreshold: policy.confidenceThreshold,
5614
+ minObservations: policy.minObservations,
5615
+ allowedAutoApplyLayers: policy.allowedAutoApplyLayers,
5616
+ requireRollbackPlan: policy.requireRollbackPlan,
5617
+ },
5618
+ })
5619
+ : {
5620
+ kind: "apply",
5621
+ reasonCode: "learning_policy_disabled_legacy_apply",
5622
+ confidence: 0,
5623
+ summary: proposal.summary,
5624
+ requiresApproval: false,
5625
+ };
5626
+ this.saveLearningDecisionSnapshot(decision);
5627
+ if (decision.kind === "apply") {
5628
+ await this._applyReflectionWrite(write, signal);
5629
+ }
5630
+ if (decision.kind !== "no-op") {
5631
+ auditSequence += 1;
5632
+ appendLearningAuditSnapshot(this.sessionManager, {
5633
+ id: `audit-${auditSequence}`,
5634
+ proposalId,
5635
+ layer: proposal.layer,
5636
+ action: decision.kind === "apply" ? "apply" : "propose",
5637
+ summary: proposal.summary,
5638
+ reasonCode: decision.reasonCode,
5639
+ decision,
5640
+ rollback,
5641
+ createdAt: new Date().toISOString(),
5642
+ });
5643
+ }
3878
5644
  }
3879
5645
  // Account the reflection's spend so it surfaces in the footer roll-up (net-token visibility).
3880
5646
  // Idempotent on reportId so a retried/duplicated pass cannot double-count.
@@ -3883,6 +5649,76 @@ export class AgentSession {
3883
5649
  }
3884
5650
  return result;
3885
5651
  }
5652
+ getLearningAuditRecords() {
5653
+ return getLearningAuditSnapshots(this.sessionManager.getEntries());
5654
+ }
5655
+ /**
5656
+ * Roll back one applied durable learning change by executing the inverse operation recorded in
5657
+ * its audit record (memory ops run through the same bundled memory-tool path as the original
5658
+ * apply; promoted skills are archived). Appends a linked "rollback" audit record on success so
5659
+ * the change history stays complete and a change cannot be rolled back twice.
5660
+ */
5661
+ async rollbackLearningWrite(auditId) {
5662
+ if (this._disposed)
5663
+ return { ok: false, reason: "session_disposed" };
5664
+ const audits = this.getLearningAuditRecords();
5665
+ const audit = audits.find((record) => record.id === auditId);
5666
+ if (!audit)
5667
+ return { ok: false, reason: "audit_not_found" };
5668
+ if (audit.action !== "apply")
5669
+ return { ok: false, reason: "not_an_applied_change" };
5670
+ if (audits.some((record) => record.action === "rollback" && record.rollbackOf === auditId)) {
5671
+ return { ok: false, reason: "already_rolled_back" };
5672
+ }
5673
+ const rollback = audit.rollback;
5674
+ if (!rollback)
5675
+ return { ok: false, reason: "no_rollback_plan" };
5676
+ switch (rollback.kind) {
5677
+ case "memory_remove": {
5678
+ if (!rollback.target)
5679
+ return { ok: false, reason: "missing_rollback_target" };
5680
+ await this._applyReflectionWrite({ kind: "memory_remove", target: rollback.target });
5681
+ break;
5682
+ }
5683
+ case "memory_restore": {
5684
+ if (!rollback.target || rollback.previous === undefined) {
5685
+ return { ok: false, reason: "missing_rollback_target" };
5686
+ }
5687
+ await this._applyReflectionWrite({
5688
+ kind: "memory_replace",
5689
+ target: rollback.target,
5690
+ text: rollback.previous,
5691
+ });
5692
+ break;
5693
+ }
5694
+ case "memory_add": {
5695
+ if (rollback.previous === undefined)
5696
+ return { ok: false, reason: "missing_rollback_target" };
5697
+ await this._applyReflectionWrite({ kind: "memory_add", section: "MEMORY", text: rollback.previous });
5698
+ break;
5699
+ }
5700
+ case "archive_skill": {
5701
+ if (!rollback.target)
5702
+ return { ok: false, reason: "missing_rollback_target" };
5703
+ if (!this.archivePromotedSkill(rollback.target)) {
5704
+ return { ok: false, reason: "skill_archive_failed" };
5705
+ }
5706
+ break;
5707
+ }
5708
+ }
5709
+ appendLearningAuditSnapshot(this.sessionManager, {
5710
+ id: `${audit.id}-rollback`,
5711
+ proposalId: audit.proposalId,
5712
+ layer: audit.layer,
5713
+ action: "rollback",
5714
+ summary: `Rolled back: ${audit.summary}`,
5715
+ reasonCode: "user_requested_rollback",
5716
+ decision: audit.decision,
5717
+ rollbackOf: audit.id,
5718
+ createdAt: new Date().toISOString(),
5719
+ });
5720
+ return { ok: true, reason: "rollback_applied" };
5721
+ }
3886
5722
  /**
3887
5723
  * Apply one reflection write through the bundled `memory` tool. `memory_replace`/`memory_remove`
3888
5724
  * don't carry a target file, so we try MEMORY.md first and fall back to USER.md when the substring
@@ -4084,6 +5920,182 @@ export class AgentSession {
4084
5920
  // =========================================================================
4085
5921
  // Extension System
4086
5922
  // =========================================================================
5923
+ getAutonomyStatusSnapshot() {
5924
+ const snapshot = {};
5925
+ if (this._lastModelRouterDecision?.route) {
5926
+ snapshot.latestRoute = {
5927
+ tier: this._lastModelRouterDecision.route.tier,
5928
+ reasonCode: this._lastModelRouterDecision.route.reasonCode,
5929
+ risk: this._lastModelRouterDecision.route.risk,
5930
+ };
5931
+ }
5932
+ if (this._lastAutonomyGateOutcome) {
5933
+ snapshot.latestGate = {
5934
+ outcome: this._lastAutonomyGateOutcome.outcome,
5935
+ gate: this._lastAutonomyGateOutcome.gate,
5936
+ reasonCode: this._lastAutonomyGateOutcome.reasonCode,
5937
+ };
5938
+ }
5939
+ const currentCost = this.getSessionStats().cost;
5940
+ if (currentCost > 0) {
5941
+ snapshot.currentCostUsd = currentCost;
5942
+ }
5943
+ const spawnedCost = this.getSpawnedUsage().cost;
5944
+ if (spawnedCost > 0) {
5945
+ snapshot.spawnedCostUsd = spawnedCost;
5946
+ }
5947
+ const dailyCost = this.getDailyUsageTotals?.()?.totalCost;
5948
+ if (dailyCost !== undefined && dailyCost > 0) {
5949
+ snapshot.dailyCostUsd = dailyCost;
5950
+ }
5951
+ const goal = this.getGoalStateSnapshot();
5952
+ if (goal) {
5953
+ snapshot.activeGoal = {
5954
+ goalId: goal.goalId,
5955
+ status: goal.status,
5956
+ openRequirements: goal.requirements.filter((requirement) => requirement.status === "open").length,
5957
+ stallTurns: goal.stallTurns,
5958
+ };
5959
+ }
5960
+ // Real live count from the lane tracker — never inferred from historical snapshots. Absent
5961
+ // while zero, matching the presence-means-signal convention of the sibling fields.
5962
+ const activeLaneCount = this._laneTracker.getActiveCount();
5963
+ if (activeLaneCount > 0) {
5964
+ snapshot.activeLaneCount = activeLaneCount;
5965
+ }
5966
+ return snapshot;
5967
+ }
5968
+ /**
5969
+ * Aggregate an effectiveness/autonomy dashboard: what Pi has actually been doing (recent
5970
+ * route choices, latest gate outcome, cost, and any research/delegation/learning/goal
5971
+ * activity). Read-only — combines existing session-log getters, never mutates state or
5972
+ * recomputes a route/gate decision.
5973
+ */
5974
+ getAutonomyDiagnosticSnapshot(options) {
5975
+ const maxEntriesPerFamily = options?.maxEntriesPerFamily ?? 10;
5976
+ const snapshot = {};
5977
+ const goal = this.getGoalStateSnapshot();
5978
+ const recentDecisions = getRecentModelRouterDecisions(this.sessionManager.getEntries(), maxEntriesPerFamily);
5979
+ if (recentDecisions.length > 0) {
5980
+ snapshot.routes = recentDecisions.map((decision) => ({
5981
+ title: decision.route.tier,
5982
+ summary: decision.routedModel,
5983
+ reasonCode: decision.route.reasonCode,
5984
+ metadata: { risk: decision.route.risk, outcome: decision.outcome, intent: decision.intent },
5985
+ }));
5986
+ }
5987
+ if (this._lastAutonomyGateOutcome) {
5988
+ const gate = this._lastAutonomyGateOutcome;
5989
+ snapshot.gates = [
5990
+ {
5991
+ title: gate.gate,
5992
+ summary: gate.message,
5993
+ reasonCode: gate.reasonCode,
5994
+ metadata: { outcome: gate.outcome, reversible: gate.reversible },
5995
+ },
5996
+ ];
5997
+ }
5998
+ const costs = [];
5999
+ const currentCostForDiagnostics = this.getSessionStats().cost;
6000
+ if (currentCostForDiagnostics > 0) {
6001
+ costs.push({ title: "current", summary: `$${currentCostForDiagnostics.toFixed(4)}` });
6002
+ }
6003
+ const spawnedCost = this.getSpawnedUsage().cost;
6004
+ if (spawnedCost > 0)
6005
+ costs.push({ title: "spawned", summary: `$${spawnedCost.toFixed(4)}` });
6006
+ const dailyCostForDiagnostics = this.getDailyUsageTotals?.()?.totalCost;
6007
+ if (dailyCostForDiagnostics !== undefined && dailyCostForDiagnostics > 0) {
6008
+ costs.push({ title: "daily", summary: `$${dailyCostForDiagnostics.toFixed(4)}` });
6009
+ }
6010
+ if (costs.length > 0)
6011
+ snapshot.costs = costs;
6012
+ const researchEntries = [];
6013
+ const researchLaneRecords = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "research");
6014
+ for (const record of researchLaneRecords.slice(-maxEntriesPerFamily)) {
6015
+ researchEntries.push({
6016
+ title: `Lane ${record.laneId} (${record.status})`,
6017
+ reasonCode: record.reasonCode,
6018
+ metadata: {
6019
+ costUsd: record.costUsd,
6020
+ startedAt: record.startedAt,
6021
+ completedAt: record.completedAt,
6022
+ goalId: record.goalId,
6023
+ },
6024
+ });
6025
+ }
6026
+ for (const bundle of this.getEvidenceBundleSnapshots().slice(-maxEntriesPerFamily)) {
6027
+ researchEntries.push({
6028
+ title: `Research: ${bundle.query}`,
6029
+ metadata: { sourceCount: bundle.sources.length, findingCount: bundle.findings.length },
6030
+ });
6031
+ }
6032
+ if (this._lastResearchLaneSkipReason) {
6033
+ researchEntries.push({ title: "Last skip", reasonCode: this._lastResearchLaneSkipReason });
6034
+ }
6035
+ if (researchEntries.length > 0) {
6036
+ snapshot.research = researchEntries;
6037
+ }
6038
+ const delegationEntries = [];
6039
+ const workerLaneRecords = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "worker");
6040
+ for (const record of workerLaneRecords.slice(-maxEntriesPerFamily)) {
6041
+ delegationEntries.push({
6042
+ title: `Lane ${record.laneId} (${record.status})`,
6043
+ reasonCode: record.reasonCode,
6044
+ metadata: { costUsd: record.costUsd, startedAt: record.startedAt, completedAt: record.completedAt },
6045
+ });
6046
+ }
6047
+ const workerResults = this.getWorkerResultSnapshots();
6048
+ for (const result of workerResults.slice(-maxEntriesPerFamily)) {
6049
+ delegationEntries.push({
6050
+ title: `Worker ${result.requestId} (${result.status})`,
6051
+ summary: result.summary,
6052
+ metadata: {
6053
+ changedFileCount: result.changedFiles.length,
6054
+ blockerCount: result.blockers?.length ?? 0,
6055
+ usageReportId: result.usageReportId,
6056
+ },
6057
+ });
6058
+ }
6059
+ if (delegationEntries.length > 0) {
6060
+ snapshot.delegation = delegationEntries;
6061
+ }
6062
+ const learningEntries = [];
6063
+ const learningDecisions = this.getLearningDecisionSnapshots();
6064
+ for (const decision of learningDecisions.slice(-maxEntriesPerFamily)) {
6065
+ learningEntries.push({
6066
+ title: `Learning (${decision.kind})`,
6067
+ summary: decision.summary,
6068
+ reasonCode: decision.reasonCode,
6069
+ metadata: { confidence: decision.confidence, requiresApproval: decision.requiresApproval },
6070
+ });
6071
+ }
6072
+ for (const audit of this.getLearningAuditRecords().slice(-maxEntriesPerFamily)) {
6073
+ learningEntries.push({
6074
+ title: `Audit ${audit.id} (${audit.action})`,
6075
+ summary: audit.summary,
6076
+ reasonCode: audit.reasonCode,
6077
+ metadata: { layer: audit.layer, proposalId: audit.proposalId, rollbackOf: audit.rollbackOf },
6078
+ });
6079
+ }
6080
+ if (learningEntries.length > 0) {
6081
+ snapshot.learning = learningEntries;
6082
+ }
6083
+ if (goal) {
6084
+ snapshot.goals = [
6085
+ {
6086
+ title: `Goal ${goal.goalId}`,
6087
+ summary: goal.userGoal,
6088
+ reasonCode: goal.status,
6089
+ metadata: {
6090
+ openRequirementCount: goal.requirements.filter((requirement) => requirement.status === "open").length,
6091
+ stallTurns: goal.stallTurns,
6092
+ blockedReason: goal.blockedReason,
6093
+ },
6094
+ },
6095
+ ];
6096
+ }
6097
+ return snapshot;
6098
+ }
4087
6099
  createReplacedSessionContext() {
4088
6100
  const context = Object.defineProperties({}, Object.getOwnPropertyDescriptors(this._extensionRunner.createCommandContext()));
4089
6101
  context.sendMessage = (message, options) => this.sendCustomMessage(message, options);