@caupulican/pi-adaptative 0.80.85 → 0.80.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. package/CHANGELOG.md +160 -1
  2. package/dist/core/agent-session.d.ts +394 -1
  3. package/dist/core/agent-session.d.ts.map +1 -1
  4. package/dist/core/agent-session.js +1862 -46
  5. package/dist/core/agent-session.js.map +1 -1
  6. package/dist/core/autonomy/approval-gate.d.ts +4 -0
  7. package/dist/core/autonomy/approval-gate.d.ts.map +1 -0
  8. package/dist/core/autonomy/approval-gate.js +27 -0
  9. package/dist/core/autonomy/approval-gate.js.map +1 -0
  10. package/dist/core/autonomy/bounded-completion.d.ts +27 -0
  11. package/dist/core/autonomy/bounded-completion.d.ts.map +1 -0
  12. package/dist/core/autonomy/bounded-completion.js +44 -0
  13. package/dist/core/autonomy/bounded-completion.js.map +1 -0
  14. package/dist/core/autonomy/contracts.d.ts +129 -0
  15. package/dist/core/autonomy/contracts.d.ts.map +1 -0
  16. package/dist/core/autonomy/contracts.js +2 -0
  17. package/dist/core/autonomy/contracts.js.map +1 -0
  18. package/dist/core/autonomy/gates.d.ts +15 -0
  19. package/dist/core/autonomy/gates.d.ts.map +1 -0
  20. package/dist/core/autonomy/gates.js +205 -0
  21. package/dist/core/autonomy/gates.js.map +1 -0
  22. package/dist/core/autonomy/lane-tracker.d.ts +48 -0
  23. package/dist/core/autonomy/lane-tracker.d.ts.map +1 -0
  24. package/dist/core/autonomy/lane-tracker.js +125 -0
  25. package/dist/core/autonomy/lane-tracker.js.map +1 -0
  26. package/dist/core/autonomy/path-scope.d.ts +9 -0
  27. package/dist/core/autonomy/path-scope.d.ts.map +1 -0
  28. package/dist/core/autonomy/path-scope.js +122 -0
  29. package/dist/core/autonomy/path-scope.js.map +1 -0
  30. package/dist/core/autonomy/risk-assessment.d.ts +3 -0
  31. package/dist/core/autonomy/risk-assessment.d.ts.map +1 -0
  32. package/dist/core/autonomy/risk-assessment.js +122 -0
  33. package/dist/core/autonomy/risk-assessment.js.map +1 -0
  34. package/dist/core/autonomy/session-lane-record.d.ts +10 -0
  35. package/dist/core/autonomy/session-lane-record.d.ts.map +1 -0
  36. package/dist/core/autonomy/session-lane-record.js +36 -0
  37. package/dist/core/autonomy/session-lane-record.js.map +1 -0
  38. package/dist/core/autonomy/status.d.ts +40 -0
  39. package/dist/core/autonomy/status.d.ts.map +1 -0
  40. package/dist/core/autonomy/status.js +107 -0
  41. package/dist/core/autonomy/status.js.map +1 -0
  42. package/dist/core/autonomy/subagent-prompt.d.ts +21 -0
  43. package/dist/core/autonomy/subagent-prompt.d.ts.map +1 -0
  44. package/dist/core/autonomy/subagent-prompt.js +28 -0
  45. package/dist/core/autonomy/subagent-prompt.js.map +1 -0
  46. package/dist/core/autonomy/telemetry-events.d.ts +18 -0
  47. package/dist/core/autonomy/telemetry-events.d.ts.map +1 -0
  48. package/dist/core/autonomy/telemetry-events.js +60 -0
  49. package/dist/core/autonomy/telemetry-events.js.map +1 -0
  50. package/dist/core/context/artifact-retrieval.d.ts +49 -0
  51. package/dist/core/context/artifact-retrieval.d.ts.map +1 -0
  52. package/dist/core/context/artifact-retrieval.js +49 -0
  53. package/dist/core/context/artifact-retrieval.js.map +1 -0
  54. package/dist/core/context/context-artifacts.d.ts +94 -0
  55. package/dist/core/context/context-artifacts.d.ts.map +1 -0
  56. package/dist/core/context/context-artifacts.js +307 -0
  57. package/dist/core/context/context-artifacts.js.map +1 -0
  58. package/dist/core/context/context-audit.d.ts +66 -0
  59. package/dist/core/context/context-audit.d.ts.map +1 -0
  60. package/dist/core/context/context-audit.js +173 -0
  61. package/dist/core/context/context-audit.js.map +1 -0
  62. package/dist/core/context/context-item.d.ts +117 -0
  63. package/dist/core/context/context-item.d.ts.map +1 -0
  64. package/dist/core/context/context-item.js +36 -0
  65. package/dist/core/context/context-item.js.map +1 -0
  66. package/dist/core/context/context-prompt-enforcement.d.ts +73 -0
  67. package/dist/core/context/context-prompt-enforcement.d.ts.map +1 -0
  68. package/dist/core/context/context-prompt-enforcement.js +153 -0
  69. package/dist/core/context/context-prompt-enforcement.js.map +1 -0
  70. package/dist/core/context/context-prompt-policy.d.ts +90 -0
  71. package/dist/core/context/context-prompt-policy.d.ts.map +1 -0
  72. package/dist/core/context/context-prompt-policy.js +73 -0
  73. package/dist/core/context/context-prompt-policy.js.map +1 -0
  74. package/dist/core/context/context-retention.d.ts +36 -0
  75. package/dist/core/context/context-retention.d.ts.map +1 -0
  76. package/dist/core/context/context-retention.js +108 -0
  77. package/dist/core/context/context-retention.js.map +1 -0
  78. package/dist/core/context/context-store.d.ts +37 -0
  79. package/dist/core/context/context-store.d.ts.map +1 -0
  80. package/dist/core/context/context-store.js +45 -0
  81. package/dist/core/context/context-store.js.map +1 -0
  82. package/dist/core/context/memory-diagnostics.d.ts +50 -0
  83. package/dist/core/context/memory-diagnostics.d.ts.map +1 -0
  84. package/dist/core/context/memory-diagnostics.js +43 -0
  85. package/dist/core/context/memory-diagnostics.js.map +1 -0
  86. package/dist/core/context/memory-index-store.d.ts +28 -0
  87. package/dist/core/context/memory-index-store.d.ts.map +1 -0
  88. package/dist/core/context/memory-index-store.js +38 -0
  89. package/dist/core/context/memory-index-store.js.map +1 -0
  90. package/dist/core/context/memory-prompt-block.d.ts +34 -0
  91. package/dist/core/context/memory-prompt-block.d.ts.map +1 -0
  92. package/dist/core/context/memory-prompt-block.js +58 -0
  93. package/dist/core/context/memory-prompt-block.js.map +1 -0
  94. package/dist/core/context/memory-provider-contract.d.ts +114 -0
  95. package/dist/core/context/memory-provider-contract.d.ts.map +1 -0
  96. package/dist/core/context/memory-provider-contract.js +121 -0
  97. package/dist/core/context/memory-provider-contract.js.map +1 -0
  98. package/dist/core/context/memory-retrieval.d.ts +27 -0
  99. package/dist/core/context/memory-retrieval.d.ts.map +1 -0
  100. package/dist/core/context/memory-retrieval.js +91 -0
  101. package/dist/core/context/memory-retrieval.js.map +1 -0
  102. package/dist/core/context/okf-memory-provider.d.ts +26 -0
  103. package/dist/core/context/okf-memory-provider.d.ts.map +1 -0
  104. package/dist/core/context/okf-memory-provider.js +154 -0
  105. package/dist/core/context/okf-memory-provider.js.map +1 -0
  106. package/dist/core/context/okf-memory.d.ts +42 -0
  107. package/dist/core/context/okf-memory.d.ts.map +1 -0
  108. package/dist/core/context/okf-memory.js +175 -0
  109. package/dist/core/context/okf-memory.js.map +1 -0
  110. package/dist/core/context/policy-engine.d.ts +66 -0
  111. package/dist/core/context/policy-engine.d.ts.map +1 -0
  112. package/dist/core/context/policy-engine.js +171 -0
  113. package/dist/core/context/policy-engine.js.map +1 -0
  114. package/dist/core/context/policy-types.d.ts +102 -0
  115. package/dist/core/context/policy-types.d.ts.map +1 -0
  116. package/dist/core/context/policy-types.js +7 -0
  117. package/dist/core/context/policy-types.js.map +1 -0
  118. package/dist/core/context/sqlite-runtime-index.d.ts +19 -0
  119. package/dist/core/context/sqlite-runtime-index.d.ts.map +1 -0
  120. package/dist/core/context/sqlite-runtime-index.js +344 -0
  121. package/dist/core/context/sqlite-runtime-index.js.map +1 -0
  122. package/dist/core/context/storage-authority.d.ts +20 -0
  123. package/dist/core/context/storage-authority.d.ts.map +1 -0
  124. package/dist/core/context/storage-authority.js +51 -0
  125. package/dist/core/context/storage-authority.js.map +1 -0
  126. package/dist/core/context/tool-output-packer.d.ts +75 -0
  127. package/dist/core/context/tool-output-packer.d.ts.map +1 -0
  128. package/dist/core/context/tool-output-packer.js +77 -0
  129. package/dist/core/context/tool-output-packer.js.map +1 -0
  130. package/dist/core/cost/session-usage.d.ts +20 -0
  131. package/dist/core/cost/session-usage.d.ts.map +1 -0
  132. package/dist/core/cost/session-usage.js +164 -0
  133. package/dist/core/cost/session-usage.js.map +1 -0
  134. package/dist/core/delegation/session-worker-result.d.ts +10 -0
  135. package/dist/core/delegation/session-worker-result.d.ts.map +1 -0
  136. package/dist/core/delegation/session-worker-result.js +36 -0
  137. package/dist/core/delegation/session-worker-result.js.map +1 -0
  138. package/dist/core/delegation/worker-result.d.ts +9 -0
  139. package/dist/core/delegation/worker-result.d.ts.map +1 -0
  140. package/dist/core/delegation/worker-result.js +152 -0
  141. package/dist/core/delegation/worker-result.js.map +1 -0
  142. package/dist/core/delegation/worker-runner.d.ts +58 -0
  143. package/dist/core/delegation/worker-runner.d.ts.map +1 -0
  144. package/dist/core/delegation/worker-runner.js +188 -0
  145. package/dist/core/delegation/worker-runner.js.map +1 -0
  146. package/dist/core/extensions/builtin.d.ts +5 -1
  147. package/dist/core/extensions/builtin.d.ts.map +1 -1
  148. package/dist/core/extensions/builtin.js +23 -1
  149. package/dist/core/extensions/builtin.js.map +1 -1
  150. package/dist/core/footer-data-provider.d.ts +5 -1
  151. package/dist/core/footer-data-provider.d.ts.map +1 -1
  152. package/dist/core/footer-data-provider.js +13 -0
  153. package/dist/core/footer-data-provider.js.map +1 -1
  154. package/dist/core/goals/goal-continuation-controller.d.ts +22 -0
  155. package/dist/core/goals/goal-continuation-controller.d.ts.map +1 -0
  156. package/dist/core/goals/goal-continuation-controller.js +88 -0
  157. package/dist/core/goals/goal-continuation-controller.js.map +1 -0
  158. package/dist/core/goals/goal-continuation-defaults.d.ts +10 -0
  159. package/dist/core/goals/goal-continuation-defaults.d.ts.map +1 -0
  160. package/dist/core/goals/goal-continuation-defaults.js +10 -0
  161. package/dist/core/goals/goal-continuation-defaults.js.map +1 -0
  162. package/dist/core/goals/goal-continuation-prompt.d.ts +18 -0
  163. package/dist/core/goals/goal-continuation-prompt.d.ts.map +1 -0
  164. package/dist/core/goals/goal-continuation-prompt.js +141 -0
  165. package/dist/core/goals/goal-continuation-prompt.js.map +1 -0
  166. package/dist/core/goals/goal-runtime-snapshot.d.ts +19 -0
  167. package/dist/core/goals/goal-runtime-snapshot.d.ts.map +1 -0
  168. package/dist/core/goals/goal-runtime-snapshot.js +23 -0
  169. package/dist/core/goals/goal-runtime-snapshot.js.map +1 -0
  170. package/dist/core/goals/goal-state.d.ts +87 -0
  171. package/dist/core/goals/goal-state.d.ts.map +1 -0
  172. package/dist/core/goals/goal-state.js +259 -0
  173. package/dist/core/goals/goal-state.js.map +1 -0
  174. package/dist/core/goals/goal-tool-core.d.ts +66 -0
  175. package/dist/core/goals/goal-tool-core.d.ts.map +1 -0
  176. package/dist/core/goals/goal-tool-core.js +146 -0
  177. package/dist/core/goals/goal-tool-core.js.map +1 -0
  178. package/dist/core/goals/session-goal-state.d.ts +10 -0
  179. package/dist/core/goals/session-goal-state.d.ts.map +1 -0
  180. package/dist/core/goals/session-goal-state.js +35 -0
  181. package/dist/core/goals/session-goal-state.js.map +1 -0
  182. package/dist/core/learning/learning-audit.d.ts +45 -0
  183. package/dist/core/learning/learning-audit.d.ts.map +1 -0
  184. package/dist/core/learning/learning-audit.js +139 -0
  185. package/dist/core/learning/learning-audit.js.map +1 -0
  186. package/dist/core/learning/learning-gate.d.ts +29 -0
  187. package/dist/core/learning/learning-gate.d.ts.map +1 -0
  188. package/dist/core/learning/learning-gate.js +150 -0
  189. package/dist/core/learning/learning-gate.js.map +1 -0
  190. package/dist/core/learning/session-learning-decision.d.ts +10 -0
  191. package/dist/core/learning/session-learning-decision.d.ts.map +1 -0
  192. package/dist/core/learning/session-learning-decision.js +36 -0
  193. package/dist/core/learning/session-learning-decision.js.map +1 -0
  194. package/dist/core/model-capability.d.ts +41 -0
  195. package/dist/core/model-capability.d.ts.map +1 -0
  196. package/dist/core/model-capability.js +101 -0
  197. package/dist/core/model-capability.js.map +1 -0
  198. package/dist/core/model-router/config-diagnostics.d.ts.map +1 -1
  199. package/dist/core/model-router/config-diagnostics.js +1 -0
  200. package/dist/core/model-router/config-diagnostics.js.map +1 -1
  201. package/dist/core/model-router/intent-classifier.d.ts +2 -0
  202. package/dist/core/model-router/intent-classifier.d.ts.map +1 -1
  203. package/dist/core/model-router/intent-classifier.js +154 -9
  204. package/dist/core/model-router/intent-classifier.js.map +1 -1
  205. package/dist/core/model-router/route-judge.d.ts +54 -0
  206. package/dist/core/model-router/route-judge.d.ts.map +1 -0
  207. package/dist/core/model-router/route-judge.js +128 -0
  208. package/dist/core/model-router/route-judge.js.map +1 -0
  209. package/dist/core/model-router/status.d.ts +4 -1
  210. package/dist/core/model-router/status.d.ts.map +1 -1
  211. package/dist/core/model-router/status.js +30 -6
  212. package/dist/core/model-router/status.js.map +1 -1
  213. package/dist/core/model-router/tool-escalation.d.ts +4 -6
  214. package/dist/core/model-router/tool-escalation.d.ts.map +1 -1
  215. package/dist/core/model-router/tool-escalation.js +1 -1
  216. package/dist/core/model-router/tool-escalation.js.map +1 -1
  217. package/dist/core/models/fitness-store.d.ts +40 -0
  218. package/dist/core/models/fitness-store.d.ts.map +1 -0
  219. package/dist/core/models/fitness-store.js +61 -0
  220. package/dist/core/models/fitness-store.js.map +1 -0
  221. package/dist/core/profile-registry.d.ts.map +1 -1
  222. package/dist/core/profile-registry.js +1 -1
  223. package/dist/core/profile-registry.js.map +1 -1
  224. package/dist/core/prompt-templates.d.ts +2 -0
  225. package/dist/core/prompt-templates.d.ts.map +1 -1
  226. package/dist/core/prompt-templates.js +12 -4
  227. package/dist/core/prompt-templates.js.map +1 -1
  228. package/dist/core/research/automata-provider.d.ts +5 -0
  229. package/dist/core/research/automata-provider.d.ts.map +1 -0
  230. package/dist/core/research/automata-provider.js +15 -0
  231. package/dist/core/research/automata-provider.js.map +1 -0
  232. package/dist/core/research/evidence-bundle.d.ts +10 -0
  233. package/dist/core/research/evidence-bundle.d.ts.map +1 -0
  234. package/dist/core/research/evidence-bundle.js +116 -0
  235. package/dist/core/research/evidence-bundle.js.map +1 -0
  236. package/dist/core/research/model-fitness.d.ts +79 -0
  237. package/dist/core/research/model-fitness.d.ts.map +1 -0
  238. package/dist/core/research/model-fitness.js +257 -0
  239. package/dist/core/research/model-fitness.js.map +1 -0
  240. package/dist/core/research/research-gate.d.ts +11 -0
  241. package/dist/core/research/research-gate.d.ts.map +1 -0
  242. package/dist/core/research/research-gate.js +82 -0
  243. package/dist/core/research/research-gate.js.map +1 -0
  244. package/dist/core/research/research-runner.d.ts +59 -0
  245. package/dist/core/research/research-runner.d.ts.map +1 -0
  246. package/dist/core/research/research-runner.js +155 -0
  247. package/dist/core/research/research-runner.js.map +1 -0
  248. package/dist/core/research/session-evidence-bundle.d.ts +11 -0
  249. package/dist/core/research/session-evidence-bundle.d.ts.map +1 -0
  250. package/dist/core/research/session-evidence-bundle.js +55 -0
  251. package/dist/core/research/session-evidence-bundle.js.map +1 -0
  252. package/dist/core/resource-loader.d.ts.map +1 -1
  253. package/dist/core/resource-loader.js +7 -1
  254. package/dist/core/resource-loader.js.map +1 -1
  255. package/dist/core/settings-manager.d.ts +147 -4
  256. package/dist/core/settings-manager.d.ts.map +1 -1
  257. package/dist/core/settings-manager.js +285 -9
  258. package/dist/core/settings-manager.js.map +1 -1
  259. package/dist/core/skills.d.ts +4 -0
  260. package/dist/core/skills.d.ts.map +1 -1
  261. package/dist/core/skills.js +18 -6
  262. package/dist/core/skills.js.map +1 -1
  263. package/dist/core/slash-commands.d.ts.map +1 -1
  264. package/dist/core/slash-commands.js +4 -0
  265. package/dist/core/slash-commands.js.map +1 -1
  266. package/dist/core/toolkit/script-registry.d.ts +34 -0
  267. package/dist/core/toolkit/script-registry.d.ts.map +1 -0
  268. package/dist/core/toolkit/script-registry.js +71 -0
  269. package/dist/core/toolkit/script-registry.js.map +1 -0
  270. package/dist/core/toolkit/script-runner.d.ts +28 -0
  271. package/dist/core/toolkit/script-runner.d.ts.map +1 -0
  272. package/dist/core/toolkit/script-runner.js +48 -0
  273. package/dist/core/toolkit/script-runner.js.map +1 -0
  274. package/dist/core/tools/artifact-retrieve.d.ts +23 -0
  275. package/dist/core/tools/artifact-retrieve.d.ts.map +1 -0
  276. package/dist/core/tools/artifact-retrieve.js +110 -0
  277. package/dist/core/tools/artifact-retrieve.js.map +1 -0
  278. package/dist/core/tools/delegate.d.ts +32 -0
  279. package/dist/core/tools/delegate.d.ts.map +1 -0
  280. package/dist/core/tools/delegate.js +60 -0
  281. package/dist/core/tools/delegate.js.map +1 -0
  282. package/dist/core/tools/fff-search-backend.d.ts +103 -0
  283. package/dist/core/tools/fff-search-backend.d.ts.map +1 -0
  284. package/dist/core/tools/fff-search-backend.js +151 -0
  285. package/dist/core/tools/fff-search-backend.js.map +1 -0
  286. package/dist/core/tools/find.d.ts +21 -1
  287. package/dist/core/tools/find.d.ts.map +1 -1
  288. package/dist/core/tools/find.js +183 -10
  289. package/dist/core/tools/find.js.map +1 -1
  290. package/dist/core/tools/goal.d.ts +35 -0
  291. package/dist/core/tools/goal.d.ts.map +1 -0
  292. package/dist/core/tools/goal.js +122 -0
  293. package/dist/core/tools/goal.js.map +1 -0
  294. package/dist/core/tools/grep.d.ts +21 -1
  295. package/dist/core/tools/grep.d.ts.map +1 -1
  296. package/dist/core/tools/grep.js +272 -27
  297. package/dist/core/tools/grep.js.map +1 -1
  298. package/dist/core/tools/index.d.ts +4 -1
  299. package/dist/core/tools/index.d.ts.map +1 -1
  300. package/dist/core/tools/index.js +9 -0
  301. package/dist/core/tools/index.js.map +1 -1
  302. package/dist/core/tools/model-fitness.d.ts +30 -0
  303. package/dist/core/tools/model-fitness.d.ts.map +1 -0
  304. package/dist/core/tools/model-fitness.js +38 -0
  305. package/dist/core/tools/model-fitness.js.map +1 -0
  306. package/dist/core/tools/run-toolkit-script.d.ts +24 -0
  307. package/dist/core/tools/run-toolkit-script.d.ts.map +1 -0
  308. package/dist/core/tools/run-toolkit-script.js +103 -0
  309. package/dist/core/tools/run-toolkit-script.js.map +1 -0
  310. package/dist/core/tools/search-router.d.ts +75 -0
  311. package/dist/core/tools/search-router.d.ts.map +1 -0
  312. package/dist/core/tools/search-router.js +85 -0
  313. package/dist/core/tools/search-router.js.map +1 -0
  314. package/dist/modes/interactive/components/footer.d.ts.map +1 -1
  315. package/dist/modes/interactive/components/footer.js +18 -16
  316. package/dist/modes/interactive/components/footer.js.map +1 -1
  317. package/dist/modes/interactive/components/settings-selector.d.ts +13 -1
  318. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  319. package/dist/modes/interactive/components/settings-selector.js +471 -11
  320. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  321. package/dist/modes/interactive/interactive-mode.d.ts +4 -0
  322. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  323. package/dist/modes/interactive/interactive-mode.js +220 -39
  324. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  325. package/dist/modes/print-mode.d.ts.map +1 -1
  326. package/dist/modes/print-mode.js +3 -0
  327. package/dist/modes/print-mode.js.map +1 -1
  328. package/dist/utils/tools-manager.d.ts +2 -0
  329. package/dist/utils/tools-manager.d.ts.map +1 -1
  330. package/dist/utils/tools-manager.js +154 -2
  331. package/dist/utils/tools-manager.js.map +1 -1
  332. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  333. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  334. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  335. package/examples/extensions/sandbox/package-lock.json +2 -2
  336. package/examples/extensions/sandbox/package.json +1 -1
  337. package/examples/extensions/with-deps/package-lock.json +2 -2
  338. package/examples/extensions/with-deps/package.json +1 -1
  339. package/npm-shrinkwrap.json +368 -12
  340. package/package.json +5 -4
@@ -21,12 +21,27 @@ import { stripFrontmatter } from "../utils/frontmatter.js";
21
21
  import { resolvePath } from "../utils/paths.js";
22
22
  import { sleep } from "../utils/sleep.js";
23
23
  import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth-guidance.js";
24
+ import { evaluateToolGate } from "./autonomy/gates.js";
25
+ import { LaneTracker } from "./autonomy/lane-tracker.js";
26
+ import { appendLaneRecordSnapshot, getLaneRecordSnapshots } from "./autonomy/session-lane-record.js";
27
+ import { composeSubagentSystemPrompt } from "./autonomy/subagent-prompt.js";
24
28
  import { executeBashWithOperations } from "./bash-executor.js";
25
29
  import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
30
+ import { createFileArtifactStore } from "./context/context-artifacts.js";
31
+ import { runContextAudit } from "./context/context-audit.js";
32
+ import { enforcePromptPolicy } from "./context/context-prompt-enforcement.js";
33
+ import { correlateWithContextGc, planPromptPolicy, } from "./context/context-prompt-policy.js";
34
+ import { defaultMemoryPromptInclusionReport, sanitizeMemoryRetrievalReportForDiagnostics, } from "./context/memory-diagnostics.js";
35
+ import { buildMemoryPromptBlock } from "./context/memory-prompt-block.js";
36
+ import { DEFAULT_LOCAL_MEMORY_EGRESS_POLICY, } from "./context/memory-provider-contract.js";
37
+ import { retrieveMemoryForContext } from "./context/memory-retrieval.js";
38
+ import { createOkfMemoryProvider } from "./context/okf-memory-provider.js";
26
39
  import { applyContextGc } from "./context-gc.js";
27
40
  import { aggregateDailyUsageFromSessionFiles, aggregateDailyUsageFromSessionRoot, formatDailyUsageBreakdown, getLocalDayWindow, } from "./cost/daily-usage.js";
28
41
  import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
29
42
  import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
43
+ import { appendWorkerResultSnapshot, getWorkerResultSnapshots } from "./delegation/session-worker-result.js";
44
+ import { runWorker } from "./delegation/worker-runner.js";
30
45
  import { exportSessionToHtml } from "./export-html/index.js";
31
46
  import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
32
47
  import { createCoreDiagnosticsToolDefinitions } from "./extensions/builtin.js";
@@ -34,7 +49,13 @@ import { ExtensionRunner, wrapRegisteredTools, } from "./extensions/index.js";
34
49
  import { disposeExtensionEventSubscriptions } from "./extensions/loader.js";
35
50
  import { emitSessionShutdownEvent } from "./extensions/runner.js";
36
51
  import { GatewayRegistry } from "./gateways/channel-provider.js";
52
+ import { buildGoalContinuationPrompt, } from "./goals/goal-continuation-prompt.js";
53
+ import { buildGoalRuntimeSnapshot, } from "./goals/goal-runtime-snapshot.js";
54
+ import { appendGoalStateSnapshot, getLatestGoalStateSnapshot } from "./goals/session-goal-state.js";
55
+ import { appendLearningAuditSnapshot, getLearningAuditSnapshots, proposalFromReflectionWrite, rollbackPlanForReflectionWrite, } from "./learning/learning-audit.js";
56
+ import { evaluateLearningDecision } from "./learning/learning-gate.js";
37
57
  import { decideDemand, ReflectionEngine, } from "./learning/reflection-engine.js";
58
+ import { appendLearningDecisionSnapshot, getLearningDecisionSnapshots } from "./learning/session-learning-decision.js";
38
59
  import { isPromotedFrontmatter, SkillCurator } from "./learning/skill-curator.js";
39
60
  import { EffectivenessTracker } from "./memory/effectiveness-tracker.js";
40
61
  import { MemoryManager } from "./memory/memory-manager.js";
@@ -42,21 +63,32 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
42
63
  import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
43
64
  import { compactToolResultDetailsForRetention } from "./message-retention.js";
44
65
  import { createCustomMessage } from "./messages.js";
66
+ import { deriveModelCapabilityProfile, filterToolNamesForCapability, } from "./model-capability.js";
45
67
  import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
46
68
  import { collectModelRouterConfigDiagnostics } from "./model-router/config-diagnostics.js";
47
- import { classifyModelRouterIntent } from "./model-router/intent-classifier.js";
69
+ import { classifyModelRouterRoute } from "./model-router/intent-classifier.js";
70
+ import { ROUTE_JUDGE_MAX_OUTPUT_TOKENS, runRouteJudge } from "./model-router/route-judge.js";
48
71
  import { bufferModelRouterSessionCustomMessage, bufferModelRouterSessionMessage, createModelRouterSessionBuffer, flushModelRouterSessionBuffer, } from "./model-router/session-buffer.js";
49
72
  import { formatModelRouterStatus, getRecentModelRouterDecisions, MODEL_ROUTER_DECISION_CUSTOM_TYPE, } from "./model-router/status.js";
50
73
  import { shouldEscalateModelRouterTool } from "./model-router/tool-escalation.js";
74
+ import { FitnessStore } from "./models/fitness-store.js";
51
75
  import { expandPromptTemplate } from "./prompt-templates.js";
76
+ import { runModelFitnessProbe } from "./research/model-fitness.js";
77
+ import { runResearch } from "./research/research-runner.js";
78
+ import { appendEvidenceBundleSnapshot, getEvidenceBundleSnapshots, getLatestEvidenceBundleSnapshot, } from "./research/session-evidence-bundle.js";
52
79
  import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
53
80
  import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
54
81
  import { CURRENT_SESSION_VERSION, getLatestCompactionEntry } from "./session-manager.js";
55
82
  import { matchesResourceProfilePattern, } from "./settings-manager.js";
56
83
  import { createSyntheticSourceInfo } from "./source-info.js";
57
84
  import { buildSystemPrompt } from "./system-prompt.js";
85
+ import { executeToolkitScript } from "./toolkit/script-runner.js";
58
86
  import { createLocalBashOperations } from "./tools/bash.js";
87
+ import { createDelegateToolDefinition } from "./tools/delegate.js";
88
+ import { createGoalToolDefinition } from "./tools/goal.js";
59
89
  import { createAllToolDefinitions } from "./tools/index.js";
90
+ import { createModelFitnessToolDefinition } from "./tools/model-fitness.js";
91
+ import { createRunToolkitScriptToolDefinition } from "./tools/run-toolkit-script.js";
60
92
  import { createToolDefinitionFromAgentTool } from "./tools/tool-definition-wrapper.js";
61
93
  /**
62
94
  * Parse a skill block from message text.
@@ -89,10 +121,46 @@ function formatModelRouterModel(model) {
89
121
  function persistModelRouterDecision(sessionManager, decision) {
90
122
  sessionManager.appendCustomEntry(MODEL_ROUTER_DECISION_CUSTOM_TYPE, decision);
91
123
  }
124
+ /** Read a packed grep/find tool result's `details.artifactId`, if present, without `any`. */
125
+ function extractArtifactId(message) {
126
+ if (!message || message.role !== "toolResult")
127
+ return undefined;
128
+ const details = message.details;
129
+ if (typeof details !== "object" || details === null)
130
+ return undefined;
131
+ const artifactId = details.artifactId;
132
+ return typeof artifactId === "string" ? artifactId : undefined;
133
+ }
134
+ /**
135
+ * Text of the most recent user message, or "" if there is none (e.g. goal-continuation
136
+ * turns with no new user input). An empty query degrades to zero memory-retrieval results
137
+ * by construction (see memory-provider-contract.ts's score-on-empty-query-tokens rule) --
138
+ * no special-casing needed here beyond returning "".
139
+ */
140
+ function latestUserMessageText(messages) {
141
+ for (let index = messages.length - 1; index >= 0; index--) {
142
+ const message = messages[index];
143
+ if (message.role !== "user")
144
+ continue;
145
+ if (typeof message.content === "string")
146
+ return message.content;
147
+ const parts = [];
148
+ for (const part of message.content) {
149
+ if (part.type === "text")
150
+ parts.push(part.text);
151
+ }
152
+ return parts.join("\n");
153
+ }
154
+ return "";
155
+ }
156
+ function emptyMemoryRetrievalReport(maxResults) {
157
+ return { request: { query: "", maxResults }, providerReports: [], results: [], contextItems: [] };
158
+ }
92
159
  export class AgentSession {
93
160
  agent;
94
161
  sessionManager;
95
162
  settingsManager;
163
+ capabilityEnvelope;
96
164
  _scopedModels;
97
165
  // Event subscription state
98
166
  _unsubscribeAgent;
@@ -108,11 +176,42 @@ export class AgentSession {
108
176
  _pendingNextTurnMessages = [];
109
177
  /** Serializes prompt() submissions made while streaming so queued steering/follow-ups keep user-typed FIFO order. */
110
178
  _streamingPromptSubmissionTail = Promise.resolve();
179
+ /** Pending idle timer that starts bounded goal continuation after the session becomes idle. */
180
+ _goalAutoContinueTimer;
181
+ /** Guards bounded idle autosteer so continuation prompts do not recursively trigger themselves. */
182
+ _isGoalAutoContinuing = false;
183
+ /** Pending idle timer that starts an autonomous research pass after the session becomes idle. */
184
+ _researchLaneTimer;
185
+ /** Single-flight guard: at most one research pass runs at a time per session. */
186
+ _isResearchLaneRunning = false;
187
+ /** Why the last idle research-lane evaluation skipped, for /autonomy diagnostics. */
188
+ _lastResearchLaneSkipReason;
189
+ /** Live lane registry — the real source for AutonomyStatusSnapshot.activeLaneCount. */
190
+ _laneTracker = new LaneTracker();
191
+ /** Session-lifetime abort for in-flight research passes (same pattern as _reflectionAbort). */
192
+ _researchLaneAbort = new AbortController();
193
+ /** Single-flight guard: at most one delegated worker runs at a time per session. */
194
+ _isWorkerDelegationRunning = false;
195
+ /** Session-lifetime abort for in-flight delegated workers. */
196
+ _workerDelegationAbort = new AbortController();
197
+ /**
198
+ * The last tool set requested via setActiveToolsByName BEFORE model-capability filtering, so
199
+ * switching from a small-window model back to a large one restores the full requested set.
200
+ */
201
+ _requestedActiveToolNames;
111
202
  // Compaction/context hygiene state
112
203
  _compactionAbortController = undefined;
113
204
  _autoCompactionAbortController = undefined;
114
205
  _overflowRecoveryAttempted = false;
115
206
  _latestContextGcReport = undefined;
207
+ _toolArtifactStore = undefined;
208
+ _latestContextAuditReport = undefined;
209
+ _latestPromptPolicyReport = undefined;
210
+ _latestPromptPolicyGcCorrelation = undefined;
211
+ _latestPromptEnforcementReport = undefined;
212
+ _memoryOkfProvider = undefined;
213
+ _latestMemoryRetrievalReport = undefined;
214
+ _latestMemoryPromptInclusionReport = undefined;
116
215
  // Branch summarization state
117
216
  _branchSummaryAbortController = undefined;
118
217
  // Retry state
@@ -151,9 +250,12 @@ export class AgentSession {
151
250
  _costGuardDowngraded = false;
152
251
  /** Active model-router intent for the current transient routed turn, if any. */
153
252
  _activeModelRouterIntent;
253
+ _activeModelRouterRoute;
154
254
  _modelRouterSessionBuffer;
155
255
  _modelRouterEscalationRequested = false;
256
+ _isModelRouterRetry = false;
156
257
  _lastModelRouterDecision;
258
+ _lastAutonomyGateOutcome;
157
259
  _lastModelRouterSkipReason;
158
260
  _lastModelRouterIntent;
159
261
  /** Lazily-built skill curator (#32) over `<agentDir>/skills`. */
@@ -295,7 +397,7 @@ export class AgentSession {
295
397
  const authoritativeMessages = this.agent.state.messages.length > 0 ? this.agent.state.messages : transformed;
296
398
  let currentMessages = authoritativeMessages;
297
399
  try {
298
- const settings = this.settingsManager.getCompactionSettings();
400
+ const settings = this._getAdaptedCompactionSettings();
299
401
  const contextWindow = this.model?.contextWindow ?? 0;
300
402
  if (settings.enabled && contextWindow > 0 && !this.isCompacting) {
301
403
  const contextTokens = this._estimateCurrentContextTokens(authoritativeMessages);
@@ -316,7 +418,18 @@ export class AgentSession {
316
418
  if (this._extensionRunner.hasHandlers("context")) {
317
419
  finalMessages = await this._extensionRunner.emitContext(currentMessages);
318
420
  }
319
- const gcMessages = this._applyContextGc(finalMessages, true).messages;
421
+ const auditReport = this._runContextAudit(finalMessages);
422
+ const shadowReport = this._runPromptPolicyPlanning(auditReport);
423
+ const memoryReport = await this._runMemoryRetrieval(finalMessages);
424
+ const gcResult = this._applyContextGc(finalMessages, true);
425
+ this._correlatePromptPolicyWithContextGc(gcResult.report);
426
+ const enforcementResult = this._runPromptEnforcement(gcResult.messages, shadowReport);
427
+ // Appended LAST, after gc and enforcement, so the bounded evidence block is
428
+ // never packed/stubbed/reshaped by either pass and always reflects this turn's
429
+ // fresh retrieval. Because nothing downstream trims it, memory-prompt-block.ts's
430
+ // character caps are the only budget protection for this block -- load-bearing,
431
+ // not merely defensive.
432
+ const gcMessages = this._maybeAppendMemoryEvidenceBlock(enforcementResult.messages, memoryReport);
320
433
  this._applyCostGuard(gcMessages);
321
434
  return gcMessages;
322
435
  };
@@ -421,15 +534,366 @@ export class AgentSession {
421
534
  _contextGcStorageDir() {
422
535
  return join(this._agentDir, "context-gc", this.sessionManager.getSessionId());
423
536
  }
537
+ _toolArtifactsDir() {
538
+ return join(this._agentDir, "context-artifacts", this.sessionManager.getSessionId());
539
+ }
540
+ /**
541
+ * Session-scoped, filesystem-backed artifact store for first-capture-then-bound tool
542
+ * output (grep/find only, for now -- see tool-output-artifacts.md). Lazily created and
543
+ * cached so every tool construction in this session shares one store instance.
544
+ *
545
+ * `packToolOutput()` registers a reference (the packing tool call's id) at pack time
546
+ * and fails closed, so packed artifacts are never prematurely collected.
547
+ * `_releaseGcPackedArtifactReferences()` (called from `_applyContextGc()`) releases
548
+ * that reference once context-gc packs the result out of live context, and
549
+ * opportunistically reclaims now-unreferenced artifacts via `cleanup()`.
550
+ * Remaining carry-forward gap: cleanup() now also runs at dispose(), but only reclaims
551
+ * already-released (zero-reference) artifacts. A session that ends before context-gc
552
+ * ever evicts a result never releases that reference, so its artifact stays on disk by
553
+ * design (resolvable on resume). Reclaiming those requires an explicit cross-session
554
+ * expiry/liveness policy, not just a sweep.
555
+ */
556
+ _getToolArtifactStore() {
557
+ this._toolArtifactStore ??= createFileArtifactStore({ baseDir: this._toolArtifactsDir() });
558
+ return this._toolArtifactStore;
559
+ }
560
+ /**
561
+ * Fixed path for this slice's local Pi OKF memory documents, shared across sessions
562
+ * under this agentDir (not session-scoped, unlike tool-artifacts/context-gc, since OKF
563
+ * memory represents durable cross-session knowledge, not a per-session capture). Not
564
+ * yet user-configurable -- see the memory-retrieval settings doc comment.
565
+ */
566
+ _memoryOkfDir() {
567
+ return join(this._agentDir, "okf-memory");
568
+ }
569
+ /**
570
+ * Session-scoped, read-only local OKF memory provider. Lazily created ONLY when memory
571
+ * retrieval is enabled (see `_runMemoryRetrieval`) -- never force-created, so a session
572
+ * with the setting off never touches `_memoryOkfDir()` at all (no directory access, no
573
+ * creation; `createOkfMemoryProvider` itself never writes/mkdirs either way).
574
+ */
575
+ _getMemoryOkfProvider() {
576
+ this._memoryOkfProvider ??= createOkfMemoryProvider({ rootDir: this._memoryOkfDir() });
577
+ return this._memoryOkfProvider;
578
+ }
579
+ /**
580
+ * One pass over the current branch, mapping each toolResult's toolCallId to its
581
+ * persisted session-entry id. Rebuilt every audit pass (O(branch) per turn), so this is
582
+ * O(n^2) over a long session. Fine at current scale; after the artifact-read fix this is
583
+ * the next per-turn audit cost to optimize if it ever matters (e.g. cache/incrementally
584
+ * update instead of a full rebuild).
585
+ */
586
+ _buildSessionEntryIdLookup() {
587
+ const map = new Map();
588
+ for (const entry of this.sessionManager.getBranch()) {
589
+ if (entry.type === "message" && entry.message.role === "toolResult") {
590
+ map.set(entry.message.toolCallId, entry.id);
591
+ }
592
+ }
593
+ return (toolCallId) => map.get(toolCallId);
594
+ }
595
+ /**
596
+ * Phase 1 observe-only audit pass (see context/context-audit.ts): converts live
597
+ * toolResult messages into ContextItems and runs the existing retention/hard-constraint
598
+ * evaluators over them, storing the latest deterministic report for tests/debugging.
599
+ * Read-only with respect to messages, the transcript, and artifact references -- uses
600
+ * `_toolArtifactStore` (the field), not `_getToolArtifactStore()` (the getter), so a
601
+ * session that never packed anything doesn't force-create a store/dir just to audit.
602
+ * Never throws into a live turn: any failure degrades to an empty report.
603
+ */
604
+ _runContextAudit(messages) {
605
+ try {
606
+ const report = runContextAudit(messages, {
607
+ turnIndex: this._turnIndex,
608
+ artifactStore: this._toolArtifactStore,
609
+ sessionEntryIdForToolCallId: this._buildSessionEntryIdLookup(),
610
+ });
611
+ this._latestContextAuditReport = report;
612
+ return report;
613
+ }
614
+ catch {
615
+ const report = { turnIndex: this._turnIndex, items: [] };
616
+ this._latestContextAuditReport = report;
617
+ return report;
618
+ }
619
+ }
620
+ /**
621
+ * Read-only inspection of the context audit. With `messages`, recomputes fresh against
622
+ * the given array (still no mutation of messages/transcript/artifact refs); without,
623
+ * returns the last report computed during a real transform pass.
624
+ */
625
+ getContextAuditReport(messages) {
626
+ if (messages)
627
+ return this._runContextAudit(messages);
628
+ return this._latestContextAuditReport ?? { turnIndex: this._turnIndex, items: [] };
629
+ }
630
+ /**
631
+ * Observe-first shadow/planning pass (see context/context-prompt-policy.ts): re-shapes
632
+ * the audit report into a per-item policy plan whose `appliedAction` is always
633
+ * "keep_raw" -- this never enforces anything, it only records what the policy engine
634
+ * would say. Never throws into a live turn: any failure degrades to an empty report.
635
+ */
636
+ _runPromptPolicyPlanning(auditReport) {
637
+ try {
638
+ const report = planPromptPolicy(auditReport);
639
+ this._latestPromptPolicyReport = report;
640
+ return report;
641
+ }
642
+ catch {
643
+ const report = { turnIndex: this._turnIndex, items: [] };
644
+ this._latestPromptPolicyReport = report;
645
+ return report;
646
+ }
647
+ }
648
+ /**
649
+ * Read-only inspection of the shadow policy plan. With `messages`, recomputes fresh
650
+ * (audit + plan) against the given array; without, returns the last plan computed
651
+ * during a real transform pass. Never mutates messages/transcript/artifact refs.
652
+ */
653
+ getPromptPolicyReport(messages) {
654
+ if (messages)
655
+ return this._runPromptPolicyPlanning(this._runContextAudit(messages));
656
+ return this._latestPromptPolicyReport ?? { turnIndex: this._turnIndex, items: [] };
657
+ }
658
+ /**
659
+ * Report-only correlation between the shadow plan just computed this turn and what the
660
+ * legacy context-gc pass actually packed. Runs after `_applyContextGc()` has already
661
+ * produced its report; never influences context-gc itself. Never throws into a live
662
+ * turn: any failure degrades to an empty correlation.
663
+ */
664
+ _correlatePromptPolicyWithContextGc(gcReport) {
665
+ const shadowReport = this._latestPromptPolicyReport ?? { turnIndex: this._turnIndex, items: [] };
666
+ try {
667
+ this._latestPromptPolicyGcCorrelation = correlateWithContextGc(shadowReport, gcReport);
668
+ }
669
+ catch {
670
+ this._latestPromptPolicyGcCorrelation = { turnIndex: this._turnIndex, entries: [] };
671
+ }
672
+ }
673
+ /** Read-only inspection of the latest shadow-plan/legacy-gc correlation, for tests/debugging. */
674
+ getPromptPolicyGcCorrelation() {
675
+ return this._latestPromptPolicyGcCorrelation ?? { turnIndex: this._turnIndex, entries: [] };
676
+ }
677
+ /**
678
+ * First enforcement pilot (see context/context-prompt-enforcement.ts): opt-in,
679
+ * default-disabled stub-in-place of stale artifact-backed tool_output results in the
680
+ * provider-visible message array only. Runs on `messages` AFTER context-gc has already
681
+ * produced its own result, so legacy context-gc's own packing/reporting is completely
682
+ * unaffected by this pass -- it only ever acts on messages gc left untouched this turn.
683
+ * Never throws into a live turn: any failure degrades to returning `messages` unchanged.
684
+ */
685
+ _runPromptEnforcement(messages, shadowReport) {
686
+ try {
687
+ const persistedSettings = this.settingsManager.getContextPromptEnforcementSettings();
688
+ const settings = {
689
+ ...persistedSettings,
690
+ // Runtime fact, never assumed: artifact_retrieve is a companion affordance
691
+ // (auto-activated alongside grep/find), not a default/global tool, so active
692
+ // tools can differ turn to turn -- see context-prompt-enforcement.ts's doc
693
+ // comment on why this is checked separately from hasAvailableRetrievalPath.
694
+ retrievalToolAvailable: this.getActiveToolNames().includes("artifact_retrieve"),
695
+ };
696
+ const result = enforcePromptPolicy(messages, shadowReport, settings);
697
+ this._latestPromptEnforcementReport = result.report;
698
+ return result;
699
+ }
700
+ catch {
701
+ const report = { turnIndex: this._turnIndex, items: [] };
702
+ this._latestPromptEnforcementReport = report;
703
+ return { messages, report };
704
+ }
705
+ }
706
+ /** Read-only inspection of the latest prompt-enforcement report, for tests/debugging. */
707
+ getPromptEnforcementReport() {
708
+ return this._latestPromptEnforcementReport ?? { turnIndex: this._turnIndex, items: [] };
709
+ }
710
+ /**
711
+ * Observe-only local memory retrieval (see context/memory-retrieval.ts and
712
+ * context/okf-memory-provider.ts): default disabled, opt-in setting. When disabled,
713
+ * never constructs the OKF provider (no directory access under `_memoryOkfDir()` at
714
+ * all) and returns an empty report -- fully fail-closed. When enabled, queries the
715
+ * local, read-only OKF provider with the latest user message text (empty if there is
716
+ * none, e.g. a goal-continuation turn -- degrades to zero results by construction, see
717
+ * `latestUserMessageText`'s doc comment) under `DEFAULT_LOCAL_MEMORY_EGRESS_POLICY`.
718
+ * Retrieved items are only ever stored in the report; nothing here touches `messages`,
719
+ * the transcript, or the provider-visible prompt. Never throws into a live turn: any
720
+ * failure (including a provider search error) degrades to an empty report.
721
+ */
722
+ async _runMemoryRetrieval(messages) {
723
+ try {
724
+ const settings = this.settingsManager.getMemoryRetrievalSettings();
725
+ if (!settings.enabled) {
726
+ const report = emptyMemoryRetrievalReport(settings.maxResults);
727
+ this._latestMemoryRetrievalReport = report;
728
+ return report;
729
+ }
730
+ const report = await retrieveMemoryForContext([this._getMemoryOkfProvider()], { query: latestUserMessageText(messages), maxResults: settings.maxResults }, {
731
+ createdAtTurn: this._turnIndex,
732
+ maxResults: settings.maxResults,
733
+ defaultLocalPolicy: DEFAULT_LOCAL_MEMORY_EGRESS_POLICY,
734
+ });
735
+ this._latestMemoryRetrievalReport = report;
736
+ return report;
737
+ }
738
+ catch {
739
+ const report = emptyMemoryRetrievalReport(0);
740
+ this._latestMemoryRetrievalReport = report;
741
+ return report;
742
+ }
743
+ }
744
+ /** Read-only inspection of the latest memory-retrieval report, for tests/debugging. */
745
+ getMemoryRetrievalReport() {
746
+ return this._latestMemoryRetrievalReport ?? emptyMemoryRetrievalReport(0);
747
+ }
748
+ /**
749
+ * Bounded prompt-surfacing pilot for local memory evidence (see
750
+ * context/memory-prompt-block.ts): opt-in, default disabled, and gated on TWO settings
751
+ * (`enabled` AND `includeInPrompt`) plus a non-empty `report.contextItems` -- the first
752
+ * two are belt-and-suspenders on top of the fact that `_runMemoryRetrieval` already
753
+ * leaves `contextItems` empty whenever `enabled` is false, regardless of
754
+ * `includeInPrompt`. Reuses the `report` this pass's `_runMemoryRetrieval` call already
755
+ * computed -- never re-queries the provider here.
756
+ *
757
+ * Appends exactly one ephemeral `custom`/"memory_evidence" message wrapped by
758
+ * `wrapUntrustedText` (the same nonce-fenced boundary + always-on system-prompt rule
759
+ * used for other untrusted content) to the END of `messages`. This is purely additive
760
+ * (never mutates an existing message) and purely transient: `messages` here is the
761
+ * array about to be sent to the provider, not `this.agent.state.messages` or anything
762
+ * persisted via `sessionManager` -- so the injected message can never reach the
763
+ * transcript, regardless of how many times this pass runs.
764
+ *
765
+ * Also records a `MemoryPromptInclusionReport` (context/memory-diagnostics.ts) at each
766
+ * branch below, for context_audit's diagnostic surface only -- this is pure bookkeeping
767
+ * alongside the existing branches, not a new branch/condition: the messages returned
768
+ * are unchanged by this recording.
769
+ */
770
+ _maybeAppendMemoryEvidenceBlock(messages, report) {
771
+ try {
772
+ const settings = this.settingsManager.getMemoryRetrievalSettings();
773
+ const base = {
774
+ enabled: settings.enabled,
775
+ includeInPrompt: settings.includeInPrompt,
776
+ selectedItemCount: report.contextItems.length,
777
+ };
778
+ if (!settings.enabled) {
779
+ this._latestMemoryPromptInclusionReport = {
780
+ ...base,
781
+ status: "disabled",
782
+ includedCount: 0,
783
+ omittedCount: 0,
784
+ blockChars: 0,
785
+ };
786
+ return messages;
787
+ }
788
+ if (!settings.includeInPrompt) {
789
+ this._latestMemoryPromptInclusionReport = {
790
+ ...base,
791
+ status: "include_disabled",
792
+ includedCount: 0,
793
+ omittedCount: 0,
794
+ blockChars: 0,
795
+ };
796
+ return messages;
797
+ }
798
+ if (report.contextItems.length === 0) {
799
+ this._latestMemoryPromptInclusionReport = {
800
+ ...base,
801
+ status: "no_results",
802
+ includedCount: 0,
803
+ omittedCount: 0,
804
+ blockChars: 0,
805
+ };
806
+ return messages;
807
+ }
808
+ const block = buildMemoryPromptBlock(report.contextItems);
809
+ if (!block.text) {
810
+ this._latestMemoryPromptInclusionReport = {
811
+ ...base,
812
+ status: "empty_block",
813
+ includedCount: block.includedCount,
814
+ omittedCount: block.omittedCount,
815
+ blockChars: 0,
816
+ };
817
+ return messages;
818
+ }
819
+ const wrapped = wrapUntrustedText(block.text, "memory:pi-okf");
820
+ const evidenceMessage = {
821
+ role: "custom",
822
+ customType: "memory_evidence",
823
+ content: [{ type: "text", text: wrapped }],
824
+ display: false,
825
+ timestamp: Date.now(),
826
+ };
827
+ this._latestMemoryPromptInclusionReport = {
828
+ ...base,
829
+ status: "included",
830
+ includedCount: block.includedCount,
831
+ omittedCount: block.omittedCount,
832
+ blockChars: wrapped.length,
833
+ sourceLabel: "memory:pi-okf",
834
+ };
835
+ return [...messages, evidenceMessage];
836
+ }
837
+ catch {
838
+ // `base` may not exist yet if the throw happened before it was computed (e.g.
839
+ // settings access or `report.contextItems` itself threw), so this branch cannot
840
+ // rely on it -- fall back to safe, fixed defaults rather than risk referencing
841
+ // a partially-evaluated value.
842
+ this._latestMemoryPromptInclusionReport = {
843
+ enabled: false,
844
+ includeInPrompt: false,
845
+ selectedItemCount: 0,
846
+ status: "failed",
847
+ includedCount: 0,
848
+ omittedCount: 0,
849
+ blockChars: 0,
850
+ };
851
+ return messages;
852
+ }
853
+ }
854
+ /** Read-only inspection of the latest memory-prompt-inclusion decision, for tests/debugging and context_audit. */
855
+ getMemoryPromptInclusionReport() {
856
+ return this._latestMemoryPromptInclusionReport ?? defaultMemoryPromptInclusionReport();
857
+ }
858
+ /**
859
+ * Combines the already-stored, no-arg latest reports (never re-queries the provider or
860
+ * touches the OKF directory) into the safe, allow-list-projected shape context_audit
861
+ * exposes. See context/memory-diagnostics.ts for why this projection is allow-list
862
+ * based rather than a spread-then-delete of the raw report.
863
+ */
864
+ _getMemoryAuditDiagnostics() {
865
+ const settings = this.settingsManager.getMemoryRetrievalSettings();
866
+ return {
867
+ retrieval: sanitizeMemoryRetrievalReportForDiagnostics(this.getMemoryRetrievalReport(), settings),
868
+ promptInclusion: this.getMemoryPromptInclusionReport(),
869
+ };
870
+ }
424
871
  _applyContextGc(messages, writePayloads) {
425
872
  try {
873
+ const settings = this.settingsManager.getContextGcSettings();
874
+ // Merge the ACTIVE memory providers' own page markers (e.g. transcript-recall's
875
+ // "<memory_context") into the semantic-memory marker list. The settings default is
876
+ // provider-agnostic and non-empty, so without this merge the recall pages the bundled
877
+ // default provider actually emits are never recognized as semantic-memory pages and
878
+ // accumulate raw for the life of the session — the exact growth Bug #7 GC exists to stop.
879
+ const providerMarkers = this._memoryManager.getContextMarkers();
426
880
  const result = applyContextGc(messages, {
427
- ...this.settingsManager.getContextGcSettings(),
881
+ ...settings,
882
+ semanticMemory: {
883
+ ...settings.semanticMemory,
884
+ markers: [...new Set([...settings.semanticMemory.markers, ...providerMarkers])],
885
+ },
428
886
  cwd: this._cwd,
429
887
  storageDir: this._contextGcStorageDir(),
430
888
  writePayloads,
431
889
  });
432
890
  this._latestContextGcReport = result.report;
891
+ // Only release/reclaim on the real per-turn pass (writePayloads=true), never on
892
+ // the read-only status-report path (getContextGcReport with writePayloads=false),
893
+ // so merely inspecting the report can't have side effects.
894
+ if (writePayloads && result.report.packedCount > 0) {
895
+ this._releaseGcPackedArtifactReferences(messages, result.report);
896
+ }
433
897
  return result;
434
898
  }
435
899
  catch {
@@ -445,6 +909,39 @@ export class AgentSession {
445
909
  return { messages, report };
446
910
  }
447
911
  }
912
+ /**
913
+ * Reference-release + cleanup lifecycle: once context-gc has packed a grep/find tool
914
+ * result out of the live prompt (the message is no longer current/active working
915
+ * context -- see contracts-and-retention.md's "ephemeral"/"expired" retention
916
+ * classes), release the pack-time reference `packToolOutput()` registered for it, and
917
+ * opportunistically reclaim now-unreferenced artifacts. This is the other half of the
918
+ * D2b-1 gate: artifacts were being registered but never released, so they accumulated
919
+ * for the life of the session.
920
+ *
921
+ * `record.toolCallId` (from context-gc's packed record) is exactly the holder id
922
+ * `packToolOutput()` used when it called `addReference()` -- both trace back to the
923
+ * same tool call's id -- so no separate bookkeeping is needed to find it.
924
+ */
925
+ _releaseGcPackedArtifactReferences(messages, report) {
926
+ const store = this._toolArtifactStore;
927
+ if (!store)
928
+ return; // no store was ever constructed, so nothing could have been packed to one
929
+ let releasedAny = false;
930
+ for (const record of report.records) {
931
+ if (record.toolName !== "grep" && record.toolName !== "find")
932
+ continue;
933
+ const artifactId = extractArtifactId(messages[record.messageIndex]);
934
+ if (!artifactId)
935
+ continue;
936
+ if (store.removeReference(artifactId, record.toolCallId))
937
+ releasedAny = true;
938
+ }
939
+ // Cleanup only runs immediately after a release actually happened in this pass, so
940
+ // a long session doesn't re-scan the artifact directory on every turn once nothing
941
+ // new became eligible for release.
942
+ if (releasedAny)
943
+ store.cleanup();
944
+ }
448
945
  getContextGcReport(messages) {
449
946
  if (messages)
450
947
  return this._applyContextGc(messages, false).report;
@@ -476,8 +973,8 @@ export class AgentSession {
476
973
  }
477
974
  _installAgentToolHooks() {
478
975
  this.agent.beforeToolCall = async ({ toolCall, args }) => {
479
- if (this._activeModelRouterIntent &&
480
- shouldEscalateModelRouterTool({ intent: this._activeModelRouterIntent, toolName: toolCall.name, args })) {
976
+ if (this._activeModelRouterRoute &&
977
+ shouldEscalateModelRouterTool({ tier: this._activeModelRouterRoute.tier, toolName: toolCall.name, args })) {
481
978
  this._modelRouterEscalationRequested = true;
482
979
  this.agent.abort();
483
980
  return {
@@ -485,6 +982,22 @@ export class AgentSession {
485
982
  reason: "Model router escalation required: a cheap research turn attempted a mutating tool. Retry the turn on the configured expensive model.",
486
983
  };
487
984
  }
985
+ // Autonomy tool gating
986
+ const gateResult = evaluateToolGate({
987
+ toolName: toolCall.name,
988
+ args,
989
+ cwd: this._cwd,
990
+ envelope: this.capabilityEnvelope,
991
+ });
992
+ if (this.capabilityEnvelope) {
993
+ this._lastAutonomyGateOutcome = gateResult;
994
+ }
995
+ if (gateResult.outcome === "block" || gateResult.outcome === "ask-user") {
996
+ return {
997
+ block: true,
998
+ reason: `Tool execution blocked by autonomy gate [${gateResult.gate}]: ${gateResult.message} (${gateResult.reasonCode})`,
999
+ };
1000
+ }
488
1001
  const runner = this._extensionRunner;
489
1002
  if (!runner.hasHandlers("tool_call")) {
490
1003
  return undefined;
@@ -584,8 +1097,13 @@ export class AgentSession {
584
1097
  }
585
1098
  // Emit to extensions first
586
1099
  await this._emitExtensionEvent(event);
1100
+ const suppressRetryPromptEvent = this._isModelRouterRetry &&
1101
+ (event.type === "message_start" || event.type === "message_end") &&
1102
+ (event.message.role === "user" || event.message.role === "custom");
587
1103
  // Notify all listeners
588
- this._emit(event.type === "agent_end" ? { ...event, willRetry: this._willRetryAfterAgentEnd(event) } : event);
1104
+ if (!suppressRetryPromptEvent) {
1105
+ this._emit(event.type === "agent_end" ? { ...event, willRetry: this._willRetryAfterAgentEnd(event) } : event);
1106
+ }
589
1107
  // Handle session/context retention. Tool result details are UI/log metadata,
590
1108
  // not provider-visible content, and large graph/search payloads can otherwise
591
1109
  // accumulate until the interactive Node process hits the V8 heap limit.
@@ -829,6 +1347,8 @@ export class AgentSession {
829
1347
  */
830
1348
  dispose() {
831
1349
  try {
1350
+ this._clearGoalAutoContinueTimer();
1351
+ this._clearResearchLaneTimer();
832
1352
  this.abortRetry();
833
1353
  this.abortCompaction();
834
1354
  this.abortBranchSummary();
@@ -840,6 +1360,10 @@ export class AgentSession {
840
1360
  // write memory/skills against this now-disposed session.
841
1361
  this._disposed = true;
842
1362
  this._reflectionAbort.abort();
1363
+ // Abort any in-flight research pass or delegated worker for the same reason: a disposed
1364
+ // session must not keep spending tokens or persist evidence against dead state.
1365
+ this._researchLaneAbort.abort();
1366
+ this._workerDelegationAbort.abort();
843
1367
  // Bug #20: clear the hooks this session installed on the shared agent so their closures stop
844
1368
  // pinning this (deactivated) session — and all its history/maps — in memory if the agent
845
1369
  // instance outlives the session.
@@ -856,6 +1380,19 @@ export class AgentSession {
856
1380
  // true session-end hook (P3); file-store shutdown is a no-op.
857
1381
  void this._memoryManager.shutdownAll().catch(() => { });
858
1382
  cleanupSessionResources(this.sessionId);
1383
+ // Best-effort final sweep for any grep/find artifact already released (reference
1384
+ // count zero) but not yet reclaimed -- e.g. a release whose cleanup() call failed
1385
+ // transiently. This is conservative: it never releases a still-referenced
1386
+ // artifact, so a session that ends before context-gc ever evicts a result (too
1387
+ // short to cross preserveRecentMessages) correctly leaves that artifact in place,
1388
+ // resolvable if the same session is resumed later. It does not sweep OTHER
1389
+ // sessions' artifact directories.
1390
+ try {
1391
+ this._toolArtifactStore?.cleanup();
1392
+ }
1393
+ catch {
1394
+ // Best-effort; dispose must succeed regardless.
1395
+ }
859
1396
  }
860
1397
  // =========================================================================
861
1398
  // Read-only State Access
@@ -911,21 +1448,47 @@ export class AgentSession {
911
1448
  * Only tools in the registry can be enabled. Unknown tool names are ignored.
912
1449
  * Also rebuilds the system prompt to reflect the new tool set.
913
1450
  * Changes take effect on the next agent turn.
1451
+ *
1452
+ * artifact_retrieve is auto-activated as a companion whenever grep or find ends up
1453
+ * in the resulting active set and artifact_retrieve is registered (i.e. not excluded/
1454
+ * blocked/outside an allowlist -- the registry itself is built with that same filter,
1455
+ * so registry presence already tracks "allowed"). This is enforced here, not just in
1456
+ * the settings/profile refresh flow, because this method is a public, extension-
1457
+ * exposed activation path (`setActiveTools`) on its own: without this, grep/find could
1458
+ * end up active while still being handed an artifact store (gated on "allowed" in
1459
+ * `_buildRuntime`) with no active tool able to resolve the resulting
1460
+ * "Full output: artifact tool-output:<id>" handle.
914
1461
  */
915
1462
  setActiveToolsByName(toolNames) {
1463
+ // Model capability: small-window models get a reduced tool surface derived from the model's
1464
+ // own metadata. The unfiltered request is remembered so a later switch to a larger model
1465
+ // restores it (the filter is re-applied on every model change).
1466
+ this._requestedActiveToolNames = [...toolNames];
1467
+ const capabilityFiltered = filterToolNamesForCapability(toolNames, this.getModelCapabilityProfile());
916
1468
  const tools = [];
917
1469
  const validToolNames = [];
918
- for (const name of toolNames) {
1470
+ const seen = new Set();
1471
+ const addIfRegistered = (name) => {
1472
+ if (seen.has(name))
1473
+ return;
919
1474
  const tool = this._toolRegistry.get(name);
920
- if (tool) {
921
- tools.push(tool);
922
- validToolNames.push(name);
923
- }
1475
+ if (!tool)
1476
+ return;
1477
+ seen.add(name);
1478
+ tools.push(tool);
1479
+ validToolNames.push(name);
1480
+ };
1481
+ for (const name of capabilityFiltered) {
1482
+ addIfRegistered(name);
1483
+ }
1484
+ if (validToolNames.includes("grep") || validToolNames.includes("find")) {
1485
+ addIfRegistered("artifact_retrieve");
924
1486
  }
925
1487
  this.agent.state.tools = tools;
926
1488
  // Rebuild base system prompt with new tool set
927
1489
  this._baseSystemPrompt = this._rebuildSystemPrompt(validToolNames);
928
1490
  this.agent.state.systemPrompt = this._baseSystemPrompt;
1491
+ this._checkContextWindowUsageWarning();
929
1492
  }
930
1493
  /** Whether compaction or branch summarization is currently running */
931
1494
  get isCompacting() {
@@ -1127,35 +1690,166 @@ export class AgentSession {
1127
1690
  await this._drainQueuedExtensionCommands();
1128
1691
  }
1129
1692
  }
1130
- _resolveModelRouterModelForIntent(intent) {
1693
+ _isModelAvailableAndAuthed(pattern) {
1694
+ const resolved = resolveCliModel({ cliModel: pattern, modelRegistry: this._modelRegistry });
1695
+ if (!resolved.model)
1696
+ return false;
1697
+ return this._modelRegistry.hasConfiguredAuth(resolved.model);
1698
+ }
1699
+ _resolveModelRouterTurnRoute(prompt) {
1131
1700
  const settings = this.settingsManager.getModelRouterSettings();
1132
- const modelLabel = intent === "research" ? "cheap model" : "expensive model";
1133
1701
  if (!settings.enabled) {
1134
1702
  this._lastModelRouterSkipReason = "disabled";
1135
1703
  return undefined;
1136
1704
  }
1137
- const modelPattern = intent === "research" ? settings.cheapModel : settings.expensiveModel;
1705
+ const decision = classifyModelRouterRoute(prompt);
1706
+ this._lastModelRouterIntent = decision.tier === "cheap" ? "research" : "modify";
1707
+ // Learning tier must not be selected for normal user prompts
1708
+ if (decision.tier === "learning") {
1709
+ this._lastModelRouterSkipReason = "learning tier not supported for user prompts";
1710
+ return undefined;
1711
+ }
1712
+ const modelPattern = settings[decision.tier === "cheap" ? "cheapModel" : decision.tier === "medium" ? "mediumModel" : "expensiveModel"];
1713
+ const label = decision.tier === "cheap" ? "cheap model" : decision.tier === "medium" ? "medium model" : "expensive model";
1714
+ if (decision.tier === "medium" && (!modelPattern || !this._isModelAvailableAndAuthed(modelPattern))) {
1715
+ const expensivePattern = settings.expensiveModel;
1716
+ if (expensivePattern && this._isModelAvailableAndAuthed(expensivePattern)) {
1717
+ const resolvedExpensive = resolveCliModel({
1718
+ cliModel: expensivePattern,
1719
+ modelRegistry: this._modelRegistry,
1720
+ });
1721
+ if (resolvedExpensive.model) {
1722
+ decision.fallbackFrom = "medium";
1723
+ decision.tier = "expensive";
1724
+ decision.reasonCode = "medium_unavailable_fallback_expensive";
1725
+ decision.reasons = [...decision.reasons, "Medium model is unavailable, falling back to expensive model"];
1726
+ decision.model = formatModelRouterModel(resolvedExpensive.model);
1727
+ this._lastModelRouterSkipReason = undefined;
1728
+ return { decision, model: resolvedExpensive.model };
1729
+ }
1730
+ }
1731
+ this._lastModelRouterSkipReason = "medium model and expensive fallback are unavailable";
1732
+ return undefined;
1733
+ }
1138
1734
  if (!modelPattern) {
1139
- this._lastModelRouterSkipReason = `${modelLabel} unset`;
1735
+ this._lastModelRouterSkipReason = `${label} unset`;
1140
1736
  return undefined;
1141
1737
  }
1142
1738
  const resolved = resolveCliModel({ cliModel: modelPattern, modelRegistry: this._modelRegistry });
1143
1739
  if (!resolved.model) {
1144
- this._lastModelRouterSkipReason = `${modelLabel} unresolved: ${modelPattern}`;
1740
+ this._lastModelRouterSkipReason = `${label} unresolved: ${modelPattern}`;
1145
1741
  return undefined;
1146
1742
  }
1147
1743
  const resolvedName = formatModelRouterModel(resolved.model);
1148
1744
  if (!this._modelRegistry.hasConfiguredAuth(resolved.model)) {
1149
- this._lastModelRouterSkipReason = `${modelLabel} missing auth: ${resolvedName}`;
1745
+ this._lastModelRouterSkipReason = `${label} missing auth: ${resolvedName}`;
1150
1746
  return undefined;
1151
1747
  }
1152
1748
  this._lastModelRouterSkipReason = undefined;
1749
+ decision.model = resolvedName;
1750
+ return { decision, model: resolved.model };
1751
+ }
1752
+ _resolveModelRouterModelForIntent(intent) {
1753
+ const settings = this.settingsManager.getModelRouterSettings();
1754
+ const modelPattern = intent === "research" ? settings.cheapModel : settings.expensiveModel;
1755
+ if (!modelPattern)
1756
+ return undefined;
1757
+ const resolved = resolveCliModel({ cliModel: modelPattern, modelRegistry: this._modelRegistry });
1758
+ if (!resolved.model)
1759
+ return undefined;
1760
+ if (!this._modelRegistry.hasConfiguredAuth(resolved.model))
1761
+ return undefined;
1762
+ return resolved.model;
1763
+ }
1764
+ _resolveConfiguredTierModel(tier) {
1765
+ const settings = this.settingsManager.getModelRouterSettings();
1766
+ const pattern = tier === "cheap" ? settings.cheapModel : tier === "medium" ? settings.mediumModel : settings.expensiveModel;
1767
+ if (!pattern)
1768
+ return undefined;
1769
+ const resolved = resolveCliModel({ cliModel: pattern, modelRegistry: this._modelRegistry });
1770
+ if (!resolved.model)
1771
+ return undefined;
1772
+ if (!this._modelRegistry.hasConfiguredAuth(resolved.model))
1773
+ return undefined;
1153
1774
  return resolved.model;
1154
1775
  }
1776
+ /**
1777
+ * Router resolution with the routing judge (auto-on with the router): the regex classifier's
1778
+ * decision is the baseline; when a judge model resolves (judgeModel, else mediumModel), one
1779
+ * bounded, tool-less completion may move the tier between cheap/medium/expensive — never to
1780
+ * learning. Core rule encoded in the judge prompt: planning is never cheap unless genuinely
1781
+ * trivial. Every fallback stays visible in the decision reasons, and judge spend reports
1782
+ * through spawned-usage accounting.
1783
+ */
1784
+ async _resolveModelRouterTurnRouteJudged(prompt, options) {
1785
+ const baseline = this._resolveModelRouterTurnRoute(prompt);
1786
+ if (!baseline)
1787
+ return undefined;
1788
+ if (options?.skipJudge)
1789
+ return baseline;
1790
+ const settings = this.settingsManager.getModelRouterSettings();
1791
+ if (!settings.judgeEnabled)
1792
+ return baseline;
1793
+ const judgePattern = settings.judgeModel ?? settings.mediumModel;
1794
+ if (!judgePattern)
1795
+ return baseline;
1796
+ const judgeModel = this._resolveLaneModel(judgePattern);
1797
+ if (!judgeModel)
1798
+ return baseline;
1799
+ let spentUsage;
1800
+ const judged = await runRouteJudge({
1801
+ prompt,
1802
+ baseline: baseline.decision,
1803
+ signal: this._reflectionAbort.signal,
1804
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
1805
+ const completion = await this.runIsolatedCompletion({
1806
+ systemPrompt,
1807
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
1808
+ model: judgeModel,
1809
+ thinkingLevel: "off",
1810
+ maxTokens: ROUTE_JUDGE_MAX_OUTPUT_TOKENS,
1811
+ signal,
1812
+ // The judge system prompt is static — the provider can cache the prefix.
1813
+ cacheRetention: "short",
1814
+ });
1815
+ spentUsage = completion.usage;
1816
+ return {
1817
+ text: completion.text,
1818
+ costUsd: completion.usage.cost.total,
1819
+ stopReason: String(completion.stopReason),
1820
+ };
1821
+ },
1822
+ });
1823
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
1824
+ this.addSpawnedUsage(spentUsage, { label: "router-judge" });
1825
+ }
1826
+ if (!judged.verdict || judged.decision.tier === baseline.decision.tier) {
1827
+ // Same tier (or judge fell back): keep the baseline model, carry the annotated decision.
1828
+ return { decision: judged.decision, model: baseline.model };
1829
+ }
1830
+ const judgedTier = judged.decision.tier;
1831
+ if (judgedTier !== "cheap" && judgedTier !== "medium" && judgedTier !== "expensive") {
1832
+ return { decision: baseline.decision, model: baseline.model };
1833
+ }
1834
+ const judgedModel = this._resolveConfiguredTierModel(judgedTier);
1835
+ if (!judgedModel) {
1836
+ return {
1837
+ decision: {
1838
+ ...baseline.decision,
1839
+ reasons: [
1840
+ ...baseline.decision.reasons,
1841
+ `Route judge chose ${judgedTier} but no model resolves for that tier; baseline kept`,
1842
+ ],
1843
+ },
1844
+ model: baseline.model,
1845
+ };
1846
+ }
1847
+ return { decision: { ...judged.decision, model: formatModelRouterModel(judgedModel) }, model: judgedModel };
1848
+ }
1849
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: test seam
1155
1850
  _resolveModelRouterTurnModel(prompt) {
1156
- const intent = classifyModelRouterIntent(prompt);
1157
- this._lastModelRouterIntent = intent;
1158
- return this._resolveModelRouterModelForIntent(intent);
1851
+ const resolved = this._resolveModelRouterTurnRoute(prompt);
1852
+ return resolved?.model;
1159
1853
  }
1160
1854
  getModelRouterStatus(formatLabel) {
1161
1855
  const recentDecisions = getRecentModelRouterDecisions(this.sessionManager.getEntries());
@@ -1174,7 +1868,7 @@ export class AgentSession {
1174
1868
  }
1175
1869
  return lines.join("\n");
1176
1870
  }
1177
- async _runAgentPromptWithModelRouter(messages, routedModel, routedIntent, persistDecision = true) {
1871
+ async _runAgentPromptWithModelRouter(messages, routedModel, routeDecision, persistDecision = true) {
1178
1872
  if (!routedModel) {
1179
1873
  await this._runAgentPrompt(messages);
1180
1874
  return;
@@ -1182,23 +1876,30 @@ export class AgentSession {
1182
1876
  const previousModel = this.agent.state.model;
1183
1877
  const previousThinkingLevel = this.agent.state.thinkingLevel;
1184
1878
  const previousActiveModelRouterIntent = this._activeModelRouterIntent;
1879
+ const previousActiveModelRouterRoute = this._activeModelRouterRoute;
1185
1880
  const previousModelRouterSessionBuffer = this._modelRouterSessionBuffer;
1186
1881
  const previousModelRouterEscalationRequested = this._modelRouterEscalationRequested;
1187
- const bufferRoutedTurn = routedIntent === "research";
1882
+ const bufferRoutedTurn = routeDecision?.tier === "cheap";
1188
1883
  const originalHistoryLength = this.agent.state.messages.length;
1189
1884
  let retryModel;
1190
- let completedDecision = routedIntent
1885
+ let completedDecision = routeDecision
1191
1886
  ? {
1192
- intent: routedIntent,
1887
+ route: routeDecision,
1193
1888
  routedModel: formatModelRouterModel(routedModel),
1194
1889
  outcome: "routed",
1890
+ intent: routeDecision.tier === "cheap" ? "research" : "modify",
1195
1891
  }
1196
1892
  : undefined;
1197
1893
  let thrownError;
1198
- if (routedIntent) {
1894
+ if (routeDecision) {
1199
1895
  this._lastModelRouterDecision = completedDecision;
1200
1896
  }
1201
- this._activeModelRouterIntent = routedIntent;
1897
+ this._activeModelRouterIntent = routeDecision
1898
+ ? routeDecision.tier === "cheap"
1899
+ ? "research"
1900
+ : "modify"
1901
+ : undefined;
1902
+ this._activeModelRouterRoute = routeDecision;
1202
1903
  if (bufferRoutedTurn) {
1203
1904
  this._modelRouterSessionBuffer = createModelRouterSessionBuffer();
1204
1905
  this._modelRouterEscalationRequested = false;
@@ -1213,10 +1914,11 @@ export class AgentSession {
1213
1914
  this.agent.state.messages.splice(originalHistoryLength);
1214
1915
  retryModel = this._resolveModelRouterModelForIntent("modify") ?? previousModel;
1215
1916
  completedDecision = {
1216
- intent: routedIntent,
1917
+ route: routeDecision,
1217
1918
  routedModel: formatModelRouterModel(routedModel),
1218
1919
  outcome: "escalated",
1219
1920
  retryModel: formatModelRouterModel(retryModel),
1921
+ intent: routeDecision.tier === "cheap" ? "research" : "modify",
1220
1922
  };
1221
1923
  this._lastModelRouterDecision = completedDecision;
1222
1924
  }
@@ -1236,15 +1938,36 @@ export class AgentSession {
1236
1938
  }
1237
1939
  }
1238
1940
  finally {
1239
- this.agent.state.model = previousModel;
1240
- this.agent.state.thinkingLevel = previousThinkingLevel;
1941
+ // Restore the pre-route model ONLY if the routed model is still in place: a command
1942
+ // handler may have legitimately changed the session model mid-turn (setModel or a
1943
+ // provider re-registration), and clobbering that would silently undo the change.
1944
+ if (modelsAreEqual(this.agent.state.model, routedModel)) {
1945
+ this.agent.state.model = previousModel;
1946
+ this.agent.state.thinkingLevel = previousThinkingLevel;
1947
+ // The registry may have changed mid-turn (command-time registerProvider): re-resolve
1948
+ // the restored model so a provider override is not dropped with the routed model.
1949
+ this._refreshCurrentModelFromRegistry();
1950
+ }
1241
1951
  this._activeModelRouterIntent = previousActiveModelRouterIntent;
1952
+ this._activeModelRouterRoute = previousActiveModelRouterRoute;
1242
1953
  this._modelRouterSessionBuffer = previousModelRouterSessionBuffer;
1243
1954
  this._modelRouterEscalationRequested = previousModelRouterEscalationRequested;
1244
1955
  }
1245
1956
  if (retryModel && !thrownError) {
1957
+ const previousIsModelRouterRetry = this._isModelRouterRetry;
1246
1958
  try {
1247
- await this._runAgentPromptWithModelRouter(messages, retryModel, "modify", false);
1959
+ this._isModelRouterRetry = true;
1960
+ const retryDecision = {
1961
+ tier: "expensive",
1962
+ risk: "high-impact",
1963
+ confidence: 1.0,
1964
+ reasonCode: "cheap_mutating_tool_escalation",
1965
+ reasons: ["Cheap research turn attempted a mutating tool and escalated"],
1966
+ fallbackFrom: "cheap",
1967
+ model: formatModelRouterModel(retryModel),
1968
+ };
1969
+ await this._runAgentPromptWithModelRouter(messages, retryModel, retryDecision, false);
1970
+ this._lastModelRouterDecision = completedDecision;
1248
1971
  }
1249
1972
  catch (error) {
1250
1973
  thrownError = error;
@@ -1253,6 +1976,9 @@ export class AgentSession {
1253
1976
  this._lastModelRouterDecision = completedDecision;
1254
1977
  }
1255
1978
  }
1979
+ finally {
1980
+ this._isModelRouterRetry = previousIsModelRouterRetry;
1981
+ }
1256
1982
  }
1257
1983
  if (persistDecision && completedDecision) {
1258
1984
  persistModelRouterDecision(this.sessionManager, completedDecision);
@@ -1296,6 +2022,9 @@ export class AgentSession {
1296
2022
  * @throws Error if no model selected or no API key available (when not streaming)
1297
2023
  */
1298
2024
  async prompt(text, options) {
2025
+ if (options?.autoContinueGoal !== false) {
2026
+ this._clearGoalAutoContinueTimer();
2027
+ }
1299
2028
  if ((this.isStreaming || this.isRetrying) && options?.streamingBehavior) {
1300
2029
  const run = this._streamingPromptSubmissionTail.then(() => this._promptUnserialized(text, options), () => this._promptUnserialized(text, options));
1301
2030
  this._streamingPromptSubmissionTail = run.catch(() => { });
@@ -1325,7 +2054,7 @@ export class AgentSession {
1325
2054
  const preflightResult = options?.preflightResult;
1326
2055
  let messages;
1327
2056
  let routedTurnModel;
1328
- let routedTurnIntent;
2057
+ let routedTurnRouteDecision;
1329
2058
  // R4 effectiveness feedback: remember the recall page + the query so we can score, after the
1330
2059
  // response, whether the agent actually used the recalled context.
1331
2060
  let injectedRecall = "";
@@ -1388,8 +2117,13 @@ export class AgentSession {
1388
2117
  }
1389
2118
  // Flush any pending bash messages before the new prompt
1390
2119
  this._flushPendingBashMessages();
1391
- routedTurnModel = this._resolveModelRouterTurnModel(expandedText);
1392
- routedTurnIntent = routedTurnModel ? classifyModelRouterIntent(expandedText) : undefined;
2120
+ const resolvedRouteInfo = await this._resolveModelRouterTurnRouteJudged(expandedText, {
2121
+ // Internally generated turns (goal continuation, lane follow-ups) never consult the judge:
2122
+ // the regex floor already classified them, and a 20-turn loop must not buy 20 judge calls.
2123
+ skipJudge: options?.autoContinueGoal === false,
2124
+ });
2125
+ routedTurnModel = resolvedRouteInfo?.model;
2126
+ routedTurnRouteDecision = resolvedRouteInfo?.decision;
1393
2127
  const requestModel = routedTurnModel ?? this.model;
1394
2128
  // Validate model
1395
2129
  if (!requestModel) {
@@ -1404,6 +2138,7 @@ export class AgentSession {
1404
2138
  }
1405
2139
  throw new Error(formatNoApiKeyFoundMessage(requestModel.provider));
1406
2140
  }
2141
+ this._checkContextWindowUsageWarning();
1407
2142
  // Check if we need to compact before sending (catches aborted responses).
1408
2143
  // Do not call agent.continue() here: the next model turn must include the
1409
2144
  // user's pending prompt, not an empty continuation after compaction.
@@ -1481,7 +2216,7 @@ export class AgentSession {
1481
2216
  return;
1482
2217
  }
1483
2218
  preflightResult?.(true);
1484
- await this._runAgentPromptWithModelRouter(messages, routedTurnModel, routedTurnIntent);
2219
+ await this._runAgentPromptWithModelRouter(messages, routedTurnModel, routedTurnRouteDecision);
1485
2220
  // R4: score whether the agent actually used the recalled context, so the recall gate can adapt.
1486
2221
  if (injectedRecall) {
1487
2222
  const response = this._findLastAssistantMessage();
@@ -1495,6 +2230,8 @@ export class AgentSession {
1495
2230
  this._effectivenessTracker.recordRecallOutcome(injectedRecall, recallQuery, responseText);
1496
2231
  }
1497
2232
  }
2233
+ this._scheduleGoalAutoContinueFromIdle(options);
2234
+ this._scheduleResearchLaneFromIdle();
1498
2235
  }
1499
2236
  /**
1500
2237
  * Try to execute an extension command. Returns true if command was found and executed.
@@ -1814,6 +2551,20 @@ export class AgentSession {
1814
2551
  // Re-clamp thinking level for new model's capabilities
1815
2552
  this.setThinkingLevel(thinkingLevel, { persistSettings });
1816
2553
  await this._emitModelSelect(model, previousModel, "set");
2554
+ this._checkContextWindowUsageWarning();
2555
+ // Re-derive the model-capability tool surface for the new model (restores the full requested
2556
+ // set when moving small -> large, reduces it when moving large -> small).
2557
+ if (this._requestedActiveToolNames) {
2558
+ const before = this.getActiveToolNames().join(",");
2559
+ this.setActiveToolsByName(this._requestedActiveToolNames);
2560
+ const capability = this.getModelCapabilityProfile();
2561
+ if (capability.class !== "full" && this.getActiveToolNames().join(",") !== before) {
2562
+ this._emit({
2563
+ type: "warning",
2564
+ message: `Small-context model detected (${capability.contextWindow ?? "unknown"} tokens, class '${capability.class}'): active tools reduced to [${this.getActiveToolNames().join(", ")}]; background lanes ${capability.backgroundLanesEnabled ? "enabled" : "disabled"}.`,
2565
+ });
2566
+ }
2567
+ }
1817
2568
  }
1818
2569
  /**
1819
2570
  * Cycle to next/previous model.
@@ -1849,6 +2600,7 @@ export class AgentSession {
1849
2600
  // setThinkingLevel clamps to model capabilities.
1850
2601
  this.setThinkingLevel(thinkingLevel);
1851
2602
  await this._emitModelSelect(next.model, currentModel, "cycle");
2603
+ this._checkContextWindowUsageWarning();
1852
2604
  return { model: next.model, thinkingLevel: this.thinkingLevel, isScoped: true };
1853
2605
  }
1854
2606
  async _cycleAvailableModel(direction) {
@@ -1869,6 +2621,7 @@ export class AgentSession {
1869
2621
  // Re-clamp thinking level for new model's capabilities
1870
2622
  this.setThinkingLevel(thinkingLevel);
1871
2623
  await this._emitModelSelect(nextModel, currentModel, "cycle");
2624
+ this._checkContextWindowUsageWarning();
1872
2625
  return { model: nextModel, thinkingLevel: this.thinkingLevel, isScoped: false };
1873
2626
  }
1874
2627
  // =========================================================================
@@ -1980,7 +2733,7 @@ export class AgentSession {
1980
2733
  const compactionModel = this._resolveCompactionModel(this.model);
1981
2734
  const { apiKey, headers } = await this._getCompactionRequestAuth(compactionModel);
1982
2735
  const pathEntries = this.sessionManager.getBranch();
1983
- const settings = this.settingsManager.getCompactionSettings();
2736
+ const settings = this._getAdaptedCompactionSettings();
1984
2737
  const preparation = prepareCompaction(pathEntries, settings);
1985
2738
  if (!preparation) {
1986
2739
  // Check why we can't compact
@@ -2100,8 +2853,57 @@ export class AgentSession {
2100
2853
  * @param assistantMessage The assistant message to check
2101
2854
  * @param skipAbortedCheck If false, include aborted messages (for pre-prompt check). Default: true
2102
2855
  */
2103
- async _checkCompaction(assistantMessage, skipAbortedCheck = true) {
2856
+ _getAdaptedCompactionSettings() {
2104
2857
  const settings = this.settingsManager.getCompactionSettings();
2858
+ if (!this.model)
2859
+ return settings;
2860
+ const contextWindow = this.model.contextWindow ?? 0;
2861
+ if (contextWindow <= 0)
2862
+ return settings;
2863
+ // Adapt reserveTokens: at most 25% of context window
2864
+ const maxReserve = Math.floor(contextWindow * 0.25);
2865
+ const reserveTokens = Math.min(settings.reserveTokens, maxReserve);
2866
+ // Adapt keepRecentTokens: at most 50% of context window
2867
+ const maxKeepRecent = Math.floor(contextWindow * 0.5);
2868
+ const keepRecentTokens = Math.min(settings.keepRecentTokens, maxKeepRecent);
2869
+ return {
2870
+ ...settings,
2871
+ reserveTokens,
2872
+ keepRecentTokens,
2873
+ };
2874
+ }
2875
+ _checkContextWindowUsageWarning() {
2876
+ if (!this.model)
2877
+ return;
2878
+ const contextWindow = this.model.contextWindow ?? 0;
2879
+ if (contextWindow <= 0)
2880
+ return;
2881
+ const systemPromptTokens = Math.ceil((this.agent.state.systemPrompt ?? "").length / 4);
2882
+ let toolsChars = 0;
2883
+ for (const tool of this.agent.state.tools || []) {
2884
+ toolsChars += tool.name.length;
2885
+ toolsChars += tool.description?.length ?? 0;
2886
+ if (tool.parameters) {
2887
+ toolsChars += JSON.stringify(tool.parameters).length;
2888
+ }
2889
+ }
2890
+ const toolsTokens = Math.ceil(toolsChars / 4);
2891
+ const baseTokens = systemPromptTokens + toolsTokens;
2892
+ if (baseTokens >= contextWindow) {
2893
+ this._emit({
2894
+ type: "warning",
2895
+ message: `Base configuration (system prompt and active tools) consumes ${baseTokens} tokens, which exceeds the model's context window of ${contextWindow} tokens. The model cannot process any prompts in this state.`,
2896
+ });
2897
+ }
2898
+ else if (baseTokens >= contextWindow * 0.7) {
2899
+ this._emit({
2900
+ type: "warning",
2901
+ message: `Base configuration (system prompt and active tools) consumes ${baseTokens} tokens (${Math.round((baseTokens / contextWindow) * 100)}% of the ${contextWindow} context window). This leaves very little room for conversation history and may cause immediate compaction or context overflow.`,
2902
+ });
2903
+ }
2904
+ }
2905
+ async _checkCompaction(assistantMessage, skipAbortedCheck = true) {
2906
+ const settings = this._getAdaptedCompactionSettings();
2105
2907
  if (!settings.enabled)
2106
2908
  return false;
2107
2909
  // Skip if message was aborted (user cancelled) - unless skipAbortedCheck is false
@@ -2185,7 +2987,7 @@ export class AgentSession {
2185
2987
  * Internal: Run auto-compaction with events.
2186
2988
  */
2187
2989
  async _runAutoCompaction(reason, willRetry) {
2188
- const settings = this.settingsManager.getCompactionSettings();
2990
+ const settings = this._getAdaptedCompactionSettings();
2189
2991
  this._emit({ type: "compaction_start", reason });
2190
2992
  this._autoCompactionAbortController = new AbortController();
2191
2993
  try {
@@ -2695,7 +3497,10 @@ export class AgentSession {
2695
3497
  }
2696
3498
  _refreshToolRegistry(options) {
2697
3499
  const previousRegistryNames = new Set(this._toolRegistry.keys());
2698
- const previousActiveToolNames = this.getActiveToolNames();
3500
+ // Re-derive from the pre-filter REQUEST, never from agent.state.tools: the active set is
3501
+ // capability/profile-filtered, so feeding it back through setActiveToolsByName would
3502
+ // permanently shrink what a later switch to a larger model (or permissive profile) restores.
3503
+ const previousActiveToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
2699
3504
  const allowedToolNames = this._allowedToolNames;
2700
3505
  const excludedToolNames = this._excludedToolNames;
2701
3506
  const toolProfileFilter = this._toolProfileFilter;
@@ -2767,27 +3572,40 @@ export class AgentSession {
2767
3572
  toolRegistry.set(tool.name, tool);
2768
3573
  }
2769
3574
  this._toolRegistry = toolRegistry;
2770
- const nextActiveToolNames = (options?.activeToolNames ? [...options.activeToolNames] : [...previousActiveToolNames]).filter((name) => isAllowedTool(name));
3575
+ const requestedBase = options?.activeToolNames ? [...options.activeToolNames] : [...previousActiveToolNames];
3576
+ const nextActiveToolNames = requestedBase.filter((name) => isAllowedTool(name));
3577
+ const autoActivated = [];
2771
3578
  if (allowedToolNames) {
2772
3579
  for (const toolName of this._toolRegistry.keys()) {
2773
3580
  if (allowedToolNames.has(toolName)) {
2774
3581
  nextActiveToolNames.push(toolName);
3582
+ autoActivated.push(toolName);
2775
3583
  }
2776
3584
  }
2777
3585
  }
2778
3586
  else if (options?.includeAllExtensionTools) {
2779
3587
  for (const tool of wrappedExtensionTools) {
2780
3588
  nextActiveToolNames.push(tool.name);
3589
+ autoActivated.push(tool.name);
2781
3590
  }
2782
3591
  }
2783
3592
  else if (!options?.activeToolNames) {
2784
3593
  for (const toolName of this._toolRegistry.keys()) {
2785
3594
  if (!previousRegistryNames.has(toolName)) {
2786
3595
  nextActiveToolNames.push(toolName);
3596
+ autoActivated.push(toolName);
2787
3597
  }
2788
3598
  }
2789
3599
  }
3600
+ // artifact_retrieve companion auto-activation is enforced inside
3601
+ // setActiveToolsByName() itself (not duplicated here), so every activation path --
3602
+ // including the public, extension-exposed setActiveTools() -- gets the same
3603
+ // guarantee, not just this settings/profile refresh flow.
2790
3604
  this.setActiveToolsByName([...new Set(nextActiveToolNames)]);
3605
+ // setActiveToolsByName just stored the profile-filtered ACTIVE set as the request; restore
3606
+ // the true pre-filter request (plus this refresh's auto-activations) so an internal refresh
3607
+ // can never permanently narrow it.
3608
+ this._requestedActiveToolNames = [...new Set([...requestedBase, ...autoActivated])];
2791
3609
  }
2792
3610
  _createReloadRuntimeSnapshot() {
2793
3611
  return {
@@ -2842,6 +3660,14 @@ export class AgentSession {
2842
3660
  const autoResizeImages = this.settingsManager.getImageAutoResize();
2843
3661
  const shellCommandPrefix = this.settingsManager.getShellCommandPrefix();
2844
3662
  const shellPath = this.settingsManager.getShellPath();
3663
+ // grep/find must not emit a "Full output: artifact tool-output:<id>" handle that
3664
+ // nothing can resolve. If artifact_retrieve is explicitly excluded/blocked/outside
3665
+ // an active allowlist, don't hand grep/find an artifact store at all: they fall
3666
+ // back to their pre-existing bounded preview/truncation behavior, with no
3667
+ // payload/meta files ever written and no retrieval promise made.
3668
+ const toolArtifactStore = this._isToolOrCommandAllowedByProfile("artifact_retrieve")
3669
+ ? this._getToolArtifactStore()
3670
+ : undefined;
2845
3671
  const baseToolDefinitions = this._baseToolsOverride
2846
3672
  ? Object.fromEntries(Object.entries(this._baseToolsOverride).map(([name, tool]) => [
2847
3673
  name,
@@ -2850,12 +3676,37 @@ export class AgentSession {
2850
3676
  : createAllToolDefinitions(this._cwd, {
2851
3677
  read: { autoResizeImages },
2852
3678
  bash: { commandPrefix: shellCommandPrefix, shellPath },
3679
+ grep: { artifactStore: toolArtifactStore },
3680
+ find: { artifactStore: toolArtifactStore },
3681
+ artifact_retrieve: { artifactStore: toolArtifactStore },
2853
3682
  });
2854
3683
  this._baseToolDefinitions = new Map(Object.entries(baseToolDefinitions).map(([name, tool]) => [name, tool]));
2855
3684
  if (!this._baseToolsOverride) {
2856
- for (const definition of createCoreDiagnosticsToolDefinitions(() => this.getActiveToolNames(), () => this.getAllTools(), (messages) => this.getContextGcReport(messages))) {
3685
+ for (const definition of createCoreDiagnosticsToolDefinitions(() => this.getActiveToolNames(), () => this.getAllTools(), (messages) => this.getContextGcReport(messages), () => this._getMemoryAuditDiagnostics())) {
2857
3686
  this._baseToolDefinitions.set(definition.name, definition);
2858
3687
  }
3688
+ const goalToolDefinition = createGoalToolDefinition({
3689
+ getGoalState: () => this.getGoalStateSnapshot(),
3690
+ saveGoalState: (state) => {
3691
+ this.saveGoalStateSnapshot(state);
3692
+ },
3693
+ });
3694
+ this._baseToolDefinitions.set(goalToolDefinition.name, goalToolDefinition);
3695
+ const delegateToolDefinition = createDelegateToolDefinition({
3696
+ runWorkerDelegation: (args) => this.runWorkerDelegationOnce(args),
3697
+ });
3698
+ this._baseToolDefinitions.set(delegateToolDefinition.name, delegateToolDefinition);
3699
+ // Registered but not default-active: probes spend tokens on the probed model, so
3700
+ // activation is an explicit choice (settings/profile/setActiveTools or /autonomy fitness).
3701
+ const modelFitnessToolDefinition = createModelFitnessToolDefinition({
3702
+ runProbe: (args) => this.runModelFitness(args),
3703
+ });
3704
+ this._baseToolDefinitions.set(modelFitnessToolDefinition.name, modelFitnessToolDefinition);
3705
+ const runToolkitScriptToolDefinition = createRunToolkitScriptToolDefinition({
3706
+ getScripts: () => this.settingsManager.getToolkitScripts(),
3707
+ execute: (script, scriptArgs) => executeToolkitScript({ script, scriptArgs, cwd: this._cwd }),
3708
+ });
3709
+ this._baseToolDefinitions.set(runToolkitScriptToolDefinition.name, runToolkitScriptToolDefinition);
2859
3710
  }
2860
3711
  const extensionsResult = this._resourceLoader.getExtensions();
2861
3712
  if (options.flagValues) {
@@ -2875,7 +3726,7 @@ export class AgentSession {
2875
3726
  this._applyExtensionBindings(this._extensionRunner);
2876
3727
  const defaultActiveToolNames = this._baseToolsOverride
2877
3728
  ? Object.keys(this._baseToolsOverride)
2878
- : ["read", "bash", "edit", "write", "context_audit"];
3729
+ : ["read", "bash", "edit", "write", "context_audit", "goal", "delegate", "run_toolkit_script"];
2879
3730
  const baseActiveToolNames = options.activeToolNames ?? defaultActiveToolNames;
2880
3731
  this._refreshToolRegistry({
2881
3732
  activeToolNames: baseActiveToolNames,
@@ -2891,7 +3742,9 @@ export class AgentSession {
2891
3742
  }
2892
3743
  const previousRunner = this._extensionRunner;
2893
3744
  const snapshot = this._createReloadRuntimeSnapshot();
2894
- const activeToolNames = this.getActiveToolNames();
3745
+ // Preserve the pre-filter tool REQUEST across the rebuild, not the capability/profile-filtered
3746
+ // active set — otherwise a reload under a small model permanently shrinks the restorable set.
3747
+ const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
2895
3748
  const previousFlagValues = previousRunner.getFlagValues();
2896
3749
  const reloadErrors = [];
2897
3750
  let newRunner;
@@ -2984,7 +3837,7 @@ export class AgentSession {
2984
3837
  // Remove from loaded extensions
2985
3838
  this._resourceLoader.removeLoadedExtension(extensionPath);
2986
3839
  // Rebuild runtime with new extension set
2987
- const activeToolNames = this.getActiveToolNames();
3840
+ const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
2988
3841
  const previousFlagValues = previousRunner.getFlagValues();
2989
3842
  this._buildRuntime({
2990
3843
  activeToolNames,
@@ -3027,7 +3880,7 @@ export class AgentSession {
3027
3880
  throw new Error(error || `Failed to load extension: ${extensionPath}`);
3028
3881
  }
3029
3882
  // Rebuild runtime to aggregate tools/commands/handlers/providers
3030
- const activeToolNames = this.getActiveToolNames();
3883
+ const activeToolNames = this._requestedActiveToolNames ?? this.getActiveToolNames();
3031
3884
  const previousFlagValues = previousRunner.getFlagValues();
3032
3885
  this._buildRuntime({
3033
3886
  activeToolNames,
@@ -3702,6 +4555,671 @@ export class AgentSession {
3702
4555
  getDailyUsageBreakdown(formatLabel, now = new Date()) {
3703
4556
  return formatDailyUsageBreakdown(this.getDailyUsageTotals(now), formatLabel);
3704
4557
  }
4558
+ /**
4559
+ * Save a snapshot of the goal state to the session log.
4560
+ *
4561
+ * @returns the id of the appended custom entry
4562
+ */
4563
+ saveGoalStateSnapshot(state) {
4564
+ return appendGoalStateSnapshot(this.sessionManager, state);
4565
+ }
4566
+ /**
4567
+ * Retrieve the latest valid goal state snapshot from the session log.
4568
+ */
4569
+ getGoalStateSnapshot() {
4570
+ return getLatestGoalStateSnapshot(this.sessionManager.getEntries());
4571
+ }
4572
+ /**
4573
+ * Save a snapshot of the evidence bundle to the session log.
4574
+ *
4575
+ * @returns the id of the appended custom entry
4576
+ */
4577
+ saveEvidenceBundleSnapshot(bundle) {
4578
+ return appendEvidenceBundleSnapshot(this.sessionManager, bundle);
4579
+ }
4580
+ /**
4581
+ * Retrieve the latest valid evidence bundle snapshot from the session log.
4582
+ */
4583
+ getEvidenceBundleSnapshot() {
4584
+ return getLatestEvidenceBundleSnapshot(this.sessionManager.getEntries());
4585
+ }
4586
+ getEvidenceBundleSnapshots() {
4587
+ return getEvidenceBundleSnapshots(this.sessionManager.getEntries());
4588
+ }
4589
+ /** Live lane records tracked by this process (running and terminal). */
4590
+ getLaneRecords() {
4591
+ return this._laneTracker.getRecords();
4592
+ }
4593
+ saveWorkerResultSnapshot(result) {
4594
+ return appendWorkerResultSnapshot(this.sessionManager, result);
4595
+ }
4596
+ getWorkerResultSnapshots() {
4597
+ return getWorkerResultSnapshots(this.sessionManager.getEntries());
4598
+ }
4599
+ saveLearningDecisionSnapshot(decision) {
4600
+ return appendLearningDecisionSnapshot(this.sessionManager, decision);
4601
+ }
4602
+ getLearningDecisionSnapshots() {
4603
+ return getLearningDecisionSnapshots(this.sessionManager.getEntries());
4604
+ }
4605
+ getGoalRuntimeSnapshot(settings) {
4606
+ return buildGoalRuntimeSnapshot({
4607
+ entries: this.sessionManager.getEntries(),
4608
+ settings,
4609
+ });
4610
+ }
4611
+ _clearGoalAutoContinueTimer() {
4612
+ if (this._goalAutoContinueTimer !== undefined) {
4613
+ clearTimeout(this._goalAutoContinueTimer);
4614
+ this._goalAutoContinueTimer = undefined;
4615
+ }
4616
+ }
4617
+ _scheduleGoalAutoContinueFromIdle(options) {
4618
+ if (options?.autoContinueGoal === false || this._isGoalAutoContinuing || this._disposed)
4619
+ return;
4620
+ // Small-window models cannot afford multi-thousand-token continuation prompts per idle turn.
4621
+ if (!this.getModelCapabilityProfile().backgroundLanesEnabled)
4622
+ return;
4623
+ const { maxStallTurns, goalAutoContinue, goalAutoContinueDelayMs } = this.settingsManager.getAutonomySettings();
4624
+ if (!goalAutoContinue)
4625
+ return;
4626
+ const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
4627
+ if (snapshot.continuation.action !== "continue")
4628
+ return;
4629
+ this._clearGoalAutoContinueTimer();
4630
+ this._goalAutoContinueTimer = setTimeout(() => {
4631
+ this._goalAutoContinueTimer = undefined;
4632
+ void this._runScheduledGoalAutoContinue();
4633
+ }, goalAutoContinueDelayMs);
4634
+ const timer = this._goalAutoContinueTimer;
4635
+ if (typeof timer === "object" && timer && "unref" in timer) {
4636
+ const { unref } = timer;
4637
+ unref?.call(timer);
4638
+ }
4639
+ }
4640
+ async _runScheduledGoalAutoContinue() {
4641
+ if (this._isGoalAutoContinuing || this._disposed)
4642
+ return;
4643
+ const { maxStallTurns, goalContinueTurns, goalContinueMaxWallClockMinutes, goalAutoContinue } = this.settingsManager.getAutonomySettings();
4644
+ if (!goalAutoContinue)
4645
+ return;
4646
+ const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
4647
+ if (snapshot.continuation.action !== "continue")
4648
+ return;
4649
+ this._isGoalAutoContinuing = true;
4650
+ try {
4651
+ await this.continueGoalLoop({
4652
+ maxTurns: goalContinueTurns,
4653
+ maxStallTurns,
4654
+ maxWallClockMinutes: goalContinueMaxWallClockMinutes,
4655
+ });
4656
+ }
4657
+ catch (error) {
4658
+ const message = error instanceof Error ? error.message : String(error);
4659
+ this._emit({ type: "warning", message: `Goal auto-continuation failed: ${message}` });
4660
+ }
4661
+ finally {
4662
+ this._isGoalAutoContinuing = false;
4663
+ }
4664
+ }
4665
+ _clearResearchLaneTimer() {
4666
+ if (this._researchLaneTimer !== undefined) {
4667
+ clearTimeout(this._researchLaneTimer);
4668
+ this._researchLaneTimer = undefined;
4669
+ }
4670
+ }
4671
+ /**
4672
+ * Derive the research demand from durable goal state: an active goal with open requirements,
4673
+ * deduplicated against the latest persisted bundle so the same requirement set is never
4674
+ * researched twice (the query is deterministic, so dedupe survives session reload).
4675
+ */
4676
+ _buildResearchLaneDemand() {
4677
+ const goal = this.getGoalStateSnapshot();
4678
+ if (!goal || goal.status !== "active") {
4679
+ this._lastResearchLaneSkipReason = "no_active_goal";
4680
+ return undefined;
4681
+ }
4682
+ const open = goal.requirements.filter((requirement) => requirement.status === "open");
4683
+ if (open.length === 0) {
4684
+ this._lastResearchLaneSkipReason = "no_open_requirements";
4685
+ return undefined;
4686
+ }
4687
+ const query = `goal:${goal.goalId} requirements:${open
4688
+ .map((requirement) => requirement.id)
4689
+ .sort()
4690
+ .join(",")}`;
4691
+ if (this.getEvidenceBundleSnapshot()?.query === query) {
4692
+ this._lastResearchLaneSkipReason = "recent_evidence_sufficient";
4693
+ return undefined;
4694
+ }
4695
+ const context = [
4696
+ `Goal: ${goal.userGoal}`,
4697
+ "Open requirements:",
4698
+ ...open.slice(0, 20).map((requirement) => `- ${requirement.text}`),
4699
+ ].join("\n");
4700
+ return { query, context, goalId: goal.goalId };
4701
+ }
4702
+ /**
4703
+ * Idle trigger for the autonomous research lane (mirrors {@link _scheduleGoalAutoContinueFromIdle}).
4704
+ * All skips are recorded in `_lastResearchLaneSkipReason` and surfaced via diagnostics — the lane
4705
+ * informs, it never prompts or blocks the foreground.
4706
+ */
4707
+ _scheduleResearchLaneFromIdle() {
4708
+ if (this._isResearchLaneRunning || this._disposed || this._isChildSession)
4709
+ return;
4710
+ if (!this.getModelCapabilityProfile().backgroundLanesEnabled) {
4711
+ this._lastResearchLaneSkipReason = "model_context_too_small";
4712
+ return;
4713
+ }
4714
+ const research = this.settingsManager.getResearchLaneSettings();
4715
+ if (!research.enabled) {
4716
+ this._lastResearchLaneSkipReason = "research_lane_disabled";
4717
+ return;
4718
+ }
4719
+ const { mode } = this.settingsManager.getAutonomySettings();
4720
+ if (mode === "off") {
4721
+ this._lastResearchLaneSkipReason = "autonomy_mode_off";
4722
+ return;
4723
+ }
4724
+ const priorRuns = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "research").length;
4725
+ if (priorRuns >= research.maxRunsPerSession) {
4726
+ this._lastResearchLaneSkipReason = "max_runs_reached";
4727
+ return;
4728
+ }
4729
+ if (!this._buildResearchLaneDemand())
4730
+ return;
4731
+ this._clearResearchLaneTimer();
4732
+ this._researchLaneTimer = setTimeout(() => {
4733
+ this._researchLaneTimer = undefined;
4734
+ void this._runScheduledResearchLane();
4735
+ }, research.idleDelayMs);
4736
+ const timer = this._researchLaneTimer;
4737
+ if (typeof timer === "object" && timer && "unref" in timer) {
4738
+ const { unref } = timer;
4739
+ unref?.call(timer);
4740
+ }
4741
+ }
4742
+ async _runScheduledResearchLane() {
4743
+ if (this._isResearchLaneRunning || this._disposed)
4744
+ return;
4745
+ const research = this.settingsManager.getResearchLaneSettings();
4746
+ const { mode } = this.settingsManager.getAutonomySettings();
4747
+ if (!research.enabled || mode === "off")
4748
+ return;
4749
+ try {
4750
+ await this.runResearchLaneOnce();
4751
+ }
4752
+ catch (error) {
4753
+ const message = error instanceof Error ? error.message : String(error);
4754
+ this._emit({ type: "warning", message: `Research lane failed: ${message}` });
4755
+ }
4756
+ }
4757
+ /**
4758
+ * Capability profile derived from the CURRENT session model's own metadata (context window),
4759
+ * honoring the modelCapability.mode setting ("off" disables, a class name forces).
4760
+ */
4761
+ getModelCapabilityProfile() {
4762
+ return deriveModelCapabilityProfile({
4763
+ contextWindow: this.model?.contextWindow,
4764
+ mode: this.settingsManager.getModelCapabilitySettings().mode,
4765
+ });
4766
+ }
4767
+ /** Capability profile for a specific lane model (lane budgets scale to the lane model's window). */
4768
+ _laneCapabilityProfile(model) {
4769
+ return deriveModelCapabilityProfile({
4770
+ contextWindow: model.contextWindow,
4771
+ mode: this.settingsManager.getModelCapabilitySettings().mode,
4772
+ });
4773
+ }
4774
+ /**
4775
+ * Resolve the model for a background lane. Lanes are shipped BY this session, so they inherit
4776
+ * the session's own model unless a lane-specific model is explicitly configured — a single-model
4777
+ * setup (e.g. one local open model) runs its lanes on that same model. An explicitly configured
4778
+ * pattern that cannot resolve/authenticate is a visible skip, not a silent fallback.
4779
+ */
4780
+ _resolveLaneModel(configuredPattern) {
4781
+ if (configuredPattern) {
4782
+ const resolved = resolveCliModel({ cliModel: configuredPattern, modelRegistry: this._modelRegistry });
4783
+ if (resolved.model && this._modelRegistry.hasConfiguredAuth(resolved.model)) {
4784
+ return resolved.model;
4785
+ }
4786
+ return undefined;
4787
+ }
4788
+ return this.model ?? undefined;
4789
+ }
4790
+ /**
4791
+ * Resolve what a lane ships with. Precedence: explicit lane model setting, then the lane
4792
+ * profile's model (a shipped profile with a model MUST be obeyed — unresolvable is a visible
4793
+ * skip, never a fallback), then generic inheritance of the session model.
4794
+ */
4795
+ _resolveLaneShipment(laneSettings, missingModelReason) {
4796
+ let laneProfile;
4797
+ if (laneSettings.profile) {
4798
+ laneProfile = this.settingsManager.getProfileRegistry().getProfile(laneSettings.profile);
4799
+ if (!laneProfile) {
4800
+ return { ok: false, skipReason: "lane_profile_not_found" };
4801
+ }
4802
+ }
4803
+ let model;
4804
+ if (laneSettings.model) {
4805
+ model = this._resolveLaneModel(laneSettings.model);
4806
+ if (!model)
4807
+ return { ok: false, skipReason: missingModelReason };
4808
+ }
4809
+ else if (laneProfile?.model) {
4810
+ model = this._resolveLaneModel(laneProfile.model);
4811
+ if (!model)
4812
+ return { ok: false, skipReason: "no_lane_profile_model" };
4813
+ }
4814
+ else {
4815
+ model = this.model ?? undefined;
4816
+ if (!model)
4817
+ return { ok: false, skipReason: missingModelReason };
4818
+ }
4819
+ return { ok: true, model, laneProfile };
4820
+ }
4821
+ /** UAC tool grants from a shipped lane profile, recorded on the lane envelope. */
4822
+ _laneProfileToolGrants(laneProfile) {
4823
+ const toolsFilter = laneProfile?.resources.tools;
4824
+ return {
4825
+ ...(toolsFilter?.allow && toolsFilter.allow.length > 0 ? { allowedTools: [...toolsFilter.allow] } : {}),
4826
+ ...(toolsFilter?.block && toolsFilter.block.length > 0 ? { deniedTools: [...toolsFilter.block] } : {}),
4827
+ };
4828
+ }
4829
+ /** Stripped research envelope — never the foreground/architect envelope. */
4830
+ _buildResearchLaneEnvelope(maxUsd, laneProfile) {
4831
+ return {
4832
+ id: `research-${this.sessionId}-${Date.now()}`,
4833
+ profileId: laneProfile?.name,
4834
+ capabilities: ["research", "read_files", "memory_read"],
4835
+ ...this._laneProfileToolGrants(laneProfile),
4836
+ maxEstimatedUsd: Math.min(maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY),
4837
+ createdAt: new Date().toISOString(),
4838
+ };
4839
+ }
4840
+ /**
4841
+ * Run one bounded, read-only research pass and persist its results: evidence bundle snapshot,
4842
+ * terminal lane record, and spawned-usage cost report (single-hop invariant, idempotent on the
4843
+ * lane's reportId). Explicit calls (e.g. `/autonomy research`) express user intent and bypass the
4844
+ * enabled/mode/dedupe gates the idle scheduler enforces; budget and capability gates always apply.
4845
+ */
4846
+ async runResearchLaneOnce(request) {
4847
+ if (this._isResearchLaneRunning) {
4848
+ return { started: false, skipReason: "research_lane_already_running" };
4849
+ }
4850
+ if (this._disposed) {
4851
+ return { started: false, skipReason: "session_disposed" };
4852
+ }
4853
+ const settings = this.settingsManager.getResearchLaneSettings();
4854
+ const demand = request?.query
4855
+ ? { query: request.query, context: request.context ?? "", goalId: request.goalId }
4856
+ : this._buildResearchLaneDemand();
4857
+ if (!demand) {
4858
+ return { started: false, skipReason: this._lastResearchLaneSkipReason ?? "no_research_demand" };
4859
+ }
4860
+ const shipment = this._resolveLaneShipment(settings, "no_research_model");
4861
+ if (!shipment.ok) {
4862
+ this._lastResearchLaneSkipReason = shipment.skipReason;
4863
+ return { started: false, skipReason: shipment.skipReason };
4864
+ }
4865
+ const { model, laneProfile } = shipment;
4866
+ this._isResearchLaneRunning = true;
4867
+ this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
4868
+ const startedRecord = this._laneTracker.start({ type: "research", goalId: demand.goalId });
4869
+ try {
4870
+ let spentUsage;
4871
+ const result = await runResearch({
4872
+ query: demand.query,
4873
+ context: demand.context,
4874
+ envelope: this._buildResearchLaneEnvelope(settings.maxUsd, laneProfile),
4875
+ maxUsd: settings.maxUsd,
4876
+ maxSources: settings.maxSources,
4877
+ maxFindings: settings.maxFindings,
4878
+ maxWallClockMs: settings.maxWallClockMs,
4879
+ signal: this._researchLaneAbort.signal,
4880
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
4881
+ const completion = await this.runIsolatedCompletion({
4882
+ // Level-0 core always survives; profile soul and role prompt are the replaceable
4883
+ // layers; a settings-provided prompt replaces everything above the core.
4884
+ systemPrompt: composeSubagentSystemPrompt({
4885
+ soul: laneProfile?.soul,
4886
+ rolePrompt: systemPrompt,
4887
+ override: settings.systemPrompt,
4888
+ }),
4889
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
4890
+ model,
4891
+ thinkingLevel: laneProfile?.thinking ?? "off",
4892
+ maxTokens: this._laneCapabilityProfile(model).laneMaxOutputTokens,
4893
+ signal,
4894
+ // Core/soul/role are all static per configuration — the provider can cache the prefix.
4895
+ cacheRetention: "short",
4896
+ });
4897
+ spentUsage = completion.usage;
4898
+ return {
4899
+ text: completion.text,
4900
+ costUsd: completion.usage.cost.total,
4901
+ stopReason: String(completion.stopReason),
4902
+ };
4903
+ },
4904
+ });
4905
+ // Bug #21 pattern: if the session was disposed while the completion was in flight, do NOT
4906
+ // persist evidence/records/usage against the dead session.
4907
+ if (this._disposed) {
4908
+ const record = this._laneTracker.complete(startedRecord.laneId, {
4909
+ status: "canceled",
4910
+ reasonCode: "session_disposed",
4911
+ });
4912
+ return { started: true, record, result };
4913
+ }
4914
+ let evidenceEntryId;
4915
+ if (result.bundle) {
4916
+ evidenceEntryId = this.saveEvidenceBundleSnapshot(result.bundle);
4917
+ }
4918
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
4919
+ this.addSpawnedUsage(spentUsage, {
4920
+ label: "research-lane",
4921
+ reportId: `research:${this.sessionId}:${startedRecord.laneId}`,
4922
+ });
4923
+ }
4924
+ const record = this._laneTracker.complete(startedRecord.laneId, {
4925
+ status: result.status,
4926
+ reasonCode: result.reasonCode,
4927
+ costUsd: result.costUsd,
4928
+ evidenceEntryId,
4929
+ });
4930
+ if (record) {
4931
+ appendLaneRecordSnapshot(this.sessionManager, record);
4932
+ }
4933
+ return { started: true, record, result };
4934
+ }
4935
+ catch (error) {
4936
+ const record = this._laneTracker.complete(startedRecord.laneId, {
4937
+ status: "failed",
4938
+ reasonCode: "research_lane_error",
4939
+ });
4940
+ if (record && !this._disposed) {
4941
+ appendLaneRecordSnapshot(this.sessionManager, record);
4942
+ }
4943
+ const message = error instanceof Error ? error.message : String(error);
4944
+ this._emit({ type: "warning", message: `Research lane failed: ${message}` });
4945
+ return { started: true, record };
4946
+ }
4947
+ finally {
4948
+ this._isResearchLaneRunning = false;
4949
+ }
4950
+ }
4951
+ /**
4952
+ * Run one bounded scout-worker delegation: build a WorkerRequest with a stripped read-only
4953
+ * envelope, execute it as an isolated completion on a cheap lane, validate the result via
4954
+ * {@link validateWorkerResult} before acceptance, and persist result + lane record + spawned
4955
+ * usage (idempotent per-lane reportId). Consumed by the `delegate` tool.
4956
+ */
4957
+ async runWorkerDelegationOnce(request) {
4958
+ if (this._isWorkerDelegationRunning) {
4959
+ return { started: false, skipReason: "worker_delegation_already_running" };
4960
+ }
4961
+ if (this._disposed) {
4962
+ return { started: false, skipReason: "session_disposed" };
4963
+ }
4964
+ const instructions = request.instructions.trim();
4965
+ if (instructions.length === 0) {
4966
+ return { started: false, skipReason: "missing_instructions" };
4967
+ }
4968
+ const settings = this.settingsManager.getWorkerDelegationSettings();
4969
+ if (!settings.enabled) {
4970
+ return { started: false, skipReason: "worker_delegation_disabled" };
4971
+ }
4972
+ const shipment = this._resolveLaneShipment(settings, "no_worker_model");
4973
+ if (!shipment.ok) {
4974
+ return { started: false, skipReason: shipment.skipReason };
4975
+ }
4976
+ const { model, laneProfile } = shipment;
4977
+ this._isWorkerDelegationRunning = true;
4978
+ this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
4979
+ const startedRecord = this._laneTracker.start({ type: "worker" });
4980
+ const maxUsd = Math.min(settings.maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY);
4981
+ const workerRequest = {
4982
+ id: startedRecord.laneId,
4983
+ instructions,
4984
+ route: {
4985
+ tier: "cheap",
4986
+ risk: "read-only",
4987
+ confidence: 1,
4988
+ reasonCode: "scout_worker",
4989
+ reasons: ["Read-only scout delegation"],
4990
+ },
4991
+ envelope: {
4992
+ id: `worker-${this.sessionId}-${startedRecord.laneId}`,
4993
+ profileId: laneProfile?.name,
4994
+ capabilities: ["read_files"],
4995
+ ...this._laneProfileToolGrants(laneProfile),
4996
+ maxEstimatedUsd: maxUsd,
4997
+ createdAt: new Date().toISOString(),
4998
+ },
4999
+ maxEstimatedUsd: maxUsd,
5000
+ createdAt: new Date().toISOString(),
5001
+ };
5002
+ const usageReportId = `worker:${this.sessionId}:${startedRecord.laneId}`;
5003
+ try {
5004
+ let spentUsage;
5005
+ const outcome = await runWorker({
5006
+ request: workerRequest,
5007
+ maxUsd,
5008
+ maxWallClockMs: settings.maxWallClockMs,
5009
+ usageReportId,
5010
+ signal: this._workerDelegationAbort.signal,
5011
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
5012
+ const completion = await this.runIsolatedCompletion({
5013
+ // Level-0 core always survives. A model-provided prompt (delegate tool) is the most
5014
+ // specific override, then the settings-level prompt, then profile soul + role prompt.
5015
+ systemPrompt: composeSubagentSystemPrompt({
5016
+ soul: laneProfile?.soul,
5017
+ rolePrompt: systemPrompt,
5018
+ override: request.systemPrompt ?? settings.systemPrompt,
5019
+ }),
5020
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
5021
+ model,
5022
+ thinkingLevel: laneProfile?.thinking ?? "off",
5023
+ maxTokens: this._laneCapabilityProfile(model).laneMaxOutputTokens,
5024
+ signal,
5025
+ // Core/soul/role are all static per configuration — the provider can cache the prefix.
5026
+ cacheRetention: "short",
5027
+ });
5028
+ spentUsage = completion.usage;
5029
+ return {
5030
+ text: completion.text,
5031
+ costUsd: completion.usage.cost.total,
5032
+ stopReason: String(completion.stopReason),
5033
+ };
5034
+ },
5035
+ });
5036
+ // Bug #21 pattern: never persist against a disposed session.
5037
+ if (this._disposed) {
5038
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5039
+ status: "canceled",
5040
+ reasonCode: "session_disposed",
5041
+ });
5042
+ return { started: true, record, outcome };
5043
+ }
5044
+ this.saveWorkerResultSnapshot(outcome.result);
5045
+ if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
5046
+ this.addSpawnedUsage(spentUsage, { label: "worker-delegation", reportId: usageReportId });
5047
+ }
5048
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5049
+ status: outcome.laneStatus,
5050
+ reasonCode: outcome.reasonCode,
5051
+ costUsd: outcome.costUsd,
5052
+ });
5053
+ if (record) {
5054
+ appendLaneRecordSnapshot(this.sessionManager, record);
5055
+ }
5056
+ return { started: true, record, outcome };
5057
+ }
5058
+ catch (error) {
5059
+ const record = this._laneTracker.complete(startedRecord.laneId, {
5060
+ status: "failed",
5061
+ reasonCode: "worker_delegation_error",
5062
+ });
5063
+ if (record && !this._disposed) {
5064
+ appendLaneRecordSnapshot(this.sessionManager, record);
5065
+ }
5066
+ const message = error instanceof Error ? error.message : String(error);
5067
+ this._emit({ type: "warning", message: `Worker delegation failed: ${message}` });
5068
+ return { started: true, record };
5069
+ }
5070
+ finally {
5071
+ this._isWorkerDelegationRunning = false;
5072
+ }
5073
+ }
5074
+ /**
5075
+ * Probe a candidate model against the subagent contracts (research/worker/judge/search/
5076
+ * tool-call surfaces) via {@link runModelFitnessProbe}. The model must resolve and
5077
+ * authenticate; every probe call runs as an isolated completion on that model, and probe
5078
+ * spend is reported through spawned-usage accounting.
5079
+ */
5080
+ async runModelFitness(args) {
5081
+ if (this._disposed)
5082
+ return { started: false, skipReason: "session_disposed" };
5083
+ const resolved = this._resolveLaneModel(args.model.trim() || undefined);
5084
+ if (!resolved)
5085
+ return { started: false, skipReason: "model_unresolved_or_unauthenticated" };
5086
+ const capability = this._laneCapabilityProfile(resolved);
5087
+ const spent = {
5088
+ input: 0,
5089
+ output: 0,
5090
+ cacheRead: 0,
5091
+ cacheWrite: 0,
5092
+ totalTokens: 0,
5093
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
5094
+ };
5095
+ const report = await runModelFitnessProbe({
5096
+ trials: args.trials,
5097
+ signal: this._researchLaneAbort.signal,
5098
+ complete: async ({ systemPrompt, userPrompt, signal }) => {
5099
+ const callStarted = Date.now();
5100
+ const completion = await this.runIsolatedCompletion({
5101
+ systemPrompt,
5102
+ messages: [{ role: "user", content: [{ type: "text", text: userPrompt }], timestamp: Date.now() }],
5103
+ model: resolved,
5104
+ thinkingLevel: "off",
5105
+ maxTokens: capability.laneMaxOutputTokens,
5106
+ signal,
5107
+ cacheRetention: "short",
5108
+ });
5109
+ const callMs = Date.now() - callStarted;
5110
+ spent.input += completion.usage.input;
5111
+ spent.output += completion.usage.output;
5112
+ spent.cacheRead += completion.usage.cacheRead;
5113
+ spent.cacheWrite += completion.usage.cacheWrite;
5114
+ spent.totalTokens += completion.usage.totalTokens;
5115
+ spent.cost.input += completion.usage.cost.input;
5116
+ spent.cost.output += completion.usage.cost.output;
5117
+ spent.cost.cacheRead += completion.usage.cost.cacheRead;
5118
+ spent.cost.cacheWrite += completion.usage.cost.cacheWrite;
5119
+ spent.cost.total += completion.usage.cost.total;
5120
+ return {
5121
+ text: completion.text,
5122
+ costUsd: completion.usage.cost.total,
5123
+ stopReason: String(completion.stopReason),
5124
+ // Wall-clock fallback for tok/s: providers don't expose pure eval time, so the
5125
+ // measured call time stands in — slightly conservative (includes network/queue).
5126
+ outputTokens: completion.usage.output,
5127
+ evalMs: callMs,
5128
+ };
5129
+ },
5130
+ });
5131
+ if (!this._disposed && (spent.cost.total > 0 || spent.totalTokens > 0)) {
5132
+ this.addSpawnedUsage(spent, { label: "model-fitness" });
5133
+ }
5134
+ const modelRef = `${resolved.provider}/${resolved.id}`;
5135
+ // Fitness is a property of a model ON a host — persist the report host-keyed so role
5136
+ // assignments stay per-machine (a model can await better hardware without being forgotten).
5137
+ // Best-effort: a disk problem must not fail the probe itself.
5138
+ try {
5139
+ if (!this._disposed) {
5140
+ FitnessStore.forAgentDir(this._agentDir).save(modelRef, report);
5141
+ }
5142
+ }
5143
+ catch {
5144
+ // best-effort persistence
5145
+ }
5146
+ return { started: true, model: modelRef, report };
5147
+ }
5148
+ /** Fitness reports persisted for THIS host (measured evidence for architect/profile decisions). */
5149
+ getStoredFitnessReports() {
5150
+ try {
5151
+ return FitnessStore.forAgentDir(this._agentDir).getForHost();
5152
+ }
5153
+ catch {
5154
+ return [];
5155
+ }
5156
+ }
5157
+ async continueGoalOnce(options) {
5158
+ const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns: options.maxStallTurns });
5159
+ if (snapshot.continuation.action !== "continue") {
5160
+ return { submitted: false, snapshot };
5161
+ }
5162
+ const prompt = buildGoalContinuationPrompt({ snapshot, limits: options.promptLimits });
5163
+ await this.prompt(prompt.text, {
5164
+ expandPromptTemplates: false,
5165
+ processSlashCommands: false,
5166
+ autoContinueGoal: false,
5167
+ });
5168
+ return { submitted: true, snapshot, prompt };
5169
+ }
5170
+ async continueGoalLoop(options) {
5171
+ let turnsSubmitted = 0;
5172
+ const now = options.now ?? Date.now;
5173
+ const maxWallClockMs = typeof options.maxWallClockMinutes === "number" && options.maxWallClockMinutes > 0
5174
+ ? options.maxWallClockMinutes * 60_000
5175
+ : undefined;
5176
+ const startedAt = now();
5177
+ const hasReachedWallClockBudget = () => maxWallClockMs !== undefined && now() - startedAt >= maxWallClockMs;
5178
+ const snapshot = () => this.getGoalRuntimeSnapshot({ maxStallTurns: options.maxStallTurns });
5179
+ if (options.maxTurns <= 0) {
5180
+ return {
5181
+ turnsSubmitted: 0,
5182
+ stopReason: "max_turns_reached",
5183
+ finalSnapshot: snapshot(),
5184
+ };
5185
+ }
5186
+ if (hasReachedWallClockBudget()) {
5187
+ return { turnsSubmitted, stopReason: "wall_clock_budget_reached", finalSnapshot: snapshot() };
5188
+ }
5189
+ while (turnsSubmitted < options.maxTurns) {
5190
+ const beforeSnapshot = snapshot();
5191
+ if (beforeSnapshot.continuation.action !== "continue") {
5192
+ return { turnsSubmitted, stopReason: "continuation_not_allowed", finalSnapshot: beforeSnapshot };
5193
+ }
5194
+ const state = beforeSnapshot.goalState;
5195
+ const beforeKey = state
5196
+ ? `${state.goalId}:${state.updatedAt}:${state.events.length}:${state.stallTurns}:${state.status}`
5197
+ : undefined;
5198
+ const result = await this.continueGoalOnce(options);
5199
+ if (result.submitted) {
5200
+ turnsSubmitted++;
5201
+ }
5202
+ if (hasReachedWallClockBudget()) {
5203
+ return { turnsSubmitted, stopReason: "wall_clock_budget_reached", finalSnapshot: snapshot() };
5204
+ }
5205
+ const afterSnapshot = snapshot();
5206
+ if (afterSnapshot.continuation.action !== "continue") {
5207
+ return { turnsSubmitted, stopReason: "continuation_not_allowed", finalSnapshot: afterSnapshot };
5208
+ }
5209
+ const afterState = afterSnapshot.goalState;
5210
+ const afterKey = afterState
5211
+ ? `${afterState.goalId}:${afterState.updatedAt}:${afterState.events.length}:${afterState.stallTurns}:${afterState.status}`
5212
+ : undefined;
5213
+ if (beforeKey === afterKey) {
5214
+ return { turnsSubmitted, stopReason: "goal_state_not_advanced", finalSnapshot: afterSnapshot };
5215
+ }
5216
+ }
5217
+ return {
5218
+ turnsSubmitted,
5219
+ stopReason: "max_turns_reached",
5220
+ finalSnapshot: snapshot(),
5221
+ };
5222
+ }
3705
5223
  /**
3706
5224
  * Run a one-shot LLM completion fully ISOLATED from the main session — the load-bearing
3707
5225
  * primitive for the native reflection engine (adaptive-agent design §6c/§7).
@@ -3807,8 +5325,60 @@ export class AgentSession {
3807
5325
  // or skills against the dead session.
3808
5326
  if (this._disposed)
3809
5327
  return result;
5328
+ // Learning apply policy: every durable write is converted to a proposal, decided by the
5329
+ // learning gate, and audited with a rollback plan. With the policy disabled (default) the
5330
+ // legacy direct-apply behavior is preserved — but now leaves audit records with rollback info.
5331
+ const policy = this.settingsManager.getLearningPolicySettings();
5332
+ // The audit id sequence counts STORED snapshots only: it reseeds from the stored count on
5333
+ // every pass, so advancing it for a no-op (which stores nothing) would make later passes
5334
+ // reuse ids — and rollback keys on the id, so a collision blocks or misdirects rollback.
5335
+ let auditSequence = getLearningAuditSnapshots(this.sessionManager.getEntries()).length;
5336
+ let writeIndex = 0;
3810
5337
  for (const write of result.writes) {
3811
- await this._applyReflectionWrite(write, signal);
5338
+ writeIndex += 1;
5339
+ const proposalId = `${input.reportId ?? "reflection"}-w${writeIndex}`;
5340
+ const proposal = proposalFromReflectionWrite(write, proposalId);
5341
+ const rollback = rollbackPlanForReflectionWrite(write);
5342
+ const decision = policy.enabled
5343
+ ? evaluateLearningDecision({
5344
+ proposal,
5345
+ confidence: policy.reflectionSourceConfidence,
5346
+ observations: 1,
5347
+ contradictions: 0,
5348
+ settings: {
5349
+ enabled: true,
5350
+ autoApplyEnabled: policy.autoApplyEnabled,
5351
+ confidenceThreshold: policy.confidenceThreshold,
5352
+ minObservations: policy.minObservations,
5353
+ allowedAutoApplyLayers: policy.allowedAutoApplyLayers,
5354
+ requireRollbackPlan: policy.requireRollbackPlan,
5355
+ },
5356
+ })
5357
+ : {
5358
+ kind: "apply",
5359
+ reasonCode: "learning_policy_disabled_legacy_apply",
5360
+ confidence: 0,
5361
+ summary: proposal.summary,
5362
+ requiresApproval: false,
5363
+ };
5364
+ this.saveLearningDecisionSnapshot(decision);
5365
+ if (decision.kind === "apply") {
5366
+ await this._applyReflectionWrite(write, signal);
5367
+ }
5368
+ if (decision.kind !== "no-op") {
5369
+ auditSequence += 1;
5370
+ appendLearningAuditSnapshot(this.sessionManager, {
5371
+ id: `audit-${auditSequence}`,
5372
+ proposalId,
5373
+ layer: proposal.layer,
5374
+ action: decision.kind === "apply" ? "apply" : "propose",
5375
+ summary: proposal.summary,
5376
+ reasonCode: decision.reasonCode,
5377
+ decision,
5378
+ rollback,
5379
+ createdAt: new Date().toISOString(),
5380
+ });
5381
+ }
3812
5382
  }
3813
5383
  // Account the reflection's spend so it surfaces in the footer roll-up (net-token visibility).
3814
5384
  // Idempotent on reportId so a retried/duplicated pass cannot double-count.
@@ -3817,6 +5387,76 @@ export class AgentSession {
3817
5387
  }
3818
5388
  return result;
3819
5389
  }
5390
+ getLearningAuditRecords() {
5391
+ return getLearningAuditSnapshots(this.sessionManager.getEntries());
5392
+ }
5393
+ /**
5394
+ * Roll back one applied durable learning change by executing the inverse operation recorded in
5395
+ * its audit record (memory ops run through the same bundled memory-tool path as the original
5396
+ * apply; promoted skills are archived). Appends a linked "rollback" audit record on success so
5397
+ * the change history stays complete and a change cannot be rolled back twice.
5398
+ */
5399
+ async rollbackLearningWrite(auditId) {
5400
+ if (this._disposed)
5401
+ return { ok: false, reason: "session_disposed" };
5402
+ const audits = this.getLearningAuditRecords();
5403
+ const audit = audits.find((record) => record.id === auditId);
5404
+ if (!audit)
5405
+ return { ok: false, reason: "audit_not_found" };
5406
+ if (audit.action !== "apply")
5407
+ return { ok: false, reason: "not_an_applied_change" };
5408
+ if (audits.some((record) => record.action === "rollback" && record.rollbackOf === auditId)) {
5409
+ return { ok: false, reason: "already_rolled_back" };
5410
+ }
5411
+ const rollback = audit.rollback;
5412
+ if (!rollback)
5413
+ return { ok: false, reason: "no_rollback_plan" };
5414
+ switch (rollback.kind) {
5415
+ case "memory_remove": {
5416
+ if (!rollback.target)
5417
+ return { ok: false, reason: "missing_rollback_target" };
5418
+ await this._applyReflectionWrite({ kind: "memory_remove", target: rollback.target });
5419
+ break;
5420
+ }
5421
+ case "memory_restore": {
5422
+ if (!rollback.target || rollback.previous === undefined) {
5423
+ return { ok: false, reason: "missing_rollback_target" };
5424
+ }
5425
+ await this._applyReflectionWrite({
5426
+ kind: "memory_replace",
5427
+ target: rollback.target,
5428
+ text: rollback.previous,
5429
+ });
5430
+ break;
5431
+ }
5432
+ case "memory_add": {
5433
+ if (rollback.previous === undefined)
5434
+ return { ok: false, reason: "missing_rollback_target" };
5435
+ await this._applyReflectionWrite({ kind: "memory_add", section: "MEMORY", text: rollback.previous });
5436
+ break;
5437
+ }
5438
+ case "archive_skill": {
5439
+ if (!rollback.target)
5440
+ return { ok: false, reason: "missing_rollback_target" };
5441
+ if (!this.archivePromotedSkill(rollback.target)) {
5442
+ return { ok: false, reason: "skill_archive_failed" };
5443
+ }
5444
+ break;
5445
+ }
5446
+ }
5447
+ appendLearningAuditSnapshot(this.sessionManager, {
5448
+ id: `${audit.id}-rollback`,
5449
+ proposalId: audit.proposalId,
5450
+ layer: audit.layer,
5451
+ action: "rollback",
5452
+ summary: `Rolled back: ${audit.summary}`,
5453
+ reasonCode: "user_requested_rollback",
5454
+ decision: audit.decision,
5455
+ rollbackOf: audit.id,
5456
+ createdAt: new Date().toISOString(),
5457
+ });
5458
+ return { ok: true, reason: "rollback_applied" };
5459
+ }
3820
5460
  /**
3821
5461
  * Apply one reflection write through the bundled `memory` tool. `memory_replace`/`memory_remove`
3822
5462
  * don't carry a target file, so we try MEMORY.md first and fall back to USER.md when the substring
@@ -4018,6 +5658,182 @@ export class AgentSession {
4018
5658
  // =========================================================================
4019
5659
  // Extension System
4020
5660
  // =========================================================================
5661
+ getAutonomyStatusSnapshot() {
5662
+ const snapshot = {};
5663
+ if (this._lastModelRouterDecision?.route) {
5664
+ snapshot.latestRoute = {
5665
+ tier: this._lastModelRouterDecision.route.tier,
5666
+ reasonCode: this._lastModelRouterDecision.route.reasonCode,
5667
+ risk: this._lastModelRouterDecision.route.risk,
5668
+ };
5669
+ }
5670
+ if (this._lastAutonomyGateOutcome) {
5671
+ snapshot.latestGate = {
5672
+ outcome: this._lastAutonomyGateOutcome.outcome,
5673
+ gate: this._lastAutonomyGateOutcome.gate,
5674
+ reasonCode: this._lastAutonomyGateOutcome.reasonCode,
5675
+ };
5676
+ }
5677
+ const currentCost = this.getSessionStats().cost;
5678
+ if (currentCost > 0) {
5679
+ snapshot.currentCostUsd = currentCost;
5680
+ }
5681
+ const spawnedCost = this.getSpawnedUsage().cost;
5682
+ if (spawnedCost > 0) {
5683
+ snapshot.spawnedCostUsd = spawnedCost;
5684
+ }
5685
+ const dailyCost = this.getDailyUsageTotals?.()?.totalCost;
5686
+ if (dailyCost !== undefined && dailyCost > 0) {
5687
+ snapshot.dailyCostUsd = dailyCost;
5688
+ }
5689
+ const goal = this.getGoalStateSnapshot();
5690
+ if (goal) {
5691
+ snapshot.activeGoal = {
5692
+ goalId: goal.goalId,
5693
+ status: goal.status,
5694
+ openRequirements: goal.requirements.filter((requirement) => requirement.status === "open").length,
5695
+ stallTurns: goal.stallTurns,
5696
+ };
5697
+ }
5698
+ // Real live count from the lane tracker — never inferred from historical snapshots. Absent
5699
+ // while zero, matching the presence-means-signal convention of the sibling fields.
5700
+ const activeLaneCount = this._laneTracker.getActiveCount();
5701
+ if (activeLaneCount > 0) {
5702
+ snapshot.activeLaneCount = activeLaneCount;
5703
+ }
5704
+ return snapshot;
5705
+ }
5706
+ /**
5707
+ * Aggregate an effectiveness/autonomy dashboard: what Pi has actually been doing (recent
5708
+ * route choices, latest gate outcome, cost, and any research/delegation/learning/goal
5709
+ * activity). Read-only — combines existing session-log getters, never mutates state or
5710
+ * recomputes a route/gate decision.
5711
+ */
5712
+ getAutonomyDiagnosticSnapshot(options) {
5713
+ const maxEntriesPerFamily = options?.maxEntriesPerFamily ?? 10;
5714
+ const snapshot = {};
5715
+ const goal = this.getGoalStateSnapshot();
5716
+ const recentDecisions = getRecentModelRouterDecisions(this.sessionManager.getEntries(), maxEntriesPerFamily);
5717
+ if (recentDecisions.length > 0) {
5718
+ snapshot.routes = recentDecisions.map((decision) => ({
5719
+ title: decision.route.tier,
5720
+ summary: decision.routedModel,
5721
+ reasonCode: decision.route.reasonCode,
5722
+ metadata: { risk: decision.route.risk, outcome: decision.outcome, intent: decision.intent },
5723
+ }));
5724
+ }
5725
+ if (this._lastAutonomyGateOutcome) {
5726
+ const gate = this._lastAutonomyGateOutcome;
5727
+ snapshot.gates = [
5728
+ {
5729
+ title: gate.gate,
5730
+ summary: gate.message,
5731
+ reasonCode: gate.reasonCode,
5732
+ metadata: { outcome: gate.outcome, reversible: gate.reversible },
5733
+ },
5734
+ ];
5735
+ }
5736
+ const costs = [];
5737
+ const currentCostForDiagnostics = this.getSessionStats().cost;
5738
+ if (currentCostForDiagnostics > 0) {
5739
+ costs.push({ title: "current", summary: `$${currentCostForDiagnostics.toFixed(4)}` });
5740
+ }
5741
+ const spawnedCost = this.getSpawnedUsage().cost;
5742
+ if (spawnedCost > 0)
5743
+ costs.push({ title: "spawned", summary: `$${spawnedCost.toFixed(4)}` });
5744
+ const dailyCostForDiagnostics = this.getDailyUsageTotals?.()?.totalCost;
5745
+ if (dailyCostForDiagnostics !== undefined && dailyCostForDiagnostics > 0) {
5746
+ costs.push({ title: "daily", summary: `$${dailyCostForDiagnostics.toFixed(4)}` });
5747
+ }
5748
+ if (costs.length > 0)
5749
+ snapshot.costs = costs;
5750
+ const researchEntries = [];
5751
+ const researchLaneRecords = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "research");
5752
+ for (const record of researchLaneRecords.slice(-maxEntriesPerFamily)) {
5753
+ researchEntries.push({
5754
+ title: `Lane ${record.laneId} (${record.status})`,
5755
+ reasonCode: record.reasonCode,
5756
+ metadata: {
5757
+ costUsd: record.costUsd,
5758
+ startedAt: record.startedAt,
5759
+ completedAt: record.completedAt,
5760
+ goalId: record.goalId,
5761
+ },
5762
+ });
5763
+ }
5764
+ for (const bundle of this.getEvidenceBundleSnapshots().slice(-maxEntriesPerFamily)) {
5765
+ researchEntries.push({
5766
+ title: `Research: ${bundle.query}`,
5767
+ metadata: { sourceCount: bundle.sources.length, findingCount: bundle.findings.length },
5768
+ });
5769
+ }
5770
+ if (this._lastResearchLaneSkipReason) {
5771
+ researchEntries.push({ title: "Last skip", reasonCode: this._lastResearchLaneSkipReason });
5772
+ }
5773
+ if (researchEntries.length > 0) {
5774
+ snapshot.research = researchEntries;
5775
+ }
5776
+ const delegationEntries = [];
5777
+ const workerLaneRecords = getLaneRecordSnapshots(this.sessionManager.getEntries()).filter((record) => record.type === "worker");
5778
+ for (const record of workerLaneRecords.slice(-maxEntriesPerFamily)) {
5779
+ delegationEntries.push({
5780
+ title: `Lane ${record.laneId} (${record.status})`,
5781
+ reasonCode: record.reasonCode,
5782
+ metadata: { costUsd: record.costUsd, startedAt: record.startedAt, completedAt: record.completedAt },
5783
+ });
5784
+ }
5785
+ const workerResults = this.getWorkerResultSnapshots();
5786
+ for (const result of workerResults.slice(-maxEntriesPerFamily)) {
5787
+ delegationEntries.push({
5788
+ title: `Worker ${result.requestId} (${result.status})`,
5789
+ summary: result.summary,
5790
+ metadata: {
5791
+ changedFileCount: result.changedFiles.length,
5792
+ blockerCount: result.blockers?.length ?? 0,
5793
+ usageReportId: result.usageReportId,
5794
+ },
5795
+ });
5796
+ }
5797
+ if (delegationEntries.length > 0) {
5798
+ snapshot.delegation = delegationEntries;
5799
+ }
5800
+ const learningEntries = [];
5801
+ const learningDecisions = this.getLearningDecisionSnapshots();
5802
+ for (const decision of learningDecisions.slice(-maxEntriesPerFamily)) {
5803
+ learningEntries.push({
5804
+ title: `Learning (${decision.kind})`,
5805
+ summary: decision.summary,
5806
+ reasonCode: decision.reasonCode,
5807
+ metadata: { confidence: decision.confidence, requiresApproval: decision.requiresApproval },
5808
+ });
5809
+ }
5810
+ for (const audit of this.getLearningAuditRecords().slice(-maxEntriesPerFamily)) {
5811
+ learningEntries.push({
5812
+ title: `Audit ${audit.id} (${audit.action})`,
5813
+ summary: audit.summary,
5814
+ reasonCode: audit.reasonCode,
5815
+ metadata: { layer: audit.layer, proposalId: audit.proposalId, rollbackOf: audit.rollbackOf },
5816
+ });
5817
+ }
5818
+ if (learningEntries.length > 0) {
5819
+ snapshot.learning = learningEntries;
5820
+ }
5821
+ if (goal) {
5822
+ snapshot.goals = [
5823
+ {
5824
+ title: `Goal ${goal.goalId}`,
5825
+ summary: goal.userGoal,
5826
+ reasonCode: goal.status,
5827
+ metadata: {
5828
+ openRequirementCount: goal.requirements.filter((requirement) => requirement.status === "open").length,
5829
+ stallTurns: goal.stallTurns,
5830
+ blockedReason: goal.blockedReason,
5831
+ },
5832
+ },
5833
+ ];
5834
+ }
5835
+ return snapshot;
5836
+ }
4021
5837
  createReplacedSessionContext() {
4022
5838
  const context = Object.defineProperties({}, Object.getOwnPropertyDescriptors(this._extensionRunner.createCommandContext()));
4023
5839
  context.sendMessage = (message, options) => this.sendCustomMessage(message, options);