@caupulican/pi-adaptative 0.80.85 → 0.80.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. package/CHANGELOG.md +160 -1
  2. package/dist/core/agent-session.d.ts +394 -1
  3. package/dist/core/agent-session.d.ts.map +1 -1
  4. package/dist/core/agent-session.js +1862 -46
  5. package/dist/core/agent-session.js.map +1 -1
  6. package/dist/core/autonomy/approval-gate.d.ts +4 -0
  7. package/dist/core/autonomy/approval-gate.d.ts.map +1 -0
  8. package/dist/core/autonomy/approval-gate.js +27 -0
  9. package/dist/core/autonomy/approval-gate.js.map +1 -0
  10. package/dist/core/autonomy/bounded-completion.d.ts +27 -0
  11. package/dist/core/autonomy/bounded-completion.d.ts.map +1 -0
  12. package/dist/core/autonomy/bounded-completion.js +44 -0
  13. package/dist/core/autonomy/bounded-completion.js.map +1 -0
  14. package/dist/core/autonomy/contracts.d.ts +129 -0
  15. package/dist/core/autonomy/contracts.d.ts.map +1 -0
  16. package/dist/core/autonomy/contracts.js +2 -0
  17. package/dist/core/autonomy/contracts.js.map +1 -0
  18. package/dist/core/autonomy/gates.d.ts +15 -0
  19. package/dist/core/autonomy/gates.d.ts.map +1 -0
  20. package/dist/core/autonomy/gates.js +205 -0
  21. package/dist/core/autonomy/gates.js.map +1 -0
  22. package/dist/core/autonomy/lane-tracker.d.ts +48 -0
  23. package/dist/core/autonomy/lane-tracker.d.ts.map +1 -0
  24. package/dist/core/autonomy/lane-tracker.js +125 -0
  25. package/dist/core/autonomy/lane-tracker.js.map +1 -0
  26. package/dist/core/autonomy/path-scope.d.ts +9 -0
  27. package/dist/core/autonomy/path-scope.d.ts.map +1 -0
  28. package/dist/core/autonomy/path-scope.js +122 -0
  29. package/dist/core/autonomy/path-scope.js.map +1 -0
  30. package/dist/core/autonomy/risk-assessment.d.ts +3 -0
  31. package/dist/core/autonomy/risk-assessment.d.ts.map +1 -0
  32. package/dist/core/autonomy/risk-assessment.js +122 -0
  33. package/dist/core/autonomy/risk-assessment.js.map +1 -0
  34. package/dist/core/autonomy/session-lane-record.d.ts +10 -0
  35. package/dist/core/autonomy/session-lane-record.d.ts.map +1 -0
  36. package/dist/core/autonomy/session-lane-record.js +36 -0
  37. package/dist/core/autonomy/session-lane-record.js.map +1 -0
  38. package/dist/core/autonomy/status.d.ts +40 -0
  39. package/dist/core/autonomy/status.d.ts.map +1 -0
  40. package/dist/core/autonomy/status.js +107 -0
  41. package/dist/core/autonomy/status.js.map +1 -0
  42. package/dist/core/autonomy/subagent-prompt.d.ts +21 -0
  43. package/dist/core/autonomy/subagent-prompt.d.ts.map +1 -0
  44. package/dist/core/autonomy/subagent-prompt.js +28 -0
  45. package/dist/core/autonomy/subagent-prompt.js.map +1 -0
  46. package/dist/core/autonomy/telemetry-events.d.ts +18 -0
  47. package/dist/core/autonomy/telemetry-events.d.ts.map +1 -0
  48. package/dist/core/autonomy/telemetry-events.js +60 -0
  49. package/dist/core/autonomy/telemetry-events.js.map +1 -0
  50. package/dist/core/context/artifact-retrieval.d.ts +49 -0
  51. package/dist/core/context/artifact-retrieval.d.ts.map +1 -0
  52. package/dist/core/context/artifact-retrieval.js +49 -0
  53. package/dist/core/context/artifact-retrieval.js.map +1 -0
  54. package/dist/core/context/context-artifacts.d.ts +94 -0
  55. package/dist/core/context/context-artifacts.d.ts.map +1 -0
  56. package/dist/core/context/context-artifacts.js +307 -0
  57. package/dist/core/context/context-artifacts.js.map +1 -0
  58. package/dist/core/context/context-audit.d.ts +66 -0
  59. package/dist/core/context/context-audit.d.ts.map +1 -0
  60. package/dist/core/context/context-audit.js +173 -0
  61. package/dist/core/context/context-audit.js.map +1 -0
  62. package/dist/core/context/context-item.d.ts +117 -0
  63. package/dist/core/context/context-item.d.ts.map +1 -0
  64. package/dist/core/context/context-item.js +36 -0
  65. package/dist/core/context/context-item.js.map +1 -0
  66. package/dist/core/context/context-prompt-enforcement.d.ts +73 -0
  67. package/dist/core/context/context-prompt-enforcement.d.ts.map +1 -0
  68. package/dist/core/context/context-prompt-enforcement.js +153 -0
  69. package/dist/core/context/context-prompt-enforcement.js.map +1 -0
  70. package/dist/core/context/context-prompt-policy.d.ts +90 -0
  71. package/dist/core/context/context-prompt-policy.d.ts.map +1 -0
  72. package/dist/core/context/context-prompt-policy.js +73 -0
  73. package/dist/core/context/context-prompt-policy.js.map +1 -0
  74. package/dist/core/context/context-retention.d.ts +36 -0
  75. package/dist/core/context/context-retention.d.ts.map +1 -0
  76. package/dist/core/context/context-retention.js +108 -0
  77. package/dist/core/context/context-retention.js.map +1 -0
  78. package/dist/core/context/context-store.d.ts +37 -0
  79. package/dist/core/context/context-store.d.ts.map +1 -0
  80. package/dist/core/context/context-store.js +45 -0
  81. package/dist/core/context/context-store.js.map +1 -0
  82. package/dist/core/context/memory-diagnostics.d.ts +50 -0
  83. package/dist/core/context/memory-diagnostics.d.ts.map +1 -0
  84. package/dist/core/context/memory-diagnostics.js +43 -0
  85. package/dist/core/context/memory-diagnostics.js.map +1 -0
  86. package/dist/core/context/memory-index-store.d.ts +28 -0
  87. package/dist/core/context/memory-index-store.d.ts.map +1 -0
  88. package/dist/core/context/memory-index-store.js +38 -0
  89. package/dist/core/context/memory-index-store.js.map +1 -0
  90. package/dist/core/context/memory-prompt-block.d.ts +34 -0
  91. package/dist/core/context/memory-prompt-block.d.ts.map +1 -0
  92. package/dist/core/context/memory-prompt-block.js +58 -0
  93. package/dist/core/context/memory-prompt-block.js.map +1 -0
  94. package/dist/core/context/memory-provider-contract.d.ts +114 -0
  95. package/dist/core/context/memory-provider-contract.d.ts.map +1 -0
  96. package/dist/core/context/memory-provider-contract.js +121 -0
  97. package/dist/core/context/memory-provider-contract.js.map +1 -0
  98. package/dist/core/context/memory-retrieval.d.ts +27 -0
  99. package/dist/core/context/memory-retrieval.d.ts.map +1 -0
  100. package/dist/core/context/memory-retrieval.js +91 -0
  101. package/dist/core/context/memory-retrieval.js.map +1 -0
  102. package/dist/core/context/okf-memory-provider.d.ts +26 -0
  103. package/dist/core/context/okf-memory-provider.d.ts.map +1 -0
  104. package/dist/core/context/okf-memory-provider.js +154 -0
  105. package/dist/core/context/okf-memory-provider.js.map +1 -0
  106. package/dist/core/context/okf-memory.d.ts +42 -0
  107. package/dist/core/context/okf-memory.d.ts.map +1 -0
  108. package/dist/core/context/okf-memory.js +175 -0
  109. package/dist/core/context/okf-memory.js.map +1 -0
  110. package/dist/core/context/policy-engine.d.ts +66 -0
  111. package/dist/core/context/policy-engine.d.ts.map +1 -0
  112. package/dist/core/context/policy-engine.js +171 -0
  113. package/dist/core/context/policy-engine.js.map +1 -0
  114. package/dist/core/context/policy-types.d.ts +102 -0
  115. package/dist/core/context/policy-types.d.ts.map +1 -0
  116. package/dist/core/context/policy-types.js +7 -0
  117. package/dist/core/context/policy-types.js.map +1 -0
  118. package/dist/core/context/sqlite-runtime-index.d.ts +19 -0
  119. package/dist/core/context/sqlite-runtime-index.d.ts.map +1 -0
  120. package/dist/core/context/sqlite-runtime-index.js +344 -0
  121. package/dist/core/context/sqlite-runtime-index.js.map +1 -0
  122. package/dist/core/context/storage-authority.d.ts +20 -0
  123. package/dist/core/context/storage-authority.d.ts.map +1 -0
  124. package/dist/core/context/storage-authority.js +51 -0
  125. package/dist/core/context/storage-authority.js.map +1 -0
  126. package/dist/core/context/tool-output-packer.d.ts +75 -0
  127. package/dist/core/context/tool-output-packer.d.ts.map +1 -0
  128. package/dist/core/context/tool-output-packer.js +77 -0
  129. package/dist/core/context/tool-output-packer.js.map +1 -0
  130. package/dist/core/cost/session-usage.d.ts +20 -0
  131. package/dist/core/cost/session-usage.d.ts.map +1 -0
  132. package/dist/core/cost/session-usage.js +164 -0
  133. package/dist/core/cost/session-usage.js.map +1 -0
  134. package/dist/core/delegation/session-worker-result.d.ts +10 -0
  135. package/dist/core/delegation/session-worker-result.d.ts.map +1 -0
  136. package/dist/core/delegation/session-worker-result.js +36 -0
  137. package/dist/core/delegation/session-worker-result.js.map +1 -0
  138. package/dist/core/delegation/worker-result.d.ts +9 -0
  139. package/dist/core/delegation/worker-result.d.ts.map +1 -0
  140. package/dist/core/delegation/worker-result.js +152 -0
  141. package/dist/core/delegation/worker-result.js.map +1 -0
  142. package/dist/core/delegation/worker-runner.d.ts +58 -0
  143. package/dist/core/delegation/worker-runner.d.ts.map +1 -0
  144. package/dist/core/delegation/worker-runner.js +188 -0
  145. package/dist/core/delegation/worker-runner.js.map +1 -0
  146. package/dist/core/extensions/builtin.d.ts +5 -1
  147. package/dist/core/extensions/builtin.d.ts.map +1 -1
  148. package/dist/core/extensions/builtin.js +23 -1
  149. package/dist/core/extensions/builtin.js.map +1 -1
  150. package/dist/core/footer-data-provider.d.ts +5 -1
  151. package/dist/core/footer-data-provider.d.ts.map +1 -1
  152. package/dist/core/footer-data-provider.js +13 -0
  153. package/dist/core/footer-data-provider.js.map +1 -1
  154. package/dist/core/goals/goal-continuation-controller.d.ts +22 -0
  155. package/dist/core/goals/goal-continuation-controller.d.ts.map +1 -0
  156. package/dist/core/goals/goal-continuation-controller.js +88 -0
  157. package/dist/core/goals/goal-continuation-controller.js.map +1 -0
  158. package/dist/core/goals/goal-continuation-defaults.d.ts +10 -0
  159. package/dist/core/goals/goal-continuation-defaults.d.ts.map +1 -0
  160. package/dist/core/goals/goal-continuation-defaults.js +10 -0
  161. package/dist/core/goals/goal-continuation-defaults.js.map +1 -0
  162. package/dist/core/goals/goal-continuation-prompt.d.ts +18 -0
  163. package/dist/core/goals/goal-continuation-prompt.d.ts.map +1 -0
  164. package/dist/core/goals/goal-continuation-prompt.js +141 -0
  165. package/dist/core/goals/goal-continuation-prompt.js.map +1 -0
  166. package/dist/core/goals/goal-runtime-snapshot.d.ts +19 -0
  167. package/dist/core/goals/goal-runtime-snapshot.d.ts.map +1 -0
  168. package/dist/core/goals/goal-runtime-snapshot.js +23 -0
  169. package/dist/core/goals/goal-runtime-snapshot.js.map +1 -0
  170. package/dist/core/goals/goal-state.d.ts +87 -0
  171. package/dist/core/goals/goal-state.d.ts.map +1 -0
  172. package/dist/core/goals/goal-state.js +259 -0
  173. package/dist/core/goals/goal-state.js.map +1 -0
  174. package/dist/core/goals/goal-tool-core.d.ts +66 -0
  175. package/dist/core/goals/goal-tool-core.d.ts.map +1 -0
  176. package/dist/core/goals/goal-tool-core.js +146 -0
  177. package/dist/core/goals/goal-tool-core.js.map +1 -0
  178. package/dist/core/goals/session-goal-state.d.ts +10 -0
  179. package/dist/core/goals/session-goal-state.d.ts.map +1 -0
  180. package/dist/core/goals/session-goal-state.js +35 -0
  181. package/dist/core/goals/session-goal-state.js.map +1 -0
  182. package/dist/core/learning/learning-audit.d.ts +45 -0
  183. package/dist/core/learning/learning-audit.d.ts.map +1 -0
  184. package/dist/core/learning/learning-audit.js +139 -0
  185. package/dist/core/learning/learning-audit.js.map +1 -0
  186. package/dist/core/learning/learning-gate.d.ts +29 -0
  187. package/dist/core/learning/learning-gate.d.ts.map +1 -0
  188. package/dist/core/learning/learning-gate.js +150 -0
  189. package/dist/core/learning/learning-gate.js.map +1 -0
  190. package/dist/core/learning/session-learning-decision.d.ts +10 -0
  191. package/dist/core/learning/session-learning-decision.d.ts.map +1 -0
  192. package/dist/core/learning/session-learning-decision.js +36 -0
  193. package/dist/core/learning/session-learning-decision.js.map +1 -0
  194. package/dist/core/model-capability.d.ts +41 -0
  195. package/dist/core/model-capability.d.ts.map +1 -0
  196. package/dist/core/model-capability.js +101 -0
  197. package/dist/core/model-capability.js.map +1 -0
  198. package/dist/core/model-router/config-diagnostics.d.ts.map +1 -1
  199. package/dist/core/model-router/config-diagnostics.js +1 -0
  200. package/dist/core/model-router/config-diagnostics.js.map +1 -1
  201. package/dist/core/model-router/intent-classifier.d.ts +2 -0
  202. package/dist/core/model-router/intent-classifier.d.ts.map +1 -1
  203. package/dist/core/model-router/intent-classifier.js +154 -9
  204. package/dist/core/model-router/intent-classifier.js.map +1 -1
  205. package/dist/core/model-router/route-judge.d.ts +54 -0
  206. package/dist/core/model-router/route-judge.d.ts.map +1 -0
  207. package/dist/core/model-router/route-judge.js +128 -0
  208. package/dist/core/model-router/route-judge.js.map +1 -0
  209. package/dist/core/model-router/status.d.ts +4 -1
  210. package/dist/core/model-router/status.d.ts.map +1 -1
  211. package/dist/core/model-router/status.js +30 -6
  212. package/dist/core/model-router/status.js.map +1 -1
  213. package/dist/core/model-router/tool-escalation.d.ts +4 -6
  214. package/dist/core/model-router/tool-escalation.d.ts.map +1 -1
  215. package/dist/core/model-router/tool-escalation.js +1 -1
  216. package/dist/core/model-router/tool-escalation.js.map +1 -1
  217. package/dist/core/models/fitness-store.d.ts +40 -0
  218. package/dist/core/models/fitness-store.d.ts.map +1 -0
  219. package/dist/core/models/fitness-store.js +61 -0
  220. package/dist/core/models/fitness-store.js.map +1 -0
  221. package/dist/core/profile-registry.d.ts.map +1 -1
  222. package/dist/core/profile-registry.js +1 -1
  223. package/dist/core/profile-registry.js.map +1 -1
  224. package/dist/core/prompt-templates.d.ts +2 -0
  225. package/dist/core/prompt-templates.d.ts.map +1 -1
  226. package/dist/core/prompt-templates.js +12 -4
  227. package/dist/core/prompt-templates.js.map +1 -1
  228. package/dist/core/research/automata-provider.d.ts +5 -0
  229. package/dist/core/research/automata-provider.d.ts.map +1 -0
  230. package/dist/core/research/automata-provider.js +15 -0
  231. package/dist/core/research/automata-provider.js.map +1 -0
  232. package/dist/core/research/evidence-bundle.d.ts +10 -0
  233. package/dist/core/research/evidence-bundle.d.ts.map +1 -0
  234. package/dist/core/research/evidence-bundle.js +116 -0
  235. package/dist/core/research/evidence-bundle.js.map +1 -0
  236. package/dist/core/research/model-fitness.d.ts +79 -0
  237. package/dist/core/research/model-fitness.d.ts.map +1 -0
  238. package/dist/core/research/model-fitness.js +257 -0
  239. package/dist/core/research/model-fitness.js.map +1 -0
  240. package/dist/core/research/research-gate.d.ts +11 -0
  241. package/dist/core/research/research-gate.d.ts.map +1 -0
  242. package/dist/core/research/research-gate.js +82 -0
  243. package/dist/core/research/research-gate.js.map +1 -0
  244. package/dist/core/research/research-runner.d.ts +59 -0
  245. package/dist/core/research/research-runner.d.ts.map +1 -0
  246. package/dist/core/research/research-runner.js +155 -0
  247. package/dist/core/research/research-runner.js.map +1 -0
  248. package/dist/core/research/session-evidence-bundle.d.ts +11 -0
  249. package/dist/core/research/session-evidence-bundle.d.ts.map +1 -0
  250. package/dist/core/research/session-evidence-bundle.js +55 -0
  251. package/dist/core/research/session-evidence-bundle.js.map +1 -0
  252. package/dist/core/resource-loader.d.ts.map +1 -1
  253. package/dist/core/resource-loader.js +7 -1
  254. package/dist/core/resource-loader.js.map +1 -1
  255. package/dist/core/settings-manager.d.ts +147 -4
  256. package/dist/core/settings-manager.d.ts.map +1 -1
  257. package/dist/core/settings-manager.js +285 -9
  258. package/dist/core/settings-manager.js.map +1 -1
  259. package/dist/core/skills.d.ts +4 -0
  260. package/dist/core/skills.d.ts.map +1 -1
  261. package/dist/core/skills.js +18 -6
  262. package/dist/core/skills.js.map +1 -1
  263. package/dist/core/slash-commands.d.ts.map +1 -1
  264. package/dist/core/slash-commands.js +4 -0
  265. package/dist/core/slash-commands.js.map +1 -1
  266. package/dist/core/toolkit/script-registry.d.ts +34 -0
  267. package/dist/core/toolkit/script-registry.d.ts.map +1 -0
  268. package/dist/core/toolkit/script-registry.js +71 -0
  269. package/dist/core/toolkit/script-registry.js.map +1 -0
  270. package/dist/core/toolkit/script-runner.d.ts +28 -0
  271. package/dist/core/toolkit/script-runner.d.ts.map +1 -0
  272. package/dist/core/toolkit/script-runner.js +48 -0
  273. package/dist/core/toolkit/script-runner.js.map +1 -0
  274. package/dist/core/tools/artifact-retrieve.d.ts +23 -0
  275. package/dist/core/tools/artifact-retrieve.d.ts.map +1 -0
  276. package/dist/core/tools/artifact-retrieve.js +110 -0
  277. package/dist/core/tools/artifact-retrieve.js.map +1 -0
  278. package/dist/core/tools/delegate.d.ts +32 -0
  279. package/dist/core/tools/delegate.d.ts.map +1 -0
  280. package/dist/core/tools/delegate.js +60 -0
  281. package/dist/core/tools/delegate.js.map +1 -0
  282. package/dist/core/tools/fff-search-backend.d.ts +103 -0
  283. package/dist/core/tools/fff-search-backend.d.ts.map +1 -0
  284. package/dist/core/tools/fff-search-backend.js +151 -0
  285. package/dist/core/tools/fff-search-backend.js.map +1 -0
  286. package/dist/core/tools/find.d.ts +21 -1
  287. package/dist/core/tools/find.d.ts.map +1 -1
  288. package/dist/core/tools/find.js +183 -10
  289. package/dist/core/tools/find.js.map +1 -1
  290. package/dist/core/tools/goal.d.ts +35 -0
  291. package/dist/core/tools/goal.d.ts.map +1 -0
  292. package/dist/core/tools/goal.js +122 -0
  293. package/dist/core/tools/goal.js.map +1 -0
  294. package/dist/core/tools/grep.d.ts +21 -1
  295. package/dist/core/tools/grep.d.ts.map +1 -1
  296. package/dist/core/tools/grep.js +272 -27
  297. package/dist/core/tools/grep.js.map +1 -1
  298. package/dist/core/tools/index.d.ts +4 -1
  299. package/dist/core/tools/index.d.ts.map +1 -1
  300. package/dist/core/tools/index.js +9 -0
  301. package/dist/core/tools/index.js.map +1 -1
  302. package/dist/core/tools/model-fitness.d.ts +30 -0
  303. package/dist/core/tools/model-fitness.d.ts.map +1 -0
  304. package/dist/core/tools/model-fitness.js +38 -0
  305. package/dist/core/tools/model-fitness.js.map +1 -0
  306. package/dist/core/tools/run-toolkit-script.d.ts +24 -0
  307. package/dist/core/tools/run-toolkit-script.d.ts.map +1 -0
  308. package/dist/core/tools/run-toolkit-script.js +103 -0
  309. package/dist/core/tools/run-toolkit-script.js.map +1 -0
  310. package/dist/core/tools/search-router.d.ts +75 -0
  311. package/dist/core/tools/search-router.d.ts.map +1 -0
  312. package/dist/core/tools/search-router.js +85 -0
  313. package/dist/core/tools/search-router.js.map +1 -0
  314. package/dist/modes/interactive/components/footer.d.ts.map +1 -1
  315. package/dist/modes/interactive/components/footer.js +18 -16
  316. package/dist/modes/interactive/components/footer.js.map +1 -1
  317. package/dist/modes/interactive/components/settings-selector.d.ts +13 -1
  318. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  319. package/dist/modes/interactive/components/settings-selector.js +471 -11
  320. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  321. package/dist/modes/interactive/interactive-mode.d.ts +4 -0
  322. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  323. package/dist/modes/interactive/interactive-mode.js +220 -39
  324. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  325. package/dist/modes/print-mode.d.ts.map +1 -1
  326. package/dist/modes/print-mode.js +3 -0
  327. package/dist/modes/print-mode.js.map +1 -1
  328. package/dist/utils/tools-manager.d.ts +2 -0
  329. package/dist/utils/tools-manager.d.ts.map +1 -1
  330. package/dist/utils/tools-manager.js +154 -2
  331. package/dist/utils/tools-manager.js.map +1 -1
  332. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  333. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  334. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  335. package/examples/extensions/sandbox/package-lock.json +2 -2
  336. package/examples/extensions/sandbox/package.json +1 -1
  337. package/examples/extensions/with-deps/package-lock.json +2 -2
  338. package/examples/extensions/with-deps/package.json +1 -1
  339. package/npm-shrinkwrap.json +368 -12
  340. package/package.json +5 -4
@@ -0,0 +1,257 @@
1
+ import { runBoundedCompletion } from "../autonomy/bounded-completion.js";
2
+ import { runWorker } from "../delegation/worker-runner.js";
3
+ import { runRouteJudge } from "../model-router/route-judge.js";
4
+ import { runResearch } from "./research-runner.js";
5
/**
 * Default judge probe set. The first three prompts are planning-shaped
 * (`planning: true`); the last three are trivial lookups (`planning: false`).
 */
export const DEFAULT_JUDGE_FITNESS_PROMPTS = [
    ...[
        "how should we plan the migration of the session storage layer?",
        "design an approach for splitting the settings manager",
        "draft a roadmap for the autonomy rework",
    ].map((prompt) => ({ prompt, planning: true })),
    ...[
        "what does the resolvePath function return?",
        "list the files in the delegation module",
        "why is this test flaky?",
    ].map((prompt) => ({ prompt, planning: false })),
];
/**
 * Static system prompts for the heavy-lifter surfaces. They are kept constant
 * so that provider-side prompt caching stays stable across probe runs.
 */
export const SEARCH_PROBE_SYSTEM_PROMPT =
    "You plan code searches for a coding agent. You never answer the question yourself." +
    "\n" +
    "Given a question about a codebase, respond with STRICT JSON only - no prose:" +
    "\n" +
    '{"queries":[{"pattern":"<regex or literal to grep>","glob":"<file glob like **/*.ts>"}]}' +
    "\n" +
    "Return 1 to 4 queries, most specific first.";
export const TOOL_CALL_PROBE_SYSTEM_PROMPT =
    "You operate tools for a coding agent. You have exactly one tool:" +
    "\n" +
    "grep(pattern: string, path: string) - search files under a path for a pattern." +
    "\n" +
    "Respond to every task with STRICT JSON only - no prose:" +
    "\n" +
    '{"tool":"grep","arguments":{"pattern":"<pattern>","path":"<path>"}}';
// User prompts sent (one completion each) alongside SEARCH_PROBE_SYSTEM_PROMPT.
const SEARCH_PROBE_TASKS = [
    "Where is the retry/backoff logic for HTTP requests implemented?",
    "Which files define the settings for background research?",
    "Find where session entries of type custom are appended.",
];
// User prompts sent (one completion each) alongside TOOL_CALL_PROBE_SYSTEM_PROMPT.
const TOOL_CALL_PROBE_TASKS = [
    "Find usages of the function resolveCliModel under src/.",
    "Search for the string 'budget_exhausted' in the core directory.",
    "Locate where LaneTracker is instantiated under src/core.",
];
37
/**
 * Acceptance check for the search-plan probe.
 *
 * @param text Raw model output.
 * @returns `true` when a JSON object can be extracted from `text` whose
 *   `queries` property is an array of 1 to 8 entries, each a truthy object
 *   with a non-blank string `pattern`; `false` otherwise.
 */
function parseSearchPlan(text) {
    const plan = extractJsonObject(text);
    if (!plan) {
        return false;
    }
    const { queries } = plan;
    const hasAcceptableCount = Array.isArray(queries) && queries.length >= 1 && queries.length <= 8;
    if (!hasAcceptableCount) {
        return false;
    }
    // A query is rejected when it is not an object or carries no usable pattern.
    const isMalformed = (entry) =>
        !entry ||
        typeof entry !== "object" ||
        typeof entry.pattern !== "string" ||
        entry.pattern.trim().length === 0;
    return !queries.some(isMalformed);
}
49
/**
 * Acceptance check for the tool-call probe.
 *
 * @param text Raw model output.
 * @returns `true` when a JSON object can be extracted from `text` with
 *   `tool === "grep"` and an `arguments` object (not an array) whose
 *   `pattern` and `path` are both non-blank strings; `false` otherwise.
 */
function parseToolCall(text) {
    const call = extractJsonObject(text);
    if (!call || call.tool !== "grep") {
        return false;
    }
    const { arguments: callArgs } = call;
    const isPlainObject = Boolean(callArgs) && typeof callArgs === "object" && !Array.isArray(callArgs);
    if (!isPlainObject) {
        return false;
    }
    const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
    return isNonBlankString(callArgs.pattern) && isNonBlankString(callArgs.path);
}
63
/**
 * Best-effort extraction of a single JSON object from model output.
 *
 * Candidates are tried in order and the first one that parses to a non-array
 * object wins:
 *   1. the whole trimmed text,
 *   2. the contents of the first Markdown code fence (optionally tagged `json`),
 *   3. the slice from the first `{` to the last `}`.
 *
 * @param text Raw model output. Non-string input (for example a completion
 *   that carried no text) is treated as unparseable rather than throwing, so
 *   callers can score it as a failed output.
 * @returns The parsed object, or `undefined` when no candidate parses to a
 *   non-array object.
 */
function extractJsonObject(text) {
    // Guard: calling `.trim()` on a missing/non-string text would throw.
    if (typeof text !== "string")
        return undefined;
    const trimmed = text.trim();
    const candidates = [trimmed];
    const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(trimmed);
    if (fenced?.[1])
        candidates.push(fenced[1].trim());
    const start = trimmed.indexOf("{");
    const end = trimmed.lastIndexOf("}");
    if (start >= 0 && end > start)
        candidates.push(trimmed.slice(start, end + 1));
    for (const candidate of candidates) {
        try {
            const parsed = JSON.parse(candidate);
            // Arrays and primitives are valid JSON but not the object shape the probes expect.
            if (parsed && typeof parsed === "object" && !Array.isArray(parsed))
                return parsed;
        }
        catch {
            // Not valid JSON; fall through to the next candidate.
        }
    }
    return undefined;
}
85
/**
 * Builds a fresh envelope object for one research-lane probe trial.
 *
 * @returns A new object with the fixed id `"model-fitness-probe"`, the
 *   capability names `research`, `read_files` and `memory_read`, a
 *   `maxEstimatedUsd` of 1, and `createdAt` set to the current time as an
 *   ISO-8601 string.
 */
function fitnessEnvelope() {
    const createdAt = new Date().toISOString();
    const capabilities = ["research", "read_files", "memory_read"];
    return { id: "model-fitness-probe", capabilities, maxEstimatedUsd: 1, createdAt };
}
93
/**
 * Runs a fixed battery of probes against one completer and scores each surface.
 *
 * Surfaces, in execution order:
 *   - research lane: `trials` calls to `runResearch`;
 *   - worker lane: `trials` calls to `runWorker`;
 *   - route judge: one `runRouteJudge` call per judge prompt;
 *   - search plans / tool calls: one bounded completion per built-in task,
 *     accepted when `parseSearchPlan` / `parseToolCall` returns true.
 *
 * @param options Reads `complete` (required completer), and optionally
 *   `trials` (default 3, normalised to a whole number in [1, 20]),
 *   `maxWallClockMs` (default 120000), `judgePrompts`
 *   (default `DEFAULT_JUDGE_FITNESS_PROMPTS`), `now` (clock, default
 *   `Date.now`) and `signal` (forwarded to every runner).
 * @returns `{ trials, tokensPerSecond, research, worker, judge, search,
 *   toolCall, totalCostUsd }`. `tokensPerSecond` values are `undefined` when
 *   no completion reported both a positive token count and eval time.
 */
export async function runModelFitnessProbe(options) {
    // Normalise to a whole trial count. Without flooring, a fractional value (e.g. 2.5) would run
    // three loop iterations yet report `total: 2.5` and skew the means; NaN would skip the lane
    // loops entirely and yield NaN means. Non-numeric input falls back to the default of 3.
    const flooredTrials = Math.floor(options.trials ?? 3);
    const trials = Number.isNaN(flooredTrials) ? 3 : Math.max(1, Math.min(flooredTrials, 20));
    const maxWallClockMs = options.maxWallClockMs ?? 120_000;
    const judgePrompts = options.judgePrompts ?? DEFAULT_JUDGE_FITNESS_PROMPTS;
    const now = options.now ?? Date.now;
    let totalCostUsd = 0;
    // Token-speed instrumentation: the lane runners' own contracts carry text/cost only, so the
    // completer is wrapped once here and generation stats are accumulated per surface.
    const overallSpeed = { tokens: 0, evalMs: 0 };
    let surfaceSpeed = { tokens: 0, evalMs: 0 };
    const complete = async (args) => {
        const completion = await options.complete(args);
        const tokens = completion.outputTokens ?? 0;
        const evalMs = completion.evalMs ?? 0;
        // Only completions that report both figures contribute to the speed estimate.
        if (tokens > 0 && evalMs > 0) {
            surfaceSpeed.tokens += tokens;
            surfaceSpeed.evalMs += evalMs;
            overallSpeed.tokens += tokens;
            overallSpeed.evalMs += evalMs;
        }
        return completion;
    };
    // Returns the tokens/second accumulated since the previous call and resets the accumulator,
    // so it must be called exactly once after each surface finishes.
    const takeSurfaceSpeed = () => {
        const speed = surfaceSpeed.evalMs > 0 ? Math.round((surfaceSpeed.tokens / surfaceSpeed.evalMs) * 1000) : undefined;
        surfaceSpeed = { tokens: 0, evalMs: 0 };
        return speed;
    };
    // --- Research lane ---
    const research = { succeeded: 0, total: trials, outcomes: [], meanMs: 0 };
    for (let i = 0; i < trials; i++) {
        const started = now();
        const result = await runResearch({
            query: `fitness:probe requirements:req-${i}`,
            context: [
                "Goal: add a retry helper to the HTTP client module",
                "Open requirements:",
                "- Find what retry/backoff conventions the codebase already uses",
                "- Identify which call sites would adopt the helper",
            ].join("\n"),
            envelope: fitnessEnvelope(),
            maxUsd: 1,
            maxSources: 8,
            maxFindings: 5,
            maxWallClockMs,
            signal: options.signal,
            complete,
        });
        // meanMs holds the running sum until it is divided after the loop.
        research.meanMs += now() - started;
        totalCostUsd += result.costUsd;
        if (result.status === "succeeded")
            research.succeeded++;
        research.outcomes.push(`${result.status}/${result.reasonCode}`);
    }
    research.meanMs = Math.round(research.meanMs / trials);
    research.tokensPerSecond = takeSurfaceSpeed();
    // --- Worker lane ---
    const worker = { succeeded: 0, total: trials, outcomes: [], meanMs: 0 };
    for (let i = 0; i < trials; i++) {
        const started = now();
        const outcome = await runWorker({
            request: {
                id: `fitness-worker-${i}`,
                instructions: "Summarize in two sentences what a capability envelope is: a declared set of allowed tools, paths, and capability names that bounds what a delegated worker may do.",
                route: { tier: "cheap", risk: "read-only", confidence: 1, reasonCode: "fitness_probe", reasons: [] },
                envelope: { id: `fitness-env-${i}`, capabilities: ["read_files"], maxEstimatedUsd: 1 },
                maxEstimatedUsd: 1,
            },
            maxUsd: 1,
            maxWallClockMs,
            usageReportId: `fitness:${i}`,
            signal: options.signal,
            complete,
        });
        worker.meanMs += now() - started;
        totalCostUsd += outcome.costUsd;
        // A trial counts only when the worker both completed and had its result accepted.
        if (outcome.result.status === "completed" && outcome.accepted)
            worker.succeeded++;
        worker.outcomes.push(`${outcome.result.status}/${outcome.reasonCode}`);
    }
    worker.meanMs = Math.round(worker.meanMs / trials);
    worker.tokensPerSecond = takeSurfaceSpeed();
    // --- Route judge ---
    const judge = {
        parsed: 0,
        planningElevated: 0,
        planningTotal: judgePrompts.filter((entry) => entry.planning).length,
        trivialCheap: 0,
        trivialTotal: judgePrompts.filter((entry) => !entry.planning).length,
        total: judgePrompts.length,
        outcomes: [],
        meanMs: 0,
    };
    for (const entry of judgePrompts) {
        const started = now();
        const result = await runRouteJudge({
            prompt: entry.prompt,
            baseline: { tier: "cheap", risk: "read-only", confidence: 0.5, reasonCode: "fitness_probe", reasons: [] },
            maxWallClockMs,
            signal: options.signal,
            complete,
        });
        judge.meanMs += now() - started;
        totalCostUsd += result.costUsd;
        const tier = result.decision.tier;
        if (result.verdict) {
            judge.parsed++;
            // A useful judge must both keep planning off the cheap tier AND actually send trivial
            // prompts there — all-medium verdicts are safe but save nothing.
            if (entry.planning && tier !== "cheap")
                judge.planningElevated++;
            if (!entry.planning && tier === "cheap")
                judge.trivialCheap++;
        }
        judge.outcomes.push(`"${entry.prompt.slice(0, 40)}" -> ${tier}${result.fallbackReason ? ` (${result.fallbackReason})` : ""}`);
    }
    // An empty judge prompt list is allowed; avoid dividing by zero.
    judge.meanMs = judgePrompts.length > 0 ? Math.round(judge.meanMs / judgePrompts.length) : 0;
    judge.tokensPerSecond = takeSurfaceSpeed();
    // --- Structured-output surfaces (search plans, tool calls) ---
    // Sends each task as a single completion and scores it with the `accepts` predicate.
    const probeSurface = async (systemPrompt, tasks, accepts) => {
        const score = { succeeded: 0, total: tasks.length, outcomes: [], meanMs: 0 };
        for (const task of tasks) {
            const started = now();
            // Same wall-clock envelope as the lane surfaces — a hung model must not hang the probe.
            const bounded = await runBoundedCompletion({
                maxWallClockMs,
                signal: options.signal,
                execute: (signal) => complete({ systemPrompt, userPrompt: task, signal }),
            });
            if (bounded.completion)
                totalCostUsd += bounded.completion.costUsd;
            if (bounded.failure || !bounded.completion) {
                score.outcomes.push(bounded.failure ? bounded.failure.status : "completion_error");
            }
            else {
                const ok = accepts(bounded.completion.text);
                if (ok)
                    score.succeeded++;
                score.outcomes.push(ok ? "ok" : "unparseable_output");
            }
            score.meanMs += now() - started;
        }
        score.meanMs = tasks.length > 0 ? Math.round(score.meanMs / tasks.length) : 0;
        return score;
    };
    const search = await probeSurface(SEARCH_PROBE_SYSTEM_PROMPT, SEARCH_PROBE_TASKS, parseSearchPlan);
    search.tokensPerSecond = takeSurfaceSpeed();
    const toolCall = await probeSurface(TOOL_CALL_PROBE_SYSTEM_PROMPT, TOOL_CALL_PROBE_TASKS, parseToolCall);
    toolCall.tokensPerSecond = takeSurfaceSpeed();
    const tokensPerSecond = overallSpeed.evalMs > 0 ? Math.round((overallSpeed.tokens / overallSpeed.evalMs) * 1000) : undefined;
    return { trials, tokensPerSecond, research, worker, judge, search, toolCall, totalCostUsd };
}
240
/**
 * Renders a fitness report as a short multi-line summary for tool output or
 * interactive display. Only counts, means, speeds and outcome labels are
 * printed; no raw model output is included.
 *
 * @param model Label for the model, interpolated into the heading line.
 * @param report Report object as returned by `runModelFitnessProbe`.
 * @returns Newline-joined text: a heading, one line per surface, one line per
 *   judge outcome, and a cost line only when `totalCostUsd` is above zero.
 */
export function formatModelFitnessReport(model, report) {
    const { research, worker, search, toolCall, judge } = report;
    // Speed suffix is omitted entirely when no rate was measured.
    const speedSuffix = (rate) => (rate === undefined ? "" : `, ~${rate} tok/s`);
    const output = [];
    output.push(`Model fitness: ${model} (${report.trials} trials/lane${speedSuffix(report.tokensPerSecond)})`);
    output.push(`- research lane: ${research.succeeded}/${research.total} succeeded, mean ${research.meanMs}ms${speedSuffix(research.tokensPerSecond)} [${research.outcomes.join(", ")}]`);
    output.push(`- worker lane: ${worker.succeeded}/${worker.total} completed+accepted, mean ${worker.meanMs}ms${speedSuffix(worker.tokensPerSecond)} [${worker.outcomes.join(", ")}]`);
    output.push(`- search plans: ${search.succeeded}/${search.total} well-formed, mean ${search.meanMs}ms${speedSuffix(search.tokensPerSecond)}`);
    output.push(`- tool calls: ${toolCall.succeeded}/${toolCall.total} well-formed, mean ${toolCall.meanMs}ms${speedSuffix(toolCall.tokensPerSecond)}`);
    output.push(`- route judge: parsed ${judge.parsed}/${judge.total}, planning-elevated ${judge.planningElevated}/${judge.planningTotal}, trivial-cheap ${judge.trivialCheap}/${judge.trivialTotal}, mean ${judge.meanMs}ms${speedSuffix(judge.tokensPerSecond)}`);
    for (const outcome of judge.outcomes) {
        output.push(` ${outcome}`);
    }
    if (report.totalCostUsd > 0) {
        output.push(`- probe cost: $${report.totalCostUsd.toFixed(4)}`);
    }
    return output.join("\n");
}
257
+ //# sourceMappingURL=model-fitness.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-fitness.js","sourceRoot":"","sources":["../../../src/core/research/model-fitness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AAEzE,OAAO,EAAE,SAAS,EAAE,MAAM,gCAAgC,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAgCnD,qFAAqF;AACrF,MAAM,CAAC,MAAM,6BAA6B,GAAkC;IAC3E,EAAE,MAAM,EAAE,gEAAgE,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC5F,EAAE,MAAM,EAAE,uDAAuD,EAAE,QAAQ,EAAE,IAAI,EAAE;IACnF,EAAE,MAAM,EAAE,yCAAyC,EAAE,QAAQ,EAAE,IAAI,EAAE;IACrE,EAAE,MAAM,EAAE,4CAA4C,EAAE,QAAQ,EAAE,KAAK,EAAE;IACzE,EAAE,MAAM,EAAE,yCAAyC,EAAE,QAAQ,EAAE,KAAK,EAAE;IACtE,EAAE,MAAM,EAAE,yBAAyB,EAAE,QAAQ,EAAE,KAAK,EAAE;CACtD,CAAC;AAkDF,yFAAyF;AACzF,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACzC,oFAAoF;IACpF,8EAA8E;IAC9E,0FAA0F;IAC1F,6CAA6C;CAC7C,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,6BAA6B,GAAG;IAC5C,kEAAkE;IAClE,gFAAgF;IAChF,yDAAyD;IACzD,qEAAqE;CACrE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,kBAAkB,GAAsB;IAC7C,iEAAiE;IACjE,0DAA0D;IAC1D,yDAAyD;CACzD,CAAC;AAEF,MAAM,qBAAqB,GAAsB;IAChD,yDAAyD;IACzD,iEAAiE;IACjE,0DAA0D;CAC1D,CAAC;AAEF,SAAS,eAAe,CAAC,IAAY,EAAW;IAC/C,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAI,MAAgC,CAAC,OAAO,CAAC;IAC1D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IACxF,OAAO,OAAO,CAAC,KAAK,CACnB,CAAC,KAAK,EAAE,EAAE,CACT,KAAK;QACL,OAAO,KAAK,KAAK,QAAQ;QACzB,OAAQ,KAA+B,CAAC,OAAO,KAAK,QAAQ;QAC3D,KAA6B,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CACzD,CAAC;AAAA,CACF;AAED,SAAS,aAAa,CAAC,IAAY,EAAW;IAC7C,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,MAAM,GAAG,MAAiD,CAAC;IACjE,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM;QAAE,OAAO,KAAK,CAAC;IACzC,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC;IAC9B,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC3E,MAAM,OAAO,GAAI,IAA8B,CAAC,OAAO,CAAC;IACxD,MAAM,IAAI,GAAI,IAA2B,CAAC,
IAAI,CAAC;IAC/C,OAAO,CACN,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAC9G,CAAC;AAAA,CACF;AAED,SAAS,iBAAiB,CAAC,IAAY,EAAuB;IAC7D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAa,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,8BAA8B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC5D,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC;QAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,IAAI,CAAC,IAAI,GAAG,GAAG,KAAK;QAAE,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9E,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACrC,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;gBAAE,OAAO,MAAM,CAAC;QACnF,CAAC;QAAC,MAAM,CAAC;YACR,qBAAqB;QACtB,CAAC;IACF,CAAC;IACD,OAAO,SAAS,CAAC;AAAA,CACjB;AAED,SAAS,eAAe,GAAuB;IAC9C,OAAO;QACN,EAAE,EAAE,qBAAqB;QACzB,YAAY,EAAE,CAAC,UAAU,EAAE,YAAY,EAAE,aAAa,CAAC;QACvD,eAAe,EAAE,CAAC;QAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACnC,CAAC;AAAA,CACF;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,OAA4B,EAA+B;IACrG,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,OAAO,CAAC;IACzD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,6BAA6B,CAAC;IAC3E,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC;IACpC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,4FAA4F;IAC5F,mFAAmF;IACnF,MAAM,YAAY,GAAG,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAC9C,IAAI,YAAY,GAAG,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAC5C,MAAM,QAAQ,GAAoB,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC;QACjD,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,UAAU,CAAC,YAAY,IAAI,CAAC,CAAC;QAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;QACtC,IAAI,MAAM,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC
,EAAE,CAAC;YAC9B,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC;YAC9B,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC;YAC9B,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC;YAC9B,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC;QAC/B,CAAC;QACD,OAAO,UAAU,CAAC;IAAA,CAClB,CAAC;IACF,MAAM,gBAAgB,GAAG,GAAuB,EAAE,CAAC;QAClD,MAAM,KAAK,GACV,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACtG,YAAY,GAAG,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;QACxC,OAAO,KAAK,CAAC;IAAA,CACb,CAAC;IAEF,MAAM,QAAQ,GAAqB,EAAE,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAC5F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC;YAChC,KAAK,EAAE,kCAAkC,CAAC,EAAE;YAC5C,OAAO,EAAE;gBACR,oDAAoD;gBACpD,oBAAoB;gBACpB,iEAAiE;gBACjE,oDAAoD;aACpD,CAAC,IAAI,CAAC,IAAI,CAAC;YACZ,QAAQ,EAAE,eAAe,EAAE;YAC3B,MAAM,EAAE,CAAC;YACT,UAAU,EAAE,CAAC;YACb,WAAW,EAAE,CAAC;YACd,cAAc;YACd,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,QAAQ;SACR,CAAC,CAAC;QACH,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,CAAC;QACnC,YAAY,IAAI,MAAM,CAAC,OAAO,CAAC;QAC/B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW;YAAE,QAAQ,CAAC,SAAS,EAAE,CAAC;QACxD,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;IACjE,CAAC;IACD,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC;IACvD,QAAQ,CAAC,eAAe,GAAG,gBAAgB,EAAE,CAAC;IAE9C,MAAM,MAAM,GAAqB,EAAE,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAC1F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC;QACtB,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC;YAC/B,OAAO,EAAE;gBACR,EAAE,EAAE,kBAAkB,CAAC,EAAE;gBACzB,YAAY,EACX,oKAAoK;gBACrK,KAAK,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,EAAE,UAAU,EAAE,eAAe,EAAE,OAAO,EAAE,EAAE,EAAE;gBACpG,QAAQ,EAAE,EAAE,EAAE,EAAE,eAAe,CAAC,EAAE,EAAE,YAAY,EAAE,CAAC,YAAY,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE;gBACtF,eAAe,
EAAE,CAAC;aAClB;YACD,MAAM,EAAE,CAAC;YACT,cAAc;YACd,aAAa,EAAE,WAAW,CAAC,EAAE;YAC7B,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,QAAQ;SACR,CAAC,CAAC;QACH,MAAM,CAAC,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,CAAC;QACjC,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC;QAChC,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,OAAO,CAAC,QAAQ;YAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QAClF,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC;IACnD,MAAM,CAAC,eAAe,GAAG,gBAAgB,EAAE,CAAC;IAE5C,MAAM,KAAK,GAAsB;QAChC,MAAM,EAAE,CAAC;QACT,gBAAgB,EAAE,CAAC;QACnB,aAAa,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM;QACpE,YAAY,EAAE,CAAC;QACf,YAAY,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM;QACpE,KAAK,EAAE,YAAY,CAAC,MAAM;QAC1B,QAAQ,EAAE,EAAE;QACZ,MAAM,EAAE,CAAC;KACT,CAAC;IACF,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;QAClC,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC;YAClC,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,QAAQ,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,GAAG,EAAE,UAAU,EAAE,eAAe,EAAE,OAAO,EAAE,EAAE,EAAE;YACzG,cAAc;YACd,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,QAAQ;SACR,CAAC,CAAC;QACH,KAAK,CAAC,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,CAAC;QAChC,YAAY,IAAI,MAAM,CAAC,OAAO,CAAC;QAC/B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC;QAClC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,KAAK,CAAC,MAAM,EAAE,CAAC;YACf,sFAAsF;YACtF,mEAAiE;YACjE,IAAI,KAAK,CAAC,QAAQ,IAAI,IAAI,KAAK,OAAO;gBAAE,KAAK,CAAC,gBAAgB,EAAE,CAAC;YACjE,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,IAAI,KAAK,OAAO;gBAAE,KAAK,CAAC,YAAY,EAAE,CAAC;QAC/D,CAAC;QACD,KAAK,CAAC,QAAQ,CAAC,IAAI,CAClB,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,IAAI,GAAG,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACxG,CAAC;IACH,CAAC;IACD,KAAK,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5F,K
AAK,CAAC,eAAe,GAAG,gBAAgB,EAAE,CAAC;IAE3C,MAAM,YAAY,GAAG,KAAK,EACzB,YAAoB,EACpB,KAAwB,EACxB,OAAkC,EACN,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAqB,EAAE,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;QAC/F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YAC1B,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC;YACtB,0FAAwF;YACxF,MAAM,OAAO,GAAG,MAAM,oBAAoB,CAAC;gBAC1C,cAAc;gBACd,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,OAAO,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,YAAY,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;aACzE,CAAC,CAAC;YACH,IAAI,OAAO,CAAC,UAAU;gBAAE,YAAY,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;YACnE,IAAI,OAAO,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;gBAC5C,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC;YACpF,CAAC;iBAAM,CAAC;gBACP,MAAM,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAC5C,IAAI,EAAE;oBAAE,KAAK,CAAC,SAAS,EAAE,CAAC;gBAC1B,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;YACvD,CAAC;YACD,KAAK,CAAC,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,CAAC;QACjC,CAAC;QACD,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9E,OAAO,KAAK,CAAC;IAAA,CACb,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,0BAA0B,EAAE,kBAAkB,EAAE,eAAe,CAAC,CAAC;IACnG,MAAM,CAAC,eAAe,GAAG,gBAAgB,EAAE,CAAC;IAC5C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,6BAA6B,EAAE,qBAAqB,EAAE,aAAa,CAAC,CAAC;IACzG,QAAQ,CAAC,eAAe,GAAG,gBAAgB,EAAE,CAAC;IAE9C,MAAM,eAAe,GACpB,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAEtG,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;AAAA,CAC5F;AAED,kGAAkG;AAClG,MAAM,UAAU,wBAAwB,CAAC,KAAa,EAAE,MAA0B,EAAU;IAC3F,MAAM,KAAK,GAAG,CAAC,eAAmC,EAAE,EAAE,CACrD,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,eAAe,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;IACpE,MAAM,KAAK,GAAG;QACb,kBAAkB,KAAK,K
AAK,MAAM,CAAC,MAAM,eAAe,KAAK,CAAC,MAAM,CAAC,eAAe,CAAC,GAAG;QACxF,oBAAoB,MAAM,CAAC,QAAQ,CAAC,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,oBAAoB,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,KAAK,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;QACtM,oBAAoB,MAAM,CAAC,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,6BAA6B,MAAM,CAAC,MAAM,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;QACrM,oBAAoB,MAAM,CAAC,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,sBAAsB,MAAM,CAAC,MAAM,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,EAAE;QACvJ,oBAAoB,MAAM,CAAC,QAAQ,CAAC,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,sBAAsB,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE;QAC/J,2BAA2B,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,uBAAuB,MAAM,CAAC,KAAK,CAAC,gBAAgB,IAAI,MAAM,CAAC,KAAK,CAAC,aAAa,mBAAmB,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,MAAM,CAAC,KAAK,CAAC,YAAY,UAAU,MAAM,CAAC,KAAK,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE;QACtS,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,OAAO,EAAE,CAAC;KAC3D,CAAC;IACF,IAAI,MAAM,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,kBAAkB,MAAM,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAChE,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAAA,CACxB","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\nimport type { CapabilityEnvelope } from \"../autonomy/contracts.ts\";\nimport { runWorker } from \"../delegation/worker-runner.ts\";\nimport { runRouteJudge } from \"../model-router/route-judge.ts\";\nimport { runResearch } from \"./research-runner.ts\";\n\n/**\n * Model fitness probe: measures whether a candidate model can actually drive the harness's\n * subagent contracts — the research lane, the scout-worker lane, and the routing judge — by\n * running each real runner against the model and scoring parse/success rates plus judge\n * 
discrimination. Provider-free: the completion executor is injected, so this works against any\n * registered model (local Ollama models included) and against faux providers in tests.\n */\n\nexport interface FitnessCompletion {\n\ttext: string;\n\tcostUsd: number;\n\tstopReason: string;\n\t/** Output tokens generated (for tok/s). Optional: providers that don't report it are skipped. */\n\toutputTokens?: number;\n\t/** Pure generation time in ms (e.g. Ollama eval_duration). Falls back to wall-clock if absent. */\n\tevalMs?: number;\n}\n\nexport type FitnessComplete = (args: {\n\tsystemPrompt: string;\n\tuserPrompt: string;\n\tsignal?: AbortSignal;\n}) => Promise<FitnessCompletion>;\n\nexport interface JudgeFitnessPrompt {\n\tprompt: string;\n\t/** True when the prompt is planning-shaped and must never route cheap. */\n\tplanning: boolean;\n}\n\n/** Default judge probe set: three planning-shaped prompts, three trivial lookups. */\nexport const DEFAULT_JUDGE_FITNESS_PROMPTS: readonly JudgeFitnessPrompt[] = [\n\t{ prompt: \"how should we plan the migration of the session storage layer?\", planning: true },\n\t{ prompt: \"design an approach for splitting the settings manager\", planning: true },\n\t{ prompt: \"draft a roadmap for the autonomy rework\", planning: true },\n\t{ prompt: \"what does the resolvePath function return?\", planning: false },\n\t{ prompt: \"list the files in the delegation module\", planning: false },\n\t{ prompt: \"why is this test flaky?\", planning: false },\n];\n\nexport interface ModelFitnessOptions {\n\tcomplete: FitnessComplete;\n\t/** Trials per lane surface. Default 3. */\n\ttrials?: number;\n\t/** Wall-clock budget per call in ms. Default 120000. */\n\tmaxWallClockMs?: number;\n\tjudgePrompts?: readonly JudgeFitnessPrompt[];\n\tsignal?: AbortSignal;\n\t/** Injected clock for latency measurement (test seam). Defaults to Date.now. 
*/\n\tnow?: () => number;\n}\n\nexport interface LaneFitnessScore {\n\tsucceeded: number;\n\ttotal: number;\n\toutcomes: string[];\n\tmeanMs: number;\n\t/** Mean output tokens/second across the surface's calls; undefined when not reported. */\n\ttokensPerSecond?: number;\n}\n\nexport interface JudgeFitnessScore {\n\tparsed: number;\n\tplanningElevated: number;\n\tplanningTotal: number;\n\ttrivialCheap: number;\n\ttrivialTotal: number;\n\ttotal: number;\n\toutcomes: string[];\n\tmeanMs: number;\n\t/** Mean output tokens/second across the judge calls; undefined when not reported. */\n\ttokensPerSecond?: number;\n}\n\nexport interface ModelFitnessReport {\n\ttrials: number;\n\t/** Aggregate output tokens/second across ALL probe calls (the headline speed number). */\n\ttokensPerSecond?: number;\n\tresearch: LaneFitnessScore;\n\tworker: LaneFitnessScore;\n\tjudge: JudgeFitnessScore;\n\t/** Heavy-lifter surface: can the model formulate a structured search plan? */\n\tsearch: LaneFitnessScore;\n\t/** Heavy-lifter surface: can the model emit a well-formed tool call against a schema? */\n\ttoolCall: LaneFitnessScore;\n\ttotalCostUsd: number;\n}\n\n/** Static prompts for the heavy-lifter surfaces (stable for provider prompt caching). */\nexport const SEARCH_PROBE_SYSTEM_PROMPT = [\n\t\"You plan code searches for a coding agent. You never answer the question yourself.\",\n\t\"Given a question about a codebase, respond with STRICT JSON only - no prose:\",\n\t'{\"queries\":[{\"pattern\":\"<regex or literal to grep>\",\"glob\":\"<file glob like **/*.ts>\"}]}',\n\t\"Return 1 to 4 queries, most specific first.\",\n].join(\"\\n\");\n\nexport const TOOL_CALL_PROBE_SYSTEM_PROMPT = [\n\t\"You operate tools for a coding agent. 
You have exactly one tool:\",\n\t\"grep(pattern: string, path: string) - search files under a path for a pattern.\",\n\t\"Respond to every task with STRICT JSON only - no prose:\",\n\t'{\"tool\":\"grep\",\"arguments\":{\"pattern\":\"<pattern>\",\"path\":\"<path>\"}}',\n].join(\"\\n\");\n\nconst SEARCH_PROBE_TASKS: readonly string[] = [\n\t\"Where is the retry/backoff logic for HTTP requests implemented?\",\n\t\"Which files define the settings for background research?\",\n\t\"Find where session entries of type custom are appended.\",\n];\n\nconst TOOL_CALL_PROBE_TASKS: readonly string[] = [\n\t\"Find usages of the function resolveCliModel under src/.\",\n\t\"Search for the string 'budget_exhausted' in the core directory.\",\n\t\"Locate where LaneTracker is instantiated under src/core.\",\n];\n\nfunction parseSearchPlan(text: string): boolean {\n\tconst parsed = extractJsonObject(text);\n\tif (!parsed) return false;\n\tconst queries = (parsed as { queries?: unknown }).queries;\n\tif (!Array.isArray(queries) || queries.length === 0 || queries.length > 8) return false;\n\treturn queries.every(\n\t\t(query) =>\n\t\t\tquery &&\n\t\t\ttypeof query === \"object\" &&\n\t\t\ttypeof (query as { pattern?: unknown }).pattern === \"string\" &&\n\t\t\t(query as { pattern: string }).pattern.trim().length > 0,\n\t);\n}\n\nfunction parseToolCall(text: string): boolean {\n\tconst parsed = extractJsonObject(text);\n\tif (!parsed) return false;\n\tconst record = parsed as { tool?: unknown; arguments?: unknown };\n\tif (record.tool !== \"grep\") return false;\n\tconst args = record.arguments;\n\tif (!args || typeof args !== \"object\" || Array.isArray(args)) return false;\n\tconst pattern = (args as { pattern?: unknown }).pattern;\n\tconst path = (args as { path?: unknown }).path;\n\treturn (\n\t\ttypeof pattern === \"string\" && pattern.trim().length > 0 && typeof path === \"string\" && path.trim().length > 0\n\t);\n}\n\nfunction extractJsonObject(text: string): unknown | undefined 
{\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\tfor (const candidate of candidates) {\n\t\ttry {\n\t\t\tconst parsed = JSON.parse(candidate);\n\t\t\tif (parsed && typeof parsed === \"object\" && !Array.isArray(parsed)) return parsed;\n\t\t} catch {\n\t\t\t// try next candidate\n\t\t}\n\t}\n\treturn undefined;\n}\n\nfunction fitnessEnvelope(): CapabilityEnvelope {\n\treturn {\n\t\tid: \"model-fitness-probe\",\n\t\tcapabilities: [\"research\", \"read_files\", \"memory_read\"],\n\t\tmaxEstimatedUsd: 1,\n\t\tcreatedAt: new Date().toISOString(),\n\t};\n}\n\nexport async function runModelFitnessProbe(options: ModelFitnessOptions): Promise<ModelFitnessReport> {\n\tconst trials = Math.max(1, Math.min(options.trials ?? 3, 20));\n\tconst maxWallClockMs = options.maxWallClockMs ?? 120_000;\n\tconst judgePrompts = options.judgePrompts ?? DEFAULT_JUDGE_FITNESS_PROMPTS;\n\tconst now = options.now ?? Date.now;\n\tlet totalCostUsd = 0;\n\n\t// Token-speed instrumentation: the lane runners' own contracts carry text/cost only, so the\n\t// completer is wrapped once here and generation stats are accumulated per surface.\n\tconst overallSpeed = { tokens: 0, evalMs: 0 };\n\tlet surfaceSpeed = { tokens: 0, evalMs: 0 };\n\tconst complete: FitnessComplete = async (args) => {\n\t\tconst completion = await options.complete(args);\n\t\tconst tokens = completion.outputTokens ?? 0;\n\t\tconst evalMs = completion.evalMs ?? 
0;\n\t\tif (tokens > 0 && evalMs > 0) {\n\t\t\tsurfaceSpeed.tokens += tokens;\n\t\t\tsurfaceSpeed.evalMs += evalMs;\n\t\t\toverallSpeed.tokens += tokens;\n\t\t\toverallSpeed.evalMs += evalMs;\n\t\t}\n\t\treturn completion;\n\t};\n\tconst takeSurfaceSpeed = (): number | undefined => {\n\t\tconst speed =\n\t\t\tsurfaceSpeed.evalMs > 0 ? Math.round((surfaceSpeed.tokens / surfaceSpeed.evalMs) * 1000) : undefined;\n\t\tsurfaceSpeed = { tokens: 0, evalMs: 0 };\n\t\treturn speed;\n\t};\n\n\tconst research: LaneFitnessScore = { succeeded: 0, total: trials, outcomes: [], meanMs: 0 };\n\tfor (let i = 0; i < trials; i++) {\n\t\tconst started = now();\n\t\tconst result = await runResearch({\n\t\t\tquery: `fitness:probe requirements:req-${i}`,\n\t\t\tcontext: [\n\t\t\t\t\"Goal: add a retry helper to the HTTP client module\",\n\t\t\t\t\"Open requirements:\",\n\t\t\t\t\"- Find what retry/backoff conventions the codebase already uses\",\n\t\t\t\t\"- Identify which call sites would adopt the helper\",\n\t\t\t].join(\"\\n\"),\n\t\t\tenvelope: fitnessEnvelope(),\n\t\t\tmaxUsd: 1,\n\t\t\tmaxSources: 8,\n\t\t\tmaxFindings: 5,\n\t\t\tmaxWallClockMs,\n\t\t\tsignal: options.signal,\n\t\t\tcomplete,\n\t\t});\n\t\tresearch.meanMs += now() - started;\n\t\ttotalCostUsd += result.costUsd;\n\t\tif (result.status === \"succeeded\") research.succeeded++;\n\t\tresearch.outcomes.push(`${result.status}/${result.reasonCode}`);\n\t}\n\tresearch.meanMs = Math.round(research.meanMs / trials);\n\tresearch.tokensPerSecond = takeSurfaceSpeed();\n\n\tconst worker: LaneFitnessScore = { succeeded: 0, total: trials, outcomes: [], meanMs: 0 };\n\tfor (let i = 0; i < trials; i++) {\n\t\tconst started = now();\n\t\tconst outcome = await runWorker({\n\t\t\trequest: {\n\t\t\t\tid: `fitness-worker-${i}`,\n\t\t\t\tinstructions:\n\t\t\t\t\t\"Summarize in two sentences what a capability envelope is: a declared set of allowed tools, paths, and capability names that bounds what a delegated worker may 
do.\",\n\t\t\t\troute: { tier: \"cheap\", risk: \"read-only\", confidence: 1, reasonCode: \"fitness_probe\", reasons: [] },\n\t\t\t\tenvelope: { id: `fitness-env-${i}`, capabilities: [\"read_files\"], maxEstimatedUsd: 1 },\n\t\t\t\tmaxEstimatedUsd: 1,\n\t\t\t},\n\t\t\tmaxUsd: 1,\n\t\t\tmaxWallClockMs,\n\t\t\tusageReportId: `fitness:${i}`,\n\t\t\tsignal: options.signal,\n\t\t\tcomplete,\n\t\t});\n\t\tworker.meanMs += now() - started;\n\t\ttotalCostUsd += outcome.costUsd;\n\t\tif (outcome.result.status === \"completed\" && outcome.accepted) worker.succeeded++;\n\t\tworker.outcomes.push(`${outcome.result.status}/${outcome.reasonCode}`);\n\t}\n\tworker.meanMs = Math.round(worker.meanMs / trials);\n\tworker.tokensPerSecond = takeSurfaceSpeed();\n\n\tconst judge: JudgeFitnessScore = {\n\t\tparsed: 0,\n\t\tplanningElevated: 0,\n\t\tplanningTotal: judgePrompts.filter((entry) => entry.planning).length,\n\t\ttrivialCheap: 0,\n\t\ttrivialTotal: judgePrompts.filter((entry) => !entry.planning).length,\n\t\ttotal: judgePrompts.length,\n\t\toutcomes: [],\n\t\tmeanMs: 0,\n\t};\n\tfor (const entry of judgePrompts) {\n\t\tconst started = now();\n\t\tconst result = await runRouteJudge({\n\t\t\tprompt: entry.prompt,\n\t\t\tbaseline: { tier: \"cheap\", risk: \"read-only\", confidence: 0.5, reasonCode: \"fitness_probe\", reasons: [] },\n\t\t\tmaxWallClockMs,\n\t\t\tsignal: options.signal,\n\t\t\tcomplete,\n\t\t});\n\t\tjudge.meanMs += now() - started;\n\t\ttotalCostUsd += result.costUsd;\n\t\tconst tier = result.decision.tier;\n\t\tif (result.verdict) {\n\t\t\tjudge.parsed++;\n\t\t\t// A useful judge must both keep planning off the cheap tier AND actually send trivial\n\t\t\t// prompts there — all-medium verdicts are safe but save nothing.\n\t\t\tif (entry.planning && tier !== \"cheap\") judge.planningElevated++;\n\t\t\tif (!entry.planning && tier === \"cheap\") judge.trivialCheap++;\n\t\t}\n\t\tjudge.outcomes.push(\n\t\t\t`\"${entry.prompt.slice(0, 40)}\" -> 
${tier}${result.fallbackReason ? ` (${result.fallbackReason})` : \"\"}`,\n\t\t);\n\t}\n\tjudge.meanMs = judgePrompts.length > 0 ? Math.round(judge.meanMs / judgePrompts.length) : 0;\n\tjudge.tokensPerSecond = takeSurfaceSpeed();\n\n\tconst probeSurface = async (\n\t\tsystemPrompt: string,\n\t\ttasks: readonly string[],\n\t\taccepts: (text: string) => boolean,\n\t): Promise<LaneFitnessScore> => {\n\t\tconst score: LaneFitnessScore = { succeeded: 0, total: tasks.length, outcomes: [], meanMs: 0 };\n\t\tfor (const task of tasks) {\n\t\t\tconst started = now();\n\t\t\t// Same wall-clock envelope as the lane surfaces — a hung model must not hang the probe.\n\t\t\tconst bounded = await runBoundedCompletion({\n\t\t\t\tmaxWallClockMs,\n\t\t\t\tsignal: options.signal,\n\t\t\t\texecute: (signal) => complete({ systemPrompt, userPrompt: task, signal }),\n\t\t\t});\n\t\t\tif (bounded.completion) totalCostUsd += bounded.completion.costUsd;\n\t\t\tif (bounded.failure || !bounded.completion) {\n\t\t\t\tscore.outcomes.push(bounded.failure ? bounded.failure.status : \"completion_error\");\n\t\t\t} else {\n\t\t\t\tconst ok = accepts(bounded.completion.text);\n\t\t\t\tif (ok) score.succeeded++;\n\t\t\t\tscore.outcomes.push(ok ? \"ok\" : \"unparseable_output\");\n\t\t\t}\n\t\t\tscore.meanMs += now() - started;\n\t\t}\n\t\tscore.meanMs = tasks.length > 0 ? Math.round(score.meanMs / tasks.length) : 0;\n\t\treturn score;\n\t};\n\n\tconst search = await probeSurface(SEARCH_PROBE_SYSTEM_PROMPT, SEARCH_PROBE_TASKS, parseSearchPlan);\n\tsearch.tokensPerSecond = takeSurfaceSpeed();\n\tconst toolCall = await probeSurface(TOOL_CALL_PROBE_SYSTEM_PROMPT, TOOL_CALL_PROBE_TASKS, parseToolCall);\n\ttoolCall.tokensPerSecond = takeSurfaceSpeed();\n\n\tconst tokensPerSecond =\n\t\toverallSpeed.evalMs > 0 ? 
Math.round((overallSpeed.tokens / overallSpeed.evalMs) * 1000) : undefined;\n\n\treturn { trials, tokensPerSecond, research, worker, judge, search, toolCall, totalCostUsd };\n}\n\n/** Compact human-readable report for tool output / interactive display. Bounded, no raw dumps. */\nexport function formatModelFitnessReport(model: string, report: ModelFitnessReport): string {\n\tconst speed = (tokensPerSecond: number | undefined) =>\n\t\ttokensPerSecond !== undefined ? `, ~${tokensPerSecond} tok/s` : \"\";\n\tconst lines = [\n\t\t`Model fitness: ${model} (${report.trials} trials/lane${speed(report.tokensPerSecond)})`,\n\t\t`- research lane: ${report.research.succeeded}/${report.research.total} succeeded, mean ${report.research.meanMs}ms${speed(report.research.tokensPerSecond)} [${report.research.outcomes.join(\", \")}]`,\n\t\t`- worker lane: ${report.worker.succeeded}/${report.worker.total} completed+accepted, mean ${report.worker.meanMs}ms${speed(report.worker.tokensPerSecond)} [${report.worker.outcomes.join(\", \")}]`,\n\t\t`- search plans: ${report.search.succeeded}/${report.search.total} well-formed, mean ${report.search.meanMs}ms${speed(report.search.tokensPerSecond)}`,\n\t\t`- tool calls: ${report.toolCall.succeeded}/${report.toolCall.total} well-formed, mean ${report.toolCall.meanMs}ms${speed(report.toolCall.tokensPerSecond)}`,\n\t\t`- route judge: parsed ${report.judge.parsed}/${report.judge.total}, planning-elevated ${report.judge.planningElevated}/${report.judge.planningTotal}, trivial-cheap ${report.judge.trivialCheap}/${report.judge.trivialTotal}, mean ${report.judge.meanMs}ms${speed(report.judge.tokensPerSecond)}`,\n\t\t...report.judge.outcomes.map((outcome) => ` ${outcome}`),\n\t];\n\tif (report.totalCostUsd > 0) {\n\t\tlines.push(`- probe cost: $${report.totalCostUsd.toFixed(4)}`);\n\t}\n\treturn lines.join(\"\\n\");\n}\n"]}
@@ -0,0 +1,11 @@
1
+ import type { CapabilityEnvelope, EvidenceSourceKind, GateOutcome } from "../autonomy/contracts.ts";
2
+ interface ResearchRequestArgs { // argument bag accepted by evaluateResearchRequest
3
+ envelope?: CapabilityEnvelope | null; // a missing or malformed envelope is blocked with reasonCode "missing_envelope"
4
+ sourceKind: EvidenceSourceKind | string; // arbitrary strings are accepted by the type; NOTE(review): the embedded source shows unrecognized kinds blocked as "unknown_source_kind" — confirm against the emitted JS
5
+ estimatedUsd: number; // compared against the effective limit; exceeding it yields "ask-user" / "over_budget"
6
+ maxEstimatedUsd?: number; // when provided, takes precedence over envelope.maxEstimatedUsd as the limit
7
+ privateHistoryAllowed?: boolean; // must be truthy for the "automata" source kind, otherwise the outcome is "ask-user"
8
+ }
9
+ export declare function evaluateResearchRequest(args: ResearchRequestArgs): GateOutcome; // research gate: validates the envelope first, then the cost limit, then the capability required by args.sourceKind
10
+ export {};
11
+ //# sourceMappingURL=research-gate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"research-gate.d.ts","sourceRoot":"","sources":["../../../src/core/research/research-gate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAkB,kBAAkB,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAEpH,UAAU,mBAAmB;IAC5B,QAAQ,CAAC,EAAE,kBAAkB,GAAG,IAAI,CAAC;IACrC,UAAU,EAAE,kBAAkB,GAAG,MAAM,CAAC;IACxC,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,qBAAqB,CAAC,EAAE,OAAO,CAAC;CAChC;AAmBD,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,mBAAmB,GAAG,WAAW,CAyE9E","sourcesContent":["import type { CapabilityEnvelope, CapabilityName, EvidenceSourceKind, GateOutcome } from \"../autonomy/contracts.ts\";\n\ninterface ResearchRequestArgs {\n\tenvelope?: CapabilityEnvelope | null;\n\tsourceKind: EvidenceSourceKind | string;\n\testimatedUsd: number;\n\tmaxEstimatedUsd?: number;\n\tprivateHistoryAllowed?: boolean;\n}\n\nfunction missingCapabilityOutcome(sourceKind: string, capabilities: readonly CapabilityName[]): GateOutcome {\n\treturn {\n\t\toutcome: \"block\",\n\t\tgate: \"research_gate\",\n\t\treasonCode: \"missing_capability\",\n\t\tmessage: `Source kind '${sourceKind}' requires capability ${capabilities.join(\" or \")}.`,\n\t};\n}\n\nfunction hasAnyCapability(envelope: CapabilityEnvelope, capabilities: readonly CapabilityName[]): boolean {\n\treturn capabilities.some((capability) => envelope.capabilities.includes(capability));\n}\n\nfunction isWellFormedEnvelope(value: CapabilityEnvelope | null | undefined): value is CapabilityEnvelope {\n\treturn Boolean(value) && typeof value?.id === \"string\" && Array.isArray(value.capabilities);\n}\n\nexport function evaluateResearchRequest(args: ResearchRequestArgs): GateOutcome {\n\tif (!isWellFormedEnvelope(args.envelope)) {\n\t\treturn {\n\t\t\toutcome: \"block\",\n\t\t\tgate: \"research_gate\",\n\t\t\treasonCode: \"missing_envelope\",\n\t\t\tmessage: \"Missing or malformed capability envelope.\",\n\t\t};\n\t}\n\n\tconst { envelope, sourceKind, estimatedUsd, maxEstimatedUsd, privateHistoryAllowed } = args;\n\tconst limit = 
maxEstimatedUsd ?? envelope.maxEstimatedUsd;\n\tif (limit !== undefined && estimatedUsd > limit) {\n\t\treturn {\n\t\t\toutcome: \"ask-user\",\n\t\t\tgate: \"research_gate\",\n\t\t\treasonCode: \"over_budget\",\n\t\t\tmessage: `Estimated cost (${estimatedUsd}) exceeds maximum allowed (${limit}).`,\n\t\t};\n\t}\n\n\tswitch (sourceKind) {\n\t\tcase \"workspace\":\n\t\tcase \"tool\":\n\t\tcase \"user\": {\n\t\t\tconst requiredCapabilities: readonly CapabilityName[] = [\"read_files\", \"research\"];\n\t\t\tif (!hasAnyCapability(envelope, requiredCapabilities)) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, requiredCapabilities);\n\t\t\t}\n\t\t\tbreak;\n\t\t}\n\n\t\tcase \"transcript\":\n\t\t\tif (!hasAnyCapability(envelope, [\"memory_read\"])) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, [\"memory_read\"]);\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase \"automata\":\n\t\t\tif (!hasAnyCapability(envelope, [\"memory_read\"])) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, [\"memory_read\"]);\n\t\t\t}\n\t\t\tif (!privateHistoryAllowed) {\n\t\t\t\treturn {\n\t\t\t\t\toutcome: \"ask-user\",\n\t\t\t\t\tgate: \"research_gate\",\n\t\t\t\t\treasonCode: \"private_history_denied\",\n\t\t\t\t\tmessage: \"Automata source requires privateHistoryAllowed=true.\",\n\t\t\t\t};\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase \"web\":\n\t\t\tif (!hasAnyCapability(envelope, [\"network\"])) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, [\"network\"]);\n\t\t\t}\n\t\t\tbreak;\n\n\t\tdefault:\n\t\t\treturn {\n\t\t\t\toutcome: \"block\",\n\t\t\t\tgate: \"research_gate\",\n\t\t\t\treasonCode: \"unknown_source_kind\",\n\t\t\t\tmessage: `Unknown source kind '${sourceKind}'.`,\n\t\t\t};\n\t}\n\n\treturn {\n\t\toutcome: \"allow\",\n\t\tgate: \"research_gate\",\n\t\treasonCode: \"allowed\",\n\t\tmessage: \"Research request allowed.\",\n\t};\n}\n"]}
@@ -0,0 +1,82 @@
1
/**
 * Builds the blocking gate outcome used when the envelope holds none of the
 * capabilities that would authorize a source kind.
 *
 * @param sourceKind Source kind that was requested; echoed in the message.
 * @param capabilities Capabilities of which one is required; listed in the
 *     message joined by " or ".
 * @returns A `block` outcome for `research_gate` with reason code
 *     `missing_capability`.
 */
function missingCapabilityOutcome(sourceKind, capabilities) {
    const acceptable = capabilities.join(" or ");
    const message = `Source kind '${sourceKind}' requires capability ${acceptable}.`;
    return { outcome: "block", gate: "research_gate", reasonCode: "missing_capability", message };
}
9
/**
 * Reports whether the envelope grants at least one of the listed capabilities.
 *
 * @param envelope Envelope whose `capabilities` array is searched.
 * @param capabilities Candidate capabilities; any single match is enough.
 * @returns `true` on the first match, `false` when nothing matches or the
 *     candidate list is empty.
 */
function hasAnyCapability(envelope, capabilities) {
    for (const wanted of capabilities) {
        if (envelope.capabilities.includes(wanted)) {
            return true;
        }
    }
    return false;
}
12
/**
 * Checks that a value has the minimum shape of a capability envelope: it is
 * truthy, has a string `id`, and has an array `capabilities`.
 *
 * @param value Candidate envelope; may be `null` or `undefined`.
 * @returns `true` only when all three conditions hold.
 */
function isWellFormedEnvelope(value) {
    if (!value) {
        return false;
    }
    if (typeof value.id !== "string") {
        return false;
    }
    return Array.isArray(value.capabilities);
}
15
/**
 * Decides whether a research request may proceed under a capability envelope.
 *
 * Checks run in this order and the first failure is returned:
 *  1. Envelope shape: a missing or malformed envelope blocks (`missing_envelope`).
 *  2. Budget: when a limit is known (`args.maxEstimatedUsd`, falling back to
 *     `envelope.maxEstimatedUsd`), an estimate that is not within that limit
 *     yields `ask-user` (`over_budget`). No limit means no budget check.
 *  3. Source kind:
 *     - `workspace`, `tool`, `user` need `read_files` or `research`;
 *     - `transcript` needs `memory_read`;
 *     - `automata` needs `memory_read` and `privateHistoryAllowed`
 *       (otherwise `ask-user` / `private_history_denied`);
 *     - `web` needs `network`;
 *     - anything else blocks (`unknown_source_kind`).
 *
 * @param args Request description: `envelope`, `sourceKind`, `estimatedUsd`, and the
 *     optional `maxEstimatedUsd` and `privateHistoryAllowed`.
 * @returns A gate outcome for `research_gate`; `outcome` is `allow`, `ask-user`, or `block`.
 */
export function evaluateResearchRequest(args) {
    // Fail closed: nothing is evaluated against a missing or malformed envelope.
    if (!isWellFormedEnvelope(args.envelope)) {
        return {
            outcome: "block",
            gate: "research_gate",
            reasonCode: "missing_envelope",
            message: "Missing or malformed capability envelope.",
        };
    }
    const { envelope, sourceKind, estimatedUsd, maxEstimatedUsd, privateHistoryAllowed } = args;
    // A per-request limit takes precedence over the envelope's own limit.
    const limit = maxEstimatedUsd ?? envelope.maxEstimatedUsd;
    // Phrased as "not within the limit" rather than "greater than the limit": every
    // relational comparison involving NaN is false, so `estimatedUsd > limit` would let
    // a NaN estimate (or a NaN limit) pass the budget check unnoticed.
    if (limit !== undefined && !(estimatedUsd <= limit)) {
        return {
            outcome: "ask-user",
            gate: "research_gate",
            reasonCode: "over_budget",
            message: `Estimated cost (${estimatedUsd}) exceeds maximum allowed (${limit}).`,
        };
    }
    switch (sourceKind) {
        case "workspace":
        case "tool":
        case "user": {
            const requiredCapabilities = ["read_files", "research"];
            if (!hasAnyCapability(envelope, requiredCapabilities)) {
                return missingCapabilityOutcome(sourceKind, requiredCapabilities);
            }
            break;
        }
        case "transcript":
            if (!hasAnyCapability(envelope, ["memory_read"])) {
                return missingCapabilityOutcome(sourceKind, ["memory_read"]);
            }
            break;
        case "automata":
            if (!hasAnyCapability(envelope, ["memory_read"])) {
                return missingCapabilityOutcome(sourceKind, ["memory_read"]);
            }
            // The capability alone is not enough here; the caller must also opt in explicitly.
            if (!privateHistoryAllowed) {
                return {
                    outcome: "ask-user",
                    gate: "research_gate",
                    reasonCode: "private_history_denied",
                    message: "Automata source requires privateHistoryAllowed=true.",
                };
            }
            break;
        case "web":
            if (!hasAnyCapability(envelope, ["network"])) {
                return missingCapabilityOutcome(sourceKind, ["network"]);
            }
            break;
        default:
            // Source kinds this gate does not recognise are rejected, not allowed by omission.
            return {
                outcome: "block",
                gate: "research_gate",
                reasonCode: "unknown_source_kind",
                message: `Unknown source kind '${sourceKind}'.`,
            };
    }
    return {
        outcome: "allow",
        gate: "research_gate",
        reasonCode: "allowed",
        message: "Research request allowed.",
    };
}
82
+ //# sourceMappingURL=research-gate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"research-gate.js","sourceRoot":"","sources":["../../../src/core/research/research-gate.ts"],"names":[],"mappings":"AAUA,SAAS,wBAAwB,CAAC,UAAkB,EAAE,YAAuC,EAAe;IAC3G,OAAO;QACN,OAAO,EAAE,OAAO;QAChB,IAAI,EAAE,eAAe;QACrB,UAAU,EAAE,oBAAoB;QAChC,OAAO,EAAE,gBAAgB,UAAU,yBAAyB,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG;KACxF,CAAC;AAAA,CACF;AAED,SAAS,gBAAgB,CAAC,QAA4B,EAAE,YAAuC,EAAW;IACzG,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC;AAAA,CACrF;AAED,SAAS,oBAAoB,CAAC,KAA4C,EAA+B;IACxG,OAAO,OAAO,CAAC,KAAK,CAAC,IAAI,OAAO,KAAK,EAAE,EAAE,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;AAAA,CAC5F;AAED,MAAM,UAAU,uBAAuB,CAAC,IAAyB,EAAe;IAC/E,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1C,OAAO;YACN,OAAO,EAAE,OAAO;YAChB,IAAI,EAAE,eAAe;YACrB,UAAU,EAAE,kBAAkB;YAC9B,OAAO,EAAE,2CAA2C;SACpD,CAAC;IACH,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,YAAY,EAAE,eAAe,EAAE,qBAAqB,EAAE,GAAG,IAAI,CAAC;IAC5F,MAAM,KAAK,GAAG,eAAe,IAAI,QAAQ,CAAC,eAAe,CAAC;IAC1D,IAAI,KAAK,KAAK,SAAS,IAAI,YAAY,GAAG,KAAK,EAAE,CAAC;QACjD,OAAO;YACN,OAAO,EAAE,UAAU;YACnB,IAAI,EAAE,eAAe;YACrB,UAAU,EAAE,aAAa;YACzB,OAAO,EAAE,mBAAmB,YAAY,8BAA8B,KAAK,IAAI;SAC/E,CAAC;IACH,CAAC;IAED,QAAQ,UAAU,EAAE,CAAC;QACpB,KAAK,WAAW,CAAC;QACjB,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM,EAAE,CAAC;YACb,MAAM,oBAAoB,GAA8B,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YACnF,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,oBAAoB,CAAC,EAAE,CAAC;gBACvD,OAAO,wBAAwB,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAC;YACnE,CAAC;YACD,MAAM;QACP,CAAC;QAED,KAAK,YAAY;YAChB,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;gBAClD,OAAO,wBAAwB,CAAC,UAAU,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC;YAC9D,CAAC;YACD,MAAM;QAEP,KAAK,UAAU;YACd,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;gBAClD,OAAO,wBAAwB,CAAC,UAAU,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC;YAC9D,CAAC;YACD,IAAI,CAAC,qBAAqB,EAAE,CAAC;gBAC5B,OAAO;oBACN,OAAO,EAAE,UAAU;oBACnB,IAAI,EAAE,eAAe;oBACrB,UAAU,EAAE,wBAAwB;oBACpC,OAAO,EAAE,sDAAsD;iBAC/D,CAAC;YACH,CAAC;YACD,MAAM;QAEP,KAAK,KAAK;YAC
T,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;gBAC9C,OAAO,wBAAwB,CAAC,UAAU,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1D,CAAC;YACD,MAAM;QAEP;YACC,OAAO;gBACN,OAAO,EAAE,OAAO;gBAChB,IAAI,EAAE,eAAe;gBACrB,UAAU,EAAE,qBAAqB;gBACjC,OAAO,EAAE,wBAAwB,UAAU,IAAI;aAC/C,CAAC;IACJ,CAAC;IAED,OAAO;QACN,OAAO,EAAE,OAAO;QAChB,IAAI,EAAE,eAAe;QACrB,UAAU,EAAE,SAAS;QACrB,OAAO,EAAE,2BAA2B;KACpC,CAAC;AAAA,CACF","sourcesContent":["import type { CapabilityEnvelope, CapabilityName, EvidenceSourceKind, GateOutcome } from \"../autonomy/contracts.ts\";\n\ninterface ResearchRequestArgs {\n\tenvelope?: CapabilityEnvelope | null;\n\tsourceKind: EvidenceSourceKind | string;\n\testimatedUsd: number;\n\tmaxEstimatedUsd?: number;\n\tprivateHistoryAllowed?: boolean;\n}\n\nfunction missingCapabilityOutcome(sourceKind: string, capabilities: readonly CapabilityName[]): GateOutcome {\n\treturn {\n\t\toutcome: \"block\",\n\t\tgate: \"research_gate\",\n\t\treasonCode: \"missing_capability\",\n\t\tmessage: `Source kind '${sourceKind}' requires capability ${capabilities.join(\" or \")}.`,\n\t};\n}\n\nfunction hasAnyCapability(envelope: CapabilityEnvelope, capabilities: readonly CapabilityName[]): boolean {\n\treturn capabilities.some((capability) => envelope.capabilities.includes(capability));\n}\n\nfunction isWellFormedEnvelope(value: CapabilityEnvelope | null | undefined): value is CapabilityEnvelope {\n\treturn Boolean(value) && typeof value?.id === \"string\" && Array.isArray(value.capabilities);\n}\n\nexport function evaluateResearchRequest(args: ResearchRequestArgs): GateOutcome {\n\tif (!isWellFormedEnvelope(args.envelope)) {\n\t\treturn {\n\t\t\toutcome: \"block\",\n\t\t\tgate: \"research_gate\",\n\t\t\treasonCode: \"missing_envelope\",\n\t\t\tmessage: \"Missing or malformed capability envelope.\",\n\t\t};\n\t}\n\n\tconst { envelope, sourceKind, estimatedUsd, maxEstimatedUsd, privateHistoryAllowed } = args;\n\tconst limit = maxEstimatedUsd ?? 
envelope.maxEstimatedUsd;\n\tif (limit !== undefined && estimatedUsd > limit) {\n\t\treturn {\n\t\t\toutcome: \"ask-user\",\n\t\t\tgate: \"research_gate\",\n\t\t\treasonCode: \"over_budget\",\n\t\t\tmessage: `Estimated cost (${estimatedUsd}) exceeds maximum allowed (${limit}).`,\n\t\t};\n\t}\n\n\tswitch (sourceKind) {\n\t\tcase \"workspace\":\n\t\tcase \"tool\":\n\t\tcase \"user\": {\n\t\t\tconst requiredCapabilities: readonly CapabilityName[] = [\"read_files\", \"research\"];\n\t\t\tif (!hasAnyCapability(envelope, requiredCapabilities)) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, requiredCapabilities);\n\t\t\t}\n\t\t\tbreak;\n\t\t}\n\n\t\tcase \"transcript\":\n\t\t\tif (!hasAnyCapability(envelope, [\"memory_read\"])) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, [\"memory_read\"]);\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase \"automata\":\n\t\t\tif (!hasAnyCapability(envelope, [\"memory_read\"])) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, [\"memory_read\"]);\n\t\t\t}\n\t\t\tif (!privateHistoryAllowed) {\n\t\t\t\treturn {\n\t\t\t\t\toutcome: \"ask-user\",\n\t\t\t\t\tgate: \"research_gate\",\n\t\t\t\t\treasonCode: \"private_history_denied\",\n\t\t\t\t\tmessage: \"Automata source requires privateHistoryAllowed=true.\",\n\t\t\t\t};\n\t\t\t}\n\t\t\tbreak;\n\n\t\tcase \"web\":\n\t\t\tif (!hasAnyCapability(envelope, [\"network\"])) {\n\t\t\t\treturn missingCapabilityOutcome(sourceKind, [\"network\"]);\n\t\t\t}\n\t\t\tbreak;\n\n\t\tdefault:\n\t\t\treturn {\n\t\t\t\toutcome: \"block\",\n\t\t\t\tgate: \"research_gate\",\n\t\t\t\treasonCode: \"unknown_source_kind\",\n\t\t\t\tmessage: `Unknown source kind '${sourceKind}'.`,\n\t\t\t};\n\t}\n\n\treturn {\n\t\toutcome: \"allow\",\n\t\tgate: \"research_gate\",\n\t\treasonCode: \"allowed\",\n\t\tmessage: \"Research request allowed.\",\n\t};\n}\n"]}
@@ -0,0 +1,59 @@
1
+ import type { CapabilityEnvelope, EvidenceBundle, GateOutcome } from "../autonomy/contracts.ts";
2
+ /**
3
+ * Pure orchestration for one autonomous research pass: gate -> bounded isolated completion ->
4
+ * parse -> evidence bundle. The model executor is injected so this stays provider-free and
5
+ * session-free; production wires `AgentSession.runIsolatedCompletion` in.
6
+ *
7
+ * The lane is read-only by construction: the executor receives text prompts only, and the output
8
+ * is an `EvidenceBundle` whose model-synthesized findings are marked untrusted.
9
+ */
10
/**
 * System prompt for the research lane.
 *
 * The text is the same on every call, so callers can use `cacheRetention: "short"` and pay
 * only for the variable tail of each request.
 */
export declare const RESEARCH_LANE_SYSTEM_PROMPT: string;
12
/** What the injected executor reports back for one isolated completion. */
export interface ResearchCompletion {
    /** Raw model reply; this is the text later parsed for findings. */
    text: string;
    /** Spend reported for the completion (USD, going by the field name). */
    costUsd: number;
    /** Why generation stopped; `"error"` and `"aborted"` fail the run with `model_error`. */
    stopReason: string;
}
17
/** Inputs for a single research pass. */
export interface ResearchRunnerOptions {
    /** Research question; it opens the user prompt and is recorded on the evidence bundle. */
    query: string;
    /** Bounded, pre-redacted context handed to the research model (goal text, open requirements). */
    context?: string;
    /** Stripped research envelope - never the foreground/architect envelope. */
    envelope: CapabilityEnvelope;
    /**
     * Budget for this pass. It is submitted to the research gate as the estimated cost, and a
     * completion that costs more afterwards marks the run `budget_exhausted` (spend stays visible).
     */
    maxUsd: number;
    /** Upper bound on evidence sources kept in the bundle; at least one source is always kept. */
    maxSources: number;
    /** Upper bound on findings, both requested in the prompt and kept after parsing. */
    maxFindings: number;
    /** Wall-clock budget in milliseconds; 0 disables. */
    maxWallClockMs: number;
    /** Executes one isolated completion. Production: AgentSession.runIsolatedCompletion. */
    complete: (args: { systemPrompt: string; userPrompt: string; signal?: AbortSignal }) => Promise<ResearchCompletion>;
    /** External cancellation (e.g. session disposal). */
    signal?: AbortSignal;
}
38
/**
 * Terminal status of a research pass.
 *
 * The runner itself sets `succeeded`, `failed`, and `budget_exhausted`. A failure status
 * reported by the bounded-completion helper is passed through unchanged - presumably the
 * origin of `canceled` and `timeout`; confirm against that helper.
 */
export type ResearchRunStatus =
    | "succeeded"
    | "failed"
    | "canceled"
    | "timeout"
    | "budget_exhausted";
39
/** Outcome of one research pass. */
export interface ResearchRunResult {
    /** Terminal status of the pass. */
    status: ResearchRunStatus;
    /**
     * Machine-readable reason. On a gate denial this is the gate's own reason code; otherwise
     * values such as `research_completed`, `no_findings`, `unparseable_output`, `model_error`,
     * `completion_error`, or `cost_budget_exceeded`.
     */
    reasonCode: string;
    /** Decision of the research gate, taken before any model call. */
    gateOutcome: GateOutcome;
    /** Evidence bundle; present only when the model reply parsed, including over-budget runs. */
    bundle?: EvidenceBundle;
    /** Spend reported for the pass; 0 when the gate denied the request. */
    costUsd: number;
}
46
/**
 * Builds the user prompt for one research completion.
 *
 * The prompt opens with the query, adds a `Context:` section when `context` is non-empty,
 * and closes with an instruction to return at most `maxFindings` findings.
 */
export declare function buildResearchUserPrompt(args: { query: string; context?: string; maxFindings: number }): string;
51
/** Findings recovered from a research model's reply. */
export interface ParsedResearchFindings {
    /**
     * One entry per usable finding. As produced by `parseResearchFindings`, `summary` is
     * trimmed and non-empty, and `confidence` is set only for finite numbers, clamped to 0..1.
     */
    findings: { summary: string; confidence?: number }[];
}
57
/**
 * Extracts findings from the research model's reply.
 *
 * Candidates are tried in order: the whole trimmed reply, the contents of the first Markdown
 * code fence, and the span from the first `{` to the last `}`. The first candidate that parses
 * as a JSON object with a usable `findings` array wins. Items lacking a non-empty string
 * `summary` are dropped, and at most `maxFindings` items are kept.
 *
 * @param text Raw model reply.
 * @param maxFindings Maximum number of findings to keep.
 * @returns The parsed findings - empty when the model returned an empty array - or
 *     `undefined` when no candidate parses or every listed item is malformed.
 */
export declare function parseResearchFindings(
    text: string,
    maxFindings: number,
): ParsedResearchFindings | undefined;
58
/**
 * Runs one research pass: research gate, bounded isolated completion, parse, evidence bundle.
 *
 * The gate is consulted with source kind `tool` and `options.maxUsd` as the estimated cost.
 * A denial never prompts the user: it is returned as `budget_exhausted` (over budget) or
 * `failed`, with a cost of 0. Once a completion has run, an error/aborted stop reason or an
 * unparseable reply yields `failed`; a parsed reply yields `succeeded`, or `budget_exhausted`
 * when `maxUsd` is positive and the reported cost exceeds it.
 *
 * @param options Query, envelope, budgets, and the injected completion executor.
 * @returns The run result; `bundle` is set only when the reply parsed.
 */
export declare function runResearch(options: ResearchRunnerOptions): Promise<ResearchRunResult>;
59
+ //# sourceMappingURL=research-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"research-runner.d.ts","sourceRoot":"","sources":["../../../src/core/research/research-runner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,kBAAkB,EAAE,cAAc,EAAwB,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAItH;;;;;;;GAOG;AAEH,2GAA2G;AAC3G,eAAO,MAAM,2BAA2B,QAM5B,CAAC;AAEb,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,qBAAqB;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,iGAAiG;IACjG,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4EAA4E;IAC5E,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,oGAAoG;IACpG,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,cAAc,EAAE,MAAM,CAAC;IACvB,wFAAwF;IACxF,QAAQ,EAAE,CAAC,IAAI,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,WAAW,CAAA;KAAE,KAAK,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACpH,qDAAqD;IACrD,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,MAAM,iBAAiB,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,GAAG,kBAAkB,CAAC;AAErG,MAAM,WAAW,iBAAiB;IACjC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,CAAC,EAAE,cAAc,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAO9G;AAED,MAAM,WAAW,sBAAsB;IACtC,QAAQ,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC1D;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,sBAAsB,GAAG,SAAS,CAwC3G;AA+BD,wBAAsB,WAAW,CAAC,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAwD5F","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\nimport type { CapabilityEnvelope, EvidenceBundle, EvidenceRef, Finding, GateOutcome } from \"../autonomy/contracts.ts\";\nimport { createEvidenceBundle } from \"./evidence-bundle.ts\";\nimport { evaluateResearchRequest } from \"./research-gate.ts\";\n\n/**\n * Pure orchestration for one autonomous research pass: gate -> bounded isolated completion ->\n * parse -> evidence bundle. 
The model executor is injected so this stays provider-free and\n * session-free; production wires `AgentSession.runIsolatedCompletion` in.\n *\n * The lane is read-only by construction: the executor receives text prompts only, and the output\n * is an `EvidenceBundle` whose model-synthesized findings are marked untrusted.\n */\n\n/** Static across calls so callers can use `cacheRetention: \"short\"` and only pay for the variable tail. */\nexport const RESEARCH_LANE_SYSTEM_PROMPT = [\n\t\"You are a read-only research lane for a coding agent.\",\n\t\"You receive a research query plus bounded context and produce findings that help satisfy open goal requirements.\",\n\t\"Respond with STRICT JSON only - no prose, no markdown fences:\",\n\t'{\"findings\":[{\"summary\":\"<one concrete, actionable finding>\",\"confidence\":<0..1>}]}',\n\t\"Base findings only on the provided context. Never invent file paths, APIs, or facts.\",\n].join(\"\\n\");\n\nexport interface ResearchCompletion {\n\ttext: string;\n\tcostUsd: number;\n\tstopReason: string;\n}\n\nexport interface ResearchRunnerOptions {\n\tquery: string;\n\t/** Bounded, pre-redacted context handed to the research model (goal text, open requirements). */\n\tcontext?: string;\n\t/** Stripped research envelope - never the foreground/architect envelope. */\n\tenvelope: CapabilityEnvelope;\n\t/** Budget for this pass; a post-hoc breach marks the run budget_exhausted (spend stays visible). */\n\tmaxUsd: number;\n\tmaxSources: number;\n\tmaxFindings: number;\n\t/** Wall-clock budget in milliseconds; 0 disables. */\n\tmaxWallClockMs: number;\n\t/** Executes one isolated completion. Production: AgentSession.runIsolatedCompletion. */\n\tcomplete: (args: { systemPrompt: string; userPrompt: string; signal?: AbortSignal }) => Promise<ResearchCompletion>;\n\t/** External cancellation (e.g. session disposal). 
*/\n\tsignal?: AbortSignal;\n}\n\nexport type ResearchRunStatus = \"succeeded\" | \"failed\" | \"canceled\" | \"timeout\" | \"budget_exhausted\";\n\nexport interface ResearchRunResult {\n\tstatus: ResearchRunStatus;\n\treasonCode: string;\n\tgateOutcome: GateOutcome;\n\tbundle?: EvidenceBundle;\n\tcostUsd: number;\n}\n\nexport function buildResearchUserPrompt(args: { query: string; context?: string; maxFindings: number }): string {\n\tconst parts = [`Research query: ${args.query}`];\n\tif (args.context && args.context.length > 0) {\n\t\tparts.push(\"\", \"Context:\", args.context);\n\t}\n\tparts.push(\"\", `Return at most ${args.maxFindings} findings.`);\n\treturn parts.join(\"\\n\");\n}\n\nexport interface ParsedResearchFindings {\n\tfindings: Array<{ summary: string; confidence?: number }>;\n}\n\nexport function parseResearchFindings(text: string, maxFindings: number): ParsedResearchFindings | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\n\tfor (const candidate of candidates) {\n\t\tlet parsed: unknown;\n\t\ttry {\n\t\t\tparsed = JSON.parse(candidate);\n\t\t} catch {\n\t\t\tcontinue;\n\t\t}\n\t\tif (!parsed || typeof parsed !== \"object\" || Array.isArray(parsed)) continue;\n\t\tconst findingsRaw = (parsed as { findings?: unknown }).findings;\n\t\tif (!Array.isArray(findingsRaw)) continue;\n\n\t\tconst findings: Array<{ summary: string; confidence?: number }> = [];\n\t\tfor (const item of findingsRaw) {\n\t\t\tif (!item || typeof item !== \"object\" || Array.isArray(item)) continue;\n\t\t\tconst summary = (item as { summary?: unknown }).summary;\n\t\t\tif (typeof summary !== \"string\" || summary.trim().length === 0) 
continue;\n\t\t\tconst confidenceRaw = (item as { confidence?: unknown }).confidence;\n\t\t\tconst confidence =\n\t\t\t\ttypeof confidenceRaw === \"number\" && Number.isFinite(confidenceRaw)\n\t\t\t\t\t? Math.min(Math.max(confidenceRaw, 0), 1)\n\t\t\t\t\t: undefined;\n\t\t\tfindings.push({ summary: summary.trim(), confidence });\n\t\t\tif (findings.length >= maxFindings) break;\n\t\t}\n\t\t// A well-formed-but-empty findings array is a valid \"nothing found\"; a findings array whose\n\t\t// every item is malformed is not.\n\t\tif (findings.length > 0 || findingsRaw.length === 0) {\n\t\t\treturn { findings };\n\t\t}\n\t}\n\treturn undefined;\n}\n\nfunction truncateExcerpt(text: string, maxChars: number): string {\n\tif (text.length <= maxChars) return text;\n\treturn `${text.slice(0, Math.max(0, maxChars - 1))}…`;\n}\n\nfunction buildBundle(options: ResearchRunnerOptions, parsed: ParsedResearchFindings): EvidenceBundle {\n\tconst contextRef: EvidenceRef = {\n\t\tid: \"src-context\",\n\t\tkind: \"user\",\n\t\ttitle: \"Goal/context provided to the research lane\",\n\t\ttrusted: true,\n\t\texcerpt: truncateExcerpt(options.context && options.context.length > 0 ? options.context : options.query, 2000),\n\t};\n\tconst synthesisRef: EvidenceRef = {\n\t\tid: \"src-synthesis\",\n\t\tkind: \"tool\",\n\t\ttitle: \"Research-model synthesis\",\n\t\ttrusted: false,\n\t};\n\tconst sources = [contextRef, synthesisRef].slice(0, Math.max(1, options.maxSources));\n\tconst findings: Finding[] = parsed.findings.slice(0, options.maxFindings).map((finding, index) => ({\n\t\tid: `finding-${index + 1}`,\n\t\tsummary: finding.summary,\n\t\tevidenceIds: [synthesisRef.id],\n\t\t...(finding.confidence !== undefined ? 
{ confidence: finding.confidence } : {}),\n\t}));\n\treturn createEvidenceBundle({ query: options.query, sources, findings });\n}\n\nexport async function runResearch(options: ResearchRunnerOptions): Promise<ResearchRunResult> {\n\tconst gateOutcome = evaluateResearchRequest({\n\t\tenvelope: options.envelope,\n\t\tsourceKind: \"tool\",\n\t\testimatedUsd: options.maxUsd,\n\t});\n\tif (gateOutcome.outcome !== \"allow\") {\n\t\t// Skip-and-record, never prompt: gate denials inform diagnostics instead of blocking anything.\n\t\tconst status: ResearchRunStatus = gateOutcome.reasonCode === \"over_budget\" ? \"budget_exhausted\" : \"failed\";\n\t\treturn { status, reasonCode: gateOutcome.reasonCode, gateOutcome, costUsd: 0 };\n\t}\n\n\tconst bounded = await runBoundedCompletion({\n\t\tmaxWallClockMs: options.maxWallClockMs,\n\t\tsignal: options.signal,\n\t\texecute: (signal) =>\n\t\t\toptions.complete({\n\t\t\t\tsystemPrompt: RESEARCH_LANE_SYSTEM_PROMPT,\n\t\t\t\tuserPrompt: buildResearchUserPrompt(options),\n\t\t\t\tsignal,\n\t\t\t}),\n\t});\n\tif (bounded.failure) {\n\t\treturn {\n\t\t\tstatus: bounded.failure.status,\n\t\t\treasonCode: bounded.failure.reasonCode,\n\t\t\tgateOutcome,\n\t\t\tcostUsd: bounded.completion?.costUsd ?? 
0,\n\t\t};\n\t}\n\tconst completion = bounded.completion;\n\tif (!completion) {\n\t\treturn { status: \"failed\", reasonCode: \"completion_error\", gateOutcome, costUsd: 0 };\n\t}\n\tif (completion.stopReason === \"error\" || completion.stopReason === \"aborted\") {\n\t\treturn { status: \"failed\", reasonCode: \"model_error\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst parsed = parseResearchFindings(completion.text, options.maxFindings);\n\tif (!parsed) {\n\t\treturn { status: \"failed\", reasonCode: \"unparseable_output\", gateOutcome, costUsd: completion.costUsd };\n\t}\n\n\tconst bundle = buildBundle(options, parsed);\n\tconst overBudget = options.maxUsd > 0 && completion.costUsd > options.maxUsd;\n\treturn {\n\t\tstatus: overBudget ? \"budget_exhausted\" : \"succeeded\",\n\t\treasonCode: overBudget\n\t\t\t? \"cost_budget_exceeded\"\n\t\t\t: parsed.findings.length === 0\n\t\t\t\t? \"no_findings\"\n\t\t\t\t: \"research_completed\",\n\t\tgateOutcome,\n\t\tbundle,\n\t\tcostUsd: completion.costUsd,\n\t};\n}\n"]}