@nexus-cortex/cli 4.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465):
  1. package/.cortex/agents/AGENT_PROFILE_GUIDE.md +307 -0
  2. package/.cortex/agents/README.md +268 -0
  3. package/.cortex/agents/a-frontend-landing-page-designer.md +41 -0
  4. package/.cortex/agents/autoresearch-agent.md +49 -0
  5. package/.cortex/agents/code-reviewer.md +63 -0
  6. package/.cortex/agents/context-research.md +26 -0
  7. package/.cortex/agents/doc-writer.md +92 -0
  8. package/.cortex/agents/explore.md +63 -0
  9. package/.cortex/agents/new-model-api-integrator-analyst.md +41 -0
  10. package/.cortex/agents/plan.md +109 -0
  11. package/.cortex/agents/pr-architecture-reviewer.md +77 -0
  12. package/.cortex/agents/pr-code-quality.md +78 -0
  13. package/.cortex/agents/pr-implementer.md +50 -0
  14. package/.cortex/agents/pr-security-auditor.md +62 -0
  15. package/.cortex/agents/pr-test-writer.md +67 -0
  16. package/.cortex/agents/refactor.md +118 -0
  17. package/.cortex/agents/test-writer.md +72 -0
  18. package/.cortex/agents/web-researcher.md +72 -0
  19. package/.cortex/bench/tasks/sample-tasks.json +20 -0
  20. package/.cortex/commands/compare.md +14 -0
  21. package/.cortex/commands/deps.md +16 -0
  22. package/.cortex/commands/diff.md +14 -0
  23. package/.cortex/commands/explain.md +16 -0
  24. package/.cortex/commands/find-bug.md +13 -0
  25. package/.cortex/commands/profile.md +15 -0
  26. package/.cortex/commands/review.md +18 -0
  27. package/.cortex/commands/search.md +16 -0
  28. package/.cortex/commands/test.md +15 -0
  29. package/.cortex/permissions.dev.json +20 -0
  30. package/.cortex/permissions.example.json +71 -0
  31. package/.cortex/permissions.prod.json +63 -0
  32. package/.cortex/permissions.test.json +19 -0
  33. package/.cortex/skills/autoresearch/SKILL.md +77 -0
  34. package/.cortex/skills/autoresearch/personas/README.md +45 -0
  35. package/.cortex/skills/autoresearch/personas/aggressive-refactor.md +25 -0
  36. package/.cortex/skills/autoresearch/personas/creative.md +29 -0
  37. package/.cortex/skills/autoresearch/personas/perf-hunter.md +27 -0
  38. package/.cortex/skills/autoresearch/personas/precise.md +23 -0
  39. package/.cortex/skills/autoresearch/personas/root-cause.md +26 -0
  40. package/.cortex/skills/autoresearch/personas/security-auditor.md +29 -0
  41. package/.cortex/skills/autoresearch/personas/skeptic-reviewer.md +31 -0
  42. package/.cortex/skills/autoresearch/personas/test-first.md +25 -0
  43. package/.cortex/skills/best-of-n/SKILL.md +76 -0
  44. package/.cortex/skills/cortex/SKILL.md +834 -0
  45. package/.cortex/skills/cortex-bench/SKILL.md +354 -0
  46. package/.cortex/skills/docx/SKILL.md +83 -0
  47. package/.cortex/skills/pdf-documents/SKILL.md +297 -0
  48. package/.cortex/skills/pdf-documents/sections/01-image-acquisition.md +132 -0
  49. package/.cortex/skills/pdf-documents/sections/02-ai-image-generation.md +274 -0
  50. package/.cortex/skills/pdf-documents/sections/03-paper-sizes.md +89 -0
  51. package/.cortex/skills/pdf-documents/sections/04-design-system.md +549 -0
  52. package/.cortex/skills/pdf-documents/sections/05-css-print-rules.md +135 -0
  53. package/.cortex/skills/pdf-documents/sections/06-svg-charts.md +100 -0
  54. package/.cortex/skills/pdf-documents/sections/07-templates.md +224 -0
  55. package/.cortex/skills/pdf-documents/sections/08-scaled-output.md +164 -0
  56. package/.cortex/skills/pdf-documents/sections/09-preview-qa.md +66 -0
  57. package/.cortex/skills/pdf-documents/sections/10-reading-pdfs.md +499 -0
  58. package/.cortex/skills/pdf-documents/sections/11-form-filling.md +241 -0
  59. package/.cortex/skills/pptx/SKILL.md +90 -0
  60. package/.cortex/skills/resume-analyst/SKILL.md +373 -0
  61. package/.cortex/skills/verify-work/SKILL.md +74 -0
  62. package/.cortex/skills/xlsx/SKILL.md +101 -0
  63. package/.cortex/system-messages/messages/WORK_QUALITY.md +159 -0
  64. package/.cortex/system-messages/registry.json +18 -0
  65. package/LICENSE +202 -0
  66. package/NOTICE +2 -0
  67. package/README.md +13 -0
  68. package/bin/cortex.js +548 -0
  69. package/dist/agent-mode.d.ts +21 -0
  70. package/dist/agent-mode.d.ts.map +1 -0
  71. package/dist/agent-mode.js +511 -0
  72. package/dist/agent-mode.js.map +1 -0
  73. package/dist/client/CortexClient.d.ts +84 -0
  74. package/dist/client/CortexClient.d.ts.map +1 -0
  75. package/dist/client/CortexClient.js +163 -0
  76. package/dist/client/CortexClient.js.map +1 -0
  77. package/dist/commands/artifact/list.d.ts +15 -0
  78. package/dist/commands/artifact/list.d.ts.map +1 -0
  79. package/dist/commands/artifact/list.js +89 -0
  80. package/dist/commands/artifact/list.js.map +1 -0
  81. package/dist/commands/artifact/restart.d.ts +13 -0
  82. package/dist/commands/artifact/restart.d.ts.map +1 -0
  83. package/dist/commands/artifact/restart.js +56 -0
  84. package/dist/commands/artifact/restart.js.map +1 -0
  85. package/dist/commands/artifact/status.d.ts +13 -0
  86. package/dist/commands/artifact/status.d.ts.map +1 -0
  87. package/dist/commands/artifact/status.js +100 -0
  88. package/dist/commands/artifact/status.js.map +1 -0
  89. package/dist/commands/artifact/stop.d.ts +13 -0
  90. package/dist/commands/artifact/stop.d.ts.map +1 -0
  91. package/dist/commands/artifact/stop.js +50 -0
  92. package/dist/commands/artifact/stop.js.map +1 -0
  93. package/dist/commands/autoresearch/bench.d.ts +32 -0
  94. package/dist/commands/autoresearch/bench.d.ts.map +1 -0
  95. package/dist/commands/autoresearch/bench.js +123 -0
  96. package/dist/commands/autoresearch/bench.js.map +1 -0
  97. package/dist/commands/autoresearch/commandRunner.d.ts +35 -0
  98. package/dist/commands/autoresearch/commandRunner.d.ts.map +1 -0
  99. package/dist/commands/autoresearch/commandRunner.js +91 -0
  100. package/dist/commands/autoresearch/commandRunner.js.map +1 -0
  101. package/dist/commands/autoresearch/evaluate.d.ts +18 -0
  102. package/dist/commands/autoresearch/evaluate.d.ts.map +1 -0
  103. package/dist/commands/autoresearch/evaluate.js +117 -0
  104. package/dist/commands/autoresearch/evaluate.js.map +1 -0
  105. package/dist/commands/autoresearch/experiment.d.ts +38 -0
  106. package/dist/commands/autoresearch/experiment.d.ts.map +1 -0
  107. package/dist/commands/autoresearch/experiment.js +168 -0
  108. package/dist/commands/autoresearch/experiment.js.map +1 -0
  109. package/dist/commands/autoresearch/fix.d.ts +10 -0
  110. package/dist/commands/autoresearch/fix.d.ts.map +1 -0
  111. package/dist/commands/autoresearch/fix.js +86 -0
  112. package/dist/commands/autoresearch/fix.js.map +1 -0
  113. package/dist/commands/autoresearch/harnessProcess.d.ts +48 -0
  114. package/dist/commands/autoresearch/harnessProcess.d.ts.map +1 -0
  115. package/dist/commands/autoresearch/harnessProcess.js +140 -0
  116. package/dist/commands/autoresearch/harnessProcess.js.map +1 -0
  117. package/dist/commands/autoresearch/list.d.ts +6 -0
  118. package/dist/commands/autoresearch/list.d.ts.map +1 -0
  119. package/dist/commands/autoresearch/list.js +38 -0
  120. package/dist/commands/autoresearch/list.js.map +1 -0
  121. package/dist/commands/autoresearch/loop.d.ts +26 -0
  122. package/dist/commands/autoresearch/loop.d.ts.map +1 -0
  123. package/dist/commands/autoresearch/loop.js +242 -0
  124. package/dist/commands/autoresearch/loop.js.map +1 -0
  125. package/dist/commands/cache/metrics.d.ts +13 -0
  126. package/dist/commands/cache/metrics.d.ts.map +1 -0
  127. package/dist/commands/cache/metrics.js +77 -0
  128. package/dist/commands/cache/metrics.js.map +1 -0
  129. package/dist/commands/chat/AgenticChat.d.ts +39 -0
  130. package/dist/commands/chat/AgenticChat.d.ts.map +1 -0
  131. package/dist/commands/chat/AgenticChat.js +201 -0
  132. package/dist/commands/chat/AgenticChat.js.map +1 -0
  133. package/dist/commands/chat/renderers/CodeRenderer.d.ts +36 -0
  134. package/dist/commands/chat/renderers/CodeRenderer.d.ts.map +1 -0
  135. package/dist/commands/chat/renderers/CodeRenderer.js +85 -0
  136. package/dist/commands/chat/renderers/CodeRenderer.js.map +1 -0
  137. package/dist/commands/chat/renderers/ToolRenderer.d.ts +30 -0
  138. package/dist/commands/chat/renderers/ToolRenderer.d.ts.map +1 -0
  139. package/dist/commands/chat/renderers/ToolRenderer.js +93 -0
  140. package/dist/commands/chat/renderers/ToolRenderer.js.map +1 -0
  141. package/dist/commands/chat/single-message.d.ts +15 -0
  142. package/dist/commands/chat/single-message.d.ts.map +1 -0
  143. package/dist/commands/chat/single-message.js +85 -0
  144. package/dist/commands/chat/single-message.js.map +1 -0
  145. package/dist/commands/config/categories.d.ts +8 -0
  146. package/dist/commands/config/categories.d.ts.map +1 -0
  147. package/dist/commands/config/categories.js +75 -0
  148. package/dist/commands/config/categories.js.map +1 -0
  149. package/dist/commands/config/category.d.ts +8 -0
  150. package/dist/commands/config/category.d.ts.map +1 -0
  151. package/dist/commands/config/category.js +81 -0
  152. package/dist/commands/config/category.js.map +1 -0
  153. package/dist/commands/config/get.d.ts +9 -0
  154. package/dist/commands/config/get.d.ts.map +1 -0
  155. package/dist/commands/config/get.js +98 -0
  156. package/dist/commands/config/get.js.map +1 -0
  157. package/dist/commands/config/reset.d.ts +6 -0
  158. package/dist/commands/config/reset.d.ts.map +1 -0
  159. package/dist/commands/config/reset.js +68 -0
  160. package/dist/commands/config/reset.js.map +1 -0
  161. package/dist/commands/config/set.d.ts +6 -0
  162. package/dist/commands/config/set.d.ts.map +1 -0
  163. package/dist/commands/config/set.js +60 -0
  164. package/dist/commands/config/set.js.map +1 -0
  165. package/dist/commands/config/utils.d.ts +14 -0
  166. package/dist/commands/config/utils.d.ts.map +1 -0
  167. package/dist/commands/config/utils.js +54 -0
  168. package/dist/commands/config/utils.js.map +1 -0
  169. package/dist/commands/context/boundaries.d.ts +13 -0
  170. package/dist/commands/context/boundaries.d.ts.map +1 -0
  171. package/dist/commands/context/boundaries.js +45 -0
  172. package/dist/commands/context/boundaries.js.map +1 -0
  173. package/dist/commands/context/compact.d.ts +13 -0
  174. package/dist/commands/context/compact.d.ts.map +1 -0
  175. package/dist/commands/context/compact.js +41 -0
  176. package/dist/commands/context/compact.js.map +1 -0
  177. package/dist/commands/context/savings.d.ts +13 -0
  178. package/dist/commands/context/savings.d.ts.map +1 -0
  179. package/dist/commands/context/savings.js +49 -0
  180. package/dist/commands/context/savings.js.map +1 -0
  181. package/dist/commands/context/status.d.ts +13 -0
  182. package/dist/commands/context/status.d.ts.map +1 -0
  183. package/dist/commands/context/status.js +52 -0
  184. package/dist/commands/context/status.js.map +1 -0
  185. package/dist/commands/context/strategy.d.ts +13 -0
  186. package/dist/commands/context/strategy.d.ts.map +1 -0
  187. package/dist/commands/context/strategy.js +66 -0
  188. package/dist/commands/context/strategy.js.map +1 -0
  189. package/dist/commands/mcp/disable.d.ts +5 -0
  190. package/dist/commands/mcp/disable.d.ts.map +1 -0
  191. package/dist/commands/mcp/disable.js +26 -0
  192. package/dist/commands/mcp/disable.js.map +1 -0
  193. package/dist/commands/mcp/edit.d.ts +9 -0
  194. package/dist/commands/mcp/edit.d.ts.map +1 -0
  195. package/dist/commands/mcp/edit.js +62 -0
  196. package/dist/commands/mcp/edit.js.map +1 -0
  197. package/dist/commands/mcp/enable.d.ts +5 -0
  198. package/dist/commands/mcp/enable.d.ts.map +1 -0
  199. package/dist/commands/mcp/enable.js +27 -0
  200. package/dist/commands/mcp/enable.js.map +1 -0
  201. package/dist/commands/mcp/init.d.ts +9 -0
  202. package/dist/commands/mcp/init.d.ts.map +1 -0
  203. package/dist/commands/mcp/init.js +97 -0
  204. package/dist/commands/mcp/init.js.map +1 -0
  205. package/dist/commands/mcp/list.d.ts +6 -0
  206. package/dist/commands/mcp/list.d.ts.map +1 -0
  207. package/dist/commands/mcp/list.js +56 -0
  208. package/dist/commands/mcp/list.js.map +1 -0
  209. package/dist/commands/mcp/server.d.ts +6 -0
  210. package/dist/commands/mcp/server.d.ts.map +1 -0
  211. package/dist/commands/mcp/server.js +44 -0
  212. package/dist/commands/mcp/server.js.map +1 -0
  213. package/dist/commands/mcp/status.d.ts +6 -0
  214. package/dist/commands/mcp/status.d.ts.map +1 -0
  215. package/dist/commands/mcp/status.js +43 -0
  216. package/dist/commands/mcp/status.js.map +1 -0
  217. package/dist/commands/mcp/tools.d.ts +7 -0
  218. package/dist/commands/mcp/tools.d.ts.map +1 -0
  219. package/dist/commands/mcp/tools.js +82 -0
  220. package/dist/commands/mcp/tools.js.map +1 -0
  221. package/dist/commands/mcp/validate.d.ts +8 -0
  222. package/dist/commands/mcp/validate.d.ts.map +1 -0
  223. package/dist/commands/mcp/validate.js +121 -0
  224. package/dist/commands/mcp/validate.js.map +1 -0
  225. package/dist/commands/middleware/config.d.ts +13 -0
  226. package/dist/commands/middleware/config.d.ts.map +1 -0
  227. package/dist/commands/middleware/config.js +87 -0
  228. package/dist/commands/middleware/config.js.map +1 -0
  229. package/dist/commands/middleware/disable.d.ts +13 -0
  230. package/dist/commands/middleware/disable.d.ts.map +1 -0
  231. package/dist/commands/middleware/disable.js +50 -0
  232. package/dist/commands/middleware/disable.js.map +1 -0
  233. package/dist/commands/middleware/enable.d.ts +13 -0
  234. package/dist/commands/middleware/enable.d.ts.map +1 -0
  235. package/dist/commands/middleware/enable.js +50 -0
  236. package/dist/commands/middleware/enable.js.map +1 -0
  237. package/dist/commands/middleware/list.d.ts +13 -0
  238. package/dist/commands/middleware/list.d.ts.map +1 -0
  239. package/dist/commands/middleware/list.js +64 -0
  240. package/dist/commands/middleware/list.js.map +1 -0
  241. package/dist/commands/middleware/status.d.ts +13 -0
  242. package/dist/commands/middleware/status.d.ts.map +1 -0
  243. package/dist/commands/middleware/status.js +80 -0
  244. package/dist/commands/middleware/status.js.map +1 -0
  245. package/dist/commands/models/compare.d.ts +9 -0
  246. package/dist/commands/models/compare.d.ts.map +1 -0
  247. package/dist/commands/models/compare.js +76 -0
  248. package/dist/commands/models/compare.js.map +1 -0
  249. package/dist/commands/models/cost.d.ts +9 -0
  250. package/dist/commands/models/cost.d.ts.map +1 -0
  251. package/dist/commands/models/cost.js +64 -0
  252. package/dist/commands/models/cost.js.map +1 -0
  253. package/dist/commands/models/info.d.ts +9 -0
  254. package/dist/commands/models/info.d.ts.map +1 -0
  255. package/dist/commands/models/info.js +61 -0
  256. package/dist/commands/models/info.js.map +1 -0
  257. package/dist/commands/models/list.d.ts +6 -0
  258. package/dist/commands/models/list.d.ts.map +1 -0
  259. package/dist/commands/models/list.js +66 -0
  260. package/dist/commands/models/list.js.map +1 -0
  261. package/dist/commands/models/providers.d.ts +13 -0
  262. package/dist/commands/models/providers.d.ts.map +1 -0
  263. package/dist/commands/models/providers.js +45 -0
  264. package/dist/commands/models/providers.js.map +1 -0
  265. package/dist/commands/models/search.d.ts +10 -0
  266. package/dist/commands/models/search.d.ts.map +1 -0
  267. package/dist/commands/models/search.js +56 -0
  268. package/dist/commands/models/search.js.map +1 -0
  269. package/dist/commands/models/switch.d.ts +14 -0
  270. package/dist/commands/models/switch.d.ts.map +1 -0
  271. package/dist/commands/models/switch.js +67 -0
  272. package/dist/commands/models/switch.js.map +1 -0
  273. package/dist/commands/permissions/auto-approve.d.ts +13 -0
  274. package/dist/commands/permissions/auto-approve.d.ts.map +1 -0
  275. package/dist/commands/permissions/auto-approve.js +53 -0
  276. package/dist/commands/permissions/auto-approve.js.map +1 -0
  277. package/dist/commands/permissions/grant.d.ts +13 -0
  278. package/dist/commands/permissions/grant.d.ts.map +1 -0
  279. package/dist/commands/permissions/grant.js +46 -0
  280. package/dist/commands/permissions/grant.js.map +1 -0
  281. package/dist/commands/permissions/mode.d.ts +12 -0
  282. package/dist/commands/permissions/mode.d.ts.map +1 -0
  283. package/dist/commands/permissions/mode.js +61 -0
  284. package/dist/commands/permissions/mode.js.map +1 -0
  285. package/dist/commands/permissions/policies.d.ts +13 -0
  286. package/dist/commands/permissions/policies.d.ts.map +1 -0
  287. package/dist/commands/permissions/policies.js +47 -0
  288. package/dist/commands/permissions/policies.js.map +1 -0
  289. package/dist/commands/permissions/revoke.d.ts +13 -0
  290. package/dist/commands/permissions/revoke.d.ts.map +1 -0
  291. package/dist/commands/permissions/revoke.js +46 -0
  292. package/dist/commands/permissions/revoke.js.map +1 -0
  293. package/dist/commands/permissions/set.d.ts +13 -0
  294. package/dist/commands/permissions/set.d.ts.map +1 -0
  295. package/dist/commands/permissions/set.js +57 -0
  296. package/dist/commands/permissions/set.js.map +1 -0
  297. package/dist/commands/permissions/tools.d.ts +13 -0
  298. package/dist/commands/permissions/tools.d.ts.map +1 -0
  299. package/dist/commands/permissions/tools.js +50 -0
  300. package/dist/commands/permissions/tools.js.map +1 -0
  301. package/dist/commands/server/start.d.ts +11 -0
  302. package/dist/commands/server/start.d.ts.map +1 -0
  303. package/dist/commands/server/start.js +58 -0
  304. package/dist/commands/server/start.js.map +1 -0
  305. package/dist/commands/session/checkpoints.d.ts +6 -0
  306. package/dist/commands/session/checkpoints.d.ts.map +1 -0
  307. package/dist/commands/session/checkpoints.js +41 -0
  308. package/dist/commands/session/checkpoints.js.map +1 -0
  309. package/dist/commands/session/compact.d.ts +13 -0
  310. package/dist/commands/session/compact.d.ts.map +1 -0
  311. package/dist/commands/session/compact.js +56 -0
  312. package/dist/commands/session/compact.js.map +1 -0
  313. package/dist/commands/session/export.d.ts +6 -0
  314. package/dist/commands/session/export.d.ts.map +1 -0
  315. package/dist/commands/session/export.js +31 -0
  316. package/dist/commands/session/export.js.map +1 -0
  317. package/dist/commands/session/list.d.ts +7 -0
  318. package/dist/commands/session/list.d.ts.map +1 -0
  319. package/dist/commands/session/list.js +63 -0
  320. package/dist/commands/session/list.js.map +1 -0
  321. package/dist/commands/session/new.d.ts +8 -0
  322. package/dist/commands/session/new.d.ts.map +1 -0
  323. package/dist/commands/session/new.js +23 -0
  324. package/dist/commands/session/new.js.map +1 -0
  325. package/dist/commands/session/resume.d.ts +6 -0
  326. package/dist/commands/session/resume.d.ts.map +1 -0
  327. package/dist/commands/session/resume.js +32 -0
  328. package/dist/commands/session/resume.js.map +1 -0
  329. package/dist/commands/session/search.d.ts +10 -0
  330. package/dist/commands/session/search.d.ts.map +1 -0
  331. package/dist/commands/session/search.js +65 -0
  332. package/dist/commands/session/search.js.map +1 -0
  333. package/dist/commands/session/stats.d.ts +6 -0
  334. package/dist/commands/session/stats.d.ts.map +1 -0
  335. package/dist/commands/session/stats.js +58 -0
  336. package/dist/commands/session/stats.js.map +1 -0
  337. package/dist/commands/session/view.d.ts +6 -0
  338. package/dist/commands/session/view.d.ts.map +1 -0
  339. package/dist/commands/session/view.js +65 -0
  340. package/dist/commands/session/view.js.map +1 -0
  341. package/dist/commands/slash/CommandPalette.d.ts +60 -0
  342. package/dist/commands/slash/CommandPalette.d.ts.map +1 -0
  343. package/dist/commands/slash/CommandPalette.js +351 -0
  344. package/dist/commands/slash/CommandPalette.js.map +1 -0
  345. package/dist/commands/slash/SlashCommandParser.d.ts +11 -0
  346. package/dist/commands/slash/SlashCommandParser.d.ts.map +1 -0
  347. package/dist/commands/slash/SlashCommandParser.js +11 -0
  348. package/dist/commands/slash/SlashCommandParser.js.map +1 -0
  349. package/dist/commands/slash/SlashCommandRegistry.d.ts +11 -0
  350. package/dist/commands/slash/SlashCommandRegistry.d.ts.map +1 -0
  351. package/dist/commands/slash/SlashCommandRegistry.js +11 -0
  352. package/dist/commands/slash/SlashCommandRegistry.js.map +1 -0
  353. package/dist/commands/slash/index.d.ts +11 -0
  354. package/dist/commands/slash/index.d.ts.map +1 -0
  355. package/dist/commands/slash/index.js +13 -0
  356. package/dist/commands/slash/index.js.map +1 -0
  357. package/dist/commands/system-messages/list.d.ts +13 -0
  358. package/dist/commands/system-messages/list.d.ts.map +1 -0
  359. package/dist/commands/system-messages/list.js +54 -0
  360. package/dist/commands/system-messages/list.js.map +1 -0
  361. package/dist/commands/system-messages/reload.d.ts +13 -0
  362. package/dist/commands/system-messages/reload.d.ts.map +1 -0
  363. package/dist/commands/system-messages/reload.js +36 -0
  364. package/dist/commands/system-messages/reload.js.map +1 -0
  365. package/dist/commands/system-messages/view.d.ts +13 -0
  366. package/dist/commands/system-messages/view.d.ts.map +1 -0
  367. package/dist/commands/system-messages/view.js +52 -0
  368. package/dist/commands/system-messages/view.js.map +1 -0
  369. package/dist/commands/tmux/list.d.ts +13 -0
  370. package/dist/commands/tmux/list.d.ts.map +1 -0
  371. package/dist/commands/tmux/list.js +68 -0
  372. package/dist/commands/tmux/list.js.map +1 -0
  373. package/dist/commands/tools/info.d.ts +13 -0
  374. package/dist/commands/tools/info.d.ts.map +1 -0
  375. package/dist/commands/tools/info.js +82 -0
  376. package/dist/commands/tools/info.js.map +1 -0
  377. package/dist/commands/tools/list.d.ts +14 -0
  378. package/dist/commands/tools/list.d.ts.map +1 -0
  379. package/dist/commands/tools/list.js +67 -0
  380. package/dist/commands/tools/list.js.map +1 -0
  381. package/dist/config/ConfigManager.d.ts +40 -0
  382. package/dist/config/ConfigManager.d.ts.map +1 -0
  383. package/dist/config/ConfigManager.js +162 -0
  384. package/dist/config/ConfigManager.js.map +1 -0
  385. package/dist/config/extension.d.ts +12 -0
  386. package/dist/config/extension.d.ts.map +1 -0
  387. package/dist/config/extension.js +5 -0
  388. package/dist/config/extension.js.map +1 -0
  389. package/dist/config/settings.d.ts +42 -0
  390. package/dist/config/settings.d.ts.map +1 -0
  391. package/dist/config/settings.js +32 -0
  392. package/dist/config/settings.js.map +1 -0
  393. package/dist/index.d.ts +3 -0
  394. package/dist/index.d.ts.map +1 -0
  395. package/dist/index.js +883 -0
  396. package/dist/index.js.map +1 -0
  397. package/dist/orchestrator/OrchestratorClient.d.ts +385 -0
  398. package/dist/orchestrator/OrchestratorClient.d.ts.map +1 -0
  399. package/dist/orchestrator/OrchestratorClient.js +1195 -0
  400. package/dist/orchestrator/OrchestratorClient.js.map +1 -0
  401. package/dist/themes/DefaultTheme.d.ts +9 -0
  402. package/dist/themes/DefaultTheme.d.ts.map +1 -0
  403. package/dist/themes/DefaultTheme.js +29 -0
  404. package/dist/themes/DefaultTheme.js.map +1 -0
  405. package/dist/themes/MinimalTheme.d.ts +9 -0
  406. package/dist/themes/MinimalTheme.d.ts.map +1 -0
  407. package/dist/themes/MinimalTheme.js +29 -0
  408. package/dist/themes/MinimalTheme.js.map +1 -0
  409. package/dist/themes/Theme.interface.d.ts +36 -0
  410. package/dist/themes/Theme.interface.d.ts.map +1 -0
  411. package/dist/themes/Theme.interface.js +5 -0
  412. package/dist/themes/Theme.interface.js.map +1 -0
  413. package/dist/themes/ThemeManager.d.ts +63 -0
  414. package/dist/themes/ThemeManager.d.ts.map +1 -0
  415. package/dist/themes/ThemeManager.js +257 -0
  416. package/dist/themes/ThemeManager.js.map +1 -0
  417. package/dist/themes/colors.d.ts +108 -0
  418. package/dist/themes/colors.d.ts.map +1 -0
  419. package/dist/themes/colors.js +284 -0
  420. package/dist/themes/colors.js.map +1 -0
  421. package/dist/themes/createTheme.d.ts +40 -0
  422. package/dist/themes/createTheme.d.ts.map +1 -0
  423. package/dist/themes/createTheme.js +114 -0
  424. package/dist/themes/createTheme.js.map +1 -0
  425. package/dist/themes/themeDefinitions.d.ts +27 -0
  426. package/dist/themes/themeDefinitions.d.ts.map +1 -0
  427. package/dist/themes/themeDefinitions.js +244 -0
  428. package/dist/themes/themeDefinitions.js.map +1 -0
  429. package/dist/utils/CodeDiffRenderer.d.ts +124 -0
  430. package/dist/utils/CodeDiffRenderer.d.ts.map +1 -0
  431. package/dist/utils/CodeDiffRenderer.js +257 -0
  432. package/dist/utils/CodeDiffRenderer.js.map +1 -0
  433. package/dist/utils/MarkdownRenderer.d.ts +74 -0
  434. package/dist/utils/MarkdownRenderer.d.ts.map +1 -0
  435. package/dist/utils/MarkdownRenderer.js +260 -0
  436. package/dist/utils/MarkdownRenderer.js.map +1 -0
  437. package/dist/utils/MessageRenderer.d.ts +200 -0
  438. package/dist/utils/MessageRenderer.d.ts.map +1 -0
  439. package/dist/utils/MessageRenderer.js +283 -0
  440. package/dist/utils/MessageRenderer.js.map +1 -0
  441. package/dist/utils/ToolFormatter.d.ts +103 -0
  442. package/dist/utils/ToolFormatter.d.ts.map +1 -0
  443. package/dist/utils/ToolFormatter.js +357 -0
  444. package/dist/utils/ToolFormatter.js.map +1 -0
  445. package/dist/utils/boxDrawing.d.ts +23 -0
  446. package/dist/utils/boxDrawing.d.ts.map +1 -0
  447. package/dist/utils/boxDrawing.js +78 -0
  448. package/dist/utils/boxDrawing.js.map +1 -0
  449. package/dist/utils/checks.d.ts +9 -0
  450. package/dist/utils/checks.d.ts.map +1 -0
  451. package/dist/utils/checks.js +11 -0
  452. package/dist/utils/checks.js.map +1 -0
  453. package/dist/utils/events.d.ts +24 -0
  454. package/dist/utils/events.d.ts.map +1 -0
  455. package/dist/utils/events.js +17 -0
  456. package/dist/utils/events.js.map +1 -0
  457. package/dist/utils/formatters.d.ts +255 -0
  458. package/dist/utils/formatters.d.ts.map +1 -0
  459. package/dist/utils/formatters.js +361 -0
  460. package/dist/utils/formatters.js.map +1 -0
  461. package/dist/utils/math.d.ts +11 -0
  462. package/dist/utils/math.d.ts.map +1 -0
  463. package/dist/utils/math.js +13 -0
  464. package/dist/utils/math.js.map +1 -0
  465. package/package.json +82 -0
@@ -0,0 +1,123 @@
1
+ /**
2
+ * `cortex autoresearch bench` — run a task set through the harness, GRADE each
3
+ * output with the task's verifier, and write REAL scored records to
4
+ * router-matrix.jsonl. This is the grader the decision layer was missing:
5
+ * `evaluate` (the gate) only produces a meaningful keep/discard once arms carry
6
+ * real qualitativeScores, which this command supplies.
7
+ *
8
+ * One invocation benches ONE harness build (records auto-stamped with its git
9
+ * SHA, or --harness-ref). The swarm orchestrator runs it in the base worktree
10
+ * and the candidate worktree, then calls `evaluate --base … --candidate …`.
11
+ */
12
+ import { readdirSync, readFileSync, statSync } from 'node:fs';
13
+ import { join, resolve } from 'node:path';
14
+ import { spawnSync } from 'node:child_process';
15
+ import { ModelRouterMatrix, runBench, parseTaskSet, ResearchBacklog, } from '@nexus-cortex/core';
16
+ import { ThemeManager } from '../../themes/ThemeManager.js';
17
+ import { findProjectRoot } from '../config/utils.js';
18
+ import { serverRunner } from './harnessProcess.js';
19
+ import { commandRunner } from './commandRunner.js';
20
/**
 * Load bench tasks from a single JSON file or from every `*.json` entry
 * directly inside a directory.
 *
 * Each file is parsed as JSON and handed to `parseTaskSet(raw, file)`; the
 * tasks it returns are concatenated in file order.
 *
 * @param {string} taskSetPath - Path to a task file, or to a directory of task files.
 * @returns {Array} Every task produced by `parseTaskSet`, in file order.
 * @throws {Error} If the path cannot be stat'ed or read, or if a file is not
 *   valid JSON (the message names the file; the parser error is attached as `cause`).
 */
function loadTasks(taskSetPath) {
    const files = statSync(taskSetPath).isDirectory()
        ? readdirSync(taskSetPath, { withFileTypes: true })
            // Skip sub-directories that merely look like task files; reading one would fail with EISDIR.
            .filter(entry => !entry.isDirectory() && entry.name.endsWith('.json'))
            .map(entry => join(taskSetPath, entry.name))
            // readdir order is platform-dependent; sort so task order is reproducible.
            .sort()
        : [taskSetPath];
    const tasks = [];
    for (const file of files) {
        const text = readFileSync(file, 'utf8');
        let raw;
        try {
            raw = JSON.parse(text);
        }
        catch (error) {
            // A bare SyntaxError does not say which task file is malformed.
            throw new Error(`invalid JSON in ${file}: ${error.message}`, { cause: error });
        }
        tasks.push(...parseTaskSet(raw, file));
    }
    return tasks;
}
33
/**
 * Parse a comma-separated list of accepted exit codes.
 * Blank entries are ignored (`Number('')` is 0, which would otherwise silently
 * add exit code 0); non-numeric entries are dropped. A missing or entirely
 * blank spec falls back to `[0]`.
 */
function parseExitCodes(spec) {
    const tokens = String(spec ?? '0')
        .split(',')
        .map(token => token.trim())
        .filter(token => token !== '');
    if (tokens.length === 0) {
        return [0];
    }
    return tokens.map(Number).filter(code => Number.isFinite(code));
}
/**
 * Parse the optional run count.
 * Returns `undefined` when unset or blank, the positive integer when valid,
 * and `NaN` when a value was supplied but is not a positive integer.
 */
function parseRunCount(value) {
    if (value === undefined || value === null || String(value).trim() === '') {
        return undefined;
    }
    const count = Number(value);
    return Number.isInteger(count) && count > 0 ? count : NaN;
}
/**
 * Parse an optional numeric option.
 * Returns `undefined` when unset, blank, or not a finite number, so that a
 * `null` or empty value is never coerced to 0.
 */
function parseOptionalNumber(value) {
    if (value === undefined || value === null || String(value).trim() === '') {
        return undefined;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
}
/**
 * `cortex autoresearch bench` entry point: load a task set, pick a runner
 * (a shell command template when `runCmd` is given, otherwise the harness
 * server), run the bench via `runBench`, and print the summary as JSON or as
 * a themed table.
 *
 * Fields read from `options`: `taskSet` and `experimentTag` (both required),
 * `split` ('train' | 'holdout', default 'train'), `runs`, `model`, `json`,
 * `runCmd`, `buildCmd`, `cwd`, `acceptExit`, `serverUrl`, `benchmarkSource`,
 * `harnessRef`, `temperature`, `strategy`, `seedBacklog`.
 *
 * Invalid options, an empty task set, a failed build, or any thrown error
 * print an error to stderr and terminate the process with exit code 1.
 *
 * @param {object} options - Parsed CLI options (see the field list above).
 * @returns {Promise<void>}
 */
export async function autoResearchBench(options) {
    const theme = ThemeManager.getTheme();
    const projectRoot = findProjectRoot();
    if (!options.taskSet) {
        console.error(theme.colors.error('Error: --task-set is required'));
        process.exit(1);
        return;
    }
    if (!options.experimentTag) {
        console.error(theme.colors.error('Error: --experiment-tag is required'));
        process.exit(1);
        return;
    }
    const split = options.split ?? 'train';
    if (split !== 'train' && split !== 'holdout') {
        console.error(theme.colors.error("Error: --split must be 'train' or 'holdout'"));
        process.exit(1);
        return;
    }
    // Validate up front so NaN never reaches runBench and the banner below
    // always reports the run count that is actually used.
    const runs = parseRunCount(options.runs);
    if (Number.isNaN(runs)) {
        console.error(theme.colors.error('Error: --runs must be a positive integer'));
        process.exit(1);
        return;
    }
    try {
        const tasks = loadTasks(options.taskSet);
        if (tasks.length === 0) {
            console.error(theme.colors.error(`Error: no tasks found in ${options.taskSet}`));
            process.exit(1);
            return;
        }
        const matrix = new ModelRouterMatrix(projectRoot);
        const model = options.model ?? process.env.DEFAULT_MODEL_ID;
        // Progress lines are suppressed in --json mode so stdout stays machine-readable.
        const log = (m) => {
            if (!options.json) {
                console.log(theme.colors.muted(` ${m}`));
            }
        };
        let runner;
        let source;
        if (options.runCmd) {
            // Non-cortex command target: optionally build once, then grade a shell command per task.
            const cwd = options.cwd ? resolve(options.cwd) : projectRoot;
            const acceptExit = parseExitCodes(options.acceptExit);
            if (options.buildCmd) {
                log(`Building target: ${options.buildCmd} (cwd ${cwd})`);
                const b = spawnSync('sh', ['-c', options.buildCmd], { cwd, stdio: options.json ? 'ignore' : 'inherit' });
                if (b.error || b.status !== 0) {
                    // status is null when the shell could not be spawned or was killed by a signal.
                    const reason = b.error
                        ? b.error.message
                        : b.signal
                            ? `signal ${b.signal}`
                            : `exit ${b.status}`;
                    console.error(theme.colors.error(`Error: build command failed (${reason})`));
                    process.exit(1);
                    return;
                }
            }
            runner = commandRunner({ cwd, template: options.runCmd, acceptExitCodes: acceptExit, log });
            source = `cmd "${options.runCmd}" (cwd ${cwd})`;
        }
        else {
            const serverUrl = options.serverUrl ?? process.env.CORTEX_SERVER_URL ?? 'http://localhost:4000';
            runner = serverRunner(serverUrl, model);
            source = serverUrl;
        }
        if (!options.json) {
            console.log();
            // NOTE(review): 2 is the count this banner has always shown when --runs is
            // unset; presumably it mirrors runBench's own default — confirm.
            console.log(theme.colors.muted(` Benching ${tasks.length} task(s) × ${runs ?? 2} run(s) via ${source} [${split}] tag=${options.experimentTag}`));
        }
        const summary = await runBench(tasks, runner, matrix, {
            experimentTag: options.experimentTag,
            runs,
            split,
            modelId: model,
            benchmarkSource: options.benchmarkSource,
            harnessRef: options.harnessRef,
            temperature: parseOptionalNumber(options.temperature),
            strategy: options.strategy,
            // Backlog seeding is on unless explicitly disabled with seedBacklog === false.
            backlog: options.seedBacklog === false ? undefined : new ResearchBacklog(projectRoot),
            discoveredRound: options.experimentTag,
            discoveredRef: options.harnessRef,
            onRun: options.json ? undefined : (info) => {
                const mark = info.pass ? theme.colors.success('[OK]') : theme.colors.error('[FAIL]');
                console.log(theme.colors.muted(` ${mark} ${info.taskId} run ${info.run}: ${info.qualitativeScore}`));
            },
        });
        if (options.json) {
            console.log(JSON.stringify(summary, null, 2));
            return;
        }
        console.log();
        for (const t of summary.tasks) {
            console.log(` ${theme.colors.highlight(t.taskId.padEnd(28))} ${theme.colors.secondary(t.taskType)} mean ${t.meanScore} pass ${Math.round(t.passRate * 100)}%`);
        }
        console.log();
        console.log(theme.colors.muted(` ${summary.totalRuns} run(s) recorded → ${projectRoot}/.cortex/router-matrix.jsonl (harnessRef ${summary.harnessRef ?? 'auto'})`));
        if (summary.seededDeficiencies > 0) {
            console.log(theme.colors.muted(` ${summary.seededDeficiencies} deficiency(ies) seeded → ${projectRoot}/.cortex/research-backlog.jsonl (ResearchBacklog list / next)`));
        }
        console.log(theme.colors.muted(` Next: cortex autoresearch evaluate --experiment-tag ${options.experimentTag} --base <ref> --candidate <ref> --branch <wt>`));
        console.log();
    }
    catch (error) {
        // Anything can be thrown; avoid printing "Error: undefined" for non-Error values.
        const message = error instanceof Error ? error.message : String(error);
        console.error(theme.colors.error(`Error: ${message}`));
        process.exit(1);
    }
}
123
+ //# sourceMappingURL=bench.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bench.js","sourceRoot":"","sources":["../../../src/commands/autoresearch/bench.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AACH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC/C,OAAO,EACL,iBAAiB,EACjB,QAAQ,EACR,YAAY,EACZ,eAAe,GAGhB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAiCnD,gFAAgF;AAChF,SAAS,SAAS,CAAC,WAAmB;IACpC,MAAM,EAAE,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC;IACjC,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,EAAE;QAC5B,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAC1F,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAClB,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;QAChD,KAAK,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAAiC;IACvE,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,EAAE,CAAC;IACtC,MAAM,WAAW,GAAG,eAAe,EAAE,CAAC;IAEtC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC,CAAC;QAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAAC,CAAC;IAC9G,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,CAAC;QAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAC,CAAC;QAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAAC,CAAC;IAE1H,MAAM,KAAK,GAAI,OAAO,CAAC,KAAyC,IAAI,OAAO,CAAC;IAC5E,IAAI,KAAK,KAAK,OAAO,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QAC7C,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC,CAAC;QACjF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,OAAQ,CAAC,CAAC;QAC1C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAAC,OAAO,CAAC,KAAK,CAAC,KAA
K,CAAC,MAAM,CAAC,KAAK,CAAC,4BAA4B,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAC,CAAC;QAE9H,MAAM,MAAM,GAAG,IAAI,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;QAC5D,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI;YAAE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAE5F,IAAI,MAAqB,CAAC;QAC1B,IAAI,MAAc,CAAC;QACnB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,yFAAyF;YACzF,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;YAC7D,MAAM,UAAU,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YACrH,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACrB,GAAG,CAAC,oBAAoB,OAAO,CAAC,QAAQ,UAAU,GAAG,GAAG,CAAC,CAAC;gBAC1D,MAAM,CAAC,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;gBACzG,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAAC,CAAC;YAC/H,CAAC;YACD,MAAM,GAAG,aAAa,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,CAAC,MAAM,EAAE,eAAe,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC;YAC5F,MAAM,GAAG,QAAQ,OAAO,CAAC,MAAM,UAAU,GAAG,GAAG,CAAC;QAClD,CAAC;aAAM,CAAC;YACN,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,uBAAuB,CAAC;YAChG,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YACxC,MAAM,GAAG,SAAS,CAAC;QACrB,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAClB,OAAO,CAAC,GAAG,EAAE,CAAC;YACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,KAAK,CAAC,MAAM,cAAc,OAAO,CAAC,IAAI,IAAI,CAAC,eAAe,MAAM,KAAK,KAAK,UAAU,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAC5J,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE
;YACpD,aAAa,EAAE,OAAO,CAAC,aAAc;YACrC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACrD,KAAK;YACL,OAAO,EAAE,KAAK;YACd,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS;YACxI,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,OAAO,EAAE,OAAO,CAAC,WAAW,KAAK,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,eAAe,CAAC,WAAW,CAAC;YACrF,eAAe,EAAE,OAAO,CAAC,aAAa;YACtC,aAAa,EAAE,OAAO,CAAC,UAAU;YACjC,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE;gBACzC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACrF,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,QAAQ,IAAI,CAAC,GAAG,KAAK,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC;YACvG,CAAC;SACF,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC9C,OAAO;QACT,CAAC;QAED,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,SAAS,UAAU,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC;QACnK,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,SAAS,sBAAsB,WAAW,6CAA6C,OAAO,CAAC,UAAU,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC;QACpK,IAAI,OAAO,CAAC,kBAAkB,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,kBAAkB,6BAA6B,WAAW,gEAAgE,CAAC,CAAC,CAAC;QAC1K,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,wDAAwD,OAAO,CAAC,aAAa,+CAA+C,CAAC,CAAC,CAAC;QAC9J,OAAO,CAAC,GAAG,EAAE,CAAC;IAEhB,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,K
AAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC"}
@@ -0,0 +1,35 @@
1
+ import type { HarnessRunner } from '@nexus-cortex/core';
2
+ import type { ExperimentTarget, PreparedArm, PrepareArmOptions } from './harnessProcess.js';
3
+ export interface CommandRunnerOptions {
4
+ /** Working directory the command runs in (e.g. the candidate worktree). */
5
+ cwd: string;
6
+ /** Command template, e.g. `./eval.sh {prompt}` or `python eval.py --case {prompt}`.
7
+ * If it contains no `{prompt}`/`{case}` placeholder, the prompt is appended as a
8
+ * single quoted argument. */
9
+ template: string;
10
+ /** Exit codes whose stdout is accepted for grading. Default `[0]`. */
11
+ acceptExitCodes?: number[];
12
+ /** Per-run hard timeout in ms. Default 120000. */
13
+ timeoutMs?: number;
14
+ /** Progress/diagnostic sink (stderr + nonzero-exit notices). */
15
+ log?: (message: string) => void;
16
+ }
17
+ export declare function commandRunner(opts: CommandRunnerOptions): HarnessRunner;
18
+ /**
19
+ * CommandTarget — the non-cortex `ExperimentTarget`. An optional one-shot build command,
20
+ * then grade a shell command per task via `commandRunner`. Nothing to serve, nothing to
21
+ * tear down — so `cortex autoresearch experiment` can run base-vs-candidate on any project
22
+ * (a library, CLI, test suite, backtest) through the same statistical gate as the harness.
23
+ */
24
+ export declare class CommandTarget implements ExperimentTarget {
25
+ private readonly cfg;
26
+ readonly kind = "command";
27
+ constructor(cfg: {
28
+ template: string;
29
+ buildCmd?: string;
30
+ acceptExitCodes?: number[];
31
+ timeoutMs?: number;
32
+ });
33
+ prepare(dir: string, opts: PrepareArmOptions): Promise<PreparedArm>;
34
+ }
35
+ //# sourceMappingURL=commandRunner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"commandRunner.d.ts","sourceRoot":"","sources":["../../../src/commands/autoresearch/commandRunner.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EAAE,aAAa,EAAoB,MAAM,oBAAoB,CAAC;AAC1E,OAAO,KAAK,EAAE,gBAAgB,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAE5F,MAAM,WAAW,oBAAoB;IACnC,2EAA2E;IAC3E,GAAG,EAAE,MAAM,CAAC;IACZ;;kCAE8B;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,sEAAsE;IACtE,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,kDAAkD;IAClD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gEAAgE;IAChE,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CACjC;AAOD,wBAAgB,aAAa,CAAC,IAAI,EAAE,oBAAoB,GAAG,aAAa,CAoCvE;AAED;;;;;GAKG;AACH,qBAAa,aAAc,YAAW,gBAAgB;IAGlD,OAAO,CAAC,QAAQ,CAAC,GAAG;IAFtB,QAAQ,CAAC,IAAI,aAAa;gBAEP,GAAG,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE;IAGzG,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,iBAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;CAe1E"}
@@ -0,0 +1,91 @@
1
+ /**
2
+ * CommandRunner — a `HarnessRunner` that grades a SHELL COMMAND instead of an LLM
3
+ * endpoint. This is the non-cortex experiment path: run `template` (with `{prompt}` /
4
+ * `{case}` substituted) per task in `cwd`, capture stdout, and hand it to the task's
5
+ * verifier (typically a `numeric` verifier that extracts a metric). It lets the
6
+ * auto-research loop measure a library / CLI / test suite / backtest — anything with a
7
+ * build + run + metric — through the same statistical gate as the cortex harness.
8
+ *
9
+ * Exit-code contract: stdout is graded only when the exit code is in `acceptExitCodes`
10
+ * (default `[0]`); otherwise `text=''`, so every verifier fails — a crashed run is not a
11
+ * valid measurement, so it fails the bench and seeds the backlog. stderr (and the exit
12
+ * code) are surfaced via `log`, never graded.
13
+ *
14
+ * The substituted `{prompt}` value is single-quote-escaped before it reaches the shell,
15
+ * so a task prompt cannot inject shell syntax. The `template` itself is operator-supplied
16
+ * (the experiment spec) and trusted.
17
+ */
18
+ import { spawn, spawnSync } from 'node:child_process';
19
/**
 * Quote a string for a POSIX shell.
 *
 * The value is wrapped in single quotes; each embedded single quote is emitted as
 * `'\''` (close the quoted run, add an escaped quote, reopen the quoted run).
 *
 * @param s - Text to quote.
 * @returns The single-quoted form of `s`.
 */
function shQuote(s) {
    const pieces = s.split("'");
    const body = pieces.join("'\\''");
    return `'${body}'`;
}
23
/**
 * Create a runner that grades the stdout of a shell command.
 *
 * For every prompt the command line is derived from `opts.template`: each `{prompt}` /
 * `{case}` placeholder is replaced by the shell-quoted prompt, or, when the template has
 * no placeholder, the quoted prompt is appended as one trailing argument. The command is
 * executed with `sh -c` in `opts.cwd`, with stdin ignored and stdout/stderr captured.
 *
 * @param opts - Runner options: `cwd`, `template`, and the optional `acceptExitCodes`
 *   (default `[0]`), `timeoutMs` (default 120000) and `log` diagnostic sink.
 * @returns An object whose `run(prompt)` returns a promise resolving to
 *   `{ text, modelId: 'command', inputTokens: 0, outputTokens: 0, toolCallCount: 0, latencyMs }`.
 *   `text` is the captured stdout when the process exits with an accepted code, and `''`
 *   on timeout, on a child-process `error` event, or on any other exit code. In those
 *   failure cases a notice (with the last 300 characters of stderr for exit/timeout) is
 *   sent to `opts.log` when it is provided.
 */
export function commandRunner(opts) {
    const accept = opts.acceptExitCodes ?? [0];
    const timeoutMs = opts.timeoutMs ?? 120_000;
    return {
        run(prompt) {
            const quoted = shQuote(prompt);
            // Use a replacer FUNCTION, not a replacement string: with a string,
            // String.prototype.replace expands `$&`, `$'`, `` $` `` and `$n` found inside the
            // prompt, which corrupts the prompt and can splice template text into the
            // quoted argument.
            const cmd = /\{prompt\}|\{case\}/.test(opts.template)
                ? opts.template.replace(/\{prompt\}|\{case\}/g, () => quoted)
                : `${opts.template} ${quoted}`;
            const start = Date.now();
            return new Promise((resolve) => {
                const proc = spawn('sh', ['-c', cmd], { cwd: opts.cwd, stdio: ['ignore', 'pipe', 'pipe'] });
                // Decode on the stream so a multi-byte UTF-8 character that straddles two
                // chunks is reassembled instead of being decoded half-and-half.
                proc.stdout.setEncoding('utf8');
                proc.stderr.setEncoding('utf8');
                let out = '';
                let err = '';
                let timedOut = false;
                let settled = false;
                const timer = setTimeout(() => {
                    timedOut = true;
                    try {
                        proc.kill('SIGKILL');
                    }
                    catch { /* already gone */ }
                }, timeoutMs);
                proc.stdout.on('data', (d) => { out += d; });
                proc.stderr.on('data', (d) => { err += d; });
                // Settle exactly once: an 'error' event may be followed by 'close', and the
                // failure should be reported a single time.
                const finish = (text, notice) => {
                    if (settled) {
                        return;
                    }
                    settled = true;
                    clearTimeout(timer);
                    if (notice !== undefined) {
                        opts.log?.(notice);
                    }
                    resolve({ text, modelId: 'command', inputTokens: 0, outputTokens: 0, toolCallCount: 0, latencyMs: Date.now() - start });
                };
                proc.on('close', (code) => {
                    const ok = !timedOut && code != null && accept.includes(code);
                    if (ok) {
                        finish(out);
                        return;
                    }
                    const reason = timedOut ? `timeout after ${timeoutMs}ms` : `exit ${code}`;
                    finish('', `[command ${reason}] ${err.trim().slice(-300)}`);
                });
                proc.on('error', (e) => {
                    finish('', `[command error] ${e.message}`);
                });
            });
        },
    };
}
62
/**
 * CommandTarget — the non-cortex `ExperimentTarget`. An optional one-shot build command,
 * then grade a shell command per task via `commandRunner`. Nothing to serve, nothing to
 * tear down — so `cortex autoresearch experiment` can run base-vs-candidate on any project
 * (a library, CLI, test suite, backtest) through the same statistical gate as the harness.
 */
export class CommandTarget {
    cfg;
    kind = 'command';
    /**
     * @param cfg - `template` (command to grade), plus optional `buildCmd`,
     *   `acceptExitCodes` and `timeoutMs`, which are forwarded to `commandRunner`.
     */
    constructor(cfg) {
        this.cfg = cfg;
    }
    /**
     * Optionally build the checkout in `dir`, then return a runner bound to it.
     *
     * The build runs synchronously through `sh -c` only when both `opts.build` and
     * `cfg.buildCmd` are set; its stdout is discarded and its stderr is inherited.
     *
     * @param dir - Working directory for the build and for every graded command.
     * @param opts - Arm options; `opts.build` gates the build step and `opts.log`
     *   receives progress messages.
     * @returns `{ runner, stop }`, where `stop` is a no-op.
     * @throws Error when the build does not exit with status 0. The message names the
     *   exit code, or the terminating signal / spawn error when there is no exit code.
     */
    async prepare(dir, opts) {
        if (opts.build && this.cfg.buildCmd) {
            opts.log(`build: ${this.cfg.buildCmd} (cwd ${dir})`);
            const b = spawnSync('sh', ['-c', this.cfg.buildCmd], { cwd: dir, stdio: ['ignore', 'ignore', 'inherit'] });
            if (b.status !== 0) {
                // `status` is null when the shell never started or was killed by a
                // signal; report the real cause instead of "exit null".
                let why = `exit ${b.status}`;
                if (b.error) {
                    why = `spawn error: ${b.error.message}`;
                }
                else if (b.status === null && b.signal) {
                    why = `signal ${b.signal}`;
                }
                throw new Error(`build command failed (${why}) in ${dir}`);
            }
        }
        const runner = commandRunner({
            cwd: dir,
            template: this.cfg.template,
            acceptExitCodes: this.cfg.acceptExitCodes,
            timeoutMs: this.cfg.timeoutMs,
            log: opts.log,
        });
        return { runner, stop: () => { } };
    }
}
91
+ //# sourceMappingURL=commandRunner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"commandRunner.js","sourceRoot":"","sources":["../../../src/commands/autoresearch/commandRunner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAmBtD,0FAA0F;AAC1F,SAAS,OAAO,CAAC,CAAS;IACxB,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAA0B;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC;IAC5C,OAAO;QACL,GAAG,CAAC,MAAc;YAChB,MAAM,GAAG,GAAG,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACnD,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,sBAAsB,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;gBAChE,CAAC,CAAC,GAAG,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACzB,OAAO,IAAI,OAAO,CAAmB,CAAC,OAAO,EAAE,EAAE;gBAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC5F,IAAI,GAAG,GAAG,EAAE,CAAC;gBACb,IAAI,GAAG,GAAG,EAAE,CAAC;gBACb,IAAI,QAAQ,GAAG,KAAK,CAAC;gBACrB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,QAAQ,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;oBAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAAC,CAAC;gBAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;gBAC3H,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxD,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxD,MAAM,IAAI,GAAG,CAAC,IAAY,EAAE,SAAiB,EAAoB,EAAE,CACjE,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC;gBAC/F,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;oBACxB,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,MAAM,EAAE,GAAG,CAAC,QAAQ,IAAI,IAAI,IAAI,IAAI,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;oBAC9D,IAAI,CAAC,EAAE,EAAE,CAAC;wBACR,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,SAAS,IAAI,CAAC,
CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC;wBAC1E,IAAI,CAAC,GAAG,EAAE,CAAC,YAAY,MAAM,KAAK,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBAC9D,CAAC;oBACD,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC;gBACnD,CAAC,CAAC,CAAC;gBACH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE;oBACrB,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;oBAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC;gBACxC,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,OAAO,aAAa;IAGL;IAFV,IAAI,GAAG,SAAS,CAAC;IAC1B,YACmB,GAA4F;QAA5F,QAAG,GAAH,GAAG,CAAyF;IAC5G,CAAC;IAEJ,KAAK,CAAC,OAAO,CAAC,GAAW,EAAE,IAAuB;QAChD,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;YACpC,IAAI,CAAC,GAAG,CAAC,UAAU,IAAI,CAAC,GAAG,CAAC,QAAQ,UAAU,GAAG,GAAG,CAAC,CAAC;YACtD,MAAM,CAAC,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,CAAC,CAAC;YAC3G,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,MAAM,QAAQ,GAAG,EAAE,CAAC,CAAC;QAC3F,CAAC;QACD,MAAM,MAAM,GAAG,aAAa,CAAC;YAC3B,GAAG,EAAE,GAAG;YACR,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,QAAQ;YAC3B,eAAe,EAAE,IAAI,CAAC,GAAG,CAAC,eAAe;YACzC,SAAS,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS;YAC7B,GAAG,EAAE,IAAI,CAAC,GAAG;SACd,CAAC,CAAC;QACH,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAA8B,CAAC,EAAE,CAAC;IAChE,CAAC;CACF"}
@@ -0,0 +1,18 @@
1
+ export interface AutoResearchEvaluateOptions {
2
+ experimentTag?: string;
3
+ base?: string;
4
+ candidate?: string;
5
+ branch?: string;
6
+ deficiencyId?: string;
7
+ benchmarkSource?: string;
8
+ modelId?: string;
9
+ nFamily?: string;
10
+ alpha?: string;
11
+ seed?: string;
12
+ epsilon?: string;
13
+ minRuns?: string;
14
+ verifyHoldout?: boolean;
15
+ json?: boolean;
16
+ }
17
+ export declare function autoResearchEvaluate(options: AutoResearchEvaluateOptions): Promise<void>;
18
+ //# sourceMappingURL=evaluate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluate.d.ts","sourceRoot":"","sources":["../../../src/commands/autoresearch/evaluate.ts"],"names":[],"mappings":"AA0BA,MAAM,WAAW,2BAA2B;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAQD,wBAAsB,oBAAoB,CAAC,OAAO,EAAE,2BAA2B,GAAG,OAAO,CAAC,IAAI,CAAC,CAoG9F"}
@@ -0,0 +1,117 @@
1
+ /**
2
+ * `cortex autoresearch evaluate` — headless invocation of the keep/discard gate.
3
+ *
4
+ * This is the entry point a swarm member (or the cortex-bench flow) calls AFTER
5
+ * it has recorded base-version and candidate-version benchmark runs to the
6
+ * matrix (router-matrix.jsonl) under the same experimentTag. It runs the full
7
+ * decision pipeline (regressionScan → Monte-Carlo gate → ledger.decide) and
8
+ * writes the audited verdict to `.cortex/experiments.jsonl`.
9
+ *
10
+ * The output JSONL is THE integration boundary: the nexus Layer-3 STDB module
11
+ * ingests it (header → `experiment`, results[] → `experiment_task_result`).
12
+ * Nothing downstream re-derives the statistics — the decision is final here.
13
+ *
14
+ * Overfitting guard: keep/discard reads split='train' records; `--verify-holdout`
15
+ * additionally runs the held-out gate (a candidate is only merge-eligible when
16
+ * kept-on-train AND verified-on-holdout).
17
+ */
18
+ import { ModelRouterMatrix, ExperimentLedger, evaluateAutoResearchExperiment, verifyOnHoldout, } from '@nexus-cortex/core';
19
+ import { ThemeManager } from '../../themes/ThemeManager.js';
20
+ import { findProjectRoot } from '../config/utils.js';
21
/**
 * Convert an optional CLI value to a finite number.
 *
 * @param v - Raw option value, or `undefined` when the flag was not given.
 * @returns `Number(v)` when that is finite; `undefined` when `v` is `undefined` or the
 *   conversion yields `NaN` / an infinity.
 */
function numOrUndef(v) {
    const parsed = v === undefined ? Number.NaN : Number(v);
    if (Number.isFinite(parsed)) {
        return parsed;
    }
    return undefined;
}
27
/**
 * CLI handler: run the keep/discard evaluation for one experiment and print the verdict.
 *
 * Requires `experimentTag`, `base`, `candidate` and `branch`; when any is missing the
 * corresponding flags are reported on stderr and the process exits with status 1.
 * Otherwise `evaluateAutoResearchExperiment` is called and, with `verifyHoldout`, also
 * `verifyOnHoldout`. The result is "mergeable" only when the verdict is `keep` with
 * `fwerAdjusted === true` and, if a holdout check was requested, that check is `keep` too.
 *
 * @param options - Parsed command-line options. Numeric options arrive as strings and
 *   are converted with `numOrUndef`; `nFamily` defaults to 1.
 * @returns A promise that resolves once the report (JSON with `options.json`, a
 *   formatted summary otherwise) has been written. Any thrown error is printed to
 *   stderr and the process exits with status 1.
 */
export async function autoResearchEvaluate(options) {
    const theme = ThemeManager.getTheme();
    const projectRoot = findProjectRoot();
    // Gather the flag name of every mandatory option that was left unset.
    const missing = [];
    const mandatory = [
        ['experimentTag', '--experiment-tag'],
        ['base', '--base'],
        ['candidate', '--candidate'],
        ['branch', '--branch'],
    ];
    for (const [key, flag] of mandatory) {
        if (!options[key]) {
            missing.push(flag);
        }
    }
    if (missing.length > 0) {
        console.error(theme.colors.error(`Error: missing required option(s): ${missing.join(', ')}`));
        process.exit(1);
    }
    try {
        const matrix = new ModelRouterMatrix(projectRoot);
        const ledger = new ExperimentLedger(projectRoot);
        const gate = {
            alpha: numOrUndef(options.alpha),
            seed: numOrUndef(options.seed),
            minRunsPerArm: numOrUndef(options.minRuns),
        };
        const familySize = numOrUndef(options.nFamily) ?? 1;
        // Settings passed unchanged to both the train evaluation and the holdout check.
        const shared = {
            benchmarkSource: options.benchmarkSource,
            modelId: options.modelId,
            nFamilyExperiments: familySize,
            gate,
            epsilon: numOrUndef(options.epsilon),
        };
        const result = evaluateAutoResearchExperiment(matrix, ledger, {
            experimentTag: options.experimentTag,
            baseRef: options.base,
            candidateRef: options.candidate,
            branch: options.branch,
            deficiencyId: options.deficiencyId,
            ...shared,
        });
        const holdout = options.verifyHoldout
            ? verifyOnHoldout(matrix, {
                baseRef: options.base,
                candidateRef: options.candidate,
                ...shared,
            })
            : null;
        // Merge-eligibility: kept on train AND (if checked) verified on holdout.
        const keptOnTrain = result.verdict.decision === 'keep' && result.verdict.fwerAdjusted === true;
        const holdoutPassed = !options.verifyHoldout || holdout?.decision === 'keep';
        const mergeEligible = keptOnTrain && holdoutPassed;
        if (options.json) {
            const payload = {
                record: result.record,
                verdict: result.verdict,
                regressedTasks: result.regressedTasks,
                holdoutVerdict: holdout,
                mergeEligible,
            };
            console.log(JSON.stringify(payload, null, 2));
            return;
        }
        const v = result.verdict;
        const { colors } = theme;
        // Pick the colouring function for the decision line.
        let decColor;
        if (v.decision === 'keep') {
            decColor = colors.success;
        }
        else if (v.decision === 'discard') {
            decColor = colors.error;
        }
        else {
            decColor = colors.muted;
        }
        const signOf = (x) => (x >= 0 ? '+' : '');
        console.log();
        console.log(` ${colors.highlight('Experiment')} ${options.experimentTag} (${options.base} → ${options.candidate})`);
        console.log(` ${colors.highlight('Decision')} ${decColor(v.decision.toUpperCase())}`);
        console.log(` ${colors.highlight('Effect')} ${signOf(v.effect)}${v.effect} (95% CI [${v.ciLow ?? '—'}, ${v.ciHigh ?? '—'}])`);
        console.log(` ${colors.highlight('p-value')} ${v.pValue ?? '—'} vs alpha_adj ${v.alphaAdjusted ?? '—'} (N=${familySize}, FWER ${v.fwerAdjusted ? 'on' : 'off'})`);
        console.log(` ${colors.highlight('Runs/Tasks')} ${v.nRuns} runs over ${v.nTasks} task(s)`);
        const regressed = result.regressedTasks;
        if (regressed.length > 0) {
            console.log(` ${colors.error('Regressions')} ${regressed.length} task(s): ${regressed.join(', ')}`);
        }
        if (options.verifyHoldout) {
            let hd;
            if (holdout) {
                hd = `${holdout.decision.toUpperCase()} (effect ${signOf(holdout.effect)}${holdout.effect}, CI [${holdout.ciLow ?? '—'}, ${holdout.ciHigh ?? '—'}])`;
            }
            else {
                hd = colors.muted('no held-out evidence — unverifiable');
            }
            console.log(` ${colors.highlight('Holdout')} ${hd}`);
        }
        const mergeLabel = mergeEligible ? colors.success('YES') : colors.muted('no');
        console.log(` ${colors.highlight('Mergeable')} ${mergeLabel}`);
        console.log(colors.muted(` Recorded → ${projectRoot}/.cortex/experiments.jsonl`));
        console.log();
        console.log(colors.muted(` ${v.reason}`));
        console.log();
    }
    catch (error) {
        console.error(theme.colors.error(`Error: ${error.message}`));
        process.exit(1);
    }
}
117
+ //# sourceMappingURL=evaluate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluate.js","sourceRoot":"","sources":["../../../src/commands/autoresearch/evaluate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,iBAAiB,EACjB,gBAAgB,EAChB,8BAA8B,EAC9B,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAmBrD,SAAS,UAAU,CAAC,CAAqB;IACvC,IAAI,CAAC,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IACtC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACpB,OAAO,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAC5C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,OAAoC;IAC7E,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,EAAE,CAAC;IACtC,MAAM,WAAW,GAAG,eAAe,EAAE,CAAC;IAEtC,MAAM,QAAQ,GAAuD;QACnE,CAAC,eAAe,EAAE,kBAAkB,CAAC;QACrC,CAAC,MAAM,EAAE,QAAQ,CAAC;QAClB,CAAC,WAAW,EAAE,aAAa,CAAC;QAC5B,CAAC,QAAQ,EAAE,UAAU,CAAC;KACvB,CAAC;IACF,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;IAC9E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,sCAAsC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,MAAM,GAAG,IAAI,gBAAgB,CAAC,WAAW,CAAC,CAAC;QAEjD,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC;YAChC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC;YAC9B,aAAa,EAAE,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC;SAC3C,CAAC;QAEF,MAAM,MAAM,GAAG,8BAA8B,CAAC,MAAM,EAAE,MAAM,EAAE;YAC5D,aAAa,EAAE,OAAO,CAAC,aAAc;YACrC,OAAO,EAAE,OAAO,CAAC,IAAK;YACtB,YAAY,EAAE,OAAO,CAAC,SAAU;YAChC,MAAM,EAAE,OAAO,CAAC,MAAO;YACvB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,kBAAkB,EAAE,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC;YACpD,IAAI;YACJ,OAAO,EAAE,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC;SACrC,CAAC,CAAC;QAEH,IAAI,OAAO,GAAG,IAAI,CAAC;QACnB,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;YAC1B,OAAO,GAAG,eAAe,CAAC,MAA
M,EAAE;gBAChC,OAAO,EAAE,OAAO,CAAC,IAAK;gBACtB,YAAY,EAAE,OAAO,CAAC,SAAU;gBAChC,eAAe,EAAE,OAAO,CAAC,eAAe;gBACxC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,kBAAkB,EAAE,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC;gBACpD,IAAI;gBACJ,OAAO,EAAE,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC;aACrC,CAAC,CAAC;QACL,CAAC;QAED,yEAAyE;QACzE,MAAM,aAAa,GACjB,MAAM,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;YAClC,MAAM,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI;YACpC,CAAC,CAAC,OAAO,CAAC,aAAa,IAAI,OAAO,EAAE,QAAQ,KAAK,MAAM,CAAC,CAAC;QAE3D,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC;gBACzB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,cAAc,EAAE,OAAO;gBACvB,aAAa;aACd,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACb,OAAO;QACT,CAAC;QAED,MAAM,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC;QACzB,MAAM,QAAQ,GACZ,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO;YAC5C,CAAC,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK;gBAC/C,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC;QAEvB,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,MAAM,OAAO,CAAC,aAAa,MAAM,OAAO,CAAC,IAAI,MAAM,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;QAC7H,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,QAAQ,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC;QAChG,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,KAAK,IAAI,GAAG,KAAK,CAAC,CAAC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC;QACnJ,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,MAAM,IAAI,GAAG,kBAAkB,CAAC,CAAC,aAAa,IAAI,GAAG,QAAQ,UAAU,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC;QACrM,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,KAAK,cAAc,CAAC,CAAC,MAAM,UAAU,CAAC,CAAC;QACnG,IAAI,MAAM,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,IAAI,K
AAK,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,KAAK,MAAM,CAAC,cAAc,CAAC,MAAM,aAAa,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrI,CAAC;QACD,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;YAC1B,MAAM,EAAE,GAAG,OAAO;gBAChB,CAAC,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,WAAW,EAAE,YAAY,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,MAAM,SAAS,OAAO,CAAC,KAAK,IAAI,GAAG,KAAK,OAAO,CAAC,MAAM,IAAI,GAAG,IAAI;gBACzJ,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAC;YAC9D,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAClE,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,OAAO,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,eAAe,WAAW,4BAA4B,CAAC,CAAC,CAAC;QACxF,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,EAAE,CAAC;IAEhB,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC"}
@@ -0,0 +1,38 @@
1
+ export interface AutoResearchExperimentOptions {
2
+ experimentTag?: string;
3
+ candidateDir?: string;
4
+ baseDir?: string;
5
+ taskSet?: string;
6
+ holdoutSet?: string;
7
+ branch?: string;
8
+ nFamily?: string;
9
+ runs?: string;
10
+ model?: string;
11
+ deficiencyId?: string;
12
+ benchmarkSource?: string;
13
+ baseRef?: string;
14
+ candidateRef?: string;
15
+ buildBase?: boolean;
16
+ noBuild?: boolean;
17
+ basePort?: string;
18
+ candidatePort?: string;
19
+ cortexDir?: string;
20
+ seed?: string;
21
+ alpha?: string;
22
+ epsilon?: string;
23
+ minRuns?: string;
24
+ json?: boolean;
25
+ /** Non-cortex target: grade a shell command per task (with --build-cmd/--accept-exit)
26
+ * instead of building+serving a cortex server. Both arms use the same command; the
27
+ * base/candidate difference is the worktree the command runs in. */
28
+ runCmd?: string;
29
+ buildCmd?: string;
30
+ acceptExit?: string;
31
+ /** Effectiveness-arm labels recorded on both base + candidate records (shared dispatch
32
+ * config; the experiment isolates the harness-version variable). Fall back to the
33
+ * CORTEX_SUBAGENT_TEMPERATURE / CORTEX_ARM_STRATEGY env stamp when omitted. */
34
+ temperature?: string;
35
+ strategy?: string;
36
+ }
37
+ export declare function autoResearchExperiment(options: AutoResearchExperimentOptions): Promise<void>;
38
+ //# sourceMappingURL=experiment.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"experiment.d.ts","sourceRoot":"","sources":["../../../src/commands/autoresearch/experiment.ts"],"names":[],"mappings":"AA+BA,MAAM,WAAW,6BAA6B;IAC5C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf;;yEAEqE;IACrE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;oFAEgF;IAChF,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAkBD,wBAAsB,sBAAsB,CAAC,OAAO,EAAE,6BAA6B,GAAG,OAAO,CAAC,IAAI,CAAC,CAwHlG"}
@@ -0,0 +1,168 @@
1
+ /**
2
+ * `cortex autoresearch experiment` — the full single-experiment lifecycle.
3
+ *
4
+ * Builds the candidate (and optionally base) checkout, serves each on its own
5
+ * isolated port, benches both arms (train + optional holdout) into ONE shared
6
+ * `.cortex` store, runs the keep/discard gate + held-out verification, and emits
7
+ * the audited verdict + the JSONL artifact a downstream ingest consumes.
8
+ *
9
+ * This is the piece that owns the "two builds, not one relabel" correctness: each
10
+ * arm is served by a server BUILT FROM ITS OWN CODE, so the comparison is real.
11
+ * Servers run with MODEL_ROUTER_RECORD off; only the bench's graded records land
12
+ * in the shared store. Teardown is guaranteed in `finally`.
13
+ *
14
+ * Scope: this is a HARNESS-CODE experiment (base build vs candidate build,
15
+ * compared by git SHA). Model/config experiments (same code, different --model)
16
+ * use the lower-level `bench` + `evaluate` directly.
17
+ */
18
+ import { readdirSync, readFileSync, statSync } from 'node:fs';
19
+ import { join, basename } from 'node:path';
20
+ import { ModelRouterMatrix, ExperimentLedger, runExperiment, parseTaskSet, } from '@nexus-cortex/core';
21
+ import { ThemeManager } from '../../themes/ThemeManager.js';
22
+ import { findProjectRoot } from '../config/utils.js';
23
+ import { freePort, gitShortSha, CortexTarget } from './harnessProcess.js';
24
+ import { CommandTarget } from './commandRunner.js';
25
/**
 * Parse an optional CLI option value into a finite number.
 *
 * Callers chain this with `??` to apply defaults, so anything that does not
 * represent a real number must come back as `undefined` rather than `0`.
 *
 * @param {string|number|null|undefined} v - Raw option value.
 * @returns {number|undefined} The parsed number, or `undefined` when the value
 *   is missing, blank, or not a finite number.
 */
function num(v) {
    // `== null` matches both undefined and null; Number(null) would otherwise be 0.
    if (v == null)
        return undefined;
    // Number('') and Number('   ') are 0, which would turn a blank option into a
    // genuine zero instead of letting the caller's default apply.
    if (typeof v === 'string' && v.trim() === '')
        return undefined;
    const n = Number(v);
    return Number.isFinite(n) ? n : undefined;
}
31
/**
 * Load benchmark tasks from a task-set path.
 *
 * When `p` is a directory, every entry whose name ends in `.json` is read (not
 * recursively); otherwise `p` itself is read as a single task file. Each file's
 * parsed JSON is handed to `parseTaskSet` together with the file path, and the
 * resulting tasks are concatenated in file order.
 *
 * @param {string} p - Path to a task file or to a directory of `.json` task files.
 * @returns {Array} All tasks from all files, in deterministic (sorted file name) order.
 * @throws {Error} When the path cannot be stat'ed, or a task file cannot be read
 *   or parsed; the message names the offending file and `cause` holds the original error.
 */
function loadTasks(p) {
    const st = statSync(p);
    const files = st.isDirectory()
        ? readdirSync(p)
            .filter((name) => name.endsWith('.json'))
            // Directory listing order is filesystem-dependent; sort so the task
            // order (and therefore the bench order) is reproducible.
            .sort()
            .map((name) => join(p, name))
        : [p];
    const out = [];
    for (const file of files) {
        let parsed;
        try {
            parsed = JSON.parse(readFileSync(file, 'utf8'));
        }
        catch (err) {
            // A bare SyntaxError does not say which of many task files is broken.
            const reason = err instanceof Error ? err.message : String(err);
            throw new Error(`cannot load task file ${file}: ${reason}`, { cause: err });
        }
        for (const task of parseTaskSet(parsed, file))
            out.push(task);
    }
    return out;
}
41
/**
 * Parse the comma-separated `--accept-exit` value into a list of exit codes.
 *
 * Blank segments are dropped before conversion: Number('') is 0, so a trailing
 * comma such as "1," would otherwise silently add exit code 0 to the accepted set.
 * A missing or entirely blank value means the default, `[0]`.
 */
function parseAcceptExitCodes(raw) {
    const text = (raw ?? '').trim();
    if (text === '')
        return [0];
    return text
        .split(',')
        .map((part) => part.trim())
        .filter((part) => part !== '')
        .map((part) => Number(part))
        .filter((code) => Number.isFinite(code));
}
/**
 * Stop every prepared arm, continuing past individual failures so one arm's
 * failing teardown cannot leave the other arm running.
 */
async function stopExperimentArms(arms) {
    for (const arm of arms) {
        try {
            // `await` is a no-op for a synchronous stop() and waits for an async one.
            await arm.stop();
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            // stderr keeps --json stdout output parseable.
            console.error(`Warning: failed to stop experiment arm: ${reason}`);
            process.exitCode = 1;
        }
    }
}
/**
 * Run one base-vs-candidate experiment end to end and report the verdict.
 *
 * Steps: validate required options, load the train (and optional holdout) task
 * sets, derive distinct arm labels, prepare both arms through the selected target
 * (`CommandTarget` when `runCmd` is given, otherwise `CortexTarget`), run
 * `runExperiment` against the shared store rooted at `cortexDir`, then print
 * either a JSON document (`options.json`) or a human-readable summary.
 *
 * Missing required options, an empty task set, or identical arm labels terminate
 * the process with exit code 1. Any other failure is reported (as JSON in JSON
 * mode) and sets `process.exitCode = 1`. Prepared arms are always stopped.
 *
 * @param {object} options - Parsed CLI options (string-valued unless noted).
 * @param {string} options.experimentTag - Required experiment label.
 * @param {string} options.candidateDir - Required candidate checkout directory.
 * @param {string} options.taskSet - Required task file or directory of task files.
 * @param {string} [options.baseDir] - Base checkout; defaults to the project root.
 * @param {string} [options.cortexDir] - Store root; defaults to the project root.
 * @param {string} [options.holdoutSet] - Optional held-out task file or directory.
 * @param {string} [options.runCmd] - Shell command template; selects the command target.
 * @param {string} [options.buildCmd] - Build command for the command target.
 * @param {string} [options.acceptExit] - Comma-separated accepted exit codes (default "0").
 * @param {boolean} [options.json] - Emit machine-readable JSON instead of themed text.
 * @returns {Promise<void>}
 */
export async function autoResearchExperiment(options) {
    const theme = ThemeManager.getTheme();
    const json = !!options.json;
    const log = (m) => {
        if (!json)
            console.log(theme.colors.muted(` ${m}`));
    };
    const describeError = (e) => (e instanceof Error ? e.message : String(e));
    const projectRoot = findProjectRoot();
    const candidateDir = options.candidateDir;
    const baseDir = options.baseDir ?? projectRoot;
    const cortexDir = options.cortexDir ?? projectRoot;
    const missing = [];
    if (!options.experimentTag)
        missing.push('--experiment-tag');
    if (!candidateDir)
        missing.push('--candidate-dir');
    if (!options.taskSet)
        missing.push('--task-set');
    if (missing.length) {
        console.error(theme.colors.error(`Error: missing ${missing.join(', ')}`));
        process.exit(1);
    }
    const arms = [];
    try {
        const trainTasks = loadTasks(options.taskSet);
        const holdoutTasks = options.holdoutSet ? loadTasks(options.holdoutSet) : undefined;
        // process.exit() skips `finally`; that is safe here because no arm has been
        // prepared yet at either early exit below.
        if (trainTasks.length === 0) {
            console.error(theme.colors.error('Error: empty --task-set'));
            process.exit(1);
        }
        // Distinct arm labels: git SHA when the dir is a checkout, else its basename.
        const refFor = (dir, override) => {
            if (override)
                return override;
            const sha = gitShortSha(dir);
            return sha !== 'unknown' ? sha : basename(dir);
        };
        const baseRef = refFor(baseDir, options.baseRef);
        const candidateRef = refFor(candidateDir, options.candidateRef);
        if (baseRef === candidateRef) {
            console.error(theme.colors.error(`Error: base and candidate resolve to the same ref (${baseRef}) — an experiment needs two distinct arms. Pass --base-ref/--candidate-ref to label, or use a real candidate worktree.`));
            process.exit(1);
        }
        const model = options.model ?? process.env.DEFAULT_MODEL_ID;
        const branch = options.branch ?? candidateRef;
        const nFamily = num(options.nFamily) ?? 1;
        // Select the target: a shell-command target (any project) or the default cortex server.
        const target = options.runCmd
            ? new CommandTarget({
                template: options.runCmd,
                buildCmd: options.buildCmd,
                acceptExitCodes: parseAcceptExitCodes(options.acceptExit),
            })
            : new CortexTarget();
        if (!json) {
            console.log();
            console.log(` ${theme.colors.highlight('Experiment')} ${options.experimentTag} ${baseRef} → ${candidateRef} [${target.kind}]`);
        }
        // Prepare each arm (build if asked + start its runner). Candidate builds unless
        // --no-build; base builds only with --build-base. Each arm gets its own reserved port
        // (server targets bind it; command targets ignore it).
        const basePort = num(options.basePort) ?? await freePort();
        const candPort = num(options.candidatePort) ?? await freePort();
        const baseArm = await target.prepare(baseDir, { port: basePort, model, build: !options.noBuild && !!options.buildBase, log });
        // Register each arm as soon as it exists so a failure preparing the next one
        // still tears this one down.
        arms.push(baseArm);
        const candArm = await target.prepare(candidateDir, { port: candPort, model, build: !options.noBuild, log });
        arms.push(candArm);
        // Bench both arms + gate (shared store at cortexDir/.cortex).
        const matrix = new ModelRouterMatrix(cortexDir);
        const ledger = new ExperimentLedger(cortexDir);
        const result = await runExperiment(matrix, ledger, {
            baseRunner: baseArm.runner,
            candidateRunner: candArm.runner,
        }, {
            experimentTag: options.experimentTag,
            baseRef,
            candidateRef,
            branch,
            trainTasks,
            holdoutTasks,
            runs: num(options.runs),
            nFamily,
            modelId: model,
            temperature: num(options.temperature),
            strategy: options.strategy,
            deficiencyId: options.deficiencyId,
            benchmarkSource: options.benchmarkSource,
            gate: { alpha: num(options.alpha), seed: num(options.seed), minRunsPerArm: num(options.minRuns) },
            epsilon: num(options.epsilon),
            onProgress: log,
        });
        const out = {
            experimentTag: options.experimentTag,
            baseRef,
            candidateRef,
            branch,
            verdict: result.verdict,
            holdoutVerdict: result.holdoutVerdict,
            regressedTasks: result.regressedTasks,
            mergeEligible: result.mergeEligible,
            benchSummaries: result.benchSummaries,
            cortexDir,
            jsonlPaths: {
                matrix: join(cortexDir, '.cortex', 'router-matrix.jsonl'),
                experiments: join(cortexDir, '.cortex', 'experiments.jsonl'),
                backlog: join(cortexDir, '.cortex', 'research-backlog.jsonl'),
            },
        };
        if (json) {
            console.log(JSON.stringify(out, null, 2));
            return;
        }
        const v = result.verdict;
        const dc = v.decision === 'keep' ? theme.colors.success : v.decision === 'discard' ? theme.colors.error : theme.colors.muted;
        console.log();
        console.log(` ${theme.colors.highlight('Decision')} ${dc(v.decision.toUpperCase())} effect ${v.effect >= 0 ? '+' : ''}${v.effect} CI [${v.ciLow ?? '—'}, ${v.ciHigh ?? '—'}] p=${v.pValue ?? '—'} vs ${v.alphaAdjusted ?? '—'} (N=${nFamily})`);
        console.log(` ${theme.colors.highlight('Holdout')} ${result.holdoutVerdict ? result.holdoutVerdict.decision.toUpperCase() + ` (effect ${result.holdoutVerdict.effect >= 0 ? '+' : ''}${result.holdoutVerdict.effect})` : theme.colors.muted('not provided → not verifiable')}`);
        if (result.regressedTasks.length)
            console.log(` ${theme.colors.error('Regressions')} ${result.regressedTasks.length}: ${result.regressedTasks.join(', ')}`);
        console.log(` ${theme.colors.highlight('Mergeable')} ${result.mergeEligible ? theme.colors.success('YES') : theme.colors.muted('no')}`);
        console.log(theme.colors.muted(` artifact → ${cortexDir}/.cortex/{router-matrix,experiments,research-backlog}.jsonl`));
        console.log();
    }
    catch (error) {
        // Thrown values are not guaranteed to be Error instances.
        const message = describeError(error);
        if (json)
            console.log(JSON.stringify({ error: message }, null, 2));
        else
            console.error(theme.colors.error(`Error: ${message}`));
        process.exitCode = 1;
    }
    finally {
        await stopExperimentArms(arms);
    }
}
168
+ //# sourceMappingURL=experiment.js.map