@nexus-cortex/cli 4.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465)
  1. package/.cortex/agents/AGENT_PROFILE_GUIDE.md +307 -0
  2. package/.cortex/agents/README.md +268 -0
  3. package/.cortex/agents/a-frontend-landing-page-designer.md +41 -0
  4. package/.cortex/agents/autoresearch-agent.md +49 -0
  5. package/.cortex/agents/code-reviewer.md +63 -0
  6. package/.cortex/agents/context-research.md +26 -0
  7. package/.cortex/agents/doc-writer.md +92 -0
  8. package/.cortex/agents/explore.md +63 -0
  9. package/.cortex/agents/new-model-api-integrator-analyst.md +41 -0
  10. package/.cortex/agents/plan.md +109 -0
  11. package/.cortex/agents/pr-architecture-reviewer.md +77 -0
  12. package/.cortex/agents/pr-code-quality.md +78 -0
  13. package/.cortex/agents/pr-implementer.md +50 -0
  14. package/.cortex/agents/pr-security-auditor.md +62 -0
  15. package/.cortex/agents/pr-test-writer.md +67 -0
  16. package/.cortex/agents/refactor.md +118 -0
  17. package/.cortex/agents/test-writer.md +72 -0
  18. package/.cortex/agents/web-researcher.md +72 -0
  19. package/.cortex/bench/tasks/sample-tasks.json +20 -0
  20. package/.cortex/commands/compare.md +14 -0
  21. package/.cortex/commands/deps.md +16 -0
  22. package/.cortex/commands/diff.md +14 -0
  23. package/.cortex/commands/explain.md +16 -0
  24. package/.cortex/commands/find-bug.md +13 -0
  25. package/.cortex/commands/profile.md +15 -0
  26. package/.cortex/commands/review.md +18 -0
  27. package/.cortex/commands/search.md +16 -0
  28. package/.cortex/commands/test.md +15 -0
  29. package/.cortex/permissions.dev.json +20 -0
  30. package/.cortex/permissions.example.json +71 -0
  31. package/.cortex/permissions.prod.json +63 -0
  32. package/.cortex/permissions.test.json +19 -0
  33. package/.cortex/skills/autoresearch/SKILL.md +77 -0
  34. package/.cortex/skills/autoresearch/personas/README.md +45 -0
  35. package/.cortex/skills/autoresearch/personas/aggressive-refactor.md +25 -0
  36. package/.cortex/skills/autoresearch/personas/creative.md +29 -0
  37. package/.cortex/skills/autoresearch/personas/perf-hunter.md +27 -0
  38. package/.cortex/skills/autoresearch/personas/precise.md +23 -0
  39. package/.cortex/skills/autoresearch/personas/root-cause.md +26 -0
  40. package/.cortex/skills/autoresearch/personas/security-auditor.md +29 -0
  41. package/.cortex/skills/autoresearch/personas/skeptic-reviewer.md +31 -0
  42. package/.cortex/skills/autoresearch/personas/test-first.md +25 -0
  43. package/.cortex/skills/best-of-n/SKILL.md +76 -0
  44. package/.cortex/skills/cortex/SKILL.md +834 -0
  45. package/.cortex/skills/cortex-bench/SKILL.md +354 -0
  46. package/.cortex/skills/docx/SKILL.md +83 -0
  47. package/.cortex/skills/pdf-documents/SKILL.md +297 -0
  48. package/.cortex/skills/pdf-documents/sections/01-image-acquisition.md +132 -0
  49. package/.cortex/skills/pdf-documents/sections/02-ai-image-generation.md +274 -0
  50. package/.cortex/skills/pdf-documents/sections/03-paper-sizes.md +89 -0
  51. package/.cortex/skills/pdf-documents/sections/04-design-system.md +549 -0
  52. package/.cortex/skills/pdf-documents/sections/05-css-print-rules.md +135 -0
  53. package/.cortex/skills/pdf-documents/sections/06-svg-charts.md +100 -0
  54. package/.cortex/skills/pdf-documents/sections/07-templates.md +224 -0
  55. package/.cortex/skills/pdf-documents/sections/08-scaled-output.md +164 -0
  56. package/.cortex/skills/pdf-documents/sections/09-preview-qa.md +66 -0
  57. package/.cortex/skills/pdf-documents/sections/10-reading-pdfs.md +499 -0
  58. package/.cortex/skills/pdf-documents/sections/11-form-filling.md +241 -0
  59. package/.cortex/skills/pptx/SKILL.md +90 -0
  60. package/.cortex/skills/resume-analyst/SKILL.md +373 -0
  61. package/.cortex/skills/verify-work/SKILL.md +74 -0
  62. package/.cortex/skills/xlsx/SKILL.md +101 -0
  63. package/.cortex/system-messages/messages/WORK_QUALITY.md +159 -0
  64. package/.cortex/system-messages/registry.json +18 -0
  65. package/LICENSE +202 -0
  66. package/NOTICE +2 -0
  67. package/README.md +13 -0
  68. package/bin/cortex.js +548 -0
  69. package/dist/agent-mode.d.ts +21 -0
  70. package/dist/agent-mode.d.ts.map +1 -0
  71. package/dist/agent-mode.js +511 -0
  72. package/dist/agent-mode.js.map +1 -0
  73. package/dist/client/CortexClient.d.ts +84 -0
  74. package/dist/client/CortexClient.d.ts.map +1 -0
  75. package/dist/client/CortexClient.js +163 -0
  76. package/dist/client/CortexClient.js.map +1 -0
  77. package/dist/commands/artifact/list.d.ts +15 -0
  78. package/dist/commands/artifact/list.d.ts.map +1 -0
  79. package/dist/commands/artifact/list.js +89 -0
  80. package/dist/commands/artifact/list.js.map +1 -0
  81. package/dist/commands/artifact/restart.d.ts +13 -0
  82. package/dist/commands/artifact/restart.d.ts.map +1 -0
  83. package/dist/commands/artifact/restart.js +56 -0
  84. package/dist/commands/artifact/restart.js.map +1 -0
  85. package/dist/commands/artifact/status.d.ts +13 -0
  86. package/dist/commands/artifact/status.d.ts.map +1 -0
  87. package/dist/commands/artifact/status.js +100 -0
  88. package/dist/commands/artifact/status.js.map +1 -0
  89. package/dist/commands/artifact/stop.d.ts +13 -0
  90. package/dist/commands/artifact/stop.d.ts.map +1 -0
  91. package/dist/commands/artifact/stop.js +50 -0
  92. package/dist/commands/artifact/stop.js.map +1 -0
  93. package/dist/commands/autoresearch/bench.d.ts +32 -0
  94. package/dist/commands/autoresearch/bench.d.ts.map +1 -0
  95. package/dist/commands/autoresearch/bench.js +123 -0
  96. package/dist/commands/autoresearch/bench.js.map +1 -0
  97. package/dist/commands/autoresearch/commandRunner.d.ts +35 -0
  98. package/dist/commands/autoresearch/commandRunner.d.ts.map +1 -0
  99. package/dist/commands/autoresearch/commandRunner.js +91 -0
  100. package/dist/commands/autoresearch/commandRunner.js.map +1 -0
  101. package/dist/commands/autoresearch/evaluate.d.ts +18 -0
  102. package/dist/commands/autoresearch/evaluate.d.ts.map +1 -0
  103. package/dist/commands/autoresearch/evaluate.js +117 -0
  104. package/dist/commands/autoresearch/evaluate.js.map +1 -0
  105. package/dist/commands/autoresearch/experiment.d.ts +38 -0
  106. package/dist/commands/autoresearch/experiment.d.ts.map +1 -0
  107. package/dist/commands/autoresearch/experiment.js +168 -0
  108. package/dist/commands/autoresearch/experiment.js.map +1 -0
  109. package/dist/commands/autoresearch/fix.d.ts +10 -0
  110. package/dist/commands/autoresearch/fix.d.ts.map +1 -0
  111. package/dist/commands/autoresearch/fix.js +86 -0
  112. package/dist/commands/autoresearch/fix.js.map +1 -0
  113. package/dist/commands/autoresearch/harnessProcess.d.ts +48 -0
  114. package/dist/commands/autoresearch/harnessProcess.d.ts.map +1 -0
  115. package/dist/commands/autoresearch/harnessProcess.js +140 -0
  116. package/dist/commands/autoresearch/harnessProcess.js.map +1 -0
  117. package/dist/commands/autoresearch/list.d.ts +6 -0
  118. package/dist/commands/autoresearch/list.d.ts.map +1 -0
  119. package/dist/commands/autoresearch/list.js +38 -0
  120. package/dist/commands/autoresearch/list.js.map +1 -0
  121. package/dist/commands/autoresearch/loop.d.ts +26 -0
  122. package/dist/commands/autoresearch/loop.d.ts.map +1 -0
  123. package/dist/commands/autoresearch/loop.js +242 -0
  124. package/dist/commands/autoresearch/loop.js.map +1 -0
  125. package/dist/commands/cache/metrics.d.ts +13 -0
  126. package/dist/commands/cache/metrics.d.ts.map +1 -0
  127. package/dist/commands/cache/metrics.js +77 -0
  128. package/dist/commands/cache/metrics.js.map +1 -0
  129. package/dist/commands/chat/AgenticChat.d.ts +39 -0
  130. package/dist/commands/chat/AgenticChat.d.ts.map +1 -0
  131. package/dist/commands/chat/AgenticChat.js +201 -0
  132. package/dist/commands/chat/AgenticChat.js.map +1 -0
  133. package/dist/commands/chat/renderers/CodeRenderer.d.ts +36 -0
  134. package/dist/commands/chat/renderers/CodeRenderer.d.ts.map +1 -0
  135. package/dist/commands/chat/renderers/CodeRenderer.js +85 -0
  136. package/dist/commands/chat/renderers/CodeRenderer.js.map +1 -0
  137. package/dist/commands/chat/renderers/ToolRenderer.d.ts +30 -0
  138. package/dist/commands/chat/renderers/ToolRenderer.d.ts.map +1 -0
  139. package/dist/commands/chat/renderers/ToolRenderer.js +93 -0
  140. package/dist/commands/chat/renderers/ToolRenderer.js.map +1 -0
  141. package/dist/commands/chat/single-message.d.ts +15 -0
  142. package/dist/commands/chat/single-message.d.ts.map +1 -0
  143. package/dist/commands/chat/single-message.js +85 -0
  144. package/dist/commands/chat/single-message.js.map +1 -0
  145. package/dist/commands/config/categories.d.ts +8 -0
  146. package/dist/commands/config/categories.d.ts.map +1 -0
  147. package/dist/commands/config/categories.js +75 -0
  148. package/dist/commands/config/categories.js.map +1 -0
  149. package/dist/commands/config/category.d.ts +8 -0
  150. package/dist/commands/config/category.d.ts.map +1 -0
  151. package/dist/commands/config/category.js +81 -0
  152. package/dist/commands/config/category.js.map +1 -0
  153. package/dist/commands/config/get.d.ts +9 -0
  154. package/dist/commands/config/get.d.ts.map +1 -0
  155. package/dist/commands/config/get.js +98 -0
  156. package/dist/commands/config/get.js.map +1 -0
  157. package/dist/commands/config/reset.d.ts +6 -0
  158. package/dist/commands/config/reset.d.ts.map +1 -0
  159. package/dist/commands/config/reset.js +68 -0
  160. package/dist/commands/config/reset.js.map +1 -0
  161. package/dist/commands/config/set.d.ts +6 -0
  162. package/dist/commands/config/set.d.ts.map +1 -0
  163. package/dist/commands/config/set.js +60 -0
  164. package/dist/commands/config/set.js.map +1 -0
  165. package/dist/commands/config/utils.d.ts +14 -0
  166. package/dist/commands/config/utils.d.ts.map +1 -0
  167. package/dist/commands/config/utils.js +54 -0
  168. package/dist/commands/config/utils.js.map +1 -0
  169. package/dist/commands/context/boundaries.d.ts +13 -0
  170. package/dist/commands/context/boundaries.d.ts.map +1 -0
  171. package/dist/commands/context/boundaries.js +45 -0
  172. package/dist/commands/context/boundaries.js.map +1 -0
  173. package/dist/commands/context/compact.d.ts +13 -0
  174. package/dist/commands/context/compact.d.ts.map +1 -0
  175. package/dist/commands/context/compact.js +41 -0
  176. package/dist/commands/context/compact.js.map +1 -0
  177. package/dist/commands/context/savings.d.ts +13 -0
  178. package/dist/commands/context/savings.d.ts.map +1 -0
  179. package/dist/commands/context/savings.js +49 -0
  180. package/dist/commands/context/savings.js.map +1 -0
  181. package/dist/commands/context/status.d.ts +13 -0
  182. package/dist/commands/context/status.d.ts.map +1 -0
  183. package/dist/commands/context/status.js +52 -0
  184. package/dist/commands/context/status.js.map +1 -0
  185. package/dist/commands/context/strategy.d.ts +13 -0
  186. package/dist/commands/context/strategy.d.ts.map +1 -0
  187. package/dist/commands/context/strategy.js +66 -0
  188. package/dist/commands/context/strategy.js.map +1 -0
  189. package/dist/commands/mcp/disable.d.ts +5 -0
  190. package/dist/commands/mcp/disable.d.ts.map +1 -0
  191. package/dist/commands/mcp/disable.js +26 -0
  192. package/dist/commands/mcp/disable.js.map +1 -0
  193. package/dist/commands/mcp/edit.d.ts +9 -0
  194. package/dist/commands/mcp/edit.d.ts.map +1 -0
  195. package/dist/commands/mcp/edit.js +62 -0
  196. package/dist/commands/mcp/edit.js.map +1 -0
  197. package/dist/commands/mcp/enable.d.ts +5 -0
  198. package/dist/commands/mcp/enable.d.ts.map +1 -0
  199. package/dist/commands/mcp/enable.js +27 -0
  200. package/dist/commands/mcp/enable.js.map +1 -0
  201. package/dist/commands/mcp/init.d.ts +9 -0
  202. package/dist/commands/mcp/init.d.ts.map +1 -0
  203. package/dist/commands/mcp/init.js +97 -0
  204. package/dist/commands/mcp/init.js.map +1 -0
  205. package/dist/commands/mcp/list.d.ts +6 -0
  206. package/dist/commands/mcp/list.d.ts.map +1 -0
  207. package/dist/commands/mcp/list.js +56 -0
  208. package/dist/commands/mcp/list.js.map +1 -0
  209. package/dist/commands/mcp/server.d.ts +6 -0
  210. package/dist/commands/mcp/server.d.ts.map +1 -0
  211. package/dist/commands/mcp/server.js +44 -0
  212. package/dist/commands/mcp/server.js.map +1 -0
  213. package/dist/commands/mcp/status.d.ts +6 -0
  214. package/dist/commands/mcp/status.d.ts.map +1 -0
  215. package/dist/commands/mcp/status.js +43 -0
  216. package/dist/commands/mcp/status.js.map +1 -0
  217. package/dist/commands/mcp/tools.d.ts +7 -0
  218. package/dist/commands/mcp/tools.d.ts.map +1 -0
  219. package/dist/commands/mcp/tools.js +82 -0
  220. package/dist/commands/mcp/tools.js.map +1 -0
  221. package/dist/commands/mcp/validate.d.ts +8 -0
  222. package/dist/commands/mcp/validate.d.ts.map +1 -0
  223. package/dist/commands/mcp/validate.js +121 -0
  224. package/dist/commands/mcp/validate.js.map +1 -0
  225. package/dist/commands/middleware/config.d.ts +13 -0
  226. package/dist/commands/middleware/config.d.ts.map +1 -0
  227. package/dist/commands/middleware/config.js +87 -0
  228. package/dist/commands/middleware/config.js.map +1 -0
  229. package/dist/commands/middleware/disable.d.ts +13 -0
  230. package/dist/commands/middleware/disable.d.ts.map +1 -0
  231. package/dist/commands/middleware/disable.js +50 -0
  232. package/dist/commands/middleware/disable.js.map +1 -0
  233. package/dist/commands/middleware/enable.d.ts +13 -0
  234. package/dist/commands/middleware/enable.d.ts.map +1 -0
  235. package/dist/commands/middleware/enable.js +50 -0
  236. package/dist/commands/middleware/enable.js.map +1 -0
  237. package/dist/commands/middleware/list.d.ts +13 -0
  238. package/dist/commands/middleware/list.d.ts.map +1 -0
  239. package/dist/commands/middleware/list.js +64 -0
  240. package/dist/commands/middleware/list.js.map +1 -0
  241. package/dist/commands/middleware/status.d.ts +13 -0
  242. package/dist/commands/middleware/status.d.ts.map +1 -0
  243. package/dist/commands/middleware/status.js +80 -0
  244. package/dist/commands/middleware/status.js.map +1 -0
  245. package/dist/commands/models/compare.d.ts +9 -0
  246. package/dist/commands/models/compare.d.ts.map +1 -0
  247. package/dist/commands/models/compare.js +76 -0
  248. package/dist/commands/models/compare.js.map +1 -0
  249. package/dist/commands/models/cost.d.ts +9 -0
  250. package/dist/commands/models/cost.d.ts.map +1 -0
  251. package/dist/commands/models/cost.js +64 -0
  252. package/dist/commands/models/cost.js.map +1 -0
  253. package/dist/commands/models/info.d.ts +9 -0
  254. package/dist/commands/models/info.d.ts.map +1 -0
  255. package/dist/commands/models/info.js +61 -0
  256. package/dist/commands/models/info.js.map +1 -0
  257. package/dist/commands/models/list.d.ts +6 -0
  258. package/dist/commands/models/list.d.ts.map +1 -0
  259. package/dist/commands/models/list.js +66 -0
  260. package/dist/commands/models/list.js.map +1 -0
  261. package/dist/commands/models/providers.d.ts +13 -0
  262. package/dist/commands/models/providers.d.ts.map +1 -0
  263. package/dist/commands/models/providers.js +45 -0
  264. package/dist/commands/models/providers.js.map +1 -0
  265. package/dist/commands/models/search.d.ts +10 -0
  266. package/dist/commands/models/search.d.ts.map +1 -0
  267. package/dist/commands/models/search.js +56 -0
  268. package/dist/commands/models/search.js.map +1 -0
  269. package/dist/commands/models/switch.d.ts +14 -0
  270. package/dist/commands/models/switch.d.ts.map +1 -0
  271. package/dist/commands/models/switch.js +67 -0
  272. package/dist/commands/models/switch.js.map +1 -0
  273. package/dist/commands/permissions/auto-approve.d.ts +13 -0
  274. package/dist/commands/permissions/auto-approve.d.ts.map +1 -0
  275. package/dist/commands/permissions/auto-approve.js +53 -0
  276. package/dist/commands/permissions/auto-approve.js.map +1 -0
  277. package/dist/commands/permissions/grant.d.ts +13 -0
  278. package/dist/commands/permissions/grant.d.ts.map +1 -0
  279. package/dist/commands/permissions/grant.js +46 -0
  280. package/dist/commands/permissions/grant.js.map +1 -0
  281. package/dist/commands/permissions/mode.d.ts +12 -0
  282. package/dist/commands/permissions/mode.d.ts.map +1 -0
  283. package/dist/commands/permissions/mode.js +61 -0
  284. package/dist/commands/permissions/mode.js.map +1 -0
  285. package/dist/commands/permissions/policies.d.ts +13 -0
  286. package/dist/commands/permissions/policies.d.ts.map +1 -0
  287. package/dist/commands/permissions/policies.js +47 -0
  288. package/dist/commands/permissions/policies.js.map +1 -0
  289. package/dist/commands/permissions/revoke.d.ts +13 -0
  290. package/dist/commands/permissions/revoke.d.ts.map +1 -0
  291. package/dist/commands/permissions/revoke.js +46 -0
  292. package/dist/commands/permissions/revoke.js.map +1 -0
  293. package/dist/commands/permissions/set.d.ts +13 -0
  294. package/dist/commands/permissions/set.d.ts.map +1 -0
  295. package/dist/commands/permissions/set.js +57 -0
  296. package/dist/commands/permissions/set.js.map +1 -0
  297. package/dist/commands/permissions/tools.d.ts +13 -0
  298. package/dist/commands/permissions/tools.d.ts.map +1 -0
  299. package/dist/commands/permissions/tools.js +50 -0
  300. package/dist/commands/permissions/tools.js.map +1 -0
  301. package/dist/commands/server/start.d.ts +11 -0
  302. package/dist/commands/server/start.d.ts.map +1 -0
  303. package/dist/commands/server/start.js +58 -0
  304. package/dist/commands/server/start.js.map +1 -0
  305. package/dist/commands/session/checkpoints.d.ts +6 -0
  306. package/dist/commands/session/checkpoints.d.ts.map +1 -0
  307. package/dist/commands/session/checkpoints.js +41 -0
  308. package/dist/commands/session/checkpoints.js.map +1 -0
  309. package/dist/commands/session/compact.d.ts +13 -0
  310. package/dist/commands/session/compact.d.ts.map +1 -0
  311. package/dist/commands/session/compact.js +56 -0
  312. package/dist/commands/session/compact.js.map +1 -0
  313. package/dist/commands/session/export.d.ts +6 -0
  314. package/dist/commands/session/export.d.ts.map +1 -0
  315. package/dist/commands/session/export.js +31 -0
  316. package/dist/commands/session/export.js.map +1 -0
  317. package/dist/commands/session/list.d.ts +7 -0
  318. package/dist/commands/session/list.d.ts.map +1 -0
  319. package/dist/commands/session/list.js +63 -0
  320. package/dist/commands/session/list.js.map +1 -0
  321. package/dist/commands/session/new.d.ts +8 -0
  322. package/dist/commands/session/new.d.ts.map +1 -0
  323. package/dist/commands/session/new.js +23 -0
  324. package/dist/commands/session/new.js.map +1 -0
  325. package/dist/commands/session/resume.d.ts +6 -0
  326. package/dist/commands/session/resume.d.ts.map +1 -0
  327. package/dist/commands/session/resume.js +32 -0
  328. package/dist/commands/session/resume.js.map +1 -0
  329. package/dist/commands/session/search.d.ts +10 -0
  330. package/dist/commands/session/search.d.ts.map +1 -0
  331. package/dist/commands/session/search.js +65 -0
  332. package/dist/commands/session/search.js.map +1 -0
  333. package/dist/commands/session/stats.d.ts +6 -0
  334. package/dist/commands/session/stats.d.ts.map +1 -0
  335. package/dist/commands/session/stats.js +58 -0
  336. package/dist/commands/session/stats.js.map +1 -0
  337. package/dist/commands/session/view.d.ts +6 -0
  338. package/dist/commands/session/view.d.ts.map +1 -0
  339. package/dist/commands/session/view.js +65 -0
  340. package/dist/commands/session/view.js.map +1 -0
  341. package/dist/commands/slash/CommandPalette.d.ts +60 -0
  342. package/dist/commands/slash/CommandPalette.d.ts.map +1 -0
  343. package/dist/commands/slash/CommandPalette.js +351 -0
  344. package/dist/commands/slash/CommandPalette.js.map +1 -0
  345. package/dist/commands/slash/SlashCommandParser.d.ts +11 -0
  346. package/dist/commands/slash/SlashCommandParser.d.ts.map +1 -0
  347. package/dist/commands/slash/SlashCommandParser.js +11 -0
  348. package/dist/commands/slash/SlashCommandParser.js.map +1 -0
  349. package/dist/commands/slash/SlashCommandRegistry.d.ts +11 -0
  350. package/dist/commands/slash/SlashCommandRegistry.d.ts.map +1 -0
  351. package/dist/commands/slash/SlashCommandRegistry.js +11 -0
  352. package/dist/commands/slash/SlashCommandRegistry.js.map +1 -0
  353. package/dist/commands/slash/index.d.ts +11 -0
  354. package/dist/commands/slash/index.d.ts.map +1 -0
  355. package/dist/commands/slash/index.js +13 -0
  356. package/dist/commands/slash/index.js.map +1 -0
  357. package/dist/commands/system-messages/list.d.ts +13 -0
  358. package/dist/commands/system-messages/list.d.ts.map +1 -0
  359. package/dist/commands/system-messages/list.js +54 -0
  360. package/dist/commands/system-messages/list.js.map +1 -0
  361. package/dist/commands/system-messages/reload.d.ts +13 -0
  362. package/dist/commands/system-messages/reload.d.ts.map +1 -0
  363. package/dist/commands/system-messages/reload.js +36 -0
  364. package/dist/commands/system-messages/reload.js.map +1 -0
  365. package/dist/commands/system-messages/view.d.ts +13 -0
  366. package/dist/commands/system-messages/view.d.ts.map +1 -0
  367. package/dist/commands/system-messages/view.js +52 -0
  368. package/dist/commands/system-messages/view.js.map +1 -0
  369. package/dist/commands/tmux/list.d.ts +13 -0
  370. package/dist/commands/tmux/list.d.ts.map +1 -0
  371. package/dist/commands/tmux/list.js +68 -0
  372. package/dist/commands/tmux/list.js.map +1 -0
  373. package/dist/commands/tools/info.d.ts +13 -0
  374. package/dist/commands/tools/info.d.ts.map +1 -0
  375. package/dist/commands/tools/info.js +82 -0
  376. package/dist/commands/tools/info.js.map +1 -0
  377. package/dist/commands/tools/list.d.ts +14 -0
  378. package/dist/commands/tools/list.d.ts.map +1 -0
  379. package/dist/commands/tools/list.js +67 -0
  380. package/dist/commands/tools/list.js.map +1 -0
  381. package/dist/config/ConfigManager.d.ts +40 -0
  382. package/dist/config/ConfigManager.d.ts.map +1 -0
  383. package/dist/config/ConfigManager.js +162 -0
  384. package/dist/config/ConfigManager.js.map +1 -0
  385. package/dist/config/extension.d.ts +12 -0
  386. package/dist/config/extension.d.ts.map +1 -0
  387. package/dist/config/extension.js +5 -0
  388. package/dist/config/extension.js.map +1 -0
  389. package/dist/config/settings.d.ts +42 -0
  390. package/dist/config/settings.d.ts.map +1 -0
  391. package/dist/config/settings.js +32 -0
  392. package/dist/config/settings.js.map +1 -0
  393. package/dist/index.d.ts +3 -0
  394. package/dist/index.d.ts.map +1 -0
  395. package/dist/index.js +883 -0
  396. package/dist/index.js.map +1 -0
  397. package/dist/orchestrator/OrchestratorClient.d.ts +385 -0
  398. package/dist/orchestrator/OrchestratorClient.d.ts.map +1 -0
  399. package/dist/orchestrator/OrchestratorClient.js +1195 -0
  400. package/dist/orchestrator/OrchestratorClient.js.map +1 -0
  401. package/dist/themes/DefaultTheme.d.ts +9 -0
  402. package/dist/themes/DefaultTheme.d.ts.map +1 -0
  403. package/dist/themes/DefaultTheme.js +29 -0
  404. package/dist/themes/DefaultTheme.js.map +1 -0
  405. package/dist/themes/MinimalTheme.d.ts +9 -0
  406. package/dist/themes/MinimalTheme.d.ts.map +1 -0
  407. package/dist/themes/MinimalTheme.js +29 -0
  408. package/dist/themes/MinimalTheme.js.map +1 -0
  409. package/dist/themes/Theme.interface.d.ts +36 -0
  410. package/dist/themes/Theme.interface.d.ts.map +1 -0
  411. package/dist/themes/Theme.interface.js +5 -0
  412. package/dist/themes/Theme.interface.js.map +1 -0
  413. package/dist/themes/ThemeManager.d.ts +63 -0
  414. package/dist/themes/ThemeManager.d.ts.map +1 -0
  415. package/dist/themes/ThemeManager.js +257 -0
  416. package/dist/themes/ThemeManager.js.map +1 -0
  417. package/dist/themes/colors.d.ts +108 -0
  418. package/dist/themes/colors.d.ts.map +1 -0
  419. package/dist/themes/colors.js +284 -0
  420. package/dist/themes/colors.js.map +1 -0
  421. package/dist/themes/createTheme.d.ts +40 -0
  422. package/dist/themes/createTheme.d.ts.map +1 -0
  423. package/dist/themes/createTheme.js +114 -0
  424. package/dist/themes/createTheme.js.map +1 -0
  425. package/dist/themes/themeDefinitions.d.ts +27 -0
  426. package/dist/themes/themeDefinitions.d.ts.map +1 -0
  427. package/dist/themes/themeDefinitions.js +244 -0
  428. package/dist/themes/themeDefinitions.js.map +1 -0
  429. package/dist/utils/CodeDiffRenderer.d.ts +124 -0
  430. package/dist/utils/CodeDiffRenderer.d.ts.map +1 -0
  431. package/dist/utils/CodeDiffRenderer.js +257 -0
  432. package/dist/utils/CodeDiffRenderer.js.map +1 -0
  433. package/dist/utils/MarkdownRenderer.d.ts +74 -0
  434. package/dist/utils/MarkdownRenderer.d.ts.map +1 -0
  435. package/dist/utils/MarkdownRenderer.js +260 -0
  436. package/dist/utils/MarkdownRenderer.js.map +1 -0
  437. package/dist/utils/MessageRenderer.d.ts +200 -0
  438. package/dist/utils/MessageRenderer.d.ts.map +1 -0
  439. package/dist/utils/MessageRenderer.js +283 -0
  440. package/dist/utils/MessageRenderer.js.map +1 -0
  441. package/dist/utils/ToolFormatter.d.ts +103 -0
  442. package/dist/utils/ToolFormatter.d.ts.map +1 -0
  443. package/dist/utils/ToolFormatter.js +357 -0
  444. package/dist/utils/ToolFormatter.js.map +1 -0
  445. package/dist/utils/boxDrawing.d.ts +23 -0
  446. package/dist/utils/boxDrawing.d.ts.map +1 -0
  447. package/dist/utils/boxDrawing.js +78 -0
  448. package/dist/utils/boxDrawing.js.map +1 -0
  449. package/dist/utils/checks.d.ts +9 -0
  450. package/dist/utils/checks.d.ts.map +1 -0
  451. package/dist/utils/checks.js +11 -0
  452. package/dist/utils/checks.js.map +1 -0
  453. package/dist/utils/events.d.ts +24 -0
  454. package/dist/utils/events.d.ts.map +1 -0
  455. package/dist/utils/events.js +17 -0
  456. package/dist/utils/events.js.map +1 -0
  457. package/dist/utils/formatters.d.ts +255 -0
  458. package/dist/utils/formatters.d.ts.map +1 -0
  459. package/dist/utils/formatters.js +361 -0
  460. package/dist/utils/formatters.js.map +1 -0
  461. package/dist/utils/math.d.ts +11 -0
  462. package/dist/utils/math.d.ts.map +1 -0
  463. package/dist/utils/math.js +13 -0
  464. package/dist/utils/math.js.map +1 -0
  465. package/package.json +82 -0
@@ -0,0 +1,834 @@
1
+ ---
2
+ name: cortex
3
+ description: Dispatch tasks to the Nexus Cortex server for parallel testing, evaluation, and agent-to-agent workflows.
4
+ triggers:
5
+ - cortex
6
+ - test via cortex
7
+ - dispatch to cortex
8
+ - evaluate performance
9
+ - cortex agent
10
+ - parallel test
11
+ - send to cortex
12
+ ---
13
+
14
+ # Cortex — Nexus Cortex Headless Agent
15
+
16
+ Global command: `cortex`
17
+ Server: `http://localhost:4000` (auto-starts on first call, ~10s boot)
18
+ Entry: `<repo-root>/packages/cli/bin/cortex.js`
19
+ Server entry: `<repo-root>/packages/server/dist/index.js`
20
+ Server log: `/tmp/cortex-server.log`
21
+ Sessions: `.cortex/sessions/{uuid}.jsonl`
22
+ Working directory: the repo root
23
+
24
+ Sessions are stateful — each call continues the conversation. Use `--new` for isolation.
25
+
26
+ The model has its own session tools (ListSessions, LoadSession, SearchConversationHistory, RequestHistoricalContext). For session recall, ask it in natural language instead of using `--resume`:
27
+ ```
28
+ cortex --new "Search your conversation history for what we discussed about caching"
29
+ ```
30
+
31
+ ## Flags
32
+
33
+ ### Dispatch
34
+ ```
35
+ --model, -m ID Model override
36
+ --new Fresh session
37
+ --resume ID Resume session by UUID
38
+ --stream SSE streaming (text→stdout, tools/thinking→stderr)
39
+ --json Full JSON response
40
+ --quiet, -q Text only, no footer
41
+ --output FILE Write response to file
42
+ --pr MODE REPO [N] PR management: MODE is review, create, or list; REPO is owner/repo; N is an optional PR number
43
+ ```
44
+
45
+ ### Request Tuning
46
+ ```
47
+ --max-tokens N Max response tokens (default: 4096)
48
+ --temperature N 0.0-2.0 (default: 1.0)
49
+ --max-iterations N Tool loop limit (default: 10000)
50
+ --timeout N Abort after N ms
51
+ --system "msg" System message injection
52
+ --tools t1,t2,t3 Restrict to named tools
53
+ --no-tools Pure chat (no tool use)
54
+ --auto-approve YOLO mode for tool execution
55
+ ```
56
+
57
+ ### Server
58
+ ```
59
+ --port, -p PORT Server port (default: 4000)
60
+ --debug Debug logging
61
+ --no-resume Server starts without auto-resume
62
+ --env KEY=VALUE Set server env var (repeatable)
63
+ ```
64
+
65
+ ### Introspection (no prompt needed)
66
+ ```
67
+ --list-models Models grouped by provider
68
+ --list-tools All registered tools with descriptions
69
+ --sessions All sessions with age/message counts
70
+ --stats Current session stats
71
+ --context Token budget utilization
72
+ --cache-metrics Cache hit rate and savings
73
+ --export ID Export session to JSON
74
+ ```
75
+
76
+ ## Headless API Schema
77
+
78
+ ```bash
79
+ # Non-streaming
80
+ curl -s http://localhost:4000/v1/messages \
81
+ -H "Content-Type: application/json" \
82
+ -d '{"model":"MODEL_ID","messages":[{"role":"user","content":"PROMPT"}]}'
83
+
84
+ # Streaming (SSE)
85
+ curl -s http://localhost:4000/v1/messages \
86
+ -H "Content-Type: application/json" \
87
+ -d '{"model":"MODEL_ID","messages":[{"role":"user","content":"PROMPT"}],"stream":true}'
88
+ ```
89
+
90
+ **Request body fields:**
91
+ | Field | Type | Description |
92
+ |-------|------|-------------|
93
+ | `model` | string | Model ID (falls back to DEFAULT_MODEL_ID) |
94
+ | `messages` | array | `[{role: "user", content: "..."}]` |
95
+ | `system` | string | System message override |
96
+ | `tools` | array | Tool name list (empty `[]` = all built-in tools) |
97
+ | `max_tokens` | number | Max response tokens (default: 4096) |
98
+ | `temperature` | number | 0.0-2.0 (default: 1.0) |
99
+ | `top_p` | number | Nucleus sampling (default: 1.0) |
100
+ | `stream` | boolean | Enable SSE streaming |
101
+
102
+ **JSON response:**
103
+ ```json
104
+ {
105
+ "content": [{"type": "text", "text": "..."}],
106
+ "toolUses": [{"name": "Read", "input": {"file_path": "..."}}],
107
+ "usage": {
108
+ "inputTokens": 5000, "outputTokens": 1200,
109
+ "cache": {"cacheHitRate": 0.85, "costSavingsRatio": 0.64}
110
+ },
111
+ "metadata": {"toolCallIterations": 3},
112
+ "model": {"id": "grok-4-1-fast-reasoning", "provider": "xai"}
113
+ }
114
+ ```
115
+
116
+ **SSE event types:** `message_start`, `content_block_start`, `text_delta` (delta field = text string), `thinking_delta`, `tool_use_complete` (toolUse.name), `tool_result` (toolResult.content, toolResult.is_error), `message_delta` (usage), `message_stop`, `error`
117
+
118
+ ## Server Endpoints
119
+
120
+ | Endpoint | Method | Purpose |
121
+ |----------|--------|---------|
122
+ | `/health` | GET | Health check |
123
+ | `/v1/messages` | POST | Send message (streaming or not) |
124
+ | `/models` | GET | List all models (`{data: [...]}`) |
125
+ | `/tools` | GET | List all tools (`{tools: [...]}`) |
126
+ | `/sessions` | GET | List sessions |
127
+ | `/sessions/new` | POST | Create new session |
128
+ | `/sessions/:id/stats` | GET | Session statistics |
129
+ | `/sessions/:id/context` | GET | Token budget info |
130
+ | `/sessions/:id/cache/metrics` | GET | Cache metrics |
131
+ | `/sessions/:id/export` | GET | Full session export |
132
+ | `/sessions/:id/model` | PUT | Switch model mid-session |
133
+ | `/sessions/:id/resume` | POST | Resume session (no body) or checkpoint (`{checkpointId}`) |
134
+ | `/v1/pr/review` | POST | Trigger PR review pipeline (`{repo, prNumber}`) |
135
+ | `/v1/pr/create` | POST | Trigger PR creation pipeline (`{repo, branch}`) |
136
+ | `/v1/pr/list` | GET | List open PRs (`?repo=owner/repo`) |
137
+ | `/v1/pr/webhook` | POST | GitHub webhook (future: auto-review on PR open) |
138
+
139
+ ## Tools
140
+
141
+ Run `cortex --list-tools` (or `GET /tools`) for the authoritative registered list. Categories:
142
+ file ops (Read/Write/Edit/Glob/Grep), execution (Bash/BashOutput/KillShell/TmuxSession),
143
+ web (WebSearch/WebFetch), agents (Task/WorkspaceManager/PRAgent), session history
144
+ (ListSessions/LoadSession/SearchConversationHistory/...), sandbox/artifacts, planning
145
+ (TodoWrite/ExitPlanMode), extensions (Skill/SlashCommand), and auto-research
146
+ (ResearchBacklog + the `cortex autoresearch` CLI).
147
+
148
+ ## .env Configuration
149
+
150
+ ```bash
151
+ # API KEYS (values omitted — set in <repo-root>/.env)
152
+ #ANTHROPIC_API_KEY=
153
+ OPENAI_API_KEY=
154
+ GEMINI_API_KEY=
155
+ #GOOGLE_API_KEY= # legacy fallback for GEMINI_API_KEY
156
+ XAI_API_KEY=
157
+ DEEPSEEK_API_KEY=
158
+
159
+ # ANTHROPIC AUTH
160
+ ANTHROPIC_AUTH_METHOD=auto # auto | oauth | api-key
161
+ CLAUDE_CODE_OAUTH_TOKEN= # sk-ant-oat01-... (Claude.ai Max subscription)
162
+
163
+ # PROMPT CACHING
164
+ ANTHROPIC_PROMPT_CACHING=true
165
+
166
+ # MODELS (example values — run `cortex --list-models` for the registered IDs)
167
+ DEFAULT_MODEL_ID=deepseek-v4-pro
168
+ HELPER_MODEL_ID=deepseek-v4-flash
169
+ WEB_TOOLS_MODEL=gemini-2.5-flash # Gemini model for WebSearch/WebFetch tools (free built-in googleSearch/urlContext)
170
+
171
+ # SYSTEM
172
+ DEBUG=false
173
+ USE_EMOJI=false
174
+ PROJECT_PATH=
175
+
176
+ # REACTIVE MENTORSHIP
177
+ MENTORSHIP_ENABLED=true
178
+ MENTORSHIP_TRIGGER_ON_ERROR=true
179
+ MENTORSHIP_ERROR_THRESHOLD=medium # low | medium | high
180
+ MENTORSHIP_KEYWORDS_ENABLED=true # @ultrathink, @analyze, @rethink
181
+ MENTORSHIP_CUSTOM_KEYWORDS=
182
+ MENTORSHIP_HELPER_MODEL=claude-haiku-4-5
183
+ MENTORSHIP_TURN_BASED_ENABLED=true
184
+ MENTORSHIP_TURN_INTERVAL=10
185
+ MENTORSHIP_INTERLEAVED_THINKING=true
186
+ MENTORSHIP_PATTERN_DETECTION=true
187
+ MENTORSHIP_PATTERN_THRESHOLD=3
188
+
189
+ # CONTEXT MANAGEMENT
190
+ CONTEXT_BUDGET_STRATEGY=sliding-window # sliding-window | priority-based
191
+ REASONING_PATTERN_OPTIMIZATION=true
192
+ REASONING_KEEP_RECENT_TURNS=3
193
+
194
+ # SESSION
195
+ SESSION_STORAGE_DIR=.cortex/sessions
196
+ MCP_AUTO_INJECT=false
197
+
198
+ # XAI SERVER SIDE TOOLS
199
+ ENABLE_SERVER_SIDE_TOOLS=true # Injects web_search, x_search, code_execution; overrides to Responses API
200
+ XAI_API_MODE=messages # messages (thinking support) | responses (server-side tools + stateful)
201
+
202
+ # AGENT TEAM WORKSPACE
203
+ AGENT_TMUX_MONITOR=false # Enable tmux pane visual monitoring for parallel agents
204
+
205
+ # TESTING
206
+ ENABLE_SMOKE_TESTS=false
207
+ ```
208
+
209
+ **Runtime variables (not in .env, set at launch):**
210
+ `CORTEX_MODE` (direct|stateless|server), `PORT` (4000), `YOLO` (true|false), `AUTO_RESUME` (true|false), `MAX_TOOL_ITERATIONS` (default: 10000), `MAX_CONSECUTIVE_ERRORS` (default: 3), `TOOL_TIMEOUT_MS` (default: 120000), `MAX_LOOP_REPETITIONS` (default: 5), `AGENT_TMUX_MONITOR` (default: false; can also be set in `.env`, as in the template above)
211
+
212
+ ## Platform Capabilities
213
+
214
+ All subsystems are accessible via natural-language prompts and are controlled through `.env` settings or runtime variables.
215
+
216
+ | Subsystem | Control | What It Does |
217
+ |-----------|---------|--------------|
218
+ | **Reactive Mentorship** | `MENTORSHIP_*` (11 vars) | AI-to-AI self-improvement. Helper model reviews on errors, keywords (`@ultrathink`, `@analyze`, `@rethink`), or every N turns. Pattern detection for repeated failures. |
219
+ | **Context Management** | `CONTEXT_BUDGET_STRATEGY`, `REASONING_*` | Auto-compaction via helper model when context fills. Thinking block optimization for recent turns. Sliding-window or priority-based strategies. |
220
+ | **Loop Control** | `MAX_TOOL_ITERATIONS`, `MAX_CONSECUTIVE_ERRORS`, `TOOL_TIMEOUT_MS`, `MAX_LOOP_REPETITIONS` | Identical-call detection (hash name+input), circuit breaker on consecutive errors, configurable timeout per tool. |
221
+ | **Prompt Caching** | `ANTHROPIC_PROMPT_CACHING` | Caches system messages and tools. Tracks cache creation/read tokens, hit rate, cost savings ratio. |
222
+ | **Sessions** | `SESSION_STORAGE_DIR` | JSONL append-only history. Tools: ListSessions, LoadSession, SearchConversationHistory, GetConversationSegment, RequestHistoricalContext, ListCompactionBoundaries. |
223
+ | **Artifacts** | Dashboard on :4001 | Browser-viewable code artifacts. Tools: CreateArtifactTool, InteractWithSandbox, ModifySandbox, InspectSandbox, StopSandbox. Modes: oneshot, dev, persistent. |
224
+ | **Tmux Sessions** | `TMUX_BIN` | Persistent terminal sessions. Tool: TmuxSession — create, send commands, capture output, list, kill. Metadata in `.cortex/tmux-sessions/`. |
225
+ | **Agent Profiles** | `.cortex/agents/*.md` | Task delegation via Task tool. YAML frontmatter defines name, model, tools, system prompt. Project agents override personal (`~/.cortex/agents/`). |
226
+ | **Permissions** | `.cortex/permissions.*.json`, `YOLO` | 3 profiles (dev/test/prod). 4 policy types: Whitelist, Blacklist, FileOperation, BashCommand. Audit logging. |
227
+ | **System Messages** | `.cortex/system-messages/` | Priority-ordered custom prompts. Hot-reload via file watching. CORTEX.md + MEMORY.md auto-injected every turn. |
228
+ | **MCP Servers** | `MCP_AUTO_INJECT` | Multi-server Model Context Protocol. Auto-injection or on-demand. Tool names prefixed: `serverName__toolName`. |
229
+ | **Slash Commands** | `.cortex/commands/*.md` | SlashCommand tool loads templates with `$1`/`$2` substitution. 9 installed (review, test, diff, deps, find-bug, explain, profile, compare, search). |
230
+ | **Debug** | `DEBUG` | Verbose logging. Shows system message injection, adapter selection, tool execution details. |
231
+ | **XAI Server Tools** | `ENABLE_SERVER_SIDE_TOOLS`, `XAI_API_MODE` | Hybrid tool mode: server-side (web_search, x_search, code_execution) auto-execute on xAI; client-side tools (Read, Write, Bash) pause and return. System messages extracted to `instructions` for caching. |
232
+ | **Agent Team Workspace** | `AGENT_TMUX_MONITOR` | Git worktree isolation for multi-agent collaboration. WorkspaceManager tool manages `git worktree add/remove` for per-agent workspaces. Team briefing prepended to agent prompts. Result broadcasting forwards completed agent findings to still-running siblings. |
233
+ | **Cross-Agent Communication** | Mentorship IPC | Orchestrator-mediated guidance injection via `injectGuidance()` / `broadcastGuidance()`. Uses dual-path: thinking blocks (Google GenerateContent) or `<system-reminder>` tags (Anthropic, OpenAI, XAI). Reuses mentorship infrastructure. |
234
+ | **PR Management** | PRAgent tool, `--pr` flag | Review, create, list, and post-review for GitHub PRs via `gh` CLI. Dispatches parallel audit agents (security, quality, architecture). Server routes at `/v1/pr/*`. |
235
+
236
+ ## Agent Team Workspace System
237
+
238
+ ### Architecture — 3 Layers
239
+
240
+ ```
241
+ Layer 3: Use Cases (PR Review, Internal Dev, Multi-Repo)
242
+ PRAgent tool, cortex --pr CLI, server routes at /v1/pr/*
243
+ Layer 2: Team Communication
244
+ Guidance injection, team briefing, result broadcasting, orchestrator-mediated messaging
245
+ Layer 1: Workspace Isolation
246
+ WorkspaceManager tool (git worktree lifecycle), per-agent worktree paths in task prompts
247
+ ```
248
+
249
+ ### Implementation Files
250
+
251
+ | Component | File | Purpose |
252
+ |-----------|------|---------|
253
+ | **WorkspaceManager executor** | `packages/executors/src/implementations/execution/WorkspaceManagerTool.ts` | Git worktree lifecycle (create/clone/status/diff/cleanup) |
254
+ | **PRAgent executor** | `packages/executors/src/implementations/agent/PRAgentTool.ts` | GitHub PR operations via `gh` CLI |
255
+ | **Tool definitions** | `packages/core/src/tools/registries/BaseToolRegistry.ts` | Schema registration for both tools |
256
+ | **Executor registration** | `packages/executors/src/ExecutorRegistry.ts` | Executor registration for both tools |
257
+ | **Guidance injection (sub-agent)** | `packages/cli/src/agent-mode.ts` | `pendingGuidance` queue + `orchestrator.injectGuidance()` call |
258
+ | **injectGuidance (orchestrator)** | `packages/core/src/orchestrator/CortexOrchestrator.ts` | Public method reusing `injectThinkingBlock()` dual-path |
259
+ | **broadcastGuidance** | `packages/core/src/orchestrator/SubAgentProcessManager.ts` | Sends IPC guidance to all active agents except excluded |
260
+ | **Team briefing** | `packages/core/src/orchestrator/CortexOrchestrator.ts` | `injectTeamBriefing()` — prepends the briefing to Task prompts when more than one Task tool is dispatched |
261
+ | **PR server routes** | `packages/server/src/routes/pr.ts` | 4 REST endpoints for PR management |
262
+ | **PR CLI flag** | `packages/cli/bin/cortex.js` | `--pr MODE REPO [N]` shorthand |
263
+ | **PR agent definitions** | `.cortex/agents/pr-*.md` (5 files) | Security, quality, architecture, implementer, test-writer |
264
+
265
+ ### WorkspaceManager Tool Reference
266
+
267
+ **Modes:**
268
+
269
+ | Mode | Required Params | What It Does | Returns |
270
+ |------|----------------|-------------|---------|
271
+ | `create` | — | `git worktree add /tmp/workspace-{uuid} -b {branch} {baseBranch}` from local repo | `{ worktreePath, branch, baseBranch, repoPath }` |
272
+ | `clone` | `repo` | `git clone --depth 50` external repo, optionally creates worktree branch | `{ cloneDir, worktreePath, branch, repo }` |
273
+ | `status` | — | `git worktree list --porcelain` → parsed worktree list | `{ worktreeCount, worktrees: [{ path, head, branch }] }` |
274
+ | `diff` | `worktreePath` | `git diff {baseBranch} -- .` with file list | `{ changedFiles, fileCount, diffLines, truncated, diff }` |
275
+ | `cleanup` | `worktreePath` | `git worktree remove --force` + `git worktree prune` | `{ worktreePath, removed: true }` |
276
+
277
+ **Parameters:**
278
+ ```typescript
279
+ {
280
+ mode: 'create' | 'clone' | 'status' | 'diff' | 'cleanup',
281
+ repo?: string, // Local path for create, "owner/repo" for clone
282
+ branch?: string, // Branch name (auto-generated if omitted: workspace-{uuid})
283
+ baseBranch?: string, // Compare target (default: main)
284
+ worktreePath?: string, // Required for diff/cleanup
285
+ maxDiffLines?: number // Truncation limit (default: 5000)
286
+ }
287
+ ```
288
+
289
+ **Key behaviors:**
290
+ - `create` falls back to HEAD if baseBranch doesn't exist
291
+ - `clone` uses shallow clone (`--depth 50`) with 120s timeout
292
+ - `clone` auto-detects clone URL format (owner/repo → `https://github.com/{repo}.git`)
293
+ - `cleanup` force-removes even if `git worktree remove` fails (fallback: `rmSync`)
294
+ - Worktrees created in `/tmp/workspace-{uuid}` (ephemeral)
295
+
296
+ ### PRAgent Tool Reference
297
+
298
+ **Requires:** GitHub CLI (`gh`) installed and authenticated.
299
+
300
+ **Modes:**
301
+
302
+ | Mode | Required Params | What It Does | Returns |
303
+ |------|----------------|-------------|---------|
304
+ | `review` | `repo`, `prNumber` | `gh pr view --json` + `gh pr diff` | `{ title, author, stats, files, diff, labels, reviewDecision }` |
305
+ | `create` | `repo` | Returns context for orchestrator to set up workspace + dispatch agents | `{ repo, branch, instructions }` |
306
+ | `list` | `repo` | `gh pr list --json --limit 50` | `{ count, pullRequests: [{ number, title, author, labels, isDraft }] }` |
307
+ | `post-review` | `repo`, `prNumber`, `action` | `gh pr review {N} {--approve\|--request-changes\|--comment}` | `{ action, posted: true }` |
308
+
309
+ **Parameters:**
310
+ ```typescript
311
+ {
312
+ repo: string, // "owner/repo" format (required)
313
+ mode: 'review' | 'create' | 'list' | 'post-review',
314
+ prNumber?: number, // Required for review/post-review
315
+ branch?: string, // For create mode
316
+ action?: 'approve' | 'request-changes' | 'comment', // For post-review
317
+ body?: string, // Comment body for post-review
318
+ diffOptions?: { pathFilter?: string, maxLines?: number }
319
+ }
320
+ ```
321
+
322
+ **Key behaviors:**
323
+ - `review` returns structured PR context (metadata + diff) for the LLM to dispatch audit agents
324
+ - `review` supports `pathFilter` to filter diff to specific file paths
325
+ - `create` does NOT set `shouldSpawnSubAgent` — returns context for orchestrator to decide
326
+ - `post-review` maps action to `gh pr review` flags
327
+
328
+ ### Agent Dispatch Modes
329
+
330
+ 4 modes for spawning sub-agents via the Task tool. Team mode activates automatically when >1 Task tools are dispatched in the same turn.
331
+
332
+ | Mode | Agents | Communication | Workspace |
333
+ |------|--------|--------------|-----------|
334
+ | **Solo** | 1 | None | Shared |
335
+ | **Parallel** | N | None | Shared |
336
+ | **Team** | N | Briefing + Result Broadcasting | Shared |
337
+ | **Git Worktree** | N | Briefing + Result Broadcasting | Isolated per agent |
338
+
339
+ ### Solo — Single Agent
340
+ ```bash
341
+ cortex --quiet "Use the code-reviewer agent to review APIClient.ts"
342
+ ```
343
+
344
+ ### Parallel — Independent Concurrent Agents
345
+ ```bash
346
+ cortex --quiet "Use 3 explore agents to search for: WebSocket code, auth middleware, rate limiting"
347
+ ```
348
+
349
+ ### Team — Coordinated Agents with Cross-Communication
350
+ ```bash
351
+ # PR review dispatches 3 audit agents as a team
352
+ cortex --pr review owner/repo 42
353
+
354
+ # Custom team
355
+ cortex --quiet "Dispatch pr-security-auditor, pr-code-quality, and pr-architecture-reviewer as a team to review the auth module"
356
+ ```
357
+
358
+ Team mode auto-injects a **team briefing** (teammate names, assignments) into each agent's prompt. When an agent completes, `broadcastGuidance()` forwards its findings to still-running siblings via IPC → `injectGuidance()` → `<system-reminder>` or thinking block.
359
+
360
+ **Team briefing format** (auto-prepended to each agent's prompt):
361
+ ```
362
+ 📋 **Team Briefing**
363
+ You are part of a {N}-agent team working in parallel.
364
+
365
+ Teammates:
366
+ - {agentType}: {description}
367
+ - {agentType}: {description}
368
+
369
+ The orchestrator will forward relevant findings from teammates.
370
+ Focus on YOUR assignment. Do not duplicate others' work.
371
+ ```
372
+
373
+ ### Git Worktree — Isolated Workspaces per Agent
374
+ ```bash
375
+ # Create isolated worktrees, dispatch agents to each
376
+ cortex --quiet "Use WorkspaceManager to create worktrees for branches auth-refactor and api-cleanup, then dispatch pr-implementer agents to each"
377
+ ```
378
+
379
+ **Full workspace lifecycle:**
380
+ 1. `WorkspaceManager(mode=create, branch=X)` → `/tmp/workspace-{uuid}`
381
+ 2. `WorkspaceManager(mode=create, branch=Y)` → `/tmp/workspace-{uuid}`
382
+ 3. Team briefing injected with workspace paths
383
+ 4. Agents dispatched in parallel via `Promise.allSettled`
384
+ 5. Early-completion agent's findings broadcast to still-running siblings
385
+ 6. `WorkspaceManager(mode=diff, worktreePath=...)` for each
386
+ 7. `WorkspaceManager(mode=cleanup, worktreePath=...)` for each
387
+
388
+ ### Cross-Agent Communication Flow
389
+
390
+ ```
391
+ Agent A completes
392
+
393
+ CortexOrchestrator detects completion
394
+
395
+ Constructs broadcast: "📡 Team Update: Agent 'A' completed: {summary (max 500 chars)}"
396
+
397
+ SubAgentProcessManager.broadcastGuidance(message, excludeAgentId=A)
398
+
399
+ For each active agent B, C, ...:
400
+ SubAgentProcessManager.guideAgent(agentId, message)
401
+
402
+ IPC: parent.send({ type: 'guidance', payload: { message } })
403
+
404
+ agent-mode.ts: pendingGuidance.push(message)
405
+
406
+ orchestrator.injectGuidance(message, 'team_update')
407
+
408
+ Dual-path injection (same as mentorship thinking):
409
+ - Thinking-capable APIs → thinking block
410
+ - Other APIs → <system-reminder> tag
411
+
412
+ Ephemeral message (metadata.ephemeral=true) — cleaned up after turn
413
+ ```
414
+
415
+ ### Visual Monitoring
416
+ ```bash
417
+ AGENT_TMUX_MONITOR=true cortex --quiet "Dispatch 3 agents..."
418
+ # View: http://localhost:4001/tmux/team-{id} or tmux attach -t team-{id}
419
+ ```
420
+
421
+ Events mirrored to tmux panes: `▶️ Started`, `🔧 Read /path`, `✅ Completed (15.2s)`, `❌ Error`
422
+
423
+ ### PR Management
424
+
425
+ **CLI shorthand:**
426
+ ```bash
427
+ cortex --pr review owner/repo 42 # Review PR (dispatches audit team)
428
+ cortex --pr list owner/repo # List open PRs
429
+ cortex --pr create owner/repo --branch feature-x # Create PR workflow
430
+ ```
431
+
432
+ **Server routes:**
433
+
434
+ | Route | Method | Body/Query | Pipeline |
435
+ |-------|--------|-----------|----------|
436
+ | `/v1/pr/review` | POST | `{ repo, prNumber, options? }` | PRAgent(review) → 3 parallel audit agents → synthesize → result |
437
+ | `/v1/pr/create` | POST | `{ repo, branch?, description? }` | WorkspaceManager(create) → PRAgent(create) → pr-implementer agent |
438
+ | `/v1/pr/list` | GET | `?repo=owner/repo` | PRAgent(list) → formatted results |
439
+ | `/v1/pr/webhook` | POST | GitHub webhook payload | Future: auto-review on `pull_request.opened` |
440
+
441
+ **PR review pipeline (3 parallel agents):**
442
+ 1. PRAgent(mode=review) fetches PR metadata + diff
443
+ 2. Orchestrator dispatches 3 agents as a team:
444
+ - `pr-security-auditor` — vulnerabilities, malicious code, supply chain risks
445
+ - `pr-code-quality` — style, complexity, anti-patterns, test gaps
446
+ - `pr-architecture-reviewer` — breaking changes, API surface, dependency impact
447
+ 3. Early completions broadcast to still-running siblings
448
+ 4. Orchestrator synthesizes findings → approve or request changes
449
+ 5. PRAgent(mode=post-review) posts the review
450
+
451
+ **PR agent definitions** (in `.cortex/agents/`):
452
+
453
+ | Agent | Tools | Focus |
454
+ |-------|-------|-------|
455
+ | `pr-security-auditor` | Grep, Read, Bash | Vulnerabilities, malicious code, prompt injection, supply chain |
456
+ | `pr-code-quality` | Read, Grep, Glob | Style, complexity, anti-patterns, test coverage gaps |
457
+ | `pr-architecture-reviewer` | Read, Grep, Glob, Bash | Breaking changes, API surface, dependency impact |
458
+ | `pr-implementer` | Read, Edit, Write, Bash, Grep, Glob | Implement code changes in worktree |
459
+ | `pr-test-writer` | Read, Write, Edit, Bash, Grep, Glob | Write tests for changes |
460
+
461
+ ### Test Coverage
462
+
463
+ The team-workspace system is covered by dedicated test suites:
464
+
465
+ | Test File | What It Covers |
466
+ |-----------|---------------|
467
+ | `packages/core/src/orchestrator/__tests__/TeamWorkspace.test.ts` | Team briefing, guidance injection, broadcast guidance, tmux binary resolution, `AGENT_TMUX_MONITOR` env handling, result broadcasting, ephemeral message lifecycle, agent-mode guidance handler |
468
+ | `packages/core/src/orchestrator/__tests__/AgentDispatchLifecycle.test.ts` | Solo + parallel dispatch, team briefing + cross-agent broadcasting + event routing, IPC protocol messages + guidance flow, error handling, SubAgentProcessManager state, tmux monitoring |
469
+
470
+ **Run tests (from the repo root):**
471
+ ```bash
472
+ npm test # Watch mode
473
+ npm run test:run # Single run
474
+ ```
475
+
476
+ Full agent documentation: `.cortex/agents/README.md`
477
+
478
+ ## XAI Responses API + Server-Side Tools
479
+
480
+ **Env vars:** `ENABLE_SERVER_SIDE_TOOLS=true` (dynamic override) and `XAI_API_MODE=messages|responses` (static config)
481
+
482
+ **How it works:**
483
+ 1. `XAI_API_MODE` sets the default API pattern at model registration time. `messages` preserves interleaved thinking; `responses` enables stateful/server-side features.
484
+ 2. `ENABLE_SERVER_SIDE_TOOLS=true` overrides to Responses API at request time regardless of `XAI_API_MODE`. It injects `web_search`, `x_search`, `code_execution` into the tool array, detection fires, and the orchestrator switches the endpoint + adapter dynamically.
485
+ 3. **Hybrid tool mode**: Server-side tools auto-execute on xAI servers; client-side tools (Read, Write, Bash, etc.) pause and return `function_call` to the orchestrator's tool loop. Both coexist in the same request.
486
+
487
+ **System message architecture:**
488
+ - `SystemMessageMiddleware` injects system context (CORTEX.md, CLAUDE.md, tool guides) as `<system-reminder>` tags in user message content. This is the universal path for ALL adapters.
489
+ - For Responses API only: `APIClient.extractSystemRemindersForResponsesAPI()` scans input items, extracts `<system-reminder>` content, strips tags from user messages, and sets the extracted text as the `instructions` parameter. This enables provider-side caching without duplicating content.
490
+ - Messages API, Chat Completions, GenerateContent are unaffected — system context stays embedded in user content.
491
+
492
+ **Key files:**
493
+ | File | Role |
494
+ |------|------|
495
+ | `packages/core/src/adapters/ServerSideToolDetection.ts` | `shouldUseServerSideTools()` — checks env + model + tools, returns endpoint override |
496
+ | `packages/core/src/models/configurators/XAIConfigurator.ts` | Static config from `XAI_API_MODE`, defaults from `SettingsSchema` |
497
+ | `packages/core/src/orchestrator/CortexOrchestrator.ts` | Server-side tool injection + detection + `effectiveModel` override |
498
+ | `packages/core/src/orchestrator/APIClient.ts` `extractSystemRemindersForResponsesAPI()` | System message extraction for `instructions` param |
499
+ | `packages/core/src/adapters/ResponsesAPIAdapter.ts` | Format adapter: `function_call` as top-level items, `function_call_output` for tool results |
500
+ | `packages/core/src/tools/ServerSideTools.ts` | `web_search`, `x_search`, `code_execution` definitions + `separateTools()` |
501
+
502
+ **Installed agents:** plan, explore, code-reviewer, context-research, doc-writer, test-writer, refactor, new-model-api-integrator-analyst, a-frontend-landing-page-designer, pr-security-auditor, pr-code-quality, pr-architecture-reviewer, pr-implementer, pr-test-writer, autoresearch-agent, web-researcher
503
+
504
+ **Providers** (run `cortex --list-models` for the live registry): Anthropic, OpenAI, Google Gemini, XAI, DeepSeek, Gemma, GLM, Qwen, Moonshot, MiniMax, HuggingFace, Local, OpenRouter
505
+
506
+ ## XAI Cache-Hit Contract (authoritative)
507
+
508
+ Per live XAI docs (`docs.x.ai/api/mcp` MCP server — search for "prompt-caching"), these are the **hard rules** for maximizing cache hits on reasoning models. Violating any of them is the top cause of slow/expensive XAI inference.
509
+
510
+ ### Rule 1: Never modify earlier messages
511
+ > "For cache hits in multi-turn conversations, never edit, remove, or reorder earlier messages — only append new ones."
512
+
513
+ What would break it in our codebase (all currently guarded):
514
+ - `keepRecentThinking()` stripping old thinking blocks → bypassed for XAI reasoning models (CortexOrchestrator.ts, `ensureHistoryFitsModel`)
515
+ - Any compaction that rewrites middle messages
516
+ - Mentorship injection that adds messages in the middle (our mentorship appends to the end — safe)
517
+
518
+ ### Rule 2: For reasoning models, reasoning_content MUST be sent back
519
+ > "For reasoning models, you MUST include reasoning_content from previous responses; omitting it is the TOP cause of cache misses."
520
+
521
+ Coverage:
522
+ - Messages API (grok-code-fast-1, grok-4-1-fast-reasoning via /v1/messages): MessagesAPIAdapter.toProviderMessages preserves `thinking` block + `signature` for XAI. Commit 3e2ec22be.
523
+ - Responses API: `reasoning` output items round-tripped as `reasoning` input items in ResponsesAPIAdapter.toProviderMessages.
524
+
525
+ ### Rule 3: Sticky routing via headers/fields
526
+ > "Use x-grok-conv-id to maximize cache hit rates."
527
+
528
+ | API | Routing mechanism | Our implementation |
529
+ |-----|-------------------|-------------------|
530
+ | Chat Completions / Messages API | `x-grok-conv-id` HTTP header | set from `conversationId = currentSessionId` in PreparedRequest (commit 054f0384d) |
531
+ | Responses API | `prompt_cache_key` body field | same conversationId, mapped inside APIClient.sendResponsesAPI |
532
+
533
+ ### Rule 4: Chaining with `previous_response_id` = send only NEW messages
534
+ > "With Responses API, we can send the id of the previous response, and the new messages to append to it."
535
+
536
+ Official example:
537
+ ```python
538
+ second_response = client.responses.create(
539
+ previous_response_id=response.id,
540
+ input=[{"role": "user", "content": "new message"}], # NOT the full history
541
+ )
542
+ ```
543
+
544
+ Our implementation (commits e85b68834, 72c656b24):
545
+ - Track `messageCountAtLastResponse` as a checkpoint index after every assistant response lands in messageHistory
546
+ - When `lastResponseId` is set AND `effectiveModel.api.pattern === 'responses'`, send only `messageHistory.slice(checkpoint)` as input
547
+ - Fires at BOTH initial request time (cross-turn chain) AND continuation request time (within-turn tool loop)
548
+ - Debug log: `[Orchestrator] Input-sliced initial request for cross-turn chain: sent 1/N messages`
549
+
550
+ ### Rule 5: Two options for persistence — don't mix them
551
+
552
+ **Server-side (our default)**: `previous_response_id` + sliced input. Server stores reasoning for 30 days.
553
+
554
+ **Client-side (for >30-day persistence)**: `include: ["reasoning.encrypted_content"]` on the FIRST request, then splat `*response.output` into the NEXT request's input — **NO `previous_response_id`**. Deferred implementation.
555
+
556
+ Combining these (our earlier mistake): request shape confuses the server → empty output + reasoning-dominant tokens.
557
+
558
+ ### Measured peak efficiency (2026-04-21)
559
+
560
+ After all improvements, 3-turn benchmarks show:
561
+
562
+ | API | Hit rate T2 | Hit rate T3 | Cost savings |
563
+ |-----|------------|------------|--------------|
564
+ | Messages API (grok-code-fast-1) | 87.7% | 88.2% | ~66% |
565
+ | Responses API (grok-4-1-fast-reasoning, server-side tools) | 97.3% | 92.3% | ~70% |
566
+
567
+ Responses API continuation also sends only **1/3 to 1/7 of the messages** per request due to input slicing.
568
+
569
+ ### Debug log markers for XAI optimization paths
570
+
571
+ Add these to the path-verification table in the Testing section:
572
+
573
+ | Marker | Meaning |
574
+ |--------|---------|
575
+ | `[Orchestrator Context] Skipping thinking block stripping — XAI reasoning model` | Rule 1 compliance active |
576
+ | `[Orchestrator] Input-sliced initial request for cross-turn chain: sent N/M messages` | Rule 4 cross-turn chain active |
577
+ | `[Orchestrator Phase 2.5] Input-sliced for previous_response_id: sent N/M messages` | Rule 4 within-turn chain active |
578
+ | `[Orchestrator Cache] Cache hit detected: { ... hitRate: 'N%' }` | Cache working (fires for both Messages + Responses API paths now) |
579
+ | `prompt_cache_key: '<uuid>'` | Rule 3 Responses API routing active |
580
+ | `x-grok-conv-id` header in XAI Anthropic-compat client | Rule 3 Messages API routing active |
581
+
582
+ ## Server Management
583
+
584
+ ```bash
585
+ # Auto-starts on first cortex call. Manual control:
586
+ # IMPORTANT: a manual launcher that `cd`s into packages/server runs argv
587
+ # `node dist/index.js` (relative) — `pkill -f "packages/server/dist/index.js"`
588
+ # NEVER matches that and fails silently, leaving a zombie that wins the port
589
+ # race. Match the real argv:
590
+ pkill -9 -f "node dist/index.js" # stop (verify with ps!)
591
+ ps -eo pid,args | grep "[d]ist/index.js" # MUST be empty after
592
+ curl -s http://localhost:4000/health # check
593
+ tail -20 /tmp/cortex-server.log # debug
594
+ ```
595
+
596
+ ### Manual start with env overrides
597
+
598
+ When testing a conditional code path (e.g., Responses API, server-side tools, specific models), start the server manually with the env vars inline — do NOT rely on `.env` alone, since `.env` captures steady-state config, not per-test overrides:
599
+
600
+ ```bash
601
+ # Kill old server first — MUST match the real argv (see Server Management
602
+ # note above); verify it's actually gone or you'll benchmark a zombie.
603
+ pkill -9 -f "node dist/index.js" 2>/dev/null; sleep 2
604
+ ps -eo pid,args | grep "[d]ist/index.js" && echo "STILL ALIVE — kill -9 it" || echo "clean"
605
+
606
+ # Start with test-specific env. setsid fully detaches node from the
607
+ # Bash-tool process group (a plain `&`/`disown` child gets reaped when the
608
+ # tool call returns — the server dies before you can curl it).
609
+ cd <repo-root>/packages/server && \
610
+ DEBUG=true \
611
+ ENABLE_SERVER_SIDE_TOOLS=true \
612
+ XAI_API_MODE=responses \
613
+ CORTEX_MODE=stateless \
614
+ setsid nohup node dist/index.js > /tmp/cortex-server.log 2>&1 < /dev/null &
615
+
616
+ # Boot can take ~20s on a cold start (not 5s). Poll, don't fixed-sleep:
617
+ for i in $(seq 1 30); do sleep 2; curl -sf http://localhost:4000/health >/dev/null && break; done
618
+ curl -s http://localhost:4000/health | head -c 150
619
+ ```
620
+
621
+ Key env toggles for path testing:
622
+ - `DEBUG=true` — verbose logs (required for marker inspection below)
623
+ - `ENABLE_SERVER_SIDE_TOOLS=true` — injects `web_search`/`x_search`/`code_execution` + forces Responses API for XAI
624
+ - `XAI_API_MODE=messages|responses` — default pattern when server-side tools aren't injected
625
+ - `ANTHROPIC_PROMPT_CACHING=false` — disable to measure uncached cost baselines
626
+ - `MENTORSHIP_ENABLED=false` — disable AI-to-AI loops when isolating provider behavior
627
+
628
+ ## Testing & Benchmarking Workflow
629
+
630
+ The proven pattern for testing CORTEX changes (coherence fixes, new providers, prompt-engineering, tool additions): **drive the server with natural-language prompts + run a parallel sub-agent + ground-truth both.** Never trust a single output.
631
+
632
+ ### Cardinal rules (read first — these are non-negotiable, learned the hard way)
633
+
634
+ 1. **n ≥ 2, with *different* prompts.** One task agreeing three ways is a false positive waiting to happen. Run the full pattern with at least two *different* tasks in *fresh sessions*. A single run never decides anything — it confirms nothing and refutes nothing.
635
+ 2. **Ground-truth everything against the real artifact.** The parallel sub-agent is a *reference that fails differently*, not an oracle. Even an Opus gold sub-agent is not infallible. The only truth is direct shell/grep/python on the actual files. Neither agent's output is self-verifying.
636
+ 3. **Fresh server + fresh session per run.** `--new` on every prompt; restart the server between *model* probes. Prompt cache and the debug log bleed across models and corrupt cross-model comparisons.
637
+ 4. **Discard confounded runs — don't average them in.** After every run: `grep -nE "429|capacity|exhausted|rate.?limit|overloaded|quota" /tmp/cortex-server.log`. If it hits, the model was throttled, not benchmarked. Throw the result away and re-run; never fold a confounded run into a tally.
638
+ 5. **Real work surface, not toy prompts.** The task must (a) move the harness/platform forward *and* (b) have an independently verifiable answer. "Count imports in file X" is verifiable but worthless; "refactor module Z" is real but unverifiable. The skill is finding tasks that are *both* — real work whose result can still be ground-truthed.
639
+
640
+ ### The three-way pattern
641
+
642
+ ```
643
+ ┌─────────────────────────────┐
644
+ same task │ 1. CORTEX (system under │
645
+ ───────────>│ test) via cortex CLI │──┐
646
+ └─────────────────────────────┘ │
647
+ ┌─────────────────────────────┐ │ compare
648
+ same task │ 2. Claude Code Agent tool │──┼──────────> ground truth
649
+ ───────────>│ (reference, different │ │ via direct shell/grep/python
650
+ │ model — Haiku, Sonnet, │ │
651
+ │ Opus — they fail │ │
652
+ │ differently) │ │
653
+ └─────────────────────────────┘ │
654
+ ┌─────────────────────────────┐ │
655
+ │ 3. Direct shell verification│◄─┘
656
+ │ (the real truth) │
657
+ └─────────────────────────────┘
658
+ ```
659
+
660
+ ### Recipe
661
+
662
+ 1. **Pick a deterministic task.** File contents, counts, code patterns — anything where a correct answer can be independently verified. Avoid web content (results vary by time) or creative tasks (no ground truth).
663
+
664
+ 2. **Launch CORTEX + sub-agent in the SAME message** so they run concurrently. Example parallel dispatch:
665
+ ```
666
+ Bash: cortex --new --quiet -m grok-4-1-fast-reasoning "TASK"
667
+ Agent: subagent_type=general-purpose, model=sonnet, prompt="TASK (identical wording)"
668
+ ```
669
+
670
+ 3. **Ground-truth via direct commands** after both return. CRITICAL: cross-reference every field the agents mention. A Python check on `dependencies` alone will miss `devDependencies` — verify your ground-truth command is as thorough as the broadest agent claim.
671
+
672
+ 4. **Tally in a comparison table** — name, answer, correct/wrong, notable reasoning. Flag disagreements. Different models fail differently:
673
+ - **Haiku** — scan-and-match shortcuts, count inconsistencies
674
+ - **Sonnet** — sophisticated-but-creative interpretations that may violate literal constraints
675
+ - **Opus** — slower, usually more precise, but not infallible
676
+ - **CORTEX with XAI** — literal structural answers, very good at deterministic file tasks
677
+
678
+ 5. **If three-way agreement** (on every one of the n ≥ 2 tasks — see cardinal rule 1): ship. **If disagreement**: the pattern has surfaced a real issue — either in the code under test, in an agent's reasoning, or in your own ground-truth command. Investigate all three causes.
679
+
680
+ ### Performance benchmarking (when comparing models / measuring harness cost)
681
+
682
+ Correctness benchmarking (above) asks *"is the answer right?"*. Performance benchmarking asks *"at what cost, how fast, how many round-trips?"* — used when comparing providers (e.g. "can DeepSeek v4 Flash match Opus on this task at lower cost?") or measuring a harness change (caching, compaction, loop-control).
683
+
684
+ **Capture these fields from every `/v1/messages` response — not just the text:**
685
+
686
+ | Field | Why it matters |
687
+ |---|---|
688
+ | `usage.inputTokens` / `usage.outputTokens` | Raw size; reveals system-message + history overhead |
689
+ | `usage.cacheReadTokens` / `usage.cacheCreationTokens` | Cache effectiveness. Cross-turn cache hit-rate is the headline metric for caching work |
690
+ | `usage.cost_in_usd_ticks` | Authoritative cost **only on the XAI Responses API path** — NOT present on `/v1/messages`. Don't claim "cheaper" from a path that doesn't emit it (this is an unfalsifiable trap — see memory) |
691
+ | `metadata.toolCallIterations` | Tool round-trips. A model that's "right" in 9 iterations is worse than one right in 3 |
692
+ | wall-clock (time the curl) | Latency. Measure with `time` or capture start/end timestamps |
693
+
694
+ **Discipline specific to performance runs:**
695
+
696
+ - **Same prompt, same session-shape, fresh server, one model at a time.** Restart the server between models — a warm prompt cache from model A inflates model B's apparent cache hit-rate.
697
+ - **n ≥ 2 different real-work tasks** (cardinal rule 1 applies doubly here — perf numbers from one task generalise to nothing).
698
+ - **Always run the gold in parallel.** An Opus sub-agent on the identical task is the quality bar. A cheap model that's 10× cheaper but wrong is not a win — tally cost *and* correctness in the same table, never cost alone.
699
+ - **Confound check is mandatory** (cardinal rule 4). A throttled run shows fake-slow latency and fake-low throughput. Grep the log, discard, re-run.
700
+ - **Tally format:** one row per (model × task): `model | task | correct? | inTok | outTok | cacheHit% | iters | wall-s | $ (if Responses path)`. Decisions come from the *table*, never from a single cell.
701
+
702
+ ### Verifying specific code paths via debug log markers
703
+
704
+ After running a test, grep `/tmp/cortex-server.log` for the markers that prove the intended code path ran:
705
+
706
+ | Path being tested | Markers to confirm | Location |
707
+ |---|---|---|
708
+ | XAI Responses API | `[Orchestrator] API Pattern: responses` | `CortexOrchestrator.ts` |
709
+ | Server-side tool injection | `[Orchestrator Phase 2.3] Injected 3 server-side tools: web_search, x_search, code_execution` | `CortexOrchestrator.ts` |
710
+ | Hybrid tool detection | `[ServerSideToolDetection] Hybrid mode: N server-side + M client-side tools for xai` | `ServerSideToolDetection.ts` |
711
+ | Responses API response chaining | `[Orchestrator] Responses API response ID tracked: <uuid>` | `CortexOrchestrator.ts` |
712
+ | `previous_response_id` sent on continuation | `"previous_response_id": "<uuid>"`, `"has_previous_response_id": true` | `APIClient.ts` |
713
+ | Stateful-chain thinking preservation | `[Orchestrator Context] Skipping thinking block stripping — stateful Responses API with previous_response_id chaining` | `CortexOrchestrator.ts` |
714
+ | XAI `instructions` skip | `"has_instructions": false` (for XAI requests) | `APIClient.ts` |
715
+ | Messages API fallback | `[Orchestrator] API Pattern: messages` | `CortexOrchestrator.ts` |
716
+ | Server-side tool completion | `[Orchestrator] Server-side metadata extracted: YES`, `autonomousExecution: true` | `CortexOrchestrator.ts` |
717
+ | Reasoning pattern optimization | `[Orchestrator Context] Reasoning optimization: stripped old thinking blocks, saved N tokens` | `CortexOrchestrator.ts` |
718
+ | Helper model compaction | `[HelperMiddleware] Compacting context via <helper-model-id>` | `HelperModelMiddleware.ts` |
719
+ | Prompt caching hit | `[Orchestrator Cache] Cache hit detected: { ... hitRate: 'N%' }` | `CortexOrchestrator.ts` |
720
+ | Mentorship trigger | `[Orchestrator Mentorship] Detected keyword: <keyword>` or `Triggering periodic review` | `CortexOrchestrator.ts` |
721
+ | Loop detection | `[Orchestrator] Loop detected: tool <name> called N times` | `CortexOrchestrator.ts` |
722
+ | Orphaned tool_use recovery | `[Orchestrator] Recovered orphaned tool_use: <id>` | `CortexOrchestrator.ts` |
723
+
724
+ Good grep patterns for verifying a run:
725
+ ```bash
726
+ grep -nE "API Pattern|response ID tracked|previous_response_id|has_instructions|server-side|Server-side|[Ee]rror" /tmp/cortex-server.log | head -40
727
+ ```
728
+
729
+ ### Known gotchas
730
+
731
+ - **`.env` alone won't test Responses API.** `ENABLE_SERVER_SIDE_TOOLS=true` must be set at server launch. It's NOT in the default `.env`. Without it, XAI requests go through Messages API even with tools.
732
+ - **`pkill -f "packages/server/dist/index.js"` is WRONG and fails silently.** A launcher that `cd`s into packages/server runs argv `node dist/index.js` (relative path) — that pattern never matches it, the old server survives, and the "new" one loses the port race. You then benchmark the *stale* server without knowing. Use `pkill -9 -f "node dist/index.js"` and ALWAYS verify with `ps -eo pid,args | grep "[d]ist/index.js"` (must be empty). The tell-tale sign that you're hitting a zombie: the same `conversationId` survives a "restart" (impossible for a fresh process).
733
+ - **Benchmarking REQUIRES `CORTEX_MODE=stateless` at server launch.** The default is *persistent*: every `/v1/messages` request is appended as the next turn of ONE shared, monotonically-growing session (input balloons run-over-run, cache-hit rates are fake, a later answer can leak from an earlier one). The `/v1/messages` route has no per-request "new session" param in persistent mode, so the only lever is the launch env var. Stateless = fresh ephemeral orchestrator + session per request (server-side equivalent of `cortex --new`). Verify isolation before trusting any number: fire two identical tiny probes — different `conversationId` + identical `inputTokens` = isolated. (`/clear` is NOT equivalent and is CLI-only: it wipes messages but keeps the conversationId → the provider prompt-cache key stays warm → cross-iteration cache bleed survives it.)
734
+ - **Telemetry caveat.** `usage.outputTokens` is under-reported for Gemini (generateContent) and xAI on `/v1/messages`; Gemini's path surfaces no cache metrics (cacheHitRate 0). `usage.inputTokens` is the reliable cross-provider cost proxy. `usage.cost_in_usd_ticks` only exists on the XAI Responses path. Fresh-session floor ≈ 16k input tokens (system messages + tool schemas) — every isolated request pays this; subtract it when reasoning about task-specific cost.
735
+ - **Server boot is ~20s on a cold start, not 5s.** Poll `/health` in a loop; never fixed-sleep-5 then assume up.
736
+ - **Sessions are stateful (the root cause of the isolation problems described in the `CORTEX_MODE=stateless` bullet above).** Use `--new` (CLI) or `CORTEX_MODE=stateless` (HTTP) for benchmark runs, otherwise context leaks between tests and corrupts comparisons.
737
+ - **On resource-constrained hosts, prefer `npm run build` + `tsc --noEmit` over the full `vitest` suite** for quick validation — the watch-mode test runner can be heavy in small containers/sandboxes.
738
+ - **Debug logs are overwritten on each server restart.** Rename or copy `/tmp/cortex-server.log` before restarting if you need to compare across runs.
739
+ - **Ground-truth commands must be as thorough as the agents.** A `jq '.dependencies | keys'` that ignores `devDependencies` will make agents look wrong when they're right. Always re-check your ground-truth query against whatever fields the agents referenced.
740
+
741
+ ### Example benchmark run
742
+
743
+ ```bash
744
+ # 1. Start clean — correct kill pattern, stateless for isolation, setsid to
745
+ # survive tool-call teardown, poll (don't fixed-sleep) for the ~20s boot.
746
+ pkill -9 -f "node dist/index.js" 2>/dev/null; sleep 2
747
+ ps -eo pid,args | grep "[d]ist/index.js" && echo "ZOMBIE — kill -9" || echo "clean"
748
+ cd <repo-root>/packages/server && \
749
+ DEBUG=true ENABLE_SERVER_SIDE_TOOLS=true CORTEX_MODE=stateless \
750
+ setsid nohup node dist/index.js > /tmp/cortex-server.log 2>&1 < /dev/null &
751
+ for i in $(seq 1 30); do sleep 2; curl -sf http://localhost:4000/health >/dev/null && break; done
752
+ # isolation sanity-check: 2 identical probes must give DIFFERENT conversationId,
753
+ # SAME inputTokens. If conversationId repeats or inputTokens grows → not isolated. (The /health call below only confirms the server is up; send the two probes to /v1/messages.)
754
+ curl -s http://localhost:4000/health | head -c 80
755
+
756
+ # 2. Same prompt, parallel dispatch (do this in ONE tool-call message):
757
+ # Bash: cortex --new --quiet -m grok-4-1-fast-reasoning "Read A and B, report X, Y, Z"
758
+ # Agent: subagent_type=general-purpose, model=sonnet, prompt="Read A and B, report X, Y, Z"
759
+
760
+ # 3. Ground truth (example for package.json analysis)
761
+ python3 -c "import json; d=json.load(open('A/package.json')); print('runtime:', d.get('dependencies',{})); print('dev:', d.get('devDependencies',{}))"
762
+ python3 -c "import json; d=json.load(open('B/package.json')); print('runtime:', d.get('dependencies',{})); print('dev:', d.get('devDependencies',{}))"
763
+
764
+ # 4. Verify code path hit the marker you expected
765
+ grep -nE "API Pattern|response ID tracked|previous_response_id" /tmp/cortex-server.log | head -20
766
+
767
+ # 5. Tally all three in a comparison table. Ship if three-way agreement, investigate if not.
768
+ ```
769
+
770
+ ## Headless Command Systems
771
+
772
+ Two `/command` systems work in headless mode. The 70+ interactive CLI commands (`/model`, `/session`, `/debug`, `/theme`, etc.) are CLI-only and do NOT work headlessly.
773
+
774
+ ### System 1: SlashCommand Tool (`.cortex/commands/`)
775
+
776
+ The model calls the `SlashCommand` tool with `{command: "/name arg1 arg2"}`. The executor loads `.cortex/commands/name.md`, substitutes `$1`/`$2` with args, and returns the expanded body as tool output. The model then follows the instructions.
777
+
778
+ **File format:**
779
+ ```markdown
780
+ ---
781
+ description: What this command does
782
+ argument-hint: [arg1] [arg2]
783
+ ---
784
+
785
+ Body with $1 and $2 placeholders.
786
+ Unused placeholders ($3, $99) are stripped.
787
+ ```
788
+
789
+ **Location:** `<repo-root>/.cortex/commands/`
790
+ **Executor:** `packages/executors/src/implementations/extensions/SlashCommandTool.ts`
791
+ **Name from:** filename without `.md` extension (e.g., `review.md` → `/review`)
792
+ **Subdirectories:** supported — `deployment/deploy-prod.md` → `/deploy-prod`
793
+ **Cache:** in-memory, cleared on server restart
794
+
795
+ **Installed commands:**
796
+ | Command | Args | Purpose |
797
+ |---------|------|---------|
798
+ | `/review` | `[path]` | Code review with actionable feedback |
799
+ | `/test` | `[package-path]` | Run tests and report results |
800
+ | `/diff` | `[path]` | Analyze uncommitted git changes |
801
+ | `/deps` | `[path]` | Dependency and import analysis |
802
+ | `/find-bug` | `[error-or-symptom]` | Root cause investigation |
803
+ | `/explain` | `[path-or-function]` | End-to-end code explanation |
804
+ | `/profile` | `[task]` | Token/iteration performance metrics |
805
+ | `/compare` | `[path-a] [path-b]` | Side-by-side comparison |
806
+ | `/search` | `[term]` | Deep codebase search |
807
+
808
+ **To create a new command:** Write a `.md` file to `.cortex/commands/` with the frontmatter format above. It becomes available as soon as the in-memory command cache reloads — on the next server restart, or right away if you invoke any unknown command to trigger a reload.
809
+
810
+ ### System 2: Skill Tool (`.agents/skills/`)
811
+
812
+ The model calls the `Skill` tool with `{skill: "name"}`. Skills are YAML-frontmatter markdown files that provide knowledge and instructions, not argument-substituted templates.
813
+
814
+ **File format:**
815
+ ```markdown
816
+ ---
817
+ name: skill-name
818
+ description: What this skill provides
819
+ triggers:
820
+ - keyword1
821
+ - keyword2
822
+ ---
823
+
824
+ # Knowledge content, instructions, patterns, reference material
825
+ ```
826
+
827
+ **Locations searched:**
828
+ - `.agents/skills/*/SKILL.md` (project)
829
+ - `.claude/skills/*/SKILL.md` (symlinks)
830
+ - `~/.claude/skills/*/SKILL.md` (global)
831
+
832
+ **Key difference from SlashCommand:** Skills provide context/knowledge injected into the conversation. SlashCommands are task templates with argument substitution that produce tool output.
833
+
834
+ **To create a new skill:** Create `.agents/skills/name/SKILL.md` with frontmatter, then symlink from `.claude/skills/name` → `../../.agents/skills/name`.