reasonix 0.47.0 → 0.47.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/cli/{acp-QK3DMC53.js → acp-GEOAKSTU.js} +21 -49
  2. package/dist/cli/acp-GEOAKSTU.js.map +1 -0
  3. package/dist/cli/{chat-VV5UWY4V.js → chat-YTPATMMG.js} +23 -23
  4. package/dist/cli/{chunk-FDKOUJKZ.js → chunk-2XY77LW7.js} +7 -7
  5. package/dist/cli/{chunk-QVDWH2A2.js → chunk-4MFCAZ2W.js} +3 -3
  6. package/dist/cli/{chunk-24A7FHGJ.js → chunk-6CRPCJAU.js} +14 -1
  7. package/dist/cli/chunk-6CRPCJAU.js.map +1 -0
  8. package/dist/cli/{chunk-VKYSZKH2.js → chunk-6QC5RQLE.js} +2 -2
  9. package/dist/cli/chunk-BQ6HC66J.js +530 -0
  10. package/dist/cli/chunk-BQ6HC66J.js.map +1 -0
  11. package/dist/cli/{chunk-OJVITDGB.js → chunk-CCJAP7G3.js} +2 -2
  12. package/dist/cli/{chunk-R6GQKKBW.js → chunk-CNG32VAB.js} +2 -2
  13. package/dist/cli/{chunk-QVUFWDD2.js → chunk-DN4B5S6Y.js} +2 -2
  14. package/dist/cli/{chunk-LBLR4CUZ.js → chunk-DQ6K5ZQ7.js} +2 -2
  15. package/dist/cli/{chunk-VNQGCA3Q.js → chunk-DWPAKZTY.js} +14 -3
  16. package/dist/cli/chunk-DWPAKZTY.js.map +1 -0
  17. package/dist/cli/{chunk-BWYVFFKR.js → chunk-GH7DC2Y5.js} +2 -2
  18. package/dist/cli/{chunk-BYYVYJDX.js → chunk-HUILPCYX.js} +3 -3
  19. package/dist/cli/{chunk-ICAFSZHS.js → chunk-JBH5RM7X.js} +174 -65
  20. package/dist/cli/chunk-JBH5RM7X.js.map +1 -0
  21. package/dist/cli/{chunk-K6GUKSXH.js → chunk-KVZZ5U75.js} +2 -2
  22. package/dist/cli/{chunk-WF7TPVZM.js → chunk-KYQVQ5X4.js} +84 -9
  23. package/dist/cli/chunk-KYQVQ5X4.js.map +1 -0
  24. package/dist/cli/{chunk-KDRUEXII.js → chunk-NRQ5UP5T.js} +20 -6
  25. package/dist/cli/chunk-NRQ5UP5T.js.map +1 -0
  26. package/dist/cli/{chunk-VJMBISEI.js → chunk-QCFLPSPH.js} +2 -2
  27. package/dist/cli/{chunk-YDPLF7XR.js → chunk-T5A7EY6B.js} +2 -2
  28. package/dist/cli/{chunk-VMUUFWFF.js → chunk-TDHXB2ER.js} +2 -2
  29. package/dist/cli/{chunk-GDKB2PPK.js → chunk-TRSAHHCL.js} +107 -11
  30. package/dist/cli/chunk-TRSAHHCL.js.map +1 -0
  31. package/dist/cli/{chunk-6J6BSUCR.js → chunk-TRWHTFG7.js} +2 -2
  32. package/dist/cli/{chunk-VC2CQA5D.js → chunk-XD6P7AFH.js} +26 -29
  33. package/dist/cli/chunk-XD6P7AFH.js.map +1 -0
  34. package/dist/cli/{chunk-ICSYGIPN.js → chunk-XMHP7BEE.js} +421 -80
  35. package/dist/cli/chunk-XMHP7BEE.js.map +1 -0
  36. package/dist/cli/{chunk-COWPEX54.js → chunk-YFP3MYMY.js} +5 -5
  37. package/dist/cli/{chunk-CI2PF5QX.js → chunk-ZXSCAODE.js} +8 -8
  38. package/dist/cli/{chunk-CI2PF5QX.js.map → chunk-ZXSCAODE.js.map} +1 -1
  39. package/dist/cli/{code-C24TUAE5.js → code-Q4NRVEDG.js} +29 -27
  40. package/dist/cli/code-Q4NRVEDG.js.map +1 -0
  41. package/dist/cli/{commands-RR3GIYOK.js → commands-4CDI4GFM.js} +4 -4
  42. package/dist/cli/{commit-FSHPIINM.js → commit-GW7LDQP5.js} +3 -3
  43. package/dist/cli/{desktop-7NCHPEFB.js → desktop-EG6P5SF2.js} +80 -22
  44. package/dist/cli/desktop-EG6P5SF2.js.map +1 -0
  45. package/dist/cli/{diff-RAAHHLHV.js → diff-VI2YX4FN.js} +8 -8
  46. package/dist/cli/{doctor-PKVQIXRT.js → doctor-CQTTZP27.js} +8 -8
  47. package/dist/cli/index.js +45 -37
  48. package/dist/cli/index.js.map +1 -1
  49. package/dist/cli/{mcp-CRJ26PP4.js → mcp-J2UCD4RZ.js} +2 -2
  50. package/dist/cli/{mcp-browse-QPAOWZOP.js → mcp-browse-GSX34JEK.js} +2 -2
  51. package/dist/cli/{mcp-inspect-CVCLABRS.js → mcp-inspect-RRFYF4ZV.js} +2 -2
  52. package/dist/cli/{prompt-SKYXERSI.js → prompt-5TQPIVHV.js} +3 -3
  53. package/dist/cli/{replay-KPDW2ZMJ.js → replay-MJCEMODU.js} +8 -8
  54. package/dist/cli/{run-WIKDIXTG.js → run-P4D5VDYE.js} +13 -13
  55. package/dist/cli/{server-P6V2G3P6.js → server-C25JNNZV.js} +11 -11
  56. package/dist/cli/{sessions-2NULRMSA.js → sessions-QIONZJQ6.js} +12 -12
  57. package/dist/cli/{setup-Y5WDBQFL.js → setup-NLQ6G5G4.js} +6 -6
  58. package/dist/cli/setup-NLQ6G5G4.js.map +1 -0
  59. package/dist/cli/{stats-T7BL2YOR.js → stats-DFZEXHP4.js} +6 -6
  60. package/dist/cli/{version-3KWDNWLN.js → version-GR3X3MPI.js} +12 -12
  61. package/dist/index.d.ts +40 -48
  62. package/dist/index.js +286 -237
  63. package/dist/index.js.map +1 -1
  64. package/package.json +3 -1
  65. package/dist/cli/acp-QK3DMC53.js.map +0 -1
  66. package/dist/cli/chunk-24A7FHGJ.js.map +0 -1
  67. package/dist/cli/chunk-GDKB2PPK.js.map +0 -1
  68. package/dist/cli/chunk-ICAFSZHS.js.map +0 -1
  69. package/dist/cli/chunk-ICSYGIPN.js.map +0 -1
  70. package/dist/cli/chunk-KDRUEXII.js.map +0 -1
  71. package/dist/cli/chunk-UDVFBEXC.js +0 -642
  72. package/dist/cli/chunk-UDVFBEXC.js.map +0 -1
  73. package/dist/cli/chunk-VC2CQA5D.js.map +0 -1
  74. package/dist/cli/chunk-VNQGCA3Q.js.map +0 -1
  75. package/dist/cli/chunk-WF7TPVZM.js.map +0 -1
  76. package/dist/cli/code-C24TUAE5.js.map +0 -1
  77. package/dist/cli/desktop-7NCHPEFB.js.map +0 -1
  78. package/dist/cli/setup-Y5WDBQFL.js.map +0 -1
  79. /package/dist/cli/{chat-VV5UWY4V.js.map → chat-YTPATMMG.js.map} +0 -0
  80. /package/dist/cli/{chunk-FDKOUJKZ.js.map → chunk-2XY77LW7.js.map} +0 -0
  81. /package/dist/cli/{chunk-QVDWH2A2.js.map → chunk-4MFCAZ2W.js.map} +0 -0
  82. /package/dist/cli/{chunk-VKYSZKH2.js.map → chunk-6QC5RQLE.js.map} +0 -0
  83. /package/dist/cli/{chunk-OJVITDGB.js.map → chunk-CCJAP7G3.js.map} +0 -0
  84. /package/dist/cli/{chunk-R6GQKKBW.js.map → chunk-CNG32VAB.js.map} +0 -0
  85. /package/dist/cli/{chunk-QVUFWDD2.js.map → chunk-DN4B5S6Y.js.map} +0 -0
  86. /package/dist/cli/{chunk-LBLR4CUZ.js.map → chunk-DQ6K5ZQ7.js.map} +0 -0
  87. /package/dist/cli/{chunk-BWYVFFKR.js.map → chunk-GH7DC2Y5.js.map} +0 -0
  88. /package/dist/cli/{chunk-BYYVYJDX.js.map → chunk-HUILPCYX.js.map} +0 -0
  89. /package/dist/cli/{chunk-K6GUKSXH.js.map → chunk-KVZZ5U75.js.map} +0 -0
  90. /package/dist/cli/{chunk-VJMBISEI.js.map → chunk-QCFLPSPH.js.map} +0 -0
  91. /package/dist/cli/{chunk-YDPLF7XR.js.map → chunk-T5A7EY6B.js.map} +0 -0
  92. /package/dist/cli/{chunk-VMUUFWFF.js.map → chunk-TDHXB2ER.js.map} +0 -0
  93. /package/dist/cli/{chunk-6J6BSUCR.js.map → chunk-TRWHTFG7.js.map} +0 -0
  94. /package/dist/cli/{chunk-COWPEX54.js.map → chunk-YFP3MYMY.js.map} +0 -0
  95. /package/dist/cli/{commands-RR3GIYOK.js.map → commands-4CDI4GFM.js.map} +0 -0
  96. /package/dist/cli/{commit-FSHPIINM.js.map → commit-GW7LDQP5.js.map} +0 -0
  97. /package/dist/cli/{diff-RAAHHLHV.js.map → diff-VI2YX4FN.js.map} +0 -0
  98. /package/dist/cli/{doctor-PKVQIXRT.js.map → doctor-CQTTZP27.js.map} +0 -0
  99. /package/dist/cli/{mcp-CRJ26PP4.js.map → mcp-J2UCD4RZ.js.map} +0 -0
  100. /package/dist/cli/{mcp-browse-QPAOWZOP.js.map → mcp-browse-GSX34JEK.js.map} +0 -0
  101. /package/dist/cli/{mcp-inspect-CVCLABRS.js.map → mcp-inspect-RRFYF4ZV.js.map} +0 -0
  102. /package/dist/cli/{prompt-SKYXERSI.js.map → prompt-5TQPIVHV.js.map} +0 -0
  103. /package/dist/cli/{replay-KPDW2ZMJ.js.map → replay-MJCEMODU.js.map} +0 -0
  104. /package/dist/cli/{run-WIKDIXTG.js.map → run-P4D5VDYE.js.map} +0 -0
  105. /package/dist/cli/{server-P6V2G3P6.js.map → server-C25JNNZV.js.map} +0 -0
  106. /package/dist/cli/{sessions-2NULRMSA.js.map → sessions-QIONZJQ6.js.map} +0 -0
  107. /package/dist/cli/{stats-T7BL2YOR.js.map → stats-DFZEXHP4.js.map} +0 -0
  108. /package/dist/cli/{version-3KWDNWLN.js.map → version-GR3X3MPI.js.map} +0 -0
package/dist/index.js CHANGED
@@ -579,6 +579,7 @@ function webSearchEngine(path2 = defaultConfigPath()) {
579
579
  const cfg = readConfig(path2).webSearchEngine;
580
580
  if (cfg === "searxng") return "searxng";
581
581
  if (cfg === "metaso") return "metaso";
582
+ if (cfg === "tavily") return "tavily";
582
583
  return "mojeek";
583
584
  }
584
585
  function webSearchEndpoint(path2 = defaultConfigPath()) {
@@ -805,7 +806,7 @@ var DeepSeekClient = class {
805
806
  if (opts.temperature !== void 0) payload.temperature = opts.temperature;
806
807
  if (opts.maxTokens !== void 0) payload.max_tokens = opts.maxTokens;
807
808
  if (opts.responseFormat) payload.response_format = opts.responseFormat;
808
- if (opts.thinking) {
809
+ if (opts.thinking && !this._isAzureEndpoint()) {
809
810
  payload.extra_body = { thinking: { type: opts.thinking } };
810
811
  }
811
812
  if (opts.reasoningEffort) {
@@ -813,6 +814,17 @@ var DeepSeekClient = class {
813
814
  }
814
815
  return payload;
815
816
  }
817
+ /** Azure OpenAI-compatible endpoints do not accept DeepSeek's proprietary
818
+ * `extra_body.thinking` field (they reject the request with 400). We still
819
+ * send `reasoning_effort`, which Azure *does* support. */
820
+ _isAzureEndpoint() {
821
+ try {
822
+ const host = new URL(this.baseUrl).hostname;
823
+ return host === "azure.com" || host.endsWith(".azure.com");
824
+ } catch {
825
+ return false;
826
+ }
827
+ }
816
828
  /** Returns null on failure so callers can degrade — session must keep working without balance UI. */
817
829
  async getBalance(opts = {}) {
818
830
  try {
@@ -1543,6 +1555,7 @@ var EN = {
1543
1555
  reviewSaveError: "Could not save config: {message}",
1544
1556
  reviewFooter: "[Enter] save \xB7 [Esc] cancel",
1545
1557
  savedTitle: "\u25B8 Saved.",
1558
+ savedShellHint: "Shell commands the model wants to run ask each time \u2014 pick `allow always` on the prompt to whitelist that exact command for this project. No global allow-all flag by design.",
1546
1559
  savedFooter: "[Enter] to exit",
1547
1560
  selectFooter: "[\u2191\u2193] navigate \xB7 [Enter] confirm \xB7 [Esc] cancel",
1548
1561
  stepCounter: "Step {step}/{total} \xB7 ",
@@ -1607,6 +1620,8 @@ var EN = {
1607
1620
  title: "Checkpoint \u2014 step done",
1608
1621
  continue: "Continue \u2014 run the next step",
1609
1622
  continueHint: "Model resumes with the next step.",
1623
+ finish: "Finish \u2014 summarize and close",
1624
+ finishHint: "Model records the final step and summarizes the completed plan.",
1610
1625
  revise: "Revise \u2014 give feedback before the next step",
1611
1626
  reviseHint: "Stay paused, type guidance; model adjusts the remaining plan.",
1612
1627
  stop: "Stop \u2014 end the plan here",
@@ -1747,6 +1762,10 @@ var EN = {
1747
1762
  helpShellDetail: " the conversation so the model sees it next turn.",
1748
1763
  helpShellConsent: " No allowlist gate \u2014 user-typed = explicit consent.",
1749
1764
  helpShellExample: " Example: !git status !ls src/ !npm test",
1765
+ helpShellGateTitle: "Model-invoked shell commands (per-call approval):",
1766
+ helpShellGate: " \u2191\u2193 + \u23CE each call shows a prompt with `allow once` / `allow always`",
1767
+ helpShellGateDetail: " / `deny`. Pick `allow always` to whitelist that exact",
1768
+ helpShellGatePolicy: " command prefix for this project. No global allow-all flag.",
1750
1769
  helpMemoryTitle: "Quick memory:",
1751
1770
  helpMemoryPin: " #<note> append <note> to <project>/REASONIX.md (committable).",
1752
1771
  helpMemoryPinEx: " Example: #findByEmail must be case-insensitive",
@@ -2058,7 +2077,7 @@ var EN = {
2058
2077
  changesNoteShort: "Changes take effect on next /new or launch. Subcommands: /memory list | show | forget | clear"
2059
2078
  },
2060
2079
  mcp: {
2061
- noServers: 'no MCP servers attached. Run `reasonix setup` to pick some, or launch with --mcp "<spec>". `reasonix mcp list` shows the catalog.',
2080
+ noServers: 'no MCP servers attached. Run `reasonix setup` to pick some, or launch with --mcp "<spec>". `reasonix mcp list` shows the catalog. Note: model-invoked shell commands are gated per-call (allow once / allow always / deny) \u2014 no global allow-all flag.',
2062
2081
  toolsLabel: " tools {count}",
2063
2082
  resourcesHint: "`/resource` to browse+read",
2064
2083
  promptsHint: "`/prompt` to browse+fetch",
@@ -2489,7 +2508,7 @@ var EN = {
2489
2508
  slow: "slow \xB7 {ms}ms",
2490
2509
  verySlow: "very slow \xB7 {ms}ms",
2491
2510
  slowToast: "\u26A0 MCP `{name}` slow \xB7 {seconds}s p95 over the last {sampleSize} calls",
2492
- emptyHint: "\u2139 no MCP servers configured \u2014 try: `reasonix setup` to re-pick, or `reasonix mcp install filesystem`"
2511
+ emptyHint: "\u2139 no MCP servers configured \u2014 try: `reasonix setup` to re-pick, or `reasonix mcp install filesystem` \xB7 shell commands gate per-call (allow once / allow always / deny), no global allow-all"
2493
2512
  },
2494
2513
  denyContextInput: {
2495
2514
  description: "Tell the agent why you denied this. The next attempt will see your reason as additional context."
@@ -3050,6 +3069,7 @@ var zhCN = {
3050
3069
  reviewSaveError: "\u4FDD\u5B58\u914D\u7F6E\u5931\u8D25\uFF1A{message}",
3051
3070
  reviewFooter: "[Enter] \u4FDD\u5B58 \xB7 [Esc] \u53D6\u6D88",
3052
3071
  savedTitle: "\u25B8 \u5DF2\u4FDD\u5B58\u3002",
3072
+ savedShellHint: "\u6A21\u578B\u53D1\u8D77\u7684 shell \u547D\u4EE4\u6BCF\u6B21\u90FD\u4F1A\u5F39\u51FA\u786E\u8BA4 \u2014\u2014 \u5728\u63D0\u793A\u6846\u91CC\u9009 `allow always` \u53EF\u5C06\u8BE5\u547D\u4EE4\u524D\u7F00\u52A0\u5165\u672C\u9879\u76EE\u767D\u540D\u5355\u3002\u8BBE\u8BA1\u4E0A\u6CA1\u6709\u300C\u5168\u5C40\u653E\u884C\u300D\u5F00\u5173\u3002",
3053
3073
  savedFooter: "[Enter] \u9000\u51FA",
3054
3074
  selectFooter: "[\u2191\u2193] \u79FB\u52A8 \xB7 [Enter] \u786E\u8BA4 \xB7 [Esc] \u53D6\u6D88",
3055
3075
  stepCounter: "\u6B65\u9AA4 {step}/{total} \xB7 ",
@@ -3114,6 +3134,8 @@ var zhCN = {
3114
3134
  title: "\u68C0\u67E5\u70B9 \u2014\u2014 \u5F53\u524D\u6B65\u9AA4\u5DF2\u5B8C\u6210",
3115
3135
  continue: "\u7EE7\u7EED \u2014\u2014 \u6267\u884C\u4E0B\u4E00\u6B65",
3116
3136
  continueHint: "\u6A21\u578B\u4ECE\u4E0B\u4E00\u6B65\u7EE7\u7EED\u3002",
3137
+ finish: "\u5B8C\u6210 \u2014\u2014 \u603B\u7ED3\u5E76\u6536\u5C3E",
3138
+ finishHint: "\u6A21\u578B\u8BB0\u5F55\u6700\u540E\u4E00\u6B65\uFF0C\u7136\u540E\u603B\u7ED3\u5DF2\u5B8C\u6210\u7684\u8BA1\u5212\u3002",
3117
3139
  revise: "\u8C03\u6574 \u2014\u2014 \u5728\u4E0B\u4E00\u6B65\u524D\u7ED9\u53CD\u9988",
3118
3140
  reviseHint: "\u5148\u6682\u505C\uFF0C\u8F93\u5165\u6307\u5F15\uFF1B\u6A21\u578B\u4F1A\u8C03\u6574\u5269\u4F59\u8BA1\u5212\u3002",
3119
3141
  stop: "\u505C\u6B62 \u2014\u2014 \u5728\u6B64\u7ED3\u675F\u8BA1\u5212",
@@ -3254,6 +3276,10 @@ var zhCN = {
3254
3276
  helpShellDetail: " \u4EE5\u4FBF\u6A21\u578B\u5728\u4E0B\u4E00\u8F6E\u770B\u5230\u3002\u65E0\u5141\u8BB8\u5217\u8868\u9650\u5236\u3002",
3255
3277
  helpShellConsent: " \u7528\u6237\u8F93\u5165 = \u660E\u786E\u540C\u610F\u3002",
3256
3278
  helpShellExample: " \u793A\u4F8B\uFF1A!git status !ls src/ !npm test",
3279
+ helpShellGateTitle: "\u6A21\u578B\u53D1\u8D77\u7684 shell \u547D\u4EE4\uFF08\u6309\u6B21\u5BA1\u6279\uFF09\uFF1A",
3280
+ helpShellGate: " \u2191\u2193 + \u23CE \u6BCF\u6B21\u90FD\u4F1A\u5F39\u51FA `allow once` / `allow always` /",
3281
+ helpShellGateDetail: " `deny` \u4E09\u9009\u4E00\u3002\u9009 `allow always` \u53EF\u5C06\u8BE5\u547D\u4EE4\u524D\u7F00",
3282
+ helpShellGatePolicy: " \u52A0\u5165\u672C\u9879\u76EE\u767D\u540D\u5355\u3002\u8BBE\u8BA1\u4E0A\u6CA1\u6709\u300C\u5168\u5C40\u653E\u884C\u300D\u5F00\u5173\u3002",
3257
3283
  helpMemoryTitle: "\u5FEB\u901F\u8BB0\u5FC6\uFF1A",
3258
3284
  helpMemoryPin: " #<note> \u5C06 <note> \u8FFD\u52A0\u5230 <project>/REASONIX.md\uFF08\u53EF\u63D0\u4EA4\uFF09\u3002",
3259
3285
  helpMemoryPinEx: " \u793A\u4F8B\uFF1A#findByEmail \u5FC5\u987B\u533A\u5206\u5927\u5C0F\u5199",
@@ -3565,7 +3591,7 @@ var zhCN = {
3565
3591
  changesNoteShort: "\u66F4\u6539\u5728\u4E0B\u6B21 /new \u6216\u542F\u52A8\u65F6\u751F\u6548\u3002\u5B50\u547D\u4EE4\uFF1A/memory list | show | forget | clear"
3566
3592
  },
3567
3593
  mcp: {
3568
- noServers: '\u672A\u9644\u52A0 MCP \u670D\u52A1\u5668\u3002\u8FD0\u884C `reasonix setup` \u9009\u62E9\u4E00\u4E9B\uFF0C\u6216\u4F7F\u7528 --mcp "<spec>" \u542F\u52A8\u3002`reasonix mcp list` \u663E\u793A\u76EE\u5F55\u3002',
3594
+ noServers: '\u672A\u9644\u52A0 MCP \u670D\u52A1\u5668\u3002\u8FD0\u884C `reasonix setup` \u9009\u62E9\u4E00\u4E9B\uFF0C\u6216\u4F7F\u7528 --mcp "<spec>" \u542F\u52A8\u3002`reasonix mcp list` \u663E\u793A\u76EE\u5F55\u3002\u6CE8\uFF1A\u6A21\u578B\u53D1\u8D77\u7684 shell \u547D\u4EE4\u6309\u6B21\u5BA1\u6279\uFF08allow once / allow always / deny\uFF09\uFF0C\u8BBE\u8BA1\u4E0A\u6CA1\u6709\u300C\u5168\u5C40\u653E\u884C\u300D\u5F00\u5173\u3002',
3569
3595
  toolsLabel: " \u5DE5\u5177 {count}",
3570
3596
  resourcesHint: "`/resource` \u6D4F\u89C8+\u8BFB\u53D6",
3571
3597
  promptsHint: "`/prompt` \u6D4F\u89C8+\u83B7\u53D6",
@@ -3996,7 +4022,7 @@ var zhCN = {
3996
4022
  slow: "\u7F13\u6162 \xB7 {ms}ms",
3997
4023
  verySlow: "\u975E\u5E38\u6162 \xB7 {ms}ms",
3998
4024
  slowToast: "\u26A0 MCP `{name}` \u54CD\u5E94\u7F13\u6162 \xB7 P95 {seconds}s \xB7 \u6700\u8FD1 {sampleSize} \u6B21\u8C03\u7528",
3999
- emptyHint: "\u2139 \u672A\u914D\u7F6E MCP \u670D\u52A1\u5668 \u2014\u2014 \u53EF\u5C1D\u8BD5\uFF1A`reasonix setup` \u91CD\u65B0\u9009\u62E9\uFF0C\u6216 `reasonix mcp install filesystem`"
4025
+ emptyHint: "\u2139 \u672A\u914D\u7F6E MCP \u670D\u52A1\u5668 \u2014\u2014 \u53EF\u5C1D\u8BD5\uFF1A`reasonix setup` \u91CD\u65B0\u9009\u62E9\uFF0C\u6216 `reasonix mcp install filesystem` \xB7 shell \u547D\u4EE4\u6309\u6B21\u5BA1\u6279\uFF08allow once / allow always / deny\uFF09\uFF0C\u65E0\u5168\u5C40\u653E\u884C"
4000
4026
  },
4001
4027
  denyContextInput: {
4002
4028
  description: "\u544A\u8BC9\u6A21\u578B\u4F60\u4E3A\u4EC0\u4E48\u62D2\u7EDD\u4E86\u3002\u6A21\u578B\u4E0B\u6B21\u4F1A\u770B\u5230\u4F60\u7684\u7406\u7531\u4F5C\u4E3A\u989D\u5916\u7684\u4E0A\u4E0B\u6587\u3002"
@@ -4795,10 +4821,13 @@ var ToolRegistry = class {
4795
4821
  _autoFlatten;
4796
4822
  _planMode = false;
4797
4823
  _interceptor = null;
4824
+ _interceptors = [];
4798
4825
  _auditListener = null;
4799
4826
  _resultAugmenter = null;
4800
4827
  /** Per-tool fingerprint of the last call that failed schema validation. Cleared by any successful validation for that tool. */
4801
4828
  _lastMalformed = /* @__PURE__ */ new Map();
4829
+ /** Per-tool fingerprint of the last host-side interceptor rejection. */
4830
+ _lastInterceptorRejection = /* @__PURE__ */ new Map();
4802
4831
  constructor(opts = {}) {
4803
4832
  this._autoFlatten = opts.autoFlatten !== false;
4804
4833
  }
@@ -4814,6 +4843,18 @@ var ToolRegistry = class {
4814
4843
  setToolInterceptor(fn) {
4815
4844
  this._interceptor = fn;
4816
4845
  }
4846
+ /** Ordered host-side interceptors. They run before the legacy single interceptor. */
4847
+ addToolInterceptor(id, fn) {
4848
+ const normalized = id.trim();
4849
+ if (!normalized) throw new Error("tool interceptor requires a non-empty id");
4850
+ const existing = this._interceptors.findIndex((entry) => entry.id === normalized);
4851
+ if (existing >= 0) this._interceptors.splice(existing, 1);
4852
+ this._interceptors.push({ id: normalized, fn });
4853
+ return () => {
4854
+ const idx = this._interceptors.findIndex((entry) => entry.id === normalized);
4855
+ if (idx >= 0) this._interceptors.splice(idx, 1);
4856
+ };
4857
+ }
4817
4858
  setAuditListener(fn) {
4818
4859
  this._auditListener = fn;
4819
4860
  }
@@ -4902,16 +4943,21 @@ var ToolRegistry = class {
4902
4943
  rejectedReason: "plan-mode"
4903
4944
  });
4904
4945
  }
4905
- if (this._interceptor) {
4946
+ const chain = this._interceptor ? [...this._interceptors.map((entry) => entry.fn), this._interceptor] : this._interceptors.map((entry) => entry.fn);
4947
+ for (const interceptor of chain) {
4906
4948
  try {
4907
- const short = await this._interceptor(name, args);
4908
- if (typeof short === "string") return short;
4949
+ const short = await interceptor(name, args);
4950
+ if (typeof short === "string") {
4951
+ const guarded = this._noteInterceptorRejection(name, fingerprint, short);
4952
+ return this._augmentResult(name, args, guarded);
4953
+ }
4909
4954
  } catch (err) {
4910
4955
  return JSON.stringify({
4911
4956
  error: `${name}: interceptor failed \u2014 ${err.message}`
4912
4957
  });
4913
4958
  }
4914
4959
  }
4960
+ this._lastInterceptorRejection.delete(name);
4915
4961
  if (opts.signal?.aborted) {
4916
4962
  return JSON.stringify({
4917
4963
  error: `${name}: aborted before dispatch (user interrupt)`,
@@ -4949,13 +4995,16 @@ var ToolRegistry = class {
4949
4995
  finalResult = JSON.stringify({ error: `${e.name}: ${e.message}` });
4950
4996
  }
4951
4997
  }
4998
+ return this._augmentResult(name, args, finalResult);
4999
+ }
5000
+ _augmentResult(name, args, result) {
4952
5001
  if (this._resultAugmenter) {
4953
5002
  try {
4954
- return this._resultAugmenter(name, args, finalResult);
5003
+ return this._resultAugmenter(name, args, result);
4955
5004
  } catch {
4956
5005
  }
4957
5006
  }
4958
- return finalResult;
5007
+ return result;
4959
5008
  }
4960
5009
  /** Records the failed call's fingerprint; on the 2nd consecutive identical malformed call to the same tool, returns a sharper error that tells the model to stop retrying. */
4961
5010
  _noteMalformed(name, fingerprint, detail) {
@@ -4969,7 +5018,35 @@ var ToolRegistry = class {
4969
5018
  }
4970
5019
  return JSON.stringify({ error: `${name}: ${detail}` });
4971
5020
  }
5021
+ _noteInterceptorRejection(name, fingerprint, result) {
5022
+ const reason = rejectedReason(result);
5023
+ if (!reason) {
5024
+ this._lastInterceptorRejection.delete(name);
5025
+ return result;
5026
+ }
5027
+ const key = `${reason}:${fingerprint}`;
5028
+ const prev = this._lastInterceptorRejection.get(name);
5029
+ this._lastInterceptorRejection.set(name, key);
5030
+ if (prev === key) {
5031
+ return JSON.stringify({
5032
+ error: `${name}: same call was just rejected by ${reason} \u2014 do not retry identical args. Switch to read-only exploration, submit or revise the plan, or choose a different tool call.`,
5033
+ rejectedReason: reason,
5034
+ consecutiveInterceptorRejection: true
5035
+ });
5036
+ }
5037
+ return result;
5038
+ }
4972
5039
  };
5040
+ function rejectedReason(result) {
5041
+ try {
5042
+ const parsed = JSON.parse(result);
5043
+ if (!parsed || typeof parsed !== "object") return null;
5044
+ const reason = parsed.rejectedReason;
5045
+ return typeof reason === "string" && reason ? reason : null;
5046
+ } catch {
5047
+ return null;
5048
+ }
5049
+ }
4973
5050
  function isReadOnlyCall(tool, args) {
4974
5051
  if (tool.readOnlyCheck) {
4975
5052
  try {
@@ -9045,7 +9122,7 @@ async function applyMultiEdit(rootDir, edits) {
9045
9122
  );
9046
9123
  }
9047
9124
  const le = before.includes("\r\n") ? "\r\n" : "\n";
9048
- state = { buf: before, le, hunks: [], deltaChars: 0, touched: 0 };
9125
+ state = { before, buf: before, le, hunks: [], deltaChars: 0, touched: 0 };
9049
9126
  filesByPath.set(e.abs, state);
9050
9127
  }
9051
9128
  const adaptedSearch = e.search.replace(/\r?\n/g, state.le);
@@ -9053,7 +9130,7 @@ async function applyMultiEdit(rootDir, edits) {
9053
9130
  const firstIdx = state.buf.indexOf(adaptedSearch);
9054
9131
  if (firstIdx < 0) {
9055
9132
  throw new Error(
9056
- `multi_edit: edit #${i + 1} search text not found in ${rel} \u2014 no edits applied (multi_edit is atomic)`
9133
+ `multi_edit: edit #${i + 1} search text not found in ${rel} \u2014 no edits applied`
9057
9134
  );
9058
9135
  }
9059
9136
  const nextIdx = state.buf.indexOf(adaptedSearch, firstIdx + 1);
@@ -9069,8 +9146,29 @@ ${renderEditDiff(adaptedSearch, adaptedReplace, startLine)}`);
9069
9146
  state.deltaChars += adaptedReplace.length - adaptedSearch.length;
9070
9147
  state.touched++;
9071
9148
  }
9072
- for (const [abs, state] of filesByPath) {
9073
- await fs.writeFile(abs, state.buf, "utf8");
9149
+ const attempted = [];
9150
+ try {
9151
+ for (const [abs, state] of filesByPath) {
9152
+ attempted.push({ abs, before: state.before });
9153
+ await fs.writeFile(abs, state.buf, "utf8");
9154
+ }
9155
+ } catch (writeErr) {
9156
+ const rollbackFailures = [];
9157
+ for (const item of [...attempted].reverse()) {
9158
+ try {
9159
+ await fs.writeFile(item.abs, item.before, "utf8");
9160
+ } catch (restoreErr) {
9161
+ rollbackFailures.push(`${displayRel(rootDir, item.abs)}: ${restoreErr.message}`);
9162
+ }
9163
+ }
9164
+ if (rollbackFailures.length > 0) {
9165
+ throw new Error(
9166
+ `multi_edit: write failed after partial application: ${writeErr.message}; rollback failed for ${rollbackFailures.join("; ")}`
9167
+ );
9168
+ }
9169
+ throw new Error(
9170
+ `multi_edit: write failed: ${writeErr.message}; rolled back all files that may have been modified`
9171
+ );
9074
9172
  }
9075
9173
  const fileCount = filesByPath.size;
9076
9174
  const editCount = edits.length;
@@ -9739,7 +9837,7 @@ async function searchContent(ctx, startAbs, args) {
9739
9837
  }
9740
9838
 
9741
9839
  // src/tools/filesystem.ts
9742
- var DEFAULT_OUTLINE_THRESHOLD_BYTES = 512 * 1024;
9840
+ var DEFAULT_OUTLINE_THRESHOLD_BYTES = 64 * 1024;
9743
9841
  var DEFAULT_MAX_LIST_BYTES = 256 * 1024;
9744
9842
  var HARD_MAX_FILE_BYTES = 32 * 1024 * 1024;
9745
9843
  var OUTLINE_HEAD_LINES = 80;
@@ -9881,11 +9979,7 @@ ${body}`;
9881
9979
  registry.register({
9882
9980
  name: "read_file",
9883
9981
  parallelSafe: true,
9884
- description: `Read a file under the sandbox root. Default behaviour returns FULL CONTENT for files at or under ${Math.round(DEFAULT_OUTLINE_THRESHOLD_BYTES / 1024)} KiB \u2014 trust the prompt cache, don't pre-truncate. Optional scoping:
9885
- - head: N \u2192 first N lines (cheap probe of imports / config head)
9886
- - tail: N \u2192 last N lines (recent-tail of a log)
9887
- - range: "A-B" \u2192 inclusive 1-indexed range (e.g. "120-180" around an edit site)
9888
- Files OVER the threshold auto-switch to outline mode: file metadata + first ${OUTLINE_HEAD_LINES} lines + a top-level symbol outline (TS/JS exports, Python def/class, Go func/type, Rust fn/struct/impl/trait, Markdown headings, Protobuf message/service/rpc, plain-text chapter markers) + concrete next-step commands. No middle bytes \u2014 drill in with range / search_content. Files over ${Math.round(HARD_MAX_FILE_BYTES / (1024 * 1024))} MiB are refused entirely (use grep / range). Binary files are refused \u2014 use get_file_info if you only need stat.`,
9982
+ description: `Read a file under the sandbox root. Default returns FULL CONTENT for files \u2264 ${Math.round(DEFAULT_OUTLINE_THRESHOLD_BYTES / 1024)} KiB. Optional scoping: head/tail (N lines), range "A-B" (1-indexed inclusive). Larger files auto-switch to outline mode (metadata + head + symbol outline for TS/JS/Python/Go/Rust/Markdown/Protobuf/text) \u2014 drill in with range or search_content. Files over ${Math.round(HARD_MAX_FILE_BYTES / (1024 * 1024))} MiB and binaries are refused \u2014 use get_file_info for stat.`,
9889
9983
  readOnly: true,
9890
9984
  stormExempt: true,
9891
9985
  parameters: {
@@ -10003,11 +10097,7 @@ ${slice.join("\n")}`);
10003
10097
  registry.register({
10004
10098
  name: "directory_tree",
10005
10099
  parallelSafe: true,
10006
- description: `Recursively list entries in a directory. Shows indented tree structure with directories marked '/'. Budget-aware by default:
10007
- - maxDepth defaults to 2 (root + one level). A depth-4 tree on a real repo blew ~5K tokens in one call. If you truly need deeper, pass maxDepth:N explicitly.
10008
- - Skips ${[...SKIP_DIR_NAMES].sort().join(", ")} unless include_deps:true. Traversing into node_modules / .git / dist is almost always token-waste.
10009
- - Large subtrees (>50 children) auto-collapse to "[N files, M dirs hidden \u2014 list_directory <path> to inspect]" so one huge folder can't dominate the output.
10010
- Prefer \`list_directory\` for a single-level view, \`search_files\` to find specific paths, and \`search_content\` to find code.`,
10100
+ description: `Recursively list entries with indented tree structure (dirs marked '/'). Budget-aware: maxDepth defaults to 2, large subtrees (>50 children) auto-collapse to "[N hidden \u2014 list_directory to inspect]", and ${[...SKIP_DIR_NAMES].sort().join(" / ")} are skipped unless include_deps:true. For single-level use list_directory; for path lookups use search_files; for code lookups use search_content.`,
10011
10101
  readOnly: true,
10012
10102
  parameters: {
10013
10103
  type: "object",
@@ -10108,38 +10198,38 @@ Prefer \`list_directory\` for a single-level view, \`search_files\` to find spec
10108
10198
  registry.register({
10109
10199
  name: "search_content",
10110
10200
  parallelSafe: true,
10111
- description: "Recursively grep file CONTENTS for a substring or regex. This is the right tool for 'find all places that call X', 'where is Y referenced', 'what files contain Z'. Different from search_files (which matches FILE NAMES). Returns one match per line in 'path:line: text' format. Per-file hits are capped at 30 (a footer reports any extras); when the byte budget is mostly spent the remaining files switch to a 'rel: N matches' histogram so distribution stays visible instead of one popular file drowning the rest. Pass `summary_only:true` to skip line content entirely and get just the histogram. Skips dependency / VCS / build directories (node_modules, .git, dist, build, .next, target, .venv) and binary files by default.",
10201
+ description: "Recursively grep file CONTENTS for a substring or regex \u2014 'where is X called', 'what files contain Y'. Returns one match per line as `path:line: text`. Per-file hit cap 30; when the byte budget is mostly spent, remaining files switch to a `rel: N matches` histogram. Pass `summary_only:true` for just the histogram. Skips dependency / VCS / build dirs and binary files. For file NAMES use search_files.",
10112
10202
  readOnly: true,
10113
10203
  parameters: {
10114
10204
  type: "object",
10115
10205
  properties: {
10116
10206
  pattern: {
10117
10207
  type: "string",
10118
- description: "Substring (or regex) to search file contents for."
10208
+ description: "Substring or regex."
10119
10209
  },
10120
10210
  path: {
10121
10211
  type: "string",
10122
- description: "Directory to start the search at (default: sandbox root)."
10212
+ description: "Search root (default: sandbox root)."
10123
10213
  },
10124
10214
  glob: {
10125
10215
  type: "string",
10126
- description: "Optional filename filter. Real glob when the value contains `*`, `?`, `{`, or `[` \u2014 e.g. '*.ts', '**/*.tsx', 'src/**/*.{ts,tsx}'. Plain substring otherwise \u2014 e.g. '.ts' (suffix), 'test' (anywhere in the name). Patterns containing `/` match against the path relative to the search root; otherwise just the basename."
10216
+ description: "Filename filter. Glob when it contains `*`/`?`/`{`/`[`; otherwise substring. Patterns with `/` match the path relative to the search root."
10127
10217
  },
10128
10218
  case_sensitive: {
10129
10219
  type: "boolean",
10130
- description: "When true, match case exactly. Default false (case-insensitive)."
10220
+ description: "Default false."
10131
10221
  },
10132
10222
  include_deps: {
10133
10223
  type: "boolean",
10134
- description: "When true, also search inside node_modules / .git / dist / build / etc. Off by default \u2014 most exploration questions are about the user's own code."
10224
+ description: "Also search node_modules / .git / dist / build / etc. Default off."
10135
10225
  },
10136
10226
  context: {
10137
10227
  type: "integer",
10138
- description: "Lines of context to show around each match (both before and after). Default 0 (just the matching line). Capped at 20. Output uses ripgrep style: `:` after the line number on the matching line, `-` on context lines, `--` separating non-adjacent windows."
10228
+ description: "Lines of context around each match (both sides). Default 0, capped 20. Ripgrep-style output."
10139
10229
  },
10140
10230
  summary_only: {
10141
10231
  type: "boolean",
10142
- description: "When true, skip line content and return one 'rel: N matches' line per matching file. Use for 'where does this exist at all' questions before drilling in with a targeted read_file."
10232
+ description: "Skip line content, return `rel: N matches` per file. Use for 'where does this exist at all' before drilling in."
10143
10233
  }
10144
10234
  },
10145
10235
  required: ["pattern"]
@@ -10252,7 +10342,7 @@ Prefer \`list_directory\` for a single-level view, \`search_files\` to find spec
10252
10342
  });
10253
10343
  registry.register({
10254
10344
  name: "multi_edit",
10255
- description: "Apply N SEARCH/REPLACE edits across ONE OR MORE files in a single atomic call. Edits run sequentially in array order; for edits that touch the same file, a later edit can match text inserted by an earlier one. If ANY edit fails (search not found, ambiguous match, empty search, file unreadable), NO files are written \u2014 atomic at the validation layer. Same per-edit rules as edit_file: `search` is exact text (whitespace sensitive, no regex) and must be unique in its target file at the moment that edit applies. Use this for renames spanning multiple files, cross-file refactors, or any batch where you'd otherwise loop edit_file.",
10345
+ description: "Apply N SEARCH/REPLACE edits across ONE OR MORE files in one call. Edits validate across the full batch before writing. Validation failures leave all files untouched; disk write failures trigger best-effort rollback of files that may have been modified. Per-file edits run in array order, so a later edit can match text inserted by an earlier one. Same per-edit rules as edit_file: `search` is exact text (whitespace sensitive, no regex) and must be unique in its target file at the moment that edit applies. Use this for renames spanning multiple files, cross-file refactors, or any batch where you'd otherwise loop edit_file.",
10256
10346
  parameters: {
10257
10347
  type: "object",
10258
10348
  properties: {
@@ -10409,7 +10499,7 @@ function registerMemoryTools(registry, opts = {}) {
10409
10499
  }
10410
10500
  registry.register({
10411
10501
  name: "remember",
10412
- description: "Save a memory for future sessions. Use when the user states a preference, corrects your approach, shares a non-obvious fact about this project, or explicitly asks you to remember something. Don't remember transient task state \u2014 only things worth recalling next session. The memory is written now but won't re-load into the system prompt until the next `/new` or launch.",
10502
+ description: "Save a memory for future sessions \u2014 preferences, corrections, non-obvious project facts. Not for transient task state. Loads into the system prompt on next `/new` or launch.",
10413
10503
  parameters: {
10414
10504
  type: "object",
10415
10505
  properties: {
@@ -10420,29 +10510,29 @@ function registerMemoryTools(registry, opts = {}) {
10420
10510
  scope: {
10421
10511
  type: "string",
10422
10512
  enum: ["global", "project"],
10423
- description: "'global' = applies across every project (preferences, tooling); 'project' = scoped to the current sandbox (decisions, local facts). Only available in `reasonix code`."
10513
+ description: "global = across all projects; project = current sandbox only (needs `reasonix code`)."
10424
10514
  },
10425
10515
  name: {
10426
10516
  type: "string",
10427
- description: "filename-safe identifier, 3-40 chars, alnum + _ - . (no path separators, no leading dot)."
10517
+ description: "Filename-safe id, 3-40 chars, alnum + _ - . (no separators, no leading dot)."
10428
10518
  },
10429
10519
  description: {
10430
10520
  type: "string",
10431
- description: "One-line summary shown in MEMORY.md (under ~150 chars)."
10521
+ description: "\u2264150 char one-liner shown in MEMORY.md."
10432
10522
  },
10433
10523
  content: {
10434
10524
  type: "string",
10435
- description: "Full memory body in markdown. For feedback/project types, structure as: rule/fact, then **Why:** line, then **How to apply:** line."
10525
+ description: "Markdown body. For feedback/project, structure as rule + **Why:** + **How to apply:**."
10436
10526
  },
10437
10527
  priority: {
10438
10528
  type: "string",
10439
10529
  enum: ["low", "medium", "high"],
10440
- description: "Optional per-memory priority. `high` injects the entry into a `# HIGH PRIORITY constraints` block at the top of the system prompt \u2014 use sparingly, only for hard rules the model must never violate."
10530
+ description: "`high` injects entry into HIGH PRIORITY block \u2014 use sparingly."
10441
10531
  },
10442
10532
  expires: {
10443
10533
  type: "string",
10444
10534
  enum: ["project_end"],
10445
- description: "Optional lifecycle hint. `project_end` causes `/memory clear project` to also remove this entry even when it's stored at global scope."
10535
+ description: "`project_end` lets /memory clear project remove this even at global scope."
10446
10536
  }
10447
10537
  },
10448
10538
  required: ["type", "scope", "name", "description", "content"]
@@ -10581,26 +10671,26 @@ function sanitizeOptions(raw) {
10581
10671
  function registerChoiceTool(registry, opts = {}) {
10582
10672
  registry.register({
10583
10673
  name: "ask_choice",
10584
- description: "Present 2\u20136 alternatives to the user. The principle: if the user is supposed to pick, the tool picks \u2014 you don't enumerate the choices as prose. Prose menus have no picker in this TUI, so the user gets a wall of text to scroll through and a letter to type, strictly worse than the magenta picker this tool renders. Call it whenever (a) the user has asked for options, (b) you've analyzed multiple approaches and the final call is theirs, or (c) it's a preference fork you can't resolve without them. Skip it when one option is clearly best (just do it, or submit_plan) or a free-form text answer fits (ask in prose). Keep option ids short and stable (A/B/C). Each option: title + optional summary. allowCustom=true when their real answer might not fit. Max 6 options \u2014 narrow first if more. A one-sentence lead-in before the call is fine; don't repeat the options in it.",
10674
+ description: "Render an arrow-key picker with 2\u20136 alternatives. Use when the user is supposed to pick \u2014 never enumerate choices as prose. Skip when one option is clearly best (just do it) or a free-form text answer fits. Max 6 options; set `allowCustom:true` when their real answer might not fit.",
10585
10675
  readOnly: true,
10586
10676
  parameters: {
10587
10677
  type: "object",
10588
10678
  properties: {
10589
10679
  question: {
10590
10680
  type: "string",
10591
- description: "The question to put in front of the user. One sentence. Don't repeat the options in the question text \u2014 the picker renders them separately."
10681
+ description: "One-sentence question. Don't repeat the options here \u2014 the picker renders them."
10592
10682
  },
10593
10683
  options: {
10594
10684
  type: "array",
10595
- description: "2\u20134 alternatives. Each needs a stable id and a short title; summary is optional.",
10685
+ description: "2\u20136 alternatives. Each: stable id + short title; summary optional.",
10596
10686
  items: {
10597
10687
  type: "object",
10598
10688
  properties: {
10599
- id: { type: "string", description: "Short stable id (A, B, C, or option-1)." },
10600
- title: { type: "string", description: "One-line title shown as the option label." },
10689
+ id: { type: "string", description: "Stable id (A, B, C or option-1)." },
10690
+ title: { type: "string", description: "One-line label." },
10601
10691
  summary: {
10602
10692
  type: "string",
10603
- description: "Optional. A second dimmed line with more detail. Keep under ~80 chars."
10693
+ description: "Optional dimmed second line, \u226480 chars."
10604
10694
  }
10605
10695
  },
10606
10696
  required: ["id", "title"]
@@ -10608,7 +10698,7 @@ function registerChoiceTool(registry, opts = {}) {
10608
10698
  },
10609
10699
  allowCustom: {
10610
10700
  type: "boolean",
10611
- description: "If true, the picker shows a 'Let me type my own answer' escape hatch. Default false. Turn on when the user's real answer might not fit any of your pre-defined options."
10701
+ description: "Shows a 'type my own answer' escape hatch. Default false."
10612
10702
  }
10613
10703
  },
10614
10704
  required: ["question", "options"]
@@ -10694,19 +10784,33 @@ var PlanRevisionProposedError = class extends Error {
10694
10784
  };
10695
10785
 
10696
10786
  // src/tools/plan-core.ts
10697
- var SUBMIT_PLAN_DESCRIPTION = "Submit ONE concrete plan you've already decided on. Use this for tasks that warrant a review gate \u2014 multi-file refactors, architecture changes, anything that would be expensive or confusing to undo. Skip it for small fixes (one-line typo, obvious bug with a clear fix) \u2014 just make the change. The user will either approve (you then implement it), ask for refinement, or cancel. If the user has already enabled /plan mode, writes are blocked at dispatch and you MUST use this. CRITICAL: do NOT use submit_plan to present alternative routes (A/B/C, option 1/2/3) for the user to pick from \u2014 the picker only exposes approve/refine/cancel, so a menu plan strands the user with no way to choose. For branching decisions, call `ask_choice` instead; only call submit_plan once the user has picked a direction and you have a single actionable plan. Write the plan as markdown with a one-line summary, a bulleted list of files to touch and what will change, and any risks or open questions. STRONGLY PREFERRED: pass `steps` \u2014 an array of {id, title, action, risk?} \u2014 so the UI renders a structured step list above the approval picker and tracks per-step progress. Use risk='high' for steps that touch prod data / break public APIs / are hard to undo; 'med' for non-trivial but reversible (multi-file edits, schema tweaks); 'low' for safe local work. After each step, call `mark_step_complete` so the user sees progress ticks.";
10698
- var MARK_STEP_COMPLETE_DESCRIPTION = "Mark one step of the approved plan as done. MANDATORY: call this exactly once after finishing each step, before starting the next one \u2014 skipping it leaves the user staring at `0/N done` on the resume banner even when the work is finished, and they have no way to know which steps actually ran. The TUI updates the plan card's progress in place; the count is persisted to disk so it survives session resume. After the FINAL step, write a brief reply summarizing what was done and end the turn. Pass the `stepId` from the plan's steps array, a short `result` (what you did), and optional `notes` for anything surprising (errors, scope changes, follow-ups). This tool doesn't change any files. Don't call it if the plan didn't include structured steps, and don't invent ids that weren't in the original plan. If you only realized at the end that you skipped marking steps, mark them then \u2014 late is still better than never.";
10699
- var REVISE_PLAN_DESCRIPTION = "Surgically replace the REMAINING steps of an in-flight plan. Call this when the user has given feedback at a checkpoint that warrants a structured plan change \u2014 skip a step, swap two steps, add a new step, change risk, etc. Pass: `reason` (one sentence why), `remainingSteps` (the new tail of the plan, replacing whatever steps haven't been done yet), and optional `summary` (updated one-line plan summary). Done steps are NEVER touched \u2014 keep them out of `remainingSteps`. The TUI shows a diff (removed in red, kept in gray, added in green) and the user accepts or rejects. Don't call this for trivial mid-step adjustments \u2014 just keep executing. Don't call submit_plan for revisions either \u2014 that resets the whole plan including completed steps. Use submit_plan only when the entire approach has changed; use revise_plan when the tail needs editing.";
10787
+ var SUBMIT_PLAN_DESCRIPTION = "Submit ONE concrete plan for review. The user approves / refines / cancels \u2014 write a markdown plan body and (strongly preferred) a structured `steps` array. Use for multi-file refactors, architecture changes, anything expensive to undo. Skip for small fixes. Do NOT use for A/B/C menus \u2014 the picker has no branch selector, so a menu plan strands the user; call `ask_choice` for branching decisions. See the system prompt for fuller guidance.";
10788
+ var MARK_STEP_COMPLETE_DESCRIPTION = "Mark one approved-plan step as done. Call exactly once after finishing each step, before starting the next. After the FINAL step, write a brief reply summarizing what was done and end the turn. Skip if the plan didn't include structured steps.";
10789
+ var REVISE_PLAN_DESCRIPTION = "Replace the REMAINING steps of an in-flight plan when checkpoint feedback warrants a structural change. Pass `reason`, the new `remainingSteps` tail (done steps are untouched \u2014 keep them out), and optional updated `summary`. Don't call submit_plan for revisions \u2014 it resets the whole plan.";
10700
10790
  var STEP_ITEM_SCHEMA = {
10701
10791
  type: "object",
10702
10792
  properties: {
10703
10793
  id: { type: "string", description: "Stable id, e.g. step-1." },
10704
10794
  title: { type: "string", description: "Short imperative title." },
10705
- action: { type: "string", description: "One-sentence description of the concrete action." },
10795
+ action: { type: "string", description: "One-sentence concrete action." },
10706
10796
  risk: {
10707
10797
  type: "string",
10708
10798
  enum: ["low", "med", "high"],
10709
- description: "Self-assessed risk. 'high' = hard-to-undo / touches prod / breaks API; 'med' = non-trivial but reversible; 'low' = safe local work. The UI shows a colored dot per step so the user knows where to focus review. Omit if you're unsure."
10799
+ description: "high = hard-to-undo / prod / API break; med = reversible multi-file; low = safe local. Omit if unsure."
10800
+ },
10801
+ targets: {
10802
+ type: "array",
10803
+ description: "Optional. Files/dirs/modules this step touches.",
10804
+ items: { type: "string" }
10805
+ },
10806
+ acceptance: {
10807
+ type: "string",
10808
+ description: "Optional. One-sentence completion criterion."
10809
+ },
10810
+ verification: {
10811
+ type: "array",
10812
+ description: "Optional. Verification commands/checks for this step.",
10813
+ items: { type: "string" }
10710
10814
  }
10711
10815
  },
10712
10816
  required: ["id", "title", "action"]
@@ -10728,10 +10832,42 @@ function sanitizeSteps(raw) {
10728
10832
  const step = { id, title, action };
10729
10833
  const risk = sanitizeRisk(e.risk);
10730
10834
  if (risk) step.risk = risk;
10835
+ const targets = sanitizeStringList(e.targets);
10836
+ if (targets) step.targets = targets;
10837
+ const acceptance = typeof e.acceptance === "string" ? e.acceptance.trim() : "";
10838
+ if (acceptance) step.acceptance = acceptance;
10839
+ const verification = sanitizeStringList(e.verification);
10840
+ if (verification) step.verification = verification;
10731
10841
  steps.push(step);
10732
10842
  }
10733
10843
  return steps.length > 0 ? steps : void 0;
10734
10844
  }
10845
+ function sanitizeStringList(raw) {
10846
+ if (!Array.isArray(raw)) return void 0;
10847
+ const out = raw.map((entry) => typeof entry === "string" ? entry.trim() : "").filter((entry) => entry.length > 0);
10848
+ return out.length > 0 ? out : void 0;
10849
+ }
10850
+ function sanitizeEvidence(raw) {
10851
+ if (!Array.isArray(raw)) return void 0;
10852
+ const out = [];
10853
+ for (const item of raw) {
10854
+ if (!item || typeof item !== "object") continue;
10855
+ const e = item;
10856
+ const kind = e.kind;
10857
+ if (kind !== "verification" && kind !== "diff" && kind !== "checkpoint" && kind !== "manual") {
10858
+ continue;
10859
+ }
10860
+ const summary = typeof e.summary === "string" ? e.summary.trim() : "";
10861
+ if (!summary) continue;
10862
+ const evidence = { kind, summary };
10863
+ const command = typeof e.command === "string" ? e.command.trim() : "";
10864
+ if (command) evidence.command = command;
10865
+ const paths = sanitizeStringList(e.paths);
10866
+ if (paths) evidence.paths = paths;
10867
+ out.push(evidence);
10868
+ }
10869
+ return out.length > 0 ? out : void 0;
10870
+ }
10735
10871
  function registerSubmitPlan(registry, opts) {
10736
10872
  registry.register({
10737
10873
  name: "submit_plan",
@@ -10742,16 +10878,16 @@ function registerSubmitPlan(registry, opts) {
10742
10878
  properties: {
10743
10879
  plan: {
10744
10880
  type: "string",
10745
- description: "Markdown-formatted plan. Lead with a one-sentence summary. Then a file-by-file breakdown of what you'll change and why. Flag any risks or open questions at the end so the user can weigh in before you start."
10881
+ description: "Markdown plan: one-line summary, file-by-file breakdown, risks/open questions."
10746
10882
  },
10747
10883
  steps: {
10748
10884
  type: "array",
10749
- description: "Structured step list (strongly recommended). When provided, the UI renders a compact step list above the approval picker AND tracks per-step progress via `mark_step_complete`. Use stable ids (step-1, step-2, ...). Skip only for tiny one-step plans where the markdown body is enough.",
10885
+ description: "Structured step list \u2014 strongly recommended for >1 step. Stable ids (step-1, step-2, ...).",
10750
10886
  items: STEP_ITEM_SCHEMA
10751
10887
  },
10752
10888
  summary: {
10753
10889
  type: "string",
10754
- description: "Optional. One-sentence human-friendly title for the plan, ~80 chars max. Surfaces in the PlanConfirm picker header and in /plans listings ('\u25B8 refactor auth into signed tokens \xB7 2/5 done'). Skip for trivial plans where the first line of the markdown body is already short and clear."
10890
+ description: "Optional ~80-char plan title for the picker header and /plans listings."
10755
10891
  }
10756
10892
  },
10757
10893
  required: ["plan"]
@@ -10789,19 +10925,33 @@ function registerMarkStepComplete(registry, opts) {
10789
10925
  properties: {
10790
10926
  stepId: {
10791
10927
  type: "string",
10792
- description: "The id of the step being marked complete. Must match one from submit_plan's steps array."
10928
+ description: "Step id from submit_plan's steps array."
10793
10929
  },
10794
10930
  title: {
10795
10931
  type: "string",
10796
- description: "Optional. The step's title, echoed back for the UI. If omitted, the UI falls back to the id."
10932
+ description: "Optional. Echoed for the UI; falls back to id."
10797
10933
  },
10798
10934
  result: {
10799
10935
  type: "string",
10800
- description: "One-sentence summary of what was done for this step."
10936
+ description: "One-sentence summary of what was done."
10801
10937
  },
10802
10938
  notes: {
10803
10939
  type: "string",
10804
- description: "Optional. Anything surprising \u2014 blockers hit, assumptions revised, follow-ups for later steps."
10940
+ description: "Optional. Surprises \u2014 blockers, revised assumptions, follow-ups."
10941
+ },
10942
+ evidence: {
10943
+ type: "array",
10944
+ description: "Optional. Verification summary / diff / checkpoint ref / manual note.",
10945
+ items: {
10946
+ type: "object",
10947
+ properties: {
10948
+ kind: { type: "string", enum: ["verification", "diff", "checkpoint", "manual"] },
10949
+ summary: { type: "string" },
10950
+ command: { type: "string" },
10951
+ paths: { type: "array", items: { type: "string" } }
10952
+ },
10953
+ required: ["kind", "summary"]
10954
+ }
10805
10955
  }
10806
10956
  },
10807
10957
  required: ["stepId", "result"]
@@ -10819,9 +10969,15 @@ function registerMarkStepComplete(registry, opts) {
10819
10969
  }
10820
10970
  const title = typeof args?.title === "string" ? args.title.trim() || void 0 : void 0;
10821
10971
  const notes = typeof args?.notes === "string" ? args.notes.trim() || void 0 : void 0;
10972
+ const evidence = sanitizeEvidence(args?.evidence);
10973
+ const evidenceReason = opts.requireStepEvidence?.({ stepId, title });
10974
+ if (evidenceReason && (!evidence || evidence.length === 0)) {
10975
+ throw new Error(`mark_step_complete: evidence required \u2014 ${evidenceReason}`);
10976
+ }
10822
10977
  const update = { kind: "step_completed", stepId, result };
10823
10978
  if (title) update.title = title;
10824
10979
  if (notes) update.notes = notes;
10980
+ if (evidence) update.evidence = evidence;
10825
10981
  opts.onStepCompleted?.(update);
10826
10982
  const verdict = await (ctx?.confirmationGate ?? pauseGate).ask({
10827
10983
  kind: "plan_checkpoint",
@@ -10846,16 +11002,16 @@ function registerRevisePlan(registry, opts) {
10846
11002
  properties: {
10847
11003
  reason: {
10848
11004
  type: "string",
10849
- description: "One sentence explaining why you're revising \u2014 what the user asked for, what changed your assessment."
11005
+ description: "One sentence \u2014 why you're revising / what the user asked for."
10850
11006
  },
10851
11007
  remainingSteps: {
10852
11008
  type: "array",
10853
- description: "The new tail of the plan \u2014 what should run from here on. Each entry: {id, title, action, risk?}. Use stable ids; reuse old ids when a step is just being adjusted, generate new ones for genuinely new steps.",
11009
+ description: "New tail of the plan. Reuse old ids when adjusting; new ids for new steps.",
10854
11010
  items: STEP_ITEM_SCHEMA
10855
11011
  },
10856
11012
  summary: {
10857
11013
  type: "string",
10858
- description: "Optional. Updated one-line plan summary if the overall framing has shifted."
11014
+ description: "Optional. Updated one-line summary when framing has shifted."
10859
11015
  }
10860
11016
  },
10861
11017
  required: ["reason", "remainingSteps"]
@@ -10893,7 +11049,7 @@ function registerPlanTool(registry, opts = {}) {
10893
11049
  }
10894
11050
 
10895
11051
  // src/tools/todo.ts
10896
- var DESCRIPTION = 'In-session task tracker for multi-step work. NOT a plan \u2014 no approval gate, no checkpoint pauses, doesn\'t touch any files. The tool replaces the entire todo list every call (set semantics, NOT append). Pass the FULL list every time.\n\nWhen to use:\n\u2022 The task has 3+ distinct steps and you want to keep them straight as you work.\n\u2022 The user gave you a multi-part request ("do A, then B, then C").\n\u2022 You\'re partway through a long task and want to record where you are so a future you doesn\'t lose the thread.\n\nWhen NOT to use:\n\u2022 One-shot edits, single-question answers, single-tool tasks.\n\u2022 User-facing approval gates \u2192 that\'s `submit_plan`.\n\u2022 Branching choices \u2192 that\'s `ask_choice`.\n\nRules:\n\u2022 Exactly ONE todo may have status:"in_progress" at a time (or zero \u2014 between steps).\n\u2022 Mark a todo "completed" the moment it\'s actually done \u2014 don\'t batch.\n\u2022 Each todo: `content` (imperative, e.g. "Add tests"), `activeForm` (gerund shown while running, e.g. "Adding tests"), `status`.\n\u2022 Empty `todos:[]` is allowed \u2014 it clears the list when work is fully done.';
11052
+ var DESCRIPTION = "In-session task tracker for 3+ step work. NOT a plan \u2014 no approval gate, no checkpoint, no files touched. Each call REPLACES the entire list (set semantics) \u2014 pass the FULL list. Exactly one item may be in_progress at a time; flip to completed the moment that step's done. Pass `[]` to clear. For approval gates use submit_plan; for branching choices use ask_choice.";
10897
11053
  function validateTodos(raw) {
10898
11054
  if (!Array.isArray(raw)) {
10899
11055
  throw new Error("todo_write: `todos` must be an array");
@@ -12284,8 +12440,13 @@ var OutputBuffer = class {
12284
12440
  };
12285
12441
 
12286
12442
  // src/tools/shell/parse.ts
12287
- import { homedir as homedir6 } from "os";
12443
+ import { homedir as homedir7 } from "os";
12288
12444
  import * as pathMod8 from "path";
12445
+
12446
+ // packages/core-utils/src/tildeify.ts
12447
+ import { homedir as homedir6 } from "os";
12448
+
12449
+ // src/tools/shell/parse.ts
12289
12450
  var BUILTIN_ALLOWLIST = [
12290
12451
  // Repo inspection
12291
12452
  "git status",
@@ -12485,12 +12646,12 @@ function resolveSensitivePath(token, projectRoot) {
12485
12646
  return null;
12486
12647
  let expanded = token;
12487
12648
  if (expanded.startsWith("~")) {
12488
- expanded = pathMod8.join(homedir6(), expanded.slice(1));
12649
+ expanded = pathMod8.join(homedir7(), expanded.slice(1));
12489
12650
  }
12490
12651
  return pathMod8.resolve(projectRoot, expanded);
12491
12652
  }
12492
12653
  function expandPrefix(prefix) {
12493
- if (prefix.startsWith("~")) return pathMod8.join(homedir6(), prefix.slice(1));
12654
+ if (prefix.startsWith("~")) return pathMod8.join(homedir7(), prefix.slice(1));
12494
12655
  return pathMod8.resolve(prefix);
12495
12656
  }
12496
12657
  function pathStartsWithPrefix(normalized, prefix) {
@@ -12863,7 +13024,7 @@ function registerShellTools(registry, opts) {
12863
13024
  const isAllowAll = typeof opts.allowAll === "function" ? opts.allowAll : () => opts.allowAll === true;
12864
13025
  registry.register({
12865
13026
  name: "run_command",
12866
- description: "Run a shell command in the project root; returns combined stdout+stderr. Allowlisted read-only / test / lint / typecheck commands run immediately; anything that could mutate state, install deps, or touch the network is gated by user confirmation. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.\n\nConstraints (no real shell \u2014 argv is parsed natively for cross-platform parity):\n\u2022 Supported: chain ops `|` / `||` / `&&` / `;` (each segment allowlist-checked individually), file redirects `>` / `>>` / `<` / `2>` / `2>>` / `2>&1` / `&>` (target paths resolve relative to project root, max one redirect per fd per segment).\n\u2022 NOT supported: background `&`, heredoc `<<`, command substitution `$(\u2026)`, subshells `(\u2026)`, process substitution `<(\u2026)`, `$VAR` env expansion, glob expansion. To pass an operator char as literal arg, quote it (`grep \"a|b\" file`).\n\u2022 `cd` does NOT persist \u2014 between calls OR within a chain like `cd dir && cmd`. Use the binary's own cwd flag: `npm --prefix <dir>`, `git -C <dir>`, `cargo -C <dir>`, `pytest <dir>/tests`.\n\u2022 Filter at source \u2014 unbounded output (`netstat -ano`, `find /`) wastes tokens. Use `grep -c`, `wc -l`, narrower paths, etc.",
13027
+ description: 'Run a shell command in the project root; returns combined stdout+stderr. Allowlisted read-only / test / lint / typecheck commands run immediately; mutating / network / install commands gate on user confirmation.\n\nNo real shell \u2014 argv parsed natively for cross-platform parity:\n\u2022 Supported: chains `|`/`||`/`&&`/`;` (each segment allowlist-checked) and file redirects `>`/`>>`/`<`/`2>`/`2>>`/`2>&1`/`&>`.\n\u2022 Rejected: background `&`, heredoc `<<`, `$(\u2026)`, subshells, `$VAR` expansion, glob expansion. Quote operator chars as literals (`grep "a|b" file`).\n\u2022 `cd` does NOT persist \u2014 between calls OR within a chain. Use `npm --prefix <dir>`, `git -C <dir>`, `cargo -C <dir>` instead.\n\u2022 Filter at source \u2014 `grep -c` / `wc -l` / narrower paths over unbounded dumps.',
12867
13028
  // Plan-mode gate: allow allowlisted commands through (git status,
12868
13029
  // cargo check, ls, grep …) so the model can actually investigate
12869
13030
  // during planning. Anything that would otherwise trigger a
@@ -12918,7 +13079,7 @@ function registerShellTools(registry, opts) {
12918
13079
  });
12919
13080
  registry.register({
12920
13081
  name: "run_background",
12921
- description: "Spawn a long-running process and detach. Waits up to `waitSec` for startup or a readiness signal ('Local:', 'listening on', 'compiled successfully'), then returns the job id + startup preview. Tail logs with `job_output`, block on completion with `wait_for_job`, kill with `stop_job`, list with `list_jobs`.\n\nSingle process only \u2014 no chains / redirects. For subdirectories use the `cwd` parameter (workspace-relative or absolute, must stay inside the workspace root); do NOT write `cd X && cmd`, that gets rejected.\n\nUSE THIS \u2014 not run_command \u2014 for:\n- Dev servers / watchers: npm/yarn/pnpm dev, uvicorn / flask run, cargo watch, tsc --watch, webpack serve, anything with dev/serve/watch in the name.\n- One-shot long jobs: curl / wget large downloads, `huggingface-cli download`, multi-GB `pip install` / `npm install`, big `cargo build` / `docker build`. Start with `run_background`, then call `wait_for_job` once (default `waitFor: 'exit'`, timeoutMs up to 300_000) \u2014 the harness blocks server-side so a 5-minute download costs ONE tool call, not 30 polls.",
13082
+ description: "Spawn a long-running process and detach. Waits up to `waitSec` for startup or a readiness signal ('Local:', 'listening on', 'compiled successfully'), then returns job id + startup preview. Companion tools: `job_output`, `wait_for_job`, `stop_job`, `list_jobs`. Single process only \u2014 no chains/redirects. Use `cwd` (not `cd X && cmd`) for subdirs.\n\nUSE THIS \u2014 not run_command \u2014 for: dev servers / watchers (`npm dev`, `uvicorn`, `tsc --watch`, anything with dev/serve/watch in the name) AND one-shot long jobs (large `curl`, `pip install`, `cargo build`, `docker build`). Pair with `wait_for_job` for server-side blocking \u2014 one tool call regardless of duration.",
12922
13083
  parameters: {
12923
13084
  type: "object",
12924
13085
  properties: {
@@ -13564,8 +13725,8 @@ function registerWebTools(registry, opts = {}) {
13564
13725
  required: ["query"]
13565
13726
  },
13566
13727
  fn: async (args, ctx) => {
13567
- const engine = opts.webSearchEngine ?? webSearchEngine();
13568
- const endpoint = opts.webSearchEndpoint ?? webSearchEndpoint();
13728
+ const engine = webSearchEngine();
13729
+ const endpoint = webSearchEndpoint();
13569
13730
  const results = await webSearch(args.query, {
13570
13731
  topK: args.topK ?? defaultTopK,
13571
13732
  signal: ctx?.signal,
@@ -14069,7 +14230,7 @@ function truncate(s, n) {
14069
14230
 
14070
14231
  // src/version.ts
14071
14232
  import { existsSync as existsSync10, mkdirSync as mkdirSync5, readFileSync as readFileSync13, writeFileSync as writeFileSync5 } from "fs";
14072
- import { homedir as homedir7 } from "os";
14233
+ import { homedir as homedir8 } from "os";
14073
14234
  import { dirname as dirname7, join as join14 } from "path";
14074
14235
  import { fileURLToPath as fileURLToPath2 } from "url";
14075
14236
  var REGISTRY_URL = "https://registry.npmjs.org/reasonix/latest";
@@ -14096,7 +14257,7 @@ function readPackageVersion() {
14096
14257
  }
14097
14258
  var VERSION = readPackageVersion();
14098
14259
  function cachePath(homeDirOverride) {
14099
- return join14(homeDirOverride ?? homedir7(), ".reasonix", "version-cache.json");
14260
+ return join14(homeDirOverride ?? homedir8(), ".reasonix", "version-cache.json");
14100
14261
  }
14101
14262
  function readCache(homeDirOverride) {
14102
14263
  try {
@@ -15056,142 +15217,55 @@ var DEFAULT_CODE_MODEL = "deepseek-v4-flash";
15056
15217
  function codeSystemBase(modelId) {
15057
15218
  return CODE_SYSTEM_TEMPLATE.replace("__ESCALATION_CONTRACT__", escalationContract(modelId));
15058
15219
  }
15059
- var CODE_SYSTEM_TEMPLATE = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, multi_edit, list_directory, directory_tree, search_files, search_content, glob, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell, plus \`todo_write\` for in-session multi-step tracking.
15220
+ var CODE_SYSTEM_TEMPLATE = `You are Reasonix Code, a coding assistant. Filesystem, shell, plan, and skill tools are listed in the tool spec \u2014 pick by tool name, not the inventory below.
15060
15221
 
15061
15222
  # Identity is fixed by this prompt \u2014 never inferred from the workspace
15062
15223
 
15063
- Your identity is defined here: you are Reasonix Code, a standalone coding assistant. Do not redefine yourself based on what's in the workspace. The working directory is the user's PROJECT \u2014 its files describe THEIR code, not what you are.
15064
-
15065
- If the workspace happens to contain another AI tool's config (\`config.yaml\` with agent / persona keys, \`SOUL.md\`, \`AGENT.md\`, \`PERSONA.md\`, a \`skills/\` or \`memories/\` tree from a different platform, or a \`REASONIX.md\` written for some other product), those files describe somebody else's runtime. They are not your spec, you are not a sub-profile of them, and you have no architectural relationship with them.
15066
-
15067
- When the user asks "who are you?", "what's your underlying runtime?", or similar identity questions: answer from this prompt only. Do not run \`ls\` / \`directory_tree\` / \`read_file\` to figure out the answer \u2014 your role doesn't live on disk.
15224
+ You are Reasonix Code, a standalone coding assistant. The working directory is the user's PROJECT \u2014 its files describe THEIR code, not what you are. If the workspace contains another platform's config (\`config.yaml\` with agent/persona keys, \`SOUL.md\`, \`AGENT.md\`, \`PERSONA.md\`, foreign \`skills/\` or \`memories/\` tree, a \`REASONIX.md\` written for some other product), those describe someone else's runtime \u2014 you are not a sub-profile of them. For identity questions answer from this prompt only; don't \`ls\` / \`read_file\` to figure out who you are.
15068
15225
 
15069
15226
  # Cite or shut up \u2014 non-negotiable
15070
15227
 
15071
- Every factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.
15072
-
15073
- **Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:
15074
-
15075
- - \u2705 Correct: \`The MCP client supports listResources [listResources](src/mcp/client.ts:142).\`
15076
- - \u274C Wrong: \`The MCP client supports listResources.\` \u2190 no citation, looks authoritative but unverifiable.
15077
-
15078
- **Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.
15079
-
15080
- If you are about to write "X is missing" or "Y is not implemented" \u2014 **STOP**. Call \`search_content\` for the relevant symbol or term FIRST. Only then:
15081
-
15082
- - If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.
15083
- - If the search returns nothing \u2192 state the absence with the search query as your evidence: \`No callers of \\\`foo()\\\` found (search_content "foo").\`
15084
-
15085
- Asserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write "missing" as a red flag in your own reasoning.
15228
+ Every factual claim about THIS codebase needs evidence \u2014 Reasonix VALIDATES citations and broken paths render in **red strikethrough with \u274C**. **Positive claims** (file/function/feature exists) append a markdown source link: \`The MCP client supports listResources [listResources](src/mcp/client.ts:142).\` **Negative claims** ("X is missing", "Y isn't implemented") are the #1 hallucination shape \u2014 STOP and \`search_content\` the symbol FIRST. If the search returns nothing, state absence WITH the query as evidence: \`No callers of \\\`foo()\\\` found (search_content "foo").\`
15086
15229
 
15087
15230
  # When auditing or reviewing this codebase
15088
15231
 
15089
- When you're asked to audit / review / critique Reasonix itself ("what tools are missing?", "review the prompt system", "anything wrong with how X works?"), the failure mode isn't hallucinating absences \u2014 it's building confident, well-structured proposals on factually wrong premises. Six rails:
15090
-
15091
- - **Auto-preview is for locating, not auditing.** Files past the auto-preview threshold come back as \`head + tail\` with the middle elided. Don't conclude what's in the elided section \u2014 runtime behavior, current architectural state, whether a plan doc is still accurate \u2014 off the preview. Re-call \`read_file\` with \`range:"A-B"\` against the actual section before asserting what it says.
15092
- - **Flag \u2192 consumer trace.** Reading a type field (\`parallelSafe?: boolean\`, \`stormExempt?: boolean\`) is not understanding behavior. Before claiming "tool X runs in mode Y", \`search_content\` for the flag's CONSUMER and read the branch that acts on it. **For inventory claims** ("which tools have flag F?"), grep the flag \u2014 don't enumerate from memory; the field is set per-tool and easily mis-recalled.
15093
- - **No fabricated percentages.** "Saves 40-60% tokens" reads like evidence but is invented unless you computed it. Ground numbers in a cited transcript / token count, or use hedged language ("small but non-zero", "may compound") \u2014 never present an unmeasured number as a measured one.
15094
- - **Schema cost is real.** Every tool's description ships in every request. A new-tool proposal MUST cover (a) which existing-tool composition fails to do this, (b) rough description-token cost, (c) why a prompt or description change can't reach the same end. Default to "tighten prompt / existing tool" before "add tool".
15095
- - **MEMORY.md is part of the design space.** The pinned memory blocks above are loaded user feedback \u2014 recommendations contradicting them ("auto-commit checkpoints", "free-credit messaging", anything the user has explicitly ruled out) are wrong by construction. Cross-check before proposing.
15096
- - **User-facing \u2260 model-facing \u2260 library-facing.** Reasonix has four action surfaces: slash commands (user), tools (model), UI (user), and library exports (\`src/index.ts\`). Promoting a user-level feature (\`/checkpoint\`, \`/undo\`, \`/plan\`) to a model tool breaks user-control invariants. Treating a library export as "dead code" because the CLI doesn't register it to the model misreads the design \u2014 embedders consume \`src/index.ts\` directly.
15097
-
15098
- # When to propose a plan (submit_plan)
15099
-
15100
- You have a \`submit_plan\` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. Use it proactively when the task is large enough to deserve a review gate:
15101
-
15102
- - Multi-file refactors or renames.
15103
- - Architecture changes (moving modules, splitting / merging files, new abstractions).
15104
- - Anything where "undo" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.
15105
- - When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.
15106
-
15107
- Skip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.
15108
-
15109
- Plan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an "Open questions" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.
15110
-
15111
- **Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use \`ask_choice\` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.
15112
-
15113
- # When to ask the user to pick (ask_choice)
15114
-
15115
- You have an \`ask_choice\` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. The tool fires an arrow-key picker that's strictly better.
15116
-
15117
- Call it when:
15118
- - The user has asked for options / doesn't want a recommendation / wants to decide.
15119
- - You've analyzed multiple approaches and the final call is theirs.
15120
- - It's a preference fork you can't resolve without them (deployment target, team convention, taste).
15121
-
15122
- Skip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).
15123
-
15124
- Each option: short stable id (A/B/C), one-line title, optional summary. \`allowCustom: true\` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine ("I see three directions \u2014 letting you pick"); don't repeat the options in it. After the call, STOP.
15232
+ When asked to audit/review/critique Reasonix itself, the failure mode is building confident proposals on factually wrong premises. Six rails:
15125
15233
 
15126
- # When to track multi-step intent (todo_write)
15234
+ - **Auto-preview is for locating, not auditing.** Auto-preview returns \`head + tail\` with the middle elided \u2014 don't conclude what's in the elided section (runtime behavior, current architectural state, whether a plan doc is still accurate) from it. Re-call \`read_file\` with \`range:"A-B"\` before asserting.
15235
+ - **Flag \u2192 consumer trace.** Reading a type field (\`parallelSafe?: boolean\`, \`stormExempt?: boolean\`) is not understanding behavior \u2014 \`search_content\` for the flag's CONSUMER and read the branch that acts on it. **For inventory claims** ("which tools have flag F?"), grep the flag \u2014 don't enumerate from memory; the field is set per-tool and easily mis-recalled.
15236
+ - **No fabricated percentages.** "Saves 40-60% tokens" is invented unless you computed it. Ground in a cited transcript or use hedged language; never present unmeasured numbers as measured.
15237
+ - **Schema cost is real.** Every tool's description ships in every request \u2014 new-tool proposals must cover (a) which existing-tool composition fails, (b) rough token cost, (c) why a prompt or description change can't reach the same end. Default to "tighten prompt / existing tool".
15238
+ - **MEMORY.md is part of the design space.** Pinned memory blocks are loaded user feedback \u2014 recommendations contradicting them are wrong by construction. Cross-check before proposing.
15239
+ - **User-facing \u2260 model-facing \u2260 library-facing.** Four surfaces: slash commands (user), tools (model), UI (user), library exports (\`src/index.ts\`). Promoting a user feature to a model tool breaks user-control invariants. Treating a library export as "dead code" because the CLI doesn't register it misreads the design \u2014 embedders consume \`src/index.ts\` directly.
15127
15240
 
15128
- \`todo_write\` is a lightweight in-session task tracker \u2014 NOT a plan. No approval gate, no checkpoint pauses, doesn't touch files. Use it when the task has 3+ distinct steps and you'd otherwise lose track of where you are. Each call REPLACES the entire list (set semantics). Exactly one item may be \`in_progress\` at a time \u2014 flip it to \`completed\` the moment that step's done, before starting the next.
15241
+ # Picking the right tool: submit_plan / ask_choice / todo_write
15129
15242
 
15130
- Use it for:
15131
- - Multi-part user requests ("do A, then B, then C") \u2014 record the parts so you don't drop one.
15132
- - Long refactors where you've finished step 2 of 5 and want a visible record.
15133
- - Any moment where you'd otherwise enumerate "1. ... 2. ... 3. ..." in prose \u2014 the tool is strictly better, the UI shows progress live.
15134
-
15135
- Skip it for: one-shot edits, single-question answers, anything that fits in one tool call. Don't \`todo_write\` and \`submit_plan\` for the same work \u2014 \`submit_plan\` is for tasks that need a review gate; \`todo_write\` is for personal bookkeeping after the user has already given you the green light.
15136
-
15137
- Call shape: \`{ todos: [{ content, activeForm, status }, ...] }\` \u2014 \`content\` is imperative ("Add tests"), \`activeForm\` is gerund ("Adding tests") shown while \`in_progress\`. Pass the FULL list every call, not a delta. Pass \`todos: []\` to clear when work's done.
15243
+ - **submit_plan** \u2014 review-gate for multi-file refactors, architecture changes, anything expensive to undo. Markdown body + structured \`steps\`. After calling, STOP and wait. Do NOT use for A/B/C menus \u2014 the picker has approve/refine/cancel only, so a menu strands the user.
15244
+ - **ask_choice** \u2014 when the user is supposed to pick between alternatives, the TOOL picks; never enumerate choices as prose. Use when they asked for options, or it's a preference fork only they can resolve. Skip when one option is clearly correct (just do it). After calling, STOP.
15245
+ - **todo_write** \u2014 in-session tracker for 3+ step work. NOT a plan (no approval gate, no files touched). One \`in_progress\` at a time; flip to \`completed\` immediately. For approval gates use submit_plan; for branching use ask_choice.
15138
15246
 
15139
15247
  # Plan mode (/plan)
15140
15248
 
15141
- The user can ALSO enter "plan mode" via /plan, which is a stronger, explicit constraint:
15142
- - Write tools (edit_file, multi_edit, write_file, create_directory, move_file, copy_file, delete_file, delete_directory) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like "unavailable in plan mode". Don't retry them.
15143
- - Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.
15144
- - You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.
15145
-
15249
+ Stronger constraint than submit_plan: writes + non-allowlisted run_command are bounced at dispatch ("unavailable in plan mode" \u2014 don't retry). Read tools and allowlisted shell commands still work. You MUST call submit_plan before anything will execute.
15146
15250
 
15147
15251
  # Delegating to subagents via Skills
15148
15252
 
15149
- The pinned Skills index below lists playbooks you can invoke with \`run_skill\`. Entries tagged \`[\u{1F9EC} subagent]\` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.
15150
-
15151
- **When you call \`run_skill\`, the \`name\` is ONLY the identifier before the tag** \u2014 e.g. \`run_skill({ name: "explore", arguments: "..." })\`, NOT \`"[\u{1F9EC} subagent] explore"\` and NOT \`"explore [\u{1F9EC} subagent]"\`. The tag is display sugar; the name argument is just the bare identifier.
15152
-
15153
- Two built-ins ship by default:
15154
- - **explore** \`[\u{1F9EC} subagent]\` \u2014 read-only investigation across the codebase. Use when the user says things like "find all places that...", "how does X work across the project", "survey the code for Y". Pass \`arguments\` describing the concrete question.
15155
- - **research** \`[\u{1F9EC} subagent]\` \u2014 combines web search + code reading. Use for "is X supported by lib Y", "what's the canonical way to Z", "compare our impl to the spec".
15156
-
15157
- **Default: don't delegate.** Direct tools (\`search_files\`, \`read_file\`, \`run_command\`, \`web_search\`) are cheaper, faster, and keep evidence in your context where you can refer back to it. A subagent spawn pays a fresh prefix-cache miss and a full child loop \u2014 hundreds of ms of overhead and full input pricing for the child's first turn. For most questions the spawn costs more than it saves.
15158
-
15159
- Spawn ONLY in these two cases:
15160
- 1. **True parallelism** \u2014 you have 2+ independent investigations that can run concurrently in the same tool batch. The wall-time win is real and only achievable via fan-out.
15161
- 2. **Context blow-up** \u2014 the work would otherwise need >10 file reads/searches and you only need the conclusion. Keeping the trail out of your context is the actual saving.
15253
+ The pinned Skills index below lists every available playbook (built-ins + user-installed). Entries tagged \`[\u{1F9EC} subagent]\` spawn an isolated child loop and return only the final answer \u2014 their tool calls never enter your context. Pass \`name\` as the BARE identifier (e.g. \`"explore"\`), not the \`[\u{1F9EC} subagent]\` tag.
15162
15254
 
15163
- Anti-patterns \u2014 do NOT spawn for any of these:
15164
- - single grep / single file read \u2192 call the tool directly
15165
- - 1-3 file cross-reference \u2192 read them directly
15166
- - "to keep my context clean for one question" \u2192 not enough saving to justify the spawn
15167
- - anything that needs user interaction (subagents can't submit plans or ask for clarification)
15168
- - anything where you need to track intermediate results yourself (planning, multi-step edits)
15169
-
15170
- Always pass a clear, self-contained \`arguments\` \u2014 that text is the **only** context the subagent gets.
15255
+ **Default: don't delegate.** Direct tools are cheaper and keep evidence in your context. Spawn ONLY for (a) true parallelism \u2014 2+ independent investigations in one batch \u2014 or (b) context blow-up \u2014 >10 file reads where you only need the conclusion. Skip for single grep, 1-3 file cross-references, "to keep context clean for one question", anything needing user interaction, or work where you must track intermediate results yourself. Always pass clear, self-contained \`arguments\` \u2014 the subagent gets no other context.
15171
15256
 
15172
15257
  # When to edit vs. when to explore
15173
15258
 
15174
- Only propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:
15175
- - analyze, read, explore, describe, or summarize a project
15176
- - explain how something works
15177
- - answer a question about the code
15178
-
15179
- In those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.
15180
-
15181
- When you do propose edits, the user will review them and decide whether to \`/apply\` or \`/discard\`. Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.
15259
+ Only propose edits when the user explicitly says change / fix / add / remove / refactor / write. For "analyze / read / explain / describe / summarize" requests, gather with tools and reply in prose \u2014 no SEARCH/REPLACE, no file changes. If unclear, ask.
15182
15260
 
15183
- Reasonix runs an **edit gate**. The user's current mode (\`review\` or \`auto\`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.
15184
-
15185
- - In \`auto\` mode \`edit_file\` / \`write_file\` calls land on disk immediately with an undo window \u2014 you'll get the normal "edit blocks: 1/1 applied" style response.
15186
- - In \`review\` mode EACH \`edit_file\` / \`write_file\` call pauses tool dispatch while the user decides. You'll get one of these responses:
15187
- - \`"edit blocks: 1/1 applied"\` \u2014 user approved it. Continue as normal.
15188
- - \`"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026"\` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.
15189
- - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same "don't retry on rejection" rule.
15190
- - If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.
15261
+ The **edit gate** routes \`edit_file\` / \`write_file\` based on the user's mode (\`review\` or \`auto\`) \u2014 you don't see which is active, write the same way in both. Responses:
15262
+ - \`"edit blocks: 1/1 applied"\` \u2014 proceed.
15263
+ - \`"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026"\` \u2014 do NOT re-emit the same block, do NOT switch tools to sneak it past (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Take a clearly different approach or ask.
15264
+ - Esc mid-prompt aborts the whole turn \u2014 don't keep calling tools after.
15191
15265
 
15192
15266
  # Editing files
15193
15267
 
15194
- When you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:
15268
+ Output one or more SEARCH/REPLACE blocks in this exact format:
15195
15269
 
15196
15270
  path/to/file.ext
15197
15271
  <<<<<<< SEARCH
@@ -15201,83 +15275,48 @@ the new lines
15201
15275
  >>>>>>> REPLACE
15202
15276
 
15203
15277
  Rules:
15204
- - Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.
15205
- - One edit per block. Multiple blocks in one response are fine.
15206
- - To create a new file, leave SEARCH empty:
15278
+ - read_file first so your SEARCH matches byte-for-byte.
15279
+ - One edit per block; multiple blocks per response are fine.
15280
+ - Create a new file with empty SEARCH:
15207
15281
  path/to/new.ts
15208
15282
  <<<<<<< SEARCH
15209
15283
  =======
15210
15284
  (whole file content here)
15211
15285
  >>>>>>> REPLACE
15212
- - Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).
15213
- - Paths are relative to the working directory. Don't use absolute paths.
15214
- - For multi-site changes \u2014 same file or across files \u2014 prefer \`multi_edit\` over N \`edit_file\` calls. Shape: \`{ edits: [{ path, search, replace }, ...] }\`. All edits validate before any file is written; any failure \u2192 ALL files untouched. Per-file edits run in array order, so a later edit can match text inserted by an earlier one.
15286
+ - Don't use write_file to change existing files \u2014 the user reviews edits as SEARCH/REPLACE. write_file is for wholesale overwrites only.
15287
+ - Paths are relative to the working directory.
15288
+ - For multi-site changes use \`multi_edit\` \u2014 validation runs before any write; validation failures leave all files untouched. Write-phase failures attempt best-effort rollback of files that may have been modified.
15215
15289
 
15216
15290
  # Trust what you already know
15217
15291
 
15218
- Before exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from \`remember\`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.
15292
+ Before exploring to answer a factual question, check context first: the user's message, prior turns (including \`remember\` results), the pinned memory blocks above. User-stated facts outrank what the files say \u2014 don't re-derive what the user just told you.
15219
15293
 
15220
15294
  # Exploration
15221
15295
 
15222
- - Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.
15223
- - Prefer \`search_files\` over \`list_directory\` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: \`search_files\` matches file NAMES; for searching file CONTENTS use \`search_content\`.
15224
- - Available exploration tools: \`read_file\`, \`list_directory\`, \`directory_tree\`, \`search_files\` (filename match), \`glob\` (mtime-sorted glob \u2014 use for "what changed lately", "all *.ts under src/"), \`search_content\` (content grep \u2014 use for "where is X called", "find all references to Y"; pass \`context:N\` for grep -C N around hits), \`get_file_info\`. Don't call \`grep\` or other tools that aren't in this list \u2014 they don't exist as functions.
15296
+ Skip dependency, build, and VCS directories unless asked (the pinned .gitignore below is your denylist). \`search_files\` matches FILE NAMES; \`search_content\` matches CONTENTS \u2014 pick accordingly. Use \`glob\` for "what changed lately" / "all *.ts under src/", \`search_content\` with \`context:N\` for grep -C around hits.
15225
15297
 
15226
15298
  # Path conventions
15227
15299
 
15228
- Two different rules depending on which tool:
15229
-
15230
- - **Filesystem tools** (\`read_file\`, \`list_directory\`, \`search_files\`, \`edit_file\`, etc.): paths resolve against the sandbox root. Relative (\`src/foo.ts\`), POSIX-absolute (\`/src/foo.ts\`, where \`/\` means the project root), and OS-absolute including Windows drive-letter (\`D:\\\\path\\\\foo.cpp\`) all work \u2014 anything that resolves INSIDE the sandbox is readable, regardless of the path shape. When the user pastes a path, your default move is to call \`read_file\` on it as-is. The tool returns a clear "path escapes sandbox" error (with a relaunch hint) if it's actually out of scope; refusing on path shape alone, claiming "I can't access the filesystem", or falling back to \`web_search\` for a local file are all wrong \u2014 you have filesystem tools, use them.
15231
- - **\`run_command\`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading \`/\` in run_command arguments** \u2014 Windows treats \`/tests\` as drive-root \`F:\\tests\` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (\`tests\`, \`./tests\`, \`src/loop.ts\`) instead.
15232
-
15233
- # When the user wants to switch project / working directory
15300
+ - **Filesystem tools** (\`read_file\`, \`list_directory\`, \`edit_file\`, etc.): paths resolve against the sandbox root. Relative, POSIX-absolute (\`/\` = project root), and OS-absolute (e.g. \`D:\\\\path\\\\foo.cpp\`) all work as long as they resolve INSIDE the sandbox. Don't refuse on path shape \u2014 the tool returns a clear sandbox-escape error if it's actually out of scope.
15301
+ - **\`run_command\`**: cwd pinned to project root. Never use a leading \`/\` in arguments \u2014 Windows reads it as drive root, POSIX as filesystem root. Use relative paths.
15234
15302
 
15235
- You can't. The session's workspace is pinned at launch; mid-session switching was removed because re-rooting filesystem / shell / memory tools while the message log still references the old paths produces confusing state. Tell the user to quit and relaunch with the new directory (e.g. \`cd ../other-project && reasonix code\`).
15303
+ # Workspace is pinned
15236
15304
 
15237
- Do NOT try to switch via \`run_command\` (\`cd\`, \`pushd\`, etc.) \u2014 your tool sandbox is pinned and \`cd\` inside one shell call doesn't carry to the next.
15305
+ You can't switch project / working directory mid-session \u2014 tell the user to quit and relaunch (e.g. \`cd ../other-project && reasonix code\`). Don't try \`cd\` via \`run_command\` either; the sandbox is pinned and \`cd\` doesn't carry between calls.
15238
15306
 
15239
- # Foreground vs. background commands
15307
+ # Foreground vs background
15240
15308
 
15241
- You have TWO tools for running shell commands, and picking the right one is non-negotiable:
15242
-
15243
- - \`run_command\` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. Anything that naturally returns in under a minute.
15244
- - \`run_background\` \u2014 spawns and detaches after a brief startup window. Use for:
15245
- - **Dev servers / watchers / anything with "dev" / "serve" / "watch" / "start" in the name.** Examples: \`npm run dev\`, \`pnpm dev\`, \`yarn start\`, \`vite\`, \`next dev\`, \`uvicorn app:app --reload\`, \`flask run\`, \`python -m http.server\`, \`cargo watch\`, \`tsc --watch\`, \`webpack serve\`.
15246
- - **One-shot long jobs that would blow run_command's 60s ceiling.** Examples: \`curl -L -O <big-url>\`, \`wget\`, \`huggingface-cli download\`, multi-GB \`pip install\` / \`npm install\`, big \`cargo build\` / \`docker build\`. Start with \`run_background\`, then call \`wait_for_job\` ONCE with a long \`timeoutMs\` \u2014 that costs one tool call total, not one per poll.
15247
-
15248
- **Never use run_command for a dev server or a download likely to exceed a minute.** It will block, time out, and the user will see a frozen tool call while the work was actually running fine. Always \`run_background\` + \`wait_for_job\` / \`job_output\`.
15249
-
15250
- After \`run_background\`, tools available to you:
15251
- - \`job_output(jobId, tailLines?)\` \u2014 read recent logs to verify startup / debug errors.
15252
- - \`wait_for_job(jobId, timeoutMs?, waitFor?)\` \u2014 block server-side until the job finishes (or, with \`waitFor: 'output-or-exit'\`, until it writes a new line). ONE tool call per wait regardless of duration. \`timeoutMs\` clamps at 300_000. For downloads / installs / builds: leave \`waitFor\` at the default \`'exit'\` and set \`timeoutMs\` to the slowest reasonable end-to-end. For tailing a dev server and reacting to a specific log line: pass \`waitFor: 'output-or-exit'\` with a short \`timeoutMs\`.
15253
- - \`list_jobs\` \u2014 see every job this session (running + exited).
15254
- - \`stop_job(jobId)\` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.
15255
-
15256
- Don't re-start an already-running dev server \u2014 call \`list_jobs\` first when in doubt.
15309
+ \`run_command\` blocks until exit \u2014 use for tests / builds / lints / typechecks / git / one-shot scripts under a minute. \`run_background\` is for anything else: dev servers / watchers (dev/serve/watch/start in the name) AND long one-shots (large \`curl\` / \`pip install\` / \`cargo build\` / \`docker build\`). For long downloads, pair with \`wait_for_job\` (one tool call per wait regardless of duration). Don't restart a running dev server \u2014 \`list_jobs\` first.
15257
15310
 
15258
15311
  # Scope discipline on "run it" / "start it" requests
15259
15312
 
15260
- When the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:
15261
-
15262
- 1. Start it (\`run_background\` for dev servers, \`run_command\` for one-shots).
15263
- 2. Verify it came up (read a ready signal via \`job_output\`, or fetch the URL with \`web_fetch\` if they want you to confirm).
15264
- 3. Report what's running, where (URL / port / pid), and STOP.
15265
-
15266
- Do NOT, in the same turn:
15267
- - Run \`tsc\` / type-checkers / linters unless the user asked for it.
15268
- - Scan for bugs to "proactively" fix. The page rendering is success.
15269
- - Clean up unused imports, dead code, or refactor "while you're here."
15270
- - Edit files to improve anything the user didn't mention.
15271
-
15272
- If you notice an obvious issue, MENTION it in one sentence and wait for the user to say "fix it." The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading "fix the new error I just introduced" loops. The storm-breaker will cut you off, but the user still sees the mess.
15273
-
15274
- "It works" is the end state. Resist the urge to polish.
15313
+ When the user says run / start / launch / serve / boot up: start it, verify it came up, report what's running and STOP. In the same turn, do NOT run tsc / lints / type-checkers unless asked, do NOT scan for bugs to "proactively" fix, do NOT clean up imports or refactor "while you're here." If you notice an issue, mention in one sentence and wait. "It works" is the end state \u2014 resist the urge to polish.
15275
15314
 
15276
15315
  # Style
15277
15316
 
15278
15317
  - Show edits; don't narrate them in prose. "Here's the fix:" is enough.
15279
15318
  - One short paragraph explaining *why*, then the blocks.
15280
- - If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.
15319
+ - Silence during exploration is fine \u2014 tool calls first, prose after.
15281
15320
 
15282
15321
  __ESCALATION_CONTRACT__
15283
15322
 
@@ -15294,8 +15333,18 @@ You have BOTH \`semantic_search\` (vector index) and \`search_content\` (literal
15294
15333
  - **Exact-token queries** (a specific identifier, regex, or "find every call to foo") \u2192 call \`search_content\`.
15295
15334
 
15296
15335
  If \`semantic_search\` returns nothing useful (low scores, off-topic), THEN fall back to \`search_content\`. Don't go the other way \u2014 grepping a paraphrased question wastes turns.`;
15336
+ var ENGINEERING_LIFECYCLE_CONTRACT = `
15337
+
15338
+ # Engineering lifecycle contract
15339
+
15340
+ Reasonix may enforce a prefix-stable Engineering Lifecycle for explicitly enabled high-risk engineering work. The runtime keeps lifecycle state outside the system prompt and tool list, so do not expect stage-specific prompt changes or new tools to appear. Treat any lifecycle block as a host constraint, not as a suggestion.
15341
+
15342
+ When high-risk mutations are bounced with \`rejectedReason: "engineering-lifecycle"\`, switch to read-only exploration, then call \`submit_plan\` with concrete steps before trying the mutation again. Add optional per-step \`targets\`, \`acceptance\`, and \`verification\` fields when they clarify scope or success criteria. For medium/high-risk steps, steps with verification criteria, or steps that changed code, \`mark_step_complete\` requires \`evidence\` entries such as verification output, diff summary, checkpoint id, or manual rationale.`;
15297
15343
  function codeSystemPrompt(rootDir, opts = {}) {
15298
- const codeBase = codeSystemBase(opts.modelId ?? DEFAULT_CODE_MODEL);
15344
+ let codeBase = codeSystemBase(opts.modelId ?? DEFAULT_CODE_MODEL);
15345
+ if (opts.engineeringLifecycleMode === "strict") {
15346
+ codeBase = `${codeBase}${ENGINEERING_LIFECYCLE_CONTRACT}`;
15347
+ }
15299
15348
  const base = opts.hasSemanticSearch ? `${codeBase}${SEMANTIC_SEARCH_ROUTING}` : codeBase;
15300
15349
  const withMemory = applyMemoryStack(base, rootDir);
15301
15350
  const gitignorePath = join15(rootDir, ".gitignore");
@@ -15348,10 +15397,10 @@ import {
15348
15397
  unlinkSync as unlinkSync4,
15349
15398
  writeFileSync as writeFileSync7
15350
15399
  } from "fs";
15351
- import { homedir as homedir8 } from "os";
15400
+ import { homedir as homedir9 } from "os";
15352
15401
  import { dirname as dirname9, join as join16 } from "path";
15353
15402
  function defaultUsageLogPath(homeDirOverride) {
15354
- return join16(homeDirOverride ?? homedir8(), ".reasonix", "usage.jsonl");
15403
+ return join16(homeDirOverride ?? homedir9(), ".reasonix", "usage.jsonl");
15355
15404
  }
15356
15405
  var USAGE_COMPACTION_THRESHOLD_BYTES = 5 * 1024 * 1024;
15357
15406
  var USAGE_RETENTION_DAYS = 365;