reasonix 0.5.22 → 0.5.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
2
2
  import {
3
3
  CODE_SYSTEM_PROMPT,
4
4
  codeSystemPrompt
5
- } from "./chunk-ANMDY236.js";
5
+ } from "./chunk-C266QOQU.js";
6
6
  export {
7
7
  CODE_SYSTEM_PROMPT,
8
8
  codeSystemPrompt
9
9
  };
10
- //# sourceMappingURL=prompt-75XLIUTO.js.map
10
+ //# sourceMappingURL=prompt-OVVMCH5F.js.map
package/dist/index.d.ts CHANGED
@@ -731,6 +731,18 @@ interface ToolRegistryOptions {
731
731
  */
732
732
  autoFlatten?: boolean;
733
733
  }
734
+ /**
735
+ * Callback form for `setToolInterceptor` — receives the tool name and
736
+ * already-parsed arguments; returns a string to short-circuit dispatch
737
+ * (the returned value becomes the tool result the model sees), or
738
+ * `null` / `undefined` to fall through to the registered tool fn.
739
+ *
740
+ * Used by `reasonix code`'s edit-mode gate: `edit_file` / `write_file`
741
+ * are intercepted in "review" mode (queued into pendingEdits, returning
742
+ * "queued for /apply") or handled inline in "auto" mode (snapshot +
743
+ * apply, then surface an undo banner). Other tools pass through.
744
+ */
745
+ type ToolInterceptor = (name: string, args: Record<string, unknown>) => string | null | undefined | Promise<string | null | undefined>;
734
746
  declare class ToolRegistry {
735
747
  private readonly _tools;
736
748
  private readonly _autoFlatten;
@@ -742,11 +754,23 @@ declare class ToolRegistry {
742
754
  * bounced until the user approves a submitted plan.
743
755
  */
744
756
  private _planMode;
757
+ /**
758
+ * Optional hook run after arg parsing but before tool.fn. Lets the TUI
759
+ * reroute specific tool calls (e.g. edit_file in review mode) without
760
+ * modifying the tool definitions themselves.
761
+ */
762
+ private _interceptor;
745
763
  constructor(opts?: ToolRegistryOptions);
746
764
  /** Enable / disable plan-mode enforcement at dispatch. */
747
765
  setPlanMode(on: boolean): void;
748
766
  /** True when the registry is currently refusing non-readonly calls. */
749
767
  get planMode(): boolean;
768
+ /**
769
+ * Install or clear the dispatch interceptor. At most one interceptor
770
+ * is active at a time — calling twice replaces the previous. Pass
771
+ * `null` to remove.
772
+ */
773
+ setToolInterceptor(fn: ToolInterceptor | null): void;
750
774
  register<A, R>(def: ToolDefinition<A, R>): this;
751
775
  has(name: string): boolean;
752
776
  get(name: string): ToolDefinition | undefined;
@@ -968,6 +992,40 @@ declare class CacheFirstLoop {
968
992
  * authored intent we can't mechanically shrink without losing
969
993
  * meaning.
970
994
  */
995
+ /**
996
+ * Conservative args-only shrink fired after every tool response —
997
+ * strictly about ONE thing: stop oversized `edit_file` / `write_file`
998
+ * arguments from riding every future turn's prompt.
999
+ *
1000
+ * Why this is worth doing AUTOMATICALLY (not just on /compact):
1001
+ * Each tool-call arguments string sticks in the log verbatim. On a
1002
+ * coding session with ~10 edits, that's 20-40K tokens of stale
1003
+ * SEARCH/REPLACE text riding along on every turn. Even at a 98.9%
1004
+ * cache hit rate the input cost still adds up linearly (cache-hit
1005
+ * price × tokens × turns). Compacting IMMEDIATELY after the tool
1006
+ * responds means the next turn's prompt is already smaller — the
1007
+ * shrink is a one-time write that saves every future prompt.
1008
+ *
1009
+ * Threshold rationale: 800 tokens ≈ 3 KB. A typical 20-line edit's
1010
+ * args land well under that; massive rewrites (whole-file content,
1011
+ * 100+ line refactors) land above and get the compaction. Small
1012
+ * edits stay byte-verbatim so nothing common-case changes.
1013
+ *
1014
+ * Safety: we ONLY shrink args whose tool has ALREADY responded.
1015
+ * Structurally that's every call in `log.toMessages()` at this
1016
+ * point — the current turn's assistant/tool pairing is by
1017
+ * construction closed by the time we get here (append happens
1018
+ * AFTER dispatch). The in-flight assistant message being built
1019
+ * lives in scratch, not the log, so this pass can't touch it.
1020
+ *
1021
+ * Model impact: the model may occasionally want to reference the
1022
+ * exact SEARCH text of a prior edit — it then reads the file
1023
+ * directly (which shows current state) or looks at the preceding
1024
+ * assistant text (which has its plan). Losing the stale args is a
1025
+ * net win: one extra read_file vs. dragging N KB of stale text
1026
+ * through every subsequent turn.
1027
+ */
1028
+ private compactToolCallArgsAfterResponse;
971
1029
  compact(maxTokens?: number): {
972
1030
  healedCount: number;
973
1031
  tokensSaved: number;
@@ -1084,12 +1142,21 @@ declare function healLoadedMessagesByTokens(messages: ChatMessage[], maxTokens:
1084
1142
  charsSaved: number;
1085
1143
  };
1086
1144
  /**
1087
- * Annotate the `DeepSeek 400: maximum context length …` error the API
1088
- * returns when a session's history has grown past 131,072 tokens. The
1089
- * raw message is a JSON blob; we surface a short actionable hint on top
1090
- * so the user knows to `/forget` or `/clear` rather than parsing the
1091
- * JSON themselves. Other errors pass through unchanged — the loop's
1092
- * error channel already formats them well enough.
1145
+ * Turn raw `DeepSeek NNN: {json}` errors into short actionable hints.
1146
+ * Client code throws these verbatim from the HTTP layer (see client.ts);
1147
+ * this is the one place the UI text layer reads to decide what the user
1148
+ * actually needs to do about it.
1149
+ *
1150
+ * Covered codes (per DeepSeek's error-code doc):
1151
+ * - 400 + "maximum context length" → context-overflow, point at /forget
1152
+ * - 400 generic → strip the JSON, show inner message
1153
+ * - 401 → API key rejected, point at `reasonix setup`
1154
+ * - 402 → balance depleted, link to top-up page
1155
+ * - 422 → param error, show inner message (usually explains which field)
1156
+ *
1157
+ * 429/500/502/503/504 are normally absorbed by retry.ts before they reach here;
1158
+ * if they DO reach here (all retries exhausted), the raw string already
1159
+ * says "DeepSeek 503: server busy" etc. which is informative enough.
1093
1160
  */
1094
1161
  declare function formatLoopError(err: Error): string;
1095
1162
 
@@ -1676,6 +1743,119 @@ declare function registerSubagentTool(parentRegistry: ToolRegistry, opts: Subage
1676
1743
  */
1677
1744
  declare function forkRegistryExcluding(parent: ToolRegistry, exclude: ReadonlySet<string>): ToolRegistry;
1678
1745
 
1746
+ /**
1747
+ * Long-running process registry — the "background run" counterpart to
1748
+ * `run_command`. `run_command` spawns a child, waits for it to exit,
1749
+ * then returns combined output; perfect for tests / builds / one-shots
1750
+ * but useless for `npm run dev` / `python -m http.server` / watchers,
1751
+ * which never exit and just time the tool out.
1752
+ *
1753
+ * JobRegistry lets the model fire-and-almost-forget: we spawn the
1754
+ * child, wait at most `waitSec` (default 3s) OR until output matches
1755
+ * a readiness regex, then return the startup preview plus a job id.
1756
+ * The child keeps running in the background; later tool calls tail
1757
+ * its output, stop it, or list what's still alive.
1758
+ *
1759
+ * Shape-wise this is modeled on Claude Code's `BashOutput` / `KillBash`
1760
+ * pair. We diverge on one point: ready-signal detection is on by default
1761
+ * because dev servers almost universally print "Local:", "listening on",
1762
+ * "ready in N ms", "compiled successfully" when they come up — short-
1763
+ * circuiting the wait on those keeps the model's first tool-result
1764
+ * useful ("server is up at http://localhost:5173") instead of spending
1765
+ * the full 3s on a stabilization timer.
1766
+ */
1767
+ interface JobStartOptions {
1768
+ /** Absolute path to cwd for the spawned child. */
1769
+ cwd: string;
1770
+ /**
1771
+ * Max seconds to wait for the initial burst before returning. Capped
1772
+ * at 30. A ready-signal match short-circuits this. Default 3.
1773
+ */
1774
+ waitSec?: number;
1775
+ /** Signal plumbed through from the calling tool's AbortSignal. */
1776
+ signal?: AbortSignal;
1777
+ /** Total per-job output buffer cap (bytes). Default 64 KB. */
1778
+ maxBufferBytes?: number;
1779
+ }
1780
+ interface JobStartResult {
1781
+ jobId: number;
1782
+ pid: number | null;
1783
+ /** True iff the child was still running at the point we returned. */
1784
+ stillRunning: boolean;
1785
+ /** True iff a READY_SIGNALS pattern matched during the wait window. */
1786
+ readyMatched: boolean;
1787
+ /** Preview of combined stdout+stderr accumulated during the wait. */
1788
+ preview: string;
1789
+ /** If the child exited during the wait, its exit code; else null. */
1790
+ exitCode: number | null;
1791
+ }
1792
+ interface JobRecord {
1793
+ id: number;
1794
+ command: string;
1795
+ pid: number | null;
1796
+ startedAt: number;
1797
+ /** Exit code once the process terminates; null while running. */
1798
+ exitCode: number | null;
1799
+ /** Combined stdout+stderr, ring-trimmed. */
1800
+ output: string;
1801
+ /**
1802
+ * Total bytes ever written by the child (not just what's in `output`).
1803
+ * Useful for "how much got dropped" diagnostics.
1804
+ */
1805
+ totalBytesWritten: number;
1806
+ /** True iff the child is still alive. */
1807
+ running: boolean;
1808
+ /** Error from spawn() itself (ENOENT, etc.) once surfaced. */
1809
+ spawnError?: string;
1810
+ }
1811
+ declare class JobRegistry {
1812
+ private readonly jobs;
1813
+ private nextId;
1814
+ /**
1815
+ * Spawn a background child. Resolves after `waitSec` OR on ready
1816
+ * signal OR on early exit, whichever comes first. The child continues
1817
+ * to run (and buffer output) regardless of which path fires.
1818
+ */
1819
+ start(command: string, opts: JobStartOptions): Promise<JobStartResult>;
1820
+ /**
1821
+ * Read a job's accumulated output. `since` lets a caller poll
1822
+ * incrementally: pass the `byteLength` returned by the previous call to
1823
+ * get only newly-written content. Returns both full output and a
1824
+ * running snapshot so the caller can use whichever.
1825
+ */
1826
+ read(id: number, opts?: {
1827
+ since?: number;
1828
+ tailLines?: number;
1829
+ }): JobReadResult | null;
1830
+ /**
1831
+ * Send SIGTERM, wait `graceMs`, then SIGKILL if still alive. Returns
1832
+ * the final job record (or null when the job id is unknown). Safe to
1833
+ * call on an already-exited job — returns the record unchanged.
1834
+ */
1835
+ stop(id: number, opts?: {
1836
+ graceMs?: number;
1837
+ }): Promise<JobRecord | null>;
1838
+ list(): JobRecord[];
1839
+ /**
1840
+ * Best-effort kill of every still-running job. Called on TUI shutdown
1841
+ * so dev servers don't outlive the Reasonix process. Resolves after
1842
+ * every child has closed or a hard deadline passes (3s total).
1843
+ */
1844
+ shutdown(deadlineMs?: number): Promise<void>;
1845
+ /** Count of still-running jobs — drives the TUI status-bar indicator. */
1846
+ runningCount(): number;
1847
+ }
1848
+ interface JobReadResult {
1849
+ output: string;
1850
+ /** Total bytes ever in the buffer (pre-slice). Caller passes back as `since`. */
1851
+ byteLength: number;
1852
+ running: boolean;
1853
+ exitCode: number | null;
1854
+ command: string;
1855
+ pid: number | null;
1856
+ spawnError?: string;
1857
+ }
1858
+
1679
1859
  /**
1680
1860
  * Native shell tool — lets the model run commands inside the sandbox
1681
1861
  * root so it can actually verify its own work (run tests, check git
@@ -1731,6 +1911,13 @@ interface ShellToolsOptions {
1731
1911
  * (CI, benchmarks) where a human can't be in the loop to confirm.
1732
1912
  */
1733
1913
  allowAll?: boolean;
1914
+ /**
1915
+ * Background-process registry shared between `run_background`,
1916
+ * `job_output`, `stop_job`, `list_jobs`, and the /jobs /kill slashes.
1917
+ * When omitted, the registrar builds its own — but the caller
1918
+ * usually wants to provide one so the TUI can tail it too.
1919
+ */
1920
+ jobs?: JobRegistry;
1734
1921
  }
1735
1922
  /**
1736
1923
  * Tokenize a shell-ish command string into argv. Handles single/double
@@ -2930,7 +3117,7 @@ declare function restoreSnapshots(snapshots: EditSnapshot[], rootDir: string): A
2930
3117
  * the Cache-First Loop is trying to conserve. The SEARCH/REPLACE spec
2931
3118
  * is the one unavoidable bloat; we trim everything else.
2932
3119
  */
2933
- declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, list_directory, search_files, etc.) rooted at the user's working directory.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills (\uD83E\uDDEC)\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. 
Skills marked with **\uD83E\uDDEC** spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so \uD83E\uDDEC skills are how you keep the main session lean.\n\nTwo built-ins ship by default:\n- **\uD83E\uDDEC explore** \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **\uD83E\uDDEC research** \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a \uD83E\uDDEC skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. 
If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. 
Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# Style\n\n- Show edits; don't narrate them in prose. \"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n";
3120
+ declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, list_directory, search_files, etc.) rooted at the user's working directory.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills (\uD83E\uDDEC)\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. 
Skills marked with **\uD83E\uDDEC** spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so \uD83E\uDDEC skills are how you keep the main session lean.\n\nTwo built-ins ship by default:\n- **\uD83E\uDDEC explore** \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **\uD83E\uDDEC research** \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a \uD83E\uDDEC skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. 
If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. 
Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. \"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n";
2934
3121
  /**
2935
3122
  * Inject the project's `.gitignore` content into the system prompt as a
2936
3123
  * "respect this on top of the built-in denylist" hint. We don't parse
@@ -2959,6 +3146,21 @@ declare function codeSystemPrompt(rootDir: string): string;
2959
3146
  */
2960
3147
  /** One of the preset bundles (model + harvest + branch combo). */
2961
3148
  type PresetName = "fast" | "smart" | "max";
3149
+ /**
3150
+ * How `reasonix code` handles model-issued edits:
3151
+ * - "review" — queue the edit into pendingEdits; the user commits it with /apply or `y`.
3152
+ * - "auto" — apply immediately, snapshot for /undo, show a short undo
3153
+ * banner so the user can roll back with one keystroke.
3154
+ * Persisted so `/mode auto` survives a relaunch. Missing → "review".
3155
+ */
3156
+ type EditMode = "review" | "auto";
3157
+ /**
3158
+ * reasoning_effort cap for the model. "max" is the agent-class default;
3159
+ * "high" is cheaper / faster. Persisted so `/effort high` survives a
3160
+ * relaunch — earlier versions silently reverted to "max" on every new
3161
+ * session, which burned budget unexpectedly.
3162
+ */
3163
+ type ReasoningEffort = "high" | "max";
2962
3164
  interface ReasonixConfig {
2963
3165
  apiKey?: string;
2964
3166
  baseUrl?: string;
@@ -2967,6 +3169,22 @@ interface ReasonixConfig {
2967
3169
  * Maps to model + harvest + branch combos (see presets.ts). Missing → "fast".
2968
3170
  */
2969
3171
  preset?: PresetName;
3172
+ /**
3173
+ * Edit-gate mode for `reasonix code`. See EditMode doc. Absent → "review".
3174
+ */
3175
+ editMode?: EditMode;
3176
+ /**
3177
+ * Set to `true` the first time we've shown the "Shift+Tab cycles
3178
+ * review/AUTO" onboarding tip in `reasonix code`. Once seen, we stop
3179
+ * posting the tip — the bottom status bar carries the knowledge
3180
+ * forward without further nagging.
3181
+ */
3182
+ editModeHintShown?: boolean;
3183
+ /**
3184
+ * Last reasoning_effort chosen via `/effort`. Loaded on launch so
3185
+ * "high" stays "high" — default is "max" when unset.
3186
+ */
3187
+ reasoningEffort?: ReasoningEffort;
2970
3188
  /**
2971
3189
  * Default MCP server specs to bridge on every `reasonix chat`, in the
2972
3190
  * same `"name=cmd args..."` format that `--mcp` takes. Stored as strings