reasonix 0.30.4 → 0.30.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
2
2
  import {
3
3
  CODE_SYSTEM_PROMPT,
4
4
  codeSystemPrompt
5
- } from "./chunk-COFBA5FV.js";
5
+ } from "./chunk-VWFJNLIK.js";
6
6
  export {
7
7
  CODE_SYSTEM_PROMPT,
8
8
  codeSystemPrompt
9
9
  };
10
- //# sourceMappingURL=prompt-VF7B6BWR.js.map
10
+ //# sourceMappingURL=prompt-XHICFAYN.js.map
package/dist/index.d.ts CHANGED
@@ -481,13 +481,15 @@ declare function stripHallucinatedToolMarkup(s: string): string;
481
481
 
482
482
  /** Mutating calls clear prior read-only entries so a post-edit re-read isn't flagged as repeat. */
483
483
  type IsMutating = (call: ToolCall) => boolean;
484
+ type IsStormExempt = (call: ToolCall) => boolean;
484
485
  /** Tracks (name, args) repeats; mutating calls clear prior read-only entries while still counting amongst themselves. */
485
486
  declare class StormBreaker {
486
487
  private readonly windowSize;
487
488
  private readonly threshold;
488
489
  private readonly isMutating;
490
+ private readonly isStormExempt;
489
491
  private readonly recent;
490
- constructor(windowSize?: number, threshold?: number, isMutating?: IsMutating);
492
+ constructor(windowSize?: number, threshold?: number, isMutating?: IsMutating, isStormExempt?: IsStormExempt);
491
493
  inspect(call: ToolCall): {
492
494
  suppress: boolean;
493
495
  reason?: string;
@@ -543,6 +545,8 @@ interface ToolCallRepairOptions {
543
545
  maxScavenge?: number;
544
546
  /** Mutating calls clear the storm window so a post-edit verify-read isn't seen as a repeat. */
545
547
  isMutating?: IsMutating;
548
+ /** Cheap state-inspection calls that should never trip repeat-loop suppression. */
549
+ isStormExempt?: IsStormExempt;
546
550
  }
547
551
  declare class ToolCallRepair {
548
552
  private readonly storm;
@@ -712,6 +716,8 @@ interface ToolDefinition<A = any, R = any> {
712
716
  readOnlyCheck?: (args: A) => boolean;
713
717
  /** Safe to dispatch concurrently with other parallel-safe calls in the same turn. Default false — opt-in only. */
714
718
  parallelSafe?: boolean;
719
+ /** Excluded from repeat-loop storm accounting; use only for cheap, state-inspection tools. */
720
+ stormExempt?: boolean;
715
721
  fn: (args: A, ctx?: ToolCallContext) => R | Promise<R>;
716
722
  }
717
723
  interface ToolRegistryOptions {
@@ -1205,6 +1211,9 @@ declare class JobRegistry {
1205
1211
  since?: number;
1206
1212
  tailLines?: number;
1207
1213
  }): JobReadResult | null;
1214
+ waitForJob(id: number, opts?: {
1215
+ timeoutMs?: number;
1216
+ }): Promise<JobWaitResult | null>;
1208
1217
  /** SIGTERM, wait graceMs, then SIGKILL. Idempotent on already-exited jobs. */
1209
1218
  stop(id: number, opts?: {
1210
1219
  graceMs?: number;
@@ -1224,6 +1233,11 @@ interface JobReadResult {
1224
1233
  pid: number | null;
1225
1234
  spawnError?: string;
1226
1235
  }
1236
+ interface JobWaitResult {
1237
+ exited: boolean;
1238
+ exitCode: number | null;
1239
+ latestOutput: string;
1240
+ }
1227
1241
 
1228
1242
  interface RunCommandResult {
1229
1243
  exitCode: number | null;
@@ -1986,7 +2000,7 @@ interface EditSnapshot {
1986
2000
  declare function snapshotBeforeEdits(blocks: EditBlock[], rootDir: string): EditSnapshot[];
1987
2001
  declare function restoreSnapshots(snapshots: EditSnapshot[], rootDir: string): ApplyResult[];
1988
2002
 
1989
- declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use `ask_choice` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.\n\n# When to ask the user to pick (ask_choice)\n\nYou have an `ask_choice` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. 
The tool fires an arrow-key picker that's strictly better.\n\nCall it when:\n- The user has asked for options / doesn't want a recommendation / wants to decide.\n- You've analyzed multiple approaches and the final call is theirs.\n- It's a preference fork you can't resolve without them (deployment target, team convention, taste).\n\nSkip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).\n\nEach option: short stable id (A/B/C), one-line title, optional summary. `allowCustom: true` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine (\"I see three directions \u2014 letting you pick\"); don't repeat the options in it. After the call, STOP.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. Entries tagged `[\uD83E\uDDEC subagent]` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.\n\n**When you call `run_skill`, the `name` is ONLY the identifier before the tag** \u2014 e.g. 
`run_skill({ name: \"explore\", arguments: \"...\" })`, NOT `\"[\uD83E\uDDEC subagent] explore\"` and NOT `\"explore [\uD83E\uDDEC subagent]\"`. The tag is display sugar; the name argument is just the bare identifier.\n\nTwo built-ins ship by default:\n- **explore** `[\uD83E\uDDEC subagent]` \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **research** `[\uD83E\uDDEC subagent]` \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a subagent skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. 
Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# When the user wants to switch project / working directory\n\nYou can't. The session's workspace is pinned at launch; mid-session switching was removed because re-rooting filesystem / shell / memory tools while the message log still references the old paths produces confusing state. Tell the user to quit and relaunch with the new directory (e.g. `cd ../other-project && reasonix code`).\n\nDo NOT try to switch via `run_command` (`cd`, `pushd`, etc.) \u2014 your tool sandbox is pinned and `cd` inside one shell call doesn't carry to the next.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. 
Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. 
\"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n\nCost-aware escalation (when you're running on deepseek-v4-flash):\n\nIf a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.\n\nTwo accepted forms:\n- `<<<NEEDS_PRO>>>` \u2014 bare marker, no rationale.\n- `<<<NEEDS_PRO: <one-sentence reason>>>>` \u2014 preferred. The reason text appears in the user-visible warning (\"\u21E7 flash requested escalation \u2014 <your reason>\"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested `>` characters. Examples: `<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>` or `<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>`.\n\nDo NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).\n\nFormatting (rendered in a TUI with a real markdown renderer):\n- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (`| col | col |` header + `| --- | --- |` separator). 
Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.\n- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.\n- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (```).\n- Do NOT draw decorative frames around content with `\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.\n- For flow charts and diagrams: a plain bullet list with `\u2192` or `\u2193` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.\n";
2003
+ declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use `ask_choice` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.\n\n# When to ask the user to pick (ask_choice)\n\nYou have an `ask_choice` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. 
The tool fires an arrow-key picker that's strictly better.\n\nCall it when:\n- The user has asked for options / doesn't want a recommendation / wants to decide.\n- You've analyzed multiple approaches and the final call is theirs.\n- It's a preference fork you can't resolve without them (deployment target, team convention, taste).\n\nSkip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).\n\nEach option: short stable id (A/B/C), one-line title, optional summary. `allowCustom: true` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine (\"I see three directions \u2014 letting you pick\"); don't repeat the options in it. After the call, STOP.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. Entries tagged `[\uD83E\uDDEC subagent]` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.\n\n**When you call `run_skill`, the `name` is ONLY the identifier before the tag** \u2014 e.g. 
`run_skill({ name: \"explore\", arguments: \"...\" })`, NOT `\"[\uD83E\uDDEC subagent] explore\"` and NOT `\"explore [\uD83E\uDDEC subagent]\"`. The tag is display sugar; the name argument is just the bare identifier.\n\nTwo built-ins ship by default:\n- **explore** `[\uD83E\uDDEC subagent]` \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **research** `[\uD83E\uDDEC subagent]` \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a subagent skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. 
Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# When the user wants to switch project / working directory\n\nYou can't. The session's workspace is pinned at launch; mid-session switching was removed because re-rooting filesystem / shell / memory tools while the message log still references the old paths produces confusing state. Tell the user to quit and relaunch with the new directory (e.g. `cd ../other-project && reasonix code`).\n\nDo NOT try to switch via `run_command` (`cd`, `pushd`, etc.) \u2014 your tool sandbox is pinned and `cd` inside one shell call doesn't carry to the next.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. 
Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `wait_for_job(jobId, timeoutMs?)` \u2014 block until the job exits or emits new output. Prefer this over repeating identical `job_output` calls while you're intentionally waiting.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. 
\"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n\nCost-aware escalation (when you're running on deepseek-v4-flash):\n\nIf a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.\n\nTwo accepted forms:\n- `<<<NEEDS_PRO>>>` \u2014 bare marker, no rationale.\n- `<<<NEEDS_PRO: <one-sentence reason>>>>` \u2014 preferred. The reason text appears in the user-visible warning (\"\u21E7 flash requested escalation \u2014 <your reason>\"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested `>` characters. Examples: `<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>` or `<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>`.\n\nDo NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).\n\nFormatting (rendered in a TUI with a real markdown renderer):\n- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (`| col | col |` header + `| --- | --- |` separator). 
Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.\n- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.\n- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (```).\n- Do NOT draw decorative frames around content with `\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.\n- For flow charts and diagrams: a plain bullet list with `\u2192` or `\u2193` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.\n";
1990
2004
  interface CodeSystemPromptOptions {
1991
2005
  /** True when semantic_search is registered for this run. Adds an
1992
2006
  * explicit routing fragment so the model picks it for intent-style
package/dist/index.js CHANGED
@@ -2282,15 +2282,18 @@ var StormBreaker = class {
2282
2282
  windowSize;
2283
2283
  threshold;
2284
2284
  isMutating;
2285
+ isStormExempt;
2285
2286
  recent = [];
2286
- constructor(windowSize = 6, threshold = 3, isMutating) {
2287
+ constructor(windowSize = 6, threshold = 3, isMutating, isStormExempt) {
2287
2288
  this.windowSize = windowSize;
2288
2289
  this.threshold = threshold;
2289
2290
  this.isMutating = isMutating;
2291
+ this.isStormExempt = isStormExempt;
2290
2292
  }
2291
2293
  inspect(call) {
2292
2294
  const name = call.function?.name;
2293
2295
  if (!name) return { suppress: false };
2296
+ if (this.isStormExempt?.(call)) return { suppress: false };
2294
2297
  const args = call.function?.arguments ?? "";
2295
2298
  const mutating = this.isMutating ? this.isMutating(call) : false;
2296
2299
  const readOnly = !mutating;
@@ -2391,7 +2394,12 @@ var ToolCallRepair = class {
2391
2394
  opts;
2392
2395
  constructor(opts) {
2393
2396
  this.opts = opts;
2394
- this.storm = new StormBreaker(opts.stormWindow ?? 6, opts.stormThreshold ?? 3, opts.isMutating);
2397
+ this.storm = new StormBreaker(
2398
+ opts.stormWindow ?? 6,
2399
+ opts.stormThreshold ?? 3,
2400
+ opts.isMutating,
2401
+ opts.isStormExempt
2402
+ );
2395
2403
  }
2396
2404
  /** Called at start of every user turn — fresh intent shouldn't inherit old repetition state. */
2397
2405
  resetStorm() {
@@ -2534,9 +2542,15 @@ var CacheFirstLoop = class {
2534
2542
  }
2535
2543
  return def.readOnly !== true;
2536
2544
  };
2545
+ const isStormExempt = (call) => {
2546
+ const name = call.function?.name;
2547
+ if (!name) return false;
2548
+ return registry.get(name)?.stormExempt === true;
2549
+ };
2537
2550
  this.repair = new ToolCallRepair({
2538
2551
  allowedToolNames: allowedNames,
2539
2552
  isMutating,
2553
+ isStormExempt,
2540
2554
  stormThreshold: parsePositiveIntEnv(process.env.REASONIX_STORM_THRESHOLD),
2541
2555
  stormWindow: parsePositiveIntEnv(process.env.REASONIX_STORM_WINDOW)
2542
2556
  });
@@ -3653,19 +3667,19 @@ ${mem.content}
3653
3667
  import { createHash as createHash2 } from "crypto";
3654
3668
  import {
3655
3669
  existsSync as existsSync7,
3656
- mkdirSync as mkdirSync2,
3670
+ mkdirSync as mkdirSync3,
3657
3671
  readFileSync as readFileSync8,
3658
3672
  readdirSync as readdirSync4,
3659
3673
  unlinkSync as unlinkSync2,
3660
- writeFileSync as writeFileSync2
3674
+ writeFileSync as writeFileSync3
3661
3675
  } from "fs";
3662
3676
  import { homedir as homedir4 } from "os";
3663
3677
  import { join as join7, resolve as resolve3 } from "path";
3664
3678
 
3665
3679
  // src/skills.ts
3666
- import { existsSync as existsSync6, readFileSync as readFileSync7, readdirSync as readdirSync3, statSync as statSync3 } from "fs";
3680
+ import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync7, readdirSync as readdirSync3, statSync as statSync3, writeFileSync as writeFileSync2 } from "fs";
3667
3681
  import { homedir as homedir3 } from "os";
3668
- import { join as join6, resolve as resolve2 } from "path";
3682
+ import { dirname as dirname3, join as join6, resolve as resolve2 } from "path";
3669
3683
 
3670
3684
  // src/prompt-fragments.ts
3671
3685
  var TUI_FORMATTING_RULES = `Formatting (rendered in a TUI with a real markdown renderer):
@@ -3770,6 +3784,31 @@ var SkillStore = class {
3770
3784
  }
3771
3785
  return [...byName.values()].sort((a, b) => a.name.localeCompare(b.name));
3772
3786
  }
3787
+ /** Scaffold a new skill stub at the chosen scope. Refuses to overwrite. */
3788
+ create(name, scope) {
3789
+ if (!isValidSkillName(name)) {
3790
+ return { error: `invalid skill name: "${name}" \u2014 use letters, digits, _, -, .` };
3791
+ }
3792
+ if (scope === "project" && !this.projectRoot) {
3793
+ return { error: "project scope requires a workspace \u2014 run from `reasonix code`" };
3794
+ }
3795
+ const root = scope === "project" ? join6(this.projectRoot ?? "", ".reasonix", SKILLS_DIRNAME) : join6(this.homeDir, ".reasonix", SKILLS_DIRNAME);
3796
+ const flat = join6(root, `${name}.md`);
3797
+ const folder = join6(root, name, SKILL_FILE);
3798
+ if (existsSync6(folder)) {
3799
+ return { error: `skill "${name}" already exists at ${folder}` };
3800
+ }
3801
+ mkdirSync2(dirname3(flat), { recursive: true });
3802
+ try {
3803
+ writeFileSync2(flat, skillStubBody(name), { encoding: "utf8", flag: "wx" });
3804
+ } catch (err) {
3805
+ if (err.code === "EEXIST") {
3806
+ return { error: `skill "${name}" already exists at ${flat}` };
3807
+ }
3808
+ throw err;
3809
+ }
3810
+ return { path: flat };
3811
+ }
3773
3812
  /** Resolve one skill by name. Returns `null` if not found or malformed. */
3774
3813
  read(name) {
3775
3814
  if (!isValidSkillName(name)) return null;
@@ -3829,6 +3868,22 @@ var SkillStore = class {
3829
3868
  function parseRunAs(raw) {
3830
3869
  return raw?.trim() === "subagent" ? "subagent" : "inline";
3831
3870
  }
3871
/**
 * Build the starter markdown for a new skill file: frontmatter carrying
 * `name` and a placeholder `description`, an H1 with the name, and a short
 * list of authoring tips. The text ends with a trailing newline.
 *
 * @param name  skill name, interpolated into the frontmatter and the heading
 * @returns the complete file content
 */
function skillStubBody(name) {
  return `---
name: ${name}
description: One-liner \u2014 what does this skill do?
---

# ${name}

Replace this body with the playbook the model should follow when this skill is invoked.

Tips:
- Reference tools by name (run_command, edit_file, search_content, ...)
- Add \`runAs: subagent\` to frontmatter to spawn an isolated subagent loop
- Add \`allowed-tools: read_file, search_content\` to scope a subagent's tools
`;
}
3832
3887
  function skillIndexLine(s) {
3833
3888
  const safeDesc = s.description.replace(/\n/g, " ").trim();
3834
3889
  const tag = s.runAs === "subagent" ? " [\u{1F9EC} subagent]" : "";
@@ -4071,7 +4126,7 @@ function scopeDir(opts) {
4071
4126
  return join7(opts.homeDir, USER_MEMORY_DIR, projectHash(opts.projectRoot));
4072
4127
  }
4073
4128
  function ensureDir(p) {
4074
- if (!existsSync7(p)) mkdirSync2(p, { recursive: true });
4129
+ if (!existsSync7(p)) mkdirSync3(p, { recursive: true });
4075
4130
  }
4076
4131
  function parseFrontmatter2(raw) {
4077
4132
  const lines = raw.split(/\r?\n/);
@@ -4216,7 +4271,7 @@ var MemoryStore = class {
4216
4271
  const file = join7(dir, `${name}.md`);
4217
4272
  const content = `${formatFrontmatter(entry)}${body}
4218
4273
  `;
4219
- writeFileSync2(file, content, "utf8");
4274
+ writeFileSync3(file, content, "utf8");
4220
4275
  this.regenerateIndex(input.scope);
4221
4276
  return file;
4222
4277
  }
@@ -4257,7 +4312,7 @@ var MemoryStore = class {
4257
4312
  lines.push(`- [${name}](${name}.md) \u2014 (malformed, check frontmatter)`);
4258
4313
  }
4259
4314
  }
4260
- writeFileSync2(indexPath, `${lines.join("\n")}
4315
+ writeFileSync3(indexPath, `${lines.join("\n")}
4261
4316
  `, "utf8");
4262
4317
  }
4263
4318
  };
@@ -4693,6 +4748,7 @@ function registerFilesystemTools(registry, opts) {
4693
4748
  - range: "A-B" \u2192 inclusive line range A..B, 1-indexed (e.g. "120-180" around an edit site)
4694
4749
  When none of these is given AND the file is longer than ${DEFAULT_AUTO_PREVIEW_LINES} lines, the tool auto-returns a head+tail preview with an "N lines omitted" marker rather than dumping everything. If you need the middle, re-call with a range. Prefer search_content to locate a symbol first, then read_file with a range around the hit \u2014 one scoped read beats three full-file reads.`,
4695
4750
  readOnly: true,
4751
+ stormExempt: true,
4696
4752
  parameters: {
4697
4753
  type: "object",
4698
4754
  properties: {
@@ -4773,6 +4829,7 @@ ${slice.join("\n")}`;
4773
4829
  parallelSafe: true,
4774
4830
  description: "List entries in a directory under the sandbox root. Returns one line per entry, marking directories with a trailing slash. Not recursive \u2014 use directory_tree for that.",
4775
4831
  readOnly: true,
4832
+ stormExempt: true,
4776
4833
  parameters: {
4777
4834
  type: "object",
4778
4835
  properties: {
@@ -5874,9 +5931,9 @@ function forkRegistryWithAllowList(parent, allow, alsoExclude) {
5874
5931
  import * as pathMod7 from "path";
5875
5932
 
5876
5933
  // src/config.ts
5877
- import { chmodSync as chmodSync2, mkdirSync as mkdirSync3, readFileSync as readFileSync9, writeFileSync as writeFileSync3 } from "fs";
5934
+ import { chmodSync as chmodSync2, mkdirSync as mkdirSync4, readFileSync as readFileSync9, writeFileSync as writeFileSync4 } from "fs";
5878
5935
  import { homedir as homedir5 } from "os";
5879
- import { dirname as dirname4, join as join10 } from "path";
5936
+ import { dirname as dirname5, join as join10 } from "path";
5880
5937
  function defaultConfigPath() {
5881
5938
  return join10(homedir5(), ".reasonix", "config.json");
5882
5939
  }
@@ -5890,8 +5947,8 @@ function readConfig(path2 = defaultConfigPath()) {
5890
5947
  return {};
5891
5948
  }
5892
5949
  function writeConfig(cfg, path2 = defaultConfigPath()) {
5893
- mkdirSync3(dirname4(path2), { recursive: true });
5894
- writeFileSync3(path2, JSON.stringify(cfg, null, 2), "utf8");
5950
+ mkdirSync4(dirname5(path2), { recursive: true });
5951
+ writeFileSync4(path2, JSON.stringify(cfg, null, 2), "utf8");
5895
5952
  try {
5896
5953
  chmodSync2(path2, 384);
5897
5954
  } catch {
@@ -6033,7 +6090,8 @@ var JobRegistry = class {
6033
6090
  },
6034
6091
  closedPromise: Promise.resolve(),
6035
6092
  signalClosed: () => {
6036
- }
6093
+ },
6094
+ outputWaiters: /* @__PURE__ */ new Set()
6037
6095
  };
6038
6096
  this.jobs.set(id2, job2);
6039
6097
  return {
@@ -6069,7 +6127,8 @@ var JobRegistry = class {
6069
6127
  readyPromise,
6070
6128
  signalReady: readyResolve,
6071
6129
  closedPromise,
6072
- signalClosed: closedResolve
6130
+ signalClosed: closedResolve,
6131
+ outputWaiters: /* @__PURE__ */ new Set()
6073
6132
  };
6074
6133
  this.jobs.set(id, job);
6075
6134
  let readyMatched = false;
@@ -6096,6 +6155,11 @@ ${job.output.slice(start)}`;
6096
6155
  }
6097
6156
  }
6098
6157
  }
6158
+ if (job.outputWaiters.size > 0) {
6159
+ const waiters = [...job.outputWaiters];
6160
+ job.outputWaiters.clear();
6161
+ for (const wake of waiters) wake();
6162
+ }
6099
6163
  };
6100
6164
  child.stdout?.on("data", onData);
6101
6165
  child.stderr?.on("data", onData);
@@ -6157,6 +6221,39 @@ ${job.output.slice(start)}`;
6157
6221
  spawnError: job.spawnError
6158
6222
  };
6159
6223
  }
6224
+ async waitForJob(id, opts = {}) {
6225
+ const job = this.jobs.get(id);
6226
+ if (!job) return null;
6227
+ if (!job.running) {
6228
+ return {
6229
+ exited: true,
6230
+ exitCode: job.exitCode,
6231
+ latestOutput: job.output
6232
+ };
6233
+ }
6234
+ const timeoutMs = Math.max(0, Math.min(3e4, opts.timeoutMs ?? 5e3));
6235
+ const startOutput = job.output;
6236
+ let wakeOutput = null;
6237
+ const outputPromise = new Promise((resolve10) => {
6238
+ wakeOutput = resolve10;
6239
+ job.outputWaiters.add(resolve10);
6240
+ });
6241
+ let timer = null;
6242
+ await Promise.race([
6243
+ job.closedPromise,
6244
+ outputPromise,
6245
+ new Promise((resolve10) => {
6246
+ timer = setTimeout(resolve10, timeoutMs);
6247
+ })
6248
+ ]);
6249
+ if (timer) clearTimeout(timer);
6250
+ if (wakeOutput) job.outputWaiters.delete(wakeOutput);
6251
+ return {
6252
+ exited: !job.running,
6253
+ exitCode: job.exitCode,
6254
+ latestOutput: latestOutputSince(startOutput, job.output)
6255
+ };
6256
+ }
6160
6257
  /** SIGTERM, wait graceMs, then SIGKILL. Idempotent on already-exited jobs. */
6161
6258
  async stop(id, opts = {}) {
6162
6259
  const job = this.jobs.get(id);
@@ -6236,6 +6333,11 @@ function snapshot(job) {
6236
6333
  spawnError: job.spawnError
6237
6334
  };
6238
6335
  }
6336
/**
 * Return the part of `after` that was appended since the `before` snapshot.
 *
 * @param before  earlier snapshot of the buffer (may be empty)
 * @param after   current buffer
 * @returns the suffix of `after` beyond `before` when `after` still starts
 *          with `before`; otherwise the whole of `after` (also when `before`
 *          is empty)
 */
function latestOutputSince(before, after) {
  if (!before) return after;
  if (after.startsWith(before)) return after.slice(before.length);
  return after;
}
6239
6341
 
6240
6342
  // src/tools/shell/exec.ts
6241
6343
  import { spawn as spawn4, spawnSync } from "child_process";
@@ -7215,6 +7317,7 @@ function registerShellTools(registry, opts) {
7215
7317
  description: "Read the latest output of a background job started with `run_background`. By default returns the tail of the buffer (last 80 lines). Pass `since` (the `byteLength` from a previous call) to stream only new content incrementally. Tells you whether the job is still running, so you can stop polling when it's done.",
7216
7318
  readOnly: true,
7217
7319
  parallelSafe: true,
7320
+ stormExempt: true,
7218
7321
  parameters: {
7219
7322
  type: "object",
7220
7323
  properties: {
@@ -7239,6 +7342,32 @@ function registerShellTools(registry, opts) {
7239
7342
  return formatJobRead(args.jobId, out);
7240
7343
  }
7241
7344
  });
7345
// Register `wait_for_job`: forwards to jobs.waitForJob and returns its
// exited / exitCode / latestOutput fields together with the job id, or a
// "not found" message when the id is unknown.
registry.register({
  name: "wait_for_job",
  description: "Block until a background job exits or produces new output, bounded by `timeoutMs`. Use this instead of polling `job_output` with identical args when you're intentionally waiting for state to change. Returns JSON with `exited`, `exitCode`, and `latestOutput`.",
  readOnly: true,
  parameters: {
    type: "object",
    properties: {
      jobId: { type: "integer", description: "Job id returned by run_background." },
      timeoutMs: {
        type: "integer",
        description: "Max time to block before returning if nothing changes. Clamped to 0..30000. Default 5000."
      }
    },
    required: ["jobId"]
  },
  fn: async (args) => {
    const out = await jobs.waitForJob(args.jobId, { timeoutMs: args.timeoutMs });
    // waitForJob yields null for an id it does not know.
    if (!out) return `job ${args.jobId}: not found (use list_jobs)`;
    return {
      jobId: args.jobId,
      exited: out.exited,
      exitCode: out.exitCode,
      latestOutput: out.latestOutput
    };
  }
});
7242
7371
  registry.register({
7243
7372
  name: "stop_job",
7244
7373
  description: "Stop a background job started with `run_background`. SIGTERM first; SIGKILL after a short grace period if it doesn't exit cleanly. Returns the final output + exit code. Safe to call on an already-exited job.",
@@ -7260,6 +7389,7 @@ function registerShellTools(registry, opts) {
7260
7389
  description: "List every background job started this session \u2014 running and exited \u2014 with id, command, pid, status. Use when you've lost track of which job_id corresponds to which process, or to see what's still alive.",
7261
7390
  readOnly: true,
7262
7391
  parallelSafe: true,
7392
+ stormExempt: true,
7263
7393
  parameters: { type: "object", properties: {} },
7264
7394
  fn: async () => {
7265
7395
  const all = jobs.list();
@@ -8192,16 +8322,16 @@ function truncate(s, n) {
8192
8322
  }
8193
8323
 
8194
8324
  // src/version.ts
8195
- import { existsSync as existsSync9, mkdirSync as mkdirSync4, readFileSync as readFileSync12, writeFileSync as writeFileSync4 } from "fs";
8325
+ import { existsSync as existsSync9, mkdirSync as mkdirSync5, readFileSync as readFileSync12, writeFileSync as writeFileSync5 } from "fs";
8196
8326
  import { homedir as homedir6 } from "os";
8197
- import { dirname as dirname5, join as join11 } from "path";
8327
+ import { dirname as dirname6, join as join11 } from "path";
8198
8328
  import { fileURLToPath as fileURLToPath2 } from "url";
8199
8329
  var REGISTRY_URL = "https://registry.npmjs.org/reasonix/latest";
8200
8330
  var LATEST_CACHE_TTL_MS = 24 * 60 * 60 * 1e3;
8201
8331
  var LATEST_FETCH_TIMEOUT_MS = 2e3;
8202
8332
  function readPackageVersion() {
8203
8333
  try {
8204
- let dir = dirname5(fileURLToPath2(import.meta.url));
8334
+ let dir = dirname6(fileURLToPath2(import.meta.url));
8205
8335
  for (let i = 0; i < 6; i++) {
8206
8336
  const p = join11(dir, "package.json");
8207
8337
  if (existsSync9(p)) {
@@ -8210,7 +8340,7 @@ function readPackageVersion() {
8210
8340
  return pkg.version;
8211
8341
  }
8212
8342
  }
8213
- const parent = dirname5(dir);
8343
+ const parent = dirname6(dir);
8214
8344
  if (parent === dir) break;
8215
8345
  dir = parent;
8216
8346
  }
@@ -8236,8 +8366,8 @@ function readCache(homeDirOverride) {
8236
8366
  function writeCache(entry, homeDirOverride) {
8237
8367
  try {
8238
8368
  const p = cachePath(homeDirOverride);
8239
- mkdirSync4(dirname5(p), { recursive: true });
8240
- writeFileSync4(p, JSON.stringify(entry), "utf8");
8369
+ mkdirSync5(dirname6(p), { recursive: true });
8370
+ writeFileSync5(p, JSON.stringify(entry), "utf8");
8241
8371
  } catch {
8242
8372
  }
8243
8373
  }
@@ -9041,15 +9171,15 @@ import {
9041
9171
  existsSync as existsSync10,
9042
9172
  fstatSync,
9043
9173
  ftruncateSync,
9044
- mkdirSync as mkdirSync5,
9174
+ mkdirSync as mkdirSync6,
9045
9175
  openSync as openSync2,
9046
9176
  readFileSync as readFileSync13,
9047
9177
  readSync,
9048
9178
  unlinkSync as unlinkSync3,
9049
- writeFileSync as writeFileSync5,
9179
+ writeFileSync as writeFileSync6,
9050
9180
  writeSync
9051
9181
  } from "fs";
9052
- import { dirname as dirname6, resolve as resolve9 } from "path";
9182
+ import { dirname as dirname7, resolve as resolve9 } from "path";
9053
9183
  var BLOCK_RE = /^(\S[^\n]*)\n<{7} SEARCH\n([\s\S]*?)\n?={7}\n([\s\S]*?)\n?>{7} REPLACE/gm;
9054
9184
  function parseEditBlocks(text) {
9055
9185
  const out = [];
@@ -9079,7 +9209,7 @@ function applyEditBlock(block, rootDir) {
9079
9209
  const searchEmpty = block.search.length === 0;
9080
9210
  if (searchEmpty) {
9081
9211
  try {
9082
- mkdirSync5(dirname6(absTarget), { recursive: true });
9212
+ mkdirSync6(dirname7(absTarget), { recursive: true });
9083
9213
  const fd = openSync2(absTarget, "wx");
9084
9214
  try {
9085
9215
  writeSync(fd, block.replace);
@@ -9194,7 +9324,7 @@ function restoreSnapshots(snapshots, rootDir) {
9194
9324
  message: "removed (the edit had created it)"
9195
9325
  };
9196
9326
  }
9197
- writeFileSync5(abs, snap.prevContent, "utf8");
9327
+ writeFileSync6(abs, snap.prevContent, "utf8");
9198
9328
  return {
9199
9329
  path: snap.path,
9200
9330
  status: "applied",
@@ -9370,6 +9500,7 @@ You have TWO tools for running shell commands, and picking the right one is non-
9370
9500
 
9371
9501
  After \`run_background\`, tools available to you:
9372
9502
  - \`job_output(jobId, tailLines?)\` \u2014 read recent logs to verify startup / debug errors.
9503
+ - \`wait_for_job(jobId, timeoutMs?)\` \u2014 block until the job exits or emits new output. Prefer this over repeating identical \`job_output\` calls while you're intentionally waiting.
9373
9504
  - \`list_jobs\` \u2014 see every job this session (running + exited).
9374
9505
  - \`stop_job(jobId)\` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.
9375
9506
 
@@ -9457,17 +9588,17 @@ import {
9457
9588
  closeSync as closeSync3,
9458
9589
  existsSync as existsSync12,
9459
9590
  fstatSync as fstatSync2,
9460
- mkdirSync as mkdirSync6,
9591
+ mkdirSync as mkdirSync7,
9461
9592
  openSync as openSync3,
9462
9593
  readFileSync as readFileSync15,
9463
9594
  readSync as readSync2,
9464
9595
  renameSync as renameSync2,
9465
9596
  statSync as statSync5,
9466
9597
  unlinkSync as unlinkSync4,
9467
- writeFileSync as writeFileSync6
9598
+ writeFileSync as writeFileSync7
9468
9599
  } from "fs";
9469
9600
  import { homedir as homedir7 } from "os";
9470
- import { dirname as dirname7, join as join13 } from "path";
9601
+ import { dirname as dirname8, join as join13 } from "path";
9471
9602
  function defaultUsageLogPath(homeDirOverride) {
9472
9603
  return join13(homeDirOverride ?? homedir7(), ".reasonix", "usage.jsonl");
9473
9604
  }
@@ -9508,7 +9639,7 @@ function compactUsageLogIfLarge(path2, now) {
9508
9639
  if (kept.length === lines.filter((l) => l.trim()).length) return;
9509
9640
  const tmp = `${path2}.compacting`;
9510
9641
  try {
9511
- writeFileSync6(tmp, kept.length > 0 ? `${kept.join("\n")}
9642
+ writeFileSync7(tmp, kept.length > 0 ? `${kept.join("\n")}
9512
9643
  ` : "", "utf8");
9513
9644
  renameSync2(tmp, path2);
9514
9645
  } catch {
@@ -9534,7 +9665,7 @@ function appendUsage(input) {
9534
9665
  if (input.subagent) record.subagent = input.subagent;
9535
9666
  const path2 = input.path ?? defaultUsageLogPath();
9536
9667
  try {
9537
- mkdirSync6(dirname7(path2), { recursive: true });
9668
+ mkdirSync7(dirname8(path2), { recursive: true });
9538
9669
  appendFileSync2(path2, `${JSON.stringify(record)}
9539
9670
  `, "utf8");
9540
9671
  compactUsageLogIfLarge(path2, record.ts);