reasonix 0.17.1 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/{chunk-3ALFOYE6.js → chunk-RTVI2CLX.js} +3 -7
- package/dist/cli/chunk-RTVI2CLX.js.map +1 -0
- package/dist/cli/index.js +7331 -8237
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/{prompt-MAHJTS7Q.js → prompt-P54FIQAH.js} +2 -2
- package/dist/index.d.ts +17 -4
- package/dist/index.js +97 -28
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
- package/dist/cli/chunk-3ALFOYE6.js.map +0 -1
- package/dist/cli/{prompt-MAHJTS7Q.js.map → prompt-P54FIQAH.js.map} +0 -0
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
import {
|
|
3
3
|
CODE_SYSTEM_PROMPT,
|
|
4
4
|
codeSystemPrompt
|
|
5
|
-
} from "./chunk-3ALFOYE6.js";
|
|
5
|
+
} from "./chunk-RTVI2CLX.js";
|
|
6
6
|
export {
|
|
7
7
|
CODE_SYSTEM_PROMPT,
|
|
8
8
|
codeSystemPrompt
|
|
9
9
|
};
|
|
10
|
-
//# sourceMappingURL=prompt-MAHJTS7Q.js.map
|
|
10
|
+
//# sourceMappingURL=prompt-P54FIQAH.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -964,11 +964,9 @@ interface PlanToolOptions {
|
|
|
964
964
|
}
|
|
965
965
|
declare function registerPlanTool(registry: ToolRegistry, opts?: PlanToolOptions): ToolRegistry;
|
|
966
966
|
|
|
967
|
-
/** Isolated child loop. Inherits parent registry minus spawn_subagent + submit_plan; no hooks; non-streaming. */
|
|
968
|
-
|
|
969
967
|
/** Side-channel — subagents run inside a tool-dispatch frame, can't go through parent's `LoopEvent` stream. */
|
|
970
968
|
interface SubagentEvent {
|
|
971
|
-
kind: "start" | "progress" | "end";
|
|
969
|
+
kind: "start" | "progress" | "end" | "inner" | "phase";
|
|
972
970
|
task: string;
|
|
973
971
|
skillName?: string;
|
|
974
972
|
model?: string;
|
|
@@ -979,6 +977,10 @@ interface SubagentEvent {
|
|
|
979
977
|
turns?: number;
|
|
980
978
|
costUsd?: number;
|
|
981
979
|
usage?: Usage;
|
|
980
|
+
/** When kind === "inner": the raw child loop event. Parent UI translates to a child summary. */
|
|
981
|
+
inner?: LoopEvent;
|
|
982
|
+
/** When kind === "phase": coarse status verb for the activity row. */
|
|
983
|
+
phase?: "exploring" | "summarising";
|
|
982
984
|
}
|
|
983
985
|
interface SubagentSink {
|
|
984
986
|
current: ((ev: SubagentEvent) => void) | null;
|
|
@@ -1177,6 +1179,15 @@ interface SessionInfo {
|
|
|
1177
1179
|
size: number;
|
|
1178
1180
|
messageCount: number;
|
|
1179
1181
|
mtime: Date;
|
|
1182
|
+
meta: SessionMeta;
|
|
1183
|
+
}
|
|
1184
|
+
interface SessionMeta {
|
|
1185
|
+
branch?: string;
|
|
1186
|
+
summary?: string;
|
|
1187
|
+
totalCostUsd?: number;
|
|
1188
|
+
turnCount?: number;
|
|
1189
|
+
/** Absolute path of the workspace root the session was created/used in. */
|
|
1190
|
+
workspace?: string;
|
|
1180
1191
|
}
|
|
1181
1192
|
declare function sessionsDir(): string;
|
|
1182
1193
|
declare function sessionPath(name: string): string;
|
|
@@ -1699,6 +1710,8 @@ interface InspectionReport {
|
|
|
1699
1710
|
tools: SectionResult<McpTool>;
|
|
1700
1711
|
resources: SectionResult<McpResource>;
|
|
1701
1712
|
prompts: SectionResult<McpPrompt>;
|
|
1713
|
+
/** Wall-clock for the three list calls combined; surfaced as the server's "p95-ish" latency in the browser. */
|
|
1714
|
+
elapsedMs: number;
|
|
1702
1715
|
}
|
|
1703
1716
|
type SectionResult<T> = {
|
|
1704
1717
|
supported: true;
|
|
@@ -1753,7 +1766,7 @@ interface EditSnapshot {
|
|
|
1753
1766
|
declare function snapshotBeforeEdits(blocks: EditBlock[], rootDir: string): EditSnapshot[];
|
|
1754
1767
|
declare function restoreSnapshots(snapshots: EditSnapshot[], rootDir: string): ApplyResult[];
|
|
1755
1768
|
|
|
1756
|
-
declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use `ask_choice` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.\n\n# When to ask the user to pick (ask_choice)\n\nYou have an `ask_choice` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. 
The tool fires an arrow-key picker that's strictly better.\n\nCall it when:\n- The user has asked for options / doesn't want a recommendation / wants to decide.\n- You've analyzed multiple approaches and the final call is theirs.\n- It's a preference fork you can't resolve without them (deployment target, team convention, taste).\n\nSkip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).\n\nEach option: short stable id (A/B/C), one-line title, optional summary. `allowCustom: true` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine (\"I see three directions \u2014 letting you pick\"); don't repeat the options in it. After the call, STOP.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. Entries tagged `[\uD83E\uDDEC subagent]` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.\n\n**When you call `run_skill`, the `name` is ONLY the identifier before the tag** \u2014 e.g. 
`run_skill({ name: \"explore\", arguments: \"...\" })`, NOT `\"[\uD83E\uDDEC subagent] explore\"` and NOT `\"explore [\uD83E\uDDEC subagent]\"`. The tag is display sugar; the name argument is just the bare identifier.\n\nTwo built-ins ship by default:\n- **explore** `[\uD83E\uDDEC subagent]` \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **research** `[\uD83E\uDDEC subagent]` \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a subagent skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. 
Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# When the user wants to switch project / working directory\n\nIf the user asks to switch / change / open a different directory or project (\"\u5207\u6362\u5230...\", \"switch to ...\", \"let's work in ...\", \"open the X project\"), call **`change_workspace`** with the absolute target path. The tool always requires the user's explicit approval via a TUI modal \u2014 your call surfaces a \"switch / deny\" prompt, and STOPS your turn until they pick. After approval the filesystem / shell / memory tools re-register against the new root and your subsequent calls land there.\n\nHard rules:\n- Do NOT try to switch via `run_command` (`cd`, `pushd`, etc.) \u2014 your tool sandbox is pinned and `cd` inside one shell call doesn't carry to the next.\n- Do NOT chain other tool calls in the same turn as `change_workspace` \u2014 wait for the user's confirmation. Their next message will tell you whether the switch happened.\n- Do NOT call `change_workspace` to \"preview\" a sibling directory; only when the user explicitly asked to change projects.\n- The user can also type `/cwd <path>` themselves \u2014 fine, you'll see the new root take effect on the next turn either way.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. 
Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. 
The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. \"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n\nCost-aware escalation (when you're running on deepseek-v4-flash):\n\nIf a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.\n\nTwo accepted forms:\n- `<<<NEEDS_PRO>>>` \u2014 bare marker, no rationale.\n- `<<<NEEDS_PRO: <one-sentence reason>>>>` \u2014 preferred. The reason text appears in the user-visible warning (\"\u21E7 flash requested escalation \u2014 <your reason>\"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested `>` characters. 
Examples: `<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>` or `<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>`.\n\nDo NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).\n\nFormatting (rendered in a TUI with a real markdown renderer):\n- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (`| col | col |` header + `| --- | --- |` separator). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.\n- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.\n- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (```).\n- Do NOT draw decorative frames around content with `\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.\n- For flow charts and diagrams: a plain bullet list with `\u2192` or `\u2193` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.\n";
|
|
1769
|
+
declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use `ask_choice` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.\n\n# When to ask the user to pick (ask_choice)\n\nYou have an `ask_choice` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. 
The tool fires an arrow-key picker that's strictly better.\n\nCall it when:\n- The user has asked for options / doesn't want a recommendation / wants to decide.\n- You've analyzed multiple approaches and the final call is theirs.\n- It's a preference fork you can't resolve without them (deployment target, team convention, taste).\n\nSkip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).\n\nEach option: short stable id (A/B/C), one-line title, optional summary. `allowCustom: true` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine (\"I see three directions \u2014 letting you pick\"); don't repeat the options in it. After the call, STOP.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. Entries tagged `[\uD83E\uDDEC subagent]` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.\n\n**When you call `run_skill`, the `name` is ONLY the identifier before the tag** \u2014 e.g. 
`run_skill({ name: \"explore\", arguments: \"...\" })`, NOT `\"[\uD83E\uDDEC subagent] explore\"` and NOT `\"explore [\uD83E\uDDEC subagent]\"`. The tag is display sugar; the name argument is just the bare identifier.\n\nTwo built-ins ship by default:\n- **explore** `[\uD83E\uDDEC subagent]` \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **research** `[\uD83E\uDDEC subagent]` \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a subagent skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. 
Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# When the user wants to switch project / working directory\n\nYou can't. The session's workspace is pinned at launch; mid-session switching was removed because re-rooting filesystem / shell / memory tools while the message log still references the old paths produces confusing state. Tell the user to quit and relaunch with the new directory (e.g. `cd ../other-project && reasonix code`).\n\nDo NOT try to switch via `run_command` (`cd`, `pushd`, etc.) \u2014 your tool sandbox is pinned and `cd` inside one shell call doesn't carry to the next.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. 
Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. 
\"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n\nCost-aware escalation (when you're running on deepseek-v4-flash):\n\nIf a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.\n\nTwo accepted forms:\n- `<<<NEEDS_PRO>>>` \u2014 bare marker, no rationale.\n- `<<<NEEDS_PRO: <one-sentence reason>>>>` \u2014 preferred. The reason text appears in the user-visible warning (\"\u21E7 flash requested escalation \u2014 <your reason>\"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested `>` characters. Examples: `<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>` or `<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>`.\n\nDo NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).\n\nFormatting (rendered in a TUI with a real markdown renderer):\n- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (`| col | col |` header + `| --- | --- |` separator). 
Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.\n- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.\n- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (```).\n- Do NOT draw decorative frames around content with `\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.\n- For flow charts and diagrams: a plain bullet list with `\u2192` or `\u2193` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.\n";
|
|
1757
1770
|
interface CodeSystemPromptOptions {
|
|
1758
1771
|
/** True when semantic_search is registered for this run. Adds an
|
|
1759
1772
|
* explicit routing fragment so the model picks it for intent-style
|
package/dist/index.js
CHANGED
|
@@ -1242,6 +1242,7 @@ var VolatileScratch = class {
|
|
|
1242
1242
|
};
|
|
1243
1243
|
|
|
1244
1244
|
// src/memory/session.ts
|
|
1245
|
+
import { execFileSync } from "child_process";
|
|
1245
1246
|
import {
|
|
1246
1247
|
appendFileSync,
|
|
1247
1248
|
chmodSync,
|
|
@@ -1249,6 +1250,7 @@ import {
|
|
|
1249
1250
|
mkdirSync,
|
|
1250
1251
|
readFileSync as readFileSync3,
|
|
1251
1252
|
readdirSync,
|
|
1253
|
+
renameSync,
|
|
1252
1254
|
statSync,
|
|
1253
1255
|
unlinkSync,
|
|
1254
1256
|
writeFileSync
|
|
@@ -1299,26 +1301,50 @@ function listSessions() {
|
|
|
1299
1301
|
const dir = sessionsDir();
|
|
1300
1302
|
if (!existsSync3(dir)) return [];
|
|
1301
1303
|
try {
|
|
1302
|
-
const files = readdirSync(dir).filter(
|
|
1304
|
+
const files = readdirSync(dir).filter(
|
|
1305
|
+
(f) => f.endsWith(".jsonl") && !f.endsWith(".events.jsonl")
|
|
1306
|
+
);
|
|
1303
1307
|
return files.map((file) => {
|
|
1304
1308
|
const path = join3(dir, file);
|
|
1305
1309
|
const stat2 = statSync(path);
|
|
1306
1310
|
const name = file.replace(/\.jsonl$/, "");
|
|
1307
1311
|
const messageCount = countLines(path);
|
|
1308
|
-
return {
|
|
1312
|
+
return {
|
|
1313
|
+
name,
|
|
1314
|
+
path,
|
|
1315
|
+
size: stat2.size,
|
|
1316
|
+
messageCount,
|
|
1317
|
+
mtime: stat2.mtime,
|
|
1318
|
+
meta: loadSessionMeta(name)
|
|
1319
|
+
};
|
|
1309
1320
|
}).sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
|
|
1310
1321
|
} catch {
|
|
1311
1322
|
return [];
|
|
1312
1323
|
}
|
|
1313
1324
|
}
|
|
1325
|
+
/**
 * Build the path of the `.meta.json` sidecar file for a session.
 *
 * The session name is passed through `sanitizeName` and the result is
 * placed inside the directory returned by `sessionsDir()`.
 */
function metaPath(name) {
  const baseDir = sessionsDir();
  const safeName = sanitizeName(name);
  const sidecarFile = `${safeName}.meta.json`;
  return join3(baseDir, sidecarFile);
}
|
|
1328
|
+
/**
 * Load the metadata sidecar (`<name>.meta.json`) for a session.
 *
 * Returns an empty object when the sidecar does not exist, cannot be read,
 * is not valid JSON, or does not contain a JSON object. Never throws for
 * read or parse failures.
 */
function loadSessionMeta(name) {
  const p = metaPath(name);
  if (!existsSync3(p)) return {};
  try {
    const raw = JSON.parse(readFileSync3(p, "utf8"));
    // `typeof [] === "object"` as well, so arrays must be excluded explicitly:
    // a JSON array is not a metadata record and should not leak to callers.
    const isRecord = raw !== null && typeof raw === "object" && !Array.isArray(raw);
    return isRecord ? raw : {};
  } catch {
    // Unreadable file or malformed JSON: treat as "no metadata".
    return {};
  }
}
|
|
1314
1338
|
function deleteSession(name) {
|
|
1315
1339
|
const path = sessionPath(name);
|
|
1316
1340
|
try {
|
|
1317
1341
|
unlinkSync(path);
|
|
1318
|
-
const
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1342
|
+
for (const ext of [".events.jsonl", ".pending.json", ".meta.json", ".plan.json"]) {
|
|
1343
|
+
const sidecar = path.replace(/\.jsonl$/, ext);
|
|
1344
|
+
try {
|
|
1345
|
+
unlinkSync(sidecar);
|
|
1346
|
+
} catch {
|
|
1347
|
+
}
|
|
1322
1348
|
}
|
|
1323
1349
|
return true;
|
|
1324
1350
|
} catch {
|
|
@@ -2507,7 +2533,6 @@ var CacheFirstLoop = class {
|
|
|
2507
2533
|
return;
|
|
2508
2534
|
}
|
|
2509
2535
|
}
|
|
2510
|
-
let workspaceSwitchPending = false;
|
|
2511
2536
|
for (const call of repairedCalls) {
|
|
2512
2537
|
const name = call.function?.name ?? "";
|
|
2513
2538
|
const args = call.function?.arguments ?? "{}";
|
|
@@ -2530,11 +2555,7 @@ var CacheFirstLoop = class {
|
|
|
2530
2555
|
});
|
|
2531
2556
|
for (const w of hookWarnings(preReport.outcomes, this._turn)) yield w;
|
|
2532
2557
|
let result;
|
|
2533
|
-
if (
|
|
2534
|
-
result = JSON.stringify({
|
|
2535
|
-
error: `${name}: deferred because change_workspace in the same batch is awaiting the user's approval. Re-issue this call on your next turn \u2014 the sandbox root may have changed.`
|
|
2536
|
-
});
|
|
2537
|
-
} else if (preReport.blocked) {
|
|
2558
|
+
if (preReport.blocked) {
|
|
2538
2559
|
const blocking = preReport.outcomes[preReport.outcomes.length - 1];
|
|
2539
2560
|
const reason = (blocking?.stderr || blocking?.stdout || "blocked by PreToolUse hook").trim();
|
|
2540
2561
|
result = `[hook block] ${blocking?.hook.command ?? "<unknown>"}
|
|
@@ -2544,9 +2565,6 @@ ${reason}`;
|
|
|
2544
2565
|
signal,
|
|
2545
2566
|
maxResultTokens: DEFAULT_MAX_RESULT_TOKENS
|
|
2546
2567
|
});
|
|
2547
|
-
if (name === "change_workspace" && result.includes('"WorkspaceConfirmationError:')) {
|
|
2548
|
-
workspaceSwitchPending = true;
|
|
2549
|
-
}
|
|
2550
2568
|
const postReport = await runHooks({
|
|
2551
2569
|
hooks: this.hooks,
|
|
2552
2570
|
payload: {
|
|
@@ -5034,7 +5052,10 @@ async function spawnSubagent(opts) {
|
|
|
5034
5052
|
reasoningEffort: DEFAULT_SUBAGENT_EFFORT,
|
|
5035
5053
|
maxToolIters,
|
|
5036
5054
|
hooks: [],
|
|
5037
|
-
|
|
5055
|
+
// Streaming on so the parent UI can flip the "summarising" phase the
|
|
5056
|
+
// moment the model starts emitting the final answer (first assistant_delta
|
|
5057
|
+
// after the last tool result, before assistant_final lands).
|
|
5058
|
+
stream: true
|
|
5038
5059
|
});
|
|
5039
5060
|
const onParentAbort = () => childLoop.abort();
|
|
5040
5061
|
if (opts.parentSignal?.aborted) {
|
|
@@ -5045,10 +5066,13 @@ async function spawnSubagent(opts) {
|
|
|
5045
5066
|
let final = "";
|
|
5046
5067
|
let errorMessage;
|
|
5047
5068
|
let toolIter = 0;
|
|
5069
|
+
let summarisingEmitted = false;
|
|
5048
5070
|
try {
|
|
5049
5071
|
for await (const ev of childLoop.step(opts.task)) {
|
|
5072
|
+
sink?.current?.({ kind: "inner", task: taskPreview, skillName, model, inner: ev });
|
|
5050
5073
|
if (ev.role === "tool") {
|
|
5051
5074
|
toolIter++;
|
|
5075
|
+
summarisingEmitted = false;
|
|
5052
5076
|
sink?.current?.({
|
|
5053
5077
|
kind: "progress",
|
|
5054
5078
|
task: taskPreview,
|
|
@@ -5058,6 +5082,18 @@ async function spawnSubagent(opts) {
|
|
|
5058
5082
|
elapsedMs: Date.now() - startedAt
|
|
5059
5083
|
});
|
|
5060
5084
|
}
|
|
5085
|
+
if (ev.role === "assistant_delta" && !summarisingEmitted && (ev.content ?? "").length > 0) {
|
|
5086
|
+
summarisingEmitted = true;
|
|
5087
|
+
sink?.current?.({
|
|
5088
|
+
kind: "phase",
|
|
5089
|
+
task: taskPreview,
|
|
5090
|
+
skillName,
|
|
5091
|
+
model,
|
|
5092
|
+
phase: "summarising",
|
|
5093
|
+
iter: toolIter,
|
|
5094
|
+
elapsedMs: Date.now() - startedAt
|
|
5095
|
+
});
|
|
5096
|
+
}
|
|
5061
5097
|
if (ev.role === "assistant_final") {
|
|
5062
5098
|
if (ev.forcedSummary) {
|
|
5063
5099
|
errorMessage = ev.content?.trim() || "subagent ended without producing an answer";
|
|
@@ -5208,7 +5244,7 @@ function forkRegistryExcluding(parent, exclude) {
|
|
|
5208
5244
|
}
|
|
5209
5245
|
|
|
5210
5246
|
// src/tools/shell.ts
|
|
5211
|
-
import { spawn as spawn3 } from "child_process";
|
|
5247
|
+
import { spawn as spawn3, spawnSync } from "child_process";
|
|
5212
5248
|
import { existsSync as existsSync8, statSync as statSync4 } from "fs";
|
|
5213
5249
|
import * as pathMod3 from "path";
|
|
5214
5250
|
|
|
@@ -5375,7 +5411,11 @@ ${job.output.slice(start)}`;
|
|
|
5375
5411
|
job.signalReady();
|
|
5376
5412
|
});
|
|
5377
5413
|
const onAbort = () => this.stop(id, { graceMs: 100 });
|
|
5378
|
-
opts.signal?.
|
|
5414
|
+
if (opts.signal?.aborted) {
|
|
5415
|
+
onAbort();
|
|
5416
|
+
} else {
|
|
5417
|
+
opts.signal?.addEventListener("abort", onAbort, { once: true });
|
|
5418
|
+
}
|
|
5379
5419
|
let timer = null;
|
|
5380
5420
|
await Promise.race([
|
|
5381
5421
|
readyPromise,
|
|
@@ -5497,6 +5537,28 @@ function snapshot(job) {
|
|
|
5497
5537
|
}
|
|
5498
5538
|
|
|
5499
5539
|
// src/tools/shell.ts
|
|
5540
|
+
/**
 * Best-effort, synchronous kill of a spawned child process and its descendants.
 * Never throws; every step is individually guarded.
 *
 * Strategy, in order:
 *  1. On Windows, run `taskkill /pid <pid> /T /F` and stop there if it succeeded.
 *  2. Send SIGKILL to the negative pid (process-group form of `process.kill`).
 *     NOTE(review): this only reaches descendants if the child leads its own
 *     process group (e.g. spawned detached) — confirm at the spawn site.
 *  3. Fall back to `child.kill("SIGKILL")`, which signals the child only.
 *
 * @param child ChildProcess-like object exposing `pid`, `killed` and `kill(signal)`.
 */
function killProcessTree2(child) {
  // Nothing to do if the process never started or was already signalled.
  if (!child.pid || child.killed) return;
  if (process.platform === "win32") {
    try {
      const result = spawnSync("taskkill", ["/pid", String(child.pid), "/T", "/F"], {
        stdio: "ignore",
        windowsHide: true
      });
      // spawnSync does not throw when the command cannot be spawned or exits
      // non-zero; it reports that through `error` / `status`. Only stop here
      // when taskkill actually succeeded, otherwise try the fallbacks below.
      if (!result.error && result.status === 0) return;
    } catch {
    }
  }
  try {
    process.kill(-child.pid, "SIGKILL");
    return;
  } catch {
  }
  try {
    child.kill("SIGKILL");
  } catch {
  }
}
|
|
5500
5562
|
var DEFAULT_TIMEOUT_SEC = 60;
|
|
5501
5563
|
var DEFAULT_MAX_OUTPUT_CHARS = 32e3;
|
|
5502
5564
|
var BUILTIN_ALLOWLIST = [
|
|
@@ -5685,12 +5747,21 @@ async function runCommand(cmd, opts) {
|
|
|
5685
5747
|
let totalBytes = 0;
|
|
5686
5748
|
const byteCap = maxChars * 2 * 4;
|
|
5687
5749
|
let timedOut = false;
|
|
5750
|
+
let aborted = false;
|
|
5751
|
+
const killChildTree = () => killProcessTree2(child);
|
|
5688
5752
|
const killTimer = setTimeout(() => {
|
|
5689
5753
|
timedOut = true;
|
|
5690
|
-
|
|
5754
|
+
killChildTree();
|
|
5691
5755
|
}, timeoutMs);
|
|
5692
|
-
const onAbort = () =>
|
|
5693
|
-
|
|
5756
|
+
const onAbort = () => {
|
|
5757
|
+
aborted = true;
|
|
5758
|
+
killChildTree();
|
|
5759
|
+
};
|
|
5760
|
+
if (opts.signal?.aborted) {
|
|
5761
|
+
onAbort();
|
|
5762
|
+
} else {
|
|
5763
|
+
opts.signal?.addEventListener("abort", onAbort, { once: true });
|
|
5764
|
+
}
|
|
5694
5765
|
const onData = (chunk) => {
|
|
5695
5766
|
const b = typeof chunk === "string" ? Buffer.from(chunk) : chunk;
|
|
5696
5767
|
if (totalBytes >= byteCap) return;
|
|
@@ -7570,6 +7641,7 @@ function parseMcpSpec(input) {
|
|
|
7570
7641
|
|
|
7571
7642
|
// src/mcp/inspect.ts
|
|
7572
7643
|
async function inspectMcpServer(client) {
|
|
7644
|
+
const t0 = Date.now();
|
|
7573
7645
|
const tools = await trySection(() => client.listTools().then((r) => r.tools));
|
|
7574
7646
|
const resources = await trySection(
|
|
7575
7647
|
() => client.listResources().then((r) => r.resources)
|
|
@@ -7582,7 +7654,8 @@ async function inspectMcpServer(client) {
|
|
|
7582
7654
|
instructions: client.serverInstructions,
|
|
7583
7655
|
tools,
|
|
7584
7656
|
resources,
|
|
7585
|
-
prompts
|
|
7657
|
+
prompts,
|
|
7658
|
+
elapsedMs: Date.now() - t0
|
|
7586
7659
|
};
|
|
7587
7660
|
}
|
|
7588
7661
|
async function trySection(load) {
|
|
@@ -7866,13 +7939,9 @@ Two different rules depending on which tool:
|
|
|
7866
7939
|
|
|
7867
7940
|
# When the user wants to switch project / working directory
|
|
7868
7941
|
|
|
7869
|
-
|
|
7942
|
+
You can't. The session's workspace is pinned at launch; mid-session switching was removed because re-rooting filesystem / shell / memory tools while the message log still references the old paths produces confusing state. Tell the user to quit and relaunch with the new directory (e.g. \`cd ../other-project && reasonix code\`).
|
|
7870
7943
|
|
|
7871
|
-
|
|
7872
|
-
- Do NOT try to switch via \`run_command\` (\`cd\`, \`pushd\`, etc.) \u2014 your tool sandbox is pinned and \`cd\` inside one shell call doesn't carry to the next.
|
|
7873
|
-
- Do NOT chain other tool calls in the same turn as \`change_workspace\` \u2014 wait for the user's confirmation. Their next message will tell you whether the switch happened.
|
|
7874
|
-
- Do NOT call \`change_workspace\` to "preview" a sibling directory; only when the user explicitly asked to change projects.
|
|
7875
|
-
- The user can also type \`/cwd <path>\` themselves \u2014 fine, you'll see the new root take effect on the next turn either way.
|
|
7944
|
+
Do NOT try to switch via \`run_command\` (\`cd\`, \`pushd\`, etc.) \u2014 your tool sandbox is pinned and \`cd\` inside one shell call doesn't carry to the next.
|
|
7876
7945
|
|
|
7877
7946
|
# Foreground vs. background commands
|
|
7878
7947
|
|