npm - reasonix - Versions diffs - 0.5.23 → 0.6.0 - Mend

reasonix 0.5.23 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/cli/{chunk-ANMDY236.js → chunk-NXYPGKA3.js} +89 -23
package/dist/cli/chunk-NXYPGKA3.js.map +1 -0
package/dist/cli/index.js +3731 -1826
package/dist/cli/index.js.map +1 -1
package/dist/cli/{prompt-75XLIUTO.js → prompt-KX6A4DVX.js} +2 -2
package/dist/index.d.ts +304 -1
package/dist/index.js +1021 -110
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/cli/chunk-ANMDY236.js.map +0 -1
package/dist/cli/{prompt-75XLIUTO.js.map → prompt-KX6A4DVX.js.map} +0 -0

package/dist/index.js CHANGED

@@ -47,8 +47,8 @@ function computeWait(attempt, initial, cap, retryAfter) {
 }
 function sleep(ms, signal) {
   if (ms <= 0) return Promise.resolve();
-  return new Promise((resolve8, reject) => {
-    const timer = setTimeout(resolve8, ms);
+  return new Promise((resolve9, reject) => {
+    const timer = setTimeout(resolve9, ms);
     if (signal) {
       const onAbort = () => {
         clearTimeout(timer);
@@ -533,7 +533,7 @@ function matchesTool(hook, toolName) {
   }
 }
 function defaultSpawner(input) {
-  return new Promise((resolve8) => {
+  return new Promise((resolve9) => {
     const child = spawn(input.command, {
       cwd: input.cwd,
       shell: true,
@@ -560,7 +560,7 @@ function defaultSpawner(input) {
     });
     child.once("error", (err) => {
       clearTimeout(timer);
-      resolve8({
+      resolve9({
         exitCode: null,
         stdout,
         stderr,
@@ -570,7 +570,7 @@ function defaultSpawner(input) {
     });
     child.once("close", (code) => {
       clearTimeout(timer);
-      resolve8({
+      resolve9({
         exitCode: code,
         stdout: stdout.trim(),
         stderr: stderr.trim(),
@@ -900,6 +900,12 @@ var ToolRegistry = class {
    * bounced until the user approves a submitted plan.
    */
   _planMode = false;
+  /**
+   * Optional hook run after arg parsing but before tool.fn. Lets the TUI
+   * reroute specific tool calls (e.g. edit_file in review mode) without
+   * modifying the tool definitions themselves.
+   */
+  _interceptor = null;
   constructor(opts = {}) {
     this._autoFlatten = opts.autoFlatten !== false;
   }
@@ -911,6 +917,14 @@ var ToolRegistry = class {
   get planMode() {
     return this._planMode;
   }
+  /**
+   * Install or clear the dispatch interceptor. At most one interceptor
+   * is active at a time — calling twice replaces the previous. Pass
+   * `null` to remove.
+   */
+  setToolInterceptor(fn) {
+    this._interceptor = fn;
+  }
   register(def) {
     if (!def.name) throw new Error("tool requires a name");
     const internal = { ...def };
@@ -967,6 +981,16 @@ var ToolRegistry = class {
         error: `${name}: unavailable in plan mode \u2014 this is a read-only exploration phase. Use read_file / list_directory / search_files / directory_tree / web_search / allowlisted shell commands to investigate. Call submit_plan with your proposed plan when you're ready for the user's review.`
       });
     }
+    if (this._interceptor) {
+      try {
+        const short = await this._interceptor(name, args);
+        if (typeof short === "string") return short;
+      } catch (err) {
+        return JSON.stringify({
+          error: `${name}: interceptor failed \u2014 ${err.message}`
+        });
+      }
+    }
     try {
       const result = await tool.fn(args, { signal: opts.signal });
       const str = typeof result === "string" ? result : JSON.stringify(result);
@@ -1690,7 +1714,8 @@ var SessionStats = class {
       claudeEquivalentUsd: round(this.totalClaudeEquivalent, 6),
       savingsVsClaudePct: round(this.savingsVsClaude * 100, 2),
       cacheHitRatio: round(this.aggregateCacheHitRatio, 4),
-      lastPromptTokens: last?.usage.promptTokens ?? 0
+      lastPromptTokens: last?.usage.promptTokens ?? 0,
+      lastTurnCostUsd: round(last?.cost ?? 0, 6)
     };
   }
 };
@@ -1700,6 +1725,12 @@ function round(n, digits) {
 }
 // src/loop.ts
+var ARGS_COMPACT_THRESHOLD_TOKENS = 800;
+var TURN_END_RESULT_CAP_TOKENS = 3e3;
+var FAILURE_ESCALATION_THRESHOLD = 3;
+var ESCALATION_MODEL = "deepseek-v4-pro";
+var NEEDS_PRO_MARKER = "<<<NEEDS_PRO>>>";
+var NEEDS_PRO_BUFFER_CHARS = 80;
 var CacheFirstLoop = class {
   client;
   prefix;
@@ -1740,11 +1771,36 @@ var CacheFirstLoop = class {
    * `step()` (the prior turn's signal has already fired).
    */
   _turnAbort = new AbortController();
+  /**
+   * "Next turn should run on pro, regardless of this.model." Set by the
+   * `/pro` slash command; consumed at the next turn's start (flipping
+   * `_escalateThisTurn` on and self-clearing) so it's a fire-and-forget
+   * single-turn upgrade. Survives across multiple slash inputs so
+   * typing `/pro` and then hesitating a while before submitting a real
+   * message still applies.
+   */
+  _proArmedForNextTurn = false;
+  /**
+   * Active for the current turn only — true means every model call
+   * this turn uses pro instead of `this.model`. Turned on by EITHER
+   * the pro-armed consumption OR the mid-turn auto-escalation
+   * threshold (see `_turnFailureCount`). Cleared at turn end.
+   */
+  _escalateThisTurn = false;
+  /**
+   * Visible-failure count for the current turn. Incremented by tool
+   * dispatch paths when a result matches a known "flash is struggling"
+   * shape (SEARCH-not-found errors, scavenge / truncation / storm
+   * repair fires). Once it hits {@link FAILURE_ESCALATION_THRESHOLD},
+   * the remainder of the turn's model calls auto-upgrade to pro so
+   * the user doesn't watch flash retry the same edit 5 times.
+   */
+  _turnFailureCount = 0;
   constructor(opts) {
     this.client = opts.client;
     this.prefix = opts.prefix;
     this.tools = opts.tools ?? new ToolRegistry();
-    this.model = opts.model ?? "deepseek-v4-pro";
+    this.model = opts.model ?? "deepseek-v4-flash";
     this.reasoningEffort = opts.reasoningEffort ?? "max";
     this.maxToolIters = opts.maxToolIters ?? 64;
     this.hooks = opts.hooks ?? [];
@@ -1803,12 +1859,93 @@ var CacheFirstLoop = class {
    * authored intent we can't mechanically shrink without losing
    * meaning.
    */
-  compact(maxTokens = 4e3) {
+  /**
+   * Conservative args-only shrink fired after every tool response —
+   * strictly about ONE thing: stop oversized `edit_file` / `write_file`
+   * arguments from riding every future turn's prompt.
+   *
+   * Why this is worth doing AUTOMATICALLY (not just on /compact):
+   * Each tool-call arguments string sticks in the log verbatim. On a
+   * coding session with ~10 edits, that's 20-40K tokens of stale
+   * SEARCH/REPLACE text riding along on every turn. Even at a 98.9%
+   * cache hit rate the input cost still adds up linearly (cache-hit
+   * price × tokens × turns). Compacting IMMEDIATELY after the tool
+   * responds means the next turn's prompt is already smaller — the
+   * shrink is a one-time write that saves every future prompt.
+   *
+   * Threshold rationale: 800 tokens ≈ 3 KB. A typical 20-line edit's
+   * args land well under that; massive rewrites (whole-file content,
+   * 100+ line refactors) land above and get the compaction. Small
+   * edits stay byte-verbatim so nothing common-case changes.
+   *
+   * Safety: we ONLY shrink args whose tool has ALREADY responded.
+   * Structurally that's every call in `log.toMessages()` at this
+   * point — the current turn's assistant/tool pairing is by
+   * construction closed by the time we get here (append happens
+   * AFTER dispatch). The in-flight assistant message being built
+   * lives in scratch, not the log, so this pass can't touch it.
+   *
+   * Model impact: the model may occasionally want to reference the
+   * exact SEARCH text of a prior edit — it then reads the file
+   * directly (which shows current state) or looks at the preceding
+   * assistant text (which has its plan). Losing the stale args is a
+   * net win: one extra read_file vs. dragging N KB of stale text
+   * through every subsequent turn.
+   */
+  compactToolCallArgsAfterResponse() {
     const before = this.log.toMessages();
-    const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
+    const { messages, healedCount } = shrinkOversizedToolCallArgsByTokens(
       before,
-      maxTokens
+      ARGS_COMPACT_THRESHOLD_TOKENS
     );
+    if (healedCount === 0) return;
+    this.log.compactInPlace(messages);
+    if (this.sessionName) {
+      try {
+        rewriteSession(this.sessionName, messages);
+      } catch {
+      }
+    }
+  }
+  /**
+   * Fired at the END of a turn (just before `done` is yielded). Shrinks
+   * every tool RESULT in the log that exceeds {@link TURN_END_RESULT_CAP_TOKENS}
+   * to a tight cap so the NEXT turn's prompt doesn't re-pay for big
+   * reads or searches done earlier. Unlike the reactive 40/80%
+   * thresholds which react to context pressure, this runs unconditionally
+   * — the win is preventive: each turn's big outputs get trimmed before
+   * they ride into the next prompt. Saves compounding cost on long
+   * sessions.
+   *
+   * Why compact the JUST-finished turn's results too (not just older
+   * turns)? The same-turn iters already consumed the raw content to
+   * make their decisions — the log is only carried forward for future
+   * prompts. And "let me re-read the file" is vastly cheaper than
+   * "carry this 12KB result in every future turn's prompt forever."
+   *
+   * Safe by construction: args-compact for THIS turn already ran
+   * inside `compactToolCallArgsAfterResponse`; this pass is orthogonal.
+   */
+  autoCompactToolResultsOnTurnEnd() {
+    const before = this.log.toMessages();
+    const shrunk = shrinkOversizedToolResultsByTokens(before, TURN_END_RESULT_CAP_TOKENS);
+    if (shrunk.healedCount === 0) return;
+    this.log.compactInPlace(shrunk.messages);
+    if (this.sessionName) {
+      try {
+        rewriteSession(this.sessionName, shrunk.messages);
+      } catch {
+      }
+    }
+  }
+  compact(maxTokens = 4e3) {
+    const before = this.log.toMessages();
+    const resultsPass = shrinkOversizedToolResultsByTokens(before, maxTokens);
+    const argsPass = shrinkOversizedToolCallArgsByTokens(resultsPass.messages, maxTokens);
+    const messages = argsPass.messages;
+    const healedCount = resultsPass.healedCount + argsPass.healedCount;
+    const tokensSaved = resultsPass.tokensSaved + argsPass.tokensSaved;
+    const charsSaved = resultsPass.charsSaved + argsPass.charsSaved;
     if (healedCount > 0) {
       this.log.compactInPlace(messages);
       if (this.sessionName) {
@@ -1883,6 +2020,78 @@ var CacheFirstLoop = class {
     }
     this.stream = this.branchEnabled ? false : this._streamPreference;
   }
+  /**
+   * Arm pro for the next turn (consumed at turn start). Called by
+   * `/pro`. Idempotent — repeated calls stay armed, `disarmPro()`
+   * clears. Separate from `/preset max` which persistently switches
+   * this.model; armed state is strictly single-turn.
+   */
+  armProForNextTurn() {
+    this._proArmedForNextTurn = true;
+  }
+  /** Cancel `/pro` arming before the next turn starts. */
+  disarmPro() {
+    this._proArmedForNextTurn = false;
+  }
+  /** UI surface — true while `/pro` is queued but hasn't fired yet. */
+  get proArmed() {
+    return this._proArmedForNextTurn;
+  }
+  /** UI surface — true while the current turn is running on pro (armed or auto-escalated). */
+  get escalatedThisTurn() {
+    return this._escalateThisTurn;
+  }
+  /**
+   * Model the current model call should use. Defaults to `this.model`;
+   * upgrades to {@link ESCALATION_MODEL} when the turn is armed for
+   * pro (via `/pro`) or has hit the failure-escalation threshold.
+   * Same thinking + effort policy applies regardless — pro defaults
+   * to thinking=enabled and effort=max, which the current turn wanted
+   * anyway when flash was struggling.
+   */
+  modelForCurrentCall() {
+    return this._escalateThisTurn ? ESCALATION_MODEL : this.model;
+  }
+  /**
+   * True when the assistant's content is a self-reported escalation
+   * request. Only the FIRST line matters — the model is instructed
+   * to emit the marker as the first output token if at all. Matching
+   * anywhere else in the text is a normal content reference (e.g.
+   * the user asked about the marker itself, or prose that happens
+   * to contain angle-brackets).
+   */
+  isEscalationRequest(content) {
+    return content.trimStart().startsWith(NEEDS_PRO_MARKER);
+  }
+  /**
+   * Check whether a tool result string looks like a "flash struggled"
+   * signal and, if so, increment the turn's failure counter. Escalates
+   * the REST of the current turn to pro once the threshold is hit.
+   * Idempotent after escalation — further failures don't re-escalate,
+   * but the turn is already on pro so it doesn't matter.
+   *
+   * Return: `true` when this call tipped the turn into escalation
+   * mode (so the loop can surface a one-time warning to the user).
+   */
+  noteToolFailureSignal(resultJson, repair) {
+    let bumped = false;
+    if (resultJson.includes('"error"') && resultJson.includes("search text not found")) {
+      this._turnFailureCount += 1;
+      bumped = true;
+    }
+    if (repair) {
+      const repairs = repair.scavenged + repair.truncationsFixed + repair.stormsBroken;
+      if (repairs > 0) {
+        this._turnFailureCount += repairs;
+        bumped = true;
+      }
+    }
+    if (bumped && !this._escalateThisTurn && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
+      this._escalateThisTurn = true;
+      return true;
+    }
+    return false;
+  }
   buildMessages(pendingUser) {
     const healed = healLoadedMessages(this.log.toMessages(), DEFAULT_MAX_RESULT_CHARS);
     const msgs = [...this.prefix.toMessages(), ...healed.messages];
@@ -1937,8 +2146,23 @@ var CacheFirstLoop = class {
     this._turn++;
     this.scratch.reset();
     this.repair.resetStorm();
+    this._turnFailureCount = 0;
+    this._escalateThisTurn = false;
+    let armedConsumed = false;
+    if (this._proArmedForNextTurn) {
+      this._escalateThisTurn = true;
+      this._proArmedForNextTurn = false;
+      armedConsumed = true;
+    }
     this._turnAbort = new AbortController();
     const signal = this._turnAbort.signal;
+    if (armedConsumed) {
+      yield {
+        turn: this._turn,
+        role: "warning",
+        content: "\u21E7 /pro armed \u2014 this turn runs on deepseek-v4-pro (one-shot \xB7 disarms after turn)"
+      };
+    }
     let pendingUser = userInput;
     const toolSpecs = this.prefix.tools();
     const warnAt = Math.max(1, Math.floor(this.maxToolIters * 0.7));
@@ -1958,6 +2182,7 @@ var CacheFirstLoop = class {
           content: stoppedMsg,
           forcedSummary: true
         };
+        this.autoCompactToolResultsOnTurnEnd();
         yield { turn: this._turn, role: "done", content: stoppedMsg };
         return;
       }
@@ -2034,14 +2259,15 @@ var CacheFirstLoop = class {
               queue.push(sample);
             }
           };
+          const callModel = this.modelForCurrentCall();
           const branchPromise = runBranches(
             this.client,
             {
-              model: this.model,
+              model: callModel,
               messages,
               tools: toolSpecs.length ? toolSpecs : void 0,
               signal,
-              thinking: thinkingModeForModel(this.model),
+              thinking: thinkingModeForModel(callModel),
               reasoningEffort: this.reasoningEffort
             },
             {
@@ -2051,8 +2277,8 @@ var CacheFirstLoop = class {
             }
           );
           for (let k = 0; k < budget; k++) {
-            const sample = queue.shift() ?? await new Promise((resolve8) => {
-              waiter = resolve8;
+            const sample = queue.shift() ?? await new Promise((resolve9) => {
+              waiter = resolve9;
             });
             yield {
               turn: this._turn,
@@ -2090,21 +2316,41 @@ var CacheFirstLoop = class {
         } else if (this.stream) {
           const callBuf = /* @__PURE__ */ new Map();
           const readyIndices = /* @__PURE__ */ new Set();
+          const callModel = this.modelForCurrentCall();
+          const bufferForEscalation = callModel !== ESCALATION_MODEL;
+          let escalationBuf = "";
+          let escalationBufFlushed = false;
           for await (const chunk of this.client.stream({
-            model: this.model,
+            model: callModel,
             messages,
             tools: toolSpecs.length ? toolSpecs : void 0,
             signal,
-            thinking: thinkingModeForModel(this.model),
+            thinking: thinkingModeForModel(callModel),
             reasoningEffort: this.reasoningEffort
           })) {
             if (chunk.contentDelta) {
               assistantContent += chunk.contentDelta;
-              yield {
-                turn: this._turn,
-                role: "assistant_delta",
-                content: chunk.contentDelta
-              };
+              if (bufferForEscalation && !escalationBufFlushed) {
+                escalationBuf += chunk.contentDelta;
+                if (this.isEscalationRequest(escalationBuf)) {
+                  break;
+                }
+                if (escalationBuf.length >= NEEDS_PRO_BUFFER_CHARS || escalationBuf.includes("\n")) {
+                  escalationBufFlushed = true;
+                  yield {
+                    turn: this._turn,
+                    role: "assistant_delta",
+                    content: escalationBuf
+                  };
+                  escalationBuf = "";
+                }
+              } else {
+                yield {
+                  turn: this._turn,
+                  role: "assistant_delta",
+                  content: chunk.contentDelta
+                };
+              }
             }
             if (chunk.reasoningDelta) {
               reasoningContent += chunk.reasoningDelta;
@@ -2145,13 +2391,23 @@ var CacheFirstLoop = class {
             if (chunk.usage) usage = chunk.usage;
           }
           toolCalls = [...callBuf.values()];
+          if (bufferForEscalation && !escalationBufFlushed && escalationBuf.length > 0) {
+            if (!this.isEscalationRequest(escalationBuf)) {
+              yield {
+                turn: this._turn,
+                role: "assistant_delta",
+                content: escalationBuf
+              };
+            }
+          }
         } else {
+          const callModel = this.modelForCurrentCall();
           const resp = await this.client.chat({
-            model: this.model,
+            model: callModel,
             messages,
             tools: toolSpecs.length ? toolSpecs : void 0,
             signal,
-            thinking: thinkingModeForModel(this.model),
+            thinking: thinkingModeForModel(callModel),
             reasoningEffort: this.reasoningEffort
           });
           assistantContent = resp.content;
@@ -2161,6 +2417,7 @@ var CacheFirstLoop = class {
         }
       } catch (err) {
         if (signal.aborted) {
+          this.autoCompactToolResultsOnTurnEnd();
           yield { turn: this._turn, role: "done", content: "" };
           return;
         }
@@ -2172,7 +2429,27 @@ var CacheFirstLoop = class {
         };
         return;
       }
-      const turnStats = this.stats.record(this._turn, this.model, usage ?? new Usage());
+      if (this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
+        this._escalateThisTurn = true;
+        yield {
+          turn: this._turn,
+          role: "warning",
+          content: `\u21E7 flash requested escalation \u2014 retrying this turn on ${ESCALATION_MODEL}`
+        };
+        assistantContent = "";
+        reasoningContent = "";
+        toolCalls = [];
+        usage = null;
+        branchSummary = void 0;
+        preHarvestedPlanState = void 0;
+        iter--;
+        continue;
+      }
+      const turnStats = this.stats.record(
+        this._turn,
+        this.modelForCurrentCall(),
+        usage ?? new Usage()
+      );
       if (pendingUser !== null) {
         this.appendAndPersist({ role: "user", content: pendingUser });
         pendingUser = null;
@@ -2203,6 +2480,13 @@ var CacheFirstLoop = class {
         repair: report,
         branch: branchSummary
       };
+      if (this.noteToolFailureSignal("", report)) {
+        yield {
+          turn: this._turn,
+          role: "warning",
+          content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this._turnFailureCount} repair/error signals. Next turn falls back to ${this.model} unless /pro is armed.`
+        };
+      }
       if (report.stormsBroken > 0) {
         const noteTail = report.notes.length ? ` \u2014 ${report.notes[report.notes.length - 1]}` : "";
         const allSuppressed = repairedCalls.length === 0 && toolCalls.length > 0;
@@ -2214,13 +2498,14 @@ var CacheFirstLoop = class {
         };
       }
       if (repairedCalls.length === 0) {
+        this.autoCompactToolResultsOnTurnEnd();
         yield { turn: this._turn, role: "done", content: assistantContent };
         return;
       }
       const ctxMax = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
       if (usage) {
         const ratio = usage.promptTokens / ctxMax;
-        if (ratio > 0.6 && ratio <= 0.8) {
+        if (ratio > 0.4 && ratio <= 0.8) {
           const before = usage.promptTokens;
           const soft = this.compact(4e3);
           if (soft.healedCount > 0) {
@@ -2318,6 +2603,14 @@ ${reason}`;
           name,
           content: result
         });
+        this.compactToolCallArgsAfterResponse();
+        if (this.noteToolFailureSignal(result)) {
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this._turnFailureCount} edit failure(s). Next turn falls back to ${this.model} unless /pro is armed.`
+          };
+        }
         yield {
           turn: this._turn,
           role: "tool",
@@ -2341,13 +2634,15 @@ ${reason}`;
         role: "user",
         content: "I'm out of tool-call budget for this turn. Summarize in plain prose what you learned from the tool results above. Do NOT emit any tool calls, function-call markup, DSML invocations, or SEARCH/REPLACE edit blocks \u2014 they will be silently discarded. Just plain text."
       });
+      const summaryModel = "deepseek-v4-flash";
+      const summaryEffort = "high";
       const resp = await this.client.chat({
-        model: this.model,
+        model: summaryModel,
         messages,
         // no tools → model is forced to answer in text
         signal: this._turnAbort.signal,
-        thinking: thinkingModeForModel(this.model),
-        reasoningEffort: this.reasoningEffort
+        thinking: thinkingModeForModel(summaryModel),
+        reasoningEffort: summaryEffort
       });
       const rawContent = resp.content?.trim() ?? "";
       const cleaned = stripHallucinatedToolMarkup(rawContent);
@@ -2356,7 +2651,7 @@ ${reason}`;
       const annotated = `${reasonPrefix}
 ${summary}`;
-      const summaryStats = this.stats.record(this._turn, this.model, resp.usage ?? new Usage());
+      const summaryStats = this.stats.record(this._turn, summaryModel, resp.usage ?? new Usage());
       this.appendAndPersist(this.assistantMessage(summary, [], resp.reasoningContent ?? void 0));
       yield {
         turn: this._turn,
@@ -2365,6 +2660,7 @@ ${summary}`;
         stats: summaryStats,
         forcedSummary: true
       };
+      this.autoCompactToolResultsOnTurnEnd();
       yield { turn: this._turn, role: "done", content: summary };
     } catch (err) {
       const label = errorLabelFor(opts.reason, this.maxToolIters);
@@ -2374,6 +2670,7 @@ ${summary}`;
         content: "",
         error: `${label} and the fallback summary call failed: ${err.message}. Run /clear and retry with a narrower question, or raise --max-tool-iters.`
       };
+      this.autoCompactToolResultsOnTurnEnd();
       yield { turn: this._turn, role: "done", content: "" };
     }
   }
@@ -2503,6 +2800,56 @@ function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
   });
   return { messages: out, healedCount, tokensSaved, charsSaved };
 }
+function shrinkOversizedToolCallArgsByTokens(messages, maxTokens) {
+  let healedCount = 0;
+  let tokensSaved = 0;
+  let charsSaved = 0;
+  const out = messages.map((msg) => {
+    if (msg.role !== "assistant" || !Array.isArray(msg.tool_calls)) return msg;
+    let changed = false;
+    const newCalls = msg.tool_calls.map((call) => {
+      const args = call.function?.arguments;
+      if (typeof args !== "string" || args.length <= maxTokens) return call;
+      const beforeTokens = countTokens(args);
+      if (beforeTokens <= maxTokens) return call;
+      const shrunk = shrinkJsonLongStrings(args);
+      const afterTokens = countTokens(shrunk);
+      if (afterTokens >= beforeTokens) return call;
+      changed = true;
+      healedCount += 1;
+      tokensSaved += beforeTokens - afterTokens;
+      charsSaved += args.length - shrunk.length;
+      return { ...call, function: { ...call.function, arguments: shrunk } };
+    });
+    if (!changed) return msg;
+    return { ...msg, tool_calls: newCalls };
+  });
+  return { messages: out, healedCount, tokensSaved, charsSaved };
+}
+function shrinkJsonLongStrings(jsonStr) {
+  let parsed;
+  try {
+    parsed = JSON.parse(jsonStr);
+  } catch {
+    const head = jsonStr.slice(0, 200);
+    return `${head}\u2026[shrunk: ${jsonStr.length} chars, unparsed]`;
+  }
+  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+    return jsonStr;
+  }
+  const LONG_THRESHOLD = 300;
+  const input = parsed;
+  const output = {};
+  for (const [k, v] of Object.entries(input)) {
+    if (typeof v === "string" && v.length > LONG_THRESHOLD) {
+      const newlines = v.match(/\n/g)?.length ?? 0;
+      output[k] = `[\u2026shrunk: ${v.length} chars, ${newlines} lines \u2014 tool already responded, see result]`;
+    } else {
+      output[k] = v;
+    }
+  }
+  return JSON.stringify(output);
+}
 function fixToolCallPairing(messages) {
   const out = [];
   let droppedAssistantCalls = 0;
@@ -2855,6 +3202,28 @@ import { join as join7, resolve as resolve3 } from "path";
 import { existsSync as existsSync6, readFileSync as readFileSync6, readdirSync as readdirSync3, statSync as statSync3 } from "fs";
 import { homedir as homedir3 } from "os";
 import { join as join6, resolve as resolve2 } from "path";
+// src/prompt-fragments.ts
+var TUI_FORMATTING_RULES = `Formatting (rendered in a TUI with a real markdown renderer):
+- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` header + \`| --- | --- |\` separator). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.
+- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.
+- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
+- Do NOT draw decorative frames around content with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.
+- For flow charts and diagrams: a plain bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
+var ESCALATION_CONTRACT = `Cost-aware escalation (when you're running on deepseek-v4-flash):
+If a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the exact string \`<<<NEEDS_PRO>>>\` as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot. Do NOT emit any other content in the same response when you request escalation.
+Use this sparingly. Normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn.`;
+var NEGATIVE_CLAIM_RULE = `Negative claims ("X is missing", "Y isn't implemented", "there's no Z") are the #1 hallucination shape. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.
+If you have a search tool (\`search_content\`, \`grep\`, web search), call it FIRST before asserting absence:
+- Returns matches \u2192 you were wrong; correct yourself and cite the matches.
+- Returns nothing \u2192 state the absence WITH the search query as evidence: \`No callers of \\\`foo()\\\` found (search_content "foo").\`
+If you have no search tool, qualify hard: "I haven't verified \u2014 this is a guess." Never assert absence with fake authority.`;
+// src/skills.ts
 var SKILLS_DIRNAME = "skills";
 var SKILL_FILE = "SKILL.md";
 var SKILLS_INDEX_MAX_CHARS = 4e3;
@@ -2997,10 +3366,10 @@ function parseRunAs(raw) {
 }
 function skillIndexLine(s) {
   const safeDesc = s.description.replace(/\n/g, " ").trim();
-  const marker = s.runAs === "subagent" ? "\u{1F9EC} " : "";
-  const max = 130 - s.name.length - marker.length;
+  const tag = s.runAs === "subagent" ? " [\u{1F9EC} subagent]" : "";
+  const max = 130 - s.name.length - tag.length;
   const clipped = safeDesc.length > max ? `${safeDesc.slice(0, Math.max(1, max - 1))}\u2026` : safeDesc;
-  return clipped ? `- ${marker}${s.name} \u2014 ${clipped}` : `- ${marker}${s.name}`;
+  return clipped ? `- ${s.name}${tag} \u2014 ${clipped}` : `- ${s.name}${tag}`;
 }
 function applySkillsIndex(basePrompt, opts = {}) {
   const store = new SkillStore(opts);
@@ -3015,7 +3384,7 @@ function applySkillsIndex(basePrompt, opts = {}) {
     "",
     "# Skills \u2014 playbooks you can invoke",
     "",
-    'One-liner index. Each entry is either a built-in or a user-authored playbook. Call `run_skill({ name: "<skill-name>", arguments: "<task>" })` to invoke one. Skills marked with \u{1F9EC} spawn an **isolated subagent** \u2014 its tool calls and reasoning never enter your context, only its final answer does. Use \u{1F9EC} skills for tasks that would otherwise flood your context (deep exploration, multi-step research, anything where you only need the conclusion). Plain skills are inlined: their body becomes a tool result you read and act on directly. The user can also invoke a skill via `/skill <name>`.',
+    'One-liner index. Each entry is either a built-in or a user-authored playbook. Call `run_skill({ name: "<skill-name>", arguments: "<task>" })` \u2014 the `name` is JUST the skill identifier (e.g. `"explore"`), NOT the `[\u{1F9EC} subagent]` tag that appears after it. Entries tagged `[\u{1F9EC} subagent]` spawn an **isolated subagent** \u2014 its tool calls and reasoning never enter your context, only its final answer does. Use subagent skills for tasks that would otherwise flood your context (deep exploration, multi-step research, anything where you only need the conclusion). Plain skills are inlined: their body becomes a tool result you read and act on directly. The user can also invoke a skill via `/skill <name>`.',
     "",
     "```",
     truncated,
@@ -3037,12 +3406,9 @@ Your final answer:
 - If the question can't be answered from what you found, say so plainly and suggest where to look next.
 - No follow-up offers, no "let me know if you need more." The parent will ask again if they need more.
-Formatting (rendered in a TUI):
-- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` + \`| --- | --- |\`). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C) \u2014 they break word-wrap.
-- Keep table cells short; if a cell needs a paragraph, use bullets below the table instead.
-- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
-- NEVER draw decorative frames around code or text with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. Use plain code blocks; the renderer adds its own border.
-- For flow charts: use a bullet list with \`\u2192\` or \`\u2193\` between steps, not ASCII boxes-and-arrows.
+${NEGATIVE_CLAIM_RULE}
+${TUI_FORMATTING_RULES}
 The 'task' the parent gave you is the question you must answer. Treat any other reading of it as scope creep.`;
 var BUILTIN_RESEARCH_BODY = `You are running as a research subagent. Your job is to gather information from code AND the web, synthesize it, and return one focused conclusion.
@@ -3059,12 +3425,9 @@ Your final answer:
 - Distinguish "I verified this in code" from "I read this on a docs page" \u2014 the parent will trust the former more.
 - If the answer is uncertain, say so. Don't invent confidence.
-Formatting (rendered in a TUI):
-- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` + \`| --- | --- |\`). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C) \u2014 they break word-wrap.
-- Keep table cells short; if a cell needs a paragraph, use bullets below the table instead.
-- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
-- NEVER draw decorative frames around code or text with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. Use plain code blocks; the renderer adds its own border.
-- For flow charts: use a bullet list with \`\u2192\` or \`\u2193\` between steps, not ASCII boxes-and-arrows.
+${NEGATIVE_CLAIM_RULE}
+${TUI_FORMATTING_RULES}
 The 'task' the parent gave you is the research question. Stay on it.`;
 var BUILTIN_SKILLS = Object.freeze([
@@ -3366,6 +3729,9 @@ import { promises as fs } from "fs";
 import * as pathMod from "path";
 var DEFAULT_MAX_READ_BYTES = 2 * 1024 * 1024;
 var DEFAULT_MAX_LIST_BYTES = 256 * 1024;
+var DEFAULT_AUTO_PREVIEW_LINES = 200;
+var AUTO_PREVIEW_HEAD_LINES = 80;
+var AUTO_PREVIEW_TAIL_LINES = 40;
 var SKIP_DIR_NAMES = /* @__PURE__ */ new Set([
   "node_modules",
   ".git",
@@ -3458,14 +3824,22 @@ function registerFilesystemTools(registry, opts) {
   };
   registry.register({
     name: "read_file",
-    description: "Read a file under the sandbox root. Returns the full contents (truncated with a notice if larger than the per-call cap). Paths may be relative to the root or absolute-under-root.",
+    description: `Read a file under the sandbox root. To save context, PREFER to scope the read instead of pulling the whole file:
+  - head: N  \u2192 first N lines (imports, public API, small configs)
+  - tail: N  \u2192 last N lines (recently-added code, log tails)
+  - range: "A-B"  \u2192 inclusive line range A..B, 1-indexed (e.g. "120-180" around an edit site)
+When none of these is given AND the file is longer than ${DEFAULT_AUTO_PREVIEW_LINES} lines, the tool auto-returns a head+tail preview with an "N lines omitted" marker rather than dumping everything. If you need the middle, re-call with a range. Prefer search_content to locate a symbol first, then read_file with a range around the hit \u2014 one scoped read beats three full-file reads.`,
     readOnly: true,
     parameters: {
       type: "object",
       properties: {
         path: { type: "string", description: "Path to read (relative to rootDir or absolute)." },
         head: { type: "integer", description: "If set, return only the first N lines." },
-        tail: { type: "integer", description: "If set, return only the last N lines." }
+        tail: { type: "integer", description: "If set, return only the last N lines." },
+        range: {
+          type: "string",
+          description: 'Inclusive line range like "50-100" or "50-50". 1-indexed. Takes precedence over head/tail when all three are set. Out-of-range requests clamp to file bounds.'
+        }
       },
       required: ["path"]
     },
@@ -3477,21 +3851,52 @@ function registerFilesystemTools(registry, opts) {
       }
       const raw = await fs.readFile(abs);
       if (raw.length > maxReadBytes) {
-        const head = raw.slice(0, maxReadBytes).toString("utf8");
-        return `${head}
+        const headBytes = raw.slice(0, maxReadBytes).toString("utf8");
+        return `${headBytes}
-[\u2026truncated ${raw.length - maxReadBytes} bytes \u2014 file is ${raw.length} B, cap ${maxReadBytes} B. Retry with head/tail for targeted view.]`;
+[\u2026truncated ${raw.length - maxReadBytes} bytes \u2014 file is ${raw.length} B, cap ${maxReadBytes} B. Retry with head/tail/range for targeted view.]`;
       }
       const text = raw.toString("utf8");
+      let lines = text.split(/\r?\n/);
+      if (lines.length > 0 && lines[lines.length - 1] === "") lines = lines.slice(0, -1);
+      const totalLines = lines.length;
+      if (typeof args.range === "string" && /^\d+\s*-\s*\d+$/.test(args.range)) {
+        const [rawStart, rawEnd] = args.range.split("-").map((s) => Number.parseInt(s, 10));
+        const start = Math.max(1, rawStart ?? 1);
+        const end = Math.min(totalLines, Math.max(start, rawEnd ?? totalLines));
+        const slice = lines.slice(start - 1, end);
+        const label = `[range ${start}-${end} of ${totalLines} lines]`;
+        return `${label}
+${slice.join("\n")}`;
+      }
       if (typeof args.head === "number" && args.head > 0) {
-        return text.split(/\r?\n/).slice(0, args.head).join("\n");
+        const count = Math.min(args.head, totalLines);
+        const slice = lines.slice(0, count);
+        const marker = count < totalLines ? `
+[\u2026head ${count} of ${totalLines} lines \u2014 call again with range / tail for more]` : "";
+        return slice.join("\n") + marker;
       }
       if (typeof args.tail === "number" && args.tail > 0) {
-        let lines = text.split(/\r?\n/);
-        if (lines.length > 0 && lines[lines.length - 1] === "") lines = lines.slice(0, -1);
-        return lines.slice(Math.max(0, lines.length - args.tail)).join("\n");
+        const count = Math.min(args.tail, totalLines);
+        const slice = lines.slice(totalLines - count);
+        const marker = count < totalLines ? `[\u2026tail ${count} of ${totalLines} lines \u2014 call again with range / head for more]
+` : "";
+        return marker + slice.join("\n");
       }
-      return text;
+      if (totalLines <= DEFAULT_AUTO_PREVIEW_LINES) return lines.join("\n");
+      const head = lines.slice(0, AUTO_PREVIEW_HEAD_LINES).join("\n");
+      const tail = lines.slice(totalLines - AUTO_PREVIEW_TAIL_LINES).join("\n");
+      const omitted = totalLines - AUTO_PREVIEW_HEAD_LINES - AUTO_PREVIEW_TAIL_LINES;
+      return [
+        `[auto-preview: head ${AUTO_PREVIEW_HEAD_LINES} + tail ${AUTO_PREVIEW_TAIL_LINES} of ${totalLines} lines]`,
+        head,
+        `
+[\u2026 ${omitted} lines omitted \u2014 call read_file again with range:"A-B" (1-indexed) or head / tail to get the middle]
+`,
+        tail
+      ].join("\n");
     }
   });
   registry.register({
@@ -3516,21 +3921,34 @@ function registerFilesystemTools(registry, opts) {
   });
   registry.register({
     name: "directory_tree",
-    description: "Recursively list entries in a directory. Shows indented tree structure with directories marked '/'. Caps output so a huge tree doesn't drown the context.",
+    description: `Recursively list entries in a directory. Shows indented tree structure with directories marked '/'. Budget-aware by default:
+  - maxDepth defaults to 2 (root + one level). A depth-4 tree on a real repo blew ~5K tokens in one call. If you truly need deeper, pass maxDepth:N explicitly.
+  - Skips ${[...SKIP_DIR_NAMES].sort().join(", ")} unless include_deps:true. Traversing into node_modules / .git / dist is almost always token-waste.
+  - Large subtrees (>50 children) auto-collapse to "[N files, M dirs hidden \u2014 list_directory <path> to inspect]" so one huge folder can't dominate the output.
+Prefer \`list_directory\` for a single-level view, \`search_files\` to find specific paths, and \`search_content\` to find code.`,
     readOnly: true,
     parameters: {
       type: "object",
       properties: {
         path: { type: "string", description: "Root of the tree (default: sandbox root)." },
-        maxDepth: { type: "integer", description: "Max recursion depth (default 4)." }
+        maxDepth: {
+          type: "integer",
+          description: "Max recursion depth (default 2). Depth 0 shows only the top-level entries; depth 2 is usually enough to see module structure."
+        },
+        include_deps: {
+          type: "boolean",
+          description: "When true, also traverse node_modules / .git / dist / build / etc. Off by default \u2014 most exploration questions are about the user's own code."
+        }
       }
     },
     fn: async (args) => {
       const startAbs = safePath(args.path ?? ".");
-      const maxDepth = typeof args.maxDepth === "number" ? args.maxDepth : 4;
+      const maxDepth = typeof args.maxDepth === "number" ? args.maxDepth : 2;
+      const includeDeps = args.include_deps === true;
       const lines = [];
       let totalBytes = 0;
       let truncated = false;
+      const PER_DIR_CHILD_CAP = 50;
       const walk2 = async (dir, depth) => {
         if (truncated) return;
         if (depth > maxDepth) return;
@@ -3541,10 +3959,27 @@ function registerFilesystemTools(registry, opts) {
           return;
         }
         entries.sort((a, b) => a.name.localeCompare(b.name));
+        let emitted = 0;
         for (const e of entries) {
           if (truncated) return;
+          const skip = e.isDirectory() && !includeDeps && SKIP_DIR_NAMES.has(e.name);
+          if (emitted >= PER_DIR_CHILD_CAP) {
+            const remaining = entries.length - emitted;
+            let restFiles = 0;
+            let restDirs = 0;
+            for (const r of entries.slice(emitted)) {
+              if (r.isDirectory()) restDirs++;
+              else restFiles++;
+            }
+            const indent2 = "  ".repeat(depth);
+            lines.push(
+              `${indent2}[\u2026 ${remaining} entries hidden (${restDirs} dirs, ${restFiles} files) \u2014 list_directory on this path to see all]`
+            );
+            return;
+          }
           const indent = "  ".repeat(depth);
-          const line = e.isDirectory() ? `${indent}${e.name}/` : `${indent}${e.name}`;
+          const suffix = skip ? " (skipped \u2014 pass include_deps:true to traverse)" : "";
+          const line = e.isDirectory() ? `${indent}${e.name}/${suffix}` : `${indent}${e.name}`;
           totalBytes += line.length + 1;
           if (totalBytes > maxListBytes) {
             lines.push(`  [\u2026 tree truncated at ${maxListBytes} bytes \u2026]`);
@@ -3552,7 +3987,8 @@ function registerFilesystemTools(registry, opts) {
             return;
           }
           lines.push(line);
-          if (e.isDirectory()) {
+          emitted++;
+          if (e.isDirectory() && !skip) {
             await walk2(pathMod.join(dir, e.name), depth + 1);
           }
         }
@@ -4057,15 +4493,15 @@ Rules:
 - When you're done, your final assistant message is the only thing the parent will see \u2014 make it complete and self-contained. No follow-up offers, no questions, no "let me know if you need more."
 - Prefer one clear, distilled answer over a long log of what you tried.
-Formatting rules (the parent renders your reply in a TUI with a real markdown renderer):
-- For tabular data use GitHub-Flavored Markdown tables with ASCII pipes: \`| col | col |\` headers, \`| --- | --- |\` separator. NEVER draw tables with Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524). They look intentional but break terminal word-wrap and produce garbled output.
-- Keep table cells short \u2014 one short phrase per cell, not multi-line paragraphs. If a description doesn't fit in ~40 chars, use bullets below the table instead.
-- Use fenced code blocks (\`\`\`) for any code, file paths with line ranges, or shell commands.
-- NEVER draw decorative frames around content with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. The renderer handles code blocks and headings on its own \u2014 extra ASCII art adds noise without value and breaks at narrow terminal widths.
-- For flow charts and diagrams: use a markdown bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
+${NEGATIVE_CLAIM_RULE}
+${ESCALATION_CONTRACT}
+${TUI_FORMATTING_RULES}`;
 var DEFAULT_MAX_RESULT_CHARS2 = 8e3;
 var DEFAULT_MAX_ITERS = 16;
-var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-pro";
+var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-flash";
+var DEFAULT_SUBAGENT_EFFORT = "high";
 var SUBAGENT_TOOL_NAME = "spawn_subagent";
 var NEVER_INHERITED_TOOLS = /* @__PURE__ */ new Set([SUBAGENT_TOOL_NAME, "submit_plan"]);
 async function spawnSubagent(opts) {
@@ -4094,6 +4530,10 @@ async function spawnSubagent(opts) {
     prefix: childPrefix,
     tools: childTools,
     model,
+    // Subagents run on a constrained thinking budget by default — the
+    // task is already narrow by construction, and `high` cuts output
+    // tokens substantially vs `max`.
+    reasoningEffort: DEFAULT_SUBAGENT_EFFORT,
     maxToolIters,
     hooks: [],
     stream: false
@@ -4262,9 +4702,311 @@ function forkRegistryExcluding(parent, exclude) {
 }
 // src/tools/shell.ts
-import { spawn as spawn2 } from "child_process";
+import { spawn as spawn3 } from "child_process";
 import { existsSync as existsSync8, statSync as statSync4 } from "fs";
+import * as pathMod3 from "path";
+// src/tools/jobs.ts
+import { spawn as spawn2 } from "child_process";
 import * as pathMod2 from "path";
+/**
+ * Best-effort termination of a process together with its descendants.
+ *
+ * Windows: delegates to `taskkill /pid <pid> /T` (adding `/F` when the
+ * requested signal is SIGKILL). POSIX: signals the process group
+ * (`-pid`) first and falls back to signaling the single pid when the
+ * group kill throws.
+ *
+ * Failures are swallowed on purpose: the target may already have exited,
+ * and callers treat this as fire-and-forget.
+ *
+ * @param {number} pid - Positive integer pid of the process to terminate.
+ * @param {string} signal - Signal name, e.g. "SIGTERM" or "SIGKILL".
+ * @returns {void}
+ */
+function killProcessTree(pid, signal) {
+  // Refuse pids that would change the meaning of the kill call: 0 targets
+  // the caller's own process group, and a negative value would flip sign
+  // and hit an unrelated process.
+  if (!Number.isInteger(pid) || pid <= 0) return;
+  if (process.platform === "win32") {
+    const args = ["/pid", String(pid), "/T"];
+    if (signal === "SIGKILL") args.push("/F");
+    try {
+      const killer = spawn2("taskkill", args, {
+        stdio: "ignore",
+        windowsHide: true
+      });
+      // Without a listener an async spawn failure would surface as an
+      // unhandled 'error' event; ignoring it keeps this best-effort.
+      killer.on("error", () => {
+      });
+    } catch {
+      // taskkill could not be spawned — nothing more we can do here.
+    }
+    return;
+  }
+  // On POSIX, kill(-1, sig) addresses every process the user may signal.
+  if (pid === 1) return;
+  try {
+    process.kill(-pid, signal);
+    return;
+  } catch {
+    // Group kill failed (e.g. pid is not a group leader); try the pid itself.
+  }
+  try {
+    process.kill(pid, signal);
+  } catch {
+    // Process already gone or not signalable — intentionally ignored.
+  }
+}
+// Default size limit for a job's retained output buffer; used by
+// JobRegistry.start when `opts.maxBufferBytes` is not provided.
+var DEFAULT_OUTPUT_CAP_BYTES = 64 * 1024;
+// Case-insensitive patterns that mark a background job as "ready".
+// JobRegistry.start tests each one against every output chunk and the
+// accumulated buffer; the first match ends the startup wait early.
+var READY_SIGNALS = [
+  // HTTP server banners
+  /\blistening on\b/i,
+  /\blocal:\s+https?:\/\//i,
+  /\bhttps?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0)(?::\d+)?\b/i,
+  /\b(?:ready|server started|started server|app listening)\b/i,
+  // Bundlers / compilers
+  /\bcompiled successfully\b/i,
+  /\bbuild complete(?:d)?\b/i,
+  /\bwatching for (?:file )?changes\b/i,
+  /\bready in \d+/i,
+  // Generic
+  /\bstartup (?:complete|finished)\b/i
+];
+/**
+ * In-memory registry of background child processes.
+ *
+ * Each job record tracks the child, a rolling output buffer, and two
+ * promises: `readyPromise` (resolves on a readiness-banner match, on
+ * spawn error, or on close) and `closedPromise` (resolves only once the
+ * child has errored or closed). Termination paths wait on
+ * `closedPromise`, so a job that already printed its ready banner still
+ * gets its full SIGTERM grace period before SIGKILL.
+ */
+var JobRegistry = class {
+  jobs = /* @__PURE__ */ new Map();
+  nextId = 1;
+  /**
+   * Resolve when `promise` settles or after `ms` milliseconds, whichever
+   * comes first. The timer is cleared afterwards so a finished wait does
+   * not keep the event loop alive.
+   */
+  #waitUpTo(promise, ms) {
+    let timer = null;
+    const timeout = new Promise((res) => {
+      timer = setTimeout(res, ms);
+    });
+    return Promise.race([promise, timeout]).finally(() => {
+      if (timer) clearTimeout(timer);
+    });
+  }
+  /**
+   * Best-effort delivery of `signal` to a job: tree-kill by pid when a
+   * pid is known, otherwise fall back to the child handle. Errors are
+   * swallowed on purpose — the process may already be gone.
+   */
+  #signal(job, signal) {
+    if (job.pid !== null) {
+      killProcessTree(job.pid, signal);
+      return;
+    }
+    try {
+      job.child?.kill(signal);
+    } catch {
+      // Already exited or not signalable — intentionally ignored.
+    }
+  }
+  /**
+   * Spawn a background child. Resolves after `waitSec` OR on ready
+   * signal OR on early exit, whichever comes first. The child continues
+   * to run (and buffer output) regardless of which path fires.
+   *
+   * @param {string} command - Single command line; shell operators are rejected.
+   * @param {object} opts - `cwd` (required), optional `waitSec` (clamped to
+   *   0..30, default 3), `maxBufferBytes` (default DEFAULT_OUTPUT_CAP_BYTES)
+   *   and `signal` (AbortSignal; aborting stops the job).
+   * @returns {Promise<object>} `{ jobId, pid, stillRunning, readyMatched, preview, exitCode }`.
+   * @throws {Error} When the command is empty or contains a shell operator.
+   */
+  async start(command, opts) {
+    const trimmed = command.trim();
+    if (!trimmed) throw new Error("run_background: empty command");
+    const op = detectShellOperator(trimmed);
+    if (op !== null) {
+      throw new Error(
+        `run_background: shell operator "${op}" is not supported \u2014 spawn one process per background job. Compose via your orchestration, not the shell.`
+      );
+    }
+    const argv = tokenizeCommand(trimmed);
+    if (argv.length === 0) throw new Error("run_background: empty command");
+    const waitMs = Math.max(0, Math.min(30, opts.waitSec ?? 3)) * 1e3;
+    const maxBytes = opts.maxBufferBytes ?? DEFAULT_OUTPUT_CAP_BYTES;
+    const { bin, args, spawnOverrides } = prepareSpawn(argv);
+    const spawnOpts = {
+      cwd: pathMod2.resolve(opts.cwd),
+      shell: false,
+      windowsHide: true,
+      env: process.env,
+      // POSIX: detach so the child becomes its own process-group leader.
+      // Required for `process.kill(-pid, …)` later — without it a group
+      // kill fails and we end up only signaling the wrapper, leaving
+      // grandchildren (node → vite → esbuild …) orphaned.
+      // Windows: detached would spawn a new console window; leave the
+      // default and use taskkill /T for tree termination.
+      detached: process.platform !== "win32",
+      ...spawnOverrides
+    };
+    let child;
+    try {
+      child = spawn2(bin, args, spawnOpts);
+    } catch (err) {
+      // Synchronous spawn failure: record a dead job so the caller still
+      // gets an id it can inspect via read()/list().
+      const id2 = this.nextId++;
+      const job2 = {
+        id: id2,
+        command: trimmed,
+        pid: null,
+        startedAt: Date.now(),
+        exitCode: null,
+        output: `[spawn failed] ${err.message}`,
+        totalBytesWritten: 0,
+        running: false,
+        spawnError: err.message,
+        child: null,
+        readyPromise: Promise.resolve(),
+        closedPromise: Promise.resolve(),
+        signalReady: () => {
+        }
+      };
+      this.jobs.set(id2, job2);
+      return {
+        jobId: id2,
+        pid: null,
+        stillRunning: false,
+        readyMatched: false,
+        preview: job2.output,
+        exitCode: null
+      };
+    }
+    const id = this.nextId++;
+    let readyResolve = () => {
+    };
+    const readyPromise = new Promise((res) => {
+      readyResolve = res;
+    });
+    let closedResolve = () => {
+    };
+    const closedPromise = new Promise((res) => {
+      closedResolve = res;
+    });
+    const job = {
+      id,
+      command: trimmed,
+      pid: child.pid ?? null,
+      startedAt: Date.now(),
+      exitCode: null,
+      output: "",
+      totalBytesWritten: 0,
+      running: true,
+      child,
+      readyPromise,
+      closedPromise,
+      signalReady: readyResolve
+    };
+    this.jobs.set(id, job);
+    let readyMatched = false;
+    const onData = (chunk) => {
+      const s = chunk.toString();
+      job.totalBytesWritten += s.length;
+      job.output += s;
+      if (job.output.length > maxBytes) {
+        // Rolling buffer: drop the oldest content, preferring to cut at a
+        // line boundary so the retained text starts on a whole line.
+        const overflow = job.output.length - maxBytes;
+        const cut = job.output.indexOf("\n", overflow);
+        const start = cut >= 0 ? cut + 1 : overflow;
+        job.output = `[\u2026 older output dropped \u2026]
+${job.output.slice(start)}`;
+      }
+      if (!readyMatched) {
+        for (const re of READY_SIGNALS) {
+          if (re.test(s) || re.test(job.output)) {
+            readyMatched = true;
+            job.signalReady();
+            break;
+          }
+        }
+      }
+    };
+    child.stdout?.on("data", onData);
+    child.stderr?.on("data", onData);
+    const onAbort = () => {
+      // Fire-and-forget: stop() never rejects (all kill paths are guarded).
+      void this.stop(id, { graceMs: 100 });
+    };
+    // Shared teardown for 'error' and 'close': mark the job dead, drop the
+    // abort listener, and release both the startup and the close waiters.
+    const settle = () => {
+      job.running = false;
+      opts.signal?.removeEventListener("abort", onAbort);
+      job.signalReady();
+      closedResolve();
+    };
+    child.on("error", (err) => {
+      job.spawnError = err.message;
+      settle();
+    });
+    child.on("close", (code) => {
+      job.exitCode = code;
+      settle();
+    });
+    if (opts.signal?.aborted) {
+      // An already-aborted signal never fires "abort" again; honor it now.
+      onAbort();
+    } else {
+      opts.signal?.addEventListener("abort", onAbort, { once: true });
+    }
+    await this.#waitUpTo(readyPromise, waitMs);
+    return {
+      jobId: id,
+      pid: job.pid,
+      stillRunning: job.running,
+      readyMatched,
+      preview: job.output,
+      exitCode: job.exitCode
+    };
+  }
+  /**
+   * Read a job's accumulated output. `since` lets a caller poll
+   * incrementally: pass the `byteLength` returned from the last call to
+   * get only content past that offset. Both values are string offsets
+   * into the currently retained buffer, so they shift once older output
+   * has been dropped. `tailLines`, when set, keeps only the last N lines
+   * of the selected slice.
+   *
+   * @returns {object|null} `{ output, byteLength, running, exitCode,
+   *   command, pid, spawnError }`, or null when the job id is unknown.
+   */
+  read(id, opts = {}) {
+    const job = this.jobs.get(id);
+    if (!job) return null;
+    const full = job.output;
+    let slice = full;
+    if (typeof opts.since === "number" && opts.since >= 0 && opts.since < full.length) {
+      slice = full.slice(opts.since);
+    }
+    if (typeof opts.tailLines === "number" && opts.tailLines > 0) {
+      const lines = slice.split("\n");
+      const keep = lines.slice(Math.max(0, lines.length - opts.tailLines));
+      slice = keep.join("\n");
+    }
+    return {
+      output: slice,
+      byteLength: full.length,
+      running: job.running,
+      exitCode: job.exitCode,
+      command: job.command,
+      pid: job.pid,
+      spawnError: job.spawnError
+    };
+  }
+  /**
+   * Send SIGTERM, wait up to `graceMs` (default 2000) for the child to
+   * close, then SIGKILL if still alive and wait up to 800ms more.
+   * Returns the job snapshot (or null when the job id is unknown). Safe
+   * to call on an already-exited job — returns the record unchanged.
+   */
+  async stop(id, opts = {}) {
+    const job = this.jobs.get(id);
+    if (!job) return null;
+    if (!job.running || !job.child) return snapshot(job);
+    const graceMs = Math.max(0, opts.graceMs ?? 2e3);
+    this.#signal(job, "SIGTERM");
+    // Wait on closedPromise, not readyPromise: the latter is already
+    // resolved for any job that matched a ready banner, which would skip
+    // the grace period entirely.
+    await this.#waitUpTo(job.closedPromise, graceMs);
+    if (job.running) {
+      this.#signal(job, "SIGKILL");
+      await this.#waitUpTo(job.closedPromise, 800);
+    }
+    return snapshot(job);
+  }
+  /** Snapshots of every job this registry has recorded, running or not. */
+  list() {
+    return [...this.jobs.values()].map(snapshot);
+  }
+  /**
+   * Best-effort kill of every still-running job. Called on TUI shutdown
+   * so dev servers don't outlive the Reasonix process. Sends SIGTERM,
+   * waits up to min(1500, deadlineMs / 2) ms, then SIGKILLs survivors and
+   * waits for the rest of the deadline (at least 800ms). Resolves once
+   * every child has closed or the deadline (`deadlineMs`, default 5s)
+   * passes.
+   */
+  async shutdown(deadlineMs = 5e3) {
+    const start = Date.now();
+    const runningJobs = [...this.jobs.values()].filter((j) => j.running && j.child);
+    if (runningJobs.length === 0) return;
+    for (const job of runningJobs) this.#signal(job, "SIGTERM");
+    const allClosed = Promise.all(runningJobs.map((j) => j.closedPromise));
+    const graceMs = Math.min(1500, Math.max(0, deadlineMs / 2));
+    await this.#waitUpTo(allClosed, graceMs);
+    const survivors = runningJobs.filter((j) => j.running);
+    if (survivors.length === 0) return;
+    for (const job of survivors) this.#signal(job, "SIGKILL");
+    const remaining = Math.max(800, deadlineMs - (Date.now() - start));
+    await this.#waitUpTo(allClosed, remaining);
+  }
+  /** Count of still-running jobs — drives the TUI status-bar indicator. */
+  runningCount() {
+    let n = 0;
+    for (const job of this.jobs.values()) if (job.running) n++;
+    return n;
+  }
+};
+/**
+ * Build the public view of a job record: a plain object holding only the
+ * serializable fields, leaving out the child handle and the internal
+ * promise/callback plumbing.
+ *
+ * @param {object} job - Internal job record.
+ * @returns {object} Copy with id, command, pid, startedAt, exitCode,
+ *   output, totalBytesWritten, running and spawnError.
+ */
+function snapshot(job) {
+  const {
+    id,
+    command,
+    pid,
+    startedAt,
+    exitCode,
+    output,
+    totalBytesWritten,
+    running,
+    spawnError
+  } = job;
+  return {
+    id,
+    command,
+    pid,
+    startedAt,
+    exitCode,
+    output,
+    totalBytesWritten,
+    running,
+    spawnError
+  };
+}
+// src/tools/shell.ts
 var DEFAULT_TIMEOUT_SEC = 60;
 var DEFAULT_MAX_OUTPUT_CHARS = 32e3;
 var BUILTIN_ALLOWLIST = [
@@ -4433,10 +5175,10 @@ async function runCommand(cmd, opts) {
   };
   const { bin, args, spawnOverrides } = prepareSpawn(argv);
   const effectiveSpawnOpts = { ...spawnOpts, ...spawnOverrides };
-  return await new Promise((resolve8, reject) => {
+  return await new Promise((resolve9, reject) => {
     let child;
     try {
-      child = spawn2(bin, args, effectiveSpawnOpts);
+      child = spawn3(bin, args, effectiveSpawnOpts);
     } catch (err) {
       reject(err);
       return;
@@ -4466,7 +5208,7 @@ async function runCommand(cmd, opts) {
       const output = buf.length > maxChars ? `${buf.slice(0, maxChars)}
 [\u2026 truncated ${buf.length - maxChars} chars \u2026]` : buf;
-      resolve8({ exitCode: code, output, timedOut });
+      resolve9({ exitCode: code, output, timedOut });
     });
   });
 }
@@ -4474,16 +5216,16 @@ function resolveExecutable(cmd, opts = {}) {
   const platform = opts.platform ?? process.platform;
   if (platform !== "win32") return cmd;
   if (!cmd) return cmd;
-  if (cmd.includes("/") || cmd.includes("\\") || pathMod2.isAbsolute(cmd)) return cmd;
-  if (pathMod2.extname(cmd)) return cmd;
+  if (cmd.includes("/") || cmd.includes("\\") || pathMod3.isAbsolute(cmd)) return cmd;
+  if (pathMod3.extname(cmd)) return cmd;
   const env = opts.env ?? process.env;
   const pathExt = (env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD").split(";").map((e) => e.trim()).filter(Boolean);
-  const delimiter2 = opts.pathDelimiter ?? (platform === "win32" ? ";" : pathMod2.delimiter);
+  const delimiter2 = opts.pathDelimiter ?? (platform === "win32" ? ";" : pathMod3.delimiter);
   const pathDirs = (env.PATH ?? "").split(delimiter2).filter(Boolean);
   const isFile = opts.isFile ?? defaultIsFile;
   for (const dir of pathDirs) {
     for (const ext of pathExt) {
-      const full = pathMod2.win32.join(dir, cmd + ext);
+      const full = pathMod3.win32.join(dir, cmd + ext);
       if (isFile(full)) return full;
     }
   }
@@ -4553,8 +5295,8 @@ function withUtf8Codepage(cmdline) {
 function isBareWindowsName(s) {
   if (!s) return false;
   if (s.includes("/") || s.includes("\\")) return false;
-  if (pathMod2.isAbsolute(s)) return false;
-  if (pathMod2.extname(s)) return false;
+  if (pathMod3.isAbsolute(s)) return false;
+  if (pathMod3.extname(s)) return false;
   return true;
 }
 function quoteForCmdExe(arg) {
@@ -4573,17 +5315,18 @@ var NeedsConfirmationError = class extends Error {
   }
 };
 function registerShellTools(registry, opts) {
-  const rootDir = pathMod2.resolve(opts.rootDir);
+  const rootDir = pathMod3.resolve(opts.rootDir);
   const timeoutSec = opts.timeoutSec ?? DEFAULT_TIMEOUT_SEC;
   const maxOutputChars = opts.maxOutputChars ?? DEFAULT_MAX_OUTPUT_CHARS;
+  const jobs = opts.jobs ?? new JobRegistry();
   const getExtraAllowed = typeof opts.extraAllowed === "function" ? opts.extraAllowed : (() => {
-    const snapshot = opts.extraAllowed ?? [];
-    return () => snapshot;
+    const snapshot2 = opts.extraAllowed ?? [];
+    return () => snapshot2;
   })();
   const allowAll = opts.allowAll ?? false;
   registry.register({
     name: "run_command",
-    description: "Run a shell command in the project root and return its combined stdout+stderr. Common read-only inspection and test/lint/typecheck commands run immediately; anything that could mutate state, install dependencies, or touch the network is refused until the user confirms it in the TUI. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.",
+    description: "Run a shell command in the project root and return its combined stdout+stderr.\n\nConstraints (read these before the first call):\n\u2022 ONE process per call, NO shell expansion. `&&`, `||`, `|`, `;`, `>`, `<`, `2>&1` are all rejected up-front \u2014 split into separate calls and combine results in reasoning. Example: instead of `grep foo *.ts | wc -l`, use `grep -c foo *.ts`; instead of `cd sub && npm test`, use `npm test --prefix sub` (or whatever --cwd flag the binary accepts).\n\u2022 `cd` DOES NOT PERSIST between calls \u2014 each call spawns a fresh process rooted at the project. If a tool needs a subdirectory, pass it via the tool's own flag (`npm --prefix`, `cargo -C`, `git -C`, `pytest tests/\u2026`), NOT via a preceding `cd`.\n\u2022 Avoid commands with unbounded output (`netstat -ano`, `find /`, etc.) \u2014 they waste tokens. Filter at source: `netstat -ano -p TCP`, `find src -name '*.ts'`, `grep -c`, `wc -l`.\n\nCommon read-only inspection and test/lint/typecheck commands run immediately; anything that could mutate state, install dependencies, or touch the network is refused until the user confirms it in the TUI. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.",
     // Plan-mode gate: allow allowlisted commands through (git status,
     // cargo check, ls, grep …) so the model can actually investigate
     // during planning. Anything that would otherwise trigger a
@@ -4624,8 +5367,126 @@ function registerShellTools(registry, opts) {
       return formatCommandResult(cmd, result);
     }
   });
+  registry.register({ // Tool registration: start a command as a background job and return a formatted startup summary.
+    name: "run_background",
+    description: "Spawn a long-running process (dev server, watcher, any command that doesn't naturally exit) and detach. Waits up to `waitSec` seconds for startup (or until the output matches a readiness signal like 'Local:', 'listening on', 'compiled successfully'), then returns the job id + startup preview. The process keeps running; call `job_output` to tail its logs, `stop_job` to kill it, `list_jobs` to see all running jobs.\n\nSame shell constraints as run_command: NO `&&` / `||` / `|` / `;` / `>` / `<` / `2>&1`, `cd` doesn't persist. Dev servers that need a subdirectory: use the tool's own --prefix / --cwd flag. For Vite specifically, `--prefix` on npm only tells npm where package.json is; vite's server root still defaults to process cwd, so pass `vite <project-dir>` or configure via `vite.config.ts` root.\n\nUSE THIS \u2014 not `run_command` \u2014 for: npm/yarn/pnpm run dev, uvicorn / flask run, go run, cargo watch, tsc --watch, webpack serve, anything with 'dev' / 'serve' / 'watch' in the name.",
+    parameters: {
+      type: "object",
+      properties: {
+        command: {
+          type: "string",
+          description: "Full command line. Same quoting rules as run_command (no pipes / redirects / chaining)."
+        },
+        waitSec: {
+          type: "integer",
+          description: "Max seconds to wait for startup before returning. 0..30, default 3. A ready-signal match short-circuits this."
+        }
+      },
+      required: ["command"]
+    },
+    fn: async (args, ctx) => {
+      const cmd = args.command.trim();
+      if (!cmd) throw new Error("run_background: empty command"); // an empty or whitespace-only command is rejected before the allowlist check
+      if (!allowAll && !isAllowed(cmd, getExtraAllowed())) { // unless allowAll is set, the command must pass isAllowed with the extra allowed entries
+        throw new NeedsConfirmationError(cmd);
+      }
+      const result = await jobs.start(cmd, {
+        cwd: rootDir,
+        waitSec: args.waitSec, // passed through as given; NOTE(review): the 0..30 range and default 3 are presumably enforced inside jobs.start — confirm
+        signal: ctx?.signal // ctx may be absent, in which case no signal is forwarded
+      });
+      return formatJobStart(result);
+    }
+  });
+  registry.register({ // Tool registration: read buffered output of a background job and return it as formatted text.
+    name: "job_output",
+    description: "Read the latest output of a background job started with `run_background`. By default returns the tail of the buffer (last 80 lines). Pass `since` (the `byteLength` from a previous call) to stream only new content incrementally. Tells you whether the job is still running, so you can stop polling when it's done.",
+    readOnly: true,
+    parameters: {
+      type: "object",
+      properties: {
+        jobId: { type: "integer", description: "Job id returned by run_background." },
+        since: {
+          type: "integer",
+          description: "Return only output written past this byte offset (for incremental polling)."
+        },
+        tailLines: {
+          type: "integer",
+          description: "Cap the returned slice to the last N lines. Default 80, 0 = unlimited."
+        }
+      },
+      required: ["jobId"]
+    },
+    fn: async (args) => {
+      const out = jobs.read(args.jobId, {
+        since: args.since,
+        tailLines: args.tailLines ?? 80 // the default applies only when tailLines is null/undefined, so an explicit 0 is passed through unchanged
+      });
+      if (!out) return `job ${args.jobId}: not found (use list_jobs)`; // a falsy read result is reported as text rather than thrown
+      return formatJobRead(args.jobId, out);
+    }
+  });
+  registry.register({ // Tool registration: stop a background job by id and return its formatted final state.
+    name: "stop_job",
+    description: "Stop a background job started with `run_background`. SIGTERM first; SIGKILL after a short grace period if it doesn't exit cleanly. Returns the final output + exit code. Safe to call on an already-exited job.",
+    parameters: {
+      type: "object",
+      properties: {
+        jobId: { type: "integer" }
+      },
+      required: ["jobId"]
+    },
+    fn: async (args) => {
+      const rec = await jobs.stop(args.jobId); // NOTE(review): the SIGTERM/SIGKILL sequence named in the description presumably lives in jobs.stop — not visible here
+      if (!rec) return `job ${args.jobId}: not found`; // a falsy result is reported as text rather than thrown
+      return formatJobStop(rec);
+    }
+  });
+  registry.register({ // Tool registration: list the jobs returned by jobs.list(), one formatted row per job.
+    name: "list_jobs",
+    description: "List every background job started this session \u2014 running and exited \u2014 with id, command, pid, status. Use when you've lost track of which job_id corresponds to which process, or to see what's still alive.",
+    readOnly: true,
+    parameters: { type: "object", properties: {} },
+    fn: async () => {
+      const all = jobs.list();
+      if (all.length === 0) return "(no background jobs started this session)"; // fixed placeholder text instead of an empty string
+      return all.map(formatJobRow).join("\n"); // rows are newline-separated, in the order jobs.list() returned them
+    }
+  });
   return registry;
 }
+function formatJobStart(r) { // Render a job-start result as a bracketed status header, followed by the startup preview when one is present.
+  const header = r.stillRunning ? `[job ${r.jobId} started \xB7 pid ${r.pid ?? "?"} \xB7 ${r.readyMatched ? "READY signal matched" : "running (no ready signal yet)"}]` : r.exitCode !== null ? `[job ${r.jobId} exited during startup \xB7 exit ${r.exitCode}]` : `[job ${r.jobId} failed to start]`; // three cases in order: still running (a nullish pid prints "?"), exited with a non-null exit code, otherwise failed to start
+  return r.preview ? `${header}
+${r.preview}` : header;
+}
+function formatJobRead(jobId, r) { // Render a job read result: status header with byteLength, the command line, then the output when it is non-empty.
+  const status = r.running ? `running \xB7 pid ${r.pid ?? "?"}` : r.exitCode !== null ? `exited ${r.exitCode}` : r.spawnError ? `failed (${r.spawnError})` : "stopped"; // precedence: running, then a non-null exit code, then spawnError, else "stopped"
+  const header = `[job ${jobId} \xB7 ${status} \xB7 byteLength=${r.byteLength}]
+$ ${r.command}`; // byteLength is echoed in the header; the job_output description tells callers to pass it back as `since`
+  return r.output ? `${header}
+${r.output}` : header; // a falsy output yields the header alone
+}
+function formatJobStop(r) { // Render the record of a stopped job: status header, the command line, and a tail of its output.
+  const running = r.running ? "still running (SIGKILL may be pending)" : `exit ${r.exitCode ?? "?"}`; // a nullish exit code prints as "?"
+  const tail = tailLines(r.output, 40); // keep at most the last 40 lines of the job's output
+  const header = `[job ${r.id} stopped \xB7 ${running}]
+$ ${r.command}`;
+  return tail ? `${header}
+${tail}` : header; // an empty tail yields the header alone
+}
+function formatJobRow(r) { // Build one row for the job listing: padded id, padded state, age, and the command.
+  const age = ((Date.now() - r.startedAt) / 1e3).toFixed(1); // elapsed time divided by 1000, one decimal; assumes startedAt is an epoch-millisecond timestamp — TODO confirm
+  const state = r.running ? `running   \xB7  pid ${r.pid ?? "?"}` : r.exitCode !== null ? `exit ${r.exitCode}` : r.spawnError ? "failed" : "stopped"; // precedence: running, then a non-null exit code, then spawnError, else "stopped"
+  return `  ${String(r.id).padStart(3)}  ${state.padEnd(24)}  ${age}s ago   $ ${r.command}`; // id is padded on the left to width 3, state on the right to width 24
+}
+function tailLines(s, n) { // Return the last n lines of s behind a marker counting the dropped lines; s is returned unchanged when it has n lines or fewer.
+  if (!s) return ""; // any falsy input (empty string, null, undefined) becomes ""
+  const lines = s.split("\n");
+  if (lines.length <= n) return s; // short enough: return the original string, no marker
+  const dropped = lines.length - n;
+  return [`[\u2026 ${dropped} earlier lines \u2026]`, ...lines.slice(-n)].join("\n"); // NOTE(review): n = 0 would reach slice(-0), which returns every line while the marker claims all were dropped; the call in formatJobStop passes 40
+}
 function formatCommandResult(cmd, r) {
   const header = r.timedOut ? `$ ${cmd}
 [killed after timeout]` : `$ ${cmd}
@@ -4819,11 +5680,11 @@ ${i + 1}. ${r.title}`);
 // src/env.ts
 import { readFileSync as readFileSync8 } from "fs";
-import { resolve as resolve6 } from "path";
+import { resolve as resolve7 } from "path";
 function loadDotenv(path = ".env") {
   let raw;
   try {
-    raw = readFileSync8(resolve6(process.cwd(), path), "utf8");
+    raw = readFileSync8(resolve7(process.cwd(), path), "utf8");
   } catch {
     return;
   }
@@ -5003,7 +5864,8 @@ function summarizeTurns(turns) {
     claudeEquivalentUsd: round2(totalClaude, 6),
     savingsVsClaudePct: round2(savingsVsClaude * 100, 2),
     cacheHitRatio: round2(cacheHitRatio, 4),
-    lastPromptTokens: lastTurn?.usage.promptTokens ?? 0
+    lastPromptTokens: lastTurn?.usage.promptTokens ?? 0,
+    lastTurnCostUsd: round2(lastTurn?.cost ?? 0, 6)
   };
 }
 function round2(n, digits) {
@@ -5505,7 +6367,7 @@ var McpClient = class {
     const id = this.nextId++;
     const frame = { jsonrpc: "2.0", id, method, params };
     let abortHandler = null;
-    const promise = new Promise((resolve8, reject) => {
+    const promise = new Promise((resolve9, reject) => {
       const timeout = setTimeout(() => {
         this.pending.delete(id);
         if (abortHandler && signal) signal.removeEventListener("abort", abortHandler);
@@ -5514,7 +6376,7 @@ var McpClient = class {
         );
       }, this.requestTimeoutMs);
       this.pending.set(id, {
-        resolve: resolve8,
+        resolve: resolve9,
         reject,
         timeout
       });
@@ -5596,7 +6458,7 @@ var McpClient = class {
 };
 // src/mcp/stdio.ts
-import { spawn as spawn3 } from "child_process";
+import { spawn as spawn4 } from "child_process";
 var StdioTransport = class {
   child;
   queue = [];
@@ -5611,14 +6473,14 @@ var StdioTransport = class {
         opts.command,
         ...(opts.args ?? []).map((a) => quoteArg(a, process.platform === "win32"))
       ].join(" ");
-      this.child = spawn3(line, [], {
+      this.child = spawn4(line, [], {
         env,
         cwd: opts.cwd,
         stdio: ["pipe", "pipe", "inherit"],
         shell: true
       });
     } else {
-      this.child = spawn3(opts.command, opts.args ?? [], {
+      this.child = spawn4(opts.command, opts.args ?? [], {
         env,
         cwd: opts.cwd,
         stdio: ["pipe", "pipe", "inherit"]
@@ -5637,12 +6499,12 @@ var StdioTransport = class {
   }
   async send(message) {
     if (this.closed) throw new Error("MCP transport is closed");
-    return new Promise((resolve8, reject) => {
+    return new Promise((resolve9, reject) => {
       const line = `${JSON.stringify(message)}
 `;
       this.child.stdin.write(line, "utf8", (err) => {
         if (err) reject(err);
-        else resolve8();
+        else resolve9();
       });
     });
   }
@@ -5653,8 +6515,8 @@ var StdioTransport = class {
         continue;
       }
       if (this.closed) return;
-      const next = await new Promise((resolve8) => {
-        this.waiters.push(resolve8);
+      const next = await new Promise((resolve9) => {
+        this.waiters.push(resolve9);
       });
       if (next === null) return;
       yield next;
@@ -5720,8 +6582,8 @@ var SseTransport = class {
   constructor(opts) {
     this.url = opts.url;
     this.headers = opts.headers ?? {};
-    this.endpointReady = new Promise((resolve8, reject) => {
-      this.resolveEndpoint = resolve8;
+    this.endpointReady = new Promise((resolve9, reject) => {
+      this.resolveEndpoint = resolve9;
       this.rejectEndpoint = reject;
     });
     this.endpointReady.catch(() => void 0);
@@ -5748,8 +6610,8 @@ var SseTransport = class {
         continue;
       }
       if (this.closed) return;
-      const next = await new Promise((resolve8) => {
-        this.waiters.push(resolve8);
+      const next = await new Promise((resolve9) => {
+        this.waiters.push(resolve9);
       });
       if (next === null) return;
       yield next;
@@ -5949,7 +6811,7 @@ async function trySection(load) {
 // src/code/edit-blocks.ts
 import { existsSync as existsSync9, mkdirSync as mkdirSync3, readFileSync as readFileSync10, unlinkSync as unlinkSync3, writeFileSync as writeFileSync3 } from "fs";
-import { dirname as dirname4, resolve as resolve7 } from "path";
+import { dirname as dirname4, resolve as resolve8 } from "path";
 var BLOCK_RE = /^(\S[^\n]*)\n<{7} SEARCH\n([\s\S]*?)\n?={7}\n([\s\S]*?)\n?>{7} REPLACE/gm;
 function parseEditBlocks(text) {
   const out = [];
@@ -5967,8 +6829,8 @@ function parseEditBlocks(text) {
   return out;
 }
 function applyEditBlock(block, rootDir) {
-  const absRoot = resolve7(rootDir);
-  const absTarget = resolve7(absRoot, block.path);
+  const absRoot = resolve8(rootDir);
+  const absTarget = resolve8(absRoot, block.path);
   if (absTarget !== absRoot && !absTarget.startsWith(`${absRoot}${sep()}`)) {
     return {
       path: block.path,
@@ -6018,13 +6880,13 @@ function applyEditBlocks(blocks, rootDir) {
   return blocks.map((b) => applyEditBlock(b, rootDir));
 }
 function snapshotBeforeEdits(blocks, rootDir) {
-  const absRoot = resolve7(rootDir);
+  const absRoot = resolve8(rootDir);
   const seen = /* @__PURE__ */ new Set();
   const snapshots = [];
   for (const b of blocks) {
     if (seen.has(b.path)) continue;
     seen.add(b.path);
-    const abs = resolve7(absRoot, b.path);
+    const abs = resolve8(absRoot, b.path);
     if (!existsSync9(abs)) {
       snapshots.push({ path: b.path, prevContent: null });
       continue;
@@ -6038,9 +6900,9 @@ function snapshotBeforeEdits(blocks, rootDir) {
   return snapshots;
 }
 function restoreSnapshots(snapshots, rootDir) {
-  const absRoot = resolve7(rootDir);
+  const absRoot = resolve8(rootDir);
   return snapshots.map((snap) => {
-    const abs = resolve7(absRoot, snap.path);
+    const abs = resolve8(absRoot, snap.path);
     if (abs !== absRoot && !abs.startsWith(`${absRoot}${sep()}`)) {
       return {
         path: snap.path,
@@ -6075,7 +6937,7 @@ function sep() {
 // src/code/prompt.ts
 import { existsSync as existsSync10, readFileSync as readFileSync11 } from "fs";
 import { join as join9 } from "path";
-var CODE_SYSTEM_PROMPT = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, list_directory, search_files, etc.) rooted at the user's working directory.
+var CODE_SYSTEM_PROMPT = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.
 # Cite or shut up \u2014 non-negotiable
@@ -6116,15 +6978,17 @@ The user can ALSO enter "plan mode" via /plan, which is a stronger, explicit con
 - You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.
-# Delegating to subagents via Skills (\u{1F9EC})
+# Delegating to subagents via Skills
+The pinned Skills index below lists playbooks you can invoke with \`run_skill\`. Entries tagged \`[\u{1F9EC} subagent]\` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.
-The pinned Skills index below lists playbooks you can invoke with \`run_skill\`. Skills marked with **\u{1F9EC}** spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so \u{1F9EC} skills are how you keep the main session lean.
+**When you call \`run_skill\`, the \`name\` is ONLY the identifier before the tag** \u2014 e.g. \`run_skill({ name: "explore", arguments: "..." })\`, NOT \`"[\u{1F9EC} subagent] explore"\` and NOT \`"explore [\u{1F9EC} subagent]"\`. The tag is display sugar; the name argument is just the bare identifier.
 Two built-ins ship by default:
-- **\u{1F9EC} explore** \u2014 read-only investigation across the codebase. Use when the user says things like "find all places that...", "how does X work across the project", "survey the code for Y". Pass \`arguments\` describing the concrete question.
-- **\u{1F9EC} research** \u2014 combines web search + code reading. Use for "is X supported by lib Y", "what's the canonical way to Z", "compare our impl to the spec".
+- **explore** \`[\u{1F9EC} subagent]\` \u2014 read-only investigation across the codebase. Use when the user says things like "find all places that...", "how does X work across the project", "survey the code for Y". Pass \`arguments\` describing the concrete question.
+- **research** \`[\u{1F9EC} subagent]\` \u2014 combines web search + code reading. Use for "is X supported by lib Y", "what's the canonical way to Z", "compare our impl to the spec".
-When to delegate (call \`run_skill\` with a \u{1F9EC} skill):
+When to delegate (call \`run_skill\` with a subagent skill):
 - The task would otherwise need >5 file reads or searches.
 - You only need the conclusion, not the exploration trail.
 - The work is self-contained (you can describe it in one paragraph).
@@ -6147,6 +7011,15 @@ In those cases, use tools to gather what you need, then reply in prose. No SEARC
 When you do propose edits, the user will review them and decide whether to \`/apply\` or \`/discard\`. Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.
+Reasonix runs an **edit gate**. The user's current mode (\`review\` or \`auto\`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.
+- In \`auto\` mode \`edit_file\` / \`write_file\` calls land on disk immediately with an undo window \u2014 you'll get the normal "edit blocks: 1/1 applied" style response.
+- In \`review\` mode EACH \`edit_file\` / \`write_file\` call pauses tool dispatch while the user decides. You'll get one of these responses:
+  - \`"edit blocks: 1/1 applied"\` \u2014 user approved it. Continue as normal.
+  - \`"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026"\` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.
+  - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same "don't retry on rejection" rule.
+- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.
 # Editing files
 When you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:
@@ -6187,11 +7060,49 @@ Two different rules depending on which tool:
 - **Filesystem tools** (\`read_file\`, \`list_directory\`, \`search_files\`, \`edit_file\`, etc.): paths are sandbox-relative. \`/\` means the project root, \`/src/foo.ts\` means \`<project>/src/foo.ts\`. Both relative (\`src/foo.ts\`) and POSIX-absolute (\`/src/foo.ts\`) forms work.
 - **\`run_command\`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading \`/\` in run_command arguments** \u2014 Windows treats \`/tests\` as drive-root \`F:\\tests\` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (\`tests\`, \`./tests\`, \`src/loop.ts\`) instead.
+# Foreground vs. background commands
+You have TWO tools for running shell commands, and picking the right one is non-negotiable:
+- \`run_command\` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. Anything that naturally returns in under a minute.
+- \`run_background\` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with "dev" / "serve" / "watch" / "start" in the name**. Examples: \`npm run dev\`, \`pnpm dev\`, \`yarn start\`, \`vite\`, \`next dev\`, \`uvicorn app:app --reload\`, \`flask run\`, \`python -m http.server\`, \`cargo watch\`, \`tsc --watch\`, \`webpack serve\`.
+**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. Always \`run_background\`, then \`job_output\` to peek at the logs when you need to verify something.
+After \`run_background\`, tools available to you:
+- \`job_output(jobId, tailLines?)\` \u2014 read recent logs to verify startup / debug errors.
+- \`list_jobs\` \u2014 see every job this session (running + exited).
+- \`stop_job(jobId)\` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.
+Don't re-start an already-running dev server \u2014 call \`list_jobs\` first when in doubt.
+# Scope discipline on "run it" / "start it" requests
+When the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:
+1. Start it (\`run_background\` for dev servers, \`run_command\` for one-shots).
+2. Verify it came up (read a ready signal via \`job_output\`, or fetch the URL with \`web_fetch\` if they want you to confirm).
+3. Report what's running, where (URL / port / pid), and STOP.
+Do NOT, in the same turn:
+- Run \`tsc\` / type-checkers / linters unless the user asked for it.
+- Scan for bugs to "proactively" fix. The page rendering is success.
+- Clean up unused imports, dead code, or refactor "while you're here."
+- Edit files to improve anything the user didn't mention.
+If you notice an obvious issue, MENTION it in one sentence and wait for the user to say "fix it." The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading "fix the new error I just introduced" loops. The storm-breaker will cut you off, but the user still sees the mess.
+"It works" is the end state. Resist the urge to polish.
 # Style
 - Show edits; don't narrate them in prose. "Here's the fix:" is enough.
 - One short paragraph explaining *why*, then the blocks.
 - If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.
+${ESCALATION_CONTRACT}
+${TUI_FORMATTING_RULES}
 `;
 function codeSystemPrompt(rootDir) {
   const withMemory = applyMemoryStack(CODE_SYSTEM_PROMPT, rootDir);