npm - reasonix - Versions diffs - 0.4.9 → 0.4.13 - Mend

reasonix 0.4.9 → 0.4.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.d.ts CHANGED Viewed

@@ -120,6 +120,23 @@ interface StreamChunk {
     finishReason?: string;
     raw: any;
 }
+/**
+ * Response shape for DeepSeek's `/user/balance` endpoint. One entry
+ * per currency the account is funded in (typically CNY, sometimes
+ * USD). `total_balance` is the spendable figure; `granted_balance`
+ * counts promotional credits that expire, `topped_up_balance` is
+ * what the user paid for and keeps.
+ */
+interface BalanceInfo {
+    currency: string;
+    total_balance: string;
+    granted_balance?: string;
+    topped_up_balance?: string;
+}
+interface UserBalance {
+    is_available: boolean;
+    balance_infos: BalanceInfo[];
+}
 interface DeepSeekClientOptions {
     apiKey?: string;
     baseUrl?: string;
@@ -136,6 +153,15 @@ declare class DeepSeekClient {
     private readonly _fetch;
     constructor(opts?: DeepSeekClientOptions);
     private buildPayload;
+    /**
+     * Fetch the current DeepSeek account balance. Separate endpoint
+     * from chat completions, no billing impact. Returns null on any
+     * network/auth failure so callers can gate the balance display
+     * without a hard error — the rest of the session works regardless.
+     */
+    getBalance(opts?: {
+        signal?: AbortSignal;
+    }): Promise<UserBalance | null>;
     chat(opts: ChatRequestOptions): Promise<ChatResponse>;
     stream(opts: ChatRequestOptions): AsyncGenerator<StreamChunk>;
 }
@@ -172,7 +198,7 @@ interface HarvestOptions {
 }
 declare function emptyPlanState(): TypedPlanState;
 declare function isPlanStateEmpty(s: TypedPlanState | null | undefined): boolean;
-declare function harvest(reasoningContent: string | null | undefined, client?: DeepSeekClient, options?: HarvestOptions): Promise<TypedPlanState>;
+declare function harvest(reasoningContent: string | null | undefined, client?: DeepSeekClient, options?: HarvestOptions, signal?: AbortSignal): Promise<TypedPlanState>;
 /**
  * Self-consistency branching.
@@ -373,6 +399,10 @@ declare class ToolCallRepair {
 }
 declare function costUsd(model: string, usage: Usage): number;
+/** Input-side cost only (prompt, cache hit + miss). Used for the panel breakdown. */
+declare function inputCostUsd(model: string, usage: Usage): number;
+/** Output-side cost only (completion tokens). Used for the panel breakdown. */
+declare function outputCostUsd(model: string, usage: Usage): number;
 declare function claudeEquivalentCost(usage: Usage): number;
 interface TurnStats {
     turn: number;
@@ -384,7 +414,17 @@ interface TurnStats {
 interface SessionSummary {
     turns: number;
     totalCostUsd: number;
+    /**
+     * Input-side (prompt) cost aggregated across the session. Split
+     * from totalCostUsd so the panel can render "cost $X (in $Y · out
+     * $Z)" — users asked for visibility into where the spend lands.
+     */
+    totalInputCostUsd: number;
+    /** Output-side (completion) cost aggregated across the session. */
+    totalOutputCostUsd: number;
+    /** @deprecated Claude reference; kept for benchmarks + replay compat, no longer surfaced in the TUI. */
     claudeEquivalentUsd: number;
+    /** @deprecated. Same as claudeEquivalentUsd — synthetic ratio, not a real measurement. */
     savingsVsClaudePct: number;
     cacheHitRatio: number;
     /**
@@ -401,6 +441,8 @@ declare class SessionStats {
     get totalCost(): number;
     get totalClaudeEquivalent(): number;
     get savingsVsClaude(): number;
+    get totalInputCost(): number;
+    get totalOutputCost(): number;
     get aggregateCacheHitRatio(): number;
     summary(): SessionSummary;
 }
@@ -447,6 +489,13 @@ declare class ToolRegistry {
 }
 type EventRole = "assistant_delta" | "assistant_final"
+/**
+ * Emitted as `tool_calls[].function.arguments` streams in. A tool
+ * call with a large arguments payload produces no `content` or
+ * `reasoning_content` bytes — this is the only signal the UI has
+ * that the stream is alive during that window.
+ */
+ | "tool_call_delta"
 /**
  * Yielded immediately before a tool is dispatched. Lets the TUI put
  * up a "▸ tool<X> running…" spinner while the tool's Promise is
@@ -454,7 +503,16 @@ type EventRole = "assistant_delta" | "assistant_final"
  * takes more than a few hundred ms (a big `filesystem_edit_file`
  * is a typical trigger).
  */
- | "tool_start" | "tool" | "done" | "error" | "warning" | "branch_start" | "branch_progress" | "branch_done";
+ | "tool_start" | "tool" | "done" | "error" | "warning"
+/**
+ * Transient "what's happening right now" indicator. Emitted during
+ * silent phases — between a tool result and the next iteration's
+ * first streaming byte, and right before harvest — so the TUI can
+ * show a spinner with explanatory text instead of looking frozen.
+ * The UI clears it on the next primary event (assistant_delta,
+ * tool_start, tool, assistant_final, error).
+ */
+ | "status" | "branch_start" | "branch_progress" | "branch_done";
 interface BranchSummary {
     budget: number;
     chosenIndex: number;
@@ -480,6 +538,8 @@ interface LoopEvent {
      * what it returned. Needed by `reasonix diff` to explain divergences.
      */
     toolArgs?: string;
+    /** Cumulative arguments-string length for `role === "tool_call_delta"`. */
+    toolCallArgsChars?: number;
     stats?: TurnStats;
     planState?: TypedPlanState;
     repair?: RepairReport;
@@ -584,6 +644,20 @@ declare class CacheFirstLoop {
         charsSaved: number;
     };
     private appendAndPersist;
+    /**
+     * Start a fresh conversation WITHOUT exiting. Drops every message
+     * in the in-memory log AND rewrites the session file to empty so
+     * a resume won't re-hydrate the old turns. Unlike `/forget`, which
+     * deletes the session entirely, this keeps the session name and
+     * config intact — it's the "new chat" button.
+     *
+     * The immutable prefix (system prompt + tool specs) is preserved
+     * — that's the cache-first invariant, not part of the conversation.
+     * Returns the number of messages dropped so the UI can show it.
+     */
+    clearLog(): {
+        dropped: number;
+    };
     /**
      * Reconfigure model/harvest/branch/stream mid-session. The loop's log,
      * scratch, and stats are preserved — only the per-turn behavior changes.
@@ -629,12 +703,6 @@ declare class CacheFirstLoop {
  * Exported so tests can exercise it against concrete R1 outputs.
  */
 declare function stripHallucinatedToolMarkup(s: string): string;
-/**
- * Truncate any tool-role message whose content exceeds the cap. User
- * and assistant messages are left alone because (a) they're almost
- * always small, (b) truncating user prompts would corrupt conversational
- * intent in a way the user didn't author. Exported for tests.
- */
 declare function healLoadedMessages(messages: ChatMessage[], maxChars: number): {
     messages: ChatMessage[];
     healedCount: number;
@@ -1692,4 +1760,4 @@ declare function redactKey(key: string): string;
 declare const VERSION = "0.4.3";
-export { AppendOnlyLog, type ApplyResult, type ApplyStatus, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, type BridgeOptions, type BridgeResult, CODE_SYSTEM_PROMPT, CacheFirstLoop, type CacheFirstLoopOptions, type CallToolResult, type ChatMessage, type ChatResponse, DEFAULT_MAX_RESULT_CHARS, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EditBlock, type EditSnapshot, type EventRole, type FilesystemToolsOptions, type FlattenDecision, type FlattenOptions, type GetPromptResult, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type InitializeResult, type InspectionReport, type JSONSchema, type JsonRpcMessage, type JsonRpcRequest, type JsonRpcResponse, type ListPromptsResult, type ListResourcesResult, type ListToolsResult, type LoopEvent, MCP_PROTOCOL_VERSION, McpClient, type McpClientOptions, type McpContentBlock, type McpProgressHandler, type McpProgressInfo, type McpPrompt, type McpPromptArgument, type McpPromptMessage, type McpPromptResourceBlock, type McpResource, type McpResourceContents, type McpResourceContentsBlob, type McpResourceContentsText, type McpSpec, type McpTool, type McpToolSchema, type McpTransport, type ProgressNotificationParams, type ReadResourceResult, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SectionResult, type SessionInfo, SessionStats, type SessionSummary, type SseMcpSpec, SseTransport, type SseTransportOptions, type StdioMcpSpec, StdioTransport, type StdioTransportOptions, StormBreaker, type StreamChunk, type ToolCall, type ToolCallContext, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, type TranscriptRecord, type 
TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, appendSessionMessage, applyEditBlock, applyEditBlocks, bridgeMcpTools, claudeEquivalentCost, codeSystemPrompt, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenMcpResult, flattenSchema, formatLoopError, harvest, healLoadedMessages, inspectMcpServer, isJsonRpcError, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, parseEditBlocks, parseMcpSpec, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, registerFilesystemTools, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, restoreSnapshots, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, snapshotBeforeEdits, stripHallucinatedToolMarkup, truncateForModel, writeConfig, writeMeta, writeRecord };
+export { AppendOnlyLog, type ApplyResult, type ApplyStatus, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, type BridgeOptions, type BridgeResult, CODE_SYSTEM_PROMPT, CacheFirstLoop, type CacheFirstLoopOptions, type CallToolResult, type ChatMessage, type ChatResponse, DEFAULT_MAX_RESULT_CHARS, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EditBlock, type EditSnapshot, type EventRole, type FilesystemToolsOptions, type FlattenDecision, type FlattenOptions, type GetPromptResult, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type InitializeResult, type InspectionReport, type JSONSchema, type JsonRpcMessage, type JsonRpcRequest, type JsonRpcResponse, type ListPromptsResult, type ListResourcesResult, type ListToolsResult, type LoopEvent, MCP_PROTOCOL_VERSION, McpClient, type McpClientOptions, type McpContentBlock, type McpProgressHandler, type McpProgressInfo, type McpPrompt, type McpPromptArgument, type McpPromptMessage, type McpPromptResourceBlock, type McpResource, type McpResourceContents, type McpResourceContentsBlob, type McpResourceContentsText, type McpSpec, type McpTool, type McpToolSchema, type McpTransport, type ProgressNotificationParams, type ReadResourceResult, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SectionResult, type SessionInfo, SessionStats, type SessionSummary, type SseMcpSpec, SseTransport, type SseTransportOptions, type StdioMcpSpec, StdioTransport, type StdioTransportOptions, StormBreaker, type StreamChunk, type ToolCall, type ToolCallContext, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, type TranscriptRecord, type 
TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, appendSessionMessage, applyEditBlock, applyEditBlocks, bridgeMcpTools, claudeEquivalentCost, codeSystemPrompt, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenMcpResult, flattenSchema, formatLoopError, harvest, healLoadedMessages, inputCostUsd, inspectMcpServer, isJsonRpcError, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, outputCostUsd, parseEditBlocks, parseMcpSpec, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, registerFilesystemTools, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, restoreSnapshots, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, snapshotBeforeEdits, stripHallucinatedToolMarkup, truncateForModel, writeConfig, writeMeta, writeRecord };

package/dist/index.js CHANGED Viewed

@@ -133,6 +133,27 @@ var DeepSeekClient = class {
     if (opts.responseFormat) payload.response_format = opts.responseFormat;
     return payload;
   }
+  /**
+   * Fetch the current DeepSeek account balance. Separate endpoint
+   * from chat completions, no billing impact. Returns null on any
+   * network/auth failure so callers can gate the balance display
+   * without a hard error — the rest of the session works regardless.
+   */
+  async getBalance(opts = {}) {
+    try {
+      const resp = await this._fetch(`${this.baseUrl}/user/balance`, {
+        method: "GET",
+        headers: { Authorization: `Bearer ${this.apiKey}` },
+        signal: opts.signal
+      });
+      if (!resp.ok) return null;
+      const data = await resp.json();
+      if (!data || !Array.isArray(data.balance_infos)) return null;
+      return data;
+    } catch {
+      return null;
+    }
+  }
   async chat(opts) {
     const ctrl = new AbortController();
     const timer = setTimeout(() => ctrl.abort(), this.timeoutMs);
@@ -279,8 +300,9 @@ Constraints:
 - Each item is plain text, at most {maxItemLen} characters, no markdown.
 - Write in the same language as the trace (Chinese in \u2192 Chinese out, etc.).
 - Do not quote back the trace; write short, specific phrases.`;
-async function harvest(reasoningContent, client, options = {}) {
+async function harvest(reasoningContent, client, options = {}, signal) {
   if (!client || !reasoningContent) return emptyPlanState();
+  if (signal?.aborted) return emptyPlanState();
   const minLen = options.minReasoningLen ?? 40;
   const trimmed = reasoningContent.trim();
   if (trimmed.length < minLen) return emptyPlanState();
@@ -300,7 +322,8 @@ async function harvest(reasoningContent, client, options = {}) {
       ],
       responseFormat: { type: "json_object" },
       temperature: 0,
-      maxTokens: 600
+      maxTokens: 600,
+      signal
     });
     return parsePlanState(resp.content, maxItems, maxItemLen);
   } catch {
@@ -1089,6 +1112,16 @@ function costUsd(model, usage) {
   if (!p) return 0;
   return (usage.promptCacheHitTokens * p.inputCacheHit + usage.promptCacheMissTokens * p.inputCacheMiss + usage.completionTokens * p.output) / 1e6;
 }
+function inputCostUsd(model, usage) {
+  const p = DEEPSEEK_PRICING[model];
+  if (!p) return 0;
+  return (usage.promptCacheHitTokens * p.inputCacheHit + usage.promptCacheMissTokens * p.inputCacheMiss) / 1e6;
+}
+function outputCostUsd(model, usage) {
+  const p = DEEPSEEK_PRICING[model];
+  if (!p) return 0;
+  return usage.completionTokens * p.output / 1e6;
+}
 function claudeEquivalentCost(usage) {
   return (usage.promptTokens * CLAUDE_SONNET_PRICING.input + usage.completionTokens * CLAUDE_SONNET_PRICING.output) / 1e6;
 }
@@ -1116,6 +1149,12 @@ var SessionStats = class {
     const c = this.totalClaudeEquivalent;
     return c > 0 ? 1 - this.totalCost / c : 0;
   }
+  get totalInputCost() {
+    return this.turns.reduce((sum, t) => sum + inputCostUsd(t.model, t.usage), 0);
+  }
+  get totalOutputCost() {
+    return this.turns.reduce((sum, t) => sum + outputCostUsd(t.model, t.usage), 0);
+  }
   get aggregateCacheHitRatio() {
     let hit = 0;
     let miss = 0;
@@ -1131,6 +1170,8 @@ var SessionStats = class {
     return {
       turns: this.turns.length,
       totalCostUsd: round(this.totalCost, 6),
+      totalInputCostUsd: round(this.totalInputCost, 6),
+      totalOutputCostUsd: round(this.totalOutputCost, 6),
       claudeEquivalentUsd: round(this.totalClaudeEquivalent, 6),
       savingsVsClaudePct: round(this.savingsVsClaude * 100, 2),
       cacheHitRatio: round(this.aggregateCacheHitRatio, 4),
@@ -1205,8 +1246,12 @@ var CacheFirstLoop = class {
       for (const msg of messages) this.log.append(msg);
       this.resumedMessageCount = messages.length;
       if (healedCount > 0) {
+        try {
+          rewriteSession(this.sessionName, messages);
+        } catch {
+        }
         process.stderr.write(
-          `\u25B8 session "${this.sessionName}": healed ${healedCount} oversized tool result(s) (was ${healedFrom.toLocaleString()} chars total). Old payloads were truncated to fit DeepSeek's context window; the conversation is preserved.
+          `\u25B8 session "${this.sessionName}": healed ${healedCount} entr${healedCount === 1 ? "y" : "ies"}${healedFrom > 0 ? ` (was ${healedFrom.toLocaleString()} chars oversized)` : " (dropped dangling tool_calls tail)"}. Rewrote session file.
 `
         );
       }
@@ -1227,7 +1272,7 @@ var CacheFirstLoop = class {
    */
   compact(tightCapChars = 4e3) {
     const before = this.log.toMessages();
-    const { messages, healedCount, healedFrom } = healLoadedMessages(before, tightCapChars);
+    const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
     const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
     const charsSaved = healedFrom - afterBytes;
     if (healedCount > 0) {
@@ -1250,6 +1295,29 @@ var CacheFirstLoop = class {
       }
     }
   }
+  /**
+   * Start a fresh conversation WITHOUT exiting. Drops every message
+   * in the in-memory log AND rewrites the session file to empty so
+   * a resume won't re-hydrate the old turns. Unlike `/forget`, which
+   * deletes the session entirely, this keeps the session name and
+   * config intact — it's the "new chat" button.
+   *
+   * The immutable prefix (system prompt + tool specs) is preserved
+   * — that's the cache-first invariant, not part of the conversation.
+   * Returns the number of messages dropped so the UI can show it.
+   */
+  clearLog() {
+    const dropped = this.log.length;
+    this.log.compactInPlace([]);
+    if (this.sessionName) {
+      try {
+        rewriteSession(this.sessionName, []);
+      } catch {
+      }
+    }
+    this.scratch.reset();
+    return { dropped };
+  }
   /**
    * Reconfigure model/harvest/branch/stream mid-session. The loop's log,
    * scratch, and stats are preserved — only the per-turn behavior changes.
@@ -1281,7 +1349,8 @@ var CacheFirstLoop = class {
     this.stream = this.branchEnabled ? false : this._streamPreference;
   }
   buildMessages(pendingUser) {
-    const msgs = [...this.prefix.toMessages(), ...this.log.toMessages()];
+    const healed = healLoadedMessages(this.log.toMessages(), DEFAULT_MAX_RESULT_CHARS);
+    const msgs = [...this.prefix.toMessages(), ...healed.messages];
     if (pendingUser !== null) msgs.push({ role: "user", content: pendingUser });
     return msgs;
   }
@@ -1356,6 +1425,13 @@ var CacheFirstLoop = class {
         yield { turn: this._turn, role: "done", content: stoppedMsg };
         return;
       }
+      if (iter > 0) {
+        yield {
+          turn: this._turn,
+          role: "status",
+          content: "tool result uploaded \xB7 model thinking before next response\u2026"
+        };
+      }
       if (!warnedForIterBudget && iter >= warnAt) {
         warnedForIterBudget = true;
         yield {
@@ -1485,6 +1561,15 @@ var CacheFirstLoop = class {
               if (d.argumentsDelta)
                 cur.function.arguments = (cur.function.arguments ?? "") + d.argumentsDelta;
               callBuf.set(d.index, cur);
+              if (cur.function.name) {
+                yield {
+                  turn: this._turn,
+                  role: "tool_call_delta",
+                  content: "",
+                  toolName: cur.function.name,
+                  toolCallArgsChars: (cur.function.arguments ?? "").length
+                };
+              }
             }
             if (chunk.usage) usage = chunk.usage;
           }
@@ -1516,7 +1601,14 @@ var CacheFirstLoop = class {
         pendingUser = null;
       }
       this.scratch.reasoning = reasoningContent || null;
-      const planState = preHarvestedPlanState ? preHarvestedPlanState : this.harvestEnabled ? await harvest(reasoningContent || null, this.client, this.harvestOptions) : emptyPlanState();
+      if (!preHarvestedPlanState && this.harvestEnabled && (reasoningContent?.trim().length ?? 0) >= 40) {
+        yield {
+          turn: this._turn,
+          role: "status",
+          content: "extracting plan state from reasoning\u2026"
+        };
+      }
+      const planState = preHarvestedPlanState ? preHarvestedPlanState : this.harvestEnabled ? await harvest(reasoningContent || null, this.client, this.harvestOptions, signal) : emptyPlanState();
       const { calls: repairedCalls, report } = this.repair.process(
         toolCalls,
         reasoningContent || null,
@@ -1538,15 +1630,38 @@ var CacheFirstLoop = class {
       }
       const ctxMax = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
       if (usage && usage.promptTokens / ctxMax > 0.8) {
-        yield {
-          turn: this._turn,
-          role: "warning",
-          content: `context ${usage.promptTokens}/${ctxMax} (${Math.round(
-            usage.promptTokens / ctxMax * 100
-          )}%) \u2014 more tools would overflow. Forcing summary from what was gathered.`
-        };
-        yield* this.forceSummaryAfterIterLimit({ reason: "context-guard" });
-        return;
+        const before = usage.promptTokens;
+        const compactResult = this.compact(4e3);
+        if (compactResult.healedCount > 0) {
+          const approxSaved = Math.round(compactResult.charsSaved / 4);
+          const after = before - approxSaved;
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
+          };
+        } else {
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
+              before / ctxMax * 100
+            )}%) \u2014 nothing to auto-compact. Forcing summary from what was gathered.`
+          };
+          const tail = this.log.entries[this.log.entries.length - 1];
+          if (tail && tail.role === "assistant" && Array.isArray(tail.tool_calls) && tail.tool_calls.length > 0) {
+            const kept = this.log.entries.slice(0, -1);
+            this.log.compactInPlace([...kept]);
+            if (this.sessionName) {
+              try {
+                rewriteSession(this.sessionName, kept);
+              } catch {
+              }
+            }
+          }
+          yield* this.forceSummaryAfterIterLimit({ reason: "context-guard" });
+          return;
+        }
       }
       for (const call of repairedCalls) {
         const name = call.function?.name ?? "";
@@ -1578,6 +1693,11 @@ var CacheFirstLoop = class {
   }
   async *forceSummaryAfterIterLimit(opts = { reason: "budget" }) {
     try {
+      yield {
+        turn: this._turn,
+        role: "status",
+        content: "summarizing what was gathered\u2026"
+      };
       const messages = this.buildMessages(null);
       messages.push({
         role: "user",
@@ -1660,7 +1780,7 @@ function summarizeBranch(chosen, samples) {
     temperatures: samples.map((s) => s.temperature)
   };
 }
-function healLoadedMessages(messages, maxChars) {
+function shrinkOversizedToolResults(messages, maxChars) {
   let healedCount = 0;
   let healedFrom = 0;
   const out = messages.map((msg) => {
@@ -1673,6 +1793,51 @@ function healLoadedMessages(messages, maxChars) {
   });
   return { messages: out, healedCount, healedFrom };
 }
+function healLoadedMessages(messages, maxChars) {
+  const shrunk = shrinkOversizedToolResults(messages, maxChars);
+  let healedCount = shrunk.healedCount;
+  const out = [];
+  const openCallIds = /* @__PURE__ */ new Set();
+  let droppedAssistantCalls = 0;
+  let droppedStrayTools = 0;
+  for (let i = 0; i < shrunk.messages.length; i++) {
+    const msg = shrunk.messages[i];
+    if (msg.role === "assistant" && Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) {
+      const needed = /* @__PURE__ */ new Set();
+      for (const call of msg.tool_calls) {
+        if (call?.id) needed.add(call.id);
+      }
+      const candidates = [];
+      let j = i + 1;
+      while (j < shrunk.messages.length && needed.size > 0) {
+        const nxt = shrunk.messages[j];
+        if (nxt.role !== "tool") break;
+        const id = nxt.tool_call_id ?? "";
+        if (!needed.has(id)) break;
+        needed.delete(id);
+        candidates.push(nxt);
+        j++;
+      }
+      if (needed.size === 0) {
+        out.push(msg);
+        for (const r of candidates) out.push(r);
+        i = j - 1;
+      } else {
+        droppedAssistantCalls += 1;
+        droppedStrayTools += candidates.length;
+        i = j - 1;
+      }
+      continue;
+    }
+    if (msg.role === "tool") {
+      droppedStrayTools += 1;
+      continue;
+    }
+    out.push(msg);
+  }
+  healedCount += droppedAssistantCalls + droppedStrayTools;
+  return { messages: out, healedCount, healedFrom: shrunk.healedFrom };
+}
 function formatLoopError(err) {
   const msg = err.message ?? "";
   if (msg.includes("maximum context length")) {
@@ -1929,7 +2094,12 @@ function registerFilesystemTools(registry, opts) {
       }
       const after = before.slice(0, firstIdx) + args.replace + before.slice(firstIdx + args.search.length);
       await fs.writeFile(abs, after, "utf8");
-      return `edited ${pathMod.relative(rootDir, abs)} (${args.search.length}\u2192${args.replace.length} chars)`;
+      const rel = pathMod.relative(rootDir, abs);
+      const header = `edited ${rel} (${args.search.length}\u2192${args.replace.length} chars)`;
+      const startLine = before.slice(0, firstIdx).split(/\r?\n/).length;
+      const diff = renderEditDiff(args.search, args.replace, startLine);
+      return `${header}
+${diff}`;
     }
   });
   registry.register({
@@ -1967,6 +2137,51 @@ function registerFilesystemTools(registry, opts) {
   });
   return registry;
 }
+function renderEditDiff(search, replace, startLine) {
+  const a = search.split(/\r?\n/);
+  const b = replace.split(/\r?\n/);
+  const diff = lineDiff(a, b);
+  const hunk = `@@ -${startLine},${a.length} +${startLine},${b.length} @@`;
+  const body = diff.map((d) => `${d.op === " " ? " " : d.op} ${d.line}`).join("\n");
+  return `${hunk}
+${body}`;
+}
+function lineDiff(a, b) {
+  const n = a.length;
+  const m = b.length;
+  const dp = Array.from({ length: n + 1 }, () => new Array(m + 1).fill(0));
+  for (let i2 = 1; i2 <= n; i2++) {
+    for (let j2 = 1; j2 <= m; j2++) {
+      if (a[i2 - 1] === b[j2 - 1]) dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
+      else dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
+    }
+  }
+  const out = [];
+  let i = n;
+  let j = m;
+  while (i > 0 && j > 0) {
+    if (a[i - 1] === b[j - 1]) {
+      out.unshift({ op: " ", line: a[i - 1] });
+      i--;
+      j--;
+    } else if ((dp[i - 1][j] ?? 0) > (dp[i][j - 1] ?? 0)) {
+      out.unshift({ op: "-", line: a[i - 1] });
+      i--;
+    } else {
+      out.unshift({ op: "+", line: b[j - 1] });
+      j--;
+    }
+  }
+  while (i > 0) {
+    out.unshift({ op: "-", line: a[i - 1] });
+    i--;
+  }
+  while (j > 0) {
+    out.unshift({ op: "+", line: b[j - 1] });
+    j--;
+  }
+  return out;
+}
 // src/env.ts
 import { readFileSync as readFileSync2 } from "fs";
@@ -2134,6 +2349,8 @@ function computeReplayStats(records) {
 }
 function summarizeTurns(turns) {
   const totalCost = turns.reduce((s, t) => s + t.cost, 0);
+  const totalInput = turns.reduce((s, t) => s + inputCostUsd(t.model, t.usage), 0);
+  const totalOutput = turns.reduce((s, t) => s + outputCostUsd(t.model, t.usage), 0);
   const totalClaude = turns.reduce((s, t) => s + claudeEquivalentCost(t.usage), 0);
   let hit = 0;
   let miss = 0;
@@ -2147,6 +2364,8 @@ function summarizeTurns(turns) {
   return {
     turns: turns.length,
     totalCostUsd: round2(totalCost, 6),
+    totalInputCostUsd: round2(totalInput, 6),
+    totalOutputCostUsd: round2(totalOutput, 6),
     claudeEquivalentUsd: round2(totalClaude, 6),
     savingsVsClaudePct: round2(savingsVsClaude * 100, 2),
     cacheHitRatio: round2(cacheHitRatio, 4),
@@ -3377,6 +3596,7 @@ export {
   formatLoopError,
   harvest,
   healLoadedMessages,
+  inputCostUsd,
   inspectMcpServer,
   isJsonRpcError,
   isPlanStateEmpty,
@@ -3387,6 +3607,7 @@ export {
   loadSessionMessages,
   nestArguments,
   openTranscriptFile,
+  outputCostUsd,
   parseEditBlocks,
   parseMcpSpec,
   parseTranscript,