reasonix 0.4.9 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -120,6 +120,23 @@ interface StreamChunk {
120
120
  finishReason?: string;
121
121
  raw: any;
122
122
  }
/**
 * One per-currency entry in the response of DeepSeek's `/user/balance`
 * endpoint. The account gets one entry per currency it is funded in
 * (typically CNY, sometimes USD).
 *
 * All amounts are declared as strings, so callers must parse them
 * before doing arithmetic.
 */
interface BalanceInfo {
  /** Currency this entry is denominated in (typically CNY, sometimes USD). */
  currency: string;
  /** The spendable figure for this currency. */
  total_balance: string;
  /** Promotional credits that expire. May be absent. */
  granted_balance?: string;
  /** What the user paid for and keeps. May be absent. */
  topped_up_balance?: string;
}
/**
 * Top-level response shape for DeepSeek's `/user/balance` endpoint.
 */
interface UserBalance {
  /**
   * NOTE(review): presumably whether the account currently has usable
   * balance for API calls — confirm against the DeepSeek API reference.
   */
  is_available: boolean;
  /** One entry per currency the account is funded in. */
  balance_infos: BalanceInfo[];
}
123
140
  interface DeepSeekClientOptions {
124
141
  apiKey?: string;
125
142
  baseUrl?: string;
@@ -136,6 +153,15 @@ declare class DeepSeekClient {
136
153
  private readonly _fetch;
137
154
  constructor(opts?: DeepSeekClientOptions);
138
155
  private buildPayload;
156
+ /**
157
+ * Fetch the current DeepSeek account balance. Separate endpoint
158
+ * from chat completions, no billing impact. Returns null on any
159
+ * network/auth failure so callers can gate the balance display
160
+ * without a hard error — the rest of the session works regardless.
161
+ */
162
+ getBalance(opts?: {
163
+ signal?: AbortSignal;
164
+ }): Promise<UserBalance | null>;
139
165
  chat(opts: ChatRequestOptions): Promise<ChatResponse>;
140
166
  stream(opts: ChatRequestOptions): AsyncGenerator<StreamChunk>;
141
167
  }
@@ -172,7 +198,7 @@ interface HarvestOptions {
172
198
  }
173
199
  declare function emptyPlanState(): TypedPlanState;
174
200
  declare function isPlanStateEmpty(s: TypedPlanState | null | undefined): boolean;
175
- declare function harvest(reasoningContent: string | null | undefined, client?: DeepSeekClient, options?: HarvestOptions): Promise<TypedPlanState>;
201
+ declare function harvest(reasoningContent: string | null | undefined, client?: DeepSeekClient, options?: HarvestOptions, signal?: AbortSignal): Promise<TypedPlanState>;
176
202
 
177
203
  /**
178
204
  * Self-consistency branching.
@@ -373,6 +399,10 @@ declare class ToolCallRepair {
373
399
  }
374
400
 
375
401
  declare function costUsd(model: string, usage: Usage): number;
402
+ /** Input-side cost only (prompt, cache hit + miss). Used for the panel breakdown. */
403
+ declare function inputCostUsd(model: string, usage: Usage): number;
404
+ /** Output-side cost only (completion tokens). Used for the panel breakdown. */
405
+ declare function outputCostUsd(model: string, usage: Usage): number;
376
406
  declare function claudeEquivalentCost(usage: Usage): number;
377
407
  interface TurnStats {
378
408
  turn: number;
@@ -384,7 +414,17 @@ interface TurnStats {
384
414
  interface SessionSummary {
385
415
  turns: number;
386
416
  totalCostUsd: number;
417
+ /**
418
+ * Input-side (prompt) cost aggregated across the session. Split
419
+ * from totalCostUsd so the panel can render "cost $X (in $Y · out
420
+ * $Z)" — users asked for visibility into where the spend lands.
421
+ */
422
+ totalInputCostUsd: number;
423
+ /** Output-side (completion) cost aggregated across the session. */
424
+ totalOutputCostUsd: number;
425
+ /** @deprecated Claude reference; kept for benchmarks + replay compat, no longer surfaced in the TUI. */
387
426
  claudeEquivalentUsd: number;
427
+ /** @deprecated. Same as claudeEquivalentUsd — synthetic ratio, not a real measurement. */
388
428
  savingsVsClaudePct: number;
389
429
  cacheHitRatio: number;
390
430
  /**
@@ -401,6 +441,8 @@ declare class SessionStats {
401
441
  get totalCost(): number;
402
442
  get totalClaudeEquivalent(): number;
403
443
  get savingsVsClaude(): number;
444
+ get totalInputCost(): number;
445
+ get totalOutputCost(): number;
404
446
  get aggregateCacheHitRatio(): number;
405
447
  summary(): SessionSummary;
406
448
  }
@@ -454,7 +496,16 @@ type EventRole = "assistant_delta" | "assistant_final"
454
496
  * takes more than a few hundred ms (a big `filesystem_edit_file`
455
497
  * is a typical trigger).
456
498
  */
457
- | "tool_start" | "tool" | "done" | "error" | "warning" | "branch_start" | "branch_progress" | "branch_done";
499
+ | "tool_start" | "tool" | "done" | "error" | "warning"
500
+ /**
501
+ * Transient "what's happening right now" indicator. Emitted during
502
+ * silent phases — between a tool result and the next iteration's
503
+ * first streaming byte, and right before harvest — so the TUI can
504
+ * show a spinner with explanatory text instead of looking frozen.
505
+ * The UI clears it on the next primary event (assistant_delta,
506
+ * tool_start, tool, assistant_final, error).
507
+ */
508
+ | "status" | "branch_start" | "branch_progress" | "branch_done";
458
509
  interface BranchSummary {
459
510
  budget: number;
460
511
  chosenIndex: number;
@@ -584,6 +635,20 @@ declare class CacheFirstLoop {
584
635
  charsSaved: number;
585
636
  };
586
637
  private appendAndPersist;
638
+ /**
639
+ * Start a fresh conversation WITHOUT exiting. Drops every message
640
+ * in the in-memory log AND rewrites the session file to empty so
641
+ * a resume won't re-hydrate the old turns. Unlike `/forget`, which
642
+ * deletes the session entirely, this keeps the session name and
643
+ * config intact — it's the "new chat" button.
644
+ *
645
+ * The immutable prefix (system prompt + tool specs) is preserved
646
+ * — that's the cache-first invariant, not part of the conversation.
647
+ * Returns the number of messages dropped so the UI can show it.
648
+ */
649
+ clearLog(): {
650
+ dropped: number;
651
+ };
587
652
  /**
588
653
  * Reconfigure model/harvest/branch/stream mid-session. The loop's log,
589
654
  * scratch, and stats are preserved — only the per-turn behavior changes.
@@ -629,12 +694,6 @@ declare class CacheFirstLoop {
629
694
  * Exported so tests can exercise it against concrete R1 outputs.
630
695
  */
631
696
  declare function stripHallucinatedToolMarkup(s: string): string;
632
- /**
633
- * Truncate any tool-role message whose content exceeds the cap. User
634
- * and assistant messages are left alone because (a) they're almost
635
- * always small, (b) truncating user prompts would corrupt conversational
636
- * intent in a way the user didn't author. Exported for tests.
637
- */
638
697
  declare function healLoadedMessages(messages: ChatMessage[], maxChars: number): {
639
698
  messages: ChatMessage[];
640
699
  healedCount: number;
@@ -1692,4 +1751,4 @@ declare function redactKey(key: string): string;
1692
1751
 
1693
1752
  declare const VERSION = "0.4.3";
1694
1753
 
1695
- export { AppendOnlyLog, type ApplyResult, type ApplyStatus, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, type BridgeOptions, type BridgeResult, CODE_SYSTEM_PROMPT, CacheFirstLoop, type CacheFirstLoopOptions, type CallToolResult, type ChatMessage, type ChatResponse, DEFAULT_MAX_RESULT_CHARS, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EditBlock, type EditSnapshot, type EventRole, type FilesystemToolsOptions, type FlattenDecision, type FlattenOptions, type GetPromptResult, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type InitializeResult, type InspectionReport, type JSONSchema, type JsonRpcMessage, type JsonRpcRequest, type JsonRpcResponse, type ListPromptsResult, type ListResourcesResult, type ListToolsResult, type LoopEvent, MCP_PROTOCOL_VERSION, McpClient, type McpClientOptions, type McpContentBlock, type McpProgressHandler, type McpProgressInfo, type McpPrompt, type McpPromptArgument, type McpPromptMessage, type McpPromptResourceBlock, type McpResource, type McpResourceContents, type McpResourceContentsBlob, type McpResourceContentsText, type McpSpec, type McpTool, type McpToolSchema, type McpTransport, type ProgressNotificationParams, type ReadResourceResult, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SectionResult, type SessionInfo, SessionStats, type SessionSummary, type SseMcpSpec, SseTransport, type SseTransportOptions, type StdioMcpSpec, StdioTransport, type StdioTransportOptions, StormBreaker, type StreamChunk, type ToolCall, type ToolCallContext, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, type TranscriptRecord, type 
TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, appendSessionMessage, applyEditBlock, applyEditBlocks, bridgeMcpTools, claudeEquivalentCost, codeSystemPrompt, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenMcpResult, flattenSchema, formatLoopError, harvest, healLoadedMessages, inspectMcpServer, isJsonRpcError, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, parseEditBlocks, parseMcpSpec, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, registerFilesystemTools, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, restoreSnapshots, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, snapshotBeforeEdits, stripHallucinatedToolMarkup, truncateForModel, writeConfig, writeMeta, writeRecord };
1754
+ export { AppendOnlyLog, type ApplyResult, type ApplyStatus, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, type BridgeOptions, type BridgeResult, CODE_SYSTEM_PROMPT, CacheFirstLoop, type CacheFirstLoopOptions, type CallToolResult, type ChatMessage, type ChatResponse, DEFAULT_MAX_RESULT_CHARS, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EditBlock, type EditSnapshot, type EventRole, type FilesystemToolsOptions, type FlattenDecision, type FlattenOptions, type GetPromptResult, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type InitializeResult, type InspectionReport, type JSONSchema, type JsonRpcMessage, type JsonRpcRequest, type JsonRpcResponse, type ListPromptsResult, type ListResourcesResult, type ListToolsResult, type LoopEvent, MCP_PROTOCOL_VERSION, McpClient, type McpClientOptions, type McpContentBlock, type McpProgressHandler, type McpProgressInfo, type McpPrompt, type McpPromptArgument, type McpPromptMessage, type McpPromptResourceBlock, type McpResource, type McpResourceContents, type McpResourceContentsBlob, type McpResourceContentsText, type McpSpec, type McpTool, type McpToolSchema, type McpTransport, type ProgressNotificationParams, type ReadResourceResult, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SectionResult, type SessionInfo, SessionStats, type SessionSummary, type SseMcpSpec, SseTransport, type SseTransportOptions, type StdioMcpSpec, StdioTransport, type StdioTransportOptions, StormBreaker, type StreamChunk, type ToolCall, type ToolCallContext, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, type TranscriptRecord, type 
TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, appendSessionMessage, applyEditBlock, applyEditBlocks, bridgeMcpTools, claudeEquivalentCost, codeSystemPrompt, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenMcpResult, flattenSchema, formatLoopError, harvest, healLoadedMessages, inputCostUsd, inspectMcpServer, isJsonRpcError, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, outputCostUsd, parseEditBlocks, parseMcpSpec, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, registerFilesystemTools, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, restoreSnapshots, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, snapshotBeforeEdits, stripHallucinatedToolMarkup, truncateForModel, writeConfig, writeMeta, writeRecord };
package/dist/index.js CHANGED
@@ -133,6 +133,27 @@ var DeepSeekClient = class {
133
133
  if (opts.responseFormat) payload.response_format = opts.responseFormat;
134
134
  return payload;
135
135
  }
136
+ /**
137
+ * Fetch the current DeepSeek account balance. Separate endpoint
138
+ * from chat completions, no billing impact. Returns null on any
139
+ * network/auth failure so callers can gate the balance display
140
+ * without a hard error — the rest of the session works regardless.
141
+ */
142
+ async getBalance(opts = {}) {
143
+ try {
144
+ const resp = await this._fetch(`${this.baseUrl}/user/balance`, {
145
+ method: "GET",
146
+ headers: { Authorization: `Bearer ${this.apiKey}` },
147
+ signal: opts.signal
148
+ });
149
+ if (!resp.ok) return null;
150
+ const data = await resp.json();
151
+ if (!data || !Array.isArray(data.balance_infos)) return null;
152
+ return data;
153
+ } catch {
154
+ return null;
155
+ }
156
+ }
136
157
  async chat(opts) {
137
158
  const ctrl = new AbortController();
138
159
  const timer = setTimeout(() => ctrl.abort(), this.timeoutMs);
@@ -279,8 +300,9 @@ Constraints:
279
300
  - Each item is plain text, at most {maxItemLen} characters, no markdown.
280
301
  - Write in the same language as the trace (Chinese in \u2192 Chinese out, etc.).
281
302
  - Do not quote back the trace; write short, specific phrases.`;
282
- async function harvest(reasoningContent, client, options = {}) {
303
+ async function harvest(reasoningContent, client, options = {}, signal) {
283
304
  if (!client || !reasoningContent) return emptyPlanState();
305
+ if (signal?.aborted) return emptyPlanState();
284
306
  const minLen = options.minReasoningLen ?? 40;
285
307
  const trimmed = reasoningContent.trim();
286
308
  if (trimmed.length < minLen) return emptyPlanState();
@@ -300,7 +322,8 @@ async function harvest(reasoningContent, client, options = {}) {
300
322
  ],
301
323
  responseFormat: { type: "json_object" },
302
324
  temperature: 0,
303
- maxTokens: 600
325
+ maxTokens: 600,
326
+ signal
304
327
  });
305
328
  return parsePlanState(resp.content, maxItems, maxItemLen);
306
329
  } catch {
@@ -1089,6 +1112,16 @@ function costUsd(model, usage) {
1089
1112
  if (!p) return 0;
1090
1113
  return (usage.promptCacheHitTokens * p.inputCacheHit + usage.promptCacheMissTokens * p.inputCacheMiss + usage.completionTokens * p.output) / 1e6;
1091
1114
  }
/**
 * Input-side cost only: prompt tokens, split into cache-hit and
 * cache-miss portions, each multiplied by its own rate.
 *
 * The sum is divided by 1e6 (presumably rates are quoted per million
 * tokens — confirm against DEEPSEEK_PRICING).
 *
 * @param model Key into DEEPSEEK_PRICING.
 * @param usage Token counts; reads promptCacheHitTokens and promptCacheMissTokens.
 * @returns The input cost, or 0 when the model has no pricing entry.
 */
function inputCostUsd(model, usage) {
  const pricing = DEEPSEEK_PRICING[model];
  if (pricing) {
    const hitCost = usage.promptCacheHitTokens * pricing.inputCacheHit;
    const missCost = usage.promptCacheMissTokens * pricing.inputCacheMiss;
    return (hitCost + missCost) / 1e6;
  }
  return 0;
}
/**
 * Output-side cost only: completion tokens multiplied by the model's
 * output rate, divided by 1e6 (presumably rates are quoted per million
 * tokens — confirm against DEEPSEEK_PRICING).
 *
 * @param model Key into DEEPSEEK_PRICING.
 * @param usage Token counts; reads completionTokens.
 * @returns The output cost, or 0 when the model has no pricing entry.
 */
function outputCostUsd(model, usage) {
  const pricing = DEEPSEEK_PRICING[model];
  if (pricing) {
    const tokenCost = usage.completionTokens * pricing.output;
    return tokenCost / 1e6;
  }
  return 0;
}
1092
1125
  function claudeEquivalentCost(usage) {
1093
1126
  return (usage.promptTokens * CLAUDE_SONNET_PRICING.input + usage.completionTokens * CLAUDE_SONNET_PRICING.output) / 1e6;
1094
1127
  }
@@ -1116,6 +1149,12 @@ var SessionStats = class {
1116
1149
  const c = this.totalClaudeEquivalent;
1117
1150
  return c > 0 ? 1 - this.totalCost / c : 0;
1118
1151
  }
1152
+ get totalInputCost() {
1153
+ return this.turns.reduce((sum, t) => sum + inputCostUsd(t.model, t.usage), 0);
1154
+ }
1155
+ get totalOutputCost() {
1156
+ return this.turns.reduce((sum, t) => sum + outputCostUsd(t.model, t.usage), 0);
1157
+ }
1119
1158
  get aggregateCacheHitRatio() {
1120
1159
  let hit = 0;
1121
1160
  let miss = 0;
@@ -1131,6 +1170,8 @@ var SessionStats = class {
1131
1170
  return {
1132
1171
  turns: this.turns.length,
1133
1172
  totalCostUsd: round(this.totalCost, 6),
1173
+ totalInputCostUsd: round(this.totalInputCost, 6),
1174
+ totalOutputCostUsd: round(this.totalOutputCost, 6),
1134
1175
  claudeEquivalentUsd: round(this.totalClaudeEquivalent, 6),
1135
1176
  savingsVsClaudePct: round(this.savingsVsClaude * 100, 2),
1136
1177
  cacheHitRatio: round(this.aggregateCacheHitRatio, 4),
@@ -1205,8 +1246,12 @@ var CacheFirstLoop = class {
1205
1246
  for (const msg of messages) this.log.append(msg);
1206
1247
  this.resumedMessageCount = messages.length;
1207
1248
  if (healedCount > 0) {
1249
+ try {
1250
+ rewriteSession(this.sessionName, messages);
1251
+ } catch {
1252
+ }
1208
1253
  process.stderr.write(
1209
- `\u25B8 session "${this.sessionName}": healed ${healedCount} oversized tool result(s) (was ${healedFrom.toLocaleString()} chars total). Old payloads were truncated to fit DeepSeek's context window; the conversation is preserved.
1254
+ `\u25B8 session "${this.sessionName}": healed ${healedCount} entr${healedCount === 1 ? "y" : "ies"}${healedFrom > 0 ? ` (was ${healedFrom.toLocaleString()} chars oversized)` : " (dropped dangling tool_calls tail)"}. Rewrote session file.
1210
1255
  `
1211
1256
  );
1212
1257
  }
@@ -1227,7 +1272,7 @@ var CacheFirstLoop = class {
1227
1272
  */
1228
1273
  compact(tightCapChars = 4e3) {
1229
1274
  const before = this.log.toMessages();
1230
- const { messages, healedCount, healedFrom } = healLoadedMessages(before, tightCapChars);
1275
+ const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
1231
1276
  const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
1232
1277
  const charsSaved = healedFrom - afterBytes;
1233
1278
  if (healedCount > 0) {
@@ -1250,6 +1295,29 @@ var CacheFirstLoop = class {
1250
1295
  }
1251
1296
  }
1252
1297
  }
1298
+ /**
1299
+ * Start a fresh conversation WITHOUT exiting. Drops every message
1300
+ * in the in-memory log AND rewrites the session file to empty so
1301
+ * a resume won't re-hydrate the old turns. Unlike `/forget`, which
1302
+ * deletes the session entirely, this keeps the session name and
1303
+ * config intact — it's the "new chat" button.
1304
+ *
1305
+ * The immutable prefix (system prompt + tool specs) is preserved
1306
+ * — that's the cache-first invariant, not part of the conversation.
1307
+ * Returns the number of messages dropped so the UI can show it.
1308
+ */
1309
+ clearLog() {
1310
+ const dropped = this.log.length;
1311
+ this.log.compactInPlace([]);
1312
+ if (this.sessionName) {
1313
+ try {
1314
+ rewriteSession(this.sessionName, []);
1315
+ } catch {
1316
+ }
1317
+ }
1318
+ this.scratch.reset();
1319
+ return { dropped };
1320
+ }
1253
1321
  /**
1254
1322
  * Reconfigure model/harvest/branch/stream mid-session. The loop's log,
1255
1323
  * scratch, and stats are preserved — only the per-turn behavior changes.
@@ -1281,7 +1349,8 @@ var CacheFirstLoop = class {
1281
1349
  this.stream = this.branchEnabled ? false : this._streamPreference;
1282
1350
  }
1283
1351
  buildMessages(pendingUser) {
1284
- const msgs = [...this.prefix.toMessages(), ...this.log.toMessages()];
1352
+ const healed = healLoadedMessages(this.log.toMessages(), DEFAULT_MAX_RESULT_CHARS);
1353
+ const msgs = [...this.prefix.toMessages(), ...healed.messages];
1285
1354
  if (pendingUser !== null) msgs.push({ role: "user", content: pendingUser });
1286
1355
  return msgs;
1287
1356
  }
@@ -1356,6 +1425,13 @@ var CacheFirstLoop = class {
1356
1425
  yield { turn: this._turn, role: "done", content: stoppedMsg };
1357
1426
  return;
1358
1427
  }
1428
+ if (iter > 0) {
1429
+ yield {
1430
+ turn: this._turn,
1431
+ role: "status",
1432
+ content: "tool result uploaded \xB7 model thinking before next response\u2026"
1433
+ };
1434
+ }
1359
1435
  if (!warnedForIterBudget && iter >= warnAt) {
1360
1436
  warnedForIterBudget = true;
1361
1437
  yield {
@@ -1516,7 +1592,14 @@ var CacheFirstLoop = class {
1516
1592
  pendingUser = null;
1517
1593
  }
1518
1594
  this.scratch.reasoning = reasoningContent || null;
1519
- const planState = preHarvestedPlanState ? preHarvestedPlanState : this.harvestEnabled ? await harvest(reasoningContent || null, this.client, this.harvestOptions) : emptyPlanState();
1595
+ if (!preHarvestedPlanState && this.harvestEnabled && (reasoningContent?.trim().length ?? 0) >= 40) {
1596
+ yield {
1597
+ turn: this._turn,
1598
+ role: "status",
1599
+ content: "extracting plan state from reasoning\u2026"
1600
+ };
1601
+ }
1602
+ const planState = preHarvestedPlanState ? preHarvestedPlanState : this.harvestEnabled ? await harvest(reasoningContent || null, this.client, this.harvestOptions, signal) : emptyPlanState();
1520
1603
  const { calls: repairedCalls, report } = this.repair.process(
1521
1604
  toolCalls,
1522
1605
  reasoningContent || null,
@@ -1538,15 +1621,38 @@ var CacheFirstLoop = class {
1538
1621
  }
1539
1622
  const ctxMax = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
1540
1623
  if (usage && usage.promptTokens / ctxMax > 0.8) {
1541
- yield {
1542
- turn: this._turn,
1543
- role: "warning",
1544
- content: `context ${usage.promptTokens}/${ctxMax} (${Math.round(
1545
- usage.promptTokens / ctxMax * 100
1546
- )}%) \u2014 more tools would overflow. Forcing summary from what was gathered.`
1547
- };
1548
- yield* this.forceSummaryAfterIterLimit({ reason: "context-guard" });
1549
- return;
1624
+ const before = usage.promptTokens;
1625
+ const compactResult = this.compact(4e3);
1626
+ if (compactResult.healedCount > 0) {
1627
+ const approxSaved = Math.round(compactResult.charsSaved / 4);
1628
+ const after = before - approxSaved;
1629
+ yield {
1630
+ turn: this._turn,
1631
+ role: "warning",
1632
+ content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
1633
+ };
1634
+ } else {
1635
+ yield {
1636
+ turn: this._turn,
1637
+ role: "warning",
1638
+ content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
1639
+ before / ctxMax * 100
1640
+ )}%) \u2014 nothing to auto-compact. Forcing summary from what was gathered.`
1641
+ };
1642
+ const tail = this.log.entries[this.log.entries.length - 1];
1643
+ if (tail && tail.role === "assistant" && Array.isArray(tail.tool_calls) && tail.tool_calls.length > 0) {
1644
+ const kept = this.log.entries.slice(0, -1);
1645
+ this.log.compactInPlace([...kept]);
1646
+ if (this.sessionName) {
1647
+ try {
1648
+ rewriteSession(this.sessionName, kept);
1649
+ } catch {
1650
+ }
1651
+ }
1652
+ }
1653
+ yield* this.forceSummaryAfterIterLimit({ reason: "context-guard" });
1654
+ return;
1655
+ }
1550
1656
  }
1551
1657
  for (const call of repairedCalls) {
1552
1658
  const name = call.function?.name ?? "";
@@ -1578,6 +1684,11 @@ var CacheFirstLoop = class {
1578
1684
  }
1579
1685
  async *forceSummaryAfterIterLimit(opts = { reason: "budget" }) {
1580
1686
  try {
1687
+ yield {
1688
+ turn: this._turn,
1689
+ role: "status",
1690
+ content: "summarizing what was gathered\u2026"
1691
+ };
1581
1692
  const messages = this.buildMessages(null);
1582
1693
  messages.push({
1583
1694
  role: "user",
@@ -1660,7 +1771,7 @@ function summarizeBranch(chosen, samples) {
1660
1771
  temperatures: samples.map((s) => s.temperature)
1661
1772
  };
1662
1773
  }
1663
- function healLoadedMessages(messages, maxChars) {
1774
+ function shrinkOversizedToolResults(messages, maxChars) {
1664
1775
  let healedCount = 0;
1665
1776
  let healedFrom = 0;
1666
1777
  const out = messages.map((msg) => {
@@ -1673,6 +1784,51 @@ function healLoadedMessages(messages, maxChars) {
1673
1784
  });
1674
1785
  return { messages: out, healedCount, healedFrom };
1675
1786
  }
/**
 * Repair a message list so it can be replayed to the API.
 *
 * Two passes:
 *  1. `shrinkOversizedToolResults(messages, maxChars)` handles oversized
 *     tool results (its counts are carried through unchanged).
 *  2. Tool-call pairing: an assistant message that carries `tool_calls`
 *     is kept only when the messages immediately after it are tool
 *     results covering every call id. Otherwise the assistant message
 *     and the partial run of results collected for it are dropped. Any
 *     tool message not consumed by such a pairing is dropped as a stray.
 *
 * @param messages Messages in conversation order.
 * @param maxChars Size cap forwarded to `shrinkOversizedToolResults`.
 * @returns `messages` — the repaired list; `healedCount` — shrunk
 *          results plus dropped assistant/tool messages; `healedFrom`
 *          — passed through from the shrink pass.
 */
function healLoadedMessages(messages, maxChars) {
  const shrunk = shrinkOversizedToolResults(messages, maxChars);
  const source = shrunk.messages;
  const out = [];
  let droppedAssistantCalls = 0;
  let droppedStrayTools = 0;
  for (let i = 0; i < source.length; i++) {
    const msg = source[i];
    if (msg.role === "assistant" && Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) {
      // Ids that still need a matching tool result.
      const needed = new Set();
      for (const call of msg.tool_calls) {
        if (call?.id) needed.add(call.id);
      }
      // Consume the contiguous run of tool results answering this turn.
      // Stop at the first non-tool message or a result for an id we
      // are not waiting on.
      const candidates = [];
      let j = i + 1;
      while (j < source.length && needed.size > 0) {
        const nxt = source[j];
        if (nxt.role !== "tool") break;
        const id = nxt.tool_call_id ?? "";
        if (!needed.has(id)) break;
        needed.delete(id);
        candidates.push(nxt);
        j++;
      }
      if (needed.size === 0) {
        // Fully answered: keep the assistant turn and its results.
        out.push(msg, ...candidates);
      } else {
        // Dangling tool_calls: drop the turn and its partial results.
        droppedAssistantCalls += 1;
        droppedStrayTools += candidates.length;
      }
      // Resume after the consumed run (the loop's i++ lands on j).
      i = j - 1;
      continue;
    }
    if (msg.role === "tool") {
      // A tool result with no assistant turn claiming it.
      droppedStrayTools += 1;
      continue;
    }
    out.push(msg);
  }
  const healedCount = shrunk.healedCount + droppedAssistantCalls + droppedStrayTools;
  return { messages: out, healedCount, healedFrom: shrunk.healedFrom };
}
1676
1832
  function formatLoopError(err) {
1677
1833
  const msg = err.message ?? "";
1678
1834
  if (msg.includes("maximum context length")) {
@@ -1929,7 +2085,12 @@ function registerFilesystemTools(registry, opts) {
1929
2085
  }
1930
2086
  const after = before.slice(0, firstIdx) + args.replace + before.slice(firstIdx + args.search.length);
1931
2087
  await fs.writeFile(abs, after, "utf8");
1932
- return `edited ${pathMod.relative(rootDir, abs)} (${args.search.length}\u2192${args.replace.length} chars)`;
2088
+ const rel = pathMod.relative(rootDir, abs);
2089
+ const header = `edited ${rel} (${args.search.length}\u2192${args.replace.length} chars)`;
2090
+ const startLine = before.slice(0, firstIdx).split(/\r?\n/).length;
2091
+ const diff = renderEditDiff(args.search, args.replace, startLine);
2092
+ return `${header}
2093
+ ${diff}`;
1933
2094
  }
1934
2095
  });
1935
2096
  registry.register({
@@ -1967,6 +2128,51 @@ function registerFilesystemTools(registry, opts) {
1967
2128
  });
1968
2129
  return registry;
1969
2130
  }
/**
 * Render a compact, unified-diff-style preview of a search/replace edit.
 *
 * Output is a single hunk header `@@ -S,A +S,B @@` (S = startLine,
 * A/B = line counts of search/replace) followed by one row per diff
 * entry in the form `<op> <line>`, where op is " ", "-" or "+".
 * Both inputs are split on LF or CRLF.
 *
 * @param search    Text that was replaced.
 * @param replace   Text that was written in its place.
 * @param startLine 1-based line number where `search` began.
 * @returns The hunk header and body joined by newlines.
 */
function renderEditDiff(search, replace, startLine) {
  const a = search.split(/\r?\n/);
  const b = replace.split(/\r?\n/);
  const hunk = `@@ -${startLine},${a.length} +${startLine},${b.length} @@`;
  const body = lineDiff(a, b).map((d) => `${d.op} ${d.line}`).join("\n");
  return `${hunk}\n${body}`;
}
/**
 * Line-level diff via a longest-common-subsequence table.
 *
 * @param a Old lines.
 * @param b New lines.
 * @returns Entries in file order; op " " = common line, "-" = only in
 *          `a`, "+" = only in `b`. Within a changed run, removals come
 *          before additions.
 */
function lineDiff(a, b) {
  const n = a.length;
  const m = b.length;
  // dp[i][j] = LCS length of a[0..i) and b[0..j); row/column 0 stay 0.
  const dp = Array.from({ length: n + 1 }, () => new Array(m + 1).fill(0));
  for (let i2 = 1; i2 <= n; i2++) {
    for (let j2 = 1; j2 <= m; j2++) {
      if (a[i2 - 1] === b[j2 - 1]) dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
      else dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
    }
  }
  // Backtrack from the end, collecting entries last-to-first, then
  // reverse once. This avoids a front-insert per entry.
  const reversed = [];
  let i = n;
  let j = m;
  while (i > 0 && j > 0) {
    if (a[i - 1] === b[j - 1]) {
      reversed.push({ op: " ", line: a[i - 1] });
      i--;
      j--;
    } else if (dp[i - 1][j] > dp[i][j - 1]) {
      reversed.push({ op: "-", line: a[i - 1] });
      i--;
    } else {
      // Ties take the addition first while walking backwards, so the
      // removal lands before the addition in the final order.
      reversed.push({ op: "+", line: b[j - 1] });
      j--;
    }
  }
  while (i > 0) {
    reversed.push({ op: "-", line: a[i - 1] });
    i--;
  }
  while (j > 0) {
    reversed.push({ op: "+", line: b[j - 1] });
    j--;
  }
  return reversed.reverse();
}
1970
2176
 
1971
2177
  // src/env.ts
1972
2178
  import { readFileSync as readFileSync2 } from "fs";
@@ -2134,6 +2340,8 @@ function computeReplayStats(records) {
2134
2340
  }
2135
2341
  function summarizeTurns(turns) {
2136
2342
  const totalCost = turns.reduce((s, t) => s + t.cost, 0);
2343
+ const totalInput = turns.reduce((s, t) => s + inputCostUsd(t.model, t.usage), 0);
2344
+ const totalOutput = turns.reduce((s, t) => s + outputCostUsd(t.model, t.usage), 0);
2137
2345
  const totalClaude = turns.reduce((s, t) => s + claudeEquivalentCost(t.usage), 0);
2138
2346
  let hit = 0;
2139
2347
  let miss = 0;
@@ -2147,6 +2355,8 @@ function summarizeTurns(turns) {
2147
2355
  return {
2148
2356
  turns: turns.length,
2149
2357
  totalCostUsd: round2(totalCost, 6),
2358
+ totalInputCostUsd: round2(totalInput, 6),
2359
+ totalOutputCostUsd: round2(totalOutput, 6),
2150
2360
  claudeEquivalentUsd: round2(totalClaude, 6),
2151
2361
  savingsVsClaudePct: round2(savingsVsClaude * 100, 2),
2152
2362
  cacheHitRatio: round2(cacheHitRatio, 4),
@@ -3377,6 +3587,7 @@ export {
3377
3587
  formatLoopError,
3378
3588
  harvest,
3379
3589
  healLoadedMessages,
3590
+ inputCostUsd,
3380
3591
  inspectMcpServer,
3381
3592
  isJsonRpcError,
3382
3593
  isPlanStateEmpty,
@@ -3387,6 +3598,7 @@ export {
3387
3598
  loadSessionMessages,
3388
3599
  nestArguments,
3389
3600
  openTranscriptFile,
3601
+ outputCostUsd,
3390
3602
  parseEditBlocks,
3391
3603
  parseMcpSpec,
3392
3604
  parseTranscript,