npm - reasonix - Versions diffs - 0.11.3 → 0.12.8 - Mend

reasonix 0.11.3 → 0.12.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +30 -7
package/README.zh-CN.md +26 -7
package/dashboard/app.css +2346 -0
package/dashboard/app.js +3913 -0
package/dashboard/codemirror.js +36 -0
package/dashboard/index.html +19 -0
package/dist/cli/{chunk-JDVY4JDU.js → chunk-PKPWI33U.js} +3 -1
package/dist/cli/index.js +2387 -234
package/dist/cli/index.js.map +1 -1
package/dist/cli/{prompt-YRY4HPMZ.js → prompt-HNDDXDRH.js} +2 -2
package/dist/index.d.ts +95 -21
package/dist/index.js +62 -24
package/dist/index.js.map +1 -1
package/package.json +102 -76
package/dist/cli/{chunk-JDVY4JDU.js.map → chunk-PKPWI33U.js.map} +0 -0
package/dist/cli/{prompt-YRY4HPMZ.js.map → prompt-HNDDXDRH.js.map} +0 -0

package/dist/cli/{prompt-YRY4HPMZ.js → prompt-HNDDXDRH.js} RENAMED

@@ -2,9 +2,9 @@
 import {
   CODE_SYSTEM_PROMPT,
   codeSystemPrompt
-} from "./chunk-JDVY4JDU.js";
+} from "./chunk-PKPWI33U.js";
 export {
   CODE_SYSTEM_PROMPT,
   codeSystemPrompt
 };
-//# sourceMappingURL=prompt-YRY4HPMZ.js.map
+//# sourceMappingURL=prompt-HNDDXDRH.js.map

package/dist/index.d.ts CHANGED

@@ -521,6 +521,46 @@ declare class VolatileScratch {
     reset(): void;
 }
+/**
+ * Predicate the breaker consults to decide whether a call mutates state.
+ * Mutating calls clear the recent-args buffer: re-reading a file after
+ * `edit_file` shouldn't count as "saw the same args before" — the file
+ * legitimately changed. Wire this from the caller using whatever source
+ * of truth is appropriate (e.g. the ToolRegistry's `readOnly` /
+ * `readOnlyCheck` flags). When undefined, every call is tracked the
+ * old way — preserves the original behavior for callers that don't
+ * thread a registry through.
+ */
+type IsMutating = (call: ToolCall) => boolean;
+/**
+ * Call-storm breaker.
+ *
+ * Detects (tool, args) tuples repeating within a sliding window and suppresses
+ * the offending call. Surfaces a synthetic tool_result advising the model to
+ * change strategy on its next turn.
+ *
+ * Buffer entries are tagged read-only vs mutating. When a mutating call
+ * runs, the breaker drops prior read-only entries — a re-read of the
+ * same path after `edit_file` is fresh, not a repeat. Mutating calls
+ * still count among themselves, so a model looping on identical
+ * `edit_file` invocations still trips on the threshold.
+ *
+ * Without an `isMutating` predicate everything is tracked the same way
+ * (back-compat for callers that don't thread a registry through).
+ */
+declare class StormBreaker {
+    private readonly windowSize;
+    private readonly threshold;
+    private readonly isMutating;
+    private readonly recent;
+    constructor(windowSize?: number, threshold?: number, isMutating?: IsMutating);
+    inspect(call: ToolCall): {
+        suppress: boolean;
+        reason?: string;
+    };
+    reset(): void;
+}
 /**
  * Schema flattening for DeepSeek tool calls.
  *
@@ -577,25 +617,6 @@ interface ScavengeResult {
 }
 declare function scavengeToolCalls(reasoningContent: string | null | undefined, opts: ScavengeOptions): ScavengeResult;
-/**
- * Call-storm breaker.
- *
- * Detects (tool, args) tuples repeating within a sliding window and suppresses
- * the offending call. Surfaces a synthetic tool_result advising the model to
- * change strategy on its next turn.
- */
-declare class StormBreaker {
-    private readonly windowSize;
-    private readonly threshold;
-    private readonly recent;
-    constructor(windowSize?: number, threshold?: number);
-    inspect(call: ToolCall): {
-        suppress: boolean;
-        reason?: string;
-    };
-    reset(): void;
-}
 /**
  * Pillar 3 — Tool-Call Repair pipeline.
  *
@@ -619,6 +640,14 @@ interface ToolCallRepairOptions {
     stormWindow?: number;
     stormThreshold?: number;
     maxScavenge?: number;
+    /**
+     * Optional predicate the storm breaker consults to identify state-
+     * changing calls — those clear the sliding window so a post-edit
+     * verify-read isn't mistaken for a repeat. Production callers wire
+     * this off the ToolRegistry's `readOnly` / `readOnlyCheck` flags;
+     * tests that don't supply it keep the original behavior.
+     */
+    isMutating?: IsMutating;
 }
 declare class ToolCallRepair {
     private readonly storm;
@@ -899,6 +928,12 @@ interface CacheFirstLoopOptions {
      * `max` for Reasonix (agent-class use per DeepSeek V4 docs).
      */
     reasoningEffort?: "high" | "max";
+    /**
+     * Master switch for auto-escalation paths. See ReconfigurableOptions
+     * — defaults to `true` (current behavior); the `flash` and `pro`
+     * presets pass `false` to lock the running session to one model.
+     */
+    autoEscalate?: boolean;
     /**
      * Session name. When set, the loop pre-loads the session's prior messages
      * into its log on construction, and appends every new log entry to
@@ -943,6 +978,15 @@ interface ReconfigurableOptions {
      * mid-session for cheaper, faster turns on simple tasks.
      */
     reasoningEffort?: "high" | "max";
+    /**
+     * Master switch for the auto-escalation paths — both the
+     * `<<<NEEDS_PRO>>>` marker scavenge and the failure-count threshold.
+     * `true` (default) preserves the original "flash baseline, jump to
+     * pro when struggling" behavior. `false` locks the active turn to
+     * whatever `model` is set to — used by the `flash` and `pro` presets
+     * which want a hard model commitment.
+     */
+    autoEscalate?: boolean;
 }
 declare class CacheFirstLoop {
     readonly client: DeepSeekClient;
@@ -961,6 +1005,13 @@ declare class CacheFirstLoop {
     branchOptions: BranchOptions;
     /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
     reasoningEffort: "high" | "max";
+    /**
+     * Auto-escalation toggle. `true` lets the loop self-promote to pro
+     * mid-turn (NEEDS_PRO marker / failure threshold); `false` keeps it
+     * pinned to `model`. Mutable so the dashboard's preset switcher can
+     * flip it live alongside `model`.
+     */
+    autoEscalate: boolean;
     sessionName: string | null;
     /**
      * Hook list, mutable so `/hooks reload` can swap it without
@@ -3532,8 +3583,21 @@ declare function codeSystemPrompt(rootDir: string, opts?: CodeSystemPromptOption
  * from `reasonix setup`: preset, MCP servers, session. This is what
  * makes `reasonix chat` with no flags "just work" after first-run.
  */
-/** One of the preset bundles (model + harvest + branch combo). */
-type PresetName = "fast" | "smart" | "max";
+/**
+ * Preset names — three model-commitment levels.
+ *   - `auto`  — flash baseline + auto-escalate to pro on hard turns
+ *               (NEEDS_PRO marker / failure-count threshold both fire).
+ *               Default. Closest match to the legacy `smart` preset.
+ *   - `flash` — flash always. No auto-escalation. `/pro` still works
+ *               for one-shot manual escalation. Cheapest predictable.
+ *   - `pro`   — pro always. No downgrade. ~3× cost vs flash at the
+ *               2026-04 discount rate; more outside the window.
+ *
+ * Legacy `fast | smart | max` names stay in the union for back-compat
+ * with existing `~/.reasonix/config.json` files; resolvePreset() maps
+ * them to the new semantics.
+ */
+type PresetName = "auto" | "flash" | "pro" | "fast" | "smart" | "max";
 /**
  * How `reasonix code` handles model-issued tool calls. Two axes folded
  * into one enum because users think about "how trusting am I right now?"
@@ -3812,6 +3876,16 @@ interface UsageBucket {
     cacheMissTokens: number;
     costUsd: number;
     claudeEquivUsd: number;
+    /**
+     * USD that DeepSeek's prompt cache shaved off the bill — sum of
+     * `cacheHitTokens × (missPrice − hitPrice)` per record. Recomputed
+     * from the current pricing table on every aggregate, not frozen at
+     * write time, so a price-cut announcement updates retroactively. The
+     * trade-off is mild inconsistency with `costUsd` (which IS frozen);
+     * acceptable because cache savings is a "what does this mechanism
+     * give me" narrative, not a billing record.
+     */
+    cacheSavingsUsd: number;
 }
 /** Cache hit ratio for a bucket — zero denominator returns 0. */
 declare function bucketCacheHitRatio(b: UsageBucket): number;

package/dist/index.js CHANGED

@@ -1358,25 +1358,32 @@ function coerceToToolCall(candidateJson, allowedNames) {
 var StormBreaker = class {
   windowSize;
   threshold;
+  isMutating;
   recent = [];
-  constructor(windowSize = 6, threshold = 3) {
+  constructor(windowSize = 6, threshold = 3, isMutating) {
     this.windowSize = windowSize;
     this.threshold = threshold;
+    this.isMutating = isMutating;
   }
   inspect(call) {
-    const sig = signature(call);
-    if (!sig) return { suppress: false };
-    const count = this.recent.reduce(
-      (n, [name, args]) => name === sig[0] && args === sig[1] ? n + 1 : n,
-      0
-    );
+    const name = call.function?.name;
+    if (!name) return { suppress: false };
+    const args = call.function?.arguments ?? "";
+    const mutating = this.isMutating ? this.isMutating(call) : false;
+    const readOnly = !mutating;
+    if (mutating) {
+      for (let i = this.recent.length - 1; i >= 0; i--) {
+        if (this.recent[i].readOnly) this.recent.splice(i, 1);
+      }
+    }
+    const count = this.recent.reduce((n, e) => e.name === name && e.args === args ? n + 1 : n, 0);
     if (count >= this.threshold - 1) {
       return {
         suppress: true,
-        reason: `call-storm suppressed: ${sig[0]} called with identical args ${count + 1} times within window=${this.windowSize}`
+        reason: `call-storm suppressed: ${name} called with identical args ${count + 1} times within window=${this.windowSize}`
       };
     }
-    this.recent.push(sig);
+    this.recent.push({ name, args, readOnly });
     while (this.recent.length > this.windowSize) this.recent.shift();
     return { suppress: false };
   }
@@ -1384,11 +1391,6 @@ var StormBreaker = class {
     this.recent.length = 0;
   }
 };
-function signature(call) {
-  const name = call.function?.name;
-  if (!name) return null;
-  return [name, call.function?.arguments ?? ""];
-}
 // src/repair/truncation.ts
 function repairTruncatedJson(input) {
@@ -1466,7 +1468,7 @@ var ToolCallRepair = class {
   opts;
   constructor(opts) {
     this.opts = opts;
-    this.storm = new StormBreaker(opts.stormWindow ?? 6, opts.stormThreshold ?? 3);
+    this.storm = new StormBreaker(opts.stormWindow ?? 6, opts.stormThreshold ?? 3, opts.isMutating);
   }
   /**
    * Drop the StormBreaker's sliding window of recent (name, args)
@@ -1490,13 +1492,13 @@ var ToolCallRepair = class {
       allowedNames: this.opts.allowedToolNames,
       maxCalls: this.opts.maxScavenge ?? 4
     });
-    const seenSignatures = new Set(declaredCalls.map(signature2));
+    const seenSignatures = new Set(declaredCalls.map(signature));
     const merged = [...declaredCalls];
     for (const sc of scavenged.calls) {
-      if (!seenSignatures.has(signature2(sc))) {
+      if (!seenSignatures.has(signature(sc))) {
         merged.push(sc);
         report.scavenged++;
-        seenSignatures.add(signature2(sc));
+        seenSignatures.add(signature(sc));
       }
     }
     report.notes.push(...scavenged.notes);
@@ -1522,7 +1524,7 @@ var ToolCallRepair = class {
     return { calls: filtered, report };
   }
 };
-function signature2(call) {
+function signature(call) {
   return `${call.function?.name ?? ""}::${call.function?.arguments ?? ""}`;
 }
@@ -1661,6 +1663,12 @@ function outputCostUsd(model, usage) {
   if (!p) return 0;
   return usage.completionTokens * p.output / 1e6;
 }
+function cacheSavingsUsd(model, hitTokens) {
+  if (hitTokens <= 0) return 0;
+  const p = DEEPSEEK_PRICING[model];
+  if (!p) return 0;
+  return hitTokens * (p.inputCacheMiss - p.inputCacheHit) / 1e6;
+}
 function claudeEquivalentCost(usage) {
   return (usage.promptTokens * CLAUDE_SONNET_PRICING.input + usage.completionTokens * CLAUDE_SONNET_PRICING.output) / 1e6;
 }
@@ -1751,6 +1759,13 @@ var CacheFirstLoop = class {
   branchOptions;
   /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
   reasoningEffort;
+  /**
+   * Auto-escalation toggle. `true` lets the loop self-promote to pro
+   * mid-turn (NEEDS_PRO marker / failure threshold); `false` keeps it
+   * pinned to `model`. Mutable so the dashboard's preset switcher can
+   * flip it live alongside `model`.
+   */
+  autoEscalate = true;
   sessionName;
   /**
    * Hook list, mutable so `/hooks reload` can swap it without
@@ -1815,6 +1830,7 @@ var CacheFirstLoop = class {
     this.tools = opts.tools ?? new ToolRegistry();
     this.model = opts.model ?? "deepseek-v4-flash";
     this.reasoningEffort = opts.reasoningEffort ?? "max";
+    if (opts.autoEscalate !== void 0) this.autoEscalate = opts.autoEscalate;
     this.maxToolIters = opts.maxToolIters ?? 64;
     this.hooks = opts.hooks ?? [];
     this.hookCwd = opts.hookCwd ?? process.cwd();
@@ -1832,7 +1848,26 @@ var CacheFirstLoop = class {
     this._streamPreference = opts.stream ?? true;
     this.stream = this.branchEnabled ? false : this._streamPreference;
     const allowedNames = /* @__PURE__ */ new Set([...this.prefix.toolSpecs.map((s) => s.function.name)]);
-    this.repair = new ToolCallRepair({ allowedToolNames: allowedNames });
+    const registry = this.tools;
+    const isMutating = (call) => {
+      const name = call.function?.name;
+      if (!name) return false;
+      const def = registry.get(name);
+      if (!def) return false;
+      if (def.readOnlyCheck) {
+        let args = {};
+        try {
+          args = JSON.parse(call.function?.arguments ?? "{}") ?? {};
+        } catch {
+        }
+        try {
+          if (def.readOnlyCheck(args)) return false;
+        } catch {
+        }
+      }
+      return def.readOnly !== true;
+    };
+    this.repair = new ToolCallRepair({ allowedToolNames: allowedNames, isMutating });
     this.sessionName = opts.session ?? null;
     if (this.sessionName) {
       const prior = loadSessionMessages(this.sessionName);
@@ -2013,6 +2048,7 @@ var CacheFirstLoop = class {
     if (opts.model !== void 0) this.model = opts.model;
     if (opts.stream !== void 0) this._streamPreference = opts.stream;
     if (opts.reasoningEffort !== void 0) this.reasoningEffort = opts.reasoningEffort;
+    if (opts.autoEscalate !== void 0) this.autoEscalate = opts.autoEscalate;
     if (opts.branch !== void 0) {
       if (typeof opts.branch === "number") {
         this.branchOptions = { budget: opts.branch };
@@ -2128,7 +2164,7 @@ var CacheFirstLoop = class {
       if (repair.truncationsFixed > 0) bump("truncated", repair.truncationsFixed);
       if (repair.stormsBroken > 0) bump("storm-broken", repair.stormsBroken);
     }
-    if (bumped && !this._escalateThisTurn && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
+    if (bumped && !this._escalateThisTurn && this.autoEscalate && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
       this._escalateThisTurn = true;
       return true;
     }
@@ -2373,7 +2409,7 @@ var CacheFirstLoop = class {
           const callBuf = /* @__PURE__ */ new Map();
           const readyIndices = /* @__PURE__ */ new Set();
           const callModel = this.modelForCurrentCall();
-          const bufferForEscalation = callModel !== ESCALATION_MODEL;
+          const bufferForEscalation = this.autoEscalate && callModel !== ESCALATION_MODEL;
           let escalationBuf = "";
           let escalationBufFlushed = false;
           for await (const chunk of this.client.stream({
@@ -2485,7 +2521,7 @@ var CacheFirstLoop = class {
         };
         return;
       }
-      if (this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
+      if (this.autoEscalate && this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
         const { reason } = this.parseEscalationMarker(assistantContent);
         this._escalateThisTurn = true;
         const reasonSuffix = reason ? ` \u2014 ${reason}` : "";
@@ -8158,7 +8194,8 @@ function emptyBucket(label, since) {
     cacheHitTokens: 0,
     cacheMissTokens: 0,
     costUsd: 0,
-    claudeEquivUsd: 0
+    claudeEquivUsd: 0,
+    cacheSavingsUsd: 0
   };
 }
 function addToBucket(b, r) {
@@ -8169,6 +8206,7 @@ function addToBucket(b, r) {
   b.cacheMissTokens += r.cacheMissTokens;
   b.costUsd += r.costUsd;
   b.claudeEquivUsd += r.claudeEquivUsd;
+  b.cacheSavingsUsd += cacheSavingsUsd(r.model, r.cacheHitTokens);
 }
 function aggregateUsage(records, opts = {}) {
   const now = opts.now ?? Date.now();