reasonix 0.11.3 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
2
2
  import {
3
3
  CODE_SYSTEM_PROMPT,
4
4
  codeSystemPrompt
5
- } from "./chunk-JDVY4JDU.js";
5
+ } from "./chunk-PKPWI33U.js";
6
6
  export {
7
7
  CODE_SYSTEM_PROMPT,
8
8
  codeSystemPrompt
9
9
  };
10
- //# sourceMappingURL=prompt-YRY4HPMZ.js.map
10
+ //# sourceMappingURL=prompt-HNDDXDRH.js.map
package/dist/index.d.ts CHANGED
@@ -521,6 +521,46 @@ declare class VolatileScratch {
521
521
  reset(): void;
522
522
  }
523
523
 
524
+ /**
525
+ * Predicate the breaker consults to decide whether a call mutates state.
526
+ * Mutating calls drop the read-only entries from the recent-args buffer: re-reading a file after
527
+ * `edit_file` shouldn't count as "saw the same args before" — the file
528
+ * legitimately changed. Wire this from the caller using whatever source
529
+ * of truth is appropriate (e.g. the ToolRegistry's `readOnly` /
530
+ * `readOnlyCheck` flags). When undefined, every call is tracked the
531
+ * old way — preserves the original behavior for callers that don't
532
+ * thread a registry through.
533
+ */
534
+ type IsMutating = (call: ToolCall) => boolean;
535
+ /**
536
+ * Call-storm breaker.
537
+ *
538
+ * Detects (tool, args) tuples repeating within a sliding window and suppresses
539
+ * the offending call. Surfaces a synthetic tool_result advising the model to
540
+ * change strategy on its next turn.
541
+ *
542
+ * Buffer entries are tagged read-only vs mutating. When a mutating call
543
+ * runs, the breaker drops prior read-only entries — a re-read of the
544
+ * same path after `edit_file` is fresh, not a repeat. Mutating calls
545
+ * still count among themselves, so a model looping on identical
546
+ * `edit_file` invocations still trips on the threshold.
547
+ *
548
+ * Without an `isMutating` predicate everything is tracked the same way
549
+ * (back-compat for callers that don't thread a registry through).
550
+ */
551
+ declare class StormBreaker {
552
+ private readonly windowSize;
553
+ private readonly threshold;
554
+ private readonly isMutating;
555
+ private readonly recent;
556
+ constructor(windowSize?: number, threshold?: number, isMutating?: IsMutating);
557
+ inspect(call: ToolCall): {
558
+ suppress: boolean;
559
+ reason?: string;
560
+ };
561
+ reset(): void;
562
+ }
563
+
524
564
  /**
525
565
  * Schema flattening for DeepSeek tool calls.
526
566
  *
@@ -577,25 +617,6 @@ interface ScavengeResult {
577
617
  }
578
618
  declare function scavengeToolCalls(reasoningContent: string | null | undefined, opts: ScavengeOptions): ScavengeResult;
579
619
 
580
- /**
581
- * Call-storm breaker.
582
- *
583
- * Detects (tool, args) tuples repeating within a sliding window and suppresses
584
- * the offending call. Surfaces a synthetic tool_result advising the model to
585
- * change strategy on its next turn.
586
- */
587
- declare class StormBreaker {
588
- private readonly windowSize;
589
- private readonly threshold;
590
- private readonly recent;
591
- constructor(windowSize?: number, threshold?: number);
592
- inspect(call: ToolCall): {
593
- suppress: boolean;
594
- reason?: string;
595
- };
596
- reset(): void;
597
- }
598
-
599
620
  /**
600
621
  * Pillar 3 — Tool-Call Repair pipeline.
601
622
  *
@@ -619,6 +640,14 @@ interface ToolCallRepairOptions {
619
640
  stormWindow?: number;
620
641
  stormThreshold?: number;
621
642
  maxScavenge?: number;
643
+ /**
644
+ * Optional predicate the storm breaker consults to identify state-
645
+ * changing calls — those drop read-only entries from the sliding window so a post-edit
646
+ * verify-read isn't mistaken for a repeat. Production callers wire
647
+ * this off the ToolRegistry's `readOnly` / `readOnlyCheck` flags;
648
+ * tests that don't supply it keep the original behavior.
649
+ */
650
+ isMutating?: IsMutating;
622
651
  }
623
652
  declare class ToolCallRepair {
624
653
  private readonly storm;
@@ -899,6 +928,12 @@ interface CacheFirstLoopOptions {
899
928
  * `max` for Reasonix (agent-class use per DeepSeek V4 docs).
900
929
  */
901
930
  reasoningEffort?: "high" | "max";
931
+ /**
932
+ * Master switch for auto-escalation paths. See ReconfigurableOptions
933
+ * — defaults to `true` (current behavior); the `flash` and `pro`
934
+ * presets pass `false` to lock the running session to one model.
935
+ */
936
+ autoEscalate?: boolean;
902
937
  /**
903
938
  * Session name. When set, the loop pre-loads the session's prior messages
904
939
  * into its log on construction, and appends every new log entry to
@@ -943,6 +978,15 @@ interface ReconfigurableOptions {
943
978
  * mid-session for cheaper, faster turns on simple tasks.
944
979
  */
945
980
  reasoningEffort?: "high" | "max";
981
+ /**
982
+ * Master switch for the auto-escalation paths — both the
983
+ * `<<<NEEDS_PRO>>>` marker scavenge and the failure-count threshold.
984
+ * `true` (default) preserves the original "flash baseline, jump to
985
+ * pro when struggling" behavior. `false` locks the active turn to
986
+ * whatever `model` is set to — used by the `flash` and `pro` presets
987
+ * which want a hard model commitment.
988
+ */
989
+ autoEscalate?: boolean;
946
990
  }
947
991
  declare class CacheFirstLoop {
948
992
  readonly client: DeepSeekClient;
@@ -961,6 +1005,13 @@ declare class CacheFirstLoop {
961
1005
  branchOptions: BranchOptions;
962
1006
  /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
963
1007
  reasoningEffort: "high" | "max";
1008
+ /**
1009
+ * Auto-escalation toggle. `true` lets the loop self-promote to pro
1010
+ * mid-turn (NEEDS_PRO marker / failure threshold); `false` keeps it
1011
+ * pinned to `model`. Mutable so the dashboard's preset switcher can
1012
+ * flip it live alongside `model`.
1013
+ */
1014
+ autoEscalate: boolean;
964
1015
  sessionName: string | null;
965
1016
  /**
966
1017
  * Hook list, mutable so `/hooks reload` can swap it without
@@ -3532,8 +3583,21 @@ declare function codeSystemPrompt(rootDir: string, opts?: CodeSystemPromptOption
3532
3583
  * from `reasonix setup`: preset, MCP servers, session. This is what
3533
3584
  * makes `reasonix chat` with no flags "just work" after first-run.
3534
3585
  */
3535
- /** One of the preset bundles (model + harvest + branch combo). */
3536
- type PresetName = "fast" | "smart" | "max";
3586
+ /**
3587
+ * Preset names: three model-commitment levels.
3588
+ * - `auto` — flash baseline + auto-escalate to pro on hard turns
3589
+ * (NEEDS_PRO marker / failure-count threshold both fire).
3590
+ * Default. Closest match to the legacy `smart` preset.
3591
+ * - `flash` — flash always. No auto-escalation. `/pro` still works
3592
+ * for one-shot manual escalation. Cheapest predictable.
3593
+ * - `pro` — pro always. No downgrade. ~3× cost vs flash at the
3594
+ * 2026-04 discount rate; more outside the window.
3595
+ *
3596
+ * Legacy `fast | smart | max` names stay in the union for back-compat
3597
+ * with existing `~/.reasonix/config.json` files; resolvePreset() maps
3598
+ * them to the new semantics.
3599
+ */
3600
+ type PresetName = "auto" | "flash" | "pro" | "fast" | "smart" | "max";
3537
3601
  /**
3538
3602
  * How `reasonix code` handles model-issued tool calls. Two axes folded
3539
3603
  * into one enum because users think about "how trusting am I right now?"
@@ -3812,6 +3876,16 @@ interface UsageBucket {
3812
3876
  cacheMissTokens: number;
3813
3877
  costUsd: number;
3814
3878
  claudeEquivUsd: number;
3879
+ /**
3880
+ * USD that DeepSeek's prompt cache shaved off the bill — sum of
3881
+ * `cacheHitTokens × (missPrice − hitPrice)` per record. Recomputed
3882
+ * from the current pricing table on every aggregate, not frozen at
3883
+ * write time, so a price-cut announcement updates retroactively. The
3884
+ * trade-off is mild inconsistency with `costUsd` (which IS frozen);
3885
+ * acceptable because cache savings is a "what does this mechanism
3886
+ * give me" narrative, not a billing record.
3887
+ */
3888
+ cacheSavingsUsd: number;
3815
3889
  }
3816
3890
  /** Cache hit ratio for a bucket — zero denominator returns 0. */
3817
3891
  declare function bucketCacheHitRatio(b: UsageBucket): number;
package/dist/index.js CHANGED
@@ -1358,25 +1358,32 @@ function coerceToToolCall(candidateJson, allowedNames) {
1358
1358
  var StormBreaker = class {
1359
1359
  windowSize;
1360
1360
  threshold;
1361
+ isMutating;
1361
1362
  recent = [];
1362
- constructor(windowSize = 6, threshold = 3) {
1363
+ constructor(windowSize = 6, threshold = 3, isMutating) {
1363
1364
  this.windowSize = windowSize;
1364
1365
  this.threshold = threshold;
1366
+ this.isMutating = isMutating;
1365
1367
  }
1366
1368
  inspect(call) {
1367
- const sig = signature(call);
1368
- if (!sig) return { suppress: false };
1369
- const count = this.recent.reduce(
1370
- (n, [name, args]) => name === sig[0] && args === sig[1] ? n + 1 : n,
1371
- 0
1372
- );
1369
+ const name = call.function?.name;
1370
+ if (!name) return { suppress: false };
1371
+ const args = call.function?.arguments ?? "";
1372
+ const mutating = this.isMutating ? this.isMutating(call) : false;
1373
+ const readOnly = !mutating;
1374
+ if (mutating) {
1375
+ for (let i = this.recent.length - 1; i >= 0; i--) {
1376
+ if (this.recent[i].readOnly) this.recent.splice(i, 1);
1377
+ }
1378
+ }
1379
+ const count = this.recent.reduce((n, e) => e.name === name && e.args === args ? n + 1 : n, 0);
1373
1380
  if (count >= this.threshold - 1) {
1374
1381
  return {
1375
1382
  suppress: true,
1376
- reason: `call-storm suppressed: ${sig[0]} called with identical args ${count + 1} times within window=${this.windowSize}`
1383
+ reason: `call-storm suppressed: ${name} called with identical args ${count + 1} times within window=${this.windowSize}`
1377
1384
  };
1378
1385
  }
1379
- this.recent.push(sig);
1386
+ this.recent.push({ name, args, readOnly });
1380
1387
  while (this.recent.length > this.windowSize) this.recent.shift();
1381
1388
  return { suppress: false };
1382
1389
  }
@@ -1384,11 +1391,6 @@ var StormBreaker = class {
1384
1391
  this.recent.length = 0;
1385
1392
  }
1386
1393
  };
1387
- function signature(call) {
1388
- const name = call.function?.name;
1389
- if (!name) return null;
1390
- return [name, call.function?.arguments ?? ""];
1391
- }
1392
1394
 
1393
1395
  // src/repair/truncation.ts
1394
1396
  function repairTruncatedJson(input) {
@@ -1466,7 +1468,7 @@ var ToolCallRepair = class {
1466
1468
  opts;
1467
1469
  constructor(opts) {
1468
1470
  this.opts = opts;
1469
- this.storm = new StormBreaker(opts.stormWindow ?? 6, opts.stormThreshold ?? 3);
1471
+ this.storm = new StormBreaker(opts.stormWindow ?? 6, opts.stormThreshold ?? 3, opts.isMutating);
1470
1472
  }
1471
1473
  /**
1472
1474
  * Drop the StormBreaker's sliding window of recent (name, args)
@@ -1490,13 +1492,13 @@ var ToolCallRepair = class {
1490
1492
  allowedNames: this.opts.allowedToolNames,
1491
1493
  maxCalls: this.opts.maxScavenge ?? 4
1492
1494
  });
1493
- const seenSignatures = new Set(declaredCalls.map(signature2));
1495
+ const seenSignatures = new Set(declaredCalls.map(signature));
1494
1496
  const merged = [...declaredCalls];
1495
1497
  for (const sc of scavenged.calls) {
1496
- if (!seenSignatures.has(signature2(sc))) {
1498
+ if (!seenSignatures.has(signature(sc))) {
1497
1499
  merged.push(sc);
1498
1500
  report.scavenged++;
1499
- seenSignatures.add(signature2(sc));
1501
+ seenSignatures.add(signature(sc));
1500
1502
  }
1501
1503
  }
1502
1504
  report.notes.push(...scavenged.notes);
@@ -1522,7 +1524,7 @@ var ToolCallRepair = class {
1522
1524
  return { calls: filtered, report };
1523
1525
  }
1524
1526
  };
1525
- function signature2(call) {
1527
+ function signature(call) {
1526
1528
  return `${call.function?.name ?? ""}::${call.function?.arguments ?? ""}`;
1527
1529
  }
1528
1530
 
@@ -1661,6 +1663,12 @@ function outputCostUsd(model, usage) {
1661
1663
  if (!p) return 0;
1662
1664
  return usage.completionTokens * p.output / 1e6;
1663
1665
  }
1666
+ function cacheSavingsUsd(model, hitTokens) {
1667
+ if (hitTokens <= 0) return 0;
1668
+ const p = DEEPSEEK_PRICING[model];
1669
+ if (!p) return 0;
1670
+ return hitTokens * (p.inputCacheMiss - p.inputCacheHit) / 1e6;
1671
+ }
1664
1672
  function claudeEquivalentCost(usage) {
1665
1673
  return (usage.promptTokens * CLAUDE_SONNET_PRICING.input + usage.completionTokens * CLAUDE_SONNET_PRICING.output) / 1e6;
1666
1674
  }
@@ -1751,6 +1759,13 @@ var CacheFirstLoop = class {
1751
1759
  branchOptions;
1752
1760
  /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
1753
1761
  reasoningEffort;
1762
+ /**
1763
+ * Auto-escalation toggle. `true` lets the loop self-promote to pro
1764
+ * mid-turn (NEEDS_PRO marker / failure threshold); `false` keeps it
1765
+ * pinned to `model`. Mutable so the dashboard's preset switcher can
1766
+ * flip it live alongside `model`.
1767
+ */
1768
+ autoEscalate = true;
1754
1769
  sessionName;
1755
1770
  /**
1756
1771
  * Hook list, mutable so `/hooks reload` can swap it without
@@ -1815,6 +1830,7 @@ var CacheFirstLoop = class {
1815
1830
  this.tools = opts.tools ?? new ToolRegistry();
1816
1831
  this.model = opts.model ?? "deepseek-v4-flash";
1817
1832
  this.reasoningEffort = opts.reasoningEffort ?? "max";
1833
+ if (opts.autoEscalate !== void 0) this.autoEscalate = opts.autoEscalate;
1818
1834
  this.maxToolIters = opts.maxToolIters ?? 64;
1819
1835
  this.hooks = opts.hooks ?? [];
1820
1836
  this.hookCwd = opts.hookCwd ?? process.cwd();
@@ -1832,7 +1848,26 @@ var CacheFirstLoop = class {
1832
1848
  this._streamPreference = opts.stream ?? true;
1833
1849
  this.stream = this.branchEnabled ? false : this._streamPreference;
1834
1850
  const allowedNames = /* @__PURE__ */ new Set([...this.prefix.toolSpecs.map((s) => s.function.name)]);
1835
- this.repair = new ToolCallRepair({ allowedToolNames: allowedNames });
1851
+ const registry = this.tools;
1852
+ const isMutating = (call) => {
1853
+ const name = call.function?.name;
1854
+ if (!name) return false;
1855
+ const def = registry.get(name);
1856
+ if (!def) return false;
1857
+ if (def.readOnlyCheck) {
1858
+ let args = {};
1859
+ try {
1860
+ args = JSON.parse(call.function?.arguments ?? "{}") ?? {};
1861
+ } catch {
1862
+ }
1863
+ try {
1864
+ if (def.readOnlyCheck(args)) return false;
1865
+ } catch {
1866
+ }
1867
+ }
1868
+ return def.readOnly !== true;
1869
+ };
1870
+ this.repair = new ToolCallRepair({ allowedToolNames: allowedNames, isMutating });
1836
1871
  this.sessionName = opts.session ?? null;
1837
1872
  if (this.sessionName) {
1838
1873
  const prior = loadSessionMessages(this.sessionName);
@@ -2013,6 +2048,7 @@ var CacheFirstLoop = class {
2013
2048
  if (opts.model !== void 0) this.model = opts.model;
2014
2049
  if (opts.stream !== void 0) this._streamPreference = opts.stream;
2015
2050
  if (opts.reasoningEffort !== void 0) this.reasoningEffort = opts.reasoningEffort;
2051
+ if (opts.autoEscalate !== void 0) this.autoEscalate = opts.autoEscalate;
2016
2052
  if (opts.branch !== void 0) {
2017
2053
  if (typeof opts.branch === "number") {
2018
2054
  this.branchOptions = { budget: opts.branch };
@@ -2128,7 +2164,7 @@ var CacheFirstLoop = class {
2128
2164
  if (repair.truncationsFixed > 0) bump("truncated", repair.truncationsFixed);
2129
2165
  if (repair.stormsBroken > 0) bump("storm-broken", repair.stormsBroken);
2130
2166
  }
2131
- if (bumped && !this._escalateThisTurn && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
2167
+ if (bumped && !this._escalateThisTurn && this.autoEscalate && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
2132
2168
  this._escalateThisTurn = true;
2133
2169
  return true;
2134
2170
  }
@@ -2373,7 +2409,7 @@ var CacheFirstLoop = class {
2373
2409
  const callBuf = /* @__PURE__ */ new Map();
2374
2410
  const readyIndices = /* @__PURE__ */ new Set();
2375
2411
  const callModel = this.modelForCurrentCall();
2376
- const bufferForEscalation = callModel !== ESCALATION_MODEL;
2412
+ const bufferForEscalation = this.autoEscalate && callModel !== ESCALATION_MODEL;
2377
2413
  let escalationBuf = "";
2378
2414
  let escalationBufFlushed = false;
2379
2415
  for await (const chunk of this.client.stream({
@@ -2485,7 +2521,7 @@ var CacheFirstLoop = class {
2485
2521
  };
2486
2522
  return;
2487
2523
  }
2488
- if (this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
2524
+ if (this.autoEscalate && this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
2489
2525
  const { reason } = this.parseEscalationMarker(assistantContent);
2490
2526
  this._escalateThisTurn = true;
2491
2527
  const reasonSuffix = reason ? ` \u2014 ${reason}` : "";
@@ -8158,7 +8194,8 @@ function emptyBucket(label, since) {
8158
8194
  cacheHitTokens: 0,
8159
8195
  cacheMissTokens: 0,
8160
8196
  costUsd: 0,
8161
- claudeEquivUsd: 0
8197
+ claudeEquivUsd: 0,
8198
+ cacheSavingsUsd: 0
8162
8199
  };
8163
8200
  }
8164
8201
  function addToBucket(b, r) {
@@ -8169,6 +8206,7 @@ function addToBucket(b, r) {
8169
8206
  b.cacheMissTokens += r.cacheMissTokens;
8170
8207
  b.costUsd += r.costUsd;
8171
8208
  b.claudeEquivUsd += r.claudeEquivUsd;
8209
+ b.cacheSavingsUsd += cacheSavingsUsd(r.model, r.cacheHitTokens);
8172
8210
  }
8173
8211
  function aggregateUsage(records, opts = {}) {
8174
8212
  const now = opts.now ?? Date.now();