reasonix 0.5.24 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1714,7 +1714,8 @@ var SessionStats = class {
1714
1714
  claudeEquivalentUsd: round(this.totalClaudeEquivalent, 6),
1715
1715
  savingsVsClaudePct: round(this.savingsVsClaude * 100, 2),
1716
1716
  cacheHitRatio: round(this.aggregateCacheHitRatio, 4),
1717
- lastPromptTokens: last?.usage.promptTokens ?? 0
1717
+ lastPromptTokens: last?.usage.promptTokens ?? 0,
1718
+ lastTurnCostUsd: round(last?.cost ?? 0, 6)
1718
1719
  };
1719
1720
  }
1720
1721
  };
@@ -1725,6 +1726,11 @@ function round(n, digits) {
1725
1726
 
1726
1727
  // src/loop.ts
1727
1728
  var ARGS_COMPACT_THRESHOLD_TOKENS = 800;
1729
+ var TURN_END_RESULT_CAP_TOKENS = 3e3;
1730
+ var FAILURE_ESCALATION_THRESHOLD = 3;
1731
+ var ESCALATION_MODEL = "deepseek-v4-pro";
1732
+ var NEEDS_PRO_MARKER = "<<<NEEDS_PRO>>>";
1733
+ var NEEDS_PRO_BUFFER_CHARS = 80;
1728
1734
  var CacheFirstLoop = class {
1729
1735
  client;
1730
1736
  prefix;
@@ -1765,11 +1771,36 @@ var CacheFirstLoop = class {
1765
1771
  * `step()` (the prior turn's signal has already fired).
1766
1772
  */
1767
1773
  _turnAbort = new AbortController();
1774
+ /**
1775
+ * "Next turn should run on pro, regardless of this.model." Set by the
1776
+ * `/pro` slash command; consumed at the next turn's start (flipping
1777
+ * `_escalateThisTurn` on and self-clearing) so it's a fire-and-forget
1778
+ * single-turn upgrade. Survives across multiple slash inputs so
1779
+ * typing `/pro` and then hesitating a while before submitting a real
1780
+ * message still applies.
1781
+ */
1782
+ _proArmedForNextTurn = false;
1783
+ /**
1784
+ * Active for the current turn only — true means every model call
1785
+ * this turn uses pro instead of `this.model`. Turned on by EITHER
1786
+ * the pro-armed consumption OR the mid-turn auto-escalation
1787
+ * threshold (see `_turnFailureCount`). Cleared at turn end.
1788
+ */
1789
+ _escalateThisTurn = false;
1790
+ /**
1791
+ * Visible-failure count for the current turn. Incremented by tool
1792
+ * dispatch paths when a result matches a known "flash is struggling"
1793
+ * shape (SEARCH-not-found errors, scavenge / truncation / storm
1794
+ * repair fires). Once it hits {@link FAILURE_ESCALATION_THRESHOLD},
1795
+ * the remainder of the turn's model calls auto-upgrade to pro so
1796
+ * the user doesn't watch flash retry the same edit 5 times.
1797
+ */
1798
+ _turnFailureCount = 0;
1768
1799
  constructor(opts) {
1769
1800
  this.client = opts.client;
1770
1801
  this.prefix = opts.prefix;
1771
1802
  this.tools = opts.tools ?? new ToolRegistry();
1772
- this.model = opts.model ?? "deepseek-v4-pro";
1803
+ this.model = opts.model ?? "deepseek-v4-flash";
1773
1804
  this.reasoningEffort = opts.reasoningEffort ?? "max";
1774
1805
  this.maxToolIters = opts.maxToolIters ?? 64;
1775
1806
  this.hooks = opts.hooks ?? [];
@@ -1876,6 +1907,37 @@ var CacheFirstLoop = class {
1876
1907
  }
1877
1908
  }
1878
1909
  }
1910
+ /**
1911
+ * Fired at the END of a turn (just before `done` is yielded). Shrinks
1912
+ * every tool RESULT in the log that exceeds {@link TURN_END_RESULT_CAP_TOKENS}
1913
+ * to a tight cap so the NEXT turn's prompt doesn't re-pay for big
1914
+ * reads or searches done earlier. Unlike the reactive 40/80%
1915
+ * thresholds which react to context pressure, this runs unconditionally
1916
+ * — the win is preventive: each turn's big outputs get trimmed before
1917
+ * they ride into the next prompt. Saves compounding cost on long
1918
+ * sessions.
1919
+ *
1920
+ * Why compact the JUST-finished turn's results too (not just older
1921
+ * turns)? The same-turn iters already consumed the raw content to
1922
+ * make their decisions — the log is only carried forward for future
1923
+ * prompts. And "let me re-read the file" is vastly cheaper than
1924
+ * "carry this 12KB result in every future turn's prompt forever."
1925
+ *
1926
+ * Safe by construction: args-compact for THIS turn already ran
1927
+ * inside `compactToolCallArgsAfterResponse`; this pass is orthogonal.
1928
+ */
1929
+ autoCompactToolResultsOnTurnEnd() {
1930
+ const before = this.log.toMessages();
1931
+ const shrunk = shrinkOversizedToolResultsByTokens(before, TURN_END_RESULT_CAP_TOKENS);
1932
+ if (shrunk.healedCount === 0) return;
1933
+ this.log.compactInPlace(shrunk.messages);
1934
+ if (this.sessionName) {
1935
+ try {
1936
+ rewriteSession(this.sessionName, shrunk.messages);
1937
+ } catch {
1938
+ }
1939
+ }
1940
+ }
1879
1941
  compact(maxTokens = 4e3) {
1880
1942
  const before = this.log.toMessages();
1881
1943
  const resultsPass = shrinkOversizedToolResultsByTokens(before, maxTokens);
@@ -1958,6 +2020,78 @@ var CacheFirstLoop = class {
1958
2020
  }
1959
2021
  this.stream = this.branchEnabled ? false : this._streamPreference;
1960
2022
  }
2023
+ /**
2024
+ * Arm pro for the next turn (consumed at turn start). Called by
2025
+ * `/pro`. Idempotent — repeated calls stay armed, `disarmPro()`
2026
+ * clears. Separate from `/preset max` which persistently switches
2027
+ * this.model; armed state is strictly single-turn.
2028
+ */
2029
+ armProForNextTurn() {
2030
+ this._proArmedForNextTurn = true;
2031
+ }
2032
+ /** Cancel `/pro` arming before the next turn starts. */
2033
+ disarmPro() {
2034
+ this._proArmedForNextTurn = false;
2035
+ }
2036
+ /** UI surface — true while `/pro` is queued but hasn't fired yet. */
2037
+ get proArmed() {
2038
+ return this._proArmedForNextTurn;
2039
+ }
2040
+ /** UI surface — true while the current turn is running on pro (armed or auto-escalated). */
2041
+ get escalatedThisTurn() {
2042
+ return this._escalateThisTurn;
2043
+ }
2044
+ /**
2045
+ * Model the current model call should use. Defaults to `this.model`;
2046
+ * upgrades to {@link ESCALATION_MODEL} when the turn is armed for
2047
+ * pro (via `/pro`) or has hit the failure-escalation threshold.
2048
+ * Same thinking + effort policy applies regardless — pro defaults
2049
+ * to thinking=enabled and effort=max, which the current turn wanted
2050
+ * anyway when flash was struggling.
2051
+ */
2052
+ modelForCurrentCall() {
2053
+ return this._escalateThisTurn ? ESCALATION_MODEL : this.model;
2054
+ }
2055
+ /**
2056
+ * True when the assistant's content is a self-reported escalation
2057
+ * request. Only the FIRST line matters — the model is instructed
2058
+ * to emit the marker as the first output token if at all. Matching
2059
+ * anywhere else in the text is a normal content reference (e.g.
2060
+ * the user asked about the marker itself, or prose that happens
2061
+ * to contain angle-brackets).
2062
+ */
2063
+ isEscalationRequest(content) {
2064
+ return content.trimStart().startsWith(NEEDS_PRO_MARKER);
2065
+ }
2066
+ /**
2067
+ * Check whether a tool result string looks like a "flash struggled"
2068
+ * signal and, if so, increment the turn's failure counter. Escalates
2069
+ * the REST of the current turn to pro once the threshold is hit.
2070
+ * Idempotent after escalation — further failures don't re-escalate,
2071
+ * but the turn is already on pro so it doesn't matter.
2072
+ *
2073
+ * Return: `true` when this call tipped the turn into escalation
2074
+ * mode (so the loop can surface a one-time warning to the user).
2075
+ */
2076
+ noteToolFailureSignal(resultJson, repair) {
2077
+ let bumped = false;
2078
+ if (resultJson.includes('"error"') && resultJson.includes("search text not found")) {
2079
+ this._turnFailureCount += 1;
2080
+ bumped = true;
2081
+ }
2082
+ if (repair) {
2083
+ const repairs = repair.scavenged + repair.truncationsFixed + repair.stormsBroken;
2084
+ if (repairs > 0) {
2085
+ this._turnFailureCount += repairs;
2086
+ bumped = true;
2087
+ }
2088
+ }
2089
+ if (bumped && !this._escalateThisTurn && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
2090
+ this._escalateThisTurn = true;
2091
+ return true;
2092
+ }
2093
+ return false;
2094
+ }
1961
2095
  buildMessages(pendingUser) {
1962
2096
  const healed = healLoadedMessages(this.log.toMessages(), DEFAULT_MAX_RESULT_CHARS);
1963
2097
  const msgs = [...this.prefix.toMessages(), ...healed.messages];
@@ -2012,8 +2146,23 @@ var CacheFirstLoop = class {
2012
2146
  this._turn++;
2013
2147
  this.scratch.reset();
2014
2148
  this.repair.resetStorm();
2149
+ this._turnFailureCount = 0;
2150
+ this._escalateThisTurn = false;
2151
+ let armedConsumed = false;
2152
+ if (this._proArmedForNextTurn) {
2153
+ this._escalateThisTurn = true;
2154
+ this._proArmedForNextTurn = false;
2155
+ armedConsumed = true;
2156
+ }
2015
2157
  this._turnAbort = new AbortController();
2016
2158
  const signal = this._turnAbort.signal;
2159
+ if (armedConsumed) {
2160
+ yield {
2161
+ turn: this._turn,
2162
+ role: "warning",
2163
+ content: "\u21E7 /pro armed \u2014 this turn runs on deepseek-v4-pro (one-shot \xB7 disarms after turn)"
2164
+ };
2165
+ }
2017
2166
  let pendingUser = userInput;
2018
2167
  const toolSpecs = this.prefix.tools();
2019
2168
  const warnAt = Math.max(1, Math.floor(this.maxToolIters * 0.7));
@@ -2033,6 +2182,7 @@ var CacheFirstLoop = class {
2033
2182
  content: stoppedMsg,
2034
2183
  forcedSummary: true
2035
2184
  };
2185
+ this.autoCompactToolResultsOnTurnEnd();
2036
2186
  yield { turn: this._turn, role: "done", content: stoppedMsg };
2037
2187
  return;
2038
2188
  }
@@ -2109,14 +2259,15 @@ var CacheFirstLoop = class {
2109
2259
  queue.push(sample);
2110
2260
  }
2111
2261
  };
2262
+ const callModel = this.modelForCurrentCall();
2112
2263
  const branchPromise = runBranches(
2113
2264
  this.client,
2114
2265
  {
2115
- model: this.model,
2266
+ model: callModel,
2116
2267
  messages,
2117
2268
  tools: toolSpecs.length ? toolSpecs : void 0,
2118
2269
  signal,
2119
- thinking: thinkingModeForModel(this.model),
2270
+ thinking: thinkingModeForModel(callModel),
2120
2271
  reasoningEffort: this.reasoningEffort
2121
2272
  },
2122
2273
  {
@@ -2165,21 +2316,41 @@ var CacheFirstLoop = class {
2165
2316
  } else if (this.stream) {
2166
2317
  const callBuf = /* @__PURE__ */ new Map();
2167
2318
  const readyIndices = /* @__PURE__ */ new Set();
2319
+ const callModel = this.modelForCurrentCall();
2320
+ const bufferForEscalation = callModel !== ESCALATION_MODEL;
2321
+ let escalationBuf = "";
2322
+ let escalationBufFlushed = false;
2168
2323
  for await (const chunk of this.client.stream({
2169
- model: this.model,
2324
+ model: callModel,
2170
2325
  messages,
2171
2326
  tools: toolSpecs.length ? toolSpecs : void 0,
2172
2327
  signal,
2173
- thinking: thinkingModeForModel(this.model),
2328
+ thinking: thinkingModeForModel(callModel),
2174
2329
  reasoningEffort: this.reasoningEffort
2175
2330
  })) {
2176
2331
  if (chunk.contentDelta) {
2177
2332
  assistantContent += chunk.contentDelta;
2178
- yield {
2179
- turn: this._turn,
2180
- role: "assistant_delta",
2181
- content: chunk.contentDelta
2182
- };
2333
+ if (bufferForEscalation && !escalationBufFlushed) {
2334
+ escalationBuf += chunk.contentDelta;
2335
+ if (this.isEscalationRequest(escalationBuf)) {
2336
+ break;
2337
+ }
2338
+ if (escalationBuf.length >= NEEDS_PRO_BUFFER_CHARS || escalationBuf.includes("\n")) {
2339
+ escalationBufFlushed = true;
2340
+ yield {
2341
+ turn: this._turn,
2342
+ role: "assistant_delta",
2343
+ content: escalationBuf
2344
+ };
2345
+ escalationBuf = "";
2346
+ }
2347
+ } else {
2348
+ yield {
2349
+ turn: this._turn,
2350
+ role: "assistant_delta",
2351
+ content: chunk.contentDelta
2352
+ };
2353
+ }
2183
2354
  }
2184
2355
  if (chunk.reasoningDelta) {
2185
2356
  reasoningContent += chunk.reasoningDelta;
@@ -2220,13 +2391,23 @@ var CacheFirstLoop = class {
2220
2391
  if (chunk.usage) usage = chunk.usage;
2221
2392
  }
2222
2393
  toolCalls = [...callBuf.values()];
2394
+ if (bufferForEscalation && !escalationBufFlushed && escalationBuf.length > 0) {
2395
+ if (!this.isEscalationRequest(escalationBuf)) {
2396
+ yield {
2397
+ turn: this._turn,
2398
+ role: "assistant_delta",
2399
+ content: escalationBuf
2400
+ };
2401
+ }
2402
+ }
2223
2403
  } else {
2404
+ const callModel = this.modelForCurrentCall();
2224
2405
  const resp = await this.client.chat({
2225
- model: this.model,
2406
+ model: callModel,
2226
2407
  messages,
2227
2408
  tools: toolSpecs.length ? toolSpecs : void 0,
2228
2409
  signal,
2229
- thinking: thinkingModeForModel(this.model),
2410
+ thinking: thinkingModeForModel(callModel),
2230
2411
  reasoningEffort: this.reasoningEffort
2231
2412
  });
2232
2413
  assistantContent = resp.content;
@@ -2236,6 +2417,7 @@ var CacheFirstLoop = class {
2236
2417
  }
2237
2418
  } catch (err) {
2238
2419
  if (signal.aborted) {
2420
+ this.autoCompactToolResultsOnTurnEnd();
2239
2421
  yield { turn: this._turn, role: "done", content: "" };
2240
2422
  return;
2241
2423
  }
@@ -2247,7 +2429,27 @@ var CacheFirstLoop = class {
2247
2429
  };
2248
2430
  return;
2249
2431
  }
2250
- const turnStats = this.stats.record(this._turn, this.model, usage ?? new Usage());
2432
+ if (this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
2433
+ this._escalateThisTurn = true;
2434
+ yield {
2435
+ turn: this._turn,
2436
+ role: "warning",
2437
+ content: `\u21E7 flash requested escalation \u2014 retrying this turn on ${ESCALATION_MODEL}`
2438
+ };
2439
+ assistantContent = "";
2440
+ reasoningContent = "";
2441
+ toolCalls = [];
2442
+ usage = null;
2443
+ branchSummary = void 0;
2444
+ preHarvestedPlanState = void 0;
2445
+ iter--;
2446
+ continue;
2447
+ }
2448
+ const turnStats = this.stats.record(
2449
+ this._turn,
2450
+ this.modelForCurrentCall(),
2451
+ usage ?? new Usage()
2452
+ );
2251
2453
  if (pendingUser !== null) {
2252
2454
  this.appendAndPersist({ role: "user", content: pendingUser });
2253
2455
  pendingUser = null;
@@ -2278,6 +2480,13 @@ var CacheFirstLoop = class {
2278
2480
  repair: report,
2279
2481
  branch: branchSummary
2280
2482
  };
2483
+ if (this.noteToolFailureSignal("", report)) {
2484
+ yield {
2485
+ turn: this._turn,
2486
+ role: "warning",
2487
+ content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this._turnFailureCount} repair/error signals. Next turn falls back to ${this.model} unless /pro is armed.`
2488
+ };
2489
+ }
2281
2490
  if (report.stormsBroken > 0) {
2282
2491
  const noteTail = report.notes.length ? ` \u2014 ${report.notes[report.notes.length - 1]}` : "";
2283
2492
  const allSuppressed = repairedCalls.length === 0 && toolCalls.length > 0;
@@ -2289,13 +2498,14 @@ var CacheFirstLoop = class {
2289
2498
  };
2290
2499
  }
2291
2500
  if (repairedCalls.length === 0) {
2501
+ this.autoCompactToolResultsOnTurnEnd();
2292
2502
  yield { turn: this._turn, role: "done", content: assistantContent };
2293
2503
  return;
2294
2504
  }
2295
2505
  const ctxMax = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
2296
2506
  if (usage) {
2297
2507
  const ratio = usage.promptTokens / ctxMax;
2298
- if (ratio > 0.6 && ratio <= 0.8) {
2508
+ if (ratio > 0.4 && ratio <= 0.8) {
2299
2509
  const before = usage.promptTokens;
2300
2510
  const soft = this.compact(4e3);
2301
2511
  if (soft.healedCount > 0) {
@@ -2394,6 +2604,13 @@ ${reason}`;
2394
2604
  content: result
2395
2605
  });
2396
2606
  this.compactToolCallArgsAfterResponse();
2607
+ if (this.noteToolFailureSignal(result)) {
2608
+ yield {
2609
+ turn: this._turn,
2610
+ role: "warning",
2611
+ content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this._turnFailureCount} edit failure(s). Next turn falls back to ${this.model} unless /pro is armed.`
2612
+ };
2613
+ }
2397
2614
  yield {
2398
2615
  turn: this._turn,
2399
2616
  role: "tool",
@@ -2417,13 +2634,15 @@ ${reason}`;
2417
2634
  role: "user",
2418
2635
  content: "I'm out of tool-call budget for this turn. Summarize in plain prose what you learned from the tool results above. Do NOT emit any tool calls, function-call markup, DSML invocations, or SEARCH/REPLACE edit blocks \u2014 they will be silently discarded. Just plain text."
2419
2636
  });
2637
+ const summaryModel = "deepseek-v4-flash";
2638
+ const summaryEffort = "high";
2420
2639
  const resp = await this.client.chat({
2421
- model: this.model,
2640
+ model: summaryModel,
2422
2641
  messages,
2423
2642
  // no tools → model is forced to answer in text
2424
2643
  signal: this._turnAbort.signal,
2425
- thinking: thinkingModeForModel(this.model),
2426
- reasoningEffort: this.reasoningEffort
2644
+ thinking: thinkingModeForModel(summaryModel),
2645
+ reasoningEffort: summaryEffort
2427
2646
  });
2428
2647
  const rawContent = resp.content?.trim() ?? "";
2429
2648
  const cleaned = stripHallucinatedToolMarkup(rawContent);
@@ -2432,7 +2651,7 @@ ${reason}`;
2432
2651
  const annotated = `${reasonPrefix}
2433
2652
 
2434
2653
  ${summary}`;
2435
- const summaryStats = this.stats.record(this._turn, this.model, resp.usage ?? new Usage());
2654
+ const summaryStats = this.stats.record(this._turn, summaryModel, resp.usage ?? new Usage());
2436
2655
  this.appendAndPersist(this.assistantMessage(summary, [], resp.reasoningContent ?? void 0));
2437
2656
  yield {
2438
2657
  turn: this._turn,
@@ -2441,6 +2660,7 @@ ${summary}`;
2441
2660
  stats: summaryStats,
2442
2661
  forcedSummary: true
2443
2662
  };
2663
+ this.autoCompactToolResultsOnTurnEnd();
2444
2664
  yield { turn: this._turn, role: "done", content: summary };
2445
2665
  } catch (err) {
2446
2666
  const label = errorLabelFor(opts.reason, this.maxToolIters);
@@ -2450,6 +2670,7 @@ ${summary}`;
2450
2670
  content: "",
2451
2671
  error: `${label} and the fallback summary call failed: ${err.message}. Run /clear and retry with a narrower question, or raise --max-tool-iters.`
2452
2672
  };
2673
+ this.autoCompactToolResultsOnTurnEnd();
2453
2674
  yield { turn: this._turn, role: "done", content: "" };
2454
2675
  }
2455
2676
  }
@@ -2981,6 +3202,28 @@ import { join as join7, resolve as resolve3 } from "path";
2981
3202
  import { existsSync as existsSync6, readFileSync as readFileSync6, readdirSync as readdirSync3, statSync as statSync3 } from "fs";
2982
3203
  import { homedir as homedir3 } from "os";
2983
3204
  import { join as join6, resolve as resolve2 } from "path";
3205
+
3206
+ // src/prompt-fragments.ts
3207
+ var TUI_FORMATTING_RULES = `Formatting (rendered in a TUI with a real markdown renderer):
3208
+ - Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` header + \`| --- | --- |\` separator). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.
3209
+ - Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.
3210
+ - Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
3211
+ - Do NOT draw decorative frames around content with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.
3212
+ - For flow charts and diagrams: a plain bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
3213
+ var ESCALATION_CONTRACT = `Cost-aware escalation (when you're running on deepseek-v4-flash):
3214
+
3215
+ If a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the exact string \`<<<NEEDS_PRO>>>\` as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot. Do NOT emit any other content in the same response when you request escalation.
3216
+
3217
+ Use this sparingly. Normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn.`;
3218
+ var NEGATIVE_CLAIM_RULE = `Negative claims ("X is missing", "Y isn't implemented", "there's no Z") are the #1 hallucination shape. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.
3219
+
3220
+ If you have a search tool (\`search_content\`, \`grep\`, web search), call it FIRST before asserting absence:
3221
+ - Returns matches \u2192 you were wrong; correct yourself and cite the matches.
3222
+ - Returns nothing \u2192 state the absence WITH the search query as evidence: \`No callers of \\\`foo()\\\` found (search_content "foo").\`
3223
+
3224
+ If you have no search tool, qualify hard: "I haven't verified \u2014 this is a guess." Never assert absence with fake authority.`;
3225
+
3226
+ // src/skills.ts
2984
3227
  var SKILLS_DIRNAME = "skills";
2985
3228
  var SKILL_FILE = "SKILL.md";
2986
3229
  var SKILLS_INDEX_MAX_CHARS = 4e3;
@@ -3123,10 +3366,10 @@ function parseRunAs(raw) {
3123
3366
  }
3124
3367
  function skillIndexLine(s) {
3125
3368
  const safeDesc = s.description.replace(/\n/g, " ").trim();
3126
- const marker = s.runAs === "subagent" ? "\u{1F9EC} " : "";
3127
- const max = 130 - s.name.length - marker.length;
3369
+ const tag = s.runAs === "subagent" ? " [\u{1F9EC} subagent]" : "";
3370
+ const max = 130 - s.name.length - tag.length;
3128
3371
  const clipped = safeDesc.length > max ? `${safeDesc.slice(0, Math.max(1, max - 1))}\u2026` : safeDesc;
3129
- return clipped ? `- ${marker}${s.name} \u2014 ${clipped}` : `- ${marker}${s.name}`;
3372
+ return clipped ? `- ${s.name}${tag} \u2014 ${clipped}` : `- ${s.name}${tag}`;
3130
3373
  }
3131
3374
  function applySkillsIndex(basePrompt, opts = {}) {
3132
3375
  const store = new SkillStore(opts);
@@ -3141,7 +3384,7 @@ function applySkillsIndex(basePrompt, opts = {}) {
3141
3384
  "",
3142
3385
  "# Skills \u2014 playbooks you can invoke",
3143
3386
  "",
3144
- 'One-liner index. Each entry is either a built-in or a user-authored playbook. Call `run_skill({ name: "<skill-name>", arguments: "<task>" })` to invoke one. Skills marked with \u{1F9EC} spawn an **isolated subagent** \u2014 its tool calls and reasoning never enter your context, only its final answer does. Use \u{1F9EC} skills for tasks that would otherwise flood your context (deep exploration, multi-step research, anything where you only need the conclusion). Plain skills are inlined: their body becomes a tool result you read and act on directly. The user can also invoke a skill via `/skill <name>`.',
3387
+ 'One-liner index. Each entry is either a built-in or a user-authored playbook. Call `run_skill({ name: "<skill-name>", arguments: "<task>" })` \u2014 the `name` is JUST the skill identifier (e.g. `"explore"`), NOT the `[\u{1F9EC} subagent]` tag that appears after it. Entries tagged `[\u{1F9EC} subagent]` spawn an **isolated subagent** \u2014 its tool calls and reasoning never enter your context, only its final answer does. Use subagent skills for tasks that would otherwise flood your context (deep exploration, multi-step research, anything where you only need the conclusion). Plain skills are inlined: their body becomes a tool result you read and act on directly. The user can also invoke a skill via `/skill <name>`.',
3145
3388
  "",
3146
3389
  "```",
3147
3390
  truncated,
@@ -3163,12 +3406,9 @@ Your final answer:
3163
3406
  - If the question can't be answered from what you found, say so plainly and suggest where to look next.
3164
3407
  - No follow-up offers, no "let me know if you need more." The parent will ask again if they need more.
3165
3408
 
3166
- Formatting (rendered in a TUI):
3167
- - Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` + \`| --- | --- |\`). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C) \u2014 they break word-wrap.
3168
- - Keep table cells short; if a cell needs a paragraph, use bullets below the table instead.
3169
- - Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
3170
- - NEVER draw decorative frames around code or text with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. Use plain code blocks; the renderer adds its own border.
3171
- - For flow charts: use a bullet list with \`\u2192\` or \`\u2193\` between steps, not ASCII boxes-and-arrows.
3409
+ ${NEGATIVE_CLAIM_RULE}
3410
+
3411
+ ${TUI_FORMATTING_RULES}
3172
3412
 
3173
3413
  The 'task' the parent gave you is the question you must answer. Treat any other reading of it as scope creep.`;
3174
3414
  var BUILTIN_RESEARCH_BODY = `You are running as a research subagent. Your job is to gather information from code AND the web, synthesize it, and return one focused conclusion.
@@ -3185,12 +3425,9 @@ Your final answer:
3185
3425
  - Distinguish "I verified this in code" from "I read this on a docs page" \u2014 the parent will trust the former more.
3186
3426
  - If the answer is uncertain, say so. Don't invent confidence.
3187
3427
 
3188
- Formatting (rendered in a TUI):
3189
- - Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` + \`| --- | --- |\`). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C) \u2014 they break word-wrap.
3190
- - Keep table cells short; if a cell needs a paragraph, use bullets below the table instead.
3191
- - Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
3192
- - NEVER draw decorative frames around code or text with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. Use plain code blocks; the renderer adds its own border.
3193
- - For flow charts: use a bullet list with \`\u2192\` or \`\u2193\` between steps, not ASCII boxes-and-arrows.
3428
+ ${NEGATIVE_CLAIM_RULE}
3429
+
3430
+ ${TUI_FORMATTING_RULES}
3194
3431
 
3195
3432
  The 'task' the parent gave you is the research question. Stay on it.`;
3196
3433
  var BUILTIN_SKILLS = Object.freeze([
@@ -4256,15 +4493,15 @@ Rules:
4256
4493
  - When you're done, your final assistant message is the only thing the parent will see \u2014 make it complete and self-contained. No follow-up offers, no questions, no "let me know if you need more."
4257
4494
  - Prefer one clear, distilled answer over a long log of what you tried.
4258
4495
 
4259
- Formatting rules (the parent renders your reply in a TUI with a real markdown renderer):
4260
- - For tabular data use GitHub-Flavored Markdown tables with ASCII pipes: \`| col | col |\` headers, \`| --- | --- |\` separator. NEVER draw tables with Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524). They look intentional but break terminal word-wrap and produce garbled output.
4261
- - Keep table cells short \u2014 one short phrase per cell, not multi-line paragraphs. If a description doesn't fit in ~40 chars, use bullets below the table instead.
4262
- - Use fenced code blocks (\`\`\`) for any code, file paths with line ranges, or shell commands.
4263
- - NEVER draw decorative frames around content with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. The renderer handles code blocks and headings on its own \u2014 extra ASCII art adds noise without value and breaks at narrow terminal widths.
4264
- - For flow charts and diagrams: use a markdown bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
4496
+ ${NEGATIVE_CLAIM_RULE}
4497
+
4498
+ ${ESCALATION_CONTRACT}
4499
+
4500
+ ${TUI_FORMATTING_RULES}`;
4265
4501
  var DEFAULT_MAX_RESULT_CHARS2 = 8e3;
4266
4502
  var DEFAULT_MAX_ITERS = 16;
4267
- var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-pro";
4503
+ var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-flash";
4504
+ var DEFAULT_SUBAGENT_EFFORT = "high";
4268
4505
  var SUBAGENT_TOOL_NAME = "spawn_subagent";
4269
4506
  var NEVER_INHERITED_TOOLS = /* @__PURE__ */ new Set([SUBAGENT_TOOL_NAME, "submit_plan"]);
4270
4507
  async function spawnSubagent(opts) {
@@ -4293,6 +4530,10 @@ async function spawnSubagent(opts) {
4293
4530
  prefix: childPrefix,
4294
4531
  tools: childTools,
4295
4532
  model,
4533
+ // Subagents run on a constrained thinking budget by default — the
4534
+ // task is already narrow by construction, and `high` cuts output
4535
+ // tokens substantially vs `max`.
4536
+ reasoningEffort: DEFAULT_SUBAGENT_EFFORT,
4296
4537
  maxToolIters,
4297
4538
  hooks: [],
4298
4539
  stream: false
@@ -5085,7 +5326,7 @@ function registerShellTools(registry, opts) {
5085
5326
  const allowAll = opts.allowAll ?? false;
5086
5327
  registry.register({
5087
5328
  name: "run_command",
5088
- description: "Run a shell command in the project root and return its combined stdout+stderr. Common read-only inspection and test/lint/typecheck commands run immediately; anything that could mutate state, install dependencies, or touch the network is refused until the user confirms it in the TUI. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.",
5329
+ description: "Run a shell command in the project root and return its combined stdout+stderr.\n\nConstraints (read these before the first call):\n\u2022 ONE process per call, NO shell expansion. `&&`, `||`, `|`, `;`, `>`, `<`, `2>&1` are all rejected up-front \u2014 split into separate calls and combine results in reasoning. Example: instead of `grep foo *.ts | wc -l`, use `grep -c foo *.ts`; instead of `cd sub && npm test`, use `npm test --prefix sub` (or whatever --cwd flag the binary accepts).\n\u2022 `cd` DOES NOT PERSIST between calls \u2014 each call spawns a fresh process rooted at the project. If a tool needs a subdirectory, pass it via the tool's own flag (`npm --prefix`, `cargo -C`, `git -C`, `pytest tests/\u2026`), NOT via a preceding `cd`.\n\u2022 Avoid commands with unbounded output (`netstat -ano`, `find /`, etc.) \u2014 they waste tokens. Filter at source: `netstat -ano -p TCP`, `find src -name '*.ts'`, `grep -c`, `wc -l`.\n\nCommon read-only inspection and test/lint/typecheck commands run immediately; anything that could mutate state, install dependencies, or touch the network is refused until the user confirms it in the TUI. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.",
5089
5330
  // Plan-mode gate: allow allowlisted commands through (git status,
5090
5331
  // cargo check, ls, grep …) so the model can actually investigate
5091
5332
  // during planning. Anything that would otherwise trigger a
@@ -5128,7 +5369,7 @@ function registerShellTools(registry, opts) {
5128
5369
  });
5129
5370
  registry.register({
5130
5371
  name: "run_background",
5131
- description: "Spawn a long-running process (dev server, watcher, any command that doesn't naturally exit) and detach. Waits up to `waitSec` seconds for startup (or until the output matches a readiness signal like 'Local:', 'listening on', 'compiled successfully'), then returns the job id + startup preview. The process keeps running; call `job_output` to tail its logs, `stop_job` to kill it, `list_jobs` to see all running jobs. USE THIS \u2014 not `run_command` \u2014 for: npm/yarn/pnpm run dev, uvicorn / flask run, go run, cargo watch, tsc --watch, webpack serve, anything with 'dev' / 'serve' / 'watch' in the name.",
5372
+ description: "Spawn a long-running process (dev server, watcher, any command that doesn't naturally exit) and detach. Waits up to `waitSec` seconds for startup (or until the output matches a readiness signal like 'Local:', 'listening on', 'compiled successfully'), then returns the job id + startup preview. The process keeps running; call `job_output` to tail its logs, `stop_job` to kill it, `list_jobs` to see all running jobs.\n\nSame shell constraints as run_command: NO `&&` / `||` / `|` / `;` / `>` / `<` / `2>&1`, `cd` doesn't persist. Dev servers that need a subdirectory: use the tool's own --prefix / --cwd flag. For Vite specifically, `--prefix` on npm only tells npm where package.json is; vite's server root still defaults to process cwd, so pass `vite <project-dir>` or configure via `vite.config.ts` root.\n\nUSE THIS \u2014 not `run_command` \u2014 for: npm/yarn/pnpm run dev, uvicorn / flask run, go run, cargo watch, tsc --watch, webpack serve, anything with 'dev' / 'serve' / 'watch' in the name.",
5132
5373
  parameters: {
5133
5374
  type: "object",
5134
5375
  properties: {
@@ -5623,7 +5864,8 @@ function summarizeTurns(turns) {
5623
5864
  claudeEquivalentUsd: round2(totalClaude, 6),
5624
5865
  savingsVsClaudePct: round2(savingsVsClaude * 100, 2),
5625
5866
  cacheHitRatio: round2(cacheHitRatio, 4),
5626
- lastPromptTokens: lastTurn?.usage.promptTokens ?? 0
5867
+ lastPromptTokens: lastTurn?.usage.promptTokens ?? 0,
5868
+ lastTurnCostUsd: round2(lastTurn?.cost ?? 0, 6)
5627
5869
  };
5628
5870
  }
5629
5871
  function round2(n, digits) {
@@ -6695,7 +6937,7 @@ function sep() {
6695
6937
  // src/code/prompt.ts
6696
6938
  import { existsSync as existsSync10, readFileSync as readFileSync11 } from "fs";
6697
6939
  import { join as join9 } from "path";
6698
- var CODE_SYSTEM_PROMPT = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, list_directory, search_files, etc.) rooted at the user's working directory.
6940
+ var CODE_SYSTEM_PROMPT = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.
6699
6941
 
6700
6942
  # Cite or shut up \u2014 non-negotiable
6701
6943
 
@@ -6736,15 +6978,17 @@ The user can ALSO enter "plan mode" via /plan, which is a stronger, explicit con
6736
6978
  - You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.
6737
6979
 
6738
6980
 
6739
- # Delegating to subagents via Skills (\u{1F9EC})
6981
+ # Delegating to subagents via Skills
6982
+
6983
+ The pinned Skills index below lists playbooks you can invoke with \`run_skill\`. Entries tagged \`[\u{1F9EC} subagent]\` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.
6740
6984
 
6741
- The pinned Skills index below lists playbooks you can invoke with \`run_skill\`. Skills marked with **\u{1F9EC}** spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so \u{1F9EC} skills are how you keep the main session lean.
6985
+ **When you call \`run_skill\`, the \`name\` is ONLY the identifier before the tag** \u2014 e.g. \`run_skill({ name: "explore", arguments: "..." })\`, NOT \`"[\u{1F9EC} subagent] explore"\` and NOT \`"explore [\u{1F9EC} subagent]"\`. The tag is display sugar; the name argument is just the bare identifier.
6742
6986
 
6743
6987
  Two built-ins ship by default:
6744
- - **\u{1F9EC} explore** \u2014 read-only investigation across the codebase. Use when the user says things like "find all places that...", "how does X work across the project", "survey the code for Y". Pass \`arguments\` describing the concrete question.
6745
- - **\u{1F9EC} research** \u2014 combines web search + code reading. Use for "is X supported by lib Y", "what's the canonical way to Z", "compare our impl to the spec".
6988
+ - **explore** \`[\u{1F9EC} subagent]\` \u2014 read-only investigation across the codebase. Use when the user says things like "find all places that...", "how does X work across the project", "survey the code for Y". Pass \`arguments\` describing the concrete question.
6989
+ - **research** \`[\u{1F9EC} subagent]\` \u2014 combines web search + code reading. Use for "is X supported by lib Y", "what's the canonical way to Z", "compare our impl to the spec".
6746
6990
 
6747
- When to delegate (call \`run_skill\` with a \u{1F9EC} skill):
6991
+ When to delegate (call \`run_skill\` with a subagent skill):
6748
6992
  - The task would otherwise need >5 file reads or searches.
6749
6993
  - You only need the conclusion, not the exploration trail.
6750
6994
  - The work is self-contained (you can describe it in one paragraph).
@@ -6855,6 +7099,10 @@ If you notice an obvious issue, MENTION it in one sentence and wait for the user
6855
7099
  - Show edits; don't narrate them in prose. "Here's the fix:" is enough.
6856
7100
  - One short paragraph explaining *why*, then the blocks.
6857
7101
  - If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.
7102
+
7103
+ ${ESCALATION_CONTRACT}
7104
+
7105
+ ${TUI_FORMATTING_RULES}
6858
7106
  `;
6859
7107
  function codeSystemPrompt(rootDir) {
6860
7108
  const withMemory = applyMemoryStack(CODE_SYSTEM_PROMPT, rootDir);