reasonix 0.5.24 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/{chunk-C266QOQU.js → chunk-5DZMZCCW.js} +65 -23
- package/dist/cli/chunk-5DZMZCCW.js.map +1 -0
- package/dist/cli/index.js +4627 -2654
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/{prompt-OVVMCH5F.js → prompt-2OABSPAW.js} +2 -2
- package/dist/index.d.ts +344 -39
- package/dist/index.js +747 -82
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/cli/chunk-C266QOQU.js.map +0 -1
- package/dist/cli/{prompt-OVVMCH5F.js.map → prompt-2OABSPAW.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -334,7 +334,7 @@ async function harvest(reasoningContent, client, options = {}, signal) {
|
|
|
334
334
|
const minLen = options.minReasoningLen ?? 40;
|
|
335
335
|
const trimmed = reasoningContent.trim();
|
|
336
336
|
if (trimmed.length < minLen) return emptyPlanState();
|
|
337
|
-
const model = options.model ?? "deepseek-
|
|
337
|
+
const model = options.model ?? "deepseek-v4-flash";
|
|
338
338
|
const maxItems = options.maxItems ?? 5;
|
|
339
339
|
const maxItemLen = options.maxItemLen ?? 80;
|
|
340
340
|
const system = SYSTEM_PROMPT.replace("{maxItems}", String(maxItems)).replace(
|
|
@@ -1714,7 +1714,8 @@ var SessionStats = class {
|
|
|
1714
1714
|
claudeEquivalentUsd: round(this.totalClaudeEquivalent, 6),
|
|
1715
1715
|
savingsVsClaudePct: round(this.savingsVsClaude * 100, 2),
|
|
1716
1716
|
cacheHitRatio: round(this.aggregateCacheHitRatio, 4),
|
|
1717
|
-
lastPromptTokens: last?.usage.promptTokens ?? 0
|
|
1717
|
+
lastPromptTokens: last?.usage.promptTokens ?? 0,
|
|
1718
|
+
lastTurnCostUsd: round(last?.cost ?? 0, 6)
|
|
1718
1719
|
};
|
|
1719
1720
|
}
|
|
1720
1721
|
};
|
|
@@ -1725,6 +1726,12 @@ function round(n, digits) {
|
|
|
1725
1726
|
|
|
1726
1727
|
// src/loop.ts
|
|
1727
1728
|
var ARGS_COMPACT_THRESHOLD_TOKENS = 800;
|
|
1729
|
+
var TURN_END_RESULT_CAP_TOKENS = 3e3;
|
|
1730
|
+
var FAILURE_ESCALATION_THRESHOLD = 3;
|
|
1731
|
+
var ESCALATION_MODEL = "deepseek-v4-pro";
|
|
1732
|
+
var NEEDS_PRO_MARKER_PREFIX = "<<<NEEDS_PRO";
|
|
1733
|
+
var NEEDS_PRO_MARKER_RE = /^<<<NEEDS_PRO(?::\s*([^>]*))?>>>/;
|
|
1734
|
+
var NEEDS_PRO_BUFFER_CHARS = 256;
|
|
1728
1735
|
var CacheFirstLoop = class {
|
|
1729
1736
|
client;
|
|
1730
1737
|
prefix;
|
|
@@ -1765,11 +1772,44 @@ var CacheFirstLoop = class {
|
|
|
1765
1772
|
* `step()` (the prior turn's signal has already fired).
|
|
1766
1773
|
*/
|
|
1767
1774
|
_turnAbort = new AbortController();
|
|
1775
|
+
/**
|
|
1776
|
+
* "Next turn should run on pro, regardless of this.model." Set by the
|
|
1777
|
+
* `/pro` slash command; consumed at the next turn's start (flipping
|
|
1778
|
+
* `_escalateThisTurn` on and self-clearing) so it's a fire-and-forget
|
|
1779
|
+
* single-turn upgrade. Survives across multiple slash inputs so
|
|
1780
|
+
* typing `/pro` and then hesitating a while before submitting a real
|
|
1781
|
+
* message still applies.
|
|
1782
|
+
*/
|
|
1783
|
+
_proArmedForNextTurn = false;
|
|
1784
|
+
/**
|
|
1785
|
+
* Active for the current turn only — true means every model call
|
|
1786
|
+
* this turn uses pro instead of `this.model`. Turned on by EITHER
|
|
1787
|
+
* the pro-armed consumption OR the mid-turn auto-escalation
|
|
1788
|
+
* threshold (see `_turnFailureCount`). Cleared at turn end.
|
|
1789
|
+
*/
|
|
1790
|
+
_escalateThisTurn = false;
|
|
1791
|
+
/**
|
|
1792
|
+
* Visible-failure count for the current turn. Incremented by tool
|
|
1793
|
+
* dispatch paths when a result matches a known "flash is struggling"
|
|
1794
|
+
* shape (SEARCH-not-found errors, scavenge / truncation / storm
|
|
1795
|
+
* repair fires). Once it hits {@link FAILURE_ESCALATION_THRESHOLD},
|
|
1796
|
+
* the remainder of the turn's model calls auto-upgrade to pro so
|
|
1797
|
+
* the user doesn't watch flash retry the same edit 5 times.
|
|
1798
|
+
*/
|
|
1799
|
+
_turnFailureCount = 0;
|
|
1800
|
+
/**
|
|
1801
|
+
* Per-type breakdown of failure signals counted toward the turn's
|
|
1802
|
+
* auto-escalation threshold. Surfaced in the warning when the
|
|
1803
|
+
* threshold trips so the user sees what kind of trouble flash
|
|
1804
|
+
* actually hit ("3× search-mismatch, 2× truncated") rather than
|
|
1805
|
+
* just a bare count. Reset alongside _turnFailureCount.
|
|
1806
|
+
*/
|
|
1807
|
+
_turnFailureTypes = {};
|
|
1768
1808
|
constructor(opts) {
|
|
1769
1809
|
this.client = opts.client;
|
|
1770
1810
|
this.prefix = opts.prefix;
|
|
1771
1811
|
this.tools = opts.tools ?? new ToolRegistry();
|
|
1772
|
-
this.model = opts.model ?? "deepseek-v4-
|
|
1812
|
+
this.model = opts.model ?? "deepseek-v4-flash";
|
|
1773
1813
|
this.reasoningEffort = opts.reasoningEffort ?? "max";
|
|
1774
1814
|
this.maxToolIters = opts.maxToolIters ?? 64;
|
|
1775
1815
|
this.hooks = opts.hooks ?? [];
|
|
@@ -1792,10 +1832,11 @@ var CacheFirstLoop = class {
|
|
|
1792
1832
|
this.sessionName = opts.session ?? null;
|
|
1793
1833
|
if (this.sessionName) {
|
|
1794
1834
|
const prior = loadSessionMessages(this.sessionName);
|
|
1795
|
-
const
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1835
|
+
const shrunk = healLoadedMessagesByTokens(prior, DEFAULT_MAX_RESULT_TOKENS);
|
|
1836
|
+
const stamped = stampMissingReasoningForThinkingMode(shrunk.messages, this.model);
|
|
1837
|
+
const messages = stamped.messages;
|
|
1838
|
+
const healedCount = shrunk.healedCount + stamped.stampedCount;
|
|
1839
|
+
const tokensSaved = shrunk.tokensSaved;
|
|
1799
1840
|
for (const msg of messages) this.log.append(msg);
|
|
1800
1841
|
this.resumedMessageCount = messages.length;
|
|
1801
1842
|
if (healedCount > 0) {
|
|
@@ -1876,6 +1917,37 @@ var CacheFirstLoop = class {
|
|
|
1876
1917
|
}
|
|
1877
1918
|
}
|
|
1878
1919
|
}
|
|
1920
|
+
/**
|
|
1921
|
+
* Fired at the END of a turn (just before `done` is yielded). Shrinks
|
|
1922
|
+
* every tool RESULT in the log that exceeds {@link TURN_END_RESULT_CAP_TOKENS}
|
|
1923
|
+
* to a tight cap so the NEXT turn's prompt doesn't re-pay for big
|
|
1924
|
+
* reads or searches done earlier. Unlike the reactive 40/80%
|
|
1925
|
+
* thresholds which react to context pressure, this runs unconditionally
|
|
1926
|
+
* — the win is preventive: each turn's big outputs get trimmed before
|
|
1927
|
+
* they ride into the next prompt. Saves compounding cost on long
|
|
1928
|
+
* sessions.
|
|
1929
|
+
*
|
|
1930
|
+
* Why compact the JUST-finished turn's results too (not just older
|
|
1931
|
+
* turns)? The same-turn iters already consumed the raw content to
|
|
1932
|
+
* make their decisions — the log is only carried forward for future
|
|
1933
|
+
* prompts. And "let me re-read the file" is vastly cheaper than
|
|
1934
|
+
* "carry this 12KB result in every future turn's prompt forever."
|
|
1935
|
+
*
|
|
1936
|
+
* Safe by construction: args-compact for THIS turn already ran
|
|
1937
|
+
* inside `compactToolCallArgsAfterResponse`; this pass is orthogonal.
|
|
1938
|
+
*/
|
|
1939
|
+
autoCompactToolResultsOnTurnEnd() {
|
|
1940
|
+
const before = this.log.toMessages();
|
|
1941
|
+
const shrunk = shrinkOversizedToolResultsByTokens(before, TURN_END_RESULT_CAP_TOKENS);
|
|
1942
|
+
if (shrunk.healedCount === 0) return;
|
|
1943
|
+
this.log.compactInPlace(shrunk.messages);
|
|
1944
|
+
if (this.sessionName) {
|
|
1945
|
+
try {
|
|
1946
|
+
rewriteSession(this.sessionName, shrunk.messages);
|
|
1947
|
+
} catch {
|
|
1948
|
+
}
|
|
1949
|
+
}
|
|
1950
|
+
}
|
|
1879
1951
|
compact(maxTokens = 4e3) {
|
|
1880
1952
|
const before = this.log.toMessages();
|
|
1881
1953
|
const resultsPass = shrinkOversizedToolResultsByTokens(before, maxTokens);
|
|
@@ -1958,6 +2030,116 @@ var CacheFirstLoop = class {
|
|
|
1958
2030
|
}
|
|
1959
2031
|
this.stream = this.branchEnabled ? false : this._streamPreference;
|
|
1960
2032
|
}
|
|
2033
|
+
/**
|
|
2034
|
+
* Arm pro for the next turn (consumed at turn start). Called by
|
|
2035
|
+
* `/pro`. Idempotent — repeated calls stay armed, `disarmPro()`
|
|
2036
|
+
* clears. Separate from `/preset max` which persistently switches
|
|
2037
|
+
* this.model; armed state is strictly single-turn.
|
|
2038
|
+
*/
|
|
2039
|
+
armProForNextTurn() {
|
|
2040
|
+
this._proArmedForNextTurn = true;
|
|
2041
|
+
}
|
|
2042
|
+
/** Cancel `/pro` arming before the next turn starts. */
|
|
2043
|
+
disarmPro() {
|
|
2044
|
+
this._proArmedForNextTurn = false;
|
|
2045
|
+
}
|
|
2046
|
+
/** UI surface — true while `/pro` is queued but hasn't fired yet. */
|
|
2047
|
+
get proArmed() {
|
|
2048
|
+
return this._proArmedForNextTurn;
|
|
2049
|
+
}
|
|
2050
|
+
/** UI surface — true while the current turn is running on pro (armed or auto-escalated). */
|
|
2051
|
+
get escalatedThisTurn() {
|
|
2052
|
+
return this._escalateThisTurn;
|
|
2053
|
+
}
|
|
2054
|
+
/**
|
|
2055
|
+
* Model the current model call should use. Defaults to `this.model`;
|
|
2056
|
+
* upgrades to {@link ESCALATION_MODEL} when the turn is armed for
|
|
2057
|
+
* pro (via `/pro`) or has hit the failure-escalation threshold.
|
|
2058
|
+
* Same thinking + effort policy applies regardless — pro defaults
|
|
2059
|
+
* to thinking=enabled and effort=max, which the current turn wanted
|
|
2060
|
+
* anyway when flash was struggling.
|
|
2061
|
+
*/
|
|
2062
|
+
modelForCurrentCall() {
|
|
2063
|
+
return this._escalateThisTurn ? ESCALATION_MODEL : this.model;
|
|
2064
|
+
}
|
|
2065
|
+
/**
|
|
2066
|
+
* Parse the escalation marker out of the model's leading content.
|
|
2067
|
+
* Returns `{ matched: true, reason? }` for both bare and reason-
|
|
2068
|
+
* carrying forms. Only the FIRST line matters — the model is
|
|
2069
|
+
* instructed to emit the marker as the first output token if at
|
|
2070
|
+
* all. Matches anywhere else in the text are normal content
|
|
2071
|
+
* references (e.g. the user asked about the marker itself).
|
|
2072
|
+
*/
|
|
2073
|
+
parseEscalationMarker(content) {
|
|
2074
|
+
const m = NEEDS_PRO_MARKER_RE.exec(content.trimStart());
|
|
2075
|
+
if (!m) return { matched: false };
|
|
2076
|
+
const reason = m[1]?.trim();
|
|
2077
|
+
return { matched: true, reason: reason || void 0 };
|
|
2078
|
+
}
|
|
2079
|
+
/** Convenience boolean — same gate the streaming path used to call. */
|
|
2080
|
+
isEscalationRequest(content) {
|
|
2081
|
+
return this.parseEscalationMarker(content).matched;
|
|
2082
|
+
}
|
|
2083
|
+
/**
|
|
2084
|
+
* Could `buf` STILL plausibly become the full marker as more chunks
|
|
2085
|
+
* arrive? Drives the streaming buffer's flush decision: while this
|
|
2086
|
+
* is true we keep accumulating; once it's false (or the buffer
|
|
2087
|
+
* exceeds the NEEDS_PRO_BUFFER_CHARS character limit) we flush so the user isn't staring at a
|
|
2088
|
+
* delayed display for arbitrary content that just happens to start
|
|
2089
|
+
* with `<`.
|
|
2090
|
+
*/
|
|
2091
|
+
looksLikePartialEscalationMarker(buf) {
|
|
2092
|
+
const t = buf.trimStart();
|
|
2093
|
+
if (t.length === 0) return true;
|
|
2094
|
+
if (t.length <= NEEDS_PRO_MARKER_PREFIX.length) {
|
|
2095
|
+
return NEEDS_PRO_MARKER_PREFIX.startsWith(t);
|
|
2096
|
+
}
|
|
2097
|
+
if (!t.startsWith(NEEDS_PRO_MARKER_PREFIX)) return false;
|
|
2098
|
+
const rest = t.slice(NEEDS_PRO_MARKER_PREFIX.length);
|
|
2099
|
+
if (rest[0] !== ">" && rest[0] !== ":") return false;
|
|
2100
|
+
return true;
|
|
2101
|
+
}
|
|
2102
|
+
/**
|
|
2103
|
+
* Check whether a tool result string looks like a "flash struggled"
|
|
2104
|
+
* signal and, if so, increment the turn's failure counter. Escalates
|
|
2105
|
+
* the REST of the current turn to pro once the threshold is hit.
|
|
2106
|
+
* Idempotent after escalation — further failures don't re-escalate,
|
|
2107
|
+
* but the turn is already on pro so it doesn't matter.
|
|
2108
|
+
*
|
|
2109
|
+
* Return: `true` when this call tipped the turn into escalation
|
|
2110
|
+
* mode (so the loop can surface a one-time warning to the user).
|
|
2111
|
+
*/
|
|
2112
|
+
noteToolFailureSignal(resultJson, repair) {
|
|
2113
|
+
let bumped = false;
|
|
2114
|
+
const bump = (kind, by = 1) => {
|
|
2115
|
+
this._turnFailureCount += by;
|
|
2116
|
+
this._turnFailureTypes[kind] = (this._turnFailureTypes[kind] ?? 0) + by;
|
|
2117
|
+
bumped = true;
|
|
2118
|
+
};
|
|
2119
|
+
if (resultJson.includes('"error"') && resultJson.includes("search text not found")) {
|
|
2120
|
+
bump("search-mismatch");
|
|
2121
|
+
}
|
|
2122
|
+
if (repair) {
|
|
2123
|
+
if (repair.scavenged > 0) bump("scavenged", repair.scavenged);
|
|
2124
|
+
if (repair.truncationsFixed > 0) bump("truncated", repair.truncationsFixed);
|
|
2125
|
+
if (repair.stormsBroken > 0) bump("storm-broken", repair.stormsBroken);
|
|
2126
|
+
}
|
|
2127
|
+
if (bumped && !this._escalateThisTurn && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
|
|
2128
|
+
this._escalateThisTurn = true;
|
|
2129
|
+
return true;
|
|
2130
|
+
}
|
|
2131
|
+
return false;
|
|
2132
|
+
}
|
|
2133
|
+
/**
|
|
2134
|
+
* Render `_turnFailureTypes` as a comma-separated breakdown like
|
|
2135
|
+
* "2× search-mismatch, 1× truncated" for the auto-escalation
|
|
2136
|
+
* warning. Falls back to a bare "N repair/error signal(s)" count if no types have been recorded yet (defensive —
|
|
2137
|
+
* the warning sites only call this after a bump).
|
|
2138
|
+
*/
|
|
2139
|
+
formatFailureBreakdown() {
|
|
2140
|
+
const parts = Object.entries(this._turnFailureTypes).filter(([, n]) => n > 0).map(([kind, n]) => `${n}\xD7 ${kind}`);
|
|
2141
|
+
return parts.length > 0 ? parts.join(", ") : `${this._turnFailureCount} repair/error signal(s)`;
|
|
2142
|
+
}
|
|
1961
2143
|
buildMessages(pendingUser) {
|
|
1962
2144
|
const healed = healLoadedMessages(this.log.toMessages(), DEFAULT_MAX_RESULT_CHARS);
|
|
1963
2145
|
const msgs = [...this.prefix.toMessages(), ...healed.messages];
|
|
@@ -2012,8 +2194,24 @@ var CacheFirstLoop = class {
|
|
|
2012
2194
|
this._turn++;
|
|
2013
2195
|
this.scratch.reset();
|
|
2014
2196
|
this.repair.resetStorm();
|
|
2197
|
+
this._turnFailureCount = 0;
|
|
2198
|
+
this._turnFailureTypes = {};
|
|
2199
|
+
this._escalateThisTurn = false;
|
|
2200
|
+
let armedConsumed = false;
|
|
2201
|
+
if (this._proArmedForNextTurn) {
|
|
2202
|
+
this._escalateThisTurn = true;
|
|
2203
|
+
this._proArmedForNextTurn = false;
|
|
2204
|
+
armedConsumed = true;
|
|
2205
|
+
}
|
|
2015
2206
|
this._turnAbort = new AbortController();
|
|
2016
2207
|
const signal = this._turnAbort.signal;
|
|
2208
|
+
if (armedConsumed) {
|
|
2209
|
+
yield {
|
|
2210
|
+
turn: this._turn,
|
|
2211
|
+
role: "warning",
|
|
2212
|
+
content: "\u21E7 /pro armed \u2014 this turn runs on deepseek-v4-pro (one-shot \xB7 disarms after turn)"
|
|
2213
|
+
};
|
|
2214
|
+
}
|
|
2017
2215
|
let pendingUser = userInput;
|
|
2018
2216
|
const toolSpecs = this.prefix.tools();
|
|
2019
2217
|
const warnAt = Math.max(1, Math.floor(this.maxToolIters * 0.7));
|
|
@@ -2033,6 +2231,7 @@ var CacheFirstLoop = class {
|
|
|
2033
2231
|
content: stoppedMsg,
|
|
2034
2232
|
forcedSummary: true
|
|
2035
2233
|
};
|
|
2234
|
+
this.autoCompactToolResultsOnTurnEnd();
|
|
2036
2235
|
yield { turn: this._turn, role: "done", content: stoppedMsg };
|
|
2037
2236
|
return;
|
|
2038
2237
|
}
|
|
@@ -2109,14 +2308,15 @@ var CacheFirstLoop = class {
|
|
|
2109
2308
|
queue.push(sample);
|
|
2110
2309
|
}
|
|
2111
2310
|
};
|
|
2311
|
+
const callModel = this.modelForCurrentCall();
|
|
2112
2312
|
const branchPromise = runBranches(
|
|
2113
2313
|
this.client,
|
|
2114
2314
|
{
|
|
2115
|
-
model:
|
|
2315
|
+
model: callModel,
|
|
2116
2316
|
messages,
|
|
2117
2317
|
tools: toolSpecs.length ? toolSpecs : void 0,
|
|
2118
2318
|
signal,
|
|
2119
|
-
thinking: thinkingModeForModel(
|
|
2319
|
+
thinking: thinkingModeForModel(callModel),
|
|
2120
2320
|
reasoningEffort: this.reasoningEffort
|
|
2121
2321
|
},
|
|
2122
2322
|
{
|
|
@@ -2165,21 +2365,41 @@ var CacheFirstLoop = class {
|
|
|
2165
2365
|
} else if (this.stream) {
|
|
2166
2366
|
const callBuf = /* @__PURE__ */ new Map();
|
|
2167
2367
|
const readyIndices = /* @__PURE__ */ new Set();
|
|
2368
|
+
const callModel = this.modelForCurrentCall();
|
|
2369
|
+
const bufferForEscalation = callModel !== ESCALATION_MODEL;
|
|
2370
|
+
let escalationBuf = "";
|
|
2371
|
+
let escalationBufFlushed = false;
|
|
2168
2372
|
for await (const chunk of this.client.stream({
|
|
2169
|
-
model:
|
|
2373
|
+
model: callModel,
|
|
2170
2374
|
messages,
|
|
2171
2375
|
tools: toolSpecs.length ? toolSpecs : void 0,
|
|
2172
2376
|
signal,
|
|
2173
|
-
thinking: thinkingModeForModel(
|
|
2377
|
+
thinking: thinkingModeForModel(callModel),
|
|
2174
2378
|
reasoningEffort: this.reasoningEffort
|
|
2175
2379
|
})) {
|
|
2176
2380
|
if (chunk.contentDelta) {
|
|
2177
2381
|
assistantContent += chunk.contentDelta;
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2382
|
+
if (bufferForEscalation && !escalationBufFlushed) {
|
|
2383
|
+
escalationBuf += chunk.contentDelta;
|
|
2384
|
+
if (this.isEscalationRequest(escalationBuf)) {
|
|
2385
|
+
break;
|
|
2386
|
+
}
|
|
2387
|
+
if (escalationBuf.length >= NEEDS_PRO_BUFFER_CHARS || !this.looksLikePartialEscalationMarker(escalationBuf)) {
|
|
2388
|
+
escalationBufFlushed = true;
|
|
2389
|
+
yield {
|
|
2390
|
+
turn: this._turn,
|
|
2391
|
+
role: "assistant_delta",
|
|
2392
|
+
content: escalationBuf
|
|
2393
|
+
};
|
|
2394
|
+
escalationBuf = "";
|
|
2395
|
+
}
|
|
2396
|
+
} else {
|
|
2397
|
+
yield {
|
|
2398
|
+
turn: this._turn,
|
|
2399
|
+
role: "assistant_delta",
|
|
2400
|
+
content: chunk.contentDelta
|
|
2401
|
+
};
|
|
2402
|
+
}
|
|
2183
2403
|
}
|
|
2184
2404
|
if (chunk.reasoningDelta) {
|
|
2185
2405
|
reasoningContent += chunk.reasoningDelta;
|
|
@@ -2220,13 +2440,23 @@ var CacheFirstLoop = class {
|
|
|
2220
2440
|
if (chunk.usage) usage = chunk.usage;
|
|
2221
2441
|
}
|
|
2222
2442
|
toolCalls = [...callBuf.values()];
|
|
2443
|
+
if (bufferForEscalation && !escalationBufFlushed && escalationBuf.length > 0) {
|
|
2444
|
+
if (!this.isEscalationRequest(escalationBuf)) {
|
|
2445
|
+
yield {
|
|
2446
|
+
turn: this._turn,
|
|
2447
|
+
role: "assistant_delta",
|
|
2448
|
+
content: escalationBuf
|
|
2449
|
+
};
|
|
2450
|
+
}
|
|
2451
|
+
}
|
|
2223
2452
|
} else {
|
|
2453
|
+
const callModel = this.modelForCurrentCall();
|
|
2224
2454
|
const resp = await this.client.chat({
|
|
2225
|
-
model:
|
|
2455
|
+
model: callModel,
|
|
2226
2456
|
messages,
|
|
2227
2457
|
tools: toolSpecs.length ? toolSpecs : void 0,
|
|
2228
2458
|
signal,
|
|
2229
|
-
thinking: thinkingModeForModel(
|
|
2459
|
+
thinking: thinkingModeForModel(callModel),
|
|
2230
2460
|
reasoningEffort: this.reasoningEffort
|
|
2231
2461
|
});
|
|
2232
2462
|
assistantContent = resp.content;
|
|
@@ -2236,6 +2466,7 @@ var CacheFirstLoop = class {
|
|
|
2236
2466
|
}
|
|
2237
2467
|
} catch (err) {
|
|
2238
2468
|
if (signal.aborted) {
|
|
2469
|
+
this.autoCompactToolResultsOnTurnEnd();
|
|
2239
2470
|
yield { turn: this._turn, role: "done", content: "" };
|
|
2240
2471
|
return;
|
|
2241
2472
|
}
|
|
@@ -2247,7 +2478,29 @@ var CacheFirstLoop = class {
|
|
|
2247
2478
|
};
|
|
2248
2479
|
return;
|
|
2249
2480
|
}
|
|
2250
|
-
|
|
2481
|
+
if (this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
|
|
2482
|
+
const { reason } = this.parseEscalationMarker(assistantContent);
|
|
2483
|
+
this._escalateThisTurn = true;
|
|
2484
|
+
const reasonSuffix = reason ? ` \u2014 ${reason}` : "";
|
|
2485
|
+
yield {
|
|
2486
|
+
turn: this._turn,
|
|
2487
|
+
role: "warning",
|
|
2488
|
+
content: `\u21E7 flash requested escalation \u2014 retrying this turn on ${ESCALATION_MODEL}${reasonSuffix}`
|
|
2489
|
+
};
|
|
2490
|
+
assistantContent = "";
|
|
2491
|
+
reasoningContent = "";
|
|
2492
|
+
toolCalls = [];
|
|
2493
|
+
usage = null;
|
|
2494
|
+
branchSummary = void 0;
|
|
2495
|
+
preHarvestedPlanState = void 0;
|
|
2496
|
+
iter--;
|
|
2497
|
+
continue;
|
|
2498
|
+
}
|
|
2499
|
+
const turnStats = this.stats.record(
|
|
2500
|
+
this._turn,
|
|
2501
|
+
this.modelForCurrentCall(),
|
|
2502
|
+
usage ?? new Usage()
|
|
2503
|
+
);
|
|
2251
2504
|
if (pendingUser !== null) {
|
|
2252
2505
|
this.appendAndPersist({ role: "user", content: pendingUser });
|
|
2253
2506
|
pendingUser = null;
|
|
@@ -2267,7 +2520,12 @@ var CacheFirstLoop = class {
|
|
|
2267
2520
|
assistantContent || null
|
|
2268
2521
|
);
|
|
2269
2522
|
this.appendAndPersist(
|
|
2270
|
-
this.assistantMessage(
|
|
2523
|
+
this.assistantMessage(
|
|
2524
|
+
assistantContent,
|
|
2525
|
+
repairedCalls,
|
|
2526
|
+
this.modelForCurrentCall(),
|
|
2527
|
+
reasoningContent
|
|
2528
|
+
)
|
|
2271
2529
|
);
|
|
2272
2530
|
yield {
|
|
2273
2531
|
turn: this._turn,
|
|
@@ -2278,6 +2536,13 @@ var CacheFirstLoop = class {
|
|
|
2278
2536
|
repair: report,
|
|
2279
2537
|
branch: branchSummary
|
|
2280
2538
|
};
|
|
2539
|
+
if (this.noteToolFailureSignal("", report)) {
|
|
2540
|
+
yield {
|
|
2541
|
+
turn: this._turn,
|
|
2542
|
+
role: "warning",
|
|
2543
|
+
content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this.formatFailureBreakdown()}. Next turn falls back to ${this.model} unless /pro is armed.`
|
|
2544
|
+
};
|
|
2545
|
+
}
|
|
2281
2546
|
if (report.stormsBroken > 0) {
|
|
2282
2547
|
const noteTail = report.notes.length ? ` \u2014 ${report.notes[report.notes.length - 1]}` : "";
|
|
2283
2548
|
const allSuppressed = repairedCalls.length === 0 && toolCalls.length > 0;
|
|
@@ -2289,13 +2554,14 @@ var CacheFirstLoop = class {
|
|
|
2289
2554
|
};
|
|
2290
2555
|
}
|
|
2291
2556
|
if (repairedCalls.length === 0) {
|
|
2557
|
+
this.autoCompactToolResultsOnTurnEnd();
|
|
2292
2558
|
yield { turn: this._turn, role: "done", content: assistantContent };
|
|
2293
2559
|
return;
|
|
2294
2560
|
}
|
|
2295
2561
|
const ctxMax = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
|
|
2296
2562
|
if (usage) {
|
|
2297
2563
|
const ratio = usage.promptTokens / ctxMax;
|
|
2298
|
-
if (ratio > 0.
|
|
2564
|
+
if (ratio > 0.4 && ratio <= 0.8) {
|
|
2299
2565
|
const before = usage.promptTokens;
|
|
2300
2566
|
const soft = this.compact(4e3);
|
|
2301
2567
|
if (soft.healedCount > 0) {
|
|
@@ -2394,6 +2660,13 @@ ${reason}`;
|
|
|
2394
2660
|
content: result
|
|
2395
2661
|
});
|
|
2396
2662
|
this.compactToolCallArgsAfterResponse();
|
|
2663
|
+
if (this.noteToolFailureSignal(result)) {
|
|
2664
|
+
yield {
|
|
2665
|
+
turn: this._turn,
|
|
2666
|
+
role: "warning",
|
|
2667
|
+
content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this.formatFailureBreakdown()}. Next turn falls back to ${this.model} unless /pro is armed.`
|
|
2668
|
+
};
|
|
2669
|
+
}
|
|
2397
2670
|
yield {
|
|
2398
2671
|
turn: this._turn,
|
|
2399
2672
|
role: "tool",
|
|
@@ -2417,13 +2690,15 @@ ${reason}`;
|
|
|
2417
2690
|
role: "user",
|
|
2418
2691
|
content: "I'm out of tool-call budget for this turn. Summarize in plain prose what you learned from the tool results above. Do NOT emit any tool calls, function-call markup, DSML invocations, or SEARCH/REPLACE edit blocks \u2014 they will be silently discarded. Just plain text."
|
|
2419
2692
|
});
|
|
2693
|
+
const summaryModel = "deepseek-v4-flash";
|
|
2694
|
+
const summaryEffort = "high";
|
|
2420
2695
|
const resp = await this.client.chat({
|
|
2421
|
-
model:
|
|
2696
|
+
model: summaryModel,
|
|
2422
2697
|
messages,
|
|
2423
2698
|
// no tools → model is forced to answer in text
|
|
2424
2699
|
signal: this._turnAbort.signal,
|
|
2425
|
-
thinking: thinkingModeForModel(
|
|
2426
|
-
reasoningEffort:
|
|
2700
|
+
thinking: thinkingModeForModel(summaryModel),
|
|
2701
|
+
reasoningEffort: summaryEffort
|
|
2427
2702
|
});
|
|
2428
2703
|
const rawContent = resp.content?.trim() ?? "";
|
|
2429
2704
|
const cleaned = stripHallucinatedToolMarkup(rawContent);
|
|
@@ -2432,8 +2707,10 @@ ${reason}`;
|
|
|
2432
2707
|
const annotated = `${reasonPrefix}
|
|
2433
2708
|
|
|
2434
2709
|
${summary}`;
|
|
2435
|
-
const summaryStats = this.stats.record(this._turn,
|
|
2436
|
-
this.appendAndPersist(
|
|
2710
|
+
const summaryStats = this.stats.record(this._turn, summaryModel, resp.usage ?? new Usage());
|
|
2711
|
+
this.appendAndPersist(
|
|
2712
|
+
this.assistantMessage(summary, [], summaryModel, resp.reasoningContent)
|
|
2713
|
+
);
|
|
2437
2714
|
yield {
|
|
2438
2715
|
turn: this._turn,
|
|
2439
2716
|
role: "assistant_final",
|
|
@@ -2441,6 +2718,7 @@ ${summary}`;
|
|
|
2441
2718
|
stats: summaryStats,
|
|
2442
2719
|
forcedSummary: true
|
|
2443
2720
|
};
|
|
2721
|
+
this.autoCompactToolResultsOnTurnEnd();
|
|
2444
2722
|
yield { turn: this._turn, role: "done", content: summary };
|
|
2445
2723
|
} catch (err) {
|
|
2446
2724
|
const label = errorLabelFor(opts.reason, this.maxToolIters);
|
|
@@ -2450,6 +2728,7 @@ ${summary}`;
|
|
|
2450
2728
|
content: "",
|
|
2451
2729
|
error: `${label} and the fallback summary call failed: ${err.message}. Run /clear and retry with a narrower question, or raise --max-tool-iters.`
|
|
2452
2730
|
};
|
|
2731
|
+
this.autoCompactToolResultsOnTurnEnd();
|
|
2453
2732
|
yield { turn: this._turn, role: "done", content: "" };
|
|
2454
2733
|
}
|
|
2455
2734
|
}
|
|
@@ -2462,28 +2741,39 @@ ${summary}`;
|
|
|
2462
2741
|
}
|
|
2463
2742
|
return final;
|
|
2464
2743
|
}
|
|
2465
|
-
|
|
2744
|
+
/**
|
|
2745
|
+
* Build an assistant message for the log. The `producingModel` arg is
|
|
2746
|
+
* the model that actually generated this turn (flash, pro, the
|
|
2747
|
+
* forced-summary flash call, `this.model` for synthetics, etc.) —
|
|
2748
|
+
* NOT `this.model`, because escalation + forced-summary can both
|
|
2749
|
+
* route a single turn to a different model.
|
|
2750
|
+
*
|
|
2751
|
+
* The single invariant this encodes: if the producing model is
|
|
2752
|
+
* thinking-mode, `reasoning_content` MUST be present on the
|
|
2753
|
+
* persisted message — even as an empty string. DeepSeek's validator
|
|
2754
|
+
* 400s the NEXT request if any historical thinking-mode assistant
|
|
2755
|
+
* turn is missing it. We used to gate on `reasoning.length > 0`,
|
|
2756
|
+
* which silently dropped the field whenever the stream emitted zero
|
|
2757
|
+
* reasoning deltas or the API returned `reasoning_content: null` —
|
|
2758
|
+
* both legitimate edge cases the 0.5.15/0.5.18 fixes missed.
|
|
2759
|
+
*/
|
|
2760
|
+
assistantMessage(content, toolCalls, producingModel, reasoningContent) {
|
|
2466
2761
|
const msg = { role: "assistant", content };
|
|
2467
2762
|
if (toolCalls.length > 0) msg.tool_calls = toolCalls;
|
|
2468
|
-
if (
|
|
2469
|
-
msg.reasoning_content = reasoningContent;
|
|
2763
|
+
if (isThinkingModeModel(producingModel)) {
|
|
2764
|
+
msg.reasoning_content = reasoningContent ?? "";
|
|
2470
2765
|
}
|
|
2471
2766
|
return msg;
|
|
2472
2767
|
}
|
|
2473
2768
|
/**
|
|
2474
|
-
*
|
|
2475
|
-
*
|
|
2476
|
-
*
|
|
2477
|
-
*
|
|
2478
|
-
*
|
|
2479
|
-
* doesn't care — field stays absent there.
|
|
2769
|
+
* Synthetic assistant message (abort notices, future system injections)
|
|
2770
|
+
* — no real API round trip. Delegates to {@link assistantMessage} with
|
|
2771
|
+
* `this.model` as the stand-in producer, so the same thinking-mode
|
|
2772
|
+
* invariant applies: reasoner sessions get an empty-string
|
|
2773
|
+
* `reasoning_content`; V3 sessions get nothing.
|
|
2480
2774
|
*/
|
|
2481
2775
|
syntheticAssistantMessage(content) {
|
|
2482
|
-
|
|
2483
|
-
if (isThinkingModeModel(this.model)) {
|
|
2484
|
-
msg.reasoning_content = "";
|
|
2485
|
-
}
|
|
2486
|
-
return msg;
|
|
2776
|
+
return this.assistantMessage(content, [], this.model, "");
|
|
2487
2777
|
}
|
|
2488
2778
|
};
|
|
2489
2779
|
function isThinkingModeModel(model) {
|
|
@@ -2676,6 +2966,19 @@ function healLoadedMessages(messages, maxChars) {
|
|
|
2676
2966
|
const healedCount = shrunk.healedCount + paired.droppedAssistantCalls + paired.droppedStrayTools;
|
|
2677
2967
|
return { messages: paired.messages, healedCount, healedFrom: shrunk.healedFrom };
|
|
2678
2968
|
}
|
|
2969
|
+
function stampMissingReasoningForThinkingMode(messages, model) {
|
|
2970
|
+
if (!isThinkingModeModel(model)) {
|
|
2971
|
+
return { messages, stampedCount: 0 };
|
|
2972
|
+
}
|
|
2973
|
+
let stampedCount = 0;
|
|
2974
|
+
const out = messages.map((msg) => {
|
|
2975
|
+
if (msg.role !== "assistant") return msg;
|
|
2976
|
+
if (Object.hasOwn(msg, "reasoning_content")) return msg;
|
|
2977
|
+
stampedCount += 1;
|
|
2978
|
+
return { ...msg, reasoning_content: "" };
|
|
2979
|
+
});
|
|
2980
|
+
return { messages: out, stampedCount };
|
|
2981
|
+
}
|
|
2679
2982
|
function healLoadedMessagesByTokens(messages, maxTokens) {
|
|
2680
2983
|
const shrunk = shrinkOversizedToolResultsByTokens(messages, maxTokens);
|
|
2681
2984
|
const paired = fixToolCallPairing(shrunk.messages);
|
|
@@ -2981,6 +3284,32 @@ import { join as join7, resolve as resolve3 } from "path";
|
|
|
2981
3284
|
import { existsSync as existsSync6, readFileSync as readFileSync6, readdirSync as readdirSync3, statSync as statSync3 } from "fs";
|
|
2982
3285
|
import { homedir as homedir3 } from "os";
|
|
2983
3286
|
import { join as join6, resolve as resolve2 } from "path";
|
|
3287
|
+
|
|
3288
|
+
// src/prompt-fragments.ts
|
|
3289
|
+
var TUI_FORMATTING_RULES = `Formatting (rendered in a TUI with a real markdown renderer):
|
|
3290
|
+
- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (\`| col | col |\` header + \`| --- | --- |\` separator). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.
|
|
3291
|
+
- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.
|
|
3292
|
+
- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
|
|
3293
|
+
- Do NOT draw decorative frames around content with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.
|
|
3294
|
+
- For flow charts and diagrams: a plain bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
|
|
3295
|
+
var ESCALATION_CONTRACT = `Cost-aware escalation (when you're running on deepseek-v4-flash):
|
|
3296
|
+
|
|
3297
|
+
If a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.
|
|
3298
|
+
|
|
3299
|
+
Two accepted forms:
|
|
3300
|
+
- \`<<<NEEDS_PRO>>>\` \u2014 bare marker, no rationale.
|
|
3301
|
+
- \`<<<NEEDS_PRO: <one-sentence reason>>>>\` \u2014 preferred. The reason text appears in the user-visible warning ("\u21E7 flash requested escalation \u2014 <your reason>"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested \`>\` characters. Examples: \`<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>\` or \`<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>\`.
|
|
3302
|
+
|
|
3303
|
+
Do NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).`;
|
|
3304
|
+
var NEGATIVE_CLAIM_RULE = `Negative claims ("X is missing", "Y isn't implemented", "there's no Z") are the #1 hallucination shape. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.
|
|
3305
|
+
|
|
3306
|
+
If you have a search tool (\`search_content\`, \`grep\`, web search), call it FIRST before asserting absence:
|
|
3307
|
+
- Returns matches \u2192 you were wrong; correct yourself and cite the matches.
|
|
3308
|
+
- Returns nothing \u2192 state the absence WITH the search query as evidence: \`No callers of \\\`foo()\\\` found (search_content "foo").\`
|
|
3309
|
+
|
|
3310
|
+
If you have no search tool, qualify hard: "I haven't verified \u2014 this is a guess." Never assert absence with fake authority.`;
|
|
3311
|
+
|
|
3312
|
+
// src/skills.ts
|
|
2984
3313
|
var SKILLS_DIRNAME = "skills";
|
|
2985
3314
|
var SKILL_FILE = "SKILL.md";
|
|
2986
3315
|
var SKILLS_INDEX_MAX_CHARS = 4e3;
|
|
@@ -3123,10 +3452,10 @@ function parseRunAs(raw) {
|
|
|
3123
3452
|
}
|
|
3124
3453
|
/**
 * Render one skill as a single bullet line for the skills index.
 *
 * The line has the shape `- <name>[ tag] — <description>`; skills with
 * `runAs === "subagent"` get a ` [🧬 subagent]` tag after the name. The
 * description is flattened to one line and clipped (with a trailing ellipsis)
 * so that name + tag + description stay within a 130-character budget.
 *
 * @param {{ name: string, description: string, runAs?: string }} s - skill record.
 * @returns {string} the index line; just `- <name>[ tag]` when the description is empty.
 */
function skillIndexLine(s) {
  // Collapse every line-break flavour (CRLF, lone CR, LF). Replacing only "\n"
  // left a raw "\r" inside the entry for descriptions authored on Windows,
  // which breaks the one-line-per-skill layout when rendered.
  const safeDesc = s.description.replace(/\r\n|\r|\n/g, " ").trim();
  const tag = s.runAs === "subagent" ? " [\u{1F9EC} subagent]" : "";
  const max = 130 - s.name.length - tag.length;
  // Math.max(1, …) keeps at least one description character even when the
  // name and tag alone already exhaust the budget.
  const clipped = safeDesc.length > max ? `${safeDesc.slice(0, Math.max(1, max - 1))}\u2026` : safeDesc;
  return clipped ? `- ${s.name}${tag} \u2014 ${clipped}` : `- ${s.name}${tag}`;
}
|
|
3131
3460
|
function applySkillsIndex(basePrompt, opts = {}) {
|
|
3132
3461
|
const store = new SkillStore(opts);
|
|
@@ -3141,7 +3470,7 @@ function applySkillsIndex(basePrompt, opts = {}) {
|
|
|
3141
3470
|
"",
|
|
3142
3471
|
"# Skills \u2014 playbooks you can invoke",
|
|
3143
3472
|
"",
|
|
3144
|
-
'One-liner index. Each entry is either a built-in or a user-authored playbook. Call `run_skill({ name: "<skill-name>", arguments: "<task>" })`
|
|
3473
|
+
'One-liner index. Each entry is either a built-in or a user-authored playbook. Call `run_skill({ name: "<skill-name>", arguments: "<task>" })` \u2014 the `name` is JUST the skill identifier (e.g. `"explore"`), NOT the `[\u{1F9EC} subagent]` tag that appears after it. Entries tagged `[\u{1F9EC} subagent]` spawn an **isolated subagent** \u2014 its tool calls and reasoning never enter your context, only its final answer does. Use subagent skills for tasks that would otherwise flood your context (deep exploration, multi-step research, anything where you only need the conclusion). Plain skills are inlined: their body becomes a tool result you read and act on directly. The user can also invoke a skill via `/skill <name>`.',
|
|
3145
3474
|
"",
|
|
3146
3475
|
"```",
|
|
3147
3476
|
truncated,
|
|
@@ -3163,12 +3492,9 @@ Your final answer:
|
|
|
3163
3492
|
- If the question can't be answered from what you found, say so plainly and suggest where to look next.
|
|
3164
3493
|
- No follow-up offers, no "let me know if you need more." The parent will ask again if they need more.
|
|
3165
3494
|
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
|
|
3169
|
-
- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
|
|
3170
|
-
- NEVER draw decorative frames around code or text with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. Use plain code blocks; the renderer adds its own border.
|
|
3171
|
-
- For flow charts: use a bullet list with \`\u2192\` or \`\u2193\` between steps, not ASCII boxes-and-arrows.
|
|
3495
|
+
${NEGATIVE_CLAIM_RULE}
|
|
3496
|
+
|
|
3497
|
+
${TUI_FORMATTING_RULES}
|
|
3172
3498
|
|
|
3173
3499
|
The 'task' the parent gave you is the question you must answer. Treat any other reading of it as scope creep.`;
|
|
3174
3500
|
var BUILTIN_RESEARCH_BODY = `You are running as a research subagent. Your job is to gather information from code AND the web, synthesize it, and return one focused conclusion.
|
|
@@ -3185,12 +3511,9 @@ Your final answer:
|
|
|
3185
3511
|
- Distinguish "I verified this in code" from "I read this on a docs page" \u2014 the parent will trust the former more.
|
|
3186
3512
|
- If the answer is uncertain, say so. Don't invent confidence.
|
|
3187
3513
|
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (\`\`\`).
|
|
3192
|
-
- NEVER draw decorative frames around code or text with \`\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518\` box-drawing characters. Use plain code blocks; the renderer adds its own border.
|
|
3193
|
-
- For flow charts: use a bullet list with \`\u2192\` or \`\u2193\` between steps, not ASCII boxes-and-arrows.
|
|
3514
|
+
${NEGATIVE_CLAIM_RULE}
|
|
3515
|
+
|
|
3516
|
+
${TUI_FORMATTING_RULES}
|
|
3194
3517
|
|
|
3195
3518
|
The 'task' the parent gave you is the research question. Stay on it.`;
|
|
3196
3519
|
var BUILTIN_SKILLS = Object.freeze([
|
|
@@ -4202,28 +4525,233 @@ function registerMemoryTools(registry, opts = {}) {
|
|
|
4202
4525
|
return registry;
|
|
4203
4526
|
}
|
|
4204
4527
|
|
|
4205
|
-
// src/tools/
|
|
4528
|
+
// src/tools/choice.ts
|
|
4529
|
+
/**
 * Thrown by the `ask_choice` tool once a question and its options have been
 * accepted. The message text is an instruction to the model to stop calling
 * tools and wait for the user's reply; the structured fields carry the
 * question, the options and the allow-custom flag.
 */
var ChoiceRequestedError = class extends Error {
  question;
  options;
  allowCustom;
  /**
   * @param {string} question - the question shown to the user.
   * @param {Array<{id: string, title: string, summary?: string}>} options - sanitized options.
   * @param {boolean} allowCustom - whether a free-form answer is offered.
   */
  constructor(question, options, allowCustom) {
    super(
      "ChoiceRequestedError: choice submitted. STOP calling tools now \u2014 the TUI has shown the options to the user. Wait for their next message; it will either be 'user picked <id>' (carry on with that branch), 'user answered: <text>' (custom free-form reply; read and proceed), or 'user cancelled the choice' (drop the question and ask what they want instead). Don't call any tools in the meantime."
    );
    this.name = "ChoiceRequestedError";
    Object.assign(this, { question, options, allowCustom });
  }
  /**
   * Structured tool-result form of this error.
   * @returns {{error: string, question: string, options: Array<object>, allowCustom: boolean}}
   */
  toToolResult() {
    const { name, message, question, options, allowCustom } = this;
    return { error: `${name}: ${message}`, question, options, allowCustom };
  }
};
|
|
4551
|
+
/**
 * Normalize the raw `options` argument of `ask_choice`.
 *
 * Keeps only object entries that have a non-empty string `id` and `title`
 * (both trimmed). When an id repeats, the first occurrence wins. A `summary`
 * is carried over only when it is a non-blank string.
 *
 * @param {unknown} raw - whatever the model passed as `options`.
 * @returns {Array<{id: string, title: string, summary?: string}>} cleaned
 *   options in input order; an empty array when `raw` is not an array.
 */
function sanitizeOptions(raw) {
  const trimmedText = (value) => (typeof value === "string" ? value.trim() : "");
  // A Map keeps insertion order, so it serves as both the de-dup set and the
  // ordered output list.
  const byId = /* @__PURE__ */ new Map();
  for (const candidate of Array.isArray(raw) ? raw : []) {
    if (candidate === null || typeof candidate !== "object") continue;
    const id = trimmedText(candidate.id);
    const title = trimmedText(candidate.title);
    if (id === "" || title === "" || byId.has(id)) continue;
    const summary = trimmedText(candidate.summary);
    byId.set(id, summary === "" ? { id, title } : { id, title, summary });
  }
  return Array.from(byId.values());
}
|
|
4570
|
+
/**
 * Register the `ask_choice` tool on `registry`.
 *
 * The tool never returns normally: on malformed input it throws a plain
 * `Error` with a corrective message, and on valid input it notifies
 * `opts.onChoiceRequested` and then throws `ChoiceRequestedError` carrying
 * the question, the sanitized options and the `allowCustom` flag.
 *
 * @param {{ register: (tool: object) => unknown }} registry - tool registry.
 * @param {{ onChoiceRequested?: (question: string, options: Array<object>) => void }} [opts]
 * @returns the same `registry`, for chaining.
 */
function registerChoiceTool(registry, opts = {}) {
  registry.register({
    name: "ask_choice",
    description: "Present 2\u20136 alternatives to the user. The principle: if the user is supposed to pick, the tool picks \u2014 you don't enumerate the choices as prose. Prose menus have no picker in this TUI, so the user gets a wall of text to scroll through and a letter to type, strictly worse than the magenta picker this tool renders. Call it whenever (a) the user has asked for options, (b) you've analyzed multiple approaches and the final call is theirs, or (c) it's a preference fork you can't resolve without them. Skip it when one option is clearly best (just do it, or submit_plan) or a free-form text answer fits (ask in prose). Keep option ids short and stable (A/B/C). Each option: title + optional summary. allowCustom=true when their real answer might not fit. Max 6 options \u2014 narrow first if more. A one-sentence lead-in before the call is fine; don't repeat the options in it.",
    readOnly: true,
    parameters: {
      type: "object",
      properties: {
        question: {
          type: "string",
          description: "The question to put in front of the user. One sentence. Don't repeat the options in the question text \u2014 the picker renders them separately."
        },
        options: {
          type: "array",
          // Upper bound matches the tool description and the runtime check
          // below (max 6); the schema text previously said "2–4".
          description: "2\u20136 alternatives. Each needs a stable id and a short title; summary is optional.",
          items: {
            type: "object",
            properties: {
              id: { type: "string", description: "Short stable id (A, B, C, or option-1)." },
              title: { type: "string", description: "One-line title shown as the option label." },
              summary: {
                type: "string",
                description: "Optional. A second dimmed line with more detail. Keep under ~80 chars."
              }
            },
            required: ["id", "title"]
          }
        },
        allowCustom: {
          type: "boolean",
          description: "If true, the picker shows a 'Let me type my own answer' escape hatch. Default false. Turn on when the user's real answer might not fit any of your pre-defined options."
        }
      },
      required: ["question", "options"]
    },
    fn: async (args) => {
      // Tool arguments come from the model and are not guaranteed to match the
      // schema. A non-string question is treated as missing so the model gets
      // the corrective message rather than a ".trim is not a function" crash.
      const question = typeof args?.question === "string" ? args.question.trim() : "";
      if (!question) {
        throw new Error(
          "ask_choice: question is required \u2014 write one sentence explaining the decision."
        );
      }
      const options = sanitizeOptions(args?.options);
      if (options.length < 2) {
        throw new Error(
          "ask_choice: need at least 2 well-formed options (each with a non-empty id and title). If you just need a text answer, ask the user in plain assistant text instead."
        );
      }
      if (options.length > 6) {
        throw new Error(
          "ask_choice: too many options (max 6). If you really have this many branches, split into two sequential ask_choice calls or narrow down first."
        );
      }
      // Only a literal `true` enables the free-form escape hatch.
      const allowCustom = args?.allowCustom === true;
      opts.onChoiceRequested?.(question, options);
      throw new ChoiceRequestedError(question, options, allowCustom);
    }
  });
  return registry;
}
|
|
4630
|
+
|
|
4631
|
+
// src/tools/plan-errors.ts
|
|
4206
4632
|
/**
 * Thrown by `submit_plan` once a plan has been accepted for user review. The
 * message text tells the model to stop calling tools and wait for the user's
 * verdict; the fields carry the plan body plus optional steps and summary.
 */
var PlanProposedError = class extends Error {
  plan;
  steps;
  summary;
  /**
   * @param {string} plan - markdown plan body.
   * @param {Array<object>|undefined} steps - optional structured step list.
   * @param {string|undefined} summary - optional one-line summary.
   */
  constructor(plan, steps, summary) {
    super(
      "PlanProposedError: plan submitted. STOP calling tools now \u2014 the TUI has shown the plan to the user. Wait for their next message; it will either approve (you'll then implement the plan), request a refinement (you should explore more and submit an updated plan), or cancel (drop the plan and ask what they want instead). Don't call any tools in the meantime."
    );
    this.name = "PlanProposedError";
    Object.assign(this, { plan, steps, summary });
  }
  /**
   * Structured tool-result shape, so a consumer can read the plan from
   * fields instead of parsing the message. `steps` and `summary` are left
   * out entirely when absent, keeping `undefined` keys out of the payload.
   *
   * @returns {{error: string, plan: string, steps?: Array<object>, summary?: string}}
   */
  toToolResult() {
    const { steps, summary } = this;
    return {
      error: `${this.name}: ${this.message}`,
      plan: this.plan,
      ...(steps?.length > 0 ? { steps } : {}),
      ...(summary ? { summary } : {})
    };
  }
};
|
|
4223
|
-
|
|
4661
|
+
/**
 * Thrown by `mark_step_complete` after a step has been recorded. The message
 * text tells the model to stop calling tools and wait for the user's next
 * message; the fields carry the completed step's details.
 */
var PlanCheckpointError = class extends Error {
  stepId;
  title;
  result;
  notes;
  /**
   * @param {{stepId: string, title?: string, result: string, notes?: string}} update
   *   details of the step that was just completed.
   */
  constructor(update) {
    super(
      "PlanCheckpointError: step complete \u2014 STOP calling tools. The TUI has paused the plan for user review. Wait for the next user message; it will either say continue (proceed to the next step), request a revision (adjust the remaining plan), or stop (summarize and end)."
    );
    this.name = "PlanCheckpointError";
    const { stepId, title, result, notes } = update;
    Object.assign(this, { stepId, title, result, notes });
  }
  /**
   * Structured tool-result form. `title` and `notes` are included only when
   * they hold a truthy value.
   *
   * @returns {{error: string, kind: "step_completed", stepId: string, result: string, title?: string, notes?: string}}
   */
  toToolResult() {
    const { title, notes } = this;
    return {
      error: `${this.name}: ${this.message}`,
      kind: "step_completed",
      stepId: this.stepId,
      result: this.result,
      ...(title ? { title } : {}),
      ...(notes ? { notes } : {})
    };
  }
};
|
|
4688
|
+
/**
 * Thrown by `revise_plan` once a revision has been accepted for user review.
 * The message text tells the model to stop calling tools and wait for the
 * user's decision; the fields carry the reason, the replacement steps and an
 * optional updated summary.
 */
var PlanRevisionProposedError = class extends Error {
  reason;
  remainingSteps;
  summary;
  /**
   * @param {string} reason - one-sentence justification for the revision.
   * @param {Array<object>} remainingSteps - replacement for the not-yet-done steps.
   * @param {string|undefined} summary - optional updated plan summary.
   */
  constructor(reason, remainingSteps, summary) {
    super(
      "PlanRevisionProposedError: revision submitted. STOP calling tools now \u2014 the TUI has paused for the user to review your proposed change. Wait for their next message; it will say 'revision accepted' (proceed with the new step list), 'revision rejected' (keep the original plan and continue), or 'revision cancelled' (drop the proposal entirely). Don't call any tools in the meantime."
    );
    this.name = "PlanRevisionProposedError";
    Object.assign(this, { reason, remainingSteps, summary });
  }
  /**
   * Structured tool-result form; `summary` is included only when truthy.
   * @returns {{error: string, reason: string, remainingSteps: Array<object>, summary?: string}}
   */
  toToolResult() {
    const { summary } = this;
    return {
      error: `${this.name}: ${this.message}`,
      reason: this.reason,
      remainingSteps: this.remainingSteps,
      ...(summary ? { summary } : {})
    };
  }
};
|
|
4711
|
+
|
|
4712
|
+
// src/tools/plan-core.ts
|
|
4713
|
+
var SUBMIT_PLAN_DESCRIPTION = "Submit ONE concrete plan you've already decided on. Use this for tasks that warrant a review gate \u2014 multi-file refactors, architecture changes, anything that would be expensive or confusing to undo. Skip it for small fixes (one-line typo, obvious bug with a clear fix) \u2014 just make the change. The user will either approve (you then implement it), ask for refinement, or cancel. If the user has already enabled /plan mode, writes are blocked at dispatch and you MUST use this. CRITICAL: do NOT use submit_plan to present alternative routes (A/B/C, option 1/2/3) for the user to pick from \u2014 the picker only exposes approve/refine/cancel, so a menu plan strands the user with no way to choose. For branching decisions, call `ask_choice` instead; only call submit_plan once the user has picked a direction and you have a single actionable plan. Write the plan as markdown with a one-line summary, a bulleted list of files to touch and what will change, and any risks or open questions. STRONGLY PREFERRED: pass `steps` \u2014 an array of {id, title, action, risk?} \u2014 so the UI renders a structured step list above the approval picker and tracks per-step progress. Use risk='high' for steps that touch prod data / break public APIs / are hard to undo; 'med' for non-trivial but reversible (multi-file edits, schema tweaks); 'low' for safe local work. After each step, call `mark_step_complete` so the user sees progress ticks.";
|
|
4714
|
+
var MARK_STEP_COMPLETE_DESCRIPTION = "Mark one step of the approved plan as done AND pause for the user to review. Call this after finishing each step. The TUI shows a \u2713 progress row and mounts a Continue / Revise / Stop picker \u2014 you MUST stop calling tools after this fires and wait for the next user message. Pass the `stepId` from the plan's steps array, a short `result` (what you did), and optional `notes` for anything surprising (errors, scope changes, follow-ups). This tool doesn't change any files. Don't call it if the plan didn't include structured steps, and don't invent ids that weren't in the original plan.";
|
|
4715
|
+
var REVISE_PLAN_DESCRIPTION = "Surgically replace the REMAINING steps of an in-flight plan. Call this when the user has given feedback at a checkpoint that warrants a structured plan change \u2014 skip a step, swap two steps, add a new step, change risk, etc. Pass: `reason` (one sentence why), `remainingSteps` (the new tail of the plan, replacing whatever steps haven't been done yet), and optional `summary` (updated one-line plan summary). Done steps are NEVER touched \u2014 keep them out of `remainingSteps`. The TUI shows a diff (removed in red, kept in gray, added in green) and the user accepts or rejects. Don't call this for trivial mid-step adjustments \u2014 just keep executing. Don't call submit_plan for revisions either \u2014 that resets the whole plan including completed steps. Use submit_plan only when the entire approach has changed; use revise_plan when the tail needs editing.";
|
|
4716
|
+
var STEP_ITEM_SCHEMA = {
|
|
4717
|
+
type: "object",
|
|
4718
|
+
properties: {
|
|
4719
|
+
id: { type: "string", description: "Stable id, e.g. step-1." },
|
|
4720
|
+
title: { type: "string", description: "Short imperative title." },
|
|
4721
|
+
action: { type: "string", description: "One-sentence description of the concrete action." },
|
|
4722
|
+
risk: {
|
|
4723
|
+
type: "string",
|
|
4724
|
+
enum: ["low", "med", "high"],
|
|
4725
|
+
description: "Self-assessed risk. 'high' = hard-to-undo / touches prod / breaks API; 'med' = non-trivial but reversible; 'low' = safe local work. The UI shows a colored dot per step so the user knows where to focus review. Omit if you're unsure."
|
|
4726
|
+
}
|
|
4727
|
+
},
|
|
4728
|
+
required: ["id", "title", "action"]
|
|
4729
|
+
};
|
|
4730
|
+
/**
 * Accept a risk label only if it is exactly one of the three known levels.
 *
 * @param {unknown} raw - candidate risk value.
 * @returns {"low"|"med"|"high"|undefined} `raw` when recognized, otherwise `undefined`.
 */
function sanitizeRisk(raw) {
  switch (raw) {
    case "low":
    case "med":
    case "high":
      return raw;
    default:
      return void 0;
  }
}
|
|
4734
|
+
/**
 * Normalize a raw step list.
 *
 * Keeps only object entries with non-empty string `id`, `title` and `action`
 * (all trimmed); `risk` is carried over only when `sanitizeRisk` accepts it.
 *
 * @param {unknown} raw - candidate step array.
 * @returns {Array<{id: string, title: string, action: string, risk?: string}>|undefined}
 *   the cleaned steps in input order, or `undefined` when `raw` is not an
 *   array or no entry survives.
 */
function sanitizeSteps(raw) {
  if (!Array.isArray(raw)) return void 0;
  const trimmedText = (value) => (typeof value === "string" ? value.trim() : "");
  const cleaned = raw.flatMap((candidate) => {
    if (candidate === null || typeof candidate !== "object") return [];
    const id = trimmedText(candidate.id);
    const title = trimmedText(candidate.title);
    const action = trimmedText(candidate.action);
    if (id === "" || title === "" || action === "") return [];
    const risk = sanitizeRisk(candidate.risk);
    return [risk ? { id, title, action, risk } : { id, title, action }];
  });
  return cleaned.length === 0 ? void 0 : cleaned;
}
|
|
4751
|
+
function registerSubmitPlan(registry, opts) {
|
|
4224
4752
|
registry.register({
|
|
4225
4753
|
name: "submit_plan",
|
|
4226
|
-
description:
|
|
4754
|
+
description: SUBMIT_PLAN_DESCRIPTION,
|
|
4227
4755
|
readOnly: true,
|
|
4228
4756
|
parameters: {
|
|
4229
4757
|
type: "object",
|
|
@@ -4231,6 +4759,15 @@ function registerPlanTool(registry, opts = {}) {
|
|
|
4231
4759
|
plan: {
|
|
4232
4760
|
type: "string",
|
|
4233
4761
|
description: "Markdown-formatted plan. Lead with a one-sentence summary. Then a file-by-file breakdown of what you'll change and why. Flag any risks or open questions at the end so the user can weigh in before you start."
|
|
4762
|
+
},
|
|
4763
|
+
steps: {
|
|
4764
|
+
type: "array",
|
|
4765
|
+
description: "Structured step list (strongly recommended). When provided, the UI renders a compact step list above the approval picker AND tracks per-step progress via `mark_step_complete`. Use stable ids (step-1, step-2, ...). Skip only for tiny one-step plans where the markdown body is enough.",
|
|
4766
|
+
items: STEP_ITEM_SCHEMA
|
|
4767
|
+
},
|
|
4768
|
+
summary: {
|
|
4769
|
+
type: "string",
|
|
4770
|
+
description: "Optional. One-sentence human-friendly title for the plan, ~80 chars max. Surfaces in the PlanConfirm picker header and in /plans listings ('\u25B8 refactor auth into signed tokens \xB7 2/5 done'). Skip for trivial plans where the first line of the markdown body is already short and clear."
|
|
4234
4771
|
}
|
|
4235
4772
|
},
|
|
4236
4773
|
required: ["plan"]
|
|
@@ -4240,10 +4777,108 @@ function registerPlanTool(registry, opts = {}) {
|
|
|
4240
4777
|
if (!plan) {
|
|
4241
4778
|
throw new Error("submit_plan: empty plan \u2014 write a markdown plan and try again.");
|
|
4242
4779
|
}
|
|
4243
|
-
|
|
4244
|
-
|
|
4780
|
+
const steps = sanitizeSteps(args?.steps);
|
|
4781
|
+
const summary = typeof args?.summary === "string" ? args.summary.trim() || void 0 : void 0;
|
|
4782
|
+
opts.onPlanSubmitted?.(plan, steps);
|
|
4783
|
+
throw new PlanProposedError(plan, steps, summary);
|
|
4784
|
+
}
|
|
4785
|
+
});
|
|
4786
|
+
}
|
|
4787
|
+
/**
 * Register the `mark_step_complete` tool on `registry`.
 *
 * The tool never returns normally: on malformed input it throws a plain
 * `Error` with a corrective message, and on valid input it notifies
 * `opts.onStepCompleted` and then throws `PlanCheckpointError`.
 *
 * @param {{ register: (tool: object) => unknown }} registry - tool registry.
 * @param {{ onStepCompleted?: (update: object) => void }} opts - callbacks.
 */
function registerMarkStepComplete(registry, opts) {
  // Tool arguments come from the model and are not guaranteed to match the
  // schema, so anything that isn't a string is treated as missing. Required
  // fields then fail with the corrective message, not a ".trim is not a
  // function" TypeError.
  const trimmedText = (value) => (typeof value === "string" ? value.trim() : "");
  registry.register({
    name: "mark_step_complete",
    description: MARK_STEP_COMPLETE_DESCRIPTION,
    readOnly: true,
    parameters: {
      type: "object",
      properties: {
        stepId: {
          type: "string",
          description: "The id of the step being marked complete. Must match one from submit_plan's steps array."
        },
        title: {
          type: "string",
          description: "Optional. The step's title, echoed back for the UI. If omitted, the UI falls back to the id."
        },
        result: {
          type: "string",
          description: "One-sentence summary of what was done for this step."
        },
        notes: {
          type: "string",
          description: "Optional. Anything surprising \u2014 blockers hit, assumptions revised, follow-ups for later steps."
        }
      },
      required: ["stepId", "result"]
    },
    fn: async (args) => {
      const stepId = trimmedText(args?.stepId);
      const result = trimmedText(args?.result);
      if (!stepId) {
        throw new Error("mark_step_complete: stepId is required.");
      }
      if (!result) {
        throw new Error(
          "mark_step_complete: result is required \u2014 say in one sentence what you did."
        );
      }
      // Blank optional fields collapse to undefined so they are omitted below.
      const title = trimmedText(args?.title) || void 0;
      const notes = trimmedText(args?.notes) || void 0;
      const update = { kind: "step_completed", stepId, result };
      if (title) update.title = title;
      if (notes) update.notes = notes;
      opts.onStepCompleted?.(update);
      throw new PlanCheckpointError({ stepId, title, result, notes });
    }
  });
}
|
|
4835
|
+
/**
 * Register the `revise_plan` tool on `registry`.
 *
 * The tool never returns normally: on malformed input it throws a plain
 * `Error` with a corrective message, and on valid input it notifies
 * `opts.onPlanRevisionProposed` and then throws `PlanRevisionProposedError`.
 *
 * @param {{ register: (tool: object) => unknown }} registry - tool registry.
 * @param {{ onPlanRevisionProposed?: (reason: string, remainingSteps: Array<object>, summary?: string) => void }} opts
 */
function registerRevisePlan(registry, opts) {
  registry.register({
    name: "revise_plan",
    description: REVISE_PLAN_DESCRIPTION,
    readOnly: true,
    parameters: {
      type: "object",
      properties: {
        reason: {
          type: "string",
          description: "One sentence explaining why you're revising \u2014 what the user asked for, what changed your assessment."
        },
        remainingSteps: {
          type: "array",
          description: "The new tail of the plan \u2014 what should run from here on. Each entry: {id, title, action, risk?}. Use stable ids; reuse old ids when a step is just being adjusted, generate new ones for genuinely new steps.",
          items: STEP_ITEM_SCHEMA
        },
        summary: {
          type: "string",
          description: "Optional. Updated one-line plan summary if the overall framing has shifted."
        }
      },
      required: ["reason", "remainingSteps"]
    },
    fn: async (args) => {
      // Tool arguments come from the model and are not guaranteed to match the
      // schema. A non-string reason is treated as missing so the model gets
      // the corrective message rather than a ".trim is not a function" crash.
      const reason = typeof args?.reason === "string" ? args.reason.trim() : "";
      if (!reason) {
        throw new Error(
          "revise_plan: reason is required \u2014 write one sentence explaining the change."
        );
      }
      const remainingSteps = sanitizeSteps(args?.remainingSteps);
      if (!remainingSteps || remainingSteps.length === 0) {
        throw new Error(
          "revise_plan: remainingSteps must be a non-empty array of well-formed steps. If the user wants to STOP rather than continue, don't revise \u2014 the picker has its own Stop option."
        );
      }
      // A blank summary collapses to undefined.
      const summary = typeof args?.summary === "string" ? args.summary.trim() || void 0 : void 0;
      opts.onPlanRevisionProposed?.(reason, remainingSteps, summary);
      throw new PlanRevisionProposedError(reason, remainingSteps, summary);
    }
  });
}
|
|
4878
|
+
/**
 * Register the plan tool family (`submit_plan`, `mark_step_complete`,
 * `revise_plan`) on `registry`, in that order.
 *
 * @param {object} registry - tool registry to register into.
 * @param {object} [opts] - options passed through unchanged to every registrar.
 * @returns the same `registry`, for chaining.
 */
function registerPlanTool(registry, opts = {}) {
  const registrars = [registerSubmitPlan, registerMarkStepComplete, registerRevisePlan];
  for (const register of registrars) {
    register(registry, opts);
  }
  return registry;
}
|
|
4249
4884
|
|
|
@@ -4256,15 +4891,15 @@ Rules:
|
|
|
4256
4891
|
- When you're done, your final assistant message is the only thing the parent will see \u2014 make it complete and self-contained. No follow-up offers, no questions, no "let me know if you need more."
|
|
4257
4892
|
- Prefer one clear, distilled answer over a long log of what you tried.
|
|
4258
4893
|
|
|
4259
|
-
|
|
4260
|
-
|
|
4261
|
-
|
|
4262
|
-
|
|
4263
|
-
|
|
4264
|
-
- For flow charts and diagrams: use a markdown bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
|
|
4894
|
+
${NEGATIVE_CLAIM_RULE}
|
|
4895
|
+
|
|
4896
|
+
${ESCALATION_CONTRACT}
|
|
4897
|
+
|
|
4898
|
+
${TUI_FORMATTING_RULES}`;
|
|
4265
4899
|
var DEFAULT_MAX_RESULT_CHARS2 = 8e3;
|
|
4266
4900
|
var DEFAULT_MAX_ITERS = 16;
|
|
4267
|
-
var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-
|
|
4901
|
+
var DEFAULT_SUBAGENT_MODEL = "deepseek-v4-flash";
|
|
4902
|
+
var DEFAULT_SUBAGENT_EFFORT = "high";
|
|
4268
4903
|
var SUBAGENT_TOOL_NAME = "spawn_subagent";
|
|
4269
4904
|
var NEVER_INHERITED_TOOLS = /* @__PURE__ */ new Set([SUBAGENT_TOOL_NAME, "submit_plan"]);
|
|
4270
4905
|
async function spawnSubagent(opts) {
|
|
@@ -4293,6 +4928,10 @@ async function spawnSubagent(opts) {
|
|
|
4293
4928
|
prefix: childPrefix,
|
|
4294
4929
|
tools: childTools,
|
|
4295
4930
|
model,
|
|
4931
|
+
// Subagents run on a constrained thinking budget by default — the
|
|
4932
|
+
// task is already narrow by construction, and `high` cuts output
|
|
4933
|
+
// tokens substantially vs `max`.
|
|
4934
|
+
reasoningEffort: DEFAULT_SUBAGENT_EFFORT,
|
|
4296
4935
|
maxToolIters,
|
|
4297
4936
|
hooks: [],
|
|
4298
4937
|
stream: false
|
|
@@ -4416,8 +5055,8 @@ function registerSubagentTool(parentRegistry, opts) {
|
|
|
4416
5055
|
},
|
|
4417
5056
|
model: {
|
|
4418
5057
|
type: "string",
|
|
4419
|
-
enum: ["deepseek-v4-flash", "deepseek-v4-pro"
|
|
4420
|
-
description: "Which DeepSeek model the subagent runs on. Default is 'deepseek-v4-
|
|
5058
|
+
enum: ["deepseek-v4-flash", "deepseek-v4-pro"],
|
|
5059
|
+
description: "Which DeepSeek model the subagent runs on. Default is 'deepseek-v4-flash' \u2014 cheap and fast, fine for explore/research-style subtasks. Override to 'deepseek-v4-pro' (~12\xD7 more expensive) when the subtask genuinely needs the stronger model: cross-file architecture, subtle bug hunts, anything where flash has empirically underperformed."
|
|
4421
5060
|
}
|
|
4422
5061
|
},
|
|
4423
5062
|
required: ["task"]
|
|
@@ -5085,7 +5724,7 @@ function registerShellTools(registry, opts) {
|
|
|
5085
5724
|
const allowAll = opts.allowAll ?? false;
|
|
5086
5725
|
registry.register({
|
|
5087
5726
|
name: "run_command",
|
|
5088
|
-
description: "Run a shell command in the project root and return its combined stdout+stderr.
|
|
5727
|
+
description: "Run a shell command in the project root and return its combined stdout+stderr.\n\nConstraints (read these before the first call):\n\u2022 ONE process per call, NO shell expansion. `&&`, `||`, `|`, `;`, `>`, `<`, `2>&1` are all rejected up-front \u2014 split into separate calls and combine results in reasoning. Example: instead of `grep foo *.ts | wc -l`, use `grep -c foo *.ts`; instead of `cd sub && npm test`, use `npm test --prefix sub` (or whatever --cwd flag the binary accepts).\n\u2022 `cd` DOES NOT PERSIST between calls \u2014 each call spawns a fresh process rooted at the project. If a tool needs a subdirectory, pass it via the tool's own flag (`npm --prefix`, `cargo -C`, `git -C`, `pytest tests/\u2026`), NOT via a preceding `cd`.\n\u2022 Avoid commands with unbounded output (`netstat -ano`, `find /`, etc.) \u2014 they waste tokens. Filter at source: `netstat -ano -p TCP`, `find src -name '*.ts'`, `grep -c`, `wc -l`.\n\nCommon read-only inspection and test/lint/typecheck commands run immediately; anything that could mutate state, install dependencies, or touch the network is refused until the user confirms it in the TUI. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.",
|
|
5089
5728
|
// Plan-mode gate: allow allowlisted commands through (git status,
|
|
5090
5729
|
// cargo check, ls, grep …) so the model can actually investigate
|
|
5091
5730
|
// during planning. Anything that would otherwise trigger a
|
|
@@ -5128,7 +5767,7 @@ function registerShellTools(registry, opts) {
|
|
|
5128
5767
|
});
|
|
5129
5768
|
registry.register({
|
|
5130
5769
|
name: "run_background",
|
|
5131
|
-
description: "Spawn a long-running process (dev server, watcher, any command that doesn't naturally exit) and detach. Waits up to `waitSec` seconds for startup (or until the output matches a readiness signal like 'Local:', 'listening on', 'compiled successfully'), then returns the job id + startup preview. The process keeps running; call `job_output` to tail its logs, `stop_job` to kill it, `list_jobs` to see all running jobs.
|
|
5770
|
+
description: "Spawn a long-running process (dev server, watcher, any command that doesn't naturally exit) and detach. Waits up to `waitSec` seconds for startup (or until the output matches a readiness signal like 'Local:', 'listening on', 'compiled successfully'), then returns the job id + startup preview. The process keeps running; call `job_output` to tail its logs, `stop_job` to kill it, `list_jobs` to see all running jobs.\n\nSame shell constraints as run_command: NO `&&` / `||` / `|` / `;` / `>` / `<` / `2>&1`, `cd` doesn't persist. Dev servers that need a subdirectory: use the tool's own --prefix / --cwd flag. For Vite specifically, `--prefix` on npm only tells npm where package.json is; vite's server root still defaults to process cwd, so pass `vite <project-dir>` or configure via `vite.config.ts` root.\n\nUSE THIS \u2014 not `run_command` \u2014 for: npm/yarn/pnpm run dev, uvicorn / flask run, go run, cargo watch, tsc --watch, webpack serve, anything with 'dev' / 'serve' / 'watch' in the name.",
|
|
5132
5771
|
parameters: {
|
|
5133
5772
|
type: "object",
|
|
5134
5773
|
properties: {
|
|
@@ -5623,7 +6262,8 @@ function summarizeTurns(turns) {
|
|
|
5623
6262
|
claudeEquivalentUsd: round2(totalClaude, 6),
|
|
5624
6263
|
savingsVsClaudePct: round2(savingsVsClaude * 100, 2),
|
|
5625
6264
|
cacheHitRatio: round2(cacheHitRatio, 4),
|
|
5626
|
-
lastPromptTokens: lastTurn?.usage.promptTokens ?? 0
|
|
6265
|
+
lastPromptTokens: lastTurn?.usage.promptTokens ?? 0,
|
|
6266
|
+
lastTurnCostUsd: round2(lastTurn?.cost ?? 0, 6)
|
|
5627
6267
|
};
|
|
5628
6268
|
}
|
|
5629
6269
|
function round2(n, digits) {
|
|
@@ -6695,7 +7335,7 @@ function sep() {
|
|
|
6695
7335
|
// src/code/prompt.ts
|
|
6696
7336
|
import { existsSync as existsSync10, readFileSync as readFileSync11 } from "fs";
|
|
6697
7337
|
import { join as join9 } from "path";
|
|
6698
|
-
var CODE_SYSTEM_PROMPT = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, list_directory, search_files,
|
|
7338
|
+
var CODE_SYSTEM_PROMPT = `You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.
|
|
6699
7339
|
|
|
6700
7340
|
# Cite or shut up \u2014 non-negotiable
|
|
6701
7341
|
|
|
@@ -6728,6 +7368,21 @@ Skip submit_plan for small, obvious changes: one-line typo, clear bug with a cle
|
|
|
6728
7368
|
|
|
6729
7369
|
Plan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an "Open questions" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.
|
|
6730
7370
|
|
|
7371
|
+
**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use \`ask_choice\` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.
|
|
7372
|
+
|
|
7373
|
+
# When to ask the user to pick (ask_choice)
|
|
7374
|
+
|
|
7375
|
+
You have an \`ask_choice\` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. The tool fires an arrow-key picker that's strictly better.
|
|
7376
|
+
|
|
7377
|
+
Call it when:
|
|
7378
|
+
- The user has asked for options / doesn't want a recommendation / wants to decide.
|
|
7379
|
+
- You've analyzed multiple approaches and the final call is theirs.
|
|
7380
|
+
- It's a preference fork you can't resolve without them (deployment target, team convention, taste).
|
|
7381
|
+
|
|
7382
|
+
Skip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).
|
|
7383
|
+
|
|
7384
|
+
Each option: short stable id (A/B/C), one-line title, optional summary. \`allowCustom: true\` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine ("I see three directions \u2014 letting you pick"); don't repeat the options in it. After the call, STOP.
|
|
7385
|
+
|
|
6731
7386
|
# Plan mode (/plan)
|
|
6732
7387
|
|
|
6733
7388
|
The user can ALSO enter "plan mode" via /plan, which is a stronger, explicit constraint:
|
|
@@ -6736,15 +7391,17 @@ The user can ALSO enter "plan mode" via /plan, which is a stronger, explicit con
|
|
|
6736
7391
|
- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.
|
|
6737
7392
|
|
|
6738
7393
|
|
|
6739
|
-
# Delegating to subagents via Skills
|
|
7394
|
+
# Delegating to subagents via Skills
|
|
6740
7395
|
|
|
6741
|
-
The pinned Skills index below lists playbooks you can invoke with \`run_skill\`.
|
|
7396
|
+
The pinned Skills index below lists playbooks you can invoke with \`run_skill\`. Entries tagged \`[\u{1F9EC} subagent]\` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.
|
|
7397
|
+
|
|
7398
|
+
**When you call \`run_skill\`, the \`name\` is ONLY the identifier before the tag** \u2014 e.g. \`run_skill({ name: "explore", arguments: "..." })\`, NOT \`"[\u{1F9EC} subagent] explore"\` and NOT \`"explore [\u{1F9EC} subagent]"\`. The tag is display sugar; the name argument is just the bare identifier.
|
|
6742
7399
|
|
|
6743
7400
|
Two built-ins ship by default:
|
|
6744
|
-
-
|
|
6745
|
-
-
|
|
7401
|
+
- **explore** \`[\u{1F9EC} subagent]\` \u2014 read-only investigation across the codebase. Use when the user says things like "find all places that...", "how does X work across the project", "survey the code for Y". Pass \`arguments\` describing the concrete question.
|
|
7402
|
+
- **research** \`[\u{1F9EC} subagent]\` \u2014 combines web search + code reading. Use for "is X supported by lib Y", "what's the canonical way to Z", "compare our impl to the spec".
|
|
6746
7403
|
|
|
6747
|
-
When to delegate (call \`run_skill\` with a
|
|
7404
|
+
When to delegate (call \`run_skill\` with a subagent skill):
|
|
6748
7405
|
- The task would otherwise need >5 file reads or searches.
|
|
6749
7406
|
- You only need the conclusion, not the exploration trail.
|
|
6750
7407
|
- The work is self-contained (you can describe it in one paragraph).
|
|
@@ -6855,6 +7512,10 @@ If you notice an obvious issue, MENTION it in one sentence and wait for the user
|
|
|
6855
7512
|
- Show edits; don't narrate them in prose. "Here's the fix:" is enough.
|
|
6856
7513
|
- One short paragraph explaining *why*, then the blocks.
|
|
6857
7514
|
- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.
|
|
7515
|
+
|
|
7516
|
+
${ESCALATION_CONTRACT}
|
|
7517
|
+
|
|
7518
|
+
${TUI_FORMATTING_RULES}
|
|
6858
7519
|
`;
|
|
6859
7520
|
function codeSystemPrompt(rootDir) {
|
|
6860
7521
|
const withMemory = applyMemoryStack(CODE_SYSTEM_PROMPT, rootDir);
|
|
@@ -7181,6 +7842,7 @@ export {
|
|
|
7181
7842
|
AppendOnlyLog,
|
|
7182
7843
|
CODE_SYSTEM_PROMPT,
|
|
7183
7844
|
CacheFirstLoop,
|
|
7845
|
+
ChoiceRequestedError,
|
|
7184
7846
|
DEFAULT_AT_MENTION_MAX_BYTES,
|
|
7185
7847
|
DEFAULT_MAX_RESULT_CHARS,
|
|
7186
7848
|
DEFAULT_MAX_RESULT_TOKENS,
|
|
@@ -7200,7 +7862,9 @@ export {
|
|
|
7200
7862
|
NeedsConfirmationError,
|
|
7201
7863
|
PROJECT_MEMORY_FILE,
|
|
7202
7864
|
PROJECT_MEMORY_MAX_CHARS,
|
|
7865
|
+
PlanCheckpointError,
|
|
7203
7866
|
PlanProposedError,
|
|
7867
|
+
PlanRevisionProposedError,
|
|
7204
7868
|
SessionStats,
|
|
7205
7869
|
SseTransport,
|
|
7206
7870
|
StdioTransport,
|
|
@@ -7290,6 +7954,7 @@ export {
|
|
|
7290
7954
|
readUsageLog,
|
|
7291
7955
|
recordFromLoopEvent,
|
|
7292
7956
|
redactKey,
|
|
7957
|
+
registerChoiceTool,
|
|
7293
7958
|
registerFilesystemTools,
|
|
7294
7959
|
registerMemoryTools,
|
|
7295
7960
|
registerPlanTool,
|