reasonix 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/{chunk-NXYPGKA3.js → chunk-5DZMZCCW.js} +22 -3
- package/dist/cli/chunk-5DZMZCCW.js.map +1 -0
- package/dist/cli/index.js +2634 -644
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/{prompt-KX6A4DVX.js → prompt-2OABSPAW.js} +2 -2
- package/dist/index.d.ts +256 -45
- package/dist/index.js +489 -55
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/cli/chunk-NXYPGKA3.js.map +0 -1
- package/dist/cli/{prompt-KX6A4DVX.js.map → prompt-2OABSPAW.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -334,7 +334,7 @@ async function harvest(reasoningContent, client, options = {}, signal) {
|
|
|
334
334
|
const minLen = options.minReasoningLen ?? 40;
|
|
335
335
|
const trimmed = reasoningContent.trim();
|
|
336
336
|
if (trimmed.length < minLen) return emptyPlanState();
|
|
337
|
-
const model = options.model ?? "deepseek-
|
|
337
|
+
const model = options.model ?? "deepseek-v4-flash";
|
|
338
338
|
const maxItems = options.maxItems ?? 5;
|
|
339
339
|
const maxItemLen = options.maxItemLen ?? 80;
|
|
340
340
|
const system = SYSTEM_PROMPT.replace("{maxItems}", String(maxItems)).replace(
|
|
@@ -1729,8 +1729,9 @@ var ARGS_COMPACT_THRESHOLD_TOKENS = 800;
|
|
|
1729
1729
|
var TURN_END_RESULT_CAP_TOKENS = 3e3;
|
|
1730
1730
|
var FAILURE_ESCALATION_THRESHOLD = 3;
|
|
1731
1731
|
var ESCALATION_MODEL = "deepseek-v4-pro";
|
|
1732
|
-
var
|
|
1733
|
-
var
|
|
1732
|
+
var NEEDS_PRO_MARKER_PREFIX = "<<<NEEDS_PRO";
|
|
1733
|
+
var NEEDS_PRO_MARKER_RE = /^<<<NEEDS_PRO(?::\s*([^>]*))?>>>/;
|
|
1734
|
+
var NEEDS_PRO_BUFFER_CHARS = 256;
|
|
1734
1735
|
var CacheFirstLoop = class {
|
|
1735
1736
|
client;
|
|
1736
1737
|
prefix;
|
|
@@ -1796,6 +1797,14 @@ var CacheFirstLoop = class {
|
|
|
1796
1797
|
* the user doesn't watch flash retry the same edit 5 times.
|
|
1797
1798
|
*/
|
|
1798
1799
|
_turnFailureCount = 0;
|
|
1800
|
+
/**
|
|
1801
|
+
* Per-type breakdown of failure signals counted toward the turn's
|
|
1802
|
+
* auto-escalation threshold. Surfaced in the warning when the
|
|
1803
|
+
* threshold trips so the user sees what kind of trouble flash
|
|
1804
|
+
* actually hit ("3× search-mismatch, 2× truncated") rather than
|
|
1805
|
+
* just a bare count. Reset alongside _turnFailureCount.
|
|
1806
|
+
*/
|
|
1807
|
+
_turnFailureTypes = {};
|
|
1799
1808
|
constructor(opts) {
|
|
1800
1809
|
this.client = opts.client;
|
|
1801
1810
|
this.prefix = opts.prefix;
|
|
@@ -1823,10 +1832,11 @@ var CacheFirstLoop = class {
|
|
|
1823
1832
|
this.sessionName = opts.session ?? null;
|
|
1824
1833
|
if (this.sessionName) {
|
|
1825
1834
|
const prior = loadSessionMessages(this.sessionName);
|
|
1826
|
-
const
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1835
|
+
const shrunk = healLoadedMessagesByTokens(prior, DEFAULT_MAX_RESULT_TOKENS);
|
|
1836
|
+
const stamped = stampMissingReasoningForThinkingMode(shrunk.messages, this.model);
|
|
1837
|
+
const messages = stamped.messages;
|
|
1838
|
+
const healedCount = shrunk.healedCount + stamped.stampedCount;
|
|
1839
|
+
const tokensSaved = shrunk.tokensSaved;
|
|
1830
1840
|
for (const msg of messages) this.log.append(msg);
|
|
1831
1841
|
this.resumedMessageCount = messages.length;
|
|
1832
1842
|
if (healedCount > 0) {
|
|
@@ -2053,15 +2063,41 @@ var CacheFirstLoop = class {
|
|
|
2053
2063
|
return this._escalateThisTurn ? ESCALATION_MODEL : this.model;
|
|
2054
2064
|
}
|
|
2055
2065
|
/**
|
|
2056
|
-
*
|
|
2057
|
-
*
|
|
2058
|
-
*
|
|
2059
|
-
*
|
|
2060
|
-
*
|
|
2061
|
-
*
|
|
2066
|
+
* Parse the escalation marker out of the model's leading content.
|
|
2067
|
+
* Returns `{ matched: true, reason? }` for both bare and reason-
|
|
2068
|
+
* carrying forms. Only the FIRST line matters — the model is
|
|
2069
|
+
* instructed to emit the marker as the first output token if at
|
|
2070
|
+
* all. Matches anywhere else in the text are normal content
|
|
2071
|
+
* references (e.g. the user asked about the marker itself).
|
|
2062
2072
|
*/
|
|
2073
|
+
parseEscalationMarker(content) {
|
|
2074
|
+
const m = NEEDS_PRO_MARKER_RE.exec(content.trimStart());
|
|
2075
|
+
if (!m) return { matched: false };
|
|
2076
|
+
const reason = m[1]?.trim();
|
|
2077
|
+
return { matched: true, reason: reason || void 0 };
|
|
2078
|
+
}
|
|
2079
|
+
/** Convenience boolean — same gate the streaming path used to call. */
|
|
2063
2080
|
isEscalationRequest(content) {
|
|
2064
|
-
return
|
|
2081
|
+
return this.parseEscalationMarker(content).matched;
|
|
2082
|
+
}
|
|
2083
|
+
/**
|
|
2084
|
+
* Could `buf` STILL plausibly become the full marker as more chunks
|
|
2085
|
+
* arrive? Drives the streaming buffer's flush decision: while this
|
|
2086
|
+
* is true we keep accumulating; once it's false (or the buffer
|
|
2087
|
+
* exceeds the byte limit) we flush so the user isn't staring at a
|
|
2088
|
+
* delayed display for arbitrary content that just happens to start
|
|
2089
|
+
* with `<`.
|
|
2090
|
+
*/
|
|
2091
|
+
looksLikePartialEscalationMarker(buf) {
|
|
2092
|
+
const t = buf.trimStart();
|
|
2093
|
+
if (t.length === 0) return true;
|
|
2094
|
+
if (t.length <= NEEDS_PRO_MARKER_PREFIX.length) {
|
|
2095
|
+
return NEEDS_PRO_MARKER_PREFIX.startsWith(t);
|
|
2096
|
+
}
|
|
2097
|
+
if (!t.startsWith(NEEDS_PRO_MARKER_PREFIX)) return false;
|
|
2098
|
+
const rest = t.slice(NEEDS_PRO_MARKER_PREFIX.length);
|
|
2099
|
+
if (rest[0] !== ">" && rest[0] !== ":") return false;
|
|
2100
|
+
return true;
|
|
2065
2101
|
}
|
|
2066
2102
|
/**
|
|
2067
2103
|
* Check whether a tool result string looks like a "flash struggled"
|
|
@@ -2075,16 +2111,18 @@ var CacheFirstLoop = class {
|
|
|
2075
2111
|
*/
|
|
2076
2112
|
noteToolFailureSignal(resultJson, repair) {
|
|
2077
2113
|
let bumped = false;
|
|
2078
|
-
|
|
2079
|
-
this._turnFailureCount +=
|
|
2114
|
+
const bump = (kind, by = 1) => {
|
|
2115
|
+
this._turnFailureCount += by;
|
|
2116
|
+
this._turnFailureTypes[kind] = (this._turnFailureTypes[kind] ?? 0) + by;
|
|
2080
2117
|
bumped = true;
|
|
2118
|
+
};
|
|
2119
|
+
if (resultJson.includes('"error"') && resultJson.includes("search text not found")) {
|
|
2120
|
+
bump("search-mismatch");
|
|
2081
2121
|
}
|
|
2082
2122
|
if (repair) {
|
|
2083
|
-
|
|
2084
|
-
if (
|
|
2085
|
-
|
|
2086
|
-
bumped = true;
|
|
2087
|
-
}
|
|
2123
|
+
if (repair.scavenged > 0) bump("scavenged", repair.scavenged);
|
|
2124
|
+
if (repair.truncationsFixed > 0) bump("truncated", repair.truncationsFixed);
|
|
2125
|
+
if (repair.stormsBroken > 0) bump("storm-broken", repair.stormsBroken);
|
|
2088
2126
|
}
|
|
2089
2127
|
if (bumped && !this._escalateThisTurn && this._turnFailureCount >= FAILURE_ESCALATION_THRESHOLD) {
|
|
2090
2128
|
this._escalateThisTurn = true;
|
|
@@ -2092,6 +2130,16 @@ var CacheFirstLoop = class {
|
|
|
2092
2130
|
}
|
|
2093
2131
|
return false;
|
|
2094
2132
|
}
|
|
2133
|
+
/**
|
|
2134
|
+
* Render `_turnFailureTypes` as a comma-separated breakdown like
|
|
2135
|
+
* "2× search-mismatch, 1× truncated" for the auto-escalation
|
|
2136
|
+
* warning. Empty if no types have been recorded yet (defensive —
|
|
2137
|
+
* the warning sites only call this after a bump).
|
|
2138
|
+
*/
|
|
2139
|
+
formatFailureBreakdown() {
|
|
2140
|
+
const parts = Object.entries(this._turnFailureTypes).filter(([, n]) => n > 0).map(([kind, n]) => `${n}\xD7 ${kind}`);
|
|
2141
|
+
return parts.length > 0 ? parts.join(", ") : `${this._turnFailureCount} repair/error signal(s)`;
|
|
2142
|
+
}
|
|
2095
2143
|
buildMessages(pendingUser) {
|
|
2096
2144
|
const healed = healLoadedMessages(this.log.toMessages(), DEFAULT_MAX_RESULT_CHARS);
|
|
2097
2145
|
const msgs = [...this.prefix.toMessages(), ...healed.messages];
|
|
@@ -2147,6 +2195,7 @@ var CacheFirstLoop = class {
|
|
|
2147
2195
|
this.scratch.reset();
|
|
2148
2196
|
this.repair.resetStorm();
|
|
2149
2197
|
this._turnFailureCount = 0;
|
|
2198
|
+
this._turnFailureTypes = {};
|
|
2150
2199
|
this._escalateThisTurn = false;
|
|
2151
2200
|
let armedConsumed = false;
|
|
2152
2201
|
if (this._proArmedForNextTurn) {
|
|
@@ -2335,7 +2384,7 @@ var CacheFirstLoop = class {
|
|
|
2335
2384
|
if (this.isEscalationRequest(escalationBuf)) {
|
|
2336
2385
|
break;
|
|
2337
2386
|
}
|
|
2338
|
-
if (escalationBuf.length >= NEEDS_PRO_BUFFER_CHARS ||
|
|
2387
|
+
if (escalationBuf.length >= NEEDS_PRO_BUFFER_CHARS || !this.looksLikePartialEscalationMarker(escalationBuf)) {
|
|
2339
2388
|
escalationBufFlushed = true;
|
|
2340
2389
|
yield {
|
|
2341
2390
|
turn: this._turn,
|
|
@@ -2430,11 +2479,13 @@ var CacheFirstLoop = class {
|
|
|
2430
2479
|
return;
|
|
2431
2480
|
}
|
|
2432
2481
|
if (this.modelForCurrentCall() !== ESCALATION_MODEL && this.isEscalationRequest(assistantContent)) {
|
|
2482
|
+
const { reason } = this.parseEscalationMarker(assistantContent);
|
|
2433
2483
|
this._escalateThisTurn = true;
|
|
2484
|
+
const reasonSuffix = reason ? ` \u2014 ${reason}` : "";
|
|
2434
2485
|
yield {
|
|
2435
2486
|
turn: this._turn,
|
|
2436
2487
|
role: "warning",
|
|
2437
|
-
content: `\u21E7 flash requested escalation \u2014 retrying this turn on ${ESCALATION_MODEL}`
|
|
2488
|
+
content: `\u21E7 flash requested escalation \u2014 retrying this turn on ${ESCALATION_MODEL}${reasonSuffix}`
|
|
2438
2489
|
};
|
|
2439
2490
|
assistantContent = "";
|
|
2440
2491
|
reasoningContent = "";
|
|
@@ -2469,7 +2520,12 @@ var CacheFirstLoop = class {
|
|
|
2469
2520
|
assistantContent || null
|
|
2470
2521
|
);
|
|
2471
2522
|
this.appendAndPersist(
|
|
2472
|
-
this.assistantMessage(
|
|
2523
|
+
this.assistantMessage(
|
|
2524
|
+
assistantContent,
|
|
2525
|
+
repairedCalls,
|
|
2526
|
+
this.modelForCurrentCall(),
|
|
2527
|
+
reasoningContent
|
|
2528
|
+
)
|
|
2473
2529
|
);
|
|
2474
2530
|
yield {
|
|
2475
2531
|
turn: this._turn,
|
|
@@ -2484,7 +2540,7 @@ var CacheFirstLoop = class {
|
|
|
2484
2540
|
yield {
|
|
2485
2541
|
turn: this._turn,
|
|
2486
2542
|
role: "warning",
|
|
2487
|
-
content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this.
|
|
2543
|
+
content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this.formatFailureBreakdown()}. Next turn falls back to ${this.model} unless /pro is armed.`
|
|
2488
2544
|
};
|
|
2489
2545
|
}
|
|
2490
2546
|
if (report.stormsBroken > 0) {
|
|
@@ -2498,6 +2554,11 @@ var CacheFirstLoop = class {
|
|
|
2498
2554
|
};
|
|
2499
2555
|
}
|
|
2500
2556
|
if (repairedCalls.length === 0) {
|
|
2557
|
+
const allSuppressed = report.stormsBroken > 0 && toolCalls.length > 0;
|
|
2558
|
+
if (allSuppressed) {
|
|
2559
|
+
yield* this.forceSummaryAfterIterLimit({ reason: "stuck" });
|
|
2560
|
+
return;
|
|
2561
|
+
}
|
|
2501
2562
|
this.autoCompactToolResultsOnTurnEnd();
|
|
2502
2563
|
yield { turn: this._turn, role: "done", content: assistantContent };
|
|
2503
2564
|
return;
|
|
@@ -2608,7 +2669,7 @@ ${reason}`;
|
|
|
2608
2669
|
yield {
|
|
2609
2670
|
turn: this._turn,
|
|
2610
2671
|
role: "warning",
|
|
2611
|
-
content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this.
|
|
2672
|
+
content: `\u21E7 auto-escalating to ${ESCALATION_MODEL} for the rest of this turn \u2014 flash hit ${this.formatFailureBreakdown()}. Next turn falls back to ${this.model} unless /pro is armed.`
|
|
2612
2673
|
};
|
|
2613
2674
|
}
|
|
2614
2675
|
yield {
|
|
@@ -2652,7 +2713,9 @@ ${reason}`;
|
|
|
2652
2713
|
|
|
2653
2714
|
${summary}`;
|
|
2654
2715
|
const summaryStats = this.stats.record(this._turn, summaryModel, resp.usage ?? new Usage());
|
|
2655
|
-
this.appendAndPersist(
|
|
2716
|
+
this.appendAndPersist(
|
|
2717
|
+
this.assistantMessage(summary, [], summaryModel, resp.reasoningContent)
|
|
2718
|
+
);
|
|
2656
2719
|
yield {
|
|
2657
2720
|
turn: this._turn,
|
|
2658
2721
|
role: "assistant_final",
|
|
@@ -2683,28 +2746,39 @@ ${summary}`;
|
|
|
2683
2746
|
}
|
|
2684
2747
|
return final;
|
|
2685
2748
|
}
|
|
2686
|
-
|
|
2749
|
+
/**
|
|
2750
|
+
* Build an assistant message for the log. The `producingModel` arg is
|
|
2751
|
+
* the model that actually generated this turn (flash, pro, the
|
|
2752
|
+
* forced-summary flash call, `this.model` for synthetics, etc.) —
|
|
2753
|
+
* NOT `this.model`, because escalation + forced-summary can both
|
|
2754
|
+
* route a single turn to a different model.
|
|
2755
|
+
*
|
|
2756
|
+
* The single invariant this encodes: if the producing model is
|
|
2757
|
+
* thinking-mode, `reasoning_content` MUST be present on the
|
|
2758
|
+
* persisted message — even as an empty string. DeepSeek's validator
|
|
2759
|
+
* 400s the NEXT request if any historical thinking-mode assistant
|
|
2760
|
+
* turn is missing it. We used to gate on `reasoning.length > 0`,
|
|
2761
|
+
* which silently dropped the field whenever the stream emitted zero
|
|
2762
|
+
* reasoning deltas or the API returned `reasoning_content: null` —
|
|
2763
|
+
* both legitimate edge cases the 0.5.15/0.5.18 fixes missed.
|
|
2764
|
+
*/
|
|
2765
|
+
assistantMessage(content, toolCalls, producingModel, reasoningContent) {
|
|
2687
2766
|
const msg = { role: "assistant", content };
|
|
2688
2767
|
if (toolCalls.length > 0) msg.tool_calls = toolCalls;
|
|
2689
|
-
if (
|
|
2690
|
-
msg.reasoning_content = reasoningContent;
|
|
2768
|
+
if (isThinkingModeModel(producingModel)) {
|
|
2769
|
+
msg.reasoning_content = reasoningContent ?? "";
|
|
2691
2770
|
}
|
|
2692
2771
|
return msg;
|
|
2693
2772
|
}
|
|
2694
2773
|
/**
|
|
2695
|
-
*
|
|
2696
|
-
*
|
|
2697
|
-
*
|
|
2698
|
-
*
|
|
2699
|
-
*
|
|
2700
|
-
* doesn't care — field stays absent there.
|
|
2774
|
+
* Synthetic assistant message (abort notices, future system injections)
|
|
2775
|
+
* — no real API round trip. Delegates to {@link assistantMessage} with
|
|
2776
|
+
* `this.model` as the stand-in producer, so the same thinking-mode
|
|
2777
|
+
* invariant applies: reasoner sessions get an empty-string
|
|
2778
|
+
* `reasoning_content`; V3 sessions get nothing.
|
|
2701
2779
|
*/
|
|
2702
2780
|
syntheticAssistantMessage(content) {
|
|
2703
|
-
|
|
2704
|
-
if (isThinkingModeModel(this.model)) {
|
|
2705
|
-
msg.reasoning_content = "";
|
|
2706
|
-
}
|
|
2707
|
-
return msg;
|
|
2781
|
+
return this.assistantMessage(content, [], this.model, "");
|
|
2708
2782
|
}
|
|
2709
2783
|
};
|
|
2710
2784
|
function isThinkingModeModel(model) {
|
|
@@ -2753,11 +2827,15 @@ function reasonPrefixFor(reason, iterCap) {
|
|
|
2753
2827
|
if (reason === "context-guard") {
|
|
2754
2828
|
return "[context budget running low \u2014 summarizing before the next call would overflow]";
|
|
2755
2829
|
}
|
|
2830
|
+
if (reason === "stuck") {
|
|
2831
|
+
return "[stuck on a repeated tool call \u2014 explaining what was tried and what's blocking progress]";
|
|
2832
|
+
}
|
|
2756
2833
|
return `[tool-call budget (${iterCap}) reached \u2014 forcing summary from what I found]`;
|
|
2757
2834
|
}
|
|
2758
2835
|
function errorLabelFor(reason, iterCap) {
|
|
2759
2836
|
if (reason === "aborted") return "aborted by user";
|
|
2760
2837
|
if (reason === "context-guard") return "context-guard triggered (prompt > 80% of window)";
|
|
2838
|
+
if (reason === "stuck") return "stuck (repeated tool call suppressed by storm-breaker)";
|
|
2761
2839
|
return `tool-call budget (${iterCap}) reached`;
|
|
2762
2840
|
}
|
|
2763
2841
|
function summarizeBranch(chosen, samples) {
|
|
@@ -2897,6 +2975,19 @@ function healLoadedMessages(messages, maxChars) {
|
|
|
2897
2975
|
const healedCount = shrunk.healedCount + paired.droppedAssistantCalls + paired.droppedStrayTools;
|
|
2898
2976
|
return { messages: paired.messages, healedCount, healedFrom: shrunk.healedFrom };
|
|
2899
2977
|
}
|
|
2978
|
+
/**
 * Ensure every assistant message carries a `reasoning_content` value when
 * the session model is a thinking-mode model.
 *
 * Assistant messages whose `reasoning_content` is absent, `undefined`, or
 * `null` are copied with `reasoning_content: ""`; all other messages are
 * returned untouched (same object identity). The input array is never
 * mutated.
 *
 * NOTE(review): a `null` value is treated the same as a missing field so
 * this agrees with how new assistant messages are built (null is normalised
 * to an empty string there) — confirm the upstream API rejects null too.
 *
 * @param {Array<object>} messages Loaded session messages.
 * @param {string} model Session model id.
 * @returns {{ messages: Array<object>, stampedCount: number }} The
 *   (possibly copied) list and how many messages were stamped.
 */
function stampMissingReasoningForThinkingMode(messages, model) {
  // Non-thinking models need nothing; hand the list back as-is.
  if (!isThinkingModeModel(model)) {
    return { messages, stampedCount: 0 };
  }
  let stampedCount = 0;
  const out = messages.map((msg) => {
    if (msg.role !== "assistant") return msg;
    // `!= null` keeps any real value (including "") and rejects both
    // undefined and null.
    if (msg.reasoning_content != null) return msg;
    stampedCount += 1;
    return { ...msg, reasoning_content: "" };
  });
  return { messages: out, stampedCount };
}
|
|
2900
2991
|
function healLoadedMessagesByTokens(messages, maxTokens) {
|
|
2901
2992
|
const shrunk = shrinkOversizedToolResultsByTokens(messages, maxTokens);
|
|
2902
2993
|
const paired = fixToolCallPairing(shrunk.messages);
|
|
@@ -3212,9 +3303,13 @@ var TUI_FORMATTING_RULES = `Formatting (rendered in a TUI with a real markdown r
|
|
|
3212
3303
|
- For flow charts and diagrams: a plain bullet list with \`\u2192\` or \`\u2193\` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.`;
|
|
3213
3304
|
var ESCALATION_CONTRACT = `Cost-aware escalation (when you're running on deepseek-v4-flash):
|
|
3214
3305
|
|
|
3215
|
-
If a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the
|
|
3306
|
+
If a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.
|
|
3216
3307
|
|
|
3217
|
-
|
|
3308
|
+
Two accepted forms:
|
|
3309
|
+
- \`<<<NEEDS_PRO>>>\` \u2014 bare marker, no rationale.
|
|
3310
|
+
- \`<<<NEEDS_PRO: <one-sentence reason>>>>\` \u2014 preferred. The reason text appears in the user-visible warning ("\u21E7 flash requested escalation \u2014 <your reason>"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested \`>\` characters. Examples: \`<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>\` or \`<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>\`.
|
|
3311
|
+
|
|
3312
|
+
Do NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).`;
|
|
3218
3313
|
var NEGATIVE_CLAIM_RULE = `Negative claims ("X is missing", "Y isn't implemented", "there's no Z") are the #1 hallucination shape. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.
|
|
3219
3314
|
|
|
3220
3315
|
If you have a search tool (\`search_content\`, \`grep\`, web search), call it FIRST before asserting absence:
|
|
@@ -4439,28 +4534,233 @@ function registerMemoryTools(registry, opts = {}) {
|
|
|
4439
4534
|
return registry;
|
|
4440
4535
|
}
|
|
4441
4536
|
|
|
4442
|
-
// src/tools/
|
|
4537
|
+
// src/tools/choice.ts
|
|
4538
|
+
/**
 * Thrown by the `ask_choice` tool to hand a question and its options to the
 * host. The message doubles as the instruction text shown to the model.
 */
var ChoiceRequestedError = class extends Error {
  question;
  options;
  allowCustom;
  /**
   * @param {string} question Question to show the user.
   * @param {Array<object>} options Options offered to the user.
   * @param {boolean} allowCustom Whether a free-form answer is offered.
   */
  constructor(question, options, allowCustom) {
    super(
      "ChoiceRequestedError: choice submitted. STOP calling tools now \u2014 the TUI has shown the options to the user. Wait for their next message; it will either be 'user picked <id>' (carry on with that branch), 'user answered: <text>' (custom free-form reply; read and proceed), or 'user cancelled the choice' (drop the question and ask what they want instead). Don't call any tools in the meantime."
    );
    Object.assign(this, {
      name: "ChoiceRequestedError",
      question,
      options,
      allowCustom
    });
  }
  /**
   * Structured tool-result payload: the error text plus the question,
   * options and allowCustom flag.
   */
  toToolResult() {
    const { name, message, question, options, allowCustom } = this;
    return { error: `${name}: ${message}`, question, options, allowCustom };
  }
};
|
|
4560
|
+
/**
 * Normalise a raw `options` argument into a clean list of
 * `{ id, title, summary? }` records.
 *
 * Non-array input yields an empty list. Entries that are not objects, or
 * whose trimmed `id` / `title` is not a non-empty string, are dropped. When
 * an id repeats, the first occurrence wins. `summary` is kept only when it
 * is a string that is non-empty after trimming.
 */
function sanitizeOptions(raw) {
  if (!Array.isArray(raw)) return [];
  // Non-strings collapse to "" so a single falsy check covers both cases.
  const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");
  // Map keeps insertion order, giving first-wins de-duplication by id.
  const byId = /* @__PURE__ */ new Map();
  for (const entry of raw) {
    if (entry === null || typeof entry !== "object") continue;
    const id = trimmedOrEmpty(entry.id);
    const title = trimmedOrEmpty(entry.title);
    if (id === "" || title === "") continue;
    if (byId.has(id)) continue;
    const cleaned = { id, title };
    const summary = trimmedOrEmpty(entry.summary);
    if (summary !== "") cleaned.summary = summary;
    byId.set(id, cleaned);
  }
  return [...byId.values()];
}
|
|
4579
|
+
/**
 * Register the `ask_choice` tool on `registry`.
 *
 * The tool validates its arguments, notifies `opts.onChoiceRequested` (when
 * provided) with the question and sanitized options, and then always throws
 * a `ChoiceRequestedError` carrying the question, options and `allowCustom`
 * flag.
 *
 * @param {object} registry Tool registry exposing `register(spec)`.
 * @param {object} [opts]
 * @param {Function} [opts.onChoiceRequested] Called as `(question, options)`.
 * @returns {object} The same registry, for chaining.
 * @throws {Error} From the tool's `fn` when the question is missing or the
 *   number of well-formed options is outside 2..6.
 */
function registerChoiceTool(registry, opts = {}) {
  registry.register({
    name: "ask_choice",
    description: "Present 2\u20136 alternatives to the user. The principle: if the user is supposed to pick, the tool picks \u2014 you don't enumerate the choices as prose. Prose menus have no picker in this TUI, so the user gets a wall of text to scroll through and a letter to type, strictly worse than the magenta picker this tool renders. Call it whenever (a) the user has asked for options, (b) you've analyzed multiple approaches and the final call is theirs, or (c) it's a preference fork you can't resolve without them. Skip it when one option is clearly best (just do it, or submit_plan) or a free-form text answer fits (ask in prose). Keep option ids short and stable (A/B/C). Each option: title + optional summary. allowCustom=true when their real answer might not fit. Max 6 options \u2014 narrow first if more. A one-sentence lead-in before the call is fine; don't repeat the options in it.",
    readOnly: true,
    parameters: {
      type: "object",
      properties: {
        question: {
          type: "string",
          description: "The question to put in front of the user. One sentence. Don't repeat the options in the question text \u2014 the picker renders them separately."
        },
        options: {
          type: "array",
          // Range matches the 2..6 bounds enforced in `fn` below.
          description: "2\u20136 alternatives. Each needs a stable id and a short title; summary is optional.",
          items: {
            type: "object",
            properties: {
              id: { type: "string", description: "Short stable id (A, B, C, or option-1)." },
              title: { type: "string", description: "One-line title shown as the option label." },
              summary: {
                type: "string",
                description: "Optional. A second dimmed line with more detail. Keep under ~80 chars."
              }
            },
            required: ["id", "title"]
          }
        },
        allowCustom: {
          type: "boolean",
          description: "If true, the picker shows a 'Let me type my own answer' escape hatch. Default false. Turn on when the user's real answer might not fit any of your pre-defined options."
        }
      },
      required: ["question", "options"]
    },
    fn: async (args) => {
      // A non-string question (number, object, ...) is treated as missing so
      // the caller gets the actionable error below rather than a TypeError
      // from calling `.trim()` on a non-string.
      const rawQuestion = args?.question;
      const question = typeof rawQuestion === "string" ? rawQuestion.trim() : "";
      if (!question) {
        throw new Error(
          "ask_choice: question is required \u2014 write one sentence explaining the decision."
        );
      }
      const options = sanitizeOptions(args?.options);
      if (options.length < 2) {
        throw new Error(
          "ask_choice: need at least 2 well-formed options (each with a non-empty id and title). If you just need a text answer, ask the user in plain assistant text instead."
        );
      }
      if (options.length > 6) {
        throw new Error(
          "ask_choice: too many options (max 6). If you really have this many branches, split into two sequential ask_choice calls or narrow down first."
        );
      }
      // Only a literal `true` enables the free-form escape hatch.
      const allowCustom = args?.allowCustom === true;
      opts.onChoiceRequested?.(question, options);
      // Throwing is how the choice is handed off; the error carries the data.
      throw new ChoiceRequestedError(question, options, allowCustom);
    }
  });
  return registry;
}
|
|
4639
|
+
|
|
4640
|
+
// src/tools/plan-errors.ts
|
|
4443
4641
|
var PlanProposedError = class extends Error {
|
|
4444
4642
|
plan;
|
|
4445
|
-
|
|
4643
|
+
steps;
|
|
4644
|
+
summary;
|
|
4645
|
+
constructor(plan, steps, summary) {
|
|
4446
4646
|
super(
|
|
4447
4647
|
"PlanProposedError: plan submitted. STOP calling tools now \u2014 the TUI has shown the plan to the user. Wait for their next message; it will either approve (you'll then implement the plan), request a refinement (you should explore more and submit an updated plan), or cancel (drop the plan and ask what they want instead). Don't call any tools in the meantime."
|
|
4448
4648
|
);
|
|
4449
4649
|
this.name = "PlanProposedError";
|
|
4450
4650
|
this.plan = plan;
|
|
4651
|
+
this.steps = steps;
|
|
4652
|
+
this.summary = summary;
|
|
4451
4653
|
}
|
|
4452
4654
|
/**
|
|
4453
4655
|
* Structured tool-result shape. Consumed by the TUI to extract the
|
|
4454
|
-
* plan without regex-scraping the error message.
|
|
4656
|
+
* plan without regex-scraping the error message. Optional fields
|
|
4657
|
+
* are omitted from the payload when absent so consumers don't see
|
|
4658
|
+
* `undefined` keys in the JSON.
|
|
4455
4659
|
*/
|
|
4456
4660
|
toToolResult() {
|
|
4457
|
-
|
|
4661
|
+
const payload = {
|
|
4662
|
+
error: `${this.name}: ${this.message}`,
|
|
4663
|
+
plan: this.plan
|
|
4664
|
+
};
|
|
4665
|
+
if (this.steps && this.steps.length > 0) payload.steps = this.steps;
|
|
4666
|
+
if (this.summary) payload.summary = this.summary;
|
|
4667
|
+
return payload;
|
|
4458
4668
|
}
|
|
4459
4669
|
};
|
|
4460
|
-
|
|
4670
|
+
/**
 * Thrown when a plan step is reported complete. Carries the step id, its
 * result, and optional title / notes; the message doubles as the
 * instruction text shown to the model.
 */
var PlanCheckpointError = class extends Error {
  stepId;
  title;
  result;
  notes;
  /**
   * @param {{ stepId: *, title?: *, result: *, notes?: * }} update
   */
  constructor(update) {
    super(
      "PlanCheckpointError: step complete \u2014 STOP calling tools. The TUI has paused the plan for user review. Wait for the next user message; it will either say continue (proceed to the next step), request a revision (adjust the remaining plan), or stop (summarize and end)."
    );
    this.name = "PlanCheckpointError";
    const { stepId, title, result, notes } = update;
    Object.assign(this, { stepId, title, result, notes });
  }
  /**
   * Structured tool-result payload. `title` and `notes` are included only
   * when truthy.
   */
  toToolResult() {
    const { name, message, stepId, result, title, notes } = this;
    return {
      error: `${name}: ${message}`,
      kind: "step_completed",
      stepId,
      result,
      ...(title ? { title } : {}),
      ...(notes ? { notes } : {})
    };
  }
};
|
|
4697
|
+
/**
 * Thrown when a revision to the remaining plan steps is proposed. Carries
 * the reason, the replacement step list, and an optional summary; the
 * message doubles as the instruction text shown to the model.
 */
var PlanRevisionProposedError = class extends Error {
  reason;
  remainingSteps;
  summary;
  /**
   * @param {*} reason Why the plan is being revised.
   * @param {*} remainingSteps Replacement for the not-yet-done steps.
   * @param {*} [summary] Optional updated plan summary.
   */
  constructor(reason, remainingSteps, summary) {
    super(
      "PlanRevisionProposedError: revision submitted. STOP calling tools now \u2014 the TUI has paused for the user to review your proposed change. Wait for their next message; it will say 'revision accepted' (proceed with the new step list), 'revision rejected' (keep the original plan and continue), or 'revision cancelled' (drop the proposal entirely). Don't call any tools in the meantime."
    );
    Object.assign(this, {
      name: "PlanRevisionProposedError",
      reason,
      remainingSteps,
      summary
    });
  }
  /**
   * Structured tool-result payload. `summary` is included only when truthy.
   */
  toToolResult() {
    const { name, message, reason, remainingSteps, summary } = this;
    const base = { error: `${name}: ${message}`, reason, remainingSteps };
    return summary ? { ...base, summary } : base;
  }
};
|
|
4720
|
+
|
|
4721
|
+
// src/tools/plan-core.ts
|
|
4722
|
+
var SUBMIT_PLAN_DESCRIPTION = "Submit ONE concrete plan you've already decided on. Use this for tasks that warrant a review gate \u2014 multi-file refactors, architecture changes, anything that would be expensive or confusing to undo. Skip it for small fixes (one-line typo, obvious bug with a clear fix) \u2014 just make the change. The user will either approve (you then implement it), ask for refinement, or cancel. If the user has already enabled /plan mode, writes are blocked at dispatch and you MUST use this. CRITICAL: do NOT use submit_plan to present alternative routes (A/B/C, option 1/2/3) for the user to pick from \u2014 the picker only exposes approve/refine/cancel, so a menu plan strands the user with no way to choose. For branching decisions, call `ask_choice` instead; only call submit_plan once the user has picked a direction and you have a single actionable plan. Write the plan as markdown with a one-line summary, a bulleted list of files to touch and what will change, and any risks or open questions. STRONGLY PREFERRED: pass `steps` \u2014 an array of {id, title, action, risk?} \u2014 so the UI renders a structured step list above the approval picker and tracks per-step progress. Use risk='high' for steps that touch prod data / break public APIs / are hard to undo; 'med' for non-trivial but reversible (multi-file edits, schema tweaks); 'low' for safe local work. After each step, call `mark_step_complete` so the user sees progress ticks.";
|
|
4723
|
+
var MARK_STEP_COMPLETE_DESCRIPTION = "Mark one step of the approved plan as done AND pause for the user to review. Call this after finishing each step. The TUI shows a \u2713 progress row and mounts a Continue / Revise / Stop picker \u2014 you MUST stop calling tools after this fires and wait for the next user message. Pass the `stepId` from the plan's steps array, a short `result` (what you did), and optional `notes` for anything surprising (errors, scope changes, follow-ups). This tool doesn't change any files. Don't call it if the plan didn't include structured steps, and don't invent ids that weren't in the original plan.";
|
|
4724
|
+
// Model-facing description text for the `revise_plan` tool; it is passed as the
// `description` field when registerRevisePlan registers the tool. This is a runtime
// string the model reads — editing the wording changes model behavior.
var REVISE_PLAN_DESCRIPTION = "Surgically replace the REMAINING steps of an in-flight plan. Call this when the user has given feedback at a checkpoint that warrants a structured plan change \u2014 skip a step, swap two steps, add a new step, change risk, etc. Pass: `reason` (one sentence why), `remainingSteps` (the new tail of the plan, replacing whatever steps haven't been done yet), and optional `summary` (updated one-line plan summary). Done steps are NEVER touched \u2014 keep them out of `remainingSteps`. The TUI shows a diff (removed in red, kept in gray, added in green) and the user accepts or rejects. Don't call this for trivial mid-step adjustments \u2014 just keep executing. Don't call submit_plan for revisions either \u2014 that resets the whole plan including completed steps. Use submit_plan only when the entire approach has changed; use revise_plan when the tail needs editing.";
|
|
4725
|
+
// JSON-schema fragment for ONE plan step. The same object is used as the `items`
// schema of both `submit_plan.steps` and `revise_plan.remainingSteps`, so the two
// tools always advertise an identical step shape to the model.
var STEP_ITEM_SCHEMA = {
  type: "object",
  properties: {
    // Identifier the model later echoes back when reporting progress on a step.
    id: {
      type: "string",
      description: "Stable id, e.g. step-1."
    },
    title: {
      type: "string",
      description: "Short imperative title."
    },
    action: {
      type: "string",
      description: "One-sentence description of the concrete action."
    },
    // Optional; the only accepted labels are the three listed in `enum`.
    risk: {
      type: "string",
      enum: ["low", "med", "high"],
      description: "Self-assessed risk. 'high' = hard-to-undo / touches prod / breaks API; 'med' = non-trivial but reversible; 'low' = safe local work. The UI shows a colored dot per step so the user knows where to focus review. Omit if you're unsure."
    }
  },
  // `risk` is deliberately absent from the required list.
  required: ["id", "title", "action"]
};
|
|
4739
|
+
/**
 * Normalise a model-supplied risk label to one of the labels the step schema
 * allows ("low" | "med" | "high").
 *
 * Tool arguments come from an LLM, so the value is treated as untrusted:
 * surrounding whitespace and letter case are ignored, and the common spelling
 * "medium" is accepted as an alias for "med". Anything else — including
 * non-string values — yields `undefined` so the caller can simply omit the field.
 *
 * @param {unknown} raw - The `risk` value as received in the tool arguments.
 * @returns {"low" | "med" | "high" | undefined} The canonical label, or
 *   `undefined` when the input is not a recognisable risk level.
 */
function sanitizeRisk(raw) {
  if (typeof raw !== "string") return undefined;
  const label = raw.trim().toLowerCase();
  // Models frequently spell the middle level out in full; fold it onto the enum value.
  if (label === "medium") return "med";
  if (label === "low" || label === "med" || label === "high") return label;
  return undefined;
}
|
|
4743
|
+
/**
 * Validate and normalise a model-supplied list of plan steps.
 *
 * Each entry must be an object whose `id`, `title` and `action` are strings that
 * are non-empty after trimming; entries failing that are skipped rather than
 * rejected, so one malformed step does not discard the whole list. `risk` is
 * passed through `sanitizeRisk` and only attached when it is recognised.
 *
 * Step ids are how later tool calls refer back to a step, so an id that repeats
 * within the list is ambiguous: the first occurrence wins and later duplicates
 * are dropped.
 *
 * @param {unknown} raw - The `steps` / `remainingSteps` tool argument.
 * @returns {Array<{id: string, title: string, action: string, risk?: string}> | undefined}
 *   The cleaned steps in their original order, or `undefined` when `raw` is not
 *   an array or no entry survives validation.
 */
function sanitizeSteps(raw) {
  if (!Array.isArray(raw)) return undefined;
  // Coerce any non-string field to "" so the emptiness check below rejects it.
  const asTrimmedText = (value) => (typeof value === "string" ? value.trim() : "");
  const seenIds = new Set();
  const steps = [];
  for (const entry of raw) {
    if (!entry || typeof entry !== "object") continue;
    const id = asTrimmedText(entry.id);
    const title = asTrimmedText(entry.title);
    const action = asTrimmedText(entry.action);
    if (!id || !title || !action) continue;
    // Keep ids unique so a step can be addressed unambiguously later on.
    if (seenIds.has(id)) continue;
    seenIds.add(id);
    const step = { id, title, action };
    const risk = sanitizeRisk(entry.risk);
    if (risk) step.risk = risk;
    steps.push(step);
  }
  return steps.length > 0 ? steps : undefined;
}
|
|
4760
|
+
function registerSubmitPlan(registry, opts) {
|
|
4461
4761
|
registry.register({
|
|
4462
4762
|
name: "submit_plan",
|
|
4463
|
-
description:
|
|
4763
|
+
description: SUBMIT_PLAN_DESCRIPTION,
|
|
4464
4764
|
readOnly: true,
|
|
4465
4765
|
parameters: {
|
|
4466
4766
|
type: "object",
|
|
@@ -4468,6 +4768,15 @@ function registerPlanTool(registry, opts = {}) {
|
|
|
4468
4768
|
plan: {
|
|
4469
4769
|
type: "string",
|
|
4470
4770
|
description: "Markdown-formatted plan. Lead with a one-sentence summary. Then a file-by-file breakdown of what you'll change and why. Flag any risks or open questions at the end so the user can weigh in before you start."
|
|
4771
|
+
},
|
|
4772
|
+
steps: {
|
|
4773
|
+
type: "array",
|
|
4774
|
+
description: "Structured step list (strongly recommended). When provided, the UI renders a compact step list above the approval picker AND tracks per-step progress via `mark_step_complete`. Use stable ids (step-1, step-2, ...). Skip only for tiny one-step plans where the markdown body is enough.",
|
|
4775
|
+
items: STEP_ITEM_SCHEMA
|
|
4776
|
+
},
|
|
4777
|
+
summary: {
|
|
4778
|
+
type: "string",
|
|
4779
|
+
description: "Optional. One-sentence human-friendly title for the plan, ~80 chars max. Surfaces in the PlanConfirm picker header and in /plans listings ('\u25B8 refactor auth into signed tokens \xB7 2/5 done'). Skip for trivial plans where the first line of the markdown body is already short and clear."
|
|
4471
4780
|
}
|
|
4472
4781
|
},
|
|
4473
4782
|
required: ["plan"]
|
|
@@ -4477,10 +4786,108 @@ function registerPlanTool(registry, opts = {}) {
|
|
|
4477
4786
|
if (!plan) {
|
|
4478
4787
|
throw new Error("submit_plan: empty plan \u2014 write a markdown plan and try again.");
|
|
4479
4788
|
}
|
|
4480
|
-
|
|
4481
|
-
|
|
4789
|
+
const steps = sanitizeSteps(args?.steps);
|
|
4790
|
+
const summary = typeof args?.summary === "string" ? args.summary.trim() || void 0 : void 0;
|
|
4791
|
+
opts.onPlanSubmitted?.(plan, steps);
|
|
4792
|
+
throw new PlanProposedError(plan, steps, summary);
|
|
4482
4793
|
}
|
|
4483
4794
|
});
|
|
4795
|
+
}
|
|
4796
|
+
/**
 * Register the `mark_step_complete` tool on `registry`.
 *
 * The tool handler validates its arguments, notifies `opts.onStepCompleted`
 * (when provided) with a `{ kind: "step_completed", ... }` update, and then
 * ALWAYS throws: a plain `Error` for invalid input, otherwise a
 * `PlanCheckpointError` carrying the step details. NOTE(review): the checkpoint
 * error is presumably caught upstream to pause the loop and show the
 * Continue / Revise / Stop picker — confirm against the loop's error handling.
 *
 * @param {{ register: Function }} registry - Tool registry to add the tool to.
 * @param {{ onStepCompleted?: Function }} opts - Optional hooks; must be an object.
 * @returns {void}
 */
function registerMarkStepComplete(registry, opts) {
  // Arguments are produced by the model, so never assume they are strings:
  // calling .trim() on a number/object would surface as an opaque TypeError
  // instead of the actionable validation messages below.
  const trimmedText = (value) => (typeof value === "string" ? value.trim() : "");

  registry.register({
    name: "mark_step_complete",
    description: MARK_STEP_COMPLETE_DESCRIPTION,
    readOnly: true,
    parameters: {
      type: "object",
      properties: {
        stepId: {
          type: "string",
          description: "The id of the step being marked complete. Must match one from submit_plan's steps array."
        },
        title: {
          type: "string",
          description: "Optional. The step's title, echoed back for the UI. If omitted, the UI falls back to the id."
        },
        result: {
          type: "string",
          description: "One-sentence summary of what was done for this step."
        },
        notes: {
          type: "string",
          description: "Optional. Anything surprising \u2014 blockers hit, assumptions revised, follow-ups for later steps."
        }
      },
      required: ["stepId", "result"]
    },
    fn: async (args) => {
      const stepId = trimmedText(args?.stepId);
      const result = trimmedText(args?.result);
      if (!stepId) {
        throw new Error("mark_step_complete: stepId is required.");
      }
      if (!result) {
        throw new Error(
          "mark_step_complete: result is required \u2014 say in one sentence what you did."
        );
      }
      // Blank or non-string optional fields collapse to undefined.
      const title = trimmedText(args?.title) || undefined;
      const notes = trimmedText(args?.notes) || undefined;
      // The callback payload only carries optional keys that are actually set.
      const update = { kind: "step_completed", stepId, result };
      if (title) update.title = title;
      if (notes) update.notes = notes;
      opts.onStepCompleted?.(update);
      throw new PlanCheckpointError({ stepId, title, result, notes });
    }
  });
}
|
|
4844
|
+
/**
 * Register the `revise_plan` tool on `registry`.
 *
 * The tool handler validates `reason` and `remainingSteps` (via `sanitizeSteps`),
 * notifies `opts.onPlanRevisionProposed` (when provided), and then ALWAYS
 * throws: a plain `Error` for invalid input, otherwise a
 * `PlanRevisionProposedError(reason, remainingSteps, summary)`.
 * NOTE(review): that error is presumably caught upstream to show the revision
 * diff for the user to accept or reject — confirm against the loop's handling.
 *
 * @param {{ register: Function }} registry - Tool registry to add the tool to.
 * @param {{ onPlanRevisionProposed?: Function }} opts - Optional hooks; must be an object.
 * @returns {void}
 */
function registerRevisePlan(registry, opts) {
  // Arguments are produced by the model, so never assume they are strings:
  // calling .trim() on a number/object would surface as an opaque TypeError
  // instead of the actionable validation message below.
  const trimmedText = (value) => (typeof value === "string" ? value.trim() : "");

  registry.register({
    name: "revise_plan",
    description: REVISE_PLAN_DESCRIPTION,
    readOnly: true,
    parameters: {
      type: "object",
      properties: {
        reason: {
          type: "string",
          description: "One sentence explaining why you're revising \u2014 what the user asked for, what changed your assessment."
        },
        remainingSteps: {
          type: "array",
          description: "The new tail of the plan \u2014 what should run from here on. Each entry: {id, title, action, risk?}. Use stable ids; reuse old ids when a step is just being adjusted, generate new ones for genuinely new steps.",
          items: STEP_ITEM_SCHEMA
        },
        summary: {
          type: "string",
          description: "Optional. Updated one-line plan summary if the overall framing has shifted."
        }
      },
      required: ["reason", "remainingSteps"]
    },
    fn: async (args) => {
      const reason = trimmedText(args?.reason);
      if (!reason) {
        throw new Error(
          "revise_plan: reason is required \u2014 write one sentence explaining the change."
        );
      }
      const remainingSteps = sanitizeSteps(args?.remainingSteps);
      // An empty tail is rejected: stopping is handled by the picker, not by a revision.
      if (!remainingSteps || remainingSteps.length === 0) {
        throw new Error(
          "revise_plan: remainingSteps must be a non-empty array of well-formed steps. If the user wants to STOP rather than continue, don't revise \u2014 the picker has its own Stop option."
        );
      }
      // A blank or non-string summary collapses to undefined.
      const summary = trimmedText(args?.summary) || undefined;
      opts.onPlanRevisionProposed?.(reason, remainingSteps, summary);
      throw new PlanRevisionProposedError(reason, remainingSteps, summary);
    }
  });
}
|
|
4887
|
+
/**
 * Register the full plan tool family — `submit_plan`, `mark_step_complete`
 * and `revise_plan` — on `registry`, sharing one options object between them.
 *
 * @param {{ register: Function }} registry - Tool registry to add the tools to.
 * @param {object} [opts={}] - Optional hooks forwarded to each registrar.
 *   `null` is tolerated and treated like an omitted argument.
 * @returns {object} The same `registry`, to allow chaining.
 */
function registerPlanTool(registry, opts = {}) {
  // A default parameter only covers `undefined`; an explicit `null` would make
  // every `opts.onX?.()` lookup in the tool handlers throw at call time.
  const options = opts ?? {};
  registerSubmitPlan(registry, options);
  registerMarkStepComplete(registry, options);
  registerRevisePlan(registry, options);
  return registry;
}
|
|
4486
4893
|
|
|
@@ -4657,8 +5064,8 @@ function registerSubagentTool(parentRegistry, opts) {
|
|
|
4657
5064
|
},
|
|
4658
5065
|
model: {
|
|
4659
5066
|
type: "string",
|
|
4660
|
-
enum: ["deepseek-v4-flash", "deepseek-v4-pro"
|
|
4661
|
-
description: "Which DeepSeek model the subagent runs on. Default is 'deepseek-v4-
|
|
5067
|
+
enum: ["deepseek-v4-flash", "deepseek-v4-pro"],
|
|
5068
|
+
description: "Which DeepSeek model the subagent runs on. Default is 'deepseek-v4-flash' \u2014 cheap and fast, fine for explore/research-style subtasks. Override to 'deepseek-v4-pro' (~12\xD7 more expensive) when the subtask genuinely needs the stronger model: cross-file architecture, subtle bug hunts, anything where flash has empirically underperformed."
|
|
4662
5069
|
}
|
|
4663
5070
|
},
|
|
4664
5071
|
required: ["task"]
|
|
@@ -5171,7 +5578,15 @@ async function runCommand(cmd, opts) {
|
|
|
5171
5578
|
shell: false,
|
|
5172
5579
|
// no shell-expansion — see header comment
|
|
5173
5580
|
windowsHide: true,
|
|
5174
|
-
|
|
5581
|
+
// PYTHONIOENCODING + PYTHONUTF8 force any spawned Python child
|
|
5582
|
+
// (run_command running `python script.py`, etc.) to emit UTF-8
|
|
5583
|
+
// on stdout/stderr. Without this, Chinese-Windows defaults
|
|
5584
|
+
// Python's stdout encoder to GBK and `print("…")` raises
|
|
5585
|
+
// UnicodeEncodeError on emoji / non-GBK chars — the model then
|
|
5586
|
+
// sees a Python traceback instead of the script's real output
|
|
5587
|
+
// and goes around in circles trying to fix the wrong problem.
|
|
5588
|
+
// Harmless on non-Python processes (env vars they don't read).
|
|
5589
|
+
env: { ...process.env, PYTHONIOENCODING: "utf-8", PYTHONUTF8: "1" }
|
|
5175
5590
|
};
|
|
5176
5591
|
const { bin, args, spawnOverrides } = prepareSpawn(argv);
|
|
5177
5592
|
const effectiveSpawnOpts = { ...spawnOpts, ...spawnOverrides };
|
|
@@ -5620,7 +6035,7 @@ function registerWebTools(registry, opts = {}) {
|
|
|
5620
6035
|
const maxFetchChars = opts.maxFetchChars ?? DEFAULT_FETCH_MAX_CHARS;
|
|
5621
6036
|
registry.register({
|
|
5622
6037
|
name: "web_search",
|
|
5623
|
-
description: "Search the public web. Returns ranked results with title, url, and snippet.
|
|
6038
|
+
description: "Search the public web. Returns ranked results with title, url, and snippet. Call this when the answer's correctness depends on current state \u2014 anything that changes over time (events, prices, releases, status of a thing in the real world). Composing such answers from training memory invents stale numbers; search first, then ground the answer in the results. For evergreen / definitional questions you don't need this.",
|
|
5624
6039
|
readOnly: true,
|
|
5625
6040
|
parameters: {
|
|
5626
6041
|
type: "object",
|
|
@@ -6970,6 +7385,21 @@ Skip submit_plan for small, obvious changes: one-line typo, clear bug with a cle
|
|
|
6970
7385
|
|
|
6971
7386
|
Plan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an "Open questions" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.
|
|
6972
7387
|
|
|
7388
|
+
**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use \`ask_choice\` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.
|
|
7389
|
+
|
|
7390
|
+
# When to ask the user to pick (ask_choice)
|
|
7391
|
+
|
|
7392
|
+
You have an \`ask_choice\` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. The tool fires an arrow-key picker that's strictly better.
|
|
7393
|
+
|
|
7394
|
+
Call it when:
|
|
7395
|
+
- The user has asked for options / doesn't want a recommendation / wants to decide.
|
|
7396
|
+
- You've analyzed multiple approaches and the final call is theirs.
|
|
7397
|
+
- It's a preference fork you can't resolve without them (deployment target, team convention, taste).
|
|
7398
|
+
|
|
7399
|
+
Skip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).
|
|
7400
|
+
|
|
7401
|
+
Each option: short stable id (A/B/C), one-line title, optional summary. \`allowCustom: true\` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine ("I see three directions \u2014 letting you pick"); don't repeat the options in it. After the call, STOP.
|
|
7402
|
+
|
|
6973
7403
|
# Plan mode (/plan)
|
|
6974
7404
|
|
|
6975
7405
|
The user can ALSO enter "plan mode" via /plan, which is a stronger, explicit constraint:
|
|
@@ -7429,6 +7859,7 @@ export {
|
|
|
7429
7859
|
AppendOnlyLog,
|
|
7430
7860
|
CODE_SYSTEM_PROMPT,
|
|
7431
7861
|
CacheFirstLoop,
|
|
7862
|
+
ChoiceRequestedError,
|
|
7432
7863
|
DEFAULT_AT_MENTION_MAX_BYTES,
|
|
7433
7864
|
DEFAULT_MAX_RESULT_CHARS,
|
|
7434
7865
|
DEFAULT_MAX_RESULT_TOKENS,
|
|
@@ -7448,7 +7879,9 @@ export {
|
|
|
7448
7879
|
NeedsConfirmationError,
|
|
7449
7880
|
PROJECT_MEMORY_FILE,
|
|
7450
7881
|
PROJECT_MEMORY_MAX_CHARS,
|
|
7882
|
+
PlanCheckpointError,
|
|
7451
7883
|
PlanProposedError,
|
|
7884
|
+
PlanRevisionProposedError,
|
|
7452
7885
|
SessionStats,
|
|
7453
7886
|
SseTransport,
|
|
7454
7887
|
StdioTransport,
|
|
@@ -7538,6 +7971,7 @@ export {
|
|
|
7538
7971
|
readUsageLog,
|
|
7539
7972
|
recordFromLoopEvent,
|
|
7540
7973
|
redactKey,
|
|
7974
|
+
registerChoiceTool,
|
|
7541
7975
|
registerFilesystemTools,
|
|
7542
7976
|
registerMemoryTools,
|
|
7543
7977
|
registerPlanTool,
|