reasonix 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/{chunk-PKPWI33U.js → chunk-7546PPEL.js} +5 -31
- package/dist/cli/chunk-7546PPEL.js.map +1 -0
- package/dist/cli/index.js +1499 -1147
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/{prompt-HNDDXDRH.js → prompt-XPEUBA46.js} +2 -2
- package/dist/index.d.ts +152 -2272
- package/dist/index.js +30 -418
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/cli/chunk-PKPWI33U.js.map +0 -1
- package/dist/cli/{prompt-HNDDXDRH.js.map → prompt-XPEUBA46.js.map} +0 -0
package/dist/index.d.ts
CHANGED
|
@@ -1,20 +1,7 @@
|
|
|
1
1
|
import { SpawnOptions } from 'node:child_process';
|
|
2
2
|
import { WriteStream } from 'node:fs';
|
|
3
3
|
|
|
4
|
-
/**
|
|
5
|
-
* Retry layer for DeepSeek API calls.
|
|
6
|
-
*
|
|
7
|
-
* Wraps a `fetch` function so that transient failures (rate limiting, server
|
|
8
|
-
* overload, network blips) don't kill an agent session. We explicitly DO NOT
|
|
9
|
-
* retry:
|
|
10
|
-
* - 4xx client errors other than 408 / 429 (bad key, bad request, ...)
|
|
11
|
-
* - aborted requests (user cancelled)
|
|
12
|
-
* - mid-stream body read errors (retrying costs money AND would desync)
|
|
13
|
-
*
|
|
14
|
-
* Retrying is controlled by attempt count + exponential backoff with jitter.
|
|
15
|
-
* If the server sends a `Retry-After` header we honor it (capped by
|
|
16
|
-
* `maxBackoffMs` so a misconfigured upstream can't park us forever).
|
|
17
|
-
*/
|
|
4
|
+
/** No retry on aborts or mid-stream body errors — re-billing the user for desynced output is worse than failing. */
|
|
18
5
|
interface RetryOptions {
|
|
19
6
|
/** Maximum total attempts (including the first). Default 4. */
|
|
20
7
|
maxAttempts?: number;
|
|
@@ -69,13 +56,7 @@ interface ChatMessage {
|
|
|
69
56
|
name?: string;
|
|
70
57
|
tool_call_id?: string;
|
|
71
58
|
tool_calls?: ToolCall[];
|
|
72
|
-
/**
|
|
73
|
-
* R1 `reasoning_content` captured from the assistant's thinking turn.
|
|
74
|
-
* DeepSeek's thinking mode 400s with "reasoning_content in the
|
|
75
|
-
* thinking mode must be passed back" when a tool-loop continuation
|
|
76
|
-
* omits it from the preceding assistant message. Round-tripped for
|
|
77
|
-
* deepseek-reasoner turns with tool_calls; absent for deepseek-chat.
|
|
78
|
-
*/
|
|
59
|
+
/** Must round-trip in tool-loop continuations — thinking mode 400s without it. */
|
|
79
60
|
reasoning_content?: string | null;
|
|
80
61
|
}
|
|
81
62
|
interface RawUsage {
|
|
@@ -97,20 +78,7 @@ interface ChatRequestOptions {
|
|
|
97
78
|
responseFormat?: {
|
|
98
79
|
type: "json_object" | "text";
|
|
99
80
|
};
|
|
100
|
-
/**
|
|
101
|
-
* Explicitly toggle V4 thinking mode. Serialized as
|
|
102
|
-
* `extra_body.thinking.type = enabled|disabled`. Omit to let the
|
|
103
|
-
* server default apply (thinking enabled). Mainly used so the loop
|
|
104
|
-
* can pin the mode per model: `deepseek-chat` → disabled (legacy
|
|
105
|
-
* non-thinking compat), everything else → enabled.
|
|
106
|
-
*/
|
|
107
81
|
thinking?: "enabled" | "disabled";
|
|
108
|
-
/**
|
|
109
|
-
* Per-request reasoning-effort cap. Serialized as the top-level
|
|
110
|
-
* `reasoning_effort` field. DeepSeek accepts `high` (standard) or
|
|
111
|
-
* `max` (Agent-class, auto-applied to Claude-Code-style flows per
|
|
112
|
-
* the V4 docs). Reasonix pins `max` because every turn is agent-like.
|
|
113
|
-
*/
|
|
114
82
|
reasoningEffort?: "high" | "max";
|
|
115
83
|
}
|
|
116
84
|
|
|
@@ -144,13 +112,6 @@ interface StreamChunk {
|
|
|
144
112
|
finishReason?: string;
|
|
145
113
|
raw: any;
|
|
146
114
|
}
|
|
147
|
-
/**
|
|
148
|
-
* Response shape for DeepSeek's `/user/balance` endpoint. One entry
|
|
149
|
-
* per currency the account is funded in (typically CNY, sometimes
|
|
150
|
-
* USD). `total_balance` is the spendable figure; `granted_balance`
|
|
151
|
-
* counts promotional credits that expire, `topped_up_balance` is
|
|
152
|
-
* what the user paid for and keeps.
|
|
153
|
-
*/
|
|
154
115
|
interface BalanceInfo {
|
|
155
116
|
currency: string;
|
|
156
117
|
total_balance: string;
|
|
@@ -161,12 +122,6 @@ interface UserBalance {
|
|
|
161
122
|
is_available: boolean;
|
|
162
123
|
balance_infos: BalanceInfo[];
|
|
163
124
|
}
|
|
164
|
-
/**
|
|
165
|
-
* Response shape for DeepSeek's `/models` endpoint. Mirrors the OpenAI
|
|
166
|
-
* models list shape DeepSeek copied — `id` is the model name to pass to
|
|
167
|
-
* `/chat/completions`, `owned_by` is the provider string (always
|
|
168
|
-
* `"deepseek"` today).
|
|
169
|
-
*/
|
|
170
125
|
interface ModelInfo {
|
|
171
126
|
id: string;
|
|
172
127
|
object: "model";
|
|
@@ -192,22 +147,11 @@ declare class DeepSeekClient {
|
|
|
192
147
|
private readonly _fetch;
|
|
193
148
|
constructor(opts?: DeepSeekClientOptions);
|
|
194
149
|
private buildPayload;
|
|
195
|
-
/**
|
|
196
|
-
* Fetch the current DeepSeek account balance. Separate endpoint
|
|
197
|
-
* from chat completions, no billing impact. Returns null on any
|
|
198
|
-
* network/auth failure so callers can gate the balance display
|
|
199
|
-
* without a hard error — the rest of the session works regardless.
|
|
200
|
-
*/
|
|
150
|
+
/** Returns null on failure so callers can degrade — session must keep working without balance UI. */
|
|
201
151
|
getBalance(opts?: {
|
|
202
152
|
signal?: AbortSignal;
|
|
203
153
|
}): Promise<UserBalance | null>;
|
|
204
|
-
/**
|
|
205
|
-
* Fetch the model catalog DeepSeek currently exposes. Today this is
|
|
206
|
-
* `deepseek-chat` (V3) and `deepseek-reasoner` (R1), but querying is
|
|
207
|
-
* the only way to learn about new ones without a Reasonix release.
|
|
208
|
-
* Returns null on any network/auth failure so callers can degrade
|
|
209
|
-
* gracefully — e.g. `/models` falls back to the hardcoded hint.
|
|
210
|
-
*/
|
|
154
|
+
/** Returns null on failure — callers fall back to a hardcoded model hint. */
|
|
211
155
|
listModels(opts?: {
|
|
212
156
|
signal?: AbortSignal;
|
|
213
157
|
}): Promise<ModelList | null>;
|
|
@@ -215,19 +159,7 @@ declare class DeepSeekClient {
|
|
|
215
159
|
stream(opts: ChatRequestOptions): AsyncGenerator<StreamChunk>;
|
|
216
160
|
}
|
|
217
161
|
|
|
218
|
-
/**
|
|
219
|
-
* Pillar 2 — R1 Thought Harvesting.
|
|
220
|
-
*
|
|
221
|
-
* Takes the `reasoning_content` emitted by a thinking model (deepseek-reasoner
|
|
222
|
-
* / R1) and extracts a structured plan state by making a cheap secondary call
|
|
223
|
-
* to V3 in JSON mode. The typed state is intended for the orchestrator to
|
|
224
|
-
* branch on — e.g. trigger self-consistency sampling when `uncertainties.length
|
|
225
|
-
* > 2`, or surface the subgoals to the user.
|
|
226
|
-
*
|
|
227
|
-
* Opt-in: loops disable harvesting by default. Failures (bad JSON, API error,
|
|
228
|
-
* empty reasoning) return an empty TypedPlanState — the main turn is never
|
|
229
|
-
* aborted because of a harvest hiccup.
|
|
230
|
-
*/
|
|
162
|
+
/** Harvest failures return an empty state — main turn must never abort on a hiccup here. */
|
|
231
163
|
|
|
232
164
|
interface TypedPlanState {
|
|
233
165
|
subgoals: string[];
|
|
@@ -249,19 +181,7 @@ declare function emptyPlanState(): TypedPlanState;
|
|
|
249
181
|
declare function isPlanStateEmpty(s: TypedPlanState | null | undefined): boolean;
|
|
250
182
|
declare function harvest(reasoningContent: string | null | undefined, client?: DeepSeekClient, options?: HarvestOptions, signal?: AbortSignal): Promise<TypedPlanState>;
|
|
251
183
|
|
|
252
|
-
/**
|
|
253
|
-
* Self-consistency branching.
|
|
254
|
-
*
|
|
255
|
-
* When enabled, the loop fans out into N parallel samples per turn (varied
|
|
256
|
-
* temperatures), runs Pillar 2 harvest on each, and selects the sample with
|
|
257
|
-
* the fewest flagged uncertainties (ties broken by answer length — a crude
|
|
258
|
-
* Occam prior).
|
|
259
|
-
*
|
|
260
|
-
* The unique opportunity here: because DeepSeek is ~20× cheaper than Claude,
|
|
261
|
-
* running N=3–5 samples per turn is still cheaper than a single Claude call,
|
|
262
|
-
* while the majority-confidence selection tends to dominate single-sample
|
|
263
|
-
* answers on fuzzy multi-step reasoning tasks.
|
|
264
|
-
*/
|
|
184
|
+
/** N parallel samples; selector picks fewest uncertainties with shorter-answer tie-break (Occam prior). */
|
|
265
185
|
|
|
266
186
|
interface BranchSample {
|
|
267
187
|
index: number;
|
|
@@ -279,10 +199,7 @@ interface BranchOptions {
|
|
|
279
199
|
harvestOptions?: HarvestOptions;
|
|
280
200
|
/** Custom selector. Default: min uncertainties, tie-break shortest answer. */
|
|
281
201
|
selector?: BranchSelector;
|
|
282
|
-
/**
|
|
283
|
-
* Fires as each sample finishes (main call + harvest both complete).
|
|
284
|
-
* Useful for progress UI. Not awaited; exceptions are swallowed.
|
|
285
|
-
*/
|
|
202
|
+
/** Not awaited; exceptions swallowed. Fires when sample's main + harvest both complete. */
|
|
286
203
|
onSampleDone?: (sample: BranchSample) => void;
|
|
287
204
|
}
|
|
288
205
|
interface BranchResult {
|
|
@@ -301,53 +218,13 @@ declare function aggregateBranchUsage(samples: readonly BranchSample[]): {
|
|
|
301
218
|
promptCacheMissTokens: number;
|
|
302
219
|
};
|
|
303
220
|
|
|
304
|
-
/**
|
|
305
|
-
* Hooks — user-defined automation that fires at well-known points in
|
|
306
|
-
* the agent loop. Mirrors the two-scope layout we use for memory and
|
|
307
|
-
* skills:
|
|
308
|
-
*
|
|
309
|
-
* - `<project>/.reasonix/settings.json` — committable per-project
|
|
310
|
-
* - `~/.reasonix/settings.json` — every session
|
|
311
|
-
*
|
|
312
|
-
* A hook is a shell command. We invoke it with stdin = a JSON
|
|
313
|
-
* payload describing the event, and interpret the exit code:
|
|
314
|
-
*
|
|
315
|
-
* - `0` — pass; loop continues normally
|
|
316
|
-
* - `2` — block; for `PreToolUse` / `UserPromptSubmit` the
|
|
317
|
-
* loop refuses to continue with that step and surfaces the
|
|
318
|
-
* hook's stderr as the reason. For `PostToolUse` / `Stop` block
|
|
319
|
-
* is meaningless (the action already happened) — treat as warn.
|
|
320
|
-
* - anything else — warn; loop continues but stderr is rendered
|
|
321
|
-
* to the user as an inline notice.
|
|
322
|
-
*
|
|
323
|
-
* stdin JSON shape (one envelope per event):
|
|
324
|
-
*
|
|
325
|
-
* {
|
|
326
|
-
* "event": "PreToolUse" | "PostToolUse" | "UserPromptSubmit" | "Stop",
|
|
327
|
-
* "cwd": "<absolute project root or process.cwd()>",
|
|
328
|
-
* "toolName": "<string>", // tool events only
|
|
329
|
-
* "toolArgs": <unknown>, // tool events only — already JSON-decoded
|
|
330
|
-
* "toolResult": "<string>", // PostToolUse only — same body the model sees
|
|
331
|
-
* "prompt": "<string>", // UserPromptSubmit only
|
|
332
|
-
* "lastAssistantText": "<string>", // Stop only
|
|
333
|
-
* "turn": <number>, // Stop only
|
|
334
|
-
* }
|
|
335
|
-
*
|
|
336
|
-
* Hooks are executed in order: project scope first, then global.
|
|
337
|
-
* `Pre*` events stop dispatching at the first block; non-block
|
|
338
|
-
* outcomes accumulate into a single report so the UI can render
|
|
339
|
-
* each warning inline.
|
|
340
|
-
*/
|
|
221
|
+
/** Shell-command hooks; project scope first, then global. Exit 0=pass, 2=block on Pre*, other=warn. */
|
|
341
222
|
type HookEvent = "PreToolUse" | "PostToolUse" | "UserPromptSubmit" | "Stop";
|
|
342
223
|
/** All four events as a const array — drives slash listing + validation. */
|
|
343
224
|
declare const HOOK_EVENTS: readonly HookEvent[];
|
|
344
225
|
type HookScope = "project" | "global";
|
|
345
226
|
interface HookConfig {
|
|
346
|
-
/**
|
|
347
|
-
* Tool-name pattern (PreToolUse / PostToolUse only). Anchored regex.
|
|
348
|
-
* Omitted or `"*"` matches every tool. Ignored for prompt / Stop
|
|
349
|
-
* events (they have no tool name to match against).
|
|
350
|
-
*/
|
|
227
|
+
/** Anchored regex; `"*"` / omitted = every tool. Pre/PostToolUse only. */
|
|
351
228
|
match?: string;
|
|
352
229
|
/** Shell command to run. Spawned through the platform shell. */
|
|
353
230
|
command: string;
|
|
@@ -355,11 +232,7 @@ interface HookConfig {
|
|
|
355
232
|
description?: string;
|
|
356
233
|
/** Per-hook timeout override in ms. */
|
|
357
234
|
timeout?: number;
|
|
358
|
-
/**
|
|
359
|
-
* Working directory for the spawned process. Defaults to:
|
|
360
|
-
* - project scope → the project root
|
|
361
|
-
* - global scope → process.cwd()
|
|
362
|
-
*/
|
|
235
|
+
/** Defaults: project scope → project root; global scope → process.cwd(). */
|
|
363
236
|
cwd?: string;
|
|
364
237
|
}
|
|
365
238
|
/** Shape of `<scope>/.reasonix/settings.json` — only `hooks` for now. */
|
|
@@ -377,14 +250,7 @@ interface ResolvedHook extends HookConfig {
|
|
|
377
250
|
interface HookOutcome {
|
|
378
251
|
/** Which hook fired. */
|
|
379
252
|
hook: ResolvedHook;
|
|
380
|
-
/**
|
|
381
|
-
* Decision:
|
|
382
|
-
* - `pass` — exit 0
|
|
383
|
-
* - `block` — exit 2 on a blocking event (otherwise downgraded to `warn`)
|
|
384
|
-
* - `warn` — non-zero exit that is not a successful block
|
|
385
|
-
* - `timeout` — the spawn was killed past `timeout`
|
|
386
|
-
* - `error` — could not spawn at all (missing command, etc.)
|
|
387
|
-
*/
|
|
253
|
+
/** pass=exit 0; block=exit 2 on blocking event; warn=other non-zero; timeout=killed; error=spawn failed. */
|
|
388
254
|
decision: "pass" | "block" | "warn" | "timeout" | "error";
|
|
389
255
|
exitCode: number | null;
|
|
390
256
|
/** Captured stdout (trimmed). May be empty. */
|
|
@@ -392,12 +258,7 @@ interface HookOutcome {
|
|
|
392
258
|
/** Captured stderr (trimmed). The block / warn message comes from here. */
|
|
393
259
|
stderr: string;
|
|
394
260
|
durationMs: number;
|
|
395
|
-
/**
|
|
396
|
-
* True when stdout or stderr crossed the per-stream byte cap and was
|
|
397
|
-
* truncated. The hook still completed; the loop just sees a clipped
|
|
398
|
-
* view of its output. Surfaced via `formatHookOutcomeMessage` so the
|
|
399
|
-
* user knows their script wrote more than Reasonix kept.
|
|
400
|
-
*/
|
|
261
|
+
/** Output crossed the per-stream byte cap; surfaced so user knows we kept less than the script wrote. */
|
|
401
262
|
truncated?: boolean;
|
|
402
263
|
}
|
|
403
264
|
/** Aggregate report for `runHooks`. */
|
|
@@ -413,16 +274,7 @@ declare const HOOK_SETTINGS_DIRNAME = ".reasonix";
|
|
|
413
274
|
declare function globalSettingsPath(homeDirOverride?: string): string;
|
|
414
275
|
/** Where the project settings.json lives for a given root. */
|
|
415
276
|
declare function projectSettingsPath(projectRoot: string): string;
|
|
416
|
-
/**
|
|
417
|
-
* Pull every configured hook out of the project + global settings
|
|
418
|
-
* files, in the order they should fire (project first, global second,
|
|
419
|
-
* within each scope: array order from the file).
|
|
420
|
-
*
|
|
421
|
-
* Returns a flat list — the dispatcher filters by event + match
|
|
422
|
-
* pattern at run time. Loading is cheap (one or two JSON files), so
|
|
423
|
-
* we don't memoize across processes; re-load is allowed via
|
|
424
|
-
* `/hooks reload` and on every fresh App mount.
|
|
425
|
-
*/
|
|
277
|
+
/** Project hooks fire before global; within a scope, array order. */
|
|
426
278
|
interface LoadHookSettingsOptions {
|
|
427
279
|
/** Absolute project root, if any. Without it, only global hooks load. */
|
|
428
280
|
projectRoot?: string;
|
|
@@ -430,12 +282,7 @@ interface LoadHookSettingsOptions {
|
|
|
430
282
|
homeDir?: string;
|
|
431
283
|
}
|
|
432
284
|
declare function loadHooks(opts?: LoadHookSettingsOptions): ResolvedHook[];
|
|
433
|
-
/**
|
|
434
|
-
* True if `toolName` matches the hook's `match` field. `"*"` and
|
|
435
|
-
* undefined match everything. Otherwise we anchor the field as a
|
|
436
|
-
* regex — partial-name matches don't fire, so `"file"` would not
|
|
437
|
-
* trigger on `read_file` (use `".*file"` for that).
|
|
438
|
-
*/
|
|
285
|
+
/** Match field is an ANCHORED regex — `"file"` won't trigger on `read_file`; use `".*file"`. */
|
|
439
286
|
declare function matchesTool(hook: ResolvedHook, toolName: string): boolean;
|
|
440
287
|
/** Payload envelope passed to hook stdin. */
|
|
441
288
|
interface HookPayload {
|
|
@@ -462,27 +309,11 @@ interface HookSpawnResult {
|
|
|
462
309
|
timedOut: boolean;
|
|
463
310
|
/** True iff spawn() itself failed (ENOENT, EACCES, …). */
|
|
464
311
|
spawnError?: Error;
|
|
465
|
-
/**
|
|
466
|
-
* True iff stdout or stderr was capped at the byte limit. The hook
|
|
467
|
-
* still ran to completion / timeout, but downstream consumers see a
|
|
468
|
-
* truncated view of its output. Surface this in the UI so a hook
|
|
469
|
-
* author who relies on long output knows the loop didn't see all
|
|
470
|
-
* of it.
|
|
471
|
-
*/
|
|
312
|
+
/** Output capped at byte limit — hook ran to completion but consumers see clipped view. */
|
|
472
313
|
truncated?: boolean;
|
|
473
314
|
}
|
|
474
315
|
type HookSpawner = (input: HookSpawnInput) => Promise<HookSpawnResult>;
|
|
475
|
-
/**
|
|
476
|
-
* Format a hook outcome as a single-line UI string. Used by both the
|
|
477
|
-
* loop (for `warning` events) and the App (for UserPromptSubmit /
|
|
478
|
-
* Stop outcomes). Centralizing keeps the language consistent across
|
|
479
|
-
* scopes.
|
|
480
|
-
*/
|
|
481
316
|
declare function formatHookOutcomeMessage(outcome: HookOutcome): string;
|
|
482
|
-
/**
|
|
483
|
-
* Decide the hook's outcome decision from raw spawn results.
|
|
484
|
-
* Pulled out as a pure function so tests can pin the matrix.
|
|
485
|
-
*/
|
|
486
317
|
declare function decideOutcome(event: HookEvent, raw: HookSpawnResult): "pass" | "block" | "warn" | "timeout" | "error";
|
|
487
318
|
interface RunHooksOptions {
|
|
488
319
|
payload: HookPayload;
|
|
@@ -490,13 +321,7 @@ interface RunHooksOptions {
|
|
|
490
321
|
/** Test seam — defaults to a real `spawn`. */
|
|
491
322
|
spawner?: HookSpawner;
|
|
492
323
|
}
|
|
493
|
-
/**
|
|
494
|
-
* Filter hooks down to the ones that match `payload.event` (and
|
|
495
|
-
* `payload.toolName`, for tool events), then run them in order.
|
|
496
|
-
* Stops at the first `block` outcome on a blocking event so a
|
|
497
|
-
* gating hook can prevent later hooks from incorrectly seeing a
|
|
498
|
-
* success that wasn't going to happen.
|
|
499
|
-
*/
|
|
324
|
+
/** Stops at first `block` so a gating hook can prevent later hooks running against a phantom success. */
|
|
500
325
|
declare function runHooks(opts: RunHooksOptions): Promise<HookReport>;
|
|
501
326
|
|
|
502
327
|
interface ImmutablePrefixOptions {
|
|
@@ -506,54 +331,18 @@ interface ImmutablePrefixOptions {
|
|
|
506
331
|
}
|
|
507
332
|
declare class ImmutablePrefix {
|
|
508
333
|
readonly system: string;
|
|
509
|
-
/**
|
|
510
|
-
* Backing array for `toolSpecs`. Originally `Object.freeze`d at
|
|
511
|
-
* construction (hence the class name) — but `addTool` now lets the
|
|
512
|
-
* dashboard register `semantic_search` after a mid-session
|
|
513
|
-
* `reasonix index` build without forcing the user to restart. Each
|
|
514
|
-
* add is documented to cost one cache-miss turn (the cached prefix
|
|
515
|
-
* on DeepSeek's side is keyed by the full tool list); subsequent
|
|
516
|
-
* turns re-cache against the new shape.
|
|
517
|
-
*/
|
|
334
|
+
/** Each `addTool` costs one cache-miss turn — DeepSeek's prefix cache is keyed by full tool list. */
|
|
518
335
|
private _toolSpecs;
|
|
519
336
|
readonly fewShots: readonly ChatMessage[];
|
|
520
|
-
/**
|
|
521
|
-
* Cached SHA-256 of the prefix payload. Computed lazily on first
|
|
522
|
-
* `fingerprint` access, invalidated only by mutations that go
|
|
523
|
-
* through `addTool` (the one legitimate post-construction mutation
|
|
524
|
-
* path). The TUI reads `fingerprint` on every render — without the
|
|
525
|
-
* cache, that means a fresh `JSON.stringify` + sha256 over the
|
|
526
|
-
* full prefix (system prompt + tools list + few-shots, typically
|
|
527
|
-
* 5-10KB) on every keystroke.
|
|
528
|
-
*
|
|
529
|
-
* The lazy-init also acts as a cheap drift guard: if some future
|
|
530
|
-
* code path mutates `_toolSpecs` directly without going through
|
|
531
|
-
* `addTool`, `fingerprint` will return the stale cached value
|
|
532
|
-
* while the actual prefix sent to DeepSeek diverges — the cache
|
|
533
|
-
* miss would be the first symptom. {@link verifyFingerprint}
|
|
534
|
-
* lets dev / test code assert the cache matches reality.
|
|
535
|
-
*/
|
|
337
|
+
/** Invalidated only via `addTool`; bypassing it leaves cache stale → fingerprint diverges from sent prefix. */
|
|
536
338
|
private _fingerprintCache;
|
|
537
339
|
constructor(opts: ImmutablePrefixOptions);
|
|
538
340
|
get toolSpecs(): readonly ToolSpec[];
|
|
539
341
|
toMessages(): ChatMessage[];
|
|
540
342
|
tools(): ToolSpec[];
|
|
541
|
-
/**
|
|
542
|
-
* Add a tool spec to the prefix. Returns `true` if added, `false`
|
|
543
|
-
* if a tool with the same name was already present (callers can
|
|
544
|
-
* decide whether to ignore or surface the no-op). The model picks
|
|
545
|
-
* up the new tool on the next turn after the cache busts once.
|
|
546
|
-
*/
|
|
547
343
|
addTool(spec: ToolSpec): boolean;
|
|
548
344
|
get fingerprint(): string;
|
|
549
|
-
/**
|
|
550
|
-
* Recompute the fingerprint from scratch and assert it matches the
|
|
551
|
-
* cached value. Returns the freshly-computed hash on success; throws
|
|
552
|
-
* with a diff if the cache drifted, which always indicates a bug —
|
|
553
|
-
* either a non-`addTool` mutation path was added, or `addTool`
|
|
554
|
-
* forgot to invalidate the cache. Dev / test only; the live loop
|
|
555
|
-
* doesn't call this on the hot path.
|
|
556
|
-
*/
|
|
345
|
+
/** Dev/test only — throws on cache drift, which always means a non-`addTool` mutation slipped in. */
|
|
557
346
|
verifyFingerprint(): string;
|
|
558
347
|
private computeFingerprint;
|
|
559
348
|
}
|
|
@@ -561,13 +350,7 @@ declare class AppendOnlyLog {
|
|
|
561
350
|
private _entries;
|
|
562
351
|
append(message: ChatMessage): void;
|
|
563
352
|
extend(messages: ChatMessage[]): void;
|
|
564
|
-
/**
|
|
565
|
-
* Bulk-replace entries. Intentionally named to be hard to reach for —
|
|
566
|
-
* this is the one mutation path that breaks the log's append-only
|
|
567
|
-
* spirit, reserved for compaction flows (`/compact`) and recovery
|
|
568
|
-
* where the caller has consciously decided to drop old history. Any
|
|
569
|
-
* other use is almost certainly wrong; append() is what you want.
|
|
570
|
-
*/
|
|
353
|
+
/** The one append-only-breaking path — reserved for `/compact` + recovery. Use `append()` otherwise. */
|
|
571
354
|
compactInPlace(replacement: ChatMessage[]): void;
|
|
572
355
|
get entries(): readonly ChatMessage[];
|
|
573
356
|
toMessages(): ChatMessage[];
|
|
@@ -580,33 +363,9 @@ declare class VolatileScratch {
|
|
|
580
363
|
reset(): void;
|
|
581
364
|
}
|
|
582
365
|
|
|
583
|
-
/**
|
|
584
|
-
* Predicate the breaker consults to decide whether a call mutates state.
|
|
585
|
-
* Mutating calls clear the recent-args buffer: re-reading a file after
|
|
586
|
-
* `edit_file` shouldn't count as "saw the same args before" — the file
|
|
587
|
-
* legitimately changed. Wire this from the caller using whatever source
|
|
588
|
-
* of truth is appropriate (e.g. the ToolRegistry's `readOnly` /
|
|
589
|
-
* `readOnlyCheck` flags). When undefined, every call is tracked the
|
|
590
|
-
* old way — preserves the original behavior for callers that don't
|
|
591
|
-
* thread a registry through.
|
|
592
|
-
*/
|
|
366
|
+
/** Mutating calls clear prior read-only entries so a post-edit re-read isn't flagged as repeat. */
|
|
593
367
|
type IsMutating = (call: ToolCall) => boolean;
|
|
594
|
-
/**
|
|
595
|
-
* Call-storm breaker.
|
|
596
|
-
*
|
|
597
|
-
* Detects (tool, args) tuples repeating within a sliding window and suppresses
|
|
598
|
-
* the offending call. Surfaces a synthetic tool_result advising the model to
|
|
599
|
-
* change strategy on its next turn.
|
|
600
|
-
*
|
|
601
|
-
* Buffer entries are tagged read-only vs mutating. When a mutating call
|
|
602
|
-
* runs, the breaker drops prior read-only entries — a re-read of the
|
|
603
|
-
* same path after `edit_file` is fresh, not a repeat. Mutating calls
|
|
604
|
-
* still count among themselves, so a model looping on identical
|
|
605
|
-
* `edit_file` invocations still trips on the threshold.
|
|
606
|
-
*
|
|
607
|
-
* Without an `isMutating` predicate everything is tracked the same way
|
|
608
|
-
* (back-compat for callers that don't thread a registry through).
|
|
609
|
-
*/
|
|
368
|
+
/** Tracks (name, args) repeats; mutating calls clear prior read-only entries while still counting amongst themselves. */
|
|
610
369
|
declare class StormBreaker {
|
|
611
370
|
private readonly windowSize;
|
|
612
371
|
private readonly threshold;
|
|
@@ -620,16 +379,7 @@ declare class StormBreaker {
|
|
|
620
379
|
reset(): void;
|
|
621
380
|
}
|
|
622
381
|
|
|
623
|
-
/**
|
|
624
|
-
* Schema flattening for DeepSeek tool calls.
|
|
625
|
-
*
|
|
626
|
-
* DeepSeek loses arguments on schemas that are deep (>2 levels of nesting) or
|
|
627
|
-
* wide (>10 leaf parameters). This module transforms such schemas into a
|
|
628
|
-
* dot-notation flat schema and re-nests the model's arguments before dispatch.
|
|
629
|
-
*
|
|
630
|
-
* Example:
|
|
631
|
-
* { user: { profile: { name, age } } } ⇄ "user.profile.name", "user.profile.age"
|
|
632
|
-
*/
|
|
382
|
+
/** DeepSeek drops args on schemas >2 levels deep or >10 leaves; flatten to dot-paths and re-nest after dispatch. */
|
|
633
383
|
|
|
634
384
|
interface FlattenDecision {
|
|
635
385
|
shouldFlatten: boolean;
|
|
@@ -640,14 +390,7 @@ declare function analyzeSchema(schema: JSONSchema | undefined): FlattenDecision;
|
|
|
640
390
|
declare function flattenSchema(schema: JSONSchema): JSONSchema;
|
|
641
391
|
declare function nestArguments(flatArgs: Record<string, unknown>): Record<string, unknown>;
|
|
642
392
|
|
|
643
|
-
/**
|
|
644
|
-
* Truncation recovery for tool-call argument JSON cut off mid-structure
|
|
645
|
-
* (typically when the model hits max_tokens before finishing the JSON object).
|
|
646
|
-
*
|
|
647
|
-
* Strategy is purely local: balance braces, close strings, fill missing values
|
|
648
|
-
* with `null`. We deliberately do NOT make a continuation API call here — that
|
|
649
|
-
* decision belongs to the loop, which knows about budgets.
|
|
650
|
-
*/
|
|
393
|
+
/** Local-only repair (balance braces, close strings, fill nulls); continuation calls belong to the loop, which owns budgets. */
|
|
651
394
|
interface TruncationRepairResult {
|
|
652
395
|
repaired: string;
|
|
653
396
|
changed: boolean;
|
|
@@ -655,14 +398,7 @@ interface TruncationRepairResult {
|
|
|
655
398
|
}
|
|
656
399
|
declare function repairTruncatedJson(input: string): TruncationRepairResult;
|
|
657
400
|
|
|
658
|
-
/**
|
|
659
|
-
* Scavenge tool calls leaked into reasoning_content.
|
|
660
|
-
*
|
|
661
|
-
* R1 sometimes emits tool-call JSON inside <think>…</think> and then forgets
|
|
662
|
-
* to surface it in `tool_calls`. This pass extracts plausible calls and
|
|
663
|
-
* proposes them to the loop, which decides whether to merge them with the
|
|
664
|
-
* declared calls.
|
|
665
|
-
*/
|
|
401
|
+
/** R1 sometimes emits tool-call JSON inside reasoning_content and forgets `tool_calls`; recover those calls. */
|
|
666
402
|
|
|
667
403
|
interface ScavengeOptions {
|
|
668
404
|
/** Names of tools the model may legitimately call. Other names are ignored. */
|
|
@@ -676,17 +412,7 @@ interface ScavengeResult {
|
|
|
676
412
|
}
|
|
677
413
|
declare function scavengeToolCalls(reasoningContent: string | null | undefined, opts: ScavengeOptions): ScavengeResult;
|
|
678
414
|
|
|
679
|
-
/**
|
|
680
|
-
* Pillar 3 — Tool-Call Repair pipeline.
|
|
681
|
-
*
|
|
682
|
-
* Order of passes per turn:
|
|
683
|
-
* 1. scavenge — recover tool calls leaked into <think>
|
|
684
|
-
* 2. truncation — close any half-emitted argument JSON
|
|
685
|
-
* 3. storm breaker — drop call-storm repeats
|
|
686
|
-
*
|
|
687
|
-
* Schema flattening is applied during loop construction (it changes what we
|
|
688
|
-
* advertise to the model), not per-turn.
|
|
689
|
-
*/
|
|
415
|
+
/** Pass order: scavenge → truncation → storm. Schema flatten runs at loop construction, not per-turn. */
|
|
690
416
|
|
|
691
417
|
interface RepairReport {
|
|
692
418
|
scavenged: number;
|
|
@@ -699,26 +425,14 @@ interface ToolCallRepairOptions {
|
|
|
699
425
|
stormWindow?: number;
|
|
700
426
|
stormThreshold?: number;
|
|
701
427
|
maxScavenge?: number;
|
|
702
|
-
/**
|
|
703
|
-
* Optional predicate the storm breaker consults to identify state-
|
|
704
|
-
* changing calls — those clear the sliding window so a post-edit
|
|
705
|
-
* verify-read isn't mistaken for a repeat. Production callers wire
|
|
706
|
-
* this off the ToolRegistry's `readOnly` / `readOnlyCheck` flags;
|
|
707
|
-
* tests that don't supply it keep the original behavior.
|
|
708
|
-
*/
|
|
428
|
+
/** Mutating calls clear the storm window so a post-edit verify-read isn't seen as a repeat. */
|
|
709
429
|
isMutating?: IsMutating;
|
|
710
430
|
}
|
|
711
431
|
declare class ToolCallRepair {
|
|
712
432
|
private readonly storm;
|
|
713
433
|
private readonly opts;
|
|
714
434
|
constructor(opts: ToolCallRepairOptions);
|
|
715
|
-
/**
|
|
716
|
-
* Drop the StormBreaker's sliding window of recent (name, args)
|
|
717
|
-
* signatures. Called at the start of every user turn — a fresh user
|
|
718
|
-
* message is a new intent, so carrying old repetition state into it
|
|
719
|
-
* would turn a valid "try again with different input" flow into a
|
|
720
|
-
* false-positive block.
|
|
721
|
-
*/
|
|
435
|
+
/** Called at start of every user turn — fresh intent shouldn't inherit old repetition state. */
|
|
722
436
|
resetStorm(): void;
|
|
723
437
|
process(declaredCalls: ToolCall[], reasoningContent: string | null, content?: string | null): {
|
|
724
438
|
calls: ToolCall[];
|
|
@@ -742,11 +456,6 @@ interface TurnStats {
|
|
|
742
456
|
interface SessionSummary {
|
|
743
457
|
turns: number;
|
|
744
458
|
totalCostUsd: number;
|
|
745
|
-
/**
|
|
746
|
-
* Input-side (prompt) cost aggregated across the session. Split
|
|
747
|
-
* from totalCostUsd so the panel can render "cost $X (in $Y · out
|
|
748
|
-
* $Z)" — users asked for visibility into where the spend lands.
|
|
749
|
-
*/
|
|
750
459
|
totalInputCostUsd: number;
|
|
751
460
|
/** Output-side (completion) cost aggregated across the session. */
|
|
752
461
|
totalOutputCostUsd: number;
|
|
@@ -755,19 +464,8 @@ interface SessionSummary {
|
|
|
755
464
|
/** @deprecated. Same as claudeEquivalentUsd — synthetic ratio, not a real measurement. */
|
|
756
465
|
savingsVsClaudePct: number;
|
|
757
466
|
cacheHitRatio: number;
|
|
758
|
-
/**
|
|
759
|
-
* Most recent turn's prompt-token count. Used by the TUI's context
|
|
760
|
-
* gauge: we can't know the next call's cost without making it, but
|
|
761
|
-
* the last turn's prompt tokens is the floor (next call is last
|
|
762
|
-
* prompt + user delta + any new tool outputs).
|
|
763
|
-
*/
|
|
467
|
+
/** Floor estimate for next call — actual cost = this + user delta + new tool outputs. */
|
|
764
468
|
lastPromptTokens: number;
|
|
765
|
-
/**
|
|
766
|
-
* Most recent turn's USD cost. Complements `totalCostUsd` so the TUI
|
|
767
|
-
* can render "this turn: $X · session: $Y" — users asked for a
|
|
768
|
-
* per-turn signal so a mid-session jump from flash to pro is
|
|
769
|
-
* immediately visible, not hidden inside the session aggregate.
|
|
770
|
-
*/
|
|
771
469
|
lastTurnCostUsd: number;
|
|
772
470
|
}
|
|
773
471
|
declare class SessionStats {
|
|
@@ -782,14 +480,6 @@ declare class SessionStats {
|
|
|
782
480
|
summary(): SessionSummary;
|
|
783
481
|
}
|
|
784
482
|
|
|
785
|
-
/**
|
|
786
|
-
* Per-call context a tool `fn` can optionally consume. Today the only
|
|
787
|
-
* field is `signal`, plumbed through so long-running tools (MCP calls,
|
|
788
|
-
* HTTP requests) can abort when the user presses Esc. Omitted fields
|
|
789
|
-
* stay optional — tools written against the pre-0.4.9 signature keep
|
|
790
|
-
* working; they just ignore cancellation, which is fine for fast
|
|
791
|
-
* local work where "await finishes" happens before the next tick anyway.
|
|
792
|
-
*/
|
|
793
483
|
interface ToolCallContext {
|
|
794
484
|
signal?: AbortSignal;
|
|
795
485
|
}
|
|
@@ -797,74 +487,29 @@ interface ToolDefinition<A = any, R = any> {
|
|
|
797
487
|
name: string;
|
|
798
488
|
description?: string;
|
|
799
489
|
parameters?: JSONSchema;
|
|
800
|
-
/**
|
|
801
|
-
* Marks a tool as read-only: safe to invoke during plan mode. `true`
|
|
802
|
-
* for tools that only observe (read_file, list_directory, search, web
|
|
803
|
-
* fetch/search). Leave undefined / `false` for anything that can write,
|
|
804
|
-
* execute, or mutate state.
|
|
805
|
-
*
|
|
806
|
-
* The registry enforces this at dispatch: non-readonly tools called
|
|
807
|
-
* while `planMode` is on return a refusal string the model can
|
|
808
|
-
* learn from, instead of actually running.
|
|
809
|
-
*/
|
|
490
|
+
/** Safe in plan mode — registry refuses non-readonly calls when `planMode` is on. */
|
|
810
491
|
readOnly?: boolean;
|
|
811
|
-
/**
|
|
812
|
-
* Dynamic read-only check for tools whose safety depends on arguments
|
|
813
|
-
* — `run_command` with an allowlisted argv is safe, `run_command
|
|
814
|
-
* rm -rf` isn't. Called with the parsed arguments; `true` means "treat
|
|
815
|
-
* as read-only for plan mode". Takes precedence over `readOnly` when
|
|
816
|
-
* both are set.
|
|
817
|
-
*/
|
|
492
|
+
/** Per-args check; takes precedence over `readOnly`. e.g. `run_command` + allowlisted argv. */
|
|
818
493
|
readOnlyCheck?: (args: A) => boolean;
|
|
819
494
|
fn: (args: A, ctx?: ToolCallContext) => R | Promise<R>;
|
|
820
495
|
}
|
|
821
496
|
interface ToolRegistryOptions {
|
|
822
|
-
/**
|
|
823
|
-
* Auto-flatten schemas that exceed depth/width thresholds before sending
|
|
824
|
-
* them to the model. Re-nests arguments transparently on dispatch.
|
|
825
|
-
* Default: true. Pass false to opt out.
|
|
826
|
-
*/
|
|
497
|
+
/** Auto-flatten + re-nest at dispatch; default true. */
|
|
827
498
|
autoFlatten?: boolean;
|
|
828
499
|
}
|
|
829
|
-
/**
|
|
830
|
-
* Callback form for `setToolInterceptor` — receives the tool name and
|
|
831
|
-
* already-parsed arguments; returns a string to short-circuit dispatch
|
|
832
|
-
* (the returned value becomes the tool result the model sees), or
|
|
833
|
-
* `null` / `undefined` to fall through to the registered tool fn.
|
|
834
|
-
*
|
|
835
|
-
* Used by `reasonix code`'s edit-mode gate: `edit_file` / `write_file`
|
|
836
|
-
* are intercepted in "review" mode (queued into pendingEdits, returning
|
|
837
|
-
* "queued for /apply") or handled inline in "auto" mode (snapshot +
|
|
838
|
-
* apply, then surface an undo banner). Other tools pass through.
|
|
839
|
-
*/
|
|
500
|
+
/** String return short-circuits dispatch; null/undefined falls through to the tool fn. */
|
|
840
501
|
type ToolInterceptor = (name: string, args: Record<string, unknown>) => string | null | undefined | Promise<string | null | undefined>;
|
|
841
502
|
declare class ToolRegistry {
|
|
842
503
|
private readonly _tools;
|
|
843
504
|
private readonly _autoFlatten;
|
|
844
|
-
/**
|
|
845
|
-
* When true, `dispatch` refuses any tool whose `readOnly` flag isn't
|
|
846
|
-
* set (and whose `readOnlyCheck` doesn't pass on the specific args).
|
|
847
|
-
* Drives `reasonix code`'s Plan Mode — the model can still explore
|
|
848
|
-
* via read tools but its writes and non-allowlisted shell calls are
|
|
849
|
-
* bounced until the user approves a submitted plan.
|
|
850
|
-
*/
|
|
851
505
|
private _planMode;
|
|
852
|
-
/**
|
|
853
|
-
* Optional hook run after arg parsing but before tool.fn. Lets the TUI
|
|
854
|
-
* reroute specific tool calls (e.g. edit_file in review mode) without
|
|
855
|
-
* modifying the tool definitions themselves.
|
|
856
|
-
*/
|
|
857
506
|
private _interceptor;
|
|
858
507
|
constructor(opts?: ToolRegistryOptions);
|
|
859
508
|
/** Enable / disable plan-mode enforcement at dispatch. */
|
|
860
509
|
setPlanMode(on: boolean): void;
|
|
861
510
|
/** True when the registry is currently refusing non-readonly calls. */
|
|
862
511
|
get planMode(): boolean;
|
|
863
|
-
/**
|
|
864
|
-
* Install or clear the dispatch interceptor. At most one interceptor
|
|
865
|
-
* is active at a time — calling twice replaces the previous. Pass
|
|
866
|
-
* `null` to remove.
|
|
867
|
-
*/
|
|
512
|
+
/** At most one interceptor active; calling twice replaces. */
|
|
868
513
|
setToolInterceptor(fn: ToolInterceptor | null): void;
|
|
869
514
|
register<A, R>(def: ToolDefinition<A, R>): this;
|
|
870
515
|
has(name: string): boolean;
|
|
@@ -881,29 +526,11 @@ declare class ToolRegistry {
|
|
|
881
526
|
}
|
|
882
527
|
|
|
883
528
|
type EventRole = "assistant_delta" | "assistant_final"
|
|
884
|
-
/**
|
|
885
|
-
* Emitted as `tool_calls[].function.arguments` streams in. A tool
|
|
886
|
-
* call with a large arguments payload produces no `content` or
|
|
887
|
-
* `reasoning_content` bytes — this is the only signal the UI has
|
|
888
|
-
* that the stream is alive during that window.
|
|
889
|
-
*/
|
|
529
|
+
/** Only liveness signal during a large-args tool call (no content/reasoning bytes). */
|
|
890
530
|
| "tool_call_delta"
|
|
891
|
-
/**
|
|
892
|
-
* Yielded immediately before a tool is dispatched. Lets the TUI put
|
|
893
|
-
* up a "▸ tool<X> running…" spinner while the tool's Promise is
|
|
894
|
-
* pending — otherwise the UI looks frozen whenever a tool call
|
|
895
|
-
* takes more than a few hundred ms (a big `filesystem_edit_file`
|
|
896
|
-
* is a typical trigger).
|
|
897
|
-
*/
|
|
531
|
+
/** Pre-dispatch ping so the TUI can show a spinner during long tool awaits. */
|
|
898
532
|
| "tool_start" | "tool" | "done" | "error" | "warning"
|
|
899
|
-
/**
|
|
900
|
-
* Transient "what's happening right now" indicator. Emitted during
|
|
901
|
-
* silent phases — between a tool result and the next iteration's
|
|
902
|
-
* first streaming byte, and right before harvest — so the TUI can
|
|
903
|
-
* show a spinner with explanatory text instead of looking frozen.
|
|
904
|
-
* The UI clears it on the next primary event (assistant_delta,
|
|
905
|
-
* tool_start, tool, assistant_final, error).
|
|
906
|
-
*/
|
|
533
|
+
/** Transient indicator for silent phases; UI clears on next primary event. */
|
|
907
534
|
| "status" | "branch_start" | "branch_progress" | "branch_done";
|
|
908
535
|
interface BranchSummary {
|
|
909
536
|
budget: number;
|
|
@@ -924,26 +551,13 @@ interface LoopEvent {
|
|
|
924
551
|
content: string;
|
|
925
552
|
reasoningDelta?: string;
|
|
926
553
|
toolName?: string;
|
|
927
|
-
/**
|
|
928
|
-
* Raw JSON-string arguments the model sent for a tool call (role === "tool").
|
|
929
|
-
* Populated so transcripts can persist *why* a tool was called, not just
|
|
930
|
-
* what it returned. Needed by `reasonix diff` to explain divergences.
|
|
931
|
-
*/
|
|
554
|
+
/** Raw args JSON — needed by `reasonix diff` to explain why a tool was called. */
|
|
932
555
|
toolArgs?: string;
|
|
933
556
|
/** Cumulative arguments-string length for `role === "tool_call_delta"`. */
|
|
934
557
|
toolCallArgsChars?: number;
|
|
935
|
-
/**
|
|
936
|
-
* Zero-based index of the tool call this delta belongs to. Surfaces
|
|
937
|
-
* multi-tool turns: on a response emitting 4 write_file calls the UI
|
|
938
|
-
* can show "building call 3/?" instead of a context-free spinner.
|
|
939
|
-
*/
|
|
558
|
+
/** Zero-based index of the tool call this delta belongs to (multi-tool progress). */
|
|
940
559
|
toolCallIndex?: number;
|
|
941
|
-
/**
|
|
942
|
-
* Count of prior tool calls (this turn) whose arguments have finished
|
|
943
|
-
* streaming into valid JSON. Not all ready calls have been dispatched
|
|
944
|
-
* yet — dispatch still happens post-stream — but the user gets "2
|
|
945
|
-
* ready" progress feedback while later calls keep streaming.
|
|
946
|
-
*/
|
|
560
|
+
/** Count of tool calls whose args have parsed as valid JSON (UI progress, not dispatch gate). */
|
|
947
561
|
toolCallReadyCount?: number;
|
|
948
562
|
stats?: TurnStats;
|
|
949
563
|
planState?: TypedPlanState;
|
|
@@ -951,15 +565,7 @@ interface LoopEvent {
|
|
|
951
565
|
branch?: BranchSummary;
|
|
952
566
|
branchProgress?: BranchProgress;
|
|
953
567
|
error?: string;
|
|
954
|
-
/**
|
|
955
|
-
* True on `assistant_final` events emitted by the no-tools fallback
|
|
956
|
-
* when the loop hit its budget, was aborted, or tripped the
|
|
957
|
-
* token-context guard. Consumers that act on assistant text (notably
|
|
958
|
-
* the code-mode edit applier) MUST treat these as display-only —
|
|
959
|
-
* the model is "wrapping up," not proposing new work. Applying
|
|
960
|
-
* SEARCH/REPLACE blocks found in a forced summary caused the
|
|
961
|
-
* "analysis became edits" bug in v0.4.1 and earlier.
|
|
962
|
-
*/
|
|
568
|
+
/** Display-only — code-mode applier MUST skip SEARCH/REPLACE in forced-summary text. */
|
|
963
569
|
forcedSummary?: boolean;
|
|
964
570
|
}
|
|
965
571
|
interface CacheFirstLoopOptions {
|
|
@@ -969,94 +575,27 @@ interface CacheFirstLoopOptions {
|
|
|
969
575
|
model?: string;
|
|
970
576
|
maxToolIters?: number;
|
|
971
577
|
stream?: boolean;
|
|
972
|
-
/**
|
|
973
|
-
* Pillar 2 — structured harvesting of R1 reasoning into a typed plan state.
|
|
974
|
-
* Pass `true` for defaults or an options object. Off by default (adds a
|
|
975
|
-
* cheap but non-zero V3 call per turn).
|
|
976
|
-
*/
|
|
977
578
|
harvest?: boolean | HarvestOptions;
|
|
978
|
-
/**
|
|
979
|
-
* Self-consistency branching. Pass a number for just a budget (e.g. 3) or
|
|
980
|
-
* a full `BranchOptions` object. Disables streaming for the branched turn
|
|
981
|
-
* because all samples must complete before selection. Auto-enables harvest
|
|
982
|
-
* since the default selector scores samples by plan-state uncertainty.
|
|
983
|
-
*/
|
|
579
|
+
/** Branching disables streaming (need all samples) and force-enables harvest (selector input). */
|
|
984
580
|
branch?: number | BranchOptions;
|
|
985
|
-
/**
|
|
986
|
-
* Reasoning-effort cap. See {@link ReconfigurableOptions} — default
|
|
987
|
-
* `max` for Reasonix (agent-class use per DeepSeek V4 docs).
|
|
988
|
-
*/
|
|
989
581
|
reasoningEffort?: "high" | "max";
|
|
990
|
-
/**
|
|
991
|
-
* Master switch for auto-escalation paths. See ReconfigurableOptions
|
|
992
|
-
* — defaults to `true` (current behavior); the `flash` and `pro`
|
|
993
|
-
* presets pass `false` to lock the running session to one model.
|
|
994
|
-
*/
|
|
995
582
|
autoEscalate?: boolean;
|
|
996
|
-
/**
|
|
997
|
-
* Soft USD budget for the entire session. When set, the loop:
|
|
998
|
-
* - Emits a one-shot warning event when cumulative cost crosses 80%
|
|
999
|
-
* - Refuses to run the next turn once cumulative cost ≥ budget,
|
|
1000
|
-
* yielding an error that explains how to bump or clear the cap
|
|
1001
|
-
*
|
|
1002
|
-
* Default `undefined` — no cap, no warnings. Reasonix is the cost-
|
|
1003
|
-
* focused agent; the budget is opt-in so users new to the tool
|
|
1004
|
-
* don't get blocked at $0.50 wondering what happened, but heavy /
|
|
1005
|
-
* headless / CI users have a clean circuit breaker available.
|
|
1006
|
-
*/
|
|
583
|
+
/** Soft USD cap — warns at 80%, refuses next turn at 100%. Opt-in (default no cap). */
|
|
1007
584
|
budgetUsd?: number;
|
|
1008
|
-
/**
|
|
1009
|
-
* Session name. When set, the loop pre-loads the session's prior messages
|
|
1010
|
-
* into its log on construction, and appends every new log entry to
|
|
1011
|
-
* `~/.reasonix/sessions/<name>.jsonl` so the next run can resume.
|
|
1012
|
-
*/
|
|
1013
585
|
session?: string;
|
|
1014
|
-
/**
|
|
1015
|
-
* Resolved hook list — loaded from `<project>/.reasonix/settings.json`
|
|
1016
|
-
* + `~/.reasonix/settings.json` by the CLI before constructing the loop.
|
|
1017
|
-
* The loop dispatches `PreToolUse` and `PostToolUse` events itself; the
|
|
1018
|
-
* CLI handles `UserPromptSubmit` and `Stop` since they live at the App
|
|
1019
|
-
* boundary. Empty / unset → no hooks fire (the runtime cost of an empty
|
|
1020
|
-
* filter is one ms). See `src/hooks.ts` for the full contract.
|
|
1021
|
-
*/
|
|
586
|
+
/** PreToolUse + PostToolUse only — UserPromptSubmit / Stop live at the App boundary. */
|
|
1022
587
|
hooks?: ResolvedHook[];
|
|
1023
|
-
/**
|
|
1024
|
-
* `cwd` reported to hooks via the stdin payload. Defaults to `process.cwd()`.
|
|
1025
|
-
* `reasonix code` overrides this to the sandbox root so a hook that does
|
|
1026
|
-
* `cd $REASONIX_CWD` lands in the project, not in the user's shell home.
|
|
1027
|
-
*/
|
|
588
|
+
/** `cwd` reported to hooks; `reasonix code` sets this to the sandbox root, not shell home. */
|
|
1028
589
|
hookCwd?: string;
|
|
1029
590
|
}
|
|
1030
|
-
/**
|
|
1031
|
-
* Pillar 1 — Cache-First Loop.
|
|
1032
|
-
*
|
|
1033
|
-
* - prefix is immutable (cache target)
|
|
1034
|
-
* - log is append-only (preserves prior-turn prefix)
|
|
1035
|
-
* - scratch is per-turn volatile (never sent upstream)
|
|
1036
|
-
*
|
|
1037
|
-
* Yields a stream of events so a TUI can render progressively.
|
|
1038
|
-
*/
|
|
1039
591
|
interface ReconfigurableOptions {
|
|
1040
592
|
model?: string;
|
|
1041
593
|
harvest?: boolean | HarvestOptions;
|
|
1042
594
|
branch?: number | BranchOptions;
|
|
1043
595
|
stream?: boolean;
|
|
1044
|
-
/**
|
|
1045
|
-
* Reasoning-effort cap sent per turn (V4 thinking mode only;
|
|
1046
|
-
* deepseek-chat ignores it). Reasonix pins `max` by default because
|
|
1047
|
-
* DeepSeek's V4 docs flag Claude-Code-style agent loops as the
|
|
1048
|
-
* canonical `max` use case. `/effort high` lets a user step down
|
|
1049
|
-
* mid-session for cheaper, faster turns on simple tasks.
|
|
1050
|
-
*/
|
|
596
|
+
/** V4 thinking mode only; deepseek-chat ignores. */
|
|
1051
597
|
reasoningEffort?: "high" | "max";
|
|
1052
|
-
/**
|
|
1053
|
-
* Master switch for the auto-escalation paths — both the
|
|
1054
|
-
* `<<<NEEDS_PRO>>>` marker scavenge and the failure-count threshold.
|
|
1055
|
-
* `true` (default) preserves the original "flash baseline, jump to
|
|
1056
|
-
* pro when struggling" behavior. `false` locks the active turn to
|
|
1057
|
-
* whatever `model` is set to — used by the `flash` and `pro` presets
|
|
1058
|
-
* which want a hard model commitment.
|
|
1059
|
-
*/
|
|
598
|
+
/** `false` pins to `model` — kills both NEEDS_PRO marker scavenge and failure-count threshold. */
|
|
1060
599
|
autoEscalate?: boolean;
|
|
1061
600
|
}
|
|
1062
601
|
declare class CacheFirstLoop {
|
|
@@ -1074,156 +613,28 @@ declare class CacheFirstLoop {
|
|
|
1074
613
|
harvestOptions: HarvestOptions;
|
|
1075
614
|
branchEnabled: boolean;
|
|
1076
615
|
branchOptions: BranchOptions;
|
|
1077
|
-
/** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
|
|
1078
616
|
reasoningEffort: "high" | "max";
|
|
1079
|
-
/**
|
|
1080
|
-
* Auto-escalation toggle. `true` lets the loop self-promote to pro
|
|
1081
|
-
* mid-turn (NEEDS_PRO marker / failure threshold); `false` keeps it
|
|
1082
|
-
* pinned to `model`. Mutable so the dashboard's preset switcher can
|
|
1083
|
-
* flip it live alongside `model`.
|
|
1084
|
-
*/
|
|
1085
617
|
autoEscalate: boolean;
|
|
1086
|
-
/**
|
|
1087
|
-
* Soft USD budget — see {@link CacheFirstLoopOptions.budgetUsd}.
|
|
1088
|
-
* Mutable so `/budget` slash can set / change / clear it mid-session.
|
|
1089
|
-
* `null` (the default) disables all budget checks.
|
|
1090
|
-
*/
|
|
1091
618
|
budgetUsd: number | null;
|
|
1092
|
-
/**
|
|
1093
|
-
* Set the first time a turn crosses 80% of the budget so the warning
|
|
1094
|
-
* doesn't repeat every turn afterwards. Cleared by `setBudget` (any
|
|
1095
|
-
* change re-arms the warning, including raising the cap above the
|
|
1096
|
-
* current spend).
|
|
1097
|
-
*/
|
|
619
|
+
/** One-shot 80% warning latch — cleared by setBudget so a bump re-arms at the new boundary. */
|
|
1098
620
|
private _budgetWarned;
|
|
1099
621
|
sessionName: string | null;
|
|
1100
|
-
/**
|
|
1101
|
-
* Hook list, mutable so `/hooks reload` can swap it without
|
|
1102
|
-
* reconstructing the loop. Default empty — the filter cost on a
|
|
1103
|
-
* tool call is one array length check.
|
|
1104
|
-
*/
|
|
1105
622
|
hooks: ResolvedHook[];
|
|
1106
|
-
/**
|
|
1107
|
-
* `cwd` reported to hook stdin. Mutable so `/cwd` can switch the
|
|
1108
|
-
* working directory mid-session — the App keeps it in sync with
|
|
1109
|
-
* the same currentRootDir that drives tool re-registration.
|
|
1110
|
-
*/
|
|
1111
623
|
hookCwd: string;
|
|
1112
624
|
/** Number of messages that were pre-loaded from the session file. */
|
|
1113
625
|
readonly resumedMessageCount: number;
|
|
1114
626
|
private _turn;
|
|
1115
627
|
private _streamPreference;
|
|
1116
|
-
/**
|
|
1117
|
-
* AbortController per active turn. Threaded through the DeepSeek
|
|
1118
|
-
* HTTP calls AND every tool dispatch so Esc actually cancels the
|
|
1119
|
-
* in-flight network/subprocess work — not "we'll get to it after
|
|
1120
|
-
* the current call finishes." Re-created at the start of each
|
|
1121
|
-
* `step()` (the prior turn's signal has already fired).
|
|
1122
|
-
*/
|
|
628
|
+
/** Threaded through HTTP + every tool dispatch so Esc cancels in-flight work, not after. */
|
|
1123
629
|
private _turnAbort;
|
|
1124
|
-
/**
|
|
1125
|
-
* "Next turn should run on pro, regardless of this.model." Set by the
|
|
1126
|
-
* `/pro` slash command; consumed at the next turn's start (flipping
|
|
1127
|
-
* `_escalateThisTurn` on and self-clearing) so it's a fire-and-forget
|
|
1128
|
-
* single-turn upgrade. Survives across multiple slash inputs so
|
|
1129
|
-
* typing `/pro` and then hesitating a while before submitting a real
|
|
1130
|
-
* message still applies.
|
|
1131
|
-
*/
|
|
1132
630
|
private _proArmedForNextTurn;
|
|
1133
|
-
/**
|
|
1134
|
-
* Active for the current turn only — true means every model call
|
|
1135
|
-
* this turn uses pro instead of `this.model`. Turned on by EITHER
|
|
1136
|
-
* the pro-armed consumption OR the mid-turn auto-escalation
|
|
1137
|
-
* threshold (see `_turnFailureCount`). Cleared at turn end.
|
|
1138
|
-
*/
|
|
1139
631
|
private _escalateThisTurn;
|
|
1140
|
-
/**
|
|
1141
|
-
* Visible-failure count for the current turn. Incremented by tool
|
|
1142
|
-
* dispatch paths when a result matches a known "flash is struggling"
|
|
1143
|
-
* shape (SEARCH-not-found errors, scavenge / truncation / storm
|
|
1144
|
-
* repair fires). Once it hits {@link FAILURE_ESCALATION_THRESHOLD},
|
|
1145
|
-
* the remainder of the turn's model calls auto-upgrade to pro so
|
|
1146
|
-
* the user doesn't watch flash retry the same edit 5 times.
|
|
1147
|
-
*/
|
|
1148
632
|
private _turnFailureCount;
|
|
1149
|
-
/**
|
|
1150
|
-
* Per-type breakdown of failure signals counted toward the turn's
|
|
1151
|
-
* auto-escalation threshold. Surfaced in the warning when the
|
|
1152
|
-
* threshold trips so the user sees what kind of trouble flash
|
|
1153
|
-
* actually hit ("3× search-mismatch, 2× truncated") rather than
|
|
1154
|
-
* just a bare count. Reset alongside _turnFailureCount.
|
|
1155
|
-
*/
|
|
1156
633
|
private _turnFailureTypes;
|
|
1157
634
|
constructor(opts: CacheFirstLoopOptions);
|
|
1158
|
-
/**
|
|
1159
|
-
* Shrink the log by re-truncating oversized tool results to a tighter
|
|
1160
|
-
* token cap, and persist the result back to disk so the next launch
|
|
1161
|
-
* doesn't re-inherit a fat session file. Returns a summary the TUI
|
|
1162
|
-
* can display.
|
|
1163
|
-
*
|
|
1164
|
-
* The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
|
|
1165
|
-
* capped at the same effective context footprint as English instead
|
|
1166
|
-
* of slipping past a char cap at 2× the token cost. Default 4000
|
|
1167
|
-
* tokens, matching the token-aware dispatch cap from 0.5.2.
|
|
1168
|
-
*
|
|
1169
|
-
* Only tool-role messages are touched (same rationale as
|
|
1170
|
-
* {@link healLoadedMessages}). User and assistant messages carry
|
|
1171
|
-
* authored intent we can't mechanically shrink without losing
|
|
1172
|
-
* meaning.
|
|
1173
|
-
*/
|
|
1174
|
-
/**
|
|
1175
|
-
* Conservative args-only shrink fired after every tool response —
|
|
1176
|
-
* strictly about ONE thing: stop oversized `edit_file` / `write_file`
|
|
1177
|
-
* arguments from riding every future turn's prompt.
|
|
1178
|
-
*
|
|
1179
|
-
* Why this is worth doing AUTOMATICALLY (not just on /compact):
|
|
1180
|
-
* Each tool-call arguments string sticks in the log verbatim. On a
|
|
1181
|
-
* coding session with ~10 edits, that's 20-40K tokens of stale
|
|
1182
|
-
* SEARCH/REPLACE text riding along on every turn. Even at a 98.9%
|
|
1183
|
-
* cache hit rate the input cost still adds up linearly (cache-hit
|
|
1184
|
-
* price × tokens × turns). Compacting IMMEDIATELY after the tool
|
|
1185
|
-
* responds means the next turn's prompt is already smaller — the
|
|
1186
|
-
* shrink is a one-time write that saves every future prompt.
|
|
1187
|
-
*
|
|
1188
|
-
* Threshold rationale: 800 tokens ≈ 3 KB. A typical 20-line edit's
|
|
1189
|
-
* args land well under that; massive rewrites (whole-file content,
|
|
1190
|
-
* 100+ line refactors) land above and get the compaction. Small
|
|
1191
|
-
* edits stay byte-verbatim so nothing common-case changes.
|
|
1192
|
-
*
|
|
1193
|
-
* Safety: we ONLY shrink args whose tool has ALREADY responded.
|
|
1194
|
-
* Structurally that's every call in `log.toMessages()` at this
|
|
1195
|
-
* point — the current turn's assistant/tool pairing is by
|
|
1196
|
-
* construction closed by the time we get here (append happens
|
|
1197
|
-
* AFTER dispatch). The in-flight assistant message being built
|
|
1198
|
-
* lives in scratch, not the log, so this pass can't touch it.
|
|
1199
|
-
*
|
|
1200
|
-
* Model impact: the model may occasionally want to reference the
|
|
1201
|
-
* exact SEARCH text of a prior edit — it then reads the file
|
|
1202
|
-
* directly (which shows current state) or looks at the preceding
|
|
1203
|
-
* assistant text (which has its plan). Losing the stale args is a
|
|
1204
|
-
* net win: one extra read_file vs. dragging N KB of stale text
|
|
1205
|
-
* through every subsequent turn.
|
|
1206
|
-
*/
|
|
635
|
+
/** Shrink huge edit_file/write_file args post-dispatch — tool result already explains. */
|
|
1207
636
|
private compactToolCallArgsAfterResponse;
|
|
1208
|
-
/**
|
|
1209
|
-
* Fired at the END of a turn (just before `done` is yielded). Shrinks
|
|
1210
|
-
* every tool RESULT in the log that exceeds {@link TURN_END_RESULT_CAP_TOKENS}
|
|
1211
|
-
* to a tight cap so the NEXT turn's prompt doesn't re-pay for big
|
|
1212
|
-
* reads or searches done earlier. Unlike the reactive 40/80%
|
|
1213
|
-
* thresholds which react to context pressure, this runs unconditionally
|
|
1214
|
-
* — the win is preventive: each turn's big outputs get trimmed before
|
|
1215
|
-
* they ride into the next prompt. Saves compounding cost on long
|
|
1216
|
-
* sessions.
|
|
1217
|
-
*
|
|
1218
|
-
* Why compact the JUST-finished turn's results too (not just older
|
|
1219
|
-
* turns)? The same-turn iters already consumed the raw content to
|
|
1220
|
-
* make their decisions — the log is only carried forward for future
|
|
1221
|
-
* prompts. And "let me re-read the file" is vastly cheaper than
|
|
1222
|
-
* "carry this 12KB result in every future turn's prompt forever."
|
|
1223
|
-
*
|
|
1224
|
-
* Safe by construction: args-compact for THIS turn already ran
|
|
1225
|
-
* inside `compactToolCallArgsAfterResponse`; this pass is orthogonal.
|
|
1226
|
-
*/
|
|
637
|
+
/** Preventive end-of-turn shrink — trim big results before they ride into the next prompt. */
|
|
1227
638
|
private autoCompactToolResultsOnTurnEnd;
|
|
1228
639
|
compact(maxTokens?: number): {
|
|
1229
640
|
healedCount: number;
|
|
@@ -1231,40 +642,14 @@ declare class CacheFirstLoop {
|
|
|
1231
642
|
charsSaved: number;
|
|
1232
643
|
};
|
|
1233
644
|
appendAndPersist(message: ChatMessage): void;
|
|
1234
|
-
/**
|
|
1235
|
-
* Start a fresh conversation WITHOUT exiting. Drops every message
|
|
1236
|
-
* in the in-memory log AND rewrites the session file to empty so
|
|
1237
|
-
* a resume won't re-hydrate the old turns. Unlike `/forget`, which
|
|
1238
|
-
* deletes the session entirely, this keeps the session name and
|
|
1239
|
-
* config intact — it's the "new chat" button.
|
|
1240
|
-
*
|
|
1241
|
-
* The immutable prefix (system prompt + tool specs) is preserved
|
|
1242
|
-
* — that's the cache-first invariant, not part of the conversation.
|
|
1243
|
-
* Returns the number of messages dropped so the UI can show it.
|
|
1244
|
-
*/
|
|
645
|
+
/** "New chat" — drops messages but keeps session + immutable prefix (cache-first invariant). */
|
|
1245
646
|
clearLog(): {
|
|
1246
647
|
dropped: number;
|
|
1247
648
|
};
|
|
1248
|
-
/**
|
|
1249
|
-
* Reconfigure model/harvest/branch/stream mid-session. The loop's log,
|
|
1250
|
-
* scratch, and stats are preserved — only the per-turn behavior changes.
|
|
1251
|
-
* Used by the TUI's slash commands and by library callers who want to
|
|
1252
|
-
* flip a knob between turns.
|
|
1253
|
-
*/
|
|
1254
649
|
configure(opts: ReconfigurableOptions): void;
|
|
1255
|
-
/**
|
|
1256
|
-
* Set / change / clear the soft USD budget. `null` (or any non-
|
|
1257
|
-
* positive number) disables the cap entirely. Re-arms the 80%
|
|
1258
|
-
* warning so a user who bumps the cap mid-session sees a fresh
|
|
1259
|
-
* threshold message at the new boundary.
|
|
1260
|
-
*/
|
|
650
|
+
/** `null` disables the cap; any change re-arms the 80% warning. */
|
|
1261
651
|
setBudget(usd: number | null): void;
|
|
1262
|
-
/**
|
|
1263
|
-
* Arm pro for the next turn (consumed at turn start). Called by
|
|
1264
|
-
* `/pro`. Idempotent — repeated calls stay armed, `disarmPro()`
|
|
1265
|
-
* clears. Separate from `/preset max` which persistently switches
|
|
1266
|
-
* this.model; armed state is strictly single-turn.
|
|
1267
|
-
*/
|
|
652
|
+
/** Single-turn upgrade consumed at next step() — distinct from `/preset max` (persistent). */
|
|
1268
653
|
armProForNextTurn(): void;
|
|
1269
654
|
/** Cancel `/pro` arming before the next turn starts. */
|
|
1270
655
|
disarmPro(): void;
|
|
@@ -1272,131 +657,31 @@ declare class CacheFirstLoop {
|
|
|
1272
657
|
get proArmed(): boolean;
|
|
1273
658
|
/** UI surface — true while the current turn is running on pro (armed or auto-escalated). */
|
|
1274
659
|
get escalatedThisTurn(): boolean;
|
|
1275
|
-
/**
|
|
1276
|
-
* Model the current model call should use. Defaults to `this.model`;
|
|
1277
|
-
* upgrades to {@link ESCALATION_MODEL} when the turn is armed for
|
|
1278
|
-
* pro (via `/pro`) or has hit the failure-escalation threshold.
|
|
1279
|
-
* Same thinking + effort policy applies regardless — pro defaults
|
|
1280
|
-
* to thinking=enabled and effort=max, which the current turn wanted
|
|
1281
|
-
* anyway when flash was struggling.
|
|
1282
|
-
*/
|
|
1283
660
|
private modelForCurrentCall;
|
|
1284
|
-
/**
|
|
1285
|
-
* Parse the escalation marker out of the model's leading content.
|
|
1286
|
-
* Returns `{ matched: true, reason? }` for both bare and reason-
|
|
1287
|
-
* carrying forms. Only the FIRST line matters — the model is
|
|
1288
|
-
* instructed to emit the marker as the first output token if at
|
|
1289
|
-
* all. Matches anywhere else in the text are normal content
|
|
1290
|
-
* references (e.g. the user asked about the marker itself).
|
|
1291
|
-
*/
|
|
661
|
+
/** Anchored to lead — mid-text matches are normal content (user asking about the marker). */
|
|
1292
662
|
private parseEscalationMarker;
|
|
1293
663
|
/** Convenience boolean — same gate the streaming path used to call. */
|
|
1294
664
|
private isEscalationRequest;
|
|
1295
|
-
/**
|
|
1296
|
-
* Could `buf` STILL plausibly become the full marker as more chunks
|
|
1297
|
-
* arrive? Drives the streaming buffer's flush decision: while this
|
|
1298
|
-
* is true we keep accumulating; once it's false (or the buffer
|
|
1299
|
-
* exceeds the byte limit) we flush so the user isn't staring at a
|
|
1300
|
-
* delayed display for arbitrary content that just happens to start
|
|
1301
|
-
* with `<`.
|
|
1302
|
-
*/
|
|
665
|
+
/** Drives streaming flush — while plausibly partial, keep accumulating; else flush. */
|
|
1303
666
|
private looksLikePartialEscalationMarker;
|
|
1304
|
-
/**
|
|
1305
|
-
* Check whether a tool result string looks like a "flash struggled"
|
|
1306
|
-
* signal and, if so, increment the turn's failure counter. Escalates
|
|
1307
|
-
* the REST of the current turn to pro once the threshold is hit.
|
|
1308
|
-
* Idempotent after escalation — further failures don't re-escalate,
|
|
1309
|
-
* but the turn is already on pro so it doesn't matter.
|
|
1310
|
-
*
|
|
1311
|
-
* Return: `true` when this call tipped the turn into escalation
|
|
1312
|
-
* mode (so the loop can surface a one-time warning to the user).
|
|
1313
|
-
*/
|
|
667
|
+
/** Returns true ONLY on the tipping call — caller surfaces a one-shot warning. */
|
|
1314
668
|
private noteToolFailureSignal;
|
|
1315
|
-
/**
|
|
1316
|
-
* Render `_turnFailureTypes` as a comma-separated breakdown like
|
|
1317
|
-
* "2× search-mismatch, 1× truncated" for the auto-escalation
|
|
1318
|
-
* warning. Empty if no types have been recorded yet (defensive —
|
|
1319
|
-
* the warning sites only call this after a bump).
|
|
1320
|
-
*/
|
|
1321
669
|
private formatFailureBreakdown;
|
|
1322
670
|
private buildMessages;
|
|
1323
|
-
/**
|
|
1324
|
-
* Signal the currently-running {@link step} to stop **now**. Cancels
|
|
1325
|
-
* the in-flight network request (DeepSeek HTTP/SSE) AND any tool call
|
|
1326
|
-
* currently dispatching (MCP `notifications/cancelled` + promise
|
|
1327
|
-
* reject). The loop itself also sees `signal.aborted` at each
|
|
1328
|
-
* iteration boundary and exits quickly instead of looping again.
|
|
1329
|
-
* Called by the TUI on Esc.
|
|
1330
|
-
*/
|
|
1331
671
|
abort(): void;
|
|
1332
|
-
/**
|
|
1333
|
-
* Drop everything in the log after (and including) the most recent
|
|
1334
|
-
* user message. Used by `/retry` so the caller can re-send that
|
|
1335
|
-
* message with a fresh turn instead of layering another response on
|
|
1336
|
-
* top of the prior exchange. Returns the content of the dropped user
|
|
1337
|
-
* message, or `null` if there isn't one yet.
|
|
1338
|
-
*
|
|
1339
|
-
* Persists by rewriting the session file — otherwise the next
|
|
1340
|
-
* launch would rehydrate the old exchange and `/retry` would seem
|
|
1341
|
-
* to have done nothing.
|
|
1342
|
-
*/
|
|
672
|
+
/** Drop the last user message + everything after; caller re-sends. Persists to session file. */
|
|
1343
673
|
retryLastUser(): string | null;
|
|
1344
674
|
step(userInput: string): AsyncGenerator<LoopEvent>;
|
|
1345
675
|
private forceSummaryAfterIterLimit;
|
|
1346
676
|
run(userInput: string, onEvent?: (ev: LoopEvent) => void): Promise<string>;
|
|
1347
|
-
/**
|
|
1348
|
-
* Build an assistant message for the log. The `producingModel` arg is
|
|
1349
|
-
* the model that actually generated this turn (flash, pro, the
|
|
1350
|
-
* forced-summary flash call, `this.model` for synthetics, etc.) —
|
|
1351
|
-
* NOT `this.model`, because escalation + forced-summary can both
|
|
1352
|
-
* route a single turn to a different model.
|
|
1353
|
-
*
|
|
1354
|
-
* The single invariant this encodes: if the producing model is
|
|
1355
|
-
* thinking-mode, `reasoning_content` MUST be present on the
|
|
1356
|
-
* persisted message — even as an empty string. DeepSeek's validator
|
|
1357
|
-
* 400s the NEXT request if any historical thinking-mode assistant
|
|
1358
|
-
* turn is missing it. We used to gate on `reasoning.length > 0`,
|
|
1359
|
-
* which silently dropped the field whenever the stream emitted zero
|
|
1360
|
-
* reasoning deltas or the API returned `reasoning_content: null` —
|
|
1361
|
-
* both legitimate edge cases the 0.5.15/0.5.18 fixes missed.
|
|
1362
|
-
*/
|
|
677
|
+
/** Thinking-mode producer ⇒ reasoning_content MUST be set (even ""), or next call 400s. */
|
|
1363
678
|
private assistantMessage;
|
|
1364
|
-
/**
|
|
1365
|
-
* Synthetic assistant message (abort notices, future system injections)
|
|
1366
|
-
* — no real API round trip. Delegates to {@link assistantMessage} with
|
|
1367
|
-
* `this.model` as the stand-in producer, so the same thinking-mode
|
|
1368
|
-
* invariant applies: reasoner sessions get an empty-string
|
|
1369
|
-
* `reasoning_content`; V3 sessions get nothing.
|
|
1370
|
-
*/
|
|
679
|
+
/** Abort notices etc — uses this.model as stand-in producer for the thinking-mode stamp. */
|
|
1371
680
|
private syntheticAssistantMessage;
|
|
1372
681
|
}
|
|
1373
|
-
/**
|
|
1374
|
-
* R1 occasionally hallucinates tool-call markup as plain text when the
|
|
1375
|
-
* real tool channel has been closed — typically our forced-summary
|
|
1376
|
-
* path, where `tools: undefined` is supposed to force prose but isn't
|
|
1377
|
-
* always respected. The markup isn't parsed by our tool-call path
|
|
1378
|
-
* (the API response's structured `tool_calls` field is empty), so
|
|
1379
|
-
* it's just noise in the user's view. Strip known envelope shapes.
|
|
1380
|
-
*
|
|
1381
|
-
* Exported so tests can exercise it against concrete R1 outputs.
|
|
1382
|
-
*/
|
|
682
|
+
/** Strip hallucinated tool-call envelopes — `tools: undefined` doesn't always force prose. */
|
|
1383
683
|
declare function stripHallucinatedToolMarkup(s: string): string;
|
|
1384
|
-
/**
|
|
1385
|
-
* Enforce tool_calls ↔ tool pairing across a message log. DeepSeek
|
|
1386
|
-
* rejects two shapes at the API boundary:
|
|
1387
|
-
* (a) assistant with tool_calls not followed by matching tool
|
|
1388
|
-
* responses ("insufficient tool messages following tool_calls")
|
|
1389
|
-
* (b) tool message without a preceding assistant.tool_calls with
|
|
1390
|
-
* the matching tool_call_id ("must be a response to a preceding
|
|
1391
|
-
* message with 'tool_calls'")
|
|
1392
|
-
*
|
|
1393
|
-
* Corrupted session files from earlier builds have hit both. This pass
|
|
1394
|
-
* rebuilds the message stream so only well-formed (assistant.tool_calls
|
|
1395
|
-
* + all matching responses) groups survive. Plain user/assistant/system
|
|
1396
|
-
* messages (no tool_calls) always pass through.
|
|
1397
|
-
*
|
|
1398
|
-
* Exported so both char-based and token-based heal can compose it.
|
|
1399
|
-
*/
|
|
684
|
+
/** Drops both unpaired assistant.tool_calls and stray tool messages — DeepSeek 400s on either. */
|
|
1400
685
|
declare function fixToolCallPairing(messages: ChatMessage[]): {
|
|
1401
686
|
messages: ChatMessage[];
|
|
1402
687
|
droppedAssistantCalls: number;
|
|
@@ -1407,67 +692,19 @@ declare function healLoadedMessages(messages: ChatMessage[], maxChars: number):
|
|
|
1407
692
|
healedCount: number;
|
|
1408
693
|
healedFrom: number;
|
|
1409
694
|
};
|
|
1410
|
-
/**
|
|
1411
|
-
* Token-aware counterpart of {@link healLoadedMessages}. Used at
|
|
1412
|
-
* session-load time so resumed sessions come back capped at the same
|
|
1413
|
-
* token budget (not char budget) as live tool results — CJK text no
|
|
1414
|
-
* longer slips past at 2× the intended token cost when re-hydrated.
|
|
1415
|
-
*
|
|
1416
|
-
* Still does the same structural pass for tool_calls ↔ tool pairing;
|
|
1417
|
-
* that logic is orthogonal to the truncation cap.
|
|
1418
|
-
*/
|
|
695
|
+
/** Token-cap variant — char cap would let CJK slip past at 2× the intended token cost. */
|
|
1419
696
|
declare function healLoadedMessagesByTokens(messages: ChatMessage[], maxTokens: number): {
|
|
1420
697
|
messages: ChatMessage[];
|
|
1421
698
|
healedCount: number;
|
|
1422
699
|
tokensSaved: number;
|
|
1423
700
|
charsSaved: number;
|
|
1424
701
|
};
|
|
1425
|
-
/**
|
|
1426
|
-
* Turn raw `DeepSeek NNN: {json}` errors into short actionable hints.
|
|
1427
|
-
* Client code throws these verbatim from the HTTP layer (see client.ts);
|
|
1428
|
-
* this is the one place the UI text layer reads to decide what the user
|
|
1429
|
-
* actually needs to do about it.
|
|
1430
|
-
*
|
|
1431
|
-
* Covered codes (per DeepSeek's error-code doc):
|
|
1432
|
-
* - 400 + "maximum context length" → context-overflow, point at /forget
|
|
1433
|
-
* - 400 generic → strip the JSON, show inner message
|
|
1434
|
-
* - 401 → API key rejected, point at `reasonix setup`
|
|
1435
|
-
* - 402 → balance depleted, link to top-up page
|
|
1436
|
-
* - 422 → param error, show inner message (usually explains which field)
|
|
1437
|
-
*
|
|
1438
|
-
* 429/500/502/503/504 are swallowed by retry.ts before they reach here;
|
|
1439
|
-
* if they DO reach here (all retries exhausted), the raw string already
|
|
1440
|
-
* says "DeepSeek 503: server busy" etc. which is informative enough.
|
|
1441
|
-
*/
|
|
702
|
+
/** Single text-layer DeepSeek-error formatter — 429/5xx never reach here (retry.ts swallows). */
|
|
1442
703
|
declare function formatLoopError(err: Error): string;
|
|
1443
704
|
|
|
1444
|
-
/**
|
|
1445
|
-
* Expand `@path/to/file` mentions in a user prompt to inline file
|
|
1446
|
-
* content.
|
|
1447
|
-
*
|
|
1448
|
-
* Why: most interactive coding sessions start with "look at X, then
|
|
1449
|
-
* change Y". Typing `@src/loop.ts` reads faster and cheaper than
|
|
1450
|
-
* "look at src/loop.ts (and the model fires read_file, and we pay for
|
|
1451
|
-
* the round trip)" — the model sees the file content from turn 1
|
|
1452
|
-
* instead of round-tripping a tool call for it.
|
|
1453
|
-
*
|
|
1454
|
-
* Shape: the user's text is kept verbatim. Expanded file contents are
|
|
1455
|
-
* appended in a "Referenced files" block at the end, each wrapped in
|
|
1456
|
-
* `<file path="...">...</file>` so the model can cite them back
|
|
1457
|
-
* unambiguously.
|
|
1458
|
-
*
|
|
1459
|
-
* Safety: paths must resolve inside `rootDir` (no `..` escape, no
|
|
1460
|
-
* absolute paths), must exist as a regular file, and must be under
|
|
1461
|
-
* `maxBytes`. Missing / too-large / escaping paths get a short note
|
|
1462
|
-
* appended instead of content so the user sees why it was skipped.
|
|
1463
|
-
*/
|
|
705
|
+
/** Expand `@path` mentions inline. Paths must resolve inside rootDir; escapes / oversize get a skip note, not content. */
|
|
1464
706
|
/** Caps match tool-result dispatch truncation (0.5.2). */
|
|
1465
707
|
declare const DEFAULT_AT_MENTION_MAX_BYTES: number;
|
|
1466
|
-
/**
|
|
1467
|
-
* Default directory names skipped when listing files for the picker.
|
|
1468
|
-
* Matches what most repos gitignore AND keeps the picker off the
|
|
1469
|
-
* hottest bloat — `node_modules` alone can be 100k+ entries.
|
|
1470
|
-
*/
|
|
1471
708
|
declare const DEFAULT_PICKER_IGNORE_DIRS: readonly string[];
|
|
1472
709
|
interface ListFilesOptions {
|
|
1473
710
|
/** Cap the walk once we've collected this many entries. Default 500. */
|
|
@@ -1475,23 +712,7 @@ interface ListFilesOptions {
|
|
|
1475
712
|
/** Directory names to skip entirely. Defaults to {@link DEFAULT_PICKER_IGNORE_DIRS}. */
|
|
1476
713
|
ignoreDirs?: readonly string[];
|
|
1477
714
|
}
|
|
1478
|
-
/**
|
|
1479
|
-
* Walk `root` recursively and return relative file paths (forward-slash
|
|
1480
|
-
* separator, regardless of platform) for the `@` picker.
|
|
1481
|
-
*
|
|
1482
|
-
* Synchronous on purpose: this runs once at App mount (and on each turn
|
|
1483
|
-
* so newly-created files show up) and blocks the render thread for a
|
|
1484
|
-
* predictable ~10-50ms on a moderate repo. An async variant would need
|
|
1485
|
-
* to coordinate with the Ink render loop; sync fits the rest of the
|
|
1486
|
-
* TUI's single-turn-per-tick model cleanly.
|
|
1487
|
-
*
|
|
1488
|
-
* Skips:
|
|
1489
|
-
* - directories in `ignoreDirs` (default: DEFAULT_PICKER_IGNORE_DIRS)
|
|
1490
|
-
* - any directory whose name starts with `.` (covers `.git`,
|
|
1491
|
-
* `.vscode`, dotfile vendors). Dotfile REGULAR FILES (`.env`,
|
|
1492
|
-
* `.gitignore`, `.prettierrc`) are kept — users reference them.
|
|
1493
|
-
* - entries the walker can't read (permission errors, broken links).
|
|
1494
|
-
*/
|
|
715
|
+
/** Sync on purpose — fits the TUI's single-turn-per-tick model. Skips dot-DIRS but keeps dotfiles. */
|
|
1495
716
|
declare function listFilesSync(root: string, opts?: ListFilesOptions): string[];
|
|
1496
717
|
interface FileWithStats {
|
|
1497
718
|
/** Relative path with forward-slash separator. */
|
|
@@ -1499,46 +720,12 @@ interface FileWithStats {
|
|
|
1499
720
|
/** Modification time (Date.getTime() / ms since epoch). 0 when stat failed. */
|
|
1500
721
|
mtimeMs: number;
|
|
1501
722
|
}
|
|
1502
|
-
/**
|
|
1503
|
-
* Same walk as {@link listFilesSync} but also statS each file for
|
|
1504
|
-
* modification time. Used by the `@` picker to surface recently-
|
|
1505
|
-
* edited files first — matches VS Code Quick Open / similar UX.
|
|
1506
|
-
*
|
|
1507
|
-
* Stat failures don't throw: the entry is kept with `mtimeMs: 0` so
|
|
1508
|
-
* it still appears in the picker (just sinks to the bottom of the
|
|
1509
|
-
* recency sort).
|
|
1510
|
-
*/
|
|
723
|
+
/** Stat failures kept as `mtimeMs: 0` — entry still appears, sinks to bottom of recency sort. */
|
|
1511
724
|
declare function listFilesWithStatsSync(root: string, opts?: ListFilesOptions): FileWithStats[];
|
|
1512
|
-
/**
|
|
1513
|
-
* Async variant of {@link listFilesWithStatsSync}. Same walk semantics
|
|
1514
|
-
* (DFS, alphabetical, respects ignore + maxResults), but each
|
|
1515
|
-
* directory's entries are stat'd in parallel via `Promise.all`,
|
|
1516
|
-
* which slashes wall-clock time on Windows where individual stat
|
|
1517
|
-
* syscalls are 3-5x slower than Linux.
|
|
1518
|
-
*
|
|
1519
|
-
* Use this from the TUI mount path so a 500-file repo doesn't add
|
|
1520
|
-
* 200-300ms of synchronous block to first paint. Sync variant is
|
|
1521
|
-
* kept for paths where the caller can't `await` (server APIs,
|
|
1522
|
-
* test scaffolding).
|
|
1523
|
-
*/
|
|
725
|
+
/** Parallel stat per directory — Windows stat syscalls are 3-5× slower than Linux. */
|
|
1524
726
|
declare function listFilesWithStatsAsync(root: string, opts?: ListFilesOptions): Promise<FileWithStats[]>;
|
|
1525
|
-
/**
|
|
1526
|
-
* Prefix pattern used by the `@` picker to detect an IN-PROGRESS
|
|
1527
|
-
* mention at the END of the input buffer. Captures the partial path
|
|
1528
|
-
* (which may be empty — just `@`) so the picker can use it as a
|
|
1529
|
-
* substring filter.
|
|
1530
|
-
*
|
|
1531
|
-
* Distinct from {@link AT_MENTION_PATTERN} (which finds completed
|
|
1532
|
-
* mentions anywhere in the text for expansion-at-submit). This one
|
|
1533
|
-
* fires on the trailing token only, anchored at end-of-input.
|
|
1534
|
-
*/
|
|
727
|
+
/** Trailing-token only, anchored at end-of-input — distinct from `AT_MENTION_PATTERN` which scans all. */
|
|
1535
728
|
declare const AT_PICKER_PREFIX: RegExp;
|
|
1536
|
-
/**
|
|
1537
|
-
* Return the picker state for a given input buffer: the partial query
|
|
1538
|
-
* (may be empty string — just `@`) and the buffer offset of the `@`
|
|
1539
|
-
* character. `null` when the buffer doesn't end in a mention-in-
|
|
1540
|
-
* progress.
|
|
1541
|
-
*/
|
|
1542
729
|
declare function detectAtPicker(input: string): {
|
|
1543
730
|
query: string;
|
|
1544
731
|
atOffset: number;
|
|
@@ -1548,42 +735,10 @@ type PickerCandidate = string | FileWithStats;
|
|
|
1548
735
|
interface RankPickerOptions {
|
|
1549
736
|
/** Upper bound on returned entries. Default 40. */
|
|
1550
737
|
limit?: number;
|
|
1551
|
-
/**
|
|
1552
|
-
* Paths the user or model has touched recently (via tool calls like
|
|
1553
|
-
* `read_file` / `edit_file`). Matching paths get a recency boost so
|
|
1554
|
-
* the picker surfaces "stuff I just looked at" near the top.
|
|
1555
|
-
*/
|
|
1556
738
|
recentlyUsed?: readonly string[];
|
|
1557
739
|
}
|
|
1558
|
-
/**
|
|
1559
|
-
* Filter and rank candidate files against the picker's partial query.
|
|
1560
|
-
*
|
|
1561
|
-
* Empty query:
|
|
1562
|
-
* - Sort by "recently used" bucket first (if provided), then mtime
|
|
1563
|
-
* descending (newer first), then path alpha.
|
|
1564
|
-
* - Pure-string input (no mtime data) falls back to alpha since
|
|
1565
|
-
* recency info isn't available.
|
|
1566
|
-
*
|
|
1567
|
-
* Non-empty query:
|
|
1568
|
-
* - Case-insensitive substring match, with a basename-prefix boost
|
|
1569
|
-
* so `lo` floats `loop.ts`-shaped paths to the top.
|
|
1570
|
-
* - Ties broken first by recently-used membership, then mtime.
|
|
1571
|
-
*
|
|
1572
|
-
* Back-compat: passes `string[]` through the same logic (mtime = 0,
|
|
1573
|
-
* recently-used still honored).
|
|
1574
|
-
*/
|
|
1575
740
|
declare function rankPickerCandidates(files: readonly PickerCandidate[], query: string, limitOrOpts?: number | RankPickerOptions): string[];
|
|
1576
|
-
/**
|
|
1577
|
-
* Matches `@` at a word boundary (start-of-string or preceded by
|
|
1578
|
-
* whitespace) followed by a path-like token. Deliberately rejects `@`
|
|
1579
|
-
* embedded in longer words (email addresses, mentions on social sites)
|
|
1580
|
-
* by requiring the word boundary.
|
|
1581
|
-
*
|
|
1582
|
-
* Path charset keeps it to the characters that appear in real repo
|
|
1583
|
-
* paths — letters, digits, `_` `-` `.` `/` `\`. Trailing `.` (e.g.
|
|
1584
|
-
* `@foo.ts.`) is stripped before lookup so a sentence-terminating
|
|
1585
|
-
* period doesn't break the mention.
|
|
1586
|
-
*/
|
|
741
|
+
/** Word-boundary anchor rejects `@` embedded in emails / social handles; trailing `.` stripped before lookup. */
|
|
1587
742
|
declare const AT_MENTION_PATTERN: RegExp;
|
|
1588
743
|
interface AtMentionExpansion {
|
|
1589
744
|
/** The raw `@path` token as it appeared in the text. */
|
|
@@ -1600,10 +755,6 @@ interface AtMentionExpansion {
|
|
|
1600
755
|
interface AtMentionOptions {
|
|
1601
756
|
/** Max file size in bytes before a mention is skipped. */
|
|
1602
757
|
maxBytes?: number;
|
|
1603
|
-
/**
|
|
1604
|
-
* Optional file-system overrides for tests. Real callers omit these;
|
|
1605
|
-
* the helper falls through to `node:fs`.
|
|
1606
|
-
*/
|
|
1607
758
|
fs?: {
|
|
1608
759
|
exists: (path: string) => boolean;
|
|
1609
760
|
isFile: (path: string) => boolean;
|
|
@@ -1611,36 +762,12 @@ interface AtMentionOptions {
|
|
|
1611
762
|
read: (path: string) => string;
|
|
1612
763
|
};
|
|
1613
764
|
}
|
|
1614
|
-
/**
|
|
1615
|
-
* Expand `@path` mentions in `text`. Returns the (possibly augmented)
|
|
1616
|
-
* text plus a per-mention report so the caller can surface expansions
|
|
1617
|
-
* in the UI.
|
|
1618
|
-
*/
|
|
1619
765
|
declare function expandAtMentions(text: string, rootDir: string, opts?: AtMentionOptions): {
|
|
1620
766
|
text: string;
|
|
1621
767
|
expansions: AtMentionExpansion[];
|
|
1622
768
|
};
|
|
1623
769
|
|
|
1624
|
-
/**
|
|
1625
|
-
* Project memory — a user-authored `REASONIX.md` in the project root
|
|
1626
|
-
* that gets pinned into the immutable-prefix system prompt.
|
|
1627
|
-
*
|
|
1628
|
-
* Design notes:
|
|
1629
|
-
*
|
|
1630
|
-
* - The file lands in `ImmutablePrefix.system`, so the whole memory
|
|
1631
|
-
* block is hashed into the cache prefix fingerprint. Editing the
|
|
1632
|
-
* file invalidates the prefix; unchanged memory across sessions
|
|
1633
|
-
* keeps the DeepSeek prefix cache warm. That matches Pillar 1 —
|
|
1634
|
-
* memory is a deliberate, stable prefix, not per-turn drift.
|
|
1635
|
-
* - Only one source: the working-root `REASONIX.md`. No parent walk,
|
|
1636
|
-
* no `~/.reasonix/REASONIX.md`, no CLAUDE.md fallback. User-global
|
|
1637
|
-
* memory can come later; for v1 one file == one mental model.
|
|
1638
|
-
* - Truncated at 8 000 chars (≈ 2k tokens). `.gitignore` gets 2 000
|
|
1639
|
-
* because it's a constraint dump; memory gets more headroom because
|
|
1640
|
-
* it's deliberate instructions.
|
|
1641
|
-
* - Opt-out via `REASONIX_MEMORY=off|false|0`. No CLI flag — memory
|
|
1642
|
-
* is a file, `rm REASONIX.md` is the other opt-out.
|
|
1643
|
-
*/
|
|
770
|
+
/** REASONIX.md pinned into ImmutablePrefix.system; edits invalidate the prefix-cache fingerprint. */
|
|
1644
771
|
declare const PROJECT_MEMORY_FILE = "REASONIX.md";
|
|
1645
772
|
declare const PROJECT_MEMORY_MAX_CHARS = 8000;
|
|
1646
773
|
interface ProjectMemory {
|
|
@@ -1653,44 +780,13 @@ interface ProjectMemory {
|
|
|
1653
780
|
/** True iff `originalChars > PROJECT_MEMORY_MAX_CHARS`. */
|
|
1654
781
|
truncated: boolean;
|
|
1655
782
|
}
|
|
1656
|
-
/**
|
|
1657
|
-
* Read `REASONIX.md` from `rootDir`. Returns `null` when the file is
|
|
1658
|
-
* missing, unreadable, or empty (whitespace-only counts as empty — an
|
|
1659
|
-
* empty memory file shouldn't perturb the cache prefix).
|
|
1660
|
-
*/
|
|
783
|
+
/** Empty / whitespace-only files return null so they don't perturb the cache prefix. */
|
|
1661
784
|
declare function readProjectMemory(rootDir: string): ProjectMemory | null;
|
|
1662
|
-
/**
|
|
1663
|
-
* Resolve whether project memory should be read. Default: on.
|
|
1664
|
-
* `REASONIX_MEMORY=off|false|0` turns it off (CI, reproducing issues,
|
|
1665
|
-
* intentional offline runs).
|
|
1666
|
-
*/
|
|
1667
785
|
declare function memoryEnabled(): boolean;
|
|
1668
|
-
/**
|
|
1669
|
-
* Return `basePrompt` with the project's `REASONIX.md` appended as a
|
|
1670
|
-
* "Project memory" section. No-op when the file is absent, empty, or
|
|
1671
|
-
* memory is disabled via env.
|
|
1672
|
-
*
|
|
1673
|
-
* The appended block is deterministic — identical input ⇒ identical
|
|
1674
|
-
* output — so every session that opens against the same memory file
|
|
1675
|
-
* gets the same prefix hash.
|
|
1676
|
-
*/
|
|
786
|
+
/** Deterministic — same memory file always yields the same prefix hash. */
|
|
1677
787
|
declare function applyProjectMemory(basePrompt: string, rootDir: string): string;
|
|
1678
788
|
|
|
1679
|
-
/**
|
|
1680
|
-
* User memory — `~/.reasonix/memory/` markdown notes pinned into the
|
|
1681
|
-
* immutable-prefix system prompt across sessions.
|
|
1682
|
-
*
|
|
1683
|
-
* Two scopes:
|
|
1684
|
-
* - `global` → `~/.reasonix/memory/global/` (cross-project)
|
|
1685
|
-
* - `project` → `~/.reasonix/memory/<hash>/` (per sandbox root)
|
|
1686
|
-
*
|
|
1687
|
-
* Each scope has an always-loaded `MEMORY.md` index plus zero-or-more
|
|
1688
|
-
* `<name>.md` detail files loaded on demand via `recall_memory`.
|
|
1689
|
-
*
|
|
1690
|
-
* Distinct from `src/project-memory.ts` (REASONIX.md) in purpose:
|
|
1691
|
-
* REASONIX.md is committable, team-shared project memory.
|
|
1692
|
-
* ~/.reasonix/memory is user-private memory, never committed.
|
|
1693
|
-
*/
|
|
789
|
+
/** User-private memory pinned into the immutable prefix; distinct from committable REASONIX.md. */
|
|
1694
790
|
declare const USER_MEMORY_DIR = "memory";
|
|
1695
791
|
declare const MEMORY_INDEX_FILE = "MEMORY.md";
|
|
1696
792
|
/** Cap on the index file content loaded into the prefix, per scope. */
|
|
@@ -1719,10 +815,7 @@ interface WriteInput {
|
|
|
1719
815
|
description: string;
|
|
1720
816
|
body: string;
|
|
1721
817
|
}
|
|
1722
|
-
/**
|
|
1723
|
-
* Throws on filename injection attempts (`../foo`, `foo/bar`, leading
|
|
1724
|
-
* dots, etc.). Allowed: 3-40 chars, alnum + `_` + `-` + interior `.`.
|
|
1725
|
-
*/
|
|
818
|
+
/** Throws on path-injection (../, /, leading dot). Allowed: 3-40 chars, alnum/_/-, interior `.`. */
|
|
1726
819
|
declare function sanitizeMemoryName(raw: string): string;
|
|
1727
820
|
/** Stable 16-hex-char hash of an absolute sandbox root path. */
|
|
1728
821
|
declare function projectHash(rootDir: string): string;
|
|
@@ -1736,10 +829,6 @@ declare class MemoryStore {
|
|
|
1736
829
|
pathFor(scope: MemoryScope, name: string): string;
|
|
1737
830
|
/** True iff this store is configured with a project scope available. */
|
|
1738
831
|
hasProjectScope(): boolean;
|
|
1739
|
-
/**
|
|
1740
|
-
* Read the `MEMORY.md` index for a scope. Returns post-cap content
|
|
1741
|
-
* (with a truncation marker if clipped), or `null` when absent / empty.
|
|
1742
|
-
*/
|
|
1743
832
|
loadIndex(scope: MemoryScope): {
|
|
1744
833
|
content: string;
|
|
1745
834
|
originalChars: number;
|
|
@@ -1747,108 +836,36 @@ declare class MemoryStore {
|
|
|
1747
836
|
} | null;
|
|
1748
837
|
/** Read one memory file's body (frontmatter stripped). Throws if missing. */
|
|
1749
838
|
read(scope: MemoryScope, name: string): MemoryEntry;
|
|
1750
|
-
/**
|
|
1751
|
-
* List every memory in this store. Scans both scopes (skips project
|
|
1752
|
-
* scope if unconfigured). Silently skips malformed files; the index
|
|
1753
|
-
* must stay queryable even if one file is hand-edited into nonsense.
|
|
1754
|
-
*/
|
|
839
|
+
/** Skips malformed files — index stays queryable even if one file is hand-edited into nonsense. */
|
|
1755
840
|
list(): MemoryEntry[];
|
|
1756
|
-
/**
|
|
1757
|
-
* Write a new memory (or overwrite existing). Creates the scope dir,
|
|
1758
|
-
* writes the `.md` file, and regenerates `MEMORY.md`. Returns the
|
|
1759
|
-
* absolute path written to.
|
|
1760
|
-
*/
|
|
1761
841
|
write(input: WriteInput): string;
|
|
1762
842
|
/** Delete one memory + its index line. No-op if the file is already gone. */
|
|
1763
843
|
delete(scope: MemoryScope, rawName: string): boolean;
|
|
1764
|
-
/**
|
|
1765
|
-
* Rebuild `MEMORY.md` from the `.md` files currently in the scope dir.
|
|
1766
|
-
* Called after every write/delete. Sorted by name for stable prefix
|
|
1767
|
-
* hashing — two stores with the same set of files produce byte-identical
|
|
1768
|
-
* MEMORY.md content, keeping the cache prefix reproducible.
|
|
1769
|
-
*/
|
|
844
|
+
/** Sorted by name — same file set must produce byte-identical MEMORY.md for stable prefix hashing. */
|
|
1770
845
|
private regenerateIndex;
|
|
1771
846
|
}
|
|
1772
|
-
/**
|
|
1773
|
-
* Append `MEMORY_GLOBAL` and (optionally) `MEMORY_PROJECT` blocks to
|
|
1774
|
-
* `basePrompt`. Omits a block entirely when its index is absent — an
|
|
1775
|
-
* empty tag would add bytes to the prefix hash without content.
|
|
1776
|
-
* Respects `REASONIX_MEMORY=off` via `memoryEnabled()` from
|
|
1777
|
-
* `project-memory.ts`.
|
|
1778
|
-
*/
|
|
847
|
+
/** Empty index → omit the whole block (otherwise we'd add bytes to the prefix hash for nothing). */
|
|
1779
848
|
declare function applyUserMemory(basePrompt: string, opts?: {
|
|
1780
849
|
homeDir?: string;
|
|
1781
850
|
projectRoot?: string;
|
|
1782
851
|
}): string;
|
|
1783
|
-
/**
|
|
1784
|
-
* Compose every lazy-loaded prefix block in one call: project REASONIX.md,
|
|
1785
|
-
* global REASONIX.md (`#g` destination), user memory indexes (global +
|
|
1786
|
-
* per-project), and the skills index. Drop-in replacement for
|
|
1787
|
-
* `applyProjectMemory` at CLI entry points. Stacking order is stable —
|
|
1788
|
-
* the prefix hash only changes when block *content* changes, not when
|
|
1789
|
-
* this helper is called a second time with the same filesystem state.
|
|
1790
|
-
*/
|
|
1791
852
|
declare function applyMemoryStack(basePrompt: string, rootDir: string): string;
|
|
1792
853
|
|
|
1793
|
-
/**
|
|
1794
|
-
* Built-in filesystem tools for `reasonix code`.
|
|
1795
|
-
*
|
|
1796
|
-
* Why native instead of the official `@modelcontextprotocol/server-filesystem`:
|
|
1797
|
-
* - No subprocess overhead — every call is 50-200 ms cheaper.
|
|
1798
|
-
* - Schema shapes tuned for R1: `edit_file` takes a single
|
|
1799
|
-
* SEARCH/REPLACE string instead of `string="false"`-encoded
|
|
1800
|
-
* JSON arrays, which was the biggest single source of DSML
|
|
1801
|
-
* hallucinations in 0.4.x.
|
|
1802
|
-
* - Sandbox enforcement lives here so Reasonix can reason about
|
|
1803
|
-
* it (tests cover path-traversal, symlink-escape, and the
|
|
1804
|
-
* cwd-outside-root case) rather than trusting an external server.
|
|
1805
|
-
* - No `npx install` / network dependency in `reasonix code`.
|
|
1806
|
-
*
|
|
1807
|
-
* Tool names + argument shapes intentionally mirror the official
|
|
1808
|
-
* filesystem server so R1's muscle memory carries over. The only
|
|
1809
|
-
* intentional divergence is `edit_file`, noted above.
|
|
1810
|
-
*/
|
|
854
|
+
/** Native FS tools — sandbox enforced here, not delegated. `edit_file` takes a single SEARCH/REPLACE string. */
|
|
1811
855
|
|
|
1812
856
|
interface FilesystemToolsOptions {
|
|
1813
857
|
/** Absolute directory the tools may read/write. Paths outside this are refused. */
|
|
1814
858
|
rootDir: string;
|
|
1815
|
-
/**
|
|
1816
|
-
* When `false`, register only read-side tools (read_file, list_directory,
|
|
1817
|
-
* search_files, get_file_info, directory_tree). Useful for read-only
|
|
1818
|
-
* workflows where the model should never mutate the tree. Default: true.
|
|
1819
|
-
*/
|
|
859
|
+
/** false → register only read-side tools. Default true. */
|
|
1820
860
|
allowWriting?: boolean;
|
|
1821
|
-
/**
|
|
1822
|
-
* Cap for a single file read, in bytes. Prevents a stray `read_file`
|
|
1823
|
-
* on a multi-GB blob from OOM'ing Node. 2 MB is enough for any realistic
|
|
1824
|
-
* source file (the biggest single-file TypeScript project checked in to
|
|
1825
|
-
* GitHub is ~500 KB); pass higher when working with data files.
|
|
1826
|
-
*/
|
|
861
|
+
/** Per-read byte cap; floor against OOM on a multi-GB blob. */
|
|
1827
862
|
maxReadBytes?: number;
|
|
1828
|
-
/**
|
|
1829
|
-
* Cap for total bytes returned from search_files / directory_tree /
|
|
1830
|
-
* grep, so the model can't accidentally pull down the whole tree as
|
|
1831
|
-
* one giant string. 256 KB by default.
|
|
1832
|
-
*/
|
|
863
|
+
/** Cap on total bytes from listing/grep tools — bounds tree-as-one-string accidents. */
|
|
1833
864
|
maxListBytes?: number;
|
|
1834
865
|
}
|
|
1835
866
|
declare function registerFilesystemTools(registry: ToolRegistry, opts: FilesystemToolsOptions): ToolRegistry;
|
|
1836
867
|
|
|
1837
|
-
/**
|
|
1838
|
-
* `remember` / `forget` / `recall_memory` — tools that let the model
|
|
1839
|
-
* read and write the user-memory store across sessions.
|
|
1840
|
-
*
|
|
1841
|
-
* Scope rules:
|
|
1842
|
-
* - `global` — always available (no sandbox needed).
|
|
1843
|
-
* - `project` — requires a `projectRoot` on MemoryStore. In chat mode
|
|
1844
|
-
* (no sandbox), the tools still register but a `scope=project` call
|
|
1845
|
-
* returns a structured refusal so the model can try `global` instead.
|
|
1846
|
-
*
|
|
1847
|
-
* Memory changes are written eagerly but NOT re-loaded into the prefix
|
|
1848
|
-
* mid-session (cache invariant). The user notices at `/new` or the next
|
|
1849
|
-
* launch — or they can read fresh content via `recall_memory` which
|
|
1850
|
-
* always hits disk.
|
|
1851
|
-
*/
|
|
868
|
+
/** Writes are eager but the prefix is NOT re-loaded mid-session — keeps prompt-cache stable. */
|
|
1852
869
|
|
|
1853
870
|
interface MemoryToolsOptions {
|
|
1854
871
|
/** Sandbox root for the `project` scope. Omit for chat mode. */
|
|
@@ -1858,50 +875,13 @@ interface MemoryToolsOptions {
|
|
|
1858
875
|
}
|
|
1859
876
|
declare function registerMemoryTools(registry: ToolRegistry, opts?: MemoryToolsOptions): ToolRegistry;
|
|
1860
877
|
|
|
1861
|
-
/**
|
|
1862
|
-
* ask_choice — the primitive for "user needs to pick between alternatives".
|
|
1863
|
-
*
|
|
1864
|
-
* Why it exists: `submit_plan` is for ONE concrete plan the user approves.
|
|
1865
|
-
* Models routinely misused it to present A/B/C option menus, leaving the
|
|
1866
|
-
* user stuck with an approve/refine/cancel picker that had no way to
|
|
1867
|
-
* select a route. `ask_choice` gives branching its own tool so plan
|
|
1868
|
-
* mode stays about one actionable thing at a time.
|
|
1869
|
-
*
|
|
1870
|
-
* Shape mirrors `submit_plan`:
|
|
1871
|
-
* 1. Model calls `ask_choice` with a question and 2–4 options.
|
|
1872
|
-
* 2. The tool throws `ChoiceRequestedError`; the registry serializes
|
|
1873
|
-
* the payload via `toToolResult`.
|
|
1874
|
-
* 3. TUI parses the tagged error, mounts `ChoiceConfirm`, user picks
|
|
1875
|
-
* one option (or types a custom answer via the escape hatch, or
|
|
1876
|
-
* cancels).
|
|
1877
|
-
* 4. A synthetic user message feeds the choice back — "user picked
|
|
1878
|
-
* <id>" or "user answered: <text>" — and the loop resumes.
|
|
1879
|
-
*
|
|
1880
|
-
* Auto-flatten note: the `options` array of objects is exactly the
|
|
1881
|
-
* schema shape that DeepSeek V3/R1 is known to drop. `ToolRegistry`
|
|
1882
|
-
* auto-flattens and re-nests on dispatch (Pillar 3), so we don't need
|
|
1883
|
-
* to hand-flatten here. We still `sanitizeOptions` at runtime because
|
|
1884
|
-
* even with flatten-repair, models occasionally emit empty strings or
|
|
1885
|
-
* miss fields entirely.
|
|
1886
|
-
*/
|
|
878
|
+
/** Branching primitive separate from submit_plan; throws ChoiceRequestedError so the TUI can mount a picker and the model stops. */
|
|
1887
879
|
|
|
1888
|
-
/**
|
|
1889
|
-
* One option in a branching question. `id` is what gets fed back to
|
|
1890
|
-
* the model when the user picks; keep it short and stable (A, B, C,
|
|
1891
|
-
* or option-1 / option-2 / ...). `summary` is optional extra context
|
|
1892
|
-
* the UI shows as a dimmed sub-line under the title.
|
|
1893
|
-
*/
|
|
1894
880
|
interface ChoiceOption {
|
|
1895
881
|
id: string;
|
|
1896
882
|
title: string;
|
|
1897
883
|
summary?: string;
|
|
1898
884
|
}
|
|
1899
|
-
/**
|
|
1900
|
-
* Thrown by `ask_choice`. Carries the branching question plus the
|
|
1901
|
-
* options list out to the TUI via the `toToolResult` protocol. The
|
|
1902
|
-
* error message tells the model to STOP so it doesn't race past the
|
|
1903
|
-
* picker with more tool calls — same pattern as `PlanProposedError`.
|
|
1904
|
-
*/
|
|
1905
885
|
declare class ChoiceRequestedError extends Error {
|
|
1906
886
|
readonly question: string;
|
|
1907
887
|
readonly options: ChoiceOption[];
|
|
@@ -1915,47 +895,17 @@ declare class ChoiceRequestedError extends Error {
|
|
|
1915
895
|
};
|
|
1916
896
|
}
|
|
1917
897
|
interface ChoiceToolOptions {
|
|
1918
|
-
/**
|
|
1919
|
-
* Side-channel preview fired when the model asks. The tool-result
|
|
1920
|
-
* event also carries the payload; this is the earlier hook for
|
|
1921
|
-
* test harnesses or alternative UIs that don't want to parse JSON.
|
|
1922
|
-
*/
|
|
1923
898
|
onChoiceRequested?: (question: string, options: ChoiceOption[]) => void;
|
|
1924
899
|
}
|
|
1925
900
|
declare function registerChoiceTool(registry: ToolRegistry, opts?: ChoiceToolOptions): ToolRegistry;
|
|
1926
901
|
|
|
1927
|
-
/**
|
|
1928
|
-
* Shared types for Plan Mode. Consumed by plan-errors.ts (error
|
|
1929
|
-
* classes carry these as fields) and plan-core.ts (tool registration
|
|
1930
|
-
* validates against them). Kept in a separate module so a consumer
|
|
1931
|
-
* that only wants the types doesn't pull in either the error classes
|
|
1932
|
-
* or the registration machinery.
|
|
1933
|
-
*/
|
|
1934
902
|
type PlanStepRisk = "low" | "med" | "high";
|
|
1935
|
-
/**
|
|
1936
|
-
* Structured step in a submitted plan. Optional — plans can still be
|
|
1937
|
-
* pure markdown. When provided, each step is addressable by `id` so
|
|
1938
|
-
* the model can later mark it complete via `mark_step_complete`.
|
|
1939
|
-
*/
|
|
1940
903
|
interface PlanStep {
|
|
1941
904
|
id: string;
|
|
1942
905
|
title: string;
|
|
1943
906
|
action: string;
|
|
1944
|
-
/**
|
|
1945
|
-
* Optional self-reported risk level. Drives the colored dot gutter
|
|
1946
|
-
* in PlanConfirm / PlanCheckpointConfirm: green (low) / yellow
|
|
1947
|
-
* (med) / red (high). High-risk steps are the ones the user should
|
|
1948
|
-
* actually read before approving — everything else is noise.
|
|
1949
|
-
* Omitted when the model didn't categorize (treated as neutral).
|
|
1950
|
-
*/
|
|
1951
907
|
risk?: PlanStepRisk;
|
|
1952
908
|
}
|
|
1953
|
-
/**
|
|
1954
|
-
* Payload surfaced by `mark_step_complete` via `PlanCheckpointError`.
|
|
1955
|
-
* The TUI parses the tool result JSON, pushes a `✓ step` progress row,
|
|
1956
|
-
* and mounts the checkpoint picker. `kind` is kept on the payload so
|
|
1957
|
-
* consumers that peek at the JSON can dispatch on a stable tag.
|
|
1958
|
-
*/
|
|
1959
909
|
interface StepCompletion {
|
|
1960
910
|
kind: "step_completed";
|
|
1961
911
|
stepId: string;
|
|
@@ -1964,33 +914,13 @@ interface StepCompletion {
|
|
|
1964
914
|
notes?: string;
|
|
1965
915
|
}
|
|
1966
916
|
|
|
1967
|
-
/**
|
|
1968
|
-
* Error classes for Plan Mode tools. Each one implements the
|
|
1969
|
-
* `toToolResult` protocol so `ToolRegistry.dispatch` serializes the
|
|
1970
|
-
* structured payload into the tool-result JSON — the TUI parses that
|
|
1971
|
-
* shape to mount the right picker (approve / checkpoint / revise).
|
|
1972
|
-
*
|
|
1973
|
-
* Types live in plan-types.ts; registration logic in plan-core.ts.
|
|
1974
|
-
* Dependency direction: plan-core → plan-errors → plan-types.
|
|
1975
|
-
*/
|
|
917
|
+
/** Plan-mode errors carry `toToolResult` so dispatch serializes structured payloads the TUI parses to mount pickers. */
|
|
1976
918
|
|
|
1977
|
-
/**
|
|
1978
|
-
* Thrown by `submit_plan` when the model has produced a plan for the
|
|
1979
|
-
* user to approve. Carries the markdown body, optional structured
|
|
1980
|
-
* steps, and an optional one-line summary. The TUI uses all three to
|
|
1981
|
-
* render the PlanConfirm picker.
|
|
1982
|
-
*/
|
|
1983
919
|
declare class PlanProposedError extends Error {
|
|
1984
920
|
readonly plan: string;
|
|
1985
921
|
readonly steps?: PlanStep[];
|
|
1986
922
|
readonly summary?: string;
|
|
1987
923
|
constructor(plan: string, steps?: PlanStep[], summary?: string);
|
|
1988
|
-
/**
|
|
1989
|
-
* Structured tool-result shape. Consumed by the TUI to extract the
|
|
1990
|
-
* plan without regex-scraping the error message. Optional fields
|
|
1991
|
-
* are omitted from the payload when absent so consumers don't see
|
|
1992
|
-
* `undefined` keys in the JSON.
|
|
1993
|
-
*/
|
|
1994
924
|
toToolResult(): {
|
|
1995
925
|
error: string;
|
|
1996
926
|
plan: string;
|
|
@@ -1998,13 +928,6 @@ declare class PlanProposedError extends Error {
|
|
|
1998
928
|
summary?: string;
|
|
1999
929
|
};
|
|
2000
930
|
}
|
|
2001
|
-
/**
|
|
2002
|
-
* Thrown by `mark_step_complete`. The registry serializes the
|
|
2003
|
-
* structured payload via `toToolResult`, the TUI catches the error
|
|
2004
|
-
* tag and pauses the loop until the user decides continue / revise /
|
|
2005
|
-
* stop. The error message tells the model to stop calling tools so
|
|
2006
|
-
* it doesn't race past the picker.
|
|
2007
|
-
*/
|
|
2008
931
|
declare class PlanCheckpointError extends Error {
|
|
2009
932
|
readonly stepId: string;
|
|
2010
933
|
readonly title?: string;
|
|
@@ -2020,19 +943,7 @@ declare class PlanCheckpointError extends Error {
|
|
|
2020
943
|
error: string;
|
|
2021
944
|
} & StepCompletion;
|
|
2022
945
|
}
|
|
2023
|
-
/**
|
|
2024
|
-
* Thrown by `revise_plan`. Carries the proposed remaining-step list,
|
|
2025
|
-
* a one-sentence reason, and an optional updated summary out to the
|
|
2026
|
-
* TUI. Mirrors PlanProposedError / PlanCheckpointError. The picker
|
|
2027
|
-
* shows a diff between the current remaining steps and the proposed
|
|
2028
|
-
* ones; the user accepts (replaces) or rejects (keeps current).
|
|
2029
|
-
*
|
|
2030
|
-
* Why a separate tool from submit_plan: revising is surgical (replace
|
|
2031
|
-
* the tail of an in-flight plan), submitting is a fresh proposal.
|
|
2032
|
-
* Different intent, different UI. Calling submit_plan again mid-
|
|
2033
|
-
* execution would reset the whole plan including done steps, which
|
|
2034
|
-
* is heavier than usually needed.
|
|
2035
|
-
*/
|
|
946
|
+
/** Surgical replace of in-flight plan tail; submit_plan would reset done steps. */
|
|
2036
947
|
declare class PlanRevisionProposedError extends Error {
|
|
2037
948
|
readonly reason: string;
|
|
2038
949
|
readonly remainingSteps: PlanStep[];
|
|
@@ -2046,195 +957,51 @@ declare class PlanRevisionProposedError extends Error {
|
|
|
2046
957
|
};
|
|
2047
958
|
}
|
|
2048
959
|
|
|
2049
|
-
/**
|
|
2050
|
-
* Plan Mode tool registration. Owns `registerPlanTool` — which wires
|
|
2051
|
-
* `submit_plan`, `mark_step_complete`, and `revise_plan` into a
|
|
2052
|
-
* ToolRegistry — plus the arg sanitizers these tools share.
|
|
2053
|
-
*
|
|
2054
|
-
* Structure rationale: the three registrations are parallel in shape
|
|
2055
|
-
* (each throws a structured error the TUI renders as a picker), so
|
|
2056
|
-
* they're broken out into `registerSubmitPlan` / `registerMarkStep` /
|
|
2057
|
-
* `registerRevisePlan` — one per screen of logic rather than one
|
|
2058
|
-
* 230-line `registerPlanTool` body. Tool descriptions live at the top
|
|
2059
|
-
* as named constants so the function bodies stay readable; the strings
|
|
2060
|
-
* themselves are long because they teach the model when to call each
|
|
2061
|
-
* tool, which is load-bearing behavior.
|
|
2062
|
-
*
|
|
2063
|
-
* Dependency direction: plan-core → plan-errors → plan-types.
|
|
2064
|
-
*/
|
|
2065
|
-
|
|
2066
960
|
interface PlanToolOptions {
|
|
2067
|
-
/**
|
|
2068
|
-
* Optional side-channel callback fired when the model submits a plan.
|
|
2069
|
-
* The TUI uses this to preview the plan in real time (the tool-result
|
|
2070
|
-
* event is also emitted; this is just earlier and friendlier to
|
|
2071
|
-
* test harnesses that don't want to parse JSON).
|
|
2072
|
-
*/
|
|
2073
961
|
onPlanSubmitted?: (plan: string, steps?: PlanStep[]) => void;
|
|
2074
|
-
/**
|
|
2075
|
-
* Optional callback fired when the model marks a step complete via
|
|
2076
|
-
* `mark_step_complete`. Analogous to `onPlanSubmitted` — the tool
|
|
2077
|
-
* event carries the same payload, but this firing point is earlier
|
|
2078
|
-
* and avoids JSON parsing for consumers that don't need it.
|
|
2079
|
-
*/
|
|
2080
962
|
onStepCompleted?: (update: StepCompletion) => void;
|
|
2081
|
-
/**
|
|
2082
|
-
* Optional preview callback fired when the model proposes a plan
|
|
2083
|
-
* revision via `revise_plan`. Same earlier-than-event timing as
|
|
2084
|
-
* the other on* hooks.
|
|
2085
|
-
*/
|
|
2086
963
|
onPlanRevisionProposed?: (reason: string, remainingSteps: PlanStep[], summary?: string) => void;
|
|
2087
964
|
}
|
|
2088
965
|
declare function registerPlanTool(registry: ToolRegistry, opts?: PlanToolOptions): ToolRegistry;
|
|
2089
966
|
|
|
2090
|
-
/**
|
|
2091
|
-
* Subagent runtime — isolated child loops for offloading exploration or
|
|
2092
|
-
* self-contained subtasks.
|
|
2093
|
-
*
|
|
2094
|
-
* Two surfaces sit on top of the same `spawnSubagent` core:
|
|
2095
|
-
*
|
|
2096
|
-
* 1. `registerSubagentTool` — exposes a low-level `spawn_subagent`
|
|
2097
|
-
* function-call tool. Library API. NOT registered into the model
|
|
2098
|
-
* tool list by `reasonix code` since 0.4.26 — Skills (with
|
|
2099
|
-
* `runAs: subagent` frontmatter) became the user-facing surface.
|
|
2100
|
-
* Kept exported because library callers and tests still want
|
|
2101
|
-
* direct access to the primitive.
|
|
2102
|
-
*
|
|
2103
|
-
* 2. `run_skill` (in src/tools/skills.ts) — when the resolved skill
|
|
2104
|
-
* has `runAs: subagent`, it calls `spawnSubagent` with the skill
|
|
2105
|
-
* body as the system prompt and the user's `arguments` as the
|
|
2106
|
-
* task. Subagent skills are listed in the pinned Skills index
|
|
2107
|
-
* with a 🧬 marker, which gives the model a clear pattern-match
|
|
2108
|
-
* trigger without forcing it to reason about "is this task big
|
|
2109
|
-
* enough to delegate."
|
|
2110
|
-
*
|
|
2111
|
-
* Why R1 specifically benefits:
|
|
2112
|
-
* - R1 reasoning tokens are expensive AND inflate the parent context.
|
|
2113
|
-
* A subagent runs its own private loop, then surfaces only the
|
|
2114
|
-
* distilled final answer back to the parent — the main session
|
|
2115
|
-
* never sees the reasoning trail.
|
|
2116
|
-
*
|
|
2117
|
-
* Invariants common to both surfaces:
|
|
2118
|
-
* - Serial only — no parallel spawn (MVP).
|
|
2119
|
-
* - Inherits parent's tool registry MINUS `spawn_subagent` itself
|
|
2120
|
-
* (no recursion via the tool API) and MINUS `submit_plan`
|
|
2121
|
-
* (subagents don't propose plans to the user).
|
|
2122
|
-
* - No hooks, no session — runs are ephemeral.
|
|
2123
|
-
* - Lower default `maxToolIters` than the parent (16 vs 64).
|
|
2124
|
-
* - Independent prefix cache (subagent's prefix has its own
|
|
2125
|
-
* fingerprint).
|
|
2126
|
-
* - Parent registry's plan-mode state propagates: subagents can't
|
|
2127
|
-
* escape `/plan`.
|
|
2128
|
-
* - Non-streaming child loop — the parent isn't watching deltas, so
|
|
2129
|
-
* streaming would only add an SSE parser to the critical path.
|
|
2130
|
-
* Cancellation still works via the AbortSignal.
|
|
2131
|
-
*/
|
|
967
|
+
/** Isolated child loop. Inherits parent registry minus spawn_subagent + submit_plan; no hooks; non-streaming. */
|
|
2132
968
|
|
|
2133
|
-
/**
|
|
2134
|
-
* Live event emitted by a running subagent. Surfaced via the optional
|
|
2135
|
-
* `sink` ref the TUI attaches its handler to. Side-channel only — these
|
|
2136
|
-
* events do NOT pass through the parent loop's `LoopEvent` stream
|
|
2137
|
-
* because subagents run inside a tool-dispatch frame, after the parent's
|
|
2138
|
-
* `step()` has already yielded `tool_start` and is awaiting the result.
|
|
2139
|
-
*/
|
|
969
|
+
/** Side-channel — subagents run inside a tool-dispatch frame, can't go through parent's `LoopEvent` stream. */
|
|
2140
970
|
interface SubagentEvent {
|
|
2141
971
|
kind: "start" | "progress" | "end";
|
|
2142
|
-
/** First ~30 chars of the task prompt — used for the TUI status row. */
|
|
2143
972
|
task: string;
|
|
2144
|
-
/** Skill that spawned this subagent, when applicable. Stamped on every event so the TUI/logger can attribute without extra plumbing. */
|
|
2145
973
|
skillName?: string;
|
|
2146
|
-
/** Model id the child loop ran on. Stamped alongside skillName. */
|
|
2147
974
|
model?: string;
|
|
2148
|
-
/** Iteration count inside the child loop (number of tool results so far). */
|
|
2149
975
|
iter?: number;
|
|
2150
|
-
/** Wall-clock ms since the subagent started. */
|
|
2151
976
|
elapsedMs?: number;
|
|
2152
|
-
/** First ~120 chars of the final assistant message. Set on `end`. */
|
|
2153
977
|
summary?: string;
|
|
2154
|
-
/** Error message if the subagent failed. Set on `end`. */
|
|
2155
978
|
error?: string;
|
|
2156
|
-
/** Total turns the subagent took. Set on `end`. */
|
|
2157
979
|
turns?: number;
|
|
2158
|
-
/** Total USD spent inside the child loop. Set on `end`. */
|
|
2159
980
|
costUsd?: number;
|
|
2160
|
-
/** Aggregated child-loop Usage (sum across turns). Set on `end`. */
|
|
2161
981
|
usage?: Usage;
|
|
2162
982
|
}
|
|
2163
|
-
/**
|
|
2164
|
-
* Mutable ref the registration writes through. The TUI sets `.current`
|
|
2165
|
-
* to its own handler on mount; nothing receives events before that
|
|
2166
|
-
* happens (and headless callers leave `.current = null`, which is the
|
|
2167
|
-
* library-mode default — they read the final result from the helper's
|
|
2168
|
-
* return value instead).
|
|
2169
|
-
*/
|
|
2170
983
|
interface SubagentSink {
|
|
2171
984
|
current: ((ev: SubagentEvent) => void) | null;
|
|
2172
985
|
}
|
|
2173
986
|
interface SubagentToolOptions {
|
|
2174
|
-
/** Shared DeepSeek client. */
|
|
2175
987
|
client: DeepSeekClient;
|
|
2176
|
-
/**
|
|
2177
|
-
* Default system prompt used when the model doesn't pass one. Project
|
|
2178
|
-
* memory (REASONIX.md) is appended automatically when `projectRoot` is
|
|
2179
|
-
* set.
|
|
2180
|
-
*/
|
|
2181
988
|
defaultSystem?: string;
|
|
2182
|
-
/** Project root for `applyProjectMemory` lookup. Omit in chat mode. */
|
|
2183
989
|
projectRoot?: string;
|
|
2184
|
-
/** Default model. `deepseek-v4-flash` by default (see DEFAULT_SUBAGENT_MODEL). */
|
|
2185
990
|
defaultModel?: string;
|
|
2186
|
-
/** Iteration ceiling. Lower than the parent (16 by default). */
|
|
2187
991
|
maxToolIters?: number;
|
|
2188
|
-
/** Maximum chars returned in the tool result. */
|
|
2189
992
|
maxResultChars?: number;
|
|
2190
|
-
/** Optional sink the TUI attaches its handler to for live updates. */
|
|
2191
993
|
sink?: SubagentSink;
|
|
2192
994
|
}
|
|
2193
|
-
/**
|
|
2194
|
-
* Register the spawn_subagent tool into the parent registry. Library
|
|
2195
|
-
* surface — `reasonix code` does NOT call this since 0.4.26 (Skills
|
|
2196
|
-
* with `runAs: subagent` are the user-facing surface), but library
|
|
2197
|
-
* consumers who want the low-level tool can opt in.
|
|
2198
|
-
*/
|
|
995
|
+
/** Library surface only — `reasonix code` uses Skills `runAs: subagent` as the user-facing path. */
|
|
2199
996
|
declare function registerSubagentTool(parentRegistry: ToolRegistry, opts: SubagentToolOptions): ToolRegistry;
|
|
2200
|
-
/**
|
|
2201
|
-
* Build a child ToolRegistry that copies every tool from `parent` except
|
|
2202
|
-
* those whose names are in `exclude`. Plan-mode state propagates so a
|
|
2203
|
-
* subagent spawned while the parent is under `/plan` cannot escape it.
|
|
2204
|
-
*
|
|
2205
|
-
* Exported for tests + library callers who want the same fork behavior
|
|
2206
|
-
* for their own nested-loop patterns.
|
|
2207
|
-
*/
|
|
997
|
+
/** Plan-mode state propagates — a subagent spawned under `/plan` MUST NOT escape it. */
|
|
2208
998
|
declare function forkRegistryExcluding(parent: ToolRegistry, exclude: ReadonlySet<string>): ToolRegistry;
|
|
2209
999
|
|
|
2210
|
-
/**
|
|
2211
|
-
* Long-running process registry — the "background run" counterpart to
|
|
2212
|
-
* `run_command`. `run_command` spawns a child, waits for it to exit,
|
|
2213
|
-
* then returns combined output; perfect for tests / builds / one-shots
|
|
2214
|
-
* but useless for `npm run dev` / `python -m http.server` / watchers,
|
|
2215
|
-
* which never exit and just time the tool out.
|
|
2216
|
-
*
|
|
2217
|
-
* JobRegistry lets the model fire-and-almost-forget: we spawn the
|
|
2218
|
-
* child, wait at most `waitSec` (default 3s) OR until output matches
|
|
2219
|
-
* a readiness regex, then return the startup preview plus a job id.
|
|
2220
|
-
* The child keeps running in the background; later tool calls tail
|
|
2221
|
-
* its output, stop it, or list what's still alive.
|
|
2222
|
-
*
|
|
2223
|
-
* Shape-wise this is modeled on Claude Code's `BashOutput` / `KillBash`
|
|
2224
|
-
* pair. We diverge on one point: ready-signal detection is on by default
|
|
2225
|
-
* because dev servers almost universally print "Local:", "listening on",
|
|
2226
|
-
* "ready in N ms", "compiled successfully" when they come up — short-
|
|
2227
|
-
* circuiting the wait on those keeps the model's first tool-result
|
|
2228
|
-
* useful ("server is up at http://localhost:5173") instead of spending
|
|
2229
|
-
* the full 3s on a stabilization timer.
|
|
2230
|
-
*/
|
|
1000
|
+
/** Background process registry for never-exiting commands; ready-signal detection short-circuits the startup wait. */
|
|
2231
1001
|
interface JobStartOptions {
|
|
2232
1002
|
/** Absolute path to cwd for the spawned child. */
|
|
2233
1003
|
cwd: string;
|
|
2234
|
-
/**
|
|
2235
|
-
* Max seconds to wait for the initial burst before returning. Capped
|
|
2236
|
-
* at 30. A ready-signal match short-circuits this. Default 3.
|
|
2237
|
-
*/
|
|
1004
|
+
/** Capped at 30; ready-signal match short-circuits. Default 3. */
|
|
2238
1005
|
waitSec?: number;
|
|
2239
1006
|
/** Signal plumbed through from the calling tool's AbortSignal. */
|
|
2240
1007
|
signal?: AbortSignal;
|
|
@@ -2262,10 +1029,7 @@ interface JobRecord {
|
|
|
2262
1029
|
exitCode: number | null;
|
|
2263
1030
|
/** Combined stdout+stderr, ring-trimmed. */
|
|
2264
1031
|
output: string;
|
|
2265
|
-
/**
|
|
2266
|
-
* Total bytes ever written by the child (not just what's in `output`).
|
|
2267
|
-
* Useful for "how much got dropped" diagnostics.
|
|
2268
|
-
*/
|
|
1032
|
+
/** Counts all bytes the child wrote, not just what's still buffered in `output`. */
|
|
2269
1033
|
totalBytesWritten: number;
|
|
2270
1034
|
/** True iff the child is still alive. */
|
|
2271
1035
|
running: boolean;
|
|
@@ -2275,36 +1039,17 @@ interface JobRecord {
|
|
|
2275
1039
|
declare class JobRegistry {
|
|
2276
1040
|
private readonly jobs;
|
|
2277
1041
|
private nextId;
|
|
2278
|
-
/**
|
|
2279
|
-
* Spawn a background child. Resolves after `waitSec` OR on ready
|
|
2280
|
-
* signal OR on early exit, whichever comes first. The child continues
|
|
2281
|
-
* to run (and buffer output) regardless of which path fires.
|
|
2282
|
-
*/
|
|
1042
|
+
/** Resolves on (a) ready signal, (b) early exit, or (c) waitSec deadline — child keeps running regardless. */
|
|
2283
1043
|
start(command: string, opts: JobStartOptions): Promise<JobStartResult>;
|
|
2284
|
-
/**
|
|
2285
|
-
* Read a job's accumulated output. `since` lets a caller poll
|
|
2286
|
-
* incrementally: pass the byte count returned from the last call to
|
|
2287
|
-
* get only newly-written content. Returns both full output and a
|
|
2288
|
-
* running snapshot so the caller can use whichever.
|
|
2289
|
-
*/
|
|
2290
1044
|
read(id: number, opts?: {
|
|
2291
1045
|
since?: number;
|
|
2292
1046
|
tailLines?: number;
|
|
2293
1047
|
}): JobReadResult | null;
|
|
2294
|
-
/**
|
|
2295
|
-
* Send SIGTERM, wait `graceMs`, then SIGKILL if still alive. Returns
|
|
2296
|
-
* the final job record (or null when the job id is unknown). Safe to
|
|
2297
|
-
* call on an already-exited job — returns the record unchanged.
|
|
2298
|
-
*/
|
|
1048
|
+
/** SIGTERM, wait graceMs, then SIGKILL. Idempotent on already-exited jobs. */
|
|
2299
1049
|
stop(id: number, opts?: {
|
|
2300
1050
|
graceMs?: number;
|
|
2301
1051
|
}): Promise<JobRecord | null>;
|
|
2302
1052
|
list(): JobRecord[];
|
|
2303
|
-
/**
|
|
2304
|
-
* Best-effort kill of every still-running job. Called on TUI shutdown
|
|
2305
|
-
* so dev servers don't outlive the Reasonix process. Resolves after
|
|
2306
|
-
* every child has closed or a hard deadline passes (3s total).
|
|
2307
|
-
*/
|
|
2308
1053
|
shutdown(deadlineMs?: number): Promise<void>;
|
|
2309
1054
|
/** Count of still-running jobs — drives the TUI status-bar indicator. */
|
|
2310
1055
|
runningCount(): number;
|
|
@@ -2320,109 +1065,25 @@ interface JobReadResult {
|
|
|
2320
1065
|
spawnError?: string;
|
|
2321
1066
|
}
|
|
2322
1067
|
|
|
2323
|
-
/**
|
|
2324
|
-
* Native shell tool — lets the model run commands inside the sandbox
|
|
2325
|
-
* root so it can actually verify its own work (run tests, check git
|
|
2326
|
-
* status, inspect a lockfile, etc.). Without this the coding-mode
|
|
2327
|
-
* loop is "write code, hope it works, ask the user to run it" —
|
|
2328
|
-
* defeats the purpose.
|
|
2329
|
-
*
|
|
2330
|
-
* Safety model:
|
|
2331
|
-
* - Commands run with `cwd` pinned to the registered root. No
|
|
2332
|
-
* path traversal via the command itself is enforced (users can
|
|
2333
|
-
* `cat ../outside.txt`); the trust boundary is the directory
|
|
2334
|
-
* you opened Reasonix from.
|
|
2335
|
-
* - Commands are matched against a read-only / testing allowlist.
|
|
2336
|
-
* Allowlisted commands execute immediately and return stdout +
|
|
2337
|
-
* stderr merged. Everything else throws with a clear message —
|
|
2338
|
-
* the UI translates that into an `/apply`-style confirm gate so
|
|
2339
|
-
* the user sees the exact command before it runs.
|
|
2340
|
-
* - Default timeout: 60s. Output cap: matches tool-result budget.
|
|
2341
|
-
* - Every command that DOES run is spawned with `shell: false` and
|
|
2342
|
-
* a tokenized argv — no string-to-shell interpolation, so the
|
|
2343
|
-
* model can't accidentally construct a chained `rm` via quoting.
|
|
2344
|
-
*
|
|
2345
|
-
* This is intentionally narrower than what Claude Code / Aider ship:
|
|
2346
|
-
* we gate more commands behind confirmation by default. Users who
|
|
2347
|
-
* trust the model can widen the allowlist by instantiating their
|
|
2348
|
-
* own tool registry.
|
|
2349
|
-
*/
|
|
1068
|
+
/** cwd pinned to root; non-allowlisted commands throw to a UI confirm gate; spawn is `shell: false`, tokenized argv only. */
|
|
2350
1069
|
|
|
2351
1070
|
interface ShellToolsOptions {
|
|
2352
1071
|
/** Directory to run commands in. Must be an absolute path. */
|
|
2353
1072
|
rootDir: string;
|
|
2354
1073
|
/** Seconds before an individual command is killed. Default: 60. */
|
|
2355
1074
|
timeoutSec?: number;
|
|
2356
|
-
/**
|
|
2357
|
-
* Per-command stdout+stderr cap in characters. Default: 32_000 to
|
|
2358
|
-
* match the tool-result budget.
|
|
2359
|
-
*/
|
|
2360
1075
|
maxOutputChars?: number;
|
|
2361
|
-
/**
|
|
2362
|
-
* Extra command-name prefixes the user explicitly trusts. Added on
|
|
2363
|
-
* top of the built-in allowlist. Examples: `["my-ci-script", "lint"]`.
|
|
2364
|
-
*
|
|
2365
|
-
* Accepts either a fixed array (captured once at registration) or a
|
|
2366
|
-
* getter called on every dispatch. The getter form is load-bearing:
|
|
2367
|
-
* when the TUI's `ShellConfirm` writes a new prefix to config mid-
|
|
2368
|
-
* session, the running `run_command` must pick it up immediately —
|
|
2369
|
-
* otherwise the same command gets re-prompted until the next launch.
|
|
2370
|
-
*/
|
|
1076
|
+
/** Getter form is load-bearing — newly-persisted "always allow" prefixes MUST take effect mid-session. */
|
|
2371
1077
|
extraAllowed?: readonly string[] | (() => readonly string[]);
|
|
2372
|
-
/**
|
|
2373
|
-
* When true, skip the allowlist entirely and auto-run every command.
|
|
2374
|
-
* Off by default — this is an escape hatch for non-interactive use
|
|
2375
|
-
* (CI, benchmarks) where a human can't be in the loop to confirm.
|
|
2376
|
-
*
|
|
2377
|
-
* Accepts either a static boolean (captured once) or a getter called
|
|
2378
|
-
* on every dispatch. The getter form is what `reasonix code` uses to
|
|
2379
|
-
* wire `editMode === "yolo"` into the registry: flipping the mode
|
|
2380
|
-
* mid-session must take effect on the next tool call without forcing
|
|
2381
|
-
* a re-registration. Static `true` is fine for CI / benchmark code.
|
|
2382
|
-
*/
|
|
1078
|
+
/** Getter form lets `editMode === "yolo"` flip mid-session without re-registering tools. */
|
|
2383
1079
|
allowAll?: boolean | (() => boolean);
|
|
2384
|
-
/**
|
|
2385
|
-
* Background-process registry shared between `run_background`,
|
|
2386
|
-
* `job_output`, `stop_job`, `list_jobs`, and the /jobs /kill slashes.
|
|
2387
|
-
* When omitted, the registrar builds its own — but the caller
|
|
2388
|
-
* usually wants to provide one so the TUI can tail it too.
|
|
2389
|
-
*/
|
|
2390
1080
|
jobs?: JobRegistry;
|
|
2391
1081
|
}
|
|
2392
|
-
/**
|
|
2393
|
-
* Tokenize a shell-ish command string into argv. Handles single/double
|
|
2394
|
-
* quoting; rejects unclosed quotes. Does NOT expand env vars, globs,
|
|
2395
|
-
* backticks, or `$(…)` — the goal is to prevent the model from
|
|
2396
|
-
* accidentally (or not) sneaking arbitrary shells past the allowlist
|
|
2397
|
-
* via concatenation. Exported for testing.
|
|
2398
|
-
*/
|
|
1082
|
+
/** No env / glob / backtick / `$(…)` expansion — prevents bypass of allowlist via concatenation. */
|
|
2399
1083
|
declare function tokenizeCommand(cmd: string): string[];
|
|
2400
|
-
/**
|
|
2401
|
-
* Scan `cmd` for a shell operator (`|`, `||`, `>`, `>>`, `<`, `<<`,
|
|
2402
|
-
* `&`, `&&`, `2>`, `2>>`, `2>&1`, `&>`) that appears unquoted at a
|
|
2403
|
-
* token boundary. Returns the operator string, or null if none.
|
|
2404
|
-
*
|
|
2405
|
-
* Why this exists: `run_command` documents "no shell expansion, no
|
|
2406
|
-
* pipes, no redirects" (the tool spawns argv directly, not through a
|
|
2407
|
-
* shell), but when the model writes `dir | findstr foo` the `|`
|
|
2408
|
-
* survives tokenization as a standalone token and gets quoted as the
|
|
2409
|
-
* literal string `"|"` by `quoteForCmdExe` — cmd.exe sees it as an
|
|
2410
|
-
* argument, not an operator, so the pipe silently fails. Detecting
|
|
2411
|
-
* operators up front lets us throw a clear error ("split into separate
|
|
2412
|
-
* calls") instead of letting the command run with surprising results.
|
|
2413
|
-
*
|
|
2414
|
-
* Quoted operators (`grep "a|b"`) and operator characters embedded in
|
|
2415
|
-
* larger tokens (`--flag=1&2`) are NOT flagged — those are literal
|
|
2416
|
-
* argv bytes and are safe to pass through.
|
|
2417
|
-
*
|
|
2418
|
-
* Exported for testing.
|
|
2419
|
-
*/
|
|
1084
|
+
/** Up-front detection — without it, `dir | findstr foo` quotes `|` literal and pipe silently fails. */
|
|
2420
1085
|
declare function detectShellOperator(cmd: string): string | null;
|
|
2421
|
-
/**
|
|
2422
|
-
* Return true when `cmd` matches an allowlisted prefix. Exported for
|
|
2423
|
-
* testing. Match is on the space-normalized leading tokens so
|
|
2424
|
-
* `git status -s ` and `git status` both match `git status`.
|
|
2425
|
-
*/
|
|
1086
|
+
/** Match on space-normalized leading tokens — `git status -s` matches the `git status` prefix. */
|
|
2426
1087
|
declare function isAllowed(cmd: string, extra?: readonly string[]): boolean;
|
|
2427
1088
|
interface RunCommandResult {
|
|
2428
1089
|
exitCode: number | null;
|
|
@@ -2437,95 +1098,28 @@ declare function runCommand(cmd: string, opts: {
|
|
|
2437
1098
|
maxOutputChars?: number;
|
|
2438
1099
|
signal?: AbortSignal;
|
|
2439
1100
|
}): Promise<RunCommandResult>;
|
|
2440
|
-
/**
|
|
2441
|
-
* Test/override hooks for {@link resolveExecutable}. Omitting any field
|
|
2442
|
-
* falls through to the real process globals — the runtime call path
|
|
2443
|
-
* uses defaults; tests inject `platform` + `env` + `isFile` to exercise
|
|
2444
|
-
* Windows-specific lookup from a Linux CI runner without touching
|
|
2445
|
-
* actual fs.
|
|
2446
|
-
*/
|
|
2447
1101
|
interface ResolveExecutableOptions {
|
|
2448
1102
|
platform?: NodeJS.Platform;
|
|
2449
1103
|
env?: {
|
|
2450
1104
|
PATH?: string;
|
|
2451
1105
|
PATHEXT?: string;
|
|
2452
1106
|
};
|
|
2453
|
-
/** Predicate swapped in by tests to avoid creating real files. */
|
|
2454
1107
|
isFile?: (path: string) => boolean;
|
|
2455
|
-
/** Path.join used for the lookup. Defaults to Windows semantics on Windows. */
|
|
2456
1108
|
pathDelimiter?: string;
|
|
2457
1109
|
}
|
|
2458
|
-
/**
|
|
2459
|
-
* Resolve a bare command name (e.g. `npm`) to its on-disk path via
|
|
2460
|
-
* PATH × PATHEXT on Windows. Returns the input unchanged on non-Windows
|
|
2461
|
-
* platforms, when the input is already a path (contains `/`, `\`, or is
|
|
2462
|
-
* absolute), or when no match is found in PATH × PATHEXT (caller gets a
|
|
2463
|
-
* natural ENOENT from spawn, which surfaces cleanly).
|
|
2464
|
-
*
|
|
2465
|
-
* Why this exists: `child_process.spawn` with `shell: false` invokes
|
|
2466
|
-
* Windows `CreateProcess`, which does not honor `PATHEXT` and does not
|
|
2467
|
-
* search for `.cmd` / `.bat` wrappers. Node-ecosystem tools ship as
|
|
2468
|
-
* `npm.cmd`, `npx.cmd`, `yarn.cmd`, etc., so a bare `npm` fails with
|
|
2469
|
-
* ENOENT under `shell: false`. Flipping to `shell: true` would work
|
|
2470
|
-
* but reintroduces shell-expansion (pipes, redirects, chained cmds)
|
|
2471
|
-
* that the tool was explicitly designed to forbid. This resolver
|
|
2472
|
-
* threads the needle.
|
|
2473
|
-
*/
|
|
1110
|
+
/** CreateProcess ignores PATHEXT — bare `npm` fails ENOENT under `shell:false` without this resolver. */
|
|
2474
1111
|
declare function resolveExecutable(cmd: string, opts?: ResolveExecutableOptions): string;
|
|
2475
|
-
/**
|
|
2476
|
-
* Prepare `(bin, args, spawnOpts)` for the runCommand spawn call,
|
|
2477
|
-
* applying Windows-specific workarounds for (a) PATHEXT lookup and
|
|
2478
|
-
* (b) the CVE-2024-27980 prohibition on direct `.cmd`/`.bat` spawns.
|
|
2479
|
-
*
|
|
2480
|
-
* Exported so tests can assert the transformation without booting an
|
|
2481
|
-
* actual child process.
|
|
2482
|
-
*/
|
|
1112
|
+
/** Windows workarounds: PATHEXT lookup + CVE-2024-27980 prohibition on direct `.cmd`/`.bat` spawn. */
|
|
2483
1113
|
declare function prepareSpawn(argv: readonly string[], opts?: ResolveExecutableOptions): {
|
|
2484
1114
|
bin: string;
|
|
2485
1115
|
args: string[];
|
|
2486
1116
|
spawnOverrides: SpawnOptions;
|
|
2487
1117
|
};
|
|
2488
|
-
/**
|
|
2489
|
-
* Locate `-Command` / `-c` in `args` and prepend the UTF-8 setup prelude
|
|
2490
|
-
* to its value. Returns the patched args, or `null` when no `-Command`
|
|
2491
|
-
* arg is present (in which case we leave the invocation untouched —
|
|
2492
|
-
* inline-expression and script-file modes have their own conventions
|
|
2493
|
-
* we don't want to silently rewrite).
|
|
2494
|
-
*
|
|
2495
|
-
* Why not always wrap: PowerShell's quoting semantics are finicky enough
|
|
2496
|
-
* that adding a prelude to a script file invocation could break it.
|
|
2497
|
-
* `-Command` is the case the model actually uses, and where mojibake
|
|
2498
|
-
* matters; targeting just it keeps the blast radius small.
|
|
2499
|
-
*
|
|
2500
|
-
* Exported for tests.
|
|
2501
|
-
*/
|
|
1118
|
+
/** Targets `-Command` only — PowerShell quoting is finicky enough that wrapping script-file mode could break it. */
|
|
2502
1119
|
declare function injectPowerShellUtf8(args: readonly string[]): string[] | null;
|
|
2503
|
-
/**
|
|
2504
|
-
* Prefix a cmd.exe command line with `chcp 65001 >nul &` so output
|
|
2505
|
-
* (from cmd.exe and any child it spawns) is UTF-8-encoded. Without
|
|
2506
|
-
* this, on Chinese / Japanese / Korean Windows, `dir`, `findstr`,
|
|
2507
|
-
* `where`, etc. emit text in the system codepage (CP936, CP932,
|
|
2508
|
-
* CP949, …) and `chunk.toString()` — which decodes as UTF-8 — produces
|
|
2509
|
-
* garbled mojibake the model then sees as poisoned input on the next
|
|
2510
|
-
* turn.
|
|
2511
|
-
*
|
|
2512
|
-
* Scope: chcp affects ONLY this cmd.exe instance, which exits after
|
|
2513
|
-
* `/c`. No global console state changes. Single `&` (not `&&`) so the
|
|
2514
|
-
* command still runs even on the rare Windows builds where chcp
|
|
2515
|
-
* itself returns a non-zero exit (Win7 quirks; harmless on Win10+).
|
|
2516
|
-
*
|
|
2517
|
-
* Exported so tests can verify the wrapping shape.
|
|
2518
|
-
*/
|
|
1120
|
+
/** Single `&` (not `&&`) so the command still runs on Win7 where chcp can return non-zero. */
|
|
2519
1121
|
declare function withUtf8Codepage(cmdline: string): string;
|
|
2520
|
-
/**
|
|
2521
|
-
* Quote an argument so cmd.exe parses it back as a single token. We
|
|
2522
|
-
* always wrap in double quotes when the arg contains whitespace or
|
|
2523
|
-
* any cmd.exe metacharacter, doubling embedded quotes per cmd.exe's
|
|
2524
|
-
* `""` escape rule. Bare alphanumeric args pass through unquoted for
|
|
2525
|
-
* readability in logs.
|
|
2526
|
-
*
|
|
2527
|
-
* Exported for test coverage of the quoting semantics.
|
|
2528
|
-
*/
|
|
1122
|
+
/** Doubles embedded quotes per cmd.exe's `""` escape rule; bare alnum passes through unquoted. */
|
|
2529
1123
|
declare function quoteForCmdExe(arg: string): string;
|
|
2530
1124
|
/** Error thrown by `run_command` when the command isn't allowlisted. */
|
|
2531
1125
|
declare class NeedsConfirmationError extends Error {
|
|
@@ -2535,22 +1129,7 @@ declare class NeedsConfirmationError extends Error {
|
|
|
2535
1129
|
declare function registerShellTools(registry: ToolRegistry, opts: ShellToolsOptions): ToolRegistry;
|
|
2536
1130
|
declare function formatCommandResult(cmd: string, r: RunCommandResult): string;
|
|
2537
1131
|
|
|
2538
|
-
/**
|
|
2539
|
-
* Built-in web search + fetch tools.
|
|
2540
|
-
*
|
|
2541
|
-
* - `web_search(query, topK?)` — Mojeek's public search page. No API
|
|
2542
|
-
* key, no signup. We originally shipped this backed by DuckDuckGo's
|
|
2543
|
-
* HTML endpoint, but DDG started serving anti-bot interstitials
|
|
2544
|
-
* (HTTP 202 with a challenge page) for every unauthenticated POST.
|
|
2545
|
-
* Mojeek runs its own independent index, is bot-friendly, and
|
|
2546
|
-
* returns parseable HTML.
|
|
2547
|
-
* - `web_fetch(url)` — HTTP GET + naïve HTML-to-text extraction.
|
|
2548
|
-
*
|
|
2549
|
-
* Both are registered by default on `reasonix chat` / `reasonix code`;
|
|
2550
|
-
* set `search: false` in config (or `REASONIX_SEARCH=off`) to turn
|
|
2551
|
-
* them off. The model decides when to call them based on the query —
|
|
2552
|
-
* no slash command required.
|
|
2553
|
-
*/
|
|
1132
|
+
/** web_search uses Mojeek (DDG returns anti-bot 202 to unauthenticated POSTs); web_fetch sniffs HTML to text. */
|
|
2554
1133
|
|
|
2555
1134
|
interface SearchResult {
|
|
2556
1135
|
title: string;
|
|
@@ -2575,44 +1154,11 @@ interface WebSearchOptions {
|
|
|
2575
1154
|
topK?: number;
|
|
2576
1155
|
signal?: AbortSignal;
|
|
2577
1156
|
}
|
|
2578
|
-
/**
|
|
2579
|
-
* Search the public web via Mojeek. Returns up to `topK` ranked
|
|
2580
|
-
* results with title, url, snippet.
|
|
2581
|
-
*
|
|
2582
|
-
* Mojeek is an independent index (not a Google/Bing front-end) which
|
|
2583
|
-
* means coverage on niche or very recent topics can be thinner, but
|
|
2584
|
-
* it's reliable from scripts and doesn't gate on cookies or sessions.
|
|
2585
|
-
* If the response has 0 results we distinguish "truly empty" from
|
|
2586
|
-
* "layout changed or blocked" so the caller isn't left guessing.
|
|
2587
|
-
*/
|
|
1157
|
+
/** Distinguishes "truly 0 results" from "layout changed / blocked" so callers can tell. */
|
|
2588
1158
|
declare function webSearch(query: string, opts?: WebSearchOptions): Promise<SearchResult[]>;
|
|
2589
|
-
/**
|
|
2590
|
-
* Extract results from a Mojeek search page.
|
|
2591
|
-
*
|
|
2592
|
-
* Mojeek's stable shape (as of April 2026):
|
|
2593
|
-
* <a … class="ob" href="URL"> … breadcrumb … </a>
|
|
2594
|
-
* <h2><a class="title" href="URL">Title</a></h2>
|
|
2595
|
-
* <p class="s">snippet text …</p>
|
|
2596
|
-
*
|
|
2597
|
-
* We do two tolerant passes — title anchors, then snippet paragraphs —
|
|
2598
|
-
* and pair them positionally. Attribute order inside a tag varies
|
|
2599
|
-
* between versions, so each pass captures the whole element and we
|
|
2600
|
-
* re-extract href / inner text with a second regex. Exported for
|
|
2601
|
-
* unit testing against a fixture.
|
|
2602
|
-
*/
|
|
1159
|
+
/** Title-anchor + snippet-paragraph passes paired positionally — robust to attribute reorder. */
|
|
2603
1160
|
declare function parseMojeekResults(html: string): SearchResult[];
|
|
2604
|
-
/**
|
|
2605
|
-
* Download a URL, strip HTML down to readable text, return it. Times
|
|
2606
|
-
* out at 15s, caps extracted text at 32k chars to fit the tool-result
|
|
2607
|
-
* budget.
|
|
2608
|
-
*/
|
|
2609
1161
|
declare function webFetch(url: string, opts?: WebFetchOptions): Promise<PageContent>;
|
|
2610
|
-
/**
|
|
2611
|
-
* Strip HTML to readable text. Removes scripts/styles/nav/footer/aside
|
|
2612
|
-
* blocks first, then tags, then collapses whitespace. Not a Readability
|
|
2613
|
-
* clone — purpose-built to keep the extracted text small enough for the
|
|
2614
|
-
* tool-result budget while preserving paragraph breaks.
|
|
2615
|
-
*/
|
|
2616
1162
|
declare function htmlToText(html: string): string;
|
|
2617
1163
|
interface WebToolsOptions {
|
|
2618
1164
|
/** Default top-K for `web_search` when the model doesn't specify. */
|
|
@@ -2620,30 +1166,10 @@ interface WebToolsOptions {
|
|
|
2620
1166
|
/** Byte cap for `web_fetch` extracted text. */
|
|
2621
1167
|
maxFetchChars?: number;
|
|
2622
1168
|
}
|
|
2623
|
-
/**
|
|
2624
|
-
* Register `web_search` + `web_fetch` on a ToolRegistry. The model
|
|
2625
|
-
* invokes them automatically when a question needs current info —
|
|
2626
|
-
* no slash command from the user is required.
|
|
2627
|
-
*/
|
|
2628
1169
|
declare function registerWebTools(registry: ToolRegistry, opts?: WebToolsOptions): ToolRegistry;
|
|
2629
1170
|
declare function formatSearchResults(query: string, results: SearchResult[]): string;
|
|
2630
1171
|
|
|
2631
|
-
/**
|
|
2632
|
-
* Session persistence.
|
|
2633
|
-
*
|
|
2634
|
-
* Every turn's log entries (user / assistant / tool messages) are appended to
|
|
2635
|
-
* a JSONL file under `~/.reasonix/sessions/<name>.jsonl`. Next time the user
|
|
2636
|
-
* starts the CLI with the same session name, the loop pre-loads the file
|
|
2637
|
-
* into its AppendOnlyLog so the new turn has full prior context.
|
|
2638
|
-
*
|
|
2639
|
-
* Design notes:
|
|
2640
|
-
* - JSONL rather than JSON so concurrent writes don't corrupt.
|
|
2641
|
-
* - 0600 permissions on Unix (chmod no-ops on Windows).
|
|
2642
|
-
* - Name sanitization keeps paths safe: only [\w-] and CJK letters pass;
|
|
2643
|
-
* anything else is replaced with underscore, max 64 chars.
|
|
2644
|
-
* - The loop's stats/session aren't persisted — only the message log.
|
|
2645
|
-
* Cost accounting resets each run (by design — old costs are sunk).
|
|
2646
|
-
*/
|
|
1172
|
+
/** JSONL append-only message log under `~/.reasonix/sessions/`; concurrent-write safe. */
|
|
2647
1173
|
|
|
2648
1174
|
interface SessionInfo {
|
|
2649
1175
|
name: string;
|
|
@@ -2660,32 +1186,9 @@ declare function appendSessionMessage(name: string, message: ChatMessage): void;
|
|
|
2660
1186
|
declare function listSessions(): SessionInfo[];
|
|
2661
1187
|
declare function deleteSession(name: string): boolean;
|
|
2662
1188
|
|
|
2663
|
-
/**
|
|
2664
|
-
* Minimal `.env` loader; no dependency on dotenv.
|
|
2665
|
-
*
|
|
2666
|
-
* Reads KEY=VALUE lines and populates `process.env` for keys not already set.
|
|
2667
|
-
* Silently no-ops if the file is missing. Safe to call from library entry
|
|
2668
|
-
* points, CLI commands, examples, and benchmark runners.
|
|
2669
|
-
*/
|
|
2670
1189
|
declare function loadDotenv(path?: string): void;
|
|
2671
1190
|
|
|
2672
|
-
/**
|
|
2673
|
-
* Transcript format — the canonical "audit log" of a Reasonix session.
|
|
2674
|
-
*
|
|
2675
|
-
* Design split:
|
|
2676
|
-
* - Session file (`~/.reasonix/sessions/<name>.jsonl`) stores only the
|
|
2677
|
-
* `ChatMessage`s the model needs to resume. See session.ts.
|
|
2678
|
-
* - Transcript file (this module) stores every LoopEvent with usage, cost,
|
|
2679
|
-
* model, and prefix fingerprint attached where available — enough for
|
|
2680
|
-
* replay and diff to reconstruct economics.
|
|
2681
|
-
*
|
|
2682
|
-
* The two are different contracts: sessions are the user's *memory*;
|
|
2683
|
-
* transcripts are the *receipts*. Don't conflate them.
|
|
2684
|
-
*
|
|
2685
|
-
* Backward compatibility: all fields beyond {ts, turn, role, content} are
|
|
2686
|
-
* optional on read. A v0.1 transcript (pre-usage) still parses and renders
|
|
2687
|
-
* — it just shows cost/cache as n/a.
|
|
2688
|
-
*/
|
|
1191
|
+
/** Transcripts are receipts (cost/usage/prefix); sessions are memory (ChatMessages). Don't conflate. */
|
|
2689
1192
|
|
|
2690
1193
|
interface TranscriptRecord {
|
|
2691
1194
|
/** ISO-8601 timestamp at emit time. */
|
|
@@ -2706,28 +1209,14 @@ interface TranscriptRecord {
|
|
|
2706
1209
|
cost?: number;
|
|
2707
1210
|
/** Model id that produced this turn. */
|
|
2708
1211
|
model?: string;
|
|
2709
|
-
/**
|
|
2710
|
-
* The ImmutablePrefix fingerprint at this turn. Lets diff prove two runs
|
|
2711
|
-
* share a prefix — i.e. any cache-hit delta is attributable to log
|
|
2712
|
-
* stability, not to a different system prompt.
|
|
2713
|
-
*/
|
|
1212
|
+
/** Lets diff attribute cache-hit delta to log stability vs prompt change. */
|
|
2714
1213
|
prefixHash?: string;
|
|
2715
|
-
/**
|
|
2716
|
-
* Structured plan state extracted by the Pillar 2 harvester. Present on
|
|
2717
|
-
* assistant_final records when harvest was enabled and produced non-empty
|
|
2718
|
-
* state. Omitted entirely when harvest is off or produced nothing —
|
|
2719
|
-
* absence means "no data", not "empty plan".
|
|
2720
|
-
*/
|
|
1214
|
+
/** Absent means "no data", not "empty plan". */
|
|
2721
1215
|
planState?: TypedPlanState;
|
|
2722
1216
|
/** Optional error message (role === "error"). */
|
|
2723
1217
|
error?: string;
|
|
2724
1218
|
}
|
|
2725
1219
|
interface TranscriptMeta {
|
|
2726
|
-
/**
|
|
2727
|
-
* Optional metadata written as the first line of a transcript. Lets
|
|
2728
|
-
* downstream tooling know what it's reading without guessing.
|
|
2729
|
-
* Recognized by a special role "_meta".
|
|
2730
|
-
*/
|
|
2731
1220
|
version: 1;
|
|
2732
1221
|
source: string;
|
|
2733
1222
|
model?: string;
|
|
@@ -2740,11 +1229,6 @@ interface ReadTranscriptResult {
|
|
|
2740
1229
|
meta: TranscriptMeta | null;
|
|
2741
1230
|
records: TranscriptRecord[];
|
|
2742
1231
|
}
|
|
2743
|
-
/**
|
|
2744
|
-
* Build a TranscriptRecord from a LoopEvent. Extra fields (model,
|
|
2745
|
-
* prefixHash) that the LoopEvent doesn't carry are passed in separately
|
|
2746
|
-
* because they're session-level, not event-level.
|
|
2747
|
-
*/
|
|
2748
1232
|
declare function recordFromLoopEvent(ev: LoopEvent, extra: {
|
|
2749
1233
|
model: string;
|
|
2750
1234
|
prefixHash: string;
|
|
@@ -2761,30 +1245,11 @@ declare function writeMeta(stream: WriteStream, meta: TranscriptMeta): void;
|
|
|
2761
1245
|
* Convenience: open a stream, write meta, return stream.
|
|
2762
1246
|
*/
|
|
2763
1247
|
declare function openTranscriptFile(path: string, meta: TranscriptMeta): WriteStream;
|
|
2764
|
-
/**
|
|
2765
|
-
* Parse a transcript file. Returns meta (if the first line is a _meta record)
|
|
2766
|
-
* and the full record list.
|
|
2767
|
-
*
|
|
2768
|
-
* Robustness contract:
|
|
2769
|
-
* - Empty lines are skipped.
|
|
2770
|
-
* - Malformed JSON lines are skipped silently (do not crash on partial
|
|
2771
|
-
* files — live chats may be mid-write).
|
|
2772
|
-
* - Records missing optional fields still parse — they're just rendered
|
|
2773
|
-
* with n/a where the optional value would go.
|
|
2774
|
-
*/
|
|
1248
|
+
/** Tolerant: empty / malformed lines skipped, missing optionals OK — live chats may be mid-write. */
|
|
2775
1249
|
declare function readTranscript(path: string): ReadTranscriptResult;
|
|
2776
1250
|
declare function parseTranscript(raw: string): ReadTranscriptResult;
|
|
2777
1251
|
|
|
2778
|
-
/**
|
|
2779
|
-
* Replay — reconstruct session economics from a transcript file.
|
|
2780
|
-
*
|
|
2781
|
-
* Given a transcript written by App.tsx or the bench runner, rebuild a
|
|
2782
|
-
* SessionSummary-compatible aggregate (turn count, total cost, cache-hit
|
|
2783
|
-
* ratio, vs-Claude estimate) without replaying the LLM calls.
|
|
2784
|
-
*
|
|
2785
|
-
* The whole point is offline auditing: a reader should be able to reproduce
|
|
2786
|
-
* the headline numbers from a transcript alone, without an API key.
|
|
2787
|
-
*/
|
|
1252
|
+
/** Reconstruct session economics from a transcript alone — offline audit, no API key. */
|
|
2788
1253
|
|
|
2789
1254
|
interface ReplayStats extends SessionSummary {
|
|
2790
1255
|
/** Per-turn stats, in turn order. Only assistant_final records contribute. */
|
|
@@ -2804,35 +1269,13 @@ interface ReplayStats extends SessionSummary {
|
|
|
2804
1269
|
/** Sum of subgoals across all harvested turns. */
|
|
2805
1270
|
totalSubgoals: number;
|
|
2806
1271
|
}
|
|
2807
|
-
/**
|
|
2808
|
-
* Parse a transcript file and compute replay stats. Throws only on I/O
|
|
2809
|
-
* errors; malformed lines inside the file are skipped silently.
|
|
2810
|
-
*/
|
|
2811
1272
|
declare function replayFromFile(path: string): {
|
|
2812
1273
|
parsed: ReadTranscriptResult;
|
|
2813
1274
|
stats: ReplayStats;
|
|
2814
1275
|
};
|
|
2815
1276
|
declare function computeReplayStats(records: TranscriptRecord[]): ReplayStats;
|
|
2816
1277
|
|
|
2817
|
-
/**
|
|
2818
|
-
* Diff — compare two transcripts and produce a summary + divergence report.
|
|
2819
|
-
*
|
|
2820
|
-
* Two transcripts are "comparable" when they stem from the same task (or
|
|
2821
|
-
* the same user prompt). Alignment is by turn number: assistant_final #N
|
|
2822
|
-
* in A pairs with assistant_final #N in B. If one side ran more turns, the
|
|
2823
|
-
* extras are labeled "only in A" / "only in B".
|
|
2824
|
-
*
|
|
2825
|
-
* What we compute:
|
|
2826
|
-
* - Aggregate deltas: turns, tool calls, cache hit, cost, token counts
|
|
2827
|
-
* - First divergence: the lowest turn where A and B's tool calls or
|
|
2828
|
-
* assistant text differ meaningfully
|
|
2829
|
-
* - Prefix-stability story: how many unique prefix hashes each side used
|
|
2830
|
-
*
|
|
2831
|
-
* Non-goals (deliberately):
|
|
2832
|
-
* - LLM-judge quality comparison
|
|
2833
|
-
* - Per-token delta rendering — not useful at the fidelity we're at
|
|
2834
|
-
* - Embedding similarity — Levenshtein ratio is cheap and good enough
|
|
2835
|
-
*/
|
|
1278
|
+
/** Transcript diff — pairs assistant_final by turn number; unmatched extras become only_in_a / only_in_b. */
|
|
2836
1279
|
|
|
2837
1280
|
interface DiffSide {
|
|
2838
1281
|
label: string;
|
|
@@ -2846,13 +1289,6 @@ interface TurnPair {
|
|
|
2846
1289
|
bAssistant?: TranscriptRecord;
|
|
2847
1290
|
aTools: TranscriptRecord[];
|
|
2848
1291
|
bTools: TranscriptRecord[];
|
|
2849
|
-
/**
|
|
2850
|
-
* Classification of the pair:
|
|
2851
|
-
* "match" — both sides present, text & tool calls within threshold
|
|
2852
|
-
* "diverge" — both sides present, but text or tool calls differ
|
|
2853
|
-
* "only_in_a" — assistant_final in A but not B
|
|
2854
|
-
* "only_in_b" — assistant_final in B but not A
|
|
2855
|
-
*/
|
|
2856
1292
|
kind: "match" | "diverge" | "only_in_a" | "only_in_b";
|
|
2857
1293
|
/** When kind === "diverge", a short one-liner pointing at what differs. */
|
|
2858
1294
|
divergenceNote?: string;
|
|
@@ -2870,11 +1306,7 @@ declare function diffTranscripts(a: {
|
|
|
2870
1306
|
label: string;
|
|
2871
1307
|
parsed: ReadTranscriptResult;
|
|
2872
1308
|
}): DiffReport;
|
|
2873
|
-
/**
|
|
2874
|
-
* Normalized Levenshtein similarity ratio in [0, 1]. 1 = identical.
|
|
2875
|
-
* Early-exits for long strings (> 2000 chars) with a cheap token-overlap
|
|
2876
|
-
* estimate to keep diff fast on chatty transcripts.
|
|
2877
|
-
*/
|
|
1309
|
+
/** Falls back to token-overlap above 2000 chars to keep diff fast on chatty transcripts. */
|
|
2878
1310
|
declare function similarity(a: string, b: string): number;
|
|
2879
1311
|
interface RenderOptions {
|
|
2880
1312
|
/** Monochrome output (for file redirection or piping). Defaults to true. */
|
|
@@ -2883,26 +1315,7 @@ interface RenderOptions {
|
|
|
2883
1315
|
declare function renderSummaryTable(report: DiffReport, _opts?: RenderOptions): string;
|
|
2884
1316
|
declare function renderMarkdown(report: DiffReport): string;
|
|
2885
1317
|
|
|
2886
|
-
/**
|
|
2887
|
-
* MCP (Model Context Protocol) type definitions.
|
|
2888
|
-
*
|
|
2889
|
-
* Hand-rolled rather than importing @modelcontextprotocol/sdk because:
|
|
2890
|
-
* - Reasonix's value-add isn't reimplementing the protocol, but *caching*
|
|
2891
|
-
* it. Owning the types lets us tune them for our integration (strip
|
|
2892
|
-
* fields we don't use, add the ones we do like Reasonix's prefixHash).
|
|
2893
|
-
* - Zero dependencies — consistent with how we wrote the DeepSeek client.
|
|
2894
|
-
* - If Anthropic bumps the SDK and introduces a breaking change, we're
|
|
2895
|
-
* insulated as long as we keep up with the spec itself.
|
|
2896
|
-
*
|
|
2897
|
-
* Spec reference: https://spec.modelcontextprotocol.io/ (2024-11-05 draft
|
|
2898
|
-
* at time of writing). Reasonix models the subset it consumes: tools
|
|
2899
|
-
* list/call, resources list/read, prompts list/get, plus the init
|
|
2900
|
-
* handshake. Sampling and progress notifications remain deferred.
|
|
2901
|
-
*
|
|
2902
|
-
* Transport note: the wire format for stdio MCP is **newline-delimited
|
|
2903
|
-
* JSON** (NDJSON), not the LSP-style Content-Length header framing that
|
|
2904
|
-
* some readers might expect. One JSON-RPC message per line.
|
|
2905
|
-
*/
|
|
1318
|
+
/** MCP types (spec 2024-11-05). Stdio wire format is NDJSON — one JSON-RPC message per line, no Content-Length framing. */
|
|
2906
1319
|
type JsonRpcId = string | number;
|
|
2907
1320
|
interface JsonRpcRequest<P = unknown> {
|
|
2908
1321
|
jsonrpc: "2.0";
|
|
@@ -2968,13 +1381,6 @@ interface ListToolsResult {
|
|
|
2968
1381
|
tools: McpTool[];
|
|
2969
1382
|
nextCursor?: string;
|
|
2970
1383
|
}
|
|
2971
|
-
/**
|
|
2972
|
-
* Server → client notification emitted during a long-running request
|
|
2973
|
-
* that the client subscribed to via `_meta.progressToken`. `progress`
|
|
2974
|
-
* and `total` are typically matched units (files scanned, bytes
|
|
2975
|
-
* processed, etc.); `total` may be missing when the server can't
|
|
2976
|
-
* estimate the upper bound up front.
|
|
2977
|
-
*/
|
|
2978
1384
|
interface ProgressNotificationParams {
|
|
2979
1385
|
progressToken: string | number;
|
|
2980
1386
|
progress: number;
|
|
@@ -3004,11 +1410,6 @@ interface CallToolResult {
|
|
|
3004
1410
|
/** True = tool raised an error; the content describes it. */
|
|
3005
1411
|
isError?: boolean;
|
|
3006
1412
|
}
|
|
3007
|
-
/**
|
|
3008
|
-
* A resource the server can expose — think "file the model can read."
|
|
3009
|
-
* The URI is opaque to the client: servers may use `file://`, custom
|
|
3010
|
-
* schemes, or bare strings. Reasonix doesn't interpret them.
|
|
3011
|
-
*/
|
|
3012
1413
|
interface McpResource {
|
|
3013
1414
|
uri: string;
|
|
3014
1415
|
name: string;
|
|
@@ -3020,12 +1421,7 @@ interface ListResourcesResult {
|
|
|
3020
1421
|
resources: McpResource[];
|
|
3021
1422
|
nextCursor?: string;
|
|
3022
1423
|
}
|
|
3023
|
-
/**
|
|
3024
|
-
* One resource can return multiple content blobs (e.g. the file + a
|
|
3025
|
-
* side-car). `text` is the common case for UTF-8 content; `blob` is
|
|
3026
|
-
* base64-encoded bytes for binary content. Servers populate exactly
|
|
3027
|
-
* one of the two for each entry.
|
|
3028
|
-
*/
|
|
1424
|
+
/** Server populates exactly one of `text` (UTF-8) or `blob` (base64) per entry. */
|
|
3029
1425
|
interface McpResourceContentsText {
|
|
3030
1426
|
uri: string;
|
|
3031
1427
|
mimeType?: string;
|
|
@@ -3040,10 +1436,6 @@ type McpResourceContents = McpResourceContentsText | McpResourceContentsBlob;
|
|
|
3040
1436
|
interface ReadResourceResult {
|
|
3041
1437
|
contents: McpResourceContents[];
|
|
3042
1438
|
}
|
|
3043
|
-
/**
|
|
3044
|
-
* A parameterizable prompt template the server exposes. Clients fetch
|
|
3045
|
-
* it with `prompts/get` and pass the result to the model as-is.
|
|
3046
|
-
*/
|
|
3047
1439
|
interface McpPromptArgument {
|
|
3048
1440
|
name: string;
|
|
3049
1441
|
description?: string;
|
|
@@ -3058,12 +1450,6 @@ interface ListPromptsResult {
|
|
|
3058
1450
|
prompts: McpPrompt[];
|
|
3059
1451
|
nextCursor?: string;
|
|
3060
1452
|
}
|
|
3061
|
-
/**
|
|
3062
|
-
* MCP prompt messages are modeled after chat completions: role + content.
|
|
3063
|
-
* Content can be a text block OR (per the spec) a resource/image block;
|
|
3064
|
-
* Reasonix cares about text in v1, but surfaces the raw array so callers
|
|
3065
|
-
* can render other kinds if they need to.
|
|
3066
|
-
*/
|
|
3067
1453
|
interface McpPromptMessage {
|
|
3068
1454
|
role: "user" | "assistant";
|
|
3069
1455
|
content: McpContentBlock | McpPromptResourceBlock;
|
|
@@ -3081,23 +1467,8 @@ declare const MCP_PROTOCOL_VERSION = "2024-11-05";
|
|
|
3081
1467
|
/** Type guard — success vs error response. */
|
|
3082
1468
|
declare function isJsonRpcError(msg: JsonRpcResponse): msg is JsonRpcError;
|
|
3083
1469
|
|
|
3084
|
-
/**
|
|
3085
|
-
* Stdio transport for MCP.
|
|
3086
|
-
*
|
|
3087
|
-
* MCP's stdio wire format is **newline-delimited JSON** (one JSON-RPC
|
|
3088
|
-
* message per line). We spawn the server as a child process, write
|
|
3089
|
-
* frames to its stdin, parse its stdout line-by-line as they arrive.
|
|
3090
|
-
*
|
|
3091
|
-
* Transport is abstracted behind an interface so unit tests can fake it
|
|
3092
|
-
* with an in-process duplex pair — spawning real servers in unit tests
|
|
3093
|
-
* is flaky and slow.
|
|
3094
|
-
*/
|
|
1470
|
+
/** MCP stdio = newline-delimited JSON-RPC; transport iface lets tests fake it without spawning. */
|
|
3095
1471
|
|
|
3096
|
-
/**
|
|
3097
|
-
* A transport sends JSON-RPC messages upstream and surfaces messages
|
|
3098
|
-
* arriving downstream via an async iterator. One instance per server
|
|
3099
|
-
* connection.
|
|
3100
|
-
*/
|
|
3101
1472
|
interface McpTransport {
|
|
3102
1473
|
/** Send one JSON-RPC message. Resolves when the bytes are accepted. */
|
|
3103
1474
|
send(message: JsonRpcMessage): Promise<void>;
|
|
@@ -3116,19 +1487,9 @@ interface StdioTransportOptions {
|
|
|
3116
1487
|
replaceEnv?: boolean;
|
|
3117
1488
|
/** CWD for the child. Default: process.cwd(). */
|
|
3118
1489
|
cwd?: string;
|
|
3119
|
-
/**
|
|
3120
|
-
* Spawn through a shell. Default: true on win32 (needed to resolve
|
|
3121
|
-
* `.cmd` wrappers like `npx.cmd`, `pnpm.cmd`), false elsewhere.
|
|
3122
|
-
* Explicitly pass `false` to opt out on Windows; pass `true` to force
|
|
3123
|
-
* it on POSIX (rarely needed).
|
|
3124
|
-
*/
|
|
1490
|
+
/** Default true on win32 to resolve `.cmd`/`.bat` wrappers (npx.cmd etc.). */
|
|
3125
1491
|
shell?: boolean;
|
|
3126
1492
|
}
|
|
3127
|
-
/**
|
|
3128
|
-
* Spawn `command args...` as a child process and use its stdin/stdout as
|
|
3129
|
-
* an MCP transport. Stderr is forwarded to the parent's stderr so server
|
|
3130
|
-
* diagnostics are still visible.
|
|
3131
|
-
*/
|
|
3132
1493
|
declare class StdioTransport implements McpTransport {
|
|
3133
1494
|
private readonly child;
|
|
3134
1495
|
private readonly queue;
|
|
@@ -3145,12 +1506,6 @@ declare class StdioTransport implements McpTransport {
|
|
|
3145
1506
|
private push;
|
|
3146
1507
|
}
|
|
3147
1508
|
|
|
3148
|
-
/**
|
|
3149
|
-
* MCP client — request/response correlation, initialize handshake,
|
|
3150
|
-
* tools/list, tools/call. Built on top of a McpTransport so the same
|
|
3151
|
-
* logic works against a real stdio server or an in-process fake.
|
|
3152
|
-
*/
|
|
3153
|
-
|
|
3154
1509
|
interface McpClientOptions {
|
|
3155
1510
|
transport: McpTransport;
|
|
3156
1511
|
clientInfo?: McpClientInfo;
|
|
@@ -3180,50 +1535,21 @@ declare class McpClient {
|
|
|
3180
1535
|
get protocolVersion(): string;
|
|
3181
1536
|
/** Optional free-form instructions the server provides at handshake. */
|
|
3182
1537
|
get serverInstructions(): string | undefined;
|
|
3183
|
-
/**
|
|
3184
|
-
* Complete the initialize → initialized handshake. Must be called
|
|
3185
|
-
* before any other method (otherwise compliant servers reject).
|
|
3186
|
-
*/
|
|
1538
|
+
/** Compliant servers reject other methods until this completes. */
|
|
3187
1539
|
initialize(): Promise<InitializeResult>;
|
|
3188
1540
|
/** List tools the server exposes. */
|
|
3189
1541
|
listTools(): Promise<ListToolsResult>;
|
|
3190
|
-
/**
|
|
3191
|
-
* Invoke a tool by name. When `onProgress` is supplied, attaches a
|
|
3192
|
-
* fresh progress token so the server can send incremental updates
|
|
3193
|
-
* via `notifications/progress`; they're routed to the callback until
|
|
3194
|
-
* the final response arrives (or the request times out, in which
|
|
3195
|
-
* case the handler is simply dropped — no extra notification).
|
|
3196
|
-
*
|
|
3197
|
-
* When `signal` is supplied, aborting it:
|
|
3198
|
-
* 1) fires `notifications/cancelled` to the server (MCP 2024-11-05
|
|
3199
|
-
* way of saying "forget this request, I no longer care"), and
|
|
3200
|
-
* 2) rejects the pending promise immediately with an AbortError,
|
|
3201
|
-
* so the caller doesn't have to wait for the subprocess to
|
|
3202
|
-
* finish its in-flight file write or network request.
|
|
3203
|
-
* The server MAY still emit a late response; we drop it in dispatch
|
|
3204
|
-
* since the request id is gone from `pending`.
|
|
3205
|
-
*/
|
|
1542
|
+
/** Abort sends `notifications/cancelled` and rejects immediately; late server responses are dropped. */
|
|
3206
1543
|
callTool(name: string, args?: Record<string, unknown>, opts?: {
|
|
3207
1544
|
onProgress?: McpProgressHandler;
|
|
3208
1545
|
signal?: AbortSignal;
|
|
3209
1546
|
}): Promise<CallToolResult>;
|
|
3210
|
-
/**
|
|
3211
|
-
* List resources the server exposes. Supports a pagination cursor;
|
|
3212
|
-
* callers interested in the full set should loop on `nextCursor`.
|
|
3213
|
-
* Servers that don't support resources respond with method-not-found
|
|
3214
|
-
* (−32601) — we surface that as a thrown Error so callers can gate
|
|
3215
|
-
* on the `serverCapabilities.resources` field first.
|
|
3216
|
-
*/
|
|
1547
|
+
/** Throws on method-not-found; callers should gate on `serverCapabilities.resources` first. */
|
|
3217
1548
|
listResources(cursor?: string): Promise<ListResourcesResult>;
|
|
3218
1549
|
/** Read the contents of a resource by URI. */
|
|
3219
1550
|
readResource(uri: string): Promise<ReadResourceResult>;
|
|
3220
1551
|
/** List prompt templates the server exposes. */
|
|
3221
1552
|
listPrompts(cursor?: string): Promise<ListPromptsResult>;
|
|
3222
|
-
/**
|
|
3223
|
-
* Fetch a rendered prompt by name. `args` supplies values for any
|
|
3224
|
-
* required template arguments; the server validates. Returns messages
|
|
3225
|
-
* ready to prepend to the model's input.
|
|
3226
|
-
*/
|
|
3227
1553
|
getPrompt(name: string, args?: Record<string, string>): Promise<GetPromptResult>;
|
|
3228
1554
|
/** Close the transport and reject any outstanding requests. */
|
|
3229
1555
|
close(): Promise<void>;
|
|
@@ -3234,27 +1560,7 @@ declare class McpClient {
|
|
|
3234
1560
|
private dispatch;
|
|
3235
1561
|
}
|
|
3236
1562
|
|
|
3237
|
-
/**
|
|
3238
|
-
* HTTP+SSE transport for MCP (spec version 2024-11-05).
|
|
3239
|
-
*
|
|
3240
|
-
* Wire shape:
|
|
3241
|
-
* 1. Client opens GET to the SSE URL with `Accept: text/event-stream`.
|
|
3242
|
-
* 2. Server's first SSE event is `event: endpoint`, `data: <url>` — the
|
|
3243
|
-
* URL (relative or absolute) the client must POST JSON-RPC requests
|
|
3244
|
-
* to. All subsequent server → client messages arrive as `event: message`
|
|
3245
|
-
* SSE frames carrying a JSON-RPC response or server-initiated frame.
|
|
3246
|
-
* 3. Client POSTs each outgoing JSON-RPC frame to the endpoint URL.
|
|
3247
|
-
* The POST response body is ignored — replies land on the SSE stream.
|
|
3248
|
-
*
|
|
3249
|
-
* This transport exists so Reasonix can talk to hosted/remote MCP servers
|
|
3250
|
-
* (e.g. a company's internal knowledge server fronted by auth). Stdio
|
|
3251
|
-
* covers local subprocesses; SSE covers everything else.
|
|
3252
|
-
*
|
|
3253
|
-
* Note: the newer "Streamable HTTP" transport (2025 spec) folds the POST
|
|
3254
|
-
* and SSE streams onto a single endpoint. We stay on 2024-11-05 here —
|
|
3255
|
-
* that's what `MCP_PROTOCOL_VERSION` advertises in the initialize handshake
|
|
3256
|
-
* and what currently-published servers implement.
|
|
3257
|
-
*/
|
|
1563
|
+
/** MCP HTTP+SSE transport (spec 2024-11-05) — POST endpoint URL arrives as the first `event: endpoint` SSE frame. */
|
|
3258
1564
|
|
|
3259
1565
|
interface SseTransportOptions {
|
|
3260
1566
|
/** SSE endpoint URL, e.g. `https://mcp.example.com/sse`. */
|
|
@@ -3262,10 +1568,6 @@ interface SseTransportOptions {
|
|
|
3262
1568
|
/** Extra headers sent on both the SSE GET and the JSON-RPC POSTs (e.g. `Authorization`). */
|
|
3263
1569
|
headers?: Record<string, string>;
|
|
3264
1570
|
}
|
|
3265
|
-
/**
|
|
3266
|
-
* Open an SSE stream to `url`, parse incoming events into JsonRpcMessages,
|
|
3267
|
-
* POST outgoing frames to the endpoint URL the server advertises.
|
|
3268
|
-
*/
|
|
3269
1571
|
declare class SseTransport implements McpTransport {
|
|
3270
1572
|
private readonly url;
|
|
3271
1573
|
private readonly headers;
|
|
@@ -3289,40 +1591,7 @@ declare class SseTransport implements McpTransport {
|
|
|
3289
1591
|
private markClosed;
|
|
3290
1592
|
}
|
|
3291
1593
|
|
|
3292
|
-
/**
|
|
3293
|
-
* Streamable HTTP transport for MCP (spec version 2025-03-26).
|
|
3294
|
-
*
|
|
3295
|
-
* Wire shape (single endpoint, no separate POST URL handshake):
|
|
3296
|
-
*
|
|
3297
|
-
* 1. Client POSTs each outgoing JSON-RPC frame to the endpoint with
|
|
3298
|
-
* `Accept: application/json, text/event-stream`. The server picks
|
|
3299
|
-
* ONE of three responses:
|
|
3300
|
-
* a. `202 Accepted`, no body → notification or response
|
|
3301
|
-
* was accepted; nothing more to deliver.
|
|
3302
|
-
* b. `200 OK`, `Content-Type: application/json` → body is a
|
|
3303
|
-
* single JSON-RPC response (or batch). Connection closes.
|
|
3304
|
-
* c. `200 OK`, `Content-Type: text/event-stream` → an SSE
|
|
3305
|
-
* stream of `event: message` frames carrying responses,
|
|
3306
|
-
* server-initiated requests, and notifications. Stream may
|
|
3307
|
-
* close after the matching response or stay open longer.
|
|
3308
|
-
* 2. The server may include `Mcp-Session-Id: <opaque>` on the response
|
|
3309
|
-
* to `initialize`. Client echoes that header on every subsequent
|
|
3310
|
-
* request. A 404 on a request with a session id means the session
|
|
3311
|
-
* expired — caller must reinitialize.
|
|
3312
|
-
*
|
|
3313
|
-
* Compared to 2024-11-05 HTTP+SSE:
|
|
3314
|
-
* - No two-endpoint dance (no `event: endpoint` handshake).
|
|
3315
|
-
* - Replies arrive on the POST response, not on a separate GET stream.
|
|
3316
|
-
* - Session continuity is explicit (`Mcp-Session-Id`), not implicit.
|
|
3317
|
-
*
|
|
3318
|
-
* Not yet implemented in this transport (acceptable for v1):
|
|
3319
|
-
* - Long-lived GET stream for unsolicited server-initiated frames
|
|
3320
|
-
* (sampling requests, etc.). Most MCP servers we care about today
|
|
3321
|
-
* don't issue server-initiated requests, and POST-only handles
|
|
3322
|
-
* full request/response/notification traffic. Add when a real
|
|
3323
|
-
* server we're integrating against needs it.
|
|
3324
|
-
* - Resumability via `Last-Event-ID` on reconnect.
|
|
3325
|
-
*/
|
|
1594
|
+
/** MCP Streamable HTTP transport (2025-03-26) — POST-only; no long-lived GET stream, no Last-Event-ID resume. */
|
|
3326
1595
|
|
|
3327
1596
|
interface StreamableHttpTransportOptions {
|
|
3328
1597
|
/** Streamable HTTP endpoint URL, e.g. `https://mcp.example.com/mcp`. */
|
|
@@ -3351,46 +1620,16 @@ declare class StreamableHttpTransport implements McpTransport {
|
|
|
3351
1620
|
private pushMessage;
|
|
3352
1621
|
}
|
|
3353
1622
|
|
|
3354
|
-
/**
|
|
3355
|
-
* Bridge: register an MCP server's tools into a Reasonix ToolRegistry.
|
|
3356
|
-
*
|
|
3357
|
-
* This is the integration surface. Once done, `CacheFirstLoop` sees the
|
|
3358
|
-
* MCP tools as if they were native — they inherit Cache-First + repair
|
|
3359
|
-
* (scavenge / truncation / storm) automatically. That's the payoff: any
|
|
3360
|
-
* MCP ecosystem tool, wrapped in Reasonix's Pillar 1 + Pillar 3 benefits.
|
|
3361
|
-
*/
|
|
3362
|
-
|
|
3363
1623
|
interface BridgeOptions {
|
|
3364
|
-
/**
|
|
3365
|
-
* Prefix prepended to every MCP tool name when registered. Defaults to
|
|
3366
|
-
* empty (no prefix). Useful when bridging multiple servers into one
|
|
3367
|
-
* registry and names collide — e.g. `fs` + `gh` both exposing `search`.
|
|
3368
|
-
*/
|
|
1624
|
+
/** Prefix for tool names — disambiguates collisions when bridging multiple servers. */
|
|
3369
1625
|
namePrefix?: string;
|
|
3370
1626
|
/** Registry to populate. Creates a fresh one if omitted. */
|
|
3371
1627
|
registry?: ToolRegistry;
|
|
3372
1628
|
/** Auto-flatten deep schemas (Pillar 3). Defaults to the registry's own default (true). */
|
|
3373
1629
|
autoFlatten?: boolean;
|
|
3374
|
-
/**
|
|
3375
|
-
* Per-tool-call result cap, in characters. If a tool returns more than
|
|
3376
|
-
* this, the result is truncated and a `[…truncated N chars…]` marker is
|
|
3377
|
-
* appended before the last KB so the model still sees a useful tail.
|
|
3378
|
-
* Defaults to {@link DEFAULT_MAX_RESULT_CHARS}.
|
|
3379
|
-
*
|
|
3380
|
-
* Why this exists: DeepSeek V3's context is 131,072 tokens. A single
|
|
3381
|
-
* `read_file` against a big source file can return >3 MB of text
|
|
3382
|
-
* (~900k tokens) and permanently poison the session — every subsequent
|
|
3383
|
-
* turn rebuilds the history and 400s. This cap is a floor. Users who
|
|
3384
|
-
* legitimately want bigger payloads can raise it explicitly.
|
|
3385
|
-
*/
|
|
1630
|
+
/** Cap on tool result chars; head+tail truncation. Floor against context-poisoning oversized reads. */
|
|
3386
1631
|
maxResultChars?: number;
|
|
3387
|
-
/**
|
|
3388
|
-
* Callback fired for every `notifications/progress` frame the server
|
|
3389
|
-
* emits during any bridged tool call. Includes the registered
|
|
3390
|
-
* (prefix-applied) tool name so a multi-server UI can attribute
|
|
3391
|
-
* progress correctly. Absent → no `_meta.progressToken` is sent and
|
|
3392
|
-
* the server won't emit progress for these calls.
|
|
3393
|
-
*/
|
|
1632
|
+
/** Called for each `notifications/progress` frame during a bridged tool call. Absent → no `_meta.progressToken` is sent and the server won't emit progress. */
|
|
3394
1633
|
onProgress?: (info: {
|
|
3395
1634
|
toolName: string;
|
|
3396
1635
|
progress: number;
|
|
@@ -3398,22 +1637,8 @@ interface BridgeOptions {
|
|
|
3398
1637
|
message?: string;
|
|
3399
1638
|
}) => void;
|
|
3400
1639
|
}
|
|
3401
|
-
/**
|
|
3402
|
-
* 32,000 chars ≈ 8k English tokens, or ~16k CJK tokens. Small enough to
|
|
3403
|
-
* fit comfortably in history even across 5–10 tool calls, large enough
|
|
3404
|
-
* that most file reads and directory listings fit un-truncated.
|
|
3405
|
-
*/
|
|
3406
1640
|
declare const DEFAULT_MAX_RESULT_CHARS = 32000;
|
|
3407
|
-
/**
|
|
3408
|
-
* Token-aware cap for tool results, in DeepSeek V3 tokens.
|
|
3409
|
-
*
|
|
3410
|
-
* 8,000 tokens ≈ 6% of DeepSeek V3's 131K context. One oversized tool
|
|
3411
|
-
* result can't eat more than that no matter what character density the
|
|
3412
|
-
* content has. The char cap (32K chars) only bounds tokens for English
|
|
3413
|
-
* — CJK text at 1 char/token blows past 16K tokens under the same
|
|
3414
|
-
* ceiling. With the tokenizer shipped in 0.5.0 we can cap the thing
|
|
3415
|
-
* that actually matters.
|
|
3416
|
-
*/
|
|
1641
|
+
/** ~6% of DeepSeek V3 context. Char cap alone fails on CJK (~1 char/token). */
|
|
3417
1642
|
declare const DEFAULT_MAX_RESULT_TOKENS = 8000;
|
|
3418
1643
|
interface BridgeResult {
|
|
3419
1644
|
registry: ToolRegistry;
|
|
@@ -3425,86 +1650,18 @@ interface BridgeResult {
|
|
|
3425
1650
|
reason: string;
|
|
3426
1651
|
}>;
|
|
3427
1652
|
}
|
|
3428
|
-
/**
|
|
3429
|
-
* Walk a connected `McpClient`'s tools/list result, register each into a
|
|
3430
|
-
* Reasonix `ToolRegistry`. Each registered `fn` proxies through the
|
|
3431
|
-
* client's tools/call. Tool results are flattened into a string (joining
|
|
3432
|
-
* text blocks with newlines, prefixing image blocks as placeholders) so
|
|
3433
|
-
* they fit Reasonix's existing tool-dispatch contract.
|
|
3434
|
-
*/
|
|
3435
1653
|
declare function bridgeMcpTools(client: McpClient, opts?: BridgeOptions): Promise<BridgeResult>;
|
|
3436
1654
|
interface FlattenOptions {
|
|
3437
1655
|
/** Cap the flattened string at this many characters. Default: no cap. */
|
|
3438
1656
|
maxChars?: number;
|
|
3439
1657
|
}
|
|
3440
|
-
/**
|
|
3441
|
-
* Turn an MCP CallToolResult into a string — the contract Reasonix's
|
|
3442
|
-
* ToolRegistry.dispatch returns. We:
|
|
3443
|
-
* - join text blocks with newlines (most common case)
|
|
3444
|
-
* - stringify image blocks as placeholders (LLM can't use bytes anyway
|
|
3445
|
-
* in Reasonix's current surface; image support comes with multimodal
|
|
3446
|
-
* prompts later)
|
|
3447
|
-
* - prefix error results with "ERROR: " so the calling model sees the
|
|
3448
|
-
* failure clearly even through JSON mode
|
|
3449
|
-
* - optionally truncate to `maxChars` so a single oversized tool result
|
|
3450
|
-
* (e.g. a big `read_file`) can't poison the session by blowing past
|
|
3451
|
-
* the model's context window
|
|
3452
|
-
*/
|
|
3453
1658
|
declare function flattenMcpResult(result: CallToolResult, opts?: FlattenOptions): string;
|
|
3454
|
-
/**
|
|
3455
|
-
* Keep the head AND a short tail so the model sees both "what the tool
|
|
3456
|
-
* started returning" and "how it ended". Head-only loses file endings
|
|
3457
|
-
* (e.g. an error message appended at the bottom of a stack trace); the
|
|
3458
|
-
* 1KB tail window covers that while costing almost nothing. Exported for
|
|
3459
|
-
* tests and reuse by non-MCP tool adapters that want the same policy.
|
|
3460
|
-
*/
|
|
1659
|
+
/** Head + 1KB tail so error messages at end of stack traces aren't lost. */
|
|
3461
1660
|
declare function truncateForModel(s: string, maxChars: number): string;
|
|
3462
|
-
/**
|
|
3463
|
-
* Token-aware truncation. Same head+tail policy as `truncateForModel`,
|
|
3464
|
-
* but sizes the slices against a DeepSeek V3 token budget instead of a
|
|
3465
|
-
* raw character count — so CJK text (which previously survived at 2×
|
|
3466
|
-
* the token cost per char) gets capped at the same effective context
|
|
3467
|
-
* footprint as English.
|
|
3468
|
-
*
|
|
3469
|
-
* Strategy: fast path when `s.length <= maxTokens` (every token is ≥1
|
|
3470
|
-
* char, so this bounds tokens ≤ maxTokens — skip tokenize entirely).
|
|
3471
|
-
* Short-ish strings are confirmed against the real token count.
|
|
3472
|
-
* Long strings go straight to char-sliced head+tail with one or two
|
|
3473
|
-
* tokenize-verify-and-shrink rounds per slice — we deliberately never
|
|
3474
|
-
* tokenize the full input, because pathological repetitive text
|
|
3475
|
-
* (megabytes of `AAAA…`) can cost 30s+ on the pure-TS BPE port.
|
|
3476
|
-
*/
|
|
1661
|
+
/** Never tokenizes full input — pathological repetitive text (`AAAA…`) costs 30s+ on the pure-TS BPE port. */
|
|
3477
1662
|
declare function truncateForModelByTokens(s: string, maxTokens: number): string;
|
|
3478
1663
|
|
|
3479
|
-
/**
|
|
3480
|
-
* Parse the `--mcp` CLI argument into a transport-tagged spec.
|
|
3481
|
-
*
|
|
3482
|
-
* Accepted forms:
|
|
3483
|
-
* "name=command args..." → stdio, namespaced (tools prefixed with `name_`)
|
|
3484
|
-
* "command args..." → stdio, anonymous
|
|
3485
|
-
* "name=https://host/sse" → HTTP+SSE (2024-11-05), namespaced
|
|
3486
|
-
* "https://host/sse" → HTTP+SSE (2024-11-05), anonymous
|
|
3487
|
-
* "name=streamable+https://host/mcp" → Streamable HTTP (2025-03-26), namespaced
|
|
3488
|
-
* "streamable+https://host/mcp" → Streamable HTTP (2025-03-26), anonymous
|
|
3489
|
-
* ("http://" / "streamable+http://" also honored — useful for local dev.)
|
|
3490
|
-
*
|
|
3491
|
-
* The identifier regex before `=` is deliberately narrow
|
|
3492
|
-
* (`[a-zA-Z_][a-zA-Z0-9_]*`) so Windows drive letters ("C:\\...") and
|
|
3493
|
-
* other strings containing `=` or `:` don't accidentally trigger the
|
|
3494
|
-
* namespace branch. If a user ever wants their command to literally start
|
|
3495
|
-
* with `foo=...` as a bare command, they can wrap it in quotes inside the
|
|
3496
|
-
* shell command string.
|
|
3497
|
-
*
|
|
3498
|
-
* Transport selection:
|
|
3499
|
-
* - body starts with `streamable+http(s)://` → Streamable HTTP. The
|
|
3500
|
-
* `streamable+` prefix is stripped from the URL we hand the transport.
|
|
3501
|
-
* - body starts with `http(s)://` → HTTP+SSE (2024-11-05).
|
|
3502
|
-
* Default for plain http URLs to preserve back-compat with users who
|
|
3503
|
-
* already have `--mcp https://...` config entries pointed at SSE
|
|
3504
|
-
* servers; opt into Streamable HTTP explicitly.
|
|
3505
|
-
* - anything else → stdio (including ws://,
|
|
3506
|
-
* which will surface later as a spawn error).
|
|
3507
|
-
*/
|
|
1664
|
+
/** Plain `http(s)://` URLs stay HTTP+SSE (2024-11-05) for back-compat; Streamable HTTP is opt-in via the `streamable+` URL prefix. */
|
|
3508
1665
|
interface StdioMcpSpec {
|
|
3509
1666
|
transport: "stdio";
|
|
3510
1667
|
/** Namespace prefix applied to each registered tool, or null if anonymous. */
|
|
@@ -3529,16 +1686,7 @@ interface StreamableHttpMcpSpec {
|
|
|
3529
1686
|
type McpSpec = StdioMcpSpec | SseMcpSpec | StreamableHttpMcpSpec;
|
|
3530
1687
|
declare function parseMcpSpec(input: string): McpSpec;
|
|
3531
1688
|
|
|
3532
|
-
/**
|
|
3533
|
-
* Gather a full inspection report from an initialized MCP client:
|
|
3534
|
-
* server info, capabilities, tools, resources, prompts. Methods the
|
|
3535
|
-
* server doesn't support come back as `{ supported: false }` instead
|
|
3536
|
-
* of throwing, so a CLI or UI can render a consistent "what this
|
|
3537
|
-
* server exposes" summary even against minimal implementations.
|
|
3538
|
-
*
|
|
3539
|
-
* Pure with respect to I/O beyond the passed-in client — the CLI
|
|
3540
|
-
* layer owns argument parsing, connection setup, and printing.
|
|
3541
|
-
*/
|
|
1689
|
+
/** Unsupported list methods surface as `{supported:false}` instead of throwing — minimal servers still get a clean report. */
|
|
3542
1690
|
|
|
3543
1691
|
interface InspectionReport {
|
|
3544
1692
|
protocolVersion: string;
|
|
@@ -3559,39 +1707,10 @@ type SectionResult<T> = {
|
|
|
3559
1707
|
supported: false;
|
|
3560
1708
|
reason: string;
|
|
3561
1709
|
};
|
|
3562
|
-
/**
|
|
3563
|
-
* Run an inspection against an **already-initialized** client. Caller
|
|
3564
|
-
* is responsible for `initialize()` before this and `close()` after.
|
|
3565
|
-
* We keep this pure so unit tests can feed in a FakeMcpTransport and
|
|
3566
|
-
* verify the aggregate shape without spinning up a real process.
|
|
3567
|
-
*/
|
|
1710
|
+
/** Caller owns initialize() / close() — keeps this pure so tests can feed a FakeMcpTransport. */
|
|
3568
1711
|
declare function inspectMcpServer(client: McpClient): Promise<InspectionReport>;
|
|
3569
1712
|
|
|
3570
|
-
/**
|
|
3571
|
-
* Aider-style SEARCH/REPLACE edit blocks.
|
|
3572
|
-
*
|
|
3573
|
-
* The model emits blocks in this exact shape, one or more per response:
|
|
3574
|
-
*
|
|
3575
|
-
* path/to/file.ts
|
|
3576
|
-
* <<<<<<< SEARCH
|
|
3577
|
-
* exact existing lines (whitespace-sensitive)
|
|
3578
|
-
* =======
|
|
3579
|
-
* replacement lines
|
|
3580
|
-
* >>>>>>> REPLACE
|
|
3581
|
-
*
|
|
3582
|
-
* We chose this over unified diffs because:
|
|
3583
|
-
* - Models produce it reliably — no line-number drift.
|
|
3584
|
-
* - It tolerates multi-edit responses without ambiguity over which
|
|
3585
|
-
* hunk belongs to which file.
|
|
3586
|
-
* - Aider has years of evidence that this format works even against
|
|
3587
|
-
* weaker models than DeepSeek R1, so it's a conservative pick.
|
|
3588
|
-
*
|
|
3589
|
-
* The SEARCH text must match the file byte-for-byte. Empty SEARCH is a
|
|
3590
|
-
* sentinel for "create new file" — the REPLACE becomes the whole file.
|
|
3591
|
-
* If SEARCH doesn't match we refuse the edit and surface the failure;
|
|
3592
|
-
* we do NOT guess or fuzzy-match. A wrong silent edit is worse than a
|
|
3593
|
-
* missing one — the user can re-ask with the exact current content.
|
|
3594
|
-
*/
|
|
1713
|
+
/** SEARCH must match byte-for-byte; empty SEARCH = create new file. No fuzzy match — a missing edit beats a silent wrong one. */
|
|
3595
1714
|
interface EditBlock {
|
|
3596
1715
|
/** Path as written by the model — relative to rootDir, or absolute. */
|
|
3597
1716
|
path: string;
|
|
@@ -3627,42 +1746,13 @@ declare function applyEditBlocks(blocks: EditBlock[], rootDir: string): ApplyRes
|
|
|
3627
1746
|
interface EditSnapshot {
|
|
3628
1747
|
/** Path relative to rootDir, as the block named it. */
|
|
3629
1748
|
path: string;
|
|
3630
|
-
/**
|
|
3631
|
-
* File content before the edit batch was applied. `null` means the
|
|
3632
|
-
* file didn't exist yet — restoring that means deleting whatever the
|
|
3633
|
-
* edit created.
|
|
3634
|
-
*/
|
|
1749
|
+
/** `null` = file didn't exist; restore means delete. */
|
|
3635
1750
|
prevContent: string | null;
|
|
3636
1751
|
}
|
|
3637
|
-
/**
|
|
3638
|
-
* Capture the current state of every file an edit batch is about to
|
|
3639
|
-
* touch, so `/undo` can roll back if the user doesn't like the result.
|
|
3640
|
-
* De-duplicates by path because one batch can contain multiple blocks
|
|
3641
|
-
* for the same file, and we only want one "before" snapshot per file.
|
|
3642
|
-
*/
|
|
1752
|
+
/** De-duped by path — one "before" snapshot per file even with multiple blocks. */
|
|
3643
1753
|
declare function snapshotBeforeEdits(blocks: EditBlock[], rootDir: string): EditSnapshot[];
|
|
3644
|
-
/**
|
|
3645
|
-
* Restore files to their snapshotted state. Snapshots with
|
|
3646
|
-
* `prevContent === null` were created by the edit, so undo = delete.
|
|
3647
|
-
* Otherwise the prior content is written back, replacing whatever the
|
|
3648
|
-
* edit left behind.
|
|
3649
|
-
*/
|
|
3650
1754
|
declare function restoreSnapshots(snapshots: EditSnapshot[], rootDir: string): ApplyResult[];
|
|
3651
1755
|
|
|
3652
|
-
/**
|
|
3653
|
-
* System prompt used by `reasonix code`. Teaches the model:
|
|
3654
|
-
*
|
|
3655
|
-
* 1. It has a filesystem MCP bridge rooted at the user's CWD.
|
|
3656
|
-
* 2. To modify files it emits SEARCH/REPLACE blocks (not
|
|
3657
|
-
* `write_file` — that would whole-file rewrite and kill diff
|
|
3658
|
-
* reviewability).
|
|
3659
|
-
* 3. Read first, edit second — SEARCH must match byte-for-byte.
|
|
3660
|
-
* 4. Be concise. The user can read a diff faster than prose.
|
|
3661
|
-
*
|
|
3662
|
-
* Kept short on purpose. Long system prompts eat context budget that
|
|
3663
|
-
* the Cache-First Loop is trying to conserve. The SEARCH/REPLACE spec
|
|
3664
|
-
* is the one unavoidable bloat; we trim everything else.
|
|
3665
|
-
*/
|
|
3666
1756
|
declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use `ask_choice` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.\n\n# When to ask the user to pick (ask_choice)\n\nYou have an `ask_choice` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. 
The tool fires an arrow-key picker that's strictly better.\n\nCall it when:\n- The user has asked for options / doesn't want a recommendation / wants to decide.\n- You've analyzed multiple approaches and the final call is theirs.\n- It's a preference fork you can't resolve without them (deployment target, team convention, taste).\n\nSkip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).\n\nEach option: short stable id (A/B/C), one-line title, optional summary. `allowCustom: true` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine (\"I see three directions \u2014 letting you pick\"); don't repeat the options in it. After the call, STOP.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. Entries tagged `[\uD83E\uDDEC subagent]` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.\n\n**When you call `run_skill`, the `name` is ONLY the identifier before the tag** \u2014 e.g. 
`run_skill({ name: \"explore\", arguments: \"...\" })`, NOT `\"[\uD83E\uDDEC subagent] explore\"` and NOT `\"explore [\uD83E\uDDEC subagent]\"`. The tag is display sugar; the name argument is just the bare identifier.\n\nTwo built-ins ship by default:\n- **explore** `[\uD83E\uDDEC subagent]` \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **research** `[\uD83E\uDDEC subagent]` \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a subagent skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. 
Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# When the user wants to switch project / working directory\n\nIf the user asks to switch / change / open a different directory or project (\"\u5207\u6362\u5230...\", \"switch to ...\", \"let's work in ...\", \"open the X project\"), call **`change_workspace`** with the absolute target path. The tool always requires the user's explicit approval via a TUI modal \u2014 your call surfaces a \"switch / deny\" prompt, and STOPS your turn until they pick. After approval the filesystem / shell / memory tools re-register against the new root and your subsequent calls land there.\n\nHard rules:\n- Do NOT try to switch via `run_command` (`cd`, `pushd`, etc.) \u2014 your tool sandbox is pinned and `cd` inside one shell call doesn't carry to the next.\n- Do NOT chain other tool calls in the same turn as `change_workspace` \u2014 wait for the user's confirmation. Their next message will tell you whether the switch happened.\n- Do NOT call `change_workspace` to \"preview\" a sibling directory; only when the user explicitly asked to change projects.\n- The user can also type `/cwd <path>` themselves \u2014 fine, you'll see the new root take effect on the next turn either way.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. 
Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. 
The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. \"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n\nCost-aware escalation (when you're running on deepseek-v4-flash):\n\nIf a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.\n\nTwo accepted forms:\n- `<<<NEEDS_PRO>>>` \u2014 bare marker, no rationale.\n- `<<<NEEDS_PRO: <one-sentence reason>>>>` \u2014 preferred. The reason text appears in the user-visible warning (\"\u21E7 flash requested escalation \u2014 <your reason>\"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested `>` characters. 
Examples: `<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>` or `<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>`.\n\nDo NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).\n\nFormatting (rendered in a TUI with a real markdown renderer):\n- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (`| col | col |` header + `| --- | --- |` separator). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.\n- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.\n- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (```).\n- Do NOT draw decorative frames around content with `\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.\n- For flow charts and diagrams: a plain bullet list with `\u2192` or `\u2193` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.\n";
|
|
3667
1757
|
interface CodeSystemPromptOptions {
|
|
3668
1758
|
/** True when semantic_search is registered for this run. Adds an
|
|
@@ -3672,116 +1762,24 @@ interface CodeSystemPromptOptions {
|
|
|
3672
1762
|
}
|
|
3673
1763
|
declare function codeSystemPrompt(rootDir: string, opts?: CodeSystemPromptOptions): string;
|
|
3674
1764
|
|
|
3675
|
-
/**
|
|
3676
|
-
|
|
3677
|
-
*
|
|
3678
|
-
* Lookup order for the API key:
|
|
3679
|
-
* 1. `DEEPSEEK_API_KEY` env var (highest priority — for CI / power users)
|
|
3680
|
-
* 2. `~/.reasonix/config.json` (set by the first-run setup flow)
|
|
3681
|
-
*
|
|
3682
|
-
* The library itself never touches the config file — it only reads
|
|
3683
|
-
* `DEEPSEEK_API_KEY` from the environment. The CLI is responsible for
|
|
3684
|
-
* pulling from the config file and exposing it via env var to the loop.
|
|
3685
|
-
*
|
|
3686
|
-
* Beyond the API key, the config also remembers the user's *defaults*
|
|
3687
|
-
* from `reasonix setup`: preset, MCP servers, session. This is what
|
|
3688
|
-
* makes `reasonix chat` with no flags "just work" after first-run.
|
|
3689
|
-
*/
|
|
3690
|
-
/**
|
|
3691
|
-
* Preset names — three model-commitment levels.
|
|
3692
|
-
* - `auto` — flash baseline + auto-escalate to pro on hard turns
|
|
3693
|
-
* (NEEDS_PRO marker / failure-count threshold both fire).
|
|
3694
|
-
* Default. Closest match to the legacy `smart` preset.
|
|
3695
|
-
* - `flash` — flash always. No auto-escalation. `/pro` still works
|
|
3696
|
-
* for one-shot manual escalation. Cheapest predictable.
|
|
3697
|
-
* - `pro` — pro always. No downgrade. ~3× cost vs flash at the
|
|
3698
|
-
* 2026-04 discount rate; more outside the window.
|
|
3699
|
-
*
|
|
3700
|
-
* Legacy `fast | smart | max` names stay in the union for back-compat
|
|
3701
|
-
* with existing `~/.reasonix/config.json` files; resolvePreset() maps
|
|
3702
|
-
* them to the new semantics.
|
|
3703
|
-
*/
|
|
1765
|
+
/** Library reads only DEEPSEEK_API_KEY from env; the CLI bridges config.json → env var. */
|
|
1766
|
+
/** Legacy `fast|smart|max` kept for back-compat with existing config.json files. */
|
|
3704
1767
|
type PresetName = "auto" | "flash" | "pro" | "fast" | "smart" | "max";
|
|
3705
|
-
/**
|
|
3706
|
-
* How `reasonix code` handles model-issued tool calls. Two axes folded
|
|
3707
|
-
* into one enum because users think about "how trusting am I right now?"
|
|
3708
|
-
* as a single dial, not as "writes vs shell" pairs.
|
|
3709
|
-
*
|
|
3710
|
-
* - "review" — queue edits into pendingEdits (user /apply or `y` to
|
|
3711
|
-
* commit); shell commands NOT on the read-only allowlist
|
|
3712
|
-
* hit ShellConfirm. Default.
|
|
3713
|
-
* - "auto" — apply edits immediately, snapshot for /undo, show a
|
|
3714
|
-
* short undo banner. Shell still goes through ShellConfirm
|
|
3715
|
-
* for non-allowlisted commands.
|
|
3716
|
-
* - "yolo" — apply edits immediately AND auto-approve every shell
|
|
3717
|
-
* command. No prompts at all. Use when you trust the
|
|
3718
|
-
* current direction and just want to iterate fast; /undo
|
|
3719
|
-
* still rolls back individual edit batches.
|
|
3720
|
-
*
|
|
3721
|
-
* Persisted so `/mode <x>` survives a relaunch. Missing → "review".
|
|
3722
|
-
*
|
|
3723
|
-
* Codex-equivalence note: review ≈ untrusted, auto ≈ on-request,
|
|
3724
|
-
* yolo ≈ never.
|
|
3725
|
-
*/
|
|
1768
|
+
/** Single trust dial: review queues edits + gates shell; auto applies + gates shell; yolo skips both gates. */
|
|
3726
1769
|
type EditMode = "review" | "auto" | "yolo";
|
|
3727
|
-
/**
|
|
3728
|
-
* reasoning_effort cap for the model. "max" is the agent-class default;
|
|
3729
|
-
* "high" is cheaper / faster. Persisted so `/effort high` survives a
|
|
3730
|
-
* relaunch — earlier versions silently reverted to "max" on every new
|
|
3731
|
-
* session, which burned budget unexpectedly.
|
|
3732
|
-
*/
|
|
3733
1770
|
type ReasoningEffort = "high" | "max";
|
|
3734
1771
|
interface ReasonixConfig {
|
|
3735
1772
|
apiKey?: string;
|
|
3736
1773
|
baseUrl?: string;
|
|
3737
|
-
/**
|
|
3738
|
-
* Default preset for `reasonix chat` / `reasonix run` when no flags override.
|
|
3739
|
-
* Maps to model + autoEscalate (see presets.ts). Missing → "auto".
|
|
3740
|
-
*/
|
|
3741
1774
|
preset?: PresetName;
|
|
3742
|
-
/**
|
|
3743
|
-
* Edit-gate mode for `reasonix code`. See EditMode doc. Absent → "review".
|
|
3744
|
-
*/
|
|
3745
1775
|
editMode?: EditMode;
|
|
3746
|
-
/**
|
|
3747
|
-
* Set to `true` the first time we've shown the "Shift+Tab cycles
|
|
3748
|
-
* review/AUTO" onboarding tip in `reasonix code`. Once seen, we stop
|
|
3749
|
-
* posting the tip — the bottom status bar carries the knowledge
|
|
3750
|
-
* forward without further nagging.
|
|
3751
|
-
*/
|
|
3752
1776
|
editModeHintShown?: boolean;
|
|
3753
|
-
/**
|
|
3754
|
-
* Last reasoning_effort chosen via `/effort`. Loaded on launch so
|
|
3755
|
-
* "high" stays "high" — default is "max" when unset.
|
|
3756
|
-
*/
|
|
3757
1777
|
reasoningEffort?: ReasoningEffort;
|
|
3758
|
-
/**
|
|
3759
|
-
* Default MCP server specs to bridge on every `reasonix chat`, in the
|
|
3760
|
-
* same `"name=cmd args..."` format that `--mcp` takes. Stored as strings
|
|
3761
|
-
* so `reasonix setup` stays symmetrical with the flag — one parser, one
|
|
3762
|
-
* format in the config file, grep-friendly.
|
|
3763
|
-
*/
|
|
1778
|
+
/** Stored as `--mcp`-format strings so one parser handles both flag and config. */
|
|
3764
1779
|
mcp?: string[];
|
|
3765
|
-
/**
|
|
3766
|
-
* Default session name (null/missing → "default", which is what the
|
|
3767
|
-
* CLI has been doing anyway). `reasonix setup` lets users pick a name
|
|
3768
|
-
* or opt into ephemeral.
|
|
3769
|
-
*/
|
|
3770
1780
|
session?: string | null;
|
|
3771
|
-
/** Marks that `reasonix setup` has completed at least once. */
|
|
3772
1781
|
setupCompleted?: boolean;
|
|
3773
|
-
/**
|
|
3774
|
-
* Whether `web_search` + `web_fetch` tools are registered. Default:
|
|
3775
|
-
* enabled (no key required — backed by DuckDuckGo's public HTML
|
|
3776
|
-
* endpoint). Set to `false` to keep the session offline.
|
|
3777
|
-
*/
|
|
3778
1782
|
search?: boolean;
|
|
3779
|
-
/**
|
|
3780
|
-
* Per-project state keyed by absolute directory path. Written by the
|
|
3781
|
-
* "always allow" choice on a shell confirmation prompt; merged into
|
|
3782
|
-
* `registerShellTools({ extraAllowed })` when `reasonix code` runs
|
|
3783
|
-
* against that directory again.
|
|
3784
|
-
*/
|
|
3785
1783
|
projects?: {
|
|
3786
1784
|
[absoluteRootDir: string]: {
|
|
3787
1785
|
shellAllowed?: string[];
|
|
@@ -3798,27 +1796,7 @@ declare function isPlausibleKey(key: string): boolean;
|
|
|
3798
1796
|
/** Mask a key for display: `sk-abcd...wxyz`. */
|
|
3799
1797
|
declare function redactKey(key: string): string;
|
|
3800
1798
|
|
|
3801
|
-
/**
|
|
3802
|
-
* Version module.
|
|
3803
|
-
*
|
|
3804
|
-
* Two jobs:
|
|
3805
|
-
*
|
|
3806
|
-
* 1. Expose `VERSION` sourced from the real `package.json` so the
|
|
3807
|
-
* constant never drifts from what npm publishes. Works in dev
|
|
3808
|
-
* (`tsx src/...`) AND after `tsup` bundles to `dist/` — both
|
|
3809
|
-
* layouts sit two levels below the manifest, so a short
|
|
3810
|
-
* walk-up finds it.
|
|
3811
|
-
*
|
|
3812
|
-
* 2. Offer an opt-in `getLatestVersion()` that hits the npm
|
|
3813
|
-
* registry with a bounded timeout and a 24-hour on-disk
|
|
3814
|
-
* cache at `~/.reasonix/version-cache.json`. Returns `null`
|
|
3815
|
-
* on any failure — offline / restricted-network launches
|
|
3816
|
-
* should stay silent rather than nag the user.
|
|
3817
|
-
*
|
|
3818
|
-
* The CLI wires `getLatestVersion` asynchronously at App mount
|
|
3819
|
-
* (never in a hot path) and renders the outcome in the stats
|
|
3820
|
-
* panel when there's a newer published version.
|
|
3821
|
-
*/
|
|
1799
|
+
/** VERSION sourced from package.json so it never drifts from npm; latest-check returns null on any failure. */
|
|
3822
1800
|
/** TTL for the on-disk cache entry. 24h keeps noise low; users who
|
|
3823
1801
|
* want a fresh check can run `reasonix update` which passes
|
|
3824
1802
|
* `force: true`. */
|
|
@@ -3840,71 +1818,14 @@ interface GetLatestVersionOptions {
|
|
|
3840
1818
|
/** Network timeout override (tests). */
|
|
3841
1819
|
timeoutMs?: number;
|
|
3842
1820
|
}
|
|
3843
|
-
/**
|
|
3844
|
-
* Resolve the latest published `reasonix` version from the npm registry.
|
|
3845
|
-
*
|
|
3846
|
-
* Returns `null` on any network / parse failure. Callers treat `null`
|
|
3847
|
-
* as "don't know, don't nag the user." The cache entry is only
|
|
3848
|
-
* written on a successful fetch — a bad registry response won't
|
|
3849
|
-
* poison the cache.
|
|
3850
|
-
*/
|
|
1821
|
+
/** Returns null on failure; cache only writes on success so bad responses can't poison it. */
|
|
3851
1822
|
declare function getLatestVersion(opts?: GetLatestVersionOptions): Promise<string | null>;
|
|
3852
|
-
/**
|
|
3853
|
-
* Semver compare. Returns a negative number when `a < b`, positive
|
|
3854
|
-
* when `a > b`, zero when equal.
|
|
3855
|
-
*
|
|
3856
|
-
* Minimal pre-release handling: when the CORE (`x.y.z`) parts match,
|
|
3857
|
-
* any version WITH a suffix (`-rc.1`, `-alpha.4`) compares LOWER
|
|
3858
|
-
* than the bare version. That matches npm's dist-tag semantics —
|
|
3859
|
-
* `reasonix@latest` resolves to a real release, not a pre-release.
|
|
3860
|
-
*
|
|
3861
|
-
* We're deliberately not pulling in `semver` (~50KB). The three
|
|
3862
|
-
* cases we care about are: current > latest (future build, no
|
|
3863
|
-
* prompt), current < latest (prompt), current === latest (no prompt).
|
|
3864
|
-
*/
|
|
1823
|
+
/** Pre-release with same core sorts BELOW the bare version — matches npm `latest` dist-tag semantics. */
|
|
3865
1824
|
declare function compareVersions(a: string, b: string): number;
|
|
3866
|
-
/**
|
|
3867
|
-
* Heuristic: did this process launch via `npx` / `pnpm dlx` instead
|
|
3868
|
-
* of a global install? The update command takes different advice in
|
|
3869
|
-
* each case — a global install can `npm i -g reasonix@latest`, while
|
|
3870
|
-
* npx just needs its cache to roll over on next launch.
|
|
3871
|
-
*
|
|
3872
|
-
* Signals checked, in order:
|
|
3873
|
-
* - `process.argv[1]` contains `_npx` (npm's ephemeral dir name)
|
|
3874
|
-
* - `process.argv[1]` contains `.pnpm` + `dlx`
|
|
3875
|
-
* - `npm_config_user_agent` contains `npx/`
|
|
3876
|
-
*
|
|
3877
|
-
* Any one hit → npx. False negatives are safe (worst case we suggest
|
|
3878
|
-
* `npm i -g` to an npx user, which is a valid way to upgrade too).
|
|
3879
|
-
*/
|
|
1825
|
+
/** False negatives are safe — `npm i -g` works for npx users too. */
|
|
3880
1826
|
declare function isNpxInstall(): boolean;
|
|
3881
1827
|
|
|
3882
|
-
/**
|
|
3883
|
-
* Persistent per-turn usage log at `~/.reasonix/usage.jsonl`.
|
|
3884
|
-
*
|
|
3885
|
-
* Each line is a single `UsageRecord` — one turn's tokens + cost
|
|
3886
|
-
* snapshot — appended after every `assistant_final` event. This is
|
|
3887
|
-
* what drives `reasonix stats` (the dashboard, no-arg form), so the
|
|
3888
|
-
* user can see how much they've spent vs what the equivalent Claude
|
|
3889
|
-
* spend would have been. The Pillar 1 pitch (94–97% cost reduction
|
|
3890
|
-
* vs Claude, from the v0.3 hard-number table) becomes a fact users
|
|
3891
|
-
* can verify on their own machine.
|
|
3892
|
-
*
|
|
3893
|
-
* Format choices:
|
|
3894
|
-
* - **append-only JSONL** — one line per turn, durable, survives
|
|
3895
|
-
* abrupt exits. A corrupted tail line loses at most one record.
|
|
3896
|
-
* - **flat keys, no nesting** — readable with `jq` / `cut` / `awk`;
|
|
3897
|
-
* the model doesn't need to parse this, humans do.
|
|
3898
|
-
* - **best-effort writes** — disk errors never propagate into the
|
|
3899
|
-
* turn. We log nothing (no `console.error`) because the TUI is
|
|
3900
|
-
* rendering Ink; a silent skip is the least-worst failure mode.
|
|
3901
|
-
* - **no PII, no prompts, no completions** — the log contains
|
|
3902
|
-
* tokens and costs, that's it. Sessions are identified by the
|
|
3903
|
-
* user-chosen name (never a prompt).
|
|
3904
|
-
*
|
|
3905
|
-
* This file is deliberately NOT wired through project memory or
|
|
3906
|
-
* skills — those are content pins. Usage is pure telemetry.
|
|
3907
|
-
*/
|
|
1828
|
+
/** Append-only JSONL of per-turn tokens + cost; best-effort writes, never blocks the turn. No prompts/completions logged. */
|
|
3908
1829
|
|
|
3909
1830
|
/** One turn's snapshot — serialized verbatim as a JSONL line. */
|
|
3910
1831
|
interface UsageRecord {
|
|
@@ -3922,10 +1843,7 @@ interface UsageRecord {
|
|
|
3922
1843
|
costUsd: number;
|
|
3923
1844
|
/** What the same turn would have cost at Claude Sonnet 4.6 rates. */
|
|
3924
1845
|
claudeEquivUsd: number;
|
|
3925
|
-
/**
|
|
3926
|
-
* Distinguishes ordinary parent-loop turns from subagent summary rows.
|
|
3927
|
-
* Absent on pre-0.5.14 records — treat as "turn" when missing.
|
|
3928
|
-
*/
|
|
1846
|
+
/** Absent on legacy records — treat as "turn" when missing. */
|
|
3929
1847
|
kind?: "turn" | "subagent";
|
|
3930
1848
|
/** Present when `kind === "subagent"`. Attribution metadata for the /stats roll-up. */
|
|
3931
1849
|
subagent?: {
|
|
@@ -3953,26 +1871,8 @@ interface AppendUsageInput {
|
|
|
3953
1871
|
kind?: "turn" | "subagent";
|
|
3954
1872
|
subagent?: UsageRecord["subagent"];
|
|
3955
1873
|
}
|
|
3956
|
-
/**
|
|
3957
|
-
* Append one record and return it. Swallows disk errors — the TUI
|
|
3958
|
-
* should keep working even if `~/.reasonix/` is read-only.
|
|
3959
|
-
*
|
|
3960
|
-
* Returns the record that was written (or would have been written
|
|
3961
|
-
* if the disk had cooperated) so tests / callers can assert on the
|
|
3962
|
-
* computed cost fields without a round trip through the log file.
|
|
3963
|
-
*
|
|
3964
|
-
* On every Nth append the log is checked for size; if it crosses
|
|
3965
|
-
* {@link USAGE_COMPACTION_THRESHOLD_BYTES} we drop records older
|
|
3966
|
-
* than {@link USAGE_RETENTION_DAYS}. Cheaper than a startup-time
|
|
3967
|
-
* scan because most processes don't reach the threshold; the size
|
|
3968
|
-
* check is one statSync regardless.
|
|
3969
|
-
*/
|
|
1874
|
+
/** Returns the record so tests can assert cost fields without re-reading the log. */
|
|
3970
1875
|
declare function appendUsage(input: AppendUsageInput): UsageRecord;
|
|
3971
|
-
/**
|
|
3972
|
-
* Read + parse the log. Malformed lines are silently skipped so a
|
|
3973
|
-
* single corrupted write (half-flushed on power loss, user hand-edit)
|
|
3974
|
-
* doesn't throw away the rest of the history.
|
|
3975
|
-
*/
|
|
3976
1876
|
declare function readUsageLog(path?: string): UsageRecord[];
|
|
3977
1877
|
/** One row of the `reasonix stats` dashboard — a rolled-up window. */
|
|
3978
1878
|
interface UsageBucket {
|
|
@@ -3986,15 +1886,7 @@ interface UsageBucket {
|
|
|
3986
1886
|
cacheMissTokens: number;
|
|
3987
1887
|
costUsd: number;
|
|
3988
1888
|
claudeEquivUsd: number;
|
|
3989
|
-
/**
|
|
3990
|
-
* USD that DeepSeek's prompt cache shaved off the bill — sum of
|
|
3991
|
-
* `cacheHitTokens × (missPrice − hitPrice)` per record. Recomputed
|
|
3992
|
-
* from the current pricing table on every aggregate, not frozen at
|
|
3993
|
-
* write time, so a price-cut announcement updates retroactively. The
|
|
3994
|
-
* trade-off is mild inconsistency with `costUsd` (which IS frozen);
|
|
3995
|
-
* acceptable because cache savings is a "what does this mechanism
|
|
3996
|
-
* give me" narrative, not a billing record.
|
|
3997
|
-
*/
|
|
1889
|
+
/** Recomputed from current pricing each aggregate — intentionally NOT frozen with `costUsd`. */
|
|
3998
1890
|
cacheSavingsUsd: number;
|
|
3999
1891
|
}
|
|
4000
1892
|
/** Cache hit ratio for a bucket — zero denominator returns 0. */
|
|
@@ -4022,11 +1914,7 @@ interface UsageAggregate {
|
|
|
4022
1914
|
firstSeen: number | null;
|
|
4023
1915
|
/** Latest record's ts, or `null` when the log is empty. */
|
|
4024
1916
|
lastSeen: number | null;
|
|
4025
|
-
/**
|
|
4026
|
-
* Subagent-specific rollup. Undefined when no subagent records exist
|
|
4027
|
-
* in the log so consumers can cheaply skip the section. Counts reflect
|
|
4028
|
-
* subagent SPAWNS (not internal child-loop turns) — one row per run.
|
|
4029
|
-
*/
|
|
1917
|
+
/** Undefined when no subagent records exist; counts spawns, not internal child-loop turns. */
|
|
4030
1918
|
subagents?: SubagentAggregate;
|
|
4031
1919
|
}
|
|
4032
1920
|
/** Rolled-up view of all `kind: "subagent"` records. */
|
|
@@ -4042,15 +1930,7 @@ interface SubagentAggregate {
|
|
|
4042
1930
|
durationMs: number;
|
|
4043
1931
|
}>;
|
|
4044
1932
|
}
|
|
4045
|
-
/**
|
|
4046
|
-
* Fold a flat record list into the dashboard shape — rolling windows
|
|
4047
|
-
* plus model / session histograms. Windows are INCLUSIVE of boundary:
|
|
4048
|
-
* - today = last 24h (rolling, not calendar-day)
|
|
4049
|
-
* - week = last 7d
|
|
4050
|
-
* - month = last 30d
|
|
4051
|
-
* - all = every record
|
|
4052
|
-
* Rolling windows avoid "it's 00:03, 'today' is empty" surprises.
|
|
4053
|
-
*/
|
|
1933
|
+
/** Rolling 24h/7d/30d windows — avoids "it's 00:03, 'today' is empty" surprises. */
|
|
4054
1934
|
declare function aggregateUsage(records: UsageRecord[], opts?: AggregateOptions): UsageAggregate;
|
|
4055
1935
|
/** File-size helper for the stats header — "1.2 MB" etc. Returns "" if missing. */
|
|
4056
1936
|
declare function formatLogSize(path?: string): string;
|