reasonix 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,20 +1,7 @@
1
1
  import { SpawnOptions } from 'node:child_process';
2
2
  import { WriteStream } from 'node:fs';
3
3
 
4
- /**
5
- * Retry layer for DeepSeek API calls.
6
- *
7
- * Wraps a `fetch` function so that transient failures (rate limiting, server
8
- * overload, network blips) don't kill an agent session. We explicitly DO NOT
9
- * retry:
10
- * - 4xx client errors other than 408 / 429 (bad key, bad request, ...)
11
- * - aborted requests (user cancelled)
12
- * - mid-stream body read errors (retrying costs money AND would desync)
13
- *
14
- * Retrying is controlled by attempt count + exponential backoff with jitter.
15
- * If the server sends a `Retry-After` header we honor it (capped by
16
- * `maxBackoffMs` so a misconfigured upstream can't park us forever).
17
- */
4
+ /** No retry on aborts or mid-stream body errors — re-billing the user for desynced output is worse than failing. */
18
5
  interface RetryOptions {
19
6
  /** Maximum total attempts (including the first). Default 4. */
20
7
  maxAttempts?: number;
@@ -69,13 +56,7 @@ interface ChatMessage {
69
56
  name?: string;
70
57
  tool_call_id?: string;
71
58
  tool_calls?: ToolCall[];
72
- /**
73
- * R1 `reasoning_content` captured from the assistant's thinking turn.
74
- * DeepSeek's thinking mode 400s with "reasoning_content in the
75
- * thinking mode must be passed back" when a tool-loop continuation
76
- * omits it from the preceding assistant message. Round-tripped for
77
- * deepseek-reasoner turns with tool_calls; absent for deepseek-chat.
78
- */
59
+ /** Must round-trip in tool-loop continuations — thinking mode 400s without it. */
79
60
  reasoning_content?: string | null;
80
61
  }
81
62
  interface RawUsage {
@@ -97,20 +78,7 @@ interface ChatRequestOptions {
97
78
  responseFormat?: {
98
79
  type: "json_object" | "text";
99
80
  };
100
- /**
101
- * Explicitly toggle V4 thinking mode. Serialized as
102
- * `extra_body.thinking.type = enabled|disabled`. Omit to let the
103
- * server default apply (thinking enabled). Mainly used so the loop
104
- * can pin the mode per model: `deepseek-chat` → disabled (legacy
105
- * non-thinking compat), everything else → enabled.
106
- */
107
81
  thinking?: "enabled" | "disabled";
108
- /**
109
- * Per-request reasoning-effort cap. Serialized as the top-level
110
- * `reasoning_effort` field. DeepSeek accepts `high` (standard) or
111
- * `max` (Agent-class, auto-applied to Claude-Code-style flows per
112
- * the V4 docs). Reasonix pins `max` because every turn is agent-like.
113
- */
114
82
  reasoningEffort?: "high" | "max";
115
83
  }
116
84
 
@@ -144,13 +112,6 @@ interface StreamChunk {
144
112
  finishReason?: string;
145
113
  raw: any;
146
114
  }
147
- /**
148
- * Response shape for DeepSeek's `/user/balance` endpoint. One entry
149
- * per currency the account is funded in (typically CNY, sometimes
150
- * USD). `total_balance` is the spendable figure; `granted_balance`
151
- * counts promotional credits that expire, `topped_up_balance` is
152
- * what the user paid for and keeps.
153
- */
154
115
  interface BalanceInfo {
155
116
  currency: string;
156
117
  total_balance: string;
@@ -161,12 +122,6 @@ interface UserBalance {
161
122
  is_available: boolean;
162
123
  balance_infos: BalanceInfo[];
163
124
  }
164
- /**
165
- * Response shape for DeepSeek's `/models` endpoint. Mirrors the OpenAI
166
- * models list shape DeepSeek copied — `id` is the model name to pass to
167
- * `/chat/completions`, `owned_by` is the provider string (always
168
- * `"deepseek"` today).
169
- */
170
125
  interface ModelInfo {
171
126
  id: string;
172
127
  object: "model";
@@ -192,22 +147,11 @@ declare class DeepSeekClient {
192
147
  private readonly _fetch;
193
148
  constructor(opts?: DeepSeekClientOptions);
194
149
  private buildPayload;
195
- /**
196
- * Fetch the current DeepSeek account balance. Separate endpoint
197
- * from chat completions, no billing impact. Returns null on any
198
- * network/auth failure so callers can gate the balance display
199
- * without a hard error — the rest of the session works regardless.
200
- */
150
+ /** Returns null on failure so callers can degrade — session must keep working without balance UI. */
201
151
  getBalance(opts?: {
202
152
  signal?: AbortSignal;
203
153
  }): Promise<UserBalance | null>;
204
- /**
205
- * Fetch the model catalog DeepSeek currently exposes. Today this is
206
- * `deepseek-chat` (V3) and `deepseek-reasoner` (R1), but querying is
207
- * the only way to learn about new ones without a Reasonix release.
208
- * Returns null on any network/auth failure so callers can degrade
209
- * gracefully — e.g. `/models` falls back to the hardcoded hint.
210
- */
154
+ /** Returns null on failure — callers fall back to a hardcoded model hint. */
211
155
  listModels(opts?: {
212
156
  signal?: AbortSignal;
213
157
  }): Promise<ModelList | null>;
@@ -215,19 +159,7 @@ declare class DeepSeekClient {
215
159
  stream(opts: ChatRequestOptions): AsyncGenerator<StreamChunk>;
216
160
  }
217
161
 
218
- /**
219
- * Pillar 2 — R1 Thought Harvesting.
220
- *
221
- * Takes the `reasoning_content` emitted by a thinking model (deepseek-reasoner
222
- * / R1) and extracts a structured plan state by making a cheap secondary call
223
- * to V3 in JSON mode. The typed state is intended for the orchestrator to
224
- * branch on — e.g. trigger self-consistency sampling when `uncertainties.length
225
- * > 2`, or surface the subgoals to the user.
226
- *
227
- * Opt-in: loops disable harvesting by default. Failures (bad JSON, API error,
228
- * empty reasoning) return an empty TypedPlanState — the main turn is never
229
- * aborted because of a harvest hiccup.
230
- */
162
+ /** Harvest failures return an empty state — main turn must never abort on a hiccup here. */
231
163
 
232
164
  interface TypedPlanState {
233
165
  subgoals: string[];
@@ -249,19 +181,7 @@ declare function emptyPlanState(): TypedPlanState;
249
181
  declare function isPlanStateEmpty(s: TypedPlanState | null | undefined): boolean;
250
182
  declare function harvest(reasoningContent: string | null | undefined, client?: DeepSeekClient, options?: HarvestOptions, signal?: AbortSignal): Promise<TypedPlanState>;
251
183
 
252
- /**
253
- * Self-consistency branching.
254
- *
255
- * When enabled, the loop fans out into N parallel samples per turn (varied
256
- * temperatures), runs Pillar 2 harvest on each, and selects the sample with
257
- * the fewest flagged uncertainties (ties broken by answer length — a crude
258
- * Occam prior).
259
- *
260
- * The unique opportunity here: because DeepSeek is ~20× cheaper than Claude,
261
- * running N=3–5 samples per turn is still cheaper than a single Claude call,
262
- * while the majority-confidence selection tends to dominate single-sample
263
- * answers on fuzzy multi-step reasoning tasks.
264
- */
184
+ /** N parallel samples; selector picks fewest uncertainties with shorter-answer tie-break (Occam prior). */
265
185
 
266
186
  interface BranchSample {
267
187
  index: number;
@@ -279,10 +199,7 @@ interface BranchOptions {
279
199
  harvestOptions?: HarvestOptions;
280
200
  /** Custom selector. Default: min uncertainties, tie-break shortest answer. */
281
201
  selector?: BranchSelector;
282
- /**
283
- * Fires as each sample finishes (main call + harvest both complete).
284
- * Useful for progress UI. Not awaited; exceptions are swallowed.
285
- */
202
+ /** Not awaited; exceptions swallowed. Fires when sample's main + harvest both complete. */
286
203
  onSampleDone?: (sample: BranchSample) => void;
287
204
  }
288
205
  interface BranchResult {
@@ -301,53 +218,13 @@ declare function aggregateBranchUsage(samples: readonly BranchSample[]): {
301
218
  promptCacheMissTokens: number;
302
219
  };
303
220
 
304
- /**
305
- * Hooks — user-defined automation that fires at well-known points in
306
- * the agent loop. Mirrors the two-scope layout we use for memory and
307
- * skills:
308
- *
309
- * - `<project>/.reasonix/settings.json` — committable per-project
310
- * - `~/.reasonix/settings.json` — every session
311
- *
312
- * A hook is a shell command. We invoke it with stdin = a JSON
313
- * payload describing the event, and interpret the exit code:
314
- *
315
- * - `0` — pass; loop continues normally
316
- * - `2` — block; for `PreToolUse` / `UserPromptSubmit` the
317
- * loop refuses to continue with that step and surfaces the
318
- * hook's stderr as the reason. For `PostToolUse` / `Stop` block
319
- * is meaningless (the action already happened) — treat as warn.
320
- * - anything else — warn; loop continues but stderr is rendered
321
- * to the user as an inline notice.
322
- *
323
- * stdin JSON shape (one envelope per event):
324
- *
325
- * {
326
- * "event": "PreToolUse" | "PostToolUse" | "UserPromptSubmit" | "Stop",
327
- * "cwd": "<absolute project root or process.cwd()>",
328
- * "toolName": "<string>", // tool events only
329
- * "toolArgs": <unknown>, // tool events only — already JSON-decoded
330
- * "toolResult": "<string>", // PostToolUse only — same body the model sees
331
- * "prompt": "<string>", // UserPromptSubmit only
332
- * "lastAssistantText": "<string>", // Stop only
333
- * "turn": <number>, // Stop only
334
- * }
335
- *
336
- * Hooks are executed in order: project scope first, then global.
337
- * `Pre*` events stop dispatching at the first block; non-block
338
- * outcomes accumulate into a single report so the UI can render
339
- * each warning inline.
340
- */
221
+ /** Shell-command hooks; project scope first, then global. Exit 0=pass, 2=block on Pre*, other=warn. */
341
222
  type HookEvent = "PreToolUse" | "PostToolUse" | "UserPromptSubmit" | "Stop";
342
223
  /** All four events as a const array — drives slash listing + validation. */
343
224
  declare const HOOK_EVENTS: readonly HookEvent[];
344
225
  type HookScope = "project" | "global";
345
226
  interface HookConfig {
346
- /**
347
- * Tool-name pattern (PreToolUse / PostToolUse only). Anchored regex.
348
- * Omitted or `"*"` matches every tool. Ignored for prompt / Stop
349
- * events (they have no tool name to match against).
350
- */
227
+ /** Anchored regex; `"*"` / omitted = every tool. Pre/PostToolUse only. */
351
228
  match?: string;
352
229
  /** Shell command to run. Spawned through the platform shell. */
353
230
  command: string;
@@ -355,11 +232,7 @@ interface HookConfig {
355
232
  description?: string;
356
233
  /** Per-hook timeout override in ms. */
357
234
  timeout?: number;
358
- /**
359
- * Working directory for the spawned process. Defaults to:
360
- * - project scope → the project root
361
- * - global scope → process.cwd()
362
- */
235
+ /** Defaults: project scope → project root; global scope → process.cwd(). */
363
236
  cwd?: string;
364
237
  }
365
238
  /** Shape of `<scope>/.reasonix/settings.json` — only `hooks` for now. */
@@ -377,14 +250,7 @@ interface ResolvedHook extends HookConfig {
377
250
  interface HookOutcome {
378
251
  /** Which hook fired. */
379
252
  hook: ResolvedHook;
380
- /**
381
- * Decision:
382
- * - `pass` — exit 0
383
- * - `block` — exit 2 on a blocking event (otherwise downgraded to `warn`)
384
- * - `warn` — non-zero exit that is not a successful block
385
- * - `timeout` — the spawn was killed past `timeout`
386
- * - `error` — could not spawn at all (missing command, etc.)
387
- */
253
+ /** pass=exit 0; block=exit 2 on blocking event; warn=other non-zero; timeout=killed; error=spawn failed. */
388
254
  decision: "pass" | "block" | "warn" | "timeout" | "error";
389
255
  exitCode: number | null;
390
256
  /** Captured stdout (trimmed). May be empty. */
@@ -392,12 +258,7 @@ interface HookOutcome {
392
258
  /** Captured stderr (trimmed). The block / warn message comes from here. */
393
259
  stderr: string;
394
260
  durationMs: number;
395
- /**
396
- * True when stdout or stderr crossed the per-stream byte cap and was
397
- * truncated. The hook still completed; the loop just sees a clipped
398
- * view of its output. Surfaced via `formatHookOutcomeMessage` so the
399
- * user knows their script wrote more than Reasonix kept.
400
- */
261
+ /** Output crossed the per-stream byte cap; surfaced so user knows we kept less than the script wrote. */
401
262
  truncated?: boolean;
402
263
  }
403
264
  /** Aggregate report for `runHooks`. */
@@ -413,16 +274,7 @@ declare const HOOK_SETTINGS_DIRNAME = ".reasonix";
413
274
  declare function globalSettingsPath(homeDirOverride?: string): string;
414
275
  /** Where the project settings.json lives for a given root. */
415
276
  declare function projectSettingsPath(projectRoot: string): string;
416
- /**
417
- * Pull every configured hook out of the project + global settings
418
- * files, in the order they should fire (project first, global second,
419
- * within each scope: array order from the file).
420
- *
421
- * Returns a flat list — the dispatcher filters by event + match
422
- * pattern at run time. Loading is cheap (one or two JSON files), so
423
- * we don't memoize across processes; re-load is allowed via
424
- * `/hooks reload` and on every fresh App mount.
425
- */
277
+ /** Project hooks fire before global; within a scope, array order. */
426
278
  interface LoadHookSettingsOptions {
427
279
  /** Absolute project root, if any. Without it, only global hooks load. */
428
280
  projectRoot?: string;
@@ -430,12 +282,7 @@ interface LoadHookSettingsOptions {
430
282
  homeDir?: string;
431
283
  }
432
284
  declare function loadHooks(opts?: LoadHookSettingsOptions): ResolvedHook[];
433
- /**
434
- * True if `toolName` matches the hook's `match` field. `"*"` and
435
- * undefined match everything. Otherwise we anchor the field as a
436
- * regex — partial-name matches don't fire, so `"file"` would not
437
- * trigger on `read_file` (use `".*file"` for that).
438
- */
285
+ /** Match field is an ANCHORED regex — `"file"` won't trigger on `read_file`; use `".*file"`. */
439
286
  declare function matchesTool(hook: ResolvedHook, toolName: string): boolean;
440
287
  /** Payload envelope passed to hook stdin. */
441
288
  interface HookPayload {
@@ -462,27 +309,11 @@ interface HookSpawnResult {
462
309
  timedOut: boolean;
463
310
  /** True iff spawn() itself failed (ENOENT, EACCES, …). */
464
311
  spawnError?: Error;
465
- /**
466
- * True iff stdout or stderr was capped at the byte limit. The hook
467
- * still ran to completion / timeout, but downstream consumers see a
468
- * truncated view of its output. Surface this in the UI so a hook
469
- * author who relies on long output knows the loop didn't see all
470
- * of it.
471
- */
312
+ /** Output capped at byte limit — hook ran to completion but consumers see clipped view. */
472
313
  truncated?: boolean;
473
314
  }
474
315
  type HookSpawner = (input: HookSpawnInput) => Promise<HookSpawnResult>;
475
- /**
476
- * Format a hook outcome as a single-line UI string. Used by both the
477
- * loop (for `warning` events) and the App (for UserPromptSubmit /
478
- * Stop outcomes). Centralizing keeps the language consistent across
479
- * scopes.
480
- */
481
316
  declare function formatHookOutcomeMessage(outcome: HookOutcome): string;
482
- /**
483
- * Decide the hook's outcome decision from raw spawn results.
484
- * Pulled out as a pure function so tests can pin the matrix.
485
- */
486
317
  declare function decideOutcome(event: HookEvent, raw: HookSpawnResult): "pass" | "block" | "warn" | "timeout" | "error";
487
318
  interface RunHooksOptions {
488
319
  payload: HookPayload;
@@ -490,13 +321,7 @@ interface RunHooksOptions {
490
321
  /** Test seam — defaults to a real `spawn`. */
491
322
  spawner?: HookSpawner;
492
323
  }
493
- /**
494
- * Filter hooks down to the ones that match `payload.event` (and
495
- * `payload.toolName`, for tool events), then run them in order.
496
- * Stops at the first `block` outcome on a blocking event so a
497
- * gating hook can prevent later hooks from incorrectly seeing a
498
- * success that wasn't going to happen.
499
- */
324
+ /** Stops at first `block` so a gating hook can prevent later hooks running against a phantom success. */
500
325
  declare function runHooks(opts: RunHooksOptions): Promise<HookReport>;
501
326
 
502
327
  interface ImmutablePrefixOptions {
@@ -506,54 +331,18 @@ interface ImmutablePrefixOptions {
506
331
  }
507
332
  declare class ImmutablePrefix {
508
333
  readonly system: string;
509
- /**
510
- * Backing array for `toolSpecs`. Originally `Object.freeze`d at
511
- * construction (hence the class name) — but `addTool` now lets the
512
- * dashboard register `semantic_search` after a mid-session
513
- * `reasonix index` build without forcing the user to restart. Each
514
- * add is documented to cost one cache-miss turn (the cached prefix
515
- * on DeepSeek's side is keyed by the full tool list); subsequent
516
- * turns re-cache against the new shape.
517
- */
334
+ /** Each `addTool` costs one cache-miss turn — DeepSeek's prefix cache is keyed by full tool list. */
518
335
  private _toolSpecs;
519
336
  readonly fewShots: readonly ChatMessage[];
520
- /**
521
- * Cached SHA-256 of the prefix payload. Computed lazily on first
522
- * `fingerprint` access, invalidated only by mutations that go
523
- * through `addTool` (the one legitimate post-construction mutation
524
- * path). The TUI reads `fingerprint` on every render — without the
525
- * cache, that means a fresh `JSON.stringify` + sha256 over the
526
- * full prefix (system prompt + tools list + few-shots, typically
527
- * 5-10KB) on every keystroke.
528
- *
529
- * The lazy-init also acts as a cheap drift guard: if some future
530
- * code path mutates `_toolSpecs` directly without going through
531
- * `addTool`, `fingerprint` will return the stale cached value
532
- * while the actual prefix sent to DeepSeek diverges — the cache
533
- * miss would be the first symptom. {@link verifyFingerprint}
534
- * lets dev / test code assert the cache matches reality.
535
- */
337
+ /** Invalidated only via `addTool`; bypassing it leaves cache stale → fingerprint diverges from sent prefix. */
536
338
  private _fingerprintCache;
537
339
  constructor(opts: ImmutablePrefixOptions);
538
340
  get toolSpecs(): readonly ToolSpec[];
539
341
  toMessages(): ChatMessage[];
540
342
  tools(): ToolSpec[];
541
- /**
542
- * Add a tool spec to the prefix. Returns `true` if added, `false`
543
- * if a tool with the same name was already present (callers can
544
- * decide whether to ignore or surface the no-op). The model picks
545
- * up the new tool on the next turn after the cache busts once.
546
- */
547
343
  addTool(spec: ToolSpec): boolean;
548
344
  get fingerprint(): string;
549
- /**
550
- * Recompute the fingerprint from scratch and assert it matches the
551
- * cached value. Returns the freshly-computed hash on success; throws
552
- * with a diff if the cache drifted, which always indicates a bug —
553
- * either a non-`addTool` mutation path was added, or `addTool`
554
- * forgot to invalidate the cache. Dev / test only; the live loop
555
- * doesn't call this on the hot path.
556
- */
345
+ /** Dev/test only — throws on cache drift, which always means a non-`addTool` mutation slipped in. */
557
346
  verifyFingerprint(): string;
558
347
  private computeFingerprint;
559
348
  }
@@ -561,13 +350,7 @@ declare class AppendOnlyLog {
561
350
  private _entries;
562
351
  append(message: ChatMessage): void;
563
352
  extend(messages: ChatMessage[]): void;
564
- /**
565
- * Bulk-replace entries. Intentionally named to be hard to reach for —
566
- * this is the one mutation path that breaks the log's append-only
567
- * spirit, reserved for compaction flows (`/compact`) and recovery
568
- * where the caller has consciously decided to drop old history. Any
569
- * other use is almost certainly wrong; append() is what you want.
570
- */
353
+ /** The one append-only-breaking path — reserved for `/compact` + recovery. Use `append()` otherwise. */
571
354
  compactInPlace(replacement: ChatMessage[]): void;
572
355
  get entries(): readonly ChatMessage[];
573
356
  toMessages(): ChatMessage[];
@@ -580,33 +363,9 @@ declare class VolatileScratch {
580
363
  reset(): void;
581
364
  }
582
365
 
583
- /**
584
- * Predicate the breaker consults to decide whether a call mutates state.
585
- * Mutating calls clear the recent-args buffer: re-reading a file after
586
- * `edit_file` shouldn't count as "saw the same args before" — the file
587
- * legitimately changed. Wire this from the caller using whatever source
588
- * of truth is appropriate (e.g. the ToolRegistry's `readOnly` /
589
- * `readOnlyCheck` flags). When undefined, every call is tracked the
590
- * old way — preserves the original behavior for callers that don't
591
- * thread a registry through.
592
- */
366
+ /** Mutating calls clear prior read-only entries so a post-edit re-read isn't flagged as repeat. */
593
367
  type IsMutating = (call: ToolCall) => boolean;
594
- /**
595
- * Call-storm breaker.
596
- *
597
- * Detects (tool, args) tuples repeating within a sliding window and suppresses
598
- * the offending call. Surfaces a synthetic tool_result advising the model to
599
- * change strategy on its next turn.
600
- *
601
- * Buffer entries are tagged read-only vs mutating. When a mutating call
602
- * runs, the breaker drops prior read-only entries — a re-read of the
603
- * same path after `edit_file` is fresh, not a repeat. Mutating calls
604
- * still count among themselves, so a model looping on identical
605
- * `edit_file` invocations still trips on the threshold.
606
- *
607
- * Without an `isMutating` predicate everything is tracked the same way
608
- * (back-compat for callers that don't thread a registry through).
609
- */
368
+ /** Tracks (name, args) repeats; mutating calls clear prior read-only entries while still counting amongst themselves. */
610
369
  declare class StormBreaker {
611
370
  private readonly windowSize;
612
371
  private readonly threshold;
@@ -620,16 +379,7 @@ declare class StormBreaker {
620
379
  reset(): void;
621
380
  }
622
381
 
623
- /**
624
- * Schema flattening for DeepSeek tool calls.
625
- *
626
- * DeepSeek loses arguments on schemas that are deep (>2 levels of nesting) or
627
- * wide (>10 leaf parameters). This module transforms such schemas into a
628
- * dot-notation flat schema and re-nests the model's arguments before dispatch.
629
- *
630
- * Example:
631
- * { user: { profile: { name, age } } } ⇄ "user.profile.name", "user.profile.age"
632
- */
382
+ /** DeepSeek drops args on schemas >2 levels deep or >10 leaves; flatten to dot-paths and re-nest after dispatch. */
633
383
 
634
384
  interface FlattenDecision {
635
385
  shouldFlatten: boolean;
@@ -640,14 +390,7 @@ declare function analyzeSchema(schema: JSONSchema | undefined): FlattenDecision;
640
390
  declare function flattenSchema(schema: JSONSchema): JSONSchema;
641
391
  declare function nestArguments(flatArgs: Record<string, unknown>): Record<string, unknown>;
642
392
 
643
- /**
644
- * Truncation recovery for tool-call argument JSON cut off mid-structure
645
- * (typically when the model hits max_tokens before finishing the JSON object).
646
- *
647
- * Strategy is purely local: balance braces, close strings, fill missing values
648
- * with `null`. We deliberately do NOT make a continuation API call here — that
649
- * decision belongs to the loop, which knows about budgets.
650
- */
393
+ /** Local-only repair (balance braces, close strings, fill nulls); continuation calls belong to the loop, which owns budgets. */
651
394
  interface TruncationRepairResult {
652
395
  repaired: string;
653
396
  changed: boolean;
@@ -655,14 +398,7 @@ interface TruncationRepairResult {
655
398
  }
656
399
  declare function repairTruncatedJson(input: string): TruncationRepairResult;
657
400
 
658
- /**
659
- * Scavenge tool calls leaked into reasoning_content.
660
- *
661
- * R1 sometimes emits tool-call JSON inside <think>…</think> and then forgets
662
- * to surface it in `tool_calls`. This pass extracts plausible calls and
663
- * proposes them to the loop, which decides whether to merge them with the
664
- * declared calls.
665
- */
401
+ /** R1 sometimes emits tool-call JSON inside reasoning_content and forgets `tool_calls`; recover those calls. */
666
402
 
667
403
  interface ScavengeOptions {
668
404
  /** Names of tools the model may legitimately call. Other names are ignored. */
@@ -676,17 +412,7 @@ interface ScavengeResult {
676
412
  }
677
413
  declare function scavengeToolCalls(reasoningContent: string | null | undefined, opts: ScavengeOptions): ScavengeResult;
678
414
 
679
- /**
680
- * Pillar 3 — Tool-Call Repair pipeline.
681
- *
682
- * Order of passes per turn:
683
- * 1. scavenge — recover tool calls leaked into <think>
684
- * 2. truncation — close any half-emitted argument JSON
685
- * 3. storm breaker — drop call-storm repeats
686
- *
687
- * Schema flattening is applied during loop construction (it changes what we
688
- * advertise to the model), not per-turn.
689
- */
415
+ /** Pass order: scavenge → truncation → storm. Schema flatten runs at loop construction, not per-turn. */
690
416
 
691
417
  interface RepairReport {
692
418
  scavenged: number;
@@ -699,26 +425,14 @@ interface ToolCallRepairOptions {
699
425
  stormWindow?: number;
700
426
  stormThreshold?: number;
701
427
  maxScavenge?: number;
702
- /**
703
- * Optional predicate the storm breaker consults to identify state-
704
- * changing calls — those clear the sliding window so a post-edit
705
- * verify-read isn't mistaken for a repeat. Production callers wire
706
- * this off the ToolRegistry's `readOnly` / `readOnlyCheck` flags;
707
- * tests that don't supply it keep the original behavior.
708
- */
428
+ /** Mutating calls clear the storm window so a post-edit verify-read isn't seen as a repeat. */
709
429
  isMutating?: IsMutating;
710
430
  }
711
431
  declare class ToolCallRepair {
712
432
  private readonly storm;
713
433
  private readonly opts;
714
434
  constructor(opts: ToolCallRepairOptions);
715
- /**
716
- * Drop the StormBreaker's sliding window of recent (name, args)
717
- * signatures. Called at the start of every user turn — a fresh user
718
- * message is a new intent, so carrying old repetition state into it
719
- * would turn a valid "try again with different input" flow into a
720
- * false-positive block.
721
- */
435
+ /** Called at start of every user turn — fresh intent shouldn't inherit old repetition state. */
722
436
  resetStorm(): void;
723
437
  process(declaredCalls: ToolCall[], reasoningContent: string | null, content?: string | null): {
724
438
  calls: ToolCall[];
@@ -742,11 +456,6 @@ interface TurnStats {
742
456
  interface SessionSummary {
743
457
  turns: number;
744
458
  totalCostUsd: number;
745
- /**
746
- * Input-side (prompt) cost aggregated across the session. Split
747
- * from totalCostUsd so the panel can render "cost $X (in $Y · out
748
- * $Z)" — users asked for visibility into where the spend lands.
749
- */
750
459
  totalInputCostUsd: number;
751
460
  /** Output-side (completion) cost aggregated across the session. */
752
461
  totalOutputCostUsd: number;
@@ -755,19 +464,8 @@ interface SessionSummary {
755
464
  /** @deprecated. Same as claudeEquivalentUsd — synthetic ratio, not a real measurement. */
756
465
  savingsVsClaudePct: number;
757
466
  cacheHitRatio: number;
758
- /**
759
- * Most recent turn's prompt-token count. Used by the TUI's context
760
- * gauge: we can't know the next call's cost without making it, but
761
- * the last turn's prompt tokens is the floor (next call is last
762
- * prompt + user delta + any new tool outputs).
763
- */
467
+ /** Floor estimate for next call — actual cost = this + user delta + new tool outputs. */
764
468
  lastPromptTokens: number;
765
- /**
766
- * Most recent turn's USD cost. Complements `totalCostUsd` so the TUI
767
- * can render "this turn: $X · session: $Y" — users asked for a
768
- * per-turn signal so a mid-session jump from flash to pro is
769
- * immediately visible, not hidden inside the session aggregate.
770
- */
771
469
  lastTurnCostUsd: number;
772
470
  }
773
471
  declare class SessionStats {
@@ -782,14 +480,6 @@ declare class SessionStats {
782
480
  summary(): SessionSummary;
783
481
  }
784
482
 
785
- /**
786
- * Per-call context a tool `fn` can optionally consume. Today the only
787
- * field is `signal`, plumbed through so long-running tools (MCP calls,
788
- * HTTP requests) can abort when the user presses Esc. Omitted fields
789
- * stay optional — tools written against the pre-0.4.9 signature keep
790
- * working; they just ignore cancellation, which is fine for fast
791
- * local work where "await finishes" happens before the next tick anyway.
792
- */
793
483
  interface ToolCallContext {
794
484
  signal?: AbortSignal;
795
485
  }
@@ -797,74 +487,29 @@ interface ToolDefinition<A = any, R = any> {
797
487
  name: string;
798
488
  description?: string;
799
489
  parameters?: JSONSchema;
800
- /**
801
- * Marks a tool as read-only: safe to invoke during plan mode. `true`
802
- * for tools that only observe (read_file, list_directory, search, web
803
- * fetch/search). Leave undefined / `false` for anything that can write,
804
- * execute, or mutate state.
805
- *
806
- * The registry enforces this at dispatch: non-readonly tools called
807
- * while `planMode` is on return a refusal string the model can
808
- * learn from, instead of actually running.
809
- */
490
+ /** Safe in plan mode — registry refuses non-readonly calls when `planMode` is on. */
810
491
  readOnly?: boolean;
811
- /**
812
- * Dynamic read-only check for tools whose safety depends on arguments
813
- * — `run_command` with an allowlisted argv is safe, `run_command
814
- * rm -rf` isn't. Called with the parsed arguments; `true` means "treat
815
- * as read-only for plan mode". Takes precedence over `readOnly` when
816
- * both are set.
817
- */
492
+ /** Per-args check; takes precedence over `readOnly`. e.g. `run_command` + allowlisted argv. */
818
493
  readOnlyCheck?: (args: A) => boolean;
819
494
  fn: (args: A, ctx?: ToolCallContext) => R | Promise<R>;
820
495
  }
821
496
  interface ToolRegistryOptions {
822
- /**
823
- * Auto-flatten schemas that exceed depth/width thresholds before sending
824
- * them to the model. Re-nests arguments transparently on dispatch.
825
- * Default: true. Pass false to opt out.
826
- */
497
+ /** Auto-flatten + re-nest at dispatch; default true. */
827
498
  autoFlatten?: boolean;
828
499
  }
829
- /**
830
- * Callback form for `setToolInterceptor` — receives the tool name and
831
- * already-parsed arguments; returns a string to short-circuit dispatch
832
- * (the returned value becomes the tool result the model sees), or
833
- * `null` / `undefined` to fall through to the registered tool fn.
834
- *
835
- * Used by `reasonix code`'s edit-mode gate: `edit_file` / `write_file`
836
- * are intercepted in "review" mode (queued into pendingEdits, returning
837
- * "queued for /apply") or handled inline in "auto" mode (snapshot +
838
- * apply, then surface an undo banner). Other tools pass through.
839
- */
500
+ /** String return short-circuits dispatch; null/undefined falls through to the tool fn. */
840
501
  type ToolInterceptor = (name: string, args: Record<string, unknown>) => string | null | undefined | Promise<string | null | undefined>;
841
502
  declare class ToolRegistry {
842
503
  private readonly _tools;
843
504
  private readonly _autoFlatten;
844
- /**
845
- * When true, `dispatch` refuses any tool whose `readOnly` flag isn't
846
- * set (and whose `readOnlyCheck` doesn't pass on the specific args).
847
- * Drives `reasonix code`'s Plan Mode — the model can still explore
848
- * via read tools but its writes and non-allowlisted shell calls are
849
- * bounced until the user approves a submitted plan.
850
- */
851
505
  private _planMode;
852
- /**
853
- * Optional hook run after arg parsing but before tool.fn. Lets the TUI
854
- * reroute specific tool calls (e.g. edit_file in review mode) without
855
- * modifying the tool definitions themselves.
856
- */
857
506
  private _interceptor;
858
507
  constructor(opts?: ToolRegistryOptions);
859
508
  /** Enable / disable plan-mode enforcement at dispatch. */
860
509
  setPlanMode(on: boolean): void;
861
510
  /** True when the registry is currently refusing non-readonly calls. */
862
511
  get planMode(): boolean;
863
- /**
864
- * Install or clear the dispatch interceptor. At most one interceptor
865
- * is active at a time — calling twice replaces the previous. Pass
866
- * `null` to remove.
867
- */
512
+ /** At most one interceptor active; calling twice replaces. */
868
513
  setToolInterceptor(fn: ToolInterceptor | null): void;
869
514
  register<A, R>(def: ToolDefinition<A, R>): this;
870
515
  has(name: string): boolean;
@@ -881,29 +526,11 @@ declare class ToolRegistry {
881
526
  }
882
527
 
883
528
  type EventRole = "assistant_delta" | "assistant_final"
884
- /**
885
- * Emitted as `tool_calls[].function.arguments` streams in. A tool
886
- * call with a large arguments payload produces no `content` or
887
- * `reasoning_content` bytes — this is the only signal the UI has
888
- * that the stream is alive during that window.
889
- */
529
+ /** Only liveness signal during a large-args tool call (no content/reasoning bytes). */
890
530
  | "tool_call_delta"
891
- /**
892
- * Yielded immediately before a tool is dispatched. Lets the TUI put
893
- * up a "▸ tool<X> running…" spinner while the tool's Promise is
894
- * pending — otherwise the UI looks frozen whenever a tool call
895
- * takes more than a few hundred ms (a big `filesystem_edit_file`
896
- * is a typical trigger).
897
- */
531
+ /** Pre-dispatch ping so the TUI can show a spinner during long tool awaits. */
898
532
  | "tool_start" | "tool" | "done" | "error" | "warning"
899
- /**
900
- * Transient "what's happening right now" indicator. Emitted during
901
- * silent phases — between a tool result and the next iteration's
902
- * first streaming byte, and right before harvest — so the TUI can
903
- * show a spinner with explanatory text instead of looking frozen.
904
- * The UI clears it on the next primary event (assistant_delta,
905
- * tool_start, tool, assistant_final, error).
906
- */
533
+ /** Transient indicator for silent phases; UI clears on next primary event. */
907
534
  | "status" | "branch_start" | "branch_progress" | "branch_done";
908
535
  interface BranchSummary {
909
536
  budget: number;
@@ -924,26 +551,13 @@ interface LoopEvent {
924
551
  content: string;
925
552
  reasoningDelta?: string;
926
553
  toolName?: string;
927
- /**
928
- * Raw JSON-string arguments the model sent for a tool call (role === "tool").
929
- * Populated so transcripts can persist *why* a tool was called, not just
930
- * what it returned. Needed by `reasonix diff` to explain divergences.
931
- */
554
+ /** Raw args JSON — needed by `reasonix diff` to explain why a tool was called. */
932
555
  toolArgs?: string;
933
556
  /** Cumulative arguments-string length for `role === "tool_call_delta"`. */
934
557
  toolCallArgsChars?: number;
935
- /**
936
- * Zero-based index of the tool call this delta belongs to. Surfaces
937
- * multi-tool turns: on a response emitting 4 write_file calls the UI
938
- * can show "building call 3/?" instead of a context-free spinner.
939
- */
558
+ /** Zero-based index of the tool call this delta belongs to (multi-tool progress). */
940
559
  toolCallIndex?: number;
941
- /**
942
- * Count of prior tool calls (this turn) whose arguments have finished
943
- * streaming into valid JSON. Not all ready calls have been dispatched
944
- * yet — dispatch still happens post-stream — but the user gets "2
945
- * ready" progress feedback while later calls keep streaming.
946
- */
560
+ /** Count of tool calls whose args have parsed as valid JSON (UI progress, not dispatch gate). */
947
561
  toolCallReadyCount?: number;
948
562
  stats?: TurnStats;
949
563
  planState?: TypedPlanState;
@@ -951,15 +565,7 @@ interface LoopEvent {
951
565
  branch?: BranchSummary;
952
566
  branchProgress?: BranchProgress;
953
567
  error?: string;
954
- /**
955
- * True on `assistant_final` events emitted by the no-tools fallback
956
- * when the loop hit its budget, was aborted, or tripped the
957
- * token-context guard. Consumers that act on assistant text (notably
958
- * the code-mode edit applier) MUST treat these as display-only —
959
- * the model is "wrapping up," not proposing new work. Applying
960
- * SEARCH/REPLACE blocks found in a forced summary caused the
961
- * "analysis became edits" bug in v0.4.1 and earlier.
962
- */
568
+ /** Display-only — code-mode applier MUST skip SEARCH/REPLACE in forced-summary text. */
963
569
  forcedSummary?: boolean;
964
570
  }
965
571
  interface CacheFirstLoopOptions {
@@ -969,94 +575,27 @@ interface CacheFirstLoopOptions {
969
575
  model?: string;
970
576
  maxToolIters?: number;
971
577
  stream?: boolean;
972
- /**
973
- * Pillar 2 — structured harvesting of R1 reasoning into a typed plan state.
974
- * Pass `true` for defaults or an options object. Off by default (adds a
975
- * cheap but non-zero V3 call per turn).
976
- */
977
578
  harvest?: boolean | HarvestOptions;
978
- /**
979
- * Self-consistency branching. Pass a number for just a budget (e.g. 3) or
980
- * a full `BranchOptions` object. Disables streaming for the branched turn
981
- * because all samples must complete before selection. Auto-enables harvest
982
- * since the default selector scores samples by plan-state uncertainty.
983
- */
579
+ /** Branching disables streaming (need all samples) and force-enables harvest (selector input). */
984
580
  branch?: number | BranchOptions;
985
- /**
986
- * Reasoning-effort cap. See {@link ReconfigurableOptions} — default
987
- * `max` for Reasonix (agent-class use per DeepSeek V4 docs).
988
- */
989
581
  reasoningEffort?: "high" | "max";
990
- /**
991
- * Master switch for auto-escalation paths. See ReconfigurableOptions
992
- * — defaults to `true` (current behavior); the `flash` and `pro`
993
- * presets pass `false` to lock the running session to one model.
994
- */
995
582
  autoEscalate?: boolean;
996
- /**
997
- * Soft USD budget for the entire session. When set, the loop:
998
- * - Emits a one-shot warning event when cumulative cost crosses 80%
999
- * - Refuses to run the next turn once cumulative cost ≥ budget,
1000
- * yielding an error that explains how to bump or clear the cap
1001
- *
1002
- * Default `undefined` — no cap, no warnings. Reasonix is the cost-
1003
- * focused agent; the budget is opt-in so users new to the tool
1004
- * don't get blocked at $0.50 wondering what happened, but heavy /
1005
- * headless / CI users have a clean circuit breaker available.
1006
- */
583
+ /** Soft USD cap — warns at 80%, refuses next turn at 100%. Opt-in (default no cap). */
1007
584
  budgetUsd?: number;
1008
- /**
1009
- * Session name. When set, the loop pre-loads the session's prior messages
1010
- * into its log on construction, and appends every new log entry to
1011
- * `~/.reasonix/sessions/<name>.jsonl` so the next run can resume.
1012
- */
1013
585
  session?: string;
1014
- /**
1015
- * Resolved hook list — loaded from `<project>/.reasonix/settings.json`
1016
- * + `~/.reasonix/settings.json` by the CLI before constructing the loop.
1017
- * The loop dispatches `PreToolUse` and `PostToolUse` events itself; the
1018
- * CLI handles `UserPromptSubmit` and `Stop` since they live at the App
1019
- * boundary. Empty / unset → no hooks fire (the runtime cost of an empty
1020
- * filter is one ms). See `src/hooks.ts` for the full contract.
1021
- */
586
+ /** PreToolUse + PostToolUse only — UserPromptSubmit / Stop live at the App boundary. */
1022
587
  hooks?: ResolvedHook[];
1023
- /**
1024
- * `cwd` reported to hooks via the stdin payload. Defaults to `process.cwd()`.
1025
- * `reasonix code` overrides this to the sandbox root so a hook that does
1026
- * `cd $REASONIX_CWD` lands in the project, not in the user's shell home.
1027
- */
588
+ /** `cwd` reported to hooks; `reasonix code` sets this to the sandbox root, not shell home. */
1028
589
  hookCwd?: string;
1029
590
  }
1030
- /**
1031
- * Pillar 1 — Cache-First Loop.
1032
- *
1033
- * - prefix is immutable (cache target)
1034
- * - log is append-only (preserves prior-turn prefix)
1035
- * - scratch is per-turn volatile (never sent upstream)
1036
- *
1037
- * Yields a stream of events so a TUI can render progressively.
1038
- */
1039
591
  interface ReconfigurableOptions {
1040
592
  model?: string;
1041
593
  harvest?: boolean | HarvestOptions;
1042
594
  branch?: number | BranchOptions;
1043
595
  stream?: boolean;
1044
- /**
1045
- * Reasoning-effort cap sent per turn (V4 thinking mode only;
1046
- * deepseek-chat ignores it). Reasonix pins `max` by default because
1047
- * DeepSeek's V4 docs flag Claude-Code-style agent loops as the
1048
- * canonical `max` use case. `/effort high` lets a user step down
1049
- * mid-session for cheaper, faster turns on simple tasks.
1050
- */
596
+ /** V4 thinking mode only; deepseek-chat ignores. */
1051
597
  reasoningEffort?: "high" | "max";
1052
- /**
1053
- * Master switch for the auto-escalation paths — both the
1054
- * `<<<NEEDS_PRO>>>` marker scavenge and the failure-count threshold.
1055
- * `true` (default) preserves the original "flash baseline, jump to
1056
- * pro when struggling" behavior. `false` locks the active turn to
1057
- * whatever `model` is set to — used by the `flash` and `pro` presets
1058
- * which want a hard model commitment.
1059
- */
598
+ /** `false` pins to `model` — kills both NEEDS_PRO marker scavenge and failure-count threshold. */
1060
599
  autoEscalate?: boolean;
1061
600
  }
1062
601
  declare class CacheFirstLoop {
@@ -1074,156 +613,28 @@ declare class CacheFirstLoop {
1074
613
  harvestOptions: HarvestOptions;
1075
614
  branchEnabled: boolean;
1076
615
  branchOptions: BranchOptions;
1077
- /** See ReconfigurableOptions — mutable so `/effort` can flip mid-session. */
1078
616
  reasoningEffort: "high" | "max";
1079
- /**
1080
- * Auto-escalation toggle. `true` lets the loop self-promote to pro
1081
- * mid-turn (NEEDS_PRO marker / failure threshold); `false` keeps it
1082
- * pinned to `model`. Mutable so the dashboard's preset switcher can
1083
- * flip it live alongside `model`.
1084
- */
1085
617
  autoEscalate: boolean;
1086
- /**
1087
- * Soft USD budget — see {@link CacheFirstLoopOptions.budgetUsd}.
1088
- * Mutable so `/budget` slash can set / change / clear it mid-session.
1089
- * `null` (the default) disables all budget checks.
1090
- */
1091
618
  budgetUsd: number | null;
1092
- /**
1093
- * Set the first time a turn crosses 80% of the budget so the warning
1094
- * doesn't repeat every turn afterwards. Cleared by `setBudget` (any
1095
- * change re-arms the warning, including raising the cap above the
1096
- * current spend).
1097
- */
619
+ /** One-shot 80% warning latch — cleared by setBudget so a bump re-arms at the new boundary. */
1098
620
  private _budgetWarned;
1099
621
  sessionName: string | null;
1100
- /**
1101
- * Hook list, mutable so `/hooks reload` can swap it without
1102
- * reconstructing the loop. Default empty — the filter cost on a
1103
- * tool call is one array length check.
1104
- */
1105
622
  hooks: ResolvedHook[];
1106
- /**
1107
- * `cwd` reported to hook stdin. Mutable so `/cwd` can switch the
1108
- * working directory mid-session — the App keeps it in sync with
1109
- * the same currentRootDir that drives tool re-registration.
1110
- */
1111
623
  hookCwd: string;
1112
624
  /** Number of messages that were pre-loaded from the session file. */
1113
625
  readonly resumedMessageCount: number;
1114
626
  private _turn;
1115
627
  private _streamPreference;
1116
- /**
1117
- * AbortController per active turn. Threaded through the DeepSeek
1118
- * HTTP calls AND every tool dispatch so Esc actually cancels the
1119
- * in-flight network/subprocess work — not "we'll get to it after
1120
- * the current call finishes." Re-created at the start of each
1121
- * `step()` (the prior turn's signal has already fired).
1122
- */
628
+ /** Threaded through HTTP + every tool dispatch so Esc cancels in-flight work, not after. */
1123
629
  private _turnAbort;
1124
- /**
1125
- * "Next turn should run on pro, regardless of this.model." Set by the
1126
- * `/pro` slash command; consumed at the next turn's start (flipping
1127
- * `_escalateThisTurn` on and self-clearing) so it's a fire-and-forget
1128
- * single-turn upgrade. Survives across multiple slash inputs so
1129
- * typing `/pro` and then hesitating a while before submitting a real
1130
- * message still applies.
1131
- */
1132
630
  private _proArmedForNextTurn;
1133
- /**
1134
- * Active for the current turn only — true means every model call
1135
- * this turn uses pro instead of `this.model`. Turned on by EITHER
1136
- * the pro-armed consumption OR the mid-turn auto-escalation
1137
- * threshold (see `_turnFailureCount`). Cleared at turn end.
1138
- */
1139
631
  private _escalateThisTurn;
1140
- /**
1141
- * Visible-failure count for the current turn. Incremented by tool
1142
- * dispatch paths when a result matches a known "flash is struggling"
1143
- * shape (SEARCH-not-found errors, scavenge / truncation / storm
1144
- * repair fires). Once it hits {@link FAILURE_ESCALATION_THRESHOLD},
1145
- * the remainder of the turn's model calls auto-upgrade to pro so
1146
- * the user doesn't watch flash retry the same edit 5 times.
1147
- */
1148
632
  private _turnFailureCount;
1149
- /**
1150
- * Per-type breakdown of failure signals counted toward the turn's
1151
- * auto-escalation threshold. Surfaced in the warning when the
1152
- * threshold trips so the user sees what kind of trouble flash
1153
- * actually hit ("3× search-mismatch, 2× truncated") rather than
1154
- * just a bare count. Reset alongside _turnFailureCount.
1155
- */
1156
633
  private _turnFailureTypes;
1157
634
  constructor(opts: CacheFirstLoopOptions);
1158
- /**
1159
- * Shrink the log by re-truncating oversized tool results to a tighter
1160
- * token cap, and persist the result back to disk so the next launch
1161
- * doesn't re-inherit a fat session file. Returns a summary the TUI
1162
- * can display.
1163
- *
1164
- * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
1165
- * capped at the same effective context footprint as English instead
1166
- * of slipping past a char cap at 2× the token cost. Default 4000
1167
- * tokens, matching the token-aware dispatch cap from 0.5.2.
1168
- *
1169
- * Only tool-role messages are touched (same rationale as
1170
- * {@link healLoadedMessages}). User and assistant messages carry
1171
- * authored intent we can't mechanically shrink without losing
1172
- * meaning.
1173
- */
1174
- /**
1175
- * Conservative args-only shrink fired after every tool response —
1176
- * strictly about ONE thing: stop oversized `edit_file` / `write_file`
1177
- * arguments from riding every future turn's prompt.
1178
- *
1179
- * Why this is worth doing AUTOMATICALLY (not just on /compact):
1180
- * Each tool-call arguments string sticks in the log verbatim. On a
1181
- * coding session with ~10 edits, that's 20-40K tokens of stale
1182
- * SEARCH/REPLACE text riding along on every turn. Even at a 98.9%
1183
- * cache hit rate the input cost still adds up linearly (cache-hit
1184
- * price × tokens × turns). Compacting IMMEDIATELY after the tool
1185
- * responds means the next turn's prompt is already smaller — the
1186
- * shrink is a one-time write that saves every future prompt.
1187
- *
1188
- * Threshold rationale: 800 tokens ≈ 3 KB. A typical 20-line edit's
1189
- * args land well under that; massive rewrites (whole-file content,
1190
- * 100+ line refactors) land above and get the compaction. Small
1191
- * edits stay byte-verbatim so nothing common-case changes.
1192
- *
1193
- * Safety: we ONLY shrink args whose tool has ALREADY responded.
1194
- * Structurally that's every call in `log.toMessages()` at this
1195
- * point — the current turn's assistant/tool pairing is by
1196
- * construction closed by the time we get here (append happens
1197
- * AFTER dispatch). The in-flight assistant message being built
1198
- * lives in scratch, not the log, so this pass can't touch it.
1199
- *
1200
- * Model impact: the model may occasionally want to reference the
1201
- * exact SEARCH text of a prior edit — it then reads the file
1202
- * directly (which shows current state) or looks at the preceding
1203
- * assistant text (which has its plan). Losing the stale args is a
1204
- * net win: one extra read_file vs. dragging N KB of stale text
1205
- * through every subsequent turn.
1206
- */
635
+ /** Shrink huge edit_file/write_file args post-dispatch — tool result already explains. */
1207
636
  private compactToolCallArgsAfterResponse;
1208
- /**
1209
- * Fired at the END of a turn (just before `done` is yielded). Shrinks
1210
- * every tool RESULT in the log that exceeds {@link TURN_END_RESULT_CAP_TOKENS}
1211
- * to a tight cap so the NEXT turn's prompt doesn't re-pay for big
1212
- * reads or searches done earlier. Unlike the reactive 40/80%
1213
- * thresholds which react to context pressure, this runs unconditionally
1214
- * — the win is preventive: each turn's big outputs get trimmed before
1215
- * they ride into the next prompt. Saves compounding cost on long
1216
- * sessions.
1217
- *
1218
- * Why compact the JUST-finished turn's results too (not just older
1219
- * turns)? The same-turn iters already consumed the raw content to
1220
- * make their decisions — the log is only carried forward for future
1221
- * prompts. And "let me re-read the file" is vastly cheaper than
1222
- * "carry this 12KB result in every future turn's prompt forever."
1223
- *
1224
- * Safe by construction: args-compact for THIS turn already ran
1225
- * inside `compactToolCallArgsAfterResponse`; this pass is orthogonal.
1226
- */
637
+ /** Preventive end-of-turn shrink — trim big results before they ride into the next prompt. */
1227
638
  private autoCompactToolResultsOnTurnEnd;
1228
639
  compact(maxTokens?: number): {
1229
640
  healedCount: number;
@@ -1231,40 +642,14 @@ declare class CacheFirstLoop {
1231
642
  charsSaved: number;
1232
643
  };
1233
644
  appendAndPersist(message: ChatMessage): void;
1234
- /**
1235
- * Start a fresh conversation WITHOUT exiting. Drops every message
1236
- * in the in-memory log AND rewrites the session file to empty so
1237
- * a resume won't re-hydrate the old turns. Unlike `/forget`, which
1238
- * deletes the session entirely, this keeps the session name and
1239
- * config intact — it's the "new chat" button.
1240
- *
1241
- * The immutable prefix (system prompt + tool specs) is preserved
1242
- * — that's the cache-first invariant, not part of the conversation.
1243
- * Returns the number of messages dropped so the UI can show it.
1244
- */
645
+ /** "New chat" — drops messages but keeps session + immutable prefix (cache-first invariant). */
1245
646
  clearLog(): {
1246
647
  dropped: number;
1247
648
  };
1248
- /**
1249
- * Reconfigure model/harvest/branch/stream mid-session. The loop's log,
1250
- * scratch, and stats are preserved — only the per-turn behavior changes.
1251
- * Used by the TUI's slash commands and by library callers who want to
1252
- * flip a knob between turns.
1253
- */
1254
649
  configure(opts: ReconfigurableOptions): void;
1255
- /**
1256
- * Set / change / clear the soft USD budget. `null` (or any non-
1257
- * positive number) disables the cap entirely. Re-arms the 80%
1258
- * warning so a user who bumps the cap mid-session sees a fresh
1259
- * threshold message at the new boundary.
1260
- */
650
+ /** `null` disables the cap; any change re-arms the 80% warning. */
1261
651
  setBudget(usd: number | null): void;
1262
- /**
1263
- * Arm pro for the next turn (consumed at turn start). Called by
1264
- * `/pro`. Idempotent — repeated calls stay armed, `disarmPro()`
1265
- * clears. Separate from `/preset max` which persistently switches
1266
- * this.model; armed state is strictly single-turn.
1267
- */
652
+ /** Single-turn upgrade consumed at next step() — distinct from `/preset max` (persistent). */
1268
653
  armProForNextTurn(): void;
1269
654
  /** Cancel `/pro` arming before the next turn starts. */
1270
655
  disarmPro(): void;
@@ -1272,131 +657,31 @@ declare class CacheFirstLoop {
1272
657
  get proArmed(): boolean;
1273
658
  /** UI surface — true while the current turn is running on pro (armed or auto-escalated). */
1274
659
  get escalatedThisTurn(): boolean;
1275
- /**
1276
- * Model the current model call should use. Defaults to `this.model`;
1277
- * upgrades to {@link ESCALATION_MODEL} when the turn is armed for
1278
- * pro (via `/pro`) or has hit the failure-escalation threshold.
1279
- * Same thinking + effort policy applies regardless — pro defaults
1280
- * to thinking=enabled and effort=max, which the current turn wanted
1281
- * anyway when flash was struggling.
1282
- */
1283
660
  private modelForCurrentCall;
1284
- /**
1285
- * Parse the escalation marker out of the model's leading content.
1286
- * Returns `{ matched: true, reason? }` for both bare and reason-
1287
- * carrying forms. Only the FIRST line matters — the model is
1288
- * instructed to emit the marker as the first output token if at
1289
- * all. Matches anywhere else in the text are normal content
1290
- * references (e.g. the user asked about the marker itself).
1291
- */
661
+ /** Anchored to lead — mid-text matches are normal content (user asking about the marker). */
1292
662
  private parseEscalationMarker;
1293
663
  /** Convenience boolean — same gate the streaming path used to call. */
1294
664
  private isEscalationRequest;
1295
- /**
1296
- * Could `buf` STILL plausibly become the full marker as more chunks
1297
- * arrive? Drives the streaming buffer's flush decision: while this
1298
- * is true we keep accumulating; once it's false (or the buffer
1299
- * exceeds the byte limit) we flush so the user isn't staring at a
1300
- * delayed display for arbitrary content that just happens to start
1301
- * with `<`.
1302
- */
665
+ /** Drives streaming flush — while plausibly partial, keep accumulating; else flush. */
1303
666
  private looksLikePartialEscalationMarker;
1304
- /**
1305
- * Check whether a tool result string looks like a "flash struggled"
1306
- * signal and, if so, increment the turn's failure counter. Escalates
1307
- * the REST of the current turn to pro once the threshold is hit.
1308
- * Idempotent after escalation — further failures don't re-escalate,
1309
- * but the turn is already on pro so it doesn't matter.
1310
- *
1311
- * Return: `true` when this call tipped the turn into escalation
1312
- * mode (so the loop can surface a one-time warning to the user).
1313
- */
667
+ /** Returns true ONLY on the tipping call — caller surfaces a one-shot warning. */
1314
668
  private noteToolFailureSignal;
1315
- /**
1316
- * Render `_turnFailureTypes` as a comma-separated breakdown like
1317
- * "2× search-mismatch, 1× truncated" for the auto-escalation
1318
- * warning. Empty if no types have been recorded yet (defensive —
1319
- * the warning sites only call this after a bump).
1320
- */
1321
669
  private formatFailureBreakdown;
1322
670
  private buildMessages;
1323
- /**
1324
- * Signal the currently-running {@link step} to stop **now**. Cancels
1325
- * the in-flight network request (DeepSeek HTTP/SSE) AND any tool call
1326
- * currently dispatching (MCP `notifications/cancelled` + promise
1327
- * reject). The loop itself also sees `signal.aborted` at each
1328
- * iteration boundary and exits quickly instead of looping again.
1329
- * Called by the TUI on Esc.
1330
- */
1331
671
  abort(): void;
1332
- /**
1333
- * Drop everything in the log after (and including) the most recent
1334
- * user message. Used by `/retry` so the caller can re-send that
1335
- * message with a fresh turn instead of layering another response on
1336
- * top of the prior exchange. Returns the content of the dropped user
1337
- * message, or `null` if there isn't one yet.
1338
- *
1339
- * Persists by rewriting the session file — otherwise the next
1340
- * launch would rehydrate the old exchange and `/retry` would seem
1341
- * to have done nothing.
1342
- */
672
+ /** Drop the last user message + everything after; caller re-sends. Persists to session file. */
1343
673
  retryLastUser(): string | null;
1344
674
  step(userInput: string): AsyncGenerator<LoopEvent>;
1345
675
  private forceSummaryAfterIterLimit;
1346
676
  run(userInput: string, onEvent?: (ev: LoopEvent) => void): Promise<string>;
1347
- /**
1348
- * Build an assistant message for the log. The `producingModel` arg is
1349
- * the model that actually generated this turn (flash, pro, the
1350
- * forced-summary flash call, `this.model` for synthetics, etc.) —
1351
- * NOT `this.model`, because escalation + forced-summary can both
1352
- * route a single turn to a different model.
1353
- *
1354
- * The single invariant this encodes: if the producing model is
1355
- * thinking-mode, `reasoning_content` MUST be present on the
1356
- * persisted message — even as an empty string. DeepSeek's validator
1357
- * 400s the NEXT request if any historical thinking-mode assistant
1358
- * turn is missing it. We used to gate on `reasoning.length > 0`,
1359
- * which silently dropped the field whenever the stream emitted zero
1360
- * reasoning deltas or the API returned `reasoning_content: null` —
1361
- * both legitimate edge cases the 0.5.15/0.5.18 fixes missed.
1362
- */
677
+ /** Thinking-mode producer ⇒ reasoning_content MUST be set (even ""), or next call 400s. */
1363
678
  private assistantMessage;
1364
- /**
1365
- * Synthetic assistant message (abort notices, future system injections)
1366
- * — no real API round trip. Delegates to {@link assistantMessage} with
1367
- * `this.model` as the stand-in producer, so the same thinking-mode
1368
- * invariant applies: reasoner sessions get an empty-string
1369
- * `reasoning_content`; V3 sessions get nothing.
1370
- */
679
+ /** Abort notices etc — uses this.model as stand-in producer for the thinking-mode stamp. */
1371
680
  private syntheticAssistantMessage;
1372
681
  }
1373
- /**
1374
- * R1 occasionally hallucinates tool-call markup as plain text when the
1375
- * real tool channel has been closed — typically our forced-summary
1376
- * path, where `tools: undefined` is supposed to force prose but isn't
1377
- * always respected. The markup isn't parsed by our tool-call path
1378
- * (the API response's structured `tool_calls` field is empty), so
1379
- * it's just noise in the user's view. Strip known envelope shapes.
1380
- *
1381
- * Exported so tests can exercise it against concrete R1 outputs.
1382
- */
682
+ /** Strip hallucinated tool-call envelopes — `tools: undefined` doesn't always force prose. */
1383
683
  declare function stripHallucinatedToolMarkup(s: string): string;
1384
- /**
1385
- * Enforce tool_calls ↔ tool pairing across a message log. DeepSeek
1386
- * rejects two shapes at the API boundary:
1387
- * (a) assistant with tool_calls not followed by matching tool
1388
- * responses ("insufficient tool messages following tool_calls")
1389
- * (b) tool message without a preceding assistant.tool_calls with
1390
- * the matching tool_call_id ("must be a response to a preceding
1391
- * message with 'tool_calls'")
1392
- *
1393
- * Corrupted session files from earlier builds have hit both. This pass
1394
- * rebuilds the message stream so only well-formed (assistant.tool_calls
1395
- * + all matching responses) groups survive. Plain user/assistant/system
1396
- * messages (no tool_calls) always pass through.
1397
- *
1398
- * Exported so both char-based and token-based heal can compose it.
1399
- */
684
+ /** Drops both unpaired assistant.tool_calls and stray tool messages — DeepSeek 400s on either. */
1400
685
  declare function fixToolCallPairing(messages: ChatMessage[]): {
1401
686
  messages: ChatMessage[];
1402
687
  droppedAssistantCalls: number;
@@ -1407,67 +692,19 @@ declare function healLoadedMessages(messages: ChatMessage[], maxChars: number):
1407
692
  healedCount: number;
1408
693
  healedFrom: number;
1409
694
  };
1410
- /**
1411
- * Token-aware counterpart of {@link healLoadedMessages}. Used at
1412
- * session-load time so resumed sessions come back capped at the same
1413
- * token budget (not char budget) as live tool results — CJK text no
1414
- * longer slips past at 2× the intended token cost when re-hydrated.
1415
- *
1416
- * Still does the same structural pass for tool_calls ↔ tool pairing;
1417
- * that logic is orthogonal to the truncation cap.
1418
- */
695
+ /** Token-cap variant — char cap would let CJK slip past at 2× the intended token cost. */
1419
696
  declare function healLoadedMessagesByTokens(messages: ChatMessage[], maxTokens: number): {
1420
697
  messages: ChatMessage[];
1421
698
  healedCount: number;
1422
699
  tokensSaved: number;
1423
700
  charsSaved: number;
1424
701
  };
1425
- /**
1426
- * Turn raw `DeepSeek NNN: {json}` errors into short actionable hints.
1427
- * Client code throws these verbatim from the HTTP layer (see client.ts);
1428
- * this is the one place the UI text layer reads to decide what the user
1429
- * actually needs to do about it.
1430
- *
1431
- * Covered codes (per DeepSeek's error-code doc):
1432
- * - 400 + "maximum context length" → context-overflow, point at /forget
1433
- * - 400 generic → strip the JSON, show inner message
1434
- * - 401 → API key rejected, point at `reasonix setup`
1435
- * - 402 → balance depleted, link to top-up page
1436
- * - 422 → param error, show inner message (usually explains which field)
1437
- *
1438
- * 429/500/502/503/504 are swallowed by retry.ts before they reach here;
1439
- * if they DO reach here (all retries exhausted), the raw string already
1440
- * says "DeepSeek 503: server busy" etc. which is informative enough.
1441
- */
702
+ /** Single text-layer DeepSeek-error formatter — 429/5xx never reach here (retry.ts swallows). */
1442
703
  declare function formatLoopError(err: Error): string;
1443
704
 
1444
- /**
1445
- * Expand `@path/to/file` mentions in a user prompt to inline file
1446
- * content.
1447
- *
1448
- * Why: most interactive coding sessions start with "look at X, then
1449
- * change Y". Typing `@src/loop.ts` reads faster and cheaper than
1450
- * "look at src/loop.ts (and the model fires read_file, and we pay for
1451
- * the round trip)" — the model sees the file content from turn 1
1452
- * instead of round-tripping a tool call for it.
1453
- *
1454
- * Shape: the user's text is kept verbatim. Expanded file contents are
1455
- * appended in a "Referenced files" block at the end, each wrapped in
1456
- * `<file path="...">...</file>` so the model can cite them back
1457
- * unambiguously.
1458
- *
1459
- * Safety: paths must resolve inside `rootDir` (no `..` escape, no
1460
- * absolute paths), must exist as a regular file, and must be under
1461
- * `maxBytes`. Missing / too-large / escaping paths get a short note
1462
- * appended instead of content so the user sees why it was skipped.
1463
- */
705
+ /** Expand `@path` mentions inline. Paths must resolve inside rootDir; escapes / oversize get a skip note, not content. */
1464
706
  /** Caps match tool-result dispatch truncation (0.5.2). */
1465
707
  declare const DEFAULT_AT_MENTION_MAX_BYTES: number;
1466
- /**
1467
- * Default directory names skipped when listing files for the picker.
1468
- * Matches what most repos gitignore AND keeps the picker off the
1469
- * hottest bloat — `node_modules` alone can be 100k+ entries.
1470
- */
1471
708
  declare const DEFAULT_PICKER_IGNORE_DIRS: readonly string[];
1472
709
  interface ListFilesOptions {
1473
710
  /** Cap the walk once we've collected this many entries. Default 500. */
@@ -1475,23 +712,7 @@ interface ListFilesOptions {
1475
712
  /** Directory names to skip entirely. Defaults to {@link DEFAULT_PICKER_IGNORE_DIRS}. */
1476
713
  ignoreDirs?: readonly string[];
1477
714
  }
1478
- /**
1479
- * Walk `root` recursively and return relative file paths (forward-slash
1480
- * separator, regardless of platform) for the `@` picker.
1481
- *
1482
- * Synchronous on purpose: this runs once at App mount (and on each turn
1483
- * so newly-created files show up) and blocks the render thread for a
1484
- * predictable ~10-50ms on a moderate repo. An async variant would need
1485
- * to coordinate with the Ink render loop; sync fits the rest of the
1486
- * TUI's single-turn-per-tick model cleanly.
1487
- *
1488
- * Skips:
1489
- * - directories in `ignoreDirs` (default: DEFAULT_PICKER_IGNORE_DIRS)
1490
- * - any directory whose name starts with `.` (covers `.git`,
1491
- * `.vscode`, dotfile vendors). Dotfile REGULAR FILES (`.env`,
1492
- * `.gitignore`, `.prettierrc`) are kept — users reference them.
1493
- * - entries the walker can't read (permission errors, broken links).
1494
- */
715
+ /** Sync on purpose — fits the TUI's single-turn-per-tick model. Skips dot-DIRS but keeps dotfiles. */
1495
716
  declare function listFilesSync(root: string, opts?: ListFilesOptions): string[];
1496
717
  interface FileWithStats {
1497
718
  /** Relative path with forward-slash separator. */
@@ -1499,46 +720,12 @@ interface FileWithStats {
1499
720
  /** Modification time (Date.getTime() / ms since epoch). 0 when stat failed. */
1500
721
  mtimeMs: number;
1501
722
  }
1502
- /**
1503
- * Same walk as {@link listFilesSync} but also statS each file for
1504
- * modification time. Used by the `@` picker to surface recently-
1505
- * edited files first — matches VS Code Quick Open / similar UX.
1506
- *
1507
- * Stat failures don't throw: the entry is kept with `mtimeMs: 0` so
1508
- * it still appears in the picker (just sinks to the bottom of the
1509
- * recency sort).
1510
- */
723
+ /** Stat failures kept as `mtimeMs: 0` — entry still appears, sinks to bottom of recency sort. */
1511
724
  declare function listFilesWithStatsSync(root: string, opts?: ListFilesOptions): FileWithStats[];
1512
- /**
1513
- * Async variant of {@link listFilesWithStatsSync}. Same walk semantics
1514
- * (DFS, alphabetical, respects ignore + maxResults), but each
1515
- * directory's entries are stat'd in parallel via `Promise.all`,
1516
- * which slashes wall-clock time on Windows where individual stat
1517
- * syscalls are 3-5x slower than Linux.
1518
- *
1519
- * Use this from the TUI mount path so a 500-file repo doesn't add
1520
- * 200-300ms of synchronous block to first paint. Sync variant is
1521
- * kept for paths where the caller can't `await` (server APIs,
1522
- * test scaffolding).
1523
- */
725
+ /** Parallel stat per directory — Windows stat syscalls are 3-5× slower than Linux. */
1524
726
  declare function listFilesWithStatsAsync(root: string, opts?: ListFilesOptions): Promise<FileWithStats[]>;
1525
- /**
1526
- * Prefix pattern used by the `@` picker to detect an IN-PROGRESS
1527
- * mention at the END of the input buffer. Captures the partial path
1528
- * (which may be empty — just `@`) so the picker can use it as a
1529
- * substring filter.
1530
- *
1531
- * Distinct from {@link AT_MENTION_PATTERN} (which finds completed
1532
- * mentions anywhere in the text for expansion-at-submit). This one
1533
- * fires on the trailing token only, anchored at end-of-input.
1534
- */
727
+ /** Trailing-token only, anchored at end-of-input — distinct from `AT_MENTION_PATTERN`, which finds completed mentions anywhere in the text for expansion at submit. */
1535
728
  declare const AT_PICKER_PREFIX: RegExp;
1536
- /**
1537
- * Return the picker state for a given input buffer: the partial query
1538
- * (may be empty string — just `@`) and the buffer offset of the `@`
1539
- * character. `null` when the buffer doesn't end in a mention-in-
1540
- * progress.
1541
- */
1542
729
  declare function detectAtPicker(input: string): {
1543
730
  query: string;
1544
731
  atOffset: number;
@@ -1548,42 +735,10 @@ type PickerCandidate = string | FileWithStats;
1548
735
  interface RankPickerOptions {
1549
736
  /** Upper bound on returned entries. Default 40. */
1550
737
  limit?: number;
1551
- /**
1552
- * Paths the user or model has touched recently (via tool calls like
1553
- * `read_file` / `edit_file`). Matching paths get a recency boost so
1554
- * the picker surfaces "stuff I just looked at" near the top.
1555
- */
1556
738
  recentlyUsed?: readonly string[];
1557
739
  }
1558
- /**
1559
- * Filter and rank candidate files against the picker's partial query.
1560
- *
1561
- * Empty query:
1562
- * - Sort by "recently used" bucket first (if provided), then mtime
1563
- * descending (newer first), then path alpha.
1564
- * - Pure-string input (no mtime data) falls back to alpha since
1565
- * recency info isn't available.
1566
- *
1567
- * Non-empty query:
1568
- * - Case-insensitive substring match, with a basename-prefix boost
1569
- * so `lo` floats `loop.ts`-shaped paths to the top.
1570
- * - Ties broken first by recently-used membership, then mtime.
1571
- *
1572
- * Back-compat: passes `string[]` through the same logic (mtime = 0,
1573
- * recently-used still honored).
1574
- */
1575
740
  declare function rankPickerCandidates(files: readonly PickerCandidate[], query: string, limitOrOpts?: number | RankPickerOptions): string[];
1576
- /**
1577
- * Matches `@` at a word boundary (start-of-string or preceded by
1578
- * whitespace) followed by a path-like token. Deliberately rejects `@`
1579
- * embedded in longer words (email addresses, mentions on social sites)
1580
- * by requiring the word boundary.
1581
- *
1582
- * Path charset keeps it to the characters that appear in real repo
1583
- * paths — letters, digits, `_` `-` `.` `/` `\`. Trailing `.` (e.g.
1584
- * `@foo.ts.`) is stripped before lookup so a sentence-terminating
1585
- * period doesn't break the mention.
1586
- */
741
+ /** Word-boundary anchor rejects `@` embedded in emails / social handles; trailing `.` stripped before lookup. */
1587
742
  declare const AT_MENTION_PATTERN: RegExp;
1588
743
  interface AtMentionExpansion {
1589
744
  /** The raw `@path` token as it appeared in the text. */
@@ -1600,10 +755,6 @@ interface AtMentionExpansion {
1600
755
  interface AtMentionOptions {
1601
756
  /** Max file size in bytes before a mention is skipped. */
1602
757
  maxBytes?: number;
1603
- /**
1604
- * Optional file-system overrides for tests. Real callers omit these;
1605
- * the helper falls through to `node:fs`.
1606
- */
1607
758
  fs?: {
1608
759
  exists: (path: string) => boolean;
1609
760
  isFile: (path: string) => boolean;
@@ -1611,36 +762,12 @@ interface AtMentionOptions {
1611
762
  read: (path: string) => string;
1612
763
  };
1613
764
  }
1614
- /**
1615
- * Expand `@path` mentions in `text`. Returns the (possibly augmented)
1616
- * text plus a per-mention report so the caller can surface expansions
1617
- * in the UI.
1618
- */
1619
765
  declare function expandAtMentions(text: string, rootDir: string, opts?: AtMentionOptions): {
1620
766
  text: string;
1621
767
  expansions: AtMentionExpansion[];
1622
768
  };
1623
769
 
1624
- /**
1625
- * Project memory — a user-authored `REASONIX.md` in the project root
1626
- * that gets pinned into the immutable-prefix system prompt.
1627
- *
1628
- * Design notes:
1629
- *
1630
- * - The file lands in `ImmutablePrefix.system`, so the whole memory
1631
- * block is hashed into the cache prefix fingerprint. Editing the
1632
- * file invalidates the prefix; unchanged memory across sessions
1633
- * keeps the DeepSeek prefix cache warm. That matches Pillar 1 —
1634
- * memory is a deliberate, stable prefix, not per-turn drift.
1635
- * - Only one source: the working-root `REASONIX.md`. No parent walk,
1636
- * no `~/.reasonix/REASONIX.md`, no CLAUDE.md fallback. User-global
1637
- * memory can come later; for v1 one file == one mental model.
1638
- * - Truncated at 8 000 chars (≈ 2k tokens). `.gitignore` gets 2 000
1639
- * because it's a constraint dump; memory gets more headroom because
1640
- * it's deliberate instructions.
1641
- * - Opt-out via `REASONIX_MEMORY=off|false|0`. No CLI flag — memory
1642
- * is a file, `rm REASONIX.md` is the other opt-out.
1643
- */
770
+ /** REASONIX.md pinned into ImmutablePrefix.system; edits invalidate the prefix-cache fingerprint. */
1644
771
  declare const PROJECT_MEMORY_FILE = "REASONIX.md";
1645
772
  declare const PROJECT_MEMORY_MAX_CHARS = 8000;
1646
773
  interface ProjectMemory {
@@ -1653,44 +780,13 @@ interface ProjectMemory {
1653
780
  /** True iff `originalChars > PROJECT_MEMORY_MAX_CHARS`. */
1654
781
  truncated: boolean;
1655
782
  }
1656
- /**
1657
- * Read `REASONIX.md` from `rootDir`. Returns `null` when the file is
1658
- * missing, unreadable, or empty (whitespace-only counts as empty — an
1659
- * empty memory file shouldn't perturb the cache prefix).
1660
- */
783
+ /** Empty / whitespace-only files return null so they don't perturb the cache prefix. */
1661
784
  declare function readProjectMemory(rootDir: string): ProjectMemory | null;
1662
- /**
1663
- * Resolve whether project memory should be read. Default: on.
1664
- * `REASONIX_MEMORY=off|false|0` turns it off (CI, reproducing issues,
1665
- * intentional offline runs).
1666
- */
1667
785
  declare function memoryEnabled(): boolean;
1668
- /**
1669
- * Return `basePrompt` with the project's `REASONIX.md` appended as a
1670
- * "Project memory" section. No-op when the file is absent, empty, or
1671
- * memory is disabled via env.
1672
- *
1673
- * The appended block is deterministic — identical input ⇒ identical
1674
- * output — so every session that opens against the same memory file
1675
- * gets the same prefix hash.
1676
- */
786
+ /** Deterministic — same memory file always yields the same prefix hash. */
1677
787
  declare function applyProjectMemory(basePrompt: string, rootDir: string): string;
1678
788
 
1679
- /**
1680
- * User memory — `~/.reasonix/memory/` markdown notes pinned into the
1681
- * immutable-prefix system prompt across sessions.
1682
- *
1683
- * Two scopes:
1684
- * - `global` → `~/.reasonix/memory/global/` (cross-project)
1685
- * - `project` → `~/.reasonix/memory/<hash>/` (per sandbox root)
1686
- *
1687
- * Each scope has an always-loaded `MEMORY.md` index plus zero-or-more
1688
- * `<name>.md` detail files loaded on demand via `recall_memory`.
1689
- *
1690
- * Distinct from `src/project-memory.ts` (REASONIX.md) in purpose:
1691
- * REASONIX.md is committable, team-shared project memory.
1692
- * ~/.reasonix/memory is user-private memory, never committed.
1693
- */
789
+ /** User-private memory pinned into the immutable prefix; distinct from committable REASONIX.md. */
1694
790
  declare const USER_MEMORY_DIR = "memory";
1695
791
  declare const MEMORY_INDEX_FILE = "MEMORY.md";
1696
792
  /** Cap on the index file content loaded into the prefix, per scope. */
@@ -1719,10 +815,7 @@ interface WriteInput {
1719
815
  description: string;
1720
816
  body: string;
1721
817
  }
1722
- /**
1723
- * Throws on filename injection attempts (`../foo`, `foo/bar`, leading
1724
- * dots, etc.). Allowed: 3-40 chars, alnum + `_` + `-` + interior `.`.
1725
- */
818
+ /** Throws on path-injection (../, /, leading dot). Allowed: 3-40 chars, alnum/_/-, interior `.`. */
1726
819
  declare function sanitizeMemoryName(raw: string): string;
1727
820
  /** Stable 16-hex-char hash of an absolute sandbox root path. */
1728
821
  declare function projectHash(rootDir: string): string;
@@ -1736,10 +829,6 @@ declare class MemoryStore {
1736
829
  pathFor(scope: MemoryScope, name: string): string;
1737
830
  /** True iff this store is configured with a project scope available. */
1738
831
  hasProjectScope(): boolean;
1739
- /**
1740
- * Read the `MEMORY.md` index for a scope. Returns post-cap content
1741
- * (with a truncation marker if clipped), or `null` when absent / empty.
1742
- */
1743
832
  loadIndex(scope: MemoryScope): {
1744
833
  content: string;
1745
834
  originalChars: number;
@@ -1747,108 +836,36 @@ declare class MemoryStore {
1747
836
  } | null;
1748
837
  /** Read one memory file's body (frontmatter stripped). Throws if missing. */
1749
838
  read(scope: MemoryScope, name: string): MemoryEntry;
1750
- /**
1751
- * List every memory in this store. Scans both scopes (skips project
1752
- * scope if unconfigured). Silently skips malformed files; the index
1753
- * must stay queryable even if one file is hand-edited into nonsense.
1754
- */
839
+ /** Skips malformed files — index stays queryable even if one file is hand-edited into nonsense. */
1755
840
  list(): MemoryEntry[];
1756
- /**
1757
- * Write a new memory (or overwrite existing). Creates the scope dir,
1758
- * writes the `.md` file, and regenerates `MEMORY.md`. Returns the
1759
- * absolute path written to.
1760
- */
1761
841
  write(input: WriteInput): string;
1762
842
  /** Delete one memory + its index line. No-op if the file is already gone. */
1763
843
  delete(scope: MemoryScope, rawName: string): boolean;
1764
- /**
1765
- * Rebuild `MEMORY.md` from the `.md` files currently in the scope dir.
1766
- * Called after every write/delete. Sorted by name for stable prefix
1767
- * hashing — two stores with the same set of files produce byte-identical
1768
- * MEMORY.md content, keeping the cache prefix reproducible.
1769
- */
844
+ /** Sorted by name — same file set must produce byte-identical MEMORY.md for stable prefix hashing. */
1770
845
  private regenerateIndex;
1771
846
  }
1772
- /**
1773
- * Append `MEMORY_GLOBAL` and (optionally) `MEMORY_PROJECT` blocks to
1774
- * `basePrompt`. Omits a block entirely when its index is absent — an
1775
- * empty tag would add bytes to the prefix hash without content.
1776
- * Respects `REASONIX_MEMORY=off` via `memoryEnabled()` from
1777
- * `project-memory.ts`.
1778
- */
847
+ /** Empty index → omit the whole block (otherwise we'd add bytes to the prefix hash for nothing). */
1779
848
  declare function applyUserMemory(basePrompt: string, opts?: {
1780
849
  homeDir?: string;
1781
850
  projectRoot?: string;
1782
851
  }): string;
1783
- /**
1784
- * Compose every lazy-loaded prefix block in one call: project REASONIX.md,
1785
- * global REASONIX.md (`#g` destination), user memory indexes (global +
1786
- * per-project), and the skills index. Drop-in replacement for
1787
- * `applyProjectMemory` at CLI entry points. Stacking order is stable —
1788
- * the prefix hash only changes when block *content* changes, not when
1789
- * this helper is called a second time with the same filesystem state.
1790
- */
1791
852
  declare function applyMemoryStack(basePrompt: string, rootDir: string): string;
1792
853
 
1793
- /**
1794
- * Built-in filesystem tools for `reasonix code`.
1795
- *
1796
- * Why native instead of the official `@modelcontextprotocol/server-filesystem`:
1797
- * - No subprocess overhead — every call is 50-200 ms cheaper.
1798
- * - Schema shapes tuned for R1: `edit_file` takes a single
1799
- * SEARCH/REPLACE string instead of `string="false"`-encoded
1800
- * JSON arrays, which was the biggest single source of DSML
1801
- * hallucinations in 0.4.x.
1802
- * - Sandbox enforcement lives here so Reasonix can reason about
1803
- * it (tests cover path-traversal, symlink-escape, and the
1804
- * cwd-outside-root case) rather than trusting an external server.
1805
- * - No `npx install` / network dependency in `reasonix code`.
1806
- *
1807
- * Tool names + argument shapes intentionally mirror the official
1808
- * filesystem server so R1's muscle memory carries over. The only
1809
- * intentional divergence is `edit_file`, noted above.
1810
- */
854
+ /** Native FS tools — sandbox enforced here, not delegated. `edit_file` takes a single SEARCH/REPLACE string. */
1811
855
 
1812
856
  interface FilesystemToolsOptions {
1813
857
  /** Absolute directory the tools may read/write. Paths outside this are refused. */
1814
858
  rootDir: string;
1815
- /**
1816
- * When `false`, register only read-side tools (read_file, list_directory,
1817
- * search_files, get_file_info, directory_tree). Useful for read-only
1818
- * workflows where the model should never mutate the tree. Default: true.
1819
- */
859
+ /** false → register only read-side tools. Default true. */
1820
860
  allowWriting?: boolean;
1821
- /**
1822
- * Cap for a single file read, in bytes. Prevents a stray `read_file`
1823
- * on a multi-GB blob from OOM'ing Node. 2 MB is enough for any realistic
1824
- * source file (the biggest single-file TypeScript project checked in to
1825
- * GitHub is ~500 KB); pass higher when working with data files.
1826
- */
861
+ /** Per-read byte cap; guards against OOM when `read_file` hits a multi-GB blob. */
1827
862
  maxReadBytes?: number;
1828
- /**
1829
- * Cap for total bytes returned from search_files / directory_tree /
1830
- * grep, so the model can't accidentally pull down the whole tree as
1831
- * one giant string. 256 KB by default.
1832
- */
863
+ /** Cap on total bytes from listing/grep tools — bounds tree-as-one-string accidents. */
1833
864
  maxListBytes?: number;
1834
865
  }
1835
866
  declare function registerFilesystemTools(registry: ToolRegistry, opts: FilesystemToolsOptions): ToolRegistry;
1836
867
 
1837
- /**
1838
- * `remember` / `forget` / `recall_memory` — tools that let the model
1839
- * read and write the user-memory store across sessions.
1840
- *
1841
- * Scope rules:
1842
- * - `global` — always available (no sandbox needed).
1843
- * - `project` — requires a `projectRoot` on MemoryStore. In chat mode
1844
- * (no sandbox), the tools still register but a `scope=project` call
1845
- * returns a structured refusal so the model can try `global` instead.
1846
- *
1847
- * Memory changes are written eagerly but NOT re-loaded into the prefix
1848
- * mid-session (cache invariant). The user notices at `/new` or the next
1849
- * launch — or they can read fresh content via `recall_memory` which
1850
- * always hits disk.
1851
- */
868
+ /** Writes are eager but the prefix is NOT re-loaded mid-session — keeps prompt-cache stable. */
1852
869
 
1853
870
  interface MemoryToolsOptions {
1854
871
  /** Sandbox root for the `project` scope. Omit for chat mode. */
@@ -1858,50 +875,13 @@ interface MemoryToolsOptions {
1858
875
  }
1859
876
  declare function registerMemoryTools(registry: ToolRegistry, opts?: MemoryToolsOptions): ToolRegistry;
1860
877
 
1861
- /**
1862
- * ask_choice — the primitive for "user needs to pick between alternatives".
1863
- *
1864
- * Why it exists: `submit_plan` is for ONE concrete plan the user approves.
1865
- * Models routinely misused it to present A/B/C option menus, leaving the
1866
- * user stuck with an approve/refine/cancel picker that had no way to
1867
- * select a route. `ask_choice` gives branching its own tool so plan
1868
- * mode stays about one actionable thing at a time.
1869
- *
1870
- * Shape mirrors `submit_plan`:
1871
- * 1. Model calls `ask_choice` with a question and 2–4 options.
1872
- * 2. The tool throws `ChoiceRequestedError`; the registry serializes
1873
- * the payload via `toToolResult`.
1874
- * 3. TUI parses the tagged error, mounts `ChoiceConfirm`, user picks
1875
- * one option (or types a custom answer via the escape hatch, or
1876
- * cancels).
1877
- * 4. A synthetic user message feeds the choice back — "user picked
1878
- * <id>" or "user answered: <text>" — and the loop resumes.
1879
- *
1880
- * Auto-flatten note: the `options` array of objects is exactly the
1881
- * schema shape that DeepSeek V3/R1 is known to drop. `ToolRegistry`
1882
- * auto-flattens and re-nests on dispatch (Pillar 3), so we don't need
1883
- * to hand-flatten here. We still `sanitizeOptions` at runtime because
1884
- * even with flatten-repair, models occasionally emit empty strings or
1885
- * miss fields entirely.
1886
- */
878
+ /** Branching primitive separate from submit_plan; throws ChoiceRequestedError so the TUI can mount a picker and the model stops. */
1887
879
 
1888
- /**
1889
- * One option in a branching question. `id` is what gets fed back to
1890
- * the model when the user picks; keep it short and stable (A, B, C,
1891
- * or option-1 / option-2 / ...). `summary` is optional extra context
1892
- * the UI shows as a dimmed sub-line under the title.
1893
- */
1894
880
  interface ChoiceOption {
1895
881
  id: string;
1896
882
  title: string;
1897
883
  summary?: string;
1898
884
  }
1899
- /**
1900
- * Thrown by `ask_choice`. Carries the branching question plus the
1901
- * options list out to the TUI via the `toToolResult` protocol. The
1902
- * error message tells the model to STOP so it doesn't race past the
1903
- * picker with more tool calls — same pattern as `PlanProposedError`.
1904
- */
1905
885
  declare class ChoiceRequestedError extends Error {
1906
886
  readonly question: string;
1907
887
  readonly options: ChoiceOption[];
@@ -1915,47 +895,17 @@ declare class ChoiceRequestedError extends Error {
1915
895
  };
1916
896
  }
1917
897
  interface ChoiceToolOptions {
1918
- /**
1919
- * Side-channel preview fired when the model asks. The tool-result
1920
- * event also carries the payload; this is the earlier hook for
1921
- * test harnesses or alternative UIs that don't want to parse JSON.
1922
- */
1923
898
  onChoiceRequested?: (question: string, options: ChoiceOption[]) => void;
1924
899
  }
1925
900
  declare function registerChoiceTool(registry: ToolRegistry, opts?: ChoiceToolOptions): ToolRegistry;
1926
901
 
1927
- /**
1928
- * Shared types for Plan Mode. Consumed by plan-errors.ts (error
1929
- * classes carry these as fields) and plan-core.ts (tool registration
1930
- * validates against them). Kept in a separate module so a consumer
1931
- * that only wants the types doesn't pull in either the error classes
1932
- * or the registration machinery.
1933
- */
1934
902
  type PlanStepRisk = "low" | "med" | "high";
1935
- /**
1936
- * Structured step in a submitted plan. Optional — plans can still be
1937
- * pure markdown. When provided, each step is addressable by `id` so
1938
- * the model can later mark it complete via `mark_step_complete`.
1939
- */
1940
903
  interface PlanStep {
1941
904
  id: string;
1942
905
  title: string;
1943
906
  action: string;
1944
- /**
1945
- * Optional self-reported risk level. Drives the colored dot gutter
1946
- * in PlanConfirm / PlanCheckpointConfirm: green (low) / yellow
1947
- * (med) / red (high). High-risk steps are the ones the user should
1948
- * actually read before approving — everything else is noise.
1949
- * Omitted when the model didn't categorize (treated as neutral).
1950
- */
1951
907
  risk?: PlanStepRisk;
1952
908
  }
1953
- /**
1954
- * Payload surfaced by `mark_step_complete` via `PlanCheckpointError`.
1955
- * The TUI parses the tool result JSON, pushes a `✓ step` progress row,
1956
- * and mounts the checkpoint picker. `kind` is kept on the payload so
1957
- * consumers that peek at the JSON can dispatch on a stable tag.
1958
- */
1959
909
  interface StepCompletion {
1960
910
  kind: "step_completed";
1961
911
  stepId: string;
@@ -1964,33 +914,13 @@ interface StepCompletion {
1964
914
  notes?: string;
1965
915
  }
1966
916
 
1967
- /**
1968
- * Error classes for Plan Mode tools. Each one implements the
1969
- * `toToolResult` protocol so `ToolRegistry.dispatch` serializes the
1970
- * structured payload into the tool-result JSON — the TUI parses that
1971
- * shape to mount the right picker (approve / checkpoint / revise).
1972
- *
1973
- * Types live in plan-types.ts; registration logic in plan-core.ts.
1974
- * Dependency direction: plan-core → plan-errors → plan-types.
1975
- */
917
+ /** Plan-mode errors carry `toToolResult` so dispatch serializes structured payloads the TUI parses to mount pickers. */
1976
918
 
1977
- /**
1978
- * Thrown by `submit_plan` when the model has produced a plan for the
1979
- * user to approve. Carries the markdown body, optional structured
1980
- * steps, and an optional one-line summary. The TUI uses all three to
1981
- * render the PlanConfirm picker.
1982
- */
1983
919
  declare class PlanProposedError extends Error {
1984
920
  readonly plan: string;
1985
921
  readonly steps?: PlanStep[];
1986
922
  readonly summary?: string;
1987
923
  constructor(plan: string, steps?: PlanStep[], summary?: string);
1988
- /**
1989
- * Structured tool-result shape. Consumed by the TUI to extract the
1990
- * plan without regex-scraping the error message. Optional fields
1991
- * are omitted from the payload when absent so consumers don't see
1992
- * `undefined` keys in the JSON.
1993
- */
1994
924
  toToolResult(): {
1995
925
  error: string;
1996
926
  plan: string;
@@ -1998,13 +928,6 @@ declare class PlanProposedError extends Error {
1998
928
  summary?: string;
1999
929
  };
2000
930
  }
2001
- /**
2002
- * Thrown by `mark_step_complete`. The registry serializes the
2003
- * structured payload via `toToolResult`, the TUI catches the error
2004
- * tag and pauses the loop until the user decides continue / revise /
2005
- * stop. The error message tells the model to stop calling tools so
2006
- * it doesn't race past the picker.
2007
- */
2008
931
  declare class PlanCheckpointError extends Error {
2009
932
  readonly stepId: string;
2010
933
  readonly title?: string;
@@ -2020,19 +943,7 @@ declare class PlanCheckpointError extends Error {
2020
943
  error: string;
2021
944
  } & StepCompletion;
2022
945
  }
2023
- /**
2024
- * Thrown by `revise_plan`. Carries the proposed remaining-step list,
2025
- * a one-sentence reason, and an optional updated summary out to the
2026
- * TUI. Mirrors PlanProposedError / PlanCheckpointError. The picker
2027
- * shows a diff between the current remaining steps and the proposed
2028
- * ones; the user accepts (replaces) or rejects (keeps current).
2029
- *
2030
- * Why a separate tool from submit_plan: revising is surgical (replace
2031
- * the tail of an in-flight plan), submitting is a fresh proposal.
2032
- * Different intent, different UI. Calling submit_plan again mid-
2033
- * execution would reset the whole plan including done steps, which
2034
- * is heavier than usually needed.
2035
- */
946
+ /** Surgical replace of in-flight plan tail; submit_plan would reset done steps. */
2036
947
  declare class PlanRevisionProposedError extends Error {
2037
948
  readonly reason: string;
2038
949
  readonly remainingSteps: PlanStep[];
@@ -2046,195 +957,51 @@ declare class PlanRevisionProposedError extends Error {
2046
957
  };
2047
958
  }
2048
959
 
2049
- /**
2050
- * Plan Mode tool registration. Owns `registerPlanTool` — which wires
2051
- * `submit_plan`, `mark_step_complete`, and `revise_plan` into a
2052
- * ToolRegistry — plus the arg sanitizers these tools share.
2053
- *
2054
- * Structure rationale: the three registrations are parallel in shape
2055
- * (each throws a structured error the TUI renders as a picker), so
2056
- * they're broken out into `registerSubmitPlan` / `registerMarkStep` /
2057
- * `registerRevisePlan` — one per screen of logic rather than one
2058
- * 230-line `registerPlanTool` body. Tool descriptions live at the top
2059
- * as named constants so the function bodies stay readable; the strings
2060
- * themselves are long because they teach the model when to call each
2061
- * tool, which is load-bearing behavior.
2062
- *
2063
- * Dependency direction: plan-core → plan-errors → plan-types.
2064
- */
2065
-
2066
960
  interface PlanToolOptions {
2067
- /**
2068
- * Optional side-channel callback fired when the model submits a plan.
2069
- * The TUI uses this to preview the plan in real time (the tool-result
2070
- * event is also emitted; this is just earlier and friendlier to
2071
- * test harnesses that don't want to parse JSON).
2072
- */
2073
961
  onPlanSubmitted?: (plan: string, steps?: PlanStep[]) => void;
2074
- /**
2075
- * Optional callback fired when the model marks a step complete via
2076
- * `mark_step_complete`. Analogous to `onPlanSubmitted` — the tool
2077
- * event carries the same payload, but this firing point is earlier
2078
- * and avoids JSON parsing for consumers that don't need it.
2079
- */
2080
962
  onStepCompleted?: (update: StepCompletion) => void;
2081
- /**
2082
- * Optional preview callback fired when the model proposes a plan
2083
- * revision via `revise_plan`. Same earlier-than-event timing as
2084
- * the other on* hooks.
2085
- */
2086
963
  onPlanRevisionProposed?: (reason: string, remainingSteps: PlanStep[], summary?: string) => void;
2087
964
  }
2088
965
  declare function registerPlanTool(registry: ToolRegistry, opts?: PlanToolOptions): ToolRegistry;
2089
966
 
2090
- /**
2091
- * Subagent runtime — isolated child loops for offloading exploration or
2092
- * self-contained subtasks.
2093
- *
2094
- * Two surfaces sit on top of the same `spawnSubagent` core:
2095
- *
2096
- * 1. `registerSubagentTool` — exposes a low-level `spawn_subagent`
2097
- * function-call tool. Library API. NOT registered into the model
2098
- * tool list by `reasonix code` since 0.4.26 — Skills (with
2099
- * `runAs: subagent` frontmatter) became the user-facing surface.
2100
- * Kept exported because library callers and tests still want
2101
- * direct access to the primitive.
2102
- *
2103
- * 2. `run_skill` (in src/tools/skills.ts) — when the resolved skill
2104
- * has `runAs: subagent`, it calls `spawnSubagent` with the skill
2105
- * body as the system prompt and the user's `arguments` as the
2106
- * task. Subagent skills are listed in the pinned Skills index
2107
- * with a 🧬 marker, which gives the model a clear pattern-match
2108
- * trigger without forcing it to reason about "is this task big
2109
- * enough to delegate."
2110
- *
2111
- * Why R1 specifically benefits:
2112
- * - R1 reasoning tokens are expensive AND inflate the parent context.
2113
- * A subagent runs its own private loop, then surfaces only the
2114
- * distilled final answer back to the parent — the main session
2115
- * never sees the reasoning trail.
2116
- *
2117
- * Invariants common to both surfaces:
2118
- * - Serial only — no parallel spawn (MVP).
2119
- * - Inherits parent's tool registry MINUS `spawn_subagent` itself
2120
- * (no recursion via the tool API) and MINUS `submit_plan`
2121
- * (subagents don't propose plans to the user).
2122
- * - No hooks, no session — runs are ephemeral.
2123
- * - Lower default `maxToolIters` than the parent (16 vs 64).
2124
- * - Independent prefix cache (subagent's prefix has its own
2125
- * fingerprint).
2126
- * - Parent registry's plan-mode state propagates: subagents can't
2127
- * escape `/plan`.
2128
- * - Non-streaming child loop — the parent isn't watching deltas, so
2129
- * streaming would only add an SSE parser to the critical path.
2130
- * Cancellation still works via the AbortSignal.
2131
- */
967
+ /** Isolated child loop. Inherits parent registry minus spawn_subagent + submit_plan; no hooks; non-streaming. */
2132
968
 
2133
- /**
2134
- * Live event emitted by a running subagent. Surfaced via the optional
2135
- * `sink` ref the TUI attaches its handler to. Side-channel only — these
2136
- * events do NOT pass through the parent loop's `LoopEvent` stream
2137
- * because subagents run inside a tool-dispatch frame, after the parent's
2138
- * `step()` has already yielded `tool_start` and is awaiting the result.
2139
- */
969
+ /** Side-channel — subagents run inside a tool-dispatch frame, can't go through parent's `LoopEvent` stream. */
2140
970
  interface SubagentEvent {
2141
971
  kind: "start" | "progress" | "end";
2142
- /** First ~30 chars of the task prompt — used for the TUI status row. */
2143
972
  task: string;
2144
- /** Skill that spawned this subagent, when applicable. Stamped on every event so the TUI/logger can attribute without extra plumbing. */
2145
973
  skillName?: string;
2146
- /** Model id the child loop ran on. Stamped alongside skillName. */
2147
974
  model?: string;
2148
- /** Iteration count inside the child loop (number of tool results so far). */
2149
975
  iter?: number;
2150
- /** Wall-clock ms since the subagent started. */
2151
976
  elapsedMs?: number;
2152
- /** First ~120 chars of the final assistant message. Set on `end`. */
2153
977
  summary?: string;
2154
- /** Error message if the subagent failed. Set on `end`. */
2155
978
  error?: string;
2156
- /** Total turns the subagent took. Set on `end`. */
2157
979
  turns?: number;
2158
- /** Total USD spent inside the child loop. Set on `end`. */
2159
980
  costUsd?: number;
2160
- /** Aggregated child-loop Usage (sum across turns). Set on `end`. */
2161
981
  usage?: Usage;
2162
982
  }
2163
- /**
2164
- * Mutable ref the registration writes through. The TUI sets `.current`
2165
- * to its own handler on mount; nothing receives events before that
2166
- * happens (and headless callers leave `.current = null`, which is the
2167
- * library-mode default — they read the final result from the helper's
2168
- * return value instead).
2169
- */
2170
983
  interface SubagentSink {
2171
984
  current: ((ev: SubagentEvent) => void) | null;
2172
985
  }
2173
986
  interface SubagentToolOptions {
2174
- /** Shared DeepSeek client. */
2175
987
  client: DeepSeekClient;
2176
- /**
2177
- * Default system prompt used when the model doesn't pass one. Project
2178
- * memory (REASONIX.md) is appended automatically when `projectRoot` is
2179
- * set.
2180
- */
2181
988
  defaultSystem?: string;
2182
- /** Project root for `applyProjectMemory` lookup. Omit in chat mode. */
2183
989
  projectRoot?: string;
2184
- /** Default model. `deepseek-v4-flash` by default (see DEFAULT_SUBAGENT_MODEL). */
2185
990
  defaultModel?: string;
2186
- /** Iteration ceiling. Lower than the parent (16 by default). */
2187
991
  maxToolIters?: number;
2188
- /** Maximum chars returned in the tool result. */
2189
992
  maxResultChars?: number;
2190
- /** Optional sink the TUI attaches its handler to for live updates. */
2191
993
  sink?: SubagentSink;
2192
994
  }
2193
- /**
2194
- * Register the spawn_subagent tool into the parent registry. Library
2195
- * surface — `reasonix code` does NOT call this since 0.4.26 (Skills
2196
- * with `runAs: subagent` are the user-facing surface), but library
2197
- * consumers who want the low-level tool can opt in.
2198
- */
995
+ /** Library surface only — `reasonix code` uses Skills `runAs: subagent` as the user-facing path. */
2199
996
  declare function registerSubagentTool(parentRegistry: ToolRegistry, opts: SubagentToolOptions): ToolRegistry;
2200
- /**
2201
- * Build a child ToolRegistry that copies every tool from `parent` except
2202
- * those whose names are in `exclude`. Plan-mode state propagates so a
2203
- * subagent spawned while the parent is under `/plan` cannot escape it.
2204
- *
2205
- * Exported for tests + library callers who want the same fork behavior
2206
- * for their own nested-loop patterns.
2207
- */
997
+ /** Plan-mode state propagates — a subagent spawned under `/plan` MUST NOT escape it. */
2208
998
  declare function forkRegistryExcluding(parent: ToolRegistry, exclude: ReadonlySet<string>): ToolRegistry;
2209
999
 
2210
- /**
2211
- * Long-running process registry — the "background run" counterpart to
2212
- * `run_command`. `run_command` spawns a child, waits for it to exit,
2213
- * then returns combined output; perfect for tests / builds / one-shots
2214
- * but useless for `npm run dev` / `python -m http.server` / watchers,
2215
- * which never exit and just time the tool out.
2216
- *
2217
- * JobRegistry lets the model fire-and-almost-forget: we spawn the
2218
- * child, wait at most `waitSec` (default 3s) OR until output matches
2219
- * a readiness regex, then return the startup preview plus a job id.
2220
- * The child keeps running in the background; later tool calls tail
2221
- * its output, stop it, or list what's still alive.
2222
- *
2223
- * Shape-wise this is modeled on Claude Code's `BashOutput` / `KillBash`
2224
- * pair. We diverge on one point: ready-signal detection is on by default
2225
- * because dev servers almost universally print "Local:", "listening on",
2226
- * "ready in N ms", "compiled successfully" when they come up — short-
2227
- * circuiting the wait on those keeps the model's first tool-result
2228
- * useful ("server is up at http://localhost:5173") instead of spending
2229
- * the full 3s on a stabilization timer.
2230
- */
1000
+ /** Background process registry for never-exiting commands; ready-signal detection short-circuits the startup wait. */
2231
1001
  interface JobStartOptions {
2232
1002
  /** Absolute path to cwd for the spawned child. */
2233
1003
  cwd: string;
2234
- /**
2235
- * Max seconds to wait for the initial burst before returning. Capped
2236
- * at 30. A ready-signal match short-circuits this. Default 3.
2237
- */
1004
+ /** Capped at 30; ready-signal match short-circuits. Default 3. */
2238
1005
  waitSec?: number;
2239
1006
  /** Signal plumbed through from the calling tool's AbortSignal. */
2240
1007
  signal?: AbortSignal;
@@ -2262,10 +1029,7 @@ interface JobRecord {
2262
1029
  exitCode: number | null;
2263
1030
  /** Combined stdout+stderr, ring-trimmed. */
2264
1031
  output: string;
2265
- /**
2266
- * Total bytes ever written by the child (not just what's in `output`).
2267
- * Useful for "how much got dropped" diagnostics.
2268
- */
1032
+ /** Counts all bytes the child wrote, not just what's still buffered in `output`. */
2269
1033
  totalBytesWritten: number;
2270
1034
  /** True iff the child is still alive. */
2271
1035
  running: boolean;
@@ -2275,36 +1039,17 @@ interface JobRecord {
2275
1039
  declare class JobRegistry {
2276
1040
  private readonly jobs;
2277
1041
  private nextId;
2278
- /**
2279
- * Spawn a background child. Resolves after `waitSec` OR on ready
2280
- * signal OR on early exit, whichever comes first. The child continues
2281
- * to run (and buffer output) regardless of which path fires.
2282
- */
1042
+ /** Resolves on (a) ready signal, (b) early exit, or (c) waitSec deadline — child keeps running regardless. */
2283
1043
  start(command: string, opts: JobStartOptions): Promise<JobStartResult>;
2284
- /**
2285
- * Read a job's accumulated output. `since` lets a caller poll
2286
- * incrementally: pass the byte count returned from the last call to
2287
- * get only newly-written content. Returns both full output and a
2288
- * running snapshot so the caller can use whichever.
2289
- */
2290
1044
  read(id: number, opts?: {
2291
1045
  since?: number;
2292
1046
  tailLines?: number;
2293
1047
  }): JobReadResult | null;
2294
- /**
2295
- * Send SIGTERM, wait `graceMs`, then SIGKILL if still alive. Returns
2296
- * the final job record (or null when the job id is unknown). Safe to
2297
- * call on an already-exited job — returns the record unchanged.
2298
- */
1048
+ /** SIGTERM, wait graceMs, then SIGKILL. Idempotent on already-exited jobs. */
2299
1049
  stop(id: number, opts?: {
2300
1050
  graceMs?: number;
2301
1051
  }): Promise<JobRecord | null>;
2302
1052
  list(): JobRecord[];
2303
- /**
2304
- * Best-effort kill of every still-running job. Called on TUI shutdown
2305
- * so dev servers don't outlive the Reasonix process. Resolves after
2306
- * every child has closed or a hard deadline passes (3s total).
2307
- */
2308
1053
  shutdown(deadlineMs?: number): Promise<void>;
2309
1054
  /** Count of still-running jobs — drives the TUI status-bar indicator. */
2310
1055
  runningCount(): number;
@@ -2320,109 +1065,25 @@ interface JobReadResult {
2320
1065
  spawnError?: string;
2321
1066
  }
2322
1067
 
2323
- /**
2324
- * Native shell tool — lets the model run commands inside the sandbox
2325
- * root so it can actually verify its own work (run tests, check git
2326
- * status, inspect a lockfile, etc.). Without this the coding-mode
2327
- * loop is "write code, hope it works, ask the user to run it" —
2328
- * defeats the purpose.
2329
- *
2330
- * Safety model:
2331
- * - Commands run with `cwd` pinned to the registered root. No
2332
- * path traversal via the command itself is enforced (users can
2333
- * `cat ../outside.txt`); the trust boundary is the directory
2334
- * you opened Reasonix from.
2335
- * - Commands are matched against a read-only / testing allowlist.
2336
- * Allowlisted commands execute immediately and return stdout +
2337
- * stderr merged. Everything else throws with a clear message —
2338
- * the UI translates that into an `/apply`-style confirm gate so
2339
- * the user sees the exact command before it runs.
2340
- * - Default timeout: 60s. Output cap: matches tool-result budget.
2341
- * - Every command that DOES run is spawned with `shell: false` and
2342
- * a tokenized argv — no string-to-shell interpolation, so the
2343
- * model can't accidentally construct a chained `rm` via quoting.
2344
- *
2345
- * This is intentionally narrower than what Claude Code / Aider ship:
2346
- * we gate more commands behind confirmation by default. Users who
2347
- * trust the model can widen the allowlist by instantiating their
2348
- * own tool registry.
2349
- */
1068
+ /** cwd pinned to root; non-allowlisted commands throw to a UI confirm gate; spawn is `shell: false`, tokenized argv only. */
2350
1069
 
2351
1070
  interface ShellToolsOptions {
2352
1071
  /** Directory to run commands in. Must be an absolute path. */
2353
1072
  rootDir: string;
2354
1073
  /** Seconds before an individual command is killed. Default: 60. */
2355
1074
  timeoutSec?: number;
2356
- /**
2357
- * Per-command stdout+stderr cap in characters. Default: 32_000 to
2358
- * match the tool-result budget.
2359
- */
2360
1075
  maxOutputChars?: number;
2361
- /**
2362
- * Extra command-name prefixes the user explicitly trusts. Added on
2363
- * top of the built-in allowlist. Examples: `["my-ci-script", "lint"]`.
2364
- *
2365
- * Accepts either a fixed array (captured once at registration) or a
2366
- * getter called on every dispatch. The getter form is load-bearing:
2367
- * when the TUI's `ShellConfirm` writes a new prefix to config mid-
2368
- * session, the running `run_command` must pick it up immediately —
2369
- * otherwise the same command gets re-prompted until the next launch.
2370
- */
1076
+ /** Getter form is load-bearing — newly-persisted "always allow" prefixes MUST take effect mid-session. */
2371
1077
  extraAllowed?: readonly string[] | (() => readonly string[]);
2372
- /**
2373
- * When true, skip the allowlist entirely and auto-run every command.
2374
- * Off by default — this is an escape hatch for non-interactive use
2375
- * (CI, benchmarks) where a human can't be in the loop to confirm.
2376
- *
2377
- * Accepts either a static boolean (captured once) or a getter called
2378
- * on every dispatch. The getter form is what `reasonix code` uses to
2379
- * wire `editMode === "yolo"` into the registry: flipping the mode
2380
- * mid-session must take effect on the next tool call without forcing
2381
- * a re-registration. Static `true` is fine for CI / benchmark code.
2382
- */
1078
+ /** Getter form lets `editMode === "yolo"` flip mid-session without re-registering tools. */
2383
1079
  allowAll?: boolean | (() => boolean);
2384
- /**
2385
- * Background-process registry shared between `run_background`,
2386
- * `job_output`, `stop_job`, `list_jobs`, and the /jobs /kill slashes.
2387
- * When omitted, the registrar builds its own — but the caller
2388
- * usually wants to provide one so the TUI can tail it too.
2389
- */
2390
1080
  jobs?: JobRegistry;
2391
1081
  }
2392
- /**
2393
- * Tokenize a shell-ish command string into argv. Handles single/double
2394
- * quoting; rejects unclosed quotes. Does NOT expand env vars, globs,
2395
- * backticks, or `$(…)` — the goal is to prevent the model from
2396
- * accidentally (or not) sneaking arbitrary shells past the allowlist
2397
- * via concatenation. Exported for testing.
2398
- */
1082
+ /** No env / glob / backtick / `$(…)` expansion — prevents bypass of allowlist via concatenation. */
2399
1083
  declare function tokenizeCommand(cmd: string): string[];
2400
- /**
2401
- * Scan `cmd` for a shell operator (`|`, `||`, `>`, `>>`, `<`, `<<`,
2402
- * `&`, `&&`, `2>`, `2>>`, `2>&1`, `&>`) that appears unquoted at a
2403
- * token boundary. Returns the operator string, or null if none.
2404
- *
2405
- * Why this exists: `run_command` documents "no shell expansion, no
2406
- * pipes, no redirects" (the tool spawns argv directly, not through a
2407
- * shell), but when the model writes `dir | findstr foo` the `|`
2408
- * survives tokenization as a standalone token and gets quoted as the
2409
- * literal string `"|"` by `quoteForCmdExe` — cmd.exe sees it as an
2410
- * argument, not an operator, so the pipe silently fails. Detecting
2411
- * operators up front lets us throw a clear error ("split into separate
2412
- * calls") instead of letting the command run with surprising results.
2413
- *
2414
- * Quoted operators (`grep "a|b"`) and operator characters embedded in
2415
- * larger tokens (`--flag=1&2`) are NOT flagged — those are literal
2416
- * argv bytes and are safe to pass through.
2417
- *
2418
- * Exported for testing.
2419
- */
1084
+ /** Up-front detection — without it, `dir | findstr foo` quotes `|` literal and pipe silently fails. */
2420
1085
  declare function detectShellOperator(cmd: string): string | null;
2421
- /**
2422
- * Return true when `cmd` matches an allowlisted prefix. Exported for
2423
- * testing. Match is on the space-normalized leading tokens so
2424
- * `git status -s ` and `git status` both match `git status`.
2425
- */
1086
+ /** Match on space-normalized leading tokens — `git status -s` matches the `git status` prefix. */
2426
1087
  declare function isAllowed(cmd: string, extra?: readonly string[]): boolean;
2427
1088
  interface RunCommandResult {
2428
1089
  exitCode: number | null;
@@ -2437,95 +1098,28 @@ declare function runCommand(cmd: string, opts: {
2437
1098
  maxOutputChars?: number;
2438
1099
  signal?: AbortSignal;
2439
1100
  }): Promise<RunCommandResult>;
2440
- /**
2441
- * Test/override hooks for {@link resolveExecutable}. Omitting any field
2442
- * falls through to the real process globals — the runtime call path
2443
- * uses defaults; tests inject `platform` + `env` + `isFile` to exercise
2444
- * Windows-specific lookup from a Linux CI runner without touching
2445
- * actual fs.
2446
- */
2447
1101
  interface ResolveExecutableOptions {
2448
1102
  platform?: NodeJS.Platform;
2449
1103
  env?: {
2450
1104
  PATH?: string;
2451
1105
  PATHEXT?: string;
2452
1106
  };
2453
- /** Predicate swapped in by tests to avoid creating real files. */
2454
1107
  isFile?: (path: string) => boolean;
2455
- /** Path.join used for the lookup. Defaults to Windows semantics on Windows. */
2456
1108
  pathDelimiter?: string;
2457
1109
  }
2458
- /**
2459
- * Resolve a bare command name (e.g. `npm`) to its on-disk path via
2460
- * PATH × PATHEXT on Windows. Returns the input unchanged on non-Windows
2461
- * platforms, when the input is already a path (contains `/`, `\`, or is
2462
- * absolute), or when no match is found in PATH × PATHEXT (caller gets a
2463
- * natural ENOENT from spawn, which surfaces cleanly).
2464
- *
2465
- * Why this exists: `child_process.spawn` with `shell: false` invokes
2466
- * Windows `CreateProcess`, which does not honor `PATHEXT` and does not
2467
- * search for `.cmd` / `.bat` wrappers. Node-ecosystem tools ship as
2468
- * `npm.cmd`, `npx.cmd`, `yarn.cmd`, etc., so a bare `npm` fails with
2469
- * ENOENT under `shell: false`. Flipping to `shell: true` would work
2470
- * but reintroduces shell-expansion (pipes, redirects, chained cmds)
2471
- * that the tool was explicitly designed to forbid. This resolver
2472
- * threads the needle.
2473
- */
1110
+ /** CreateProcess ignores PATHEXT — bare `npm` fails ENOENT under `shell:false` without this resolver. */
2474
1111
  declare function resolveExecutable(cmd: string, opts?: ResolveExecutableOptions): string;
2475
- /**
2476
- * Prepare `(bin, args, spawnOpts)` for the runCommand spawn call,
2477
- * applying Windows-specific workarounds for (a) PATHEXT lookup and
2478
- * (b) the CVE-2024-27980 prohibition on direct `.cmd`/`.bat` spawns.
2479
- *
2480
- * Exported so tests can assert the transformation without booting an
2481
- * actual child process.
2482
- */
1112
+ /** Windows workarounds: PATHEXT lookup + CVE-2024-27980 prohibition on direct `.cmd`/`.bat` spawn. */
2483
1113
  declare function prepareSpawn(argv: readonly string[], opts?: ResolveExecutableOptions): {
2484
1114
  bin: string;
2485
1115
  args: string[];
2486
1116
  spawnOverrides: SpawnOptions;
2487
1117
  };
2488
- /**
2489
- * Locate `-Command` / `-c` in `args` and prepend the UTF-8 setup prelude
2490
- * to its value. Returns the patched args, or `null` when no `-Command`
2491
- * arg is present (in which case we leave the invocation untouched —
2492
- * inline-expression and script-file modes have their own conventions
2493
- * we don't want to silently rewrite).
2494
- *
2495
- * Why not always wrap: PowerShell's quoting semantics are finicky enough
2496
- * that adding a prelude to a script file invocation could break it.
2497
- * `-Command` is the case the model actually uses, and where mojibake
2498
- * matters; targeting just it keeps the blast radius small.
2499
- *
2500
- * Exported for tests.
2501
- */
1118
+ /** Targets `-Command` only — PowerShell quoting is finicky enough that wrapping script-file mode could break it. */
2502
1119
  declare function injectPowerShellUtf8(args: readonly string[]): string[] | null;
2503
- /**
2504
- * Prefix a cmd.exe command line with `chcp 65001 >nul &` so output
2505
- * (from cmd.exe and any child it spawns) is UTF-8-encoded. Without
2506
- * this, on Chinese / Japanese / Korean Windows, `dir`, `findstr`,
2507
- * `where`, etc. emit text in the system codepage (CP936, CP932,
2508
- * CP949, …) and `chunk.toString()` — which decodes as UTF-8 — produces
2509
- * garbled mojibake the model then sees as poisoned input on the next
2510
- * turn.
2511
- *
2512
- * Scope: chcp affects ONLY this cmd.exe instance, which exits after
2513
- * `/c`. No global console state changes. Single `&` (not `&&`) so the
2514
- * command still runs even on the rare Windows builds where chcp
2515
- * itself returns a non-zero exit (Win7 quirks; harmless on Win10+).
2516
- *
2517
- * Exported so tests can verify the wrapping shape.
2518
- */
1120
+ /** Single `&` (not `&&`) so the command still runs on Win7 where chcp can return non-zero. */
2519
1121
  declare function withUtf8Codepage(cmdline: string): string;
2520
- /**
2521
- * Quote an argument so cmd.exe parses it back as a single token. We
2522
- * always wrap in double quotes when the arg contains whitespace or
2523
- * any cmd.exe metacharacter, doubling embedded quotes per cmd.exe's
2524
- * `""` escape rule. Bare alphanumeric args pass through unquoted for
2525
- * readability in logs.
2526
- *
2527
- * Exported for test coverage of the quoting semantics.
2528
- */
1122
+ /** Doubles embedded quotes per cmd.exe's `""` escape rule; bare alnum passes through unquoted. */
2529
1123
  declare function quoteForCmdExe(arg: string): string;
2530
1124
  /** Error thrown by `run_command` when the command isn't allowlisted. */
2531
1125
  declare class NeedsConfirmationError extends Error {
@@ -2535,22 +1129,7 @@ declare class NeedsConfirmationError extends Error {
2535
1129
  declare function registerShellTools(registry: ToolRegistry, opts: ShellToolsOptions): ToolRegistry;
2536
1130
  declare function formatCommandResult(cmd: string, r: RunCommandResult): string;
2537
1131
 
2538
- /**
2539
- * Built-in web search + fetch tools.
2540
- *
2541
- * - `web_search(query, topK?)` — Mojeek's public search page. No API
2542
- * key, no signup. We originally shipped this backed by DuckDuckGo's
2543
- * HTML endpoint, but DDG started serving anti-bot interstitials
2544
- * (HTTP 202 with a challenge page) for every unauthenticated POST.
2545
- * Mojeek runs its own independent index, is bot-friendly, and
2546
- * returns parseable HTML.
2547
- * - `web_fetch(url)` — HTTP GET + naïve HTML-to-text extraction.
2548
- *
2549
- * Both are registered by default on `reasonix chat` / `reasonix code`;
2550
- * set `search: false` in config (or `REASONIX_SEARCH=off`) to turn
2551
- * them off. The model decides when to call them based on the query —
2552
- * no slash command required.
2553
- */
1132
+ /** web_search uses Mojeek (DDG returns anti-bot 202 to unauthenticated POSTs); web_fetch sniffs HTML to text. */
2554
1133
 
2555
1134
  interface SearchResult {
2556
1135
  title: string;
@@ -2575,44 +1154,11 @@ interface WebSearchOptions {
2575
1154
  topK?: number;
2576
1155
  signal?: AbortSignal;
2577
1156
  }
2578
- /**
2579
- * Search the public web via Mojeek. Returns up to `topK` ranked
2580
- * results with title, url, snippet.
2581
- *
2582
- * Mojeek is an independent index (not a Google/Bing front-end) which
2583
- * means coverage on niche or very recent topics can be thinner, but
2584
- * it's reliable from scripts and doesn't gate on cookies or sessions.
2585
- * If the response has 0 results we distinguish "truly empty" from
2586
- * "layout changed or blocked" so the caller isn't left guessing.
2587
- */
1157
+ /** Distinguishes "truly 0 results" from "layout changed / blocked" so callers can tell. */
2588
1158
  declare function webSearch(query: string, opts?: WebSearchOptions): Promise<SearchResult[]>;
2589
- /**
2590
- * Extract results from a Mojeek search page.
2591
- *
2592
- * Mojeek's stable shape (as of April 2026):
2593
- * <a … class="ob" href="URL"> … breadcrumb … </a>
2594
- * <h2><a class="title" href="URL">Title</a></h2>
2595
- * <p class="s">snippet text …</p>
2596
- *
2597
- * We do two tolerant passes — title anchors, then snippet paragraphs —
2598
- * and pair them positionally. Attribute order inside a tag varies
2599
- * between versions, so each pass captures the whole element and we
2600
- * re-extract href / inner text with a second regex. Exported for
2601
- * unit testing against a fixture.
2602
- */
1159
+ /** Title-anchor + snippet-paragraph passes paired positionally — robust to attribute reorder. */
2603
1160
  declare function parseMojeekResults(html: string): SearchResult[];
2604
- /**
2605
- * Download a URL, strip HTML down to readable text, return it. Times
2606
- * out at 15s, caps extracted text at 32k chars to fit the tool-result
2607
- * budget.
2608
- */
2609
1161
  declare function webFetch(url: string, opts?: WebFetchOptions): Promise<PageContent>;
2610
- /**
2611
- * Strip HTML to readable text. Removes scripts/styles/nav/footer/aside
2612
- * blocks first, then tags, then collapses whitespace. Not a Readability
2613
- * clone — purpose-built to keep the extracted text small enough for the
2614
- * tool-result budget while preserving paragraph breaks.
2615
- */
2616
1162
  declare function htmlToText(html: string): string;
2617
1163
  interface WebToolsOptions {
2618
1164
  /** Default top-K for `web_search` when the model doesn't specify. */
@@ -2620,30 +1166,10 @@ interface WebToolsOptions {
2620
1166
  /** Byte cap for `web_fetch` extracted text. */
2621
1167
  maxFetchChars?: number;
2622
1168
  }
2623
- /**
2624
- * Register `web_search` + `web_fetch` on a ToolRegistry. The model
2625
- * invokes them automatically when a question needs current info —
2626
- * no slash command from the user is required.
2627
- */
2628
1169
  declare function registerWebTools(registry: ToolRegistry, opts?: WebToolsOptions): ToolRegistry;
2629
1170
  declare function formatSearchResults(query: string, results: SearchResult[]): string;
2630
1171
 
2631
- /**
2632
- * Session persistence.
2633
- *
2634
- * Every turn's log entries (user / assistant / tool messages) are appended to
2635
- * a JSONL file under `~/.reasonix/sessions/<name>.jsonl`. Next time the user
2636
- * starts the CLI with the same session name, the loop pre-loads the file
2637
- * into its AppendOnlyLog so the new turn has full prior context.
2638
- *
2639
- * Design notes:
2640
- * - JSONL rather than JSON so concurrent writes don't corrupt.
2641
- * - 0600 permissions on Unix (chmod no-ops on Windows).
2642
- * - Name sanitization keeps paths safe: only [\w-] and CJK letters pass;
2643
- * anything else is replaced with underscore, max 64 chars.
2644
- * - The loop's stats/session aren't persisted — only the message log.
2645
- * Cost accounting resets each run (by design — old costs are sunk).
2646
- */
1172
+ /** JSONL append-only message log under `~/.reasonix/sessions/`; concurrent-write safe. */
2647
1173
 
2648
1174
  interface SessionInfo {
2649
1175
  name: string;
@@ -2660,32 +1186,9 @@ declare function appendSessionMessage(name: string, message: ChatMessage): void;
2660
1186
  declare function listSessions(): SessionInfo[];
2661
1187
  declare function deleteSession(name: string): boolean;
2662
1188
 
2663
- /**
2664
- * Minimal `.env` loader; no dependency on dotenv.
2665
- *
2666
- * Reads KEY=VALUE lines and populates `process.env` for keys not already set.
2667
- * Silently no-ops if the file is missing. Safe to call from library entry
2668
- * points, CLI commands, examples, and benchmark runners.
2669
- */
2670
1189
  declare function loadDotenv(path?: string): void;
2671
1190
 
2672
- /**
2673
- * Transcript format — the canonical "audit log" of a Reasonix session.
2674
- *
2675
- * Design split:
2676
- * - Session file (`~/.reasonix/sessions/<name>.jsonl`) stores only the
2677
- * `ChatMessage`s the model needs to resume. See session.ts.
2678
- * - Transcript file (this module) stores every LoopEvent with usage, cost,
2679
- * model, and prefix fingerprint attached where available — enough for
2680
- * replay and diff to reconstruct economics.
2681
- *
2682
- * The two are different contracts: sessions are the user's *memory*;
2683
- * transcripts are the *receipts*. Don't conflate them.
2684
- *
2685
- * Backward compatibility: all fields beyond {ts, turn, role, content} are
2686
- * optional on read. A v0.1 transcript (pre-usage) still parses and renders
2687
- * — it just shows cost/cache as n/a.
2688
- */
1191
+ /** Transcripts are receipts (cost/usage/prefix); sessions are memory (ChatMessages). Don't conflate. */
2689
1192
 
2690
1193
  interface TranscriptRecord {
2691
1194
  /** ISO-8601 timestamp at emit time. */
@@ -2706,28 +1209,14 @@ interface TranscriptRecord {
2706
1209
  cost?: number;
2707
1210
  /** Model id that produced this turn. */
2708
1211
  model?: string;
2709
- /**
2710
- * The ImmutablePrefix fingerprint at this turn. Lets diff prove two runs
2711
- * share a prefix — i.e. any cache-hit delta is attributable to log
2712
- * stability, not to a different system prompt.
2713
- */
1212
+ /** Lets diff attribute cache-hit delta to log stability vs prompt change. */
2714
1213
  prefixHash?: string;
2715
- /**
2716
- * Structured plan state extracted by the Pillar 2 harvester. Present on
2717
- * assistant_final records when harvest was enabled and produced non-empty
2718
- * state. Omitted entirely when harvest is off or produced nothing —
2719
- * absence means "no data", not "empty plan".
2720
- */
1214
+ /** Absent means "no data", not "empty plan". */
2721
1215
  planState?: TypedPlanState;
2722
1216
  /** Optional error message (role === "error"). */
2723
1217
  error?: string;
2724
1218
  }
2725
1219
  interface TranscriptMeta {
2726
- /**
2727
- * Optional metadata written as the first line of a transcript. Lets
2728
- * downstream tooling know what it's reading without guessing.
2729
- * Recognized by a special role "_meta".
2730
- */
2731
1220
  version: 1;
2732
1221
  source: string;
2733
1222
  model?: string;
@@ -2740,11 +1229,6 @@ interface ReadTranscriptResult {
2740
1229
  meta: TranscriptMeta | null;
2741
1230
  records: TranscriptRecord[];
2742
1231
  }
2743
- /**
2744
- * Build a TranscriptRecord from a LoopEvent. Extra fields (model,
2745
- * prefixHash) that the LoopEvent doesn't carry are passed in separately
2746
- * because they're session-level, not event-level.
2747
- */
2748
1232
  declare function recordFromLoopEvent(ev: LoopEvent, extra: {
2749
1233
  model: string;
2750
1234
  prefixHash: string;
@@ -2761,30 +1245,11 @@ declare function writeMeta(stream: WriteStream, meta: TranscriptMeta): void;
2761
1245
  * Convenience: open a stream, write meta, return stream.
2762
1246
  */
2763
1247
  declare function openTranscriptFile(path: string, meta: TranscriptMeta): WriteStream;
2764
- /**
2765
- * Parse a transcript file. Returns meta (if the first line is a _meta record)
2766
- * and the full record list.
2767
- *
2768
- * Robustness contract:
2769
- * - Empty lines are skipped.
2770
- * - Malformed JSON lines are skipped silently (do not crash on partial
2771
- * files — live chats may be mid-write).
2772
- * - Records missing optional fields still parse — they're just rendered
2773
- * with n/a where the optional value would go.
2774
- */
1248
+ /** Tolerant: empty / malformed lines skipped, missing optionals OK — live chats may be mid-write. */
2775
1249
  declare function readTranscript(path: string): ReadTranscriptResult;
2776
1250
  declare function parseTranscript(raw: string): ReadTranscriptResult;
2777
1251
 
2778
- /**
2779
- * Replay — reconstruct session economics from a transcript file.
2780
- *
2781
- * Given a transcript written by App.tsx or the bench runner, rebuild a
2782
- * SessionSummary-compatible aggregate (turn count, total cost, cache-hit
2783
- * ratio, vs-Claude estimate) without replaying the LLM calls.
2784
- *
2785
- * The whole point is offline auditing: a reader should be able to reproduce
2786
- * the headline numbers from a transcript alone, without an API key.
2787
- */
1252
+ /** Reconstruct session economics from a transcript alone — offline audit, no API key. */
2788
1253
 
2789
1254
  interface ReplayStats extends SessionSummary {
2790
1255
  /** Per-turn stats, in turn order. Only assistant_final records contribute. */
@@ -2804,35 +1269,13 @@ interface ReplayStats extends SessionSummary {
2804
1269
  /** Sum of subgoals across all harvested turns. */
2805
1270
  totalSubgoals: number;
2806
1271
  }
2807
- /**
2808
- * Parse a transcript file and compute replay stats. Throws only on I/O
2809
- * errors; malformed lines inside the file are skipped silently.
2810
- */
2811
1272
  declare function replayFromFile(path: string): {
2812
1273
  parsed: ReadTranscriptResult;
2813
1274
  stats: ReplayStats;
2814
1275
  };
2815
1276
  declare function computeReplayStats(records: TranscriptRecord[]): ReplayStats;
2816
1277
 
2817
- /**
2818
- * Diff — compare two transcripts and produce a summary + divergence report.
2819
- *
2820
- * Two transcripts are "comparable" when they stem from the same task (or
2821
- * the same user prompt). Alignment is by turn number: assistant_final #N
2822
- * in A pairs with assistant_final #N in B. If one side ran more turns, the
2823
- * extras are labeled "only in A" / "only in B".
2824
- *
2825
- * What we compute:
2826
- * - Aggregate deltas: turns, tool calls, cache hit, cost, token counts
2827
- * - First divergence: the lowest turn where A and B's tool calls or
2828
- * assistant text differ meaningfully
2829
- * - Prefix-stability story: how many unique prefix hashes each side used
2830
- *
2831
- * Non-goals (deliberately):
2832
- * - LLM-judge quality comparison
2833
- * - Per-token delta rendering — not useful at the fidelity we're at
2834
- * - Embedding similarity — Levenshtein ratio is cheap and good enough
2835
- */
1278
+ /** Transcript diff — pairs assistant_final by turn number; unmatched extras become only_in_a / only_in_b. */
2836
1279
 
2837
1280
  interface DiffSide {
2838
1281
  label: string;
@@ -2846,13 +1289,6 @@ interface TurnPair {
2846
1289
  bAssistant?: TranscriptRecord;
2847
1290
  aTools: TranscriptRecord[];
2848
1291
  bTools: TranscriptRecord[];
2849
- /**
2850
- * Classification of the pair:
2851
- * "match" — both sides present, text & tool calls within threshold
2852
- * "diverge" — both sides present, but text or tool calls differ
2853
- * "only_in_a" — assistant_final in A but not B
2854
- * "only_in_b" — assistant_final in B but not A
2855
- */
2856
1292
  kind: "match" | "diverge" | "only_in_a" | "only_in_b";
2857
1293
  /** When kind === "diverge", a short one-liner pointing at what differs. */
2858
1294
  divergenceNote?: string;
@@ -2870,11 +1306,7 @@ declare function diffTranscripts(a: {
2870
1306
  label: string;
2871
1307
  parsed: ReadTranscriptResult;
2872
1308
  }): DiffReport;
2873
- /**
2874
- * Normalized Levenshtein similarity ratio in [0, 1]. 1 = identical.
2875
- * Early-exits for long strings (> 2000 chars) with a cheap token-overlap
2876
- * estimate to keep diff fast on chatty transcripts.
2877
- */
1309
+ /** Falls back to token-overlap above 2000 chars to keep diff fast on chatty transcripts. */
2878
1310
  declare function similarity(a: string, b: string): number;
2879
1311
  interface RenderOptions {
2880
1312
  /** Monochrome output (for file redirection or piping). Defaults to true. */
@@ -2883,26 +1315,7 @@ interface RenderOptions {
2883
1315
  declare function renderSummaryTable(report: DiffReport, _opts?: RenderOptions): string;
2884
1316
  declare function renderMarkdown(report: DiffReport): string;
2885
1317
 
2886
- /**
2887
- * MCP (Model Context Protocol) type definitions.
2888
- *
2889
- * Hand-rolled rather than importing @modelcontextprotocol/sdk because:
2890
- * - Reasonix's value-add isn't reimplementing the protocol, but *caching*
2891
- * it. Owning the types lets us tune them for our integration (strip
2892
- * fields we don't use, add the ones we do like Reasonix's prefixHash).
2893
- * - Zero dependencies — consistent with how we wrote the DeepSeek client.
2894
- * - If Anthropic bumps the SDK and introduces a breaking change, we're
2895
- * insulated as long as we keep up with the spec itself.
2896
- *
2897
- * Spec reference: https://spec.modelcontextprotocol.io/ (2024-11-05 draft
2898
- * at time of writing). Reasonix models the subset it consumes: tools
2899
- * list/call, resources list/read, prompts list/get, plus the init
2900
- * handshake. Sampling and progress notifications remain deferred.
2901
- *
2902
- * Transport note: the wire format for stdio MCP is **newline-delimited
2903
- * JSON** (NDJSON), not the LSP-style Content-Length header framing that
2904
- * some readers might expect. One JSON-RPC message per line.
2905
- */
1318
+ /** MCP types (spec 2024-11-05). Stdio wire format is NDJSON — one JSON-RPC message per line, no Content-Length framing. */
2906
1319
  type JsonRpcId = string | number;
2907
1320
  interface JsonRpcRequest<P = unknown> {
2908
1321
  jsonrpc: "2.0";
@@ -2968,13 +1381,6 @@ interface ListToolsResult {
2968
1381
  tools: McpTool[];
2969
1382
  nextCursor?: string;
2970
1383
  }
2971
- /**
2972
- * Server → client notification emitted during a long-running request
2973
- * that the client subscribed to via `_meta.progressToken`. `progress`
2974
- * and `total` are typically matched units (files scanned, bytes
2975
- * processed, etc.); `total` may be missing when the server can't
2976
- * estimate the upper bound up front.
2977
- */
2978
1384
  interface ProgressNotificationParams {
2979
1385
  progressToken: string | number;
2980
1386
  progress: number;
@@ -3004,11 +1410,6 @@ interface CallToolResult {
3004
1410
  /** True = tool raised an error; the content describes it. */
3005
1411
  isError?: boolean;
3006
1412
  }
3007
- /**
3008
- * A resource the server can expose — think "file the model can read."
3009
- * The URI is opaque to the client: servers may use `file://`, custom
3010
- * schemes, or bare strings. Reasonix doesn't interpret them.
3011
- */
3012
1413
  interface McpResource {
3013
1414
  uri: string;
3014
1415
  name: string;
@@ -3020,12 +1421,7 @@ interface ListResourcesResult {
3020
1421
  resources: McpResource[];
3021
1422
  nextCursor?: string;
3022
1423
  }
3023
- /**
3024
- * One resource can return multiple content blobs (e.g. the file + a
3025
- * side-car). `text` is the common case for UTF-8 content; `blob` is
3026
- * base64-encoded bytes for binary content. Servers populate exactly
3027
- * one of the two for each entry.
3028
- */
1424
+ /** Server populates exactly one of `text` (UTF-8) or `blob` (base64) per entry. */
3029
1425
  interface McpResourceContentsText {
3030
1426
  uri: string;
3031
1427
  mimeType?: string;
@@ -3040,10 +1436,6 @@ type McpResourceContents = McpResourceContentsText | McpResourceContentsBlob;
3040
1436
  interface ReadResourceResult {
3041
1437
  contents: McpResourceContents[];
3042
1438
  }
3043
- /**
3044
- * A parameterizable prompt template the server exposes. Clients fetch
3045
- * it with `prompts/get` and pass the result to the model as-is.
3046
- */
3047
1439
  interface McpPromptArgument {
3048
1440
  name: string;
3049
1441
  description?: string;
@@ -3058,12 +1450,6 @@ interface ListPromptsResult {
3058
1450
  prompts: McpPrompt[];
3059
1451
  nextCursor?: string;
3060
1452
  }
3061
- /**
3062
- * MCP prompt messages are modeled after chat completions: role + content.
3063
- * Content can be a text block OR (per the spec) a resource/image block;
3064
- * Reasonix cares about text in v1, but surfaces the raw array so callers
3065
- * can render other kinds if they need to.
3066
- */
3067
1453
  interface McpPromptMessage {
3068
1454
  role: "user" | "assistant";
3069
1455
  content: McpContentBlock | McpPromptResourceBlock;
@@ -3081,23 +1467,8 @@ declare const MCP_PROTOCOL_VERSION = "2024-11-05";
3081
1467
  /** Type guard — success vs error response. */
3082
1468
  declare function isJsonRpcError(msg: JsonRpcResponse): msg is JsonRpcError;
3083
1469
 
3084
- /**
3085
- * Stdio transport for MCP.
3086
- *
3087
- * MCP's stdio wire format is **newline-delimited JSON** (one JSON-RPC
3088
- * message per line). We spawn the server as a child process, write
3089
- * frames to its stdin, parse its stdout line-by-line as they arrive.
3090
- *
3091
- * Transport is abstracted behind an interface so unit tests can fake it
3092
- * with an in-process duplex pair — spawning real servers in unit tests
3093
- * is flaky and slow.
3094
- */
1470
+ /** MCP stdio = newline-delimited JSON-RPC; transport iface lets tests fake it without spawning. */
3095
1471
 
3096
- /**
3097
- * A transport sends JSON-RPC messages upstream and surfaces messages
3098
- * arriving downstream via an async iterator. One instance per server
3099
- * connection.
3100
- */
3101
1472
  interface McpTransport {
3102
1473
  /** Send one JSON-RPC message. Resolves when the bytes are accepted. */
3103
1474
  send(message: JsonRpcMessage): Promise<void>;
@@ -3116,19 +1487,9 @@ interface StdioTransportOptions {
3116
1487
  replaceEnv?: boolean;
3117
1488
  /** CWD for the child. Default: process.cwd(). */
3118
1489
  cwd?: string;
3119
- /**
3120
- * Spawn through a shell. Default: true on win32 (needed to resolve
3121
- * `.cmd` wrappers like `npx.cmd`, `pnpm.cmd`), false elsewhere.
3122
- * Explicitly pass `false` to opt out on Windows; pass `true` to force
3123
- * it on POSIX (rarely needed).
3124
- */
1490
+ /** Default true on win32 to resolve `.cmd`/`.bat` wrappers (npx.cmd etc.). */
3125
1491
  shell?: boolean;
3126
1492
  }
3127
- /**
3128
- * Spawn `command args...` as a child process and use its stdin/stdout as
3129
- * an MCP transport. Stderr is forwarded to the parent's stderr so server
3130
- * diagnostics are still visible.
3131
- */
3132
1493
  declare class StdioTransport implements McpTransport {
3133
1494
  private readonly child;
3134
1495
  private readonly queue;
@@ -3145,12 +1506,6 @@ declare class StdioTransport implements McpTransport {
3145
1506
  private push;
3146
1507
  }
3147
1508
 
3148
- /**
3149
- * MCP client — request/response correlation, initialize handshake,
3150
- * tools/list, tools/call. Built on top of a McpTransport so the same
3151
- * logic works against a real stdio server or an in-process fake.
3152
- */
3153
-
3154
1509
  interface McpClientOptions {
3155
1510
  transport: McpTransport;
3156
1511
  clientInfo?: McpClientInfo;
@@ -3180,50 +1535,21 @@ declare class McpClient {
3180
1535
  get protocolVersion(): string;
3181
1536
  /** Optional free-form instructions the server provides at handshake. */
3182
1537
  get serverInstructions(): string | undefined;
3183
- /**
3184
- * Complete the initialize → initialized handshake. Must be called
3185
- * before any other method (otherwise compliant servers reject).
3186
- */
1538
+ /** Compliant servers reject other methods until this completes. */
3187
1539
  initialize(): Promise<InitializeResult>;
3188
1540
  /** List tools the server exposes. */
3189
1541
  listTools(): Promise<ListToolsResult>;
3190
- /**
3191
- * Invoke a tool by name. When `onProgress` is supplied, attaches a
3192
- * fresh progress token so the server can send incremental updates
3193
- * via `notifications/progress`; they're routed to the callback until
3194
- * the final response arrives (or the request times out, in which
3195
- * case the handler is simply dropped — no extra notification).
3196
- *
3197
- * When `signal` is supplied, aborting it:
3198
- * 1) fires `notifications/cancelled` to the server (MCP 2024-11-05
3199
- * way of saying "forget this request, I no longer care"), and
3200
- * 2) rejects the pending promise immediately with an AbortError,
3201
- * so the caller doesn't have to wait for the subprocess to
3202
- * finish its in-flight file write or network request.
3203
- * The server MAY still emit a late response; we drop it in dispatch
3204
- * since the request id is gone from `pending`.
3205
- */
1542
+ /** Abort sends `notifications/cancelled` and rejects immediately; late server responses are dropped. */
3206
1543
  callTool(name: string, args?: Record<string, unknown>, opts?: {
3207
1544
  onProgress?: McpProgressHandler;
3208
1545
  signal?: AbortSignal;
3209
1546
  }): Promise<CallToolResult>;
3210
- /**
3211
- * List resources the server exposes. Supports a pagination cursor;
3212
- * callers interested in the full set should loop on `nextCursor`.
3213
- * Servers that don't support resources respond with method-not-found
3214
- * (−32601) — we surface that as a thrown Error so callers can gate
3215
- * on the `serverCapabilities.resources` field first.
3216
- */
1547
+ /** Throws on method-not-found; callers should gate on `serverCapabilities.resources` first. */
3217
1548
  listResources(cursor?: string): Promise<ListResourcesResult>;
3218
1549
  /** Read the contents of a resource by URI. */
3219
1550
  readResource(uri: string): Promise<ReadResourceResult>;
3220
1551
  /** List prompt templates the server exposes. */
3221
1552
  listPrompts(cursor?: string): Promise<ListPromptsResult>;
3222
- /**
3223
- * Fetch a rendered prompt by name. `args` supplies values for any
3224
- * required template arguments; the server validates. Returns messages
3225
- * ready to prepend to the model's input.
3226
- */
3227
1553
  getPrompt(name: string, args?: Record<string, string>): Promise<GetPromptResult>;
3228
1554
  /** Close the transport and reject any outstanding requests. */
3229
1555
  close(): Promise<void>;
@@ -3234,27 +1560,7 @@ declare class McpClient {
3234
1560
  private dispatch;
3235
1561
  }
3236
1562
 
3237
- /**
3238
- * HTTP+SSE transport for MCP (spec version 2024-11-05).
3239
- *
3240
- * Wire shape:
3241
- * 1. Client opens GET to the SSE URL with `Accept: text/event-stream`.
3242
- * 2. Server's first SSE event is `event: endpoint`, `data: <url>` — the
3243
- * URL (relative or absolute) the client must POST JSON-RPC requests
3244
- * to. All subsequent server → client messages arrive as `event: message`
3245
- * SSE frames carrying a JSON-RPC response or server-initiated frame.
3246
- * 3. Client POSTs each outgoing JSON-RPC frame to the endpoint URL.
3247
- * The POST response body is ignored — replies land on the SSE stream.
3248
- *
3249
- * This transport exists so Reasonix can talk to hosted/remote MCP servers
3250
- * (e.g. a company's internal knowledge server fronted by auth). Stdio
3251
- * covers local subprocesses; SSE covers everything else.
3252
- *
3253
- * Note: the newer "Streamable HTTP" transport (2025 spec) folds the POST
3254
- * and SSE streams onto a single endpoint. We stay on 2024-11-05 here —
3255
- * that's what `MCP_PROTOCOL_VERSION` advertises in the initialize handshake
3256
- * and what currently-published servers implement.
3257
- */
1563
+ /** MCP HTTP+SSE transport (spec 2024-11-05) — POST endpoint URL arrives as the first `event: endpoint` SSE frame. */
3258
1564
 
3259
1565
  interface SseTransportOptions {
3260
1566
  /** SSE endpoint URL, e.g. `https://mcp.example.com/sse`. */
@@ -3262,10 +1568,6 @@ interface SseTransportOptions {
3262
1568
  /** Extra headers sent on both the SSE GET and the JSON-RPC POSTs (e.g. `Authorization`). */
3263
1569
  headers?: Record<string, string>;
3264
1570
  }
3265
- /**
3266
- * Open an SSE stream to `url`, parse incoming events into JsonRpcMessages,
3267
- * POST outgoing frames to the endpoint URL the server advertises.
3268
- */
3269
1571
  declare class SseTransport implements McpTransport {
3270
1572
  private readonly url;
3271
1573
  private readonly headers;
@@ -3289,40 +1591,7 @@ declare class SseTransport implements McpTransport {
3289
1591
  private markClosed;
3290
1592
  }
3291
1593
 
3292
- /**
3293
- * Streamable HTTP transport for MCP (spec version 2025-03-26).
3294
- *
3295
- * Wire shape (single endpoint, no separate POST URL handshake):
3296
- *
3297
- * 1. Client POSTs each outgoing JSON-RPC frame to the endpoint with
3298
- * `Accept: application/json, text/event-stream`. The server picks
3299
- * ONE of three responses:
3300
- * a. `202 Accepted`, no body → notification or response
3301
- * was accepted; nothing more to deliver.
3302
- * b. `200 OK`, `Content-Type: application/json` → body is a
3303
- * single JSON-RPC response (or batch). Connection closes.
3304
- * c. `200 OK`, `Content-Type: text/event-stream` → an SSE
3305
- * stream of `event: message` frames carrying responses,
3306
- * server-initiated requests, and notifications. Stream may
3307
- * close after the matching response or stay open longer.
3308
- * 2. The server may include `Mcp-Session-Id: <opaque>` on the response
3309
- * to `initialize`. Client echoes that header on every subsequent
3310
- * request. A 404 on a request with a session id means the session
3311
- * expired — caller must reinitialize.
3312
- *
3313
- * Compared to 2024-11-05 HTTP+SSE:
3314
- * - No two-endpoint dance (no `event: endpoint` handshake).
3315
- * - Replies arrive on the POST response, not on a separate GET stream.
3316
- * - Session continuity is explicit (`Mcp-Session-Id`), not implicit.
3317
- *
3318
- * Not yet implemented in this transport (acceptable for v1):
3319
- * - Long-lived GET stream for unsolicited server-initiated frames
3320
- * (sampling requests, etc.). Most MCP servers we care about today
3321
- * don't issue server-initiated requests, and POST-only handles
3322
- * full request/response/notification traffic. Add when a real
3323
- * server we're integrating against needs it.
3324
- * - Resumability via `Last-Event-ID` on reconnect.
3325
- */
1594
+ /** MCP Streamable HTTP transport (2025-03-26) — POST-only; no long-lived GET stream, no Last-Event-ID resume. */
3326
1595
 
3327
1596
  interface StreamableHttpTransportOptions {
3328
1597
  /** Streamable HTTP endpoint URL, e.g. `https://mcp.example.com/mcp`. */
@@ -3351,46 +1620,16 @@ declare class StreamableHttpTransport implements McpTransport {
3351
1620
  private pushMessage;
3352
1621
  }
3353
1622
 
3354
- /**
3355
- * Bridge: register an MCP server's tools into a Reasonix ToolRegistry.
3356
- *
3357
- * This is the integration surface. Once done, `CacheFirstLoop` sees the
3358
- * MCP tools as if they were native — they inherit Cache-First + repair
3359
- * (scavenge / truncation / storm) automatically. That's the payoff: any
3360
- * MCP ecosystem tool, wrapped in Reasonix's Pillar 1 + Pillar 3 benefits.
3361
- */
3362
-
3363
1623
  interface BridgeOptions {
3364
- /**
3365
- * Prefix prepended to every MCP tool name when registered. Defaults to
3366
- * empty (no prefix). Useful when bridging multiple servers into one
3367
- * registry and names collide — e.g. `fs` + `gh` both exposing `search`.
3368
- */
1624
+ /** Prefix for tool names — disambiguates collisions when bridging multiple servers. */
3369
1625
  namePrefix?: string;
3370
1626
  /** Registry to populate. Creates a fresh one if omitted. */
3371
1627
  registry?: ToolRegistry;
3372
1628
  /** Auto-flatten deep schemas (Pillar 3). Defaults to the registry's own default (true). */
3373
1629
  autoFlatten?: boolean;
3374
- /**
3375
- * Per-tool-call result cap, in characters. If a tool returns more than
3376
- * this, the result is truncated and a `[…truncated N chars…]` marker is
3377
- * appended before the last KB so the model still sees a useful tail.
3378
- * Defaults to {@link DEFAULT_MAX_RESULT_CHARS}.
3379
- *
3380
- * Why this exists: DeepSeek V3's context is 131,072 tokens. A single
3381
- * `read_file` against a big source file can return >3 MB of text
3382
- * (~900k tokens) and permanently poison the session — every subsequent
3383
- * turn rebuilds the history and 400s. This cap is a floor. Users who
3384
- * legitimately want bigger payloads can raise it explicitly.
3385
- */
1630
+ /** Cap on tool result chars; head+tail truncation. Floor against context-poisoning oversized reads. */
3386
1631
  maxResultChars?: number;
3387
- /**
3388
- * Callback fired for every `notifications/progress` frame the server
3389
- * emits during any bridged tool call. Includes the registered
3390
- * (prefix-applied) tool name so a multi-server UI can attribute
3391
- * progress correctly. Absent → no `_meta.progressToken` is sent and
3392
- * the server won't emit progress for these calls.
3393
- */
1632
+ /** Absent → no `_meta.progressToken` sent and server won't emit progress. */
3394
1633
  onProgress?: (info: {
3395
1634
  toolName: string;
3396
1635
  progress: number;
@@ -3398,22 +1637,8 @@ interface BridgeOptions {
3398
1637
  message?: string;
3399
1638
  }) => void;
3400
1639
  }
3401
- /**
3402
- * 32,000 chars ≈ 8k English tokens, or ~16k CJK tokens. Small enough to
3403
- * fit comfortably in history even across 5–10 tool calls, large enough
3404
- * that most file reads and directory listings fit un-truncated.
3405
- */
3406
1640
  declare const DEFAULT_MAX_RESULT_CHARS = 32000;
3407
- /**
3408
- * Token-aware cap for tool results, in DeepSeek V3 tokens.
3409
- *
3410
- * 8,000 tokens ≈ 6% of DeepSeek V3's 131K context. One oversized tool
3411
- * result can't eat more than that no matter what character density the
3412
- * content has. The char cap (32K chars) only bounds tokens for English
3413
- * — CJK text at 1 char/token blows past 16K tokens under the same
3414
- * ceiling. With the tokenizer shipped in 0.5.0 we can cap the thing
3415
- * that actually matters.
3416
- */
1641
+ /** ~6% of DeepSeek V3 context. Char cap alone fails on CJK (~1 char/token). */
3417
1642
  declare const DEFAULT_MAX_RESULT_TOKENS = 8000;
3418
1643
  interface BridgeResult {
3419
1644
  registry: ToolRegistry;
@@ -3425,86 +1650,18 @@ interface BridgeResult {
3425
1650
  reason: string;
3426
1651
  }>;
3427
1652
  }
3428
- /**
3429
- * Walk a connected `McpClient`'s tools/list result, register each into a
3430
- * Reasonix `ToolRegistry`. Each registered `fn` proxies through the
3431
- * client's tools/call. Tool results are flattened into a string (joining
3432
- * text blocks with newlines, prefixing image blocks as placeholders) so
3433
- * they fit Reasonix's existing tool-dispatch contract.
3434
- */
3435
1653
  declare function bridgeMcpTools(client: McpClient, opts?: BridgeOptions): Promise<BridgeResult>;
3436
1654
  interface FlattenOptions {
3437
1655
  /** Cap the flattened string at this many characters. Default: no cap. */
3438
1656
  maxChars?: number;
3439
1657
  }
3440
- /**
3441
- * Turn an MCP CallToolResult into a string — the contract Reasonix's
3442
- * ToolRegistry.dispatch returns. We:
3443
- * - join text blocks with newlines (most common case)
3444
- * - stringify image blocks as placeholders (LLM can't use bytes anyway
3445
- * in Reasonix's current surface; image support comes with multimodal
3446
- * prompts later)
3447
- * - prefix error results with "ERROR: " so the calling model sees the
3448
- * failure clearly even through JSON mode
3449
- * - optionally truncate to `maxChars` so a single oversized tool result
3450
- * (e.g. a big `read_file`) can't poison the session by blowing past
3451
- * the model's context window
3452
- */
3453
1658
  declare function flattenMcpResult(result: CallToolResult, opts?: FlattenOptions): string;
3454
- /**
3455
- * Keep the head AND a short tail so the model sees both "what the tool
3456
- * started returning" and "how it ended". Head-only loses file endings
3457
- * (e.g. an error message appended at the bottom of a stack trace); the
3458
- * 1KB tail window covers that while costing almost nothing. Exported for
3459
- * tests and reuse by non-MCP tool adapters that want the same policy.
3460
- */
1659
+ /** Head + 1KB tail so error messages at end of stack traces aren't lost. */
3461
1660
  declare function truncateForModel(s: string, maxChars: number): string;
3462
- /**
3463
- * Token-aware truncation. Same head+tail policy as `truncateForModel`,
3464
- * but sizes the slices against a DeepSeek V3 token budget instead of a
3465
- * raw character count — so CJK text (which previously survived at 2×
3466
- * the token cost per char) gets capped at the same effective context
3467
- * footprint as English.
3468
- *
3469
- * Strategy: fast path when `s.length <= maxTokens` (every token is ≥1
3470
- * char, so this bounds tokens ≤ maxTokens — skip tokenize entirely).
3471
- * Short-ish strings are confirmed against the real token count.
3472
- * Long strings go straight to char-sliced head+tail with one or two
3473
- * tokenize-verify-and-shrink rounds per slice — we deliberately never
3474
- * tokenize the full input, because pathological repetitive text
3475
- * (megabytes of `AAAA…`) can cost 30s+ on the pure-TS BPE port.
3476
- */
1661
+ /** Never tokenizes full input — pathological repetitive text (`AAAA…`) costs 30s+ on the pure-TS BPE port. */
3477
1662
  declare function truncateForModelByTokens(s: string, maxTokens: number): string;
3478
1663
 
3479
- /**
3480
- * Parse the `--mcp` CLI argument into a transport-tagged spec.
3481
- *
3482
- * Accepted forms:
3483
- * "name=command args..." → stdio, namespaced (tools prefixed with `name_`)
3484
- * "command args..." → stdio, anonymous
3485
- * "name=https://host/sse" → HTTP+SSE (2024-11-05), namespaced
3486
- * "https://host/sse" → HTTP+SSE (2024-11-05), anonymous
3487
- * "name=streamable+https://host/mcp" → Streamable HTTP (2025-03-26), namespaced
3488
- * "streamable+https://host/mcp" → Streamable HTTP (2025-03-26), anonymous
3489
- * ("http://" / "streamable+http://" also honored — useful for local dev.)
3490
- *
3491
- * The identifier regex before `=` is deliberately narrow
3492
- * (`[a-zA-Z_][a-zA-Z0-9_]*`) so Windows drive letters ("C:\\...") and
3493
- * other strings containing `=` or `:` don't accidentally trigger the
3494
- * namespace branch. If a user ever wants their command to literally start
3495
- * with `foo=...` as a bare command, they can wrap it in quotes inside the
3496
- * shell command string.
3497
- *
3498
- * Transport selection:
3499
- * - body starts with `streamable+http(s)://` → Streamable HTTP. The
3500
- * `streamable+` prefix is stripped from the URL we hand the transport.
3501
- * - body starts with `http(s)://` → HTTP+SSE (2024-11-05).
3502
- * Default for plain http URLs to preserve back-compat with users who
3503
- * already have `--mcp https://...` config entries pointed at SSE
3504
- * servers; opt into Streamable HTTP explicitly.
3505
- * - anything else → stdio (including ws://,
3506
- * which will surface later as a spawn error).
3507
- */
1664
+ /** Plain http:// stays HTTP+SSE for back-compat; Streamable HTTP is opt-in via the `streamable+` URL prefix. */
3508
1665
  interface StdioMcpSpec {
3509
1666
  transport: "stdio";
3510
1667
  /** Namespace prefix applied to each registered tool, or null if anonymous. */
@@ -3529,16 +1686,7 @@ interface StreamableHttpMcpSpec {
3529
1686
  type McpSpec = StdioMcpSpec | SseMcpSpec | StreamableHttpMcpSpec;
3530
1687
  declare function parseMcpSpec(input: string): McpSpec;
3531
1688
 
3532
- /**
3533
- * Gather a full inspection report from an initialized MCP client:
3534
- * server info, capabilities, tools, resources, prompts. Methods the
3535
- * server doesn't support come back as `{ supported: false }` instead
3536
- * of throwing, so a CLI or UI can render a consistent "what this
3537
- * server exposes" summary even against minimal implementations.
3538
- *
3539
- * Pure with respect to I/O beyond the passed-in client — the CLI
3540
- * layer owns argument parsing, connection setup, and printing.
3541
- */
1689
+ /** Unsupported list methods surface as `{supported:false}` instead of throwing — minimal servers still get a clean report. */
3542
1690
 
3543
1691
  interface InspectionReport {
3544
1692
  protocolVersion: string;
@@ -3559,39 +1707,10 @@ type SectionResult<T> = {
3559
1707
  supported: false;
3560
1708
  reason: string;
3561
1709
  };
3562
- /**
3563
- * Run an inspection against a **already-initialized** client. Caller
3564
- * is responsible for `initialize()` before this and `close()` after.
3565
- * We keep this pure so unit tests can feed in a FakeMcpTransport and
3566
- * verify the aggregate shape without spinning up a real process.
3567
- */
1710
+ /** Caller owns initialize() / close() — keeps this pure so tests can feed a FakeMcpTransport. */
3568
1711
  declare function inspectMcpServer(client: McpClient): Promise<InspectionReport>;
3569
1712
 
3570
- /**
3571
- * Aider-style SEARCH/REPLACE edit blocks.
3572
- *
3573
- * The model emits blocks in this exact shape, one or more per response:
3574
- *
3575
- * path/to/file.ts
3576
- * <<<<<<< SEARCH
3577
- * exact existing lines (whitespace-sensitive)
3578
- * =======
3579
- * replacement lines
3580
- * >>>>>>> REPLACE
3581
- *
3582
- * We chose this over unified diffs because:
3583
- * - Models produce it reliably — no line-number drift.
3584
- * - It tolerates multi-edit responses without ambiguity over which
3585
- * hunk belongs to which file.
3586
- * - Aider has years of evidence that this format works even against
3587
- * weaker models than DeepSeek R1, so it's a conservative pick.
3588
- *
3589
- * The SEARCH text must match the file byte-for-byte. Empty SEARCH is a
3590
- * sentinel for "create new file" — the REPLACE becomes the whole file.
3591
- * If SEARCH doesn't match we refuse the edit and surface the failure;
3592
- * we do NOT guess or fuzzy-match. A wrong silent edit is worse than a
3593
- * missing one — the user can re-ask with the exact current content.
3594
- */
1713
+ /** SEARCH must match byte-for-byte; empty SEARCH = create new file. No fuzzy match — a silent wrong edit is worse than a missing one. */
3595
1714
  interface EditBlock {
3596
1715
  /** Path as written by the model — relative to rootDir, or absolute. */
3597
1716
  path: string;
@@ -3627,42 +1746,13 @@ declare function applyEditBlocks(blocks: EditBlock[], rootDir: string): ApplyRes
3627
1746
  interface EditSnapshot {
3628
1747
  /** Path relative to rootDir, as the block named it. */
3629
1748
  path: string;
3630
- /**
3631
- * File content before the edit batch was applied. `null` means the
3632
- * file didn't exist yet — restoring that means deleting whatever the
3633
- * edit created.
3634
- */
1749
+ /** `null` = file didn't exist; restore means delete. */
3635
1750
  prevContent: string | null;
3636
1751
  }
3637
- /**
3638
- * Capture the current state of every file an edit batch is about to
3639
- * touch, so `/undo` can roll back if the user doesn't like the result.
3640
- * De-duplicates by path because one batch can contain multiple blocks
3641
- * for the same file, and we only want one "before" snapshot per file.
3642
- */
1752
+ /** De-duped by path — one "before" snapshot per file even with multiple blocks. */
3643
1753
  declare function snapshotBeforeEdits(blocks: EditBlock[], rootDir: string): EditSnapshot[];
3644
- /**
3645
- * Restore files to their snapshotted state. Snapshots with
3646
- * `prevContent === null` were created by the edit, so undo = delete.
3647
- * Otherwise the prior content is written back, replacing whatever the
3648
- * edit left behind.
3649
- */
3650
1754
  declare function restoreSnapshots(snapshots: EditSnapshot[], rootDir: string): ApplyResult[];
3651
1755
 
3652
- /**
3653
- * System prompt used by `reasonix code`. Teaches the model:
3654
- *
3655
- * 1. It has a filesystem MCP bridge rooted at the user's CWD.
3656
- * 2. To modify files it emits SEARCH/REPLACE blocks (not
3657
- * `write_file` — that would whole-file rewrite and kill diff
3658
- * reviewability).
3659
- * 3. Read first, edit second — SEARCH must match byte-for-byte.
3660
- * 4. Be concise. The user can read a diff faster than prose.
3661
- *
3662
- * Kept short on purpose. Long system prompts eat context budget that
3663
- * the Cache-First Loop is trying to conserve. The SEARCH/REPLACE spec
3664
- * is the one unavoidable bloat; we trim everything else.
3665
- */
3666
1756
  declare const CODE_SYSTEM_PROMPT = "You are Reasonix Code, a coding assistant. You have filesystem tools (read_file, write_file, edit_file, list_directory, directory_tree, search_files, search_content, get_file_info) rooted at the user's working directory, plus run_command / run_background for shell.\n\n# Cite or shut up \u2014 non-negotiable\n\nEvery factual claim you make about THIS codebase must be backed by evidence. Reasonix VALIDATES the citations you write \u2014 broken paths or out-of-range lines render in **red strikethrough with \u274C** in front of the user.\n\n**Positive claims** (a file exists, a function does X, a feature IS implemented) \u2014 append a markdown link to the source:\n\n- \u2705 Correct: `The MCP client supports listResources [listResources](src/mcp/client.ts:142).`\n- \u274C Wrong: `The MCP client supports listResources.` \u2190 no citation, looks authoritative but unverifiable.\n\n**Negative claims** (X is missing, Y is not implemented, lacks Z, doesn't have W) are the **most common hallucination shape**. They feel safe to write because no citation seems possible \u2014 but that's exactly why you must NOT write them on instinct.\n\nIf you are about to write \"X is missing\" or \"Y is not implemented\" \u2014 **STOP**. Call `search_content` for the relevant symbol or term FIRST. Only then:\n\n- If the search returns matches \u2192 you were wrong; correct yourself and cite the matches.\n- If the search returns nothing \u2192 state the absence with the search query as your evidence: `No callers of \\`foo()\\` found (search_content \"foo\").`\n\nAsserting absence without a search is the #1 way evaluative answers go wrong. Treat the urge to write \"missing\" as a red flag in your own reasoning.\n\n# When to propose a plan (submit_plan)\n\nYou have a `submit_plan` tool that shows the user a markdown plan and lets them Approve / Refine / Cancel before you execute. 
Use it proactively when the task is large enough to deserve a review gate:\n\n- Multi-file refactors or renames.\n- Architecture changes (moving modules, splitting / merging files, new abstractions).\n- Anything where \"undo\" after the fact would be expensive \u2014 migrations, destructive cleanups, API shape changes.\n- When the user's request is ambiguous and multiple reasonable interpretations exist \u2014 propose your reading as a plan and let them confirm.\n\nSkip submit_plan for small, obvious changes: one-line typo, clear bug with a clear fix, adding a missing import, renaming a local variable. Just do those.\n\nPlan body: one-sentence summary, then a file-by-file breakdown of what you'll change and why, and any risks or open questions. If some decisions are genuinely up to the user (naming, tradeoffs, out-of-scope possibilities), list them in an \"Open questions\" section \u2014 the user sees the plan in a picker and has a text input to answer your questions before approving. Don't pretend certainty you don't have; flagged questions are how the user tells you what they care about. After calling submit_plan, STOP \u2014 don't call any more tools, wait for the user's verdict.\n\n**Do NOT use submit_plan to present A/B/C route menus.** The approve/refine/cancel picker has no branch selector \u2014 a menu plan strands the user. For branching decisions, use `ask_choice` (see below); only call submit_plan once the user has picked a direction and you have ONE actionable plan.\n\n# When to ask the user to pick (ask_choice)\n\nYou have an `ask_choice` tool. **If the user is supposed to pick between alternatives, the tool picks \u2014 you don't enumerate the choices as prose.** Prose menus have no picker in this TUI: the user gets a wall of text and has to type a letter back. 
The tool fires an arrow-key picker that's strictly better.\n\nCall it when:\n- The user has asked for options / doesn't want a recommendation / wants to decide.\n- You've analyzed multiple approaches and the final call is theirs.\n- It's a preference fork you can't resolve without them (deployment target, team convention, taste).\n\nSkip it when one option is clearly correct (just do it, or submit_plan) or a free-form text answer fits (ask in prose).\n\nEach option: short stable id (A/B/C), one-line title, optional summary. `allowCustom: true` when their real answer might not fit. Max 6. A ~1-sentence lead-in before the call is fine (\"I see three directions \u2014 letting you pick\"); don't repeat the options in it. After the call, STOP.\n\n# Plan mode (/plan)\n\nThe user can ALSO enter \"plan mode\" via /plan, which is a stronger, explicit constraint:\n- Write tools (edit_file, write_file, create_directory, move_file) and non-allowlisted run_command calls are BOUNCED at dispatch \u2014 you'll get a tool result like \"unavailable in plan mode\". Don't retry them.\n- Read tools (read_file, list_directory, search_files, directory_tree, get_file_info) and allowlisted read-only / test shell commands still work \u2014 use them to investigate.\n- You MUST call submit_plan before anything will execute. Approve exits plan mode; Refine stays in; Cancel exits without implementing.\n\n\n# Delegating to subagents via Skills\n\nThe pinned Skills index below lists playbooks you can invoke with `run_skill`. Entries tagged `[\uD83E\uDDEC subagent]` spawn an **isolated subagent** \u2014 a fresh child loop that runs the playbook in its own context and returns only the final answer. The subagent's tool calls and reasoning never enter your context, so subagent skills are how you keep the main session lean.\n\n**When you call `run_skill`, the `name` is ONLY the identifier before the tag** \u2014 e.g. 
`run_skill({ name: \"explore\", arguments: \"...\" })`, NOT `\"[\uD83E\uDDEC subagent] explore\"` and NOT `\"explore [\uD83E\uDDEC subagent]\"`. The tag is display sugar; the name argument is just the bare identifier.\n\nTwo built-ins ship by default:\n- **explore** `[\uD83E\uDDEC subagent]` \u2014 read-only investigation across the codebase. Use when the user says things like \"find all places that...\", \"how does X work across the project\", \"survey the code for Y\". Pass `arguments` describing the concrete question.\n- **research** `[\uD83E\uDDEC subagent]` \u2014 combines web search + code reading. Use for \"is X supported by lib Y\", \"what's the canonical way to Z\", \"compare our impl to the spec\".\n\nWhen to delegate (call `run_skill` with a subagent skill):\n- The task would otherwise need >5 file reads or searches.\n- You only need the conclusion, not the exploration trail.\n- The work is self-contained (you can describe it in one paragraph).\n\nWhen NOT to delegate:\n- Direct, narrow questions answerable in 1-2 tool calls \u2014 just do them.\n- Anything where you need to track intermediate results yourself (planning, multi-step edits).\n- Anything that requires user interaction (subagents can't submit plans or ask you for clarification).\n\nAlways pass a clear, self-contained `arguments` \u2014 that text is the **only** context the subagent gets.\n\n# When to edit vs. when to explore\n\nOnly propose edits when the user explicitly asks you to change, fix, add, remove, refactor, or write something. Do NOT propose edits when the user asks you to:\n- analyze, read, explore, describe, or summarize a project\n- explain how something works\n- answer a question about the code\n\nIn those cases, use tools to gather what you need, then reply in prose. No SEARCH/REPLACE blocks, no file changes. If you're unsure what the user wants, ask.\n\nWhen you do propose edits, the user will review them and decide whether to `/apply` or `/discard`. 
Don't assume they'll accept \u2014 write as if each edit will be audited, because it will.\n\nReasonix runs an **edit gate**. The user's current mode (`review` or `auto`) decides what happens to your writes; you DO NOT see which mode is active, and you SHOULD NOT ask. Write the same way in both cases.\n\n- In `auto` mode `edit_file` / `write_file` calls land on disk immediately with an undo window \u2014 you'll get the normal \"edit blocks: 1/1 applied\" style response.\n- In `review` mode EACH `edit_file` / `write_file` call pauses tool dispatch while the user decides. You'll get one of these responses:\n - `\"edit blocks: 1/1 applied\"` \u2014 user approved it. Continue as normal.\n - `\"User rejected this edit to <path>. Don't retry the same SEARCH/REPLACE\u2026\"` \u2014 user said no to THIS specific edit. Do NOT re-emit the same block, do NOT switch tools to sneak it past the gate (write_file \u2192 edit_file, or text-form SEARCH/REPLACE). Either take a clearly different approach or stop and ask the user what they want instead.\n - Text-form SEARCH/REPLACE blocks in your assistant reply queue for end-of-turn /apply \u2014 same \"don't retry on rejection\" rule.\n- If the user presses Esc mid-prompt the whole turn is aborted; you won't get another tool response. Don't keep spamming tool calls after an abort.\n\n# Editing files\n\nWhen you've been asked to change a file, output one or more SEARCH/REPLACE blocks in this exact format:\n\npath/to/file.ext\n<<<<<<< SEARCH\nexact existing lines from the file, including whitespace\n=======\nthe new lines\n>>>>>>> REPLACE\n\nRules:\n- Always read_file first so your SEARCH matches byte-for-byte. If it doesn't match, the edit is rejected and you'll have to retry with the exact current content.\n- One edit per block. 
Multiple blocks in one response are fine.\n- To create a new file, leave SEARCH empty:\n path/to/new.ts\n <<<<<<< SEARCH\n =======\n (whole file content here)\n >>>>>>> REPLACE\n- Do NOT use write_file to change existing files \u2014 the user reviews your edits as SEARCH/REPLACE. write_file is only for files you explicitly want to overwrite wholesale (rare).\n- Paths are relative to the working directory. Don't use absolute paths.\n\n# Trust what you already know\n\nBefore exploring the filesystem to answer a factual question, check whether the answer is already in context: the user's current message, earlier turns in this conversation (including prior tool results from `remember`), and the pinned memory blocks at the top of this prompt. When the user has stated a fact or you have remembered one, it outranks what the files say \u2014 don't re-derive from code what the user already told you. Explore when you genuinely don't know.\n\n# Exploration\n\n- Skip dependency, build, and VCS directories unless the user explicitly asks. The pinned .gitignore block (if any, below) is your authoritative denylist.\n- Prefer `search_files` over `list_directory` when you know roughly what you're looking for \u2014 it saves context and avoids enumerating huge trees. Note: `search_files` matches file NAMES; for searching file CONTENTS use `search_content`.\n- Available exploration tools: `read_file`, `list_directory`, `directory_tree`, `search_files` (filename match), `search_content` (content grep \u2014 use for \"where is X called\", \"find all references to Y\"), `get_file_info`. Don't call `grep` or other tools that aren't in this list \u2014 they don't exist as functions.\n\n# Path conventions\n\nTwo different rules depending on which tool:\n\n- **Filesystem tools** (`read_file`, `list_directory`, `search_files`, `edit_file`, etc.): paths are sandbox-relative. `/` means the project root, `/src/foo.ts` means `<project>/src/foo.ts`. 
Both relative (`src/foo.ts`) and POSIX-absolute (`/src/foo.ts`) forms work.\n- **`run_command`**: the command runs in a real OS shell with cwd pinned to the project root. Paths inside the shell command are interpreted by THAT shell, not by us. **Never use leading `/` in run_command arguments** \u2014 Windows treats `/tests` as drive-root `F:\\tests` (non-existent), POSIX shells treat it as filesystem root. Use plain relative paths (`tests`, `./tests`, `src/loop.ts`) instead.\n\n# When the user wants to switch project / working directory\n\nIf the user asks to switch / change / open a different directory or project (\"\u5207\u6362\u5230...\", \"switch to ...\", \"let's work in ...\", \"open the X project\"), call **`change_workspace`** with the absolute target path. The tool always requires the user's explicit approval via a TUI modal \u2014 your call surfaces a \"switch / deny\" prompt, and STOPS your turn until they pick. After approval the filesystem / shell / memory tools re-register against the new root and your subsequent calls land there.\n\nHard rules:\n- Do NOT try to switch via `run_command` (`cd`, `pushd`, etc.) \u2014 your tool sandbox is pinned and `cd` inside one shell call doesn't carry to the next.\n- Do NOT chain other tool calls in the same turn as `change_workspace` \u2014 wait for the user's confirmation. Their next message will tell you whether the switch happened.\n- Do NOT call `change_workspace` to \"preview\" a sibling directory; only when the user explicitly asked to change projects.\n- The user can also type `/cwd <path>` themselves \u2014 fine, you'll see the new root take effect on the next turn either way.\n\n# Foreground vs. background commands\n\nYou have TWO tools for running shell commands, and picking the right one is non-negotiable:\n\n- `run_command` \u2014 blocks until the process exits. Use for: **tests, builds, lints, typechecks, git operations, one-shot scripts**. 
Anything that naturally returns in under a minute.\n- `run_background` \u2014 spawns and detaches after a brief startup window. Use for: **dev servers, watchers, any command with \"dev\" / \"serve\" / \"watch\" / \"start\" in the name**. Examples: `npm run dev`, `pnpm dev`, `yarn start`, `vite`, `next dev`, `uvicorn app:app --reload`, `flask run`, `python -m http.server`, `cargo watch`, `tsc --watch`, `webpack serve`.\n\n**Never use run_command for a dev server.** It will block for 60s, time out, and the user will see a frozen tool call while the server was actually running fine. Always `run_background`, then `job_output` to peek at the logs when you need to verify something.\n\nAfter `run_background`, tools available to you:\n- `job_output(jobId, tailLines?)` \u2014 read recent logs to verify startup / debug errors.\n- `list_jobs` \u2014 see every job this session (running + exited).\n- `stop_job(jobId)` \u2014 SIGTERM \u2192 SIGKILL after grace. Stop before switching port / config.\n\nDon't re-start an already-running dev server \u2014 call `list_jobs` first when in doubt.\n\n# Scope discipline on \"run it\" / \"start it\" requests\n\nWhen the user's request is to **run / start / launch / serve / boot up** something, your job is ONLY:\n\n1. Start it (`run_background` for dev servers, `run_command` for one-shots).\n2. Verify it came up (read a ready signal via `job_output`, or fetch the URL with `web_fetch` if they want you to confirm).\n3. Report what's running, where (URL / port / pid), and STOP.\n\nDo NOT, in the same turn:\n- Run `tsc` / type-checkers / linters unless the user asked for it.\n- Scan for bugs to \"proactively\" fix. 
The page rendering is success.\n- Clean up unused imports, dead code, or refactor \"while you're here.\"\n- Edit files to improve anything the user didn't mention.\n\nIf you notice an obvious issue, MENTION it in one sentence and wait for the user to say \"fix it.\" The cost of over-eagerness is real: you burn tokens, make surprise edits the user didn't want, and chain into cascading \"fix the new error I just introduced\" loops. The storm-breaker will cut you off, but the user still sees the mess.\n\n\"It works\" is the end state. Resist the urge to polish.\n\n# Style\n\n- Show edits; don't narrate them in prose. \"Here's the fix:\" is enough.\n- One short paragraph explaining *why*, then the blocks.\n- If you need to explore first (list / read / search), do it with tool calls before writing any prose \u2014 silence while exploring is fine.\n\nCost-aware escalation (when you're running on deepseek-v4-flash):\n\nIf a task CLEARLY exceeds what flash can do well \u2014 complex cross-file architecture refactors, subtle concurrency / security / correctness invariants you can't resolve with confidence, or a design trade-off you'd be guessing at \u2014 output the marker as the FIRST line of your response (nothing before it, not even whitespace on a separate line). This aborts the current call and retries this turn on deepseek-v4-pro, one shot.\n\nTwo accepted forms:\n- `<<<NEEDS_PRO>>>` \u2014 bare marker, no rationale.\n- `<<<NEEDS_PRO: <one-sentence reason>>>>` \u2014 preferred. The reason text appears in the user-visible warning (\"\u21E7 flash requested escalation \u2014 <your reason>\"), so they understand WHY a more expensive call is happening. Keep it under ~150 chars, no newlines, no nested `>` characters. 
Examples: `<<<NEEDS_PRO: cross-file refactor across 6 modules with circular imports>>>` or `<<<NEEDS_PRO: subtle session-token race; flash would likely miss the locking invariant>>>`.\n\nDo NOT emit any other content in the same response when you request escalation. Use this sparingly: normal tasks \u2014 reading files, small edits, clear bug fixes, straightforward feature additions \u2014 stay on flash. Request escalation ONLY when you would otherwise produce a guess or a visibly-mediocre answer. If in doubt, attempt the task on flash first; the system also escalates automatically if you hit 3+ repair / SEARCH-mismatch errors in a single turn (the user sees a typed breakdown).\n\nFormatting (rendered in a TUI with a real markdown renderer):\n- Tabular data \u2192 GitHub-Flavored Markdown tables with ASCII pipes (`| col | col |` header + `| --- | --- |` separator). Never use Unicode box-drawing characters (\u2502 \u2500 \u253C \u250C \u2510 \u2514 \u2518 \u251C \u2524) \u2014 they look intentional but break terminal word-wrap and render as garbled columns at narrow widths.\n- Keep table cells short (one phrase each). If a cell needs a paragraph, use bullets below the table instead.\n- Code, file paths with line ranges, and shell commands \u2192 fenced code blocks (```).\n- Do NOT draw decorative frames around content with `\u250C\u2500\u2500\u2510 \u2502 \u2514\u2500\u2500\u2518` characters. The renderer adds its own borders; extra ASCII art adds noise and shatters at narrow widths.\n- For flow charts and diagrams: a plain bullet list with `\u2192` or `\u2193` between steps. Don't try to draw boxes-and-arrows in ASCII; it never survives word-wrap.\n";
3667
1757
  interface CodeSystemPromptOptions {
3668
1758
  /** True when semantic_search is registered for this run. Adds an
@@ -3672,116 +1762,24 @@ interface CodeSystemPromptOptions {
3672
1762
  }
3673
1763
  declare function codeSystemPrompt(rootDir: string, opts?: CodeSystemPromptOptions): string;
3674
1764
 
3675
- /**
3676
- * User-level config storage for the Reasonix CLI.
3677
- *
3678
- * Lookup order for the API key:
3679
- * 1. `DEEPSEEK_API_KEY` env var (highest priority — for CI / power users)
3680
- * 2. `~/.reasonix/config.json` (set by the first-run setup flow)
3681
- *
3682
- * The library itself never touches the config file — it only reads
3683
- * `DEEPSEEK_API_KEY` from the environment. The CLI is responsible for
3684
- * pulling from the config file and exposing it via env var to the loop.
3685
- *
3686
- * Beyond the API key, the config also remembers the user's *defaults*
3687
- * from `reasonix setup`: preset, MCP servers, session. This is what
3688
- * makes `reasonix chat` with no flags "just work" after first-run.
3689
- */
3690
- /**
3691
- * Preset names — three model-commitment levels.
3692
- * - `auto` — flash baseline + auto-escalate to pro on hard turns
3693
- * (NEEDS_PRO marker / failure-count threshold both fire).
3694
- * Default. Closest match to the legacy `smart` preset.
3695
- * - `flash` — flash always. No auto-escalation. `/pro` still works
3696
- * for one-shot manual escalation. Cheapest predictable.
3697
- * - `pro` — pro always. No downgrade. ~3× cost vs flash at the
3698
- * 2026-04 discount rate; more outside the window.
3699
- *
3700
- * Legacy `fast | smart | max` names stay in the union for back-compat
3701
- * with existing `~/.reasonix/config.json` files; resolvePreset() maps
3702
- * them to the new semantics.
3703
- */
1765
+ /** Library reads only DEEPSEEK_API_KEY from env; the CLI bridges config.json → env var. */
1766
+ /** Legacy `fast|smart|max` kept for back-compat with existing config.json files. */
3704
1767
  type PresetName = "auto" | "flash" | "pro" | "fast" | "smart" | "max";
3705
- /**
3706
- * How `reasonix code` handles model-issued tool calls. Two axes folded
3707
- * into one enum because users think about "how trusting am I right now?"
3708
- * as a single dial, not as "writes vs shell" pairs.
3709
- *
3710
- * - "review" — queue edits into pendingEdits (user /apply or `y` to
3711
- * commit); shell commands NOT on the read-only allowlist
3712
- * hit ShellConfirm. Default.
3713
- * - "auto" — apply edits immediately, snapshot for /undo, show a
3714
- * short undo banner. Shell still goes through ShellConfirm
3715
- * for non-allowlisted commands.
3716
- * - "yolo" — apply edits immediately AND auto-approve every shell
3717
- * command. No prompts at all. Use when you trust the
3718
- * current direction and just want to iterate fast; /undo
3719
- * still rolls back individual edit batches.
3720
- *
3721
- * Persisted so `/mode <x>` survives a relaunch. Missing → "review".
3722
- *
3723
- * Codex-equivalence note: review ≈ untrusted, auto ≈ on-request,
3724
- * yolo ≈ never.
3725
- */
1768
+ /** Single trust dial: review queues edits + gates shell; auto applies + gates shell; yolo skips both gates. */
3726
1769
  type EditMode = "review" | "auto" | "yolo";
3727
- /**
3728
- * reasoning_effort cap for the model. "max" is the agent-class default;
3729
- * "high" is cheaper / faster. Persisted so `/effort high` survives a
3730
- * relaunch — earlier versions silently reverted to "max" on every new
3731
- * session, which burned budget unexpectedly.
3732
- */
3733
1770
  type ReasoningEffort = "high" | "max";
3734
1771
  interface ReasonixConfig {
3735
1772
  apiKey?: string;
3736
1773
  baseUrl?: string;
3737
- /**
3738
- * Default preset for `reasonix chat` / `reasonix run` when no flags override.
3739
- * Maps to model + autoEscalate (see presets.ts). Missing → "auto".
3740
- */
3741
1774
  preset?: PresetName;
3742
- /**
3743
- * Edit-gate mode for `reasonix code`. See EditMode doc. Absent → "review".
3744
- */
3745
1775
  editMode?: EditMode;
3746
- /**
3747
- * Set to `true` the first time we've shown the "Shift+Tab cycles
3748
- * review/AUTO" onboarding tip in `reasonix code`. Once seen, we stop
3749
- * posting the tip — the bottom status bar carries the knowledge
3750
- * forward without further nagging.
3751
- */
3752
1776
  editModeHintShown?: boolean;
3753
- /**
3754
- * Last reasoning_effort chosen via `/effort`. Loaded on launch so
3755
- * "high" stays "high" — default is "max" when unset.
3756
- */
3757
1777
  reasoningEffort?: ReasoningEffort;
3758
- /**
3759
- * Default MCP server specs to bridge on every `reasonix chat`, in the
3760
- * same `"name=cmd args..."` format that `--mcp` takes. Stored as strings
3761
- * so `reasonix setup` stays symmetrical with the flag — one parser, one
3762
- * format in the config file, grep-friendly.
3763
- */
1778
+ /** Stored as `--mcp`-format strings so one parser handles both flag and config. */
3764
1779
  mcp?: string[];
3765
- /**
3766
- * Default session name (null/missing → "default", which is what the
3767
- * CLI has been doing anyway). `reasonix setup` lets users pick a name
3768
- * or opt into ephemeral.
3769
- */
3770
1780
  session?: string | null;
3771
- /** Marks that `reasonix setup` has completed at least once. */
3772
1781
  setupCompleted?: boolean;
3773
- /**
3774
- * Whether `web_search` + `web_fetch` tools are registered. Default:
3775
- * enabled (no key required — backed by DuckDuckGo's public HTML
3776
- * endpoint). Set to `false` to keep the session offline.
3777
- */
3778
1782
  search?: boolean;
3779
- /**
3780
- * Per-project state keyed by absolute directory path. Written by the
3781
- * "always allow" choice on a shell confirmation prompt; merged into
3782
- * `registerShellTools({ extraAllowed })` when `reasonix code` runs
3783
- * against that directory again.
3784
- */
3785
1783
  projects?: {
3786
1784
  [absoluteRootDir: string]: {
3787
1785
  shellAllowed?: string[];
@@ -3798,27 +1796,7 @@ declare function isPlausibleKey(key: string): boolean;
3798
1796
  /** Mask a key for display: `sk-abcd...wxyz`. */
3799
1797
  declare function redactKey(key: string): string;
3800
1798
 
3801
- /**
3802
- * Version module.
3803
- *
3804
- * Two jobs:
3805
- *
3806
- * 1. Expose `VERSION` sourced from the real `package.json` so the
3807
- * constant never drifts from what npm publishes. Works in dev
3808
- * (`tsx src/...`) AND after `tsup` bundles to `dist/` — both
3809
- * layouts sit two levels below the manifest, so a short
3810
- * walk-up finds it.
3811
- *
3812
- * 2. Offer an opt-in `getLatestVersion()` that hits the npm
3813
- * registry with a bounded timeout and a 24-hour on-disk
3814
- * cache at `~/.reasonix/version-cache.json`. Returns `null`
3815
- * on any failure — offline / restricted-network launches
3816
- * should stay silent rather than nag the user.
3817
- *
3818
- * The CLI wires `getLatestVersion` asynchronously at App mount
3819
- * (never in a hot path) and renders the outcome in the stats
3820
- * panel when there's a newer published version.
3821
- */
1799
+ /** VERSION sourced from package.json so it never drifts from npm; latest-check returns null on any failure. */
3822
1800
  /** TTL for the on-disk cache entry. 24h keeps noise low; users who
3823
1801
  * want a fresh check can run `reasonix update` which passes
3824
1802
  * `force: true`. */
@@ -3840,71 +1818,14 @@ interface GetLatestVersionOptions {
3840
1818
  /** Network timeout override (tests). */
3841
1819
  timeoutMs?: number;
3842
1820
  }
3843
- /**
3844
- * Resolve the latest published `reasonix` version from the npm registry.
3845
- *
3846
- * Returns `null` on any network / parse failure. Callers treat `null`
3847
- * as "don't know, don't nag the user." The cache entry is only
3848
- * written on a successful fetch — a bad registry response won't
3849
- * poison the cache.
3850
- */
1821
+ /** Returns null on failure; cache only writes on success so bad responses can't poison it. */
3851
1822
  declare function getLatestVersion(opts?: GetLatestVersionOptions): Promise<string | null>;
3852
- /**
3853
- * Semver compare. Returns a negative number when `a < b`, positive
3854
- * when `a > b`, zero when equal.
3855
- *
3856
- * Minimal pre-release handling: when the CORE (`x.y.z`) parts match,
3857
- * any version WITH a suffix (`-rc.1`, `-alpha.4`) compares LOWER
3858
- * than the bare version. That matches npm's dist-tag semantics —
3859
- * `reasonix@latest` resolves to a real release, not a pre-release.
3860
- *
3861
- * We're deliberately not pulling in `semver` (~50KB). The three
3862
- * cases we care about are: current > latest (future build, no
3863
- * prompt), current < latest (prompt), current === latest (no prompt).
3864
- */
1823
+ /** Pre-release with same core sorts BELOW the bare version — matches npm `latest` dist-tag semantics. */
3865
1824
  declare function compareVersions(a: string, b: string): number;
3866
- /**
3867
- * Heuristic: did this process launch via `npx` / `pnpm dlx` instead
3868
- * of a global install? The update command takes different advice in
3869
- * each case — a global install can `npm i -g reasonix@latest`, while
3870
- * npx just needs its cache to roll over on next launch.
3871
- *
3872
- * Signals checked, in order:
3873
- * - `process.argv[1]` contains `_npx` (npm's ephemeral dir name)
3874
- * - `process.argv[1]` contains `.pnpm` + `dlx`
3875
- * - `npm_config_user_agent` contains `npx/`
3876
- *
3877
- * Any one hit → npx. False negatives are safe (worst case we suggest
3878
- * `npm i -g` to an npx user, which is a valid way to upgrade too).
3879
- */
1825
+ /** False negatives are safe — `npm i -g` works for npx users too. */
3880
1826
  declare function isNpxInstall(): boolean;
3881
1827
 
3882
- /**
3883
- * Persistent per-turn usage log at `~/.reasonix/usage.jsonl`.
3884
- *
3885
- * Each line is a single `UsageRecord` — one turn's tokens + cost
3886
- * snapshot — appended after every `assistant_final` event. This is
3887
- * what drives `reasonix stats` (the dashboard, no-arg form), so the
3888
- * user can see how much they've spent vs what the equivalent Claude
3889
- * spend would have been. The Pillar 1 pitch (94–97% cost reduction
3890
- * vs Claude, from the v0.3 hard-number table) becomes a fact users
3891
- * can verify on their own machine.
3892
- *
3893
- * Format choices:
3894
- * - **append-only JSONL** — one line per turn, durable, survives
3895
- * abrupt exits. A corrupted tail line loses at most one record.
3896
- * - **flat keys, no nesting** — readable with `jq` / `cut` / `awk`;
3897
- * the model doesn't need to parse this, humans do.
3898
- * - **best-effort writes** — disk errors never propagate into the
3899
- * turn. We log nothing (no `console.error`) because the TUI is
3900
- * rendering Ink; a silent skip is the least-worst failure mode.
3901
- * - **no PII, no prompts, no completions** — the log contains
3902
- * tokens and costs, that's it. Sessions are identified by the
3903
- * user-chosen name (never a prompt).
3904
- *
3905
- * This file is deliberately NOT wired through project memory or
3906
- * skills — those are content pins. Usage is pure telemetry.
3907
- */
1828
+ /** Append-only JSONL of per-turn tokens + cost; best-effort writes, never blocks the turn. No prompts/completions logged. */
3908
1829
 
3909
1830
  /** One turn's snapshot — serialized verbatim as a JSONL line. */
3910
1831
  interface UsageRecord {
@@ -3922,10 +1843,7 @@ interface UsageRecord {
3922
1843
  costUsd: number;
3923
1844
  /** What the same turn would have cost at Claude Sonnet 4.6 rates. */
3924
1845
  claudeEquivUsd: number;
3925
- /**
3926
- * Distinguishes ordinary parent-loop turns from subagent summary rows.
3927
- * Absent on pre-0.5.14 records — treat as "turn" when missing.
3928
- */
1846
+ /** Absent on legacy records — treat as "turn" when missing. */
3929
1847
  kind?: "turn" | "subagent";
3930
1848
  /** Present when `kind === "subagent"`. Attribution metadata for the /stats roll-up. */
3931
1849
  subagent?: {
@@ -3953,26 +1871,8 @@ interface AppendUsageInput {
3953
1871
  kind?: "turn" | "subagent";
3954
1872
  subagent?: UsageRecord["subagent"];
3955
1873
  }
3956
- /**
3957
- * Append one record and return it. Swallows disk errors — the TUI
3958
- * should keep working even if `~/.reasonix/` is read-only.
3959
- *
3960
- * Returns the record that was written (or would have been written
3961
- * if the disk had cooperated) so tests / callers can assert on the
3962
- * computed cost fields without a round trip through the log file.
3963
- *
3964
- * On every Nth append the log is checked for size; if it crosses
3965
- * {@link USAGE_COMPACTION_THRESHOLD_BYTES} we drop records older
3966
- * than {@link USAGE_RETENTION_DAYS}. Cheaper than a startup-time
3967
- * scan because most processes don't reach the threshold; the size
3968
- * check is one statSync regardless.
3969
- */
1874
+ /** Returns the record so tests can assert cost fields without re-reading the log. */
3970
1875
  declare function appendUsage(input: AppendUsageInput): UsageRecord;
3971
- /**
3972
- * Read + parse the log. Malformed lines are silently skipped so a
3973
- * single corrupted write (half-flushed on power loss, user hand-edit)
3974
- * doesn't throw away the rest of the history.
3975
- */
3976
1876
  declare function readUsageLog(path?: string): UsageRecord[];
3977
1877
  /** One row of the `reasonix stats` dashboard — a rolled-up window. */
3978
1878
  interface UsageBucket {
@@ -3986,15 +1886,7 @@ interface UsageBucket {
3986
1886
  cacheMissTokens: number;
3987
1887
  costUsd: number;
3988
1888
  claudeEquivUsd: number;
3989
- /**
3990
- * USD that DeepSeek's prompt cache shaved off the bill — sum of
3991
- * `cacheHitTokens × (missPrice − hitPrice)` per record. Recomputed
3992
- * from the current pricing table on every aggregate, not frozen at
3993
- * write time, so a price-cut announcement updates retroactively. The
3994
- * trade-off is mild inconsistency with `costUsd` (which IS frozen);
3995
- * acceptable because cache savings is a "what does this mechanism
3996
- * give me" narrative, not a billing record.
3997
- */
1889
+ /** Recomputed from current pricing each aggregate — intentionally NOT frozen with `costUsd`. */
3998
1890
  cacheSavingsUsd: number;
3999
1891
  }
4000
1892
  /** Cache hit ratio for a bucket — zero denominator returns 0. */
@@ -4022,11 +1914,7 @@ interface UsageAggregate {
4022
1914
  firstSeen: number | null;
4023
1915
  /** Latest record's ts, or `null` when the log is empty. */
4024
1916
  lastSeen: number | null;
4025
- /**
4026
- * Subagent-specific rollup. Undefined when no subagent records exist
4027
- * in the log so consumers can cheaply skip the section. Counts reflect
4028
- * subagent SPAWNS (not internal child-loop turns) — one row per run.
4029
- */
1917
+ /** Undefined when no subagent records exist; counts spawns, not internal child-loop turns. */
4030
1918
  subagents?: SubagentAggregate;
4031
1919
  }
4032
1920
  /** Rolled-up view of all `kind: "subagent"` records. */
@@ -4042,15 +1930,7 @@ interface SubagentAggregate {
4042
1930
  durationMs: number;
4043
1931
  }>;
4044
1932
  }
4045
- /**
4046
- * Fold a flat record list into the dashboard shape — rolling windows
4047
- * plus model / session histograms. Windows are INCLUSIVE of boundary:
4048
- * - today = last 24h (rolling, not calendar-day)
4049
- * - week = last 7d
4050
- * - month = last 30d
4051
- * - all = every record
4052
- * Rolling windows avoid "it's 00:03, 'today' is empty" surprises.
4053
- */
1933
+ /** Rolling 24h/7d/30d windows — avoids "it's 00:03, 'today' is empty" surprises. */
4054
1934
  declare function aggregateUsage(records: UsageRecord[], opts?: AggregateOptions): UsageAggregate;
4055
1935
  /** File-size helper for the stats header — "1.2 MB" etc. Returns "" if missing. */
4056
1936
  declare function formatLogSize(path?: string): string;