agent.libx.js 0.93.6 → 0.93.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,7 +54,7 @@ Beyond file tools, the runtime ships the higher-altitude pieces too — each an
54
54
  - **Subagents** (`subagents`; **typed agents** via `agentsDir` — `<dir>/<name>.md` defines a persona + model + scoped tools, selected with the `Task` `agentType`), **hooks** (`hooks`: preToolUse/postToolUse/onStop — block or audit any tool call), **slash-commands** (`commandsDir`), **TodoWrite**, **MCP** (`mcpToolsToAgentTools`).
55
55
  - **Streaming** (`stream: true` → `text_delta` via `HostBridge.notify`) and **context compaction** (`compaction: { maxMessages }` → edge-safe summarize-and-boundary). Defaults preserve the original non-stream, drop-oldest behavior.
56
56
  - **Multi-turn + project context** — `Agent.send()` continues a conversation across turns (vs `run()`, which starts fresh); **project instructions** (`instructionFiles`: `AGENTS.md`/`CLAUDE.md` at the FS root) inject into the system prompt.
57
- - **DuplexAgent** (`src/duplex.ts`) — voice-optimized dual-model engine: a fast voice agent streams instant single-voice replies and escalates real work via a `Delegate` tool to detached slow workers; results are pushed back and re-voiced by the fast persona (turn mutex, coalesced completions, `TaskStatus`/`CancelTask`). See [`mind/10`](./mind/10-duplex.md).
57
+ - **DuplexAgent** (`src/duplex.ts`) — voice-optimized three-tier engine (reflex/act/think): a fast reflex agent streams instant replies and self-selects escalation — `Act` for standard tool work (Sonnet-class), `Think` for deep reasoning (Opus-class, configurable, default on). Results are pushed back and re-voiced by the reflex agent (turn mutex, coalesced completions, `TaskStatus`/`CancelTask`). See [`mind/10`](./mind/10-duplex.md).
58
58
  - **Budget kill-switches** — always-on per-run guards (`maxTokens`/`timeoutMs`/`maxRepeats`/`maxToolCalls`/`signal` → `finishReason` `budget`/`timeout`/`loop`/`max_tool_calls`/`aborted`) protect the API spend against runaway loops. The *enforceable* billing cap is server-side in the web key-proxy: a VFS-backed budget config (`/.agent/budget.json`, USD-metered, hot-reloaded, $100/wk default) a browser client can't bypass. See [`web/`](./web) and [`mind/06`](./mind/06-agent-features.md).
59
59
 
60
60
  ## The `agentx` CLI
package/cli/cli.ts CHANGED
@@ -92,7 +92,7 @@ interface Args {
92
92
  task?: string; model?: string; cwd?: string; stream: boolean; plan: boolean; ask: boolean; yes: boolean;
93
93
  vfs: boolean; shell: boolean | undefined; boddb?: string; seed: boolean;
94
94
  subagents: boolean; maxSteps?: number; maxTokens?: number; timeoutMs?: number; reasoning?: ReasoningEffort; help: boolean; version: boolean;
95
- duplex: boolean; voiceModel?: string; voice: boolean;
95
+ duplex: boolean; voiceModel?: string; thinkModel?: string | false; voice: boolean;
96
96
  cont: boolean; resume?: string; sessionId?: string; fork?: boolean; outputFormat: 'text' | 'json' | 'stream-json';
97
97
  allowedTools?: string[]; disallowedTools?: string[]; appendSystemPrompt?: string; addDirs?: string[];
98
98
  print?: boolean; debug?: boolean;
@@ -149,6 +149,8 @@ export function parseArgs(argv: string[]): Args {
149
149
  else if (x === '--duplex') a.duplex = true;
150
150
  else if (x === '--conversational' || x === '--convo' || x === '--voice') { a.voice = true; a.duplex = true; } // duplex + human conversational register (--convo/--voice = aliases)
151
151
  else if (x === '--voice-model') a.voiceModel = val(++i, x);
152
+ else if (x === '--think-model') a.thinkModel = val(++i, x);
153
+ else if (x === '--no-think') a.thinkModel = false;
152
154
  else if (x === '--allowedTools' || x === '--allowed-tools') a.allowedTools = val(++i, x).split(',').map((s) => s.trim()).filter(Boolean);
153
155
  else if (x === '--disallowedTools' || x === '--disallowed-tools') a.disallowedTools = val(++i, x).split(',').map((s) => s.trim()).filter(Boolean);
154
156
  else if (x === '--append-system-prompt') a.appendSystemPrompt = val(++i, x);
@@ -172,6 +174,7 @@ export function parseArgs(argv: string[]): Args {
172
174
  if (a.duplex && (a.task || a.print)) throw new Error('--duplex is interactive-only (a conversational mode) — drop the task/-p');
173
175
  if (a.duplex && a.plan) throw new Error('--plan is not supported in --duplex (workers are non-interactive; a plan could never be approved)');
174
176
  if (a.voiceModel && !a.duplex) throw new Error('--voice-model only applies with --duplex');
177
+ if (a.thinkModel !== undefined && !a.duplex) throw new Error('--think-model/--no-think only apply with --duplex');
175
178
  return a;
176
179
  }
177
180
 
@@ -209,6 +212,8 @@ Flags:
209
212
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
210
213
  spoken replies out (echo-cancelled; speak over it to interrupt)
211
214
  --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
215
+ --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
216
+ --no-think with --duplex: disable the Think tier (Act handles everything)
212
217
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
213
218
  --subagents allow the Task tool (spawn child agents)
214
219
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
@@ -238,7 +243,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
238
243
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
239
244
 
240
245
  REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
241
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit
246
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
242
247
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
243
248
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
244
249
  REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
@@ -493,15 +498,24 @@ function printHistory(messages: Message[]): void {
493
498
  if (s) err(s);
494
499
  }
495
500
 
496
- /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced. */
497
- export function costOf(pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined, promptTokens = 0, completionTokens = 0): number {
501
+ /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
502
+ * Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
503
+ * (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
504
+ export function costOf(
505
+ pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined,
506
+ promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0,
507
+ ): number {
498
508
  if (!pricing) return 0;
499
- return (promptTokens / 1000) * pricing.inputCostPer1K + (completionTokens / 1000) * pricing.outputCostPer1K;
509
+ const fresh = Math.max(0, promptTokens - cacheCreationTokens - cacheReadTokens);
510
+ return (fresh / 1000) * pricing.inputCostPer1K
511
+ + (cacheCreationTokens / 1000) * pricing.inputCostPer1K * 1.25
512
+ + (cacheReadTokens / 1000) * pricing.inputCostPer1K * 0.1
513
+ + (completionTokens / 1000) * pricing.outputCostPer1K;
500
514
  }
501
515
 
502
516
  /** Cost of one turn at `model`'s rate (looks up ai.libx.js pricing). */
503
- function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number }): number {
504
- return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0);
517
+ function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number; cacheCreationTokens?: number; cacheReadTokens?: number }): number {
518
+ return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0);
505
519
  }
506
520
 
507
521
  /** Evaluate whether a goal condition has been met, based on recent transcript. */
@@ -994,14 +1008,14 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
994
1008
  const agent = await makeAgent(args, ai, cfg, mounted.flatMap((m) => m.tools));
995
1009
 
996
1010
  // Non-duplex voice: let the model exit the session when the user says goodbye.
997
- // Duplex voice gets ExitSession via voiceOptions.tools (the agent here is the worker template — workers don't need it).
1011
+ // Duplex voice gets ExitSession via reflexOptions.tools (the agent here is the worker template — workers don't need it).
998
1012
  if (args.voice && !args.duplex) agent.options.tools = [...(agent.options.tools ?? []), exitSessionTool(() => { exitRequested = true; })];
999
1013
 
1000
1014
  // ── Duplex mode (`--duplex`): the REPL runs unchanged, but turns go through a fast VOICE agent
1001
1015
  // that answers instantly and delegates real work to background workers (re-voiced when done).
1002
1016
  // `face` = the transcript-owning agent the REPL drives (sessions, footer, Esc-abort, /compact);
1003
1017
  // `work` = the options that mean "the working agent" (/model, /reasoning, /tools, permissions —
1004
- // in duplex these are the WORKERS' options; workers are constructed fresh per Delegate).
1018
+ // in duplex these are the WORKERS' options; workers are constructed fresh per Act/Think).
1005
1019
  const duplex = args.duplex;
1006
1020
  let dx: DuplexAgent | undefined;
1007
1021
  let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
@@ -1070,6 +1084,10 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1070
1084
  voiceIO.speakDelta(e.message);
1071
1085
  editorRef?.suspend(); // no-op when already suspended
1072
1086
  }
1087
+ if (e.kind === 'hold_filler' && voiceIO) {
1088
+ voiceIO.speakFiller(e.message);
1089
+ return;
1090
+ }
1073
1091
  if (e.kind === 'revoice_done') { // a re-voice turn ended outside runTurn's flush — drain the markdown tail now
1074
1092
  base.flushText();
1075
1093
  process.stdout.write('\n');
@@ -1108,7 +1126,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1108
1126
  // Conversational undo: the voice can roll back per-task checkpoint frames ("undo that").
1109
1127
  const rewindFilesTool: AgentTool = {
1110
1128
  name: 'RewindFiles',
1111
- description: 'Undo file changes made by delegated tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.',
1129
+ description: 'Undo file changes made by Act/Think tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.',
1112
1130
  parameters: { type: 'object', properties: { steps: { type: 'number', description: 'how many task checkpoints to undo (default 1)' } } },
1113
1131
  run: async ({ steps }) => {
1114
1132
  if (!checkpoints.size) return 'No file checkpoints to rewind yet.';
@@ -1124,9 +1142,10 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1124
1142
  fs: agent.options.fs,
1125
1143
  memoryDir: agent.options.memoryDir,
1126
1144
  memoryUserDir: agent.options.memoryUserDir,
1127
- ...((args.voiceModel ?? cfg.voiceModel) ? { voiceModel: resolveModelOrNewest((args.voiceModel ?? cfg.voiceModel)!) } : {}),
1128
- workerModel: agent.options.model,
1129
- workerOptions,
1145
+ ...((args.voiceModel ?? cfg.reflexModel) ? { reflexModel: resolveModelOrNewest((args.voiceModel ?? cfg.reflexModel)!) } : {}),
1146
+ actModel: agent.options.model,
1147
+ actOptions: workerOptions,
1148
+ ...((args.thinkModel ?? cfg.thinkModel) !== undefined ? { thinkModel: (args.thinkModel ?? cfg.thinkModel) === false ? false : resolveModelOrNewest(String(args.thinkModel ?? cfg.thinkModel)) } : {}),
1130
1149
  host,
1131
1150
  ...(args.voice ? { voiceStyle: 'conversational' as const, progressUpdates: true, askRelay: true } : {}), // voice: progress asides + worker questions relayed through the conversation
1132
1151
  // Per-TASK checkpoint frames (the natural undo unit in duplex = one delegation): opened BEFORE
@@ -1152,8 +1171,8 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1152
1171
  },
1153
1172
  },
1154
1173
  // The voice runs on the REAL fs (it has no fs tools — harmless) so @mentions, !cmd and #note
1155
- // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Delegate …).
1156
- voiceOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => { exitRequested = true; })] },
1174
+ // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Act …).
1175
+ reflexOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => { exitRequested = true; })] },
1157
1176
  });
1158
1177
  }
1159
1178
  const face: Agent = dx ? dx.voice : agent; // the transcript-owning agent the REPL drives
@@ -1197,7 +1216,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1197
1216
  return next;
1198
1217
  };
1199
1218
  // Model switching targets the WORKER (act model) in duplex; the reflex/voice model is set via --voice-model at startup or /voice-model at runtime.
1200
- const setModel = (m: string) => { work.model = m; if (dx) dx.options.workerModel = m; persistModel(cwd, m); err(dim(' model → ' + m + '\n')); };
1219
+ const setModel = (m: string) => { work.model = m; if (dx) dx.options.actModel = m; persistModel(cwd, m); err(dim(' model → ' + m + '\n')); };
1201
1220
  // Tool mutations (/mcp add|remove|login) — duplex workers are constructed per spawn from work.tools.
1202
1221
  const addWorkTools = (ts: AgentTool[]) => { if (duplex) work.tools = [...(work.tools ?? []), ...ts]; else agent.addTools(ts); };
1203
1222
  const removeWorkTools = (names: string[]) => { if (duplex) work.tools = (work.tools ?? []).filter((t) => !names.includes(t.name)); else agent.removeTools(names); };
@@ -1446,7 +1465,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1446
1465
  desc: 'show CLI version + runtime',
1447
1466
  run: () => {
1448
1467
  const rt = (process.versions as any).bun ? `bun ${(process.versions as any).bun}` : `node ${process.versions.node}`;
1449
- err(` ${bold('agent.libx.js')} ${cyan('v' + VERSION)}${dim(` · ${duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model} · ${rt}`)}\n`);
1468
+ err(` ${bold('agent.libx.js')} ${cyan('v' + VERSION)}${dim(` · ${duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model} · ${rt}`)}\n`);
1450
1469
  },
1451
1470
  },
1452
1471
  tools: {
@@ -1472,7 +1491,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1472
1491
  const mode = args.vfs ? 'sandbox (VFS — disk untouched)' : args.boddb ? `boddb (database workspace at ${args.boddb} — disk untouched)` : args.shell ? 'disk + real /bin/sh' : 'disk (full real FS, like Claude Code)';
1473
1492
  const pol = work.permissions;
1474
1493
  const perm = !pol ? 'allow all (unattended)' : `${pol.options.rules.length} rule(s), default ${pol.options.default}`;
1475
- const model = duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model;
1494
+ const model = duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model;
1476
1495
  err(formatStatus({ model, cwd, mode, tools: (duplex ? work.tools ?? [] : agent.options.tools).map((t) => t.name), permissions: perm, turns: session.meta.turns, tokens: session.meta.tokens ?? 0, sessionId: session.meta.id, estimated: session.meta.costEstimated ?? false }));
1477
1496
  if (duplex && dx!.tasks.size) err(dim(` tasks: ${[...dx!.tasks.values()].map((t) => `${t.id}:${t.status}`).join(' ')}\n`));
1478
1497
  },
@@ -1523,7 +1542,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1523
1542
  if (a[0]) { setModel(a[0]); return; }
1524
1543
  const picked = await pickModel(work.model);
1525
1544
  if (picked) setModel(picked);
1526
- else err(dim(' ' + (duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model) + '\n'));
1545
+ else err(dim(' ' + (duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model) + '\n'));
1527
1546
  },
1528
1547
  },
1529
1548
  ...(duplex ? { workers: {
@@ -1533,18 +1552,52 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1533
1552
  err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
1534
1553
  },
1535
1554
  }, 'voice-model': {
1536
- desc: 'switch the duplex voice (fast) model — /voice-model <id>, or alone for a picker',
1555
+ desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
1537
1556
  run: async (a: string[]) => {
1538
1557
  const apply = (id: string) => {
1539
1558
  const m = resolveModelOrNewest(id);
1540
- dx!.options.voiceModel = m;
1541
- dx!.voice.options.model = m; // live agent — next voice turn uses it (transcript untouched)
1542
- err(green(` ✓ voice model → ${m}\n`));
1559
+ dx!.options.reflexModel = m;
1560
+ dx!.voice.options.model = m;
1561
+ err(green(` ✓ reflex model → ${m}\n`));
1543
1562
  };
1544
1563
  if (a[0]) { apply(a[0]); return; }
1545
- const picked = await pickModel(dx!.options.voiceModel);
1564
+ const picked = await pickModel(dx!.options.reflexModel);
1546
1565
  if (picked) apply(picked);
1547
- else err(dim(` voice ${dx!.options.voiceModel}\n`));
1566
+ else err(dim(` reflex ${dx!.options.reflexModel}\n`));
1567
+ },
1568
+ }, 'think-model': {
1569
+ desc: 'switch the think (premium) model, or /think-model off to disable',
1570
+ run: async (a: string[]) => {
1571
+ if (a[0] === 'off' || a[0] === 'false') {
1572
+ dx!.setThinkModel(false); // live: removes the Think tool from the voice agent
1573
+ err(green(` ✓ think tier disabled\n`));
1574
+ return;
1575
+ }
1576
+ const apply = (id: string) => {
1577
+ const m = resolveModelOrNewest(id);
1578
+ dx!.setThinkModel(m); // live: adds the Think tool if it was disabled
1579
+ err(green(` ✓ think model → ${m}\n`));
1580
+ };
1581
+ if (a[0]) { apply(a[0]); return; }
1582
+ const current = dx!.options.thinkModel === false ? undefined : dx!.options.thinkModel;
1583
+ const picked = await pickModel(current ?? 'anthropic/claude-opus-4-6');
1584
+ if (picked) apply(picked);
1585
+ else err(dim(` think ${dx!.options.thinkModel === false ? 'off' : dx!.options.thinkModel}\n`));
1586
+ },
1587
+ }, act: {
1588
+ desc: 'spawn a standard worker — /act <brief>',
1589
+ run: async (a: string[]) => {
1590
+ if (!a.length) { err(dim(' usage: /act <what to do>\n')); return; }
1591
+ const id = await dx!.dispatch(a.join(' '), 'act');
1592
+ err(dim(` → task ${id} started\n`));
1593
+ },
1594
+ }, think: {
1595
+ desc: 'spawn a deep-reasoning worker — /think <question>',
1596
+ run: async (a: string[]) => {
1597
+ if (!a.length) { err(dim(' usage: /think <what to reason about>\n')); return; }
1598
+ const off = dx!.options.thinkModel === false; // dispatch silently downgrades — tell the user
1599
+ const id = await dx!.dispatch(a.join(' '), 'think');
1600
+ err(dim(` → task ${id} ${off ? '(think tier off — running as act)' : '(think)'} started\n`));
1548
1601
  },
1549
1602
  } } : {}),
1550
1603
  reasoning: {
@@ -1813,7 +1866,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1813
1866
 
1814
1867
  err(bold('agent.libx.js') + cyan(' v' + VERSION) + dim(` — ${work.model} · ${cwd}\n`));
1815
1868
  err(dim('Type a task, or /help. Type / or @ for live suggestions (↑/↓ ⏎). Esc cancels/clears; double-Esc jumps back; Ctrl-D exits.\n'));
1816
- if (dx) err(dim(`◑ duplex — voice: ${dx.options.voiceModel} · worker: ${work.model} (real work runs in background tasks, re-voiced when done)\n`));
1869
+ if (dx) err(dim(`◑ duplex — reflex: ${dx.options.reflexModel} · act: ${work.model}${dx.options.thinkModel !== false ? ` · think: ${dx.options.thinkModel}` : ''} (real work runs in background tasks, re-voiced when done)\n`));
1817
1870
  // Live suggestions: file/dir entries from the real cwd; command/skill descriptions for the menu.
1818
1871
  const listDir: DirLister = (absDir) => {
1819
1872
  try {
@@ -183,11 +183,14 @@ interface RunResult {
183
183
  /** Why the loop ended. The middle group are automatic kill-switches (budget/abuse guards). */
184
184
  finishReason: 'stop' | 'max_steps' | 'budget' | 'timeout' | 'loop' | 'max_tool_calls' | 'aborted' | 'error';
185
185
  messages: Message[];
186
- /** Accumulated token usage across all turns (non-stream path). */
186
+ /** Accumulated token usage across all turns (non-stream path). With prompt caching,
187
+ * promptTokens includes cached reads/writes; the cache splits ride along for exact pricing. */
187
188
  usage?: {
188
189
  promptTokens: number;
189
190
  completionTokens: number;
190
191
  totalTokens: number;
192
+ cacheCreationTokens?: number;
193
+ cacheReadTokens?: number;
191
194
  };
192
195
  /** True if ANY turn's usage was estimated (provider gave none) rather than exact — lets the UI mark cost `~`. */
193
196
  usageEstimated?: boolean;
@@ -275,6 +278,10 @@ declare class AgentOptions {
275
278
  };
276
279
  /** Provider-specific options forwarded to ai.chat() (e.g. cursor mcpServers, cwd). */
277
280
  providerOptions?: Record<string, unknown>;
281
+ /** Prompt caching (providers that support it, e.g. Anthropic): cache tools/system/conversation
282
+ * prefix across the loop's steps — reads cost 0.1x, writes 1.25x. A multi-step agent loop
283
+ * re-sends its whole prefix every step, so this is a large net cost cut. Default on. */
284
+ promptCache: boolean;
278
285
  /** Tool selection mode: 'auto' = model decides (needed for Groq); undefined = provider default. */
279
286
  toolChoice?: 'auto' | 'required' | 'none';
280
287
  /** Extended-thinking / reasoning effort, normalized across providers (anthropic, openai).
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { h as RunResult, R as ReasoningEffort } from './Agent-B_xvSHlG.js';
2
+ import { h as RunResult, R as ReasoningEffort } from './Agent-Di1u5nH0.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
5
5
 
@@ -76,6 +76,7 @@ interface Args {
76
76
  version: boolean;
77
77
  duplex: boolean;
78
78
  voiceModel?: string;
79
+ thinkModel?: string | false;
79
80
  voice: boolean;
80
81
  cont: boolean;
81
82
  resume?: string;
@@ -106,11 +107,13 @@ declare function exportMarkdown(meta: {
106
107
  costUsd?: number;
107
108
  costEstimated?: boolean;
108
109
  }, messages: Message[]): string;
109
- /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced. */
110
+ /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
111
+ * Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
112
+ * (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
110
113
  declare function costOf(pricing: {
111
114
  inputCostPer1K: number;
112
115
  outputCostPer1K: number;
113
- } | undefined, promptTokens?: number, completionTokens?: number): number;
116
+ } | undefined, promptTokens?: number, completionTokens?: number, cacheCreationTokens?: number, cacheReadTokens?: number): number;
114
117
  /** Format a USD amount: 2 decimals at $1+, 4 below (agent turns are sub-cent). */
115
118
  declare function fmtUsd(n: number): string;
116
119
  /** ~4 chars/token estimate over a transcript (matches the Agent's context-budget heuristic). */
@@ -177,6 +180,8 @@ declare function jsonResult(res: RunResult, session: SessionData): {
177
180
  promptTokens: number;
178
181
  completionTokens: number;
179
182
  totalTokens: number;
183
+ cacheCreationTokens?: number;
184
+ cacheReadTokens?: number;
180
185
  } | undefined;
181
186
  sessionId: string;
182
187
  };