agent.libx.js 0.93.8 → 0.93.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,7 +54,7 @@ Beyond file tools, the runtime ships the higher-altitude pieces too — each an
54
54
  - **Subagents** (`subagents`; **typed agents** via `agentsDir` — `<dir>/<name>.md` defines a persona + model + scoped tools, selected with the `Task` `agentType`), **hooks** (`hooks`: preToolUse/postToolUse/onStop — block or audit any tool call), **slash-commands** (`commandsDir`), **TodoWrite**, **MCP** (`mcpToolsToAgentTools`).
55
55
  - **Streaming** (`stream: true` → `text_delta` via `HostBridge.notify`) and **context compaction** (`compaction: { maxMessages }` → edge-safe summarize-and-boundary). Defaults preserve the original non-stream, drop-oldest behavior.
56
56
  - **Multi-turn + project context** — `Agent.send()` continues a conversation across turns (vs `run()`, which starts fresh); **project instructions** (`instructionFiles`: `AGENTS.md`/`CLAUDE.md` at the FS root) inject into the system prompt.
57
- - **DuplexAgent** (`src/duplex.ts`) — voice-optimized dual-model engine: a fast voice agent streams instant single-voice replies and escalates real work via a `Delegate` tool to detached slow workers; results are pushed back and re-voiced by the fast persona (turn mutex, coalesced completions, `TaskStatus`/`CancelTask`). See [`mind/10`](./mind/10-duplex.md).
57
+ - **DuplexAgent** (`src/duplex.ts`) — voice-optimized three-tier engine (reflex/act/think): a fast reflex agent streams instant replies and self-selects escalation — `Act` for standard tool work (Sonnet-class), `Think` for deep reasoning (Opus-class, configurable, default on). Results are pushed back and re-voiced by the reflex (turn mutex, coalesced completions, `TaskStatus`/`CancelTask`). See [`mind/10`](./mind/10-duplex.md).
58
58
  - **Budget kill-switches** — always-on per-run guards (`maxTokens`/`timeoutMs`/`maxRepeats`/`maxToolCalls`/`signal` → `finishReason` `budget`/`timeout`/`loop`/`max_tool_calls`/`aborted`) protect the API spend against runaway loops. The *enforceable* billing cap is server-side in the web key-proxy: a VFS-backed budget config (`/.agent/budget.json`, USD-metered, hot-reloaded, $100/wk default) a browser client can't bypass. See [`web/`](./web) and [`mind/06`](./mind/06-agent-features.md).
59
59
 
60
60
  ## The `agentx` CLI
package/cli/cli.ts CHANGED
@@ -92,7 +92,7 @@ interface Args {
92
92
  task?: string; model?: string; cwd?: string; stream: boolean; plan: boolean; ask: boolean; yes: boolean;
93
93
  vfs: boolean; shell: boolean | undefined; boddb?: string; seed: boolean;
94
94
  subagents: boolean; maxSteps?: number; maxTokens?: number; timeoutMs?: number; reasoning?: ReasoningEffort; help: boolean; version: boolean;
95
- duplex: boolean; voiceModel?: string; voice: boolean;
95
+ duplex: boolean; voiceModel?: string; thinkModel?: string | false; voice: boolean;
96
96
  cont: boolean; resume?: string; sessionId?: string; fork?: boolean; outputFormat: 'text' | 'json' | 'stream-json';
97
97
  allowedTools?: string[]; disallowedTools?: string[]; appendSystemPrompt?: string; addDirs?: string[];
98
98
  print?: boolean; debug?: boolean;
@@ -149,6 +149,8 @@ export function parseArgs(argv: string[]): Args {
149
149
  else if (x === '--duplex') a.duplex = true;
150
150
  else if (x === '--conversational' || x === '--convo' || x === '--voice') { a.voice = true; a.duplex = true; } // duplex + human conversational register (--convo/--voice = aliases)
151
151
  else if (x === '--voice-model') a.voiceModel = val(++i, x);
152
+ else if (x === '--think-model') a.thinkModel = val(++i, x);
153
+ else if (x === '--no-think') a.thinkModel = false;
152
154
  else if (x === '--allowedTools' || x === '--allowed-tools') a.allowedTools = val(++i, x).split(',').map((s) => s.trim()).filter(Boolean);
153
155
  else if (x === '--disallowedTools' || x === '--disallowed-tools') a.disallowedTools = val(++i, x).split(',').map((s) => s.trim()).filter(Boolean);
154
156
  else if (x === '--append-system-prompt') a.appendSystemPrompt = val(++i, x);
@@ -172,6 +174,7 @@ export function parseArgs(argv: string[]): Args {
172
174
  if (a.duplex && (a.task || a.print)) throw new Error('--duplex is interactive-only (a conversational mode) — drop the task/-p');
173
175
  if (a.duplex && a.plan) throw new Error('--plan is not supported in --duplex (workers are non-interactive; a plan could never be approved)');
174
176
  if (a.voiceModel && !a.duplex) throw new Error('--voice-model only applies with --duplex');
177
+ if (a.thinkModel !== undefined && !a.duplex) throw new Error('--think-model/--no-think only apply with --duplex');
175
178
  return a;
176
179
  }
177
180
 
@@ -209,6 +212,8 @@ Flags:
209
212
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
210
213
  spoken replies out (echo-cancelled; speak over it to interrupt)
211
214
  --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
215
+ --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
216
+ --no-think with --duplex: disable the Think tier (Act handles everything)
212
217
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
213
218
  --subagents allow the Task tool (spawn child agents)
214
219
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
@@ -238,7 +243,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
238
243
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
239
244
 
240
245
  REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
241
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit
246
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
242
247
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
243
248
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
244
249
  REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
@@ -493,15 +498,24 @@ function printHistory(messages: Message[]): void {
493
498
  if (s) err(s);
494
499
  }
495
500
 
496
- /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced. */
497
- export function costOf(pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined, promptTokens = 0, completionTokens = 0): number {
501
+ /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
502
+ * Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
503
+ * (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
504
+ export function costOf(
505
+ pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined,
506
+ promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0,
507
+ ): number {
498
508
  if (!pricing) return 0;
499
- return (promptTokens / 1000) * pricing.inputCostPer1K + (completionTokens / 1000) * pricing.outputCostPer1K;
509
+ const fresh = Math.max(0, promptTokens - cacheCreationTokens - cacheReadTokens);
510
+ return (fresh / 1000) * pricing.inputCostPer1K
511
+ + (cacheCreationTokens / 1000) * pricing.inputCostPer1K * 1.25
512
+ + (cacheReadTokens / 1000) * pricing.inputCostPer1K * 0.1
513
+ + (completionTokens / 1000) * pricing.outputCostPer1K;
500
514
  }
501
515
 
502
516
  /** Cost of one turn at `model`'s rate (looks up ai.libx.js pricing). */
503
- function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number }): number {
504
- return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0);
517
+ function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number; cacheCreationTokens?: number; cacheReadTokens?: number }): number {
518
+ return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0);
505
519
  }
506
520
 
507
521
  /** Evaluate whether a goal condition has been met, based on recent transcript. */
@@ -994,14 +1008,14 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
994
1008
  const agent = await makeAgent(args, ai, cfg, mounted.flatMap((m) => m.tools));
995
1009
 
996
1010
  // Non-duplex voice: let the model exit the session when the user says goodbye.
997
- // Duplex voice gets ExitSession via voiceOptions.tools (the agent here is the worker template — workers don't need it).
1011
+ // Duplex voice gets ExitSession via reflexOptions.tools (the agent here is the worker template — workers don't need it).
998
1012
  if (args.voice && !args.duplex) agent.options.tools = [...(agent.options.tools ?? []), exitSessionTool(() => { exitRequested = true; })];
999
1013
 
1000
1014
  // ── Duplex mode (`--duplex`): the REPL runs unchanged, but turns go through a fast VOICE agent
1001
1015
  // that answers instantly and delegates real work to background workers (re-voiced when done).
1002
1016
  // `face` = the transcript-owning agent the REPL drives (sessions, footer, Esc-abort, /compact);
1003
1017
  // `work` = the options that mean "the working agent" (/model, /reasoning, /tools, permissions —
1004
- // in duplex these are the WORKERS' options; workers are constructed fresh per Delegate).
1018
+ // in duplex these are the WORKERS' options; workers are constructed fresh per Act/Think).
1005
1019
  const duplex = args.duplex;
1006
1020
  let dx: DuplexAgent | undefined;
1007
1021
  let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
@@ -1020,13 +1034,25 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1020
1034
  const duplexAsk = async (call: ToolUse): Promise<{ decision: 'allow' | 'deny' }> => {
1021
1035
  if (args.voice && dx) {
1022
1036
  const hint = summarizeCall(call.name, call.args).slice(0, 80);
1023
- // 'menu' mode: approve like a normal session — suspend the editor, pop the picker.
1024
- if ((cfg as any).voiceAskUi === 'menu') {
1037
+ // Default: approve like a normal session — suspend the editor, pop an interactive picker
1038
+ // (Allow once / always / Deny). Set `voiceAskUi: 'relay'` to opt into the spoken park/relay flow.
1039
+ if ((cfg as any).voiceAskUi !== 'relay') {
1025
1040
  editorRef?.suspend();
1026
- const v = await selectMenu(process.stderr, { title: `? background worker asks to run ${call.name} ${hint}`, items: [{ label: 'Allow', value: 'y' }, { label: 'Deny', value: 'n' }], current: 'n' });
1041
+ const v = await selectMenu(process.stderr, {
1042
+ title: `? background worker asks to run ${call.name} ${hint}`,
1043
+ items: [{ label: 'Allow once', value: 'y' }, { label: 'Allow always', value: 'a' }, { label: 'Deny', value: 'n' }],
1044
+ current: 'y',
1045
+ });
1027
1046
  editorRef?.resume();
1028
1047
  editorRef?.redrawNow();
1029
- return { decision: v === 'y' ? 'allow' : 'deny' };
1048
+ if (v === 'a') {
1049
+ // Remember a command-scoped allow: a live session rule (wins next ask; glob has no `*`
1050
+ // → exact-command match) + persist to .agent/permissions.json for future sessions.
1051
+ const cmd = typeof call.args?.command === 'string' ? call.args.command : null;
1052
+ work.permissions?.options.rules.unshift(cmd ? { tool: call.name, pathGlob: cmd, decision: 'allow' } : { tool: call.name, decision: 'allow' });
1053
+ persistRule(cwd, 'allow', cmd ? `${call.name}(${cmd})` : call.name);
1054
+ }
1055
+ return { decision: v === 'y' || v === 'a' ? 'allow' : 'deny' };
1030
1056
  }
1031
1057
  // NB: perm asks are keyed perm-N (PermissionPolicy.ask carries no task identity), so a
1032
1058
  // cancelled task can't clean its parked perm question — bounded by askTimeoutMs → deny.
@@ -1070,6 +1096,10 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1070
1096
  voiceIO.speakDelta(e.message);
1071
1097
  editorRef?.suspend(); // no-op when already suspended
1072
1098
  }
1099
+ if (e.kind === 'hold_filler' && voiceIO) {
1100
+ voiceIO.speakFiller(e.message);
1101
+ return;
1102
+ }
1073
1103
  if (e.kind === 'revoice_done') { // a re-voice turn ended outside runTurn's flush — drain the markdown tail now
1074
1104
  base.flushText();
1075
1105
  process.stdout.write('\n');
@@ -1108,7 +1138,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1108
1138
  // Conversational undo: the voice can roll back per-task checkpoint frames ("undo that").
1109
1139
  const rewindFilesTool: AgentTool = {
1110
1140
  name: 'RewindFiles',
1111
- description: 'Undo file changes made by delegated tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.',
1141
+ description: 'Undo file changes made by Act/Think tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.',
1112
1142
  parameters: { type: 'object', properties: { steps: { type: 'number', description: 'how many task checkpoints to undo (default 1)' } } },
1113
1143
  run: async ({ steps }) => {
1114
1144
  if (!checkpoints.size) return 'No file checkpoints to rewind yet.';
@@ -1124,9 +1154,10 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1124
1154
  fs: agent.options.fs,
1125
1155
  memoryDir: agent.options.memoryDir,
1126
1156
  memoryUserDir: agent.options.memoryUserDir,
1127
- ...((args.voiceModel ?? cfg.voiceModel) ? { voiceModel: resolveModelOrNewest((args.voiceModel ?? cfg.voiceModel)!) } : {}),
1128
- workerModel: agent.options.model,
1129
- workerOptions,
1157
+ ...((args.voiceModel ?? cfg.reflexModel) ? { reflexModel: resolveModelOrNewest((args.voiceModel ?? cfg.reflexModel)!) } : {}),
1158
+ actModel: agent.options.model,
1159
+ actOptions: workerOptions,
1160
+ ...((args.thinkModel ?? cfg.thinkModel) !== undefined ? { thinkModel: (args.thinkModel ?? cfg.thinkModel) === false ? false : resolveModelOrNewest(String(args.thinkModel ?? cfg.thinkModel)) } : {}),
1130
1161
  host,
1131
1162
  ...(args.voice ? { voiceStyle: 'conversational' as const, progressUpdates: true, askRelay: true } : {}), // voice: progress asides + worker questions relayed through the conversation
1132
1163
  // Per-TASK checkpoint frames (the natural undo unit in duplex = one delegation): opened BEFORE
@@ -1152,8 +1183,8 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1152
1183
  },
1153
1184
  },
1154
1185
  // The voice runs on the REAL fs (it has no fs tools — harmless) so @mentions, !cmd and #note
1155
- // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Delegate …).
1156
- voiceOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => { exitRequested = true; })] },
1186
+ // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Act …).
1187
+ reflexOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => { exitRequested = true; })] },
1157
1188
  });
1158
1189
  }
1159
1190
  const face: Agent = dx ? dx.voice : agent; // the transcript-owning agent the REPL drives
@@ -1197,7 +1228,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1197
1228
  return next;
1198
1229
  };
1199
1230
  // Model switching targets the WORKER in duplex (the voice model is a --voice-model startup choice).
1200
- const setModel = (m: string) => { work.model = m; if (dx) dx.options.workerModel = m; persistModel(cwd, m); err(dim(' model → ' + m + '\n')); };
1231
+ const setModel = (m: string) => { work.model = m; if (dx) dx.options.actModel = m; persistModel(cwd, m); err(dim(' model → ' + m + '\n')); };
1201
1232
  // Tool mutations (/mcp add|remove|login) — duplex workers are constructed per spawn from work.tools.
1202
1233
  const addWorkTools = (ts: AgentTool[]) => { if (duplex) work.tools = [...(work.tools ?? []), ...ts]; else agent.addTools(ts); };
1203
1234
  const removeWorkTools = (names: string[]) => { if (duplex) work.tools = (work.tools ?? []).filter((t) => !names.includes(t.name)); else agent.removeTools(names); };
@@ -1446,7 +1477,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1446
1477
  desc: 'show CLI version + runtime',
1447
1478
  run: () => {
1448
1479
  const rt = (process.versions as any).bun ? `bun ${(process.versions as any).bun}` : `node ${process.versions.node}`;
1449
- err(` ${bold('agent.libx.js')} ${cyan('v' + VERSION)}${dim(` · ${duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model} · ${rt}`)}\n`);
1480
+ err(` ${bold('agent.libx.js')} ${cyan('v' + VERSION)}${dim(` · ${duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model} · ${rt}`)}\n`);
1450
1481
  },
1451
1482
  },
1452
1483
  tools: {
@@ -1472,7 +1503,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1472
1503
  const mode = args.vfs ? 'sandbox (VFS — disk untouched)' : args.boddb ? `boddb (database workspace at ${args.boddb} — disk untouched)` : args.shell ? 'disk + real /bin/sh' : 'disk (full real FS, like Claude Code)';
1473
1504
  const pol = work.permissions;
1474
1505
  const perm = !pol ? 'allow all (unattended)' : `${pol.options.rules.length} rule(s), default ${pol.options.default}`;
1475
- const model = duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model;
1506
+ const model = duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model;
1476
1507
  err(formatStatus({ model, cwd, mode, tools: (duplex ? work.tools ?? [] : agent.options.tools).map((t) => t.name), permissions: perm, turns: session.meta.turns, tokens: session.meta.tokens ?? 0, sessionId: session.meta.id, estimated: session.meta.costEstimated ?? false }));
1477
1508
  if (duplex && dx!.tasks.size) err(dim(` tasks: ${[...dx!.tasks.values()].map((t) => `${t.id}:${t.status}`).join(' ')}\n`));
1478
1509
  },
@@ -1523,7 +1554,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1523
1554
  if (a[0]) { setModel(a[0]); return; }
1524
1555
  const picked = await pickModel(work.model);
1525
1556
  if (picked) setModel(picked);
1526
- else err(dim(' ' + (duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model) + '\n'));
1557
+ else err(dim(' ' + (duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model) + '\n'));
1527
1558
  },
1528
1559
  },
1529
1560
  ...(duplex ? { workers: {
@@ -1533,18 +1564,52 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1533
1564
  err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
1534
1565
  },
1535
1566
  }, 'voice-model': {
1536
- desc: 'switch the duplex voice (fast) model — /voice-model <id>, or alone for a picker',
1567
+ desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
1537
1568
  run: async (a: string[]) => {
1538
1569
  const apply = (id: string) => {
1539
1570
  const m = resolveModelOrNewest(id);
1540
- dx!.options.voiceModel = m;
1541
- dx!.voice.options.model = m; // live agent — next voice turn uses it (transcript untouched)
1542
- err(green(` ✓ voice model → ${m}\n`));
1571
+ dx!.options.reflexModel = m;
1572
+ dx!.voice.options.model = m;
1573
+ err(green(` ✓ reflex model → ${m}\n`));
1543
1574
  };
1544
1575
  if (a[0]) { apply(a[0]); return; }
1545
- const picked = await pickModel(dx!.options.voiceModel);
1576
+ const picked = await pickModel(dx!.options.reflexModel);
1546
1577
  if (picked) apply(picked);
1547
- else err(dim(` voice ${dx!.options.voiceModel}\n`));
1578
+ else err(dim(` reflex ${dx!.options.reflexModel}\n`));
1579
+ },
1580
+ }, 'think-model': {
1581
+ desc: 'switch the think (premium) model, or /think-model off to disable',
1582
+ run: async (a: string[]) => {
1583
+ if (a[0] === 'off' || a[0] === 'false') {
1584
+ dx!.setThinkModel(false); // live: removes the Think tool from the voice agent
1585
+ err(green(` ✓ think tier disabled\n`));
1586
+ return;
1587
+ }
1588
+ const apply = (id: string) => {
1589
+ const m = resolveModelOrNewest(id);
1590
+ dx!.setThinkModel(m); // live: adds the Think tool if it was disabled
1591
+ err(green(` ✓ think model → ${m}\n`));
1592
+ };
1593
+ if (a[0]) { apply(a[0]); return; }
1594
+ const current = dx!.options.thinkModel === false ? undefined : dx!.options.thinkModel;
1595
+ const picked = await pickModel(current ?? 'anthropic/claude-opus-4-6');
1596
+ if (picked) apply(picked);
1597
+ else err(dim(` think ${dx!.options.thinkModel === false ? 'off' : dx!.options.thinkModel}\n`));
1598
+ },
1599
+ }, act: {
1600
+ desc: 'spawn a standard worker — /act <brief>',
1601
+ run: async (a: string[]) => {
1602
+ if (!a.length) { err(dim(' usage: /act <what to do>\n')); return; }
1603
+ const id = await dx!.dispatch(a.join(' '), 'act');
1604
+ err(dim(` → task ${id} started\n`));
1605
+ },
1606
+ }, think: {
1607
+ desc: 'spawn a deep-reasoning worker — /think <question>',
1608
+ run: async (a: string[]) => {
1609
+ if (!a.length) { err(dim(' usage: /think <what to reason about>\n')); return; }
1610
+ const off = dx!.options.thinkModel === false; // dispatch silently downgrades — tell the user
1611
+ const id = await dx!.dispatch(a.join(' '), 'think');
1612
+ err(dim(` → task ${id} ${off ? '(think tier off — running as act)' : '(think)'} started\n`));
1548
1613
  },
1549
1614
  } } : {}),
1550
1615
  reasoning: {
@@ -1813,7 +1878,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1813
1878
 
1814
1879
  err(bold('agent.libx.js') + cyan(' v' + VERSION) + dim(` — ${work.model} · ${cwd}\n`));
1815
1880
  err(dim('Type a task, or /help. Type / or @ for live suggestions (↑/↓ ⏎). Esc cancels/clears; double-Esc jumps back; Ctrl-D exits.\n'));
1816
- if (dx) err(dim(`◑ duplex — voice: ${dx.options.voiceModel} · worker: ${work.model} (real work runs in background tasks, re-voiced when done)\n`));
1881
+ if (dx) err(dim(`◑ duplex — reflex: ${dx.options.reflexModel} · act: ${work.model}${dx.options.thinkModel !== false ? ` · think: ${dx.options.thinkModel}` : ''} (real work runs in background tasks, re-voiced when done)\n`));
1817
1882
  // Live suggestions: file/dir entries from the real cwd; command/skill descriptions for the menu.
1818
1883
  const listDir: DirLister = (absDir) => {
1819
1884
  try {
@@ -2067,7 +2132,28 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2067
2132
  err(dim(` … cancelled ${running.length} background task(s)\n`));
2068
2133
  } else if (running.length) {
2069
2134
  err(dim(` … waiting for ${running.length} background task(s) (Ctrl-C to force quit)\n`));
2070
- await dx.idle();
2135
+ // stdin is still in raw mode here, so Ctrl-C arrives as a 0x03 byte (no SIGINT).
2136
+ // Race the drain against a raw Ctrl-C: on press, abort all workers and bail.
2137
+ let forced = false;
2138
+ let onCtrlC = () => {};
2139
+ const onByte = (b: Buffer) => {
2140
+ if (!b.includes(0x03)) return; // Ctrl-C
2141
+ forced = true;
2142
+ for (const t of running) { t.status = 'cancelled'; t.controller.abort(); }
2143
+ err(dim(`\n … force-quit — cancelled ${running.length} background task(s)\n`));
2144
+ onCtrlC();
2145
+ };
2146
+ process.stdin.on('data', onByte);
2147
+ await Promise.race([dx.idle(), new Promise<void>((res) => { onCtrlC = res; })]);
2148
+ process.stdin.off('data', onByte);
2149
+ if (forced) {
2150
+ // User force-quit: tear down and hard-exit — don't trust the event loop to drain
2151
+ // (voice children / sockets / MCP handles can keep the process alive otherwise).
2152
+ voiceIO?.stop();
2153
+ releaseStdin();
2154
+ await closeMcp(mounted);
2155
+ process.exit(130);
2156
+ }
2071
2157
  (face.options.host as { flushText?: () => void } | undefined)?.flushText?.();
2072
2158
  duplexPersist();
2073
2159
  }
@@ -183,11 +183,14 @@ interface RunResult {
183
183
  /** Why the loop ended. The middle group are automatic kill-switches (budget/abuse guards). */
184
184
  finishReason: 'stop' | 'max_steps' | 'budget' | 'timeout' | 'loop' | 'max_tool_calls' | 'aborted' | 'error';
185
185
  messages: Message[];
186
- /** Accumulated token usage across all turns (non-stream path). */
186
+ /** Accumulated token usage across all turns (non-stream path). With prompt caching,
187
+ * promptTokens includes cached reads/writes; the cache splits ride along for exact pricing. */
187
188
  usage?: {
188
189
  promptTokens: number;
189
190
  completionTokens: number;
190
191
  totalTokens: number;
192
+ cacheCreationTokens?: number;
193
+ cacheReadTokens?: number;
191
194
  };
192
195
  /** True if ANY turn's usage was estimated (provider gave none) rather than exact — lets the UI mark cost `~`. */
193
196
  usageEstimated?: boolean;
@@ -275,6 +278,10 @@ declare class AgentOptions {
275
278
  };
276
279
  /** Provider-specific options forwarded to ai.chat() (e.g. cursor mcpServers, cwd). */
277
280
  providerOptions?: Record<string, unknown>;
281
+ /** Prompt caching (providers that support it, e.g. Anthropic): cache tools/system/conversation
282
+ * prefix across the loop's steps — reads cost 0.1x, writes 1.25x. A multi-step agent loop
283
+ * re-sends its whole prefix every step, so this is a large net cost cut. Default on. */
284
+ promptCache: boolean;
278
285
  /** Tool selection mode: 'auto' = model decides (needed for Groq); undefined = provider default. */
279
286
  toolChoice?: 'auto' | 'required' | 'none';
280
287
  /** Extended-thinking / reasoning effort, normalized across providers (anthropic, openai).
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { h as RunResult, R as ReasoningEffort } from './Agent-B_xvSHlG.js';
2
+ import { h as RunResult, R as ReasoningEffort } from './Agent-Di1u5nH0.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
5
5
 
@@ -76,6 +76,7 @@ interface Args {
76
76
  version: boolean;
77
77
  duplex: boolean;
78
78
  voiceModel?: string;
79
+ thinkModel?: string | false;
79
80
  voice: boolean;
80
81
  cont: boolean;
81
82
  resume?: string;
@@ -106,11 +107,13 @@ declare function exportMarkdown(meta: {
106
107
  costUsd?: number;
107
108
  costEstimated?: boolean;
108
109
  }, messages: Message[]): string;
109
- /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced. */
110
+ /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
111
+ * Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
112
+ * (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
110
113
  declare function costOf(pricing: {
111
114
  inputCostPer1K: number;
112
115
  outputCostPer1K: number;
113
- } | undefined, promptTokens?: number, completionTokens?: number): number;
116
+ } | undefined, promptTokens?: number, completionTokens?: number, cacheCreationTokens?: number, cacheReadTokens?: number): number;
114
117
  /** Format a USD amount: 2 decimals at $1+, 4 below (agent turns are sub-cent). */
115
118
  declare function fmtUsd(n: number): string;
116
119
  /** ~4 chars/token estimate over a transcript (matches the Agent's context-budget heuristic). */
@@ -177,6 +180,8 @@ declare function jsonResult(res: RunResult, session: SessionData): {
177
180
  promptTokens: number;
178
181
  completionTokens: number;
179
182
  totalTokens: number;
183
+ cacheCreationTokens?: number;
184
+ cacheReadTokens?: number;
180
185
  } | undefined;
181
186
  sessionId: string;
182
187
  };