npm - agent.libx.js - Versions diffs - 0.93.8 → 0.93.11 - Mend

agent.libx.js 0.93.8 → 0.93.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +1 -1
package/cli/cli.ts +117 -31
package/dist/{Agent-B_xvSHlG.d.ts → Agent-Di1u5nH0.d.ts} +8 -1
package/dist/cli.d.ts +8 -3
package/dist/cli.js +344 -76
package/dist/cli.js.map +1 -1
package/dist/index.d.ts +61 -21
package/dist/index.js +227 -52
package/dist/index.js.map +1 -1
package/package.json +2 -2

package/README.md CHANGED

@@ -54,7 +54,7 @@ Beyond file tools, the runtime ships the higher-altitude pieces too — each an
 - **Subagents** (`subagents`; **typed agents** via `agentsDir` — `<dir>/<name>.md` defines a persona + model + scoped tools, selected with the `Task` `agentType`), **hooks** (`hooks`: preToolUse/postToolUse/onStop — block or audit any tool call), **slash-commands** (`commandsDir`), **TodoWrite**, **MCP** (`mcpToolsToAgentTools`).
 - **Streaming** (`stream: true` → `text_delta` via `HostBridge.notify`) and **context compaction** (`compaction: { maxMessages }` → edge-safe summarize-and-boundary). Defaults preserve the original non-stream, drop-oldest behavior.
 - **Multi-turn + project context** — `Agent.send()` continues a conversation across turns (vs `run()`, which starts fresh); **project instructions** (`instructionFiles`: `AGENTS.md`/`CLAUDE.md` at the FS root) inject into the system prompt.
-- **DuplexAgent** (`src/duplex.ts`) — voice-optimized dual-model engine: a fast voice agent streams instant single-voice replies and escalates real work via a `Delegate` tool to detached slow workers; results are pushed back and re-voiced by the fast persona (turn mutex, coalesced completions, `TaskStatus`/`CancelTask`). See [`mind/10`](./mind/10-duplex.md).
+- **DuplexAgent** (`src/duplex.ts`) — voice-optimized three-tier engine (reflex/act/think): a fast reflex agent streams instant replies and self-selects escalation — `Act` for standard tool work (Sonnet-class), `Think` for deep reasoning (Opus-class, configurable, default on). Results are pushed back and re-voiced by the reflex (turn mutex, coalesced completions, `TaskStatus`/`CancelTask`). See [`mind/10`](./mind/10-duplex.md).
 - **Budget kill-switches** — always-on per-run guards (`maxTokens`/`timeoutMs`/`maxRepeats`/`maxToolCalls`/`signal` → `finishReason` `budget`/`timeout`/`loop`/`max_tool_calls`/`aborted`) protect the API spend against runaway loops. The *enforceable* billing cap is server-side in the web key-proxy: a VFS-backed budget config (`/.agent/budget.json`, USD-metered, hot-reloaded, $100/wk default) a browser client can't bypass. See [`web/`](./web) and [`mind/06`](./mind/06-agent-features.md).
 ## The `agentx` CLI

package/cli/cli.ts CHANGED

@@ -92,7 +92,7 @@ interface Args {
   task?: string; model?: string; cwd?: string; stream: boolean; plan: boolean; ask: boolean; yes: boolean;
   vfs: boolean; shell: boolean | undefined; boddb?: string; seed: boolean;
   subagents: boolean; maxSteps?: number; maxTokens?: number; timeoutMs?: number; reasoning?: ReasoningEffort; help: boolean; version: boolean;
-  duplex: boolean; voiceModel?: string; voice: boolean;
+  duplex: boolean; voiceModel?: string; thinkModel?: string | false; voice: boolean;
   cont: boolean; resume?: string; sessionId?: string; fork?: boolean; outputFormat: 'text' | 'json' | 'stream-json';
   allowedTools?: string[]; disallowedTools?: string[]; appendSystemPrompt?: string; addDirs?: string[];
   print?: boolean; debug?: boolean;
@@ -149,6 +149,8 @@ export function parseArgs(argv: string[]): Args {
     else if (x === '--duplex') a.duplex = true;
     else if (x === '--conversational' || x === '--convo' || x === '--voice') { a.voice = true; a.duplex = true; } // duplex + human conversational register (--convo/--voice = aliases)
     else if (x === '--voice-model') a.voiceModel = val(++i, x);
+    else if (x === '--think-model') a.thinkModel = val(++i, x);
+    else if (x === '--no-think') a.thinkModel = false;
     else if (x === '--allowedTools' || x === '--allowed-tools') a.allowedTools = val(++i, x).split(',').map((s) => s.trim()).filter(Boolean);
     else if (x === '--disallowedTools' || x === '--disallowed-tools') a.disallowedTools = val(++i, x).split(',').map((s) => s.trim()).filter(Boolean);
     else if (x === '--append-system-prompt') a.appendSystemPrompt = val(++i, x);
@@ -172,6 +174,7 @@ export function parseArgs(argv: string[]): Args {
   if (a.duplex && (a.task || a.print)) throw new Error('--duplex is interactive-only (a conversational mode) — drop the task/-p');
   if (a.duplex && a.plan) throw new Error('--plan is not supported in --duplex (workers are non-interactive; a plan could never be approved)');
   if (a.voiceModel && !a.duplex) throw new Error('--voice-model only applies with --duplex');
+  if (a.thinkModel !== undefined && !a.duplex) throw new Error('--think-model/--no-think only apply with --duplex');
   return a;
 }
@@ -209,6 +212,8 @@ Flags:
                        with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
                        spoken replies out (echo-cancelled; speak over it to interrupt)
   --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
+  --think-model <id>   with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
+  --no-think           with --duplex: disable the Think tier (Act handles everything)
   --add-dir <path>     mount another directory into the workspace (repeatable; disk mode only)
   --subagents          allow the Task tool (spawn child agents)
   --reasoning <e>      extended thinking: off|low|medium|high or a token budget (anthropic/openai)
@@ -238,7 +243,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
 Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
 REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
-REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit
+REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
 REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
 REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
 REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
@@ -493,15 +498,24 @@ function printHistory(messages: Message[]): void {
   if (s) err(s);
 }
-/** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced. */
-export function costOf(pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined, promptTokens = 0, completionTokens = 0): number {
+/** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
+ *  Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
+ *  (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
+export function costOf(
+  pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined,
+  promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0,
+): number {
   if (!pricing) return 0;
-  return (promptTokens / 1000) * pricing.inputCostPer1K + (completionTokens / 1000) * pricing.outputCostPer1K;
+  const fresh = Math.max(0, promptTokens - cacheCreationTokens - cacheReadTokens);
+  return (fresh / 1000) * pricing.inputCostPer1K
+    + (cacheCreationTokens / 1000) * pricing.inputCostPer1K * 1.25
+    + (cacheReadTokens / 1000) * pricing.inputCostPer1K * 0.1
+    + (completionTokens / 1000) * pricing.outputCostPer1K;
 }
 /** Cost of one turn at `model`'s rate (looks up ai.libx.js pricing). */
-function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number }): number {
-  return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0);
+function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number; cacheCreationTokens?: number; cacheReadTokens?: number }): number {
+  return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0);
 }
 /** Evaluate whether a goal condition has been met, based on recent transcript. */
@@ -994,14 +1008,14 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
   const agent = await makeAgent(args, ai, cfg, mounted.flatMap((m) => m.tools));
   // Non-duplex voice: let the model exit the session when the user says goodbye.
-  // Duplex voice gets ExitSession via voiceOptions.tools (the agent here is the worker template — workers don't need it).
+  // Duplex voice gets ExitSession via reflexOptions.tools (the agent here is the worker template — workers don't need it).
   if (args.voice && !args.duplex) agent.options.tools = [...(agent.options.tools ?? []), exitSessionTool(() => { exitRequested = true; })];
   // ── Duplex mode (`--duplex`): the REPL runs unchanged, but turns go through a fast VOICE agent
   // that answers instantly and delegates real work to background workers (re-voiced when done).
   // `face` = the transcript-owning agent the REPL drives (sessions, footer, Esc-abort, /compact);
   // `work` = the options that mean "the working agent" (/model, /reasoning, /tools, permissions —
-  // in duplex these are the WORKERS' options; workers are constructed fresh per Delegate).
+  // in duplex these are the WORKERS' options; workers are constructed fresh per Act/Think).
   const duplex = args.duplex;
   let dx: DuplexAgent | undefined;
   let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
@@ -1020,13 +1034,25 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
   const duplexAsk = async (call: ToolUse): Promise<{ decision: 'allow' | 'deny' }> => {
     if (args.voice && dx) {
       const hint = summarizeCall(call.name, call.args).slice(0, 80);
-      // 'menu' mode: approve like a normal session — suspend the editor, pop the picker.
-      if ((cfg as any).voiceAskUi === 'menu') {
+      // Default: approve like a normal session — suspend the editor, pop an interactive picker
+      // (Allow once / always / Deny). Set `voiceAskUi: 'relay'` to opt into the spoken park/relay flow.
+      if ((cfg as any).voiceAskUi !== 'relay') {
         editorRef?.suspend();
-        const v = await selectMenu(process.stderr, { title: `? background worker asks to run ${call.name} ${hint}`, items: [{ label: 'Allow', value: 'y' }, { label: 'Deny', value: 'n' }], current: 'n' });
+        const v = await selectMenu(process.stderr, {
+          title: `? background worker asks to run ${call.name} ${hint}`,
+          items: [{ label: 'Allow once', value: 'y' }, { label: 'Allow always', value: 'a' }, { label: 'Deny', value: 'n' }],
+          current: 'y',
+        });
         editorRef?.resume();
         editorRef?.redrawNow();
-        return { decision: v === 'y' ? 'allow' : 'deny' };
+        if (v === 'a') {
+          // Remember a command-scoped allow: a live session rule (wins next ask; glob has no `*`
+          // → exact-command match) + persist to .agent/permissions.json for future sessions.
+          const cmd = typeof call.args?.command === 'string' ? call.args.command : null;
+          work.permissions?.options.rules.unshift(cmd ? { tool: call.name, pathGlob: cmd, decision: 'allow' } : { tool: call.name, decision: 'allow' });
+          persistRule(cwd, 'allow', cmd ? `${call.name}(${cmd})` : call.name);
+        }
+        return { decision: v === 'y' || v === 'a' ? 'allow' : 'deny' };
       }
       // NB: perm asks are keyed perm-N (PermissionPolicy.ask carries no task identity), so a
       // cancelled task can't clean its parked perm question — bounded by askTimeoutMs → deny.
@@ -1070,6 +1096,10 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
           voiceIO.speakDelta(e.message);
           editorRef?.suspend(); // no-op when already suspended
         }
+        if (e.kind === 'hold_filler' && voiceIO) {
+          voiceIO.speakFiller(e.message);
+          return;
+        }
         if (e.kind === 'revoice_done') { // a re-voice turn ended outside runTurn's flush — drain the markdown tail now
           base.flushText();
           process.stdout.write('\n');
@@ -1108,7 +1138,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
     // Conversational undo: the voice can roll back per-task checkpoint frames ("undo that").
     const rewindFilesTool: AgentTool = {
       name: 'RewindFiles',
-      description: 'Undo file changes made by delegated tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.',
+      description: 'Undo file changes made by Act/Think tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.',
       parameters: { type: 'object', properties: { steps: { type: 'number', description: 'how many task checkpoints to undo (default 1)' } } },
       run: async ({ steps }) => {
         if (!checkpoints.size) return 'No file checkpoints to rewind yet.';
@@ -1124,9 +1154,10 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
       fs: agent.options.fs,
       memoryDir: agent.options.memoryDir,
       memoryUserDir: agent.options.memoryUserDir,
-      ...((args.voiceModel ?? cfg.voiceModel) ? { voiceModel: resolveModelOrNewest((args.voiceModel ?? cfg.voiceModel)!) } : {}),
-      workerModel: agent.options.model,
-      workerOptions,
+      ...((args.voiceModel ?? cfg.reflexModel) ? { reflexModel: resolveModelOrNewest((args.voiceModel ?? cfg.reflexModel)!) } : {}),
+      actModel: agent.options.model,
+      actOptions: workerOptions,
+      ...((args.thinkModel ?? cfg.thinkModel) !== undefined ? { thinkModel: (args.thinkModel ?? cfg.thinkModel) === false ? false : resolveModelOrNewest(String(args.thinkModel ?? cfg.thinkModel)) } : {}),
       host,
       ...(args.voice ? { voiceStyle: 'conversational' as const, progressUpdates: true, askRelay: true } : {}), // voice: progress asides + worker questions relayed through the conversation
       // Per-TASK checkpoint frames (the natural undo unit in duplex = one delegation): opened BEFORE
@@ -1152,8 +1183,8 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
         },
       },
       // The voice runs on the REAL fs (it has no fs tools — harmless) so @mentions, !cmd and #note
-      // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Delegate …).
-      voiceOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => { exitRequested = true; })] },
+      // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Act …).
+      reflexOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => { exitRequested = true; })] },
     });
   }
   const face: Agent = dx ? dx.voice : agent; // the transcript-owning agent the REPL drives
@@ -1197,7 +1228,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
     return next;
   };
   // Model switching targets the WORKER in duplex (the voice model is a --voice-model startup choice).
-  const setModel = (m: string) => { work.model = m; if (dx) dx.options.workerModel = m; persistModel(cwd, m); err(dim('  model → ' + m + '\n')); };
+  const setModel = (m: string) => { work.model = m; if (dx) dx.options.actModel = m; persistModel(cwd, m); err(dim('  model → ' + m + '\n')); };
   // Tool mutations (/mcp add|remove|login) — duplex workers are constructed per spawn from work.tools.
   const addWorkTools = (ts: AgentTool[]) => { if (duplex) work.tools = [...(work.tools ?? []), ...ts]; else agent.addTools(ts); };
   const removeWorkTools = (names: string[]) => { if (duplex) work.tools = (work.tools ?? []).filter((t) => !names.includes(t.name)); else agent.removeTools(names); };
@@ -1446,7 +1477,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
       desc: 'show CLI version + runtime',
       run: () => {
         const rt = (process.versions as any).bun ? `bun ${(process.versions as any).bun}` : `node ${process.versions.node}`;
-        err(`  ${bold('agent.libx.js')} ${cyan('v' + VERSION)}${dim(` · ${duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model} · ${rt}`)}\n`);
+        err(`  ${bold('agent.libx.js')} ${cyan('v' + VERSION)}${dim(` · ${duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model} · ${rt}`)}\n`);
       },
     },
     tools: {
@@ -1472,7 +1503,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
         const mode = args.vfs ? 'sandbox (VFS — disk untouched)' : args.boddb ? `boddb (database workspace at ${args.boddb} — disk untouched)` : args.shell ? 'disk + real /bin/sh' : 'disk (full real FS, like Claude Code)';
         const pol = work.permissions;
         const perm = !pol ? 'allow all (unattended)' : `${pol.options.rules.length} rule(s), default ${pol.options.default}`;
-        const model = duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model;
+        const model = duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model;
         err(formatStatus({ model, cwd, mode, tools: (duplex ? work.tools ?? [] : agent.options.tools).map((t) => t.name), permissions: perm, turns: session.meta.turns, tokens: session.meta.tokens ?? 0, sessionId: session.meta.id, estimated: session.meta.costEstimated ?? false }));
         if (duplex && dx!.tasks.size) err(dim(`  tasks: ${[...dx!.tasks.values()].map((t) => `${t.id}:${t.status}`).join(' ')}\n`));
       },
@@ -1523,7 +1554,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
         if (a[0]) { setModel(a[0]); return; }
         const picked = await pickModel(work.model);
         if (picked) setModel(picked);
-        else err(dim('  ' + (duplex ? `voice ${dx!.options.voiceModel} · worker ${work.model}` : work.model) + '\n'));
+        else err(dim('  ' + (duplex ? `reflex ${dx!.options.reflexModel} · act ${work.model}${dx!.options.thinkModel !== false ? ` · think ${dx!.options.thinkModel}` : ''}` : work.model) + '\n'));
       },
     },
     ...(duplex ? { workers: {
@@ -1533,18 +1564,52 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
         err(dim(`  worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
       },
     }, 'voice-model': {
-      desc: 'switch the duplex voice (fast) model — /voice-model <id>, or alone for a picker',
+      desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
       run: async (a: string[]) => {
         const apply = (id: string) => {
           const m = resolveModelOrNewest(id);
-          dx!.options.voiceModel = m;
-          dx!.voice.options.model = m; // live agent — next voice turn uses it (transcript untouched)
-          err(green(`  ✓ voice model → ${m}\n`));
+          dx!.options.reflexModel = m;
+          dx!.voice.options.model = m;
+          err(green(`  ✓ reflex model → ${m}\n`));
         };
         if (a[0]) { apply(a[0]); return; }
-        const picked = await pickModel(dx!.options.voiceModel);
+        const picked = await pickModel(dx!.options.reflexModel);
         if (picked) apply(picked);
-        else err(dim(`  voice ${dx!.options.voiceModel}\n`));
+        else err(dim(`  reflex ${dx!.options.reflexModel}\n`));
+      },
+    }, 'think-model': {
+      desc: 'switch the think (premium) model, or /think-model off to disable',
+      run: async (a: string[]) => {
+        if (a[0] === 'off' || a[0] === 'false') {
+          dx!.setThinkModel(false); // live: removes the Think tool from the voice agent
+          err(green(`  ✓ think tier disabled\n`));
+          return;
+        }
+        const apply = (id: string) => {
+          const m = resolveModelOrNewest(id);
+          dx!.setThinkModel(m); // live: adds the Think tool if it was disabled
+          err(green(`  ✓ think model → ${m}\n`));
+        };
+        if (a[0]) { apply(a[0]); return; }
+        const current = dx!.options.thinkModel === false ? undefined : dx!.options.thinkModel;
+        const picked = await pickModel(current ?? 'anthropic/claude-opus-4-6');
+        if (picked) apply(picked);
+        else err(dim(`  think ${dx!.options.thinkModel === false ? 'off' : dx!.options.thinkModel}\n`));
+      },
+    }, act: {
+      desc: 'spawn a standard worker — /act <brief>',
+      run: async (a: string[]) => {
+        if (!a.length) { err(dim('  usage: /act <what to do>\n')); return; }
+        const id = await dx!.dispatch(a.join(' '), 'act');
+        err(dim(`  → task ${id} started\n`));
+      },
+    }, think: {
+      desc: 'spawn a deep-reasoning worker — /think <question>',
+      run: async (a: string[]) => {
+        if (!a.length) { err(dim('  usage: /think <what to reason about>\n')); return; }
+        const off = dx!.options.thinkModel === false; // dispatch silently downgrades — tell the user
+        const id = await dx!.dispatch(a.join(' '), 'think');
+        err(dim(`  → task ${id} ${off ? '(think tier off — running as act)' : '(think)'} started\n`));
       },
     } } : {}),
     reasoning: {
@@ -1813,7 +1878,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
   err(bold('agent.libx.js') + cyan(' v' + VERSION) + dim(` — ${work.model} · ${cwd}\n`));
   err(dim('Type a task, or /help. Type / or @ for live suggestions (↑/↓ ⏎). Esc cancels/clears; double-Esc jumps back; Ctrl-D exits.\n'));
-  if (dx) err(dim(`◑ duplex — voice: ${dx.options.voiceModel} · worker: ${work.model} (real work runs in background tasks, re-voiced when done)\n`));
+  if (dx) err(dim(`◑ duplex — reflex: ${dx.options.reflexModel} · act: ${work.model}${dx.options.thinkModel !== false ? ` · think: ${dx.options.thinkModel}` : ''} (real work runs in background tasks, re-voiced when done)\n`));
   // Live suggestions: file/dir entries from the real cwd; command/skill descriptions for the menu.
   const listDir: DirLister = (absDir) => {
     try {
@@ -2067,7 +2132,28 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
       err(dim(`  … cancelled ${running.length} background task(s)\n`));
     } else if (running.length) {
       err(dim(`  … waiting for ${running.length} background task(s) (Ctrl-C to force quit)\n`));
-      await dx.idle();
+      // stdin is still in raw mode here, so Ctrl-C arrives as a 0x03 byte (no SIGINT).
+      // Race the drain against a raw Ctrl-C: on press, abort all workers and bail.
+      let forced = false;
+      let onCtrlC = () => {};
+      const onByte = (b: Buffer) => {
+        if (!b.includes(0x03)) return; // Ctrl-C
+        forced = true;
+        for (const t of running) { t.status = 'cancelled'; t.controller.abort(); }
+        err(dim(`\n  … force-quit — cancelled ${running.length} background task(s)\n`));
+        onCtrlC();
+      };
+      process.stdin.on('data', onByte);
+      await Promise.race([dx.idle(), new Promise<void>((res) => { onCtrlC = res; })]);
+      process.stdin.off('data', onByte);
+      if (forced) {
+        // User force-quit: tear down and hard-exit — don't trust the event loop to drain
+        // (voice children / sockets / MCP handles can keep the process alive otherwise).
+        voiceIO?.stop();
+        releaseStdin();
+        await closeMcp(mounted);
+        process.exit(130);
+      }
       (face.options.host as { flushText?: () => void } | undefined)?.flushText?.();
       duplexPersist();
     }

package/dist/{Agent-B_xvSHlG.d.ts → Agent-Di1u5nH0.d.ts} RENAMED

@@ -183,11 +183,14 @@ interface RunResult {
     /** Why the loop ended. The middle group are automatic kill-switches (budget/abuse guards). */
     finishReason: 'stop' | 'max_steps' | 'budget' | 'timeout' | 'loop' | 'max_tool_calls' | 'aborted' | 'error';
     messages: Message[];
-    /** Accumulated token usage across all turns (non-stream path). */
+    /** Accumulated token usage across all turns (non-stream path). With prompt caching,
+     *  promptTokens includes cached reads/writes; the cache splits ride along for exact pricing. */
     usage?: {
         promptTokens: number;
         completionTokens: number;
         totalTokens: number;
+        cacheCreationTokens?: number;
+        cacheReadTokens?: number;
     };
     /** True if ANY turn's usage was estimated (provider gave none) rather than exact — lets the UI mark cost `~`. */
     usageEstimated?: boolean;
@@ -275,6 +278,10 @@ declare class AgentOptions {
     };
     /** Provider-specific options forwarded to ai.chat() (e.g. cursor mcpServers, cwd). */
     providerOptions?: Record<string, unknown>;
+    /** Prompt caching (providers that support it, e.g. Anthropic): cache tools/system/conversation
+     *  prefix across the loop's steps — reads cost 0.1x, writes 1.25x. A multi-step agent loop
+     *  re-sends its whole prefix every step, so this is a large net cost cut. Default on. */
+    promptCache: boolean;
     /** Tool selection mode: 'auto' = model decides (needed for Groq); undefined = provider default. */
     toolChoice?: 'auto' | 'required' | 'none';
     /** Extended-thinking / reasoning effort, normalized across providers (anthropic, openai).

package/dist/cli.d.ts CHANGED

@@ -1,5 +1,5 @@
 #!/usr/bin/env bun
-import { h as RunResult, R as ReasoningEffort } from './Agent-B_xvSHlG.js';
+import { h as RunResult, R as ReasoningEffort } from './Agent-Di1u5nH0.js';
 import { IFilesystem } from '@livx.cc/wcli/core';
 import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
@@ -76,6 +76,7 @@ interface Args {
     version: boolean;
     duplex: boolean;
     voiceModel?: string;
+    thinkModel?: string | false;
     voice: boolean;
     cont: boolean;
     resume?: string;
@@ -106,11 +107,13 @@ declare function exportMarkdown(meta: {
     costUsd?: number;
     costEstimated?: boolean;
 }, messages: Message[]): string;
-/** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced. */
+/** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
+ *  Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
+ *  (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
 declare function costOf(pricing: {
     inputCostPer1K: number;
     outputCostPer1K: number;
-} | undefined, promptTokens?: number, completionTokens?: number): number;
+} | undefined, promptTokens?: number, completionTokens?: number, cacheCreationTokens?: number, cacheReadTokens?: number): number;
 /** Format a USD amount: 2 decimals at $1+, 4 below (agent turns are sub-cent). */
 declare function fmtUsd(n: number): string;
 /** ~4 chars/token estimate over a transcript (matches the Agent's context-budget heuristic). */
@@ -177,6 +180,8 @@ declare function jsonResult(res: RunResult, session: SessionData): {
         promptTokens: number;
         completionTokens: number;
         totalTokens: number;
+        cacheCreationTokens?: number;
+        cacheReadTokens?: number;
     } | undefined;
     sessionId: string;
 };