agent.libx.js 0.93.10 → 0.93.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/cli.ts CHANGED
@@ -498,24 +498,39 @@ function printHistory(messages: Message[]): void {
498
498
  if (s) err(s);
499
499
  }
500
500
 
501
+ /** Cache-read/write price multipliers over the input rate, by provider (derived from the model
502
+ * prefix). Anthropic: write 1.25x / read 0.1x. OpenAI & Gemini auto-cache (no write surcharge),
503
+ * reads 0.5x / 0.25x. DeepSeek read 0.1x. Unknown → no discount (1x/1x, safe over-estimate). */
504
+ export function cacheMultipliers(model?: string): { read: number; write: number } {
505
+ const p = (model ?? '').split('/')[0];
506
+ switch (p) {
507
+ case 'anthropic': return { read: 0.1, write: 1.25 };
508
+ case 'openai': return { read: 0.5, write: 1 };
509
+ case 'google': return { read: 0.25, write: 1 };
510
+ case 'deepseek': return { read: 0.1, write: 1 };
511
+ default: return { read: 1, write: 1 };
512
+ }
513
+ }
514
+
501
515
  /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
502
- * Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
503
- * (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
516
+ * Cache-aware: promptTokens includes cache reads/writes — priced at the provider's real multipliers
517
+ * (via `model`) so cached runs aren't overstated. Omitting `model` falls back to Anthropic's rates. */
504
518
  export function costOf(
505
519
  pricing: { inputCostPer1K: number; outputCostPer1K: number } | undefined,
506
- promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0,
520
+ promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0, model?: string,
507
521
  ): number {
508
522
  if (!pricing) return 0;
523
+ const mult = model ? cacheMultipliers(model) : { read: 0.1, write: 1.25 };
509
524
  const fresh = Math.max(0, promptTokens - cacheCreationTokens - cacheReadTokens);
510
525
  return (fresh / 1000) * pricing.inputCostPer1K
511
- + (cacheCreationTokens / 1000) * pricing.inputCostPer1K * 1.25
512
- + (cacheReadTokens / 1000) * pricing.inputCostPer1K * 0.1
526
+ + (cacheCreationTokens / 1000) * pricing.inputCostPer1K * mult.write
527
+ + (cacheReadTokens / 1000) * pricing.inputCostPer1K * mult.read
513
528
  + (completionTokens / 1000) * pricing.outputCostPer1K;
514
529
  }
515
530
 
516
531
  /** Cost of one turn at `model`'s rate (looks up ai.libx.js pricing). */
517
532
  function turnCost(model: string, usage?: { promptTokens?: number; completionTokens?: number; cacheCreationTokens?: number; cacheReadTokens?: number }): number {
518
- return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0);
533
+ return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0, model);
519
534
  }
520
535
 
521
536
  /** Evaluate whether a goal condition has been met, based on recent transcript. */
@@ -1034,13 +1049,25 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1034
1049
  const duplexAsk = async (call: ToolUse): Promise<{ decision: 'allow' | 'deny' }> => {
1035
1050
  if (args.voice && dx) {
1036
1051
  const hint = summarizeCall(call.name, call.args).slice(0, 80);
1037
- // 'menu' mode: approve like a normal session — suspend the editor, pop the picker.
1038
- if ((cfg as any).voiceAskUi === 'menu') {
1052
+ // Default: approve like a normal session — suspend the editor, pop an interactive picker
1053
+ // (Allow once / always / Deny). Set `voiceAskUi: 'relay'` to opt into the spoken park/relay flow.
1054
+ if ((cfg as any).voiceAskUi !== 'relay') {
1039
1055
  editorRef?.suspend();
1040
- const v = await selectMenu(process.stderr, { title: `? background worker asks to run ${call.name} ${hint}`, items: [{ label: 'Allow', value: 'y' }, { label: 'Deny', value: 'n' }], current: 'n' });
1056
+ const v = await selectMenu(process.stderr, {
1057
+ title: `? background worker asks to run ${call.name} ${hint}`,
1058
+ items: [{ label: 'Allow once', value: 'y' }, { label: 'Allow always', value: 'a' }, { label: 'Deny', value: 'n' }],
1059
+ current: 'y',
1060
+ });
1041
1061
  editorRef?.resume();
1042
1062
  editorRef?.redrawNow();
1043
- return { decision: v === 'y' ? 'allow' : 'deny' };
1063
+ if (v === 'a') {
1064
+ // Remember a command-scoped allow: a live session rule (wins next ask; glob has no `*`
1065
+ // → exact-command match) + persist to .agent/permissions.json for future sessions.
1066
+ const cmd = typeof call.args?.command === 'string' ? call.args.command : null;
1067
+ work.permissions?.options.rules.unshift(cmd ? { tool: call.name, pathGlob: cmd, decision: 'allow' } : { tool: call.name, decision: 'allow' });
1068
+ persistRule(cwd, 'allow', cmd ? `${call.name}(${cmd})` : call.name);
1069
+ }
1070
+ return { decision: v === 'y' || v === 'a' ? 'allow' : 'deny' };
1044
1071
  }
1045
1072
  // NB: perm asks are keyed perm-N (PermissionPolicy.ask carries no task identity), so a
1046
1073
  // cancelled task can't clean its parked perm question — bounded by askTimeoutMs → deny.
@@ -2120,7 +2147,28 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2120
2147
  err(dim(` … cancelled ${running.length} background task(s)\n`));
2121
2148
  } else if (running.length) {
2122
2149
  err(dim(` … waiting for ${running.length} background task(s) (Ctrl-C to force quit)\n`));
2123
- await dx.idle();
2150
+ // stdin is still in raw mode here, so Ctrl-C arrives as a 0x03 byte (no SIGINT).
2151
+ // Race the drain against a raw Ctrl-C: on press, abort all workers and bail.
2152
+ let forced = false;
2153
+ let onCtrlC = () => {};
2154
+ const onByte = (b: Buffer) => {
2155
+ if (!b.includes(0x03)) return; // Ctrl-C
2156
+ forced = true;
2157
+ for (const t of running) { t.status = 'cancelled'; t.controller.abort(); }
2158
+ err(dim(`\n … force-quit — cancelled ${running.length} background task(s)\n`));
2159
+ onCtrlC();
2160
+ };
2161
+ process.stdin.on('data', onByte);
2162
+ await Promise.race([dx.idle(), new Promise<void>((res) => { onCtrlC = res; })]);
2163
+ process.stdin.off('data', onByte);
2164
+ if (forced) {
2165
+ // User force-quit: tear down and hard-exit — don't trust the event loop to drain
2166
+ // (voice children / sockets / MCP handles can keep the process alive otherwise).
2167
+ voiceIO?.stop();
2168
+ releaseStdin();
2169
+ await closeMcp(mounted);
2170
+ process.exit(130);
2171
+ }
2124
2172
  (face.options.host as { flushText?: () => void } | undefined)?.flushText?.();
2125
2173
  duplexPersist();
2126
2174
  }
@@ -243,6 +243,9 @@ declare class AgentOptions {
243
243
  instructionFiles: boolean | string[];
244
244
  /** Host interaction channel (human-in-the-loop). If set: adds the `AskUserQuestion` tool. */
245
245
  host?: HostBridge;
246
+ /** Add the `AskUserQuestion` tool when a host is present (default true). Set false for an agent that
247
+ * must never block a turn on a structured question — e.g. a voice reflex that confirms inline. */
248
+ askUserQuestion: boolean;
246
249
  /** Deterministic interception points around tool execution (pre/post/stop). */
247
250
  hooks?: Hooks;
248
251
  /** If true: add the `Task` tool so the agent can spawn depth-limited child agents over the VFS. */
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { h as RunResult, R as ReasoningEffort } from './Agent-Di1u5nH0.js';
2
+ import { h as RunResult, R as ReasoningEffort } from './Agent-kWrJvtZM.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
5
5
 
@@ -107,13 +107,20 @@ declare function exportMarkdown(meta: {
107
107
  costUsd?: number;
108
108
  costEstimated?: boolean;
109
109
  }, messages: Message[]): string;
110
+ /** Cache-read/write price multipliers over the input rate, by provider (derived from the model
111
+ * prefix). Anthropic: write 1.25x / read 0.1x. OpenAI & Gemini auto-cache (no write surcharge),
112
+ * reads 0.5x / 0.25x. DeepSeek read 0.1x. Unknown → no discount (1x/1x, safe over-estimate). */
113
+ declare function cacheMultipliers(model?: string): {
114
+ read: number;
115
+ write: number;
116
+ };
110
117
  /** USD cost from a model's per-1K pricing (ai.libx.js ModelPricing) + token usage. 0 if unpriced.
111
- * Cache-aware: promptTokens includes cache reads/writes — priced at their real multipliers
112
- * (writes 1.25x, reads 0.1x input rate) so cached runs aren't overstated. */
118
+ * Cache-aware: promptTokens includes cache reads/writes — priced at the provider's real multipliers
119
+ * (via `model`) so cached runs aren't overstated. Omitting `model` falls back to Anthropic's rates. */
113
120
  declare function costOf(pricing: {
114
121
  inputCostPer1K: number;
115
122
  outputCostPer1K: number;
116
- } | undefined, promptTokens?: number, completionTokens?: number, cacheCreationTokens?: number, cacheReadTokens?: number): number;
123
+ } | undefined, promptTokens?: number, completionTokens?: number, cacheCreationTokens?: number, cacheReadTokens?: number, model?: string): number;
117
124
  /** Format a USD amount: 2 decimals at $1+, 4 below (agent turns are sub-cent). */
118
125
  declare function fmtUsd(n: number): string;
119
126
  /** ~4 chars/token estimate over a transcript (matches the Agent's context-budget heuristic). */
@@ -192,4 +199,4 @@ declare function jsonResult(res: RunResult, session: SessionData): {
192
199
  */
193
200
  declare function readMultiline(readLine: (continuing: boolean) => Promise<string | null>): Promise<string | null>;
194
201
 
195
- export { type PermMode, appendMemoryNote, costOf, estimateTranscriptTokens, expandMentions, exportMarkdown, fmtUsd, formatHistory, formatStatus, jsonResult, parseArgs, pastePathClassifier, readImageParts, readMultiline, resolvePermMode, runShellLine };
202
+ export { type PermMode, appendMemoryNote, cacheMultipliers, costOf, estimateTranscriptTokens, expandMentions, exportMarkdown, fmtUsd, formatHistory, formatStatus, jsonResult, parseArgs, pastePathClassifier, readImageParts, readMultiline, resolvePermMode, runShellLine };
package/dist/cli.js CHANGED
@@ -2661,6 +2661,9 @@ var AgentOptions = class {
2661
2661
  instructionFiles = true;
2662
2662
  /** Host interaction channel (human-in-the-loop). If set: adds the `AskUserQuestion` tool. */
2663
2663
  host;
2664
+ /** Add the `AskUserQuestion` tool when a host is present (default true). Set false for an agent that
2665
+ * must never block a turn on a structured question — e.g. a voice reflex that confirms inline. */
2666
+ askUserQuestion = true;
2664
2667
  /** Deterministic interception points around tool execution (pre/post/stop). */
2665
2668
  hooks;
2666
2669
  /** If true: add the `Task` tool so the agent can spawn depth-limited child agents over the VFS. */
@@ -2797,7 +2800,7 @@ var Agent = class _Agent {
2797
2800
  if (catalog) systemPrompt += "\n\n" + catalog;
2798
2801
  if (tool) tools = [...tools, tool];
2799
2802
  }
2800
- if (o.host) tools = [...tools, askUserQuestionTool];
2803
+ if (o.host && o.askUserQuestion) tools = [...tools, askUserQuestionTool];
2801
2804
  if (o.subagents) {
2802
2805
  let agents;
2803
2806
  if (o.agentsDir) {
@@ -3520,11 +3523,16 @@ var DuplexAgentOptions = class {
3520
3523
  reflexModel = "groq/openai/gpt-oss-20b";
3521
3524
  actModel = "anthropic/claude-sonnet-4-6";
3522
3525
  /** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
3523
- thinkModel = "anthropic/claude-opus-4-6";
3526
+ thinkModel = "anthropic/claude-opus-4-8";
3524
3527
  /** Escape hatches merged over the derived per-agent options. */
3525
3528
  reflexOptions;
3526
3529
  actOptions;
3527
3530
  thinkOptions;
3531
+ /** Fresh-context check on each successful Act task: a NEW agent (no self-confirmation bias) re-reads
3532
+ * the file state against the brief and fixes any gap before the result is re-voiced. Bounded to one
3533
+ * pass; ~2x Act cost so default OFF. The self-verify FOOTER (same context) was measured ineffective —
3534
+ * this is the structural fix (see mind/10). Think tasks are pure reasoning, never checked. */
3535
+ verifyActTasks = false;
3528
3536
  /** Receives the voice text_delta stream + task lifecycle events. */
3529
3537
  host;
3530
3538
  /** How many recent transcript messages are rendered into a worker's brief. */
@@ -3555,7 +3563,7 @@ var DuplexAgentOptions = class {
3555
3563
  /** User-scope memory dir for global facts (type=user/feedback). Forwarded to Remember's routing. */
3556
3564
  memoryUserDir;
3557
3565
  };
3558
- var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A standard background worker with FULL access to the user\'s environment (files, shell, web). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nYou can find out or do ANYTHING by calling `Act` with a clear, self-contained brief \u2014 so NEVER tell the user you can\'t see, access, or do something. Act and find out. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nAfter calling Act or Think, tell the user you are on it in one short sentence, then end your turn. Do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, summarize it for the ear in one or two short sentences. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight \u2014 check `TaskStatus` first. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file \u2014 use `QuickLook` (instant, no task). Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
3566
+ var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A standard background worker with FULL access to the user\'s environment (files, shell, web). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nYou can find out or do ANYTHING by calling `Act` with a clear, self-contained brief \u2014 so NEVER tell the user you can\'t see, access, or do something. Act and find out. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nAfter calling Act or Think, tell the user you are on it in one short sentence, then end your turn. Do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, summarize it for the ear in one or two short sentences. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file \u2014 use `QuickLook` (instant, no task). Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
3559
3567
  var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
3560
3568
  var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
3561
3569
  var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
@@ -3597,6 +3605,10 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
3597
3605
  model: o.reflexModel,
3598
3606
  stream: true,
3599
3607
  host: o.host,
3608
+ // The reflex IS the conversational channel — it confirms ambiguity inline ("did you mean…?"),
3609
+ // never via the blocking AskUserQuestion tool (Agent auto-adds it whenever a host is set). Left in,
3610
+ // it stalls a voice turn until the kill-switch. Worker questions still reach the user via parkQuestion.
3611
+ askUserQuestion: false,
3600
3612
  systemPrompt: prompt,
3601
3613
  instructionFiles: false,
3602
3614
  maxSteps: 8,
@@ -3696,7 +3708,7 @@ ${recent}` : brief) + verify;
3696
3708
  return a || "(no answer from the user \u2014 use your best judgment and note the assumption)";
3697
3709
  };
3698
3710
  const workerHost = o.askRelay ? { ask: relayAsk } : o.host?.ask ? { ask: (q2) => o.host.ask(q2) } : void 0;
3699
- const worker = new Agent({
3711
+ const agentOpts = {
3700
3712
  ai: o.ai,
3701
3713
  fs: o.fs,
3702
3714
  model: tierModel,
@@ -3705,10 +3717,46 @@ ${recent}` : brief) + verify;
3705
3717
  ...workerHost ? { host: workerHost } : {},
3706
3718
  ...hooks ? { hooks } : {},
3707
3719
  signal: controller.signal
3708
- });
3709
- const promise = worker.run(briefText).then((res) => this.onWorkerSettled(id, res)).catch((err2) => this.onWorkerFailed(id, err2));
3720
+ // shared with the checker so a cancel tears down both
3721
+ };
3722
+ const promise = new Agent(agentOpts).run(briefText).then((res) => this.maybeVerify(id, briefText, res, tier, agentOpts)).then((res) => this.onWorkerSettled(id, res)).catch((err2) => this.onWorkerFailed(id, err2));
3710
3723
  this.tasks.set(id, { id, label, status: "running", controller, promise });
3711
3724
  }
3725
+ /** Fresh-context check of a successful Act task: a NEW agent (same model/fs/tools, but NO shared
3726
+ * conversation context) re-reads the file state against the brief and fixes any gap. The fix lands
3727
+ * on the shared fs automatically (workers write fs directly, no overlay), so grading sees the
3728
+ * corrected state. Bounded to ONE pass. Off unless `verifyActTasks`; never runs for think/failed/
3729
+ * cancelled tasks. Usage is merged so /cost reflects the real (worker + checker) spend. */
3730
+ async maybeVerify(id, briefText, res, tier, agentOpts) {
3731
+ if (!this.options.verifyActTasks || tier !== "act" || res.finishReason !== "stop") return res;
3732
+ if (this.tasks.get(id)?.status === "cancelled") return res;
3733
+ const checkBrief = `${briefText}
3734
+
3735
+ ## VERIFY MODE
3736
+ Another agent just implemented the above. Independently check the CURRENT state of the files against EVERY requirement. Fix any gap you find. If everything is already correct, make NO changes \u2014 do not refactor or improve \u2014 and report "verified".`;
3737
+ this.notify("task_verify", `task ${id}: verifying`, { id });
3738
+ const cres = await new Agent(agentOpts).run(checkBrief);
3739
+ if (cres.finishReason !== "stop") {
3740
+ log6.warn(`task ${id}: verify inconclusive (${cres.finishReason})`);
3741
+ this.notify("task_verify", `task ${id}: verify inconclusive (${cres.finishReason})`, { id, finishReason: cres.finishReason });
3742
+ }
3743
+ const sum = (a = 0, b = 0) => a + b;
3744
+ return {
3745
+ ...res,
3746
+ steps: res.steps + cres.steps,
3747
+ // Merge the checker's messages so downstream tool-call/step accounting includes BOTH agents
3748
+ // (else a verified task's toolCalls would undercount vs its steps/usage).
3749
+ messages: [...res.messages, ...cres.messages],
3750
+ usageEstimated: res.usageEstimated || cres.usageEstimated,
3751
+ usage: res.usage && cres.usage ? {
3752
+ promptTokens: sum(res.usage.promptTokens, cres.usage.promptTokens),
3753
+ completionTokens: sum(res.usage.completionTokens, cres.usage.completionTokens),
3754
+ totalTokens: sum(res.usage.totalTokens, cres.usage.totalTokens),
3755
+ cacheCreationTokens: sum(res.usage.cacheCreationTokens, cres.usage.cacheCreationTokens),
3756
+ cacheReadTokens: sum(res.usage.cacheReadTokens, cres.usage.cacheReadTokens)
3757
+ } : res.usage ?? cres.usage
3758
+ };
3759
+ }
3712
3760
  /** Throttled per-task progress: worker tool calls → at most one progress re-voice per interval.
3713
3761
  * Two sources, one throttle: completed steps (post) and a heartbeat for a SINGLE long tool call
3714
3762
  * (pre records the in-flight call; a self-cleaning timer narrates "still inside Bash — 70s").
@@ -4125,7 +4173,7 @@ var VoiceEngine = class _VoiceEngine {
4125
4173
  this.stt.onLevel = (rms) => this.handleLevel(rms);
4126
4174
  await Promise.all([this.tts.connect(), this.stt.start()]);
4127
4175
  this.setState("listening");
4128
- log7.info(`voice I/O up (${this.stt.usingAec ? "AEC" : "heuristic echo"} capture)`);
4176
+ log7.debug(`voice I/O up (${this.stt.usingAec ? "AEC" : "heuristic echo"} capture)`);
4129
4177
  }
4130
4178
  get usingAec() {
4131
4179
  return this.stt.usingAec;
@@ -4168,7 +4216,7 @@ var VoiceEngine = class _VoiceEngine {
4168
4216
  this.spokeDeltas = true;
4169
4217
  this.ackAt = now();
4170
4218
  }
4171
- this.turnStartAt = now();
4219
+ if (!this.turnStartAt) this.turnStartAt = now();
4172
4220
  this.setState("thinking");
4173
4221
  }
4174
4222
  speakDelta(text) {
@@ -4177,7 +4225,7 @@ var VoiceEngine = class _VoiceEngine {
4177
4225
  this.reply += text;
4178
4226
  for (const w of this.words(this.reply)) this.echoWords.add(w);
4179
4227
  this.tts.speak(text, true);
4180
- if (!this.spokeDeltas && this.turnStartAt) log7.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4228
+ if (!this.spokeDeltas && this.turnStartAt) log7.debug(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4181
4229
  this.spokeDeltas = true;
4182
4230
  this.setState("speaking");
4183
4231
  }
@@ -4198,7 +4246,7 @@ var VoiceEngine = class _VoiceEngine {
4198
4246
  }
4199
4247
  this.drainTimer = null;
4200
4248
  this.speaking = false;
4201
- if (this.turnStartAt) log7.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
4249
+ if (this.turnStartAt) log7.debug(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
4202
4250
  this.echoUntil = now() + 2500;
4203
4251
  if (!this.usingAec) this.stt.reset();
4204
4252
  this.setState("listening");
@@ -4353,7 +4401,10 @@ var VoiceEngine = class _VoiceEngine {
4353
4401
  }
4354
4402
  const text = this.pendingUtt;
4355
4403
  this.pendingUtt = "";
4356
- if (text) this.options.onUtterance(text);
4404
+ if (text) {
4405
+ this.turnStartAt = now();
4406
+ this.options.onUtterance(text);
4407
+ }
4357
4408
  }
4358
4409
  get overlapCapable() {
4359
4410
  return this.usingAec && this.options.overlapPause && !!this.player.pause && !!this.player.resume;
@@ -4494,7 +4545,7 @@ var SonioxSTT = class {
4494
4545
  this.endpointTimer = setInterval(() => {
4495
4546
  const combined = (this.finalText + this.partialText).trim();
4496
4547
  if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
4497
- if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
4548
+ if (this.firstTokenAt) log8.debug(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
4498
4549
  this.reset();
4499
4550
  this.onUtterance(combined, now2());
4500
4551
  }, 120);
@@ -4527,7 +4578,7 @@ var SonioxSTT = class {
4527
4578
  this.onPartial(combined);
4528
4579
  if (endpoint && this.finalText.trim()) {
4529
4580
  const utterance = this.finalText.trim();
4530
- if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
4581
+ if (this.firstTokenAt) log8.debug(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
4531
4582
  this.reset();
4532
4583
  this.onUtterance(utterance, now2());
4533
4584
  }
@@ -5127,7 +5178,7 @@ import { existsSync as existsSync2, mkdirSync as mkdirSync2 } from "fs";
5127
5178
  import { platform, arch, release, userInfo, homedir } from "os";
5128
5179
  init_tools_shell();
5129
5180
  import { BodDB as BodDB2 } from "@bod.ee/db";
5130
- var DEFAULT_TOOLS = ["bash", "Read", "Edit", "Write", "Grep", "Glob", "MultiEdit", "TodoWrite"];
5181
+ var DEFAULT_TOOLS = ["bash", "Read", "Edit", "Write", "Grep", "Glob", "MultiEdit", "ApplyEdits", "RepoMap", "TodoWrite"];
5131
5182
  function autoWebTools() {
5132
5183
  const tools = [];
5133
5184
  tools.push("WebFetch");
@@ -7871,13 +7922,29 @@ function printHistory(messages) {
7871
7922
  const s = formatHistory(messages);
7872
7923
  if (s) err(s);
7873
7924
  }
7874
- function costOf(pricing, promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0) {
7925
+ function cacheMultipliers(model) {
7926
+ const p = (model ?? "").split("/")[0];
7927
+ switch (p) {
7928
+ case "anthropic":
7929
+ return { read: 0.1, write: 1.25 };
7930
+ case "openai":
7931
+ return { read: 0.5, write: 1 };
7932
+ case "google":
7933
+ return { read: 0.25, write: 1 };
7934
+ case "deepseek":
7935
+ return { read: 0.1, write: 1 };
7936
+ default:
7937
+ return { read: 1, write: 1 };
7938
+ }
7939
+ }
7940
+ function costOf(pricing, promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0, model) {
7875
7941
  if (!pricing) return 0;
7942
+ const mult = model ? cacheMultipliers(model) : { read: 0.1, write: 1.25 };
7876
7943
  const fresh = Math.max(0, promptTokens - cacheCreationTokens - cacheReadTokens);
7877
- return fresh / 1e3 * pricing.inputCostPer1K + cacheCreationTokens / 1e3 * pricing.inputCostPer1K * 1.25 + cacheReadTokens / 1e3 * pricing.inputCostPer1K * 0.1 + completionTokens / 1e3 * pricing.outputCostPer1K;
7944
+ return fresh / 1e3 * pricing.inputCostPer1K + cacheCreationTokens / 1e3 * pricing.inputCostPer1K * mult.write + cacheReadTokens / 1e3 * pricing.inputCostPer1K * mult.read + completionTokens / 1e3 * pricing.outputCostPer1K;
7878
7945
  }
7879
7946
  function turnCost(model, usage) {
7880
- return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0);
7947
+ return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0, model);
7881
7948
  }
7882
7949
  async function evaluateGoal(ai, condition, transcript, log17) {
7883
7950
  const recent = transcript.filter((m) => m.role === "assistant").slice(-8).map((m) => {
@@ -8350,12 +8417,21 @@ async function repl(args, ai, cfg, cwd) {
8350
8417
  const duplexAsk = async (call) => {
8351
8418
  if (args.voice && dx) {
8352
8419
  const hint = summarizeCall(call.name, call.args).slice(0, 80);
8353
- if (cfg.voiceAskUi === "menu") {
8420
+ if (cfg.voiceAskUi !== "relay") {
8354
8421
  editorRef?.suspend();
8355
- const v = await selectMenu(process.stderr, { title: `? background worker asks to run ${call.name} ${hint}`, items: [{ label: "Allow", value: "y" }, { label: "Deny", value: "n" }], current: "n" });
8422
+ const v = await selectMenu(process.stderr, {
8423
+ title: `? background worker asks to run ${call.name} ${hint}`,
8424
+ items: [{ label: "Allow once", value: "y" }, { label: "Allow always", value: "a" }, { label: "Deny", value: "n" }],
8425
+ current: "y"
8426
+ });
8356
8427
  editorRef?.resume();
8357
8428
  editorRef?.redrawNow();
8358
- return { decision: v === "y" ? "allow" : "deny" };
8429
+ if (v === "a") {
8430
+ const cmd = typeof call.args?.command === "string" ? call.args.command : null;
8431
+ work.permissions?.options.rules.unshift(cmd ? { tool: call.name, pathGlob: cmd, decision: "allow" } : { tool: call.name, decision: "allow" });
8432
+ persistRule(cwd, "allow", cmd ? `${call.name}(${cmd})` : call.name);
8433
+ }
8434
+ return { decision: v === "y" || v === "a" ? "allow" : "deny" };
8359
8435
  }
8360
8436
  const id = `perm-${++permSeq}`;
8361
8437
  const a = await dx.parkQuestion(id, `Permission: may the background worker run ${call.name}${hint ? ` (${hint})` : ""}? Answer yes or no (you can also type it).`);
@@ -9704,7 +9780,32 @@ ${extra}` : body);
9704
9780
  } else if (running.length) {
9705
9781
  err(dim(` \u2026 waiting for ${running.length} background task(s) (Ctrl-C to force quit)
9706
9782
  `));
9707
- await dx.idle();
9783
+ let forced = false;
9784
+ let onCtrlC = () => {
9785
+ };
9786
+ const onByte = (b) => {
9787
+ if (!b.includes(3)) return;
9788
+ forced = true;
9789
+ for (const t of running) {
9790
+ t.status = "cancelled";
9791
+ t.controller.abort();
9792
+ }
9793
+ err(dim(`
9794
+ \u2026 force-quit \u2014 cancelled ${running.length} background task(s)
9795
+ `));
9796
+ onCtrlC();
9797
+ };
9798
+ process.stdin.on("data", onByte);
9799
+ await Promise.race([dx.idle(), new Promise((res) => {
9800
+ onCtrlC = res;
9801
+ })]);
9802
+ process.stdin.off("data", onByte);
9803
+ if (forced) {
9804
+ voiceIO?.stop();
9805
+ releaseStdin();
9806
+ await closeMcp(mounted);
9807
+ process.exit(130);
9808
+ }
9708
9809
  face.options.host?.flushText?.();
9709
9810
  duplexPersist();
9710
9811
  }
@@ -9805,6 +9906,7 @@ if (import.meta.main) main().catch((e) => {
9805
9906
  });
9806
9907
  export {
9807
9908
  appendMemoryNote,
9909
+ cacheMultipliers,
9808
9910
  costOf,
9809
9911
  estimateTranscriptTokens,
9810
9912
  expandMentions,