agent.libx.js 0.93.6 → 0.93.10

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1982,7 +1982,7 @@ IMPLICIT CAPTURE: when the user shares their name, role, a preference, a correct
1982
1982
  For explicit "remember X" requests, also call Remember directly and confirm briefly ("got it").
1983
1983
  Do NOT remember: transient task details, conversation filler, things you'd forget in a real conversation.
1984
1984
  Keep it invisible: never announce "saving to memory" or list what you remembered unless asked.
1985
- For anything requiring files, shell, or web \u2014 still Delegate.`;
1985
+ For anything requiring files, shell, or web \u2014 still Act.`;
1986
1986
  async function loadMemory(fs, dir, opts = {}) {
1987
1987
  const dirs = (Array.isArray(dir) ? dir : [dir]).filter(Boolean);
1988
1988
  const writeDir = dirs[0];
@@ -2691,6 +2691,10 @@ var AgentOptions = class {
2691
2691
  autoTest;
2692
2692
  /** Provider-specific options forwarded to ai.chat() (e.g. cursor mcpServers, cwd). */
2693
2693
  providerOptions;
2694
+ /** Prompt caching (providers that support it, e.g. Anthropic): cache tools/system/conversation
2695
+ * prefix across the loop's steps — reads cost 0.1x, writes 1.25x. A multi-step agent loop
2696
+ * re-sends its whole prefix every step, so this is a large net cost cut. Default on. */
2697
+ promptCache = true;
2694
2698
  /** Tool selection mode: 'auto' = model decides (needed for Groq); undefined = provider default. */
2695
2699
  toolChoice;
2696
2700
  /** Extended-thinking / reasoning effort, normalized across providers (anthropic, openai).
@@ -2880,7 +2884,7 @@ var Agent = class _Agent {
2880
2884
  const wireTools = toWireTools(this.activeTools);
2881
2885
  const useStream = o.stream === true && typeof o.host?.notify === "function";
2882
2886
  let steps = 0;
2883
- const usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
2887
+ const usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
2884
2888
  let usageEstimated = false;
2885
2889
  const start = Date.now();
2886
2890
  let toolCallsTotal = 0;
@@ -2897,6 +2901,7 @@ var Agent = class _Agent {
2897
2901
  if (o.timeoutMs && Date.now() - start >= o.timeoutMs) return kill("timeout");
2898
2902
  if (o.maxTokens && usage.totalTokens >= o.maxTokens) return kill("budget");
2899
2903
  steps++;
2904
+ this.options.host?.notify?.({ kind: "turn_start", message: `step ${steps}` });
2900
2905
  let res;
2901
2906
  const sent = this.trimContext();
2902
2907
  const frag = reasoningToChatFragment(o.model, o.reasoning);
@@ -2910,6 +2915,7 @@ var Agent = class _Agent {
2910
2915
  } : void 0;
2911
2916
  const reasonOpts = {
2912
2917
  ...frag,
2918
+ ...o.promptCache ? { promptCache: true } : {},
2913
2919
  ...o.providerOptions || cursorPo ? { providerOptions: { ...frag.providerOptions, ...o.providerOptions, ...cursorPo } } : {}
2914
2920
  };
2915
2921
  try {
@@ -2937,6 +2943,8 @@ var Agent = class _Agent {
2937
2943
  usage.promptTokens += res.usage.promptTokens ?? 0;
2938
2944
  usage.completionTokens += res.usage.completionTokens ?? 0;
2939
2945
  usage.totalTokens += res.usage.totalTokens ?? 0;
2946
+ usage.cacheCreationTokens += res.usage.cacheCreationTokens ?? 0;
2947
+ usage.cacheReadTokens += res.usage.cacheReadTokens ?? 0;
2940
2948
  }
2941
2949
  const toolCalls = res.toolCalls ?? [];
2942
2950
  this.transcript.push({
@@ -3505,15 +3513,18 @@ function describeCall(call) {
3505
3513
  return `${call.name}${hint}`;
3506
3514
  }
3507
3515
  var DuplexAgentOptions = class {
3508
- /** Any ai.libx.js AIClient — shared by the voice and worker agents (routed by model). */
3516
+ /** Any ai.libx.js AIClient — shared by all tiers (routed by model). */
3509
3517
  ai;
3510
- /** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3518
+ /** The WORKER's filesystem (act + think). If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3511
3519
  fs;
3512
- voiceModel = "groq/openai/gpt-oss-20b";
3513
- workerModel = "anthropic/claude-sonnet-4-6";
3520
+ reflexModel = "groq/openai/gpt-oss-20b";
3521
+ actModel = "anthropic/claude-sonnet-4-6";
3522
+ /** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
3523
+ thinkModel = "anthropic/claude-opus-4-6";
3514
3524
  /** Escape hatches merged over the derived per-agent options. */
3515
- voiceOptions;
3516
- workerOptions;
3525
+ reflexOptions;
3526
+ actOptions;
3527
+ thinkOptions;
3517
3528
  /** Receives the voice text_delta stream + task lifecycle events. */
3518
3529
  host;
3519
3530
  /** How many recent transcript messages are rendered into a worker's brief. */
@@ -3521,7 +3532,7 @@ var DuplexAgentOptions = class {
3521
3532
  /** Voice register: 'neutral' = clean spoken style; 'conversational' = human-like — fillers,
3522
3533
  * backchannels, impulsive first reactions before content (mimics real duplex conversation). */
3523
3534
  voiceStyle = "neutral";
3524
- /** Awaited BEFORE a delegated worker spawns — open a per-task checkpoint frame, audit, etc.
3535
+ /** Awaited BEFORE a worker spawns — open a per-task checkpoint frame, audit, etc.
3525
3536
  * (post-spawn would race the worker's first edits). */
3526
3537
  onTaskStart;
3527
3538
  /** Re-voice throttled worker progress asides ('[task t1 progress] …') so long tasks aren't dead
@@ -3544,8 +3555,10 @@ var DuplexAgentOptions = class {
3544
3555
  /** User-scope memory dir for global facts (type=user/feedback). Forwarded to Remember's routing. */
3545
3556
  memoryUserDir;
3546
3557
  };
3547
- var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou work in a pair: you talk, and a background worker with FULL access to the user\'s environment (files, shell, web) does the hands-on work. You can find out or do ANYTHING by calling `Delegate` with a clear, self-contained brief \u2014 so NEVER tell the user you can\'t see, access, or do something. Delegate and find out. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), delegate IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nAfter calling Delegate, tell the user you are on it in one short sentence, then end your turn. Do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, summarize it for the ear in one or two short sentences. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. 
NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nDo not fire a second Delegate for work already in flight \u2014 check `TaskStatus` first. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not delegate, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file \u2014 use `QuickLook` (instant, no task). Anything requiring searching, reasoning, running commands, or editing still goes through `Delegate`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
3548
- var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you delegate, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now delegate", no task ids out loud).`;
3558
+ var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A standard background worker with FULL access to the user\'s environment (files, shell, web). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nYou can find out or do ANYTHING by calling `Act` with a clear, self-contained brief \u2014 so NEVER tell the user you can\'t see, access, or do something. Act and find out. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nAfter calling Act or Think, tell the user you are on it in one short sentence, then end your turn. Do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, summarize it for the ear in one or two short sentences. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. 
Never present progress as a finished result.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight \u2014 check `TaskStatus` first. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file \u2014 use `QuickLook` (instant, no task). 
Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
3559
+ var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
3560
+ var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
3561
+ var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
3549
3562
  var DuplexAgent = class {
3550
3563
  options;
3551
3564
  voice;
@@ -3564,21 +3577,32 @@ var DuplexAgent = class {
3564
3577
  if (o.memoryDir && o.fs) {
3565
3578
  this.memoryReady = loadMemory(o.fs, o.memoryDir, { maxWritesPerSession: 10, userDir: o.memoryUserDir });
3566
3579
  }
3567
- const memSlot = o.memoryDir && o.fs ? VOICE_MEMORY_PROMPT : "NEVER claim to have stored, saved, or remembered something durably \u2014 you cannot. Anything the user wants persisted (their name, preferences, notes) must be Delegated so a worker writes it to memory.";
3568
- const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
3580
+ const memSlot = o.memoryDir && o.fs ? VOICE_MEMORY_PROMPT : "NEVER claim to have stored, saved, or remembered something durably \u2014 you cannot. Anything the user wants persisted (their name, preferences, notes) must go through Act so a worker writes it to memory.";
3581
+ const thinkSlot = o.thinkModel !== false ? THINK_GUIDANCE : THINK_DISABLED_GUIDANCE;
3582
+ const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
3569
3583
  Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
3584
+ const tools = [
3585
+ ...o.reflexOptions?.tools ?? [],
3586
+ this.actTool(),
3587
+ ...o.thinkModel !== false ? [this.thinkTool()] : [],
3588
+ this.taskStatusTool(),
3589
+ this.cancelTaskTool(),
3590
+ this.quickLookTool(),
3591
+ this.answerTaskTool(),
3592
+ this.holdTool()
3593
+ ];
3570
3594
  this.voice = new Agent({
3571
3595
  ai: o.ai,
3572
3596
  fs: new MemFilesystem2(),
3573
- model: o.voiceModel,
3597
+ model: o.reflexModel,
3574
3598
  stream: true,
3575
3599
  host: o.host,
3576
3600
  systemPrompt: prompt,
3577
3601
  instructionFiles: false,
3578
3602
  maxSteps: 8,
3579
3603
  timeoutMs: 3e4,
3580
- ...o.voiceOptions,
3581
- tools: [...o.voiceOptions?.tools ?? [], this.delegateTool(), this.taskStatusTool(), this.cancelTaskTool(), this.quickLookTool(), this.answerTaskTool()]
3604
+ ...o.reflexOptions,
3605
+ tools
3582
3606
  });
3583
3607
  }
3584
3608
  /** Resolve memory tools + inject index into voice system prompt (once). */
@@ -3589,7 +3613,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
3589
3613
  this.voice.options.tools.push(...mem.tools);
3590
3614
  if (mem.index) this.voice.options.systemPrompt += "\n\n" + mem.index;
3591
3615
  }
3592
- /** One user turn: the voice agent streams the reply (and may Delegate). Serialized with re-voice turns. */
3616
+ /** One user turn: the voice agent streams the reply (and may Act/Think). Serialized with re-voice turns. */
3593
3617
  send(content) {
3594
3618
  return this.enqueue(async () => {
3595
3619
  await this.initMemory();
@@ -3630,19 +3654,25 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
3630
3654
  this.notify("revoice_done", "");
3631
3655
  });
3632
3656
  }
3633
- /** The worker's brief: the Delegate args + a STATIC text snapshot of the recent conversation. */
3634
- buildBrief(brief) {
3657
+ /** The worker's brief: the Act/Think args + a STATIC text snapshot of the recent conversation.
3658
+ * Act briefs get a self-verify footer — the worker's report is trusted without review, so it
3659
+ * must check its own work before reporting (nearly free under prompt caching; measured honest:
3660
+ * it does NOT fix one-shot logic bugs — see mind/10). Think tasks are pure reasoning — no footer. */
3661
+ buildBrief(brief, tier = "act") {
3635
3662
  const recent = this.voice.transcript.filter((m) => (m.role === "user" || m.role === "assistant") && contentText(m.content).trim()).slice(-this.options.excerptTurns).map((m) => `${m.role}: ${contentText(m.content)}`).join("\n");
3636
- return recent ? `${brief}
3663
+ const verify = tier === "act" ? "\n\nBefore reporting done: re-read what you changed and check it against EVERY requirement above \u2014 fix any gap first. Your report is trusted without review." : "";
3664
+ return (recent ? `${brief}
3637
3665
 
3638
3666
  ## Recent conversation (for context)
3639
- ${recent}` : brief;
3667
+ ${recent}` : brief) + verify;
3640
3668
  }
3641
3669
  /** Spawn a detached worker for task `id`; its settlement notifies + enqueues the re-voice turn. */
3642
- spawnWorker(id, label, briefText) {
3670
+ spawnWorker(id, label, briefText, tier = "act") {
3643
3671
  const o = this.options;
3672
+ const tierOpts = tier === "think" ? o.thinkOptions : o.actOptions;
3673
+ const tierModel = tier === "think" ? o.thinkModel : o.actModel;
3644
3674
  const controller = new AbortController();
3645
- const base = o.workerOptions?.hooks;
3675
+ const base = tierOpts?.hooks ?? o.actOptions?.hooks;
3646
3676
  const report = o.progressUpdates ? this.progressReporter(id) : void 0;
3647
3677
  const hooks = report ? {
3648
3678
  ...base,
@@ -3669,13 +3699,12 @@ ${recent}` : brief;
3669
3699
  const worker = new Agent({
3670
3700
  ai: o.ai,
3671
3701
  fs: o.fs,
3672
- model: o.workerModel,
3673
- ...o.workerOptions,
3674
- // may override ai/fs/model/tools/… —
3702
+ model: tierModel,
3703
+ ...tier === "think" ? { reasoning: tierOpts?.reasoning ?? "high" } : {},
3704
+ ...tierOpts,
3675
3705
  ...workerHost ? { host: workerHost } : {},
3676
3706
  ...hooks ? { hooks } : {},
3677
3707
  signal: controller.signal
3678
- // …but never the per-task cancellation signal
3679
3708
  });
3680
3709
  const promise = worker.run(briefText).then((res) => this.onWorkerSettled(id, res)).catch((err2) => this.onWorkerFailed(id, err2));
3681
3710
  this.tasks.set(id, { id, label, status: "running", controller, promise });
@@ -3763,7 +3792,14 @@ ${recent}` : brief;
3763
3792
  }
3764
3793
  rec.status = "done";
3765
3794
  log6.verbose(`task ${id} done (${res.steps} steps)`);
3766
- this.notify("task_done", `task ${id} (${rec.label}) completed`, { id, text: res.text, usage: res.usage, usageEstimated: res.usageEstimated });
3795
+ this.notify("task_done", `task ${id} (${rec.label}) completed`, {
3796
+ id,
3797
+ text: res.text,
3798
+ usage: res.usage,
3799
+ usageEstimated: res.usageEstimated,
3800
+ steps: res.steps,
3801
+ toolCalls: res.messages.filter((m) => m.role === "tool").length
3802
+ });
3767
3803
  this.queueRevoice(`[task ${id} completed] ${res.text}`);
3768
3804
  }
3769
3805
  onWorkerFailed(id, err2) {
@@ -3776,11 +3812,32 @@ ${recent}` : brief;
3776
3812
  this.notify("task_error", `task ${rec.id} (${rec.label}) failed: ${msg}`);
3777
3813
  this.queueRevoice(`[task ${rec.id} failed] ${msg}`);
3778
3814
  }
3779
- // --- the three voice tools (closures over this instance) ---
3780
- delegateTool() {
3815
+ // --- voice tools (closures over this instance) ---
3816
+ /** Live-switch the think tier: `false` disables (removes the Think tool from the voice agent),
3817
+ * a model id enables (adds the tool if missing). The system-prompt THINK_SLOT text is frozen at
3818
+ * construction — the tool's own description carries the routing guidance, so a live enable works;
3819
+ * dispatch()'s think→act fallback covers any straggler calls after a live disable. */
3820
+ setThinkModel(model) {
3821
+ this.options.thinkModel = model;
3822
+ const tools = this.voice.options.tools;
3823
+ const i = tools.findIndex((t) => t.name === "Think");
3824
+ if (model === false && i >= 0) tools.splice(i, 1);
3825
+ else if (model !== false && i < 0) tools.push(this.thinkTool());
3826
+ }
3827
+ /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id. */
3828
+ async dispatch(brief, tier = "act", label) {
3829
+ if (tier === "think" && this.options.thinkModel === false) tier = "act";
3830
+ const id = `t${++this.seq}`;
3831
+ const lbl = label ?? tier;
3832
+ await this.options.onTaskStart?.(id, lbl);
3833
+ this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier);
3834
+ this.notify("task_started", `task ${id} (${lbl}) started`, { id, brief, tier });
3835
+ return id;
3836
+ }
3837
+ actTool() {
3781
3838
  return {
3782
- name: "Delegate",
3783
- description: 'Escalate real work (reading/editing files, searching, running tasks, deep analysis) to a background worker agent. Returns immediately with a task id; the result arrives later as a "[task <id> completed]" event. Provide a clear, self-contained `brief` (the worker does not hear the live conversation).',
3839
+ name: "Act",
3840
+ description: 'Escalate real work (reading/editing files, searching, running tasks, building) to a standard background worker. Returns immediately with a task id; the result arrives later as a "[task <id> completed]" event. Provide a clear, self-contained `brief` (the worker does not hear the live conversation).',
3784
3841
  parameters: {
3785
3842
  type: "object",
3786
3843
  required: ["brief"],
@@ -3790,12 +3847,26 @@ ${recent}` : brief;
3790
3847
  }
3791
3848
  },
3792
3849
  run: async ({ brief, label }) => {
3793
- const id = `t${++this.seq}`;
3794
- const lbl = String(label ?? "task");
3795
- await this.options.onTaskStart?.(id, lbl);
3796
- this.spawnWorker(id, lbl, this.buildBrief(String(brief ?? "")));
3797
- this.notify("task_started", `task ${id} (${lbl}) started`, { id, brief });
3798
- return `Delegated as task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
3850
+ const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0);
3851
+ return `Acting on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
3852
+ }
3853
+ };
3854
+ }
3855
+ thinkTool() {
3856
+ return {
3857
+ name: "Think",
3858
+ description: "Escalate to a premium deep-reasoning agent for complex analysis, architecture decisions, hard debugging, or planning. Same async pattern as Act \u2014 returns a task id. Use when the problem needs careful thought before (or instead of) action. Do not use Think for simple tasks \u2014 Act is cheaper and faster.",
3859
+ parameters: {
3860
+ type: "object",
3861
+ required: ["brief"],
3862
+ properties: {
3863
+ brief: { type: "string", description: "the question or problem to reason about deeply" },
3864
+ label: { type: "string", description: "a short (2-4 word) label for the task" }
3865
+ }
3866
+ },
3867
+ run: async ({ brief, label }) => {
3868
+ const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0);
3869
+ return `Thinking on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
3799
3870
  }
3800
3871
  };
3801
3872
  }
@@ -3811,7 +3882,7 @@ ${recent}` : brief;
3811
3882
  }
3812
3883
  };
3813
3884
  }
3814
- /** Sub-100ms read-only lookups the voice may do itself — everything else stays Delegate-only.
3885
+ /** Sub-100ms read-only lookups the voice may do itself — everything else stays Act-only.
3815
3886
  * fs-only (no shell; the engine is VFS-abstracted): time, git branch (.git/HEAD read), ls, file
3816
3887
  * head. Output is hard-capped so a lookup can never bloat the skinny voice context. */
3817
3888
  quickLookTool() {
@@ -3819,7 +3890,7 @@ ${recent}` : brief;
3819
3890
  const kinds = [.../* @__PURE__ */ new Set(["time", "branch", "ls", "file", ...Object.keys(this.options.quickLook ?? {})])];
3820
3891
  return {
3821
3892
  name: "QuickLook",
3822
- description: `Instant read-only lookup \u2014 one of: ${kinds.join(", ")}. For trivial facts only; anything needing search, commands, or reasoning goes through Delegate.`,
3893
+ description: `Instant read-only lookup \u2014 one of: ${kinds.join(", ")}. For trivial facts only; anything needing search, commands, or reasoning goes through Act.`,
3823
3894
  parameters: {
3824
3895
  type: "object",
3825
3896
  required: ["what"],
@@ -3852,7 +3923,7 @@ ${recent}` : brief;
3852
3923
  if (!path) return "file lookup needs a path";
3853
3924
  const text = await fs.readFile(String(path));
3854
3925
  return text.length > CAP2 ? text.slice(0, CAP2) + `
3855
- \u2026 (truncated \u2014 ${text.length} chars total; Delegate for the full file)` : text;
3926
+ \u2026 (truncated \u2014 ${text.length} chars total; Act for the full file)` : text;
3856
3927
  }
3857
3928
  default:
3858
3929
  return `unknown lookup '${what}'`;
@@ -3880,6 +3951,22 @@ ${recent}` : brief;
3880
3951
  }
3881
3952
  };
3882
3953
  }
3954
+ holdTool() {
3955
+ return {
3956
+ name: "Hold",
3957
+ description: 'The user seems mid-thought \u2014 hold the turn (stay listening) instead of answering. Optionally pass a short filler ("mhm", "go on") to speak while waiting. Use when the message sounds incomplete, trailing off, or like they paused to think.',
3958
+ parameters: {
3959
+ type: "object",
3960
+ properties: {
3961
+ filler: { type: "string", description: 'optional short filler to speak ("mhm", "go on", "mm-hm")' }
3962
+ }
3963
+ },
3964
+ run: async ({ filler }) => {
3965
+ if (filler) this.notify("hold_filler", String(filler));
3966
+ return "Holding \u2014 listening for the rest of the user's thought. Do not respond further this turn.";
3967
+ }
3968
+ };
3969
+ }
3883
3970
  cancelTaskTool() {
3884
3971
  return {
3885
3972
  name: "CancelTask",
@@ -3964,6 +4051,14 @@ var VoiceEngineOptions = class {
3964
4051
  * letters, mid-thought pauses), the next utterance MERGES instead of dispatching a truncated one
3965
4052
  * ("E-L-Y." / "A."). Costs this much latency per turn; 0 disables. */
3966
4053
  utteranceMergeMs = 350;
4054
+ /** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
4055
+ * Gives the user time to finish their thought without triggering a model call. */
4056
+ incompleteMergeMs = 1500;
4057
+ /** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
4058
+ holdFiller = "";
4059
+ /** Called when the engine holds an incomplete utterance (host can render a visual cue). */
4060
+ onHold = () => {
4061
+ };
3967
4062
  /** heuristic (non-AEC) energy barge-in tuning */
3968
4063
  bargeRmsMult = 2;
3969
4064
  bargeRmsFloor = 500;
@@ -3977,7 +4072,7 @@ var VoiceEngineOptions = class {
3977
4072
  /** no new partial activity for this long while paused → resume, drop the interjection */
3978
4073
  overlapResumeMs = 700;
3979
4074
  };
3980
- var VoiceEngine = class {
4075
+ var VoiceEngine = class _VoiceEngine {
3981
4076
  options;
3982
4077
  state = "idle";
3983
4078
  stt;
@@ -4126,6 +4221,13 @@ var VoiceEngine = class {
4126
4221
  this.lastInterrupted = null;
4127
4222
  return r;
4128
4223
  }
4224
+ /** Speak a short filler phrase without starting a model turn (stays in listening mode after). */
4225
+ speakFiller(text) {
4226
+ if (!text || this.speaking) return;
4227
+ this.beginSpeech();
4228
+ this.speakDelta(text);
4229
+ this.endSpeech();
4230
+ }
4129
4231
  /** barge-in: stop audio NOW, cancel generation, reset for the user's utterance */
4130
4232
  interrupt() {
4131
4233
  if (!this.speaking && !this.drainTimer) return;
@@ -4209,6 +4311,11 @@ var VoiceEngine = class {
4209
4311
  }
4210
4312
  if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
4211
4313
  }
4314
+ static TRAIL_RE = /(?:^|\s)(?:and|but|or|so|to|the|a|an|of|in|for|with|that|if|uh|um|like|about|from|into|on|is|are|was|were|,)$/i;
4315
+ /** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
4316
+ looksIncomplete(text) {
4317
+ return _VoiceEngine.TRAIL_RE.test(text.trim());
4318
+ }
4212
4319
  handleUtterance(text) {
4213
4320
  if (this.speaking && (this.ctxOpen || this.pausedAt) && this.overlapCapable) {
4214
4321
  this.stt.reset();
@@ -4225,6 +4332,17 @@ var VoiceEngine = class {
4225
4332
  }
4226
4333
  this.pendingUtt = this.pendingUtt ? `${this.pendingUtt} ${text}` : text;
4227
4334
  if (this.pendingTimer) clearTimeout(this.pendingTimer);
4335
+ if (this.options.incompleteMergeMs && this.looksIncomplete(this.pendingUtt)) {
4336
+ log7.verbose(`hold: incomplete utterance "${this.pendingUtt.slice(-40)}"`);
4337
+ this.options.onHold();
4338
+ if (this.options.holdFiller && !this.speaking) {
4339
+ this.beginSpeech();
4340
+ this.speakDelta(this.options.holdFiller);
4341
+ this.endSpeech();
4342
+ }
4343
+ this.pendingTimer = setTimeout(() => this.flushUtterance(), this.options.incompleteMergeMs);
4344
+ return;
4345
+ }
4228
4346
  if (!this.options.utteranceMergeMs || this.words(this.pendingUtt).length >= 4) return this.flushUtterance();
4229
4347
  this.pendingTimer = setTimeout(() => this.flushUtterance(), this.options.utteranceMergeMs);
4230
4348
  }
@@ -4440,7 +4558,7 @@ var CartesiaTTSOptions = class {
4440
4558
  /** 'apiKey' (server/CLI) → `api_key=` URL param; 'token' (browser, BE-minted) → `access_token=`. */
4441
4559
  authMode = "apiKey";
4442
4560
  };
4443
- var CartesiaTTS = class {
4561
+ var CartesiaTTS = class _CartesiaTTS {
4444
4562
  options;
4445
4563
  ws;
4446
4564
  ctxSeq = 0;
@@ -4450,6 +4568,12 @@ var CartesiaTTS = class {
4450
4568
  onDone = () => {
4451
4569
  };
4452
4570
  firstAudioAt = 0;
4571
+ /** Circuit breaker: consecutive error count + down flag. */
4572
+ consecutiveErrors = 0;
4573
+ down = false;
4574
+ probeTimer = null;
4575
+ static CB_THRESHOLD = 3;
4576
+ static CB_PROBE_MS = 3e4;
4453
4577
  constructor(options) {
4454
4578
  this.options = { ...new CartesiaTTSOptions(), ...options };
4455
4579
  }
@@ -4479,10 +4603,34 @@ var CartesiaTTS = class {
4479
4603
  const m = JSON.parse(String(ev.data));
4480
4604
  if (m.context_id && m.context_id !== this.ctxId) return;
4481
4605
  if (m.type === "chunk" && m.data) {
4606
+ this.consecutiveErrors = 0;
4607
+ if (this.down) {
4608
+ this.down = false;
4609
+ log9.info("TTS recovered");
4610
+ this.stopProbe();
4611
+ }
4482
4612
  if (!this.firstAudioAt) this.firstAudioAt = now3();
4483
4613
  this.onAudio(base64ToBytes(m.data));
4484
- } else if (m.type === "done") this.onDone();
4485
- else if (m.type === "error" && !/already been cancelled|does not exist/.test(m.message || "")) log9.warn(`cartesia: ${JSON.stringify(m)}`);
4614
+ } else if (m.type === "done") {
4615
+ this.consecutiveErrors = 0;
4616
+ if (this.down) {
4617
+ this.down = false;
4618
+ log9.info("TTS recovered");
4619
+ this.stopProbe();
4620
+ }
4621
+ this.onDone();
4622
+ } else if (m.type === "error") {
4623
+ if (/already been cancelled|does not exist/.test(m.message || "")) return;
4624
+ this.consecutiveErrors++;
4625
+ if (!this.down && this.consecutiveErrors >= _CartesiaTTS.CB_THRESHOLD) {
4626
+ this.down = true;
4627
+ log9.warn(`TTS circuit breaker open \u2014 ${this.consecutiveErrors} consecutive errors, switching to text-only`);
4628
+ this.onDone();
4629
+ this.startProbe();
4630
+ } else if (!this.down) {
4631
+ log9.warn(`cartesia: ${JSON.stringify(m)}`);
4632
+ }
4633
+ }
4486
4634
  };
4487
4635
  }
4488
4636
  /** Ensure the WS is open before sending — reconnects if idle-closed. */
@@ -4506,17 +4654,42 @@ var CartesiaTTS = class {
4506
4654
  });
4507
4655
  }
4508
4656
  speak(text, cont) {
4657
+ if (this.down) return;
4509
4658
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(text, cont));
4510
4659
  else void this.ensureConnected().then(() => this.ws?.readyState === WebSocket.OPEN && this.ws.send(this.frame(text, cont)));
4511
4660
  }
4512
4661
  end() {
4662
+ if (this.down) {
4663
+ this.onDone();
4664
+ return;
4665
+ }
4513
4666
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame("", false));
4514
4667
  }
4515
4668
  cancel() {
4516
4669
  if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify({ context_id: this.ctxId, cancel: true }));
4517
4670
  }
4671
+ startProbe() {
4672
+ if (this.probeTimer) return;
4673
+ this.probeTimer = setInterval(() => {
4674
+ if (!this.down) {
4675
+ this.stopProbe();
4676
+ return;
4677
+ }
4678
+ this.consecutiveErrors = 0;
4679
+ this.newContext();
4680
+ if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(".", false));
4681
+ }, _CartesiaTTS.CB_PROBE_MS);
4682
+ this.probeTimer.unref?.();
4683
+ }
4684
+ stopProbe() {
4685
+ if (this.probeTimer) {
4686
+ clearInterval(this.probeTimer);
4687
+ this.probeTimer = null;
4688
+ }
4689
+ }
4518
4690
  close() {
4519
4691
  this.closed = true;
4692
+ this.stopProbe();
4520
4693
  if (this.ws) this.ws.onclose = null;
4521
4694
  this.ws?.close();
4522
4695
  }
@@ -7367,6 +7540,8 @@ function parseArgs(argv) {
7367
7540
  a.voice = true;
7368
7541
  a.duplex = true;
7369
7542
  } else if (x === "--voice-model") a.voiceModel = val(++i, x);
7543
+ else if (x === "--think-model") a.thinkModel = val(++i, x);
7544
+ else if (x === "--no-think") a.thinkModel = false;
7370
7545
  else if (x === "--allowedTools" || x === "--allowed-tools") a.allowedTools = val(++i, x).split(",").map((s) => s.trim()).filter(Boolean);
7371
7546
  else if (x === "--disallowedTools" || x === "--disallowed-tools") a.disallowedTools = val(++i, x).split(",").map((s) => s.trim()).filter(Boolean);
7372
7547
  else if (x === "--append-system-prompt") a.appendSystemPrompt = val(++i, x);
@@ -7389,6 +7564,7 @@ function parseArgs(argv) {
7389
7564
  if (a.duplex && (a.task || a.print)) throw new Error("--duplex is interactive-only (a conversational mode) \u2014 drop the task/-p");
7390
7565
  if (a.duplex && a.plan) throw new Error("--plan is not supported in --duplex (workers are non-interactive; a plan could never be approved)");
7391
7566
  if (a.voiceModel && !a.duplex) throw new Error("--voice-model only applies with --duplex");
7567
+ if (a.thinkModel !== void 0 && !a.duplex) throw new Error("--think-model/--no-think only apply with --duplex");
7392
7568
  return a;
7393
7569
  }
7394
7570
  var HELP = `agentx \u2014 agent.libx.js CLI
@@ -7425,6 +7601,8 @@ Flags:
7425
7601
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
7426
7602
  spoken replies out (echo-cancelled; speak over it to interrupt)
7427
7603
  --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
7604
+ --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
7605
+ --no-think with --duplex: disable the Think tier (Act handles everything)
7428
7606
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
7429
7607
  --subagents allow the Task tool (spawn child agents)
7430
7608
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
@@ -7454,7 +7632,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
7454
7632
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
7455
7633
 
7456
7634
  REPL shortcuts: !<cmd> runs a shell command inline \xB7 #<note> saves a memory \xB7 @path inlines a file
7457
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit
7635
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
7458
7636
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu \u2014 \u2191/\u2193 select, \u23CE/Tab accept, Esc dismiss.
7459
7637
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
7460
7638
  REPL shortcuts: Shift+Tab cycles permission posture (ask \u2192 accept-edits \u2192 plan) \xB7 Alt+T toggles reasoning \xB7 Alt+P switches model \xB7 Ctrl+O toggles verbose tool output \xB7 \u2192 or Tab accepts the dim history ghost-suggestion \xB7 Alt+S/Ctrl+S stash/unstash.
@@ -7693,12 +7871,13 @@ function printHistory(messages) {
7693
7871
  const s = formatHistory(messages);
7694
7872
  if (s) err(s);
7695
7873
  }
7696
- function costOf(pricing, promptTokens = 0, completionTokens = 0) {
7874
+ function costOf(pricing, promptTokens = 0, completionTokens = 0, cacheCreationTokens = 0, cacheReadTokens = 0) {
7697
7875
  if (!pricing) return 0;
7698
- return promptTokens / 1e3 * pricing.inputCostPer1K + completionTokens / 1e3 * pricing.outputCostPer1K;
7876
+ const fresh = Math.max(0, promptTokens - cacheCreationTokens - cacheReadTokens);
7877
+ return fresh / 1e3 * pricing.inputCostPer1K + cacheCreationTokens / 1e3 * pricing.inputCostPer1K * 1.25 + cacheReadTokens / 1e3 * pricing.inputCostPer1K * 0.1 + completionTokens / 1e3 * pricing.outputCostPer1K;
7699
7878
  }
7700
7879
  function turnCost(model, usage) {
7701
- return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0);
7880
+ return costOf(getModelInfo(model)?.pricing, usage?.promptTokens ?? 0, usage?.completionTokens ?? 0, usage?.cacheCreationTokens ?? 0, usage?.cacheReadTokens ?? 0);
7702
7881
  }
7703
7882
  async function evaluateGoal(ai, condition, transcript, log17) {
7704
7883
  const recent = transcript.filter((m) => m.role === "assistant").slice(-8).map((m) => {
@@ -8208,6 +8387,10 @@ async function repl(args, ai, cfg, cwd) {
8208
8387
  voiceIO.speakDelta(e.message);
8209
8388
  editorRef?.suspend();
8210
8389
  }
8390
+ if (e.kind === "hold_filler" && voiceIO) {
8391
+ voiceIO.speakFiller(e.message);
8392
+ return;
8393
+ }
8211
8394
  if (e.kind === "revoice_done") {
8212
8395
  base.flushText();
8213
8396
  process.stdout.write("\n");
@@ -8242,7 +8425,7 @@ async function repl(args, ai, cfg, cwd) {
8242
8425
  };
8243
8426
  const rewindFilesTool = {
8244
8427
  name: "RewindFiles",
8245
- description: "Undo file changes made by delegated tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.",
8428
+ description: "Undo file changes made by Act/Think tasks: roll back the last N task checkpoints (default 1). Use when the user asks to undo/revert what a task changed.",
8246
8429
  parameters: { type: "object", properties: { steps: { type: "number", description: "how many task checkpoints to undo (default 1)" } } },
8247
8430
  run: async ({ steps }) => {
8248
8431
  if (!checkpoints.size) return "No file checkpoints to rewind yet.";
@@ -8258,9 +8441,10 @@ async function repl(args, ai, cfg, cwd) {
8258
8441
  fs: agent.options.fs,
8259
8442
  memoryDir: agent.options.memoryDir,
8260
8443
  memoryUserDir: agent.options.memoryUserDir,
8261
- ...args.voiceModel ?? cfg.voiceModel ? { voiceModel: resolveModelOrNewest(args.voiceModel ?? cfg.voiceModel) } : {},
8262
- workerModel: agent.options.model,
8263
- workerOptions,
8444
+ ...args.voiceModel ?? cfg.reflexModel ? { reflexModel: resolveModelOrNewest(args.voiceModel ?? cfg.reflexModel) } : {},
8445
+ actModel: agent.options.model,
8446
+ actOptions: workerOptions,
8447
+ ...(args.thinkModel ?? cfg.thinkModel) !== void 0 ? { thinkModel: (args.thinkModel ?? cfg.thinkModel) === false ? false : resolveModelOrNewest(String(args.thinkModel ?? cfg.thinkModel)) } : {},
8264
8448
  host,
8265
8449
  ...args.voice ? { voiceStyle: "conversational", progressUpdates: true, askRelay: true } : {},
8266
8450
  // voice: progress asides + worker questions relayed through the conversation
@@ -8295,8 +8479,8 @@ async function repl(args, ai, cfg, cwd) {
8295
8479
  }
8296
8480
  },
8297
8481
  // The voice runs on the REAL fs (it has no fs tools — harmless) so @mentions, !cmd and #note
8298
- // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Delegate …).
8299
- voiceOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => {
8482
+ // resolve against the project; + CC-parity chrome for its own tool calls (⚙ Act …).
8483
+ reflexOptions: { fs: agent.options.fs, hooks: displayHooks(agent.options.fs), tools: [rewindFilesTool, exitSessionTool(() => {
8300
8484
  exitRequested = true;
8301
8485
  })] }
8302
8486
  });
@@ -8340,7 +8524,7 @@ async function repl(args, ai, cfg, cwd) {
8340
8524
  };
8341
8525
  const setModel = (m) => {
8342
8526
  work.model = m;
8343
- if (dx) dx.options.workerModel = m;
8527
+ if (dx) dx.options.actModel = m;
8344
8528
  persistModel(cwd, m);
8345
8529
  err(dim(" model \u2192 " + m + "\n"));
8346
8530
  };
@@ -8621,7 +8805,7 @@ ${extra}` : body);
8621
8805
  desc: "show CLI version + runtime",
8622
8806
  run: () => {
8623
8807
  const rt = process.versions.bun ? `bun ${process.versions.bun}` : `node ${process.versions.node}`;
8624
- err(` ${bold("agent.libx.js")} ${cyan("v" + VERSION)}${dim(` \xB7 ${duplex ? `voice ${dx.options.voiceModel} \xB7 worker ${work.model}` : work.model} \xB7 ${rt}`)}
8808
+ err(` ${bold("agent.libx.js")} ${cyan("v" + VERSION)}${dim(` \xB7 ${duplex ? `reflex ${dx.options.reflexModel} \xB7 act ${work.model}${dx.options.thinkModel !== false ? ` \xB7 think ${dx.options.thinkModel}` : ""}` : work.model} \xB7 ${rt}`)}
8625
8809
  `);
8626
8810
  }
8627
8811
  },
@@ -8651,7 +8835,7 @@ ${extra}` : body);
8651
8835
  const mode = args.vfs ? "sandbox (VFS \u2014 disk untouched)" : args.boddb ? `boddb (database workspace at ${args.boddb} \u2014 disk untouched)` : args.shell ? "disk + real /bin/sh" : "disk (full real FS, like Claude Code)";
8652
8836
  const pol = work.permissions;
8653
8837
  const perm = !pol ? "allow all (unattended)" : `${pol.options.rules.length} rule(s), default ${pol.options.default}`;
8654
- const model = duplex ? `voice ${dx.options.voiceModel} \xB7 worker ${work.model}` : work.model;
8838
+ const model = duplex ? `reflex ${dx.options.reflexModel} \xB7 act ${work.model}${dx.options.thinkModel !== false ? ` \xB7 think ${dx.options.thinkModel}` : ""}` : work.model;
8655
8839
  err(formatStatus({ model, cwd, mode, tools: (duplex ? work.tools ?? [] : agent.options.tools).map((t) => t.name), permissions: perm, turns: session.meta.turns, tokens: session.meta.tokens ?? 0, sessionId: session.meta.id, estimated: session.meta.costEstimated ?? false }));
8656
8840
  if (duplex && dx.tasks.size) err(dim(` tasks: ${[...dx.tasks.values()].map((t) => `${t.id}:${t.status}`).join(" ")}
8657
8841
  `));
@@ -8713,7 +8897,7 @@ ${extra}` : body);
8713
8897
  }
8714
8898
  const picked = await pickModel(work.model);
8715
8899
  if (picked) setModel(picked);
8716
- else err(dim(" " + (duplex ? `voice ${dx.options.voiceModel} \xB7 worker ${work.model}` : work.model) + "\n"));
8900
+ else err(dim(" " + (duplex ? `reflex ${dx.options.reflexModel} \xB7 act ${work.model}${dx.options.thinkModel !== false ? ` \xB7 think ${dx.options.thinkModel}` : ""}` : work.model) + "\n"));
8717
8901
  }
8718
8902
  },
8719
8903
  ...duplex ? { workers: {
@@ -8729,22 +8913,70 @@ ${extra}` : body);
8729
8913
  `));
8730
8914
  }
8731
8915
  }, "voice-model": {
8732
- desc: "switch the duplex voice (fast) model \u2014 /voice-model <id>, or alone for a picker",
8916
+ desc: "switch the reflex (voice) model \u2014 /voice-model <id>, or alone for a picker",
8733
8917
  run: async (a) => {
8734
8918
  const apply = (id) => {
8735
8919
  const m = resolveModelOrNewest(id);
8736
- dx.options.voiceModel = m;
8920
+ dx.options.reflexModel = m;
8737
8921
  dx.voice.options.model = m;
8738
- err(green(` \u2713 voice model \u2192 ${m}
8922
+ err(green(` \u2713 reflex model \u2192 ${m}
8739
8923
  `));
8740
8924
  };
8741
8925
  if (a[0]) {
8742
8926
  apply(a[0]);
8743
8927
  return;
8744
8928
  }
8745
- const picked = await pickModel(dx.options.voiceModel);
8929
+ const picked = await pickModel(dx.options.reflexModel);
8746
8930
  if (picked) apply(picked);
8747
- else err(dim(` voice ${dx.options.voiceModel}
8931
+ else err(dim(` reflex ${dx.options.reflexModel}
8932
+ `));
8933
+ }
8934
+ }, "think-model": {
8935
+ desc: "switch the think (premium) model, or /think-model off to disable",
8936
+ run: async (a) => {
8937
+ if (a[0] === "off" || a[0] === "false") {
8938
+ dx.setThinkModel(false);
8939
+ err(green(` \u2713 think tier disabled
8940
+ `));
8941
+ return;
8942
+ }
8943
+ const apply = (id) => {
8944
+ const m = resolveModelOrNewest(id);
8945
+ dx.setThinkModel(m);
8946
+ err(green(` \u2713 think model \u2192 ${m}
8947
+ `));
8948
+ };
8949
+ if (a[0]) {
8950
+ apply(a[0]);
8951
+ return;
8952
+ }
8953
+ const current = dx.options.thinkModel === false ? void 0 : dx.options.thinkModel;
8954
+ const picked = await pickModel(current ?? "anthropic/claude-opus-4-6");
8955
+ if (picked) apply(picked);
8956
+ else err(dim(` think ${dx.options.thinkModel === false ? "off" : dx.options.thinkModel}
8957
+ `));
8958
+ }
8959
+ }, act: {
8960
+ desc: "spawn a standard worker \u2014 /act <brief>",
8961
+ run: async (a) => {
8962
+ if (!a.length) {
8963
+ err(dim(" usage: /act <what to do>\n"));
8964
+ return;
8965
+ }
8966
+ const id = await dx.dispatch(a.join(" "), "act");
8967
+ err(dim(` \u2192 task ${id} started
8968
+ `));
8969
+ }
8970
+ }, think: {
8971
+ desc: "spawn a deep-reasoning worker \u2014 /think <question>",
8972
+ run: async (a) => {
8973
+ if (!a.length) {
8974
+ err(dim(" usage: /think <what to reason about>\n"));
8975
+ return;
8976
+ }
8977
+ const off = dx.options.thinkModel === false;
8978
+ const id = await dx.dispatch(a.join(" "), "think");
8979
+ err(dim(` \u2192 task ${id} ${off ? "(think tier off \u2014 running as act)" : "(think)"} started
8748
8980
  `));
8749
8981
  }
8750
8982
  } } : {},
@@ -9154,7 +9386,7 @@ ${extra}` : body);
9154
9386
  err(bold("agent.libx.js") + cyan(" v" + VERSION) + dim(` \u2014 ${work.model} \xB7 ${cwd}
9155
9387
  `));
9156
9388
  err(dim("Type a task, or /help. Type / or @ for live suggestions (\u2191/\u2193 \u23CE). Esc cancels/clears; double-Esc jumps back; Ctrl-D exits.\n"));
9157
- if (dx) err(dim(`\u25D1 duplex \u2014 voice: ${dx.options.voiceModel} \xB7 worker: ${work.model} (real work runs in background tasks, re-voiced when done)
9389
+ if (dx) err(dim(`\u25D1 duplex \u2014 reflex: ${dx.options.reflexModel} \xB7 act: ${work.model}${dx.options.thinkModel !== false ? ` \xB7 think: ${dx.options.thinkModel}` : ""} (real work runs in background tasks, re-voiced when done)
9158
9390
  `));
9159
9391
  const listDir = (absDir) => {
9160
9392
  try {