npm - agent.libx.js - Versions diffs - 0.93.30 → 0.93.31 - Mend

agent.libx.js 0.93.30 → 0.93.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/cli/cli.ts +44 -16
package/dist/{Agent-kWrJvtZM.d.ts → Agent-uWtu_WFY.d.ts} +11 -0
package/dist/cli.d.ts +1 -1
package/dist/cli.js +153 -60
package/dist/cli.js.map +1 -1
package/dist/index.d.ts +12 -2
package/dist/index.js +64 -4
package/dist/index.js.map +1 -1
package/package.json +1 -1

package/cli/cli.ts CHANGED

@@ -213,7 +213,7 @@ Flags:
                        impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
                        with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
                        spoken replies out (echo-cancelled; speak over it to interrupt)
-  --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
+  --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
   --think-model <id>   with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
   --no-think           with --duplex: disable the Think tier (Act handles everything)
   --add-dir <path>     mount another directory into the workspace (repeatable; disk mode only)
@@ -245,7 +245,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
 Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
 REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
-REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
+REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
 REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
 REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
 REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
@@ -1047,6 +1047,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
   const duplex = args.duplex;
   let dx: DuplexAgent | undefined;
   let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
+  let toggleVoice: (() => Promise<void>) | undefined; // bound below (duplex + TTY): /voice flips mic on/off live
   let editorRef: LineEditor | undefined; // bound once the line editor exists — async chrome repaints the prompt via it
   // During a turn the user's type-ahead lives on a "stash ›" line (no active editor to own it). Async
   // chrome (streamed deltas, task events) lands on top of it — repaint the stash below, so it survives.
@@ -1618,6 +1619,12 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
         if (a[0] === 'full' || a[0] === 'minimal') { workerChrome = a[0]; err(green(`  ✓ worker chrome → ${a[0]}\n`)); return; }
         err(dim(`  worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
       },
+    }, voice: {
+      desc: 'toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)',
+      run: async () => {
+        if (!toggleVoice) { err(dim('  (voice needs --duplex on a TTY)\n')); return; }
+        await toggleVoice();
+      },
     }, 'voice-model': {
       desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
       run: async (a: string[]) => {
@@ -2065,11 +2072,17 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
   // spoken via the host tap above. Missing keys → conversational text mode, one-line note.
   let voicePartial = ''; // live partial transcript, rendered in the prompt footer
   let partialRedraw: ReturnType<typeof setTimeout> | null = null;
-  if (args.voice && duplex && process.stdin.isTTY) {
+  // Spin VoiceIO up live (launch with --voice, or /voice mid-session). `greet` opens with a spoken
+  // greeting turn (launch only); a manual toggle just turns the mic on quietly. Returns true if voice
+  // is now live. Duplex + TTY only — bound to `toggleVoice` below so /voice can flip it off again.
+  const startVoice = async (greet: boolean): Promise<boolean> => {
+    if (voiceIO) return true;
+    if (!duplex || !process.stdin.isTTY) { err(dim('  (voice needs --duplex on a TTY)\n')); return false; }
     if (!VoiceIO.available()) {
       err(dim('  (voice I/O off — set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n'));
-    } else {
-      voiceIO = new VoiceIO({
+      return false;
+    }
+    voiceIO = new VoiceIO({
         // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
         // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
         // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
@@ -2097,14 +2110,11 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
           err(`\r\x1b[K  ${bold(cyan('🎤 ›'))} ${text}\n`);
           void dispatchLine(text + note).then(async (r) => { if (r === 'quit') { await voiceIO?.awaitIdle(); editorRef?.abort(); } }).finally(() => editorRef?.redrawNow());
         },
-      });
-      try {
-        await voiceIO.start();
-        process.on('exit', () => voiceIO?.stop()); // last-resort child cleanup (uncaughtException path exits hard)
-        // SIGHUP/SIGTERM (terminal closed, kill) bypass 'exit' handlers by default — without these the
-        // mic/player children outlive the CLI and hold the microphone (verified leak in PTY testing).
-        for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
-        err(dim(`  🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
+    });
+    try {
+      await voiceIO.start();
+      err(dim(`  🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
+      if (greet) {
         // Greeting: the agent makes the first turn — spoken, personalized from what it can see.
         // Straight to turn() (not dispatchLine): the synthetic prompt must not enter ↑-history.
         const where = cwd.split('/').pop();
@@ -2115,12 +2125,30 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
           `Context: working directory "${where}"${resumed ? '; this resumes an earlier conversation — glance at it and pick up naturally' : ''}. ` +
           `Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`,
         ).finally(() => editorRef?.redrawNow());
-      } catch (e: any) {
-        err(yellow(`  ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
-        voiceIO = undefined;
       }
+      return true;
+    } catch (e: any) {
+      err(yellow(`  ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
+      voiceIO = undefined;
+      return false;
     }
+  };
+  // Child cleanup, registered ONCE (not per start — toggling on/off must not stack listeners). They
+  // close over the live `voiceIO`, so they cover whichever instance is up. SIGHUP/SIGTERM (terminal
+  // closed, kill) bypass 'exit' handlers by default — without these the mic/player children outlive
+  // the CLI and hold the microphone (verified leak in PTY testing).
+  if (duplex && process.stdin.isTTY) {
+    process.on('exit', () => voiceIO?.stop());
+    for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
   }
+  // /voice toggle: flip the mic on or off without leaving the session (kills STT/TTS children on off).
+  if (duplex && process.stdin.isTTY) toggleVoice = async () => {
+    if (voiceIO) { voiceIO.stop(); voiceIO = undefined; voicePartial = ''; err(dim('  🔇 voice off\n')); editorRef?.redrawNow(); return; }
+    await startVoice(false);
+    editorRef?.redrawNow();
+  };
+  // Launch with --voice: start now, with the spoken greeting.
+  if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
   while (true) {
     // Double-Esc fired during the just-finished turn → open the jump-back picker now (turn has unwound).

package/dist/{Agent-kWrJvtZM.d.ts → Agent-uWtu_WFY.d.ts} RENAMED

@@ -228,6 +228,17 @@ declare class AgentOptions {
     /** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
      *  sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
     maxContextTokens: number;
+    /** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
+     *  a marker telling the model it was cropped (refine the query, or page further). Guards against one
+     *  Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
+    maxToolResultBytes: number;
+    /** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
+     *  and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
+     *  paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
+    capToolResult?: (full: string, info: {
+        tool: string;
+        args: any;
+    }) => string | Promise<string>;
     /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
     skillsDir?: string | string[];
     /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */

package/dist/cli.d.ts CHANGED

@@ -1,5 +1,5 @@
 #!/usr/bin/env bun
-import { h as RunResult, R as ReasoningEffort } from './Agent-kWrJvtZM.js';
+import { h as RunResult, R as ReasoningEffort } from './Agent-uWtu_WFY.js';
 import { IFilesystem } from '@livx.cc/wcli/core';
 import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';

package/dist/cli.js CHANGED

@@ -2692,6 +2692,14 @@ var AgentOptions = class {
   /** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
    *  sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
   maxContextTokens = 0;
+  /** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
+   *  a marker telling the model it was cropped (refine the query, or page further). Guards against one
+   *  Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
+  maxToolResultBytes = 6e4;
+  /** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
+   *  and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
+   *  paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
+  capToolResult;
   /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
   skillsDir;
   /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3107,6 +3115,11 @@ var Agent = class _Agent {
       this.ctx.emit = void 0;
     }
     if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
+    const cap = this.options.maxToolResultBytes ?? 0;
+    if (!threw && cap > 0 && result.length > cap) {
+      const info = { tool: tc.function.name, args };
+      result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
+    }
     await hooks?.postToolUse?.(call, result, meta);
     this.options.host?.notify?.({ kind: "tool_result", id: tc.id ?? "", output: result, isError: threw });
     if (images?.length) {
@@ -3160,6 +3173,15 @@ function estimateTokens(m) {
   for (const x of m) chars += contentText(x.content).length + (x.tool_calls ? JSON.stringify(x.tool_calls).length : 0);
   return Math.ceil(chars / 4);
 }
+function cropResult(result, cap) {
+  const head = result.slice(0, cap);
+  const nl = head.lastIndexOf("\n");
+  const page = nl > cap * 0.5 ? head.slice(0, nl) : head;
+  const omitted = result.length - page.length;
+  return `${page}
+[output cropped \u2014 showing ${page.length} of ${result.length} bytes; ${omitted} omitted. This is page 1. Refine your query/command to narrow it, or call the tool again with a tighter scope to see more.]`;
+}
 function stubOldToolResults(messages, keep) {
   const meta = /* @__PURE__ */ new Map();
   for (const msg of messages)
@@ -3533,6 +3555,34 @@ To pull a specific detail, Grep/Read ${path}, or call Ask({ question: "\u2026",
   captureAll(tools) {
     return tools.map((t) => this.capture(t));
   }
+  /**
+   * Spill an oversized tool result to a scratch file and return PAGE 1 + a recoverable, paginated stub.
+   * Drop-in for `Agent.capToolResult`: the agent sees usable content immediately and knows how to get
+   * the rest (refine the query, Read the file in pages with offset/limit, or Ask to extract specifics).
+   * Lossless — unlike a plain crop, the full output stays available on the scratch FS.
+   */
+  async spill(full, info, pageBytes = 8e3) {
+    const { dir } = this.options;
+    const id = "a" + ++this.seq;
+    const path = `${dir}/${id}-${slug(info.tool)}.txt`;
+    const header = `# ${info.tool}(${shortArgs(info.args)}) \u2014 ${full.length} bytes
+`;
+    try {
+      await (this.dirReady ??= mkdirp(this.fs, dir));
+      await this.fs.writeFile(path, header + full);
+    } catch (e) {
+      log4.debug("scratch spill failed; cropping lossy", e);
+      return full.slice(0, pageBytes) + `
+[output cropped to ${pageBytes} of ${full.length} bytes; full output unavailable (scratch write failed) \u2014 refine your query]`;
+    }
+    const head = full.slice(0, pageBytes);
+    const nl = head.lastIndexOf("\n");
+    const page = nl > pageBytes * 0.5 ? head.slice(0, nl) : head;
+    return `${page}
+[output cropped \u2014 page 1 (${page.length} of ${full.length} bytes). Full output saved to ${path}. To see more: refine your query/command to narrow it, or Read ${path} with offset/limit to page through it, or Ask({ question: "\u2026", over: "${path}" }) to extract specifics.]`;
+  }
 };
 var ASK_PROMPT = "You are a retrieval-extraction step with Read, Grep and Glob over a scratch filesystem holding raw outputs from earlier tools. Find the information that answers the question and return it concisely, quoting values/facts verbatim. Do NOT add analysis or anything not grounded in the files. If the answer is not present, say so plainly.";
 function makeAskTool(o) {
@@ -3666,7 +3716,10 @@ var DuplexAgentOptions = class {
   ai;
   /** The WORKER's filesystem (act + think). If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
   fs;
-  reflexModel = "groq/openai/gpt-oss-20b";
+  // The reflex IS the voice. 120b (not 20b) for channel discipline + instruction-following: the 20b
+  // mislabels gpt-oss harmony channels under load, leaking raw analysis into the spoken `final` channel
+  // (and misfiring Hold). 120b is the same price tier (~$0.15/$0.60) — the quality/cost trade is free.
+  reflexModel = "groq/openai/gpt-oss-120b";
   actModel = "anthropic/claude-sonnet-4-6";
   /** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
   thinkModel = "anthropic/claude-opus-4-8";
@@ -3753,7 +3806,12 @@ var DuplexAgent = class {
     const canSearch = workerToolNames.some((n) => /WebSearch/i.test(n));
     const canFetch = workerToolNames.some((n) => /WebFetch/i.test(n));
     const workerWeb = canSearch ? `, and it CAN search the web and read web pages \u2014 so when the user gives you something specific to look up ("search for X", "find me\u2026", "what's the latest on\u2026"), route it to Act. But a bare capability QUESTION like "can you search the web?" just gets a short spoken "yes, I can" \u2014 do NOT dispatch and NEVER invent a query the user did not give you` : canFetch ? ", and it can fetch a specific web page URL (but cannot search the web)" : "";
-    const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
+    const mcpNames = [
+      ...Object.keys(o.actOptions?.providerOptions?.mcpServers ?? {}),
+      ...new Set(workerToolNames.filter((n) => n.startsWith("mcp__")).map((n) => n.slice(5).split("__")[0]))
+    ];
+    const workerMcp = mcpNames.length ? `, and it can use these MCP servers: ${[...new Set(mcpNames)].join(", ")}` + (mcpNames.some((n) => /browser/i.test(n)) ? ' \u2014 including driving a REAL browser (open tabs, navigate, click, screenshot), so answer "yes" if asked whether you can control/drive a browser and route an actual browse to Act' : "") : "";
+    const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb + workerMcp) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
 Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
     const tools = [
       ...o.reflexOptions?.tools ?? [],
@@ -4188,8 +4246,10 @@ Another agent just implemented the above. Independently check the CURRENT state
             case "capabilities": {
               const actTools = this.options.actOptions?.tools ?? [];
               const names = actTools.map((t) => t.name);
+              const mcpServers = Object.keys(this.options.actOptions?.providerOptions?.mcpServers ?? {});
+              const mcpNote = mcpServers.length ? ` Plus MCP servers your worker can use: ${mcpServers.join(", ")} (e.g. browser-bridge \u2192 drive a real browser: open tabs, navigate, click, screenshot).` : "";
               if (!names.length)
-                return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so.";
+                return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so." + mcpNote;
               const hasFetch = names.some((n) => /WebFetch/i.test(n));
               const hasBrowser = names.some((n) => /browser.*(navigate|click|page|type)/i.test(n));
               const hasSearch = names.some((n) => /(^|_)WebSearch$|search/i.test(n) && !/WebFetch|browser/i.test(n));
@@ -4198,7 +4258,7 @@ Another agent just implemented the above. Independently check the CURRENT state
               if (hasBrowser) notes.push("The browser tools drive a real browser: you CAN open a site and, if needed, navigate to a search engine and search there \u2014 but it is manual and takes a moment, not an instant lookup.");
               else if (!hasSearch && hasFetch) notes.push('You have no general web-search tool, so for an instant "search the web" you can only fetch a URL they provide.');
               const webNote = notes.length ? " NOTE: " + notes.join(" ") : "";
-              return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote;
+              return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote + mcpNote;
             }
             case "time":
               return (/* @__PURE__ */ new Date()).toString();
@@ -5723,13 +5783,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
       return { systemPrompt: basePrompt + "\n\n" + extra };
     })(),
     tools: (() => {
-      let base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
+      const base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
       const tail = [...o.extraTools ?? []];
-      if (scratch) {
-        const CAPTURE2 = /* @__PURE__ */ new Set(["WebSearch", "WebFetch"]);
-        base = base.map((t) => CAPTURE2.has(t.name) ? scratch.capture(t) : t);
-        tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
-      }
+      if (scratch) tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
       if (!realShell.length) return [...base, ...tail];
       const filtered = base.filter((t) => t.name !== "bash");
       return [...filtered, ...realShell, ...tail];
@@ -5742,6 +5798,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
     planMode: o.planMode ?? false,
     permissions: o.permissions,
     subagents: o.subagents ?? false,
+    // When scratch is on, an oversized tool result spills to a scratch file + recoverable paginated stub
+    // (lossless). Without scratch, the Agent's default crop (lossy) still guards the context window.
+    ...scratch ? { capToolResult: (full, info) => scratch.spill(full, info) } : {},
     backgroundJobs: o.backgroundJobs ?? virtual,
     // default ON in virtual modes (no real shell there); disk uses ShellJobRegistry
     skillsDir: dots("skills"),
@@ -8038,7 +8097,7 @@ Flags:
                        impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
                        with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
                        spoken replies out (echo-cancelled; speak over it to interrupt)
-  --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
+  --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
   --think-model <id>   with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
   --no-think           with --duplex: disable the Think tier (Act handles everything)
   --add-dir <path>     mount another directory into the workspace (repeatable; disk mode only)
@@ -8070,7 +8129,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
 Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
 REPL shortcuts: !<cmd> runs a shell command inline \xB7 #<note> saves a memory \xB7 @path inlines a file
-REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
+REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
 REPL completion: type / (commands+skills) or @ (files) for a LIVE menu \u2014 \u2191/\u2193 select, \u23CE/Tab accept, Esc dismiss.
 REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
 REPL shortcuts: Shift+Tab cycles permission posture (ask \u2192 accept-edits \u2192 plan) \xB7 Alt+T toggles reasoning \xB7 Alt+P switches model \xB7 Ctrl+O toggles verbose tool output \xB7 \u2192 or Tab accepts the dim history ghost-suggestion \xB7 Alt+S/Ctrl+S stash/unstash.
@@ -8808,6 +8867,7 @@ async function repl(args, ai, cfg, cwd) {
   const duplex = args.duplex;
   let dx;
   let voiceIO;
+  let toggleVoice;
   let editorRef;
   let repaintStash = () => {
   };
@@ -9415,6 +9475,15 @@ ${extra}` : body);
         err(dim(`  worker chrome: ${workerChrome} (use /workers full|minimal)
 `));
       }
+    }, voice: {
+      desc: "toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)",
+      run: async () => {
+        if (!toggleVoice) {
+          err(dim("  (voice needs --duplex on a TTY)\n"));
+          return;
+        }
+        await toggleVoice();
+      }
     }, "voice-model": {
       desc: "switch the reflex (voice) model \u2014 /voice-model <id>, or alone for a picker",
       run: async (a) => {
@@ -10059,67 +10128,91 @@ ${extra}` : body);
   };
   let voicePartial = "";
   let partialRedraw = null;
-  if (args.voice && duplex && process.stdin.isTTY) {
+  const startVoice = async (greet) => {
+    if (voiceIO) return true;
+    if (!duplex || !process.stdin.isTTY) {
+      err(dim("  (voice needs --duplex on a TTY)\n"));
+      return false;
+    }
     if (!VoiceIO.available()) {
       err(dim("  (voice I/O off \u2014 set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n"));
-    } else {
-      voiceIO = new VoiceIO({
-        // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
-        // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
-        // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
-        onState: () => editorRef?.redrawNow(),
-        // Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
-        // (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
-        onPartial: (text) => {
-          if (text === voicePartial) return;
-          voicePartial = text;
-          if (!partialRedraw) partialRedraw = setTimeout(() => {
-            partialRedraw = null;
-            editorRef?.redrawNow();
-          }, 250);
-        },
-        onBargeIn: (phase) => {
-          activeTurn?.abort();
-          if (phase === "speaking") err(yellow("\n  \u270B interrupted\n"));
-        },
-        onUtterance: (text) => {
-          voicePartial = "";
-          if (!text.trim()) return;
-          const cut = voiceIO.takeInterruptedReply();
-          const note = cut && cut.full.length - cut.heard.length > 40 ? `
+      return false;
+    }
+    voiceIO = new VoiceIO({
+      // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
+      // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
+      // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
+      onState: () => editorRef?.redrawNow(),
+      // Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
+      // (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
+      onPartial: (text) => {
+        if (text === voicePartial) return;
+        voicePartial = text;
+        if (!partialRedraw) partialRedraw = setTimeout(() => {
+          partialRedraw = null;
+          editorRef?.redrawNow();
+        }, 250);
+      },
+      onBargeIn: (phase) => {
+        activeTurn?.abort();
+        if (phase === "speaking") err(yellow("\n  \u270B interrupted\n"));
+      },
+      onUtterance: (text) => {
+        voicePartial = "";
+        if (!text.trim()) return;
+        const cut = voiceIO.takeInterruptedReply();
+        const note = cut && cut.full.length - cut.heard.length > 40 ? `
 [the user interrupted you mid-speech \u2014 they only heard up to: "\u2026${cut.heard.slice(-80)}". Work any unheard essentials into your reply naturally, only if still relevant.]` : "";
-          if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
-          err(`\r\x1B[K  ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
+        if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
+        err(`\r\x1B[K  ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
 `);
-          void dispatchLine(text + note).then(async (r) => {
-            if (r === "quit") {
-              await voiceIO?.awaitIdle();
-              editorRef?.abort();
-            }
-          }).finally(() => editorRef?.redrawNow());
-        }
-      });
-      try {
-        await voiceIO.start();
-        process.on("exit", () => voiceIO?.stop());
-        for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
-          voiceIO?.stop();
-          process.exit(0);
-        });
-        err(dim(`  \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
+        void dispatchLine(text + note).then(async (r) => {
+          if (r === "quit") {
+            await voiceIO?.awaitIdle();
+            editorRef?.abort();
+          }
+        }).finally(() => editorRef?.redrawNow());
+      }
+    });
+    try {
+      await voiceIO.start();
+      err(dim(`  \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
 `));
+      if (greet) {
         const where = cwd.split("/").pop();
         const resumed = session.messages.length > 0;
         void turn(
           `[session started] First call QuickLook with what:"memory" \u2014 if it knows the user's name or preferences, use them. Then greet the user warmly in one or two short sentences, as the opener of a live voice conversation. Context: working directory "${where}"${resumed ? "; this resumes an earlier conversation \u2014 glance at it and pick up naturally" : ""}. Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`
         ).finally(() => editorRef?.redrawNow());
-      } catch (e) {
-        err(yellow(`  \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
-`));
-        voiceIO = void 0;
       }
+      return true;
+    } catch (e) {
+      err(yellow(`  \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
+`));
+      voiceIO = void 0;
+      return false;
     }
+  };
+  if (duplex && process.stdin.isTTY) {
+    process.on("exit", () => voiceIO?.stop());
+    for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
+      voiceIO?.stop();
+      process.exit(0);
+    });
   }
+  if (duplex && process.stdin.isTTY) toggleVoice = async () => {
+    if (voiceIO) {
+      voiceIO.stop();
+      voiceIO = void 0;
+      voicePartial = "";
+      err(dim("  \u{1F507} voice off\n"));
+      editorRef?.redrawNow();
+      return;
+    }
+    await startVoice(false);
+    editorRef?.redrawNow();
+  };
+  if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
   while (true) {
     if (pendingRewind) {
       pendingRewind = false;