agent.libx.js 0.93.30 → 0.93.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/cli.ts CHANGED
@@ -213,7 +213,7 @@ Flags:
213
213
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
214
214
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
215
215
  spoken replies out (echo-cancelled; speak over it to interrupt)
216
- --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
216
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
217
217
  --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
218
218
  --no-think with --duplex: disable the Think tier (Act handles everything)
219
219
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
@@ -245,7 +245,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
245
245
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
246
246
 
247
247
  REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
248
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
248
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
249
249
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
250
250
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
251
251
  REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
@@ -1047,6 +1047,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1047
1047
  const duplex = args.duplex;
1048
1048
  let dx: DuplexAgent | undefined;
1049
1049
  let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
1050
+ let toggleVoice: (() => Promise<void>) | undefined; // bound below (duplex + TTY): /voice flips mic on/off live
1050
1051
  let editorRef: LineEditor | undefined; // bound once the line editor exists — async chrome repaints the prompt via it
1051
1052
  // During a turn the user's type-ahead lives on a "stash ›" line (no active editor to own it). Async
1052
1053
  // chrome (streamed deltas, task events) lands on top of it — repaint the stash below, so it survives.
@@ -1618,6 +1619,12 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1618
1619
  if (a[0] === 'full' || a[0] === 'minimal') { workerChrome = a[0]; err(green(` ✓ worker chrome → ${a[0]}\n`)); return; }
1619
1620
  err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
1620
1621
  },
1622
+ }, voice: {
1623
+ desc: 'toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)',
1624
+ run: async () => {
1625
+ if (!toggleVoice) { err(dim(' (voice needs --duplex on a TTY)\n')); return; }
1626
+ await toggleVoice();
1627
+ },
1621
1628
  }, 'voice-model': {
1622
1629
  desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
1623
1630
  run: async (a: string[]) => {
@@ -2065,11 +2072,17 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2065
2072
  // spoken via the host tap above. Missing keys → conversational text mode, one-line note.
2066
2073
  let voicePartial = ''; // live partial transcript, rendered in the prompt footer
2067
2074
  let partialRedraw: ReturnType<typeof setTimeout> | null = null;
2068
- if (args.voice && duplex && process.stdin.isTTY) {
2075
+ // Spin VoiceIO up live (launch with --voice, or /voice mid-session). `greet` opens with a spoken
2076
+ // greeting turn (launch only); a manual toggle just turns the mic on quietly. Returns true if voice
2077
+ // is now live. Duplex + TTY only — bound to `toggleVoice` below so /voice can flip it off again.
2078
+ const startVoice = async (greet: boolean): Promise<boolean> => {
2079
+ if (voiceIO) return true;
2080
+ if (!duplex || !process.stdin.isTTY) { err(dim(' (voice needs --duplex on a TTY)\n')); return false; }
2069
2081
  if (!VoiceIO.available()) {
2070
2082
  err(dim(' (voice I/O off — set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n'));
2071
- } else {
2072
- voiceIO = new VoiceIO({
2083
+ return false;
2084
+ }
2085
+ voiceIO = new VoiceIO({
2073
2086
  // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
2074
2087
  // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
2075
2088
  // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
@@ -2097,14 +2110,11 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2097
2110
  err(`\r\x1b[K ${bold(cyan('🎤 ›'))} ${text}\n`);
2098
2111
  void dispatchLine(text + note).then(async (r) => { if (r === 'quit') { await voiceIO?.awaitIdle(); editorRef?.abort(); } }).finally(() => editorRef?.redrawNow());
2099
2112
  },
2100
- });
2101
- try {
2102
- await voiceIO.start();
2103
- process.on('exit', () => voiceIO?.stop()); // last-resort child cleanup (uncaughtException path exits hard)
2104
- // SIGHUP/SIGTERM (terminal closed, kill) bypass 'exit' handlers by default — without these the
2105
- // mic/player children outlive the CLI and hold the microphone (verified leak in PTY testing).
2106
- for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
2107
- err(dim(` 🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
2113
+ });
2114
+ try {
2115
+ await voiceIO.start();
2116
+ err(dim(` 🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
2117
+ if (greet) {
2108
2118
  // Greeting: the agent makes the first turn — spoken, personalized from what it can see.
2109
2119
  // Straight to turn() (not dispatchLine): the synthetic prompt must not enter ↑-history.
2110
2120
  const where = cwd.split('/').pop();
@@ -2115,12 +2125,30 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2115
2125
  `Context: working directory "${where}"${resumed ? '; this resumes an earlier conversation — glance at it and pick up naturally' : ''}. ` +
2116
2126
  `Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`,
2117
2127
  ).finally(() => editorRef?.redrawNow());
2118
- } catch (e: any) {
2119
- err(yellow(` ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
2120
- voiceIO = undefined;
2121
2128
  }
2129
+ return true;
2130
+ } catch (e: any) {
2131
+ err(yellow(` ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
2132
+ voiceIO = undefined;
2133
+ return false;
2122
2134
  }
2135
+ };
2136
+ // Child cleanup, registered ONCE (not per start — toggling on/off must not stack listeners). They
2137
+ // close over the live `voiceIO`, so they cover whichever instance is up. SIGHUP/SIGTERM (terminal
2138
+ // closed, kill) bypass 'exit' handlers by default — without these the mic/player children outlive
2139
+ // the CLI and hold the microphone (verified leak in PTY testing).
2140
+ if (duplex && process.stdin.isTTY) {
2141
+ process.on('exit', () => voiceIO?.stop());
2142
+ for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
2123
2143
  }
2144
+ // /voice toggle: flip the mic on or off without leaving the session (kills STT/TTS children on off).
2145
+ if (duplex && process.stdin.isTTY) toggleVoice = async () => {
2146
+ if (voiceIO) { voiceIO.stop(); voiceIO = undefined; voicePartial = ''; err(dim(' 🔇 voice off\n')); editorRef?.redrawNow(); return; }
2147
+ await startVoice(false);
2148
+ editorRef?.redrawNow();
2149
+ };
2150
+ // Launch with --voice: start now, with the spoken greeting.
2151
+ if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
2124
2152
 
2125
2153
  while (true) {
2126
2154
  // Double-Esc fired during the just-finished turn → open the jump-back picker now (turn has unwound).
@@ -228,6 +228,17 @@ declare class AgentOptions {
228
228
  /** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
229
229
  * sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
230
230
  maxContextTokens: number;
231
+ /** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
232
+ * a marker telling the model it was cropped (refine the query, or page further). Guards against one
233
+ * Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
234
+ maxToolResultBytes: number;
235
+ /** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
236
+ * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
237
+ * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
238
+ capToolResult?: (full: string, info: {
239
+ tool: string;
240
+ args: any;
241
+ }) => string | Promise<string>;
231
242
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
232
243
  skillsDir?: string | string[];
233
244
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { h as RunResult, R as ReasoningEffort } from './Agent-kWrJvtZM.js';
2
+ import { h as RunResult, R as ReasoningEffort } from './Agent-uWtu_WFY.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
5
5
 
package/dist/cli.js CHANGED
@@ -2692,6 +2692,14 @@ var AgentOptions = class {
2692
2692
  /** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
2693
2693
  * sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
2694
2694
  maxContextTokens = 0;
2695
+ /** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
2696
+ * a marker telling the model it was cropped (refine the query, or page further). Guards against one
2697
+ * Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
2698
+ maxToolResultBytes = 6e4;
2699
+ /** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
2700
+ * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
2701
+ * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
2702
+ capToolResult;
2695
2703
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
2696
2704
  skillsDir;
2697
2705
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3107,6 +3115,11 @@ var Agent = class _Agent {
3107
3115
  this.ctx.emit = void 0;
3108
3116
  }
3109
3117
  if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
3118
+ const cap = this.options.maxToolResultBytes ?? 0;
3119
+ if (!threw && cap > 0 && result.length > cap) {
3120
+ const info = { tool: tc.function.name, args };
3121
+ result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
3122
+ }
3110
3123
  await hooks?.postToolUse?.(call, result, meta);
3111
3124
  this.options.host?.notify?.({ kind: "tool_result", id: tc.id ?? "", output: result, isError: threw });
3112
3125
  if (images?.length) {
@@ -3160,6 +3173,15 @@ function estimateTokens(m) {
3160
3173
  for (const x of m) chars += contentText(x.content).length + (x.tool_calls ? JSON.stringify(x.tool_calls).length : 0);
3161
3174
  return Math.ceil(chars / 4);
3162
3175
  }
3176
+ function cropResult(result, cap) {
3177
+ const head = result.slice(0, cap);
3178
+ const nl = head.lastIndexOf("\n");
3179
+ const page = nl > cap * 0.5 ? head.slice(0, nl) : head;
3180
+ const omitted = result.length - page.length;
3181
+ return `${page}
3182
+
3183
+ [output cropped \u2014 showing ${page.length} of ${result.length} bytes; ${omitted} omitted. This is page 1. Refine your query/command to narrow it, or call the tool again with a tighter scope to see more.]`;
3184
+ }
3163
3185
  function stubOldToolResults(messages, keep) {
3164
3186
  const meta = /* @__PURE__ */ new Map();
3165
3187
  for (const msg of messages)
@@ -3533,6 +3555,34 @@ To pull a specific detail, Grep/Read ${path}, or call Ask({ question: "\u2026",
3533
3555
  captureAll(tools) {
3534
3556
  return tools.map((t) => this.capture(t));
3535
3557
  }
3558
+ /**
3559
+ * Spill an oversized tool result to a scratch file and return PAGE 1 + a recoverable, paginated stub.
3560
+ * Drop-in for `Agent.capToolResult`: the agent sees usable content immediately and knows how to get
3561
+ * the rest (refine the query, Read the file in pages with offset/limit, or Ask to extract specifics).
3562
+ * Lossless — unlike a plain crop, the full output stays available on the scratch FS.
3563
+ */
3564
+ async spill(full, info, pageBytes = 8e3) {
3565
+ const { dir } = this.options;
3566
+ const id = "a" + ++this.seq;
3567
+ const path = `${dir}/${id}-${slug(info.tool)}.txt`;
3568
+ const header = `# ${info.tool}(${shortArgs(info.args)}) \u2014 ${full.length} bytes
3569
+ `;
3570
+ try {
3571
+ await (this.dirReady ??= mkdirp(this.fs, dir));
3572
+ await this.fs.writeFile(path, header + full);
3573
+ } catch (e) {
3574
+ log4.debug("scratch spill failed; cropping lossy", e);
3575
+ return full.slice(0, pageBytes) + `
3576
+
3577
+ [output cropped to ${pageBytes} of ${full.length} bytes; full output unavailable (scratch write failed) \u2014 refine your query]`;
3578
+ }
3579
+ const head = full.slice(0, pageBytes);
3580
+ const nl = head.lastIndexOf("\n");
3581
+ const page = nl > pageBytes * 0.5 ? head.slice(0, nl) : head;
3582
+ return `${page}
3583
+
3584
+ [output cropped \u2014 page 1 (${page.length} of ${full.length} bytes). Full output saved to ${path}. To see more: refine your query/command to narrow it, or Read ${path} with offset/limit to page through it, or Ask({ question: "\u2026", over: "${path}" }) to extract specifics.]`;
3585
+ }
3536
3586
  };
3537
3587
  var ASK_PROMPT = "You are a retrieval-extraction step with Read, Grep and Glob over a scratch filesystem holding raw outputs from earlier tools. Find the information that answers the question and return it concisely, quoting values/facts verbatim. Do NOT add analysis or anything not grounded in the files. If the answer is not present, say so plainly.";
3538
3588
  function makeAskTool(o) {
@@ -3666,7 +3716,10 @@ var DuplexAgentOptions = class {
3666
3716
  ai;
3667
3717
  /** The WORKER's filesystem (act + think). If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3668
3718
  fs;
3669
- reflexModel = "groq/openai/gpt-oss-20b";
3719
+ // The reflex IS the voice. 120b (not 20b) for channel discipline + instruction-following: the 20b
3720
+ // mislabels gpt-oss harmony channels under load, leaking raw analysis into the spoken `final` channel
3721
+ // (and misfiring Hold). 120b is the same price tier (~$0.15/$0.60) — the quality/cost trade is free.
3722
+ reflexModel = "groq/openai/gpt-oss-120b";
3670
3723
  actModel = "anthropic/claude-sonnet-4-6";
3671
3724
  /** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
3672
3725
  thinkModel = "anthropic/claude-opus-4-8";
@@ -3753,7 +3806,12 @@ var DuplexAgent = class {
3753
3806
  const canSearch = workerToolNames.some((n) => /WebSearch/i.test(n));
3754
3807
  const canFetch = workerToolNames.some((n) => /WebFetch/i.test(n));
3755
3808
  const workerWeb = canSearch ? `, and it CAN search the web and read web pages \u2014 so when the user gives you something specific to look up ("search for X", "find me\u2026", "what's the latest on\u2026"), route it to Act. But a bare capability QUESTION like "can you search the web?" just gets a short spoken "yes, I can" \u2014 do NOT dispatch and NEVER invent a query the user did not give you` : canFetch ? ", and it can fetch a specific web page URL (but cannot search the web)" : "";
3756
- const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
3809
+ const mcpNames = [
3810
+ ...Object.keys(o.actOptions?.providerOptions?.mcpServers ?? {}),
3811
+ ...new Set(workerToolNames.filter((n) => n.startsWith("mcp__")).map((n) => n.slice(5).split("__")[0]))
3812
+ ];
3813
+ const workerMcp = mcpNames.length ? `, and it can use these MCP servers: ${[...new Set(mcpNames)].join(", ")}` + (mcpNames.some((n) => /browser/i.test(n)) ? ' \u2014 including driving a REAL browser (open tabs, navigate, click, screenshot), so answer "yes" if asked whether you can control/drive a browser and route an actual browse to Act' : "") : "";
3814
+ const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb + workerMcp) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
3757
3815
  Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
3758
3816
  const tools = [
3759
3817
  ...o.reflexOptions?.tools ?? [],
@@ -4188,8 +4246,10 @@ Another agent just implemented the above. Independently check the CURRENT state
4188
4246
  case "capabilities": {
4189
4247
  const actTools = this.options.actOptions?.tools ?? [];
4190
4248
  const names = actTools.map((t) => t.name);
4249
+ const mcpServers = Object.keys(this.options.actOptions?.providerOptions?.mcpServers ?? {});
4250
+ const mcpNote = mcpServers.length ? ` Plus MCP servers your worker can use: ${mcpServers.join(", ")} (e.g. browser-bridge \u2192 drive a real browser: open tabs, navigate, click, screenshot).` : "";
4191
4251
  if (!names.length)
4192
- return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so.";
4252
+ return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so." + mcpNote;
4193
4253
  const hasFetch = names.some((n) => /WebFetch/i.test(n));
4194
4254
  const hasBrowser = names.some((n) => /browser.*(navigate|click|page|type)/i.test(n));
4195
4255
  const hasSearch = names.some((n) => /(^|_)WebSearch$|search/i.test(n) && !/WebFetch|browser/i.test(n));
@@ -4198,7 +4258,7 @@ Another agent just implemented the above. Independently check the CURRENT state
4198
4258
  if (hasBrowser) notes.push("The browser tools drive a real browser: you CAN open a site and, if needed, navigate to a search engine and search there \u2014 but it is manual and takes a moment, not an instant lookup.");
4199
4259
  else if (!hasSearch && hasFetch) notes.push('You have no general web-search tool, so for an instant "search the web" you can only fetch a URL they provide.');
4200
4260
  const webNote = notes.length ? " NOTE: " + notes.join(" ") : "";
4201
- return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote;
4261
+ return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote + mcpNote;
4202
4262
  }
4203
4263
  case "time":
4204
4264
  return (/* @__PURE__ */ new Date()).toString();
@@ -5723,13 +5783,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
5723
5783
  return { systemPrompt: basePrompt + "\n\n" + extra };
5724
5784
  })(),
5725
5785
  tools: (() => {
5726
- let base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
5786
+ const base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
5727
5787
  const tail = [...o.extraTools ?? []];
5728
- if (scratch) {
5729
- const CAPTURE2 = /* @__PURE__ */ new Set(["WebSearch", "WebFetch"]);
5730
- base = base.map((t) => CAPTURE2.has(t.name) ? scratch.capture(t) : t);
5731
- tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
5732
- }
5788
+ if (scratch) tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
5733
5789
  if (!realShell.length) return [...base, ...tail];
5734
5790
  const filtered = base.filter((t) => t.name !== "bash");
5735
5791
  return [...filtered, ...realShell, ...tail];
@@ -5742,6 +5798,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
5742
5798
  planMode: o.planMode ?? false,
5743
5799
  permissions: o.permissions,
5744
5800
  subagents: o.subagents ?? false,
5801
+ // When scratch is on, an oversized tool result spills to a scratch file + recoverable paginated stub
5802
+ // (lossless). Without scratch, the Agent's default crop (lossy) still guards the context window.
5803
+ ...scratch ? { capToolResult: (full, info) => scratch.spill(full, info) } : {},
5745
5804
  backgroundJobs: o.backgroundJobs ?? virtual,
5746
5805
  // default ON in virtual modes (no real shell there); disk uses ShellJobRegistry
5747
5806
  skillsDir: dots("skills"),
@@ -8038,7 +8097,7 @@ Flags:
8038
8097
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
8039
8098
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
8040
8099
  spoken replies out (echo-cancelled; speak over it to interrupt)
8041
- --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
8100
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
8042
8101
  --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
8043
8102
  --no-think with --duplex: disable the Think tier (Act handles everything)
8044
8103
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
@@ -8070,7 +8129,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
8070
8129
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
8071
8130
 
8072
8131
  REPL shortcuts: !<cmd> runs a shell command inline \xB7 #<note> saves a memory \xB7 @path inlines a file
8073
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
8132
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
8074
8133
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu \u2014 \u2191/\u2193 select, \u23CE/Tab accept, Esc dismiss.
8075
8134
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
8076
8135
  REPL shortcuts: Shift+Tab cycles permission posture (ask \u2192 accept-edits \u2192 plan) \xB7 Alt+T toggles reasoning \xB7 Alt+P switches model \xB7 Ctrl+O toggles verbose tool output \xB7 \u2192 or Tab accepts the dim history ghost-suggestion \xB7 Alt+S/Ctrl+S stash/unstash.
@@ -8808,6 +8867,7 @@ async function repl(args, ai, cfg, cwd) {
8808
8867
  const duplex = args.duplex;
8809
8868
  let dx;
8810
8869
  let voiceIO;
8870
+ let toggleVoice;
8811
8871
  let editorRef;
8812
8872
  let repaintStash = () => {
8813
8873
  };
@@ -9415,6 +9475,15 @@ ${extra}` : body);
9415
9475
  err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)
9416
9476
  `));
9417
9477
  }
9478
+ }, voice: {
9479
+ desc: "toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)",
9480
+ run: async () => {
9481
+ if (!toggleVoice) {
9482
+ err(dim(" (voice needs --duplex on a TTY)\n"));
9483
+ return;
9484
+ }
9485
+ await toggleVoice();
9486
+ }
9418
9487
  }, "voice-model": {
9419
9488
  desc: "switch the reflex (voice) model \u2014 /voice-model <id>, or alone for a picker",
9420
9489
  run: async (a) => {
@@ -10059,67 +10128,91 @@ ${extra}` : body);
10059
10128
  };
10060
10129
  let voicePartial = "";
10061
10130
  let partialRedraw = null;
10062
- if (args.voice && duplex && process.stdin.isTTY) {
10131
+ const startVoice = async (greet) => {
10132
+ if (voiceIO) return true;
10133
+ if (!duplex || !process.stdin.isTTY) {
10134
+ err(dim(" (voice needs --duplex on a TTY)\n"));
10135
+ return false;
10136
+ }
10063
10137
  if (!VoiceIO.available()) {
10064
10138
  err(dim(" (voice I/O off \u2014 set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n"));
10065
- } else {
10066
- voiceIO = new VoiceIO({
10067
- // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
10068
- // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
10069
- // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
10070
- onState: () => editorRef?.redrawNow(),
10071
- // Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
10072
- // (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
10073
- onPartial: (text) => {
10074
- if (text === voicePartial) return;
10075
- voicePartial = text;
10076
- if (!partialRedraw) partialRedraw = setTimeout(() => {
10077
- partialRedraw = null;
10078
- editorRef?.redrawNow();
10079
- }, 250);
10080
- },
10081
- onBargeIn: (phase) => {
10082
- activeTurn?.abort();
10083
- if (phase === "speaking") err(yellow("\n \u270B interrupted\n"));
10084
- },
10085
- onUtterance: (text) => {
10086
- voicePartial = "";
10087
- if (!text.trim()) return;
10088
- const cut = voiceIO.takeInterruptedReply();
10089
- const note = cut && cut.full.length - cut.heard.length > 40 ? `
10139
+ return false;
10140
+ }
10141
+ voiceIO = new VoiceIO({
10142
+ // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
10143
+ // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
10144
+ // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
10145
+ onState: () => editorRef?.redrawNow(),
10146
+ // Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
10147
+ // (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
10148
+ onPartial: (text) => {
10149
+ if (text === voicePartial) return;
10150
+ voicePartial = text;
10151
+ if (!partialRedraw) partialRedraw = setTimeout(() => {
10152
+ partialRedraw = null;
10153
+ editorRef?.redrawNow();
10154
+ }, 250);
10155
+ },
10156
+ onBargeIn: (phase) => {
10157
+ activeTurn?.abort();
10158
+ if (phase === "speaking") err(yellow("\n \u270B interrupted\n"));
10159
+ },
10160
+ onUtterance: (text) => {
10161
+ voicePartial = "";
10162
+ if (!text.trim()) return;
10163
+ const cut = voiceIO.takeInterruptedReply();
10164
+ const note = cut && cut.full.length - cut.heard.length > 40 ? `
10090
10165
  [the user interrupted you mid-speech \u2014 they only heard up to: "\u2026${cut.heard.slice(-80)}". Work any unheard essentials into your reply naturally, only if still relevant.]` : "";
10091
- if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
10092
- err(`\r\x1B[K ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
10166
+ if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
10167
+ err(`\r\x1B[K ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
10093
10168
  `);
10094
- void dispatchLine(text + note).then(async (r) => {
10095
- if (r === "quit") {
10096
- await voiceIO?.awaitIdle();
10097
- editorRef?.abort();
10098
- }
10099
- }).finally(() => editorRef?.redrawNow());
10100
- }
10101
- });
10102
- try {
10103
- await voiceIO.start();
10104
- process.on("exit", () => voiceIO?.stop());
10105
- for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
10106
- voiceIO?.stop();
10107
- process.exit(0);
10108
- });
10109
- err(dim(` \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
10169
+ void dispatchLine(text + note).then(async (r) => {
10170
+ if (r === "quit") {
10171
+ await voiceIO?.awaitIdle();
10172
+ editorRef?.abort();
10173
+ }
10174
+ }).finally(() => editorRef?.redrawNow());
10175
+ }
10176
+ });
10177
+ try {
10178
+ await voiceIO.start();
10179
+ err(dim(` \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
10110
10180
  `));
10181
+ if (greet) {
10111
10182
  const where = cwd.split("/").pop();
10112
10183
  const resumed = session.messages.length > 0;
10113
10184
  void turn(
10114
10185
  `[session started] First call QuickLook with what:"memory" \u2014 if it knows the user's name or preferences, use them. Then greet the user warmly in one or two short sentences, as the opener of a live voice conversation. Context: working directory "${where}"${resumed ? "; this resumes an earlier conversation \u2014 glance at it and pick up naturally" : ""}. Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`
10115
10186
  ).finally(() => editorRef?.redrawNow());
10116
- } catch (e) {
10117
- err(yellow(` \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
10118
- `));
10119
- voiceIO = void 0;
10120
10187
  }
10188
+ return true;
10189
+ } catch (e) {
10190
+ err(yellow(` \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
10191
+ `));
10192
+ voiceIO = void 0;
10193
+ return false;
10121
10194
  }
10195
+ };
10196
+ if (duplex && process.stdin.isTTY) {
10197
+ process.on("exit", () => voiceIO?.stop());
10198
+ for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
10199
+ voiceIO?.stop();
10200
+ process.exit(0);
10201
+ });
10122
10202
  }
10203
+ if (duplex && process.stdin.isTTY) toggleVoice = async () => {
10204
+ if (voiceIO) {
10205
+ voiceIO.stop();
10206
+ voiceIO = void 0;
10207
+ voicePartial = "";
10208
+ err(dim(" \u{1F507} voice off\n"));
10209
+ editorRef?.redrawNow();
10210
+ return;
10211
+ }
10212
+ await startVoice(false);
10213
+ editorRef?.redrawNow();
10214
+ };
10215
+ if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
10123
10216
  while (true) {
10124
10217
  if (pendingRewind) {
10125
10218
  pendingRewind = false;