agent.libx.js 0.93.30 → 0.93.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/cli.ts CHANGED
@@ -113,7 +113,7 @@ function parseReasoning(raw: string): ReasoningEffort {
113
113
  }
114
114
 
115
115
  export function parseArgs(argv: string[]): Args {
116
- const a: Args = { stream: true, plan: false, ask: false, yes: false, vfs: false, shell: undefined, seed: false, subagents: false, help: false, version: false, cont: false, outputFormat: 'text', duplex: false, voice: false };
116
+ const a: Args = { stream: true, plan: false, ask: false, yes: false, vfs: false, shell: undefined, seed: false, subagents: false, help: false, version: false, cont: false, outputFormat: 'text', duplex: false, voice: false, scratch: true };
117
117
  const rest: string[] = [];
118
118
  // read the value that follows a flag, failing loudly if it's missing (instead of a surprise default)
119
119
  const val = (i: number, flag: string): string => { const v = argv[i]; if (v === undefined) throw new Error(`${flag} requires a value`); return v; };
@@ -142,6 +142,7 @@ export function parseArgs(argv: string[]): Args {
142
142
  else if (x === '--yes' || x === '-y') a.yes = true;
143
143
  else if (x === '--vfs' || x === '--sandbox') a.vfs = true;
144
144
  else if (x === '--scratch') a.scratch = true;
145
+ else if (x === '--no-scratch') a.scratch = false;
145
146
  else if (x === '--boddb') a.boddb = val(++i, x);
146
147
  else if (x === '--seed') a.seed = true;
147
148
  else if (x === '--shell') a.shell = true;
@@ -195,7 +196,8 @@ Flags:
195
196
  --no-stream disable token streaming
196
197
  (default: disk mode — full real filesystem access, like Claude Code)
197
198
  --vfs, --sandbox sandbox mode: work over an in-memory copy of cwd — real disk is NEVER modified
198
- --scratch spill big web outputs to scratch files (kept out of context; peek via Grep/Ask)
199
+ --no-scratch disable scratch (on by default): paginate oversized tool output → recoverable
200
+ scratch files (peek via Grep/Read/Ask) instead of a lossy crop
199
201
  --boddb <dir> database-backed workspace: files live in a persistent bod-db store at <dir>,
200
202
  surviving across runs — real disk is NEVER modified (DB-native; add --seed below)
201
203
  --seed with --boddb: hydrate the store from cwd on the first run (empty DB) only
@@ -213,7 +215,7 @@ Flags:
213
215
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
214
216
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
215
217
  spoken replies out (echo-cancelled; speak over it to interrupt)
216
- --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
218
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
217
219
  --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
218
220
  --no-think with --duplex: disable the Think tier (Act handles everything)
219
221
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
@@ -245,7 +247,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
245
247
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
246
248
 
247
249
  REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
248
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
250
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
249
251
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
250
252
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
251
253
  REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
@@ -1047,6 +1049,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1047
1049
  const duplex = args.duplex;
1048
1050
  let dx: DuplexAgent | undefined;
1049
1051
  let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
1052
+ let toggleVoice: (() => Promise<void>) | undefined; // bound below (duplex + TTY): /voice flips mic on/off live
1050
1053
  let editorRef: LineEditor | undefined; // bound once the line editor exists — async chrome repaints the prompt via it
1051
1054
  // During a turn the user's type-ahead lives on a "stash ›" line (no active editor to own it). Async
1052
1055
  // chrome (streamed deltas, task events) lands on top of it — repaint the stash below, so it survives.
@@ -1618,6 +1621,12 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
1618
1621
  if (a[0] === 'full' || a[0] === 'minimal') { workerChrome = a[0]; err(green(` ✓ worker chrome → ${a[0]}\n`)); return; }
1619
1622
  err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
1620
1623
  },
1624
+ }, voice: {
1625
+ desc: 'toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)',
1626
+ run: async () => {
1627
+ if (!toggleVoice) { err(dim(' (voice needs --duplex on a TTY)\n')); return; }
1628
+ await toggleVoice();
1629
+ },
1621
1630
  }, 'voice-model': {
1622
1631
  desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
1623
1632
  run: async (a: string[]) => {
@@ -2065,11 +2074,17 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2065
2074
  // spoken via the host tap above. Missing keys → conversational text mode, one-line note.
2066
2075
  let voicePartial = ''; // live partial transcript, rendered in the prompt footer
2067
2076
  let partialRedraw: ReturnType<typeof setTimeout> | null = null;
2068
- if (args.voice && duplex && process.stdin.isTTY) {
2077
+ // Spin VoiceIO up live (launch with --voice, or /voice mid-session). `greet` opens with a spoken
2078
+ // greeting turn (launch only); a manual toggle just turns the mic on quietly. Returns true if voice
2079
+ // is now live. Duplex + TTY only — bound to `toggleVoice` below so /voice can flip it off again.
2080
+ const startVoice = async (greet: boolean): Promise<boolean> => {
2081
+ if (voiceIO) return true;
2082
+ if (!duplex || !process.stdin.isTTY) { err(dim(' (voice needs --duplex on a TTY)\n')); return false; }
2069
2083
  if (!VoiceIO.available()) {
2070
2084
  err(dim(' (voice I/O off — set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n'));
2071
- } else {
2072
- voiceIO = new VoiceIO({
2085
+ return false;
2086
+ }
2087
+ voiceIO = new VoiceIO({
2073
2088
  // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
2074
2089
  // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
2075
2090
  // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
@@ -2097,14 +2112,11 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2097
2112
  err(`\r\x1b[K ${bold(cyan('🎤 ›'))} ${text}\n`);
2098
2113
  void dispatchLine(text + note).then(async (r) => { if (r === 'quit') { await voiceIO?.awaitIdle(); editorRef?.abort(); } }).finally(() => editorRef?.redrawNow());
2099
2114
  },
2100
- });
2101
- try {
2102
- await voiceIO.start();
2103
- process.on('exit', () => voiceIO?.stop()); // last-resort child cleanup (uncaughtException path exits hard)
2104
- // SIGHUP/SIGTERM (terminal closed, kill) bypass 'exit' handlers by default — without these the
2105
- // mic/player children outlive the CLI and hold the microphone (verified leak in PTY testing).
2106
- for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
2107
- err(dim(` 🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
2115
+ });
2116
+ try {
2117
+ await voiceIO.start();
2118
+ err(dim(` 🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) just talk; speak over it to interrupt\n`));
2119
+ if (greet) {
2108
2120
  // Greeting: the agent makes the first turn — spoken, personalized from what it can see.
2109
2121
  // Straight to turn() (not dispatchLine): the synthetic prompt must not enter ↑-history.
2110
2122
  const where = cwd.split('/').pop();
@@ -2115,12 +2127,30 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
2115
2127
  `Context: working directory "${where}"${resumed ? '; this resumes an earlier conversation — glance at it and pick up naturally' : ''}. ` +
2116
2128
  `Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`,
2117
2129
  ).finally(() => editorRef?.redrawNow());
2118
- } catch (e: any) {
2119
- err(yellow(` ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
2120
- voiceIO = undefined;
2121
2130
  }
2131
+ return true;
2132
+ } catch (e: any) {
2133
+ err(yellow(` ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
2134
+ voiceIO = undefined;
2135
+ return false;
2122
2136
  }
2137
+ };
2138
+ // Child cleanup, registered ONCE (not per start — toggling on/off must not stack listeners). They
2139
+ // close over the live `voiceIO`, so they cover whichever instance is up. SIGHUP/SIGTERM (terminal
2140
+ // closed, kill) bypass 'exit' handlers by default — without these the mic/player children outlive
2141
+ // the CLI and hold the microphone (verified leak in PTY testing).
2142
+ if (duplex && process.stdin.isTTY) {
2143
+ process.on('exit', () => voiceIO?.stop());
2144
+ for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
2123
2145
  }
2146
+ // /voice toggle: flip the mic on or off without leaving the session (kills STT/TTS children on off).
2147
+ if (duplex && process.stdin.isTTY) toggleVoice = async () => {
2148
+ if (voiceIO) { voiceIO.stop(); voiceIO = undefined; voicePartial = ''; err(dim(' 🔇 voice off\n')); editorRef?.redrawNow(); return; }
2149
+ await startVoice(false);
2150
+ editorRef?.redrawNow();
2151
+ };
2152
+ // Launch with --voice: start now, with the spoken greeting.
2153
+ if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
2124
2154
 
2125
2155
  while (true) {
2126
2156
  // Double-Esc fired during the just-finished turn → open the jump-back picker now (turn has unwound).
@@ -228,6 +228,17 @@ declare class AgentOptions {
228
228
  /** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
229
229
  * sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
230
230
  maxContextTokens: number;
231
+ /** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
232
+ * a marker telling the model it was cropped (refine the query, or page further). Guards against one
233
+ * Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
234
+ maxToolResultBytes: number;
235
+ /** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
236
+ * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
237
+ * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
238
+ capToolResult?: (full: string, info: {
239
+ tool: string;
240
+ args: any;
241
+ }) => string | Promise<string>;
231
242
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
232
243
  skillsDir?: string | string[];
233
244
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { h as RunResult, R as ReasoningEffort } from './Agent-kWrJvtZM.js';
2
+ import { h as RunResult, R as ReasoningEffort } from './Agent-uWtu_WFY.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
5
5
 
package/dist/cli.js CHANGED
@@ -1620,7 +1620,7 @@ var init_tools_shell = __esm({
1620
1620
  // cli/cli.ts
1621
1621
  import { createInterface } from "readline/promises";
1622
1622
  import { existsSync as existsSync8, readFileSync as readFileSync5, appendFileSync, mkdirSync as mkdirSync7, writeFileSync as writeFileSync6, readdirSync as readdirSync2, statSync as statSync3 } from "fs";
1623
- import { homedir as homedir5, tmpdir } from "os";
1623
+ import { homedir as homedir5, tmpdir as tmpdir2 } from "os";
1624
1624
 
1625
1625
  // cli/clipboard.ts
1626
1626
  import { execFileSync } from "child_process";
@@ -2692,6 +2692,14 @@ var AgentOptions = class {
2692
2692
  /** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
2693
2693
  * sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
2694
2694
  maxContextTokens = 0;
2695
+ /** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
2696
+ * a marker telling the model it was cropped (refine the query, or page further). Guards against one
2697
+ * Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
2698
+ maxToolResultBytes = 6e4;
2699
+ /** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
2700
+ * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
2701
+ * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
2702
+ capToolResult;
2695
2703
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
2696
2704
  skillsDir;
2697
2705
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3107,6 +3115,11 @@ var Agent = class _Agent {
3107
3115
  this.ctx.emit = void 0;
3108
3116
  }
3109
3117
  if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
3118
+ const cap = this.options.maxToolResultBytes ?? 0;
3119
+ if (!threw && cap > 0 && result.length > cap) {
3120
+ const info = { tool: tc.function.name, args };
3121
+ result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
3122
+ }
3110
3123
  await hooks?.postToolUse?.(call, result, meta);
3111
3124
  this.options.host?.notify?.({ kind: "tool_result", id: tc.id ?? "", output: result, isError: threw });
3112
3125
  if (images?.length) {
@@ -3160,6 +3173,15 @@ function estimateTokens(m) {
3160
3173
  for (const x of m) chars += contentText(x.content).length + (x.tool_calls ? JSON.stringify(x.tool_calls).length : 0);
3161
3174
  return Math.ceil(chars / 4);
3162
3175
  }
3176
+ function cropResult(result, cap) {
3177
+ const head = result.slice(0, cap);
3178
+ const nl = head.lastIndexOf("\n");
3179
+ const page = nl > cap * 0.5 ? head.slice(0, nl) : head;
3180
+ const omitted = result.length - page.length;
3181
+ return `${page}
3182
+
3183
+ [output cropped \u2014 showing ${page.length} of ${result.length} bytes; ${omitted} omitted. This is page 1. Refine your query/command to narrow it, or call the tool again with a tighter scope to see more.]`;
3184
+ }
3163
3185
  function stubOldToolResults(messages, keep) {
3164
3186
  const meta = /* @__PURE__ */ new Map();
3165
3187
  for (const msg of messages)
@@ -3533,6 +3555,34 @@ To pull a specific detail, Grep/Read ${path}, or call Ask({ question: "\u2026",
3533
3555
  captureAll(tools) {
3534
3556
  return tools.map((t) => this.capture(t));
3535
3557
  }
3558
+ /**
3559
+ * Spill an oversized tool result to a scratch file and return PAGE 1 + a recoverable, paginated stub.
3560
+ * Drop-in for `Agent.capToolResult`: the agent sees usable content immediately and knows how to get
3561
+ * the rest (refine the query, Read the file in pages with offset/limit, or Ask to extract specifics).
3562
+ * Lossless — unlike a plain crop, the full output stays available on the scratch FS.
3563
+ */
3564
+ async spill(full, info, pageBytes = 8e3) {
3565
+ const { dir } = this.options;
3566
+ const id = "a" + ++this.seq;
3567
+ const path = `${dir}/${id}-${slug(info.tool)}.txt`;
3568
+ const header = `# ${info.tool}(${shortArgs(info.args)}) \u2014 ${full.length} bytes
3569
+ `;
3570
+ try {
3571
+ await (this.dirReady ??= mkdirp(this.fs, dir));
3572
+ await this.fs.writeFile(path, header + full);
3573
+ } catch (e) {
3574
+ log4.debug("scratch spill failed; cropping lossy", e);
3575
+ return full.slice(0, pageBytes) + `
3576
+
3577
+ [output cropped to ${pageBytes} of ${full.length} bytes; full output unavailable (scratch write failed) \u2014 refine your query]`;
3578
+ }
3579
+ const head = full.slice(0, pageBytes);
3580
+ const nl = head.lastIndexOf("\n");
3581
+ const page = nl > pageBytes * 0.5 ? head.slice(0, nl) : head;
3582
+ return `${page}
3583
+
3584
+ [output cropped \u2014 page 1 (${page.length} of ${full.length} bytes). Full output saved to ${path}. To see more: refine your query/command to narrow it, or Read ${path} with offset/limit to page through it, or Ask({ question: "\u2026", over: "${path}" }) to extract specifics.]`;
3585
+ }
3536
3586
  };
3537
3587
  var ASK_PROMPT = "You are a retrieval-extraction step with Read, Grep and Glob over a scratch filesystem holding raw outputs from earlier tools. Find the information that answers the question and return it concisely, quoting values/facts verbatim. Do NOT add analysis or anything not grounded in the files. If the answer is not present, say so plainly.";
3538
3588
  function makeAskTool(o) {
@@ -3666,7 +3716,10 @@ var DuplexAgentOptions = class {
3666
3716
  ai;
3667
3717
  /** The WORKER's filesystem (act + think). If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3668
3718
  fs;
3669
- reflexModel = "groq/openai/gpt-oss-20b";
3719
+ // The reflex IS the voice. 120b (not 20b) for channel discipline + instruction-following: the 20b
3720
+ // mislabels gpt-oss harmony channels under load, leaking raw analysis into the spoken `final` channel
3721
+ // (and misfiring Hold). 120b is the same price tier (~$0.15/$0.60) — the quality/cost trade is free.
3722
+ reflexModel = "groq/openai/gpt-oss-120b";
3670
3723
  actModel = "anthropic/claude-sonnet-4-6";
3671
3724
  /** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
3672
3725
  thinkModel = "anthropic/claude-opus-4-8";
@@ -3753,7 +3806,12 @@ var DuplexAgent = class {
3753
3806
  const canSearch = workerToolNames.some((n) => /WebSearch/i.test(n));
3754
3807
  const canFetch = workerToolNames.some((n) => /WebFetch/i.test(n));
3755
3808
  const workerWeb = canSearch ? `, and it CAN search the web and read web pages \u2014 so when the user gives you something specific to look up ("search for X", "find me\u2026", "what's the latest on\u2026"), route it to Act. But a bare capability QUESTION like "can you search the web?" just gets a short spoken "yes, I can" \u2014 do NOT dispatch and NEVER invent a query the user did not give you` : canFetch ? ", and it can fetch a specific web page URL (but cannot search the web)" : "";
3756
- const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
3809
+ const mcpNames = [
3810
+ ...Object.keys(o.actOptions?.providerOptions?.mcpServers ?? {}),
3811
+ ...new Set(workerToolNames.filter((n) => n.startsWith("mcp__")).map((n) => n.slice(5).split("__")[0]))
3812
+ ];
3813
+ const workerMcp = mcpNames.length ? `, and it can use these MCP servers: ${[...new Set(mcpNames)].join(", ")}` + (mcpNames.some((n) => /browser/i.test(n)) ? ' \u2014 including driving a REAL browser (open tabs, navigate, click, screenshot), so answer "yes" if asked whether you can control/drive a browser and route an actual browse to Act' : "") : "";
3814
+ const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb + workerMcp) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
3757
3815
  Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
3758
3816
  const tools = [
3759
3817
  ...o.reflexOptions?.tools ?? [],
@@ -4188,8 +4246,10 @@ Another agent just implemented the above. Independently check the CURRENT state
4188
4246
  case "capabilities": {
4189
4247
  const actTools = this.options.actOptions?.tools ?? [];
4190
4248
  const names = actTools.map((t) => t.name);
4249
+ const mcpServers = Object.keys(this.options.actOptions?.providerOptions?.mcpServers ?? {});
4250
+ const mcpNote = mcpServers.length ? ` Plus MCP servers your worker can use: ${mcpServers.join(", ")} (e.g. browser-bridge \u2192 drive a real browser: open tabs, navigate, click, screenshot).` : "";
4191
4251
  if (!names.length)
4192
- return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so.";
4252
+ return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so." + mcpNote;
4193
4253
  const hasFetch = names.some((n) => /WebFetch/i.test(n));
4194
4254
  const hasBrowser = names.some((n) => /browser.*(navigate|click|page|type)/i.test(n));
4195
4255
  const hasSearch = names.some((n) => /(^|_)WebSearch$|search/i.test(n) && !/WebFetch|browser/i.test(n));
@@ -4198,7 +4258,7 @@ Another agent just implemented the above. Independently check the CURRENT state
4198
4258
  if (hasBrowser) notes.push("The browser tools drive a real browser: you CAN open a site and, if needed, navigate to a search engine and search there \u2014 but it is manual and takes a moment, not an instant lookup.");
4199
4259
  else if (!hasSearch && hasFetch) notes.push('You have no general web-search tool, so for an instant "search the web" you can only fetch a URL they provide.');
4200
4260
  const webNote = notes.length ? " NOTE: " + notes.join(" ") : "";
4201
- return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote;
4261
+ return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote + mcpNote;
4202
4262
  }
4203
4263
  case "time":
4204
4264
  return (/* @__PURE__ */ new Date()).toString();
@@ -5540,7 +5600,7 @@ function defaultOpenBrowser(url) {
5540
5600
  import { randomUUID } from "crypto";
5541
5601
  import { resolve, basename, join as join3 } from "path";
5542
5602
  import { existsSync as existsSync2, mkdirSync as mkdirSync2 } from "fs";
5543
- import { platform, arch, release, userInfo, homedir } from "os";
5603
+ import { platform, arch, release, userInfo, homedir, tmpdir } from "os";
5544
5604
  init_tools_shell();
5545
5605
  import { BodDB as BodDB2 } from "@bod.ee/db";
5546
5606
  var DEFAULT_TOOLS = ["bash", "Read", "Edit", "Write", "Grep", "Glob", "MultiEdit", "ApplyEdits", "RepoMap", "TodoWrite"];
@@ -5678,7 +5738,7 @@ Reference files in them by their mount path (the left side).`;
5678
5738
  const jobs = new ShellJobRegistry({ cwd, killOnExit: true });
5679
5739
  realShell = [makeRealShellTool({ cwd, registry: jobs }), ...makeShellJobTools(jobs)];
5680
5740
  }
5681
- const scratchDir = o.scratch ? o.scratchDir ?? `${cwd}/.agent/scratch` : void 0;
5741
+ const scratchDir = o.scratch ? o.scratchDir ?? (virtual ? `${cwd}/.agent/scratch` : `${tmpdir()}/agentx-scratch-${process.pid}`) : void 0;
5682
5742
  const scratch = scratchDir ? new Scratch(fs, { dir: scratchDir }) : void 0;
5683
5743
  return new Agent({
5684
5744
  ai: o.ai,
@@ -5723,13 +5783,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
5723
5783
  return { systemPrompt: basePrompt + "\n\n" + extra };
5724
5784
  })(),
5725
5785
  tools: (() => {
5726
- let base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
5786
+ const base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
5727
5787
  const tail = [...o.extraTools ?? []];
5728
- if (scratch) {
5729
- const CAPTURE2 = /* @__PURE__ */ new Set(["WebSearch", "WebFetch"]);
5730
- base = base.map((t) => CAPTURE2.has(t.name) ? scratch.capture(t) : t);
5731
- tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
5732
- }
5788
+ if (scratch) tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
5733
5789
  if (!realShell.length) return [...base, ...tail];
5734
5790
  const filtered = base.filter((t) => t.name !== "bash");
5735
5791
  return [...filtered, ...realShell, ...tail];
@@ -5742,6 +5798,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
5742
5798
  planMode: o.planMode ?? false,
5743
5799
  permissions: o.permissions,
5744
5800
  subagents: o.subagents ?? false,
5801
+ // When scratch is on, an oversized tool result spills to a scratch file + recoverable paginated stub
5802
+ // (lossless). Without scratch, the Agent's default crop (lossy) still guards the context window.
5803
+ ...scratch ? { capToolResult: (full, info) => scratch.spill(full, info) } : {},
5745
5804
  backgroundJobs: o.backgroundJobs ?? virtual,
5746
5805
  // default ON in virtual modes (no real shell there); disk uses ShellJobRegistry
5747
5806
  skillsDir: dots("skills"),
@@ -7939,7 +7998,7 @@ function parseReasoning(raw) {
7939
7998
  throw new Error(`invalid --reasoning: ${raw} (use off|low|medium|high or a token budget)`);
7940
7999
  }
7941
8000
  function parseArgs(argv) {
7942
- const a = { stream: true, plan: false, ask: false, yes: false, vfs: false, shell: void 0, seed: false, subagents: false, help: false, version: false, cont: false, outputFormat: "text", duplex: false, voice: false };
8001
+ const a = { stream: true, plan: false, ask: false, yes: false, vfs: false, shell: void 0, seed: false, subagents: false, help: false, version: false, cont: false, outputFormat: "text", duplex: false, voice: false, scratch: true };
7943
8002
  const rest = [];
7944
8003
  const val = (i, flag) => {
7945
8004
  const v = argv[i];
@@ -7967,6 +8026,7 @@ function parseArgs(argv) {
7967
8026
  else if (x === "--yes" || x === "-y") a.yes = true;
7968
8027
  else if (x === "--vfs" || x === "--sandbox") a.vfs = true;
7969
8028
  else if (x === "--scratch") a.scratch = true;
8029
+ else if (x === "--no-scratch") a.scratch = false;
7970
8030
  else if (x === "--boddb") a.boddb = val(++i, x);
7971
8031
  else if (x === "--seed") a.seed = true;
7972
8032
  else if (x === "--shell") a.shell = true;
@@ -8020,7 +8080,8 @@ Flags:
8020
8080
  --no-stream disable token streaming
8021
8081
  (default: disk mode \u2014 full real filesystem access, like Claude Code)
8022
8082
  --vfs, --sandbox sandbox mode: work over an in-memory copy of cwd \u2014 real disk is NEVER modified
8023
- --scratch spill big web outputs to scratch files (kept out of context; peek via Grep/Ask)
8083
+ --no-scratch disable scratch (on by default): paginate oversized tool output \u2192 recoverable
8084
+ scratch files (peek via Grep/Read/Ask) instead of a lossy crop
8024
8085
  --boddb <dir> database-backed workspace: files live in a persistent bod-db store at <dir>,
8025
8086
  surviving across runs \u2014 real disk is NEVER modified (DB-native; add --seed below)
8026
8087
  --seed with --boddb: hydrate the store from cwd on the first run (empty DB) only
@@ -8038,7 +8099,7 @@ Flags:
8038
8099
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
8039
8100
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
8040
8101
  spoken replies out (echo-cancelled; speak over it to interrupt)
8041
- --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
8102
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
8042
8103
  --think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
8043
8104
  --no-think with --duplex: disable the Think tier (Act handles everything)
8044
8105
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
@@ -8070,7 +8131,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
8070
8131
  Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
8071
8132
 
8072
8133
  REPL shortcuts: !<cmd> runs a shell command inline \xB7 #<note> saves a memory \xB7 @path inlines a file
8073
- REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
8134
+ REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
8074
8135
  REPL completion: type / (commands+skills) or @ (files) for a LIVE menu \u2014 \u2191/\u2193 select, \u23CE/Tab accept, Esc dismiss.
8075
8136
  REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
8076
8137
  REPL shortcuts: Shift+Tab cycles permission posture (ask \u2192 accept-edits \u2192 plan) \xB7 Alt+T toggles reasoning \xB7 Alt+P switches model \xB7 Ctrl+O toggles verbose tool output \xB7 \u2192 or Tab accepts the dim history ghost-suggestion \xB7 Alt+S/Ctrl+S stash/unstash.
@@ -8808,6 +8869,7 @@ async function repl(args, ai, cfg, cwd) {
8808
8869
  const duplex = args.duplex;
8809
8870
  let dx;
8810
8871
  let voiceIO;
8872
+ let toggleVoice;
8811
8873
  let editorRef;
8812
8874
  let repaintStash = () => {
8813
8875
  };
@@ -9026,7 +9088,7 @@ async function repl(args, ai, cfg, cwd) {
9026
9088
  };
9027
9089
  const pendingImages = [];
9028
9090
  const grabClipboardAttachment = () => {
9029
- const dir = join9(tmpdir(), "agentx-pasted");
9091
+ const dir = join9(tmpdir2(), "agentx-pasted");
9030
9092
  try {
9031
9093
  mkdirSync7(dir, { recursive: true });
9032
9094
  } catch {
@@ -9415,6 +9477,15 @@ ${extra}` : body);
9415
9477
  err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)
9416
9478
  `));
9417
9479
  }
9480
+ }, voice: {
9481
+ desc: "toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)",
9482
+ run: async () => {
9483
+ if (!toggleVoice) {
9484
+ err(dim(" (voice needs --duplex on a TTY)\n"));
9485
+ return;
9486
+ }
9487
+ await toggleVoice();
9488
+ }
9418
9489
  }, "voice-model": {
9419
9490
  desc: "switch the reflex (voice) model \u2014 /voice-model <id>, or alone for a picker",
9420
9491
  run: async (a) => {
@@ -10059,67 +10130,91 @@ ${extra}` : body);
10059
10130
  };
10060
10131
  let voicePartial = "";
10061
10132
  let partialRedraw = null;
10062
- if (args.voice && duplex && process.stdin.isTTY) {
10133
+ const startVoice = async (greet) => {
10134
+ if (voiceIO) return true;
10135
+ if (!duplex || !process.stdin.isTTY) {
10136
+ err(dim(" (voice needs --duplex on a TTY)\n"));
10137
+ return false;
10138
+ }
10063
10139
  if (!VoiceIO.available()) {
10064
10140
  err(dim(" (voice I/O off \u2014 set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n"));
10065
- } else {
10066
- voiceIO = new VoiceIO({
10067
- // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
10068
- // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
10069
- // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
10070
- onState: () => editorRef?.redrawNow(),
10071
- // Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
10072
- // (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
10073
- onPartial: (text) => {
10074
- if (text === voicePartial) return;
10075
- voicePartial = text;
10076
- if (!partialRedraw) partialRedraw = setTimeout(() => {
10077
- partialRedraw = null;
10078
- editorRef?.redrawNow();
10079
- }, 250);
10080
- },
10081
- onBargeIn: (phase) => {
10082
- activeTurn?.abort();
10083
- if (phase === "speaking") err(yellow("\n \u270B interrupted\n"));
10084
- },
10085
- onUtterance: (text) => {
10086
- voicePartial = "";
10087
- if (!text.trim()) return;
10088
- const cut = voiceIO.takeInterruptedReply();
10089
- const note = cut && cut.full.length - cut.heard.length > 40 ? `
10141
+ return false;
10142
+ }
10143
+ voiceIO = new VoiceIO({
10144
+ // No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
10145
+ // need masking (~0.7-1.2s full turns), and the conversational register already opens with a
10146
+ // natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
10147
+ onState: () => editorRef?.redrawNow(),
10148
+ // Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
10149
+ // (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
10150
+ onPartial: (text) => {
10151
+ if (text === voicePartial) return;
10152
+ voicePartial = text;
10153
+ if (!partialRedraw) partialRedraw = setTimeout(() => {
10154
+ partialRedraw = null;
10155
+ editorRef?.redrawNow();
10156
+ }, 250);
10157
+ },
10158
+ onBargeIn: (phase) => {
10159
+ activeTurn?.abort();
10160
+ if (phase === "speaking") err(yellow("\n \u270B interrupted\n"));
10161
+ },
10162
+ onUtterance: (text) => {
10163
+ voicePartial = "";
10164
+ if (!text.trim()) return;
10165
+ const cut = voiceIO.takeInterruptedReply();
10166
+ const note = cut && cut.full.length - cut.heard.length > 40 ? `
10090
10167
  [the user interrupted you mid-speech \u2014 they only heard up to: "\u2026${cut.heard.slice(-80)}". Work any unheard essentials into your reply naturally, only if still relevant.]` : "";
10091
- if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
10092
- err(`\r\x1B[K ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
10168
+ if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
10169
+ err(`\r\x1B[K ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
10093
10170
  `);
10094
- void dispatchLine(text + note).then(async (r) => {
10095
- if (r === "quit") {
10096
- await voiceIO?.awaitIdle();
10097
- editorRef?.abort();
10098
- }
10099
- }).finally(() => editorRef?.redrawNow());
10100
- }
10101
- });
10102
- try {
10103
- await voiceIO.start();
10104
- process.on("exit", () => voiceIO?.stop());
10105
- for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
10106
- voiceIO?.stop();
10107
- process.exit(0);
10108
- });
10109
- err(dim(` \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
10171
+ void dispatchLine(text + note).then(async (r) => {
10172
+ if (r === "quit") {
10173
+ await voiceIO?.awaitIdle();
10174
+ editorRef?.abort();
10175
+ }
10176
+ }).finally(() => editorRef?.redrawNow());
10177
+ }
10178
+ });
10179
+ try {
10180
+ await voiceIO.start();
10181
+ err(dim(` \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
10110
10182
  `));
10183
+ if (greet) {
10111
10184
  const where = cwd.split("/").pop();
10112
10185
  const resumed = session.messages.length > 0;
10113
10186
  void turn(
10114
10187
  `[session started] First call QuickLook with what:"memory" \u2014 if it knows the user's name or preferences, use them. Then greet the user warmly in one or two short sentences, as the opener of a live voice conversation. Context: working directory "${where}"${resumed ? "; this resumes an earlier conversation \u2014 glance at it and pick up naturally" : ""}. Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`
10115
10188
  ).finally(() => editorRef?.redrawNow());
10116
- } catch (e) {
10117
- err(yellow(` \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
10118
- `));
10119
- voiceIO = void 0;
10120
10189
  }
10190
+ return true;
10191
+ } catch (e) {
10192
+ err(yellow(` \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
10193
+ `));
10194
+ voiceIO = void 0;
10195
+ return false;
10121
10196
  }
10197
+ };
10198
+ if (duplex && process.stdin.isTTY) {
10199
+ process.on("exit", () => voiceIO?.stop());
10200
+ for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
10201
+ voiceIO?.stop();
10202
+ process.exit(0);
10203
+ });
10122
10204
  }
10205
+ if (duplex && process.stdin.isTTY) toggleVoice = async () => {
10206
+ if (voiceIO) {
10207
+ voiceIO.stop();
10208
+ voiceIO = void 0;
10209
+ voicePartial = "";
10210
+ err(dim(" \u{1F507} voice off\n"));
10211
+ editorRef?.redrawNow();
10212
+ return;
10213
+ }
10214
+ await startVoice(false);
10215
+ editorRef?.redrawNow();
10216
+ };
10217
+ if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
10123
10218
  while (true) {
10124
10219
  if (pendingRewind) {
10125
10220
  pendingRewind = false;