agent.libx.js 0.93.30 → 0.93.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/cli.ts +44 -16
- package/dist/{Agent-kWrJvtZM.d.ts → Agent-uWtu_WFY.d.ts} +11 -0
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +153 -60
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +12 -2
- package/dist/index.js +64 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/cli/cli.ts
CHANGED
|
@@ -213,7 +213,7 @@ Flags:
|
|
|
213
213
|
impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
|
|
214
214
|
with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
|
|
215
215
|
spoken replies out (echo-cancelled; speak over it to interrupt)
|
|
216
|
-
--voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
|
|
216
|
+
--voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
|
|
217
217
|
--think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
|
|
218
218
|
--no-think with --duplex: disable the Think tier (Act handles everything)
|
|
219
219
|
--add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
|
|
@@ -245,7 +245,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
|
|
|
245
245
|
Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
|
|
246
246
|
|
|
247
247
|
REPL shortcuts: !<cmd> runs a shell command inline · #<note> saves a memory · @path inlines a file
|
|
248
|
-
REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
|
|
248
|
+
REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
|
|
249
249
|
REPL completion: type / (commands+skills) or @ (files) for a LIVE menu — ↑/↓ select, ⏎/Tab accept, Esc dismiss.
|
|
250
250
|
REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
|
|
251
251
|
REPL shortcuts: Shift+Tab cycles permission posture (ask → accept-edits → plan) · Alt+T toggles reasoning · Alt+P switches model · Ctrl+O toggles verbose tool output · → or Tab accepts the dim history ghost-suggestion · Alt+S/Ctrl+S stash/unstash.
|
|
@@ -1047,6 +1047,7 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
|
|
|
1047
1047
|
const duplex = args.duplex;
|
|
1048
1048
|
let dx: DuplexAgent | undefined;
|
|
1049
1049
|
let voiceIO: VoiceIO | undefined; // real voice I/O (--voice + keys): mic→STT in, text_delta→TTS out
|
|
1050
|
+
let toggleVoice: (() => Promise<void>) | undefined; // bound below (duplex + TTY): /voice flips mic on/off live
|
|
1050
1051
|
let editorRef: LineEditor | undefined; // bound once the line editor exists — async chrome repaints the prompt via it
|
|
1051
1052
|
// During a turn the user's type-ahead lives on a "stash ›" line (no active editor to own it). Async
|
|
1052
1053
|
// chrome (streamed deltas, task events) lands on top of it — repaint the stash below, so it survives.
|
|
@@ -1618,6 +1619,12 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
|
|
|
1618
1619
|
if (a[0] === 'full' || a[0] === 'minimal') { workerChrome = a[0]; err(green(` ✓ worker chrome → ${a[0]}\n`)); return; }
|
|
1619
1620
|
err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)\n`));
|
|
1620
1621
|
},
|
|
1622
|
+
}, voice: {
|
|
1623
|
+
desc: 'toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)',
|
|
1624
|
+
run: async () => {
|
|
1625
|
+
if (!toggleVoice) { err(dim(' (voice needs --duplex on a TTY)\n')); return; }
|
|
1626
|
+
await toggleVoice();
|
|
1627
|
+
},
|
|
1621
1628
|
}, 'voice-model': {
|
|
1622
1629
|
desc: 'switch the reflex (voice) model — /voice-model <id>, or alone for a picker',
|
|
1623
1630
|
run: async (a: string[]) => {
|
|
@@ -2065,11 +2072,17 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
|
|
|
2065
2072
|
// spoken via the host tap above. Missing keys → conversational text mode, one-line note.
|
|
2066
2073
|
let voicePartial = ''; // live partial transcript, rendered in the prompt footer
|
|
2067
2074
|
let partialRedraw: ReturnType<typeof setTimeout> | null = null;
|
|
2068
|
-
|
|
2075
|
+
// Spin VoiceIO up live (launch with --voice, or /voice mid-session). `greet` opens with a spoken
|
|
2076
|
+
// greeting turn (launch only); a manual toggle just turns the mic on quietly. Returns true if voice
|
|
2077
|
+
// is now live. Duplex + TTY only — bound to `toggleVoice` below so /voice can flip it off again.
|
|
2078
|
+
const startVoice = async (greet: boolean): Promise<boolean> => {
|
|
2079
|
+
if (voiceIO) return true;
|
|
2080
|
+
if (!duplex || !process.stdin.isTTY) { err(dim(' (voice needs --duplex on a TTY)\n')); return false; }
|
|
2069
2081
|
if (!VoiceIO.available()) {
|
|
2070
2082
|
err(dim(' (voice I/O off — set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n'));
|
|
2071
|
-
|
|
2072
|
-
|
|
2083
|
+
return false;
|
|
2084
|
+
}
|
|
2085
|
+
voiceIO = new VoiceIO({
|
|
2073
2086
|
// No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
|
|
2074
2087
|
// need masking (~0.7-1.2s full turns), and the conversational register already opens with a
|
|
2075
2088
|
// natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
|
|
@@ -2097,14 +2110,11 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
|
|
|
2097
2110
|
err(`\r\x1b[K ${bold(cyan('🎤 ›'))} ${text}\n`);
|
|
2098
2111
|
void dispatchLine(text + note).then(async (r) => { if (r === 'quit') { await voiceIO?.awaitIdle(); editorRef?.abort(); } }).finally(() => editorRef?.redrawNow());
|
|
2099
2112
|
},
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
// mic/player children outlive the CLI and hold the microphone (verified leak in PTY testing).
|
|
2106
|
-
for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
|
|
2107
|
-
err(dim(` 🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
|
|
2113
|
+
});
|
|
2114
|
+
try {
|
|
2115
|
+
await voiceIO.start();
|
|
2116
|
+
err(dim(` 🎤 voice on (${voiceIO.usingAec ? 'echo-cancelled' : 'heuristic echo — headphones recommended'}) — just talk; speak over it to interrupt\n`));
|
|
2117
|
+
if (greet) {
|
|
2108
2118
|
// Greeting: the agent makes the first turn — spoken, personalized from what it can see.
|
|
2109
2119
|
// Straight to turn() (not dispatchLine): the synthetic prompt must not enter ↑-history.
|
|
2110
2120
|
const where = cwd.split('/').pop();
|
|
@@ -2115,12 +2125,30 @@ async function repl(args: Args, ai: ChatLike, cfg: Partial<AgentConfig>, cwd: st
|
|
|
2115
2125
|
`Context: working directory "${where}"${resumed ? '; this resumes an earlier conversation — glance at it and pick up naturally' : ''}. ` +
|
|
2116
2126
|
`Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`,
|
|
2117
2127
|
).finally(() => editorRef?.redrawNow());
|
|
2118
|
-
} catch (e: any) {
|
|
2119
|
-
err(yellow(` ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
|
|
2120
|
-
voiceIO = undefined;
|
|
2121
2128
|
}
|
|
2129
|
+
return true;
|
|
2130
|
+
} catch (e: any) {
|
|
2131
|
+
err(yellow(` ⚠ voice I/O failed to start: ${e?.message ?? e} — continuing text-only\n`));
|
|
2132
|
+
voiceIO = undefined;
|
|
2133
|
+
return false;
|
|
2122
2134
|
}
|
|
2135
|
+
};
|
|
2136
|
+
// Child cleanup, registered ONCE (not per start — toggling on/off must not stack listeners). They
|
|
2137
|
+
// close over the live `voiceIO`, so they cover whichever instance is up. SIGHUP/SIGTERM (terminal
|
|
2138
|
+
// closed, kill) bypass 'exit' handlers by default — without these the mic/player children outlive
|
|
2139
|
+
// the CLI and hold the microphone (verified leak in PTY testing).
|
|
2140
|
+
if (duplex && process.stdin.isTTY) {
|
|
2141
|
+
process.on('exit', () => voiceIO?.stop());
|
|
2142
|
+
for (const sig of ['SIGHUP', 'SIGTERM'] as const) process.on(sig, () => { voiceIO?.stop(); process.exit(0); });
|
|
2123
2143
|
}
|
|
2144
|
+
// /voice toggle: flip the mic on or off without leaving the session (kills STT/TTS children on off).
|
|
2145
|
+
if (duplex && process.stdin.isTTY) toggleVoice = async () => {
|
|
2146
|
+
if (voiceIO) { voiceIO.stop(); voiceIO = undefined; voicePartial = ''; err(dim(' 🔇 voice off\n')); editorRef?.redrawNow(); return; }
|
|
2147
|
+
await startVoice(false);
|
|
2148
|
+
editorRef?.redrawNow();
|
|
2149
|
+
};
|
|
2150
|
+
// Launch with --voice: start now, with the spoken greeting.
|
|
2151
|
+
if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
|
|
2124
2152
|
|
|
2125
2153
|
while (true) {
|
|
2126
2154
|
// Double-Esc fired during the just-finished turn → open the jump-back picker now (turn has unwound).
|
package/dist/{Agent-kWrJvtZM.d.ts → Agent-uWtu_WFY.d.ts}
CHANGED
|
@@ -228,6 +228,17 @@ declare class AgentOptions {
|
|
|
228
228
|
/** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
|
|
229
229
|
* sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
|
|
230
230
|
maxContextTokens: number;
|
|
231
|
+
/** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
|
|
232
|
+
* a marker telling the model it was cropped (refine the query, or page further). Guards against one
|
|
233
|
+
* Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
|
|
234
|
+
maxToolResultBytes: number;
|
|
235
|
+
/** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
|
|
236
|
+
* and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
|
|
237
|
+
* paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
|
|
238
|
+
capToolResult?: (full: string, info: {
|
|
239
|
+
tool: string;
|
|
240
|
+
args: any;
|
|
241
|
+
}) => string | Promise<string>;
|
|
231
242
|
/** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
|
|
232
243
|
skillsDir?: string | string[];
|
|
233
244
|
/** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
|
package/dist/cli.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
|
-
import { h as RunResult, R as ReasoningEffort } from './Agent-kWrJvtZM.js';
|
|
2
|
+
import { h as RunResult, R as ReasoningEffort } from './Agent-uWtu_WFY.js';
|
|
3
3
|
import { IFilesystem } from '@livx.cc/wcli/core';
|
|
4
4
|
import { M as Message, c as ContentPart } from './tools-GPWp7oXq.js';
|
|
5
5
|
|
package/dist/cli.js
CHANGED
|
@@ -2692,6 +2692,14 @@ var AgentOptions = class {
|
|
|
2692
2692
|
/** Token-aware backstop (~4 chars/token estimate). After note-taking, drop oldest messages from the
|
|
2693
2693
|
* sent context until the estimate is under this ceiling (pairing-safe). 0 = off. */
|
|
2694
2694
|
maxContextTokens = 0;
|
|
2695
|
+
/** Pagination ceiling for a SINGLE tool result (bytes). A result over this is cropped to page 1 with
|
|
2696
|
+
* a marker telling the model it was cropped (refine the query, or page further). Guards against one
|
|
2697
|
+
* Grep/Read/MCP call blowing the whole context window. 0 = off. Default 60k (~15k tokens). */
|
|
2698
|
+
maxToolResultBytes = 6e4;
|
|
2699
|
+
/** Hook to handle an oversized tool result instead of the default lossy crop: receives the FULL output
|
|
2700
|
+
* and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
|
|
2701
|
+
* paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
|
|
2702
|
+
capToolResult;
|
|
2695
2703
|
/** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
|
|
2696
2704
|
skillsDir;
|
|
2697
2705
|
/** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
|
|
@@ -3107,6 +3115,11 @@ var Agent = class _Agent {
|
|
|
3107
3115
|
this.ctx.emit = void 0;
|
|
3108
3116
|
}
|
|
3109
3117
|
if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
|
|
3118
|
+
const cap = this.options.maxToolResultBytes ?? 0;
|
|
3119
|
+
if (!threw && cap > 0 && result.length > cap) {
|
|
3120
|
+
const info = { tool: tc.function.name, args };
|
|
3121
|
+
result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
|
|
3122
|
+
}
|
|
3110
3123
|
await hooks?.postToolUse?.(call, result, meta);
|
|
3111
3124
|
this.options.host?.notify?.({ kind: "tool_result", id: tc.id ?? "", output: result, isError: threw });
|
|
3112
3125
|
if (images?.length) {
|
|
@@ -3160,6 +3173,15 @@ function estimateTokens(m) {
|
|
|
3160
3173
|
for (const x of m) chars += contentText(x.content).length + (x.tool_calls ? JSON.stringify(x.tool_calls).length : 0);
|
|
3161
3174
|
return Math.ceil(chars / 4);
|
|
3162
3175
|
}
|
|
3176
|
+
function cropResult(result, cap) {
|
|
3177
|
+
const head = result.slice(0, cap);
|
|
3178
|
+
const nl = head.lastIndexOf("\n");
|
|
3179
|
+
const page = nl > cap * 0.5 ? head.slice(0, nl) : head;
|
|
3180
|
+
const omitted = result.length - page.length;
|
|
3181
|
+
return `${page}
|
|
3182
|
+
|
|
3183
|
+
[output cropped \u2014 showing ${page.length} of ${result.length} bytes; ${omitted} omitted. This is page 1. Refine your query/command to narrow it, or call the tool again with a tighter scope to see more.]`;
|
|
3184
|
+
}
|
|
3163
3185
|
function stubOldToolResults(messages, keep) {
|
|
3164
3186
|
const meta = /* @__PURE__ */ new Map();
|
|
3165
3187
|
for (const msg of messages)
|
|
@@ -3533,6 +3555,34 @@ To pull a specific detail, Grep/Read ${path}, or call Ask({ question: "\u2026",
|
|
|
3533
3555
|
captureAll(tools) {
|
|
3534
3556
|
return tools.map((t) => this.capture(t));
|
|
3535
3557
|
}
|
|
3558
|
+
/**
|
|
3559
|
+
* Spill an oversized tool result to a scratch file and return PAGE 1 + a recoverable, paginated stub.
|
|
3560
|
+
* Drop-in for `Agent.capToolResult`: the agent sees usable content immediately and knows how to get
|
|
3561
|
+
* the rest (refine the query, Read the file in pages with offset/limit, or Ask to extract specifics).
|
|
3562
|
+
* Lossless — unlike a plain crop, the full output stays available on the scratch FS.
|
|
3563
|
+
*/
|
|
3564
|
+
async spill(full, info, pageBytes = 8e3) {
|
|
3565
|
+
const { dir } = this.options;
|
|
3566
|
+
const id = "a" + ++this.seq;
|
|
3567
|
+
const path = `${dir}/${id}-${slug(info.tool)}.txt`;
|
|
3568
|
+
const header = `# ${info.tool}(${shortArgs(info.args)}) \u2014 ${full.length} bytes
|
|
3569
|
+
`;
|
|
3570
|
+
try {
|
|
3571
|
+
await (this.dirReady ??= mkdirp(this.fs, dir));
|
|
3572
|
+
await this.fs.writeFile(path, header + full);
|
|
3573
|
+
} catch (e) {
|
|
3574
|
+
log4.debug("scratch spill failed; cropping lossy", e);
|
|
3575
|
+
return full.slice(0, pageBytes) + `
|
|
3576
|
+
|
|
3577
|
+
[output cropped to ${pageBytes} of ${full.length} bytes; full output unavailable (scratch write failed) \u2014 refine your query]`;
|
|
3578
|
+
}
|
|
3579
|
+
const head = full.slice(0, pageBytes);
|
|
3580
|
+
const nl = head.lastIndexOf("\n");
|
|
3581
|
+
const page = nl > pageBytes * 0.5 ? head.slice(0, nl) : head;
|
|
3582
|
+
return `${page}
|
|
3583
|
+
|
|
3584
|
+
[output cropped \u2014 page 1 (${page.length} of ${full.length} bytes). Full output saved to ${path}. To see more: refine your query/command to narrow it, or Read ${path} with offset/limit to page through it, or Ask({ question: "\u2026", over: "${path}" }) to extract specifics.]`;
|
|
3585
|
+
}
|
|
3536
3586
|
};
|
|
3537
3587
|
var ASK_PROMPT = "You are a retrieval-extraction step with Read, Grep and Glob over a scratch filesystem holding raw outputs from earlier tools. Find the information that answers the question and return it concisely, quoting values/facts verbatim. Do NOT add analysis or anything not grounded in the files. If the answer is not present, say so plainly.";
|
|
3538
3588
|
function makeAskTool(o) {
|
|
@@ -3666,7 +3716,10 @@ var DuplexAgentOptions = class {
|
|
|
3666
3716
|
ai;
|
|
3667
3717
|
/** The WORKER's filesystem (act + think). If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
|
|
3668
3718
|
fs;
|
|
3669
|
-
|
|
3719
|
+
// The reflex IS the voice. 120b (not 20b) for channel discipline + instruction-following: the 20b
|
|
3720
|
+
// mislabels gpt-oss harmony channels under load, leaking raw analysis into the spoken `final` channel
|
|
3721
|
+
// (and misfiring Hold). 120b is the same price tier (~$0.15/$0.60) — the quality/cost trade is free.
|
|
3722
|
+
reflexModel = "groq/openai/gpt-oss-120b";
|
|
3670
3723
|
actModel = "anthropic/claude-sonnet-4-6";
|
|
3671
3724
|
/** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
|
|
3672
3725
|
thinkModel = "anthropic/claude-opus-4-8";
|
|
@@ -3753,7 +3806,12 @@ var DuplexAgent = class {
|
|
|
3753
3806
|
const canSearch = workerToolNames.some((n) => /WebSearch/i.test(n));
|
|
3754
3807
|
const canFetch = workerToolNames.some((n) => /WebFetch/i.test(n));
|
|
3755
3808
|
const workerWeb = canSearch ? `, and it CAN search the web and read web pages \u2014 so when the user gives you something specific to look up ("search for X", "find me\u2026", "what's the latest on\u2026"), route it to Act. But a bare capability QUESTION like "can you search the web?" just gets a short spoken "yes, I can" \u2014 do NOT dispatch and NEVER invent a query the user did not give you` : canFetch ? ", and it can fetch a specific web page URL (but cannot search the web)" : "";
|
|
3756
|
-
const
|
|
3809
|
+
const mcpNames = [
|
|
3810
|
+
...Object.keys(o.actOptions?.providerOptions?.mcpServers ?? {}),
|
|
3811
|
+
...new Set(workerToolNames.filter((n) => n.startsWith("mcp__")).map((n) => n.slice(5).split("__")[0]))
|
|
3812
|
+
];
|
|
3813
|
+
const workerMcp = mcpNames.length ? `, and it can use these MCP servers: ${[...new Set(mcpNames)].join(", ")}` + (mcpNames.some((n) => /browser/i.test(n)) ? ' \u2014 including driving a REAL browser (open tabs, navigate, click, screenshot), so answer "yes" if asked whether you can control/drive a browser and route an actual browse to Act' : "") : "";
|
|
3814
|
+
const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot).replace("{{WORKER_WEB}}", workerWeb + workerMcp) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
|
|
3757
3815
|
Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
3758
3816
|
const tools = [
|
|
3759
3817
|
...o.reflexOptions?.tools ?? [],
|
|
@@ -4188,8 +4246,10 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
4188
4246
|
case "capabilities": {
|
|
4189
4247
|
const actTools = this.options.actOptions?.tools ?? [];
|
|
4190
4248
|
const names = actTools.map((t) => t.name);
|
|
4249
|
+
const mcpServers = Object.keys(this.options.actOptions?.providerOptions?.mcpServers ?? {});
|
|
4250
|
+
const mcpNote = mcpServers.length ? ` Plus MCP servers your worker can use: ${mcpServers.join(", ")} (e.g. browser-bridge \u2192 drive a real browser: open tabs, navigate, click, screenshot).` : "";
|
|
4191
4251
|
if (!names.length)
|
|
4192
|
-
return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so.";
|
|
4252
|
+
return "Your worker uses Act's default local toolset (reading/editing files, running shell commands). No extra tools (e.g. web/internet) are configured; if a request is not a basic file or shell operation, assume you can't do it and say so." + mcpNote;
|
|
4193
4253
|
const hasFetch = names.some((n) => /WebFetch/i.test(n));
|
|
4194
4254
|
const hasBrowser = names.some((n) => /browser.*(navigate|click|page|type)/i.test(n));
|
|
4195
4255
|
const hasSearch = names.some((n) => /(^|_)WebSearch$|search/i.test(n) && !/WebFetch|browser/i.test(n));
|
|
@@ -4198,7 +4258,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
4198
4258
|
if (hasBrowser) notes.push("The browser tools drive a real browser: you CAN open a site and, if needed, navigate to a search engine and search there \u2014 but it is manual and takes a moment, not an instant lookup.");
|
|
4199
4259
|
else if (!hasSearch && hasFetch) notes.push('You have no general web-search tool, so for an instant "search the web" you can only fetch a URL they provide.');
|
|
4200
4260
|
const webNote = notes.length ? " NOTE: " + notes.join(" ") : "";
|
|
4201
|
-
return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote;
|
|
4261
|
+
return `Tools your background worker (Act) can actually use: ${names.join(", ")}. Read each name literally and match the request to a SPECIFIC tool; if none fits, you do NOT have that ability \u2014 say so honestly.` + webNote + mcpNote;
|
|
4202
4262
|
}
|
|
4203
4263
|
case "time":
|
|
4204
4264
|
return (/* @__PURE__ */ new Date()).toString();
|
|
@@ -5723,13 +5783,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
|
|
|
5723
5783
|
return { systemPrompt: basePrompt + "\n\n" + extra };
|
|
5724
5784
|
})(),
|
|
5725
5785
|
tools: (() => {
|
|
5726
|
-
|
|
5786
|
+
const base = toolsByName([...o.tools ?? DEFAULT_TOOLS, ...autoWebTools()]);
|
|
5727
5787
|
const tail = [...o.extraTools ?? []];
|
|
5728
|
-
if (scratch) {
|
|
5729
|
-
const CAPTURE2 = /* @__PURE__ */ new Set(["WebSearch", "WebFetch"]);
|
|
5730
|
-
base = base.map((t) => CAPTURE2.has(t.name) ? scratch.capture(t) : t);
|
|
5731
|
-
tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
|
|
5732
|
-
}
|
|
5788
|
+
if (scratch) tail.push(makeAskTool({ fs, ai: o.ai, model: o.scratchAskModel ?? o.model ?? "anthropic/claude-sonnet-4-6", dir: scratchDir }));
|
|
5733
5789
|
if (!realShell.length) return [...base, ...tail];
|
|
5734
5790
|
const filtered = base.filter((t) => t.name !== "bash");
|
|
5735
5791
|
return [...filtered, ...realShell, ...tail];
|
|
@@ -5742,6 +5798,9 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
|
|
|
5742
5798
|
planMode: o.planMode ?? false,
|
|
5743
5799
|
permissions: o.permissions,
|
|
5744
5800
|
subagents: o.subagents ?? false,
|
|
5801
|
+
// When scratch is on, an oversized tool result spills to a scratch file + recoverable paginated stub
|
|
5802
|
+
// (lossless). Without scratch, the Agent's default crop (lossy) still guards the context window.
|
|
5803
|
+
...scratch ? { capToolResult: (full, info) => scratch.spill(full, info) } : {},
|
|
5745
5804
|
backgroundJobs: o.backgroundJobs ?? virtual,
|
|
5746
5805
|
// default ON in virtual modes (no real shell there); disk uses ShellJobRegistry
|
|
5747
5806
|
skillsDir: dots("skills"),
|
|
@@ -8038,7 +8097,7 @@ Flags:
|
|
|
8038
8097
|
impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
|
|
8039
8098
|
with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
|
|
8040
8099
|
spoken replies out (echo-cancelled; speak over it to interrupt)
|
|
8041
|
-
--voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
|
|
8100
|
+
--voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-120b)
|
|
8042
8101
|
--think-model <id> with --duplex: the premium deep-reasoning model (default anthropic/claude-opus-4-6)
|
|
8043
8102
|
--no-think with --duplex: disable the Think tier (Act handles everything)
|
|
8044
8103
|
--add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
|
|
@@ -8070,7 +8129,7 @@ Project instructions: ./AGENTS.md or ./CLAUDE.md are auto-loaded (scaffold with
|
|
|
8070
8129
|
Auto-loaded from ./.agent/: commands/, skills/, memory/, agents/.
|
|
8071
8130
|
|
|
8072
8131
|
REPL shortcuts: !<cmd> runs a shell command inline \xB7 #<note> saves a memory \xB7 @path inlines a file
|
|
8073
|
-
REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice-model /think-model)
|
|
8132
|
+
REPL slash commands: /help /version /tools /permissions /status /cost /context /cwd /model /reasoning /config /rename /compact /rewind /undo /clear /sessions /resume /commands /skills /mcp /init /export /paste /goal /exit (duplex: /act /think /voice /voice-model /think-model)
|
|
8074
8133
|
REPL completion: type / (commands+skills) or @ (files) for a LIVE menu \u2014 \u2191/\u2193 select, \u23CE/Tab accept, Esc dismiss.
|
|
8075
8134
|
REPL multi-line: Option/Alt+Enter inserts a newline, or end a line with \\ to continue. Esc cancels a running turn / clears the input line; double-Esc jumps back to edit a previous message.
|
|
8076
8135
|
REPL shortcuts: Shift+Tab cycles permission posture (ask \u2192 accept-edits \u2192 plan) \xB7 Alt+T toggles reasoning \xB7 Alt+P switches model \xB7 Ctrl+O toggles verbose tool output \xB7 \u2192 or Tab accepts the dim history ghost-suggestion \xB7 Alt+S/Ctrl+S stash/unstash.
|
|
@@ -8808,6 +8867,7 @@ async function repl(args, ai, cfg, cwd) {
|
|
|
8808
8867
|
const duplex = args.duplex;
|
|
8809
8868
|
let dx;
|
|
8810
8869
|
let voiceIO;
|
|
8870
|
+
let toggleVoice;
|
|
8811
8871
|
let editorRef;
|
|
8812
8872
|
let repaintStash = () => {
|
|
8813
8873
|
};
|
|
@@ -9415,6 +9475,15 @@ ${extra}` : body);
|
|
|
9415
9475
|
err(dim(` worker chrome: ${workerChrome} (use /workers full|minimal)
|
|
9416
9476
|
`));
|
|
9417
9477
|
}
|
|
9478
|
+
}, voice: {
|
|
9479
|
+
desc: "toggle live voice I/O on/off mid-session (needs SONIOX/CARTESIA keys + a TTY)",
|
|
9480
|
+
run: async () => {
|
|
9481
|
+
if (!toggleVoice) {
|
|
9482
|
+
err(dim(" (voice needs --duplex on a TTY)\n"));
|
|
9483
|
+
return;
|
|
9484
|
+
}
|
|
9485
|
+
await toggleVoice();
|
|
9486
|
+
}
|
|
9418
9487
|
}, "voice-model": {
|
|
9419
9488
|
desc: "switch the reflex (voice) model \u2014 /voice-model <id>, or alone for a picker",
|
|
9420
9489
|
run: async (a) => {
|
|
@@ -10059,67 +10128,91 @@ ${extra}` : body);
|
|
|
10059
10128
|
};
|
|
10060
10129
|
let voicePartial = "";
|
|
10061
10130
|
let partialRedraw = null;
|
|
10062
|
-
|
|
10131
|
+
const startVoice = async (greet) => {
|
|
10132
|
+
if (voiceIO) return true;
|
|
10133
|
+
if (!duplex || !process.stdin.isTTY) {
|
|
10134
|
+
err(dim(" (voice needs --duplex on a TTY)\n"));
|
|
10135
|
+
return false;
|
|
10136
|
+
}
|
|
10063
10137
|
if (!VoiceIO.available()) {
|
|
10064
10138
|
err(dim(" (voice I/O off \u2014 set SONIOX_API_KEY, CARTESIA_API_KEY, CARTESIA_VOICE_ID to talk)\n"));
|
|
10065
|
-
|
|
10066
|
-
|
|
10067
|
-
|
|
10068
|
-
|
|
10069
|
-
|
|
10070
|
-
|
|
10071
|
-
|
|
10072
|
-
|
|
10073
|
-
|
|
10074
|
-
|
|
10075
|
-
|
|
10076
|
-
|
|
10077
|
-
|
|
10078
|
-
|
|
10079
|
-
|
|
10080
|
-
},
|
|
10081
|
-
|
|
10082
|
-
|
|
10083
|
-
|
|
10084
|
-
|
|
10085
|
-
|
|
10086
|
-
|
|
10087
|
-
|
|
10088
|
-
|
|
10089
|
-
|
|
10139
|
+
return false;
|
|
10140
|
+
}
|
|
10141
|
+
voiceIO = new VoiceIO({
|
|
10142
|
+
// No ack phrase by default: a fixed "Mm-hm," every turn reads robotic, Haiku's TTFT doesn't
|
|
10143
|
+
// need masking (~0.7-1.2s full turns), and the conversational register already opens with a
|
|
10144
|
+
// natural reaction. The mechanism (+ echo-leak guard) stays for slower voice models.
|
|
10145
|
+
onState: () => editorRef?.redrawNow(),
|
|
10146
|
+
// Throttled: each redraw clears the screen below the prompt — a partial-per-token storm
|
|
10147
|
+
// (fast speech, or echo bleed if AEC degrades) would continuously erase streamed text.
|
|
10148
|
+
onPartial: (text) => {
|
|
10149
|
+
if (text === voicePartial) return;
|
|
10150
|
+
voicePartial = text;
|
|
10151
|
+
if (!partialRedraw) partialRedraw = setTimeout(() => {
|
|
10152
|
+
partialRedraw = null;
|
|
10153
|
+
editorRef?.redrawNow();
|
|
10154
|
+
}, 250);
|
|
10155
|
+
},
|
|
10156
|
+
onBargeIn: (phase) => {
|
|
10157
|
+
activeTurn?.abort();
|
|
10158
|
+
if (phase === "speaking") err(yellow("\n \u270B interrupted\n"));
|
|
10159
|
+
},
|
|
10160
|
+
onUtterance: (text) => {
|
|
10161
|
+
voicePartial = "";
|
|
10162
|
+
if (!text.trim()) return;
|
|
10163
|
+
const cut = voiceIO.takeInterruptedReply();
|
|
10164
|
+
const note = cut && cut.full.length - cut.heard.length > 40 ? `
|
|
10090
10165
|
[the user interrupted you mid-speech \u2014 they only heard up to: "\u2026${cut.heard.slice(-80)}". Work any unheard essentials into your reply naturally, only if still relevant.]` : "";
|
|
10091
|
-
|
|
10092
|
-
|
|
10166
|
+
if (!/^[!#/]/.test(text.trim())) voiceIO.beginSpeech(true);
|
|
10167
|
+
err(`\r\x1B[K ${bold(cyan("\u{1F3A4} \u203A"))} ${text}
|
|
10093
10168
|
`);
|
|
10094
|
-
|
|
10095
|
-
|
|
10096
|
-
|
|
10097
|
-
|
|
10098
|
-
|
|
10099
|
-
|
|
10100
|
-
|
|
10101
|
-
|
|
10102
|
-
|
|
10103
|
-
|
|
10104
|
-
|
|
10105
|
-
for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
|
|
10106
|
-
voiceIO?.stop();
|
|
10107
|
-
process.exit(0);
|
|
10108
|
-
});
|
|
10109
|
-
err(dim(` \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
|
|
10169
|
+
void dispatchLine(text + note).then(async (r) => {
|
|
10170
|
+
if (r === "quit") {
|
|
10171
|
+
await voiceIO?.awaitIdle();
|
|
10172
|
+
editorRef?.abort();
|
|
10173
|
+
}
|
|
10174
|
+
}).finally(() => editorRef?.redrawNow());
|
|
10175
|
+
}
|
|
10176
|
+
});
|
|
10177
|
+
try {
|
|
10178
|
+
await voiceIO.start();
|
|
10179
|
+
err(dim(` \u{1F3A4} voice on (${voiceIO.usingAec ? "echo-cancelled" : "heuristic echo \u2014 headphones recommended"}) \u2014 just talk; speak over it to interrupt
|
|
10110
10180
|
`));
|
|
10181
|
+
if (greet) {
|
|
10111
10182
|
const where = cwd.split("/").pop();
|
|
10112
10183
|
const resumed = session.messages.length > 0;
|
|
10113
10184
|
void turn(
|
|
10114
10185
|
`[session started] First call QuickLook with what:"memory" \u2014 if it knows the user's name or preferences, use them. Then greet the user warmly in one or two short sentences, as the opener of a live voice conversation. Context: working directory "${where}"${resumed ? "; this resumes an earlier conversation \u2014 glance at it and pick up naturally" : ""}. Personalize from whatever you learned (memory, prior conversation). Then ask what they'd like to do.`
|
|
10115
10186
|
).finally(() => editorRef?.redrawNow());
|
|
10116
|
-
} catch (e) {
|
|
10117
|
-
err(yellow(` \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
|
|
10118
|
-
`));
|
|
10119
|
-
voiceIO = void 0;
|
|
10120
10187
|
}
|
|
10188
|
+
return true;
|
|
10189
|
+
} catch (e) {
|
|
10190
|
+
err(yellow(` \u26A0 voice I/O failed to start: ${e?.message ?? e} \u2014 continuing text-only
|
|
10191
|
+
`));
|
|
10192
|
+
voiceIO = void 0;
|
|
10193
|
+
return false;
|
|
10121
10194
|
}
|
|
10195
|
+
};
|
|
10196
|
+
if (duplex && process.stdin.isTTY) {
|
|
10197
|
+
process.on("exit", () => voiceIO?.stop());
|
|
10198
|
+
for (const sig of ["SIGHUP", "SIGTERM"]) process.on(sig, () => {
|
|
10199
|
+
voiceIO?.stop();
|
|
10200
|
+
process.exit(0);
|
|
10201
|
+
});
|
|
10122
10202
|
}
|
|
10203
|
+
if (duplex && process.stdin.isTTY) toggleVoice = async () => {
|
|
10204
|
+
if (voiceIO) {
|
|
10205
|
+
voiceIO.stop();
|
|
10206
|
+
voiceIO = void 0;
|
|
10207
|
+
voicePartial = "";
|
|
10208
|
+
err(dim(" \u{1F507} voice off\n"));
|
|
10209
|
+
editorRef?.redrawNow();
|
|
10210
|
+
return;
|
|
10211
|
+
}
|
|
10212
|
+
await startVoice(false);
|
|
10213
|
+
editorRef?.redrawNow();
|
|
10214
|
+
};
|
|
10215
|
+
if (args.voice && duplex && process.stdin.isTTY) await startVoice(true);
|
|
10123
10216
|
while (true) {
|
|
10124
10217
|
if (pendingRewind) {
|
|
10125
10218
|
pendingRewind = false;
|