agent.libx.js 0.93.6 → 0.93.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/cli/cli.ts +79 -26
- package/dist/{Agent-B_xvSHlG.d.ts → Agent-Di1u5nH0.d.ts} +8 -1
- package/dist/cli.d.ts +8 -3
- package/dist/cli.js +297 -65
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +61 -21
- package/dist/index.js +218 -45
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1978,7 +1978,7 @@ IMPLICIT CAPTURE: when the user shares their name, role, a preference, a correct
|
|
|
1978
1978
|
For explicit "remember X" requests, also call Remember directly and confirm briefly ("got it").
|
|
1979
1979
|
Do NOT remember: transient task details, conversation filler, things you'd forget in a real conversation.
|
|
1980
1980
|
Keep it invisible: never announce "saving to memory" or list what you remembered unless asked.
|
|
1981
|
-
For anything requiring files, shell, or web \u2014 still
|
|
1981
|
+
For anything requiring files, shell, or web \u2014 still Act.`;
|
|
1982
1982
|
async function loadMemory(fs, dir, opts = {}) {
|
|
1983
1983
|
const dirs = (Array.isArray(dir) ? dir : [dir]).filter(Boolean);
|
|
1984
1984
|
const writeDir = dirs[0];
|
|
@@ -2687,6 +2687,10 @@ var AgentOptions = class {
|
|
|
2687
2687
|
autoTest;
|
|
2688
2688
|
/** Provider-specific options forwarded to ai.chat() (e.g. cursor mcpServers, cwd). */
|
|
2689
2689
|
providerOptions;
|
|
2690
|
+
/** Prompt caching (providers that support it, e.g. Anthropic): cache tools/system/conversation
|
|
2691
|
+
* prefix across the loop's steps — reads cost 0.1x, writes 1.25x. A multi-step agent loop
|
|
2692
|
+
* re-sends its whole prefix every step, so this is a large net cost cut. Default on. */
|
|
2693
|
+
promptCache = true;
|
|
2690
2694
|
/** Tool selection mode: 'auto' = model decides (needed for Groq); undefined = provider default. */
|
|
2691
2695
|
toolChoice;
|
|
2692
2696
|
/** Extended-thinking / reasoning effort, normalized across providers (anthropic, openai).
|
|
@@ -2876,7 +2880,7 @@ var Agent = class _Agent {
|
|
|
2876
2880
|
const wireTools = toWireTools(this.activeTools);
|
|
2877
2881
|
const useStream = o.stream === true && typeof o.host?.notify === "function";
|
|
2878
2882
|
let steps = 0;
|
|
2879
|
-
const usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
2883
|
+
const usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
2880
2884
|
let usageEstimated = false;
|
|
2881
2885
|
const start = Date.now();
|
|
2882
2886
|
let toolCallsTotal = 0;
|
|
@@ -2893,6 +2897,7 @@ var Agent = class _Agent {
|
|
|
2893
2897
|
if (o.timeoutMs && Date.now() - start >= o.timeoutMs) return kill("timeout");
|
|
2894
2898
|
if (o.maxTokens && usage.totalTokens >= o.maxTokens) return kill("budget");
|
|
2895
2899
|
steps++;
|
|
2900
|
+
this.options.host?.notify?.({ kind: "turn_start", message: `step ${steps}` });
|
|
2896
2901
|
let res;
|
|
2897
2902
|
const sent = this.trimContext();
|
|
2898
2903
|
const frag = reasoningToChatFragment(o.model, o.reasoning);
|
|
@@ -2906,6 +2911,7 @@ var Agent = class _Agent {
|
|
|
2906
2911
|
} : void 0;
|
|
2907
2912
|
const reasonOpts = {
|
|
2908
2913
|
...frag,
|
|
2914
|
+
...o.promptCache ? { promptCache: true } : {},
|
|
2909
2915
|
...o.providerOptions || cursorPo ? { providerOptions: { ...frag.providerOptions, ...o.providerOptions, ...cursorPo } } : {}
|
|
2910
2916
|
};
|
|
2911
2917
|
try {
|
|
@@ -2933,6 +2939,8 @@ var Agent = class _Agent {
|
|
|
2933
2939
|
usage.promptTokens += res.usage.promptTokens ?? 0;
|
|
2934
2940
|
usage.completionTokens += res.usage.completionTokens ?? 0;
|
|
2935
2941
|
usage.totalTokens += res.usage.totalTokens ?? 0;
|
|
2942
|
+
usage.cacheCreationTokens += res.usage.cacheCreationTokens ?? 0;
|
|
2943
|
+
usage.cacheReadTokens += res.usage.cacheReadTokens ?? 0;
|
|
2936
2944
|
}
|
|
2937
2945
|
const toolCalls = res.toolCalls ?? [];
|
|
2938
2946
|
this.transcript.push({
|
|
@@ -3605,15 +3613,18 @@ function describeCall(call) {
|
|
|
3605
3613
|
return `${call.name}${hint}`;
|
|
3606
3614
|
}
|
|
3607
3615
|
var DuplexAgentOptions = class {
|
|
3608
|
-
/** Any ai.libx.js AIClient — shared by
|
|
3616
|
+
/** Any ai.libx.js AIClient — shared by all tiers (routed by model). */
|
|
3609
3617
|
ai;
|
|
3610
|
-
/** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
|
|
3618
|
+
/** The WORKER's filesystem (act + think). If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
|
|
3611
3619
|
fs;
|
|
3612
|
-
|
|
3613
|
-
|
|
3620
|
+
reflexModel = "groq/openai/gpt-oss-20b";
|
|
3621
|
+
actModel = "anthropic/claude-sonnet-4-6";
|
|
3622
|
+
/** Premium reasoning model. Set to `false` to disable the Think tier entirely. */
|
|
3623
|
+
thinkModel = "anthropic/claude-opus-4-6";
|
|
3614
3624
|
/** Escape hatches merged over the derived per-agent options. */
|
|
3615
|
-
|
|
3616
|
-
|
|
3625
|
+
reflexOptions;
|
|
3626
|
+
actOptions;
|
|
3627
|
+
thinkOptions;
|
|
3617
3628
|
/** Receives the voice text_delta stream + task lifecycle events. */
|
|
3618
3629
|
host;
|
|
3619
3630
|
/** How many recent transcript messages are rendered into a worker's brief. */
|
|
@@ -3621,7 +3632,7 @@ var DuplexAgentOptions = class {
|
|
|
3621
3632
|
/** Voice register: 'neutral' = clean spoken style; 'conversational' = human-like — fillers,
|
|
3622
3633
|
* backchannels, impulsive first reactions before content (mimics real duplex conversation). */
|
|
3623
3634
|
voiceStyle = "neutral";
|
|
3624
|
-
/** Awaited BEFORE a
|
|
3635
|
+
/** Awaited BEFORE a worker spawns — open a per-task checkpoint frame, audit, etc.
|
|
3625
3636
|
* (post-spawn would race the worker's first edits). */
|
|
3626
3637
|
onTaskStart;
|
|
3627
3638
|
/** Re-voice throttled worker progress asides ('[task t1 progress] …') so long tasks aren't dead
|
|
@@ -3644,8 +3655,10 @@ var DuplexAgentOptions = class {
|
|
|
3644
3655
|
/** User-scope memory dir for global facts (type=user/feedback). Forwarded to Remember's routing. */
|
|
3645
3656
|
memoryUserDir;
|
|
3646
3657
|
};
|
|
3647
|
-
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou
|
|
3648
|
-
var
|
|
3658
|
+
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A standard background worker with FULL access to the user\'s environment (files, shell, web). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nYou can find out or do ANYTHING by calling `Act` with a clear, self-contained brief \u2014 so NEVER tell the user you can\'t see, access, or do something. Act and find out. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nAfter calling Act or Think, tell the user you are on it in one short sentence, then end your turn. Do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, summarize it for the ear in one or two short sentences. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight \u2014 check `TaskStatus` first. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file \u2014 use `QuickLook` (instant, no task). Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
|
|
3659
|
+
var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
|
|
3660
|
+
var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
|
|
3661
|
+
var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
|
|
3649
3662
|
var DuplexAgent = class {
|
|
3650
3663
|
options;
|
|
3651
3664
|
voice;
|
|
@@ -3664,21 +3677,32 @@ var DuplexAgent = class {
|
|
|
3664
3677
|
if (o.memoryDir && o.fs) {
|
|
3665
3678
|
this.memoryReady = loadMemory(o.fs, o.memoryDir, { maxWritesPerSession: 10, userDir: o.memoryUserDir });
|
|
3666
3679
|
}
|
|
3667
|
-
const memSlot = o.memoryDir && o.fs ? VOICE_MEMORY_PROMPT : "NEVER claim to have stored, saved, or remembered something durably \u2014 you cannot. Anything the user wants persisted (their name, preferences, notes) must
|
|
3668
|
-
const
|
|
3680
|
+
const memSlot = o.memoryDir && o.fs ? VOICE_MEMORY_PROMPT : "NEVER claim to have stored, saved, or remembered something durably \u2014 you cannot. Anything the user wants persisted (their name, preferences, notes) must go through Act so a worker writes it to memory.";
|
|
3681
|
+
const thinkSlot = o.thinkModel !== false ? THINK_GUIDANCE : THINK_DISABLED_GUIDANCE;
|
|
3682
|
+
const prompt = VOICE_SYSTEM_PROMPT.replace("{{MEMORY_SLOT}}", memSlot).replace("{{THINK_SLOT}}", thinkSlot) + (o.voiceStyle === "conversational" ? "\n" + VOICE_STYLE_CONVERSATIONAL : "") + `
|
|
3669
3683
|
Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
3684
|
+
const tools = [
|
|
3685
|
+
...o.reflexOptions?.tools ?? [],
|
|
3686
|
+
this.actTool(),
|
|
3687
|
+
...o.thinkModel !== false ? [this.thinkTool()] : [],
|
|
3688
|
+
this.taskStatusTool(),
|
|
3689
|
+
this.cancelTaskTool(),
|
|
3690
|
+
this.quickLookTool(),
|
|
3691
|
+
this.answerTaskTool(),
|
|
3692
|
+
this.holdTool()
|
|
3693
|
+
];
|
|
3670
3694
|
this.voice = new Agent({
|
|
3671
3695
|
ai: o.ai,
|
|
3672
3696
|
fs: new MemFilesystem2(),
|
|
3673
|
-
model: o.
|
|
3697
|
+
model: o.reflexModel,
|
|
3674
3698
|
stream: true,
|
|
3675
3699
|
host: o.host,
|
|
3676
3700
|
systemPrompt: prompt,
|
|
3677
3701
|
instructionFiles: false,
|
|
3678
3702
|
maxSteps: 8,
|
|
3679
3703
|
timeoutMs: 3e4,
|
|
3680
|
-
...o.
|
|
3681
|
-
tools
|
|
3704
|
+
...o.reflexOptions,
|
|
3705
|
+
tools
|
|
3682
3706
|
});
|
|
3683
3707
|
}
|
|
3684
3708
|
/** Resolve memory tools + inject index into voice system prompt (once). */
|
|
@@ -3689,7 +3713,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
3689
3713
|
this.voice.options.tools.push(...mem.tools);
|
|
3690
3714
|
if (mem.index) this.voice.options.systemPrompt += "\n\n" + mem.index;
|
|
3691
3715
|
}
|
|
3692
|
-
/** One user turn: the voice agent streams the reply (and may
|
|
3716
|
+
/** One user turn: the voice agent streams the reply (and may Act/Think). Serialized with re-voice turns. */
|
|
3693
3717
|
send(content) {
|
|
3694
3718
|
return this.enqueue(async () => {
|
|
3695
3719
|
await this.initMemory();
|
|
@@ -3730,19 +3754,25 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
3730
3754
|
this.notify("revoice_done", "");
|
|
3731
3755
|
});
|
|
3732
3756
|
}
|
|
3733
|
-
/** The worker's brief: the
|
|
3734
|
-
|
|
3757
|
+
/** The worker's brief: the Act/Think args + a STATIC text snapshot of the recent conversation.
|
|
3758
|
+
* Act briefs get a self-verify footer — the worker's report is trusted without review, so it
|
|
3759
|
+
* must check its own work before reporting (nearly free under prompt caching; measured honest:
|
|
3760
|
+
* it does NOT fix one-shot logic bugs — see mind/10). Think tasks are pure reasoning — no footer. */
|
|
3761
|
+
buildBrief(brief, tier = "act") {
|
|
3735
3762
|
const recent = this.voice.transcript.filter((m) => (m.role === "user" || m.role === "assistant") && contentText(m.content).trim()).slice(-this.options.excerptTurns).map((m) => `${m.role}: ${contentText(m.content)}`).join("\n");
|
|
3736
|
-
|
|
3763
|
+
const verify = tier === "act" ? "\n\nBefore reporting done: re-read what you changed and check it against EVERY requirement above \u2014 fix any gap first. Your report is trusted without review." : "";
|
|
3764
|
+
return (recent ? `${brief}
|
|
3737
3765
|
|
|
3738
3766
|
## Recent conversation (for context)
|
|
3739
|
-
${recent}` : brief;
|
|
3767
|
+
${recent}` : brief) + verify;
|
|
3740
3768
|
}
|
|
3741
3769
|
/** Spawn a detached worker for task `id`; its settlement notifies + enqueues the re-voice turn. */
|
|
3742
|
-
spawnWorker(id, label, briefText) {
|
|
3770
|
+
spawnWorker(id, label, briefText, tier = "act") {
|
|
3743
3771
|
const o = this.options;
|
|
3772
|
+
const tierOpts = tier === "think" ? o.thinkOptions : o.actOptions;
|
|
3773
|
+
const tierModel = tier === "think" ? o.thinkModel : o.actModel;
|
|
3744
3774
|
const controller = new AbortController();
|
|
3745
|
-
const base = o.
|
|
3775
|
+
const base = tierOpts?.hooks ?? o.actOptions?.hooks;
|
|
3746
3776
|
const report = o.progressUpdates ? this.progressReporter(id) : void 0;
|
|
3747
3777
|
const hooks = report ? {
|
|
3748
3778
|
...base,
|
|
@@ -3769,13 +3799,12 @@ ${recent}` : brief;
|
|
|
3769
3799
|
const worker = new Agent({
|
|
3770
3800
|
ai: o.ai,
|
|
3771
3801
|
fs: o.fs,
|
|
3772
|
-
model:
|
|
3773
|
-
...
|
|
3774
|
-
|
|
3802
|
+
model: tierModel,
|
|
3803
|
+
...tier === "think" ? { reasoning: tierOpts?.reasoning ?? "high" } : {},
|
|
3804
|
+
...tierOpts,
|
|
3775
3805
|
...workerHost ? { host: workerHost } : {},
|
|
3776
3806
|
...hooks ? { hooks } : {},
|
|
3777
3807
|
signal: controller.signal
|
|
3778
|
-
// …but never the per-task cancellation signal
|
|
3779
3808
|
});
|
|
3780
3809
|
const promise = worker.run(briefText).then((res) => this.onWorkerSettled(id, res)).catch((err) => this.onWorkerFailed(id, err));
|
|
3781
3810
|
this.tasks.set(id, { id, label, status: "running", controller, promise });
|
|
@@ -3863,7 +3892,14 @@ ${recent}` : brief;
|
|
|
3863
3892
|
}
|
|
3864
3893
|
rec.status = "done";
|
|
3865
3894
|
log7.verbose(`task ${id} done (${res.steps} steps)`);
|
|
3866
|
-
this.notify("task_done", `task ${id} (${rec.label}) completed`, {
|
|
3895
|
+
this.notify("task_done", `task ${id} (${rec.label}) completed`, {
|
|
3896
|
+
id,
|
|
3897
|
+
text: res.text,
|
|
3898
|
+
usage: res.usage,
|
|
3899
|
+
usageEstimated: res.usageEstimated,
|
|
3900
|
+
steps: res.steps,
|
|
3901
|
+
toolCalls: res.messages.filter((m) => m.role === "tool").length
|
|
3902
|
+
});
|
|
3867
3903
|
this.queueRevoice(`[task ${id} completed] ${res.text}`);
|
|
3868
3904
|
}
|
|
3869
3905
|
onWorkerFailed(id, err) {
|
|
@@ -3876,11 +3912,32 @@ ${recent}` : brief;
|
|
|
3876
3912
|
this.notify("task_error", `task ${rec.id} (${rec.label}) failed: ${msg}`);
|
|
3877
3913
|
this.queueRevoice(`[task ${rec.id} failed] ${msg}`);
|
|
3878
3914
|
}
|
|
3879
|
-
// ---
|
|
3880
|
-
|
|
3915
|
+
// --- voice tools (closures over this instance) ---
|
|
3916
|
+
/** Live-switch the think tier: `false` disables (removes the Think tool from the voice agent),
|
|
3917
|
+
* a model id enables (adds the tool if missing). The system-prompt THINK_SLOT text is frozen at
|
|
3918
|
+
* construction — the tool's own description carries the routing guidance, so a live enable works;
|
|
3919
|
+
* dispatch()'s think→act fallback covers any straggler calls after a live disable. */
|
|
3920
|
+
setThinkModel(model) {
|
|
3921
|
+
this.options.thinkModel = model;
|
|
3922
|
+
const tools = this.voice.options.tools;
|
|
3923
|
+
const i = tools.findIndex((t) => t.name === "Think");
|
|
3924
|
+
if (model === false && i >= 0) tools.splice(i, 1);
|
|
3925
|
+
else if (model !== false && i < 0) tools.push(this.thinkTool());
|
|
3926
|
+
}
|
|
3927
|
+
/** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id. */
|
|
3928
|
+
async dispatch(brief, tier = "act", label) {
|
|
3929
|
+
if (tier === "think" && this.options.thinkModel === false) tier = "act";
|
|
3930
|
+
const id = `t${++this.seq}`;
|
|
3931
|
+
const lbl = label ?? tier;
|
|
3932
|
+
await this.options.onTaskStart?.(id, lbl);
|
|
3933
|
+
this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier);
|
|
3934
|
+
this.notify("task_started", `task ${id} (${lbl}) started`, { id, brief, tier });
|
|
3935
|
+
return id;
|
|
3936
|
+
}
|
|
3937
|
+
actTool() {
|
|
3881
3938
|
return {
|
|
3882
|
-
name: "
|
|
3883
|
-
description: 'Escalate real work (reading/editing files, searching, running tasks,
|
|
3939
|
+
name: "Act",
|
|
3940
|
+
description: 'Escalate real work (reading/editing files, searching, running tasks, building) to a standard background worker. Returns immediately with a task id; the result arrives later as a "[task <id> completed]" event. Provide a clear, self-contained `brief` (the worker does not hear the live conversation).',
|
|
3884
3941
|
parameters: {
|
|
3885
3942
|
type: "object",
|
|
3886
3943
|
required: ["brief"],
|
|
@@ -3890,12 +3947,26 @@ ${recent}` : brief;
|
|
|
3890
3947
|
}
|
|
3891
3948
|
},
|
|
3892
3949
|
run: async ({ brief, label }) => {
|
|
3893
|
-
const id =
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
|
|
3950
|
+
const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0);
|
|
3951
|
+
return `Acting on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
|
|
3952
|
+
}
|
|
3953
|
+
};
|
|
3954
|
+
}
|
|
3955
|
+
thinkTool() {
|
|
3956
|
+
return {
|
|
3957
|
+
name: "Think",
|
|
3958
|
+
description: "Escalate to a premium deep-reasoning agent for complex analysis, architecture decisions, hard debugging, or planning. Same async pattern as Act \u2014 returns a task id. Use when the problem needs careful thought before (or instead of) action. Do not use Think for simple tasks \u2014 Act is cheaper and faster.",
|
|
3959
|
+
parameters: {
|
|
3960
|
+
type: "object",
|
|
3961
|
+
required: ["brief"],
|
|
3962
|
+
properties: {
|
|
3963
|
+
brief: { type: "string", description: "the question or problem to reason about deeply" },
|
|
3964
|
+
label: { type: "string", description: "a short (2-4 word) label for the task" }
|
|
3965
|
+
}
|
|
3966
|
+
},
|
|
3967
|
+
run: async ({ brief, label }) => {
|
|
3968
|
+
const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0);
|
|
3969
|
+
return `Thinking on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
|
|
3899
3970
|
}
|
|
3900
3971
|
};
|
|
3901
3972
|
}
|
|
@@ -3911,7 +3982,7 @@ ${recent}` : brief;
|
|
|
3911
3982
|
}
|
|
3912
3983
|
};
|
|
3913
3984
|
}
|
|
3914
|
-
/** Sub-100ms read-only lookups the voice may do itself — everything else stays
|
|
3985
|
+
/** Sub-100ms read-only lookups the voice may do itself — everything else stays Act-only.
|
|
3915
3986
|
* fs-only (no shell; the engine is VFS-abstracted): time, git branch (.git/HEAD read), ls, file
|
|
3916
3987
|
* head. Output is hard-capped so a lookup can never bloat the skinny voice context. */
|
|
3917
3988
|
quickLookTool() {
|
|
@@ -3919,7 +3990,7 @@ ${recent}` : brief;
|
|
|
3919
3990
|
const kinds = [.../* @__PURE__ */ new Set(["time", "branch", "ls", "file", ...Object.keys(this.options.quickLook ?? {})])];
|
|
3920
3991
|
return {
|
|
3921
3992
|
name: "QuickLook",
|
|
3922
|
-
description: `Instant read-only lookup \u2014 one of: ${kinds.join(", ")}. For trivial facts only; anything needing search, commands, or reasoning goes through
|
|
3993
|
+
description: `Instant read-only lookup \u2014 one of: ${kinds.join(", ")}. For trivial facts only; anything needing search, commands, or reasoning goes through Act.`,
|
|
3923
3994
|
parameters: {
|
|
3924
3995
|
type: "object",
|
|
3925
3996
|
required: ["what"],
|
|
@@ -3952,7 +4023,7 @@ ${recent}` : brief;
|
|
|
3952
4023
|
if (!path) return "file lookup needs a path";
|
|
3953
4024
|
const text = await fs.readFile(String(path));
|
|
3954
4025
|
return text.length > CAP ? text.slice(0, CAP) + `
|
|
3955
|
-
\u2026 (truncated \u2014 ${text.length} chars total;
|
|
4026
|
+
\u2026 (truncated \u2014 ${text.length} chars total; Act for the full file)` : text;
|
|
3956
4027
|
}
|
|
3957
4028
|
default:
|
|
3958
4029
|
return `unknown lookup '${what}'`;
|
|
@@ -3980,6 +4051,22 @@ ${recent}` : brief;
|
|
|
3980
4051
|
}
|
|
3981
4052
|
};
|
|
3982
4053
|
}
|
|
4054
|
+
holdTool() {
|
|
4055
|
+
return {
|
|
4056
|
+
name: "Hold",
|
|
4057
|
+
description: 'The user seems mid-thought \u2014 hold the turn (stay listening) instead of answering. Optionally pass a short filler ("mhm", "go on") to speak while waiting. Use when the message sounds incomplete, trailing off, or like they paused to think.',
|
|
4058
|
+
parameters: {
|
|
4059
|
+
type: "object",
|
|
4060
|
+
properties: {
|
|
4061
|
+
filler: { type: "string", description: 'optional short filler to speak ("mhm", "go on", "mm-hm")' }
|
|
4062
|
+
}
|
|
4063
|
+
},
|
|
4064
|
+
run: async ({ filler }) => {
|
|
4065
|
+
if (filler) this.notify("hold_filler", String(filler));
|
|
4066
|
+
return "Holding \u2014 listening for the rest of the user's thought. Do not respond further this turn.";
|
|
4067
|
+
}
|
|
4068
|
+
};
|
|
4069
|
+
}
|
|
3983
4070
|
cancelTaskTool() {
|
|
3984
4071
|
return {
|
|
3985
4072
|
name: "CancelTask",
|
|
@@ -4159,6 +4246,14 @@ var VoiceEngineOptions = class {
|
|
|
4159
4246
|
* letters, mid-thought pauses), the next utterance MERGES instead of dispatching a truncated one
|
|
4160
4247
|
* ("E-L-Y." / "A."). Costs this much latency per turn; 0 disables. */
|
|
4161
4248
|
utteranceMergeMs = 350;
|
|
4249
|
+
/** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
|
|
4250
|
+
* Gives the user time to finish their thought without triggering a model call. */
|
|
4251
|
+
incompleteMergeMs = 1500;
|
|
4252
|
+
/** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
|
|
4253
|
+
holdFiller = "";
|
|
4254
|
+
/** Called when the engine holds an incomplete utterance (host can render a visual cue). */
|
|
4255
|
+
onHold = () => {
|
|
4256
|
+
};
|
|
4162
4257
|
/** heuristic (non-AEC) energy barge-in tuning */
|
|
4163
4258
|
bargeRmsMult = 2;
|
|
4164
4259
|
bargeRmsFloor = 500;
|
|
@@ -4172,7 +4267,7 @@ var VoiceEngineOptions = class {
|
|
|
4172
4267
|
/** no new partial activity for this long while paused → resume, drop the interjection */
|
|
4173
4268
|
overlapResumeMs = 700;
|
|
4174
4269
|
};
|
|
4175
|
-
var VoiceEngine = class {
|
|
4270
|
+
var VoiceEngine = class _VoiceEngine {
|
|
4176
4271
|
options;
|
|
4177
4272
|
state = "idle";
|
|
4178
4273
|
stt;
|
|
@@ -4321,6 +4416,13 @@ var VoiceEngine = class {
|
|
|
4321
4416
|
this.lastInterrupted = null;
|
|
4322
4417
|
return r;
|
|
4323
4418
|
}
|
|
4419
|
+
/** Speak a short filler phrase without starting a model turn (stays in listening mode after). */
|
|
4420
|
+
speakFiller(text) {
|
|
4421
|
+
if (!text || this.speaking) return;
|
|
4422
|
+
this.beginSpeech();
|
|
4423
|
+
this.speakDelta(text);
|
|
4424
|
+
this.endSpeech();
|
|
4425
|
+
}
|
|
4324
4426
|
/** barge-in: stop audio NOW, cancel generation, reset for the user's utterance */
|
|
4325
4427
|
interrupt() {
|
|
4326
4428
|
if (!this.speaking && !this.drainTimer) return;
|
|
@@ -4404,6 +4506,11 @@ var VoiceEngine = class {
|
|
|
4404
4506
|
}
|
|
4405
4507
|
if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
|
|
4406
4508
|
}
|
|
4509
|
+
static TRAIL_RE = /(?:^|\s)(?:and|but|or|so|to|the|a|an|of|in|for|with|that|if|uh|um|like|about|from|into|on|is|are|was|were|,)$/i;
|
|
4510
|
+
/** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
|
|
4511
|
+
looksIncomplete(text) {
|
|
4512
|
+
return _VoiceEngine.TRAIL_RE.test(text.trim());
|
|
4513
|
+
}
|
|
4407
4514
|
handleUtterance(text) {
|
|
4408
4515
|
if (this.speaking && (this.ctxOpen || this.pausedAt) && this.overlapCapable) {
|
|
4409
4516
|
this.stt.reset();
|
|
@@ -4420,6 +4527,17 @@ var VoiceEngine = class {
|
|
|
4420
4527
|
}
|
|
4421
4528
|
this.pendingUtt = this.pendingUtt ? `${this.pendingUtt} ${text}` : text;
|
|
4422
4529
|
if (this.pendingTimer) clearTimeout(this.pendingTimer);
|
|
4530
|
+
if (this.options.incompleteMergeMs && this.looksIncomplete(this.pendingUtt)) {
|
|
4531
|
+
log8.verbose(`hold: incomplete utterance "${this.pendingUtt.slice(-40)}"`);
|
|
4532
|
+
this.options.onHold();
|
|
4533
|
+
if (this.options.holdFiller && !this.speaking) {
|
|
4534
|
+
this.beginSpeech();
|
|
4535
|
+
this.speakDelta(this.options.holdFiller);
|
|
4536
|
+
this.endSpeech();
|
|
4537
|
+
}
|
|
4538
|
+
this.pendingTimer = setTimeout(() => this.flushUtterance(), this.options.incompleteMergeMs);
|
|
4539
|
+
return;
|
|
4540
|
+
}
|
|
4423
4541
|
if (!this.options.utteranceMergeMs || this.words(this.pendingUtt).length >= 4) return this.flushUtterance();
|
|
4424
4542
|
this.pendingTimer = setTimeout(() => this.flushUtterance(), this.options.utteranceMergeMs);
|
|
4425
4543
|
}
|
|
@@ -4635,7 +4753,7 @@ var CartesiaTTSOptions = class {
|
|
|
4635
4753
|
/** 'apiKey' (server/CLI) → `api_key=` URL param; 'token' (browser, BE-minted) → `access_token=`. */
|
|
4636
4754
|
authMode = "apiKey";
|
|
4637
4755
|
};
|
|
4638
|
-
var CartesiaTTS = class {
|
|
4756
|
+
var CartesiaTTS = class _CartesiaTTS {
|
|
4639
4757
|
options;
|
|
4640
4758
|
ws;
|
|
4641
4759
|
ctxSeq = 0;
|
|
@@ -4645,6 +4763,12 @@ var CartesiaTTS = class {
|
|
|
4645
4763
|
onDone = () => {
|
|
4646
4764
|
};
|
|
4647
4765
|
firstAudioAt = 0;
|
|
4766
|
+
/** Circuit breaker: consecutive error count + down flag. */
|
|
4767
|
+
consecutiveErrors = 0;
|
|
4768
|
+
down = false;
|
|
4769
|
+
probeTimer = null;
|
|
4770
|
+
static CB_THRESHOLD = 3;
|
|
4771
|
+
static CB_PROBE_MS = 3e4;
|
|
4648
4772
|
constructor(options) {
|
|
4649
4773
|
this.options = { ...new CartesiaTTSOptions(), ...options };
|
|
4650
4774
|
}
|
|
@@ -4674,10 +4798,34 @@ var CartesiaTTS = class {
|
|
|
4674
4798
|
const m = JSON.parse(String(ev.data));
|
|
4675
4799
|
if (m.context_id && m.context_id !== this.ctxId) return;
|
|
4676
4800
|
if (m.type === "chunk" && m.data) {
|
|
4801
|
+
this.consecutiveErrors = 0;
|
|
4802
|
+
if (this.down) {
|
|
4803
|
+
this.down = false;
|
|
4804
|
+
log10.info("TTS recovered");
|
|
4805
|
+
this.stopProbe();
|
|
4806
|
+
}
|
|
4677
4807
|
if (!this.firstAudioAt) this.firstAudioAt = now3();
|
|
4678
4808
|
this.onAudio(base64ToBytes(m.data));
|
|
4679
|
-
} else if (m.type === "done")
|
|
4680
|
-
|
|
4809
|
+
} else if (m.type === "done") {
|
|
4810
|
+
this.consecutiveErrors = 0;
|
|
4811
|
+
if (this.down) {
|
|
4812
|
+
this.down = false;
|
|
4813
|
+
log10.info("TTS recovered");
|
|
4814
|
+
this.stopProbe();
|
|
4815
|
+
}
|
|
4816
|
+
this.onDone();
|
|
4817
|
+
} else if (m.type === "error") {
|
|
4818
|
+
if (/already been cancelled|does not exist/.test(m.message || "")) return;
|
|
4819
|
+
this.consecutiveErrors++;
|
|
4820
|
+
if (!this.down && this.consecutiveErrors >= _CartesiaTTS.CB_THRESHOLD) {
|
|
4821
|
+
this.down = true;
|
|
4822
|
+
log10.warn(`TTS circuit breaker open \u2014 ${this.consecutiveErrors} consecutive errors, switching to text-only`);
|
|
4823
|
+
this.onDone();
|
|
4824
|
+
this.startProbe();
|
|
4825
|
+
} else if (!this.down) {
|
|
4826
|
+
log10.warn(`cartesia: ${JSON.stringify(m)}`);
|
|
4827
|
+
}
|
|
4828
|
+
}
|
|
4681
4829
|
};
|
|
4682
4830
|
}
|
|
4683
4831
|
/** Ensure the WS is open before sending — reconnects if idle-closed. */
|
|
@@ -4701,17 +4849,42 @@ var CartesiaTTS = class {
|
|
|
4701
4849
|
});
|
|
4702
4850
|
}
|
|
4703
4851
|
speak(text, cont) {
|
|
4852
|
+
if (this.down) return;
|
|
4704
4853
|
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(text, cont));
|
|
4705
4854
|
else void this.ensureConnected().then(() => this.ws?.readyState === WebSocket.OPEN && this.ws.send(this.frame(text, cont)));
|
|
4706
4855
|
}
|
|
4707
4856
|
end() {
|
|
4857
|
+
if (this.down) {
|
|
4858
|
+
this.onDone();
|
|
4859
|
+
return;
|
|
4860
|
+
}
|
|
4708
4861
|
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame("", false));
|
|
4709
4862
|
}
|
|
4710
4863
|
cancel() {
|
|
4711
4864
|
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify({ context_id: this.ctxId, cancel: true }));
|
|
4712
4865
|
}
|
|
4866
|
+
startProbe() {
|
|
4867
|
+
if (this.probeTimer) return;
|
|
4868
|
+
this.probeTimer = setInterval(() => {
|
|
4869
|
+
if (!this.down) {
|
|
4870
|
+
this.stopProbe();
|
|
4871
|
+
return;
|
|
4872
|
+
}
|
|
4873
|
+
this.consecutiveErrors = 0;
|
|
4874
|
+
this.newContext();
|
|
4875
|
+
if (this.ws?.readyState === WebSocket.OPEN) this.ws.send(this.frame(".", false));
|
|
4876
|
+
}, _CartesiaTTS.CB_PROBE_MS);
|
|
4877
|
+
this.probeTimer.unref?.();
|
|
4878
|
+
}
|
|
4879
|
+
stopProbe() {
|
|
4880
|
+
if (this.probeTimer) {
|
|
4881
|
+
clearInterval(this.probeTimer);
|
|
4882
|
+
this.probeTimer = null;
|
|
4883
|
+
}
|
|
4884
|
+
}
|
|
4713
4885
|
close() {
|
|
4714
4886
|
this.closed = true;
|
|
4887
|
+
this.stopProbe();
|
|
4715
4888
|
if (this.ws) this.ws.onclose = null;
|
|
4716
4889
|
this.ws?.close();
|
|
4717
4890
|
}
|