agent.libx.js 0.93.25 → 0.93.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +83 -79
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +60 -1
- package/dist/index.js +133 -31
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -374,6 +374,65 @@ declare function makeWebSearchTool(options?: WebSearchOptions): AgentTool;
|
|
|
374
374
|
declare const webFetchTool: AgentTool;
|
|
375
375
|
declare const webSearchTool: AgentTool;
|
|
376
376
|
|
|
377
|
+
/**
|
|
378
|
+
* Artifact blackboard — keep large tool/subagent outputs OUT of the main context, but queryable.
|
|
379
|
+
*
|
|
380
|
+
* Pattern: many-tool / many-subagent engines bloat context with raw outputs (search results, big
|
|
381
|
+
* file reads, subagent reports) that are mostly noise once a gist is taken. Here, a tool wrapped with
|
|
382
|
+
* `withArtifactCapture` stashes any oversized result in an `ArtifactStore` and returns a compact stub
|
|
383
|
+
* (id + preview) to context. When the caller later needs a buried detail, `Ask({question, over})`
|
|
384
|
+
* peeks into the full artifact in a side-step — a (cheap) model extracts just the answer, so the raw
|
|
385
|
+
* blob never re-enters the caller's context. The caller (often a stronger model) synthesizes from the
|
|
386
|
+
* tight answer Ask returns. Two-tier division of labour: cheap RETRIEVE/EXTRACT, strong SYNTHESIZE.
|
|
387
|
+
*
|
|
388
|
+
* Spike, not load-bearing yet — segregated module (IoC via options), zero Agent-core changes.
|
|
389
|
+
*/
|
|
390
|
+
|
|
391
|
+
/** One captured tool output as held by an `ArtifactStore`. */
interface Artifact {
|
|
392
|
+
/** Store-assigned identifier ("a1", "a2", …) used to look the artifact up again. */
id: string;
|
|
393
|
+
/** Name of the tool whose output was captured. */
tool: string;
|
|
394
|
+
/** Short label; `put` falls back to the first 80 chars of the whitespace-collapsed content when none is given. */
brief: string;
|
|
395
|
+
/** The full, untruncated output. */
content: string;
|
|
396
|
+
/** Size as recorded by `put`: the string length of `content` (UTF-16 code units), not an encoded byte count. */
bytes: number;
|
|
397
|
+
}
|
|
398
|
+
/** In-memory store of captured tool outputs. Ids are a monotonic `a1, a2, …` (test-friendly, no RNG). */
|
|
399
|
+
declare class ArtifactStore {
|
|
400
|
+
/** Artifacts keyed by id. */
private items;
|
|
401
|
+
/** Counter behind the sequential ids. */
private seq;
|
|
402
|
+
/** Store `content` under the next id and return the new record; `brief` defaults to the first 80 chars of the whitespace-collapsed content. */
put(tool: string, content: string, brief?: string): Artifact;
|
|
403
|
+
/** Look an artifact up by id; `undefined` when the id is unknown. */
get(id: string): Artifact | undefined;
|
|
404
|
+
/** All stored artifacts as a fresh array. */
list(): Artifact[];
|
|
405
|
+
/** Number of stored artifacts. */
get size(): number;
|
|
406
|
+
/**
 * Cheap keyword retrieval over brief+content — rank by how many distinct query terms each artifact contains.
 * Matching is a case-insensitive substring test; query terms shorter than 3 chars are ignored and artifacts
 * matching no term are left out. For a positive `k` (default 3) at most `k` artifacts come back. When the
 * query yields no usable terms, the last `k` stored artifacts are returned instead.
 */
|
|
407
|
+
search(query: string, k?: number): Artifact[];
|
|
408
|
+
}
|
|
409
|
+
/** Tuning knobs for `withArtifactCapture`. */
interface CaptureOptions {
|
|
410
|
+
/** Only string results LONGER than this many chars are captured; results at or below it pass through untouched. Default 1500. */
|
|
411
|
+
threshold?: number;
|
|
412
|
+
/** Chars of the captured output to preview in the stub (taken from the start, then whitespace-collapsed). Default 320. */
|
|
413
|
+
previewChars?: number;
|
|
414
|
+
}
|
|
415
|
+
/**
|
|
416
|
+
* Wrap a tool so any oversized STRING result is stored as an artifact and replaced with a compact
|
|
417
|
+
* stub. Non-string results (images/structured) and small results pass through unchanged.
|
|
418
|
+
*/
|
|
419
|
+
// The returned tool copies `tool`'s own properties and replaces only `run`; captured results are stored via `store.put`.
declare function withArtifactCapture(tool: AgentTool, store: ArtifactStore, opts?: CaptureOptions): AgentTool;
|
|
420
|
+
/** Dependencies and limits for the tool built by `makeAskTool`. */
interface AskOptions {
|
|
421
|
+
/** Store the tool reads from: `get` for ids passed in `over`, `search` when `over` is omitted. */
store: ArtifactStore;
|
|
422
|
+
/** Client whose `chat({ model, messages })` call performs the extraction. */
ai: ChatLike;
|
|
423
|
+
/** Model for the extraction step — intentionally a CHEAP model; the caller synthesizes. */
|
|
424
|
+
model: string;
|
|
425
|
+
/** Max chars of each artifact fed to the extractor (default 12000); content past this point is cut off and not shown to the extractor. */
|
|
426
|
+
maxChars?: number;
|
|
427
|
+
}
|
|
428
|
+
/**
|
|
429
|
+
* `Ask` — peek into stored artifacts to answer a question, WITHOUT loading the raw data into the
|
|
430
|
+
* caller's context. Pass `over` (artifact id(s)) for a targeted peek, or omit to auto-retrieve by
|
|
431
|
+
* relevance. Returns only the extracted answer. Built for the two-tier split: a cheap model does the
|
|
432
|
+
* retrieve+extract here; the (stronger) caller synthesizes the final answer from what comes back.
|
|
433
|
+
*/
|
|
434
|
+
// The returned tool is named "Ask" and takes `{ question, over? }`, where `over` is a string of artifact id(s).
declare function makeAskTool(o: AskOptions): AgentTool;
|
|
435
|
+
|
|
377
436
|
/**
|
|
378
437
|
* `AskUserQuestion` tool — lets the model pose a structured choice to a human.
|
|
379
438
|
* Delegates to `ctx.host.ask`. Registered only when a HostBridge is provided
|
|
@@ -1075,4 +1134,4 @@ declare class CartesiaTTS {
|
|
|
1075
1134
|
close(): void;
|
|
1076
1135
|
}
|
|
1077
1136
|
|
|
1078
|
-
export { Agent, type AgentDef, AgentOptions, AgentTool, type Attempt, type AudioSink, type AudioSource, type AuthProvider, BodDbFilesystem, CartesiaTTS, CartesiaTTSOptions, ChatLike, ChatOptions, ChatResponse, type CommandInfo, ConsoleHostBridge, DEFAULT_DENY, DuplexAgent, DuplexAgentOptions, type DuplexTaskStatus, FakeAIClient, Hooks, HostBridge, JailOptions, JailedFilesystem, type LessonOptions, LessonOptionsDefaults, type LoadMemoryOpts, MEMORY_PROMPT, MessageContent, type Mount, MountFilesystem, NodeDiskFilesystem, OverlayFilesystem, type ReflectOptions, RunResult, STT_SAMPLE_RATE, ScriptedHostBridge, type SkillInfo, SonioxSTT, SonioxSTTOptions, type SttLike, TTS_SAMPLE_RATE, type TaskRecord, type TaskToolOptions, ToolCall, type ToolSpec, type TtsLike, UserQuestion, VOICE_MEMORY_PROMPT, VOICE_SYSTEM_PROMPT, VoiceEngine, VoiceEngineOptions, type VoiceState, type WebFetchOptions, type WebSearchOptions, type WorkerTier, applyEditsTool, askUserQuestionTool, checkpointTool, checkpointTools, compileSynthesizedTool, decodeDdgUrl, diskAgentOptions, expandCommand, expandTemplate, forComponent, fullAgentOptions, globTool, grepTool, htmlToText, idfWeights, lessonCapture, loadAgents, loadCommands, loadInstructions, loadMemory, loadSkills, makeTaskBatchTool, makeTaskTool, makeWebFetchTool, makeWebSearchTool, mkdirp, multiEditTool, parseDdgHtml, raceAttempts, reflectOnRun, relevanceScore, repoIndex, repoMapTool, resolveAuth, rollbackTool, sandboxAgentOptions, slugify, tokenize, toolCall, topByRelevance, validateToolCode, webFetchTool, webSearchTool, writeFact, writeTool };
|
|
1137
|
+
export { Agent, type AgentDef, AgentOptions, AgentTool, type Artifact, ArtifactStore, type AskOptions, type Attempt, type AudioSink, type AudioSource, type AuthProvider, BodDbFilesystem, type CaptureOptions, CartesiaTTS, CartesiaTTSOptions, ChatLike, ChatOptions, ChatResponse, type CommandInfo, ConsoleHostBridge, DEFAULT_DENY, DuplexAgent, DuplexAgentOptions, type DuplexTaskStatus, FakeAIClient, Hooks, HostBridge, JailOptions, JailedFilesystem, type LessonOptions, LessonOptionsDefaults, type LoadMemoryOpts, MEMORY_PROMPT, MessageContent, type Mount, MountFilesystem, NodeDiskFilesystem, OverlayFilesystem, type ReflectOptions, RunResult, STT_SAMPLE_RATE, ScriptedHostBridge, type SkillInfo, SonioxSTT, SonioxSTTOptions, type SttLike, TTS_SAMPLE_RATE, type TaskRecord, type TaskToolOptions, ToolCall, type ToolSpec, type TtsLike, UserQuestion, VOICE_MEMORY_PROMPT, VOICE_SYSTEM_PROMPT, VoiceEngine, VoiceEngineOptions, type VoiceState, type WebFetchOptions, type WebSearchOptions, type WorkerTier, applyEditsTool, askUserQuestionTool, checkpointTool, checkpointTools, compileSynthesizedTool, decodeDdgUrl, diskAgentOptions, expandCommand, expandTemplate, forComponent, fullAgentOptions, globTool, grepTool, htmlToText, idfWeights, lessonCapture, loadAgents, loadCommands, loadInstructions, loadMemory, loadSkills, makeAskTool, makeTaskBatchTool, makeTaskTool, makeWebFetchTool, makeWebSearchTool, mkdirp, multiEditTool, parseDdgHtml, raceAttempts, reflectOnRun, relevanceScore, repoIndex, repoMapTool, resolveAuth, rollbackTool, sandboxAgentOptions, slugify, tokenize, toolCall, topByRelevance, validateToolCode, webFetchTool, webSearchTool, withArtifactCapture, writeFact, writeTool };
|
package/dist/index.js
CHANGED
|
@@ -3568,9 +3568,108 @@ init_tools_structured();
|
|
|
3568
3568
|
init_todo();
|
|
3569
3569
|
init_tools_web();
|
|
3570
3570
|
|
|
3571
|
+
// src/artifacts.ts
|
|
3572
|
+
init_logging();
|
|
3573
|
+
var log5 = forComponent("artifacts");
|
|
3574
|
+
/**
 * In-memory store of captured tool outputs.
 *
 * Ids are handed out sequentially ("a1", "a2", …) and artifacts are kept in the order they were stored.
 */
var ArtifactStore = class {
  items = /* @__PURE__ */ new Map();
  seq = 0;
  /**
   * Store `content` under the next id and return the new artifact record.
   * @param tool name of the tool that produced the output
   * @param content raw output to keep
   * @param brief optional short label; defaults to the first 80 chars of the whitespace-collapsed content
   * @returns the stored `{ id, tool, brief, content, bytes }` record
   */
  put(tool, content, brief) {
    const id = "a" + ++this.seq;
    const art = {
      id,
      tool,
      brief: brief || content.replace(/\s+/g, " ").trim().slice(0, 80),
      content,
      // NOTE: despite the name this is the JS string length (UTF-16 code units), not an encoded byte count.
      bytes: content.length
    };
    this.items.set(id, art);
    return art;
  }
  /** Look an artifact up by id; `undefined` when the id is unknown. */
  get(id) {
    return this.items.get(id);
  }
  /** All stored artifacts, oldest first, as a fresh array. */
  list() {
    return [...this.items.values()];
  }
  /** Number of stored artifacts. */
  get size() {
    return this.items.size;
  }
  /**
   * Cheap keyword retrieval over brief+content — rank by how many distinct query terms each artifact contains.
   *
   * Matching is a case-insensitive substring test. Terms shorter than 3 chars are ignored and artifacts that
   * match no term are left out. When the query yields no usable terms, the `k` most recently stored
   * artifacts are returned instead.
   *
   * @param query free-text query
   * @param k maximum number of artifacts to return (default 3); zero, negative or NaN yields an empty list
   * @returns up to `k` artifacts, best match first (ties keep storage order)
   */
  search(query, k = 3) {
    // Clamp k up front: a raw `slice(-0)` would return everything and a negative k would
    // make `slice(0, k)` drop items from the end instead of limiting the count.
    const n = Math.trunc(Number(k));
    const limit = n > 0 ? n : 0;
    if (limit === 0) return [];
    const all = this.list();
    // Split on anything that is not a letter, mark, digit or underscore in ANY script, so
    // non-ASCII queries still produce terms (plain \W treats every non-ASCII letter as a separator).
    const terms = [...new Set(String(query ?? "").toLowerCase().split(/[^\p{L}\p{M}\p{N}_]+/u).filter((t) => t.length > 2))];
    if (!terms.length) return all.slice(-limit);
    return all.map((a) => {
      const hay = (a.brief + " " + a.content).toLowerCase();
      return { a, score: terms.reduce((s, t) => s + (hay.includes(t) ? 1 : 0), 0) };
    }).filter((x) => x.score > 0).sort((x, y) => y.score - x.score).slice(0, limit).map((x) => x.a);
  }
};
|
|
3602
|
+
/**
 * Render tool arguments as compact JSON for use in an artifact's brief label.
 *
 * @param args arguments the tool was called with (any value)
 * @returns the JSON text when it is at most 60 chars, otherwise its first 57 chars plus "…";
 *          an empty string when the value cannot be serialised
 */
function shortArgs(args) {
  let json;
  try {
    json = JSON.stringify(args);
  } catch {
    // Circular structures, BigInt values or a throwing toJSON: the label simply omits the args.
    return "";
  }
  // JSON.stringify yields undefined (not a string) for undefined, functions and symbols.
  if (typeof json !== "string") return "";
  if (json.length <= 60) return json;
  let cut = 57;
  // Do not end on the first half of a surrogate pair — that would leave a lone surrogate in the label.
  const last = json.charCodeAt(cut - 1);
  if (last >= 0xd800 && last <= 0xdbff) cut -= 1;
  return json.slice(0, cut) + "\u2026";
}
|
|
3610
|
+
/**
 * Wrap a tool so that an oversized string result is stored as an artifact and replaced by a compact stub.
 *
 * Non-string results, and strings whose length is at or below the threshold, are returned unchanged and
 * nothing is stored. Otherwise the raw output goes into `store` and the caller receives a three-line stub:
 * an id/tool/size header, a whitespace-collapsed preview, and a hint to call `Ask` for details.
 *
 * @param tool tool to wrap; its own properties are copied and only `run` is replaced
 * @param store artifact store that receives oversized outputs
 * @param opts `threshold` (default 1500 chars) and `previewChars` (default 320 chars)
 * @returns a tool with the same shape as `tool`
 */
function withArtifactCapture(tool, store, opts = {}) {
  const threshold = opts.threshold ?? 1500;
  const previewChars = opts.previewChars ?? 320;
  return {
    ...tool,
    run: async (args, ctx) => {
      const raw = await tool.run(args, ctx);
      if (typeof raw !== "string" || raw.length <= threshold) return raw;
      const art = store.put(tool.name, raw, `${tool.name}(${shortArgs(args)})`);
      const preview = raw.slice(0, previewChars).replace(/\s+/g, " ").trim();
      // Mark the preview as cut off only when it really is; with previewChars >= raw.length the
      // preview already shows the whole output and a trailing ellipsis would be misleading.
      const more = raw.length > previewChars ? "\u2026" : "";
      return `[artifact ${art.id} \xB7 ${tool.name} \xB7 ${art.bytes} bytes \u2014 full output stored out of context to keep it clean]
preview: ${preview}${more}
To pull a specific detail from it, call Ask({ question: "\u2026", over: "${art.id}" }) \u2014 do NOT guess at what the preview cuts off.`;
    }
  };
}
|
|
3626
|
+
// System prompt for the extraction model: answer strictly from the supplied artifacts.
var ASK_SYSTEM = "You are a retrieval-extraction step. Given one or more ARTIFACTS (raw outputs from earlier tools) and a QUESTION, find and return exactly the information that answers the question \u2014 quote values/facts verbatim from the artifacts. Do NOT add analysis, opinions, or anything not grounded in the artifacts. If the answer is not present, say so plainly. Be concise.";
/**
 * Build the `Ask` tool: answer a question from stored artifacts without returning their raw content.
 *
 * With `over`, the listed ids (comma- or whitespace-separated) are read from the store; without it the
 * store's `search` picks artifacts for the question. Each artifact is cut to `maxChars` and sent to
 * `o.ai.chat` together with the question; only the model's answer, plus a "(from …)" footer naming the
 * artifacts used, is returned. Failures are reported as strings starting with "Error", never thrown.
 *
 * @param o `store`, `ai`, `model`, and optional `maxChars` (default 12000)
 * @returns an agent tool named "Ask"
 */
function makeAskTool(o) {
  const maxChars = o.maxChars ?? 12e3;
  return {
    name: "Ask",
    description: 'Answer a question by peeking into stored artifacts \u2014 large earlier outputs (web search, big file reads, subagent reports) kept out of your context. Pass `over` with an artifact id like "a3" (or several, comma-separated) for a targeted lookup, or omit it to auto-find relevant artifacts. Returns only the extracted answer; the full data never enters your context.',
    parameters: {
      type: "object",
      required: ["question"],
      properties: {
        question: { type: "string", description: "what you need from the artifact(s)" },
        over: { type: "string", description: 'artifact id(s) to read, e.g. "a3" or "a1,a4"; omit to auto-retrieve' }
      }
    },
    async run({ question, over }) {
      const q = String(question ?? "").trim();
      if (!q) return "Error: empty question";
      const missing = [];
      let arts;
      if (over) {
        // De-duplicate requested ids so a repeated id is not fed to the extractor twice,
        // and remember the ones that do not resolve so the caller is told about them.
        const wanted = [...new Set(String(over).split(/[,\s]+/).filter(Boolean))];
        arts = [];
        for (const id of wanted) {
          const hit = o.store.get(id);
          if (hit) arts.push(hit);
          else missing.push(id);
        }
      } else {
        arts = o.store.search(q);
      }
      if (!arts.length) return over ? `Error: no artifact matching '${over}' (have: ${o.store.list().map((a) => a.id).join(", ") || "none"})` : "(no relevant artifacts found)";
      const corpus = arts.map((a) => `### artifact ${a.id} (${a.tool})
${a.content.slice(0, maxChars)}`).join("\n\n");
      try {
        // NOTE(review): `reflectOnRun` in this bundle passes `stream: false` to the same kind of call — confirm whether it is needed here too.
        const res = await o.ai.chat({
          model: o.model,
          messages: [
            { role: "system", content: ASK_SYSTEM },
            { role: "user", content: `ARTIFACTS:
${corpus}

QUESTION: ${q}` }
          ]
        });
        const answer = (res?.content ?? "").trim();
        const used = arts.map((a) => a.id).join(", ");
        // Surface unresolved ids so the caller knows the answer covers fewer artifacts than requested.
        const source = missing.length ? `${used}; not found: ${missing.join(", ")}` : used;
        return answer ? `${answer}

(from ${source})` : "(no answer found in artifacts)";
      } catch (e) {
        log5.debug("Ask extraction failed", e);
        return `Error querying artifacts: ${e?.message ?? e}`;
      }
    }
  };
}
|
|
3669
|
+
|
|
3571
3670
|
// src/lessons.ts
|
|
3572
3671
|
init_logging();
|
|
3573
|
-
var
|
|
3672
|
+
var log6 = forComponent("Lessons");
|
|
3574
3673
|
var LessonOptionsDefaults = class {
|
|
3575
3674
|
minRepeats = 2;
|
|
3576
3675
|
};
|
|
@@ -3595,15 +3694,15 @@ function lessonCapture(options) {
|
|
|
3595
3694
|
counts.set(lesson.slug, n);
|
|
3596
3695
|
if (n < o.minRepeats) return;
|
|
3597
3696
|
written.add(lesson.slug);
|
|
3598
|
-
await writeFact(o.fs, o.dir, lesson.slug, lesson.body).catch((e) =>
|
|
3599
|
-
|
|
3697
|
+
await writeFact(o.fs, o.dir, lesson.slug, lesson.body).catch((e) => log6.warn(`could not persist ${lesson.slug}: ${e?.message ?? e}`));
|
|
3698
|
+
log6.debug(`captured lesson ${lesson.slug} (recurred ${n}\xD7)`);
|
|
3600
3699
|
}
|
|
3601
3700
|
};
|
|
3602
3701
|
}
|
|
3603
3702
|
|
|
3604
3703
|
// src/reflect.ts
|
|
3605
3704
|
init_logging();
|
|
3606
|
-
var
|
|
3705
|
+
var log7 = forComponent("Reflect");
|
|
3607
3706
|
async function reflectOnRun(o) {
|
|
3608
3707
|
const digest = digestRun(o.result.messages, o.maxDigestChars ?? 6e3);
|
|
3609
3708
|
if (!digest.trim()) return null;
|
|
@@ -3619,7 +3718,7 @@ If the run was fine or the issue was purely task-specific (not generalizable), r
|
|
|
3619
3718
|
const r = await o.ai.chat({ model: o.model, messages: [{ role: "user", content: prompt }], stream: false });
|
|
3620
3719
|
text = r?.content ?? "";
|
|
3621
3720
|
} catch (e) {
|
|
3622
|
-
|
|
3721
|
+
log7.warn(`reflection call failed: ${e?.message ?? e}`);
|
|
3623
3722
|
return null;
|
|
3624
3723
|
}
|
|
3625
3724
|
const m = text.match(/LESSON:\s*(.+)/i);
|
|
@@ -3629,10 +3728,10 @@ If the run was fine or the issue was purely task-specific (not generalizable), r
|
|
|
3629
3728
|
try {
|
|
3630
3729
|
await writeFact(o.fs, o.dir, slug, lesson);
|
|
3631
3730
|
} catch (e) {
|
|
3632
|
-
|
|
3731
|
+
log7.warn(`could not persist lesson: ${e?.message ?? e}`);
|
|
3633
3732
|
return null;
|
|
3634
3733
|
}
|
|
3635
|
-
|
|
3734
|
+
log7.debug(`reflection persisted ${slug}`);
|
|
3636
3735
|
return slug;
|
|
3637
3736
|
}
|
|
3638
3737
|
function digestRun(messages, maxChars) {
|
|
@@ -3649,7 +3748,7 @@ function digestRun(messages, maxChars) {
|
|
|
3649
3748
|
// src/duplex.ts
|
|
3650
3749
|
import { MemFilesystem as MemFilesystem2 } from "@livx.cc/wcli/core";
|
|
3651
3750
|
init_logging();
|
|
3652
|
-
var
|
|
3751
|
+
var log8 = forComponent("DuplexAgent");
|
|
3653
3752
|
function describeCall(call) {
|
|
3654
3753
|
const v = call.args && Object.values(call.args).find((x) => typeof x === "string" && x.trim());
|
|
3655
3754
|
const hint = v ? ` (${String(v).replace(/\s+/g, " ").trim().slice(0, 48)})` : "";
|
|
@@ -3703,7 +3802,7 @@ var DuplexAgentOptions = class {
|
|
|
3703
3802
|
/** User-scope memory dir for global facts (type=user/feedback). Forwarded to Remember's routing. */
|
|
3704
3803
|
memoryUserDir;
|
|
3705
3804
|
};
|
|
3706
|
-
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. 
After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives,
|
|
3805
|
+
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. 
After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, speak the USEFUL gist in one or two short sentences \u2014 the actual answer the user wanted (the headline finding, the key numbers), not the thinnest possible "it\'s done". A forecast \u2192 say it\'s calm AND that it\'s good for swimming but not surf; a count \u2192 say the number. Be brief, but do not drop the substance. If the result is a LIST (search results, multiple files/matches), the user CANNOT see it \u2014 there is no screen and no numbered menu to point at. Speak the gist: say what you found and name the top one or two by NAME (the source, not "the first one" or a number), then ask plainly if they want more. Never ask them to "pick which one" or reference items by position. The completed result stays in YOUR context \u2014 it is yours to draw on. When the user follows up ("tell me more", "what else", "and?"), answer FROM that result first: you already have the detail, so elaborate on what you have. Do NOT spawn a fresh worker to re-search or re-gather what you were just handed. Re-dispatch ONLY when genuinely new information is needed \u2014 e.g. the user wants the full contents of a SPECIFIC source, which is one WebFetch of that URL, not a brand-new search. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. 
Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. 
Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file, or checking your own `capabilities`/tools \u2014 use `QuickLook` (instant, no task). Whenever the user asks what you can do or whether you have some ability, QuickLook `capabilities` and answer from that \u2014 never guess. Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
|
|
3707
3806
|
var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
|
|
3708
3807
|
var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
|
|
3709
3808
|
var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
|
|
@@ -3826,7 +3925,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
3826
3925
|
try {
|
|
3827
3926
|
await this.voice.send("[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools.");
|
|
3828
3927
|
} catch (e) {
|
|
3829
|
-
|
|
3928
|
+
log8.warn(`ack nudge failed: ${e instanceof Error ? e.message : e}`);
|
|
3830
3929
|
} finally {
|
|
3831
3930
|
this.nudging = false;
|
|
3832
3931
|
}
|
|
@@ -3949,7 +4048,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
3949
4048
|
this.notify("task_verify", `task ${id}: verifying`, { id });
|
|
3950
4049
|
const cres = await new Agent(agentOpts).run(checkBrief);
|
|
3951
4050
|
if (cres.finishReason !== "stop") {
|
|
3952
|
-
|
|
4051
|
+
log8.warn(`task ${id}: verify inconclusive (${cres.finishReason})`);
|
|
3953
4052
|
this.notify("task_verify", `task ${id}: verify inconclusive (${cres.finishReason})`, { id, finishReason: cres.finishReason });
|
|
3954
4053
|
}
|
|
3955
4054
|
const sum = (a = 0, b = 0) => a + b;
|
|
@@ -4051,7 +4150,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
4051
4150
|
return this.failTask(rec, msg);
|
|
4052
4151
|
}
|
|
4053
4152
|
rec.status = "done";
|
|
4054
|
-
|
|
4153
|
+
log8.verbose(`task ${id} done (${res.steps} steps)`);
|
|
4055
4154
|
this.notify("task_done", `task ${id} (${rec.label}) completed`, {
|
|
4056
4155
|
id,
|
|
4057
4156
|
text: res.text,
|
|
@@ -4068,7 +4167,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
4068
4167
|
failTask(rec, msg) {
|
|
4069
4168
|
this.dropAsk(rec.id);
|
|
4070
4169
|
rec.status = "error";
|
|
4071
|
-
|
|
4170
|
+
log8.warn(`task ${rec.id} failed: ${msg}`);
|
|
4072
4171
|
this.notify("task_error", `task ${rec.id} (${rec.label}) failed: ${msg}`);
|
|
4073
4172
|
this.queueRevoice(`[task ${rec.id} failed] ${msg}`);
|
|
4074
4173
|
}
|
|
@@ -4433,7 +4532,7 @@ init_logging();
|
|
|
4433
4532
|
|
|
4434
4533
|
// src/voice/engine.ts
|
|
4435
4534
|
init_logging();
|
|
4436
|
-
var
|
|
4535
|
+
var log9 = forComponent("VoiceEngine");
|
|
4437
4536
|
var now = () => performance.now();
|
|
4438
4537
|
var VoiceEngineOptions = class {
|
|
4439
4538
|
stt;
|
|
@@ -4540,7 +4639,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4540
4639
|
this.stt.onLevel = (rms) => this.handleLevel(rms);
|
|
4541
4640
|
await Promise.all([this.tts.connect(), this.stt.start()]);
|
|
4542
4641
|
this.setState("listening");
|
|
4543
|
-
|
|
4642
|
+
log9.debug(`voice I/O up (${this.stt.usingAec ? "AEC" : "heuristic echo"} capture)`);
|
|
4544
4643
|
}
|
|
4545
4644
|
get usingAec() {
|
|
4546
4645
|
return this.stt.usingAec;
|
|
@@ -4592,7 +4691,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4592
4691
|
this.reply += text;
|
|
4593
4692
|
for (const w of this.words(this.reply)) this.echoWords.add(w);
|
|
4594
4693
|
this.tts.speak(text, true);
|
|
4595
|
-
if (!this.spokeDeltas && this.turnStartAt)
|
|
4694
|
+
if (!this.spokeDeltas && this.turnStartAt) log9.debug(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
|
|
4596
4695
|
this.spokeDeltas = true;
|
|
4597
4696
|
this.setState("speaking");
|
|
4598
4697
|
}
|
|
@@ -4613,7 +4712,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4613
4712
|
}
|
|
4614
4713
|
this.drainTimer = null;
|
|
4615
4714
|
this.speaking = false;
|
|
4616
|
-
if (this.turnStartAt)
|
|
4715
|
+
if (this.turnStartAt) log9.debug(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
|
|
4617
4716
|
this.echoUntil = now() + 2500;
|
|
4618
4717
|
if (!this.usingAec) this.stt.reset();
|
|
4619
4718
|
this.setState("listening");
|
|
@@ -4754,7 +4853,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4754
4853
|
this.pendingUtt = this.pendingUtt ? `${this.pendingUtt} ${text}` : text;
|
|
4755
4854
|
if (this.pendingTimer) clearTimeout(this.pendingTimer);
|
|
4756
4855
|
if (this.options.incompleteMergeMs && this.looksIncomplete(this.pendingUtt)) {
|
|
4757
|
-
|
|
4856
|
+
log9.verbose(`hold: incomplete utterance "${this.pendingUtt.slice(-40)}"`);
|
|
4758
4857
|
this.options.onHold();
|
|
4759
4858
|
if (this.options.holdFiller && !this.speaking) {
|
|
4760
4859
|
this.beginSpeech();
|
|
@@ -4852,7 +4951,7 @@ async function resolveAuth(auth) {
|
|
|
4852
4951
|
}
|
|
4853
4952
|
|
|
4854
4953
|
// src/voice/soniox.ts
|
|
4855
|
-
var
|
|
4954
|
+
var log10 = forComponent("SonioxSTT");
|
|
4856
4955
|
var now2 = () => performance.now();
|
|
4857
4956
|
var SonioxSTTOptions = class {
|
|
4858
4957
|
auth = "";
|
|
@@ -4909,9 +5008,9 @@ var SonioxSTT = class {
|
|
|
4909
5008
|
this.ws.onmessage = (ev) => this.handle(JSON.parse(String(ev.data)));
|
|
4910
5009
|
this.ws.onclose = (ev) => {
|
|
4911
5010
|
if (this.stopped) return;
|
|
4912
|
-
|
|
5011
|
+
log10.warn(`soniox ws closed (${ev.code} ${ev.reason || ""}) \u2014 reconnecting`);
|
|
4913
5012
|
this.reset();
|
|
4914
|
-
this.connectWs().catch((e) =>
|
|
5013
|
+
this.connectWs().catch((e) => log10.error(`soniox reconnect failed: ${e.message}`));
|
|
4915
5014
|
};
|
|
4916
5015
|
}
|
|
4917
5016
|
async start() {
|
|
@@ -4921,7 +5020,7 @@ var SonioxSTT = class {
|
|
|
4921
5020
|
this.endpointTimer = setInterval(() => {
|
|
4922
5021
|
const combined = (this.finalText + this.partialText).trim();
|
|
4923
5022
|
if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
|
|
4924
|
-
if (this.firstTokenAt)
|
|
5023
|
+
if (this.firstTokenAt) log10.debug(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
|
|
4925
5024
|
this.reset();
|
|
4926
5025
|
this.onUtterance(combined, now2());
|
|
4927
5026
|
}, 120);
|
|
@@ -4938,7 +5037,7 @@ var SonioxSTT = class {
|
|
|
4938
5037
|
});
|
|
4939
5038
|
}
|
|
4940
5039
|
handle(m) {
|
|
4941
|
-
if (m.error_message) return
|
|
5040
|
+
if (m.error_message) return log10.error(`soniox: ${m.error_message}`);
|
|
4942
5041
|
let endpoint = false;
|
|
4943
5042
|
for (const t of m.tokens ?? []) {
|
|
4944
5043
|
if (t.text === "<end>") endpoint = true;
|
|
@@ -4954,7 +5053,7 @@ var SonioxSTT = class {
|
|
|
4954
5053
|
this.onPartial(combined);
|
|
4955
5054
|
if (endpoint && this.finalText.trim()) {
|
|
4956
5055
|
const utterance = this.finalText.trim();
|
|
4957
|
-
if (this.firstTokenAt)
|
|
5056
|
+
if (this.firstTokenAt) log10.debug(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
|
|
4958
5057
|
this.reset();
|
|
4959
5058
|
this.onUtterance(utterance, now2());
|
|
4960
5059
|
}
|
|
@@ -4976,7 +5075,7 @@ var SonioxSTT = class {
|
|
|
4976
5075
|
|
|
4977
5076
|
// src/voice/cartesia.ts
|
|
4978
5077
|
init_logging();
|
|
4979
|
-
var
|
|
5078
|
+
var log11 = forComponent("CartesiaTTS");
|
|
4980
5079
|
var now3 = () => performance.now();
|
|
4981
5080
|
var CartesiaTTSOptions = class {
|
|
4982
5081
|
auth = "";
|
|
@@ -5021,9 +5120,9 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
5021
5120
|
this.ws.onerror = (e) => rej(new Error(`cartesia ws: ${e.message || "connect failed"}`));
|
|
5022
5121
|
});
|
|
5023
5122
|
this.ws.onclose = (ev) => {
|
|
5024
|
-
|
|
5123
|
+
log11.warn(`cartesia ws closed (${ev.code} ${ev.reason || ""})`);
|
|
5025
5124
|
if (!this.closed) {
|
|
5026
|
-
this.connecting = this.doConnect().catch((e) =>
|
|
5125
|
+
this.connecting = this.doConnect().catch((e) => log11.error(`cartesia reconnect failed: ${e.message}`));
|
|
5027
5126
|
}
|
|
5028
5127
|
};
|
|
5029
5128
|
this.ws.onmessage = (ev) => {
|
|
@@ -5033,7 +5132,7 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
5033
5132
|
this.consecutiveErrors = 0;
|
|
5034
5133
|
if (this.down) {
|
|
5035
5134
|
this.down = false;
|
|
5036
|
-
|
|
5135
|
+
log11.info("TTS recovered");
|
|
5037
5136
|
this.stopProbe();
|
|
5038
5137
|
}
|
|
5039
5138
|
if (!this.firstAudioAt) this.firstAudioAt = now3();
|
|
@@ -5042,7 +5141,7 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
5042
5141
|
this.consecutiveErrors = 0;
|
|
5043
5142
|
if (this.down) {
|
|
5044
5143
|
this.down = false;
|
|
5045
|
-
|
|
5144
|
+
log11.info("TTS recovered");
|
|
5046
5145
|
this.stopProbe();
|
|
5047
5146
|
}
|
|
5048
5147
|
this.onDone();
|
|
@@ -5051,11 +5150,11 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
5051
5150
|
this.consecutiveErrors++;
|
|
5052
5151
|
if (!this.down && this.consecutiveErrors >= _CartesiaTTS.CB_THRESHOLD) {
|
|
5053
5152
|
this.down = true;
|
|
5054
|
-
|
|
5153
|
+
log11.warn(`TTS circuit breaker open \u2014 ${this.consecutiveErrors} consecutive errors, switching to text-only`);
|
|
5055
5154
|
this.onDone();
|
|
5056
5155
|
this.startProbe();
|
|
5057
5156
|
} else if (!this.down) {
|
|
5058
|
-
|
|
5157
|
+
log11.warn(`cartesia: ${JSON.stringify(m)}`);
|
|
5059
5158
|
}
|
|
5060
5159
|
}
|
|
5061
5160
|
};
|
|
@@ -5134,6 +5233,7 @@ import { MemFilesystem as MemFilesystem3, IndexedDbFilesystem, CommandExecutor a
|
|
|
5134
5233
|
export {
|
|
5135
5234
|
Agent,
|
|
5136
5235
|
AgentOptions,
|
|
5236
|
+
ArtifactStore,
|
|
5137
5237
|
BodDbFilesystem,
|
|
5138
5238
|
CartesiaTTS,
|
|
5139
5239
|
CartesiaTTSOptions,
|
|
@@ -5197,6 +5297,7 @@ export {
|
|
|
5197
5297
|
loadMemory,
|
|
5198
5298
|
loadSkills,
|
|
5199
5299
|
log,
|
|
5300
|
+
makeAskTool,
|
|
5200
5301
|
makeContext,
|
|
5201
5302
|
makeJobTools,
|
|
5202
5303
|
makeLazyMcpToolSearch,
|
|
@@ -5234,6 +5335,7 @@ export {
|
|
|
5234
5335
|
validateToolCode,
|
|
5235
5336
|
webFetchTool,
|
|
5236
5337
|
webSearchTool,
|
|
5338
|
+
withArtifactCapture,
|
|
5237
5339
|
writeFact,
|
|
5238
5340
|
writeTool
|
|
5239
5341
|
};
|