agent.libx.js 0.93.12 → 0.93.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +19 -2
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.js +19 -2
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -888,6 +888,12 @@ declare class VoiceEngineOptions {
|
|
|
888
888
|
overlapPause: boolean;
|
|
889
889
|
/** no new partial activity for this long while paused → resume, drop the interjection */
|
|
890
890
|
overlapResumeMs: number;
|
|
891
|
+
/** A genuine barge over a LONG reply is defeated by the dominant-novel gate: Meet echoes our own
|
|
892
|
+
* speech back, so the partial is mostly our words + a few of hers → never "dominant novel" → it
|
|
893
|
+
* resumes (replaying old audio — the audible "completes the buffer" blip) instead of ceding.
|
|
894
|
+
* Mechanism-based discriminator: a re-PAUSE this soon after a resume = a persistent human, not an
|
|
895
|
+
* echo blip (which pauses once and stalls). Cede on the re-pause regardless of the novel gate. */
|
|
896
|
+
overlapRepauseCedeMs: number;
|
|
891
897
|
}
|
|
892
898
|
declare class VoiceEngine {
|
|
893
899
|
options: VoiceEngineOptions;
|
|
@@ -912,6 +918,7 @@ declare class VoiceEngine {
|
|
|
912
918
|
private pendingTimer;
|
|
913
919
|
private lastInterrupted;
|
|
914
920
|
private pausedAt;
|
|
921
|
+
private lastResumeAt;
|
|
915
922
|
private lastOverlapPartial;
|
|
916
923
|
private resumeTimer;
|
|
917
924
|
private turnStartAt;
|
package/dist/index.js
CHANGED
|
@@ -3663,7 +3663,7 @@ var DuplexAgentOptions = class {
|
|
|
3663
3663
|
/** User-scope memory dir for global facts (type=user/feedback). Forwarded to Remember's routing. */
|
|
3664
3664
|
memoryUserDir;
|
|
3665
3665
|
};
|
|
3666
|
-
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A
|
|
3666
|
+
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (typically files and shell; other tools such as web access only if this deployment wired them). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can see, access, or do something, do NOT assume and do NOT claim a capability you have not confirmed \u2014 call `Act` with a clear, self-contained brief to find out (it knows its real tools), then report back honestly. If Act lacks a capability, tell the user plainly; never promise a capability (web search, live data, etc.) before confirming the worker actually has it. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nAfter calling Act or Think, tell the user you are on it in one short sentence, then end your turn. Do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, summarize it for the ear in one or two short sentences. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file \u2014 use `QuickLook` (instant, no task). Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
|
|
3667
3667
|
var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
|
|
3668
3668
|
var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
|
|
3669
3669
|
var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
|
|
@@ -4314,6 +4314,12 @@ var VoiceEngineOptions = class {
|
|
|
4314
4314
|
overlapPause = true;
|
|
4315
4315
|
/** no new partial activity for this long while paused → resume, drop the interjection */
|
|
4316
4316
|
overlapResumeMs = 700;
|
|
4317
|
+
/** A genuine barge over a LONG reply is defeated by the dominant-novel gate: Meet echoes our own
|
|
4318
|
+
* speech back, so the partial is mostly our words + a few of hers → never "dominant novel" → it
|
|
4319
|
+
* resumes (replaying old audio — the audible "completes the buffer" blip) instead of ceding.
|
|
4320
|
+
* Mechanism-based discriminator: a re-PAUSE this soon after a resume = a persistent human, not an
|
|
4321
|
+
* echo blip (which pauses once and stalls). Cede on the re-pause regardless of the novel gate. */
|
|
4322
|
+
overlapRepauseCedeMs = 1500;
|
|
4317
4323
|
};
|
|
4318
4324
|
var VoiceEngine = class _VoiceEngine {
|
|
4319
4325
|
options;
|
|
@@ -4346,6 +4352,8 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4346
4352
|
lastInterrupted = null;
|
|
4347
4353
|
// overlap (pause) tier state — AEC + pause-capable sinks only
|
|
4348
4354
|
pausedAt = 0;
|
|
4355
|
+
lastResumeAt = 0;
|
|
4356
|
+
// when the overlap last resumed from a false alarm — a quick re-pause cedes
|
|
4349
4357
|
lastOverlapPartial = "";
|
|
4350
4358
|
// change-detection: only NEW partial text counts as activity
|
|
4351
4359
|
resumeTimer = null;
|
|
@@ -4479,6 +4487,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4479
4487
|
this.drainTimer = null;
|
|
4480
4488
|
}
|
|
4481
4489
|
this.resetOverlap(false);
|
|
4490
|
+
this.lastResumeAt = 0;
|
|
4482
4491
|
const heardChars = Math.round(Math.max(0, this.player.playedMs()) / 1e3 * 15);
|
|
4483
4492
|
if (this.reply) this.lastInterrupted = { full: this.reply, heard: this.reply.slice(0, heardChars) };
|
|
4484
4493
|
this.speaking = false;
|
|
@@ -4530,6 +4539,11 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4530
4539
|
if (!this.pausedAt) {
|
|
4531
4540
|
this.pausedAt = now();
|
|
4532
4541
|
this.player.pause();
|
|
4542
|
+
if (this.lastResumeAt && now() - this.lastResumeAt < this.options.overlapRepauseCedeMs) {
|
|
4543
|
+
this.interrupt();
|
|
4544
|
+
this.options.onBargeIn(this.ctxOpen ? "speaking" : "drain");
|
|
4545
|
+
return;
|
|
4546
|
+
}
|
|
4533
4547
|
}
|
|
4534
4548
|
if (this.genuine(txt) && this.words(txt).length >= 2) {
|
|
4535
4549
|
const phase = this.ctxOpen ? "speaking" : "drain";
|
|
@@ -4618,7 +4632,10 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
4618
4632
|
clearTimeout(this.resumeTimer);
|
|
4619
4633
|
this.resumeTimer = null;
|
|
4620
4634
|
}
|
|
4621
|
-
if (this.pausedAt && resume)
|
|
4635
|
+
if (this.pausedAt && resume) {
|
|
4636
|
+
this.player.resume?.();
|
|
4637
|
+
this.lastResumeAt = now();
|
|
4638
|
+
}
|
|
4622
4639
|
this.pausedAt = 0;
|
|
4623
4640
|
this.lastOverlapPartial = "";
|
|
4624
4641
|
this.gatePassTimes = [];
|