@livx.cc/agentx 0.95.6 → 0.96.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +184 -41
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +66 -3
- package/dist/index.js +145 -21
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -831,6 +831,11 @@ interface TaskRecord {
|
|
|
831
831
|
tail: string[];
|
|
832
832
|
/** Final report text (or error message) once the task settled. */
|
|
833
833
|
result?: string;
|
|
834
|
+
/** The original (untemplated) brief — replayed verbatim into an escalation/re-delegation. */
|
|
835
|
+
brief: string;
|
|
836
|
+
/** True when this task is ITSELF an automatic follow-up (escalation/re-delegate) of a prior task.
|
|
837
|
+
* Its integration turn may NOT escalate again — caps auto-follow-up to one hop per original task. */
|
|
838
|
+
followUp: boolean;
|
|
834
839
|
}
|
|
835
840
|
type WorkerTier = 'act' | 'think';
|
|
836
841
|
declare class DuplexAgentOptions {
|
|
@@ -903,6 +908,14 @@ declare class DuplexAgent {
|
|
|
903
908
|
private queue;
|
|
904
909
|
private seq;
|
|
905
910
|
private pendingEvents;
|
|
911
|
+
/** Out-of-band follow-up attribution for the events coalescing into the next flush turn: TRUE iff ≥1 of
|
|
912
|
+
* the tasks being integrated was NON-CLEAN (early-stop/failure). Carried out-of-band on the enqueue call
|
|
913
|
+
* by the caller that KNOWS the outcome — a plain boolean the MODEL CANNOT PERTURB. It is NOT scanned from
|
|
914
|
+
* worker-authored event text (v1: an "Outcome:" substring over-stamped siblings) and NOT keyed on a brief
|
|
915
|
+
* string the reflex re-authors (v2: a paraphrased escalation brief missed the Set → followUp:false →
|
|
916
|
+
* RE-ENABLED unbounded auto-escalation, the dangerous runaway direction). See [[wrong-discriminator]] /
|
|
917
|
+
* [[drive-real-reflex]] / [[fakeaiclient-blind-to-wire-format]]. */
|
|
918
|
+
private pendingNonClean;
|
|
906
919
|
private flushQueued;
|
|
907
920
|
/** Per-voice-turn guards (reset by resetTurn at each turn's start). The reflex is a weak model:
|
|
908
921
|
* left unguarded it polls TaskStatus after a dispatch and/or dispatches silently (dead air).
|
|
@@ -914,6 +927,21 @@ declare class DuplexAgent {
|
|
|
914
927
|
private reflexBuf;
|
|
915
928
|
private reflexForwarded;
|
|
916
929
|
private fabricationCut;
|
|
930
|
+
/** TRUE for the duration of a re-voice turn that is integrating ≥1 NON-CLEAN task (turn-eligibility,
|
|
931
|
+
* carried out-of-band — NOT derived from any worker/brief string). ANY Act/Think dispatched in such a
|
|
932
|
+
* turn is stamped followUp:true. This GUARANTEES the dangerous direction is impossible: a genuine
|
|
933
|
+
* escalation (even one with a paraphrased brief) ALWAYS lands in a non-clean integration turn, so it is
|
|
934
|
+
* ALWAYS recognized as a follow-up and CANNOT re-escalate (one hop). The single-dispatch-per-turn guard
|
|
935
|
+
* means at most one dispatch happens per flush, so realistically "the one dispatch IS the escalation".
|
|
936
|
+
* ACCEPTED SAFE-DIRECTION ERROR: if the reflex instead dispatches FRESH unrelated work during a non-clean
|
|
937
|
+
* flush (rare — and only possible when it batches multiple calls in one step, bypassing the guard), that
|
|
938
|
+
* fresh task is over-stamped followUp:true and forgoes ONE future auto-escalation. That is SAFE (it only
|
|
939
|
+
* ever REMOVES a future escalation, never adds one — no runaway) and is the correct side to err on. */
|
|
940
|
+
private turnFollowUp;
|
|
941
|
+
/** Hard absolute backstop against runaway regardless of attribution: total automatic escalations across
|
|
942
|
+
* the whole conversation. Once it hits MAX_AUTO_ESCALATIONS, no integration turn offers escalate/re-delegate. */
|
|
943
|
+
private autoEscalations;
|
|
944
|
+
private static readonly MAX_AUTO_ESCALATIONS;
|
|
917
945
|
/** Parked worker questions awaiting a (voice-relayed) user answer, keyed by ask id. */
|
|
918
946
|
readonly pendingAsks: Map<string, {
|
|
919
947
|
question: string;
|
|
@@ -924,6 +952,9 @@ declare class DuplexAgent {
|
|
|
924
952
|
constructor(options?: Partial<DuplexAgentOptions>);
|
|
925
953
|
/** Resolve memory tools + inject index into voice system prompt (once). */
|
|
926
954
|
private initMemory;
|
|
955
|
+
/** Flush any held-back trailing fragment (a possible `[task` opener that never completed) once the
|
|
956
|
+
* turn's stream is done — so a legit message ending in "[t" isn't silently dropped. */
|
|
957
|
+
private flushHeldReflexTail;
|
|
927
958
|
/** Clear the per-turn guards. Called at the head of every voice turn (user send + re-voice flush). */
|
|
928
959
|
private resetTurn;
|
|
929
960
|
/** preToolUse guard on the reflex: once it has dispatched this turn, a dispatch is "said my piece,
|
|
@@ -950,7 +981,10 @@ declare class DuplexAgent {
|
|
|
950
981
|
/** Promise-chain mutex: turns run strictly one at a time; a failed turn doesn't poison the chain. */
|
|
951
982
|
private enqueue;
|
|
952
983
|
private notify;
|
|
953
|
-
/** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
|
|
984
|
+
/** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
|
|
985
|
+
* `nonClean` (out-of-band boolean, set by the caller that KNOWS this event integrates a NON-CLEAN outcome)
|
|
986
|
+
* marks the coalesced flush as a non-clean integration turn — turn-eligibility, never inferred from event
|
|
987
|
+
* text and never keyed on a (re-authored) brief string. Any dispatch in such a turn is a follow-up. */
|
|
954
988
|
private queueRevoice;
|
|
955
989
|
/** The worker's brief: the Act/Think args + a STATIC text snapshot of the recent conversation.
|
|
956
990
|
* Act briefs get a self-verify footer — the worker's report is trusted without review, so it
|
|
@@ -976,6 +1010,21 @@ declare class DuplexAgent {
|
|
|
976
1010
|
parkQuestion(askId: string, question: string): Promise<string>;
|
|
977
1011
|
/** Resolve any question a settling/cancelled task left parked (its answer can no longer matter). */
|
|
978
1012
|
private dropAsk;
|
|
1013
|
+
/** Build the INTEGRATION TURN prompt for a settled worker. Instead of trust-and-forwarding the raw
|
|
1014
|
+
* result, the result re-enters the reflex as a decision (like a tool_result flowing back into a normal
|
|
1015
|
+
* agent loop): the reflex evaluates the outcome against the original intent and chooses what to do next.
|
|
1016
|
+
*
|
|
1017
|
+
* Decision branches (the reflex acts on them with EXISTING tools — no new surface):
|
|
1018
|
+
* • accept → just SPEAK the result to the user (happy path; the only move on a clean success).
|
|
1019
|
+
* • escalate → call `Think` with the SAME brief — only when Act failed/stalled AND a Think tier
|
|
1020
|
+
* exists AND this task wasn't already a follow-up (one hop max). Wires the dead
|
|
1021
|
+
* "Reserve Think for a problem Act already FAILED at" promise.
|
|
1022
|
+
* • re-delegate→ call `Act` with a CORRECTED brief — for a recoverable error / partial result.
|
|
1023
|
+
* • ask → ask the user ONE concrete question if genuinely blocked.
|
|
1024
|
+
*
|
|
1025
|
+
* Keeps the `[task <id> completed]` / `[task <id> failed]` opener so existing coalescing + the
|
|
1026
|
+
* failed-revoice fallback still fire, and the per-event transcript markers stay intact. */
|
|
1027
|
+
private integrationPrompt;
|
|
979
1028
|
private onWorkerSettled;
|
|
980
1029
|
private onWorkerFailed;
|
|
981
1030
|
private failTask;
|
|
@@ -984,8 +1033,10 @@ declare class DuplexAgent {
|
|
|
984
1033
|
* construction — the tool's own description carries the routing guidance, so a live enable works;
|
|
985
1034
|
* dispatch()'s think→act fallback covers any straggler calls after a live disable. */
|
|
986
1035
|
setThinkModel(model: string | false): void;
|
|
987
|
-
/** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
|
|
988
|
-
|
|
1036
|
+
/** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
|
|
1037
|
+
* `followUp` marks an automatic escalation/re-delegation (set by the integration turn) so the new
|
|
1038
|
+
* task's own integration turn won't escalate again — capping auto-follow-ups to one hop. */
|
|
1039
|
+
dispatch(brief: string, tier?: WorkerTier, label?: string, followUp?: boolean): Promise<string>;
|
|
989
1040
|
private actTool;
|
|
990
1041
|
private thinkTool;
|
|
991
1042
|
private taskStatusTool;
|
|
@@ -1148,6 +1199,11 @@ declare class VoiceEngineOptions {
|
|
|
1148
1199
|
/** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
|
|
1149
1200
|
* Gives the user time to finish their thought without triggering a model call. */
|
|
1150
1201
|
incompleteMergeMs: number;
|
|
1202
|
+
/** Grace window (ms) after an utterance dispatches, during which the user's own trailing audio cannot
|
|
1203
|
+
* barge the reply it requested. Soniox keeps finalizing partials past <end>; without this they read
|
|
1204
|
+
* as a barge and abort the fresh turn (live: mid-sentence self-interruption + steps=1→steps=0 double
|
|
1205
|
+
* abort). Short enough that a genuine immediate barge ("no wait—") still lands right after. */
|
|
1206
|
+
bargeGraceMs: number;
|
|
1151
1207
|
/** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
|
|
1152
1208
|
holdFiller: string;
|
|
1153
1209
|
/** Called when the engine holds an incomplete utterance (host can render a visual cue). */
|
|
@@ -1196,6 +1252,7 @@ declare class VoiceEngine {
|
|
|
1196
1252
|
private hot;
|
|
1197
1253
|
private suspectUntil;
|
|
1198
1254
|
private ackAt;
|
|
1255
|
+
private bargeGraceUntil;
|
|
1199
1256
|
private pendingUtt;
|
|
1200
1257
|
private pendingTimer;
|
|
1201
1258
|
private lastInterrupted;
|
|
@@ -1239,6 +1296,12 @@ declare class VoiceEngine {
|
|
|
1239
1296
|
* longer ones on count. */
|
|
1240
1297
|
private genuine;
|
|
1241
1298
|
private handlePartial;
|
|
1299
|
+
/** Merge a resumed utterance into the pending one, deduping any word-overlap. Soniox re-finalizes
|
|
1300
|
+
* overlapping audio when the silence-timer and the semantic `<end>` both endpoint a growing
|
|
1301
|
+
* utterance (or after a reconnect): the next "utterance" repeats the tail of the previous one, and
|
|
1302
|
+
* a naive `${prev} ${next}` produced the live duplication ("Um, I want to check if Um, I want to
|
|
1303
|
+
* check if…"). Find the longest suffix of `prev`'s words that prefixes `next` and drop it. */
|
|
1304
|
+
private mergeUtterance;
|
|
1242
1305
|
private static readonly TRAIL_RE;
|
|
1243
1306
|
/** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
|
|
1244
1307
|
private looksIncomplete;
|
package/dist/index.js
CHANGED
|
@@ -4690,17 +4690,26 @@ var DuplexAgentOptions = class {
|
|
|
4690
4690
|
memoryUserDir;
|
|
4691
4691
|
};
|
|
4692
4692
|
var RESERVED_EVENT_MARKER = /\[task\b[^\]\n]*\b(?:completed|failed|progress|asks)\b/i;
|
|
4693
|
+
var RESERVED_EVENT_OPENER = /\[\s*task\b/i;
|
|
4693
4694
|
var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nThis holds even when asked to "print", "list", "show", or "make a table" \u2014 there is no screen for the spoken channel. Speak it as flowing prose ("Tuesday is half a meter, Wednesday a bit less\u2026"), or if they truly need it on screen, route it to Act to render. Never emit dashes or pipes into speech.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nYou cannot mute the microphone or stop voice capture yourself \u2014 no tool does it. If the user asks you to stop listening or turn the voice off, never claim you did: tell them to say exactly "voice off" (handled by the app directly), or type /voice.\nYou are NOT a knowledge base. For any question whose answer needs SPECIFIC verifiable facts you do not already have in hand \u2014 how to build/configure/implement something, exact API, library, entitlement, command or option names, current events, or particular numbers, dates, or names \u2014 do NOT answer from your own memory: you will confidently make things up (a fake API, a wrong entitlement, an event that did not happen). Route it to `Act`, which can search and verify, and speak only what its report says. Answer inline ONLY for general conversation, chit-chat, and trivia you are sure of, or facts you can see via QuickLook. When elaborating on a completed task ("tell me more", "the gist"), stay strictly within what that result actually said \u2014 if the user asks for something the result did not cover, that is NEW information: dispatch `Act`, do not improvise.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, speak the USEFUL gist in one or two short sentences \u2014 the actual answer the user wanted (the headline finding, the key numbers), not the thinnest possible "it\'s done". A forecast \u2192 say it\'s calm AND that it\'s good for swimming but not surf; a count \u2192 say the number. Be brief, but do not drop the substance. DISTILL vs DELIVER \u2014 know which the request wants. When the result is a FACT to extract (a forecast, a count, a status), distill the headline. But when the user wanted specific CONTENT \u2014 a joke, a quote, a name, a definition, the actual lines \u2014 that content IS the deliverable: LEAD WITH IT. Your first words ARE the joke / the quote / the answer itself, before any "got it" or offer. SPEAK the content, never a comment ABOUT it: "why was six afraid of seven? because seven ate nine" \u2014 NOT "those are funny" or "I found a couple". If you did not actually say the joke/quote/answer aloud this turn, you FAILED the request, no matter how friendly the wrapper. A short joke is short \u2014 just say it. NEVER speak as if you already delivered something you did not actually say aloud THIS turn: do not say "those are\u2026", "there you go", or offer "a few MORE" when you never voiced the first one. The on-screen text is invisible to a voice user \u2014 if you did not speak it, they did not get it, so deliver it before you comment on it or offer more. If the result is a LIST (search results, multiple files/matches), the user CANNOT see it \u2014 there is no screen and no numbered menu to point at. Speak the gist: say what you found and name the top one or two by NAME (the source, not "the first one" or a number), then ask plainly if they want more. Never ask them to "pick which one" or reference items by position. The completed result stays in YOUR context \u2014 it is yours to draw on. When the user follows up ("tell me more", "what else", "and?"), answer FROM that result first: you already have the detail, so elaborate on what you have. Do NOT spawn a fresh worker to re-search or re-gather what you were just handed. Re-dispatch ONLY when genuinely new information is needed \u2014 e.g. the user wants the full contents of a SPECIFIC source, which is one WebFetch of that URL, not a brand-new search. "[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\nDo NOT end every turn with the same canned offer ("want a rundown?", "want the steps?"). Offer once at most; if the user pushes back, repeats themselves, or sounds unsatisfied ("you know what I mean?", "think deeper", "are you sure?"), do NOT re-offer the same thing \u2014 change approach: dispatch `Act`/`Think` to actually dig in, or ask one concrete clarifying question. Repeating a non-answer is worse than silence.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file, or checking your own `capabilities`/tools \u2014 use `QuickLook` (instant, no task). Whenever the user asks what you can do or whether you have some ability, QuickLook `capabilities` and answer from that \u2014 never guess. Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
|
|
4694
4695
|
var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
|
|
4695
4696
|
var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
|
|
4696
4697
|
var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
|
|
4697
|
-
var DuplexAgent = class {
|
|
4698
|
+
var DuplexAgent = class _DuplexAgent {
|
|
4698
4699
|
options;
|
|
4699
4700
|
voice;
|
|
4700
4701
|
tasks = /* @__PURE__ */ new Map();
|
|
4701
4702
|
queue = Promise.resolve();
|
|
4702
4703
|
seq = 0;
|
|
4703
4704
|
pendingEvents = [];
|
|
4705
|
+
/** Out-of-band follow-up attribution for the events coalescing into the next flush turn: TRUE iff ≥1 of
|
|
4706
|
+
* the tasks being integrated was NON-CLEAN (early-stop/failure). Carried out-of-band on the enqueue call
|
|
4707
|
+
* by the caller that KNOWS the outcome — a plain boolean the MODEL CANNOT PERTURB. It is NOT scanned from
|
|
4708
|
+
* worker-authored event text (v1: an "Outcome:" substring over-stamped siblings) and NOT keyed on a brief
|
|
4709
|
+
* string the reflex re-authors (v2: a paraphrased escalation brief missed the Set → followUp:false →
|
|
4710
|
+
* RE-ENABLED unbounded auto-escalation, the dangerous runaway direction). See [[wrong-discriminator]] /
|
|
4711
|
+
* [[drive-real-reflex]] / [[fakeaiclient-blind-to-wire-format]]. */
|
|
4712
|
+
pendingNonClean = false;
|
|
4704
4713
|
flushQueued = false;
|
|
4705
4714
|
/** Per-voice-turn guards (reset by resetTurn at each turn's start). The reflex is a weak model:
|
|
4706
4715
|
* left unguarded it polls TaskStatus after a dispatch and/or dispatches silently (dead air).
|
|
@@ -4719,6 +4728,21 @@ var DuplexAgent = class {
|
|
|
4719
4728
|
// chars of reflexBuf already forwarded to the host/TTS
|
|
4720
4729
|
fabricationCut = false;
|
|
4721
4730
|
// reflex emitted a reserved [task …] marker → suppress its tail
|
|
4731
|
+
/** TRUE for the duration of a re-voice turn that is integrating ≥1 NON-CLEAN task (turn-eligibility,
|
|
4732
|
+
* carried out-of-band — NOT derived from any worker/brief string). ANY Act/Think dispatched in such a
|
|
4733
|
+
* turn is stamped followUp:true. This GUARANTEES the dangerous direction is impossible: a genuine
|
|
4734
|
+
* escalation (even one with a paraphrased brief) ALWAYS lands in a non-clean integration turn, so it is
|
|
4735
|
+
* ALWAYS recognized as a follow-up and CANNOT re-escalate (one hop). The single-dispatch-per-turn guard
|
|
4736
|
+
* means at most one dispatch happens per flush, so realistically "the one dispatch IS the escalation".
|
|
4737
|
+
* ACCEPTED SAFE-DIRECTION ERROR: if the reflex instead dispatches FRESH unrelated work during a non-clean
|
|
4738
|
+
* flush (rare — and only possible when it batches multiple calls in one step, bypassing the guard), that
|
|
4739
|
+
* fresh task is over-stamped followUp:true and forgoes ONE future auto-escalation. That is SAFE (it only
|
|
4740
|
+
* ever REMOVES a future escalation, never adds one — no runaway) and is the correct side to err on. */
|
|
4741
|
+
turnFollowUp = false;
|
|
4742
|
+
/** Hard absolute backstop against runaway regardless of attribution: total automatic escalations across
|
|
4743
|
+
* the whole conversation. Once it hits MAX_AUTO_ESCALATIONS, no integration turn offers escalate/re-delegate. */
|
|
4744
|
+
autoEscalations = 0;
|
|
4745
|
+
static MAX_AUTO_ESCALATIONS = 8;
|
|
4722
4746
|
/** Parked worker questions awaiting a (voice-relayed) user answer, keyed by ask id. */
|
|
4723
4747
|
pendingAsks = /* @__PURE__ */ new Map();
|
|
4724
4748
|
/** Lazily resolved memory tools (async loadMemory runs in initMemory). */
|
|
@@ -4761,7 +4785,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4761
4785
|
if (this.fabricationCut) return;
|
|
4762
4786
|
const msg = ev.message;
|
|
4763
4787
|
this.reflexBuf += msg;
|
|
4764
|
-
const m = this.reflexBuf.match(RESERVED_EVENT_MARKER);
|
|
4788
|
+
const m = this.reflexBuf.match(RESERVED_EVENT_MARKER) ?? this.reflexBuf.match(RESERVED_EVENT_OPENER);
|
|
4765
4789
|
if (m) {
|
|
4766
4790
|
this.fabricationCut = true;
|
|
4767
4791
|
log9.warn(`reflex fabricated a [task \u2026] event in its spoken stream \u2014 cutting it (kept ${m.index} chars)`);
|
|
@@ -4771,8 +4795,15 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4771
4795
|
host.notify?.({ ...ev, message: safe });
|
|
4772
4796
|
return;
|
|
4773
4797
|
}
|
|
4774
|
-
|
|
4775
|
-
|
|
4798
|
+
const held = this.reflexBuf.length - this.reflexForwarded;
|
|
4799
|
+
const partial = held > 0 && /\[\s*t?a?s?k?$/i.test(this.reflexBuf.slice(-Math.min(held, 6)));
|
|
4800
|
+
const upto = partial ? this.reflexBuf.length - this.reflexBuf.slice(-6).match(/\[\s*t?a?s?k?$/i)[0].length : this.reflexBuf.length;
|
|
4801
|
+
const out = this.reflexBuf.slice(this.reflexForwarded, upto);
|
|
4802
|
+
this.reflexForwarded = upto;
|
|
4803
|
+
if (!out) return;
|
|
4804
|
+
if (out.trim()) this.spokeThisTurn = true;
|
|
4805
|
+
host.notify?.({ ...ev, message: out });
|
|
4806
|
+
return;
|
|
4776
4807
|
}
|
|
4777
4808
|
host.notify?.(ev);
|
|
4778
4809
|
}
|
|
@@ -4805,6 +4836,16 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4805
4836
|
this.voice.options.tools.push(...mem.tools);
|
|
4806
4837
|
if (mem.index) this.voice.options.systemPrompt += "\n\n" + mem.index;
|
|
4807
4838
|
}
|
|
4839
|
+
/** Flush any held-back trailing fragment (a possible `[task` opener that never completed) once the
|
|
4840
|
+
* turn's stream is done — so a legit message ending in "[t" isn't silently dropped. */
|
|
4841
|
+
flushHeldReflexTail() {
|
|
4842
|
+
if (this.fabricationCut) return;
|
|
4843
|
+
const tail = this.reflexBuf.slice(this.reflexForwarded);
|
|
4844
|
+
this.reflexForwarded = this.reflexBuf.length;
|
|
4845
|
+
if (!tail) return;
|
|
4846
|
+
if (tail.trim()) this.spokeThisTurn = true;
|
|
4847
|
+
this.options.host?.notify?.({ kind: "text_delta", message: tail });
|
|
4848
|
+
}
|
|
4808
4849
|
/** Clear the per-turn guards. Called at the head of every voice turn (user send + re-voice flush). */
|
|
4809
4850
|
resetTurn() {
|
|
4810
4851
|
this.turnDispatched = false;
|
|
@@ -4813,6 +4854,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4813
4854
|
this.reflexBuf = "";
|
|
4814
4855
|
this.reflexForwarded = 0;
|
|
4815
4856
|
this.fabricationCut = false;
|
|
4857
|
+
this.turnFollowUp = false;
|
|
4816
4858
|
this.voice.options.toolChoice = void 0;
|
|
4817
4859
|
}
|
|
4818
4860
|
/** preToolUse guard on the reflex: once it has dispatched this turn, a dispatch is "said my piece,
|
|
@@ -4842,17 +4884,18 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4842
4884
|
/** A turn that voiced nothing is dead air. Re-prompt the reflex ONCE so the LLM itself voices a short
|
|
4843
4885
|
* line (no template). If it STILL says nothing, fall back to a minimal line so silence never ships.
|
|
4844
4886
|
* Wording adapts to whether work was dispatched (an ack) or the inline reply was simply lost. */
|
|
4845
|
-
async ackIfSilent() {
|
|
4887
|
+
async ackIfSilent(fallback) {
|
|
4846
4888
|
const dispatched = this.turnDispatched;
|
|
4847
4889
|
this.nudging = true;
|
|
4848
4890
|
try {
|
|
4849
|
-
await this.voice.send(dispatched ? "[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools." : "[reminder] You said nothing to the user this turn. Give your ONE short spoken reply now \u2014 no tools.");
|
|
4891
|
+
await this.voice.send(fallback ? "[reminder] You said nothing to the user this turn. Tell them, in ONE short spoken sentence, what just happened \u2014 no tools." : dispatched ? "[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools." : "[reminder] You said nothing to the user this turn. Give your ONE short spoken reply now \u2014 no tools.");
|
|
4850
4892
|
} catch (e) {
|
|
4851
4893
|
log9.warn(`ack nudge failed: ${e instanceof Error ? e.message : e}`);
|
|
4852
4894
|
} finally {
|
|
4853
4895
|
this.nudging = false;
|
|
4854
4896
|
}
|
|
4855
|
-
if (!this.spokeThisTurn)
|
|
4897
|
+
if (!this.spokeThisTurn)
|
|
4898
|
+
this.options.host?.notify?.({ kind: "text_delta", message: fallback ?? (dispatched ? "Okay, on it." : "Sorry, could you say that again?") });
|
|
4856
4899
|
}
|
|
4857
4900
|
/** One user turn: the voice agent streams the reply (and may Act/Think). Serialized with re-voice turns. */
|
|
4858
4901
|
send(content) {
|
|
@@ -4860,6 +4903,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4860
4903
|
await this.initMemory();
|
|
4861
4904
|
this.resetTurn();
|
|
4862
4905
|
const res = await this.voice.send(content);
|
|
4906
|
+
this.flushHeldReflexTail();
|
|
4863
4907
|
if (this.silentTurn) await this.ackIfSilent();
|
|
4864
4908
|
return res;
|
|
4865
4909
|
});
|
|
@@ -4894,18 +4938,27 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4894
4938
|
notify(kind, message, data) {
|
|
4895
4939
|
this.options.host?.notify?.({ kind, message, data });
|
|
4896
4940
|
}
|
|
4897
|
-
/** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
|
|
4898
|
-
|
|
4941
|
+
/** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
|
|
4942
|
+
* `nonClean` (out-of-band boolean, set by the caller that KNOWS this event integrates a NON-CLEAN outcome)
|
|
4943
|
+
* marks the coalesced flush as a non-clean integration turn — turn-eligibility, never inferred from event
|
|
4944
|
+
* text and never keyed on a (re-authored) brief string. Any dispatch in such a turn is a follow-up. */
|
|
4945
|
+
queueRevoice(event, nonClean = false) {
|
|
4899
4946
|
this.pendingEvents.push(event);
|
|
4947
|
+
if (nonClean) this.pendingNonClean = true;
|
|
4900
4948
|
if (this.flushQueued) return;
|
|
4901
4949
|
this.flushQueued = true;
|
|
4902
4950
|
void this.enqueue(async () => {
|
|
4903
4951
|
this.flushQueued = false;
|
|
4904
4952
|
const events = this.pendingEvents.splice(0);
|
|
4953
|
+
const nonCleanTurn = this.pendingNonClean;
|
|
4954
|
+
this.pendingNonClean = false;
|
|
4905
4955
|
if (!events.length) return;
|
|
4956
|
+
const failed = events.find((e) => /^\[task\b[^\]\n]*\bfailed\b/i.test(e));
|
|
4906
4957
|
this.resetTurn();
|
|
4958
|
+
this.turnFollowUp = nonCleanTurn;
|
|
4907
4959
|
await this.voice.send(events.join("\n"));
|
|
4908
|
-
|
|
4960
|
+
this.flushHeldReflexTail();
|
|
4961
|
+
if (this.silentTurn) await this.ackIfSilent(failed ? "Sorry, that didn't work \u2014 the task failed." : void 0);
|
|
4909
4962
|
this.notify("revoice_done", "");
|
|
4910
4963
|
});
|
|
4911
4964
|
}
|
|
@@ -4922,7 +4975,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
|
|
|
4922
4975
|
${recent}` : brief) + verify;
|
|
4923
4976
|
}
|
|
4924
4977
|
/** Spawn a detached worker for task `id`; its settlement notifies + enqueues the re-voice turn. */
|
|
4925
|
-
spawnWorker(id, label, briefText, tier
|
|
4978
|
+
spawnWorker(id, label, briefText, tier, brief, followUp) {
|
|
4926
4979
|
const o = this.options;
|
|
4927
4980
|
const tierOpts = tier === "think" ? o.thinkOptions : o.actOptions;
|
|
4928
4981
|
const tierModel = tier === "think" ? o.thinkModel : o.actModel;
|
|
@@ -4974,7 +5027,7 @@ ${recent}` : brief) + verify;
|
|
|
4974
5027
|
// shared with the checker so a cancel tears down both
|
|
4975
5028
|
};
|
|
4976
5029
|
const promise = new Agent(agentOpts).run(briefText).then((res) => this.maybeVerify(id, briefText, res, tier, agentOpts)).then((res) => this.onWorkerSettled(id, res)).catch((err) => this.onWorkerFailed(id, err));
|
|
4977
|
-
this.tasks.set(id, { id, label, status: "running", controller, promise, tail });
|
|
5030
|
+
this.tasks.set(id, { id, label, status: "running", controller, promise, tail, brief, followUp });
|
|
4978
5031
|
if (this.tasks.size > this.options.maxTaskRecords)
|
|
4979
5032
|
for (const [tid, rec] of this.tasks) {
|
|
4980
5033
|
if (this.tasks.size <= this.options.maxTaskRecords) break;
|
|
@@ -5085,6 +5138,38 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
5085
5138
|
dropAsk(id) {
|
|
5086
5139
|
this.pendingAsks.get(id)?.resolve("");
|
|
5087
5140
|
}
|
|
5141
|
+
/** Build the INTEGRATION TURN prompt for a settled worker. Instead of trust-and-forwarding the raw
|
|
5142
|
+
* result, the result re-enters the reflex as a decision (like a tool_result flowing back into a normal
|
|
5143
|
+
* agent loop): the reflex evaluates the outcome against the original intent and chooses what to do next.
|
|
5144
|
+
*
|
|
5145
|
+
* Decision branches (the reflex acts on them with EXISTING tools — no new surface):
|
|
5146
|
+
* • accept → just SPEAK the result to the user (happy path; the only move on a clean success).
|
|
5147
|
+
* • escalate → call `Think` with the SAME brief — only when Act failed/stalled AND a Think tier
|
|
5148
|
+
* exists AND this task wasn't already a follow-up (one hop max). Wires the dead
|
|
5149
|
+
* "Reserve Think for a problem Act already FAILED at" promise.
|
|
5150
|
+
* • re-delegate→ call `Act` with a CORRECTED brief — for a recoverable error / partial result.
|
|
5151
|
+
* • ask → ask the user ONE concrete question if genuinely blocked.
|
|
5152
|
+
*
|
|
5153
|
+
* Keeps the `[task <id> completed]` / `[task <id> failed]` opener so existing coalescing + the
|
|
5154
|
+
* failed-revoice fallback still fire, and the per-event transcript markers stay intact. */
|
|
5155
|
+
integrationPrompt(rec, outcome, body, finishReason) {
|
|
5156
|
+
const opener = outcome === "error" ? `[task ${rec.id} failed]` : `[task ${rec.id} completed]`;
|
|
5157
|
+
if (outcome === "ok")
|
|
5158
|
+
return `${opener} ${body}`;
|
|
5159
|
+
const underCap = this.autoEscalations < _DuplexAgent.MAX_AUTO_ESCALATIONS;
|
|
5160
|
+
const canEscalate = (outcome === "error" || outcome === "incomplete") && underCap;
|
|
5161
|
+
const hasThink = this.options.thinkModel !== false;
|
|
5162
|
+
const options = [];
|
|
5163
|
+
if (!rec.followUp && canEscalate && hasThink)
|
|
5164
|
+
options.push("ESCALATE to the Think tier (call Think with the same brief) if this is a hard/architectural problem the Act worker stalled or failed on");
|
|
5165
|
+
if (!rec.followUp && canEscalate)
|
|
5166
|
+
options.push("RE-DELEGATE to Act with a corrected brief if the failure looks recoverable (a wrong path, a fixable mistake)");
|
|
5167
|
+
options.push("ASK the user one short, concrete question if you genuinely cannot proceed without their input");
|
|
5168
|
+
options.push("ACCEPT and tell the user plainly what happened (don't dress a failure up as success)");
|
|
5169
|
+
const decision = options.length > 1 ? ` You must decide what to do next \u2014 choose ONE: ${options.map((o, i) => `(${i + 1}) ${o}`).join("; ")}. Pick exactly one and act on it; do not voice this as a finished success.` : ` Tell the user plainly what happened \u2014 do not present this as a finished success.`;
|
|
5170
|
+
const state = outcome === "error" ? `the worker FAILED with: ${body}` : `the worker STOPPED EARLY (${finishReason}) \u2014 its result is PARTIAL, not a finished success: ${body}`;
|
|
5171
|
+
return `${opener} Original request: "${rec.brief}". Outcome: ${state}.${decision}`;
|
|
5172
|
+
}
|
|
5088
5173
|
onWorkerSettled(id, res) {
|
|
5089
5174
|
this.dropAsk(id);
|
|
5090
5175
|
const rec = this.tasks.get(id);
|
|
@@ -5099,16 +5184,18 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
5099
5184
|
}
|
|
5100
5185
|
rec.status = "done";
|
|
5101
5186
|
rec.result = res.text;
|
|
5102
|
-
|
|
5187
|
+
const incomplete = res.finishReason !== "stop";
|
|
5188
|
+
log9.verbose(`task ${id} done (${res.steps} steps${incomplete ? `, INCOMPLETE: ${res.finishReason}` : ""})`);
|
|
5103
5189
|
this.notify("task_done", `task ${id} (${rec.label}) completed`, {
|
|
5104
5190
|
id,
|
|
5105
5191
|
text: res.text,
|
|
5106
5192
|
usage: res.usage,
|
|
5107
5193
|
usageEstimated: res.usageEstimated,
|
|
5194
|
+
finishReason: res.finishReason,
|
|
5108
5195
|
steps: res.steps,
|
|
5109
5196
|
toolCalls: res.messages.filter((m) => m.role === "tool").length
|
|
5110
5197
|
});
|
|
5111
|
-
this.queueRevoice(
|
|
5198
|
+
this.queueRevoice(this.integrationPrompt(rec, incomplete ? "incomplete" : "ok", res.text, res.finishReason), incomplete);
|
|
5112
5199
|
}
|
|
5113
5200
|
onWorkerFailed(id, err) {
|
|
5114
5201
|
this.failTask(this.tasks.get(id), err instanceof Error ? err.message : String(err));
|
|
@@ -5119,7 +5206,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
5119
5206
|
rec.result = msg;
|
|
5120
5207
|
log9.warn(`task ${rec.id} failed: ${msg}`);
|
|
5121
5208
|
this.notify("task_error", `task ${rec.id} (${rec.label}) failed: ${msg}`);
|
|
5122
|
-
this.queueRevoice(
|
|
5209
|
+
this.queueRevoice(this.integrationPrompt(rec, "error", msg, "error"), true);
|
|
5123
5210
|
}
|
|
5124
5211
|
// --- voice tools (closures over this instance) ---
|
|
5125
5212
|
/** Live-switch the think tier: `false` disables (removes the Think tool from the voice agent),
|
|
@@ -5133,13 +5220,16 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
5133
5220
|
if (model === false && i >= 0) tools.splice(i, 1);
|
|
5134
5221
|
else if (model !== false && i < 0) tools.push(this.thinkTool());
|
|
5135
5222
|
}
|
|
5136
|
-
/** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
|
|
5137
|
-
|
|
5223
|
+
/** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
|
|
5224
|
+
* `followUp` marks an automatic escalation/re-delegation (set by the integration turn) so the new
|
|
5225
|
+
* task's own integration turn won't escalate again — capping auto-follow-ups to one hop. */
|
|
5226
|
+
async dispatch(brief, tier = "act", label, followUp = false) {
|
|
5138
5227
|
if (tier === "think" && this.options.thinkModel === false) tier = "act";
|
|
5228
|
+
if (followUp) this.autoEscalations++;
|
|
5139
5229
|
const id = `t${++this.seq}`;
|
|
5140
5230
|
const lbl = label ?? tier;
|
|
5141
5231
|
await this.options.onTaskStart?.(id, lbl);
|
|
5142
|
-
this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier);
|
|
5232
|
+
this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier, brief, followUp);
|
|
5143
5233
|
this.notify("task_started", `task ${id} (${lbl}) started`, { id, brief, tier });
|
|
5144
5234
|
return id;
|
|
5145
5235
|
}
|
|
@@ -5159,7 +5249,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
5159
5249
|
this.turnDispatched = true;
|
|
5160
5250
|
this.turnBriefs.add(String(brief ?? ""));
|
|
5161
5251
|
this.voice.options.toolChoice = "none";
|
|
5162
|
-
const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0);
|
|
5252
|
+
const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0, this.turnFollowUp);
|
|
5163
5253
|
return `Acting on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
|
|
5164
5254
|
}
|
|
5165
5255
|
};
|
|
@@ -5180,7 +5270,7 @@ Another agent just implemented the above. Independently check the CURRENT state
|
|
|
5180
5270
|
this.turnDispatched = true;
|
|
5181
5271
|
this.turnBriefs.add(String(brief ?? ""));
|
|
5182
5272
|
this.voice.options.toolChoice = "none";
|
|
5183
|
-
const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0);
|
|
5273
|
+
const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0, this.turnFollowUp);
|
|
5184
5274
|
return `Thinking on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
|
|
5185
5275
|
}
|
|
5186
5276
|
};
|
|
@@ -5506,6 +5596,11 @@ var VoiceEngineOptions = class {
|
|
|
5506
5596
|
/** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
|
|
5507
5597
|
* Gives the user time to finish their thought without triggering a model call. */
|
|
5508
5598
|
incompleteMergeMs = 1500;
|
|
5599
|
+
/** Grace window (ms) after an utterance dispatches, during which the user's own trailing audio cannot
|
|
5600
|
+
* barge the reply it requested. Soniox keeps finalizing partials past <end>; without this they read
|
|
5601
|
+
* as a barge and abort the fresh turn (live: mid-sentence self-interruption + steps=1→steps=0 double
|
|
5602
|
+
* abort). Short enough that a genuine immediate barge ("no wait—") still lands right after. */
|
|
5603
|
+
bargeGraceMs = 600;
|
|
5509
5604
|
/** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
|
|
5510
5605
|
holdFiller = "";
|
|
5511
5606
|
/** Called when the engine holds an incomplete utterance (host can render a visual cue). */
|
|
@@ -5561,6 +5656,9 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
5561
5656
|
suspectUntil = 0;
|
|
5562
5657
|
ackAt = 0;
|
|
5563
5658
|
// when the micro-ack was spoken — its echo can leak before the AEC filter converges
|
|
5659
|
+
bargeGraceUntil = 0;
|
|
5660
|
+
// no barge-in until this time — the user's OWN trailing audio (after the
|
|
5661
|
+
// utterance that JUST dispatched this turn) must not immediately re-interrupt the reply it requested.
|
|
5564
5662
|
pendingUtt = "";
|
|
5565
5663
|
// endpointed text held for the merge window
|
|
5566
5664
|
pendingTimer = null;
|
|
@@ -5747,6 +5845,10 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
5747
5845
|
}
|
|
5748
5846
|
handlePartial(text) {
|
|
5749
5847
|
if (this.speaking) {
|
|
5848
|
+
if (now() < this.bargeGraceUntil) {
|
|
5849
|
+
if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
|
|
5850
|
+
return;
|
|
5851
|
+
}
|
|
5750
5852
|
if (this.overlapCapable) {
|
|
5751
5853
|
const txt = text.trim();
|
|
5752
5854
|
if (!txt || txt === this.lastOverlapPartial) return;
|
|
@@ -5787,6 +5889,27 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
5787
5889
|
}
|
|
5788
5890
|
if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
|
|
5789
5891
|
}
|
|
5892
|
+
/** Merge a resumed utterance into the pending one, deduping any word-overlap. Soniox re-finalizes
|
|
5893
|
+
* overlapping audio when the silence-timer and the semantic `<end>` both endpoint a growing
|
|
5894
|
+
* utterance (or after a reconnect): the next "utterance" repeats the tail of the previous one, and
|
|
5895
|
+
* a naive `${prev} ${next}` produced the live duplication ("Um, I want to check if Um, I want to
|
|
5896
|
+
* check if…"). Find the longest suffix of `prev`'s words that prefixes `next` and drop it. */
|
|
5897
|
+
mergeUtterance(prev, next) {
|
|
5898
|
+
if (!prev) return next;
|
|
5899
|
+
if (!next) return prev;
|
|
5900
|
+
const pw = prev.split(/\s+/), nw = next.split(/\s+/);
|
|
5901
|
+
const norm2 = (w) => w.toLowerCase().replace(/[^a-z0-9]/g, "");
|
|
5902
|
+
const max = Math.min(pw.length, nw.length);
|
|
5903
|
+
for (let k = max; k > 0; k--) {
|
|
5904
|
+
let match = true;
|
|
5905
|
+
for (let i = 0; i < k; i++) if (norm2(pw[pw.length - k + i]) !== norm2(nw[i])) {
|
|
5906
|
+
match = false;
|
|
5907
|
+
break;
|
|
5908
|
+
}
|
|
5909
|
+
if (match) return [...pw, ...nw.slice(k)].join(" ");
|
|
5910
|
+
}
|
|
5911
|
+
return `${prev} ${next}`;
|
|
5912
|
+
}
|
|
5790
5913
|
static TRAIL_RE = /(?:^|\s)(?:and|but|or|so|to|the|a|an|of|in|for|with|that|if|uh|um|like|about|from|into|on|is|are|was|were|,)$/i;
|
|
5791
5914
|
/** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
|
|
5792
5915
|
looksIncomplete(text) {
|
|
@@ -5806,7 +5929,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
5806
5929
|
this.ackAt = 0;
|
|
5807
5930
|
return;
|
|
5808
5931
|
}
|
|
5809
|
-
this.pendingUtt = this.
|
|
5932
|
+
this.pendingUtt = this.mergeUtterance(this.pendingUtt, text);
|
|
5810
5933
|
if (this.pendingTimer) clearTimeout(this.pendingTimer);
|
|
5811
5934
|
if (this.options.incompleteMergeMs && this.looksIncomplete(this.pendingUtt)) {
|
|
5812
5935
|
log10.verbose(`hold: incomplete utterance "${this.pendingUtt.slice(-40)}"`);
|
|
@@ -5831,6 +5954,7 @@ var VoiceEngine = class _VoiceEngine {
|
|
|
5831
5954
|
this.pendingUtt = "";
|
|
5832
5955
|
if (text) {
|
|
5833
5956
|
this.turnStartAt = now();
|
|
5957
|
+
this.bargeGraceUntil = now() + this.options.bargeGraceMs;
|
|
5834
5958
|
this.options.onUtterance(text);
|
|
5835
5959
|
}
|
|
5836
5960
|
}
|