@livx.cc/agentx 0.95.6 → 0.96.1

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -831,6 +831,11 @@ interface TaskRecord {
831
831
  tail: string[];
832
832
  /** Final report text (or error message) once the task settled. */
833
833
  result?: string;
834
+ /** The original (untemplated) brief — replayed verbatim into an escalation/re-delegation. */
835
+ brief: string;
836
+ /** True when this task is ITSELF an automatic follow-up (escalation/re-delegate) of a prior task.
837
+ * Its integration turn may NOT escalate again — caps auto-follow-up to one hop per original task. */
838
+ followUp: boolean;
834
839
  }
835
840
  type WorkerTier = 'act' | 'think';
836
841
  declare class DuplexAgentOptions {
@@ -903,6 +908,14 @@ declare class DuplexAgent {
903
908
  private queue;
904
909
  private seq;
905
910
  private pendingEvents;
911
+ /** Out-of-band follow-up attribution for the events coalescing into the next flush turn: TRUE iff ≥1 of
912
+ * the tasks being integrated was NON-CLEAN (early-stop/failure). Carried out-of-band on the enqueue call
913
+ * by the caller that KNOWS the outcome — a plain boolean the MODEL CANNOT PERTURB. It is NOT scanned from
914
+ * worker-authored event text (v1: an "Outcome:" substring over-stamped siblings) and NOT keyed on a brief
915
+ * string the reflex re-authors (v2: a paraphrased escalation brief missed the Set → followUp:false →
916
+ * RE-ENABLED unbounded auto-escalation, the dangerous runaway direction). See [[wrong-discriminator]] /
917
+ * [[drive-real-reflex]] / [[fakeaiclient-blind-to-wire-format]]. */
918
+ private pendingNonClean;
906
919
  private flushQueued;
907
920
  /** Per-voice-turn guards (reset by resetTurn at each turn's start). The reflex is a weak model:
908
921
  * left unguarded it polls TaskStatus after a dispatch and/or dispatches silently (dead air).
@@ -914,6 +927,21 @@ declare class DuplexAgent {
914
927
  private reflexBuf;
915
928
  private reflexForwarded;
916
929
  private fabricationCut;
930
+ /** TRUE for the duration of a re-voice turn that is integrating ≥1 NON-CLEAN task (turn-eligibility,
931
+ * carried out-of-band — NOT derived from any worker/brief string). ANY Act/Think dispatched in such a
932
+ * turn is stamped followUp:true. This GUARANTEES the dangerous direction is impossible: a genuine
933
+ * escalation (even one with a paraphrased brief) ALWAYS lands in a non-clean integration turn, so it is
934
+ * ALWAYS recognized as a follow-up and CANNOT re-escalate (one hop). The single-dispatch-per-turn guard
935
+ * means at most one dispatch happens per flush, so realistically "the one dispatch IS the escalation".
936
+ * ACCEPTED SAFE-DIRECTION ERROR: if the reflex instead dispatches FRESH unrelated work during a non-clean
937
+ * flush (rare — and only possible when it batches multiple calls in one step, bypassing the guard), that
938
+ * fresh task is over-stamped followUp:true and forgoes ONE future auto-escalation. That is SAFE (it only
939
+ * ever REMOVES a future escalation, never adds one — no runaway) and is the correct side to err on. */
940
+ private turnFollowUp;
941
+ /** Hard absolute backstop against runaway regardless of attribution: total automatic escalations across
942
+ * the whole conversation. Once it hits MAX_AUTO_ESCALATIONS, no integration turn offers escalate/re-delegate. */
943
+ private autoEscalations;
944
+ private static readonly MAX_AUTO_ESCALATIONS;
917
945
  /** Parked worker questions awaiting a (voice-relayed) user answer, keyed by ask id. */
918
946
  readonly pendingAsks: Map<string, {
919
947
  question: string;
@@ -924,6 +952,9 @@ declare class DuplexAgent {
924
952
  constructor(options?: Partial<DuplexAgentOptions>);
925
953
  /** Resolve memory tools + inject index into voice system prompt (once). */
926
954
  private initMemory;
955
+ /** Flush any held-back trailing fragment (a possible `[task` opener that never completed) once the
956
+ * turn's stream is done — so a legit message ending in "[t" isn't silently dropped. */
957
+ private flushHeldReflexTail;
927
958
  /** Clear the per-turn guards. Called at the head of every voice turn (user send + re-voice flush). */
928
959
  private resetTurn;
929
960
  /** preToolUse guard on the reflex: once it has dispatched this turn, a dispatch is "said my piece,
@@ -950,7 +981,10 @@ declare class DuplexAgent {
950
981
  /** Promise-chain mutex: turns run strictly one at a time; a failed turn doesn't poison the chain. */
951
982
  private enqueue;
952
983
  private notify;
953
- /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn. */
984
+ /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
985
+ * `nonClean` (out-of-band boolean, set by the caller that KNOWS this event integrates a NON-CLEAN outcome)
986
+ * marks the coalesced flush as a non-clean integration turn — turn-eligibility, never inferred from event
987
+ * text and never keyed on a (re-authored) brief string. Any dispatch in such a turn is a follow-up. */
954
988
  private queueRevoice;
955
989
  /** The worker's brief: the Act/Think args + a STATIC text snapshot of the recent conversation.
956
990
  * Act briefs get a self-verify footer — the worker's report is trusted without review, so it
@@ -976,6 +1010,21 @@ declare class DuplexAgent {
976
1010
  parkQuestion(askId: string, question: string): Promise<string>;
977
1011
  /** Resolve any question a settling/cancelled task left parked (its answer can no longer matter). */
978
1012
  private dropAsk;
1013
+ /** Build the INTEGRATION TURN prompt for a settled worker. Instead of trust-and-forwarding the raw
1014
+ * result, the result re-enters the reflex as a decision (like a tool_result flowing back into a normal
1015
+ * agent loop): the reflex evaluates the outcome against the original intent and chooses what to do next.
1016
+ *
1017
+ * Decision branches (the reflex acts on them with EXISTING tools — no new surface):
1018
+ * • accept → just SPEAK the result to the user (happy path; the only move on a clean success).
1019
+ * • escalate → call `Think` with the SAME brief — only when Act failed/stalled AND a Think tier
1020
+ * exists AND this task wasn't already a follow-up (one hop max). Wires the dead
1021
+ * "Reserve Think for a problem Act already FAILED at" promise.
1022
+ * • re-delegate→ call `Act` with a CORRECTED brief — for a recoverable error / partial result.
1023
+ * • ask → ask the user ONE concrete question if genuinely blocked.
1024
+ *
1025
+ * Keeps the `[task <id> completed]` / `[task <id> failed]` opener so existing coalescing + the
1026
+ * failed-revoice fallback still fire, and the per-event transcript markers stay intact. */
1027
+ private integrationPrompt;
979
1028
  private onWorkerSettled;
980
1029
  private onWorkerFailed;
981
1030
  private failTask;
@@ -984,8 +1033,10 @@ declare class DuplexAgent {
984
1033
  * construction — the tool's own description carries the routing guidance, so a live enable works;
985
1034
  * dispatch()'s think→act fallback covers any straggler calls after a live disable. */
986
1035
  setThinkModel(model: string | false): void;
987
- /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id. */
988
- dispatch(brief: string, tier?: WorkerTier, label?: string): Promise<string>;
1036
+ /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
1037
+ * `followUp` marks an automatic escalation/re-delegation (set by the integration turn) so the new
1038
+ * task's own integration turn won't escalate again — capping auto-follow-ups to one hop. */
1039
+ dispatch(brief: string, tier?: WorkerTier, label?: string, followUp?: boolean): Promise<string>;
989
1040
  private actTool;
990
1041
  private thinkTool;
991
1042
  private taskStatusTool;
@@ -1148,6 +1199,11 @@ declare class VoiceEngineOptions {
1148
1199
  /** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
1149
1200
  * Gives the user time to finish their thought without triggering a model call. */
1150
1201
  incompleteMergeMs: number;
1202
+ /** Grace window (ms) after an utterance dispatches, during which the user's own trailing audio cannot
1203
+ * barge the reply it requested. Soniox keeps finalizing partials past <end>; without this they read
1204
+ * as a barge and abort the fresh turn (live: mid-sentence self-interruption + steps=1→steps=0 double
1205
+ * abort). Short enough that a genuine immediate barge ("no wait—") still lands right after. */
1206
+ bargeGraceMs: number;
1151
1207
  /** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
1152
1208
  holdFiller: string;
1153
1209
  /** Called when the engine holds an incomplete utterance (host can render a visual cue). */
@@ -1196,6 +1252,7 @@ declare class VoiceEngine {
1196
1252
  private hot;
1197
1253
  private suspectUntil;
1198
1254
  private ackAt;
1255
+ private bargeGraceUntil;
1199
1256
  private pendingUtt;
1200
1257
  private pendingTimer;
1201
1258
  private lastInterrupted;
@@ -1239,6 +1296,12 @@ declare class VoiceEngine {
1239
1296
  * longer ones on count. */
1240
1297
  private genuine;
1241
1298
  private handlePartial;
1299
+ /** Merge a resumed utterance into the pending one, deduping any word-overlap. Soniox re-finalizes
1300
+ * overlapping audio when the silence-timer and the semantic `<end>` both endpoint a growing
1301
+ * utterance (or after a reconnect): the next "utterance" repeats the tail of the previous one, and
1302
+ * a naive `${prev} ${next}` produced the live duplication ("Um, I want to check if Um, I want to
1303
+ * check if…"). Find the longest suffix of `prev`'s words that prefixes `next` and drop it. */
1304
+ private mergeUtterance;
1242
1305
  private static readonly TRAIL_RE;
1243
1306
  /** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
1244
1307
  private looksIncomplete;
package/dist/index.js CHANGED
@@ -4690,17 +4690,26 @@ var DuplexAgentOptions = class {
4690
4690
  memoryUserDir;
4691
4691
  };
4692
4692
  var RESERVED_EVENT_MARKER = /\[task\b[^\]\n]*\b(?:completed|failed|progress|asks)\b/i;
4693
+ var RESERVED_EVENT_OPENER = /\[\s*task\b/i;
4693
4694
  var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nThis holds even when asked to "print", "list", "show", or "make a table" \u2014 there is no screen for the spoken channel. Speak it as flowing prose ("Tuesday is half a meter, Wednesday a bit less\u2026"), or if they truly need it on screen, route it to Act to render. Never emit dashes or pipes into speech.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nYou cannot mute the microphone or stop voice capture yourself \u2014 no tool does it. 
If the user asks you to stop listening or turn the voice off, never claim you did: tell them to say exactly "voice off" (handled by the app directly), or type /voice.\nYou are NOT a knowledge base. For any question whose answer needs SPECIFIC verifiable facts you do not already have in hand \u2014 how to build/configure/implement something, exact API, library, entitlement, command or option names, current events, or particular numbers, dates, or names \u2014 do NOT answer from your own memory: you will confidently make things up (a fake API, a wrong entitlement, an event that did not happen). Route it to `Act`, which can search and verify, and speak only what its report says. Answer inline ONLY for general conversation, chit-chat, and trivia you are sure of, or facts you can see via QuickLook. When elaborating on a completed task ("tell me more", "the gist"), stay strictly within what that result actually said \u2014 if the user asks for something the result did not cover, that is NEW information: dispatch `Act`, do not improvise.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, speak the USEFUL gist in one or two short sentences \u2014 the actual answer the user wanted (the headline finding, the key numbers), not the thinnest possible "it\'s done". A forecast \u2192 say it\'s calm AND that it\'s good for swimming but not surf; a count \u2192 say the number. Be brief, but do not drop the substance. 
DISTILL vs DELIVER \u2014 know which the request wants. When the result is a FACT to extract (a forecast, a count, a status), distill the headline. But when the user wanted specific CONTENT \u2014 a joke, a quote, a name, a definition, the actual lines \u2014 that content IS the deliverable: LEAD WITH IT. Your first words ARE the joke / the quote / the answer itself, before any "got it" or offer. SPEAK the content, never a comment ABOUT it: "why was six afraid of seven? because seven ate nine" \u2014 NOT "those are funny" or "I found a couple". If you did not actually say the joke/quote/answer aloud this turn, you FAILED the request, no matter how friendly the wrapper. A short joke is short \u2014 just say it. NEVER speak as if you already delivered something you did not actually say aloud THIS turn: do not say "those are\u2026", "there you go", or offer "a few MORE" when you never voiced the first one. The on-screen text is invisible to a voice user \u2014 if you did not speak it, they did not get it, so deliver it before you comment on it or offer more. If the result is a LIST (search results, multiple files/matches), the user CANNOT see it \u2014 there is no screen and no numbered menu to point at. Speak the gist: say what you found and name the top one or two by NAME (the source, not "the first one" or a number), then ask plainly if they want more. Never ask them to "pick which one" or reference items by position. The completed result stays in YOUR context \u2014 it is yours to draw on. When the user follows up ("tell me more", "what else", "and?"), answer FROM that result first: you already have the detail, so elaborate on what you have. Do NOT spawn a fresh worker to re-search or re-gather what you were just handed. Re-dispatch ONLY when genuinely new information is needed \u2014 e.g. the user wants the full contents of a SPECIFIC source, which is one WebFetch of that URL, not a brand-new search. 
"[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\nDo NOT end every turn with the same canned offer ("want a rundown?", "want the steps?"). Offer once at most; if the user pushes back, repeats themselves, or sounds unsatisfied ("you know what I mean?", "think deeper", "are you sure?"), do NOT re-offer the same thing \u2014 change approach: dispatch `Act`/`Think` to actually dig in, or ask one concrete clarifying question. Repeating a non-answer is worse than silence.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). 
NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file, or checking your own `capabilities`/tools \u2014 use `QuickLook` (instant, no task). Whenever the user asks what you can do or whether you have some ability, QuickLook `capabilities` and answer from that \u2014 never guess. Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
4694
4695
  var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
4695
4696
  var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
4696
4697
  var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
4697
- var DuplexAgent = class {
4698
+ var DuplexAgent = class _DuplexAgent {
4698
4699
  options;
4699
4700
  voice;
4700
4701
  tasks = /* @__PURE__ */ new Map();
4701
4702
  queue = Promise.resolve();
4702
4703
  seq = 0;
4703
4704
  pendingEvents = [];
4705
+ /** Out-of-band follow-up attribution for the events coalescing into the next flush turn: TRUE iff ≥1 of
4706
+ * the tasks being integrated was NON-CLEAN (early-stop/failure). Carried out-of-band on the enqueue call
4707
+ * by the caller that KNOWS the outcome — a plain boolean the MODEL CANNOT PERTURB. It is NOT scanned from
4708
+ * worker-authored event text (v1: an "Outcome:" substring over-stamped siblings) and NOT keyed on a brief
4709
+ * string the reflex re-authors (v2: a paraphrased escalation brief missed the Set → followUp:false →
4710
+ * RE-ENABLED unbounded auto-escalation, the dangerous runaway direction). See [[wrong-discriminator]] /
4711
+ * [[drive-real-reflex]] / [[fakeaiclient-blind-to-wire-format]]. */
4712
+ pendingNonClean = false;
4704
4713
  flushQueued = false;
4705
4714
  /** Per-voice-turn guards (reset by resetTurn at each turn's start). The reflex is a weak model:
4706
4715
  * left unguarded it polls TaskStatus after a dispatch and/or dispatches silently (dead air).
@@ -4719,6 +4728,21 @@ var DuplexAgent = class {
4719
4728
  // chars of reflexBuf already forwarded to the host/TTS
4720
4729
  fabricationCut = false;
4721
4730
  // reflex emitted a reserved [task …] marker → suppress its tail
4731
+ /** TRUE for the duration of a re-voice turn that is integrating ≥1 NON-CLEAN task (turn-eligibility,
4732
+ * carried out-of-band — NOT derived from any worker/brief string). ANY Act/Think dispatched in such a
4733
+ * turn is stamped followUp:true. This GUARANTEES the dangerous direction is impossible: a genuine
4734
+ * escalation (even one with a paraphrased brief) ALWAYS lands in a non-clean integration turn, so it is
4735
+ * ALWAYS recognized as a follow-up and CANNOT re-escalate (one hop). The single-dispatch-per-turn guard
4736
+ * means at most one dispatch happens per flush, so realistically "the one dispatch IS the escalation".
4737
+ * ACCEPTED SAFE-DIRECTION ERROR: if the reflex instead dispatches FRESH unrelated work during a non-clean
4738
+ * flush (rare — and only possible when it batches multiple calls in one step, bypassing the guard), that
4739
+ * fresh task is over-stamped followUp:true and forgoes ONE future auto-escalation. That is SAFE (it only
4740
+ * ever REMOVES a future escalation, never adds one — no runaway) and is the correct side to err on. */
4741
+ turnFollowUp = false;
4742
+ /** Hard absolute backstop against runaway regardless of attribution: total automatic escalations across
4743
+ * the whole conversation. Once it hits MAX_AUTO_ESCALATIONS, no integration turn offers escalate/re-delegate. */
4744
+ autoEscalations = 0;
4745
+ static MAX_AUTO_ESCALATIONS = 8;
4722
4746
  /** Parked worker questions awaiting a (voice-relayed) user answer, keyed by ask id. */
4723
4747
  pendingAsks = /* @__PURE__ */ new Map();
4724
4748
  /** Lazily resolved memory tools (async loadMemory runs in initMemory). */
@@ -4761,7 +4785,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4761
4785
  if (this.fabricationCut) return;
4762
4786
  const msg = ev.message;
4763
4787
  this.reflexBuf += msg;
4764
- const m = this.reflexBuf.match(RESERVED_EVENT_MARKER);
4788
+ const m = this.reflexBuf.match(RESERVED_EVENT_MARKER) ?? this.reflexBuf.match(RESERVED_EVENT_OPENER);
4765
4789
  if (m) {
4766
4790
  this.fabricationCut = true;
4767
4791
  log9.warn(`reflex fabricated a [task \u2026] event in its spoken stream \u2014 cutting it (kept ${m.index} chars)`);
@@ -4771,8 +4795,15 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4771
4795
  host.notify?.({ ...ev, message: safe });
4772
4796
  return;
4773
4797
  }
4774
- this.reflexForwarded = this.reflexBuf.length;
4775
- if (msg.trim()) this.spokeThisTurn = true;
4798
+ const held = this.reflexBuf.length - this.reflexForwarded;
4799
+ const partial = held > 0 && /\[\s*t?a?s?k?$/i.test(this.reflexBuf.slice(-Math.min(held, 6)));
4800
+ const upto = partial ? this.reflexBuf.length - this.reflexBuf.slice(-6).match(/\[\s*t?a?s?k?$/i)[0].length : this.reflexBuf.length;
4801
+ const out = this.reflexBuf.slice(this.reflexForwarded, upto);
4802
+ this.reflexForwarded = upto;
4803
+ if (!out) return;
4804
+ if (out.trim()) this.spokeThisTurn = true;
4805
+ host.notify?.({ ...ev, message: out });
4806
+ return;
4776
4807
  }
4777
4808
  host.notify?.(ev);
4778
4809
  }
@@ -4805,6 +4836,16 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4805
4836
  this.voice.options.tools.push(...mem.tools);
4806
4837
  if (mem.index) this.voice.options.systemPrompt += "\n\n" + mem.index;
4807
4838
  }
4839
+ /** Flush any held-back trailing fragment (a possible `[task` opener that never completed) once the
4840
+ * turn's stream is done — so a legit message ending in "[t" isn't silently dropped. */
4841
+ flushHeldReflexTail() {
4842
+ if (this.fabricationCut) return;
4843
+ const tail = this.reflexBuf.slice(this.reflexForwarded);
4844
+ this.reflexForwarded = this.reflexBuf.length;
4845
+ if (!tail) return;
4846
+ if (tail.trim()) this.spokeThisTurn = true;
4847
+ this.options.host?.notify?.({ kind: "text_delta", message: tail });
4848
+ }
4808
4849
  /** Clear the per-turn guards. Called at the head of every voice turn (user send + re-voice flush). */
4809
4850
  resetTurn() {
4810
4851
  this.turnDispatched = false;
@@ -4813,6 +4854,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4813
4854
  this.reflexBuf = "";
4814
4855
  this.reflexForwarded = 0;
4815
4856
  this.fabricationCut = false;
4857
+ this.turnFollowUp = false;
4816
4858
  this.voice.options.toolChoice = void 0;
4817
4859
  }
4818
4860
  /** preToolUse guard on the reflex: once it has dispatched this turn, a dispatch is "said my piece,
@@ -4842,17 +4884,18 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4842
4884
  /** A turn that voiced nothing is dead air. Re-prompt the reflex ONCE so the LLM itself voices a short
4843
4885
  * line (no template). If it STILL says nothing, fall back to a minimal line so silence never ships.
4844
4886
  * Wording adapts to whether work was dispatched (an ack) or the inline reply was simply lost. */
4845
- async ackIfSilent() {
4887
+ async ackIfSilent(fallback) {
4846
4888
  const dispatched = this.turnDispatched;
4847
4889
  this.nudging = true;
4848
4890
  try {
4849
- await this.voice.send(dispatched ? "[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools." : "[reminder] You said nothing to the user this turn. Give your ONE short spoken reply now \u2014 no tools.");
4891
+ await this.voice.send(fallback ? "[reminder] You said nothing to the user this turn. Tell them, in ONE short spoken sentence, what just happened \u2014 no tools." : dispatched ? "[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools." : "[reminder] You said nothing to the user this turn. Give your ONE short spoken reply now \u2014 no tools.");
4850
4892
  } catch (e) {
4851
4893
  log9.warn(`ack nudge failed: ${e instanceof Error ? e.message : e}`);
4852
4894
  } finally {
4853
4895
  this.nudging = false;
4854
4896
  }
4855
- if (!this.spokeThisTurn) this.options.host?.notify?.({ kind: "text_delta", message: dispatched ? "Okay, on it." : "Sorry, could you say that again?" });
4897
+ if (!this.spokeThisTurn)
4898
+ this.options.host?.notify?.({ kind: "text_delta", message: fallback ?? (dispatched ? "Okay, on it." : "Sorry, could you say that again?") });
4856
4899
  }
4857
4900
  /** One user turn: the voice agent streams the reply (and may Act/Think). Serialized with re-voice turns. */
4858
4901
  send(content) {
@@ -4860,6 +4903,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4860
4903
  await this.initMemory();
4861
4904
  this.resetTurn();
4862
4905
  const res = await this.voice.send(content);
4906
+ this.flushHeldReflexTail();
4863
4907
  if (this.silentTurn) await this.ackIfSilent();
4864
4908
  return res;
4865
4909
  });
@@ -4894,18 +4938,27 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4894
4938
  notify(kind, message, data) {
4895
4939
  this.options.host?.notify?.({ kind, message, data });
4896
4940
  }
4897
- /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn. */
4898
- queueRevoice(event) {
4941
+ /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
4942
+ * `nonClean` (out-of-band boolean, set by the caller that KNOWS this event integrates a NON-CLEAN outcome)
4943
+ * marks the coalesced flush as a non-clean integration turn — turn-eligibility, never inferred from event
4944
+ * text and never keyed on a (re-authored) brief string. Any dispatch in such a turn is a follow-up. */
4945
+ queueRevoice(event, nonClean = false) {
4899
4946
  this.pendingEvents.push(event);
4947
+ if (nonClean) this.pendingNonClean = true;
4900
4948
  if (this.flushQueued) return;
4901
4949
  this.flushQueued = true;
4902
4950
  void this.enqueue(async () => {
4903
4951
  this.flushQueued = false;
4904
4952
  const events = this.pendingEvents.splice(0);
4953
+ const nonCleanTurn = this.pendingNonClean;
4954
+ this.pendingNonClean = false;
4905
4955
  if (!events.length) return;
4956
+ const failed = events.find((e) => /^\[task\b[^\]\n]*\bfailed\b/i.test(e));
4906
4957
  this.resetTurn();
4958
+ this.turnFollowUp = nonCleanTurn;
4907
4959
  await this.voice.send(events.join("\n"));
4908
- if (this.silentTurn) await this.ackIfSilent();
4960
+ this.flushHeldReflexTail();
4961
+ if (this.silentTurn) await this.ackIfSilent(failed ? "Sorry, that didn't work \u2014 the task failed." : void 0);
4909
4962
  this.notify("revoice_done", "");
4910
4963
  });
4911
4964
  }
@@ -4922,7 +4975,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
4922
4975
  ${recent}` : brief) + verify;
4923
4976
  }
4924
4977
  /** Spawn a detached worker for task `id`; its settlement notifies + enqueues the re-voice turn. */
4925
- spawnWorker(id, label, briefText, tier = "act") {
4978
+ spawnWorker(id, label, briefText, tier, brief, followUp) {
4926
4979
  const o = this.options;
4927
4980
  const tierOpts = tier === "think" ? o.thinkOptions : o.actOptions;
4928
4981
  const tierModel = tier === "think" ? o.thinkModel : o.actModel;
@@ -4974,7 +5027,7 @@ ${recent}` : brief) + verify;
4974
5027
  // shared with the checker so a cancel tears down both
4975
5028
  };
4976
5029
  const promise = new Agent(agentOpts).run(briefText).then((res) => this.maybeVerify(id, briefText, res, tier, agentOpts)).then((res) => this.onWorkerSettled(id, res)).catch((err) => this.onWorkerFailed(id, err));
4977
- this.tasks.set(id, { id, label, status: "running", controller, promise, tail });
5030
+ this.tasks.set(id, { id, label, status: "running", controller, promise, tail, brief, followUp });
4978
5031
  if (this.tasks.size > this.options.maxTaskRecords)
4979
5032
  for (const [tid, rec] of this.tasks) {
4980
5033
  if (this.tasks.size <= this.options.maxTaskRecords) break;
@@ -5085,6 +5138,38 @@ Another agent just implemented the above. Independently check the CURRENT state
5085
5138
  dropAsk(id) {
5086
5139
  this.pendingAsks.get(id)?.resolve("");
5087
5140
  }
5141
+ /** Build the INTEGRATION TURN prompt for a settled worker. Instead of trust-and-forwarding the raw
5142
+ * result, the result re-enters the reflex as a decision (like a tool_result flowing back into a normal
5143
+ * agent loop): the reflex evaluates the outcome against the original intent and chooses what to do next.
5144
+ *
5145
+ * Decision branches (the reflex acts on them with EXISTING tools — no new surface):
5146
+ * • accept → just SPEAK the result to the user (happy path; the only move on a clean success).
5147
+ * • escalate → call `Think` with the SAME brief — only when Act failed/stalled AND a Think tier
5148
+ * exists AND this task wasn't already a follow-up (one hop max). Wires the dead
5149
+ * "Reserve Think for a problem Act already FAILED at" promise.
5150
+ * • re-delegate→ call `Act` with a CORRECTED brief — for a recoverable error / partial result.
5151
+ * • ask → ask the user ONE concrete question if genuinely blocked.
5152
+ *
5153
+ * Keeps the `[task <id> completed]` / `[task <id> failed]` opener so existing coalescing + the
5154
+ * failed-revoice fallback still fire, and the per-event transcript markers stay intact. */
5155
+ integrationPrompt(rec, outcome, body, finishReason) {
5156
+ const opener = outcome === "error" ? `[task ${rec.id} failed]` : `[task ${rec.id} completed]`;
5157
+ if (outcome === "ok")
5158
+ return `${opener} ${body}`;
5159
+ const underCap = this.autoEscalations < _DuplexAgent.MAX_AUTO_ESCALATIONS;
5160
+ const canEscalate = (outcome === "error" || outcome === "incomplete") && underCap;
5161
+ const hasThink = this.options.thinkModel !== false;
5162
+ const options = [];
5163
+ if (!rec.followUp && canEscalate && hasThink)
5164
+ options.push("ESCALATE to the Think tier (call Think with the same brief) if this is a hard/architectural problem the Act worker stalled or failed on");
5165
+ if (!rec.followUp && canEscalate)
5166
+ options.push("RE-DELEGATE to Act with a corrected brief if the failure looks recoverable (a wrong path, a fixable mistake)");
5167
+ options.push("ASK the user one short, concrete question if you genuinely cannot proceed without their input");
5168
+ options.push("ACCEPT and tell the user plainly what happened (don't dress a failure up as success)");
5169
+ const decision = options.length > 1 ? ` You must decide what to do next \u2014 choose ONE: ${options.map((o, i) => `(${i + 1}) ${o}`).join("; ")}. Pick exactly one and act on it; do not voice this as a finished success.` : ` Tell the user plainly what happened \u2014 do not present this as a finished success.`;
5170
+ const state = outcome === "error" ? `the worker FAILED with: ${body}` : `the worker STOPPED EARLY (${finishReason}) \u2014 its result is PARTIAL, not a finished success: ${body}`;
5171
+ return `${opener} Original request: "${rec.brief}". Outcome: ${state}.${decision}`;
5172
+ }
5088
5173
  onWorkerSettled(id, res) {
5089
5174
  this.dropAsk(id);
5090
5175
  const rec = this.tasks.get(id);
@@ -5099,16 +5184,18 @@ Another agent just implemented the above. Independently check the CURRENT state
5099
5184
  }
5100
5185
  rec.status = "done";
5101
5186
  rec.result = res.text;
5102
- log9.verbose(`task ${id} done (${res.steps} steps)`);
5187
+ const incomplete = res.finishReason !== "stop";
5188
+ log9.verbose(`task ${id} done (${res.steps} steps${incomplete ? `, INCOMPLETE: ${res.finishReason}` : ""})`);
5103
5189
  this.notify("task_done", `task ${id} (${rec.label}) completed`, {
5104
5190
  id,
5105
5191
  text: res.text,
5106
5192
  usage: res.usage,
5107
5193
  usageEstimated: res.usageEstimated,
5194
+ finishReason: res.finishReason,
5108
5195
  steps: res.steps,
5109
5196
  toolCalls: res.messages.filter((m) => m.role === "tool").length
5110
5197
  });
5111
- this.queueRevoice(`[task ${id} completed] ${res.text}`);
5198
+ this.queueRevoice(this.integrationPrompt(rec, incomplete ? "incomplete" : "ok", res.text, res.finishReason), incomplete);
5112
5199
  }
5113
5200
  onWorkerFailed(id, err) {
5114
5201
  this.failTask(this.tasks.get(id), err instanceof Error ? err.message : String(err));
@@ -5119,7 +5206,7 @@ Another agent just implemented the above. Independently check the CURRENT state
5119
5206
  rec.result = msg;
5120
5207
  log9.warn(`task ${rec.id} failed: ${msg}`);
5121
5208
  this.notify("task_error", `task ${rec.id} (${rec.label}) failed: ${msg}`);
5122
- this.queueRevoice(`[task ${rec.id} failed] ${msg}`);
5209
+ this.queueRevoice(this.integrationPrompt(rec, "error", msg, "error"), true);
5123
5210
  }
5124
5211
  // --- voice tools (closures over this instance) ---
5125
5212
  /** Live-switch the think tier: `false` disables (removes the Think tool from the voice agent),
@@ -5133,13 +5220,16 @@ Another agent just implemented the above. Independently check the CURRENT state
5133
5220
  if (model === false && i >= 0) tools.splice(i, 1);
5134
5221
  else if (model !== false && i < 0) tools.push(this.thinkTool());
5135
5222
  }
5136
- /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id. */
5137
- async dispatch(brief, tier = "act", label) {
5223
+ /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
5224
+ * `followUp` marks an automatic escalation/re-delegation (set by the integration turn) so the new
5225
+ * task's own integration turn won't escalate again — capping auto-follow-ups to one hop. */
5226
+ async dispatch(brief, tier = "act", label, followUp = false) {
5138
5227
  if (tier === "think" && this.options.thinkModel === false) tier = "act";
5228
+ if (followUp) this.autoEscalations++;
5139
5229
  const id = `t${++this.seq}`;
5140
5230
  const lbl = label ?? tier;
5141
5231
  await this.options.onTaskStart?.(id, lbl);
5142
- this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier);
5232
+ this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier, brief, followUp);
5143
5233
  this.notify("task_started", `task ${id} (${lbl}) started`, { id, brief, tier });
5144
5234
  return id;
5145
5235
  }
@@ -5159,7 +5249,7 @@ Another agent just implemented the above. Independently check the CURRENT state
5159
5249
  this.turnDispatched = true;
5160
5250
  this.turnBriefs.add(String(brief ?? ""));
5161
5251
  this.voice.options.toolChoice = "none";
5162
- const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0);
5252
+ const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0, this.turnFollowUp);
5163
5253
  return `Acting on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
5164
5254
  }
5165
5255
  };
@@ -5180,7 +5270,7 @@ Another agent just implemented the above. Independently check the CURRENT state
5180
5270
  this.turnDispatched = true;
5181
5271
  this.turnBriefs.add(String(brief ?? ""));
5182
5272
  this.voice.options.toolChoice = "none";
5183
- const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0);
5273
+ const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0, this.turnFollowUp);
5184
5274
  return `Thinking on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
5185
5275
  }
5186
5276
  };
@@ -5506,6 +5596,11 @@ var VoiceEngineOptions = class {
5506
5596
  /** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
5507
5597
  * Gives the user time to finish their thought without triggering a model call. */
5508
5598
  incompleteMergeMs = 1500;
5599
+ /** Grace window (ms) after an utterance dispatches, during which the user's own trailing audio cannot
5600
+ * barge the reply it requested. Soniox keeps finalizing partials past <end>; without this they read
5601
+ * as a barge and abort the fresh turn (live: mid-sentence self-interruption + steps=1→steps=0 double
5602
+ * abort). Short enough that a genuine immediate barge ("no wait—") still lands right after. */
5603
+ bargeGraceMs = 600;
5509
5604
  /** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
5510
5605
  holdFiller = "";
5511
5606
  /** Called when the engine holds an incomplete utterance (host can render a visual cue). */
@@ -5561,6 +5656,9 @@ var VoiceEngine = class _VoiceEngine {
5561
5656
  suspectUntil = 0;
5562
5657
  ackAt = 0;
5563
5658
  // when the micro-ack was spoken — its echo can leak before the AEC filter converges
5659
+ bargeGraceUntil = 0;
5660
+ // no barge-in until this time — the user's OWN trailing audio (after the
5661
+ // utterance that JUST dispatched this turn) must not immediately re-interrupt the reply it requested.
5564
5662
  pendingUtt = "";
5565
5663
  // endpointed text held for the merge window
5566
5664
  pendingTimer = null;
@@ -5747,6 +5845,10 @@ var VoiceEngine = class _VoiceEngine {
5747
5845
  }
5748
5846
  handlePartial(text) {
5749
5847
  if (this.speaking) {
5848
+ if (now() < this.bargeGraceUntil) {
5849
+ if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
5850
+ return;
5851
+ }
5750
5852
  if (this.overlapCapable) {
5751
5853
  const txt = text.trim();
5752
5854
  if (!txt || txt === this.lastOverlapPartial) return;
@@ -5787,6 +5889,27 @@ var VoiceEngine = class _VoiceEngine {
5787
5889
  }
5788
5890
  if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
5789
5891
  }
5892
+ /** Merge a resumed utterance into the pending one, deduping any word-overlap. Soniox re-finalizes
5893
+ * overlapping audio when the silence-timer and the semantic `<end>` both endpoint a growing
5894
+ * utterance (or after a reconnect): the next "utterance" repeats the tail of the previous one, and
5895
+ * a naive `${prev} ${next}` produced the live duplication ("Um, I want to check if Um, I want to
5896
+ * check if…"). Find the longest suffix of `prev`'s words that prefixes `next` and drop it. */
5897
+ mergeUtterance(prev, next) {
5898
+ if (!prev) return next;
5899
+ if (!next) return prev;
5900
+ const pw = prev.split(/\s+/), nw = next.split(/\s+/);
5901
+ const norm2 = (w) => w.toLowerCase().replace(/[^a-z0-9]/g, "");
5902
+ const max = Math.min(pw.length, nw.length);
5903
+ for (let k = max; k > 0; k--) {
5904
+ let match = true;
5905
+ for (let i = 0; i < k; i++) if (norm2(pw[pw.length - k + i]) !== norm2(nw[i])) {
5906
+ match = false;
5907
+ break;
5908
+ }
5909
+ if (match) return [...pw, ...nw.slice(k)].join(" ");
5910
+ }
5911
+ return `${prev} ${next}`;
5912
+ }
5790
5913
  static TRAIL_RE = /(?:^|\s)(?:and|but|or|so|to|the|a|an|of|in|for|with|that|if|uh|um|like|about|from|into|on|is|are|was|were|,)$/i;
5791
5914
  /** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
5792
5915
  looksIncomplete(text) {
@@ -5806,7 +5929,7 @@ var VoiceEngine = class _VoiceEngine {
5806
5929
  this.ackAt = 0;
5807
5930
  return;
5808
5931
  }
5809
- this.pendingUtt = this.pendingUtt ? `${this.pendingUtt} ${text}` : text;
5932
+ this.pendingUtt = this.mergeUtterance(this.pendingUtt, text);
5810
5933
  if (this.pendingTimer) clearTimeout(this.pendingTimer);
5811
5934
  if (this.options.incompleteMergeMs && this.looksIncomplete(this.pendingUtt)) {
5812
5935
  log10.verbose(`hold: incomplete utterance "${this.pendingUtt.slice(-40)}"`);
@@ -5831,6 +5954,7 @@ var VoiceEngine = class _VoiceEngine {
5831
5954
  this.pendingUtt = "";
5832
5955
  if (text) {
5833
5956
  this.turnStartAt = now();
5957
+ this.bargeGraceUntil = now() + this.options.bargeGraceMs;
5834
5958
  this.options.onUtterance(text);
5835
5959
  }
5836
5960
  }