npm - @livx.cc/agentx - Versions diffs - 0.95.6 → 0.96.1 - Mend

@livx.cc/agentx 0.95.6 → 0.96.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.ts — CHANGED

@@ -831,6 +831,11 @@ interface TaskRecord {
     tail: string[];
     /** Final report text (or error message) once the task settled. */
     result?: string;
+    /** The original (untemplated) brief — replayed verbatim into an escalation/re-delegation. */
+    brief: string;
+    /** True when this task is ITSELF an automatic follow-up (escalation/re-delegate) of a prior task.
+     *  Its integration turn may NOT escalate again — caps auto-follow-up to one hop per original task. */
+    followUp: boolean;
 }
 type WorkerTier = 'act' | 'think';
 declare class DuplexAgentOptions {
@@ -903,6 +908,14 @@ declare class DuplexAgent {
     private queue;
     private seq;
     private pendingEvents;
+    /** Out-of-band follow-up attribution for the events coalescing into the next flush turn: TRUE iff ≥1 of
+     *  the tasks being integrated was NON-CLEAN (early-stop/failure). Carried out-of-band on the enqueue call
+     *  by the caller that KNOWS the outcome — a plain boolean the MODEL CANNOT PERTURB. It is NOT scanned from
+     *  worker-authored event text (v1: an "Outcome:" substring over-stamped siblings) and NOT keyed on a brief
+     *  string the reflex re-authors (v2: a paraphrased escalation brief missed the Set → followUp:false →
+     *  RE-ENABLED unbounded auto-escalation, the dangerous runaway direction). See [[wrong-discriminator]] /
+     *  [[drive-real-reflex]] / [[fakeaiclient-blind-to-wire-format]]. */
+    private pendingNonClean;
     private flushQueued;
     /** Per-voice-turn guards (reset by resetTurn at each turn's start). The reflex is a weak model:
      *  left unguarded it polls TaskStatus after a dispatch and/or dispatches silently (dead air).
@@ -914,6 +927,21 @@ declare class DuplexAgent {
     private reflexBuf;
     private reflexForwarded;
     private fabricationCut;
+    /** TRUE for the duration of a re-voice turn that is integrating ≥1 NON-CLEAN task (turn-eligibility,
+     *  carried out-of-band — NOT derived from any worker/brief string). ANY Act/Think dispatched in such a
+     *  turn is stamped followUp:true. This GUARANTEES the dangerous direction is impossible: a genuine
+     *  escalation (even one with a paraphrased brief) ALWAYS lands in a non-clean integration turn, so it is
+     *  ALWAYS recognized as a follow-up and CANNOT re-escalate (one hop). The single-dispatch-per-turn guard
+     *  means at most one dispatch happens per flush, so realistically "the one dispatch IS the escalation".
+     *  ACCEPTED SAFE-DIRECTION ERROR: if the reflex instead dispatches FRESH unrelated work during a non-clean
+     *  flush (rare — and only possible when it batches multiple calls in one step, bypassing the guard), that
+     *  fresh task is over-stamped followUp:true and forgoes ONE future auto-escalation. That is SAFE (it only
+     *  ever REMOVES a future escalation, never adds one — no runaway) and is the correct side to err on. */
+    private turnFollowUp;
+    /** Hard absolute backstop against runaway regardless of attribution: total automatic escalations across
+     *  the whole conversation. Once it hits MAX_AUTO_ESCALATIONS, no integration turn offers escalate/re-delegate. */
+    private autoEscalations;
+    private static readonly MAX_AUTO_ESCALATIONS;
     /** Parked worker questions awaiting a (voice-relayed) user answer, keyed by ask id. */
     readonly pendingAsks: Map<string, {
         question: string;
@@ -924,6 +952,9 @@ declare class DuplexAgent {
     constructor(options?: Partial<DuplexAgentOptions>);
     /** Resolve memory tools + inject index into voice system prompt (once). */
     private initMemory;
+    /** Flush any held-back trailing fragment (a possible `[task` opener that never completed) once the
+     *  turn's stream is done — so a legit message ending in "[t" isn't silently dropped. */
+    private flushHeldReflexTail;
     /** Clear the per-turn guards. Called at the head of every voice turn (user send + re-voice flush). */
     private resetTurn;
     /** preToolUse guard on the reflex: once it has dispatched this turn, a dispatch is "said my piece,
@@ -950,7 +981,10 @@ declare class DuplexAgent {
     /** Promise-chain mutex: turns run strictly one at a time; a failed turn doesn't poison the chain. */
     private enqueue;
     private notify;
-    /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn. */
+    /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
+     *  `nonClean` (out-of-band boolean, set by the caller that KNOWS this event integrates a NON-CLEAN outcome)
+     *  marks the coalesced flush as a non-clean integration turn — turn-eligibility, never inferred from event
+     *  text and never keyed on a (re-authored) brief string. Any dispatch in such a turn is a follow-up. */
     private queueRevoice;
     /** The worker's brief: the Act/Think args + a STATIC text snapshot of the recent conversation.
      *  Act briefs get a self-verify footer — the worker's report is trusted without review, so it
@@ -976,6 +1010,21 @@ declare class DuplexAgent {
     parkQuestion(askId: string, question: string): Promise<string>;
     /** Resolve any question a settling/cancelled task left parked (its answer can no longer matter). */
     private dropAsk;
+    /** Build the INTEGRATION TURN prompt for a settled worker. Instead of trust-and-forwarding the raw
+     *  result, the result re-enters the reflex as a decision (like a tool_result flowing back into a normal
+     *  agent loop): the reflex evaluates the outcome against the original intent and chooses what to do next.
+     *
+     *  Decision branches (the reflex acts on them with EXISTING tools — no new surface):
+     *    • accept     → just SPEAK the result to the user (happy path; the only move on a clean success).
+     *    • escalate   → call `Think` with the SAME brief — only when Act failed/stalled AND a Think tier
+     *                   exists AND this task wasn't already a follow-up (one hop max). Wires the dead
+     *                   "Reserve Think for a problem Act already FAILED at" promise.
+     *    • re-delegate→ call `Act` with a CORRECTED brief — for a recoverable error / partial result.
+     *    • ask        → ask the user ONE concrete question if genuinely blocked.
+     *
+     *  Keeps the `[task <id> completed]` / `[task <id> failed]` opener so existing coalescing + the
+     *  failed-revoice fallback still fire, and the per-event transcript markers stay intact. */
+    private integrationPrompt;
     private onWorkerSettled;
     private onWorkerFailed;
     private failTask;
@@ -984,8 +1033,10 @@ declare class DuplexAgent {
      *  construction — the tool's own description carries the routing guidance, so a live enable works;
      *  dispatch()'s think→act fallback covers any straggler calls after a live disable. */
     setThinkModel(model: string | false): void;
-    /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id. */
-    dispatch(brief: string, tier?: WorkerTier, label?: string): Promise<string>;
+    /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
+     *  `followUp` marks an automatic escalation/re-delegation (set by the integration turn) so the new
+     *  task's own integration turn won't escalate again — capping auto-follow-ups to one hop. */
+    dispatch(brief: string, tier?: WorkerTier, label?: string, followUp?: boolean): Promise<string>;
     private actTool;
     private thinkTool;
     private taskStatusTool;
@@ -1148,6 +1199,11 @@ declare class VoiceEngineOptions {
     /** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
      *  Gives the user time to finish their thought without triggering a model call. */
     incompleteMergeMs: number;
+    /** Grace window (ms) after an utterance dispatches, during which the user's own trailing audio cannot
+     *  barge the reply it requested. Soniox keeps finalizing partials past <end>; without this they read
+     *  as a barge and abort the fresh turn (live: mid-sentence self-interruption + steps=1→steps=0 double
+     *  abort). Short enough that a genuine immediate barge ("no wait—") still lands right after. */
+    bargeGraceMs: number;
     /** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
     holdFiller: string;
     /** Called when the engine holds an incomplete utterance (host can render a visual cue). */
@@ -1196,6 +1252,7 @@ declare class VoiceEngine {
     private hot;
     private suspectUntil;
     private ackAt;
+    private bargeGraceUntil;
     private pendingUtt;
     private pendingTimer;
     private lastInterrupted;
@@ -1239,6 +1296,12 @@ declare class VoiceEngine {
      *  longer ones on count. */
     private genuine;
     private handlePartial;
+    /** Merge a resumed utterance into the pending one, deduping any word-overlap. Soniox re-finalizes
+     *  overlapping audio when the silence-timer and the semantic `<end>` both endpoint a growing
+     *  utterance (or after a reconnect): the next "utterance" repeats the tail of the previous one, and
+     *  a naive `${prev} ${next}` produced the live duplication ("Um, I want to check if Um, I want to
+     *  check if…"). Find the longest suffix of `prev`'s words that prefixes `next` and drop it. */
+    private mergeUtterance;
     private static readonly TRAIL_RE;
     /** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
     private looksIncomplete;

package/dist/index.js — CHANGED

@@ -4690,17 +4690,26 @@ var DuplexAgentOptions = class {
   memoryUserDir;
 };
 var RESERVED_EVENT_MARKER = /\[task\b[^\]\n]*\b(?:completed|failed|progress|asks)\b/i;
+var RESERVED_EVENT_OPENER = /\[\s*task\b/i;
 var VOICE_SYSTEM_PROMPT = 'You are a spoken voice assistant \u2014 the user HEARS everything you say. Use short sentences. One idea per sentence. No markdown, no bullet lists, no code blocks, no headings, no emoji.\nThis holds even when asked to "print", "list", "show", or "make a table" \u2014 there is no screen for the spoken channel. Speak it as flowing prose ("Tuesday is half a meter, Wednesday a bit less\u2026"), or if they truly need it on screen, route it to Act to render. Never emit dashes or pipes into speech.\nKeep turns SHORT \u2014 one to three sentences, then stop. Never lecture, enumerate cases, or add caveats unprompted. Conversation is a fast exchange: give the one thing asked, and let the user pull more if they want it.\nYou have three cognitive tiers \u2014 like a human brain:\n\u2022 YOU (reflex) \u2014 instant, lightweight. Handle greetings, simple questions, status checks, QuickLook.\n\u2022 `Act` \u2014 your hands. A background worker with its own configured tools and access to the user\'s environment (files and shell{{WORKER_WEB}}). Use for reading, editing, searching, running tasks, building \u2014 any real work.\n{{THINK_SLOT}}\nWhen you are unsure whether you can do or access something, do NOT assume and do NOT claim a capability you have not confirmed. To check what you can do, QuickLook `capabilities` (instant \u2014 it lists your worker\'s real tools) and answer from that. Never promise an ability that is not in your capabilities; if it is not there, tell the user plainly you can\'t. To actually DO real work, call `Act`. When the user mentions their project, folder, files, or environment ("this project", "the current folder", "my code"), call `Act` IMMEDIATELY \u2014 do not ask for paths or details the worker can discover itself. Never pretend to have done the work or invent results \u2014 the worker\'s report is your only source.\nYou cannot mute the microphone or stop voice capture yourself \u2014 no tool does it. 
If the user asks you to stop listening or turn the voice off, never claim you did: tell them to say exactly "voice off" (handled by the app directly), or type /voice.\nYou are NOT a knowledge base. For any question whose answer needs SPECIFIC verifiable facts you do not already have in hand \u2014 how to build/configure/implement something, exact API, library, entitlement, command or option names, current events, or particular numbers, dates, or names \u2014 do NOT answer from your own memory: you will confidently make things up (a fake API, a wrong entitlement, an event that did not happen). Route it to `Act`, which can search and verify, and speak only what its report says. Answer inline ONLY for general conversation, chit-chat, and trivia you are sure of, or facts you can see via QuickLook. When elaborating on a completed task ("tell me more", "the gist"), stay strictly within what that result actually said \u2014 if the user asks for something the result did not cover, that is NEW information: dispatch `Act`, do not improvise.\nALWAYS react before you work: the FIRST thing in your turn is a brief spoken acknowledgement of what you heard and what you are about to do ("got it \u2014 opening that now", "sure, let me pull it up", "okay, checking"). NEVER call a tool (Act, Think, QuickLook) silently \u2014 the user must hear you react before you go quiet to work. After dispatching Act or Think, that same one short sentence IS your turn \u2014 end it and do not wait for the result.\nResults arrive later as events like "[task t1 completed] \u2026" or "[task t1 failed] \u2026". When one arrives, speak the USEFUL gist in one or two short sentences \u2014 the actual answer the user wanted (the headline finding, the key numbers), not the thinnest possible "it\'s done". A forecast \u2192 say it\'s calm AND that it\'s good for swimming but not surf; a count \u2192 say the number. Be brief, but do not drop the substance. 
DISTILL vs DELIVER \u2014 know which the request wants. When the result is a FACT to extract (a forecast, a count, a status), distill the headline. But when the user wanted specific CONTENT \u2014 a joke, a quote, a name, a definition, the actual lines \u2014 that content IS the deliverable: LEAD WITH IT. Your first words ARE the joke / the quote / the answer itself, before any "got it" or offer. SPEAK the content, never a comment ABOUT it: "why was six afraid of seven? because seven ate nine" \u2014 NOT "those are funny" or "I found a couple". If you did not actually say the joke/quote/answer aloud this turn, you FAILED the request, no matter how friendly the wrapper. A short joke is short \u2014 just say it. NEVER speak as if you already delivered something you did not actually say aloud THIS turn: do not say "those are\u2026", "there you go", or offer "a few MORE" when you never voiced the first one. The on-screen text is invisible to a voice user \u2014 if you did not speak it, they did not get it, so deliver it before you comment on it or offer more. If the result is a LIST (search results, multiple files/matches), the user CANNOT see it \u2014 there is no screen and no numbered menu to point at. Speak the gist: say what you found and name the top one or two by NAME (the source, not "the first one" or a number), then ask plainly if they want more. Never ask them to "pick which one" or reference items by position. The completed result stays in YOUR context \u2014 it is yours to draw on. When the user follows up ("tell me more", "what else", "and?"), answer FROM that result first: you already have the detail, so elaborate on what you have. Do NOT spawn a fresh worker to re-search or re-gather what you were just handed. Re-dispatch ONLY when genuinely new information is needed \u2014 e.g. the user wants the full contents of a SPECIFIC source, which is one WebFetch of that URL, not a brand-new search. 
"[task t1 progress] \u2026" events are interim status, NOT results \u2014 give at most a half-sentence aside ("still on it \u2014 running tests now") and end your turn. Never present progress as a finished result.\nCRITICAL: while a task is still running you have NO answer yet \u2014 never state a specific result of any kind (a number, size, count, name, path, or value). The real answer arrives ONLY in the "[task \u2026 completed]" event; inventing one meanwhile (a made-up disk size, commit count, etc.) is a serious error. Until then, only acknowledge and wait.\nNever read raw file paths, diffs, or code aloud verbatim.\nDo NOT end every turn with the same canned offer ("want a rundown?", "want the steps?"). Offer once at most; if the user pushes back, repeats themselves, or sounds unsatisfied ("you know what I mean?", "think deeper", "are you sure?"), do NOT re-offer the same thing \u2014 change approach: dispatch `Act`/`Think` to actually dig in, or ask one concrete clarifying question. Repeating a non-answer is worse than silence.\n"[task t1 asks] \u2026" events are QUESTIONS from a background task \u2014 relay to the user in your own words, short, then end your turn. When the user answers, call `AnswerTask` with that id and their answer. NEVER answer on the user\'s behalf for permissions or risky operations; if their reply is ambiguous, confirm first.\nIf the user\'s message sounds INCOMPLETE \u2014 trailing off mid-sentence, a fragment that needs more context ("and then we", "but the problem is"), hesitation fillers ("uh", "um") \u2014 call `Hold` instead of answering. This keeps listening for the rest of their thought. Only respond with substance when you have a complete question or request.\nDispatch discipline: send ONE self-contained task per request \u2014 a single worker with the full brief beats several workers with fragments (each worker starts fresh and re-discovers context). 
NEVER dispatch a worker just to read files or gather information \u2014 workers explore and discover context themselves; pass on what you already know and let one worker do the whole job. Split into parallel tasks only when the user asks for genuinely independent things. When a task completes, report its result and stop \u2014 do NOT dispatch follow-up work (verification, polish, extras) the user did not ask for, unless the report itself signals failure or doubt.\nDo not fire a second Act/Think for work already in flight, and NEVER spawn a second task to re-count, cross-check, or verify a result a worker already gave you \u2014 trust its answer; a single question gets ONE task. Call `TaskStatus` at most ONCE per turn; if a task is still running, just say "still on it" and end the turn \u2014 never poll it again and again in a loop. Use `CancelTask` when the user asks to stop something.\nPRIORITY: when the user says goodbye or wants to end/finish/wrap up the session ("ok bye", "that\'s all", "let\'s finish", "let\'s end", "goodnight", "exit", "wrap up"), call `ExitSession` IMMEDIATELY \u2014 do not act, do not check status, just exit.\nFor TRIVIAL instant lookups only \u2014 current time, git branch, listing a folder, peeking at a small file, or checking your own `capabilities`/tools \u2014 use `QuickLook` (instant, no task). Whenever the user asks what you can do or whether you have some ability, QuickLook `capabilities` and answer from that \u2014 never guess. Anything requiring searching, reasoning, running commands, or editing goes through `Act`.\n{{MEMORY_SLOT}}\nUser messages may arrive via speech-to-text and can carry transcription artifacts \u2014 odd words, cut-offs, homophones ("for you" vs "folder"). Read for INTENT, not surface text. If a message seems garbled or surprising, briefly confirm what they meant ("did you mean\u2026?") instead of answering the literal words.';
 var THINK_GUIDANCE = "\u2022 `Think` \u2014 your brain. A premium reasoning model, FAR more expensive than Act. Reserve it for open-ended architecture/design questions, or a problem Act already FAILED at. ALL implementation work \u2014 coding, refactoring, debugging, edge cases, tests \u2014 goes to Act; Act is highly capable. Never send the same work to both.";
 var THINK_DISABLED_GUIDANCE = "(Think tier is not available \u2014 use Act for all escalations.)";
 var VOICE_STYLE_CONVERSATIONAL = `Speak like a person in a live conversation, not an assistant reading a script. React first, then deliver: a quick impulsive beat ("oh nice", "hmm, hold on", "ah, got it") before the substance. Use contractions always. Vary sentence length \u2014 some very short. Light fillers and backchannels are fine ("mm-hm", "right", "let's see") but at most one per reply \u2014 never stack them. When you escalate to Act or Think, say it like a human would ("hang on, let me actually dig into that \u2014 gimme a minute") instead of announcing a task. When a result comes back, react to it like you just found out ("okay so \u2014 turns out\u2026"). Match the user's energy: a quick question gets a quick answer \u2014 a few words is a perfectly good turn. Prefer a short answer plus an offer ("want the details?") over covering everything. Never narrate your own mechanics (no "I will now act", no task ids out loud).`;
-var DuplexAgent = class {
+var DuplexAgent = class _DuplexAgent {
   options;
   voice;
   tasks = /* @__PURE__ */ new Map();
   queue = Promise.resolve();
   seq = 0;
   pendingEvents = [];
+  /** Out-of-band follow-up attribution for the events coalescing into the next flush turn: TRUE iff ≥1 of
+   *  the tasks being integrated was NON-CLEAN (early-stop/failure). Carried out-of-band on the enqueue call
+   *  by the caller that KNOWS the outcome — a plain boolean the MODEL CANNOT PERTURB. It is NOT scanned from
+   *  worker-authored event text (v1: an "Outcome:" substring over-stamped siblings) and NOT keyed on a brief
+   *  string the reflex re-authors (v2: a paraphrased escalation brief missed the Set → followUp:false →
+   *  RE-ENABLED unbounded auto-escalation, the dangerous runaway direction). See [[wrong-discriminator]] /
+   *  [[drive-real-reflex]] / [[fakeaiclient-blind-to-wire-format]]. */
+  pendingNonClean = false;
   flushQueued = false;
   /** Per-voice-turn guards (reset by resetTurn at each turn's start). The reflex is a weak model:
    *  left unguarded it polls TaskStatus after a dispatch and/or dispatches silently (dead air).
@@ -4719,6 +4728,21 @@ var DuplexAgent = class {
   // chars of reflexBuf already forwarded to the host/TTS
   fabricationCut = false;
   // reflex emitted a reserved [task …] marker → suppress its tail
+  /** TRUE for the duration of a re-voice turn that is integrating ≥1 NON-CLEAN task (turn-eligibility,
+   *  carried out-of-band — NOT derived from any worker/brief string). ANY Act/Think dispatched in such a
+   *  turn is stamped followUp:true. This GUARANTEES the dangerous direction is impossible: a genuine
+   *  escalation (even one with a paraphrased brief) ALWAYS lands in a non-clean integration turn, so it is
+   *  ALWAYS recognized as a follow-up and CANNOT re-escalate (one hop). The single-dispatch-per-turn guard
+   *  means at most one dispatch happens per flush, so realistically "the one dispatch IS the escalation".
+   *  ACCEPTED SAFE-DIRECTION ERROR: if the reflex instead dispatches FRESH unrelated work during a non-clean
+   *  flush (rare — and only possible when it batches multiple calls in one step, bypassing the guard), that
+   *  fresh task is over-stamped followUp:true and forgoes ONE future auto-escalation. That is SAFE (it only
+   *  ever REMOVES a future escalation, never adds one — no runaway) and is the correct side to err on. */
+  turnFollowUp = false;
+  /** Hard absolute backstop against runaway regardless of attribution: total automatic escalations across
+   *  the whole conversation. Once it hits MAX_AUTO_ESCALATIONS, no integration turn offers escalate/re-delegate. */
+  autoEscalations = 0;
+  static MAX_AUTO_ESCALATIONS = 8;
   /** Parked worker questions awaiting a (voice-relayed) user answer, keyed by ask id. */
   pendingAsks = /* @__PURE__ */ new Map();
   /** Lazily resolved memory tools (async loadMemory runs in initMemory). */
@@ -4761,7 +4785,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
           if (this.fabricationCut) return;
           const msg = ev.message;
           this.reflexBuf += msg;
-          const m = this.reflexBuf.match(RESERVED_EVENT_MARKER);
+          const m = this.reflexBuf.match(RESERVED_EVENT_MARKER) ?? this.reflexBuf.match(RESERVED_EVENT_OPENER);
           if (m) {
             this.fabricationCut = true;
             log9.warn(`reflex fabricated a [task \u2026] event in its spoken stream \u2014 cutting it (kept ${m.index} chars)`);
@@ -4771,8 +4795,15 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
             host.notify?.({ ...ev, message: safe });
             return;
           }
-          this.reflexForwarded = this.reflexBuf.length;
-          if (msg.trim()) this.spokeThisTurn = true;
+          const held = this.reflexBuf.length - this.reflexForwarded;
+          const partial = held > 0 && /\[\s*t?a?s?k?$/i.test(this.reflexBuf.slice(-Math.min(held, 6)));
+          const upto = partial ? this.reflexBuf.length - this.reflexBuf.slice(-6).match(/\[\s*t?a?s?k?$/i)[0].length : this.reflexBuf.length;
+          const out = this.reflexBuf.slice(this.reflexForwarded, upto);
+          this.reflexForwarded = upto;
+          if (!out) return;
+          if (out.trim()) this.spokeThisTurn = true;
+          host.notify?.({ ...ev, message: out });
+          return;
         }
         host.notify?.(ev);
       }
@@ -4805,6 +4836,16 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
     this.voice.options.tools.push(...mem.tools);
     if (mem.index) this.voice.options.systemPrompt += "\n\n" + mem.index;
   }
+  /** Flush any held-back trailing fragment (a possible `[task` opener that never completed) once the
+   *  turn's stream is done — so a legit message ending in "[t" isn't silently dropped. */
+  flushHeldReflexTail() {
+    if (this.fabricationCut) return;
+    const tail = this.reflexBuf.slice(this.reflexForwarded);
+    this.reflexForwarded = this.reflexBuf.length;
+    if (!tail) return;
+    if (tail.trim()) this.spokeThisTurn = true;
+    this.options.host?.notify?.({ kind: "text_delta", message: tail });
+  }
   /** Clear the per-turn guards. Called at the head of every voice turn (user send + re-voice flush). */
   resetTurn() {
     this.turnDispatched = false;
@@ -4813,6 +4854,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
     this.reflexBuf = "";
     this.reflexForwarded = 0;
     this.fabricationCut = false;
+    this.turnFollowUp = false;
     this.voice.options.toolChoice = void 0;
   }
   /** preToolUse guard on the reflex: once it has dispatched this turn, a dispatch is "said my piece,
@@ -4842,17 +4884,18 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
   /** A turn that voiced nothing is dead air. Re-prompt the reflex ONCE so the LLM itself voices a short
    *  line (no template). If it STILL says nothing, fall back to a minimal line so silence never ships.
    *  Wording adapts to whether work was dispatched (an ack) or the inline reply was simply lost. */
-  async ackIfSilent() {
+  async ackIfSilent(fallback) {
     const dispatched = this.turnDispatched;
     this.nudging = true;
     try {
-      await this.voice.send(dispatched ? "[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools." : "[reminder] You said nothing to the user this turn. Give your ONE short spoken reply now \u2014 no tools.");
+      await this.voice.send(fallback ? "[reminder] You said nothing to the user this turn. Tell them, in ONE short spoken sentence, what just happened \u2014 no tools." : dispatched ? "[reminder] You dispatched a task but said nothing to the user. Say ONE short spoken acknowledgement now \u2014 no tools." : "[reminder] You said nothing to the user this turn. Give your ONE short spoken reply now \u2014 no tools.");
     } catch (e) {
       log9.warn(`ack nudge failed: ${e instanceof Error ? e.message : e}`);
     } finally {
       this.nudging = false;
     }
-    if (!this.spokeThisTurn) this.options.host?.notify?.({ kind: "text_delta", message: dispatched ? "Okay, on it." : "Sorry, could you say that again?" });
+    if (!this.spokeThisTurn)
+      this.options.host?.notify?.({ kind: "text_delta", message: fallback ?? (dispatched ? "Okay, on it." : "Sorry, could you say that again?") });
   }
   /** One user turn: the voice agent streams the reply (and may Act/Think). Serialized with re-voice turns. */
   send(content) {
@@ -4860,6 +4903,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
       await this.initMemory();
       this.resetTurn();
       const res = await this.voice.send(content);
+      this.flushHeldReflexTail();
       if (this.silentTurn) await this.ackIfSilent();
       return res;
     });
@@ -4894,18 +4938,27 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
   notify(kind, message, data) {
     this.options.host?.notify?.({ kind, message, data });
   }
-  /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn. */
-  queueRevoice(event) {
+  /** Queue a `[task …]` event for re-voicing. Events arriving while the voice is busy coalesce into ONE turn.
+   *  `nonClean` (out-of-band boolean, set by the caller that KNOWS this event integrates a NON-CLEAN outcome)
+   *  marks the coalesced flush as a non-clean integration turn — turn-eligibility, never inferred from event
+   *  text and never keyed on a (re-authored) brief string. Any dispatch in such a turn is a follow-up. */
+  queueRevoice(event, nonClean = false) {
     this.pendingEvents.push(event);
+    if (nonClean) this.pendingNonClean = true;
     if (this.flushQueued) return;
     this.flushQueued = true;
     void this.enqueue(async () => {
       this.flushQueued = false;
       const events = this.pendingEvents.splice(0);
+      const nonCleanTurn = this.pendingNonClean;
+      this.pendingNonClean = false;
       if (!events.length) return;
+      const failed = events.find((e) => /^\[task\b[^\]\n]*\bfailed\b/i.test(e));
       this.resetTurn();
+      this.turnFollowUp = nonCleanTurn;
       await this.voice.send(events.join("\n"));
-      if (this.silentTurn) await this.ackIfSilent();
+      this.flushHeldReflexTail();
+      if (this.silentTurn) await this.ackIfSilent(failed ? "Sorry, that didn't work \u2014 the task failed." : void 0);
       this.notify("revoice_done", "");
     });
   }
@@ -4922,7 +4975,7 @@ Today's date: ${(/* @__PURE__ */ new Date()).toDateString()}.`;
 ${recent}` : brief) + verify;
   }
   /** Spawn a detached worker for task `id`; its settlement notifies + enqueues the re-voice turn. */
-  spawnWorker(id, label, briefText, tier = "act") {
+  spawnWorker(id, label, briefText, tier, brief, followUp) {
     const o = this.options;
     const tierOpts = tier === "think" ? o.thinkOptions : o.actOptions;
     const tierModel = tier === "think" ? o.thinkModel : o.actModel;
@@ -4974,7 +5027,7 @@ ${recent}` : brief) + verify;
       // shared with the checker so a cancel tears down both
     };
     const promise = new Agent(agentOpts).run(briefText).then((res) => this.maybeVerify(id, briefText, res, tier, agentOpts)).then((res) => this.onWorkerSettled(id, res)).catch((err) => this.onWorkerFailed(id, err));
-    this.tasks.set(id, { id, label, status: "running", controller, promise, tail });
+    this.tasks.set(id, { id, label, status: "running", controller, promise, tail, brief, followUp });
     if (this.tasks.size > this.options.maxTaskRecords)
       for (const [tid, rec] of this.tasks) {
         if (this.tasks.size <= this.options.maxTaskRecords) break;
@@ -5085,6 +5138,38 @@ Another agent just implemented the above. Independently check the CURRENT state
   dropAsk(id) {
     this.pendingAsks.get(id)?.resolve(""); // if an ask is pending for `id`, resolve it with an empty answer; otherwise a no-op
   }
+  /** Build the INTEGRATION TURN prompt for a settled worker. Instead of trust-and-forwarding the raw
+   *  result, the result re-enters the reflex as a decision (like a tool_result flowing back into a normal
+   *  agent loop): the reflex evaluates the outcome against the original intent and chooses what to do next.
+   *
+   *  Decision branches (the reflex acts on them with EXISTING tools — no new surface):
+   *    • accept     → just SPEAK the result to the user (happy path; the only move on a clean success).
+   *    • escalate   → call `Think` with the SAME brief — only when Act failed/stalled AND a Think tier
+   *                   exists AND this task wasn't already a follow-up (one hop max). Wires the dead
+   *                   "Reserve Think for a problem Act already FAILED at" promise.
+   *    • re-delegate→ call `Act` with a CORRECTED brief — for a recoverable error / partial result.
+   *    • ask        → ask the user ONE concrete question if genuinely blocked.
+   *
+   *  Keeps the `[task <id> completed]` / `[task <id> failed]` opener so existing coalescing + the
+   *  failed-revoice fallback still fire, and the per-event transcript markers stay intact. */
+  integrationPrompt(rec, outcome, body, finishReason) {
+    const opener = outcome === "error" ? `[task ${rec.id} failed]` : `[task ${rec.id} completed]`;
+    if (outcome === "ok")
+      return `${opener} ${body}`;
+    const underCap = this.autoEscalations < _DuplexAgent.MAX_AUTO_ESCALATIONS;
+    const canEscalate = (outcome === "error" || outcome === "incomplete") && underCap;
+    const hasThink = this.options.thinkModel !== false;
+    const options = [];
+    if (!rec.followUp && canEscalate && hasThink)
+      options.push("ESCALATE to the Think tier (call Think with the same brief) if this is a hard/architectural problem the Act worker stalled or failed on");
+    if (!rec.followUp && canEscalate)
+      options.push("RE-DELEGATE to Act with a corrected brief if the failure looks recoverable (a wrong path, a fixable mistake)");
+    options.push("ASK the user one short, concrete question if you genuinely cannot proceed without their input");
+    options.push("ACCEPT and tell the user plainly what happened (don't dress a failure up as success)");
+    const decision = options.length > 1 ? ` You must decide what to do next \u2014 choose ONE: ${options.map((o, i) => `(${i + 1}) ${o}`).join("; ")}. Pick exactly one and act on it; do not voice this as a finished success.` : ` Tell the user plainly what happened \u2014 do not present this as a finished success.`;
+    const state = outcome === "error" ? `the worker FAILED with: ${body}` : `the worker STOPPED EARLY (${finishReason}) \u2014 its result is PARTIAL, not a finished success: ${body}`;
+    return `${opener} Original request: "${rec.brief}". Outcome: ${state}.${decision}`;
+  }
   onWorkerSettled(id, res) {
     this.dropAsk(id);
     const rec = this.tasks.get(id);
@@ -5099,16 +5184,18 @@ Another agent just implemented the above. Independently check the CURRENT state
     }
     rec.status = "done";
     rec.result = res.text;
-    log9.verbose(`task ${id} done (${res.steps} steps)`);
+    const incomplete = res.finishReason !== "stop";
+    log9.verbose(`task ${id} done (${res.steps} steps${incomplete ? `, INCOMPLETE: ${res.finishReason}` : ""})`);
     this.notify("task_done", `task ${id} (${rec.label}) completed`, {
       id,
       text: res.text,
       usage: res.usage,
       usageEstimated: res.usageEstimated,
+      finishReason: res.finishReason,
       steps: res.steps,
       toolCalls: res.messages.filter((m) => m.role === "tool").length
     });
-    this.queueRevoice(`[task ${id} completed] ${res.text}`);
+    this.queueRevoice(this.integrationPrompt(rec, incomplete ? "incomplete" : "ok", res.text, res.finishReason), incomplete);
   }
   onWorkerFailed(id, err) {
     this.failTask(this.tasks.get(id), err instanceof Error ? err.message : String(err));
@@ -5119,7 +5206,7 @@ Another agent just implemented the above. Independently check the CURRENT state
     rec.result = msg;
     log9.warn(`task ${rec.id} failed: ${msg}`);
     this.notify("task_error", `task ${rec.id} (${rec.label}) failed: ${msg}`);
-    this.queueRevoice(`[task ${rec.id} failed] ${msg}`);
+    this.queueRevoice(this.integrationPrompt(rec, "error", msg, "error"), true);
   }
   // --- voice tools (closures over this instance) ---
   /** Live-switch the think tier: `false` disables (removes the Think tool from the voice agent),
@@ -5133,13 +5220,16 @@ Another agent just implemented the above. Independently check the CURRENT state
     if (model === false && i >= 0) tools.splice(i, 1);
     else if (model !== false && i < 0) tools.push(this.thinkTool());
   }
-  /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id. */
-  async dispatch(brief, tier = "act", label) {
+  /** User/programmatic spawn: the CLI's /act and /think commands. Returns the task id.
+   *  `followUp` marks an automatic escalation/re-delegation (set by the integration turn) so the new
+   *  task's own integration turn won't escalate again — capping auto-follow-ups to one hop. */
+  async dispatch(brief, tier = "act", label, followUp = false) {
     if (tier === "think" && this.options.thinkModel === false) tier = "act"; // think tier disabled → run the request as act
+    if (followUp) this.autoEscalations++; // counted before the task starts; integrationPrompt compares this counter with MAX_AUTO_ESCALATIONS
     const id = `t${++this.seq}`;
     const lbl = label ?? tier; // label defaults to the tier name
     await this.options.onTaskStart?.(id, lbl);
-    this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier);
+    this.spawnWorker(id, lbl, this.buildBrief(brief, tier), tier, brief, followUp); // worker runs the built brief; the raw brief and followUp flag are stored on the task record
     this.notify("task_started", `task ${id} (${lbl}) started`, { id, brief, tier });
     return id;
   }
@@ -5159,7 +5249,7 @@ Another agent just implemented the above. Independently check the CURRENT state
         this.turnDispatched = true;
         this.turnBriefs.add(String(brief ?? ""));
         this.voice.options.toolChoice = "none";
-        const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0);
+        const id = await this.dispatch(String(brief ?? ""), "act", label ? String(label) : void 0, this.turnFollowUp);
         return `Acting on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
       }
     };
@@ -5180,7 +5270,7 @@ Another agent just implemented the above. Independently check the CURRENT state
         this.turnDispatched = true;
         this.turnBriefs.add(String(brief ?? ""));
         this.voice.options.toolChoice = "none";
-        const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0);
+        const id = await this.dispatch(String(brief ?? ""), "think", label ? String(label) : void 0, this.turnFollowUp);
         return `Thinking on task ${id}. Acknowledge briefly; the result will arrive as a [task ${id} completed] event.`;
       }
     };
@@ -5506,6 +5596,11 @@ var VoiceEngineOptions = class {
   /** Extended merge window (ms) for utterances that look incomplete (trailing conjunction/filler).
    *  Gives the user time to finish their thought without triggering a model call. */
   incompleteMergeMs = 1500;
+  /** Grace window (ms) after an utterance dispatches, during which the user's own trailing audio cannot
+   *  barge the reply it requested. Soniox keeps finalizing partials past <end>; without this they read
+   *  as a barge and abort the fresh turn (live: mid-sentence self-interruption + steps=1→steps=0 double
+   *  abort). Short enough that a genuine immediate barge ("no wait—") still lands right after. */
+  bargeGraceMs = 600;
   /** Filler phrase spoken when holding for an incomplete utterance ('' disables). */
   holdFiller = "";
   /** Called when the engine holds an incomplete utterance (host can render a visual cue). */
@@ -5561,6 +5656,9 @@ var VoiceEngine = class _VoiceEngine {
   suspectUntil = 0;
   ackAt = 0;
   // when the micro-ack was spoken — its echo can leak before the AEC filter converges
+  bargeGraceUntil = 0;
+  // no barge-in until this time — the user's OWN trailing audio (after the
+  // utterance that JUST dispatched this turn) must not immediately re-interrupt the reply it requested.
   pendingUtt = "";
   // endpointed text held for the merge window
   pendingTimer = null;
@@ -5747,6 +5845,10 @@ var VoiceEngine = class _VoiceEngine {
   }
   handlePartial(text) {
     if (this.speaking) {
+      if (now() < this.bargeGraceUntil) {
+        if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
+        return;
+      }
       if (this.overlapCapable) {
         const txt = text.trim();
         if (!txt || txt === this.lastOverlapPartial) return;
@@ -5787,6 +5889,27 @@ var VoiceEngine = class _VoiceEngine {
     }
     if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
   }
+  /** Merge a resumed utterance into the pending one, deduping any word-overlap. Soniox re-finalizes
+   *  overlapping audio when the silence-timer and the semantic `<end>` both endpoint a growing
+   *  utterance (or after a reconnect): the next "utterance" repeats the tail of the previous one, and
+   *  a naive `${prev} ${next}` produced the live duplication ("Um, I want to check if Um, I want to
+   *  check if…"). Find the longest suffix of `prev`'s words that prefixes `next` and drop it. */
+  mergeUtterance(prev, next) {
+    if (!prev) return next;
+    if (!next) return prev;
+    const pw = prev.split(/\s+/), nw = next.split(/\s+/);
+    const norm2 = (w) => w.toLowerCase().replace(/[^a-z0-9]/g, "");
+    const max = Math.min(pw.length, nw.length);
+    for (let k = max; k > 0; k--) {
+      let match = true;
+      for (let i = 0; i < k; i++) if (norm2(pw[pw.length - k + i]) !== norm2(nw[i])) {
+        match = false;
+        break;
+      }
+      if (match) return [...pw, ...nw.slice(k)].join(" ");
+    }
+    return `${prev} ${next}`;
+  }
   static TRAIL_RE = /(?:^|\s)(?:and|but|or|so|to|the|a|an|of|in|for|with|that|if|uh|um|like|about|from|into|on|is|are|was|were|,)$/i;
   /** The utterance sounds like the user paused mid-thought (trailing conjunction/filler/comma). */
   looksIncomplete(text) {
@@ -5806,7 +5929,7 @@ var VoiceEngine = class _VoiceEngine {
       this.ackAt = 0;
       return;
     }
-    this.pendingUtt = this.pendingUtt ? `${this.pendingUtt} ${text}` : text;
+    this.pendingUtt = this.mergeUtterance(this.pendingUtt, text);
     if (this.pendingTimer) clearTimeout(this.pendingTimer);
     if (this.options.incompleteMergeMs && this.looksIncomplete(this.pendingUtt)) {
       log10.verbose(`hold: incomplete utterance "${this.pendingUtt.slice(-40)}"`);
@@ -5831,6 +5954,7 @@ var VoiceEngine = class _VoiceEngine {
     this.pendingUtt = "";
     if (text) {
       this.turnStartAt = now();
+      this.bargeGraceUntil = now() + this.options.bargeGraceMs;
       this.options.onUtterance(text);
     }
   }