npm - @absolutejs/voice - Versions diffs - 0.0.22-beta.562 → 0.0.22-beta.564 - Mend

@absolutejs/voice 0.0.22-beta.562 → 0.0.22-beta.564

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/core/types.d.ts +14 -0
package/dist/index.js +44 -3
package/dist/telephony/twilio.d.ts +8 -0
package/dist/testing/index.js +44 -3
package/package.json +1 -1

package/dist/core/types.d.ts CHANGED Viewed

@@ -913,6 +913,20 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
      * Set `fillerPhrases: []` (or omit) to disable. Reasonable defaults if
      * you enable: `["Hmm.", "Got it.", "Right.", "Mm-hm.", "Let me think.", "Okay."]`.
      */
+    /**
+     * Minimum word count in an STT partial transcript before speech-gated
+     * barge-in cancels the in-flight assistant TTS. Default 1 (any non-empty
+     * partial triggers barge-in — backwards-compatible).
+     *
+     * Set to 2 on phone routes where the caller's one-word acknowledgements
+     * ("yeah", "uh-huh", "you") would otherwise cut the bot off mid-question;
+     * use 3 to also suppress two-word partials such as "am i". Each extra word
+     * typically delays barge-in by ~100-200ms (one extra STT partial cycle) —
+     * cheap compared to losing the bot's response.
+     *
+     * Word splitting is whitespace-based. Punctuation is left attached.
+     */
+    bargeInMinPartialWords?: number;
     fillerPhrases?: ReadonlyArray<string>;
     /** Milliseconds after turn-commit before the filler fires. Default 250ms — short enough to feel instant, long enough to skip if the LLM is very fast. */
     fillerDelayMs?: number;

package/dist/index.js CHANGED Viewed

@@ -3852,7 +3852,7 @@ var createVoiceSession = (options) => {
   };
   const appendTurnLatencyStage = async (input) => appendTrace({
     at: input.at,
-    payload: { stage: input.stage },
+    payload: { stage: input.stage, ...input.metadata ?? {} },
     session: input.session,
     turnId: input.turnId,
     type: "turn_latency.stage"
@@ -3875,6 +3875,7 @@ var createVoiceSession = (options) => {
   let fillerToken = 0;
   const fillerPhrases = (options.fillerPhrases ?? []).filter((p) => typeof p === "string" && p.trim().length > 0);
   const fillerDelayMs = options.fillerDelayMs ?? 250;
+  const bargeInMinPartialWords = Math.max(1, options.bargeInMinPartialWords ?? 1);
   const fillerFor = options.fillerFor;
   const fillerForTimeoutMs = options.fillerForTimeoutMs ?? 600;
   const currentTurnAudio = [];
@@ -4261,6 +4262,11 @@ var createVoiceSession = (options) => {
       return;
     }
     activeTTSTurnId = undefined;
+    appendTurnLatencyStage({
+      metadata: { reason },
+      stage: "tts_canceled",
+      turnId: cancelledTurnId
+    }).catch(() => {});
     Promise.resolve(socket.clear?.()).catch(() => {});
     if (!ttsAdapterSessionCanCancel(activeSession)) {
       return;
@@ -4919,8 +4925,33 @@ var createVoiceSession = (options) => {
     };
   };
   const handlePartial = async (transcript) => {
-    if (activeTTSTurnId !== undefined && transcript.text.trim()) {
-      cancelActiveTTS("barge-in");
+    if (activeTTSTurnId !== undefined) {
+      const triggeringText = transcript.text.trim();
+      if (triggeringText) {
+        const wordCount = triggeringText.split(/\s+/).length;
+        if (wordCount >= bargeInMinPartialWords) {
+          appendTurnLatencyStage({
+            metadata: {
+              partial: triggeringText.slice(0, 200),
+              source: "stt_partial",
+              wordCount
+            },
+            stage: "barge_in",
+            turnId: activeTTSTurnId
+          }).catch(() => {});
+          cancelActiveTTS("barge-in");
+        } else {
+          appendTurnLatencyStage({
+            metadata: {
+              partial: triggeringText.slice(0, 200),
+              reason: "below_min_words",
+              wordCount
+            },
+            stage: "barge_in_suppressed",
+            turnId: activeTTSTurnId
+          }).catch(() => {});
+        }
+      }
     }
     const session = await writeSession((session2) => {
       const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
@@ -5327,13 +5358,17 @@ var createVoiceSession = (options) => {
           if (myToken !== fillerToken || activeTTSTurnId === turn.id)
             return;
           let phrase = null;
+          let source = "static";
           if (fillerForPromise) {
             phrase = await fillerForPromise;
+            if (phrase)
+              source = "fillerFor";
             if (myToken !== fillerToken || activeTTSTurnId === turn.id)
               return;
           }
           if (!phrase && fillerPhrases.length > 0) {
             phrase = fillerPhrases[Math.floor(Math.random() * fillerPhrases.length)] ?? null;
+            source = "static";
           }
           if (!phrase)
             return;
@@ -5341,6 +5376,11 @@ var createVoiceSession = (options) => {
           if (!adapterSession)
             return;
           fillerActive = true;
+          appendTurnLatencyStage({
+            metadata: { phrase, source },
+            stage: "filler_sent",
+            turnId: turn.id
+          }).catch(() => {});
           try {
             await adapterSession.send(phrase);
           } catch {
@@ -24742,6 +24782,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
       ...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
       ...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
       ...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
+      ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
       ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
       ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
       ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},

package/dist/telephony/twilio.d.ts CHANGED Viewed

@@ -132,6 +132,14 @@ export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends
     fillerPhrases?: ReadonlyArray<string>;
     /** Milliseconds after turn-commit before the filler fires. Default 250ms. */
     fillerDelayMs?: number;
+    /**
+     * Minimum word count in an STT partial transcript before barge-in
+     * cancels in-flight assistant TTS. Default 1 (any partial).
+     * Recommended 2 on phone routes — one-word partials such as "you" cut
+     * the bot off mid-question per live-test 2026-05-27; use 3 to also suppress
+     * two-word partials such as "am i". See CreateVoiceSessionOptions for full semantics.
+     */
+    bargeInMinPartialWords?: number;
     /**
      * Content-aware filler (Latency Theater). Called in parallel with the
      * main LLM turn; if it resolves within `fillerForTimeoutMs` the runtime

package/dist/testing/index.js CHANGED Viewed

@@ -5723,7 +5723,7 @@ var createVoiceSession = (options) => {
   };
   const appendTurnLatencyStage = async (input) => appendTrace({
     at: input.at,
-    payload: { stage: input.stage },
+    payload: { stage: input.stage, ...input.metadata ?? {} },
     session: input.session,
     turnId: input.turnId,
     type: "turn_latency.stage"
@@ -5746,6 +5746,7 @@ var createVoiceSession = (options) => {
   let fillerToken = 0;
   const fillerPhrases = (options.fillerPhrases ?? []).filter((p) => typeof p === "string" && p.trim().length > 0);
   const fillerDelayMs = options.fillerDelayMs ?? 250;
+  const bargeInMinPartialWords = Math.max(1, options.bargeInMinPartialWords ?? 1);
   const fillerFor = options.fillerFor;
   const fillerForTimeoutMs = options.fillerForTimeoutMs ?? 600;
   const currentTurnAudio = [];
@@ -6132,6 +6133,11 @@ var createVoiceSession = (options) => {
       return;
     }
     activeTTSTurnId = undefined;
+    appendTurnLatencyStage({
+      metadata: { reason },
+      stage: "tts_canceled",
+      turnId: cancelledTurnId
+    }).catch(() => {});
     Promise.resolve(socket.clear?.()).catch(() => {});
     if (!ttsAdapterSessionCanCancel(activeSession)) {
       return;
@@ -6790,8 +6796,33 @@ var createVoiceSession = (options) => {
     };
   };
   const handlePartial = async (transcript) => {
-    if (activeTTSTurnId !== undefined && transcript.text.trim()) {
-      cancelActiveTTS("barge-in");
+    if (activeTTSTurnId !== undefined) {
+      const triggeringText = transcript.text.trim();
+      if (triggeringText) {
+        const wordCount = triggeringText.split(/\s+/).length;
+        if (wordCount >= bargeInMinPartialWords) {
+          appendTurnLatencyStage({
+            metadata: {
+              partial: triggeringText.slice(0, 200),
+              source: "stt_partial",
+              wordCount
+            },
+            stage: "barge_in",
+            turnId: activeTTSTurnId
+          }).catch(() => {});
+          cancelActiveTTS("barge-in");
+        } else {
+          appendTurnLatencyStage({
+            metadata: {
+              partial: triggeringText.slice(0, 200),
+              reason: "below_min_words",
+              wordCount
+            },
+            stage: "barge_in_suppressed",
+            turnId: activeTTSTurnId
+          }).catch(() => {});
+        }
+      }
     }
     const session = await writeSession((session2) => {
       const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
@@ -7198,13 +7229,17 @@ var createVoiceSession = (options) => {
           if (myToken !== fillerToken || activeTTSTurnId === turn.id)
             return;
           let phrase = null;
+          let source = "static";
           if (fillerForPromise) {
             phrase = await fillerForPromise;
+            if (phrase)
+              source = "fillerFor";
             if (myToken !== fillerToken || activeTTSTurnId === turn.id)
               return;
           }
           if (!phrase && fillerPhrases.length > 0) {
             phrase = fillerPhrases[Math.floor(Math.random() * fillerPhrases.length)] ?? null;
+            source = "static";
           }
           if (!phrase)
             return;
@@ -7212,6 +7247,11 @@ var createVoiceSession = (options) => {
           if (!adapterSession)
             return;
           fillerActive = true;
+          appendTurnLatencyStage({
+            metadata: { phrase, source },
+            stage: "filler_sent",
+            turnId: turn.id
+          }).catch(() => {});
           try {
             await adapterSession.send(phrase);
           } catch {
@@ -13332,6 +13372,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
       ...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
       ...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
       ...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
+      ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
       ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
       ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
       ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@absolutejs/voice",
-  "version": "0.0.22-beta.562",
+  "version": "0.0.22-beta.564",
   "description": "Voice primitives and Elysia plugin for AbsoluteJS",
   "repository": {
     "type": "git",