npm - @absolutejs/voice - Versions diffs - 0.0.22-beta.578 → 0.0.22-beta.579 - Mend

@absolutejs/voice 0.0.22-beta.578 → 0.0.22-beta.579

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (7)

package/dist/core/backchannel.d.ts +6 -0
package/dist/core/types.d.ts +9 -0
package/dist/index.d.ts +1 -1
package/dist/index.js +94 -64
package/dist/telephony/twilio.d.ts +3 -0
package/dist/testing/index.js +93 -0
package/package.json +1 -1

package/dist/core/backchannel.d.ts CHANGED Viewed

@@ -10,6 +10,12 @@ export type VoiceBackchannelDriverOptions = {
     minSpeechMs?: number;
     onCue: (cue: VoiceBackchannelCue) => Promise<void> | void;
 };
+export type VoiceBackchannelConfig = {
+    enabled?: boolean;
+    cues?: ReadonlyArray<string>;
+    minSpeechMs?: number;
+    cueIntervalMs?: number;
+};
 export type VoiceBackchannelDriver = {
     noteSpeech: (timestampMs?: number) => void;
     noteSilence: (timestampMs?: number) => void;

package/dist/core/types.d.ts CHANGED Viewed

@@ -783,6 +783,7 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
         userText: string;
     }) => Promise<string | null>;
     fillerForTimeoutMs?: number;
+    backchannel?: import("./backchannel").VoiceBackchannelConfig;
     defaultSilentTurnAck?: string;
     routeOnTurnTimeoutMs?: number;
     audioConditioning?: VoiceAudioConditioningConfig;
@@ -968,6 +969,14 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
     }) => Promise<string | null>;
     /** Ceiling for the `fillerFor` call before we fall back to a static phrase. Default 600ms. */
     fillerForTimeoutMs?: number;
+    /**
+     * Backchannel cues — short "mm-hm"/"right" acknowledgements played while the
+     * CALLER is mid-turn (a long answer) so they feel heard, the way a human
+     * listener interjects. Plays on the same non-turn TTS path as fillers, so it
+     * never registers as the assistant's turn or trips barge-in. Off unless
+     * `enabled` is set. Fires only while the assistant is silent.
+     */
+    backchannel?: import("./backchannel").VoiceBackchannelConfig;
     /**
      * Default spoken ack if the model returns ONLY tool calls (no text) and the
      * turn isn't ending. Without this, the caller hears total silence after

package/dist/index.d.ts CHANGED Viewed

@@ -98,7 +98,7 @@ export type { VoiceCampaignDisposition, VoiceCampaignDispositionRetryPolicy, Voi
 export { createVoiceBackchannelDriver } from "./core/backchannel";
 export { createVoiceOAuth2TokenSource } from "./core/oauth2TokenSource";
 export type { CreateVoiceOAuth2TokenSourceOptions, VoiceOAuth2TokenResponse, VoiceOAuth2TokenSource, } from "./core/oauth2TokenSource";
-export type { VoiceBackchannelCue, VoiceBackchannelDriver, VoiceBackchannelDriverOptions, } from "./core/backchannel";
+export type { VoiceBackchannelConfig, VoiceBackchannelCue, VoiceBackchannelDriver, VoiceBackchannelDriverOptions, } from "./core/backchannel";
 export { createVoiceIVRSession, describeVoiceIVRPlan, evaluateVoiceIVRPlan, } from "./core/ivrPlan";
 export type { VoiceIVRBranch, VoiceIVRDecision, VoiceIVRInput, VoiceIVRMatch, VoiceIVRPlan, VoiceIVRSession, } from "./core/ivrPlan";
 export { VOICE_CALLER_MEMORY_KEY, buildVoiceCallerMemoryNamespace, createVoiceCallerMemoryNamespace, summarizeVoiceCallerTranscript, } from "./core/callerMemory";

package/dist/index.js CHANGED Viewed

@@ -3091,6 +3091,71 @@ var toVoiceSessionSummary = (session) => ({
 // src/core/session.ts
 import { Buffer as Buffer2 } from "buffer";
+// src/core/backchannel.ts
+var DEFAULT_CUES = [
+  { text: "mm-hmm" },
+  { text: "I see" },
+  { text: "right" },
+  { text: "go on" }
+];
+var createVoiceBackchannelDriver = (options) => {
+  const cues = options.cues ?? DEFAULT_CUES;
+  const minSpeechMs = options.minSpeechMs ?? 2500;
+  const cueIntervalMs = options.cueIntervalMs ?? 2500;
+  const cueIndexFn = options.cueIndex ?? ((index) => index % Math.max(cues.length, 1));
+  let speechStartedAt;
+  let lastCueAt;
+  let cueCount = 0;
+  let firing = false;
+  const tryFire = async (now) => {
+    if (firing || cues.length === 0) {
+      return;
+    }
+    if (speechStartedAt === undefined) {
+      return;
+    }
+    const elapsed = now - speechStartedAt;
+    if (elapsed < minSpeechMs) {
+      return;
+    }
+    if (lastCueAt !== undefined && now - lastCueAt < cueIntervalMs) {
+      return;
+    }
+    const cue = cues[cueIndexFn(cueCount)];
+    if (!cue) {
+      return;
+    }
+    firing = true;
+    try {
+      await options.onCue(cue);
+    } finally {
+      firing = false;
+      lastCueAt = now;
+      cueCount += 1;
+    }
+  };
+  return {
+    noteSilence: (timestampMs) => {
+      const now = timestampMs ?? Date.now();
+      if (lastCueAt !== undefined && now - lastCueAt > cueIntervalMs * 2) {
+        speechStartedAt = undefined;
+      }
+    },
+    noteSpeech: (timestampMs) => {
+      const now = timestampMs ?? Date.now();
+      if (speechStartedAt === undefined) {
+        speechStartedAt = now;
+      }
+      tryFire(now);
+    },
+    reset: () => {
+      speechStartedAt = undefined;
+      lastCueAt = undefined;
+      cueCount = 0;
+    }
+  };
+};
 // src/core/handoff.ts
 var toHex3 = (bytes) => Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
 var signHandoffBody = async (input) => {
@@ -5217,6 +5282,30 @@ var createVoiceSession = (options) => {
       });
     });
   };
+  const emitBackchannelCue = (text) => {
+    if (!text || !options.tts)
+      return;
+    if (activeTTSTurnId !== undefined || fillerActive)
+      return;
+    runSerial("backchannel.send", async () => {
+      if (activeTTSTurnId !== undefined || fillerActive)
+        return;
+      const adapterSession = await ensureTTSSession();
+      if (!adapterSession)
+        return;
+      try {
+        await adapterSession.send(text);
+      } catch {}
+    });
+  };
+  const backchannelDriver = options.backchannel?.enabled && options.tts ? createVoiceBackchannelDriver({
+    ...options.backchannel.cueIntervalMs !== undefined ? { cueIntervalMs: options.backchannel.cueIntervalMs } : {},
+    ...options.backchannel.cues ? {
+      cues: options.backchannel.cues.filter((cue) => typeof cue === "string" && cue.trim().length > 0).map((cue) => ({ text: cue }))
+    } : {},
+    ...options.backchannel.minSpeechMs !== undefined ? { minSpeechMs: options.backchannel.minSpeechMs } : {},
+    onCue: (cue) => emitBackchannelCue(cue.text)
+  }) : null;
   const createTurnTTSStreamer = (turn, session) => {
     let buffer = "";
     let full = "";
@@ -5708,6 +5797,7 @@ var createVoiceSession = (options) => {
   };
   const commitTurnInternal = async (reason = "manual") => {
     clearSilenceTimer();
+    backchannelDriver?.reset();
     amdLastTurnCommitAt = Date.now();
     const session = await readSession();
     if (session.status === "completed" || session.status === "failed") {
@@ -6051,7 +6141,9 @@ var createVoiceSession = (options) => {
       speechDetected = true;
       clearSilenceTimer();
       kickCallSilenceWatchdog();
+      backchannelDriver?.noteSpeech();
     } else if (speechDetected) {
+      backchannelDriver?.noteSilence();
       const currentSession = await readSession();
       const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText, {
         partialEndedAtMs: currentSession.currentTurn.partialEndedAt,
@@ -24811,6 +24903,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
       ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
       ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
       ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
+      ...options.backchannel ? { backchannel: options.backchannel } : {},
       ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
       ...options.routeOnTurnTimeoutMs !== undefined ? { routeOnTurnTimeoutMs: options.routeOnTurnTimeoutMs } : {},
       trace: options.trace,
@@ -39177,6 +39270,7 @@ var voice = (config) => {
       ...config.fillerDelayMs !== undefined ? { fillerDelayMs: config.fillerDelayMs } : {},
       ...config.fillerFor ? { fillerFor: config.fillerFor } : {},
       ...config.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: config.fillerForTimeoutMs } : {},
+      ...config.backchannel ? { backchannel: config.backchannel } : {},
       ...config.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: config.defaultSilentTurnAck } : {},
       ...config.routeOnTurnTimeoutMs !== undefined ? { routeOnTurnTimeoutMs: config.routeOnTurnTimeoutMs } : {},
       tts: config.tts,
@@ -41569,70 +41663,6 @@ var summarizeVoiceCampaignDispositions = (record) => {
     totalRecipients: record.recipients.length
   };
 };
-// src/core/backchannel.ts
-var DEFAULT_CUES = [
-  { text: "mm-hmm" },
-  { text: "I see" },
-  { text: "right" },
-  { text: "go on" }
-];
-var createVoiceBackchannelDriver = (options) => {
-  const cues = options.cues ?? DEFAULT_CUES;
-  const minSpeechMs = options.minSpeechMs ?? 2500;
-  const cueIntervalMs = options.cueIntervalMs ?? 2500;
-  const cueIndexFn = options.cueIndex ?? ((index) => index % Math.max(cues.length, 1));
-  let speechStartedAt;
-  let lastCueAt;
-  let cueCount = 0;
-  let firing = false;
-  const tryFire = async (now) => {
-    if (firing || cues.length === 0) {
-      return;
-    }
-    if (speechStartedAt === undefined) {
-      return;
-    }
-    const elapsed = now - speechStartedAt;
-    if (elapsed < minSpeechMs) {
-      return;
-    }
-    if (lastCueAt !== undefined && now - lastCueAt < cueIntervalMs) {
-      return;
-    }
-    const cue = cues[cueIndexFn(cueCount)];
-    if (!cue) {
-      return;
-    }
-    firing = true;
-    try {
-      await options.onCue(cue);
-    } finally {
-      firing = false;
-      lastCueAt = now;
-      cueCount += 1;
-    }
-  };
-  return {
-    noteSilence: (timestampMs) => {
-      const now = timestampMs ?? Date.now();
-      if (lastCueAt !== undefined && now - lastCueAt > cueIntervalMs * 2) {
-        speechStartedAt = undefined;
-      }
-    },
-    noteSpeech: (timestampMs) => {
-      const now = timestampMs ?? Date.now();
-      if (speechStartedAt === undefined) {
-        speechStartedAt = now;
-      }
-      tryFire(now);
-    },
-    reset: () => {
-      speechStartedAt = undefined;
-      lastCueAt = undefined;
-      cueCount = 0;
-    }
-  };
-};
 // src/core/oauth2TokenSource.ts
 var createVoiceOAuth2TokenSource = (options) => {
   const fetchImpl = options.fetch ?? globalThis.fetch.bind(globalThis);

package/dist/telephony/twilio.d.ts CHANGED Viewed

@@ -164,6 +164,9 @@ export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends
     }) => Promise<string | null>;
     /** Cap on the `fillerFor` race before falling back to a static phrase. Default 600ms. */
     fillerForTimeoutMs?: number;
+    /** Backchannel cues played while the caller is mid-turn so they feel heard.
+     *  Non-turn TTS path (no barge-in interaction). Off unless `enabled`. */
+    backchannel?: import("../core/backchannel").VoiceBackchannelConfig;
     /**
      * Default spoken ack if the model returns ONLY tool calls (no text) and
      * the turn isn't ending. Without this, the caller hears silence and

package/dist/testing/index.js CHANGED Viewed

@@ -5311,6 +5311,71 @@ var createVoiceMemoryStore = () => {
 // src/core/session.ts
 import { Buffer as Buffer2 } from "buffer";
+// src/core/backchannel.ts
+var DEFAULT_CUES = [
+  { text: "mm-hmm" },
+  { text: "I see" },
+  { text: "right" },
+  { text: "go on" }
+];
+var createVoiceBackchannelDriver = (options) => {
+  const cues = options.cues ?? DEFAULT_CUES;
+  const minSpeechMs = options.minSpeechMs ?? 2500;
+  const cueIntervalMs = options.cueIntervalMs ?? 2500;
+  const cueIndexFn = options.cueIndex ?? ((index) => index % Math.max(cues.length, 1));
+  let speechStartedAt;
+  let lastCueAt;
+  let cueCount = 0;
+  let firing = false;
+  const tryFire = async (now) => {
+    if (firing || cues.length === 0) {
+      return;
+    }
+    if (speechStartedAt === undefined) {
+      return;
+    }
+    const elapsed = now - speechStartedAt;
+    if (elapsed < minSpeechMs) {
+      return;
+    }
+    if (lastCueAt !== undefined && now - lastCueAt < cueIntervalMs) {
+      return;
+    }
+    const cue = cues[cueIndexFn(cueCount)];
+    if (!cue) {
+      return;
+    }
+    firing = true;
+    try {
+      await options.onCue(cue);
+    } finally {
+      firing = false;
+      lastCueAt = now;
+      cueCount += 1;
+    }
+  };
+  return {
+    noteSilence: (timestampMs) => {
+      const now = timestampMs ?? Date.now();
+      if (lastCueAt !== undefined && now - lastCueAt > cueIntervalMs * 2) {
+        speechStartedAt = undefined;
+      }
+    },
+    noteSpeech: (timestampMs) => {
+      const now = timestampMs ?? Date.now();
+      if (speechStartedAt === undefined) {
+        speechStartedAt = now;
+      }
+      tryFire(now);
+    },
+    reset: () => {
+      speechStartedAt = undefined;
+      lastCueAt = undefined;
+      cueCount = 0;
+    }
+  };
+};
 // src/core/handoff.ts
 var toHex = (bytes) => Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
 var signHandoffBody = async (input) => {
@@ -7333,6 +7398,30 @@ var createVoiceSession = (options) => {
       });
     });
   };
+  const emitBackchannelCue = (text) => {
+    if (!text || !options.tts)
+      return;
+    if (activeTTSTurnId !== undefined || fillerActive)
+      return;
+    runSerial("backchannel.send", async () => {
+      if (activeTTSTurnId !== undefined || fillerActive)
+        return;
+      const adapterSession = await ensureTTSSession();
+      if (!adapterSession)
+        return;
+      try {
+        await adapterSession.send(text);
+      } catch {}
+    });
+  };
+  const backchannelDriver = options.backchannel?.enabled && options.tts ? createVoiceBackchannelDriver({
+    ...options.backchannel.cueIntervalMs !== undefined ? { cueIntervalMs: options.backchannel.cueIntervalMs } : {},
+    ...options.backchannel.cues ? {
+      cues: options.backchannel.cues.filter((cue) => typeof cue === "string" && cue.trim().length > 0).map((cue) => ({ text: cue }))
+    } : {},
+    ...options.backchannel.minSpeechMs !== undefined ? { minSpeechMs: options.backchannel.minSpeechMs } : {},
+    onCue: (cue) => emitBackchannelCue(cue.text)
+  }) : null;
   const createTurnTTSStreamer = (turn, session) => {
     let buffer = "";
     let full = "";
@@ -7824,6 +7913,7 @@ var createVoiceSession = (options) => {
   };
   const commitTurnInternal = async (reason = "manual") => {
     clearSilenceTimer();
+    backchannelDriver?.reset();
     amdLastTurnCommitAt = Date.now();
     const session = await readSession();
     if (session.status === "completed" || session.status === "failed") {
@@ -8167,7 +8257,9 @@ var createVoiceSession = (options) => {
       speechDetected = true;
       clearSilenceTimer();
       kickCallSilenceWatchdog();
+      backchannelDriver?.noteSpeech();
     } else if (speechDetected) {
+      backchannelDriver?.noteSilence();
       const currentSession = await readSession();
       const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText, {
         partialEndedAtMs: currentSession.currentTurn.partialEndedAt,
@@ -13646,6 +13738,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
       ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
       ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
       ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
+      ...options.backchannel ? { backchannel: options.backchannel } : {},
       ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
       ...options.routeOnTurnTimeoutMs !== undefined ? { routeOnTurnTimeoutMs: options.routeOnTurnTimeoutMs } : {},
       trace: options.trace,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@absolutejs/voice",
-  "version": "0.0.22-beta.578",
+  "version": "0.0.22-beta.579",
   "description": "Voice primitives and Elysia plugin for AbsoluteJS",
   "repository": {
     "type": "git",