npm - @absolutejs/voice - Versions diffs - 0.0.22-beta.577 → 0.0.22-beta.579 - Mend

@absolutejs/voice 0.0.22-beta.577 → 0.0.22-beta.579

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/client/htmxBootstrap.js +186 -5
package/dist/client/index.d.ts +1 -0
package/dist/client/index.js +187 -5
package/dist/client/timeStretch.d.ts +5 -0
package/dist/core/backchannel.d.ts +6 -0
package/dist/core/types.d.ts +9 -0
package/dist/index.d.ts +1 -1
package/dist/index.js +94 -64
package/dist/telephony/twilio.d.ts +3 -0
package/dist/testing/index.js +279 -5
package/package.json +1 -1

package/dist/client/htmxBootstrap.js CHANGED Viewed

@@ -1533,12 +1533,165 @@ var createVoiceController = (path, options = {}) => {
   };
 };
+// src/client/timeStretch.ts
+var HOP_MS = 10;
+var SEEK_MS = 5;
+var ENERGY_EPSILON = 0.000001;
+var HALF = 0.5;
+var MS_PER_SECOND = 1000;
+var makeHann = (length) => {
+  const weights = new Float32Array(length);
+  for (let index = 0;index < length; index += 1) {
+    weights[index] = HALF - HALF * Math.cos(2 * Math.PI * index / length);
+  }
+  return weights;
+};
+var correlationScore = (base, start, ref, length) => {
+  let dot = 0;
+  let energy = 0;
+  for (let index = 0;index < length; index += 1) {
+    const sample = base[start + index] ?? 0;
+    dot += sample * (ref[index] ?? 0);
+    energy += sample * sample;
+  }
+  return dot / Math.sqrt(energy + ENERGY_EPSILON);
+};
+var overlapAddGrain = (src, off, tail, weights, hop) => {
+  const out = new Float32Array(hop);
+  const nextTail = new Float32Array(hop);
+  for (let index = 0;index < hop; index += 1) {
+    out[index] = (tail[index] ?? 0) + (src[off + index] ?? 0) * (weights[index] ?? 0);
+    nextTail[index] = (src[off + hop + index] ?? 0) * (weights[hop + index] ?? 0);
+  }
+  return { nextTail, out };
+};
+var createTimeStretcher = () => {
+  let sampleRate = 0;
+  let channelCount = 0;
+  let hop = 0;
+  let frameLen = 0;
+  let seek = 0;
+  let weights = new Float32Array(0);
+  let buffers = [];
+  let inputStart = 0;
+  let analysisPos = 0;
+  let olaTail = [];
+  let naturalRef = null;
+  const init = (rate, channels) => {
+    sampleRate = rate;
+    channelCount = channels;
+    hop = Math.max(1, Math.round(sampleRate * HOP_MS / MS_PER_SECOND));
+    frameLen = hop * 2;
+    seek = Math.max(1, Math.round(sampleRate * SEEK_MS / MS_PER_SECOND));
+    weights = makeHann(frameLen);
+    buffers = Array.from({ length: channels }, () => new Float32Array(0));
+    olaTail = Array.from({ length: channels }, () => new Float32Array(hop));
+    inputStart = 0;
+    analysisPos = seek;
+    naturalRef = null;
+  };
+  const reset = () => {
+    buffers = buffers.map(() => new Float32Array(0));
+    olaTail = olaTail.map(() => new Float32Array(hop));
+    inputStart = 0;
+    analysisPos = seek;
+    naturalRef = null;
+  };
+  const append = (input) => {
+    for (let channel = 0;channel < channelCount; channel += 1) {
+      const incoming = input[channel] ?? input[0] ?? new Float32Array(0);
+      const existing = buffers[channel] ?? new Float32Array(0);
+      const merged = new Float32Array(existing.length + incoming.length);
+      merged.set(existing, 0);
+      merged.set(incoming, existing.length);
+      buffers[channel] = merged;
+    }
+  };
+  const inputEnd = () => inputStart + (buffers[0]?.length ?? 0);
+  const compact = () => {
+    const keepFrom = Math.max(inputStart, Math.floor(analysisPos) - seek - 1);
+    if (keepFrom <= inputStart)
+      return;
+    const drop = keepFrom - inputStart;
+    for (let channel = 0;channel < channelCount; channel += 1) {
+      buffers[channel] = (buffers[channel] ?? new Float32Array(0)).slice(drop);
+    }
+    inputStart = keepFrom;
+  };
+  const bestOffset = (center) => {
+    if (!naturalRef)
+      return 0;
+    const [base] = buffers;
+    if (!base)
+      return 0;
+    let bestDelta = 0;
+    let bestScore = -Infinity;
+    for (let delta = -seek;delta <= seek; delta += 1) {
+      const score = correlationScore(base, center + delta - inputStart, naturalRef, frameLen);
+      if (score <= bestScore)
+        continue;
+      bestScore = score;
+      bestDelta = delta;
+    }
+    return bestDelta;
+  };
+  const process = (input, speed, rate) => {
+    const channels = Math.max(1, input.length);
+    if (sampleRate !== rate || channelCount !== channels)
+      init(rate, channels);
+    append(input);
+    const analysisHop = hop * speed;
+    const segments = Array.from({ length: channelCount }, () => []);
+    const emitGrain = (pos) => {
+      const off = pos - inputStart;
+      for (let channel = 0;channel < channelCount; channel += 1) {
+        const src = buffers[channel];
+        const tail = olaTail[channel];
+        if (!src || !tail)
+          continue;
+        const grain = overlapAddGrain(src, off, tail, weights, hop);
+        olaTail[channel] = grain.nextTail;
+        segments[channel]?.push(grain.out);
+      }
+    };
+    const captureRef = (pos) => {
+      const ref = new Float32Array(frameLen);
+      const refOff = pos + hop - inputStart;
+      const [base] = buffers;
+      if (base)
+        ref.set(base.subarray(refOff, refOff + frameLen));
+      naturalRef = ref;
+    };
+    const canEmit = () => Math.floor(analysisPos) - seek >= inputStart && Math.floor(analysisPos) + seek + frameLen + hop <= inputEnd();
+    while (canEmit()) {
+      const center = Math.round(analysisPos);
+      const pos = center + bestOffset(center);
+      emitGrain(pos);
+      captureRef(pos);
+      analysisPos += analysisHop;
+    }
+    compact();
+    return segments.map((channelSegments) => {
+      const total = channelSegments.reduce((sum, seg) => sum + seg.length, 0);
+      const merged = new Float32Array(total);
+      let offset = 0;
+      for (const seg of channelSegments) {
+        merged.set(seg, offset);
+        offset += seg.length;
+      }
+      return merged;
+    });
+  };
+  return { process, reset };
+};
 // src/client/audioPlayer.ts
 var DEFAULT_LOOKAHEAD_MS = 15;
 var DEFAULT_VOLUME = 1;
 var DEFAULT_PLAYBACK_RATE = 1;
 var MIN_PLAYBACK_RATE = 0.5;
 var MAX_PLAYBACK_RATE = 2;
+var STRETCH_BYPASS_EPSILON = 0.01;
 var createInitialState3 = () => ({
   activeSourceCount: 0,
   error: null,
@@ -1601,6 +1754,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
   let outputNode = null;
   let volume = clampVolume(options.volume);
   let playbackRate = clampPlaybackRate(options.playbackRate);
+  let stretcher = null;
   let queueEndTime = 0;
   let syncPromise = Promise.resolve();
   let interruptStartedAt = null;
@@ -1633,6 +1787,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
   const resolveInterrupt = (latencyMs) => {
     clearInterruptTimer();
     interruptStartedAt = null;
+    stretcher?.reset();
     setState({
       activeSourceCount: sourceNodes.size,
       isPlaying: false,
@@ -1697,13 +1852,11 @@ var createVoiceAudioPlayer = (source, options = {}) => {
     queueEndTime = audioContext.currentTime;
     return audioContext;
   };
-  const scheduleChunk = async (chunk) => {
-    const context = await ensureAudioContext();
-    const buffer = decodePCM16LEChunk(context, chunk);
+  const scheduleBuffer = (context, buffer, rate) => {
     const node = context.createBufferSource();
     node.buffer = buffer;
     if (node.playbackRate) {
-      node.playbackRate.value = playbackRate;
+      node.playbackRate.value = rate;
     }
     node.connect(outputNode ?? context.destination);
     node.onended = () => {
@@ -1716,7 +1869,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
       maybeResolveInterrupt();
     };
     const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
-    queueEndTime = startAt + buffer.duration / playbackRate;
+    queueEndTime = startAt + buffer.duration / rate;
     sourceNodes.add(node);
     setState({
       activeSourceCount: sourceNodes.size,
@@ -1724,6 +1877,34 @@ var createVoiceAudioPlayer = (source, options = {}) => {
     });
     node.start(startAt);
   };
+  const scheduleChunk = async (chunk) => {
+    const context = await ensureAudioContext();
+    const buffer = decodePCM16LEChunk(context, chunk);
+    if (Math.abs(playbackRate - 1) <= STRETCH_BYPASS_EPSILON) {
+      stretcher?.reset();
+      scheduleBuffer(context, buffer, playbackRate);
+      return;
+    }
+    const channels = Math.max(1, chunk.format.channels);
+    const input = [];
+    for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
+      input.push(buffer.getChannelData(channelIndex));
+    }
+    stretcher ??= createTimeStretcher();
+    const stretched = stretcher.process(input, playbackRate, chunk.format.sampleRateHz);
+    const outLength = stretched[0]?.length ?? 0;
+    if (outLength === 0) {
+      return;
+    }
+    const outBuffer = context.createBuffer(channels, outLength, chunk.format.sampleRateHz);
+    for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
+      const channelOut = stretched[channelIndex];
+      if (!channelOut)
+        continue;
+      outBuffer.getChannelData(channelIndex).set(channelOut);
+    }
+    scheduleBuffer(context, outBuffer, 1);
+  };
   const stopQueuedPlayback = (options2) => {
     for (const node of [...sourceNodes]) {
       node.stop?.();

package/dist/client/index.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@ export { bindVoiceReactiveSource, voiceSseReactiveSource, } from "./reactiveSour
 export type { VoiceReactiveSource, VoiceSseReactiveSourceOptions, } from "./reactiveSource";
 export { createVoiceConnection } from "./connection";
 export { createVoiceAudioPlayer, decodeVoiceAudioChunk } from "./audioPlayer";
+export { createTimeStretcher, type TimeStretcher } from "./timeStretch";
 export { createVoiceStream } from "./createVoiceStream";
 export { createVoiceBrowserMediaReporter } from "./browserMedia";
 export type { VoiceBrowserMediaReporter } from "./browserMedia";

package/dist/client/index.js CHANGED Viewed

@@ -370,12 +370,165 @@ var createVoiceConnection = (path, options = {}) => {
     getSessionId: () => state.sessionId
   };
 };
+// src/client/timeStretch.ts
+var HOP_MS = 10;
+var SEEK_MS = 5;
+var ENERGY_EPSILON = 0.000001;
+var HALF = 0.5;
+var MS_PER_SECOND = 1000;
+var makeHann = (length) => {
+  const weights = new Float32Array(length);
+  for (let index = 0;index < length; index += 1) {
+    weights[index] = HALF - HALF * Math.cos(2 * Math.PI * index / length);
+  }
+  return weights;
+};
+var correlationScore = (base, start, ref, length) => {
+  let dot = 0;
+  let energy = 0;
+  for (let index = 0;index < length; index += 1) {
+    const sample = base[start + index] ?? 0;
+    dot += sample * (ref[index] ?? 0);
+    energy += sample * sample;
+  }
+  return dot / Math.sqrt(energy + ENERGY_EPSILON);
+};
+var overlapAddGrain = (src, off, tail, weights, hop) => {
+  const out = new Float32Array(hop);
+  const nextTail = new Float32Array(hop);
+  for (let index = 0;index < hop; index += 1) {
+    out[index] = (tail[index] ?? 0) + (src[off + index] ?? 0) * (weights[index] ?? 0);
+    nextTail[index] = (src[off + hop + index] ?? 0) * (weights[hop + index] ?? 0);
+  }
+  return { nextTail, out };
+};
+var createTimeStretcher = () => {
+  let sampleRate = 0;
+  let channelCount = 0;
+  let hop = 0;
+  let frameLen = 0;
+  let seek = 0;
+  let weights = new Float32Array(0);
+  let buffers = [];
+  let inputStart = 0;
+  let analysisPos = 0;
+  let olaTail = [];
+  let naturalRef = null;
+  const init = (rate, channels) => {
+    sampleRate = rate;
+    channelCount = channels;
+    hop = Math.max(1, Math.round(sampleRate * HOP_MS / MS_PER_SECOND));
+    frameLen = hop * 2;
+    seek = Math.max(1, Math.round(sampleRate * SEEK_MS / MS_PER_SECOND));
+    weights = makeHann(frameLen);
+    buffers = Array.from({ length: channels }, () => new Float32Array(0));
+    olaTail = Array.from({ length: channels }, () => new Float32Array(hop));
+    inputStart = 0;
+    analysisPos = seek;
+    naturalRef = null;
+  };
+  const reset = () => {
+    buffers = buffers.map(() => new Float32Array(0));
+    olaTail = olaTail.map(() => new Float32Array(hop));
+    inputStart = 0;
+    analysisPos = seek;
+    naturalRef = null;
+  };
+  const append = (input) => {
+    for (let channel = 0;channel < channelCount; channel += 1) {
+      const incoming = input[channel] ?? input[0] ?? new Float32Array(0);
+      const existing = buffers[channel] ?? new Float32Array(0);
+      const merged = new Float32Array(existing.length + incoming.length);
+      merged.set(existing, 0);
+      merged.set(incoming, existing.length);
+      buffers[channel] = merged;
+    }
+  };
+  const inputEnd = () => inputStart + (buffers[0]?.length ?? 0);
+  const compact = () => {
+    const keepFrom = Math.max(inputStart, Math.floor(analysisPos) - seek - 1);
+    if (keepFrom <= inputStart)
+      return;
+    const drop = keepFrom - inputStart;
+    for (let channel = 0;channel < channelCount; channel += 1) {
+      buffers[channel] = (buffers[channel] ?? new Float32Array(0)).slice(drop);
+    }
+    inputStart = keepFrom;
+  };
+  const bestOffset = (center) => {
+    if (!naturalRef)
+      return 0;
+    const [base] = buffers;
+    if (!base)
+      return 0;
+    let bestDelta = 0;
+    let bestScore = -Infinity;
+    for (let delta = -seek;delta <= seek; delta += 1) {
+      const score = correlationScore(base, center + delta - inputStart, naturalRef, frameLen);
+      if (score <= bestScore)
+        continue;
+      bestScore = score;
+      bestDelta = delta;
+    }
+    return bestDelta;
+  };
+  const process = (input, speed, rate) => {
+    const channels = Math.max(1, input.length);
+    if (sampleRate !== rate || channelCount !== channels)
+      init(rate, channels);
+    append(input);
+    const analysisHop = hop * speed;
+    const segments = Array.from({ length: channelCount }, () => []);
+    const emitGrain = (pos) => {
+      const off = pos - inputStart;
+      for (let channel = 0;channel < channelCount; channel += 1) {
+        const src = buffers[channel];
+        const tail = olaTail[channel];
+        if (!src || !tail)
+          continue;
+        const grain = overlapAddGrain(src, off, tail, weights, hop);
+        olaTail[channel] = grain.nextTail;
+        segments[channel]?.push(grain.out);
+      }
+    };
+    const captureRef = (pos) => {
+      const ref = new Float32Array(frameLen);
+      const refOff = pos + hop - inputStart;
+      const [base] = buffers;
+      if (base)
+        ref.set(base.subarray(refOff, refOff + frameLen));
+      naturalRef = ref;
+    };
+    const canEmit = () => Math.floor(analysisPos) - seek >= inputStart && Math.floor(analysisPos) + seek + frameLen + hop <= inputEnd();
+    while (canEmit()) {
+      const center = Math.round(analysisPos);
+      const pos = center + bestOffset(center);
+      emitGrain(pos);
+      captureRef(pos);
+      analysisPos += analysisHop;
+    }
+    compact();
+    return segments.map((channelSegments) => {
+      const total = channelSegments.reduce((sum, seg) => sum + seg.length, 0);
+      const merged = new Float32Array(total);
+      let offset = 0;
+      for (const seg of channelSegments) {
+        merged.set(seg, offset);
+        offset += seg.length;
+      }
+      return merged;
+    });
+  };
+  return { process, reset };
+};
 // src/client/audioPlayer.ts
 var DEFAULT_LOOKAHEAD_MS = 15;
 var DEFAULT_VOLUME = 1;
 var DEFAULT_PLAYBACK_RATE = 1;
 var MIN_PLAYBACK_RATE = 0.5;
 var MAX_PLAYBACK_RATE = 2;
+var STRETCH_BYPASS_EPSILON = 0.01;
 var createInitialState = () => ({
   activeSourceCount: 0,
   error: null,
@@ -438,6 +591,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
   let outputNode = null;
   let volume = clampVolume(options.volume);
   let playbackRate = clampPlaybackRate(options.playbackRate);
+  let stretcher = null;
   let queueEndTime = 0;
   let syncPromise = Promise.resolve();
   let interruptStartedAt = null;
@@ -470,6 +624,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
   const resolveInterrupt = (latencyMs) => {
     clearInterruptTimer();
     interruptStartedAt = null;
+    stretcher?.reset();
     setState({
       activeSourceCount: sourceNodes.size,
       isPlaying: false,
@@ -534,13 +689,11 @@ var createVoiceAudioPlayer = (source, options = {}) => {
     queueEndTime = audioContext.currentTime;
     return audioContext;
   };
-  const scheduleChunk = async (chunk) => {
-    const context = await ensureAudioContext();
-    const buffer = decodePCM16LEChunk(context, chunk);
+  const scheduleBuffer = (context, buffer, rate) => {
     const node = context.createBufferSource();
     node.buffer = buffer;
     if (node.playbackRate) {
-      node.playbackRate.value = playbackRate;
+      node.playbackRate.value = rate;
     }
     node.connect(outputNode ?? context.destination);
     node.onended = () => {
@@ -553,7 +706,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
       maybeResolveInterrupt();
     };
     const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
-    queueEndTime = startAt + buffer.duration / playbackRate;
+    queueEndTime = startAt + buffer.duration / rate;
     sourceNodes.add(node);
     setState({
       activeSourceCount: sourceNodes.size,
@@ -561,6 +714,34 @@ var createVoiceAudioPlayer = (source, options = {}) => {
     });
     node.start(startAt);
   };
+  const scheduleChunk = async (chunk) => {
+    const context = await ensureAudioContext();
+    const buffer = decodePCM16LEChunk(context, chunk);
+    if (Math.abs(playbackRate - 1) <= STRETCH_BYPASS_EPSILON) {
+      stretcher?.reset();
+      scheduleBuffer(context, buffer, playbackRate);
+      return;
+    }
+    const channels = Math.max(1, chunk.format.channels);
+    const input = [];
+    for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
+      input.push(buffer.getChannelData(channelIndex));
+    }
+    stretcher ??= createTimeStretcher();
+    const stretched = stretcher.process(input, playbackRate, chunk.format.sampleRateHz);
+    const outLength = stretched[0]?.length ?? 0;
+    if (outLength === 0) {
+      return;
+    }
+    const outBuffer = context.createBuffer(channels, outLength, chunk.format.sampleRateHz);
+    for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
+      const channelOut = stretched[channelIndex];
+      if (!channelOut)
+        continue;
+      outBuffer.getChannelData(channelIndex).set(channelOut);
+    }
+    scheduleBuffer(context, outBuffer, 1);
+  };
   const stopQueuedPlayback = (options2) => {
     for (const node of [...sourceNodes]) {
       node.stop?.();
@@ -12303,6 +12484,7 @@ export {
   createVoiceAudioPlayer,
   createVoiceAgentSquadStatusViewModel,
   createVoiceAgentSquadStatusStore,
+  createTimeStretcher,
   createMicrophoneCapture,
   buildVoiceAgentSquadStatusReport,
   bindVoiceReactiveSource,

package/dist/client/timeStretch.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+export type TimeStretcher = {
+    process: (input: Float32Array[], speed: number, sampleRate: number) => Float32Array[];
+    reset: () => void;
+};
+export declare const createTimeStretcher: () => TimeStretcher;

package/dist/core/backchannel.d.ts CHANGED Viewed

@@ -10,6 +10,12 @@ export type VoiceBackchannelDriverOptions = {
     minSpeechMs?: number;
     onCue: (cue: VoiceBackchannelCue) => Promise<void> | void;
 };
+export type VoiceBackchannelConfig = {
+    enabled?: boolean;
+    cues?: ReadonlyArray<string>;
+    minSpeechMs?: number;
+    cueIntervalMs?: number;
+};
 export type VoiceBackchannelDriver = {
     noteSpeech: (timestampMs?: number) => void;
     noteSilence: (timestampMs?: number) => void;

package/dist/core/types.d.ts CHANGED Viewed

@@ -783,6 +783,7 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
         userText: string;
     }) => Promise<string | null>;
     fillerForTimeoutMs?: number;
+    backchannel?: import("./backchannel").VoiceBackchannelConfig;
     defaultSilentTurnAck?: string;
     routeOnTurnTimeoutMs?: number;
     audioConditioning?: VoiceAudioConditioningConfig;
@@ -968,6 +969,14 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
     }) => Promise<string | null>;
     /** Ceiling for the `fillerFor` call before we fall back to a static phrase. Default 600ms. */
     fillerForTimeoutMs?: number;
+    /**
+     * Backchannel cues — short "mm-hm"/"right" acknowledgements played while the
+     * CALLER is mid-turn (a long answer) so they feel heard, the way a human
+     * listener interjects. Plays on the same non-turn TTS path as fillers, so it
+     * never registers as the assistant's turn or trips barge-in. Off unless
+     * `enabled` is set. Fires only while the assistant is silent.
+     */
+    backchannel?: import("./backchannel").VoiceBackchannelConfig;
     /**
      * Default spoken ack if the model returns ONLY tool calls (no text) and the
      * turn isn't ending. Without this, the caller hears total silence after

package/dist/index.d.ts CHANGED Viewed

@@ -98,7 +98,7 @@ export type { VoiceCampaignDisposition, VoiceCampaignDispositionRetryPolicy, Voi
 export { createVoiceBackchannelDriver } from "./core/backchannel";
 export { createVoiceOAuth2TokenSource } from "./core/oauth2TokenSource";
 export type { CreateVoiceOAuth2TokenSourceOptions, VoiceOAuth2TokenResponse, VoiceOAuth2TokenSource, } from "./core/oauth2TokenSource";
-export type { VoiceBackchannelCue, VoiceBackchannelDriver, VoiceBackchannelDriverOptions, } from "./core/backchannel";
+export type { VoiceBackchannelConfig, VoiceBackchannelCue, VoiceBackchannelDriver, VoiceBackchannelDriverOptions, } from "./core/backchannel";
 export { createVoiceIVRSession, describeVoiceIVRPlan, evaluateVoiceIVRPlan, } from "./core/ivrPlan";
 export type { VoiceIVRBranch, VoiceIVRDecision, VoiceIVRInput, VoiceIVRMatch, VoiceIVRPlan, VoiceIVRSession, } from "./core/ivrPlan";
 export { VOICE_CALLER_MEMORY_KEY, buildVoiceCallerMemoryNamespace, createVoiceCallerMemoryNamespace, summarizeVoiceCallerTranscript, } from "./core/callerMemory";

package/dist/index.js CHANGED Viewed

@@ -3091,6 +3091,71 @@ var toVoiceSessionSummary = (session) => ({
 // src/core/session.ts
 import { Buffer as Buffer2 } from "buffer";
+// src/core/backchannel.ts
+var DEFAULT_CUES = [
+  { text: "mm-hmm" },
+  { text: "I see" },
+  { text: "right" },
+  { text: "go on" }
+];
+var createVoiceBackchannelDriver = (options) => {
+  const cues = options.cues ?? DEFAULT_CUES;
+  const minSpeechMs = options.minSpeechMs ?? 2500;
+  const cueIntervalMs = options.cueIntervalMs ?? 2500;
+  const cueIndexFn = options.cueIndex ?? ((index) => index % Math.max(cues.length, 1));
+  let speechStartedAt;
+  let lastCueAt;
+  let cueCount = 0;
+  let firing = false;
+  const tryFire = async (now) => {
+    if (firing || cues.length === 0) {
+      return;
+    }
+    if (speechStartedAt === undefined) {
+      return;
+    }
+    const elapsed = now - speechStartedAt;
+    if (elapsed < minSpeechMs) {
+      return;
+    }
+    if (lastCueAt !== undefined && now - lastCueAt < cueIntervalMs) {
+      return;
+    }
+    const cue = cues[cueIndexFn(cueCount)];
+    if (!cue) {
+      return;
+    }
+    firing = true;
+    try {
+      await options.onCue(cue);
+    } finally {
+      firing = false;
+      lastCueAt = now;
+      cueCount += 1;
+    }
+  };
+  return {
+    noteSilence: (timestampMs) => {
+      const now = timestampMs ?? Date.now();
+      if (lastCueAt !== undefined && now - lastCueAt > cueIntervalMs * 2) {
+        speechStartedAt = undefined;
+      }
+    },
+    noteSpeech: (timestampMs) => {
+      const now = timestampMs ?? Date.now();
+      if (speechStartedAt === undefined) {
+        speechStartedAt = now;
+      }
+      tryFire(now);
+    },
+    reset: () => {
+      speechStartedAt = undefined;
+      lastCueAt = undefined;
+      cueCount = 0;
+    }
+  };
+};
 // src/core/handoff.ts
 var toHex3 = (bytes) => Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
 var signHandoffBody = async (input) => {
@@ -5217,6 +5282,30 @@ var createVoiceSession = (options) => {
       });
     });
   };
+  const emitBackchannelCue = (text) => {
+    if (!text || !options.tts)
+      return;
+    if (activeTTSTurnId !== undefined || fillerActive)
+      return;
+    runSerial("backchannel.send", async () => {
+      if (activeTTSTurnId !== undefined || fillerActive)
+        return;
+      const adapterSession = await ensureTTSSession();
+      if (!adapterSession)
+        return;
+      try {
+        await adapterSession.send(text);
+      } catch {}
+    });
+  };
+  const backchannelDriver = options.backchannel?.enabled && options.tts ? createVoiceBackchannelDriver({
+    ...options.backchannel.cueIntervalMs !== undefined ? { cueIntervalMs: options.backchannel.cueIntervalMs } : {},
+    ...options.backchannel.cues ? {
+      cues: options.backchannel.cues.filter((cue) => typeof cue === "string" && cue.trim().length > 0).map((cue) => ({ text: cue }))
+    } : {},
+    ...options.backchannel.minSpeechMs !== undefined ? { minSpeechMs: options.backchannel.minSpeechMs } : {},
+    onCue: (cue) => emitBackchannelCue(cue.text)
+  }) : null;
   const createTurnTTSStreamer = (turn, session) => {
     let buffer = "";
     let full = "";
@@ -5708,6 +5797,7 @@ var createVoiceSession = (options) => {
   };
   const commitTurnInternal = async (reason = "manual") => {
     clearSilenceTimer();
+    backchannelDriver?.reset();
     amdLastTurnCommitAt = Date.now();
     const session = await readSession();
     if (session.status === "completed" || session.status === "failed") {
@@ -6051,7 +6141,9 @@ var createVoiceSession = (options) => {
       speechDetected = true;
       clearSilenceTimer();
       kickCallSilenceWatchdog();
+      backchannelDriver?.noteSpeech();
     } else if (speechDetected) {
+      backchannelDriver?.noteSilence();
       const currentSession = await readSession();
       const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText, {
         partialEndedAtMs: currentSession.currentTurn.partialEndedAt,
@@ -24811,6 +24903,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
       ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
       ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
       ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
+      ...options.backchannel ? { backchannel: options.backchannel } : {},
       ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
       ...options.routeOnTurnTimeoutMs !== undefined ? { routeOnTurnTimeoutMs: options.routeOnTurnTimeoutMs } : {},
       trace: options.trace,
@@ -39177,6 +39270,7 @@ var voice = (config) => {
       ...config.fillerDelayMs !== undefined ? { fillerDelayMs: config.fillerDelayMs } : {},
       ...config.fillerFor ? { fillerFor: config.fillerFor } : {},
       ...config.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: config.fillerForTimeoutMs } : {},
+      ...config.backchannel ? { backchannel: config.backchannel } : {},
       ...config.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: config.defaultSilentTurnAck } : {},
       ...config.routeOnTurnTimeoutMs !== undefined ? { routeOnTurnTimeoutMs: config.routeOnTurnTimeoutMs } : {},
       tts: config.tts,
@@ -41569,70 +41663,6 @@ var summarizeVoiceCampaignDispositions = (record) => {
     totalRecipients: record.recipients.length
   };
 };
-// src/core/backchannel.ts
-var DEFAULT_CUES = [
-  { text: "mm-hmm" },
-  { text: "I see" },
-  { text: "right" },
-  { text: "go on" }
-];
-var createVoiceBackchannelDriver = (options) => {
-  const cues = options.cues ?? DEFAULT_CUES;
-  const minSpeechMs = options.minSpeechMs ?? 2500;
-  const cueIntervalMs = options.cueIntervalMs ?? 2500;
-  const cueIndexFn = options.cueIndex ?? ((index) => index % Math.max(cues.length, 1));
-  let speechStartedAt;
-  let lastCueAt;
-  let cueCount = 0;
-  let firing = false;
-  const tryFire = async (now) => {
-    if (firing || cues.length === 0) {
-      return;
-    }
-    if (speechStartedAt === undefined) {
-      return;
-    }
-    const elapsed = now - speechStartedAt;
-    if (elapsed < minSpeechMs) {
-      return;
-    }
-    if (lastCueAt !== undefined && now - lastCueAt < cueIntervalMs) {
-      return;
-    }
-    const cue = cues[cueIndexFn(cueCount)];
-    if (!cue) {
-      return;
-    }
-    firing = true;
-    try {
-      await options.onCue(cue);
-    } finally {
-      firing = false;
-      lastCueAt = now;
-      cueCount += 1;
-    }
-  };
-  return {
-    noteSilence: (timestampMs) => {
-      const now = timestampMs ?? Date.now();
-      if (lastCueAt !== undefined && now - lastCueAt > cueIntervalMs * 2) {
-        speechStartedAt = undefined;
-      }
-    },
-    noteSpeech: (timestampMs) => {
-      const now = timestampMs ?? Date.now();
-      if (speechStartedAt === undefined) {
-        speechStartedAt = now;
-      }
-      tryFire(now);
-    },
-    reset: () => {
-      speechStartedAt = undefined;
-      lastCueAt = undefined;
-      cueCount = 0;
-    }
-  };
-};
 // src/core/oauth2TokenSource.ts
 var createVoiceOAuth2TokenSource = (options) => {
   const fetchImpl = options.fetch ?? globalThis.fetch.bind(globalThis);

package/dist/telephony/twilio.d.ts CHANGED Viewed

@@ -164,6 +164,9 @@ export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends
     }) => Promise<string | null>;
     /** Cap on the `fillerFor` race before falling back to a static phrase. Default 600ms. */
     fillerForTimeoutMs?: number;
+    /** Backchannel cues played while the caller is mid-turn so they feel heard.
+     *  Non-turn TTS path (no barge-in interaction). Off unless `enabled`. */
+    backchannel?: import("../core/backchannel").VoiceBackchannelConfig;
     /**
      * Default spoken ack if the model returns ONLY tool calls (no text) and
      * the turn isn't ending. Without this, the caller hears silence and

package/dist/testing/index.js CHANGED Viewed

@@ -1577,12 +1577,165 @@ var buildSessionCorrectionAudit = (raw, generic, experimental, benchmarkSeeded,
     }
   };
 };
+// src/client/timeStretch.ts
+var HOP_MS = 10; // grain hop in milliseconds; init() converts it to `hop` samples
+var SEEK_MS = 5; // search radius in milliseconds; init() converts it to the +/- `seek` sample range scanned by bestOffset
+var ENERGY_EPSILON = 0.000001; // added to the energy under the square root in correlationScore so an all-zero window cannot divide by zero
+var HALF = 0.5; // coefficient of the raised-cosine window built by makeHann
+var MS_PER_SECOND = 1000; // divisor that turns the millisecond constants into sample counts
+var makeHann = (length) => { // Builds a raised-cosine (Hann) window and returns it as a Float32Array of `length` weights.
+  const weights = new Float32Array(length);
+  for (let index = 0;index < length; index += 1) {
+    weights[index] = HALF - HALF * Math.cos(2 * Math.PI * index / length); // phase is index / length (not length - 1): weights[0] is 0 and, for even length, weights[i] + weights[i + length / 2] sums to 1 up to float rounding
+  }
+  return weights;
+};
+var correlationScore = (base, start, ref, length) => { // Scores how well `length` samples of `base` beginning at `start` match `ref`; returns their dot product divided by the root energy of the `base` window.
+  let dot = 0;
+  let energy = 0;
+  for (let index = 0;index < length; index += 1) {
+    const sample = base[start + index] ?? 0; // reads outside `base` count as 0
+    dot += sample * (ref[index] ?? 0); // reads outside `ref` count as 0
+    energy += sample * sample;
+  }
+  return dot / Math.sqrt(energy + ENERGY_EPSILON); // only the `base` window is normalized; the energy of `ref` is not divided out
+};
+var overlapAddGrain = (src, off, tail, weights, hop) => { // Windows one grain of `src` starting at `off` (2 * hop samples) and returns { out, nextTail }, each `hop` samples long.
+  const out = new Float32Array(hop);
+  const nextTail = new Float32Array(hop);
+  for (let index = 0;index < hop; index += 1) {
+    out[index] = (tail[index] ?? 0) + (src[off + index] ?? 0) * (weights[index] ?? 0); // caller-supplied tail + windowed first half of this grain
+    nextTail[index] = (src[off + hop + index] ?? 0) * (weights[hop + index] ?? 0); // windowed second half of this grain, handed back for the caller to pass as `tail` next time
+  }
+  return { nextTail, out }; // both arrays are freshly allocated; the arguments are not written to
+};
+var createTimeStretcher = () => { // Creates a stateful, streaming time-stretcher; returns { process, reset }.
+  let sampleRate = 0;
+  let channelCount = 0;
+  let hop = 0; // output samples produced per grain (HOP_MS at the current sample rate)
+  let frameLen = 0; // grain length in samples, always hop * 2
+  let seek = 0; // +/- sample radius scanned by bestOffset (SEEK_MS at the current sample rate)
+  let weights = new Float32Array(0);
+  let buffers = []; // one Float32Array of retained input per channel
+  let inputStart = 0; // absolute sample index of element 0 of each retained buffer
+  let analysisPos = 0; // absolute, possibly fractional, nominal start of the next grain
+  let olaTail = []; // per channel: windowed second half of the last grain, waiting to be overlap-added
+  let naturalRef = null; // frameLen samples of channel 0 taken one hop after the last grain's start; the pattern bestOffset tries to match
+  const init = (rate, channels) => { // derives every size from the sample rate and discards all buffered state
+    sampleRate = rate;
+    channelCount = channels;
+    hop = Math.max(1, Math.round(sampleRate * HOP_MS / MS_PER_SECOND));
+    frameLen = hop * 2;
+    seek = Math.max(1, Math.round(sampleRate * SEEK_MS / MS_PER_SECOND));
+    weights = makeHann(frameLen);
+    buffers = Array.from({ length: channels }, () => new Float32Array(0));
+    olaTail = Array.from({ length: channels }, () => new Float32Array(hop));
+    inputStart = 0;
+    analysisPos = seek; // start `seek` samples in so the first search window does not begin before sample 0
+    naturalRef = null;
+  };
+  const reset = () => { // clears buffered audio and alignment state but keeps the sizes computed by init
+    buffers = buffers.map(() => new Float32Array(0));
+    olaTail = olaTail.map(() => new Float32Array(hop));
+    inputStart = 0;
+    analysisPos = seek;
+    naturalRef = null;
+  };
+  const append = (input) => { // concatenates the new samples onto each channel's retained buffer (allocates a new array per call)
+    for (let channel = 0;channel < channelCount; channel += 1) {
+      const incoming = input[channel] ?? input[0] ?? new Float32Array(0); // a missing channel falls back to channel 0, then to an empty array
+      const existing = buffers[channel] ?? new Float32Array(0);
+      const merged = new Float32Array(existing.length + incoming.length);
+      merged.set(existing, 0);
+      merged.set(incoming, existing.length);
+      buffers[channel] = merged;
+    }
+  };
+  const inputEnd = () => inputStart + (buffers[0]?.length ?? 0); // absolute index one past the last retained sample, measured on channel 0 only
+  const compact = () => { // drops retained samples that lie before the earliest position the next search window can read
+    const keepFrom = Math.max(inputStart, Math.floor(analysisPos) - seek - 1);
+    if (keepFrom <= inputStart)
+      return;
+    const drop = keepFrom - inputStart;
+    for (let channel = 0;channel < channelCount; channel += 1) {
+      buffers[channel] = (buffers[channel] ?? new Float32Array(0)).slice(drop);
+    }
+    inputStart = keepFrom;
+  };
+  const bestOffset = (center) => { // returns the delta in [-seek, seek] whose channel-0 window at center + delta scores highest against naturalRef
+    if (!naturalRef)
+      return 0; // no reference yet (first grain after init/reset): use the nominal position
+    const [base] = buffers; // alignment is decided on channel 0 and then applied to every channel by emitGrain
+    if (!base)
+      return 0;
+    let bestDelta = 0;
+    let bestScore = -Infinity;
+    for (let delta = -seek;delta <= seek; delta += 1) {
+      const score = correlationScore(base, center + delta - inputStart, naturalRef, frameLen);
+      if (score <= bestScore)
+        continue; // strict improvement only, so ties keep the earliest delta
+      bestScore = score;
+      bestDelta = delta;
+    }
+    return bestDelta;
+  };
+  const process2 = (input, speed, rate) => { // Appends `input` (array of per-channel Float32Arrays), emits every grain that is now available, and returns one Float32Array per channel (possibly empty).
+    const channels = Math.max(1, input.length);
+    if (sampleRate !== rate || channelCount !== channels)
+      init(rate, channels); // a change of sample rate or channel count re-initializes and discards buffered audio
+    append(input);
+    const analysisHop = hop * speed; // input advance per grain; output always advances by `hop`, so speed > 1 shortens and speed < 1 lengthens
+    const segments = Array.from({ length: channelCount }, () => []);
+    const emitGrain = (pos) => { // overlap-adds the grain at absolute position `pos` on every channel
+      const off = pos - inputStart;
+      for (let channel = 0;channel < channelCount; channel += 1) {
+        const src = buffers[channel];
+        const tail = olaTail[channel];
+        if (!src || !tail)
+          continue;
+        const grain = overlapAddGrain(src, off, tail, weights, hop);
+        olaTail[channel] = grain.nextTail;
+        segments[channel]?.push(grain.out);
+      }
+    };
+    const captureRef = (pos) => { // stores the channel-0 samples that follow the emitted grain by one hop as the next alignment target
+      const ref = new Float32Array(frameLen);
+      const refOff = pos + hop - inputStart;
+      const [base] = buffers;
+      if (base)
+        ref.set(base.subarray(refOff, refOff + frameLen)); // NOTE(review): center is rounded while canEmit floors, so this range can end one sample past the buffer; subarray clamps and the last ref sample then stays 0
+      naturalRef = ref;
+    };
+    const canEmit = () => Math.floor(analysisPos) - seek >= inputStart && Math.floor(analysisPos) + seek + frameLen + hop <= inputEnd(); // needs `seek` samples before the position and seek + frameLen + hop after it (search window plus reference capture)
+    while (canEmit()) { // NOTE(review): `speed` is not validated here; with speed === 0 analysisPos never advances and this loop does not terminate once canEmit() is true
+      const center = Math.round(analysisPos);
+      const pos = center + bestOffset(center);
+      emitGrain(pos);
+      captureRef(pos);
+      analysisPos += analysisHop;
+    }
+    compact();
+    return segments.map((channelSegments) => { // concatenate this call's grains into a single array per channel
+      const total = channelSegments.reduce((sum, seg) => sum + seg.length, 0);
+      const merged = new Float32Array(total);
+      let offset = 0;
+      for (const seg of channelSegments) {
+        merged.set(seg, offset);
+        offset += seg.length;
+      }
+      return merged;
+    });
+  };
+  return { process: process2, reset }; // no flush method is exposed: input that cannot yet form a grain stays buffered until more input arrives or reset() discards it
+};
 // src/client/audioPlayer.ts
 var DEFAULT_LOOKAHEAD_MS = 15;
 var DEFAULT_VOLUME = 1;
 var DEFAULT_PLAYBACK_RATE = 1;
 var MIN_PLAYBACK_RATE = 0.5; // presumably the lower clamp applied by clampPlaybackRate (defined outside this view) — confirm
 var MAX_PLAYBACK_RATE = 2; // presumably the upper clamp applied by clampPlaybackRate (defined outside this view) — confirm
+var STRETCH_BYPASS_EPSILON = 0.01; // scheduleChunk skips the time-stretcher when |playbackRate - 1| is at most this value
 var createInitialState = () => ({
   activeSourceCount: 0,
   error: null,
@@ -1645,6 +1798,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
   let outputNode = null;
   let volume = clampVolume(options.volume);
   let playbackRate = clampPlaybackRate(options.playbackRate);
+  let stretcher = null;
   let queueEndTime = 0;
   let syncPromise = Promise.resolve();
   let interruptStartedAt = null;
@@ -1677,6 +1831,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
   const resolveInterrupt = (latencyMs) => {
     clearInterruptTimer();
     interruptStartedAt = null;
+    stretcher?.reset();
     setState({
       activeSourceCount: sourceNodes.size,
       isPlaying: false,
@@ -1741,13 +1896,11 @@ var createVoiceAudioPlayer = (source, options = {}) => {
     queueEndTime = audioContext.currentTime;
     return audioContext;
   };
-  const scheduleChunk = async (chunk) => {
-    const context = await ensureAudioContext();
-    const buffer = decodePCM16LEChunk(context, chunk);
+  const scheduleBuffer = (context, buffer, rate) => {
     const node = context.createBufferSource();
     node.buffer = buffer;
     if (node.playbackRate) {
-      node.playbackRate.value = playbackRate;
+      node.playbackRate.value = rate;
     }
     node.connect(outputNode ?? context.destination);
     node.onended = () => {
@@ -1760,7 +1913,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
       maybeResolveInterrupt();
     };
     const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
-    queueEndTime = startAt + buffer.duration / playbackRate;
+    queueEndTime = startAt + buffer.duration / rate;
     sourceNodes.add(node);
     setState({
       activeSourceCount: sourceNodes.size,
@@ -1768,6 +1921,34 @@ var createVoiceAudioPlayer = (source, options = {}) => {
     });
     node.start(startAt);
   };
+  const scheduleChunk = async (chunk) => { // Decodes one chunk and queues it via scheduleBuffer, running it through the time-stretcher first unless playbackRate is within STRETCH_BYPASS_EPSILON of 1.
+    const context = await ensureAudioContext();
+    const buffer = decodePCM16LEChunk(context, chunk);
+    if (Math.abs(playbackRate - 1) <= STRETCH_BYPASS_EPSILON) {
+      stretcher?.reset(); // drop any audio still buffered in the stretcher so it cannot leak into a later stretched chunk
+      scheduleBuffer(context, buffer, playbackRate); // bypass path: the node's own playbackRate carries the (near-1) rate
+      return;
+    }
+    const channels = Math.max(1, chunk.format.channels);
+    const input = [];
+    for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
+      input.push(buffer.getChannelData(channelIndex)); // assumes the decoded buffer has at least `channels` channels — TODO confirm against decodePCM16LEChunk
+    }
+    stretcher ??= createTimeStretcher(); // created lazily on the first chunk that needs stretching
+    const stretched = stretcher.process(input, playbackRate, chunk.format.sampleRateHz);
+    const outLength = stretched[0]?.length ?? 0;
+    if (outLength === 0) {
+      return; // NOTE(review): nothing is scheduled for this chunk; its samples remain buffered in the stretcher, and no flush of that remainder is visible here
+    }
+    const outBuffer = context.createBuffer(channels, outLength, chunk.format.sampleRateHz);
+    for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
+      const channelOut = stretched[channelIndex];
+      if (!channelOut)
+        continue;
+      outBuffer.getChannelData(channelIndex).set(channelOut);
+    }
+    scheduleBuffer(context, outBuffer, 1); // the duration change was already applied by the stretcher, so the node plays at rate 1
+  };
   const stopQueuedPlayback = (options2) => {
     for (const node of [...sourceNodes]) {
       node.stop?.();
@@ -5130,6 +5311,71 @@ var createVoiceMemoryStore = () => {
 // src/core/session.ts
 import { Buffer as Buffer2 } from "buffer";
+// src/core/backchannel.ts
+var DEFAULT_CUES = [ // cue list used by createVoiceBackchannelDriver when options.cues is null or undefined
+  { text: "mm-hmm" },
+  { text: "I see" },
+  { text: "right" },
+  { text: "go on" }
+];
+var createVoiceBackchannelDriver = (options) => { // Creates a driver that calls options.onCue with a cue once speech has lasted long enough; returns { noteSilence, noteSpeech, reset }.
+  const cues = options.cues ?? DEFAULT_CUES;
+  const minSpeechMs = options.minSpeechMs ?? 2500; // speech must have been running this long before any cue fires
+  const cueIntervalMs = options.cueIntervalMs ?? 2500; // minimum gap between the trigger times of two cues
+  const cueIndexFn = options.cueIndex ?? ((index) => index % Math.max(cues.length, 1)); // default: cycle through the cues in order
+  let speechStartedAt;
+  let lastCueAt;
+  let cueCount = 0;
+  let firing = false; // true while an onCue call is being awaited, so cues never overlap
+  const tryFire = async (now) => { // fires one cue if every gate below passes; otherwise returns without side effects
+    if (firing || cues.length === 0) {
+      return;
+    }
+    if (speechStartedAt === undefined) {
+      return;
+    }
+    const elapsed = now - speechStartedAt;
+    if (elapsed < minSpeechMs) {
+      return;
+    }
+    if (lastCueAt !== undefined && now - lastCueAt < cueIntervalMs) {
+      return;
+    }
+    const cue = cues[cueIndexFn(cueCount)];
+    if (!cue) {
+      return; // a custom cueIndex may return an index with no cue
+    }
+    firing = true;
+    try {
+      await options.onCue(cue);
+    } finally { // runs whether onCue resolved or threw, so a failed cue still counts and still starts the interval
+      firing = false;
+      lastCueAt = now; // the timestamp at which the cue was triggered, not the time onCue settled
+      cueCount += 1;
+    }
+  };
+  return {
+    noteSilence: (timestampMs) => { // forgets the speech start only after a cue has fired and more than 2 * cueIntervalMs has passed since it
+      const now = timestampMs ?? Date.now();
+      if (lastCueAt !== undefined && now - lastCueAt > cueIntervalMs * 2) { // NOTE(review): before the first cue (lastCueAt undefined) silence never clears speechStartedAt — confirm this is intended
+        speechStartedAt = undefined;
+      }
+    },
+    noteSpeech: (timestampMs) => { // records the start of speech on first call and attempts to fire a cue
+      const now = timestampMs ?? Date.now();
+      if (speechStartedAt === undefined) {
+        speechStartedAt = now;
+      }
+      tryFire(now); // NOTE(review): the promise is neither awaited nor caught; tryFire has no catch, so a rejecting onCue surfaces as an unhandled promise rejection
+    },
+    reset: () => { // clears timing state and the cue counter; `firing` is left untouched
+      speechStartedAt = undefined;
+      lastCueAt = undefined;
+      cueCount = 0;
+    }
+  };
+};
 // src/core/handoff.ts
 var toHex = (bytes) => Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
 var signHandoffBody = async (input) => {
@@ -7152,6 +7398,30 @@ var createVoiceSession = (options) => {
       });
     });
   };
+  const emitBackchannelCue = (text) => { // Queues a send of `text` on the TTS session via runSerial; returns undefined (the queued task is not returned to the caller).
+    if (!text || !options.tts)
+      return;
+    if (activeTTSTurnId !== undefined || fillerActive)
+      return; // skip while a TTS turn or a filler is active
+    runSerial("backchannel.send", async () => {
+      if (activeTTSTurnId !== undefined || fillerActive)
+        return; // same check repeated inside the queued task — presumably because state can change before it runs; confirm against runSerial
+      const adapterSession = await ensureTTSSession();
+      if (!adapterSession)
+        return;
+      try {
+        await adapterSession.send(text);
+      } catch {} // send failures are discarded without logging
+    });
+  };
+  const backchannelDriver = options.backchannel?.enabled && options.tts ? createVoiceBackchannelDriver({ // null unless backchannel.enabled is truthy and a TTS adapter is configured
+    ...options.backchannel.cueIntervalMs !== undefined ? { cueIntervalMs: options.backchannel.cueIntervalMs } : {}, // options left undefined are omitted so the driver's own defaults apply
+    ...options.backchannel.cues ? {
+      cues: options.backchannel.cues.filter((cue) => typeof cue === "string" && cue.trim().length > 0).map((cue) => ({ text: cue })) // keep non-blank strings only and wrap each as { text }; an all-blank list yields an empty array, which makes the driver fire nothing
+    } : {},
+    ...options.backchannel.minSpeechMs !== undefined ? { minSpeechMs: options.backchannel.minSpeechMs } : {},
+    onCue: (cue) => emitBackchannelCue(cue.text) // emitBackchannelCue returns undefined, so the driver's await on onCue resolves without waiting for the send
+  }) : null;
   const createTurnTTSStreamer = (turn, session) => {
     let buffer = "";
     let full = "";
@@ -7643,6 +7913,7 @@ var createVoiceSession = (options) => {
   };
   const commitTurnInternal = async (reason = "manual") => {
     clearSilenceTimer();
+    backchannelDriver?.reset();
     amdLastTurnCommitAt = Date.now();
     const session = await readSession();
     if (session.status === "completed" || session.status === "failed") {
@@ -7986,7 +8257,9 @@ var createVoiceSession = (options) => {
       speechDetected = true;
       clearSilenceTimer();
       kickCallSilenceWatchdog();
+      backchannelDriver?.noteSpeech();
     } else if (speechDetected) {
+      backchannelDriver?.noteSilence();
       const currentSession = await readSession();
       const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText, {
         partialEndedAtMs: currentSession.currentTurn.partialEndedAt,
@@ -13465,6 +13738,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
       ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
       ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
       ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
+      ...options.backchannel ? { backchannel: options.backchannel } : {},
       ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
       ...options.routeOnTurnTimeoutMs !== undefined ? { routeOnTurnTimeoutMs: options.routeOnTurnTimeoutMs } : {},
       trace: options.trace,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@absolutejs/voice",
-  "version": "0.0.22-beta.577",
+  "version": "0.0.22-beta.579",
   "description": "Voice primitives and Elysia plugin for AbsoluteJS",
   "repository": {
     "type": "git",