npm - @voice-kit/core - Versions diffs - 0.1.0 → 0.1.2 - Mend

@voice-kit/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/audio.cjs +533 -0
package/dist/audio.cjs.map +1 -0
package/dist/audio.d.cts +260 -0
package/dist/audio.d.ts +260 -0
package/dist/audio.js +514 -0
package/dist/audio.js.map +1 -0
package/dist/compliance.d.cts +1 -1
package/dist/compliance.d.ts +1 -1
package/dist/errors.d.cts +4 -79
package/dist/errors.d.ts +4 -79
package/dist/index-D3KfRXMP.d.cts +319 -0
package/dist/index-D3KfRXMP.d.ts +319 -0
package/dist/index.cjs +280 -0
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +4 -319
package/dist/index.d.ts +4 -319
package/dist/index.js +259 -0
package/dist/index.js.map +1 -1
package/dist/memory.d.cts +1 -1
package/dist/memory.d.ts +1 -1
package/dist/observability.d.cts +1 -1
package/dist/observability.d.ts +1 -1
package/dist/stt.d.cts +1 -1
package/dist/stt.d.ts +1 -1
package/dist/telephony.errors-BQYr6-vl.d.cts +80 -0
package/dist/telephony.errors-C0-nScrF.d.ts +80 -0
package/dist/tts.d.cts +1 -1
package/dist/tts.d.ts +1 -1
package/package.json +9 -1

package/dist/audio.d.cts ADDED Viewed

@@ -0,0 +1,260 @@
+import { V as VADConfig, g as VoiceFrame } from './index-D3KfRXMP.cjs';
+import { PassThrough } from 'node:stream';
+import { EventEmitter } from 'node:events';
+import { A as AudioTransportError } from './telephony.errors-BQYr6-vl.cjs';
+import 'ai';
+/**
+ * @voice-kit/core — G.711 µ-law codec
+ *
+ * Pure TypeScript implementation of G.711 µ-law (mu-law) encode/decode.
+ * No external codec library needed for µ-law. These helpers are internal:
+ * this module exports them, but each one is marked as internal-only.
+ *
+ * Used by AudioPipeline to convert Twilio/Exotel µ-law audio ↔ PCM.
+ */
+/**
+ * Convert a single µ-law encoded byte (0–255) to a 16-bit linear PCM sample.
+ * Algorithm: ITU-T G.711 Section 3.
+ *
+ * @internal
+ */
+declare function mulawToLinear(sample: number): number;
+/**
+ * Convert a 16-bit linear PCM sample to a µ-law encoded byte.
+ * Algorithm: ITU-T G.711 Section 3.
+ *
+ * @internal
+ */
+declare function linearToMulaw(sample: number): number;
+/**
+ * Convert a Buffer of µ-law encoded bytes to 16-bit little-endian PCM.
+ * Each µ-law byte expands to 2 PCM bytes (16-bit LE signed).
+ *
+ * Input:  N bytes  (µ-law, 8kHz mono as sent by Twilio/Exotel)
+ * Output: N*2 bytes (PCM 16-bit LE, same sample rate)
+ *
+ * @internal
+ */
+declare function mulawBufferToPcm(buf: Buffer): Buffer;
+/**
+ * Convert a Buffer of 16-bit little-endian PCM to µ-law bytes.
+ * Each pair of PCM bytes compresses to 1 µ-law byte.
+ *
+ * Input:  N bytes (PCM 16-bit LE)
+ * Output: N/2 bytes (µ-law)
+ *
+ * @internal
+ */
+declare function pcmBufferToMulaw(buf: Buffer): Buffer;
+/**
+ * Convert a base64-encoded µ-law string (as sent by Twilio Media Streams)
+ * directly to PCM Buffer. Convenience wrapper used in TwilioProvider.
+ *
+ * @internal
+ */
+declare function base64MulawToPcm(base64: string): Buffer;
+/**
+ * Convert a PCM Buffer to a base64-encoded µ-law string (for sending
+ * back to Twilio Media Streams).
+ *
+ * @internal
+ */
+declare function pcmToBase64Mulaw(pcm: Buffer): string;
+/**
+ * @voice-kit/core — AudioPipeline
+ *
+ * Automatically selects codec, sample rate, and VAD config based on the
+ * telephony provider. Developers never configure codecs — the pipeline
+ * handles all conversions transparently.
+ *
+ * Provider audio formats:
+ *   Twilio / Exotel → 8kHz µ-law → decode → 8kHz PCM → upsample → 16kHz PCM (for STT)
+ *   Plivo / Telnyx  → 8kHz µ-law (same as Twilio)
+ *   LiveKit         → 48kHz Opus → decode → 48kHz PCM → downsample → 16kHz PCM (for STT)
+ *   SIP (generic)   → 8kHz G.711 (same as Twilio)
+ *
+ * TTS output path (reverse):
+ *   STT/LLM → TTS PCM (provider-native rate) → resample → telephony-native rate → encode
+ */
+/** Telephony providers handled by the pipeline. */
+type TelephonyProviderName = 'twilio' | 'exotel' | 'plivo' | 'telnyx' | 'livekit' | 'sip';
+/**
+ * AudioPipeline: auto-wires codec → resample → VAD for a specific telephony provider.
+ *
+ * Developers never call this directly — it is instantiated by TelephonyProvider
+ * implementations and consumed by VoiceAgent.
+ *
+ * @internal
+ */
+declare class AudioPipeline {
+    private readonly profile;
+    readonly provider: TelephonyProviderName;
+    constructor(provider: TelephonyProviderName);
+    /**
+     * Transform incoming telephony audio to 16kHz PCM for STT.
+     * Handles µ-law decode + resampling automatically.
+     *
+     * @param raw  Raw audio bytes as received from telephony provider
+     * @returns    Async iterable of 16kHz PCM buffers for STT
+     *
+     * @internal
+     */
+    inboundForSTT(raw: AsyncIterable<Buffer>): AsyncIterable<Buffer>;
+    /**
+     * Transform TTS output PCM to telephony-native format for sending to caller.
+     * Handles resampling + µ-law encode automatically.
+     *
+     * @param ttsAudio  Raw PCM from TTS provider (at TTS provider's native rate)
+     * @param ttsSampleRate  Native sample rate of the TTS provider
+     * @returns  Async iterable of audio bytes ready to send to telephony provider
+     *
+     * @internal
+     */
+    outboundFromTTS(ttsAudio: AsyncIterable<Buffer>, ttsSampleRate: number): AsyncIterable<Buffer>;
+    /** Get the VAD config tuned for this provider's audio quality. @internal */
+    get vadConfig(): Required<VADConfig>;
+    /** Sample rate that STT expects (post-pipeline). @internal */
+    get sttSampleRate(): number;
+    /** Async generator: decode µ-law stream to PCM. @internal */
+    private decodeMulaw;
+}
+/**
+ * Factory: create an AudioPipeline pre-configured for the given telephony provider.
+ *
+ * @internal — used by TelephonyProvider implementations
+ */
+declare function createAudioPipeline(provider: TelephonyProviderName): AudioPipeline;
+/**
+ * @voice-kit/core — PCM audio resampler
+ *
+ * Resamples raw PCM audio between sample rates using fluent-ffmpeg.
+ * Internal helpers: exported from this module, but each one is marked as internal-only.
+ * Used by AudioPipeline to convert provider-native rates to STT-required rates.
+ */
+/**
+ * Resample a PCM Buffer from one sample rate to another.
+ * Both input and output are signed 16-bit little-endian PCM, mono.
+ *
+ * Common conversions:
+ *   8kHz → 16kHz  (Twilio/Exotel µ-law decoded → Deepgram input)
+ *   48kHz → 16kHz (LiveKit Opus decoded → Deepgram input)
+ *   24kHz → 8kHz  (ElevenLabs output → Twilio send)
+ *
+ * @param buf     Raw PCM bytes (s16le mono)
+ * @param fromHz  Source sample rate in Hz
+ * @param toHz    Target sample rate in Hz
+ * @returns       Resampled PCM bytes (s16le mono)
+ *
+ * @internal
+ */
+declare function resample(buf: Buffer, fromHz: number, toHz: number): Promise<Buffer>;
+/**
+ * Create a streaming resampler Transform stream.
+ * More efficient than buffering for large audio chunks.
+ *
+ * @param fromHz  Source sample rate in Hz
+ * @param toHz    Target sample rate in Hz
+ * @returns       Node.js PassThrough stream (a Transform): PCM in, resampled PCM out
+ *
+ * @internal
+ */
+declare function createResamplerStream(fromHz: number, toHz: number): PassThrough;
+/**
+ * Async generator that resamples chunks from an audio iterable on the fly.
+ * Used by AudioPipeline for realtime streaming paths.
+ *
+ * @param audio   Async iterable of raw PCM buffers at fromHz
+ * @param fromHz  Source sample rate
+ * @param toHz    Target sample rate
+ *
+ * @internal
+ */
+declare function resampleStream(audio: AsyncIterable<Buffer>, fromHz: number, toHz: number): AsyncIterable<Buffer>;
+/**
+ * @voice-kit/core — Voice Activity Detection engine
+ *
+ * Wraps @ricky0123/vad-web and emits strongly-typed VoiceFrame events.
+ * Developers subscribe to VoiceFrame events — they never touch the raw VAD API.
+ *
+ * @example
+ * ```ts
+ * const vad = createVAD({ threshold: 0.6 })
+ * vad.on('frame', (frame) => {
+ *   if (frame.type === 'speech_start') startRecording()
+ *   if (frame.type === 'speech_end') stopRecording()
+ * })
+ * await vad.processStream(audioStream)
+ * ```
+ */
+type VADEventMap = {
+    frame: [VoiceFrame];
+    error: [AudioTransportError];
+};
+/**
+ * Internal VAD engine. Processes a 16kHz PCM stream and emits VoiceFrame events.
+ * Automatically debounces rapid speech_start/speech_end transitions.
+ *
+ * Input: 16kHz, 16-bit little-endian PCM, mono.
+ * Output: VoiceFrame events on the emitter.
+ */
+declare class VADEngine extends EventEmitter<VADEventMap> {
+    private readonly config;
+    private isSpeaking;
+    private positiveFrameCount;
+    private negativeFrameCount;
+    private debounceTimer;
+    private frameBuffer;
+    private vadModel;
+    constructor(config?: VADConfig);
+    /**
+     * Process an async stream of PCM audio frames.
+     * Automatically frames the input into 30ms chunks for VAD processing.
+     *
+     * @param audio  Async iterable of PCM buffers (16kHz, s16le, mono)
+     */
+    processStream(audio: AsyncIterable<Buffer>): Promise<void>;
+    /**
+     * Process a single 30ms PCM frame through the VAD model.
+     *
+     * @internal
+     */
+    private processFrame;
+    /**
+     * Run Silero VAD model inference on a single frame.
+     * Returns confidence score 0–1.
+     *
+     * @internal
+     */
+    private runVADInference;
+    private emitFrame;
+    private scheduleDebounce;
+    private clearDebounce;
+    /**
+     * Load the Silero VAD model if not already loaded.
+     * @internal
+     */
+    private ensureModelLoaded;
+    /** Clean up resources. Call when the call ends. */
+    destroy(): void;
+}
+/**
+ * Create a configured VAD engine instance.
+ * Input must be 16kHz, 16-bit LE, mono PCM (handled automatically by AudioPipeline).
+ *
+ * @example
+ * ```ts
+ * const vad = createVAD({ threshold: 0.7, debounceMs: 200 })
+ * vad.on('frame', (frame) => handleFrame(frame))
+ * await vad.processStream(audioStream)
+ * ```
+ */
+declare function createVAD(config?: VADConfig): VADEngine;
+export { AudioPipeline, type TelephonyProviderName, VADEngine, base64MulawToPcm, createAudioPipeline, createResamplerStream, createVAD, linearToMulaw, mulawBufferToPcm, mulawToLinear, pcmBufferToMulaw, pcmToBase64Mulaw, resample, resampleStream };

package/dist/audio.d.ts ADDED Viewed

@@ -0,0 +1,260 @@
+import { V as VADConfig, g as VoiceFrame } from './index-D3KfRXMP.js';
+import { PassThrough } from 'node:stream';
+import { EventEmitter } from 'node:events';
+import { A as AudioTransportError } from './telephony.errors-C0-nScrF.js';
+import 'ai';
+/**
+ * @voice-kit/core — G.711 µ-law codec
+ *
+ * Pure TypeScript implementation of G.711 µ-law (mu-law) encode/decode.
+ * No external codec library needed for µ-law. These helpers are internal:
+ * this module exports them, but each one is marked as internal-only.
+ *
+ * Used by AudioPipeline to convert Twilio/Exotel µ-law audio ↔ PCM.
+ */
+/**
+ * Convert a single µ-law encoded byte (0–255) to a 16-bit linear PCM sample.
+ * Algorithm: ITU-T G.711 Section 3.
+ *
+ * @internal
+ */
+declare function mulawToLinear(sample: number): number;
+/**
+ * Convert a 16-bit linear PCM sample to a µ-law encoded byte.
+ * Algorithm: ITU-T G.711 Section 3.
+ *
+ * @internal
+ */
+declare function linearToMulaw(sample: number): number;
+/**
+ * Convert a Buffer of µ-law encoded bytes to 16-bit little-endian PCM.
+ * Each µ-law byte expands to 2 PCM bytes (16-bit LE signed).
+ *
+ * Input:  N bytes  (µ-law, 8kHz mono as sent by Twilio/Exotel)
+ * Output: N*2 bytes (PCM 16-bit LE, same sample rate)
+ *
+ * @internal
+ */
+declare function mulawBufferToPcm(buf: Buffer): Buffer;
+/**
+ * Convert a Buffer of 16-bit little-endian PCM to µ-law bytes.
+ * Each pair of PCM bytes compresses to 1 µ-law byte.
+ *
+ * Input:  N bytes (PCM 16-bit LE)
+ * Output: N/2 bytes (µ-law)
+ *
+ * @internal
+ */
+declare function pcmBufferToMulaw(buf: Buffer): Buffer;
+/**
+ * Convert a base64-encoded µ-law string (as sent by Twilio Media Streams)
+ * directly to PCM Buffer. Convenience wrapper used in TwilioProvider.
+ *
+ * @internal
+ */
+declare function base64MulawToPcm(base64: string): Buffer;
+/**
+ * Convert a PCM Buffer to a base64-encoded µ-law string (for sending
+ * back to Twilio Media Streams).
+ *
+ * @internal
+ */
+declare function pcmToBase64Mulaw(pcm: Buffer): string;
+/**
+ * @voice-kit/core — AudioPipeline
+ *
+ * Automatically selects codec, sample rate, and VAD config based on the
+ * telephony provider. Developers never configure codecs — the pipeline
+ * handles all conversions transparently.
+ *
+ * Provider audio formats:
+ *   Twilio / Exotel → 8kHz µ-law → decode → 8kHz PCM → upsample → 16kHz PCM (for STT)
+ *   Plivo / Telnyx  → 8kHz µ-law (same as Twilio)
+ *   LiveKit         → 48kHz Opus → decode → 48kHz PCM → downsample → 16kHz PCM (for STT)
+ *   SIP (generic)   → 8kHz G.711 (same as Twilio)
+ *
+ * TTS output path (reverse):
+ *   STT/LLM → TTS PCM (provider-native rate) → resample → telephony-native rate → encode
+ */
+/** Telephony providers handled by the pipeline. */
+type TelephonyProviderName = 'twilio' | 'exotel' | 'plivo' | 'telnyx' | 'livekit' | 'sip';
+/**
+ * AudioPipeline: auto-wires codec → resample → VAD for a specific telephony provider.
+ *
+ * Developers never call this directly — it is instantiated by TelephonyProvider
+ * implementations and consumed by VoiceAgent.
+ *
+ * @internal
+ */
+declare class AudioPipeline {
+    private readonly profile;
+    readonly provider: TelephonyProviderName;
+    constructor(provider: TelephonyProviderName);
+    /**
+     * Transform incoming telephony audio to 16kHz PCM for STT.
+     * Handles µ-law decode + resampling automatically.
+     *
+     * @param raw  Raw audio bytes as received from telephony provider
+     * @returns    Async iterable of 16kHz PCM buffers for STT
+     *
+     * @internal
+     */
+    inboundForSTT(raw: AsyncIterable<Buffer>): AsyncIterable<Buffer>;
+    /**
+     * Transform TTS output PCM to telephony-native format for sending to caller.
+     * Handles resampling + µ-law encode automatically.
+     *
+     * @param ttsAudio  Raw PCM from TTS provider (at TTS provider's native rate)
+     * @param ttsSampleRate  Native sample rate of the TTS provider
+     * @returns  Async iterable of audio bytes ready to send to telephony provider
+     *
+     * @internal
+     */
+    outboundFromTTS(ttsAudio: AsyncIterable<Buffer>, ttsSampleRate: number): AsyncIterable<Buffer>;
+    /** Get the VAD config tuned for this provider's audio quality. @internal */
+    get vadConfig(): Required<VADConfig>;
+    /** Sample rate that STT expects (post-pipeline). @internal */
+    get sttSampleRate(): number;
+    /** Async generator: decode µ-law stream to PCM. @internal */
+    private decodeMulaw;
+}
+/**
+ * Factory: create an AudioPipeline pre-configured for the given telephony provider.
+ *
+ * @internal — used by TelephonyProvider implementations
+ */
+declare function createAudioPipeline(provider: TelephonyProviderName): AudioPipeline;
+/**
+ * @voice-kit/core — PCM audio resampler
+ *
+ * Resamples raw PCM audio between sample rates using fluent-ffmpeg.
+ * Internal helpers: exported from this module, but each one is marked as internal-only.
+ * Used by AudioPipeline to convert provider-native rates to STT-required rates.
+ */
+/**
+ * Resample a PCM Buffer from one sample rate to another.
+ * Both input and output are signed 16-bit little-endian PCM, mono.
+ *
+ * Common conversions:
+ *   8kHz → 16kHz  (Twilio/Exotel µ-law decoded → Deepgram input)
+ *   48kHz → 16kHz (LiveKit Opus decoded → Deepgram input)
+ *   24kHz → 8kHz  (ElevenLabs output → Twilio send)
+ *
+ * @param buf     Raw PCM bytes (s16le mono)
+ * @param fromHz  Source sample rate in Hz
+ * @param toHz    Target sample rate in Hz
+ * @returns       Resampled PCM bytes (s16le mono)
+ *
+ * @internal
+ */
+declare function resample(buf: Buffer, fromHz: number, toHz: number): Promise<Buffer>;
+/**
+ * Create a streaming resampler Transform stream.
+ * More efficient than buffering for large audio chunks.
+ *
+ * @param fromHz  Source sample rate in Hz
+ * @param toHz    Target sample rate in Hz
+ * @returns       Node.js PassThrough stream (a Transform): PCM in, resampled PCM out
+ *
+ * @internal
+ */
+declare function createResamplerStream(fromHz: number, toHz: number): PassThrough;
+/**
+ * Async generator that resamples chunks from an audio iterable on the fly.
+ * Used by AudioPipeline for realtime streaming paths.
+ *
+ * @param audio   Async iterable of raw PCM buffers at fromHz
+ * @param fromHz  Source sample rate
+ * @param toHz    Target sample rate
+ *
+ * @internal
+ */
+declare function resampleStream(audio: AsyncIterable<Buffer>, fromHz: number, toHz: number): AsyncIterable<Buffer>;
+/**
+ * @voice-kit/core — Voice Activity Detection engine
+ *
+ * Wraps @ricky0123/vad-web and emits strongly-typed VoiceFrame events.
+ * Developers subscribe to VoiceFrame events — they never touch the raw VAD API.
+ *
+ * @example
+ * ```ts
+ * const vad = createVAD({ threshold: 0.6 })
+ * vad.on('frame', (frame) => {
+ *   if (frame.type === 'speech_start') startRecording()
+ *   if (frame.type === 'speech_end') stopRecording()
+ * })
+ * await vad.processStream(audioStream)
+ * ```
+ */
+type VADEventMap = {
+    frame: [VoiceFrame];
+    error: [AudioTransportError];
+};
+/**
+ * Internal VAD engine. Processes a 16kHz PCM stream and emits VoiceFrame events.
+ * Automatically debounces rapid speech_start/speech_end transitions.
+ *
+ * Input: 16kHz, 16-bit little-endian PCM, mono.
+ * Output: VoiceFrame events on the emitter.
+ */
+declare class VADEngine extends EventEmitter<VADEventMap> {
+    private readonly config;
+    private isSpeaking;
+    private positiveFrameCount;
+    private negativeFrameCount;
+    private debounceTimer;
+    private frameBuffer;
+    private vadModel;
+    constructor(config?: VADConfig);
+    /**
+     * Process an async stream of PCM audio frames.
+     * Automatically frames the input into 30ms chunks for VAD processing.
+     *
+     * @param audio  Async iterable of PCM buffers (16kHz, s16le, mono)
+     */
+    processStream(audio: AsyncIterable<Buffer>): Promise<void>;
+    /**
+     * Process a single 30ms PCM frame through the VAD model.
+     *
+     * @internal
+     */
+    private processFrame;
+    /**
+     * Run Silero VAD model inference on a single frame.
+     * Returns confidence score 0–1.
+     *
+     * @internal
+     */
+    private runVADInference;
+    private emitFrame;
+    private scheduleDebounce;
+    private clearDebounce;
+    /**
+     * Load the Silero VAD model if not already loaded.
+     * @internal
+     */
+    private ensureModelLoaded;
+    /** Clean up resources. Call when the call ends. */
+    destroy(): void;
+}
+/**
+ * Create a configured VAD engine instance.
+ * Input must be 16kHz, 16-bit LE, mono PCM (handled automatically by AudioPipeline).
+ *
+ * @example
+ * ```ts
+ * const vad = createVAD({ threshold: 0.7, debounceMs: 200 })
+ * vad.on('frame', (frame) => handleFrame(frame))
+ * await vad.processStream(audioStream)
+ * ```
+ */
+declare function createVAD(config?: VADConfig): VADEngine;
+export { AudioPipeline, type TelephonyProviderName, VADEngine, base64MulawToPcm, createAudioPipeline, createResamplerStream, createVAD, linearToMulaw, mulawBufferToPcm, mulawToLinear, pcmBufferToMulaw, pcmToBase64Mulaw, resample, resampleStream };