@voice-kit/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,260 @@
1
+ import { V as VADConfig, g as VoiceFrame } from './index-D3KfRXMP.cjs';
2
+ import { PassThrough } from 'node:stream';
3
+ import { EventEmitter } from 'node:events';
4
+ import { A as AudioTransportError } from './telephony.errors-BQYr6-vl.cjs';
5
+ import 'ai';
6
+
7
+ /**
8
+ * @voice-kit/core — G.711 µ-law codec
9
+ *
10
+ * Pure TypeScript implementation of G.711 µ-law (mu-law) encode/decode.
11
+ * No external codec library needed for µ-law. Intended for internal use only,
11
+ * although this module's export list does include these functions.
13
+ *
14
+ * Used by AudioPipeline to convert Twilio/Exotel µ-law audio ↔ PCM.
15
+ */
16
+ /**
17
+ * Convert a single µ-law encoded byte (0–255) to a 16-bit linear PCM sample.
18
+ * Algorithm: ITU-T G.711 Section 3.
19
+ *
20
+ * @internal
21
+ */
22
+ declare function mulawToLinear(sample: number): number;
23
+ /**
24
+ * Convert a 16-bit linear PCM sample to a µ-law encoded byte.
25
+ * Algorithm: ITU-T G.711 Section 3.
26
+ *
27
+ * @internal
28
+ */
29
+ declare function linearToMulaw(sample: number): number;
30
+ /**
31
+ * Convert a Buffer of µ-law encoded bytes to 16-bit little-endian PCM.
32
+ * Each µ-law byte expands to 2 PCM bytes (16-bit LE signed).
33
+ *
34
+ * Input: N bytes (µ-law, 8kHz mono as sent by Twilio/Exotel)
35
+ * Output: N*2 bytes (PCM 16-bit LE, same sample rate)
36
+ *
37
+ * @internal
38
+ */
39
+ declare function mulawBufferToPcm(buf: Buffer): Buffer;
40
+ /**
41
+ * Convert a Buffer of 16-bit little-endian PCM to µ-law bytes.
42
+ * Each pair of PCM bytes compresses to 1 µ-law byte.
43
+ *
44
+ * Input: N bytes (PCM 16-bit LE)
45
+ * Output: N/2 bytes (µ-law)
46
+ *
47
+ * @internal
48
+ */
49
+ declare function pcmBufferToMulaw(buf: Buffer): Buffer;
50
+ /**
51
+ * Convert a base64-encoded µ-law string (as sent by Twilio Media Streams)
52
+ * directly to PCM Buffer. Convenience wrapper used in TwilioProvider.
53
+ *
54
+ * @internal
55
+ */
56
+ declare function base64MulawToPcm(base64: string): Buffer;
57
+ /**
58
+ * Convert a PCM Buffer to a base64-encoded µ-law string (for sending
59
+ * back to Twilio Media Streams).
60
+ *
61
+ * @internal
62
+ */
63
+ declare function pcmToBase64Mulaw(pcm: Buffer): string;
64
+
65
+ /**
66
+ * @voice-kit/core — AudioPipeline
67
+ *
68
+ * Automatically selects codec, sample rate, and VAD config based on the
69
+ * telephony provider. Developers never configure codecs — the pipeline
70
+ * handles all conversions transparently.
71
+ *
72
+ * Provider audio formats:
73
+ * Twilio / Exotel → 8kHz µ-law → decode → 8kHz PCM → upsample → 16kHz PCM (for STT)
74
+ * Plivo / Telnyx → 8kHz µ-law (same as Twilio)
75
+ * LiveKit → 48kHz Opus → decode → 48kHz PCM → downsample → 16kHz PCM (for STT)
76
+ * SIP (generic) → 8kHz G.711 (same as Twilio)
77
+ *
78
+ * TTS output path (reverse):
79
+ * STT/LLM → TTS PCM (provider-native rate) → resample → telephony-native rate → encode
80
+ */
81
+
82
+ /** Telephony providers handled by the pipeline. */
83
+ type TelephonyProviderName = 'twilio' | 'exotel' | 'plivo' | 'telnyx' | 'livekit' | 'sip';
84
+ /**
85
+ * AudioPipeline: auto-wires codec → resample → VAD for a specific telephony provider.
86
+ *
87
+ * Developers never call this directly — it is instantiated by TelephonyProvider
88
+ * implementations and consumed by VoiceAgent.
89
+ *
90
+ * @internal
91
+ */
92
+ declare class AudioPipeline {
93
+ private readonly profile;
94
+ readonly provider: TelephonyProviderName;
95
+ constructor(provider: TelephonyProviderName);
96
+ /**
97
+ * Transform incoming telephony audio to 16kHz PCM for STT.
98
+ * Handles µ-law decode + resampling automatically.
99
+ *
100
+ * @param raw Raw audio bytes as received from telephony provider
101
+ * @returns Async iterable of 16kHz PCM buffers for STT
102
+ *
103
+ * @internal
104
+ */
105
+ inboundForSTT(raw: AsyncIterable<Buffer>): AsyncIterable<Buffer>;
106
+ /**
107
+ * Transform TTS output PCM to telephony-native format for sending to caller.
108
+ * Handles resampling + µ-law encode automatically.
109
+ *
110
+ * @param ttsAudio Raw PCM from TTS provider (at TTS provider's native rate)
111
+ * @param ttsSampleRate Native sample rate of the TTS provider
112
+ * @returns Async iterable of audio bytes ready to send to telephony provider
113
+ *
114
+ * @internal
115
+ */
116
+ outboundFromTTS(ttsAudio: AsyncIterable<Buffer>, ttsSampleRate: number): AsyncIterable<Buffer>;
117
+ /** Get the VAD config tuned for this provider's audio quality. @internal */
118
+ get vadConfig(): Required<VADConfig>;
119
+ /** Sample rate that STT expects (post-pipeline). @internal */
120
+ get sttSampleRate(): number;
121
+ /** Async generator: decode µ-law stream to PCM. @internal */
122
+ private decodeMulaw;
123
+ }
124
+ /**
125
+ * Factory: create an AudioPipeline pre-configured for the given telephony provider.
126
+ *
127
+ * @internal — used by TelephonyProvider implementations
128
+ */
129
+ declare function createAudioPipeline(provider: TelephonyProviderName): AudioPipeline;
130
+
131
+ /**
132
+ * @voice-kit/core — PCM audio resampler
133
+ *
134
+ * Resamples raw PCM audio between sample rates using fluent-ffmpeg.
135
+ * Intended for internal use only, although this module's export list does include these functions.
136
+ * Used by AudioPipeline to convert provider-native rates to STT-required rates.
137
+ */
138
+
139
+ /**
140
+ * Resample a PCM Buffer from one sample rate to another.
141
+ * Both input and output are signed 16-bit little-endian PCM, mono.
142
+ *
143
+ * Common conversions:
144
+ * 8kHz → 16kHz (Twilio/Exotel µ-law decoded → Deepgram input)
145
+ * 48kHz → 16kHz (LiveKit Opus decoded → Deepgram input)
146
+ * 24kHz → 8kHz (ElevenLabs output → Twilio send)
147
+ *
148
+ * @param buf Raw PCM bytes (s16le mono)
149
+ * @param fromHz Source sample rate in Hz
150
+ * @param toHz Target sample rate in Hz
151
+ * @returns Resampled PCM bytes (s16le mono)
152
+ *
153
+ * @internal
154
+ */
155
+ declare function resample(buf: Buffer, fromHz: number, toHz: number): Promise<Buffer>;
156
+ /**
157
+ * Create a streaming resampler, returned as a PassThrough stream (a Transform subclass).
158
+ * More efficient than buffering for large audio chunks.
159
+ *
160
+ * @param fromHz Source sample rate in Hz
161
+ * @param toHz Target sample rate in Hz
162
+ * @returns Node.js PassThrough stream: PCM in, resampled PCM out
163
+ *
164
+ * @internal
165
+ */
166
+ declare function createResamplerStream(fromHz: number, toHz: number): PassThrough;
167
+ /**
168
+ * Async generator that resamples chunks from an audio iterable on the fly.
169
+ * Used by AudioPipeline for realtime streaming paths.
170
+ *
171
+ * @param audio Async iterable of raw PCM buffers at fromHz
172
+ * @param fromHz Source sample rate
173
+ * @param toHz Target sample rate
174
+ *
175
+ * @internal
176
+ */
177
+ declare function resampleStream(audio: AsyncIterable<Buffer>, fromHz: number, toHz: number): AsyncIterable<Buffer>;
178
+
179
+ /**
180
+ * @voice-kit/core — Voice Activity Detection engine
181
+ *
182
+ * Wraps @ricky0123/vad-web and emits strongly-typed VoiceFrame events.
183
+ * Developers subscribe to VoiceFrame events — they never touch the raw VAD API.
184
+ *
185
+ * @example
186
+ * ```ts
187
+ * const vad = createVAD({ threshold: 0.6 })
188
+ * vad.on('frame', (frame) => {
189
+ * if (frame.type === 'speech_start') startRecording()
190
+ * if (frame.type === 'speech_end') stopRecording()
191
+ * })
192
+ * await vad.processStream(audioStream)
193
+ * ```
194
+ */
195
+
196
+ type VADEventMap = {
197
+ frame: [VoiceFrame];
198
+ error: [AudioTransportError];
199
+ };
200
+ /**
201
+ * Internal VAD engine. Processes a 16kHz PCM stream and emits VoiceFrame events.
202
+ * Automatically debounces rapid speech_start/speech_end transitions.
203
+ *
204
+ * Input: 16kHz, 16-bit little-endian PCM, mono.
205
+ * Output: VoiceFrame events on the emitter.
206
+ */
207
+ declare class VADEngine extends EventEmitter<VADEventMap> {
208
+ private readonly config;
209
+ private isSpeaking;
210
+ private positiveFrameCount;
211
+ private negativeFrameCount;
212
+ private debounceTimer;
213
+ private frameBuffer;
214
+ private vadModel;
215
+ constructor(config?: VADConfig);
216
+ /**
217
+ * Process an async stream of PCM audio frames.
218
+ * Automatically frames the input into 30ms chunks for VAD processing.
219
+ *
220
+ * @param audio Async iterable of PCM buffers (16kHz, s16le, mono)
221
+ */
222
+ processStream(audio: AsyncIterable<Buffer>): Promise<void>;
223
+ /**
224
+ * Process a single 30ms PCM frame through the VAD model.
225
+ *
226
+ * @internal
227
+ */
228
+ private processFrame;
229
+ /**
230
+ * Run Silero VAD model inference on a single frame.
231
+ * Returns confidence score 0–1.
232
+ *
233
+ * @internal
234
+ */
235
+ private runVADInference;
236
+ private emitFrame;
237
+ private scheduleDebounce;
238
+ private clearDebounce;
239
+ /**
240
+ * Load the Silero VAD model if not already loaded.
241
+ * @internal
242
+ */
243
+ private ensureModelLoaded;
244
+ /** Clean up resources. Call when the call ends. */
245
+ destroy(): void;
246
+ }
247
+ /**
248
+ * Create a configured VAD engine instance.
249
+ * Input must be 16kHz, 16-bit LE, mono PCM (handled automatically by AudioPipeline).
250
+ *
251
+ * @example
252
+ * ```ts
253
+ * const vad = createVAD({ threshold: 0.7, debounceMs: 200 })
254
+ * vad.on('frame', (frame) => handleFrame(frame))
255
+ * await vad.processStream(audioStream)
256
+ * ```
257
+ */
258
+ declare function createVAD(config?: VADConfig): VADEngine;
259
+
260
+ export { AudioPipeline, type TelephonyProviderName, VADEngine, base64MulawToPcm, createAudioPipeline, createResamplerStream, createVAD, linearToMulaw, mulawBufferToPcm, mulawToLinear, pcmBufferToMulaw, pcmToBase64Mulaw, resample, resampleStream };
@@ -0,0 +1,260 @@
1
+ import { V as VADConfig, g as VoiceFrame } from './index-D3KfRXMP.js';
2
+ import { PassThrough } from 'node:stream';
3
+ import { EventEmitter } from 'node:events';
4
+ import { A as AudioTransportError } from './telephony.errors-C0-nScrF.js';
5
+ import 'ai';
6
+
7
+ /**
8
+ * @voice-kit/core — G.711 µ-law codec
9
+ *
10
+ * Pure TypeScript implementation of G.711 µ-law (mu-law) encode/decode.
11
+ * No external codec library needed for µ-law. Intended for internal use only,
11
+ * although this module's export list does include these functions.
13
+ *
14
+ * Used by AudioPipeline to convert Twilio/Exotel µ-law audio ↔ PCM.
15
+ */
16
+ /**
17
+ * Convert a single µ-law encoded byte (0–255) to a 16-bit linear PCM sample.
18
+ * Algorithm: ITU-T G.711 Section 3.
19
+ *
20
+ * @internal
21
+ */
22
+ declare function mulawToLinear(sample: number): number;
23
+ /**
24
+ * Convert a 16-bit linear PCM sample to a µ-law encoded byte.
25
+ * Algorithm: ITU-T G.711 Section 3.
26
+ *
27
+ * @internal
28
+ */
29
+ declare function linearToMulaw(sample: number): number;
30
+ /**
31
+ * Convert a Buffer of µ-law encoded bytes to 16-bit little-endian PCM.
32
+ * Each µ-law byte expands to 2 PCM bytes (16-bit LE signed).
33
+ *
34
+ * Input: N bytes (µ-law, 8kHz mono as sent by Twilio/Exotel)
35
+ * Output: N*2 bytes (PCM 16-bit LE, same sample rate)
36
+ *
37
+ * @internal
38
+ */
39
+ declare function mulawBufferToPcm(buf: Buffer): Buffer;
40
+ /**
41
+ * Convert a Buffer of 16-bit little-endian PCM to µ-law bytes.
42
+ * Each pair of PCM bytes compresses to 1 µ-law byte.
43
+ *
44
+ * Input: N bytes (PCM 16-bit LE)
45
+ * Output: N/2 bytes (µ-law)
46
+ *
47
+ * @internal
48
+ */
49
+ declare function pcmBufferToMulaw(buf: Buffer): Buffer;
50
+ /**
51
+ * Convert a base64-encoded µ-law string (as sent by Twilio Media Streams)
52
+ * directly to PCM Buffer. Convenience wrapper used in TwilioProvider.
53
+ *
54
+ * @internal
55
+ */
56
+ declare function base64MulawToPcm(base64: string): Buffer;
57
+ /**
58
+ * Convert a PCM Buffer to a base64-encoded µ-law string (for sending
59
+ * back to Twilio Media Streams).
60
+ *
61
+ * @internal
62
+ */
63
+ declare function pcmToBase64Mulaw(pcm: Buffer): string;
64
+
65
+ /**
66
+ * @voice-kit/core — AudioPipeline
67
+ *
68
+ * Automatically selects codec, sample rate, and VAD config based on the
69
+ * telephony provider. Developers never configure codecs — the pipeline
70
+ * handles all conversions transparently.
71
+ *
72
+ * Provider audio formats:
73
+ * Twilio / Exotel → 8kHz µ-law → decode → 8kHz PCM → upsample → 16kHz PCM (for STT)
74
+ * Plivo / Telnyx → 8kHz µ-law (same as Twilio)
75
+ * LiveKit → 48kHz Opus → decode → 48kHz PCM → downsample → 16kHz PCM (for STT)
76
+ * SIP (generic) → 8kHz G.711 (same as Twilio)
77
+ *
78
+ * TTS output path (reverse):
79
+ * STT/LLM → TTS PCM (provider-native rate) → resample → telephony-native rate → encode
80
+ */
81
+
82
+ /** Telephony providers handled by the pipeline. */
83
+ type TelephonyProviderName = 'twilio' | 'exotel' | 'plivo' | 'telnyx' | 'livekit' | 'sip';
84
+ /**
85
+ * AudioPipeline: auto-wires codec → resample → VAD for a specific telephony provider.
86
+ *
87
+ * Developers never call this directly — it is instantiated by TelephonyProvider
88
+ * implementations and consumed by VoiceAgent.
89
+ *
90
+ * @internal
91
+ */
92
+ declare class AudioPipeline {
93
+ private readonly profile;
94
+ readonly provider: TelephonyProviderName;
95
+ constructor(provider: TelephonyProviderName);
96
+ /**
97
+ * Transform incoming telephony audio to 16kHz PCM for STT.
98
+ * Handles µ-law decode + resampling automatically.
99
+ *
100
+ * @param raw Raw audio bytes as received from telephony provider
101
+ * @returns Async iterable of 16kHz PCM buffers for STT
102
+ *
103
+ * @internal
104
+ */
105
+ inboundForSTT(raw: AsyncIterable<Buffer>): AsyncIterable<Buffer>;
106
+ /**
107
+ * Transform TTS output PCM to telephony-native format for sending to caller.
108
+ * Handles resampling + µ-law encode automatically.
109
+ *
110
+ * @param ttsAudio Raw PCM from TTS provider (at TTS provider's native rate)
111
+ * @param ttsSampleRate Native sample rate of the TTS provider
112
+ * @returns Async iterable of audio bytes ready to send to telephony provider
113
+ *
114
+ * @internal
115
+ */
116
+ outboundFromTTS(ttsAudio: AsyncIterable<Buffer>, ttsSampleRate: number): AsyncIterable<Buffer>;
117
+ /** Get the VAD config tuned for this provider's audio quality. @internal */
118
+ get vadConfig(): Required<VADConfig>;
119
+ /** Sample rate that STT expects (post-pipeline). @internal */
120
+ get sttSampleRate(): number;
121
+ /** Async generator: decode µ-law stream to PCM. @internal */
122
+ private decodeMulaw;
123
+ }
124
+ /**
125
+ * Factory: create an AudioPipeline pre-configured for the given telephony provider.
126
+ *
127
+ * @internal — used by TelephonyProvider implementations
128
+ */
129
+ declare function createAudioPipeline(provider: TelephonyProviderName): AudioPipeline;
130
+
131
+ /**
132
+ * @voice-kit/core — PCM audio resampler
133
+ *
134
+ * Resamples raw PCM audio between sample rates using fluent-ffmpeg.
135
+ * Intended for internal use only, although this module's export list does include these functions.
136
+ * Used by AudioPipeline to convert provider-native rates to STT-required rates.
137
+ */
138
+
139
+ /**
140
+ * Resample a PCM Buffer from one sample rate to another.
141
+ * Both input and output are signed 16-bit little-endian PCM, mono.
142
+ *
143
+ * Common conversions:
144
+ * 8kHz → 16kHz (Twilio/Exotel µ-law decoded → Deepgram input)
145
+ * 48kHz → 16kHz (LiveKit Opus decoded → Deepgram input)
146
+ * 24kHz → 8kHz (ElevenLabs output → Twilio send)
147
+ *
148
+ * @param buf Raw PCM bytes (s16le mono)
149
+ * @param fromHz Source sample rate in Hz
150
+ * @param toHz Target sample rate in Hz
151
+ * @returns Resampled PCM bytes (s16le mono)
152
+ *
153
+ * @internal
154
+ */
155
+ declare function resample(buf: Buffer, fromHz: number, toHz: number): Promise<Buffer>;
156
+ /**
157
+ * Create a streaming resampler, returned as a PassThrough stream (a Transform subclass).
158
+ * More efficient than buffering for large audio chunks.
159
+ *
160
+ * @param fromHz Source sample rate in Hz
161
+ * @param toHz Target sample rate in Hz
162
+ * @returns Node.js PassThrough stream: PCM in, resampled PCM out
163
+ *
164
+ * @internal
165
+ */
166
+ declare function createResamplerStream(fromHz: number, toHz: number): PassThrough;
167
+ /**
168
+ * Async generator that resamples chunks from an audio iterable on the fly.
169
+ * Used by AudioPipeline for realtime streaming paths.
170
+ *
171
+ * @param audio Async iterable of raw PCM buffers at fromHz
172
+ * @param fromHz Source sample rate
173
+ * @param toHz Target sample rate
174
+ *
175
+ * @internal
176
+ */
177
+ declare function resampleStream(audio: AsyncIterable<Buffer>, fromHz: number, toHz: number): AsyncIterable<Buffer>;
178
+
179
+ /**
180
+ * @voice-kit/core — Voice Activity Detection engine
181
+ *
182
+ * Wraps @ricky0123/vad-web and emits strongly-typed VoiceFrame events.
183
+ * Developers subscribe to VoiceFrame events — they never touch the raw VAD API.
184
+ *
185
+ * @example
186
+ * ```ts
187
+ * const vad = createVAD({ threshold: 0.6 })
188
+ * vad.on('frame', (frame) => {
189
+ * if (frame.type === 'speech_start') startRecording()
190
+ * if (frame.type === 'speech_end') stopRecording()
191
+ * })
192
+ * await vad.processStream(audioStream)
193
+ * ```
194
+ */
195
+
196
+ type VADEventMap = {
197
+ frame: [VoiceFrame];
198
+ error: [AudioTransportError];
199
+ };
200
+ /**
201
+ * Internal VAD engine. Processes a 16kHz PCM stream and emits VoiceFrame events.
202
+ * Automatically debounces rapid speech_start/speech_end transitions.
203
+ *
204
+ * Input: 16kHz, 16-bit little-endian PCM, mono.
205
+ * Output: VoiceFrame events on the emitter.
206
+ */
207
+ declare class VADEngine extends EventEmitter<VADEventMap> {
208
+ private readonly config;
209
+ private isSpeaking;
210
+ private positiveFrameCount;
211
+ private negativeFrameCount;
212
+ private debounceTimer;
213
+ private frameBuffer;
214
+ private vadModel;
215
+ constructor(config?: VADConfig);
216
+ /**
217
+ * Process an async stream of PCM audio frames.
218
+ * Automatically frames the input into 30ms chunks for VAD processing.
219
+ *
220
+ * @param audio Async iterable of PCM buffers (16kHz, s16le, mono)
221
+ */
222
+ processStream(audio: AsyncIterable<Buffer>): Promise<void>;
223
+ /**
224
+ * Process a single 30ms PCM frame through the VAD model.
225
+ *
226
+ * @internal
227
+ */
228
+ private processFrame;
229
+ /**
230
+ * Run Silero VAD model inference on a single frame.
231
+ * Returns confidence score 0–1.
232
+ *
233
+ * @internal
234
+ */
235
+ private runVADInference;
236
+ private emitFrame;
237
+ private scheduleDebounce;
238
+ private clearDebounce;
239
+ /**
240
+ * Load the Silero VAD model if not already loaded.
241
+ * @internal
242
+ */
243
+ private ensureModelLoaded;
244
+ /** Clean up resources. Call when the call ends. */
245
+ destroy(): void;
246
+ }
247
+ /**
248
+ * Create a configured VAD engine instance.
249
+ * Input must be 16kHz, 16-bit LE, mono PCM (handled automatically by AudioPipeline).
250
+ *
251
+ * @example
252
+ * ```ts
253
+ * const vad = createVAD({ threshold: 0.7, debounceMs: 200 })
254
+ * vad.on('frame', (frame) => handleFrame(frame))
255
+ * await vad.processStream(audioStream)
256
+ * ```
257
+ */
258
+ declare function createVAD(config?: VADConfig): VADEngine;
259
+
260
+ export { AudioPipeline, type TelephonyProviderName, VADEngine, base64MulawToPcm, createAudioPipeline, createResamplerStream, createVAD, linearToMulaw, mulawBufferToPcm, mulawToLinear, pcmBufferToMulaw, pcmToBase64Mulaw, resample, resampleStream };