@voice-kit/core 0.1.0 → 0.1.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/errors.d.cts CHANGED
@@ -1,55 +1,8 @@
1
- import { ErrorSeverity } from './index.cjs';
1
+ import { V as VoiceKitError } from './telephony.errors-BQYr6-vl.cjs';
2
+ export { A as AudioTransportError, C as CallConnectionError, a as CallNotFoundError, T as TelephonyError } from './telephony.errors-BQYr6-vl.cjs';
3
+ import { E as ErrorSeverity } from './index-D3KfRXMP.cjs';
2
4
  import 'ai';
3
5
 
4
- /**
5
- * @voice-kit/core — Typed error hierarchy
6
- *
7
- * All VoiceKit errors extend VoiceKitError. Never throw raw Error.
8
- * Every error carries: code, message, provider, callId, retryable, severity.
9
- */
10
-
11
- /**
12
- * Base class for all VoiceKit errors. Provides structured context for
13
- * logging, alerting, and programmatic error handling.
14
- *
15
- * @example
16
- * ```ts
17
- * try {
18
- * await stt.transcribeBatch(audio)
19
- * } catch (err) {
20
- * if (err instanceof STTError) {
21
- * console.error(err.code, err.provider, err.retryable)
22
- * }
23
- * }
24
- * ```
25
- */
26
- declare class VoiceKitError extends Error {
27
- readonly code: string;
28
- readonly callId?: string;
29
- readonly provider?: string;
30
- readonly retryable: boolean;
31
- readonly severity: ErrorSeverity;
32
- readonly cause?: unknown;
33
- constructor(params: {
34
- code: string;
35
- message: string;
36
- callId?: string;
37
- provider?: string;
38
- retryable?: boolean;
39
- severity?: ErrorSeverity;
40
- cause?: unknown;
41
- });
42
- toJSON(): {
43
- name: string;
44
- code: string;
45
- message: string;
46
- callId: string | undefined;
47
- provider: string | undefined;
48
- retryable: boolean;
49
- severity: ErrorSeverity;
50
- };
51
- }
52
-
53
6
  /**
54
7
  * Errors from agent orchestration (turn engine, handoff, injection).
55
8
  */
@@ -129,34 +82,6 @@ declare class STTLanguageNotSupportedError extends STTError {
129
82
  constructor(provider: string, language: string);
130
83
  }
131
84
 
132
- /**
133
- * Errors from telephony providers.
134
- */
135
- declare class TelephonyError extends VoiceKitError {
136
- readonly to?: string;
137
- readonly from?: string;
138
- constructor(params: {
139
- code: string;
140
- message: string;
141
- callId?: string;
142
- provider?: string;
143
- retryable?: boolean;
144
- severity?: ErrorSeverity;
145
- cause?: unknown;
146
- to?: string;
147
- from?: string;
148
- });
149
- }
150
- declare class CallConnectionError extends TelephonyError {
151
- constructor(provider: string, to: string, cause?: unknown);
152
- }
153
- declare class CallNotFoundError extends TelephonyError {
154
- constructor(callId: string, provider: string);
155
- }
156
- declare class AudioTransportError extends TelephonyError {
157
- constructor(provider: string, cause?: unknown, callId?: string);
158
- }
159
-
160
85
  /**
161
86
  * Errors from text-to-speech providers.
162
87
  */
@@ -172,4 +97,4 @@ declare class TTSVoiceNotFoundError extends TTSError {
172
97
  constructor(provider: string, voiceId: string);
173
98
  }
174
99
 
175
- export { AgentError, AgentHandoffError, AudioTransportError, CallConnectionError, CallNotFoundError, CallingHoursError, ComplianceError, ConsentMissingError, DNCBlockedError, InngestError, STTConnectionError, STTError, STTLanguageNotSupportedError, STTStreamError, TTSConnectionError, TTSError, TTSStreamError, TTSVoiceNotFoundError, TelephonyError, TurnTransitionError, VoiceKitError };
100
+ export { AgentError, AgentHandoffError, CallingHoursError, ComplianceError, ConsentMissingError, DNCBlockedError, InngestError, STTConnectionError, STTError, STTLanguageNotSupportedError, STTStreamError, TTSConnectionError, TTSError, TTSStreamError, TTSVoiceNotFoundError, TurnTransitionError, VoiceKitError };
package/dist/errors.d.ts CHANGED
@@ -1,55 +1,8 @@
1
- import { ErrorSeverity } from './index.js';
1
+ import { V as VoiceKitError } from './telephony.errors-C0-nScrF.js';
2
+ export { A as AudioTransportError, C as CallConnectionError, a as CallNotFoundError, T as TelephonyError } from './telephony.errors-C0-nScrF.js';
3
+ import { E as ErrorSeverity } from './index-D3KfRXMP.js';
2
4
  import 'ai';
3
5
 
4
- /**
5
- * @voice-kit/core — Typed error hierarchy
6
- *
7
- * All VoiceKit errors extend VoiceKitError. Never throw raw Error.
8
- * Every error carries: code, message, provider, callId, retryable, severity.
9
- */
10
-
11
- /**
12
- * Base class for all VoiceKit errors. Provides structured context for
13
- * logging, alerting, and programmatic error handling.
14
- *
15
- * @example
16
- * ```ts
17
- * try {
18
- * await stt.transcribeBatch(audio)
19
- * } catch (err) {
20
- * if (err instanceof STTError) {
21
- * console.error(err.code, err.provider, err.retryable)
22
- * }
23
- * }
24
- * ```
25
- */
26
- declare class VoiceKitError extends Error {
27
- readonly code: string;
28
- readonly callId?: string;
29
- readonly provider?: string;
30
- readonly retryable: boolean;
31
- readonly severity: ErrorSeverity;
32
- readonly cause?: unknown;
33
- constructor(params: {
34
- code: string;
35
- message: string;
36
- callId?: string;
37
- provider?: string;
38
- retryable?: boolean;
39
- severity?: ErrorSeverity;
40
- cause?: unknown;
41
- });
42
- toJSON(): {
43
- name: string;
44
- code: string;
45
- message: string;
46
- callId: string | undefined;
47
- provider: string | undefined;
48
- retryable: boolean;
49
- severity: ErrorSeverity;
50
- };
51
- }
52
-
53
6
  /**
54
7
  * Errors from agent orchestration (turn engine, handoff, injection).
55
8
  */
@@ -129,34 +82,6 @@ declare class STTLanguageNotSupportedError extends STTError {
129
82
  constructor(provider: string, language: string);
130
83
  }
131
84
 
132
- /**
133
- * Errors from telephony providers.
134
- */
135
- declare class TelephonyError extends VoiceKitError {
136
- readonly to?: string;
137
- readonly from?: string;
138
- constructor(params: {
139
- code: string;
140
- message: string;
141
- callId?: string;
142
- provider?: string;
143
- retryable?: boolean;
144
- severity?: ErrorSeverity;
145
- cause?: unknown;
146
- to?: string;
147
- from?: string;
148
- });
149
- }
150
- declare class CallConnectionError extends TelephonyError {
151
- constructor(provider: string, to: string, cause?: unknown);
152
- }
153
- declare class CallNotFoundError extends TelephonyError {
154
- constructor(callId: string, provider: string);
155
- }
156
- declare class AudioTransportError extends TelephonyError {
157
- constructor(provider: string, cause?: unknown, callId?: string);
158
- }
159
-
160
85
  /**
161
86
  * Errors from text-to-speech providers.
162
87
  */
@@ -172,4 +97,4 @@ declare class TTSVoiceNotFoundError extends TTSError {
172
97
  constructor(provider: string, voiceId: string);
173
98
  }
174
99
 
175
- export { AgentError, AgentHandoffError, AudioTransportError, CallConnectionError, CallNotFoundError, CallingHoursError, ComplianceError, ConsentMissingError, DNCBlockedError, InngestError, STTConnectionError, STTError, STTLanguageNotSupportedError, STTStreamError, TTSConnectionError, TTSError, TTSStreamError, TTSVoiceNotFoundError, TelephonyError, TurnTransitionError, VoiceKitError };
100
+ export { AgentError, AgentHandoffError, CallingHoursError, ComplianceError, ConsentMissingError, DNCBlockedError, InngestError, STTConnectionError, STTError, STTLanguageNotSupportedError, STTStreamError, TTSConnectionError, TTSError, TTSStreamError, TTSVoiceNotFoundError, TurnTransitionError, VoiceKitError };
@@ -0,0 +1,319 @@
1
+ import * as ai from 'ai';
2
+
3
+ /**
4
+ * @voice-kit/core — Type definitions
5
+ */
6
+ /**
7
+ * A single word with timing information from an STT provider.
8
+ */
9
+ interface WordTimestamp {
10
+ word: string;
11
+ startMs: number;
12
+ endMs: number;
13
+ confidence: number;
14
+ }
15
+ /**
16
+ * The result of a speech-to-text transcription, either streaming partial
17
+ * or final. `isFinal` distinguishes the two.
18
+ *
19
+ * @example
20
+ * ```ts
21
+ * for await (const result of stt.transcribeStream(audioIterable)) {
22
+ * if (result.isFinal) console.log('Final:', result.transcript)
23
+ * }
24
+ * ```
25
+ */
26
+ interface STTResult {
27
+ /** The transcribed text. May be a partial result if `isFinal` is false. */
28
+ transcript: string;
29
+ /** Whether this is the final result for this utterance. */
30
+ isFinal: boolean;
31
+ /** Confidence score from the provider, 0–1. */
32
+ confidence: number;
33
+ /** BCP-47 language tag, e.g. 'hi-IN', 'en-IN'. */
34
+ language: string;
35
+ /** True if a mid-sentence language switch was detected (e.g. Hinglish). */
36
+ languageSwitchDetected: boolean;
37
+ /** Word-level timestamps if supported by the provider. */
38
+ words?: WordTimestamp[];
39
+ /** Time from audio start to this result being emitted, in ms. */
40
+ latencyMs: number;
41
+ }
42
+ /**
43
+ * Configuration for STT provider instantiation.
44
+ */
45
+ interface STTConfig {
46
+ /** BCP-47 language code. Defaults to 'en-IN'. */
47
+ language?: string;
48
+ /** Additional languages to detect for code-switching. */
49
+ alternateLanguages?: string[];
50
+ /** API key. Falls back to provider-specific env var if omitted. */
51
+ apiKey?: string;
52
+ /** Custom model name. Provider-specific. */
53
+ model?: string;
54
+ /** Enable word-level timestamps. Default false. */
55
+ wordTimestamps?: boolean;
56
+ /** Enable interim / partial results. Default true. */
57
+ interimResults?: boolean;
58
+ /** Deepgram-specific: smart formatting. Default true. */
59
+ smartFormat?: boolean;
60
+ /** Sarvam-specific: region hint. */
61
+ region?: string;
62
+ }
63
+ /**
64
+ * The STTProvider interface. Obtained via `createSTT()` — never instantiate
65
+ * provider classes directly.
66
+ *
67
+ * @example
68
+ * ```ts
69
+ * const stt = createSTT('deepgram', { language: 'en-IN' })
70
+ * for await (const result of stt.transcribeStream(audioStream)) {
71
+ * console.log(result.transcript)
72
+ * }
73
+ * ```
74
+ */
75
+ interface STTProvider {
76
+ /** Stream audio in, stream STTResults out. Primary realtime path. */
77
+ transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
78
+ /** Batch transcription for recordings. Returns single final result. */
79
+ transcribeBatch(audio: Buffer): Promise<STTResult>;
80
+ /** Whether this provider supports streaming (all except Whisper). */
81
+ readonly supportsStreaming: boolean;
82
+ /** BCP-47 codes this provider can handle. */
83
+ readonly supportedLanguages: string[];
84
+ /** Human-readable provider name for logging. */
85
+ readonly name: string;
86
+ }
87
+ /**
88
+ * Configuration for TTS provider instantiation.
89
+ */
90
+ interface TTSConfig {
91
+ /** Voice identifier. Provider-specific. */
92
+ voiceId?: string;
93
+ /** Output sample rate. Defaults to provider native rate. */
94
+ sampleRate?: number;
95
+ /** Speaking speed multiplier. Default 1.0. */
96
+ speed?: number;
97
+ /** Pitch adjustment. Provider-specific. */
98
+ pitch?: number;
99
+ /** API key. Falls back to provider-specific env var if omitted. */
100
+ apiKey?: string;
101
+ /** ElevenLabs-specific: model ID. */
102
+ modelId?: string;
103
+ /** Cartesia-specific: emotion control. */
104
+ emotion?: string;
105
+ /** Sarvam-specific: target language for Indic voices. */
106
+ targetLanguage?: string;
107
+ }
108
+ /**
109
+ * The TTSProvider interface. Obtained via `createTTS()` — never instantiate
110
+ * provider classes directly.
111
+ *
112
+ * @example
113
+ * ```ts
114
+ * const tts = createTTS('elevenlabs', { voiceId: 'your-voice-id' })
115
+ * for await (const chunk of tts.synthesizeStream('Hello, how can I help?')) {
116
+ * socket.write(chunk)
117
+ * }
118
+ * ```
119
+ */
120
+ interface TTSProvider {
121
+ /** Stream synthesis — preferred for realtime. First chunk < 300ms. */
122
+ synthesizeStream(text: string, config?: TTSConfig): AsyncIterable<Buffer>;
123
+ /** Synthesize full audio — for pre-recorded prompts or caching. */
124
+ synthesizeFull(text: string, config?: TTSConfig): Promise<Buffer>;
125
+ /** Native output sample rate of this provider in Hz. */
126
+ readonly outputSampleRate: number;
127
+ /** Native output format before any resampling. */
128
+ readonly outputFormat: 'pcm' | 'mulaw' | 'opus' | 'mp3';
129
+ /** Human-readable provider name for logging. */
130
+ readonly name: string;
131
+ }
132
+ /**
133
+ * A frame of audio classified by the VAD engine.
134
+ * Developers subscribe to these events — never to raw VAD API.
135
+ */
136
+ interface VoiceFrame {
137
+ /** Event type. */
138
+ type: 'speech_start' | 'speech_end' | 'speech';
139
+ /** VAD confidence 0–1. */
140
+ confidence: number;
141
+ /** Raw PCM audio bytes for this frame. */
142
+ audioBuffer: Buffer;
143
+ /** Duration of audio in this frame, in ms. */
144
+ durationMs: number;
145
+ }
146
+ /**
147
+ * Configuration for the VAD engine.
148
+ */
149
+ interface VADConfig {
150
+ /** Activation threshold 0–1. Default 0.6. */
151
+ threshold?: number;
152
+ /** Consecutive positive frames before speech_start. Default 3. */
153
+ positiveSpeechFrames?: number;
154
+ /** Consecutive negative frames before speech_end. Default 5. */
155
+ negativeSpeechFrames?: number;
156
+ /** Debounce window in ms to prevent rapid flip-flop. Default 150. */
157
+ debounceMs?: number;
158
+ /** Input sample rate. Auto-set by AudioPipeline — do not override. */
159
+ sampleRate?: number;
160
+ }
161
+ /**
162
+ * Configuration for call memory (LRU-backed sliding window of turns).
163
+ */
164
+ interface CallMemoryConfig {
165
+ /** Maximum number of turns to retain. Default 20. */
166
+ maxTurns?: number;
167
+ /** Maximum bytes of conversation history to retain. Default 512KB. */
168
+ maxBytes?: number;
169
+ /** TTL for the entire call memory entry in ms. Default 30 minutes. */
170
+ ttlMs?: number;
171
+ }
172
+ /**
173
+ * In-process LRU-backed call memory. Obtained via `createCallMemory()`.
174
+ *
175
+ * @example
176
+ * ```ts
177
+ * const memory = createCallMemory({ maxTurns: 20 })
178
+ * memory.addTurn(callId, { role: 'user', content: 'Hello' })
179
+ * const history = memory.getTurns(callId)
180
+ * ```
181
+ */
182
+ interface CallMemory {
183
+ addTurn(callId: string, message: ai.ModelMessage): void;
184
+ getTurns(callId: string): ai.ModelMessage[];
185
+ clearCall(callId: string): void;
186
+ getTokenEstimate(callId: string): number;
187
+ /** Truncate oldest turns to stay within budget. */
188
+ trimToTokenBudget(callId: string, maxTokens: number): void;
189
+ }
190
+ /**
191
+ * Type of call for TRAI DND classification.
192
+ */
193
+ type CallPurpose = 'TRANSACTIONAL' | 'PROMOTIONAL' | 'SERVICE' | 'EMERGENCY';
194
+ /**
195
+ * TRAI DNC check parameters.
196
+ */
197
+ interface DNCCheckParams {
198
+ /** E.164 format phone number, validated via libphonenumber-js. */
199
+ to: string;
200
+ /** Purpose category for TRAI classification. */
201
+ purpose: CallPurpose;
202
+ /** Scheduled call time. Defaults to now. */
203
+ scheduledAt?: Date;
204
+ }
205
+ /**
206
+ * Result of a TRAI DNC check.
207
+ */
208
+ interface DNCCheckResult {
209
+ /** Whether the call is permitted. */
210
+ allowed: boolean;
211
+ /** Human-readable reason if not allowed. */
212
+ reason?: string;
213
+ /** When this result was fetched (from LRU cache). */
214
+ cachedAt?: Date;
215
+ /** Whether result came from local LRU cache. */
216
+ fromCache: boolean;
217
+ }
218
+ /**
219
+ * Consent record stored for TRAI compliance.
220
+ */
221
+ interface ConsentRecord {
222
+ phoneNumber: string;
223
+ consentedAt: Date;
224
+ /** Channel through which consent was obtained. */
225
+ channel: 'voice' | 'sms' | 'web' | 'ivr';
226
+ /** Call purpose consent was given for. */
227
+ purpose: CallPurpose;
228
+ /** Optional reference ID (e.g. recording URL). */
229
+ referenceId?: string;
230
+ }
231
+ /**
232
+ * TRAI compliance configuration.
233
+ */
234
+ interface TRAIConfig {
235
+ /** Disable TRAI checks entirely. Default false. */
236
+ disabled?: boolean;
237
+ /** Calling timezone override. Default 'Asia/Kolkata'. */
238
+ timezone?: string;
239
+ /** Override calling hours start (24h). Default 9. */
240
+ callingHoursStart?: number;
241
+ /** Override calling hours end (24h). Default 21. */
242
+ callingHoursEnd?: number;
243
+ /** Custom DNC API endpoint. Default: mock endpoint (must be replaced in production). */
244
+ dncApiEndpoint?: string;
245
+ }
246
+ /**
247
+ * Aggregated metrics for a completed or in-progress call.
248
+ */
249
+ interface CallMetricsSummary {
250
+ callId: string;
251
+ sttFirstByteMs: number[];
252
+ ttsFirstByteMs: number[];
253
+ llmFirstTokenMs: number[];
254
+ turnLatencyMs: number[];
255
+ interruptionCount: number;
256
+ interruptionPositions: number[];
257
+ tokenCost: {
258
+ model: string;
259
+ inputTokens: number;
260
+ outputTokens: number;
261
+ estimatedUsdCost: number;
262
+ }[];
263
+ avgTurnLatencyMs: number;
264
+ p95TurnLatencyMs: number;
265
+ }
266
+ /**
267
+ * Error severity level.
268
+ */
269
+ type ErrorSeverity = 'low' | 'medium' | 'high' | 'critical';
270
+ /**
271
+ * Base error context shared by all VoiceKit errors.
272
+ */
273
+ interface VoiceKitErrorContext {
274
+ /** Error code for programmatic handling. */
275
+ code: string;
276
+ /** Associated call ID if applicable. */
277
+ callId?: string;
278
+ /** The provider that threw (e.g. 'deepgram', 'elevenlabs'). */
279
+ provider?: string;
280
+ /** Whether this error is safe to retry. */
281
+ retryable: boolean;
282
+ /** Severity for alerting/logging. */
283
+ severity: ErrorSeverity;
284
+ /** Original upstream error if wrapping. */
285
+ cause?: unknown;
286
+ }
287
+ /**
288
+ * Create an STT provider instance. This is the ONLY public API for STT.
289
+ * Never instantiate provider classes directly.
290
+ *
291
+ * @example
292
+ * ```ts
293
+ * const stt = createSTT('deepgram', { language: 'hi-IN' })
294
+ * const stt2 = createSTT('sarvam', { language: 'ta-IN' })
295
+ * ```
296
+ */
297
+ declare function createSTT(provider: 'deepgram' | 'whisper' | 'assemblyai' | 'sarvam', config?: STTConfig): STTProvider;
298
+ /**
299
+ * Create a TTS provider instance. This is the ONLY public API for TTS.
300
+ * Never instantiate provider classes directly.
301
+ *
302
+ * @example
303
+ * ```ts
304
+ * const tts = createTTS('elevenlabs', { voiceId: 'your-voice-id' })
305
+ * const tts2 = createTTS('sarvam', { targetLanguage: 'hi-IN' })
306
+ * ```
307
+ */
308
+ declare function createTTS(provider: 'elevenlabs' | 'cartesia' | 'sarvam', config?: TTSConfig): TTSProvider;
309
+ /**
310
+ * Create an LRU-backed call memory instance.
311
+ *
312
+ * @example
313
+ * ```ts
314
+ * const memory = createCallMemory({ maxTurns: 20, maxBytes: 512_000 })
315
+ * ```
316
+ */
317
+ declare function createCallMemory(config?: CallMemoryConfig): CallMemory;
318
+
319
+ export { type CallMemoryConfig as C, type DNCCheckParams as D, type ErrorSeverity as E, type STTProvider as S, type TRAIConfig as T, type VADConfig as V, type WordTimestamp as W, type CallMemory as a, type DNCCheckResult as b, type ConsentRecord as c, type CallMetricsSummary as d, type TTSProvider as e, type TTSConfig as f, type VoiceFrame as g, type STTConfig as h, type STTResult as i, type CallPurpose as j, type VoiceKitErrorContext as k, createCallMemory as l, createSTT as m, createTTS as n };