@voice-kit/core 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/dist/index.cjs +2137 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +1466 -4
  4. package/dist/index.d.ts +1466 -4
  5. package/dist/index.js +2102 -1
  6. package/dist/index.js.map +1 -1
  7. package/package.json +1 -31
  8. package/dist/audio.cjs +0 -533
  9. package/dist/audio.cjs.map +0 -1
  10. package/dist/audio.d.cts +0 -260
  11. package/dist/audio.d.ts +0 -260
  12. package/dist/audio.js +0 -514
  13. package/dist/audio.js.map +0 -1
  14. package/dist/compliance.cjs +0 -343
  15. package/dist/compliance.cjs.map +0 -1
  16. package/dist/compliance.d.cts +0 -163
  17. package/dist/compliance.d.ts +0 -163
  18. package/dist/compliance.js +0 -335
  19. package/dist/compliance.js.map +0 -1
  20. package/dist/errors.cjs +0 -284
  21. package/dist/errors.cjs.map +0 -1
  22. package/dist/errors.d.cts +0 -100
  23. package/dist/errors.d.ts +0 -100
  24. package/dist/errors.js +0 -262
  25. package/dist/errors.js.map +0 -1
  26. package/dist/index-D3KfRXMP.d.cts +0 -319
  27. package/dist/index-D3KfRXMP.d.ts +0 -319
  28. package/dist/memory.cjs +0 -121
  29. package/dist/memory.cjs.map +0 -1
  30. package/dist/memory.d.cts +0 -29
  31. package/dist/memory.d.ts +0 -29
  32. package/dist/memory.js +0 -115
  33. package/dist/memory.js.map +0 -1
  34. package/dist/observability.cjs +0 -229
  35. package/dist/observability.cjs.map +0 -1
  36. package/dist/observability.d.cts +0 -122
  37. package/dist/observability.d.ts +0 -122
  38. package/dist/observability.js +0 -222
  39. package/dist/observability.js.map +0 -1
  40. package/dist/stt.cjs +0 -828
  41. package/dist/stt.cjs.map +0 -1
  42. package/dist/stt.d.cts +0 -308
  43. package/dist/stt.d.ts +0 -308
  44. package/dist/stt.js +0 -815
  45. package/dist/stt.js.map +0 -1
  46. package/dist/telephony.errors-BQYr6-vl.d.cts +0 -80
  47. package/dist/telephony.errors-C0-nScrF.d.ts +0 -80
  48. package/dist/tts.cjs +0 -429
  49. package/dist/tts.cjs.map +0 -1
  50. package/dist/tts.d.cts +0 -151
  51. package/dist/tts.d.ts +0 -151
  52. package/dist/tts.js +0 -418
  53. package/dist/tts.js.map +0 -1
package/dist/stt.d.ts DELETED
@@ -1,308 +0,0 @@
1
- import { S as STTProvider, h as STTConfig, i as STTResult } from './index-D3KfRXMP.js';
2
- import { EventEmitter } from 'node:events';
3
- import 'ai';
4
-
5
- /**
6
- * @voice-kit/core — AssemblyAI STT Provider
7
- *
8
- * Async long-form transcription using AssemblyAI SDK.
9
- * Best for post-call recordings, meeting notes, long interviews.
10
- * Does not support realtime streaming — use Deepgram for live calls.
11
- */
12
-
13
- /**
14
- * AssemblyAI async transcription provider.
15
- * @internal — obtained via createSTT('assemblyai', config)
16
- */
17
- declare class AssemblyAISTTProvider implements STTProvider {
18
- readonly name = "assemblyai";
19
- readonly supportsStreaming = false;
20
- readonly supportedLanguages: string[];
21
- private readonly client;
22
- private readonly config;
23
- constructor(config: STTConfig);
24
- /**
25
- * Batch-transcribes collected audio. AssemblyAI has no realtime streaming.
26
- * Collects all audio from the iterable, uploads, then polls for result.
27
- *
28
- * @param audio Async iterable of PCM buffers
29
- */
30
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
31
- /**
32
- * Upload audio to AssemblyAI and wait for async transcription.
33
- * Suitable for call recordings. Average latency: 15–45s per minute of audio.
34
- *
35
- * @param audio Raw WAV/PCM/MP3 buffer
36
- *
37
- * @example
38
- * ```ts
39
- * const stt = createSTT('assemblyai', { wordTimestamps: true })
40
- * const result = await stt.transcribeBatch(recordingBuffer)
41
- * console.log(result.words) // Word-level timestamps
42
- * ```
43
- */
44
- transcribeBatch(audio: Buffer): Promise<STTResult>;
45
- }
46
-
47
- /**
48
- * @voice-kit/core — Deepgram Nova-3 STT Provider
49
- *
50
- * Streaming STT using Deepgram Nova-3. Handles WebSocket reconnect with
51
- * exponential backoff, interim + final results, language detection.
52
- * Never instantiate directly — use createSTT('deepgram', config).
53
- *
54
- * SDK: @deepgram/sdk v5 (beta) — https://github.com/deepgram/deepgram-js-sdk
55
- */
56
-
57
- /**
58
- * Deepgram Nova-3 streaming STT provider.
59
- * @internal — obtained via createSTT('deepgram', config)
60
- */
61
- declare class DeepgramSTTProvider implements STTProvider {
62
- readonly name = "deepgram";
63
- readonly supportsStreaming = true;
64
- readonly supportedLanguages: string[];
65
- private readonly client;
66
- private readonly config;
67
- constructor(config: STTConfig);
68
- /**
69
- * Stream audio to Deepgram and receive interim + final transcription results.
70
- * Handles reconnection transparently with exponential backoff.
71
- *
72
- * @param audio Async iterable of 16kHz PCM buffers from AudioPipeline
73
- *
74
- * @example
75
- * ```ts
76
- * const stt = createSTT('deepgram', { language: 'hi-IN' })
77
- * for await (const result of stt.transcribeStream(audioIterable)) {
78
- * if (result.isFinal) console.log('User said:', result.transcript)
79
- * }
80
- * ```
81
- */
82
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
83
- /**
84
- * Transcribe a complete audio buffer (non-streaming).
85
- * Uses Deepgram pre-recorded API.
86
- *
87
- * @param audio Raw PCM or WAV buffer
88
- */
89
- transcribeBatch(audio: Buffer): Promise<STTResult>;
90
- /**
91
- * Create and open a live WebSocket connection to Deepgram.
92
- *
93
- * v5 connection lifecycle (3 explicit steps):
94
- * 1. await listen.v1.connect(options) — constructs the connection object
95
- * 2. connection.connect() — initiates the WebSocket handshake
96
- * 3. await connection.waitForOpen() — resolves once the socket is ready
97
- *
98
- * @internal
99
- */
100
- private connectWithRetry;
101
- }
102
-
103
- /**
104
- * @voice-kit/core — Sarvam AI Indic STT Provider
105
- *
106
- * Sarvam AI provides state-of-the-art STT for Indian languages:
107
- * hi-IN, kn-IN, ta-IN, te-IN, mr-IN, bn-IN, gu-IN, pa-IN, or-IN
108
- *
109
- * Uses axios for HTTP calls. No official JS SDK — we use the REST API directly.
110
- */
111
-
112
- /**
113
- * Sarvam AI Indic STT provider.
114
- * @internal — obtained via createSTT('sarvam', config)
115
- */
116
- declare class SarvamSTTProvider implements STTProvider {
117
- readonly name = "sarvam";
118
- readonly supportsStreaming = false;
119
- readonly supportedLanguages: string[];
120
- private readonly http;
121
- private readonly config;
122
- constructor(config: STTConfig);
123
- /**
124
- * Collects audio and transcribes via Sarvam batch API.
125
- * Sarvam doesn't support realtime streaming.
126
- *
127
- * @param audio Async iterable of 16kHz PCM buffers
128
- */
129
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
130
- /**
131
- * Transcribe a WAV/PCM audio buffer in an Indic language.
132
- *
133
- * @param audio 16kHz PCM or WAV buffer
134
- *
135
- * @example
136
- * ```ts
137
- * const stt = createSTT('sarvam', { language: 'ta-IN' })
138
- * const result = await stt.transcribeBatch(tamilAudioBuffer)
139
- * console.log(result.transcript) // Tamil text
140
- * ```
141
- */
142
- transcribeBatch(audio: Buffer): Promise<STTResult>;
143
- }
144
-
145
- /**
146
- * @voice-kit/core — Hinglish language switch detector
147
- *
148
- * Detects mid-sentence Hindi↔English (Hinglish) code-switching in realtime STT output.
149
- * Pure algorithmic detection — no external API calls, no latency overhead.
150
- *
151
- * Detection signals:
152
- * 1. Devanagari Unicode range (U+0900–U+097F) for Hindi
153
- * 2. Latin character runs for English
154
- * 3. Common Hinglish transition patterns (e.g. "main think karta hun")
155
- * 4. Script boundary crossing mid-sentence
156
- */
157
-
158
- type LanguageCode = 'hi-IN' | 'en-IN' | 'unknown';
159
- interface LanguageSwitchEvent {
160
- /** Language switched from. */
161
- from: LanguageCode;
162
- /** Language switched to. */
163
- to: LanguageCode;
164
- /** Position in transcript where switch occurred (word index). */
165
- position: number;
166
- /** Confidence of the detection 0–1. */
167
- confidence: number;
168
- /** Full transcript at time of detection. */
169
- transcript: string;
170
- /** Timestamp of detection. */
171
- detectedAt: Date;
172
- }
173
- type LanguageDetectorEventMap = {
174
- 'language.switched': [LanguageSwitchEvent];
175
- };
176
- /**
177
- * Hinglish language switch detector.
178
- *
179
- * Analyzes STT transcripts word-by-word in realtime.
180
- * Emits 'language.switched' events when a significant script change is detected.
181
- *
182
- * @example
183
- * ```ts
184
- * const detector = new LanguageSwitchDetector('en-IN')
185
- * detector.on('language.switched', ({ from, to, transcript }) => {
186
- * console.log(`Language switched: ${from} → ${to} in: "${transcript}"`)
187
- * })
188
- *
189
- * // Call on every STT final result
190
- * detector.analyze('main yeh kaam kal karonga I promise')
191
- * ```
192
- */
193
- declare class LanguageSwitchDetector extends EventEmitter<LanguageDetectorEventMap> {
194
- private currentLanguage;
195
- private readonly primaryLanguage;
196
- /** Rolling window of recent language classifications for smoothing. */
197
- private recentClassifications;
198
- private readonly windowSize;
199
- constructor(primaryLanguage?: LanguageCode);
200
- /**
201
- * Analyze a transcript for language switches.
202
- * Should be called on every STT final result.
203
- *
204
- * @param transcript The transcribed text to analyze
205
- * @returns Detected language of the transcript
206
- */
207
- analyze(transcript: string): LanguageCode;
208
- /**
209
- * Analyze a transcript and return per-word language classification.
210
- * Useful for word-level Hinglish mixing visualization.
211
- *
212
- * @param transcript Text to analyze
213
- * @returns Array of { word, language } pairs
214
- */
215
- analyzeWords(transcript: string): Array<{
216
- word: string;
217
- language: LanguageCode;
218
- }>;
219
- /** Reset to primary language (e.g., on new call). */
220
- reset(): void;
221
- /** Current detected language. */
222
- get language(): LanguageCode;
223
- private tokenize;
224
- private classifyWord;
225
- private classifySegment;
226
- private computeConfidence;
227
- private smoothedLanguage;
228
- }
229
- /**
230
- * Detect whether a transcript contains mixed Hindi+English (Hinglish).
231
- * Stateless convenience function for one-shot analysis.
232
- *
233
- * @param transcript Text to analyze
234
- * @returns True if both Devanagari and Latin characters are present
235
- *
236
- * @example
237
- * ```ts
238
- * isInglish('main kal office jaaunga') // true
239
- * isInglish('I will go to the office') // false
240
- * isInglish('मैं कल ऑफिस जाऊंगा') // false (pure Hindi)
241
- * ```
242
- */
243
- declare function isInglish(transcript: string): boolean;
244
-
245
- /**
246
- * @voice-kit/core — STT factory
247
- *
248
- * createSTT() is the ONLY public API for speech-to-text.
249
- * Never instantiate provider classes directly.
250
- */
251
-
252
- /**
253
- * Create an STT provider instance. This is the ONLY public API for STT.
254
- *
255
- * Provider selection guide:
256
- * - 'deepgram' → Default. Realtime streaming, best latency, supports en-IN + Indic
257
- * - 'sarvam' → Best accuracy for pure Indic languages (hi-IN, ta-IN, kn-IN, te-IN, mr-IN)
258
- * - 'assemblyai' → Best for long-form recordings (post-call analysis)
259
- * - 'whisper' → Fallback batch transcription, broad language support
260
- *
261
- * @example
262
- * ```ts
263
- * // Realtime English (India) — default
264
- * const stt = createSTT('deepgram', { language: 'en-IN' })
265
- *
266
- * // Realtime Hindi
267
- * const stt = createSTT('deepgram', { language: 'hi-IN' })
268
- *
269
- * // Best Indic accuracy
270
- * const stt = createSTT('sarvam', { language: 'ta-IN' })
271
- *
272
- * // Post-call recording
273
- * const stt = createSTT('assemblyai', { wordTimestamps: true })
274
- * ```
275
- */
276
- declare function createSTT(provider: 'deepgram' | 'whisper' | 'assemblyai' | 'sarvam', config?: STTConfig): STTProvider;
277
-
278
- /**
279
- * @voice-kit/core — OpenAI Whisper STT Provider (batch fallback)
280
- *
281
- * Uses @ai-sdk/openai for batch transcription. Does not support streaming.
282
- * Use as fallback for long-form audio or when Deepgram is unavailable.
283
- */
284
-
285
- /**
286
- * OpenAI Whisper STT provider. Batch-only — does not support streaming.
287
- * @internal — obtained via createSTT('whisper', config)
288
- */
289
- declare class WhisperSTTProvider implements STTProvider {
290
- readonly name = "whisper";
291
- readonly supportsStreaming = false;
292
- readonly supportedLanguages: string[];
293
- private readonly config;
294
- constructor(config: STTConfig);
295
- /**
296
- * Streaming not supported by Whisper. Collects all audio then transcribes.
297
- * For realtime use, use createSTT('deepgram') instead.
298
- */
299
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
300
- /**
301
- * Transcribe a complete audio buffer via Whisper.
302
- *
303
- * @param audio WAV or PCM buffer
304
- */
305
- transcribeBatch(audio: Buffer): Promise<STTResult>;
306
- }
307
-
308
- export { AssemblyAISTTProvider, DeepgramSTTProvider, type LanguageCode, LanguageSwitchDetector, type LanguageSwitchEvent, SarvamSTTProvider, WhisperSTTProvider, createSTT, isInglish };