@voice-kit/core 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/index.cjs +2137 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +1466 -3
  4. package/dist/index.d.ts +1466 -3
  5. package/dist/index.js +2102 -1
  6. package/dist/index.js.map +1 -1
  7. package/package.json +1 -26
  8. package/dist/compliance.cjs +0 -343
  9. package/dist/compliance.cjs.map +0 -1
  10. package/dist/compliance.d.cts +0 -163
  11. package/dist/compliance.d.ts +0 -163
  12. package/dist/compliance.js +0 -335
  13. package/dist/compliance.js.map +0 -1
  14. package/dist/errors.cjs +0 -284
  15. package/dist/errors.cjs.map +0 -1
  16. package/dist/errors.d.cts +0 -175
  17. package/dist/errors.d.ts +0 -175
  18. package/dist/errors.js +0 -262
  19. package/dist/errors.js.map +0 -1
  20. package/dist/index-CkTG6DOa.d.cts +0 -319
  21. package/dist/index-CkTG6DOa.d.ts +0 -319
  22. package/dist/memory.cjs +0 -121
  23. package/dist/memory.cjs.map +0 -1
  24. package/dist/memory.d.cts +0 -29
  25. package/dist/memory.d.ts +0 -29
  26. package/dist/memory.js +0 -115
  27. package/dist/memory.js.map +0 -1
  28. package/dist/observability.cjs +0 -229
  29. package/dist/observability.cjs.map +0 -1
  30. package/dist/observability.d.cts +0 -122
  31. package/dist/observability.d.ts +0 -122
  32. package/dist/observability.js +0 -222
  33. package/dist/observability.js.map +0 -1
  34. package/dist/stt.cjs +0 -828
  35. package/dist/stt.cjs.map +0 -1
  36. package/dist/stt.d.cts +0 -308
  37. package/dist/stt.d.ts +0 -308
  38. package/dist/stt.js +0 -815
  39. package/dist/stt.js.map +0 -1
  40. package/dist/tts.cjs +0 -429
  41. package/dist/tts.cjs.map +0 -1
  42. package/dist/tts.d.cts +0 -151
  43. package/dist/tts.d.ts +0 -151
  44. package/dist/tts.js +0 -418
  45. package/dist/tts.js.map +0 -1
package/dist/stt.d.ts DELETED
@@ -1,308 +0,0 @@
1
- import { S as STTProvider, g as STTConfig, h as STTResult } from './index-CkTG6DOa.js';
2
- import { EventEmitter } from 'node:events';
3
- import 'ai';
4
-
5
- /**
6
- * @voice-kit/core — AssemblyAI STT Provider
7
- *
8
- * Async long-form transcription using AssemblyAI SDK.
9
- * Best for post-call recordings, meeting notes, long interviews.
10
- * Does not support realtime streaming — use Deepgram for live calls.
11
- */
12
-
13
- /**
14
- * AssemblyAI async transcription provider.
15
- * @internal — obtained via createSTT('assemblyai', config)
16
- */
17
- declare class AssemblyAISTTProvider implements STTProvider {
18
- readonly name = "assemblyai";
19
- readonly supportsStreaming = false;
20
- readonly supportedLanguages: string[];
21
- private readonly client;
22
- private readonly config;
23
- constructor(config: STTConfig);
24
- /**
25
- * Batch-transcribes collected audio. AssemblyAI has no realtime streaming.
26
- * Collects all audio from the iterable, uploads, then polls for result.
27
- *
28
- * @param audio Async iterable of PCM buffers
29
- */
30
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
31
- /**
32
- * Upload audio to AssemblyAI and wait for async transcription.
33
- * Suitable for call recordings. Average latency: 15–45s per minute of audio.
34
- *
35
- * @param audio Raw WAV/PCM/MP3 buffer
36
- *
37
- * @example
38
- * ```ts
39
- * const stt = createSTT('assemblyai', { wordTimestamps: true })
40
- * const result = await stt.transcribeBatch(recordingBuffer)
41
- * console.log(result.words) // Word-level timestamps
42
- * ```
43
- */
44
- transcribeBatch(audio: Buffer): Promise<STTResult>;
45
- }
46
-
47
- /**
48
- * @voice-kit/core — Deepgram Nova-3 STT Provider
49
- *
50
- * Streaming STT using Deepgram Nova-3. Handles WebSocket reconnect with
51
- * exponential backoff, interim + final results, language detection.
52
- * Never instantiate directly — use createSTT('deepgram', config).
53
- *
54
- * SDK: @deepgram/sdk v5 (beta) — https://github.com/deepgram/deepgram-js-sdk
55
- */
56
-
57
- /**
58
- * Deepgram Nova-3 streaming STT provider.
59
- * @internal — obtained via createSTT('deepgram', config)
60
- */
61
- declare class DeepgramSTTProvider implements STTProvider {
62
- readonly name = "deepgram";
63
- readonly supportsStreaming = true;
64
- readonly supportedLanguages: string[];
65
- private readonly client;
66
- private readonly config;
67
- constructor(config: STTConfig);
68
- /**
69
- * Stream audio to Deepgram and receive interim + final transcription results.
70
- * Handles reconnection transparently with exponential backoff.
71
- *
72
- * @param audio Async iterable of 16kHz PCM buffers from AudioPipeline
73
- *
74
- * @example
75
- * ```ts
76
- * const stt = createSTT('deepgram', { language: 'hi-IN' })
77
- * for await (const result of stt.transcribeStream(audioIterable)) {
78
- * if (result.isFinal) console.log('User said:', result.transcript)
79
- * }
80
- * ```
81
- */
82
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
83
- /**
84
- * Transcribe a complete audio buffer (non-streaming).
85
- * Uses Deepgram pre-recorded API.
86
- *
87
- * @param audio Raw PCM or WAV buffer
88
- */
89
- transcribeBatch(audio: Buffer): Promise<STTResult>;
90
- /**
91
- * Create and open a live WebSocket connection to Deepgram.
92
- *
93
- * v5 connection lifecycle (3 explicit steps):
94
- * 1. await listen.v1.connect(options) — constructs the connection object
95
- * 2. connection.connect() — initiates the WebSocket handshake
96
- * 3. await connection.waitForOpen() — resolves once the socket is ready
97
- *
98
- * @internal
99
- */
100
- private connectWithRetry;
101
- }
102
-
103
- /**
104
- * @voice-kit/core — Sarvam AI Indic STT Provider
105
- *
106
- * Sarvam AI provides state-of-the-art STT for Indian languages:
107
- * hi-IN, kn-IN, ta-IN, te-IN, mr-IN, bn-IN, gu-IN, pa-IN, or-IN
108
- *
109
- * Uses axios for HTTP calls. No official JS SDK — we use the REST API directly.
110
- */
111
-
112
- /**
113
- * Sarvam AI Indic STT provider.
114
- * @internal — obtained via createSTT('sarvam', config)
115
- */
116
- declare class SarvamSTTProvider implements STTProvider {
117
- readonly name = "sarvam";
118
- readonly supportsStreaming = false;
119
- readonly supportedLanguages: string[];
120
- private readonly http;
121
- private readonly config;
122
- constructor(config: STTConfig);
123
- /**
124
- * Collects audio and transcribes via Sarvam batch API.
125
- * Sarvam doesn't support realtime streaming.
126
- *
127
- * @param audio Async iterable of 16kHz PCM buffers
128
- */
129
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
130
- /**
131
- * Transcribe a WAV/PCM audio buffer in an Indic language.
132
- *
133
- * @param audio 16kHz PCM or WAV buffer
134
- *
135
- * @example
136
- * ```ts
137
- * const stt = createSTT('sarvam', { language: 'ta-IN' })
138
- * const result = await stt.transcribeBatch(tamilAudioBuffer)
139
- * console.log(result.transcript) // Tamil text
140
- * ```
141
- */
142
- transcribeBatch(audio: Buffer): Promise<STTResult>;
143
- }
144
-
145
- /**
146
- * @voice-kit/core — Hinglish language switch detector
147
- *
148
- * Detects mid-sentence Hindi↔English (Hinglish) code-switching in realtime STT output.
149
- * Pure algorithmic detection — no external API calls, no latency overhead.
150
- *
151
- * Detection signals:
152
- * 1. Devanagari Unicode range (U+0900–U+097F) for Hindi
153
- * 2. Latin character runs for English
154
- * 3. Common Hinglish transition patterns (e.g. "main think karta hun")
155
- * 4. Script boundary crossing mid-sentence
156
- */
157
-
158
- type LanguageCode = 'hi-IN' | 'en-IN' | 'unknown';
159
- interface LanguageSwitchEvent {
160
- /** Language switched from. */
161
- from: LanguageCode;
162
- /** Language switched to. */
163
- to: LanguageCode;
164
- /** Position in transcript where switch occurred (word index). */
165
- position: number;
166
- /** Confidence of the detection 0–1. */
167
- confidence: number;
168
- /** Full transcript at time of detection. */
169
- transcript: string;
170
- /** Timestamp of detection. */
171
- detectedAt: Date;
172
- }
173
- type LanguageDetectorEventMap = {
174
- 'language.switched': [LanguageSwitchEvent];
175
- };
176
- /**
177
- * Hinglish language switch detector.
178
- *
179
- * Analyzes STT transcripts word-by-word in realtime.
180
- * Emits 'language.switched' events when a significant script change is detected.
181
- *
182
- * @example
183
- * ```ts
184
- * const detector = new LanguageSwitchDetector('en-IN')
185
- * detector.on('language.switched', ({ from, to, transcript }) => {
186
- * console.log(`Language switched: ${from} → ${to} in: "${transcript}"`)
187
- * })
188
- *
189
- * // Call on every STT final result
190
- * detector.analyze('main yeh kaam kal karonga I promise')
191
- * ```
192
- */
193
- declare class LanguageSwitchDetector extends EventEmitter<LanguageDetectorEventMap> {
194
- private currentLanguage;
195
- private readonly primaryLanguage;
196
- /** Rolling window of recent language classifications for smoothing. */
197
- private recentClassifications;
198
- private readonly windowSize;
199
- constructor(primaryLanguage?: LanguageCode);
200
- /**
201
- * Analyze a transcript for language switches.
202
- * Should be called on every STT final result.
203
- *
204
- * @param transcript The transcribed text to analyze
205
- * @returns Detected language of the transcript
206
- */
207
- analyze(transcript: string): LanguageCode;
208
- /**
209
- * Analyze a transcript and return per-word language classification.
210
- * Useful for word-level Hinglish mixing visualization.
211
- *
212
- * @param transcript Text to analyze
213
- * @returns Array of { word, language } pairs
214
- */
215
- analyzeWords(transcript: string): Array<{
216
- word: string;
217
- language: LanguageCode;
218
- }>;
219
- /** Reset to primary language (e.g., on new call). */
220
- reset(): void;
221
- /** Current detected language. */
222
- get language(): LanguageCode;
223
- private tokenize;
224
- private classifyWord;
225
- private classifySegment;
226
- private computeConfidence;
227
- private smoothedLanguage;
228
- }
229
- /**
230
- * Detect whether a transcript contains mixed Hindi+English (Hinglish).
231
- * Stateless convenience function for one-shot analysis.
232
- *
233
- * @param transcript Text to analyze
234
- * @returns True if both Devanagari and Latin characters are present
235
- *
236
- * @example
237
- * ```ts
238
- * isHinglish('main kal office jaaunga') // true
239
- * isHinglish('I will go to the office') // false
240
- * isHinglish('मैं कल ऑफिस जाऊंगा') // false (pure Hindi)
241
- * ```
242
- */
243
- declare function isHinglish(transcript: string): boolean;
244
-
245
- /**
246
- * @voice-kit/core — STT factory
247
- *
248
- * createSTT() is the ONLY public API for speech-to-text.
249
- * Never instantiate provider classes directly.
250
- */
251
-
252
- /**
253
- * Create an STT provider instance. This is the ONLY public API for STT.
254
- *
255
- * Provider selection guide:
256
- * - 'deepgram' → Default. Realtime streaming, best latency, supports en-IN + Indic
257
- * - 'sarvam' → Best accuracy for pure Indic languages (hi-IN, ta-IN, kn-IN, te-IN, mr-IN)
258
- * - 'assemblyai' → Best for long-form recordings (post-call analysis)
259
- * - 'whisper' → Fallback batch transcription, broad language support
260
- *
261
- * @example
262
- * ```ts
263
- * // Realtime English (India) — default
264
- * const stt = createSTT('deepgram', { language: 'en-IN' })
265
- *
266
- * // Realtime Hindi
267
- * const stt = createSTT('deepgram', { language: 'hi-IN' })
268
- *
269
- * // Best Indic accuracy
270
- * const stt = createSTT('sarvam', { language: 'ta-IN' })
271
- *
272
- * // Post-call recording
273
- * const stt = createSTT('assemblyai', { wordTimestamps: true })
274
- * ```
275
- */
276
- declare function createSTT(provider: 'deepgram' | 'whisper' | 'assemblyai' | 'sarvam', config?: STTConfig): STTProvider;
277
-
278
- /**
279
- * @voice-kit/core — OpenAI Whisper STT Provider (batch fallback)
280
- *
281
- * Uses @ai-sdk/openai for batch transcription. Does not support streaming.
282
- * Use as fallback for long-form audio or when Deepgram is unavailable.
283
- */
284
-
285
- /**
286
- * OpenAI Whisper STT provider. Batch-only — does not support streaming.
287
- * @internal — obtained via createSTT('whisper', config)
288
- */
289
- declare class WhisperSTTProvider implements STTProvider {
290
- readonly name = "whisper";
291
- readonly supportsStreaming = false;
292
- readonly supportedLanguages: string[];
293
- private readonly config;
294
- constructor(config: STTConfig);
295
- /**
296
- * Streaming not supported by Whisper. Collects all audio then transcribes.
297
- * For realtime use, use createSTT('deepgram') instead.
298
- */
299
- transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
300
- /**
301
- * Transcribe a complete audio buffer via Whisper.
302
- *
303
- * @param audio WAV or PCM buffer
304
- */
305
- transcribeBatch(audio: Buffer): Promise<STTResult>;
306
- }
307
-
308
- export { AssemblyAISTTProvider, DeepgramSTTProvider, type LanguageCode, LanguageSwitchDetector, type LanguageSwitchEvent, SarvamSTTProvider, WhisperSTTProvider, createSTT, isHinglish };