@voice-kit/core 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2137 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1466 -4
- package/dist/index.d.ts +1466 -4
- package/dist/index.js +2102 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -31
- package/dist/audio.cjs +0 -533
- package/dist/audio.cjs.map +0 -1
- package/dist/audio.d.cts +0 -260
- package/dist/audio.d.ts +0 -260
- package/dist/audio.js +0 -514
- package/dist/audio.js.map +0 -1
- package/dist/compliance.cjs +0 -343
- package/dist/compliance.cjs.map +0 -1
- package/dist/compliance.d.cts +0 -163
- package/dist/compliance.d.ts +0 -163
- package/dist/compliance.js +0 -335
- package/dist/compliance.js.map +0 -1
- package/dist/errors.cjs +0 -284
- package/dist/errors.cjs.map +0 -1
- package/dist/errors.d.cts +0 -100
- package/dist/errors.d.ts +0 -100
- package/dist/errors.js +0 -262
- package/dist/errors.js.map +0 -1
- package/dist/index-D3KfRXMP.d.cts +0 -319
- package/dist/index-D3KfRXMP.d.ts +0 -319
- package/dist/memory.cjs +0 -121
- package/dist/memory.cjs.map +0 -1
- package/dist/memory.d.cts +0 -29
- package/dist/memory.d.ts +0 -29
- package/dist/memory.js +0 -115
- package/dist/memory.js.map +0 -1
- package/dist/observability.cjs +0 -229
- package/dist/observability.cjs.map +0 -1
- package/dist/observability.d.cts +0 -122
- package/dist/observability.d.ts +0 -122
- package/dist/observability.js +0 -222
- package/dist/observability.js.map +0 -1
- package/dist/stt.cjs +0 -828
- package/dist/stt.cjs.map +0 -1
- package/dist/stt.d.cts +0 -308
- package/dist/stt.d.ts +0 -308
- package/dist/stt.js +0 -815
- package/dist/stt.js.map +0 -1
- package/dist/telephony.errors-BQYr6-vl.d.cts +0 -80
- package/dist/telephony.errors-C0-nScrF.d.ts +0 -80
- package/dist/tts.cjs +0 -429
- package/dist/tts.cjs.map +0 -1
- package/dist/tts.d.cts +0 -151
- package/dist/tts.d.ts +0 -151
- package/dist/tts.js +0 -418
- package/dist/tts.js.map +0 -1
package/dist/stt.d.ts
DELETED
|
@@ -1,308 +0,0 @@
|
|
|
1
|
-
import { S as STTProvider, h as STTConfig, i as STTResult } from './index-D3KfRXMP.js';
|
|
2
|
-
import { EventEmitter } from 'node:events';
|
|
3
|
-
import 'ai';
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* @voice-kit/core — AssemblyAI STT Provider
|
|
7
|
-
*
|
|
8
|
-
* Async long-form transcription using AssemblyAI SDK.
|
|
9
|
-
* Best for post-call recordings, meeting notes, long interviews.
|
|
10
|
-
* Does not support realtime streaming — use Deepgram for live calls.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* AssemblyAI async transcription provider.
|
|
15
|
-
* @internal — obtained via createSTT('assemblyai', config)
|
|
16
|
-
*/
|
|
17
|
-
declare class AssemblyAISTTProvider implements STTProvider {
|
|
18
|
-
readonly name = "assemblyai";
|
|
19
|
-
readonly supportsStreaming = false;
|
|
20
|
-
readonly supportedLanguages: string[];
|
|
21
|
-
private readonly client;
|
|
22
|
-
private readonly config;
|
|
23
|
-
constructor(config: STTConfig);
|
|
24
|
-
/**
|
|
25
|
-
* Batch-transcribes collected audio. AssemblyAI has no realtime streaming.
|
|
26
|
-
* Collects all audio from the iterable, uploads, then polls for result.
|
|
27
|
-
*
|
|
28
|
-
* @param audio Async iterable of PCM buffers
|
|
29
|
-
*/
|
|
30
|
-
transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
|
|
31
|
-
/**
|
|
32
|
-
* Upload audio to AssemblyAI and wait for async transcription.
|
|
33
|
-
* Suitable for call recordings. Average latency: 15–45s per minute of audio.
|
|
34
|
-
*
|
|
35
|
-
* @param audio Raw WAV/PCM/MP3 buffer
|
|
36
|
-
*
|
|
37
|
-
* @example
|
|
38
|
-
* ```ts
|
|
39
|
-
* const stt = createSTT('assemblyai', { wordTimestamps: true })
|
|
40
|
-
* const result = await stt.transcribeBatch(recordingBuffer)
|
|
41
|
-
* console.log(result.words) // Word-level timestamps
|
|
42
|
-
* ```
|
|
43
|
-
*/
|
|
44
|
-
transcribeBatch(audio: Buffer): Promise<STTResult>;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
/**
|
|
48
|
-
* @voice-kit/core — Deepgram Nova-3 STT Provider
|
|
49
|
-
*
|
|
50
|
-
* Streaming STT using Deepgram Nova-3. Handles WebSocket reconnect with
|
|
51
|
-
* exponential backoff, interim + final results, language detection.
|
|
52
|
-
* Never instantiate directly — use createSTT('deepgram', config).
|
|
53
|
-
*
|
|
54
|
-
* SDK: @deepgram/sdk v5 (beta) — https://github.com/deepgram/deepgram-js-sdk
|
|
55
|
-
*/
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* Deepgram Nova-3 streaming STT provider.
|
|
59
|
-
* @internal — obtained via createSTT('deepgram', config)
|
|
60
|
-
*/
|
|
61
|
-
declare class DeepgramSTTProvider implements STTProvider {
|
|
62
|
-
readonly name = "deepgram";
|
|
63
|
-
readonly supportsStreaming = true;
|
|
64
|
-
readonly supportedLanguages: string[];
|
|
65
|
-
private readonly client;
|
|
66
|
-
private readonly config;
|
|
67
|
-
constructor(config: STTConfig);
|
|
68
|
-
/**
|
|
69
|
-
* Stream audio to Deepgram and receive interim + final transcription results.
|
|
70
|
-
* Handles reconnection transparently with exponential backoff.
|
|
71
|
-
*
|
|
72
|
-
* @param audio Async iterable of 16kHz PCM buffers from AudioPipeline
|
|
73
|
-
*
|
|
74
|
-
* @example
|
|
75
|
-
* ```ts
|
|
76
|
-
* const stt = createSTT('deepgram', { language: 'hi-IN' })
|
|
77
|
-
* for await (const result of stt.transcribeStream(audioIterable)) {
|
|
78
|
-
* if (result.isFinal) console.log('User said:', result.transcript)
|
|
79
|
-
* }
|
|
80
|
-
* ```
|
|
81
|
-
*/
|
|
82
|
-
transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
|
|
83
|
-
/**
|
|
84
|
-
* Transcribe a complete audio buffer (non-streaming).
|
|
85
|
-
* Uses Deepgram pre-recorded API.
|
|
86
|
-
*
|
|
87
|
-
* @param audio Raw PCM or WAV buffer
|
|
88
|
-
*/
|
|
89
|
-
transcribeBatch(audio: Buffer): Promise<STTResult>;
|
|
90
|
-
/**
|
|
91
|
-
* Create and open a live WebSocket connection to Deepgram.
|
|
92
|
-
*
|
|
93
|
-
* v5 connection lifecycle (3 explicit steps):
|
|
94
|
-
* 1. await listen.v1.connect(options) — constructs the connection object
|
|
95
|
-
* 2. connection.connect() — initiates the WebSocket handshake
|
|
96
|
-
* 3. await connection.waitForOpen() — resolves once the socket is ready
|
|
97
|
-
*
|
|
98
|
-
* @internal
|
|
99
|
-
*/
|
|
100
|
-
private connectWithRetry;
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
/**
|
|
104
|
-
* @voice-kit/core — Sarvam AI Indic STT Provider
|
|
105
|
-
*
|
|
106
|
-
* Sarvam AI provides state-of-the-art STT for Indian languages:
|
|
107
|
-
* hi-IN, kn-IN, ta-IN, te-IN, mr-IN, bn-IN, gu-IN, pa-IN, or-IN
|
|
108
|
-
*
|
|
109
|
-
* Uses axios for HTTP calls. No official JS SDK — we use the REST API directly.
|
|
110
|
-
*/
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Sarvam AI Indic STT provider.
|
|
114
|
-
* @internal — obtained via createSTT('sarvam', config)
|
|
115
|
-
*/
|
|
116
|
-
declare class SarvamSTTProvider implements STTProvider {
|
|
117
|
-
readonly name = "sarvam";
|
|
118
|
-
readonly supportsStreaming = false;
|
|
119
|
-
readonly supportedLanguages: string[];
|
|
120
|
-
private readonly http;
|
|
121
|
-
private readonly config;
|
|
122
|
-
constructor(config: STTConfig);
|
|
123
|
-
/**
|
|
124
|
-
* Collects audio and transcribes via Sarvam batch API.
|
|
125
|
-
* Sarvam doesn't support realtime streaming.
|
|
126
|
-
*
|
|
127
|
-
* @param audio Async iterable of 16kHz PCM buffers
|
|
128
|
-
*/
|
|
129
|
-
transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
|
|
130
|
-
/**
|
|
131
|
-
* Transcribe a WAV/PCM audio buffer in an Indic language.
|
|
132
|
-
*
|
|
133
|
-
* @param audio 16kHz PCM or WAV buffer
|
|
134
|
-
*
|
|
135
|
-
* @example
|
|
136
|
-
* ```ts
|
|
137
|
-
* const stt = createSTT('sarvam', { language: 'ta-IN' })
|
|
138
|
-
* const result = await stt.transcribeBatch(tamilAudioBuffer)
|
|
139
|
-
* console.log(result.transcript) // Tamil text
|
|
140
|
-
* ```
|
|
141
|
-
*/
|
|
142
|
-
transcribeBatch(audio: Buffer): Promise<STTResult>;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
/**
|
|
146
|
-
* @voice-kit/core — Hinglish language switch detector
|
|
147
|
-
*
|
|
148
|
-
* Detects mid-sentence Hindi↔English (Hinglish) code-switching in realtime STT output.
|
|
149
|
-
* Pure algorithmic detection — no external API calls, no latency overhead.
|
|
150
|
-
*
|
|
151
|
-
* Detection signals:
|
|
152
|
-
* 1. Devanagari Unicode range (U+0900–U+097F) for Hindi
|
|
153
|
-
* 2. Latin character runs for English
|
|
154
|
-
* 3. Common Hinglish transition patterns (e.g. "main think karta hun")
|
|
155
|
-
* 4. Script boundary crossing mid-sentence
|
|
156
|
-
*/
|
|
157
|
-
|
|
158
|
-
type LanguageCode = 'hi-IN' | 'en-IN' | 'unknown';
|
|
159
|
-
interface LanguageSwitchEvent {
|
|
160
|
-
/** Language switched from. */
|
|
161
|
-
from: LanguageCode;
|
|
162
|
-
/** Language switched to. */
|
|
163
|
-
to: LanguageCode;
|
|
164
|
-
/** Position in transcript where switch occurred (word index). */
|
|
165
|
-
position: number;
|
|
166
|
-
/** Confidence of the detection 0–1. */
|
|
167
|
-
confidence: number;
|
|
168
|
-
/** Full transcript at time of detection. */
|
|
169
|
-
transcript: string;
|
|
170
|
-
/** Timestamp of detection. */
|
|
171
|
-
detectedAt: Date;
|
|
172
|
-
}
|
|
173
|
-
type LanguageDetectorEventMap = {
|
|
174
|
-
'language.switched': [LanguageSwitchEvent];
|
|
175
|
-
};
|
|
176
|
-
/**
|
|
177
|
-
* Hinglish language switch detector.
|
|
178
|
-
*
|
|
179
|
-
* Analyzes STT transcripts word-by-word in realtime.
|
|
180
|
-
* Emits 'language.switched' events when a significant script change is detected.
|
|
181
|
-
*
|
|
182
|
-
* @example
|
|
183
|
-
* ```ts
|
|
184
|
-
* const detector = new LanguageSwitchDetector('en-IN')
|
|
185
|
-
* detector.on('language.switched', ({ from, to, transcript }) => {
|
|
186
|
-
* console.log(`Language switched: ${from} → ${to} in: "${transcript}"`)
|
|
187
|
-
* })
|
|
188
|
-
*
|
|
189
|
-
* // Call on every STT final result
|
|
190
|
-
* detector.analyze('main yeh kaam kal karunga I promise')
|
|
191
|
-
* ```
|
|
192
|
-
*/
|
|
193
|
-
declare class LanguageSwitchDetector extends EventEmitter<LanguageDetectorEventMap> {
|
|
194
|
-
private currentLanguage;
|
|
195
|
-
private readonly primaryLanguage;
|
|
196
|
-
/** Rolling window of recent language classifications for smoothing. */
|
|
197
|
-
private recentClassifications;
|
|
198
|
-
private readonly windowSize;
|
|
199
|
-
constructor(primaryLanguage?: LanguageCode);
|
|
200
|
-
/**
|
|
201
|
-
* Analyze a transcript for language switches.
|
|
202
|
-
* Should be called on every STT final result.
|
|
203
|
-
*
|
|
204
|
-
* @param transcript The transcribed text to analyze
|
|
205
|
-
* @returns Detected language of the transcript
|
|
206
|
-
*/
|
|
207
|
-
analyze(transcript: string): LanguageCode;
|
|
208
|
-
/**
|
|
209
|
-
* Analyze a transcript and return per-word language classification.
|
|
210
|
-
* Useful for word-level Hinglish mixing visualization.
|
|
211
|
-
*
|
|
212
|
-
* @param transcript Text to analyze
|
|
213
|
-
* @returns Array of { word, language } pairs
|
|
214
|
-
*/
|
|
215
|
-
analyzeWords(transcript: string): Array<{
|
|
216
|
-
word: string;
|
|
217
|
-
language: LanguageCode;
|
|
218
|
-
}>;
|
|
219
|
-
/** Reset to primary language (e.g., on new call). */
|
|
220
|
-
reset(): void;
|
|
221
|
-
/** Current detected language. */
|
|
222
|
-
get language(): LanguageCode;
|
|
223
|
-
private tokenize;
|
|
224
|
-
private classifyWord;
|
|
225
|
-
private classifySegment;
|
|
226
|
-
private computeConfidence;
|
|
227
|
-
private smoothedLanguage;
|
|
228
|
-
}
|
|
229
|
-
/**
|
|
230
|
-
* Detect whether a transcript contains mixed Hindi+English (Hinglish).
|
|
231
|
-
* Stateless convenience function for one-shot analysis.
|
|
232
|
-
*
|
|
233
|
-
* @param transcript Text to analyze
|
|
234
|
-
* @returns True if both Devanagari and Latin characters are present
|
|
235
|
-
*
|
|
236
|
-
* @example
|
|
237
|
-
* ```ts
|
|
238
|
-
* isHinglish('main kal office jaaunga') // true
|
|
239
|
-
* isHinglish('I will go to the office') // false
|
|
240
|
-
* isHinglish('मैं कल ऑफिस जाऊंगा') // false (pure Hindi)
|
|
241
|
-
* ```
|
|
242
|
-
*/
|
|
243
|
-
declare function isHinglish(transcript: string): boolean;
|
|
244
|
-
|
|
245
|
-
/**
|
|
246
|
-
* @voice-kit/core — STT factory
|
|
247
|
-
*
|
|
248
|
-
* createSTT() is the ONLY public API for speech-to-text.
|
|
249
|
-
* Never instantiate provider classes directly.
|
|
250
|
-
*/
|
|
251
|
-
|
|
252
|
-
/**
|
|
253
|
-
* Create an STT provider instance. This is the ONLY public API for STT.
|
|
254
|
-
*
|
|
255
|
-
* Provider selection guide:
|
|
256
|
-
* - 'deepgram' → Default. Realtime streaming, best latency, supports en-IN + Indic
|
|
257
|
-
* - 'sarvam' → Best accuracy for pure Indic languages (hi-IN, ta-IN, kn-IN, te-IN, mr-IN)
|
|
258
|
-
* - 'assemblyai' → Best for long-form recordings (post-call analysis)
|
|
259
|
-
* - 'whisper' → Fallback batch transcription, broad language support
|
|
260
|
-
*
|
|
261
|
-
* @example
|
|
262
|
-
* ```ts
|
|
263
|
-
* // Realtime English (India) — default
|
|
264
|
-
* const stt = createSTT('deepgram', { language: 'en-IN' })
|
|
265
|
-
*
|
|
266
|
-
* // Realtime Hindi
|
|
267
|
-
* const stt = createSTT('deepgram', { language: 'hi-IN' })
|
|
268
|
-
*
|
|
269
|
-
* // Best Indic accuracy
|
|
270
|
-
* const stt = createSTT('sarvam', { language: 'ta-IN' })
|
|
271
|
-
*
|
|
272
|
-
* // Post-call recording
|
|
273
|
-
* const stt = createSTT('assemblyai', { wordTimestamps: true })
|
|
274
|
-
* ```
|
|
275
|
-
*/
|
|
276
|
-
declare function createSTT(provider: 'deepgram' | 'whisper' | 'assemblyai' | 'sarvam', config?: STTConfig): STTProvider;
|
|
277
|
-
|
|
278
|
-
/**
|
|
279
|
-
* @voice-kit/core — OpenAI Whisper STT Provider (batch fallback)
|
|
280
|
-
*
|
|
281
|
-
* Uses @ai-sdk/openai for batch transcription. Does not support streaming.
|
|
282
|
-
* Use as fallback for long-form audio or when Deepgram is unavailable.
|
|
283
|
-
*/
|
|
284
|
-
|
|
285
|
-
/**
|
|
286
|
-
* OpenAI Whisper STT provider. Batch-only — does not support streaming.
|
|
287
|
-
* @internal — obtained via createSTT('whisper', config)
|
|
288
|
-
*/
|
|
289
|
-
declare class WhisperSTTProvider implements STTProvider {
|
|
290
|
-
readonly name = "whisper";
|
|
291
|
-
readonly supportsStreaming = false;
|
|
292
|
-
readonly supportedLanguages: string[];
|
|
293
|
-
private readonly config;
|
|
294
|
-
constructor(config: STTConfig);
|
|
295
|
-
/**
|
|
296
|
-
* Streaming not supported by Whisper. Collects all audio then transcribes.
|
|
297
|
-
* For realtime use, use createSTT('deepgram') instead.
|
|
298
|
-
*/
|
|
299
|
-
transcribeStream(audio: AsyncIterable<Buffer>): AsyncIterable<STTResult>;
|
|
300
|
-
/**
|
|
301
|
-
* Transcribe a complete audio buffer via Whisper.
|
|
302
|
-
*
|
|
303
|
-
* @param audio WAV or PCM buffer
|
|
304
|
-
*/
|
|
305
|
-
transcribeBatch(audio: Buffer): Promise<STTResult>;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
export { AssemblyAISTTProvider, DeepgramSTTProvider, type LanguageCode, LanguageSwitchDetector, type LanguageSwitchEvent, SarvamSTTProvider, WhisperSTTProvider, createSTT, isHinglish };
|