even-toolkit 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/stt/engine.ts ADDED
@@ -0,0 +1,274 @@
1
+ import type {
2
+ STTEngineConfig,
3
+ STTProvider,
4
+ STTState,
5
+ STTTranscript,
6
+ STTError,
7
+ AudioSource,
8
+ } from './types';
9
+ import { createProvider } from './registry';
10
+ import { MicrophoneSource } from './sources/microphone';
11
+ import { resample } from './audio/resample';
12
+ import { createVAD } from './audio/vad';
13
+ import { createAudioBuffer } from './audio/buffer';
14
+
15
+ /**
16
+ * STTEngine orchestrates source -> processing -> provider.
17
+ *
18
+ * For `web-speech` provider: skips audio source (it handles its own mic).
19
+ * For other providers: starts audio source, pipes through optional resample
20
+ * and VAD, buffers audio, and calls provider.transcribe() on speech end.
21
+ */
22
export class STTEngine {
  /** Active configuration; replaced in-place when switching to a fallback provider. */
  private config: STTEngineConfig;
  /** Current transcription backend; created in start(). */
  private provider: STTProvider | null = null;
  /** Audio input (microphone or a caller-supplied AudioSource). */
  private source: AudioSource | null = null;
  /** Unsubscribe handle for the source's onAudioData subscription. */
  private sourceUnsub: (() => void) | null = null;

  private transcriptListeners: Array<(t: STTTranscript) => void> = [];
  private stateListeners: Array<(s: STTState) => void> = [];
  private errorListeners: Array<(e: STTError) => void> = [];

  /** Unsubscribe handles for subscriptions made on the current provider. */
  private providerUnsubs: Array<() => void> = [];

  /** Voice-activity detector; only created when config.vad is truthy. */
  private vad: ReturnType<typeof createVAD> | null = null;
  /** Accumulates PCM between speech boundaries for batch transcription. */
  private buffer: ReturnType<typeof createAudioBuffer> | null = null;
  /** Sample rate delivered to the provider; input audio is resampled to match. */
  private targetSampleRate: number;

  constructor(config: STTEngineConfig) {
    this.config = config;
    // Default to 16 kHz, the common rate for speech models.
    this.targetSampleRate = config.sampleRate ?? 16000;
  }

  // ── Event subscriptions ──

  /** Subscribe to transcript events. Returns an unsubscribe function. */
  onTranscript(cb: (t: STTTranscript) => void): () => void {
    this.transcriptListeners.push(cb);
    return () => {
      const idx = this.transcriptListeners.indexOf(cb);
      if (idx >= 0) this.transcriptListeners.splice(idx, 1);
    };
  }

  /** Subscribe to engine/provider state changes. Returns an unsubscribe function. */
  onStateChange(cb: (s: STTState) => void): () => void {
    this.stateListeners.push(cb);
    return () => {
      const idx = this.stateListeners.indexOf(cb);
      if (idx >= 0) this.stateListeners.splice(idx, 1);
    };
  }

  /** Subscribe to error events. Returns an unsubscribe function. */
  onError(cb: (e: STTError) => void): () => void {
    this.errorListeners.push(cb);
    return () => {
      const idx = this.errorListeners.indexOf(cb);
      if (idx >= 0) this.errorListeners.splice(idx, 1);
    };
  }

  private emitTranscript(t: STTTranscript): void {
    for (const cb of this.transcriptListeners) cb(t);
  }

  private emitState(s: STTState): void {
    for (const cb of this.stateListeners) cb(s);
  }

  private emitError(e: STTError): void {
    for (const cb of this.errorListeners) cb(e);
  }

  // ── Lifecycle ──

  /**
   * Create and initialize the provider, then wire the audio pipeline.
   *
   * For 'web-speech' the provider manages its own microphone, so no audio
   * source/VAD/buffer is set up. On any failure an error is emitted, state
   * becomes 'error', and — when config.fallback is set — the engine retries
   * once with the fallback provider via switchToFallback().
   */
  async start(): Promise<void> {
    this.emitState('loading');

    try {
      // Create and init provider
      this.provider = await createProvider(this.config.provider);
      this.subscribeProvider(this.provider);

      await this.provider.init({
        language: this.config.language,
        mode: this.config.mode,
        apiKey: this.config.apiKey,
        modelId: this.config.modelId,
        continuous: this.config.continuous,
        // config.vad may be a boolean flag or an options object; both mean "enabled".
        vadEnabled: typeof this.config.vad === 'boolean' ? this.config.vad : !!this.config.vad,
        vadSilenceMs: typeof this.config.vad === 'object' ? this.config.vad.silenceMs : undefined,
        sampleRate: this.targetSampleRate,
      });

      // web-speech handles its own microphone
      if (this.config.provider === 'web-speech') {
        this.provider.start();
        return;
      }

      // Set up audio source
      this.source = this.resolveSource();
      await this.source.start();

      // Set up VAD if enabled
      if (this.config.vad) {
        const vadConfig = typeof this.config.vad === 'object' ? {
          silenceThresholdMs: this.config.vad.silenceMs,
          speechThresholdDb: this.config.vad.thresholdDb,
        } : undefined;
        this.vad = createVAD(vadConfig);
      }

      // Set up audio buffer for batch mode
      this.buffer = createAudioBuffer({ sampleRate: this.targetSampleRate });

      // Wire audio pipeline
      this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
        this.processAudio(pcm, sampleRate);
      });

      this.provider.start();
    } catch (err) {
      const error: STTError = {
        code: 'unknown',
        message: err instanceof Error ? err.message : String(err),
        provider: this.config.provider,
      };
      this.emitError(error);
      this.emitState('error');

      // Attempt fallback
      if (this.config.fallback) {
        await this.switchToFallback();
      }
    }
  }

  /** Stop gracefully: let the provider finish, then tear down the pipeline. */
  stop(): void {
    this.provider?.stop();
    this.sourceUnsub?.();
    this.sourceUnsub = null;
    this.source?.stop();
    this.vad?.reset();
    this.buffer?.clear();
  }

  /** Stop immediately, discarding any in-flight transcription. */
  abort(): void {
    this.provider?.abort();
    this.sourceUnsub?.();
    this.sourceUnsub = null;
    this.source?.stop();
    this.vad?.reset();
    this.buffer?.clear();
  }

  /** Release all resources and listeners. The engine must not be reused afterwards. */
  dispose(): void {
    this.abort();
    for (const unsub of this.providerUnsubs) unsub();
    this.providerUnsubs.length = 0;
    this.provider?.dispose();
    this.provider = null;
    this.source?.dispose();
    this.source = null;
    this.transcriptListeners.length = 0;
    this.stateListeners.length = 0;
    this.errorListeners.length = 0;
  }

  // ── Internal ──

  /**
   * Pick the audio source from config; defaults to the microphone.
   * The 'glass-bridge' string sentinel is rejected — that source must be
   * passed as a constructed instance.
   */
  private resolveSource(): AudioSource {
    const src = this.config.source;
    if (!src || src === 'microphone') {
      return new MicrophoneSource();
    }
    if (src === 'glass-bridge') {
      throw new Error(
        'glass-bridge source requires a GlassBridgeSource instance. ' +
        'Pass an AudioSource object directly via config.source.'
      );
    }
    // Custom AudioSource instance
    return src;
  }

  /**
   * Per-chunk pipeline: resample → (optional) VAD gate → buffer.
   * With VAD enabled, audio is buffered only during speech and flushed to the
   * provider when the VAD reports the end of an utterance.
   */
  private processAudio(pcm: Float32Array, sampleRate: number): void {
    // Resample if needed
    let samples = sampleRate !== this.targetSampleRate
      ? resample(pcm, sampleRate, this.targetSampleRate)
      : pcm;

    if (!this.buffer) return;

    // If VAD is enabled, check for speech boundaries
    if (this.vad) {
      const result = this.vad.process(samples);

      if (result.isSpeech || result.speechEnded) {
        this.buffer.append(samples);
      }

      if (result.speechEnded) {
        // Fire-and-forget: flushBuffer reports its own errors via onError.
        this.flushBuffer();
      }
    } else {
      // No VAD: accumulate everything, provider handles streaming
      // NOTE(review): nothing flushes this buffer until stop()/abort() clears
      // it, so it grows for the whole session — confirm streaming providers
      // receive audio through another path.
      this.buffer.append(samples);
    }
  }

  /**
   * Send the buffered utterance to the provider's batch transcribe(), if it
   * has one, and emit the resulting transcript. Failures are reported via
   * onError rather than thrown.
   */
  private async flushBuffer(): Promise<void> {
    if (!this.buffer || !this.provider) return;

    const audio = this.buffer.getAll();
    this.buffer.clear();

    if (audio.length === 0) return;

    // If provider supports batch transcription
    if (this.provider.transcribe) {
      try {
        const transcript = await this.provider.transcribe(audio, this.targetSampleRate);
        this.emitTranscript(transcript);
      } catch (err) {
        this.emitError({
          code: 'unknown',
          message: err instanceof Error ? err.message : String(err),
          provider: this.config.provider,
        });
      }
    }
  }

  /** Forward provider events to engine listeners; provider errors trigger fallback. */
  private subscribeProvider(provider: STTProvider): void {
    this.providerUnsubs.push(
      provider.onTranscript((t) => this.emitTranscript(t)),
      provider.onStateChange((s) => this.emitState(s)),
      provider.onError((e) => {
        this.emitError(e);
        // NOTE(review): every provider error triggers fallback here, including
        // potentially transient ones — confirm this is intended.
        if (this.config.fallback) {
          this.switchToFallback();
        }
      }),
    );
  }

  /**
   * Tear down the current provider and restart with config.fallback.
   * The fallback field is cleared on the new config, which prevents an
   * infinite fallback loop if the fallback provider also fails.
   */
  private async switchToFallback(): Promise<void> {
    if (!this.config.fallback) return;

    // Clean up current provider
    for (const unsub of this.providerUnsubs) unsub();
    this.providerUnsubs.length = 0;
    this.provider?.dispose();
    this.provider = null;

    // Switch to fallback
    const fallbackType = this.config.fallback;
    this.config = { ...this.config, provider: fallbackType, fallback: undefined };

    try {
      await this.start();
    } catch {
      // Fallback also failed — nothing more to do
    }
  }
}
package/stt/i18n.ts ADDED
@@ -0,0 +1,39 @@
1
+ /** Language mapping utilities for STT providers */
2
+
3
+ /** BCP 47 → Whisper ISO 639-1 */
4
+ export function toWhisperLang(bcp47: string): string {
5
+ return bcp47.split('-')[0].toLowerCase();
6
+ }
7
+
8
+ /** Short code → BCP 47 (best guess) */
9
+ export function toWebSpeechLang(lang: string): string {
10
+ const map: Record<string, string> = {
11
+ en: 'en-US', it: 'it-IT', es: 'es-ES', fr: 'fr-FR',
12
+ de: 'de-DE', pt: 'pt-BR', zh: 'zh-CN', ja: 'ja-JP',
13
+ ko: 'ko-KR', ru: 'ru-RU', ar: 'ar-SA', hi: 'hi-IN',
14
+ };
15
+ if (lang.includes('-')) return lang;
16
+ return map[lang.toLowerCase()] ?? `${lang}-${lang.toUpperCase()}`;
17
+ }
18
+
19
/** One language the toolkit exposes, plus which STT backends handle it. */
export interface SupportedLanguage {
  // Full BCP 47 tag, e.g. 'en-US'.
  code: string;
  // Human-readable English name, intended for UI display.
  name: string;
  // True when the Whisper-based provider supports this language.
  whisper: boolean;
  // True when the browser Web Speech API supports this language.
  webSpeech: boolean;
}

/**
 * Languages advertised by the toolkit. Every entry currently lists both
 * backends as supported; regions match the defaults in toWebSpeechLang.
 */
export const SUPPORTED_LANGUAGES: SupportedLanguage[] = [
  { code: 'en-US', name: 'English', whisper: true, webSpeech: true },
  { code: 'it-IT', name: 'Italian', whisper: true, webSpeech: true },
  { code: 'es-ES', name: 'Spanish', whisper: true, webSpeech: true },
  { code: 'fr-FR', name: 'French', whisper: true, webSpeech: true },
  { code: 'de-DE', name: 'German', whisper: true, webSpeech: true },
  { code: 'pt-BR', name: 'Portuguese', whisper: true, webSpeech: true },
  { code: 'zh-CN', name: 'Chinese', whisper: true, webSpeech: true },
  { code: 'ja-JP', name: 'Japanese', whisper: true, webSpeech: true },
  { code: 'ko-KR', name: 'Korean', whisper: true, webSpeech: true },
  { code: 'ru-RU', name: 'Russian', whisper: true, webSpeech: true },
  { code: 'ar-SA', name: 'Arabic', whisper: true, webSpeech: true },
  { code: 'hi-IN', name: 'Hindi', whisper: true, webSpeech: true },
];
package/stt/index.ts ADDED
@@ -0,0 +1,10 @@
1
// Public entry point: the STT toolkit's API surface.
// Core types, engine, and provider factory.
export * from './types';
export { STTEngine } from './engine';
export { createProvider } from './registry';
// Language mapping helpers and the supported-language catalog.
export * from './i18n';
// Audio sources.
export { MicrophoneSource } from './sources/microphone';
export { GlassBridgeSource } from './sources/glass-bridge';
// Audio processing utilities (PCM conversion, resampling, VAD, buffering).
export * from './audio/pcm-utils';
export * from './audio/resample';
export * from './audio/vad';
export { createAudioBuffer } from './audio/buffer';
@@ -0,0 +1,178 @@
1
+ import type {
2
+ STTProvider,
3
+ STTProviderConfig,
4
+ STTMode,
5
+ STTState,
6
+ STTTranscript,
7
+ STTError,
8
+ } from '../types';
9
+
10
/**
 * Minimal shape of a Deepgram live-transcription message — only the fields
 * this provider reads. Everything is optional because other message types
 * (metadata, etc.) arrive on the same socket and are ignored.
 */
interface DeepgramResult {
  channel?: {
    alternatives?: Array<{
      transcript?: string;
      confidence?: number;
    }>;
  };
  // Marks a hypothesis Deepgram will not revise further.
  is_final?: boolean;
  // Marks the end of a spoken utterance (currently unused by this provider).
  speech_final?: boolean;
}
20
+
21
+ export class DeepgramProvider implements STTProvider {
22
+ readonly type = 'deepgram' as const;
23
+ readonly supportedModes: STTMode[] = ['streaming'];
24
+
25
+ private _state: STTState = 'idle';
26
+ private apiKey = '';
27
+ private language = 'en';
28
+ private modelId = 'nova-2';
29
+ private ws: WebSocket | null = null;
30
+
31
+ private transcriptCbs: Array<(t: STTTranscript) => void> = [];
32
+ private stateCbs: Array<(s: STTState) => void> = [];
33
+ private errorCbs: Array<(e: STTError) => void> = [];
34
+
35
+ get state(): STTState {
36
+ return this._state;
37
+ }
38
+
39
+ async init(config: STTProviderConfig): Promise<void> {
40
+ this.apiKey = config.apiKey ?? '';
41
+ this.language = config.language ?? 'en';
42
+ this.modelId = config.modelId ?? 'nova-2';
43
+
44
+ if (!this.apiKey) {
45
+ const err: STTError = { code: 'not-allowed', message: 'Deepgram API key is required', provider: this.type };
46
+ this.emitError(err);
47
+ throw new Error(err.message);
48
+ }
49
+ }
50
+
51
+ start(): void {
52
+ if (this.ws) {
53
+ this.closeSocket();
54
+ }
55
+
56
+ const params = new URLSearchParams({
57
+ model: this.modelId,
58
+ language: this.language,
59
+ interim_results: 'true',
60
+ punctuate: 'true',
61
+ encoding: 'linear16',
62
+ sample_rate: '16000',
63
+ });
64
+
65
+ const url = `wss://api.deepgram.com/v1/listen?${params.toString()}`;
66
+
67
+ this.ws = new WebSocket(url, ['token', this.apiKey]);
68
+ this.ws.binaryType = 'arraybuffer';
69
+
70
+ this.ws.onopen = () => {
71
+ this.setState('listening');
72
+ };
73
+
74
+ this.ws.onmessage = (event: MessageEvent) => {
75
+ try {
76
+ const data = JSON.parse(event.data as string) as DeepgramResult;
77
+ const alt = data.channel?.alternatives?.[0];
78
+ if (!alt?.transcript) return;
79
+
80
+ const transcript: STTTranscript = {
81
+ text: alt.transcript,
82
+ isFinal: data.is_final ?? false,
83
+ confidence: alt.confidence ?? 0,
84
+ timestamp: Date.now(),
85
+ };
86
+ this.emitTranscript(transcript);
87
+ } catch {
88
+ // Non-JSON message, ignore
89
+ }
90
+ };
91
+
92
+ this.ws.onerror = () => {
93
+ const err: STTError = {
94
+ code: 'network',
95
+ message: 'Deepgram WebSocket error',
96
+ provider: this.type,
97
+ };
98
+ this.emitError(err);
99
+ this.setState('error');
100
+ };
101
+
102
+ this.ws.onclose = () => {
103
+ this.ws = null;
104
+ if (this._state === 'listening') {
105
+ this.setState('idle');
106
+ }
107
+ };
108
+ }
109
+
110
+ /** Send raw audio data (PCM Int16 or Float32 as ArrayBuffer) to the Deepgram stream. */
111
+ sendAudio(data: ArrayBuffer | Int16Array | Float32Array): void {
112
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
113
+
114
+ if (data instanceof ArrayBuffer) {
115
+ this.ws.send(data);
116
+ } else {
117
+ this.ws.send(data.buffer);
118
+ }
119
+ }
120
+
121
+ stop(): void {
122
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) {
123
+ // Send close message per Deepgram protocol
124
+ this.ws.send(JSON.stringify({ type: 'CloseStream' }));
125
+ }
126
+ this.closeSocket();
127
+ }
128
+
129
+ abort(): void {
130
+ this.closeSocket();
131
+ }
132
+
133
+ dispose(): void {
134
+ this.closeSocket();
135
+ this.transcriptCbs = [];
136
+ this.stateCbs = [];
137
+ this.errorCbs = [];
138
+ }
139
+
140
+ onTranscript(cb: (t: STTTranscript) => void): () => void {
141
+ this.transcriptCbs.push(cb);
142
+ return () => { this.transcriptCbs = this.transcriptCbs.filter((c) => c !== cb); };
143
+ }
144
+
145
+ onStateChange(cb: (s: STTState) => void): () => void {
146
+ this.stateCbs.push(cb);
147
+ return () => { this.stateCbs = this.stateCbs.filter((c) => c !== cb); };
148
+ }
149
+
150
+ onError(cb: (e: STTError) => void): () => void {
151
+ this.errorCbs.push(cb);
152
+ return () => { this.errorCbs = this.errorCbs.filter((c) => c !== cb); };
153
+ }
154
+
155
+ // ── Private ──
156
+
157
+ private closeSocket(): void {
158
+ if (this.ws) {
159
+ try { this.ws.close(); } catch { /* ignore */ }
160
+ this.ws = null;
161
+ }
162
+ this.setState('idle');
163
+ }
164
+
165
+ private setState(s: STTState): void {
166
+ if (this._state === s) return;
167
+ this._state = s;
168
+ for (const cb of this.stateCbs) cb(s);
169
+ }
170
+
171
+ private emitTranscript(t: STTTranscript): void {
172
+ for (const cb of this.transcriptCbs) cb(t);
173
+ }
174
+
175
+ private emitError(e: STTError): void {
176
+ for (const cb of this.errorCbs) cb(e);
177
+ }
178
+ }