even-toolkit 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ import type {
2
+ STTProvider,
3
+ STTProviderConfig,
4
+ STTMode,
5
+ STTState,
6
+ STTTranscript,
7
+ STTError,
8
+ } from '../types';
9
+
10
+ // ── Inline SpeechRecognition types (not in standard TS lib) ──
11
+
12
// Fired by the recognizer as interim/final results arrive.
interface SpeechRecognitionEvent extends Event {
  readonly results: SpeechRecognitionResultList;
  // Index of the first entry in `results` that changed in this event.
  readonly resultIndex: number;
}

// Fired when recognition fails; `error` is a short machine code
// (e.g. 'no-speech', 'not-allowed'), `message` a human-readable detail.
interface SpeechRecognitionErrorEvent extends Event {
  readonly error: string;
  readonly message: string;
}

// Array-like list of SpeechRecognitionResult entries.
interface SpeechRecognitionResultList {
  readonly length: number;
  item(index: number): SpeechRecognitionResult;
  [index: number]: SpeechRecognitionResult;
}

// One recognized segment; array-like over alternatives (best first).
interface SpeechRecognitionResult {
  readonly length: number;
  // True once this segment will no longer be revised by the recognizer.
  readonly isFinal: boolean;
  item(index: number): SpeechRecognitionAlternative;
  [index: number]: SpeechRecognitionAlternative;
}

// A single hypothesis for a result with its confidence score.
interface SpeechRecognitionAlternative {
  readonly transcript: string;
  readonly confidence: number;
}

// Minimal surface of the browser SpeechRecognition object used by this
// provider; mirrors the Web Speech API shape.
interface SpeechRecognitionInstance {
  continuous: boolean;
  interimResults: boolean;
  lang: string;
  onresult: ((event: SpeechRecognitionEvent) => void) | null;
  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
  onstart: (() => void) | null;
  onend: (() => void) | null;
  start(): void;
  stop(): void;
  abort(): void;
}

// Constructor shape for window.SpeechRecognition / window.webkitSpeechRecognition.
type SpeechRecognitionConstructor = new () => SpeechRecognitionInstance;
54
+
55
+ function getSpeechRecognitionCtor(): SpeechRecognitionConstructor | null {
56
+ if (typeof window === 'undefined') return null;
57
+ const w = window as unknown as Record<string, unknown>;
58
+ return (w.SpeechRecognition ?? w.webkitSpeechRecognition) as SpeechRecognitionConstructor | null;
59
+ }
60
+
61
+ // ── Provider ──
62
+
63
+ export class WebSpeechProvider implements STTProvider {
64
+ readonly type = 'web-speech' as const;
65
+ readonly supportedModes: STTMode[] = ['streaming'];
66
+
67
+ private _state: STTState = 'idle';
68
+ private recognition: SpeechRecognitionInstance | null = null;
69
+ private config: STTProviderConfig = {};
70
+ private stopping = false;
71
+
72
+ private transcriptCbs: Array<(t: STTTranscript) => void> = [];
73
+ private stateCbs: Array<(s: STTState) => void> = [];
74
+ private errorCbs: Array<(e: STTError) => void> = [];
75
+
76
+ get state(): STTState {
77
+ return this._state;
78
+ }
79
+
80
+ async init(config: STTProviderConfig): Promise<void> {
81
+ const Ctor = getSpeechRecognitionCtor();
82
+ if (!Ctor) {
83
+ this.emitError({ code: 'unsupported', message: 'SpeechRecognition not available in this browser', provider: this.type });
84
+ throw new Error('SpeechRecognition not supported');
85
+ }
86
+ this.config = config;
87
+ }
88
+
89
+ start(): void {
90
+ const Ctor = getSpeechRecognitionCtor();
91
+ if (!Ctor) {
92
+ this.emitError({ code: 'unsupported', message: 'SpeechRecognition not available', provider: this.type });
93
+ return;
94
+ }
95
+
96
+ // Tear down previous instance if any
97
+ if (this.recognition) {
98
+ try { this.recognition.abort(); } catch { /* ignore */ }
99
+ }
100
+
101
+ this.stopping = false;
102
+ const recognition = new Ctor();
103
+ recognition.continuous = this.config.continuous ?? true;
104
+ recognition.interimResults = true;
105
+ recognition.lang = this.config.language ?? 'en-US';
106
+
107
+ recognition.onstart = () => {
108
+ this.setState('listening');
109
+ };
110
+
111
+ recognition.onresult = (event: SpeechRecognitionEvent) => {
112
+ for (let i = event.resultIndex; i < event.results.length; i++) {
113
+ const result = event.results[i];
114
+ if (!result?.[0]) continue;
115
+
116
+ const transcript: STTTranscript = {
117
+ text: result[0].transcript,
118
+ isFinal: result.isFinal,
119
+ confidence: result[0].confidence ?? 0,
120
+ timestamp: Date.now(),
121
+ };
122
+ this.emitTranscript(transcript);
123
+ }
124
+ };
125
+
126
+ recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
127
+ // Suppress no-speech and aborted-while-stopping
128
+ if (event.error === 'no-speech') return;
129
+ if (event.error === 'aborted' && this.stopping) return;
130
+
131
+ const code = mapErrorCode(event.error);
132
+ this.emitError({ code, message: event.message || event.error, provider: this.type });
133
+
134
+ if (code !== 'no-speech') {
135
+ this.setState('error');
136
+ }
137
+ };
138
+
139
+ recognition.onend = () => {
140
+ this.recognition = null;
141
+ this.setState('idle');
142
+ };
143
+
144
+ this.recognition = recognition;
145
+ recognition.start();
146
+ }
147
+
148
+ stop(): void {
149
+ this.stopping = true;
150
+ if (this.recognition) {
151
+ this.recognition.stop();
152
+ }
153
+ }
154
+
155
+ abort(): void {
156
+ this.stopping = true;
157
+ if (this.recognition) {
158
+ this.recognition.abort();
159
+ }
160
+ }
161
+
162
+ dispose(): void {
163
+ this.abort();
164
+ this.transcriptCbs = [];
165
+ this.stateCbs = [];
166
+ this.errorCbs = [];
167
+ }
168
+
169
+ onTranscript(cb: (t: STTTranscript) => void): () => void {
170
+ this.transcriptCbs.push(cb);
171
+ return () => {
172
+ this.transcriptCbs = this.transcriptCbs.filter((c) => c !== cb);
173
+ };
174
+ }
175
+
176
+ onStateChange(cb: (s: STTState) => void): () => void {
177
+ this.stateCbs.push(cb);
178
+ return () => {
179
+ this.stateCbs = this.stateCbs.filter((c) => c !== cb);
180
+ };
181
+ }
182
+
183
+ onError(cb: (e: STTError) => void): () => void {
184
+ this.errorCbs.push(cb);
185
+ return () => {
186
+ this.errorCbs = this.errorCbs.filter((c) => c !== cb);
187
+ };
188
+ }
189
+
190
+ // ── Private helpers ──
191
+
192
+ private setState(s: STTState): void {
193
+ if (this._state === s) return;
194
+ this._state = s;
195
+ for (const cb of this.stateCbs) cb(s);
196
+ }
197
+
198
+ private emitTranscript(t: STTTranscript): void {
199
+ for (const cb of this.transcriptCbs) cb(t);
200
+ }
201
+
202
+ private emitError(e: STTError): void {
203
+ for (const cb of this.errorCbs) cb(e);
204
+ }
205
+ }
206
+
207
+ function mapErrorCode(error: string): STTError['code'] {
208
+ switch (error) {
209
+ case 'not-allowed':
210
+ case 'service-not-allowed':
211
+ return 'not-allowed';
212
+ case 'no-speech':
213
+ return 'no-speech';
214
+ case 'network':
215
+ return 'network';
216
+ case 'aborted':
217
+ return 'aborted';
218
+ default:
219
+ return 'unknown';
220
+ }
221
+ }
@@ -0,0 +1,146 @@
1
+ import type {
2
+ STTProvider,
3
+ STTProviderConfig,
4
+ STTMode,
5
+ STTState,
6
+ STTTranscript,
7
+ STTError,
8
+ } from '../types';
9
+ import { float32ToWav } from '../audio/pcm-utils';
10
+
11
// OpenAI Whisper transcription endpoint; audio is uploaded as multipart form data.
const WHISPER_API_URL = 'https://api.openai.com/v1/audio/transcriptions';
12
+
13
+ export class WhisperApiProvider implements STTProvider {
14
+ readonly type = 'whisper-api' as const;
15
+ readonly supportedModes: STTMode[] = ['batch'];
16
+
17
+ private _state: STTState = 'idle';
18
+ private apiKey = '';
19
+ private language = 'en';
20
+ private modelId = 'whisper-1';
21
+
22
+ private transcriptCbs: Array<(t: STTTranscript) => void> = [];
23
+ private stateCbs: Array<(s: STTState) => void> = [];
24
+ private errorCbs: Array<(e: STTError) => void> = [];
25
+
26
+ get state(): STTState {
27
+ return this._state;
28
+ }
29
+
30
+ async init(config: STTProviderConfig): Promise<void> {
31
+ this.apiKey = config.apiKey ?? '';
32
+ this.language = config.language ?? 'en';
33
+ this.modelId = config.modelId ?? 'whisper-1';
34
+
35
+ if (!this.apiKey) {
36
+ const err: STTError = { code: 'not-allowed', message: 'API key is required', provider: this.type };
37
+ this.emitError(err);
38
+ throw new Error(err.message);
39
+ }
40
+ }
41
+
42
+ start(): void {
43
+ // Batch mode — no-op; audio is fed via transcribe()
44
+ }
45
+
46
+ stop(): void {
47
+ this.setState('idle');
48
+ }
49
+
50
+ abort(): void {
51
+ this.setState('idle');
52
+ }
53
+
54
+ dispose(): void {
55
+ this.transcriptCbs = [];
56
+ this.stateCbs = [];
57
+ this.errorCbs = [];
58
+ this.setState('idle');
59
+ }
60
+
61
+ async transcribe(audio: Float32Array, sampleRate: number): Promise<STTTranscript> {
62
+ this.setState('processing');
63
+
64
+ try {
65
+ const wavBlob = float32ToWav(audio, sampleRate);
66
+
67
+ const formData = new FormData();
68
+ formData.append('file', wavBlob, 'audio.wav');
69
+ formData.append('model', this.modelId);
70
+ formData.append('language', this.language);
71
+
72
+ const response = await fetch(WHISPER_API_URL, {
73
+ method: 'POST',
74
+ headers: {
75
+ Authorization: `Bearer ${this.apiKey}`,
76
+ },
77
+ body: formData,
78
+ });
79
+
80
+ if (!response.ok) {
81
+ const code: STTError['code'] = response.status === 401 ? 'not-allowed' : 'network';
82
+ const message = `Whisper API error: ${response.status} ${response.statusText}`;
83
+ const err: STTError = { code, message, provider: this.type };
84
+ this.emitError(err);
85
+ this.setState('error');
86
+ throw new Error(message);
87
+ }
88
+
89
+ const json = (await response.json()) as { text: string };
90
+
91
+ const transcript: STTTranscript = {
92
+ text: json.text,
93
+ isFinal: true,
94
+ confidence: 1,
95
+ timestamp: Date.now(),
96
+ };
97
+
98
+ this.emitTranscript(transcript);
99
+ this.setState('idle');
100
+ return transcript;
101
+ } catch (err: any) {
102
+ // If already handled (HTTP error), just rethrow
103
+ if (this._state === 'error') throw err;
104
+
105
+ const sttError: STTError = {
106
+ code: 'network',
107
+ message: err?.message ?? 'Network error',
108
+ provider: this.type,
109
+ };
110
+ this.emitError(sttError);
111
+ this.setState('error');
112
+ throw err;
113
+ }
114
+ }
115
+
116
+ onTranscript(cb: (t: STTTranscript) => void): () => void {
117
+ this.transcriptCbs.push(cb);
118
+ return () => { this.transcriptCbs = this.transcriptCbs.filter((c) => c !== cb); };
119
+ }
120
+
121
+ onStateChange(cb: (s: STTState) => void): () => void {
122
+ this.stateCbs.push(cb);
123
+ return () => { this.stateCbs = this.stateCbs.filter((c) => c !== cb); };
124
+ }
125
+
126
+ onError(cb: (e: STTError) => void): () => void {
127
+ this.errorCbs.push(cb);
128
+ return () => { this.errorCbs = this.errorCbs.filter((c) => c !== cb); };
129
+ }
130
+
131
+ // ── Private ──
132
+
133
+ private setState(s: STTState): void {
134
+ if (this._state === s) return;
135
+ this._state = s;
136
+ for (const cb of this.stateCbs) cb(s);
137
+ }
138
+
139
+ private emitTranscript(t: STTTranscript): void {
140
+ for (const cb of this.transcriptCbs) cb(t);
141
+ }
142
+
143
+ private emitError(e: STTError): void {
144
+ for (const cb of this.errorCbs) cb(e);
145
+ }
146
+ }
@@ -0,0 +1,226 @@
1
+ import type {
2
+ STTProvider,
3
+ STTProviderConfig,
4
+ STTMode,
5
+ STTState,
6
+ STTTranscript,
7
+ STTError,
8
+ } from '../../types';
9
+
10
+ interface WorkerMessage {
11
+ type: 'init' | 'transcribe';
12
+ modelId?: string;
13
+ language?: string;
14
+ audio?: Float32Array;
15
+ }
16
+
17
+ interface WorkerResponse {
18
+ type: 'ready' | 'progress' | 'result' | 'error';
19
+ loaded?: number;
20
+ total?: number;
21
+ text?: string;
22
+ confidence?: number;
23
+ message?: string;
24
+ }
25
+
26
+ export class WhisperLocalProvider implements STTProvider {
27
+ readonly type = 'whisper-local' as const;
28
+ readonly supportedModes: STTMode[] = ['batch'];
29
+
30
+ private _state: STTState = 'idle';
31
+ private worker: Worker | null = null;
32
+ private config: STTProviderConfig = {};
33
+ private ready = false;
34
+
35
+ private transcriptCbs: Array<(t: STTTranscript) => void> = [];
36
+ private stateCbs: Array<(s: STTState) => void> = [];
37
+ private errorCbs: Array<(e: STTError) => void> = [];
38
+
39
+ private pendingResolve: ((t: STTTranscript) => void) | null = null;
40
+ private pendingReject: ((e: Error) => void) | null = null;
41
+ private initResolve: (() => void) | null = null;
42
+ private initReject: ((e: Error) => void) | null = null;
43
+
44
+ get state(): STTState {
45
+ return this._state;
46
+ }
47
+
48
+ async init(config: STTProviderConfig): Promise<void> {
49
+ this.config = config;
50
+ this.setState('loading');
51
+
52
+ return new Promise<void>((resolve, reject) => {
53
+ this.initResolve = resolve;
54
+ this.initReject = reject;
55
+
56
+ this.worker = new Worker(new URL('./worker.js', import.meta.url), { type: 'module' });
57
+
58
+ this.worker.onmessage = (e: MessageEvent<WorkerResponse>) => {
59
+ this.handleWorkerMessage(e.data);
60
+ };
61
+
62
+ this.worker.onerror = (err) => {
63
+ const sttError: STTError = {
64
+ code: 'model-load',
65
+ message: err.message || 'Worker error',
66
+ provider: this.type,
67
+ };
68
+ this.emitError(sttError);
69
+ this.setState('error');
70
+ if (this.initReject) {
71
+ this.initReject(new Error(sttError.message));
72
+ this.initResolve = null;
73
+ this.initReject = null;
74
+ }
75
+ };
76
+
77
+ const msg: WorkerMessage = {
78
+ type: 'init',
79
+ modelId: config.modelId,
80
+ language: config.language,
81
+ };
82
+ this.worker.postMessage(msg);
83
+ });
84
+ }
85
+
86
+ start(): void {
87
+ // Batch mode: start is a no-op; audio is fed via transcribe()
88
+ if (this.ready) {
89
+ this.setState('listening');
90
+ }
91
+ }
92
+
93
+ stop(): void {
94
+ if (this._state === 'listening' || this._state === 'processing') {
95
+ this.setState('idle');
96
+ }
97
+ }
98
+
99
+ abort(): void {
100
+ if (this.pendingReject) {
101
+ this.pendingReject(new Error('Aborted'));
102
+ this.pendingResolve = null;
103
+ this.pendingReject = null;
104
+ }
105
+ this.setState('idle');
106
+ }
107
+
108
+ dispose(): void {
109
+ this.abort();
110
+ if (this.worker) {
111
+ this.worker.terminate();
112
+ this.worker = null;
113
+ }
114
+ this.ready = false;
115
+ this.transcriptCbs = [];
116
+ this.stateCbs = [];
117
+ this.errorCbs = [];
118
+ }
119
+
120
+ async transcribe(audio: Float32Array, sampleRate: number): Promise<STTTranscript> {
121
+ if (!this.worker || !this.ready) {
122
+ throw new Error('Provider not initialized');
123
+ }
124
+
125
+ this.setState('processing');
126
+
127
+ return new Promise<STTTranscript>((resolve, reject) => {
128
+ this.pendingResolve = resolve;
129
+ this.pendingReject = reject;
130
+
131
+ this.worker!.postMessage(
132
+ { type: 'transcribe', audio, language: this.config.language ?? 'en', sampleRate },
133
+ [audio.buffer],
134
+ );
135
+ });
136
+ }
137
+
138
+ onTranscript(cb: (t: STTTranscript) => void): () => void {
139
+ this.transcriptCbs.push(cb);
140
+ return () => { this.transcriptCbs = this.transcriptCbs.filter((c) => c !== cb); };
141
+ }
142
+
143
+ onStateChange(cb: (s: STTState) => void): () => void {
144
+ this.stateCbs.push(cb);
145
+ return () => { this.stateCbs = this.stateCbs.filter((c) => c !== cb); };
146
+ }
147
+
148
+ onError(cb: (e: STTError) => void): () => void {
149
+ this.errorCbs.push(cb);
150
+ return () => { this.errorCbs = this.errorCbs.filter((c) => c !== cb); };
151
+ }
152
+
153
+ // ── Private ──
154
+
155
+ private handleWorkerMessage(data: WorkerResponse): void {
156
+ switch (data.type) {
157
+ case 'ready':
158
+ this.ready = true;
159
+ this.setState('idle');
160
+ if (this.initResolve) {
161
+ this.initResolve();
162
+ this.initResolve = null;
163
+ this.initReject = null;
164
+ }
165
+ break;
166
+
167
+ case 'progress':
168
+ // Model download progress — stay in loading state
169
+ break;
170
+
171
+ case 'result': {
172
+ const transcript: STTTranscript = {
173
+ text: data.text ?? '',
174
+ isFinal: true,
175
+ confidence: data.confidence ?? 0.95,
176
+ timestamp: Date.now(),
177
+ };
178
+ this.emitTranscript(transcript);
179
+ this.setState('listening');
180
+
181
+ if (this.pendingResolve) {
182
+ this.pendingResolve(transcript);
183
+ this.pendingResolve = null;
184
+ this.pendingReject = null;
185
+ }
186
+ break;
187
+ }
188
+
189
+ case 'error': {
190
+ const sttError: STTError = {
191
+ code: 'model-load',
192
+ message: data.message ?? 'Worker error',
193
+ provider: this.type,
194
+ };
195
+ this.emitError(sttError);
196
+ this.setState('error');
197
+
198
+ if (this.pendingReject) {
199
+ this.pendingReject(new Error(sttError.message));
200
+ this.pendingResolve = null;
201
+ this.pendingReject = null;
202
+ }
203
+ if (this.initReject) {
204
+ this.initReject(new Error(sttError.message));
205
+ this.initResolve = null;
206
+ this.initReject = null;
207
+ }
208
+ break;
209
+ }
210
+ }
211
+ }
212
+
213
+ private setState(s: STTState): void {
214
+ if (this._state === s) return;
215
+ this._state = s;
216
+ for (const cb of this.stateCbs) cb(s);
217
+ }
218
+
219
+ private emitTranscript(t: STTTranscript): void {
220
+ for (const cb of this.transcriptCbs) cb(t);
221
+ }
222
+
223
+ private emitError(e: STTError): void {
224
+ for (const cb of this.errorCbs) cb(e);
225
+ }
226
+ }
@@ -0,0 +1,40 @@
1
+ // WebWorker for Whisper local inference via @huggingface/transformers
2
+
3
+ let pipe: any = null;
4
+
5
+ self.onmessage = async (e: MessageEvent) => {
6
+ const { type } = e.data;
7
+
8
+ if (type === 'init') {
9
+ try {
10
+ const { pipeline } = await import('@huggingface/transformers');
11
+ pipe = await pipeline(
12
+ 'automatic-speech-recognition',
13
+ e.data.modelId ?? 'onnx-community/whisper-tiny',
14
+ {
15
+ progress_callback: (p: any) => self.postMessage({ type: 'progress', ...p }),
16
+ },
17
+ );
18
+ self.postMessage({ type: 'ready' });
19
+ } catch (err: any) {
20
+ self.postMessage({ type: 'error', message: err?.message ?? 'Failed to load model' });
21
+ }
22
+ }
23
+
24
+ if (type === 'transcribe') {
25
+ if (!pipe) {
26
+ self.postMessage({ type: 'error', message: 'Model not loaded' });
27
+ return;
28
+ }
29
+
30
+ try {
31
+ const result = await pipe(e.data.audio, {
32
+ language: e.data.language ?? 'en',
33
+ return_timestamps: false,
34
+ });
35
+ self.postMessage({ type: 'result', text: result.text, confidence: 0.95 });
36
+ } catch (err: any) {
37
+ self.postMessage({ type: 'error', message: err?.message ?? 'Transcription failed' });
38
+ }
39
+ }
40
+ };