even-toolkit 1.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/dist/glasses/bridge.d.ts +5 -2
- package/dist/glasses/bridge.d.ts.map +1 -1
- package/dist/glasses/bridge.js +25 -2
- package/dist/glasses/bridge.js.map +1 -1
- package/dist/glasses/composer.js +2 -2
- package/dist/glasses/layout.d.ts +2 -0
- package/dist/glasses/layout.d.ts.map +1 -1
- package/dist/glasses/layout.js +4 -0
- package/dist/glasses/layout.js.map +1 -1
- package/dist/glasses/types.d.ts +23 -0
- package/dist/glasses/types.d.ts.map +1 -1
- package/dist/glasses/types.js +15 -0
- package/dist/glasses/types.js.map +1 -1
- package/dist/glasses/useGlasses.d.ts.map +1 -1
- package/dist/glasses/useGlasses.js +17 -4
- package/dist/glasses/useGlasses.js.map +1 -1
- package/dist/stt/debug.d.ts +8 -0
- package/dist/stt/debug.d.ts.map +1 -0
- package/dist/stt/debug.js +34 -0
- package/dist/stt/debug.js.map +1 -0
- package/dist/stt/engine.d.ts +9 -6
- package/dist/stt/engine.d.ts.map +1 -1
- package/dist/stt/engine.js +141 -75
- package/dist/stt/engine.js.map +1 -1
- package/dist/stt/index.d.ts +1 -0
- package/dist/stt/index.d.ts.map +1 -1
- package/dist/stt/index.js +1 -0
- package/dist/stt/index.js.map +1 -1
- package/dist/stt/providers/deepgram.d.ts +1 -1
- package/dist/stt/providers/deepgram.d.ts.map +1 -1
- package/dist/stt/providers/deepgram.js +24 -9
- package/dist/stt/providers/deepgram.js.map +1 -1
- package/dist/stt/providers/whisper-api.d.ts.map +1 -1
- package/dist/stt/providers/whisper-api.js +75 -4
- package/dist/stt/providers/whisper-api.js.map +1 -1
- package/dist/stt/react/useSTT.d.ts.map +1 -1
- package/dist/stt/react/useSTT.js +44 -11
- package/dist/stt/react/useSTT.js.map +1 -1
- package/dist/stt/registry.d.ts.map +1 -1
- package/dist/stt/registry.js +0 -8
- package/dist/stt/registry.js.map +1 -1
- package/dist/stt/sources/glass-bridge.d.ts +8 -15
- package/dist/stt/sources/glass-bridge.d.ts.map +1 -1
- package/dist/stt/sources/glass-bridge.js +66 -9
- package/dist/stt/sources/glass-bridge.js.map +1 -1
- package/dist/stt/sources/microphone.d.ts.map +1 -1
- package/dist/stt/sources/microphone.js +4 -0
- package/dist/stt/sources/microphone.js.map +1 -1
- package/dist/stt/types.d.ts +7 -3
- package/dist/stt/types.d.ts.map +1 -1
- package/glasses/bridge.ts +24 -3
- package/glasses/composer.ts +2 -2
- package/glasses/layout.ts +6 -0
- package/glasses/types.ts +28 -0
- package/glasses/useGlasses.ts +18 -5
- package/package.json +7 -19
- package/stt/debug.ts +38 -0
- package/stt/engine.ts +158 -83
- package/stt/index.ts +1 -0
- package/stt/providers/deepgram.ts +26 -9
- package/stt/providers/whisper-api.ts +78 -4
- package/stt/react/useSTT.ts +45 -11
- package/stt/registry.ts +0 -8
- package/stt/sources/glass-bridge.ts +69 -25
- package/stt/sources/microphone.ts +7 -0
- package/stt/types.ts +4 -3
- package/dist/stt/providers/web-speech.d.ts +0 -25
- package/dist/stt/providers/web-speech.d.ts.map +0 -1
- package/dist/stt/providers/web-speech.js +0 -153
- package/dist/stt/providers/web-speech.js.map +0 -1
- package/dist/stt/providers/whisper-local/provider.d.ts +0 -31
- package/dist/stt/providers/whisper-local/provider.d.ts.map +0 -1
- package/dist/stt/providers/whisper-local/provider.js +0 -174
- package/dist/stt/providers/whisper-local/provider.js.map +0 -1
- package/dist/stt/providers/whisper-local/worker.d.ts +0 -2
- package/dist/stt/providers/whisper-local/worker.d.ts.map +0 -1
- package/dist/stt/providers/whisper-local/worker.js +0 -35
- package/dist/stt/providers/whisper-local/worker.js.map +0 -1
- package/stt/providers/web-speech.ts +0 -221
- package/stt/providers/whisper-local/provider.ts +0 -226
- package/stt/providers/whisper-local/worker.ts +0 -40
package/glasses/useGlasses.ts
CHANGED
|
@@ -7,6 +7,12 @@ import { bindKeyboard } from './keyboard';
|
|
|
7
7
|
import { activateKeepAlive, deactivateKeepAlive } from './keep-alive';
|
|
8
8
|
import type { SplashHandle } from './splash';
|
|
9
9
|
|
|
10
|
+
/** Debug overlay — only shows if window.__glassesDebug is true */
|
|
11
|
+
function showDebugOverlay(msg: string): void {
|
|
12
|
+
if (!(window as any).__glassesDebug) return;
|
|
13
|
+
// visible via __glassesDebug flag — no console output in production
|
|
14
|
+
}
|
|
15
|
+
|
|
10
16
|
export interface UseGlassesConfig<S> {
|
|
11
17
|
getSnapshot: () => S;
|
|
12
18
|
/** Convert snapshot to single text display (for 'text' mode) */
|
|
@@ -65,7 +71,7 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
|
|
|
65
71
|
// Build display text from lines
|
|
66
72
|
const data = configRef.current.toDisplayData(snapshot, nav);
|
|
67
73
|
const text = data.lines.map(l => {
|
|
68
|
-
if (l.style === 'separator') return '\u2500'.repeat(
|
|
74
|
+
if (l.style === 'separator') return '\u2500'.repeat(28) + '\n';
|
|
69
75
|
if (l.inverted) return `\u25B6 ${l.text}`;
|
|
70
76
|
return ` ${l.text}`;
|
|
71
77
|
}).join('\n');
|
|
@@ -144,15 +150,20 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
|
|
|
144
150
|
};
|
|
145
151
|
|
|
146
152
|
async function initBridge() {
|
|
153
|
+
showDebugOverlay('initBridge: starting...');
|
|
147
154
|
try {
|
|
148
155
|
await hub.init();
|
|
156
|
+
showDebugOverlay('initBridge: bridge ready');
|
|
157
|
+
// Expose bridge globally for STT GlassBridgeSource
|
|
158
|
+
(window as any).__evenBridge = hub;
|
|
149
159
|
if (disposed) return;
|
|
150
160
|
|
|
151
161
|
const splash = configRef.current.splash;
|
|
152
162
|
|
|
153
163
|
if (splash) {
|
|
154
|
-
|
|
164
|
+
showDebugOverlay('initBridge: showing splash...');
|
|
155
165
|
await splash.show(hub);
|
|
166
|
+
showDebugOverlay('initBridge: splash shown');
|
|
156
167
|
if (disposed) return;
|
|
157
168
|
|
|
158
169
|
hub.onEvent((event) => {
|
|
@@ -170,7 +181,7 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
|
|
|
170
181
|
// uses updateHomeText instead of rebuilding (avoids blink)
|
|
171
182
|
lastHadImagesRef.current = !!configRef.current.homeImageTiles?.length;
|
|
172
183
|
} else {
|
|
173
|
-
|
|
184
|
+
showDebugOverlay('initBridge: no splash, showing text...');
|
|
174
185
|
await hub.showTextPage(`\n\n ${configRef.current.appName}`);
|
|
175
186
|
if (disposed) return;
|
|
176
187
|
|
|
@@ -179,8 +190,9 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
|
|
|
179
190
|
if (action) handleAction(action);
|
|
180
191
|
});
|
|
181
192
|
}
|
|
182
|
-
} catch {
|
|
193
|
+
} catch (err) {
|
|
183
194
|
// SDK not available — app continues without glasses
|
|
195
|
+
showDebugOverlay('Bridge init failed: ' + (err instanceof Error ? err.message : String(err)));
|
|
184
196
|
}
|
|
185
197
|
|
|
186
198
|
// Start polling for state changes
|
|
@@ -207,8 +219,9 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
|
|
|
207
219
|
unbindKeyboard();
|
|
208
220
|
hub.dispose();
|
|
209
221
|
hubRef.current = null;
|
|
222
|
+
(window as any).__evenBridge = null;
|
|
210
223
|
deactivateKeepAlive();
|
|
211
224
|
};
|
|
212
|
-
|
|
225
|
+
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
213
226
|
}, []);
|
|
214
227
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "even-toolkit",
|
|
3
|
-
"version": "1.1.1",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Design system & component library for Even Realities G2 smart glasses apps — 55+ web components, 191 pixel-art icons, glasses SDK bridge, and design tokens.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/glasses/index.js",
|
|
@@ -318,14 +318,6 @@
|
|
|
318
318
|
"types": "./dist/stt/audio/pcm-utils.d.ts",
|
|
319
319
|
"import": "./dist/stt/audio/pcm-utils.js"
|
|
320
320
|
},
|
|
321
|
-
"./stt/providers/web-speech": {
|
|
322
|
-
"types": "./dist/stt/providers/web-speech.d.ts",
|
|
323
|
-
"import": "./dist/stt/providers/web-speech.js"
|
|
324
|
-
},
|
|
325
|
-
"./stt/providers/whisper-local": {
|
|
326
|
-
"types": "./dist/stt/providers/whisper-local/provider.d.ts",
|
|
327
|
-
"import": "./dist/stt/providers/whisper-local/provider.js"
|
|
328
|
-
},
|
|
329
321
|
"./stt/providers/whisper-api": {
|
|
330
322
|
"types": "./dist/stt/providers/whisper-api.d.ts",
|
|
331
323
|
"import": "./dist/stt/providers/whisper-api.js"
|
|
@@ -348,14 +340,13 @@
|
|
|
348
340
|
"prepublishOnly": "npm run build"
|
|
349
341
|
},
|
|
350
342
|
"peerDependencies": {
|
|
351
|
-
"@evenrealities/even_hub_sdk": ">=0.0.
|
|
352
|
-
"@jappyjan/even-better-sdk": ">=0.0.
|
|
343
|
+
"@evenrealities/even_hub_sdk": ">=0.0.9",
|
|
344
|
+
"@jappyjan/even-better-sdk": ">=0.0.11",
|
|
353
345
|
"class-variance-authority": ">=0.7.0",
|
|
354
346
|
"clsx": ">=2.0.0",
|
|
355
347
|
"react": ">=18.0.0",
|
|
356
348
|
"react-router": ">=7.0.0",
|
|
357
|
-
"tailwind-merge": ">=2.0.0",
|
|
358
|
-
"@huggingface/transformers": ">=3.0.0"
|
|
349
|
+
"tailwind-merge": ">=2.0.0"
|
|
359
350
|
},
|
|
360
351
|
"peerDependenciesMeta": {
|
|
361
352
|
"react": {
|
|
@@ -372,15 +363,11 @@
|
|
|
372
363
|
},
|
|
373
364
|
"class-variance-authority": {
|
|
374
365
|
"optional": true
|
|
375
|
-
},
|
|
376
|
-
"@huggingface/transformers": {
|
|
377
|
-
"optional": true
|
|
378
366
|
}
|
|
379
367
|
},
|
|
380
368
|
"devDependencies": {
|
|
381
|
-
"@evenrealities/even_hub_sdk": "^0.0.
|
|
382
|
-
|
|
383
|
-
"@jappyjan/even-better-sdk": "^0.0.10",
|
|
369
|
+
"@evenrealities/even_hub_sdk": "^0.0.9",
|
|
370
|
+
"@jappyjan/even-better-sdk": "^0.0.11",
|
|
384
371
|
"@types/react": "^19.0.0",
|
|
385
372
|
"class-variance-authority": "^0.7.1",
|
|
386
373
|
"clsx": "^2.1.1",
|
|
@@ -408,6 +395,7 @@
|
|
|
408
395
|
"url": "https://github.com/fabioglimb/even-toolkit/issues"
|
|
409
396
|
},
|
|
410
397
|
"dependencies": {
|
|
398
|
+
"react-is": "^19.2.4",
|
|
411
399
|
"recharts": "^3.8.0"
|
|
412
400
|
}
|
|
413
401
|
}
|
package/stt/debug.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STT Debug logging — enabled via window.__sttDebug = true
|
|
3
|
+
* or by calling enableSTTDebug() from the console.
|
|
4
|
+
*
|
|
5
|
+
* All logs are also stored in window.__sttLogs[] for inspection.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const MAX_LOGS = 200;
|
|
9
|
+
|
|
10
|
+
export function sttLog(...args: any[]): void {
|
|
11
|
+
const w = window as any;
|
|
12
|
+
if (!w.__sttLogs) w.__sttLogs = [];
|
|
13
|
+
|
|
14
|
+
const entry = {
|
|
15
|
+
t: new Date().toISOString().slice(11, 23),
|
|
16
|
+
msg: args.map(a => typeof a === 'object' ? JSON.stringify(a) : String(a)).join(' '),
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
w.__sttLogs.push(entry);
|
|
20
|
+
if (w.__sttLogs.length > MAX_LOGS) w.__sttLogs.shift();
|
|
21
|
+
|
|
22
|
+
if (w.__sttDebug) {
|
|
23
|
+
console.log(`[STT ${entry.t}]`, ...args);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Call from browser console: enableSTTDebug() */
|
|
28
|
+
(window as any).enableSTTDebug = () => {
|
|
29
|
+
(window as any).__sttDebug = true;
|
|
30
|
+
console.log('[STT] Debug enabled. Logs:', (window as any).__sttLogs);
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
/** Call from browser console: getSTTLogs() */
|
|
34
|
+
(window as any).getSTTLogs = () => {
|
|
35
|
+
return ((window as any).__sttLogs ?? [])
|
|
36
|
+
.map((e: any) => `${e.t} ${e.msg}`)
|
|
37
|
+
.join('\n');
|
|
38
|
+
};
|
package/stt/engine.ts
CHANGED
|
@@ -8,17 +8,19 @@ import type {
|
|
|
8
8
|
} from './types';
|
|
9
9
|
import { createProvider } from './registry';
|
|
10
10
|
import { MicrophoneSource } from './sources/microphone';
|
|
11
|
+
import { GlassBridgeSource } from './sources/glass-bridge';
|
|
11
12
|
import { resample } from './audio/resample';
|
|
12
13
|
import { createVAD } from './audio/vad';
|
|
13
14
|
import { createAudioBuffer } from './audio/buffer';
|
|
15
|
+
import { sttLog } from './debug';
|
|
14
16
|
|
|
15
17
|
/**
|
|
16
|
-
* STTEngine orchestrates source -> processing -> provider.
|
|
18
|
+
* STTEngine — orchestrates audio source -> processing -> provider.
|
|
17
19
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
* and VAD, buffers audio, and calls provider.transcribe() on speech end.
|
|
20
|
+
* Batch providers (whisper-api): record audio, then transcribe on stop.
|
|
21
|
+
* Streaming providers (deepgram): pipe audio in real-time via sendAudio.
|
|
21
22
|
*/
|
|
23
|
+
|
|
22
24
|
export class STTEngine {
|
|
23
25
|
private config: STTEngineConfig;
|
|
24
26
|
private provider: STTProvider | null = null;
|
|
@@ -32,15 +34,16 @@ export class STTEngine {
|
|
|
32
34
|
private providerUnsubs: Array<() => void> = [];
|
|
33
35
|
|
|
34
36
|
private vad: ReturnType<typeof createVAD> | null = null;
|
|
35
|
-
private
|
|
37
|
+
private chunkBuffer: ReturnType<typeof createAudioBuffer> | null = null;
|
|
36
38
|
private targetSampleRate: number;
|
|
39
|
+
private stopped = false;
|
|
37
40
|
|
|
38
41
|
constructor(config: STTEngineConfig) {
|
|
39
42
|
this.config = config;
|
|
40
43
|
this.targetSampleRate = config.sampleRate ?? 16000;
|
|
41
44
|
}
|
|
42
45
|
|
|
43
|
-
//
|
|
46
|
+
// -- Event subscriptions --
|
|
44
47
|
|
|
45
48
|
onTranscript(cb: (t: STTTranscript) => void): () => void {
|
|
46
49
|
this.transcriptListeners.push(cb);
|
|
@@ -67,25 +70,37 @@ export class STTEngine {
|
|
|
67
70
|
}
|
|
68
71
|
|
|
69
72
|
private emitTranscript(t: STTTranscript): void {
|
|
73
|
+
sttLog('transcript:', t.isFinal ? 'FINAL' : 'interim', `"${t.text}"`);
|
|
70
74
|
for (const cb of this.transcriptListeners) cb(t);
|
|
71
75
|
}
|
|
72
76
|
|
|
73
77
|
private emitState(s: STTState): void {
|
|
78
|
+
sttLog('state ->', s);
|
|
74
79
|
for (const cb of this.stateListeners) cb(s);
|
|
75
80
|
}
|
|
76
81
|
|
|
77
82
|
private emitError(e: STTError): void {
|
|
83
|
+
sttLog('ERROR:', e.code, e.message);
|
|
78
84
|
for (const cb of this.errorListeners) cb(e);
|
|
79
85
|
}
|
|
80
86
|
|
|
81
|
-
//
|
|
87
|
+
// -- Lifecycle --
|
|
82
88
|
|
|
83
89
|
async start(): Promise<void> {
|
|
90
|
+
sttLog('engine.start()', 'provider:', this.config.provider, 'source:', this.config.source ?? 'auto');
|
|
91
|
+
this.stopped = false;
|
|
92
|
+
|
|
93
|
+
// Reuse existing provider if already initialized
|
|
94
|
+
if (this.provider) {
|
|
95
|
+
sttLog('engine: reusing provider');
|
|
96
|
+
return this.startAudioPipeline();
|
|
97
|
+
}
|
|
98
|
+
|
|
84
99
|
this.emitState('loading');
|
|
85
100
|
|
|
86
101
|
try {
|
|
87
|
-
// Create and init provider
|
|
88
102
|
this.provider = await createProvider(this.config.provider);
|
|
103
|
+
sttLog('provider created:', this.provider.type, 'modes:', this.provider.supportedModes);
|
|
89
104
|
this.subscribeProvider(this.provider);
|
|
90
105
|
|
|
91
106
|
await this.provider.init({
|
|
@@ -98,36 +113,10 @@ export class STTEngine {
|
|
|
98
113
|
vadSilenceMs: typeof this.config.vad === 'object' ? this.config.vad.silenceMs : undefined,
|
|
99
114
|
sampleRate: this.targetSampleRate,
|
|
100
115
|
});
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
if (this.config.provider === 'web-speech') {
|
|
104
|
-
this.provider.start();
|
|
105
|
-
return;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Set up audio source
|
|
109
|
-
this.source = this.resolveSource();
|
|
110
|
-
await this.source.start();
|
|
111
|
-
|
|
112
|
-
// Set up VAD if enabled
|
|
113
|
-
if (this.config.vad) {
|
|
114
|
-
const vadConfig = typeof this.config.vad === 'object' ? {
|
|
115
|
-
silenceThresholdMs: this.config.vad.silenceMs,
|
|
116
|
-
speechThresholdDb: this.config.vad.thresholdDb,
|
|
117
|
-
} : undefined;
|
|
118
|
-
this.vad = createVAD(vadConfig);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// Set up audio buffer for batch mode
|
|
122
|
-
this.buffer = createAudioBuffer({ sampleRate: this.targetSampleRate });
|
|
123
|
-
|
|
124
|
-
// Wire audio pipeline
|
|
125
|
-
this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
|
|
126
|
-
this.processAudio(pcm, sampleRate);
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
this.provider.start();
|
|
116
|
+
sttLog('provider.init() done');
|
|
117
|
+
await this.startAudioPipeline();
|
|
130
118
|
} catch (err) {
|
|
119
|
+
sttLog('engine.start() FAILED:', err);
|
|
131
120
|
const error: STTError = {
|
|
132
121
|
code: 'unknown',
|
|
133
122
|
message: err instanceof Error ? err.message : String(err),
|
|
@@ -136,29 +125,78 @@ export class STTEngine {
|
|
|
136
125
|
this.emitError(error);
|
|
137
126
|
this.emitState('error');
|
|
138
127
|
|
|
139
|
-
// Attempt fallback
|
|
140
128
|
if (this.config.fallback) {
|
|
141
129
|
await this.switchToFallback();
|
|
142
130
|
}
|
|
143
131
|
}
|
|
144
132
|
}
|
|
145
133
|
|
|
134
|
+
/** Set up audio source + wire to provider. Reusable for restart. */
|
|
135
|
+
private async startAudioPipeline(): Promise<void> {
|
|
136
|
+
if (!this.provider) throw new Error('No provider');
|
|
137
|
+
|
|
138
|
+
// Streaming providers -- pipe audio via sendAudio
|
|
139
|
+
if ('sendAudio' in this.provider) {
|
|
140
|
+
this.source = this.resolveSource();
|
|
141
|
+
sttLog('streaming + sendAudio: source =', this.source.constructor.name);
|
|
142
|
+
await this.source.start();
|
|
143
|
+
const provider = this.provider;
|
|
144
|
+
this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
|
|
145
|
+
const samples = sampleRate !== this.targetSampleRate
|
|
146
|
+
? resample(pcm, sampleRate, this.targetSampleRate)
|
|
147
|
+
: pcm;
|
|
148
|
+
(provider as any).sendAudio(samples);
|
|
149
|
+
});
|
|
150
|
+
this.emitState('listening');
|
|
151
|
+
this.provider.start();
|
|
152
|
+
sttLog('streaming provider started');
|
|
153
|
+
return;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Batch providers: set up audio pipeline
|
|
157
|
+
this.source = this.resolveSource();
|
|
158
|
+
sttLog('audio source resolved:', this.source.constructor.name);
|
|
159
|
+
await this.source.start();
|
|
160
|
+
|
|
161
|
+
const vadConfig = typeof this.config.vad === 'object' ? {
|
|
162
|
+
silenceThresholdMs: this.config.vad.silenceMs ?? 2500,
|
|
163
|
+
speechThresholdDb: this.config.vad.thresholdDb,
|
|
164
|
+
} : { silenceThresholdMs: 2500 };
|
|
165
|
+
this.vad = createVAD(vadConfig);
|
|
166
|
+
this.chunkBuffer = createAudioBuffer({ sampleRate: this.targetSampleRate, maxSeconds: 120 });
|
|
167
|
+
|
|
168
|
+
this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
|
|
169
|
+
this.processAudio(pcm, sampleRate);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
this.emitState('listening');
|
|
173
|
+
this.provider.start();
|
|
174
|
+
sttLog('engine listening');
|
|
175
|
+
}
|
|
176
|
+
|
|
146
177
|
stop(): void {
|
|
147
|
-
this.
|
|
178
|
+
if (this.stopped) return;
|
|
179
|
+
this.stopped = true;
|
|
180
|
+
sttLog('engine.stop()');
|
|
181
|
+
|
|
148
182
|
this.sourceUnsub?.();
|
|
149
183
|
this.sourceUnsub = null;
|
|
150
184
|
this.source?.stop();
|
|
185
|
+
this.provider?.stop();
|
|
151
186
|
this.vad?.reset();
|
|
152
|
-
|
|
187
|
+
|
|
188
|
+
this.transcribeFullBuffer();
|
|
153
189
|
}
|
|
154
190
|
|
|
155
191
|
abort(): void {
|
|
192
|
+
this.stopped = true;
|
|
193
|
+
sttLog('engine.abort()');
|
|
156
194
|
this.provider?.abort();
|
|
157
195
|
this.sourceUnsub?.();
|
|
158
196
|
this.sourceUnsub = null;
|
|
159
197
|
this.source?.stop();
|
|
160
198
|
this.vad?.reset();
|
|
161
|
-
this.
|
|
199
|
+
this.chunkBuffer?.clear();
|
|
162
200
|
}
|
|
163
201
|
|
|
164
202
|
dispose(): void {
|
|
@@ -174,75 +212,113 @@ export class STTEngine {
|
|
|
174
212
|
this.errorListeners.length = 0;
|
|
175
213
|
}
|
|
176
214
|
|
|
177
|
-
//
|
|
215
|
+
// -- Internal --
|
|
178
216
|
|
|
179
217
|
private resolveSource(): AudioSource {
|
|
180
218
|
const src = this.config.source;
|
|
181
|
-
|
|
182
|
-
|
|
219
|
+
|
|
220
|
+
// Explicit AudioSource object passed
|
|
221
|
+
if (src && typeof src === 'object') {
|
|
222
|
+
sttLog('resolveSource: using custom AudioSource object');
|
|
223
|
+
return src;
|
|
183
224
|
}
|
|
225
|
+
|
|
226
|
+
// Explicit glass-bridge
|
|
184
227
|
if (src === 'glass-bridge') {
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
228
|
+
sttLog('resolveSource: explicit glass-bridge');
|
|
229
|
+
return new GlassBridgeSource();
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// Auto-detect: if glasses bridge is available, prefer it
|
|
233
|
+
if ((window as any).__evenBridge) {
|
|
234
|
+
sttLog('resolveSource: auto-detected __evenBridge -> using GlassBridgeSource');
|
|
235
|
+
return new GlassBridgeSource();
|
|
189
236
|
}
|
|
190
|
-
|
|
237
|
+
|
|
238
|
+
// Explicit microphone or fallback
|
|
239
|
+
if (!src || src === 'microphone') {
|
|
240
|
+
sttLog('resolveSource: using MicrophoneSource (browser mic)');
|
|
241
|
+
return new MicrophoneSource();
|
|
242
|
+
}
|
|
243
|
+
|
|
191
244
|
return src;
|
|
192
245
|
}
|
|
193
246
|
|
|
194
247
|
private processAudio(pcm: Float32Array, sampleRate: number): void {
|
|
195
|
-
|
|
196
|
-
|
|
248
|
+
if (this.stopped) return;
|
|
249
|
+
|
|
250
|
+
const samples = sampleRate !== this.targetSampleRate
|
|
197
251
|
? resample(pcm, sampleRate, this.targetSampleRate)
|
|
198
252
|
: pcm;
|
|
199
253
|
|
|
200
|
-
|
|
254
|
+
this.chunkBuffer?.append(samples);
|
|
201
255
|
|
|
202
|
-
//
|
|
256
|
+
// VAD: detect speech end for auto-stop
|
|
203
257
|
if (this.vad) {
|
|
204
258
|
const result = this.vad.process(samples);
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
this.
|
|
259
|
+
if (result.speechEnded && !this.config.continuous) {
|
|
260
|
+
sttLog('VAD: speech ended -> auto-stop');
|
|
261
|
+
this.stop();
|
|
208
262
|
}
|
|
209
|
-
|
|
210
|
-
if (result.speechEnded) {
|
|
211
|
-
this.flushBuffer();
|
|
212
|
-
}
|
|
213
|
-
} else {
|
|
214
|
-
// No VAD: accumulate everything, provider handles streaming
|
|
215
|
-
this.buffer.append(samples);
|
|
216
263
|
}
|
|
217
264
|
}
|
|
218
265
|
|
|
219
|
-
|
|
220
|
-
|
|
266
|
+
/** On stop: transcribe the full recording buffer */
|
|
267
|
+
private async transcribeFullBuffer(): Promise<void> {
|
|
268
|
+
if (!this.provider?.transcribe || !this.chunkBuffer) {
|
|
269
|
+
this.emitState('idle');
|
|
270
|
+
return;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
const audio = this.chunkBuffer.getAll();
|
|
274
|
+
this.chunkBuffer.clear();
|
|
275
|
+
this.chunkBuffer = null;
|
|
221
276
|
|
|
222
|
-
|
|
223
|
-
|
|
277
|
+
if (audio.length < this.targetSampleRate * 0.3) {
|
|
278
|
+
sttLog('audio too short, skipping');
|
|
279
|
+
this.emitState('idle');
|
|
280
|
+
return;
|
|
281
|
+
}
|
|
224
282
|
|
|
225
|
-
|
|
283
|
+
this.emitState('processing');
|
|
284
|
+
sttLog('transcribing full buffer:', (audio.length / this.targetSampleRate).toFixed(1), 's,', (audio.byteLength / 1024).toFixed(0), 'KB');
|
|
226
285
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
this.
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
286
|
+
try {
|
|
287
|
+
const result = await this.provider.transcribe(audio, this.targetSampleRate);
|
|
288
|
+
const text = result.text.trim();
|
|
289
|
+
sttLog('final result:', `"${text}"`);
|
|
290
|
+
|
|
291
|
+
if (text) {
|
|
292
|
+
this.emitTranscript({
|
|
293
|
+
text,
|
|
294
|
+
isFinal: true,
|
|
295
|
+
confidence: result.confidence,
|
|
296
|
+
timestamp: Date.now(),
|
|
237
297
|
});
|
|
238
298
|
}
|
|
299
|
+
} catch (err) {
|
|
300
|
+
this.emitError({
|
|
301
|
+
code: 'unknown',
|
|
302
|
+
message: err instanceof Error ? err.message : String(err),
|
|
303
|
+
provider: this.config.provider,
|
|
304
|
+
});
|
|
239
305
|
}
|
|
306
|
+
|
|
307
|
+
this.emitState('idle');
|
|
240
308
|
}
|
|
241
309
|
|
|
242
310
|
private subscribeProvider(provider: STTProvider): void {
|
|
311
|
+
const isStreaming = provider.supportedModes.includes('streaming');
|
|
312
|
+
|
|
243
313
|
this.providerUnsubs.push(
|
|
244
|
-
|
|
245
|
-
|
|
314
|
+
// Forward transcripts from streaming providers (deepgram emits them directly)
|
|
315
|
+
// Batch providers (whisper-api) are handled by transcribeFullBuffer — don't double-emit
|
|
316
|
+
provider.onTranscript((t) => {
|
|
317
|
+
if (isStreaming) this.emitTranscript(t);
|
|
318
|
+
}),
|
|
319
|
+
provider.onStateChange((s) => {
|
|
320
|
+
if (isStreaming) this.emitState(s);
|
|
321
|
+
}),
|
|
246
322
|
provider.onError((e) => {
|
|
247
323
|
this.emitError(e);
|
|
248
324
|
if (this.config.fallback) {
|
|
@@ -254,21 +330,20 @@ export class STTEngine {
|
|
|
254
330
|
|
|
255
331
|
private async switchToFallback(): Promise<void> {
|
|
256
332
|
if (!this.config.fallback) return;
|
|
333
|
+
sttLog('switching to fallback provider:', this.config.fallback);
|
|
257
334
|
|
|
258
|
-
// Clean up current provider
|
|
259
335
|
for (const unsub of this.providerUnsubs) unsub();
|
|
260
336
|
this.providerUnsubs.length = 0;
|
|
261
337
|
this.provider?.dispose();
|
|
262
338
|
this.provider = null;
|
|
263
339
|
|
|
264
|
-
// Switch to fallback
|
|
265
340
|
const fallbackType = this.config.fallback;
|
|
266
341
|
this.config = { ...this.config, provider: fallbackType, fallback: undefined };
|
|
267
342
|
|
|
268
343
|
try {
|
|
269
344
|
await this.start();
|
|
270
345
|
} catch {
|
|
271
|
-
// Fallback also failed
|
|
346
|
+
// Fallback also failed
|
|
272
347
|
}
|
|
273
348
|
}
|
|
274
349
|
}
|
package/stt/index.ts
CHANGED
package/stt/providers/deepgram.ts
CHANGED
|
@@ -6,6 +6,7 @@ import type {
|
|
|
6
6
|
STTTranscript,
|
|
7
7
|
STTError,
|
|
8
8
|
} from '../types';
|
|
9
|
+
import { sttLog } from '../debug';
|
|
9
10
|
|
|
10
11
|
interface DeepgramResult {
|
|
11
12
|
channel?: {
|
|
@@ -38,7 +39,7 @@ export class DeepgramProvider implements STTProvider {
|
|
|
38
39
|
|
|
39
40
|
async init(config: STTProviderConfig): Promise<void> {
|
|
40
41
|
this.apiKey = config.apiKey ?? '';
|
|
41
|
-
this.language = config.language ?? 'en';
|
|
42
|
+
this.language = (config.language ?? 'en').split('-')[0]; // Deepgram wants 'en' not 'en-US'
|
|
42
43
|
this.modelId = config.modelId ?? 'nova-2';
|
|
43
44
|
|
|
44
45
|
if (!this.apiKey) {
|
|
@@ -67,7 +68,10 @@ export class DeepgramProvider implements STTProvider {
|
|
|
67
68
|
this.ws = new WebSocket(url, ['token', this.apiKey]);
|
|
68
69
|
this.ws.binaryType = 'arraybuffer';
|
|
69
70
|
|
|
71
|
+
sttLog('deepgram: connecting to', url.substring(0, 60) + '...');
|
|
72
|
+
|
|
70
73
|
this.ws.onopen = () => {
|
|
74
|
+
sttLog('deepgram: connected');
|
|
71
75
|
this.setState('listening');
|
|
72
76
|
};
|
|
73
77
|
|
|
@@ -75,12 +79,16 @@ export class DeepgramProvider implements STTProvider {
|
|
|
75
79
|
try {
|
|
76
80
|
const data = JSON.parse(event.data as string) as DeepgramResult;
|
|
77
81
|
const alt = data.channel?.alternatives?.[0];
|
|
78
|
-
|
|
82
|
+
const text = alt?.transcript ?? '';
|
|
83
|
+
|
|
84
|
+
if (!text) return;
|
|
85
|
+
|
|
86
|
+
sttLog('deepgram: got', data.is_final ? 'FINAL' : 'interim', `"${text}"`);
|
|
79
87
|
|
|
80
88
|
const transcript: STTTranscript = {
|
|
81
|
-
text
|
|
89
|
+
text,
|
|
82
90
|
isFinal: data.is_final ?? false,
|
|
83
|
-
confidence: alt
|
|
91
|
+
confidence: alt?.confidence ?? 0,
|
|
84
92
|
timestamp: Date.now(),
|
|
85
93
|
};
|
|
86
94
|
this.emitTranscript(transcript);
|
|
@@ -89,7 +97,8 @@ export class DeepgramProvider implements STTProvider {
|
|
|
89
97
|
}
|
|
90
98
|
};
|
|
91
99
|
|
|
92
|
-
this.ws.onerror = () => {
|
|
100
|
+
this.ws.onerror = (event) => {
|
|
101
|
+
sttLog('deepgram: WebSocket error', event);
|
|
93
102
|
const err: STTError = {
|
|
94
103
|
code: 'network',
|
|
95
104
|
message: 'Deepgram WebSocket error',
|
|
@@ -107,14 +116,22 @@ export class DeepgramProvider implements STTProvider {
|
|
|
107
116
|
};
|
|
108
117
|
}
|
|
109
118
|
|
|
110
|
-
/** Send
|
|
119
|
+
/** Send audio data to the Deepgram stream. Converts Float32 to Int16 (linear16). */
|
|
111
120
|
sendAudio(data: ArrayBuffer | Int16Array | Float32Array): void {
|
|
112
121
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
113
122
|
|
|
114
|
-
if (data instanceof
|
|
115
|
-
|
|
116
|
-
|
|
123
|
+
if (data instanceof Float32Array) {
|
|
124
|
+
// Convert Float32 [-1, 1] to Int16 PCM (Deepgram expects linear16)
|
|
125
|
+
const int16 = new Int16Array(data.length);
|
|
126
|
+
for (let i = 0; i < data.length; i++) {
|
|
127
|
+
const s = Math.max(-1, Math.min(1, data[i]!));
|
|
128
|
+
int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
|
|
129
|
+
}
|
|
130
|
+
this.ws.send(int16.buffer);
|
|
131
|
+
} else if (data instanceof Int16Array) {
|
|
117
132
|
this.ws.send(data.buffer);
|
|
133
|
+
} else {
|
|
134
|
+
this.ws.send(data);
|
|
118
135
|
}
|
|
119
136
|
}
|
|
120
137
|
|