even-toolkit 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +74 -0
  2. package/dist/glasses/bridge.d.ts +5 -2
  3. package/dist/glasses/bridge.d.ts.map +1 -1
  4. package/dist/glasses/bridge.js +25 -2
  5. package/dist/glasses/bridge.js.map +1 -1
  6. package/dist/glasses/composer.js +2 -2
  7. package/dist/glasses/layout.d.ts +2 -0
  8. package/dist/glasses/layout.d.ts.map +1 -1
  9. package/dist/glasses/layout.js +4 -0
  10. package/dist/glasses/layout.js.map +1 -1
  11. package/dist/glasses/types.d.ts +23 -0
  12. package/dist/glasses/types.d.ts.map +1 -1
  13. package/dist/glasses/types.js +15 -0
  14. package/dist/glasses/types.js.map +1 -1
  15. package/dist/glasses/useGlasses.d.ts.map +1 -1
  16. package/dist/glasses/useGlasses.js +17 -4
  17. package/dist/glasses/useGlasses.js.map +1 -1
  18. package/dist/stt/debug.d.ts +8 -0
  19. package/dist/stt/debug.d.ts.map +1 -0
  20. package/dist/stt/debug.js +34 -0
  21. package/dist/stt/debug.js.map +1 -0
  22. package/dist/stt/engine.d.ts +9 -6
  23. package/dist/stt/engine.d.ts.map +1 -1
  24. package/dist/stt/engine.js +141 -75
  25. package/dist/stt/engine.js.map +1 -1
  26. package/dist/stt/index.d.ts +1 -0
  27. package/dist/stt/index.d.ts.map +1 -1
  28. package/dist/stt/index.js +1 -0
  29. package/dist/stt/index.js.map +1 -1
  30. package/dist/stt/providers/deepgram.d.ts +1 -1
  31. package/dist/stt/providers/deepgram.d.ts.map +1 -1
  32. package/dist/stt/providers/deepgram.js +24 -9
  33. package/dist/stt/providers/deepgram.js.map +1 -1
  34. package/dist/stt/providers/whisper-api.d.ts.map +1 -1
  35. package/dist/stt/providers/whisper-api.js +75 -4
  36. package/dist/stt/providers/whisper-api.js.map +1 -1
  37. package/dist/stt/react/useSTT.d.ts.map +1 -1
  38. package/dist/stt/react/useSTT.js +44 -11
  39. package/dist/stt/react/useSTT.js.map +1 -1
  40. package/dist/stt/registry.d.ts.map +1 -1
  41. package/dist/stt/registry.js +0 -8
  42. package/dist/stt/registry.js.map +1 -1
  43. package/dist/stt/sources/glass-bridge.d.ts +8 -15
  44. package/dist/stt/sources/glass-bridge.d.ts.map +1 -1
  45. package/dist/stt/sources/glass-bridge.js +66 -9
  46. package/dist/stt/sources/glass-bridge.js.map +1 -1
  47. package/dist/stt/sources/microphone.d.ts.map +1 -1
  48. package/dist/stt/sources/microphone.js +4 -0
  49. package/dist/stt/sources/microphone.js.map +1 -1
  50. package/dist/stt/types.d.ts +7 -3
  51. package/dist/stt/types.d.ts.map +1 -1
  52. package/glasses/bridge.ts +24 -3
  53. package/glasses/composer.ts +2 -2
  54. package/glasses/layout.ts +6 -0
  55. package/glasses/types.ts +28 -0
  56. package/glasses/useGlasses.ts +18 -5
  57. package/package.json +7 -19
  58. package/stt/debug.ts +38 -0
  59. package/stt/engine.ts +158 -83
  60. package/stt/index.ts +1 -0
  61. package/stt/providers/deepgram.ts +26 -9
  62. package/stt/providers/whisper-api.ts +78 -4
  63. package/stt/react/useSTT.ts +45 -11
  64. package/stt/registry.ts +0 -8
  65. package/stt/sources/glass-bridge.ts +69 -25
  66. package/stt/sources/microphone.ts +7 -0
  67. package/stt/types.ts +4 -3
  68. package/dist/stt/providers/web-speech.d.ts +0 -25
  69. package/dist/stt/providers/web-speech.d.ts.map +0 -1
  70. package/dist/stt/providers/web-speech.js +0 -153
  71. package/dist/stt/providers/web-speech.js.map +0 -1
  72. package/dist/stt/providers/whisper-local/provider.d.ts +0 -31
  73. package/dist/stt/providers/whisper-local/provider.d.ts.map +0 -1
  74. package/dist/stt/providers/whisper-local/provider.js +0 -174
  75. package/dist/stt/providers/whisper-local/provider.js.map +0 -1
  76. package/dist/stt/providers/whisper-local/worker.d.ts +0 -2
  77. package/dist/stt/providers/whisper-local/worker.d.ts.map +0 -1
  78. package/dist/stt/providers/whisper-local/worker.js +0 -35
  79. package/dist/stt/providers/whisper-local/worker.js.map +0 -1
  80. package/stt/providers/web-speech.ts +0 -221
  81. package/stt/providers/whisper-local/provider.ts +0 -226
  82. package/stt/providers/whisper-local/worker.ts +0 -40
package/glasses/useGlasses.ts CHANGED
@@ -7,6 +7,12 @@ import { bindKeyboard } from './keyboard';
7
7
  import { activateKeepAlive, deactivateKeepAlive } from './keep-alive';
8
8
  import type { SplashHandle } from './splash';
9
9
 
10
+ /** Debug overlay — only shows if window.__glassesDebug is true */
11
+ function showDebugOverlay(msg: string): void {
12
+ if (!(window as any).__glassesDebug) return;
13
+ // visible via __glassesDebug flag — no console output in production
14
+ }
15
+
10
16
  export interface UseGlassesConfig<S> {
11
17
  getSnapshot: () => S;
12
18
  /** Convert snapshot to single text display (for 'text' mode) */
@@ -65,7 +71,7 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
65
71
  // Build display text from lines
66
72
  const data = configRef.current.toDisplayData(snapshot, nav);
67
73
  const text = data.lines.map(l => {
68
- if (l.style === 'separator') return '\u2500'.repeat(44);
74
+ if (l.style === 'separator') return '\u2500'.repeat(28) + '\n';
69
75
  if (l.inverted) return `\u25B6 ${l.text}`;
70
76
  return ` ${l.text}`;
71
77
  }).join('\n');
@@ -144,15 +150,20 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
144
150
  };
145
151
 
146
152
  async function initBridge() {
153
+ showDebugOverlay('initBridge: starting...');
147
154
  try {
148
155
  await hub.init();
156
+ showDebugOverlay('initBridge: bridge ready');
157
+ // Expose bridge globally for STT GlassBridgeSource
158
+ (window as any).__evenBridge = hub;
149
159
  if (disposed) return;
150
160
 
151
161
  const splash = configRef.current.splash;
152
162
 
153
163
  if (splash) {
154
- // Image-based splash: show canvas-rendered image, then wait minTime
164
+ showDebugOverlay('initBridge: showing splash...');
155
165
  await splash.show(hub);
166
+ showDebugOverlay('initBridge: splash shown');
156
167
  if (disposed) return;
157
168
 
158
169
  hub.onEvent((event) => {
@@ -170,7 +181,7 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
170
181
  // uses updateHomeText instead of rebuilding (avoids blink)
171
182
  lastHadImagesRef.current = !!configRef.current.homeImageTiles?.length;
172
183
  } else {
173
- // Default text splash
184
+ showDebugOverlay('initBridge: no splash, showing text...');
174
185
  await hub.showTextPage(`\n\n ${configRef.current.appName}`);
175
186
  if (disposed) return;
176
187
 
@@ -179,8 +190,9 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
179
190
  if (action) handleAction(action);
180
191
  });
181
192
  }
182
- } catch {
193
+ } catch (err) {
183
194
  // SDK not available — app continues without glasses
195
+ showDebugOverlay('Bridge init failed: ' + (err instanceof Error ? err.message : String(err)));
184
196
  }
185
197
 
186
198
  // Start polling for state changes
@@ -207,8 +219,9 @@ export function useGlasses<S>(config: UseGlassesConfig<S>): void {
207
219
  unbindKeyboard();
208
220
  hub.dispose();
209
221
  hubRef.current = null;
222
+ (window as any).__evenBridge = null;
210
223
  deactivateKeepAlive();
211
224
  };
212
- // eslint-disable-next-line react-hooks/exhaustive-deps
225
+ // eslint-disable-next-line react-hooks/exhaustive-deps
213
226
  }, []);
214
227
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "even-toolkit",
3
- "version": "1.1.1",
3
+ "version": "1.3.0",
4
4
  "description": "Design system & component library for Even Realities G2 smart glasses apps — 55+ web components, 191 pixel-art icons, glasses SDK bridge, and design tokens.",
5
5
  "type": "module",
6
6
  "main": "./dist/glasses/index.js",
@@ -318,14 +318,6 @@
318
318
  "types": "./dist/stt/audio/pcm-utils.d.ts",
319
319
  "import": "./dist/stt/audio/pcm-utils.js"
320
320
  },
321
- "./stt/providers/web-speech": {
322
- "types": "./dist/stt/providers/web-speech.d.ts",
323
- "import": "./dist/stt/providers/web-speech.js"
324
- },
325
- "./stt/providers/whisper-local": {
326
- "types": "./dist/stt/providers/whisper-local/provider.d.ts",
327
- "import": "./dist/stt/providers/whisper-local/provider.js"
328
- },
329
321
  "./stt/providers/whisper-api": {
330
322
  "types": "./dist/stt/providers/whisper-api.d.ts",
331
323
  "import": "./dist/stt/providers/whisper-api.js"
@@ -348,14 +340,13 @@
348
340
  "prepublishOnly": "npm run build"
349
341
  },
350
342
  "peerDependencies": {
351
- "@evenrealities/even_hub_sdk": ">=0.0.7",
352
- "@jappyjan/even-better-sdk": ">=0.0.10",
343
+ "@evenrealities/even_hub_sdk": ">=0.0.9",
344
+ "@jappyjan/even-better-sdk": ">=0.0.11",
353
345
  "class-variance-authority": ">=0.7.0",
354
346
  "clsx": ">=2.0.0",
355
347
  "react": ">=18.0.0",
356
348
  "react-router": ">=7.0.0",
357
- "tailwind-merge": ">=2.0.0",
358
- "@huggingface/transformers": ">=3.0.0"
349
+ "tailwind-merge": ">=2.0.0"
359
350
  },
360
351
  "peerDependenciesMeta": {
361
352
  "react": {
@@ -372,15 +363,11 @@
372
363
  },
373
364
  "class-variance-authority": {
374
365
  "optional": true
375
- },
376
- "@huggingface/transformers": {
377
- "optional": true
378
366
  }
379
367
  },
380
368
  "devDependencies": {
381
- "@evenrealities/even_hub_sdk": "^0.0.7",
382
- "@huggingface/transformers": "^3.8.1",
383
- "@jappyjan/even-better-sdk": "^0.0.10",
369
+ "@evenrealities/even_hub_sdk": "^0.0.9",
370
+ "@jappyjan/even-better-sdk": "^0.0.11",
384
371
  "@types/react": "^19.0.0",
385
372
  "class-variance-authority": "^0.7.1",
386
373
  "clsx": "^2.1.1",
@@ -408,6 +395,7 @@
408
395
  "url": "https://github.com/fabioglimb/even-toolkit/issues"
409
396
  },
410
397
  "dependencies": {
398
+ "react-is": "^19.2.4",
411
399
  "recharts": "^3.8.0"
412
400
  }
413
401
  }
package/stt/debug.ts ADDED
@@ -0,0 +1,38 @@
1
+ /**
2
+ * STT Debug logging — enabled via window.__sttDebug = true
3
+ * or by calling enableSTTDebug() from the console.
4
+ *
5
+ * All logs are also stored in window.__sttLogs[] for inspection.
6
+ */
7
+
8
+ const MAX_LOGS = 200;
9
+
10
+ export function sttLog(...args: any[]): void {
11
+ const w = window as any;
12
+ if (!w.__sttLogs) w.__sttLogs = [];
13
+
14
+ const entry = {
15
+ t: new Date().toISOString().slice(11, 23),
16
+ msg: args.map(a => typeof a === 'object' ? JSON.stringify(a) : String(a)).join(' '),
17
+ };
18
+
19
+ w.__sttLogs.push(entry);
20
+ if (w.__sttLogs.length > MAX_LOGS) w.__sttLogs.shift();
21
+
22
+ if (w.__sttDebug) {
23
+ console.log(`[STT ${entry.t}]`, ...args);
24
+ }
25
+ }
26
+
27
+ /** Call from browser console: enableSTTDebug() */
28
+ (window as any).enableSTTDebug = () => {
29
+ (window as any).__sttDebug = true;
30
+ console.log('[STT] Debug enabled. Logs:', (window as any).__sttLogs);
31
+ };
32
+
33
+ /** Call from browser console: getSTTLogs() */
34
+ (window as any).getSTTLogs = () => {
35
+ return ((window as any).__sttLogs ?? [])
36
+ .map((e: any) => `${e.t} ${e.msg}`)
37
+ .join('\n');
38
+ };
package/stt/engine.ts CHANGED
@@ -8,17 +8,19 @@ import type {
8
8
  } from './types';
9
9
  import { createProvider } from './registry';
10
10
  import { MicrophoneSource } from './sources/microphone';
11
+ import { GlassBridgeSource } from './sources/glass-bridge';
11
12
  import { resample } from './audio/resample';
12
13
  import { createVAD } from './audio/vad';
13
14
  import { createAudioBuffer } from './audio/buffer';
15
+ import { sttLog } from './debug';
14
16
 
15
17
  /**
16
- * STTEngine orchestrates source -> processing -> provider.
18
+ * STTEngine orchestrates audio source -> processing -> provider.
17
19
  *
18
- * For `web-speech` provider: skips audio source (it handles its own mic).
19
- * For other providers: starts audio source, pipes through optional resample
20
- * and VAD, buffers audio, and calls provider.transcribe() on speech end.
20
+ * Batch providers (whisper-api): record audio, then transcribe on stop.
21
+ * Streaming providers (deepgram): pipe audio in real-time via sendAudio.
21
22
  */
23
+
22
24
  export class STTEngine {
23
25
  private config: STTEngineConfig;
24
26
  private provider: STTProvider | null = null;
@@ -32,15 +34,16 @@ export class STTEngine {
32
34
  private providerUnsubs: Array<() => void> = [];
33
35
 
34
36
  private vad: ReturnType<typeof createVAD> | null = null;
35
- private buffer: ReturnType<typeof createAudioBuffer> | null = null;
37
+ private chunkBuffer: ReturnType<typeof createAudioBuffer> | null = null;
36
38
  private targetSampleRate: number;
39
+ private stopped = false;
37
40
 
38
41
  constructor(config: STTEngineConfig) {
39
42
  this.config = config;
40
43
  this.targetSampleRate = config.sampleRate ?? 16000;
41
44
  }
42
45
 
43
- // ── Event subscriptions ──
46
+ // -- Event subscriptions --
44
47
 
45
48
  onTranscript(cb: (t: STTTranscript) => void): () => void {
46
49
  this.transcriptListeners.push(cb);
@@ -67,25 +70,37 @@ export class STTEngine {
67
70
  }
68
71
 
69
72
  private emitTranscript(t: STTTranscript): void {
73
+ sttLog('transcript:', t.isFinal ? 'FINAL' : 'interim', `"${t.text}"`);
70
74
  for (const cb of this.transcriptListeners) cb(t);
71
75
  }
72
76
 
73
77
  private emitState(s: STTState): void {
78
+ sttLog('state ->', s);
74
79
  for (const cb of this.stateListeners) cb(s);
75
80
  }
76
81
 
77
82
  private emitError(e: STTError): void {
83
+ sttLog('ERROR:', e.code, e.message);
78
84
  for (const cb of this.errorListeners) cb(e);
79
85
  }
80
86
 
81
- // ── Lifecycle ──
87
+ // -- Lifecycle --
82
88
 
83
89
  async start(): Promise<void> {
90
+ sttLog('engine.start()', 'provider:', this.config.provider, 'source:', this.config.source ?? 'auto');
91
+ this.stopped = false;
92
+
93
+ // Reuse existing provider if already initialized
94
+ if (this.provider) {
95
+ sttLog('engine: reusing provider');
96
+ return this.startAudioPipeline();
97
+ }
98
+
84
99
  this.emitState('loading');
85
100
 
86
101
  try {
87
- // Create and init provider
88
102
  this.provider = await createProvider(this.config.provider);
103
+ sttLog('provider created:', this.provider.type, 'modes:', this.provider.supportedModes);
89
104
  this.subscribeProvider(this.provider);
90
105
 
91
106
  await this.provider.init({
@@ -98,36 +113,10 @@ export class STTEngine {
98
113
  vadSilenceMs: typeof this.config.vad === 'object' ? this.config.vad.silenceMs : undefined,
99
114
  sampleRate: this.targetSampleRate,
100
115
  });
101
-
102
- // web-speech handles its own microphone
103
- if (this.config.provider === 'web-speech') {
104
- this.provider.start();
105
- return;
106
- }
107
-
108
- // Set up audio source
109
- this.source = this.resolveSource();
110
- await this.source.start();
111
-
112
- // Set up VAD if enabled
113
- if (this.config.vad) {
114
- const vadConfig = typeof this.config.vad === 'object' ? {
115
- silenceThresholdMs: this.config.vad.silenceMs,
116
- speechThresholdDb: this.config.vad.thresholdDb,
117
- } : undefined;
118
- this.vad = createVAD(vadConfig);
119
- }
120
-
121
- // Set up audio buffer for batch mode
122
- this.buffer = createAudioBuffer({ sampleRate: this.targetSampleRate });
123
-
124
- // Wire audio pipeline
125
- this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
126
- this.processAudio(pcm, sampleRate);
127
- });
128
-
129
- this.provider.start();
116
+ sttLog('provider.init() done');
117
+ await this.startAudioPipeline();
130
118
  } catch (err) {
119
+ sttLog('engine.start() FAILED:', err);
131
120
  const error: STTError = {
132
121
  code: 'unknown',
133
122
  message: err instanceof Error ? err.message : String(err),
@@ -136,29 +125,78 @@ export class STTEngine {
136
125
  this.emitError(error);
137
126
  this.emitState('error');
138
127
 
139
- // Attempt fallback
140
128
  if (this.config.fallback) {
141
129
  await this.switchToFallback();
142
130
  }
143
131
  }
144
132
  }
145
133
 
134
+ /** Set up audio source + wire to provider. Reusable for restart. */
135
+ private async startAudioPipeline(): Promise<void> {
136
+ if (!this.provider) throw new Error('No provider');
137
+
138
+ // Streaming providers -- pipe audio via sendAudio
139
+ if ('sendAudio' in this.provider) {
140
+ this.source = this.resolveSource();
141
+ sttLog('streaming + sendAudio: source =', this.source.constructor.name);
142
+ await this.source.start();
143
+ const provider = this.provider;
144
+ this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
145
+ const samples = sampleRate !== this.targetSampleRate
146
+ ? resample(pcm, sampleRate, this.targetSampleRate)
147
+ : pcm;
148
+ (provider as any).sendAudio(samples);
149
+ });
150
+ this.emitState('listening');
151
+ this.provider.start();
152
+ sttLog('streaming provider started');
153
+ return;
154
+ }
155
+
156
+ // Batch providers: set up audio pipeline
157
+ this.source = this.resolveSource();
158
+ sttLog('audio source resolved:', this.source.constructor.name);
159
+ await this.source.start();
160
+
161
+ const vadConfig = typeof this.config.vad === 'object' ? {
162
+ silenceThresholdMs: this.config.vad.silenceMs ?? 2500,
163
+ speechThresholdDb: this.config.vad.thresholdDb,
164
+ } : { silenceThresholdMs: 2500 };
165
+ this.vad = createVAD(vadConfig);
166
+ this.chunkBuffer = createAudioBuffer({ sampleRate: this.targetSampleRate, maxSeconds: 120 });
167
+
168
+ this.sourceUnsub = this.source.onAudioData((pcm, sampleRate) => {
169
+ this.processAudio(pcm, sampleRate);
170
+ });
171
+
172
+ this.emitState('listening');
173
+ this.provider.start();
174
+ sttLog('engine listening');
175
+ }
176
+
146
177
  stop(): void {
147
- this.provider?.stop();
178
+ if (this.stopped) return;
179
+ this.stopped = true;
180
+ sttLog('engine.stop()');
181
+
148
182
  this.sourceUnsub?.();
149
183
  this.sourceUnsub = null;
150
184
  this.source?.stop();
185
+ this.provider?.stop();
151
186
  this.vad?.reset();
152
- this.buffer?.clear();
187
+
188
+ this.transcribeFullBuffer();
153
189
  }
154
190
 
155
191
  abort(): void {
192
+ this.stopped = true;
193
+ sttLog('engine.abort()');
156
194
  this.provider?.abort();
157
195
  this.sourceUnsub?.();
158
196
  this.sourceUnsub = null;
159
197
  this.source?.stop();
160
198
  this.vad?.reset();
161
- this.buffer?.clear();
199
+ this.chunkBuffer?.clear();
162
200
  }
163
201
 
164
202
  dispose(): void {
@@ -174,75 +212,113 @@ export class STTEngine {
174
212
  this.errorListeners.length = 0;
175
213
  }
176
214
 
177
- // ── Internal ──
215
+ // -- Internal --
178
216
 
179
217
  private resolveSource(): AudioSource {
180
218
  const src = this.config.source;
181
- if (!src || src === 'microphone') {
182
- return new MicrophoneSource();
219
+
220
+ // Explicit AudioSource object passed
221
+ if (src && typeof src === 'object') {
222
+ sttLog('resolveSource: using custom AudioSource object');
223
+ return src;
183
224
  }
225
+
226
+ // Explicit glass-bridge
184
227
  if (src === 'glass-bridge') {
185
- throw new Error(
186
- 'glass-bridge source requires a GlassBridgeSource instance. ' +
187
- 'Pass an AudioSource object directly via config.source.'
188
- );
228
+ sttLog('resolveSource: explicit glass-bridge');
229
+ return new GlassBridgeSource();
230
+ }
231
+
232
+ // Auto-detect: if glasses bridge is available, prefer it
233
+ if ((window as any).__evenBridge) {
234
+ sttLog('resolveSource: auto-detected __evenBridge -> using GlassBridgeSource');
235
+ return new GlassBridgeSource();
189
236
  }
190
- // Custom AudioSource instance
237
+
238
+ // Explicit microphone or fallback
239
+ if (!src || src === 'microphone') {
240
+ sttLog('resolveSource: using MicrophoneSource (browser mic)');
241
+ return new MicrophoneSource();
242
+ }
243
+
191
244
  return src;
192
245
  }
193
246
 
194
247
  private processAudio(pcm: Float32Array, sampleRate: number): void {
195
- // Resample if needed
196
- let samples = sampleRate !== this.targetSampleRate
248
+ if (this.stopped) return;
249
+
250
+ const samples = sampleRate !== this.targetSampleRate
197
251
  ? resample(pcm, sampleRate, this.targetSampleRate)
198
252
  : pcm;
199
253
 
200
- if (!this.buffer) return;
254
+ this.chunkBuffer?.append(samples);
201
255
 
202
- // If VAD is enabled, check for speech boundaries
256
+ // VAD: detect speech end for auto-stop
203
257
  if (this.vad) {
204
258
  const result = this.vad.process(samples);
205
-
206
- if (result.isSpeech || result.speechEnded) {
207
- this.buffer.append(samples);
259
+ if (result.speechEnded && !this.config.continuous) {
260
+ sttLog('VAD: speech ended -> auto-stop');
261
+ this.stop();
208
262
  }
209
-
210
- if (result.speechEnded) {
211
- this.flushBuffer();
212
- }
213
- } else {
214
- // No VAD: accumulate everything, provider handles streaming
215
- this.buffer.append(samples);
216
263
  }
217
264
  }
218
265
 
219
- private async flushBuffer(): Promise<void> {
220
- if (!this.buffer || !this.provider) return;
266
+ /** On stop: transcribe the full recording buffer */
267
+ private async transcribeFullBuffer(): Promise<void> {
268
+ if (!this.provider?.transcribe || !this.chunkBuffer) {
269
+ this.emitState('idle');
270
+ return;
271
+ }
272
+
273
+ const audio = this.chunkBuffer.getAll();
274
+ this.chunkBuffer.clear();
275
+ this.chunkBuffer = null;
221
276
 
222
- const audio = this.buffer.getAll();
223
- this.buffer.clear();
277
+ if (audio.length < this.targetSampleRate * 0.3) {
278
+ sttLog('audio too short, skipping');
279
+ this.emitState('idle');
280
+ return;
281
+ }
224
282
 
225
- if (audio.length === 0) return;
283
+ this.emitState('processing');
284
+ sttLog('transcribing full buffer:', (audio.length / this.targetSampleRate).toFixed(1), 's,', (audio.byteLength / 1024).toFixed(0), 'KB');
226
285
 
227
- // If provider supports batch transcription
228
- if (this.provider.transcribe) {
229
- try {
230
- const transcript = await this.provider.transcribe(audio, this.targetSampleRate);
231
- this.emitTranscript(transcript);
232
- } catch (err) {
233
- this.emitError({
234
- code: 'unknown',
235
- message: err instanceof Error ? err.message : String(err),
236
- provider: this.config.provider,
286
+ try {
287
+ const result = await this.provider.transcribe(audio, this.targetSampleRate);
288
+ const text = result.text.trim();
289
+ sttLog('final result:', `"${text}"`);
290
+
291
+ if (text) {
292
+ this.emitTranscript({
293
+ text,
294
+ isFinal: true,
295
+ confidence: result.confidence,
296
+ timestamp: Date.now(),
237
297
  });
238
298
  }
299
+ } catch (err) {
300
+ this.emitError({
301
+ code: 'unknown',
302
+ message: err instanceof Error ? err.message : String(err),
303
+ provider: this.config.provider,
304
+ });
239
305
  }
306
+
307
+ this.emitState('idle');
240
308
  }
241
309
 
242
310
  private subscribeProvider(provider: STTProvider): void {
311
+ const isStreaming = provider.supportedModes.includes('streaming');
312
+
243
313
  this.providerUnsubs.push(
244
- provider.onTranscript((t) => this.emitTranscript(t)),
245
- provider.onStateChange((s) => this.emitState(s)),
314
+ // Forward transcripts from streaming providers (deepgram emits them directly)
315
+ // Batch providers (whisper-api) are handled by transcribeFullBuffer — don't double-emit
316
+ provider.onTranscript((t) => {
317
+ if (isStreaming) this.emitTranscript(t);
318
+ }),
319
+ provider.onStateChange((s) => {
320
+ if (isStreaming) this.emitState(s);
321
+ }),
246
322
  provider.onError((e) => {
247
323
  this.emitError(e);
248
324
  if (this.config.fallback) {
@@ -254,21 +330,20 @@ export class STTEngine {
254
330
 
255
331
  private async switchToFallback(): Promise<void> {
256
332
  if (!this.config.fallback) return;
333
+ sttLog('switching to fallback provider:', this.config.fallback);
257
334
 
258
- // Clean up current provider
259
335
  for (const unsub of this.providerUnsubs) unsub();
260
336
  this.providerUnsubs.length = 0;
261
337
  this.provider?.dispose();
262
338
  this.provider = null;
263
339
 
264
- // Switch to fallback
265
340
  const fallbackType = this.config.fallback;
266
341
  this.config = { ...this.config, provider: fallbackType, fallback: undefined };
267
342
 
268
343
  try {
269
344
  await this.start();
270
345
  } catch {
271
- // Fallback also failed — nothing more to do
346
+ // Fallback also failed
272
347
  }
273
348
  }
274
349
  }
package/stt/index.ts CHANGED
@@ -8,3 +8,4 @@ export * from './audio/pcm-utils';
8
8
  export * from './audio/resample';
9
9
  export * from './audio/vad';
10
10
  export { createAudioBuffer } from './audio/buffer';
11
+ export { sttLog } from './debug';
package/stt/providers/deepgram.ts CHANGED
@@ -6,6 +6,7 @@ import type {
6
6
  STTTranscript,
7
7
  STTError,
8
8
  } from '../types';
9
+ import { sttLog } from '../debug';
9
10
 
10
11
  interface DeepgramResult {
11
12
  channel?: {
@@ -38,7 +39,7 @@ export class DeepgramProvider implements STTProvider {
38
39
 
39
40
  async init(config: STTProviderConfig): Promise<void> {
40
41
  this.apiKey = config.apiKey ?? '';
41
- this.language = config.language ?? 'en';
42
+ this.language = (config.language ?? 'en').split('-')[0]; // Deepgram wants 'en' not 'en-US'
42
43
  this.modelId = config.modelId ?? 'nova-2';
43
44
 
44
45
  if (!this.apiKey) {
@@ -67,7 +68,10 @@ export class DeepgramProvider implements STTProvider {
67
68
  this.ws = new WebSocket(url, ['token', this.apiKey]);
68
69
  this.ws.binaryType = 'arraybuffer';
69
70
 
71
+ sttLog('deepgram: connecting to', url.substring(0, 60) + '...');
72
+
70
73
  this.ws.onopen = () => {
74
+ sttLog('deepgram: connected');
71
75
  this.setState('listening');
72
76
  };
73
77
 
@@ -75,12 +79,16 @@ export class DeepgramProvider implements STTProvider {
75
79
  try {
76
80
  const data = JSON.parse(event.data as string) as DeepgramResult;
77
81
  const alt = data.channel?.alternatives?.[0];
78
- if (!alt?.transcript) return;
82
+ const text = alt?.transcript ?? '';
83
+
84
+ if (!text) return;
85
+
86
+ sttLog('deepgram: got', data.is_final ? 'FINAL' : 'interim', `"${text}"`);
79
87
 
80
88
  const transcript: STTTranscript = {
81
- text: alt.transcript,
89
+ text,
82
90
  isFinal: data.is_final ?? false,
83
- confidence: alt.confidence ?? 0,
91
+ confidence: alt?.confidence ?? 0,
84
92
  timestamp: Date.now(),
85
93
  };
86
94
  this.emitTranscript(transcript);
@@ -89,7 +97,8 @@ export class DeepgramProvider implements STTProvider {
89
97
  }
90
98
  };
91
99
 
92
- this.ws.onerror = () => {
100
+ this.ws.onerror = (event) => {
101
+ sttLog('deepgram: WebSocket error', event);
93
102
  const err: STTError = {
94
103
  code: 'network',
95
104
  message: 'Deepgram WebSocket error',
@@ -107,14 +116,22 @@ export class DeepgramProvider implements STTProvider {
107
116
  };
108
117
  }
109
118
 
110
- /** Send raw audio data (PCM Int16 or Float32 as ArrayBuffer) to the Deepgram stream. */
119
+ /** Send audio data to the Deepgram stream. Converts Float32 to Int16 (linear16). */
111
120
  sendAudio(data: ArrayBuffer | Int16Array | Float32Array): void {
112
121
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
113
122
 
114
- if (data instanceof ArrayBuffer) {
115
- this.ws.send(data);
116
- } else {
123
+ if (data instanceof Float32Array) {
124
+ // Convert Float32 [-1, 1] to Int16 PCM (Deepgram expects linear16)
125
+ const int16 = new Int16Array(data.length);
126
+ for (let i = 0; i < data.length; i++) {
127
+ const s = Math.max(-1, Math.min(1, data[i]!));
128
+ int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
129
+ }
130
+ this.ws.send(int16.buffer);
131
+ } else if (data instanceof Int16Array) {
117
132
  this.ws.send(data.buffer);
133
+ } else {
134
+ this.ws.send(data);
118
135
  }
119
136
  }
120
137