even-toolkit 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ import { useState, useRef, useEffect, useCallback } from 'react';
2
+ import type { UseSTTConfig, UseSTTReturn, STTState, STTError } from '../types';
3
+ import { STTEngine } from '../engine';
4
+
5
/**
 * React hook that drives an {@link STTEngine} and mirrors its events into component state.
 *
 * Every `start()` call disposes the previous engine (if any) and builds a new one from the
 * configuration of the most recent render. Final transcripts are appended to `transcript`
 * separated by a single space; non-final ones replace `interimTranscript`.
 *
 * @param config - Hook options. `autoStart` is only consulted on mount; `onTranscript` is
 *   looked up on every event, so a callback supplied by a later render is honoured.
 * @returns The current transcript/state values plus the `start`, `stop`, `abort` and `reset`
 *   controls. The controls keep the same identity across renders.
 */
export function useSTT(config: UseSTTConfig = {}): UseSTTReturn {
  const [transcript, setTranscript] = useState('');
  const [interimTranscript, setInterimTranscript] = useState('');
  const [isListening, setIsListening] = useState(false);
  const [isLoading, setIsLoading] = useState(false);
  // No setter is wired up in this hook, so the reported progress stays at 0.
  const [loadProgress] = useState(0);
  const [error, setError] = useState<STTError | null>(null);
  const [state, setState] = useState<STTState>('idle');

  const engineRef = useRef<STTEngine | null>(null);
  // Refreshed on every render so the memoised callbacks below never read stale options.
  const configRef = useRef(config);
  configRef.current = config;

  // Release the engine when the component unmounts.
  useEffect(() => {
    return () => {
      engineRef.current?.dispose();
      engineRef.current = null;
    };
  }, []);

  const start = useCallback(async () => {
    // Only one engine is kept alive at a time.
    engineRef.current?.dispose();

    const cfg = configRef.current;
    const engine = new STTEngine({
      provider: cfg.provider ?? 'web-speech',
      source: cfg.source,
      language: cfg.language,
      mode: cfg.mode,
      apiKey: cfg.apiKey,
      modelId: cfg.modelId,
      continuous: cfg.continuous,
      vad: cfg.vad,
      fallback: cfg.fallback,
    });

    engineRef.current = engine;

    // Late events from an engine that has been replaced by a newer start() or released on
    // unmount must not leak into state that now belongs to a different session.
    const isCurrent = (): boolean => engineRef.current === engine;

    engine.onTranscript((t) => {
      if (!isCurrent()) return;
      if (t.isFinal) {
        setTranscript((prev) => (prev ? prev + ' ' + t.text : t.text));
        setInterimTranscript('');
      } else {
        setInterimTranscript(t.text);
      }
      // Read the callback from the ref, not from the snapshot taken at start time, so the
      // consumer's latest closure is the one that runs.
      configRef.current.onTranscript?.(t.text, t.isFinal);
    });

    engine.onStateChange((s) => {
      if (!isCurrent()) return;
      setState(s);
      setIsListening(s === 'listening');
      setIsLoading(s === 'loading');
      if (s === 'idle') {
        setInterimTranscript('');
      }
    });

    engine.onError((e) => {
      if (!isCurrent()) return;
      setError(e);
    });

    setError(null);
    await engine.start();
  }, []);

  const stop = useCallback(() => {
    engineRef.current?.stop();
  }, []);

  const abort = useCallback(() => {
    engineRef.current?.abort();
  }, []);

  const reset = useCallback(() => {
    engineRef.current?.abort();
    setTranscript('');
    setInterimTranscript('');
    setError(null);
    setState('idle');
    setIsListening(false);
    setIsLoading(false);
  }, []);

  // Auto-start if configured (mount only).
  useEffect(() => {
    if (!config.autoStart) return;
    // Nobody awaits this call, so a rejection would otherwise surface as an unhandled
    // promise rejection. Record it in `error` unless the engine already reported one.
    start().catch((cause: unknown) => {
      const fallback: STTError = {
        code: 'unknown',
        message: cause instanceof Error ? cause.message : String(cause),
        provider: configRef.current.provider ?? 'web-speech',
      };
      setError((prev) => prev ?? fallback);
    });
    // Only run on mount
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  return {
    transcript,
    interimTranscript,
    isListening,
    isLoading,
    loadProgress,
    error,
    state,
    start,
    stop,
    abort,
    reset,
  };
}
@@ -0,0 +1,24 @@
1
+ import type { STTProvider } from './types';
2
+
3
/**
 * Lazily loads and instantiates the STT provider registered under `type`.
 *
 * Each provider module is pulled in with a dynamic `import()` only when it is requested,
 * and the provider is constructed without arguments.
 *
 * @param type - Provider identifier: `'web-speech'`, `'whisper-local'`, `'whisper-api'`
 *   or `'deepgram'`.
 * @returns A newly constructed provider instance.
 * @throws Error `Unknown STT provider: <type>` (as a rejected promise) for any other value.
 */
export async function createProvider(type: string): Promise<STTProvider> {
  if (type === 'web-speech') {
    const webSpeech = await import('./providers/web-speech');
    return new webSpeech.WebSpeechProvider();
  }

  if (type === 'whisper-local') {
    const whisperLocal = await import('./providers/whisper-local/provider');
    return new whisperLocal.WhisperLocalProvider();
  }

  if (type === 'whisper-api') {
    const whisperApi = await import('./providers/whisper-api');
    return new whisperApi.WhisperApiProvider();
  }

  if (type === 'deepgram') {
    const deepgram = await import('./providers/deepgram');
    return new deepgram.DeepgramProvider();
  }

  throw new Error(`Unknown STT provider: ${type}`);
}
@@ -0,0 +1,67 @@
1
+ import type { AudioSource } from '../types';
2
+ import { uint8ToPcm16, pcm16ToFloat32 } from '../audio/pcm-utils';
3
+
4
/** Sample rate, in Hz, passed to listeners alongside every chunk produced by the glasses source. */
const GLASS_SAMPLE_RATE = 16000;

/**
 * Bridge event as seen by the glasses audio source.
 * Both levels are optional; an event without a PCM payload carries no audio.
 */
export interface GlassAudioEvent {
  audioEvent?: {
    /** Raw PCM bytes for one audio chunk. */
    audioPcm?: Uint8Array;
  };
}

/** Construction options for the glasses audio source. */
export interface GlassBridgeSourceConfig {
  /** The EvenHub bridge instance that fires audio events */
  bridge: {
    /** Registers `handler` to be called with bridge events; returns nothing. */
    onEvent(handler: (event: GlassAudioEvent) => void): void;
  };
}
18
+
19
/**
 * AudioSource for G2 smart glasses.
 * Listens for audio PCM events from the EvenHub SDK bridge
 * and converts 16-bit PCM to Float32.
 *
 * The bridge only exposes `onEvent` (no way to unsubscribe), so a single handler is
 * installed on the first `start()` and reused afterwards; `stop()` merely makes that
 * handler ignore incoming events.
 */
export class GlassBridgeSource implements AudioSource {
  private config: GlassBridgeSourceConfig;
  private listeners: Array<(pcm: Float32Array, sampleRate: number) => void> = [];
  private listening = false;
  /** True once the bridge handler has been installed. */
  private subscribed = false;

  /** Converts one bridge event to Float32 samples and fans it out to the listeners. */
  private readonly handleBridgeEvent = (event: GlassAudioEvent): void => {
    if (!this.listening) return;
    const audioPcm = event.audioEvent?.audioPcm;
    if (!audioPcm || audioPcm.length === 0) return;

    const float32 = pcm16ToFloat32(uint8ToPcm16(audioPcm));

    // Iterate over a snapshot: a listener may unsubscribe (or dispose the source) while it
    // runs, and splicing the live array mid-iteration would skip the next listener.
    for (const cb of [...this.listeners]) {
      if (this.listeners.includes(cb)) {
        cb(float32, GLASS_SAMPLE_RATE);
      }
    }
  };

  /**
   * @param config - Holds the bridge whose events carry the PCM audio.
   */
  constructor(config: GlassBridgeSourceConfig) {
    this.config = config;
  }

  /**
   * Begins forwarding bridge audio to the registered listeners.
   * Calling it while already listening is a no-op; calling it again after `stop()`
   * resumes delivery without registering a second bridge handler.
   */
  async start(): Promise<void> {
    if (this.listening) return;
    this.listening = true;

    // Registering again on every restart would stack handlers on the bridge and deliver
    // each chunk once per past start() call.
    if (this.subscribed) return;
    this.config.bridge.onEvent(this.handleBridgeEvent);
    this.subscribed = true;
  }

  /** Stops forwarding audio; bridge events that arrive afterwards are dropped. */
  stop(): void {
    this.listening = false;
  }

  /**
   * Registers a listener for converted audio chunks.
   *
   * @param cb - Receives the Float32 samples and the sample rate in Hz.
   * @returns A function that removes this listener again.
   */
  onAudioData(cb: (pcm: Float32Array, sampleRate: number) => void): () => void {
    this.listeners.push(cb);
    return () => {
      const idx = this.listeners.indexOf(cb);
      if (idx >= 0) this.listeners.splice(idx, 1);
    };
  }

  /** Stops the source and drops every registered listener. */
  dispose(): void {
    this.stop();
    this.listeners.length = 0;
  }
}
@@ -0,0 +1,75 @@
1
+ import type { AudioSource } from '../types';
2
+
3
/** Buffer size, in sample frames, requested from the ScriptProcessorNode. */
const CHUNK_SIZE = 4096;
/** Sample rate, in Hz, requested from both getUserMedia and the AudioContext. */
const DEFAULT_SAMPLE_RATE = 16000;

/**
 * AudioSource that captures PCM audio from the device microphone
 * using getUserMedia and ScriptProcessorNode.
 *
 * NOTE: ScriptProcessorNode is deprecated in the Web Audio API in favour of AudioWorklet;
 * it is kept here because it needs no separate worklet module.
 */
export class MicrophoneSource implements AudioSource {
  private stream: MediaStream | null = null;
  private audioContext: AudioContext | null = null;
  private scriptNode: ScriptProcessorNode | null = null;
  private sourceNode: MediaStreamAudioSourceNode | null = null;
  private listeners: Array<(pcm: Float32Array, sampleRate: number) => void> = [];
  /** Bumped by every start() and stop() so a pending start() can tell it was superseded. */
  private session = 0;

  /**
   * Requests microphone access and starts delivering mono chunks to the listeners.
   *
   * Calling it while capture is already running is a no-op. If `stop()` (or another
   * `start()`) runs while the permission request is still pending, the stream obtained by
   * this call is released immediately instead of being left recording.
   *
   * @throws Whatever `getUserMedia` or the audio graph setup rejects/throws with; any
   *   stream acquired before the failure is released first.
   */
  async start(): Promise<void> {
    if (this.stream) return;

    const session = ++this.session;
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: { sampleRate: DEFAULT_SAMPLE_RATE, channelCount: 1 },
    });

    if (session !== this.session) {
      // Superseded while waiting: nobody owns this stream, so switch the microphone off.
      for (const track of stream.getTracks()) {
        track.stop();
      }
      return;
    }

    this.stream = stream;

    try {
      const context = new AudioContext({ sampleRate: DEFAULT_SAMPLE_RATE });
      this.audioContext = context;
      const sourceNode = context.createMediaStreamSource(stream);
      this.sourceNode = sourceNode;
      const scriptNode = context.createScriptProcessor(CHUNK_SIZE, 1, 1);
      this.scriptNode = scriptNode;

      scriptNode.onaudioprocess = (event) => {
        const input = event.inputBuffer.getChannelData(0);
        // Copy the buffer — it's reused by the browser
        const chunk = new Float32Array(input.length);
        chunk.set(input);
        // Report the context's actual rate, which is what the samples were rendered at.
        this.emit(chunk, context.sampleRate);
      };

      sourceNode.connect(scriptNode);
      // The processor is connected to the destination so that onaudioprocess keeps firing.
      scriptNode.connect(context.destination);
    } catch (err) {
      // Do not leave the microphone recording when the audio graph could not be built.
      this.stop();
      throw err;
    }
  }

  /** Tears down the audio graph, stops every microphone track and closes the context. */
  stop(): void {
    // Invalidate any start() that is still waiting for getUserMedia.
    this.session++;

    if (this.scriptNode) {
      this.scriptNode.onaudioprocess = null;
      this.scriptNode.disconnect();
      this.scriptNode = null;
    }
    if (this.sourceNode) {
      this.sourceNode.disconnect();
      this.sourceNode = null;
    }
    if (this.stream) {
      for (const track of this.stream.getTracks()) {
        track.stop();
      }
      this.stream = null;
    }
    if (this.audioContext) {
      // Best effort: a failed close is ignored, the context is dropped either way.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }
  }

  /**
   * Registers a listener for captured audio chunks.
   *
   * @param cb - Receives a private copy of the samples and the sample rate in Hz.
   * @returns A function that removes this listener again.
   */
  onAudioData(cb: (pcm: Float32Array, sampleRate: number) => void): () => void {
    this.listeners.push(cb);
    return () => {
      const idx = this.listeners.indexOf(cb);
      if (idx >= 0) this.listeners.splice(idx, 1);
    };
  }

  /** Stops capture and drops every registered listener. */
  dispose(): void {
    this.stop();
    this.listeners.length = 0;
  }

  /** Delivers one chunk to every listener that is still registered when its turn comes. */
  private emit(chunk: Float32Array, rate: number): void {
    // Iterate over a snapshot so a listener that unsubscribes mid-dispatch cannot make the
    // loop skip its neighbour.
    for (const cb of [...this.listeners]) {
      if (this.listeners.includes(cb)) {
        cb(chunk, rate);
      }
    }
  }
}
package/stt/types.ts ADDED
@@ -0,0 +1,104 @@
1
+ // ── STT Provider Types ──
2
+
3
/**
 * Identifier of a speech-to-text provider.
 *
 * The literals are listed for documentation. Because the union also contains `string`,
 * the compiler treats this alias as plain `string`, so any custom identifier type-checks.
 */
export type STTProviderType = 'web-speech' | 'whisper-local' | 'whisper-api' | 'deepgram' | string;

/** How a provider is operated: `'streaming'` or `'batch'`. */
export type STTMode = 'streaming' | 'batch';

/** Lifecycle states reported by providers and surfaced by the hook. */
export type STTState = 'idle' | 'loading' | 'listening' | 'processing' | 'error';

/** A single recognition result. */
export interface STTTranscript {
  /** Recognised text. */
  text: string;
  /** `true` for a final result, `false` for an interim one. */
  isFinal: boolean;
  /** Confidence score for `text` (scale not specified here). */
  confidence: number;
  /** Language of the result, when provided. */
  language?: string;
  /** Timestamp of the result (unit not specified here). */
  timestamp: number;
}

/** Options handed to {@link STTProvider.init}. */
export interface STTProviderConfig {
  /** BCP 47 tag, default 'en-US' */
  language?: string;
  mode?: STTMode;
  apiKey?: string;
  modelId?: string;
  continuous?: boolean;
  vadEnabled?: boolean;
  /** default 1500 */
  vadSilenceMs?: number;
  /** default 16000 */
  sampleRate?: number;
  maxDurationMs?: number;
}

/** Error reported through the `onError` callbacks. */
export interface STTError {
  /** Machine-readable error category. */
  code:
    | 'not-allowed'
    | 'no-speech'
    | 'network'
    | 'model-load'
    | 'aborted'
    | 'unsupported'
    | 'unknown';
  /** Human-readable description. */
  message: string;
  /** Provider the error is attributed to. */
  provider: STTProviderType;
}

/** Contract every speech-to-text provider implements. */
export interface STTProvider {
  readonly type: STTProviderType;
  readonly supportedModes: STTMode[];
  readonly state: STTState;

  /** Asynchronous setup with the given configuration. */
  init(config: STTProviderConfig): Promise<void>;
  start(): void;
  stop(): void;
  abort(): void;
  dispose(): void;

  /** Subscribes to recognition results; the returned function takes no arguments. */
  onTranscript(cb: (t: STTTranscript) => void): () => void;
  /** Subscribes to state changes; the returned function takes no arguments. */
  onStateChange(cb: (s: STTState) => void): () => void;
  /** Subscribes to errors; the returned function takes no arguments. */
  onError(cb: (e: STTError) => void): () => void;

  /** Batch mode: feed raw audio for transcription */
  transcribe?(audio: Float32Array, sampleRate: number): Promise<STTTranscript>;
}
51
+
52
+ // ── Audio Source Types ──
53
+
54
/** Producer of raw audio chunks that listeners can subscribe to. */
export interface AudioSource {
  /** Starts the source; resolves when startup has finished. */
  start(): Promise<void>;

  stop(): void;

  /**
   * Registers a listener for audio chunks.
   *
   * @param cb - Called with the samples and their sample rate.
   * @returns A function taking no arguments (the sources in this package use it to
   *   remove the listener).
   */
  onAudioData(cb: (pcm: Float32Array, sampleRate: number) => void): () => void;

  dispose(): void;
}
60
+
61
+ // ── Engine Config ──
62
+
63
/** Options accepted by the STT engine. Only `provider` is required. */
export interface STTEngineConfig {
  /** Identifier of the provider to use. */
  provider: STTProviderType;
  /** A named built-in source, or a caller-supplied {@link AudioSource} instance. */
  source?: 'microphone' | 'glass-bridge' | AudioSource;
  language?: string;
  mode?: STTMode;
  apiKey?: string;
  modelId?: string;
  continuous?: boolean;
  /** A plain on/off flag, or an object with optional `silenceMs` / `thresholdDb` settings. */
  vad?: boolean | { silenceMs?: number; thresholdDb?: number };
  sampleRate?: number;
  /** Identifier of a second provider (presumably used when `provider` fails — confirm in the engine). */
  fallback?: STTProviderType;
}
75
+
76
+ // ── React Hook Types ──
77
+
78
/** Options accepted by the `useSTT` hook. Every field is optional. */
export interface UseSTTConfig {
  provider?: STTProviderType;
  /** Only the two named sources can be selected through the hook. */
  source?: 'microphone' | 'glass-bridge';
  language?: string;
  mode?: STTMode;
  apiKey?: string;
  modelId?: string;
  continuous?: boolean;
  /** On/off flag only; the hook does not expose the object form. */
  vad?: boolean;
  autoStart?: boolean;
  fallback?: STTProviderType;
  /** Callback receiving a transcript's text and whether it is final. */
  onTranscript?: (text: string, isFinal: boolean) => void;
}

/** Values and controls returned by the `useSTT` hook. */
export interface UseSTTReturn {
  transcript: string;
  interimTranscript: string;
  isListening: boolean;
  isLoading: boolean;
  loadProgress: number;
  /** Last reported error, or `null` when there is none. */
  error: STTError | null;
  state: STTState;
  start: () => Promise<void>;
  stop: () => void;
  abort: () => void;
  reset: () => void;
}